feat: architecture multi-modèles LLM + quality engine + benchmark
- Multi-modèles : 4 rôles LLM (coding=gemma3:27b-cloud, cpam=gemma3:27b-cloud, validation=deepseek-v3.2:cloud, qc=gemma3:12b) avec get_model(role)
- Prompts externalisés : 7 templates dans src/prompts/templates.py
- Cache Ollama : modèle stocké par entrée (migration auto ancien format)
- call_ollama() : paramètre role= (priorité: model > role > global)
- Quality engine : veto_engine + decision_engine + rules_router (YAML)
- Benchmark qualité : scripts/benchmark_quality.py (A/B, métriques CIM-10)
- Fix biologie : valeurs qualitatives (troponine négative) non filtrées
- Fix CPAM : gemma3:27b-cloud au lieu de deepseek (JSON tronqué par thinking)
- CPAM max_tokens 4000→6000, viewer admin multi-modèles
- Benchmark 10 dossiers : 100% DAS valides, 10/10 CPAM, 243s/dossier

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -4,6 +4,7 @@ from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
import unicodedata
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
@@ -19,6 +20,7 @@ from ..config import (
|
||||
Complication,
|
||||
Diagnostic,
|
||||
DossierMedical,
|
||||
load_lab_value_sanity,
|
||||
Imagerie,
|
||||
Sejour,
|
||||
Traitement,
|
||||
@@ -168,13 +170,13 @@ def _extract_das_llm(text: str, dossier: DossierMedical) -> None:
|
||||
try:
|
||||
from .rag_search import extract_das_llm
|
||||
from .ollama_cache import OllamaCache
|
||||
from ..config import OLLAMA_CACHE_PATH, OLLAMA_MODEL
|
||||
from ..config import OLLAMA_CACHE_PATH, get_model
|
||||
except ImportError:
|
||||
logger.warning("Module RAG non disponible pour l'extraction DAS LLM")
|
||||
return
|
||||
|
||||
try:
|
||||
cache = OllamaCache(OLLAMA_CACHE_PATH, OLLAMA_MODEL)
|
||||
cache = OllamaCache(OLLAMA_CACHE_PATH, get_model("coding"))
|
||||
|
||||
# Construire le contexte
|
||||
contexte = {
|
||||
@@ -684,37 +686,181 @@ def _match_drug_atc(med_name: str, drug_atc: dict[str, str]) -> Optional[str]:
|
||||
return None
|
||||
|
||||
|
||||
def _extract_biologie(text: str, dossier: DossierMedical) -> None:
|
||||
"""Extrait les résultats biologiques clés.
|
||||
|
||||
Supporte les aliases (TGO/TGP, Hb), variantes d'unités (UI/L, µmol/L, g/dL),
|
||||
et des tests additionnels (hémoglobine, plaquettes, leucocytes, créatinine).
|
||||
def _norm_key(s: str) -> str:
|
||||
"""Normalise une clé (minuscules, sans accents) pour index YAML."""
|
||||
s = (s or "").strip().lower()
|
||||
s = unicodedata.normalize("NFKD", s)
|
||||
s = "".join(ch for ch in s if not unicodedata.combining(ch))
|
||||
return re.sub(r"\s+", " ", s)
|
||||
|
||||
|
||||
def _parse_float_and_token(raw: str) -> tuple[float | None, str | None]:
|
||||
"""Parse un float et renvoie aussi le token numérique normalisé (avec '.')."""
|
||||
if raw is None:
|
||||
return None, None
|
||||
s = str(raw).strip()
|
||||
m = re.search(r"(-?\d+(?:[\.,]\d+)?)", s)
|
||||
if not m:
|
||||
return None, None
|
||||
token = m.group(1).replace(",", ".")
|
||||
try:
|
||||
return float(token), token
|
||||
except ValueError:
|
||||
return None, None
|
||||
|
||||
|
||||
def _sanitize_bio_value(test_name: str, raw_value: str, sanity_cfg: dict) -> Optional[tuple[str, float, str, Optional[str]]]:
    """Apply anti-artefact guard rails (OCR/PDF) to a raw lab value.

    Args:
        test_name: Display name of the test; normalized with ``_norm_key``
            before being looked up under ``sanity_cfg["tests"]``.
        raw_value: Raw value text as captured from the document.
        sanity_cfg: Sanity configuration; only the ``"tests"`` mapping is
            read here, and a falsy config is treated as empty.

    Returns:
        ``(token, value_float, quality, reason)`` or ``None`` when the value
        cannot be parsed as a number.
        quality: ok | suspect | discarded
    """
    val, token = _parse_float_and_token(raw_value)
    if val is None or token is None:
        return None

    key = _norm_key(test_name)
    tests_cfg = (sanity_cfg or {}).get("tests") or {}
    cfg = tests_cfg.get(key) or {}
    hard_min = cfg.get("hard_min")
    hard_max = cfg.get("hard_max")

    # Values outside the configured hard bounds are flagged as discarded.
    if hard_min is not None and val < float(hard_min):
        return token, val, "discarded", f"Valeur hors bornes plausibles (<{hard_min})"
    if hard_max is not None and val > float(hard_max):
        return token, val, "discarded", f"Valeur hors bornes plausibles (>{hard_max})"

    quality = "ok"
    reason: Optional[str] = None

    suspect_cfg = cfg.get("suspect") or {}
    single_digit_over = suspect_cfg.get("single_digit_over")
    if single_digit_over is not None:
        # E.g. potassium '8' instead of '4.8' (lost decimal part): a raw value
        # made of exactly one digit at or above the threshold is suspicious.
        if re.fullmatch(r"\d", str(raw_value).strip()) and val >= float(single_digit_over):
            quality = "suspect"
            reason = "Valeur à 1 chiffre (possible décimale perdue) : vérifier dans le CR"

    return token, val, quality, reason
|
||||
|
||||
|
||||
def _extract_biologie(text: str, dossier: DossierMedical) -> None:
    """Extract key laboratory results from *text* into *dossier*.

    Notes:
        - Supports aliases (TGO/TGP, Hb, Na/K...).
        - Captures several occurrences per test (useful to confirm or rule
          out diagnoses), deduplicated on the normalized value and capped
          per test.
        - Deliberately kept *simple* (regexes over extracted text): a value
          that only appears in a badly extracted PDF table may be missed.

    Args:
        text: Extracted document text to scan.
        dossier: Record updated in place; kept results are appended to
            ``dossier.biologie_cle`` and discarded values are logged in
            ``dossier.biologie_discarded``.
    """
    # (pattern, test_name)
    bio_patterns: list[tuple[str, str]] = [
        (r"[Ll]ipas[ée]mie\s*(?:[àa=:])?\s*(\d+)\s*(?:UI/L|U/L)?", "Lipasémie"),
        (r"\bCRP\b\s*[=:àa]?\s*(\d+(?:[.,]\d+)?)\s*(?:mg/[Ll])?", "CRP"),
        (r"(?:\bASAT\b|\bTGO\b)\s*[=:àa]?\s*([\d.,]+)\s*(?:N|U(?:I)?/L)?", "ASAT"),
        (r"(?:\bALAT\b|\bTGP\b)\s*[=:àa]?\s*([\d.,]+)\s*(?:N|U(?:I)?/L)?", "ALAT"),
        (r"\bGGT\b\s*[=:àa]?\s*(\d+)\s*(?:U(?:I)?/L)?", "GGT"),
        (r"\bPAL\b\s*[=:àa]?\s*(\d+)\s*(?:U(?:I)?/L)?", "PAL"),
        (r"[Bb]ilirubine\s+(?:totale\s+)?[àa=:]\s*(\d+(?:[.,]\d+)?)\s*(?:µmol/L|mg/dL)?", "Bilirubine totale"),

        # Ionogram / electrolytes
        (r"(?:[Ss]odium|[Nn]atr[ée]mie|(?<![A-Za-z])Na\+?(?![A-Za-z]))\s*[=:àa]?\s*([0-9]{2,3}(?:[.,][0-9]+)?)\s*(?:mmol/L|mEq/L)?", "Sodium"),
        (r"(?:[Pp]otassium|[Kk]ali[ée]mie|(?<![A-Za-z])K\+?(?![A-Za-z]))\s*[=:àa]?\s*([0-9](?:[.,][0-9]+)?)\s*(?:mmol/L|mEq/L)?", "Potassium"),

        (r"[Tt]roponine\s+(?:us\s+)?(n[ée]gative|positive|normale)", "Troponine"),
        (r"(?:[Hh][ée]moglobine|\bHb\b)\s*[=:àa]?\s*(\d+(?:[.,]\d+)?)\s*(?:g/dL|g/L)?", "Hémoglobine"),
        (r"[Pp]laquettes?\s*[=:àa]?\s*(\d+(?:[.,]\d+)?)\s*(?:/mm3|G/L)?", "Plaquettes"),
        (r"[Ll]eucocytes?\s*[=:àa]?\s*(\d+(?:[.,]\d+)?)\s*(?:/mm3|G/L)?", "Leucocytes"),
        (r"[Cc]r[ée]atinine?\s*[=:àa]?\s*(\d+(?:[.,]\d+)?)\s*(?:µmol/L|mg/dL)?", "Créatinine"),
    ]

    # De-duplication + per-test cap (keeps the JSON from blowing up)
    max_per_test = 6
    counts: dict[str, int] = {}
    seen: set[tuple[str, str]] = set()

    sanity_cfg = load_lab_value_sanity()
    policy = (sanity_cfg or {}).get("policy") or {}
    drop_out_of_range = bool(policy.get("drop_out_of_range", True))
    keep_suspect = bool(policy.get("keep_suspect", True))

    for pattern, test_name in bio_patterns:
        for m in re.finditer(pattern, text):
            raw_value = (m.group(1) or "").strip()
            if not raw_value:
                continue

            # Qualitative values (troponin negative/positive/normal):
            # no numeric sanitization.
            if re.fullmatch(r"[a-zA-Zéèêëàâôûùïîç]+", raw_value):
                key = (test_name, raw_value.lower())
                if key in seen:
                    continue
                seen.add(key)
                counts[test_name] = counts.get(test_name, 0) + 1
                if counts[test_name] > max_per_test:
                    break
                anomalie = _is_abnormal(test_name, raw_value)
                dossier.biologie_cle.append(
                    BiologieCle(
                        test=test_name,
                        valeur=raw_value,
                        valeur_num=None,
                        anomalie=anomalie,
                        quality="ok",
                        discard_reason=None,
                    )
                )
                continue

            sanitized = _sanitize_bio_value(test_name, raw_value, sanity_cfg)
            if sanitized is None:
                continue
            token, val_num, quality, reason = sanitized

            # Policy may demote suspect values to discarded.
            if quality == "suspect" and not keep_suspect:
                quality = "discarded"
                reason = reason or "Valeur suspecte (policy keep_suspect=false)"

            # De-duplicate on the normalized value
            key = (test_name, token)
            if key in seen:
                continue
            seen.add(key)

            counts[test_name] = counts.get(test_name, 0) + 1
            if counts[test_name] > max_per_test:
                break

            if quality == "discarded":
                # Keep a trace for auditing without polluting the quality rules.
                dossier.biologie_discarded.append(
                    {
                        "test": test_name,
                        "raw": raw_value,
                        "valeur": token,
                        "valeur_num": val_num,
                        "reason": reason,
                    }
                )
                if drop_out_of_range:
                    continue

            anomalie = _is_abnormal(test_name, token)
            dossier.biologie_cle.append(
                BiologieCle(
                    test=test_name,
                    valeur=token,
                    valeur_num=val_num,
                    anomalie=anomalie,
                    quality=quality,
                    discard_reason=reason,
                )
            )
|
||||
|
||||
|
||||
|
||||
def _extract_imagerie(text: str, dossier: DossierMedical) -> None:
|
||||
@@ -1013,6 +1159,9 @@ BIO_NORMALS: dict[str, tuple[float, float]] = {
|
||||
"GGT": (0, 60),
|
||||
"PAL": (0, 150),
|
||||
"Bilirubine totale": (0, 17),
|
||||
# Ionogramme (fallback adulte ; les règles de décision utilisent reference_ranges.yaml)
|
||||
"Sodium": (135, 145),
|
||||
"Potassium": (3.5, 5.0),
|
||||
"Hémoglobine": (12, 17),
|
||||
"Plaquettes": (150, 400),
|
||||
"Leucocytes": (4, 10),
|
||||
@@ -1152,36 +1301,11 @@ def _validate_justifications(dossier: DossierMedical) -> None:
|
||||
ctx = build_enriched_context(dossier)
|
||||
ctx_str = format_enriched_context(ctx)
|
||||
|
||||
prompt = f"""Tu es un médecin DIM contrôleur qualité PMSI.
|
||||
Vérifie la cohérence et la justification de ce codage complet.
|
||||
|
||||
DOSSIER CLINIQUE :
|
||||
{ctx_str}
|
||||
|
||||
CODAGE À VALIDER :
|
||||
{codes_section}
|
||||
|
||||
Pour CHAQUE code, vérifie :
|
||||
1. Existe-t-il une preuve clinique concrète dans le dossier ?
|
||||
2. Le code est-il le plus spécifique possible ?
|
||||
3. Y a-t-il des conflits ou redondances avec d'autres codes ?
|
||||
|
||||
Réponds avec un JSON :
|
||||
{{
|
||||
"validations": [
|
||||
{{
|
||||
"numero": 1,
|
||||
"code": "X99.9",
|
||||
"verdict": "maintenir|reclasser|supprimer",
|
||||
"confidence_recommandee": "high|medium|low",
|
||||
"commentaire": "explication courte"
|
||||
}}
|
||||
],
|
||||
"alertes_globales": ["..."]
|
||||
}}"""
|
||||
from ..prompts import QC_VALIDATION
|
||||
prompt = QC_VALIDATION.format(ctx_str=ctx_str, codes_section=codes_section)
|
||||
|
||||
try:
|
||||
result = call_ollama(prompt, temperature=0.1, max_tokens=2500)
|
||||
result = call_ollama(prompt, temperature=0.1, max_tokens=2500, role="qc")
|
||||
except Exception:
|
||||
logger.warning("Erreur lors de l'appel Ollama pour validation QC", exc_info=True)
|
||||
return
|
||||
|
||||
Reference in New Issue
Block a user