feat: architecture multi-modèles LLM + quality engine + benchmark
- Multi-modèles : 4 rôles LLM (coding=gemma3:27b-cloud, cpam=gemma3:27b-cloud, validation=deepseek-v3.2:cloud, qc=gemma3:12b) avec get_model(role) - Prompts externalisés : 7 templates dans src/prompts/templates.py - Cache Ollama : modèle stocké par entrée (migration auto ancien format) - call_ollama() : paramètre role= (priorité: model > role > global) - Quality engine : veto_engine + decision_engine + rules_router (YAML) - Benchmark qualité : scripts/benchmark_quality.py (A/B, métriques CIM-10) - Fix biologie : valeurs qualitatives (troponine négative) non filtrées - Fix CPAM : gemma3:27b-cloud au lieu de deepseek (JSON tronqué par thinking) - CPAM max_tokens 4000→6000, viewer admin multi-modèles - Benchmark 10 dossiers : 100% DAS valides, 10/10 CPAM, 243s/dossier Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
267
src/main.py
267
src/main.py
@@ -10,7 +10,19 @@ import time
|
||||
from pathlib import Path
|
||||
|
||||
from .anonymization.anonymizer import Anonymizer
|
||||
from .config import ANONYMIZED_DIR, INPUT_DIR, OUTPUT_DIR, REPORTS_DIR, STRUCTURED_DIR, AnonymizationReport, DossierMedical
|
||||
from .config import (
|
||||
ANONYMIZED_DIR,
|
||||
INPUT_DIR,
|
||||
OUTPUT_DIR,
|
||||
REPORTS_DIR,
|
||||
STRUCTURED_DIR,
|
||||
AnonymizationReport,
|
||||
DossierMedical,
|
||||
DossierMetrics,
|
||||
VetoReport,
|
||||
set_rules_runtime,
|
||||
reset_rules_runtime,
|
||||
)
|
||||
from .extraction.document_classifier import classify
|
||||
from .extraction.crh_parser import parse_crh
|
||||
from .extraction.document_splitter import split_documents
|
||||
@@ -18,6 +30,9 @@ from .extraction.pdf_extractor import extract_text, extract_text_with_pages
|
||||
from .extraction.trackare_parser import parse_trackare
|
||||
from .medical.cim10_extractor import extract_medical_info
|
||||
from .medical.ghm import estimate_ghm
|
||||
from .quality.veto_engine import apply_vetos
|
||||
from .quality.decision_engine import apply_decisions, decision_summaries
|
||||
from .quality.rules_router import build_rules_runtime_context
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
@@ -25,6 +40,102 @@ logging.basicConfig(
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _compute_metrics(dossier: DossierMedical) -> DossierMetrics:
    """Build the "active vs. excluded" counters of a dossier and attach them.

    Business rule: metrics (GHM, severity, stats) must only count diagnoses
    that are still *active*, i.e. not discarded by a decision.

    Args:
        dossier: Dossier whose ``diagnostics_associes``, ``actes_ccam`` and
            ``diagnostic_principal`` are inspected.

    Returns:
        The newly created ``DossierMetrics``. The same object is also stored
        on ``dossier.metrics``.
    """

    def _is_removed(item) -> bool:
        # A diagnosis is "removed" when it carries a decision whose action is REMOVE.
        decision = getattr(item, "cim10_decision", None)
        return decision is not None and getattr(decision, "action", None) == "REMOVE"

    def _is_coded(item) -> bool:
        # Either a final code or a mere suggestion counts as "has a code".
        return bool(getattr(item, "cim10_final", None) or getattr(item, "cim10_suggestion", None))

    def _is_active(item) -> bool:
        if _is_removed(item):
            return False
        final_code = getattr(item, "cim10_final", None)
        if final_code:
            return True
        # Backward compatibility: an entry with no final code (strictly None),
        # a suggestion and no decision at all is still considered active.
        return (
            final_code is None
            and bool(getattr(item, "cim10_suggestion", None))
            and getattr(item, "cim10_decision", None) is None
        )

    associated = dossier.diagnostics_associes
    associated_count = len(associated)
    # The three counters are independent: a removed diagnosis without any
    # code is counted both as removed and as "no code".
    removed_count = sum(1 for diag in associated if _is_removed(diag))
    uncoded_count = sum(1 for diag in associated if not _is_coded(diag))
    active_count = sum(1 for diag in associated if _is_active(diag))

    procedures = dossier.actes_ccam
    procedure_count = len(procedures)
    coded_procedure_count = sum(
        1 for acte in procedures if getattr(acte, "code_ccam_suggestion", None)
    )

    # The principal diagnosis only counts as coded when it was not removed.
    principal = dossier.diagnostic_principal
    principal_coded = (
        principal is not None
        and not _is_removed(principal)
        and _is_coded(principal)
    )

    metrics = DossierMetrics(
        das_total=associated_count,
        das_active=active_count,
        das_excluded=max(0, associated_count - active_count),
        das_removed=removed_count,
        das_no_code=uncoded_count,
        actes_total=procedure_count,
        actes_with_code=coded_procedure_count,
        dp_has_code=principal_coded,
    )
    dossier.metrics = metrics
    return metrics
|
||||
|
||||
|
||||
def _inject_veto_alerts(dossier: DossierMedical, veto: VetoReport, scope: str = "FINAL") -> None:
    """Refresh the veto section of ``dossier.alertes_codage`` without duplicates.

    The previous veto section is *replaced*, whether it came from an
    individual PDF or from a merge pass, so the merged JSON stays readable.
    Every string entry starting with ``VETOS:``, ``VETOS[`` or ``VETO-`` is
    dropped; all other entries are kept in order.

    Args:
        dossier: Dossier whose ``alertes_codage`` list is rewritten in place
            (a missing/empty value is treated as an empty list).
        veto: Veto report; nothing is appended when its verdict is ``"PASS"``.
        scope: Label embedded in the section header (e.g. ``"PDF"``, ``"FINAL"``).
    """
    veto_prefixes = ("VETOS:", "VETOS[", "VETO-")
    dossier.alertes_codage = [
        entry
        for entry in (dossier.alertes_codage or [])
        if not (isinstance(entry, str) and entry.startswith(veto_prefixes))
    ]

    if veto.verdict != "PASS":
        header = f"VETOS[{scope}]: {veto.verdict} (score={veto.score_contestabilite})"
        dossier.alertes_codage.append(header)
        # Cap the detail lines at 25 issues to keep the alert list short.
        dossier.alertes_codage.extend(
            f"{issue.veto} [{issue.severity}] {issue.where}: {issue.message}"
            for issue in veto.issues[:25]
        )
|
||||
|
||||
|
||||
def _inject_decision_alerts(dossier: DossierMedical, scope: str = "FINAL") -> None:
    """Inject the decision summaries (downgrade/removal) into ``alertes_codage``.

    The previous decision section is replaced so the JSON stays readable:
    string entries starting with ``DECISION:`` or ``DECISIONS[`` are dropped
    before the new section is appended. All other entries are kept in order.

    Args:
        dossier: Dossier whose ``alertes_codage`` list is rewritten in place
            (a missing/empty value is treated as an empty list).
        scope: Label embedded in the section header (e.g. ``"PDF"``, ``"FINAL"``).
    """
    # "DECISIONS[" matches the header emitted below. Filtering only on
    # "DECISION:" left the "DECISIONS[<scope>]: ..." header of an earlier pass
    # in place, so headers piled up across the PDF and FINAL passes.
    # NOTE(review): the format of the lines returned by decision_summaries()
    # is not visible here — confirm they start with one of these prefixes.
    decision_prefixes = ("DECISION:", "DECISIONS[")
    dossier.alertes_codage = [
        entry
        for entry in (dossier.alertes_codage or [])
        if not (isinstance(entry, str) and entry.startswith(decision_prefixes))
    ]

    lines = decision_summaries(dossier)
    if lines:
        dossier.alertes_codage.append(f"DECISIONS[{scope}]: {len(lines)} ligne(s)")
        # Cap the detail lines at 30 to keep the alert list short.
        dossier.alertes_codage.extend(lines[:30])
|
||||
|
||||
|
||||
# Flags globaux
|
||||
_use_edsnlp = True
|
||||
_use_rag = True
|
||||
@@ -89,17 +200,67 @@ def process_pdf(pdf_path: Path) -> list[tuple[str, DossierMedical, Anonymization
|
||||
dossier.source_file = pdf_path.name
|
||||
dossier.document_type = doc_type
|
||||
logger.info(" DP%s : %s", part_label, dossier.diagnostic_principal)
|
||||
logger.info(" DAS : %d, Actes : %d", len(dossier.diagnostics_associes), len(dossier.actes_ccam))
|
||||
|
||||
# 8. Estimation GHM
|
||||
# 8. Vetos (contestabilité) + décisions (post-traitement)
|
||||
# Routage des règles (packs) : par défaut, on garde le socle vetos/decisions,
|
||||
# et on active des packs additionnels selon les signaux du dossier (codes/labs/extraits).
|
||||
rules_token = None
|
||||
try:
|
||||
rules_ctx = build_rules_runtime_context(dossier)
|
||||
dossier.rules_runtime = rules_ctx
|
||||
rules_token = set_rules_runtime(rules_ctx)
|
||||
|
||||
packs = ",".join(rules_ctx.get("enabled_packs", []))
|
||||
if packs:
|
||||
logger.info(" Règles%s : packs=%s", part_label, packs)
|
||||
if rules_ctx.get("triggers_fired"):
|
||||
logger.info(" Règles%s : triggers=%s", part_label, ",".join(rules_ctx["triggers_fired"]))
|
||||
except Exception:
|
||||
logger.warning(" Routage règles : erreur", exc_info=True)
|
||||
|
||||
veto = None
|
||||
try:
|
||||
veto = apply_vetos(dossier)
|
||||
dossier.veto_report = veto
|
||||
except Exception:
|
||||
logger.warning(" Vetos : erreur lors du contrôle", exc_info=True)
|
||||
|
||||
try:
|
||||
apply_decisions(dossier)
|
||||
_inject_decision_alerts(dossier, scope="PDF")
|
||||
if veto is not None:
|
||||
_inject_veto_alerts(dossier, veto, scope="PDF")
|
||||
except Exception:
|
||||
logger.warning(" Décisions : erreur lors du post-traitement", exc_info=True)
|
||||
finally:
|
||||
if rules_token is not None:
|
||||
reset_rules_runtime(rules_token)
|
||||
|
||||
# 9. Estimation GHM (sur codes finaux) + métriques (actifs vs écartés)
|
||||
try:
|
||||
metrics = _compute_metrics(dossier)
|
||||
ghm = estimate_ghm(dossier)
|
||||
dossier.ghm_estimation = ghm
|
||||
logger.info(" GHM : CMD=%s, Type=%s, Sévérité=%d → %s",
|
||||
ghm.cmd or "?", ghm.type_ghm or "?",
|
||||
ghm.severite, ghm.ghm_approx or "?")
|
||||
|
||||
logger.info(
|
||||
" DAS : actifs=%d / total=%d (écartés=%d, removed=%d, no_code=%d) | Actes : %d (avec code=%d)",
|
||||
metrics.das_active,
|
||||
metrics.das_total,
|
||||
metrics.das_excluded,
|
||||
metrics.das_removed,
|
||||
metrics.das_no_code,
|
||||
metrics.actes_total,
|
||||
metrics.actes_with_code,
|
||||
)
|
||||
logger.info(
|
||||
" GHM : CMD=%s, Type=%s, Sévérité=%d → %s",
|
||||
ghm.cmd or "?",
|
||||
ghm.type_ghm or "?",
|
||||
ghm.severite,
|
||||
ghm.ghm_approx or "?",
|
||||
)
|
||||
except Exception:
|
||||
logger.warning(" Erreur estimation GHM", exc_info=True)
|
||||
logger.warning(" Erreur estimation GHM/metrics", exc_info=True)
|
||||
|
||||
dossier.processing_time_s = round(time.time() - t0, 2)
|
||||
results.append((anonymized_text, dossier, report))
|
||||
@@ -223,6 +384,11 @@ def main(input_path: str | None = None) -> None:
|
||||
action="store_true",
|
||||
help="Forcer la reconstruction de l'index FAISS",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--rebuild-index-all",
|
||||
action="store_true",
|
||||
help="Reconstruit les index FAISS + ré-indexe tous les référentiels uploadés",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--export-rum",
|
||||
action="store_true",
|
||||
@@ -246,6 +412,24 @@ def main(input_path: str | None = None) -> None:
|
||||
logger.info("Dictionnaire CCAM : %d codes générés", len(result))
|
||||
return
|
||||
|
||||
if args.rebuild_index_all:
|
||||
from .medical.rag_index import build_index
|
||||
build_index(force=True)
|
||||
# Ré-indexer tous les référentiels uploadés (pour appliquer le nouveau chunking/filtrage)
|
||||
try:
|
||||
from .viewer.referentiels import ReferentielManager
|
||||
rm = ReferentielManager()
|
||||
total = 0
|
||||
for ref in rm.list_all():
|
||||
try:
|
||||
total += rm.index_referentiel(ref["id"])
|
||||
except Exception:
|
||||
logger.warning("Ré-indexation référentiel échouée : %s", ref.get("filename"), exc_info=True)
|
||||
logger.info("Ré-indexation référentiels terminée : %d chunks ajoutés", total)
|
||||
except Exception:
|
||||
logger.warning("Impossible de ré-indexer les référentiels uploadés", exc_info=True)
|
||||
return
|
||||
|
||||
if args.rebuild_index:
|
||||
from .medical.rag_index import build_index
|
||||
build_index(force=True)
|
||||
@@ -341,16 +525,6 @@ def main(input_path: str | None = None) -> None:
|
||||
from .medical.fusion import merge_dossiers
|
||||
merged = merge_dossiers(group_dossiers)
|
||||
|
||||
# Re-estimer le GHM sur le dossier fusionné (DP/DAS consolidés)
|
||||
try:
|
||||
ghm = estimate_ghm(merged)
|
||||
merged.ghm_estimation = ghm
|
||||
logger.info(" GHM fusionné : CMD=%s, Type=%s, Sévérité=%d → %s",
|
||||
ghm.cmd or "?", ghm.type_ghm or "?",
|
||||
ghm.severite, ghm.ghm_approx or "?")
|
||||
except Exception:
|
||||
logger.warning(" Erreur estimation GHM fusionné", exc_info=True)
|
||||
|
||||
struct_dir = STRUCTURED_DIR / subdir
|
||||
struct_dir.mkdir(parents=True, exist_ok=True)
|
||||
merged_path = struct_dir / f"{subdir}_fusionne_cim10.json"
|
||||
@@ -389,6 +563,65 @@ def main(input_path: str | None = None) -> None:
|
||||
# Écrire le dossier fusionné (après enrichissement CPAM éventuel)
|
||||
if merged is not None and subdir:
|
||||
try:
|
||||
# Vetos sur la version finale (fusion + CPAM) + décisions
|
||||
# Routage des règles (packs) pour la version fusionnée
|
||||
rules_token = None
|
||||
try:
|
||||
rules_ctx = build_rules_runtime_context(merged)
|
||||
merged.rules_runtime = rules_ctx
|
||||
rules_token = set_rules_runtime(rules_ctx)
|
||||
|
||||
packs = ",".join(rules_ctx.get("enabled_packs", []))
|
||||
if packs:
|
||||
logger.info(" Règles fusionné : packs=%s", packs)
|
||||
if rules_ctx.get("triggers_fired"):
|
||||
logger.info(" Règles fusionné : triggers=%s", ",".join(rules_ctx["triggers_fired"]))
|
||||
except Exception:
|
||||
logger.warning(" Routage règles fusionné : erreur", exc_info=True)
|
||||
|
||||
veto = None
|
||||
try:
|
||||
veto = apply_vetos(merged)
|
||||
merged.veto_report = veto
|
||||
except Exception:
|
||||
logger.warning(" Vetos fusionné : erreur lors du contrôle", exc_info=True)
|
||||
|
||||
try:
|
||||
apply_decisions(merged)
|
||||
_inject_decision_alerts(merged, scope="FINAL")
|
||||
if veto is not None:
|
||||
_inject_veto_alerts(merged, veto, scope="FINAL")
|
||||
except Exception:
|
||||
logger.warning(" Décisions fusionné : erreur lors du post-traitement", exc_info=True)
|
||||
finally:
|
||||
if rules_token is not None:
|
||||
reset_rules_runtime(rules_token)
|
||||
|
||||
# Re-estimer le GHM (sur codes finaux) + métriques (actifs vs écartés)
|
||||
try:
|
||||
metrics = _compute_metrics(merged)
|
||||
ghm = estimate_ghm(merged)
|
||||
merged.ghm_estimation = ghm
|
||||
logger.info(
|
||||
" Fusion métriques : DAS actifs=%d / total=%d (écartés=%d, removed=%d, no_code=%d) | Actes=%d (avec code=%d)",
|
||||
metrics.das_active,
|
||||
metrics.das_total,
|
||||
metrics.das_excluded,
|
||||
metrics.das_removed,
|
||||
metrics.das_no_code,
|
||||
metrics.actes_total,
|
||||
metrics.actes_with_code,
|
||||
)
|
||||
logger.info(
|
||||
" GHM final : CMD=%s, Type=%s, Sévérité=%d → %s",
|
||||
ghm.cmd or "?",
|
||||
ghm.type_ghm or "?",
|
||||
ghm.severite,
|
||||
ghm.ghm_approx or "?",
|
||||
)
|
||||
except Exception:
|
||||
logger.warning(" Erreur estimation GHM/metrics final", exc_info=True)
|
||||
|
||||
struct_dir = STRUCTURED_DIR / subdir
|
||||
struct_dir.mkdir(parents=True, exist_ok=True)
|
||||
merged_path = struct_dir / f"{subdir}_fusionne_cim10.json"
|
||||
|
||||
Reference in New Issue
Block a user