feat: mode hybride Ollama — gemma3:27b pour CPAM, 12b pour codage
Le pipeline utilise désormais gemma3:12b (rapide) pour le codage CIM-10 et gemma3:27b (meilleur raisonnement) pour la contre-argumentation CPAM. Configurable via OLLAMA_MODEL_CPAM et OLLAMA_TIMEOUT_CPAM. Inclut aussi : traçabilité source/page DAS, niveaux CMA ATIH, sévérité, page tracker PDF, améliorations fusion et filtres DAS. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -36,7 +36,9 @@ NER_CONFIDENCE_THRESHOLD = float(os.environ.get("T2A_NER_THRESHOLD", "0.80"))
|
||||
|
||||
# Ollama server endpoint.
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434")
# Default (fast) model, used for CIM-10 coding.
OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "gemma3:12b")
# Larger model with better reasoning, used for the CPAM counter-argumentation.
OLLAMA_MODEL_CPAM = os.environ.get("OLLAMA_MODEL_CPAM", "gemma3:27b")
# Request timeout in seconds for the default model.
OLLAMA_TIMEOUT = int(os.environ.get("OLLAMA_TIMEOUT", "120"))
# Longer timeout for the larger CPAM model.
OLLAMA_TIMEOUT_CPAM = int(os.environ.get("OLLAMA_TIMEOUT_CPAM", "300"))
# On-disk cache for Ollama responses.
OLLAMA_CACHE_PATH = BASE_DIR / "data" / "ollama_cache.json"
# Maximum number of concurrent Ollama calls.
OLLAMA_MAX_PARALLEL = int(os.environ.get("OLLAMA_MAX_PARALLEL", "2"))
|
||||
|
||||
@@ -55,6 +57,7 @@ UPLOAD_MAX_SIZE_MB = 50
|
||||
ALLOWED_EXTENSIONS = {".pdf", ".csv", ".xlsx", ".xls", ".txt"}
|
||||
CIM10_DICT_PATH = BASE_DIR / "data" / "cim10_dict.json"
|
||||
CIM10_SUPPLEMENTS_PATH = BASE_DIR / "data" / "cim10_supplements.json"
|
||||
CMA_LEVELS_PATH = BASE_DIR / "data" / "cma_levels.json"
|
||||
CCAM_DICT_PATH = BASE_DIR / "data" / "ccam_dict.json"
|
||||
CIM10_PDF = Path(os.environ.get("T2A_CIM10_PDF", "/home/dom/ai/aivanov_CIM/cim-10-fr_2026_a_usage_pmsi_version_provisoire_111225.pdf"))
|
||||
GUIDE_METHODO_PDF = Path(os.environ.get("T2A_GUIDE_METHODO_PDF", "/home/dom/ai/aivanov_CIM/guide_methodo_mco_2026_version_provisoire.pdf"))
|
||||
@@ -101,7 +104,10 @@ class Diagnostic(BaseModel):
|
||||
est_cma: Optional[bool] = None  # CMA flag; None = not evaluated
est_cms: Optional[bool] = None  # CMS flag; None = not evaluated
niveau_severite: Optional[str] = None  # "leger" | "modere" | "severe" | "non_evalue"
niveau_cma: Optional[int] = None  # 1 (not a CMA) | 2 | 3 | 4 (official ATIH level)
source: Optional[str] = None  # "trackare" | "edsnlp" | "regex" | "llm_das"
source_page: Optional[int] = None  # page number (1-indexed) in the source PDF
source_excerpt: Optional[str] = None  # excerpt of the source text (~200 chars)
|
||||
|
||||
|
||||
class ActeCCAM(BaseModel):
|
||||
|
||||
@@ -4,8 +4,8 @@ from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
from ..config import ControleCPAM, DossierMedical, RAGSource
|
||||
from ..medical.ollama_client import call_ollama
|
||||
from ..config import ControleCPAM, DossierMedical, RAGSource, OLLAMA_MODEL_CPAM, OLLAMA_TIMEOUT_CPAM
|
||||
from ..medical.ollama_client import call_anthropic, call_ollama
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -244,33 +244,84 @@ CONSIGNES :
|
||||
|
||||
AXE MÉDICAL :
|
||||
- Analyse le bien-fondé médical du codage de l'établissement
|
||||
- CITE les éléments cliniques EXACTS du dossier : valeurs bio précises (ex: CRP 180 mg/L), résultats imagerie verbatim, traitements avec molécules et posologies
|
||||
- Confronte l'argumentation CPAM aux sources CIM-10 et Guide Méthodologique fournies
|
||||
- Identifie les points où la CPAM a éventuellement raison
|
||||
- Ne mentionne que les éléments réellement présents dans le dossier fourni
|
||||
|
||||
AXE ASYMÉTRIE D'INFORMATION :
|
||||
- La CPAM a fondé son analyse uniquement sur le CRH et les codes transmis
|
||||
- Démontre en quoi les éléments cliniques complémentaires (biologie, imagerie, traitements, actes) justifient le codage contesté
|
||||
- Pour chaque élément clinique pertinent, explique pourquoi il invalide ou nuance l'argumentation CPAM
|
||||
- Pour CHAQUE élément clinique pertinent, cite les VALEURS EXACTES et explique leur signification clinique
|
||||
- Démontre en quoi ces éléments complémentaires (biologie, imagerie, traitements, actes) justifient le codage contesté
|
||||
- Ne mentionne AUCUN élément qui n'est pas dans le dossier fourni
|
||||
|
||||
AXE RÉGLEMENTAIRE :
|
||||
- Identifie si l'UCR fait une interprétation restrictive non fondée d'une règle
|
||||
- Confronte le raisonnement CPAM au texte EXACT des sources fournies
|
||||
- Format OBLIGATOIRE pour chaque référence : [Document - page N] suivi d'une CITATION VERBATIM du passage pertinent
|
||||
- INTERDICTION ABSOLUE de citer une référence qui ne figure pas dans les sources fournies ci-dessus
|
||||
- Si aucune source pertinente n'est disponible → écrire explicitement "Pas de source réglementaire disponible"
|
||||
- Relève les contradictions entre l'argumentation CPAM et les règles officielles
|
||||
- NE CITE AUCUNE référence qui ne figure pas dans les sources fournies
|
||||
|
||||
Réponds UNIQUEMENT avec un objet JSON au format suivant :
|
||||
{{
|
||||
"analyse_contestation": "Résumé de ce que conteste la CPAM et sur quelle base",
|
||||
"points_accord": "Points où la CPAM a raison (ou 'Aucun')",
|
||||
"contre_arguments_medicaux": "Argumentation médicale en faveur du codage",
|
||||
"preuves_dossier": [
|
||||
{{"element": "biologie|imagerie|traitement|acte|clinique", "valeur": "valeur exacte du dossier", "signification": "explication clinique"}}
|
||||
],
|
||||
"contre_arguments_asymetrie": "Éléments cliniques que la CPAM n'avait pas et qui justifient le codage",
|
||||
"contre_arguments_reglementaires": "Erreurs d'interprétation réglementaire de la CPAM, avec citations des sources",
|
||||
"references": "Références EXACTES tirées des sources fournies (document, page, code)",
|
||||
"contre_arguments_reglementaires": "Erreurs d'interprétation réglementaire de la CPAM, avec citations verbatim des sources",
|
||||
"references": [
|
||||
{{"document": "nom du document source", "page": "numéro de page", "citation": "citation verbatim du passage"}}
|
||||
],
|
||||
"conclusion": "Synthèse et position recommandée"
|
||||
}}"""
|
||||
|
||||
|
||||
def _format_response(parsed: dict) -> str:
|
||||
def _validate_references(parsed: dict, sources: list[dict]) -> list[str]:
|
||||
"""Vérifie que les références citées correspondent aux sources RAG fournies.
|
||||
|
||||
Returns:
|
||||
Liste d'avertissements pour les références non vérifiables.
|
||||
"""
|
||||
warnings = []
|
||||
refs = parsed.get("references")
|
||||
if not refs or not isinstance(refs, list):
|
||||
return warnings
|
||||
|
||||
# Construire un set des documents sources disponibles
|
||||
source_docs = set()
|
||||
for src in sources:
|
||||
doc_name = src.get("document", "")
|
||||
source_docs.add(doc_name)
|
||||
# Ajouter les noms lisibles aussi
|
||||
readable = {
|
||||
"cim10": "CIM-10 FR 2026",
|
||||
"cim10_alpha": "CIM-10 Index Alphabétique 2026",
|
||||
"guide_methodo": "Guide Méthodologique MCO 2026",
|
||||
"ccam": "CCAM PMSI V4 2025",
|
||||
}.get(doc_name, "")
|
||||
if readable:
|
||||
source_docs.add(readable)
|
||||
source_docs.add(readable.lower())
|
||||
|
||||
if not source_docs:
|
||||
return warnings
|
||||
|
||||
for ref in refs:
|
||||
if not isinstance(ref, dict):
|
||||
continue
|
||||
doc = ref.get("document", "")
|
||||
if doc and not any(sd in doc.lower() or doc.lower() in sd.lower() for sd in source_docs if sd):
|
||||
warnings.append(f"Référence non vérifiable : {doc}")
|
||||
logger.warning("CPAM : référence non vérifiable « %s »", doc)
|
||||
|
||||
return warnings
|
||||
|
||||
|
||||
def _format_response(parsed: dict, ref_warnings: list[str] | None = None) -> str:
|
||||
"""Formate la réponse LLM en texte lisible."""
|
||||
sections = []
|
||||
|
||||
@@ -287,6 +338,19 @@ def _format_response(parsed: dict) -> str:
|
||||
if contre_med:
|
||||
sections.append(f"CONTRE-ARGUMENTS MÉDICAUX\n{contre_med}")
|
||||
|
||||
# Preuves du dossier (nouveau champ structuré)
|
||||
preuves = parsed.get("preuves_dossier")
|
||||
if preuves and isinstance(preuves, list):
|
||||
preuves_lines = []
|
||||
for p in preuves:
|
||||
if isinstance(p, dict):
|
||||
elem = p.get("element", "")
|
||||
valeur = p.get("valeur", "")
|
||||
signif = p.get("signification", "")
|
||||
preuves_lines.append(f"- [{elem}] {valeur} → {signif}")
|
||||
if preuves_lines:
|
||||
sections.append(f"PREUVES DU DOSSIER\n" + "\n".join(preuves_lines))
|
||||
|
||||
contre_asym = parsed.get("contre_arguments_asymetrie")
|
||||
if contre_asym:
|
||||
sections.append(f"ASYMÉTRIE D'INFORMATION\n{contre_asym}")
|
||||
@@ -301,14 +365,33 @@ def _format_response(parsed: dict) -> str:
|
||||
if contre:
|
||||
sections.append(f"CONTRE-ARGUMENTS\n{contre}")
|
||||
|
||||
# Références structurées (nouveau format liste) ou ancien format string
|
||||
refs = parsed.get("references")
|
||||
if refs:
|
||||
if isinstance(refs, list):
|
||||
ref_lines = []
|
||||
for r in refs:
|
||||
if isinstance(r, dict):
|
||||
doc = r.get("document", "")
|
||||
page = r.get("page", "")
|
||||
citation = r.get("citation", "")
|
||||
ref_lines.append(f"- [{doc}, p.{page}] {citation}")
|
||||
else:
|
||||
ref_lines.append(f"- {r}")
|
||||
if ref_lines:
|
||||
sections.append(f"REFERENCES\n" + "\n".join(ref_lines))
|
||||
else:
|
||||
sections.append(f"REFERENCES\n{refs}")
|
||||
|
||||
conclusion = parsed.get("conclusion")
|
||||
if conclusion:
|
||||
sections.append(f"CONCLUSION\n{conclusion}")
|
||||
|
||||
# Avertissements sur les références non vérifiables
|
||||
if ref_warnings:
|
||||
warning_text = "\n".join(f"- {w}" for w in ref_warnings)
|
||||
sections.append(f"AVERTISSEMENT — REFERENCES NON VÉRIFIÉES\n{warning_text}")
|
||||
|
||||
return "\n\n".join(sections)
|
||||
|
||||
|
||||
@@ -335,7 +418,23 @@ def generate_cpam_response(
|
||||
# 2. Construction du prompt
|
||||
prompt = _build_cpam_prompt(dossier, controle, sources)
|
||||
|
||||
# 3. Appel Ollama
|
||||
# 3. Appel LLM — Mode hybride : Ollama CPAM (27b) > Haiku > Ollama défaut
|
||||
result = None
|
||||
if OLLAMA_MODEL_CPAM:
|
||||
logger.info(" Contre-argumentation via Ollama %s (mode hybride)", OLLAMA_MODEL_CPAM)
|
||||
result = call_ollama(
|
||||
prompt, temperature=0.1, max_tokens=4000,
|
||||
model=OLLAMA_MODEL_CPAM, timeout=OLLAMA_TIMEOUT_CPAM,
|
||||
)
|
||||
if result is not None:
|
||||
logger.info(" Contre-argumentation via Ollama %s", OLLAMA_MODEL_CPAM)
|
||||
else:
|
||||
logger.info(" Ollama CPAM indisponible → fallback Anthropic Haiku")
|
||||
result = call_anthropic(prompt, temperature=0.1, max_tokens=4000)
|
||||
if result is not None:
|
||||
logger.info(" Contre-argumentation via Anthropic Haiku")
|
||||
else:
|
||||
logger.info(" Haiku indisponible → fallback Ollama défaut")
|
||||
result = call_ollama(prompt, temperature=0.1, max_tokens=3000)
|
||||
|
||||
# 4. Conversion des sources RAG
|
||||
@@ -350,11 +449,16 @@ def generate_cpam_response(
|
||||
]
|
||||
|
||||
if result is None:
|
||||
logger.warning(" Ollama non disponible — contre-argumentation non générée")
|
||||
logger.warning(" LLM non disponible — contre-argumentation non générée")
|
||||
return "", rag_sources
|
||||
|
||||
# 5. Formater la réponse
|
||||
text = _format_response(result)
|
||||
# 5. Validation des références
|
||||
ref_warnings = _validate_references(result, sources)
|
||||
if ref_warnings:
|
||||
logger.warning(" CPAM : %d référence(s) non vérifiable(s)", len(ref_warnings))
|
||||
|
||||
# 6. Formater la réponse
|
||||
text = _format_response(result, ref_warnings)
|
||||
logger.info(" Contre-argumentation générée (%d caractères)", len(text))
|
||||
|
||||
return text, rag_sources
|
||||
|
||||
91
src/extraction/page_tracker.py
Normal file
91
src/extraction/page_tracker.py
Normal file
@@ -0,0 +1,91 @@
|
||||
"""Suivi des pages sources pour la traçabilité des diagnostics.
|
||||
|
||||
Permet de retrouver la page d'origine et l'extrait de texte correspondant
|
||||
à un diagnostic extrait du PDF.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class PageTracker:
    """Maps character positions in the concatenated document text back to pages.

    Args:
        page_offsets: List of (start, end) character spans, one per page,
            in document order (list index 0 corresponds to page 1).
    """

    def __init__(self, page_offsets: list[tuple[int, int]]):
        self._offsets = page_offsets

    def char_to_page(self, char_pos: int) -> int:
        """Return the page number (1-indexed) for a character position.

        Fix: a position falling in the separator gap between two pages used
        to fall through every `start <= pos < end` test and was mapped to
        the LAST page; gap positions are now attributed to the page that
        follows the gap.
        """
        for i, (_start, end) in enumerate(self._offsets):
            if char_pos < end:
                return i + 1
        # Beyond the last page: clamp to the last page.
        if self._offsets:
            return len(self._offsets)
        return 1

    def find_page_for_text(self, text: str, full_text: str) -> Optional[int]:
        """Locate *text* in *full_text* and return its page (1-indexed).

        Falls back to a case-insensitive search, then to a partial search on
        the first 50 characters (only when that prefix is at least 10 chars).
        Returns None when the text cannot be found.
        """
        if not text or not full_text:
            return None

        # Exact match first.
        pos = full_text.find(text)
        if pos >= 0:
            return self.char_to_page(pos)

        # Case-insensitive match.
        lowered_full = full_text.lower()
        pos = lowered_full.find(text.lower())
        if pos >= 0:
            return self.char_to_page(pos)

        # Partial match on a prefix of the text.
        prefix = text[:50].strip()
        if len(prefix) >= 10:
            pos = lowered_full.find(prefix.lower())
            if pos >= 0:
                return self.char_to_page(pos)

        return None

    def extract_excerpt(
        self, text: str, full_text: str, context_chars: int = 100,
    ) -> Optional[str]:
        """Extract *text* with surrounding context (~200 chars by default).

        Returns:
            The excerpt, with "..." ellipses where truncated, or None when
            the text cannot be located in *full_text*.
        """
        if not text or not full_text:
            return None

        # Same search cascade as find_page_for_text: exact, then
        # case-insensitive, then prefix.
        pos = full_text.find(text)
        if pos < 0:
            pos = full_text.lower().find(text.lower())
        if pos < 0:
            prefix = text[:50].strip()
            if len(prefix) >= 10:
                pos = full_text.lower().find(prefix.lower())
        if pos < 0:
            return None

        start = max(0, pos - context_chars)
        end = min(len(full_text), pos + len(text) + context_chars)

        excerpt = full_text[start:end].strip()
        # Ellipses mark truncation on either side.
        if start > 0:
            excerpt = "..." + excerpt
        if end < len(full_text):
            excerpt = excerpt + "..."

        return excerpt
|
||||
@@ -3,9 +3,12 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import pdfplumber
|
||||
|
||||
from .page_tracker import PageTracker
|
||||
|
||||
|
||||
def extract_text(pdf_path: str | Path) -> str:
|
||||
"""Extrait le texte de toutes les pages d'un PDF."""
|
||||
@@ -17,6 +20,33 @@ def extract_text(pdf_path: str | Path) -> str:
|
||||
return "\n\n".join(pages_text)
|
||||
|
||||
|
||||
def extract_text_with_pages(pdf_path: str | Path) -> tuple[str, PageTracker]:
|
||||
"""Extrait le texte avec un tracker de pages pour la traçabilité.
|
||||
|
||||
Returns:
|
||||
(texte_complet, page_tracker) où page_tracker permet de retrouver
|
||||
la page source de chaque position de caractère.
|
||||
"""
|
||||
pages_text: list[str] = []
|
||||
with pdfplumber.open(pdf_path) as pdf:
|
||||
for page in pdf.pages:
|
||||
text = page.extract_text() or ""
|
||||
pages_text.append(text)
|
||||
|
||||
# Construire le texte complet avec "\n\n" comme séparateur (identique à extract_text)
|
||||
separator = "\n\n"
|
||||
page_offsets: list[tuple[int, int]] = []
|
||||
offset = 0
|
||||
for i, page_text in enumerate(pages_text):
|
||||
start = offset
|
||||
end = offset + len(page_text)
|
||||
page_offsets.append((start, end))
|
||||
offset = end + len(separator)
|
||||
|
||||
full_text = separator.join(pages_text)
|
||||
return full_text, PageTracker(page_offsets)
|
||||
|
||||
|
||||
def extract_pages(pdf_path: str | Path) -> list[str]:
|
||||
"""Extrait le texte page par page."""
|
||||
pages: list[str] = []
|
||||
|
||||
11
src/main.py
11
src/main.py
@@ -14,7 +14,7 @@ from .config import ANONYMIZED_DIR, INPUT_DIR, OUTPUT_DIR, REPORTS_DIR, STRUCTUR
|
||||
from .extraction.document_classifier import classify
|
||||
from .extraction.crh_parser import parse_crh
|
||||
from .extraction.document_splitter import split_documents
|
||||
from .extraction.pdf_extractor import extract_text
|
||||
from .extraction.pdf_extractor import extract_text, extract_text_with_pages
|
||||
from .extraction.trackare_parser import parse_trackare
|
||||
from .medical.cim10_extractor import extract_medical_info
|
||||
from .medical.ghm import estimate_ghm
|
||||
@@ -38,8 +38,8 @@ def process_pdf(pdf_path: Path) -> list[tuple[str, DossierMedical, Anonymization
|
||||
t0 = time.time()
|
||||
logger.info("Traitement de %s", pdf_path.name)
|
||||
|
||||
# 1. Extraction texte
|
||||
raw_text = extract_text(pdf_path)
|
||||
# 1. Extraction texte avec pages
|
||||
raw_text, page_tracker = extract_text_with_pages(pdf_path)
|
||||
logger.info(" Texte extrait : %d caractères", len(raw_text))
|
||||
|
||||
# 2. Classification
|
||||
@@ -82,7 +82,10 @@ def process_pdf(pdf_path: Path) -> list[tuple[str, DossierMedical, Anonymization
|
||||
edsnlp_result = _run_edsnlp(anonymized_text)
|
||||
|
||||
# 7. Extraction médicale CIM-10
|
||||
dossier = extract_medical_info(parsed, anonymized_text, edsnlp_result, use_rag=_use_rag)
|
||||
dossier = extract_medical_info(
|
||||
parsed, anonymized_text, edsnlp_result, use_rag=_use_rag,
|
||||
page_tracker=page_tracker, raw_text=raw_text,
|
||||
)
|
||||
dossier.source_file = pdf_path.name
|
||||
dossier.document_type = doc_type
|
||||
logger.info(" DP%s : %s", part_label, dossier.diagnostic_principal)
|
||||
|
||||
@@ -98,11 +98,21 @@ def extract_medical_info(
|
||||
anonymized_text: str,
|
||||
edsnlp_result: Optional[EdsnlpResult] = None,
|
||||
use_rag: bool = False,
|
||||
page_tracker=None,
|
||||
raw_text: str | None = None,
|
||||
) -> DossierMedical:
|
||||
"""Extrait les informations médicales structurées depuis les données parsées et le texte."""
|
||||
"""Extrait les informations médicales structurées depuis les données parsées et le texte.
|
||||
|
||||
Args:
|
||||
page_tracker: PageTracker pour la traçabilité page/extrait (optionnel).
|
||||
raw_text: Texte brut avant anonymisation (pour recherche page source).
|
||||
"""
|
||||
dossier = DossierMedical()
|
||||
dossier.document_type = parsed_data.get("type", "")
|
||||
|
||||
# Texte de référence pour la recherche de pages (raw_text préféré, sinon anonymized)
|
||||
search_text = raw_text or anonymized_text
|
||||
|
||||
_extract_sejour(parsed_data, dossier)
|
||||
_extract_diagnostics(parsed_data, anonymized_text, dossier, edsnlp_result)
|
||||
_extract_actes(anonymized_text, dossier)
|
||||
@@ -140,6 +150,10 @@ def extract_medical_info(
|
||||
# Post-processing : retirer DAS dont le code est identique au DP
|
||||
_remove_das_equal_dp(dossier)
|
||||
|
||||
# Post-processing : traçabilité source (page + extrait)
|
||||
if page_tracker:
|
||||
_apply_source_tracking(dossier, page_tracker, search_text)
|
||||
|
||||
return dossier
|
||||
|
||||
|
||||
@@ -331,8 +345,10 @@ def _extract_diagnostics(
|
||||
elif edsnlp_codes:
|
||||
# Utiliser la première entité CIM-10 edsnlp comme DP
|
||||
code, texte = next(iter(edsnlp_codes.items()))
|
||||
texte_clean = texte.capitalize()
|
||||
if is_valid_diagnostic_text(texte_clean):
|
||||
dossier.diagnostic_principal = Diagnostic(
|
||||
texte=texte.capitalize(), cim10_suggestion=code,
|
||||
texte=texte_clean, cim10_suggestion=code,
|
||||
source="edsnlp",
|
||||
)
|
||||
|
||||
@@ -881,18 +897,46 @@ def _apply_code_corrections(dossier: DossierMedical) -> None:
|
||||
diag.cim10_suggestion = corrected
|
||||
|
||||
|
||||
def _is_dp_family_redundant(das_code: str, dp_code: str) -> bool:
|
||||
"""True si le DAS est redondant avec le DP (même code, parent/enfant, ou même famille)."""
|
||||
if das_code == dp_code:
|
||||
return True
|
||||
# Relation parent/enfant → toujours redondant
|
||||
das_norm = das_code.replace(".", "")
|
||||
dp_norm = dp_code.replace(".", "")
|
||||
if das_norm.startswith(dp_norm) or dp_norm.startswith(das_norm):
|
||||
return True
|
||||
# Même famille 3 chars, sauf exceptions
|
||||
dp_family = dp_code[:3]
|
||||
if das_code[:3] == dp_family:
|
||||
# S/T (trauma) : sites différents → garder
|
||||
if dp_family[0] in ("S", "T"):
|
||||
return False
|
||||
# E10-E14 (diabète) : complications différentes → garder
|
||||
if dp_family[0] == "E" and dp_family[1:].isdigit() and 10 <= int(dp_family[1:]) <= 14:
|
||||
return False
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _remove_das_equal_dp(dossier: DossierMedical) -> None:
    """Drop DAS entries redundant with the DP (same code, family, or semantics).

    Mutates ``dossier.diagnostics_associes`` in place. Does nothing when the
    DP has no CIM-10 code.
    """
    from .das_filter import apply_semantic_dedup

    dp = dossier.diagnostic_principal
    dp_code = dp.cim10_suggestion if dp else None
    if not dp_code:
        return

    # Keep only DAS that are not family-redundant with the DP
    # (entries without a code are always kept).
    kept = []
    for das in dossier.diagnostics_associes:
        code = das.cim10_suggestion
        if code and _is_dp_family_redundant(code, dp_code):
            continue
        kept.append(das)

    removed = len(dossier.diagnostics_associes) - len(kept)
    dossier.diagnostics_associes = kept
    if removed:
        logger.info(" DAS≈DP : %d DAS retiré(s) (famille %s du DP)", removed, dp_code[:3])

    # Also drop semantic redundancies among the remaining DAS.
    dossier.diagnostics_associes = apply_semantic_dedup(dossier.diagnostics_associes)
|
||||
|
||||
|
||||
def _apply_noncumul_rules(dossier: DossierMedical) -> None:
|
||||
@@ -945,3 +989,33 @@ def _is_abnormal(test: str, value: str) -> bool | None:
|
||||
lo, hi = BIO_NORMALS[test]
|
||||
return val > hi or val < lo
|
||||
return None
|
||||
|
||||
|
||||
def _apply_source_tracking(dossier: DossierMedical, page_tracker, search_text: str) -> None:
|
||||
"""Ajoute la traçabilité source (page + extrait) à chaque diagnostic.
|
||||
|
||||
Cherche le texte du diagnostic dans le texte source pour retrouver
|
||||
la page d'origine et extraire un passage contextualisé.
|
||||
"""
|
||||
all_diags: list[Diagnostic] = []
|
||||
if dossier.diagnostic_principal:
|
||||
all_diags.append(dossier.diagnostic_principal)
|
||||
all_diags.extend(dossier.diagnostics_associes)
|
||||
|
||||
tracked = 0
|
||||
for diag in all_diags:
|
||||
if diag.source_page is not None:
|
||||
continue # déjà renseigné
|
||||
|
||||
texte = diag.texte
|
||||
if not texte:
|
||||
continue
|
||||
|
||||
page = page_tracker.find_page_for_text(texte, search_text)
|
||||
if page:
|
||||
diag.source_page = page
|
||||
diag.source_excerpt = page_tracker.extract_excerpt(texte, search_text)
|
||||
tracked += 1
|
||||
|
||||
if tracked:
|
||||
logger.info(" Traçabilité source : %d/%d diagnostics localisés", tracked, len(all_diags))
|
||||
|
||||
@@ -100,6 +100,44 @@ def is_valid_diagnostic_text(text: str) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
# CIM-10 semantic redundancy pairs for PMSI coding.
# Format: (dominated_prefix, dominant_prefixes).
# When a DAS code starting with dominated_prefix coexists with a DAS code
# starting with one of the dominant_prefixes, the dominated code is dropped.
SEMANTIC_REDUNDANCIES: list[tuple[str, list[str]]] = [
    # I10 (essential hypertension) is redundant when I11/I12/I13
    # (hypertensive heart/kidney disease) is present.
    ("I10", ["I11", "I12", "I13"]),
    # N30 (cystitis) is redundant when N39.x (urinary tract infection) is present.
    ("N30", ["N39"]),
    # J18 (pneumonia NOS) is redundant when J15/J16 (specific pneumonia) is present.
    ("J18", ["J15", "J16"]),
]


def apply_semantic_dedup(das_list: list) -> list:
    """Remove DAS entries made redundant by a more specific co-occurring code.

    Uses SEMANTIC_REDUNDANCIES to decide the dominated/dominant pairs.
    Accepts a list of Diagnostic-like objects exposing ``cim10_suggestion``.
    Returns the input list object unchanged when nothing is redundant.
    """
    present = {d.cim10_suggestion for d in das_list if d.cim10_suggestion}
    doomed: set[str] = set()

    for dominated_prefix, dominant_prefixes in SEMANTIC_REDUNDANCIES:
        dominants = tuple(dominant_prefixes)
        if not any(code.startswith(dominants) for code in present):
            continue
        doomed.update(code for code in present if code.startswith(dominated_prefix))

    if not doomed:
        return das_list
    return [d for d in das_list if d.cim10_suggestion not in doomed]
|
||||
|
||||
|
||||
def correct_known_miscodes(code: str, texte: str) -> str | None:
|
||||
"""Corrige les codes CIM-10 systématiquement mal attribués par le LLM.
|
||||
|
||||
|
||||
@@ -17,6 +17,8 @@ from ..config import (
|
||||
Sejour,
|
||||
Traitement,
|
||||
)
|
||||
from ..medical.das_filter import is_valid_diagnostic_text, apply_semantic_dedup
|
||||
from ..medical.cim10_extractor import _is_dp_family_redundant
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -163,6 +165,14 @@ def merge_dossiers(dossiers: list[DossierMedical]) -> DossierMedical:
|
||||
if len(dossiers) == 1:
|
||||
result = dossiers[0].model_copy(deep=True)
|
||||
result.source_files = [result.source_file]
|
||||
# Appliquer la dédup famille DP + sémantique même pour un seul dossier
|
||||
dp_code = result.diagnostic_principal.cim10_suggestion if result.diagnostic_principal else None
|
||||
if dp_code:
|
||||
result.diagnostics_associes = [
|
||||
d for d in result.diagnostics_associes
|
||||
if not d.cim10_suggestion or not _is_dp_family_redundant(d.cim10_suggestion, dp_code)
|
||||
]
|
||||
result.diagnostics_associes = apply_semantic_dedup(result.diagnostics_associes)
|
||||
return result
|
||||
|
||||
merged = DossierMedical()
|
||||
@@ -181,23 +191,29 @@ def merge_dossiers(dossiers: list[DossierMedical]) -> DossierMedical:
|
||||
for d in dossiers:
|
||||
all_das.extend(d.diagnostics_associes)
|
||||
# Si le DP de ce dossier est différent du DP fusionné, l'ajouter comme DAS
|
||||
# mais seulement si le texte est un diagnostic valide (filtre artefacts OCR)
|
||||
if (
|
||||
d.diagnostic_principal
|
||||
and merged.diagnostic_principal
|
||||
and d.diagnostic_principal.cim10_suggestion
|
||||
!= merged.diagnostic_principal.cim10_suggestion
|
||||
and is_valid_diagnostic_text(d.diagnostic_principal.texte)
|
||||
):
|
||||
all_das.append(d.diagnostic_principal)
|
||||
|
||||
merged.diagnostics_associes = _dedup_diagnostics(all_das)
|
||||
|
||||
# Retirer les DAS dont le code est identique au DP (violation règle PMSI)
|
||||
# Retirer les DAS redondants avec le DP (même code, famille, parent/enfant)
|
||||
dp_code = merged.diagnostic_principal.cim10_suggestion if merged.diagnostic_principal else None
|
||||
if dp_code:
|
||||
merged.diagnostics_associes = [
|
||||
d for d in merged.diagnostics_associes if d.cim10_suggestion != dp_code
|
||||
d for d in merged.diagnostics_associes
|
||||
if not d.cim10_suggestion or not _is_dp_family_redundant(d.cim10_suggestion, dp_code)
|
||||
]
|
||||
|
||||
# Redondances sémantiques entre DAS
|
||||
merged.diagnostics_associes = apply_semantic_dedup(merged.diagnostics_associes)
|
||||
|
||||
# Actes CCAM
|
||||
all_actes: list[ActeCCAM] = []
|
||||
for d in dossiers:
|
||||
|
||||
@@ -141,19 +141,29 @@ def _detect_type_ghm(actes_ccam: list) -> str:
|
||||
def _compute_severity(das_list: list) -> tuple[int, int, int]:
|
||||
"""Calcule le niveau de sévérité à partir des DAS.
|
||||
|
||||
Utilise le max des niveau_cma officiels ATIH quand disponibles,
|
||||
avec fallback sur le comptage CMA/CMS.
|
||||
|
||||
Returns:
|
||||
(niveau, cma_count, cms_count)
|
||||
"""
|
||||
cma_count = 0
|
||||
cms_count = 0
|
||||
max_cma_level = 1
|
||||
|
||||
for das in das_list:
|
||||
niveau_cma = getattr(das, "niveau_cma", None)
|
||||
if niveau_cma and niveau_cma > 1:
|
||||
max_cma_level = max(max_cma_level, niveau_cma)
|
||||
if getattr(das, "est_cma", False):
|
||||
cma_count += 1
|
||||
if getattr(das, "est_cms", False):
|
||||
cms_count += 1
|
||||
|
||||
if cms_count >= 2:
|
||||
# Priorité au niveau CMA officiel ATIH
|
||||
if max_cma_level > 1:
|
||||
niveau = max_cma_level
|
||||
elif cms_count >= 2:
|
||||
niveau = 4
|
||||
elif cms_count >= 1 or cma_count >= 3:
|
||||
niveau = 3
|
||||
|
||||
@@ -34,12 +34,12 @@ def _get_anthropic_client():
|
||||
return None
|
||||
|
||||
|
||||
def _call_anthropic(
|
||||
def call_anthropic(
|
||||
prompt: str,
|
||||
temperature: float = 0.1,
|
||||
max_tokens: int = 2500,
|
||||
) -> dict | None:
|
||||
"""Appelle l'API Anthropic en fallback."""
|
||||
"""Appelle l'API Anthropic (Haiku)."""
|
||||
client = _get_anthropic_client()
|
||||
if client is None:
|
||||
return None
|
||||
@@ -82,6 +82,8 @@ def call_ollama(
|
||||
prompt: str,
|
||||
temperature: float = 0.1,
|
||||
max_tokens: int = 2500,
|
||||
model: str | None = None,
|
||||
timeout: int | None = None,
|
||||
) -> dict | None:
|
||||
"""Appelle Ollama en mode JSON natif, avec fallback Anthropic si indisponible.
|
||||
|
||||
@@ -89,16 +91,20 @@ def call_ollama(
|
||||
prompt: Le prompt à envoyer.
|
||||
temperature: Température de génération (défaut: 0.1).
|
||||
max_tokens: Nombre max de tokens (défaut: 2500).
|
||||
model: Modèle Ollama à utiliser (défaut: OLLAMA_MODEL global).
|
||||
timeout: Timeout en secondes (défaut: OLLAMA_TIMEOUT global).
|
||||
|
||||
Returns:
|
||||
Le dict JSON parsé, ou None en cas d'erreur.
|
||||
"""
|
||||
use_model = model or OLLAMA_MODEL
|
||||
use_timeout = timeout or OLLAMA_TIMEOUT
|
||||
for attempt in range(2):
|
||||
try:
|
||||
response = requests.post(
|
||||
f"{OLLAMA_URL}/api/generate",
|
||||
json={
|
||||
"model": OLLAMA_MODEL,
|
||||
"model": use_model,
|
||||
"prompt": prompt,
|
||||
"stream": False,
|
||||
"format": "json",
|
||||
@@ -107,7 +113,7 @@ def call_ollama(
|
||||
"num_predict": max_tokens,
|
||||
},
|
||||
},
|
||||
timeout=OLLAMA_TIMEOUT,
|
||||
timeout=use_timeout,
|
||||
)
|
||||
response.raise_for_status()
|
||||
raw = response.json().get("response", "")
|
||||
@@ -115,13 +121,14 @@ def call_ollama(
|
||||
if result is not None:
|
||||
return result
|
||||
if attempt == 0:
|
||||
logger.info("Ollama : retry après échec de parsing")
|
||||
logger.info("Ollama (%s) : retry après échec de parsing", use_model)
|
||||
except requests.ConnectionError:
|
||||
logger.info("Ollama indisponible → fallback Anthropic (%s)", _ANTHROPIC_MODEL)
|
||||
return _call_anthropic(prompt, temperature, max_tokens)
|
||||
return call_anthropic(prompt, temperature, max_tokens)
|
||||
except requests.Timeout:
|
||||
logger.warning("Ollama timeout après %ds → fallback Anthropic", OLLAMA_TIMEOUT)
|
||||
return _call_anthropic(prompt, temperature, max_tokens)
|
||||
logger.warning("Ollama (%s) timeout après %ds → fallback Anthropic",
|
||||
use_model, use_timeout)
|
||||
return call_anthropic(prompt, temperature, max_tokens)
|
||||
except (requests.RequestException, json.JSONDecodeError) as e:
|
||||
logger.warning("Ollama erreur : %s", e)
|
||||
return None
|
||||
|
||||
@@ -6,12 +6,16 @@ Phase 2 (future) : tables CMA/CMS officielles ATIH.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
|
||||
from .cim10_dict import load_dict, normalize_text
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# --- Marqueurs de sévérité dans le texte ---
|
||||
|
||||
@@ -73,11 +77,34 @@ _HEURISTIC_CMA_ROOTS: set[str] = {
|
||||
}
|
||||
|
||||
|
||||
_cma_levels: dict[str, int] | None = None
|
||||
|
||||
|
||||
def _load_cma_levels() -> dict[str, int]:
    """Return the official ATIH CMA levels, lazily read from data/cma_levels.json.

    The mapping (CIM-10 code -> integer level) is parsed once and memoised in
    the module-level ``_cma_levels`` cache. A missing or unreadable file
    produces an empty mapping, so callers can fall back to the heuristic path.
    """
    global _cma_levels
    if _cma_levels is not None:
        return _cma_levels

    # Imported here (not at module top) to avoid an import cycle with config.
    from ..config import CMA_LEVELS_PATH

    levels: dict[str, int] = {}
    try:
        parsed = json.loads(CMA_LEVELS_PATH.read_text(encoding="utf-8"))
        levels = {code: int(level) for code, level in parsed.items()}
        logger.debug("CMA levels chargés : %d codes", len(levels))
    except FileNotFoundError:
        logger.warning("Fichier CMA levels non trouvé : %s", CMA_LEVELS_PATH)
    except Exception:
        # Malformed JSON or non-integer levels: log and degrade to empty map.
        logger.warning("Erreur chargement CMA levels", exc_info=True)

    _cma_levels = levels
    return _cma_levels
|
||||
|
||||
|
||||
@dataclass
class SeverityInfo:
    """Result of the severity evaluation of one diagnosis.

    Combines the severity bucket derived from textual markers with the CMA
    level derived from the CIM-10 code (official ATIH table first, heuristic
    roots as fallback — see ``evaluate_severity``).
    """
    # True when the code has an official CMA level or matches the heuristic roots.
    est_cma_probable: bool = False
    # Textual severity bucket: "leger" | "modere" | "severe" | "non_evalue".
    niveau_severite: str = "non_evalue"
    # CMA level: 1 (not a CMA) or 2/3/4 (official ATIH levels).
    niveau_cma: int = 1
    # Severity marker substrings found in the diagnosis text.
    marqueurs_trouves: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
@@ -119,11 +146,14 @@ def _is_heuristic_cma(code: str) -> bool:
|
||||
def evaluate_severity(diagnostic) -> SeverityInfo:
|
||||
"""Évalue la sévérité d'un diagnostic (texte + code CIM-10).
|
||||
|
||||
Utilise en priorité les niveaux CMA officiels ATIH (2/3/4),
|
||||
avec fallback sur l'heuristique par racines CIM-10.
|
||||
|
||||
Args:
|
||||
diagnostic: Objet avec attributs texte, cim10_suggestion.
|
||||
|
||||
Returns:
|
||||
SeverityInfo avec est_cma_probable, niveau_severite, marqueurs_trouves.
|
||||
SeverityInfo avec est_cma_probable, niveau_cma, niveau_severite, marqueurs_trouves.
|
||||
"""
|
||||
info = SeverityInfo()
|
||||
|
||||
@@ -147,12 +177,16 @@ def evaluate_severity(diagnostic) -> SeverityInfo:
|
||||
info.niveau_severite = niveau
|
||||
info.marqueurs_trouves = marqueurs
|
||||
|
||||
# 3. Heuristique CMA basée sur la racine CIM-10
|
||||
if code and _is_heuristic_cma(code):
|
||||
# 3. Lookup officiel CMA ATIH (prioritaire)
|
||||
if code:
|
||||
cma_levels = _load_cma_levels()
|
||||
official_level = cma_levels.get(code)
|
||||
if official_level:
|
||||
info.niveau_cma = official_level
|
||||
info.est_cma_probable = True
|
||||
|
||||
# Un diagnostic sévère avec un code CMA-probable = forte indication
|
||||
if niveau == "severe" and info.est_cma_probable:
|
||||
elif _is_heuristic_cma(code):
|
||||
# Fallback heuristique → niveau 2
|
||||
info.niveau_cma = 2
|
||||
info.est_cma_probable = True
|
||||
|
||||
return info
|
||||
@@ -176,6 +210,7 @@ def enrich_dossier_severity(dp, das_list: list) -> tuple[list[str], int, int]:
|
||||
if dp and dp.cim10_suggestion:
|
||||
info = evaluate_severity(dp)
|
||||
dp.niveau_severite = info.niveau_severite
|
||||
dp.niveau_cma = info.niveau_cma
|
||||
if info.est_cma_probable:
|
||||
dp.est_cma = True
|
||||
|
||||
@@ -187,15 +222,16 @@ def enrich_dossier_severity(dp, das_list: list) -> tuple[list[str], int, int]:
|
||||
continue
|
||||
info = evaluate_severity(das)
|
||||
das.niveau_severite = info.niveau_severite
|
||||
das.niveau_cma = info.niveau_cma
|
||||
if info.est_cma_probable:
|
||||
das.est_cma = True
|
||||
cma_count += 1
|
||||
# CMS = CMA sévère
|
||||
if info.niveau_severite == "severe":
|
||||
# CMS = CMA niveau 4 ou CMA sévère
|
||||
if info.niveau_cma >= 4 or info.niveau_severite == "severe":
|
||||
das.est_cms = True
|
||||
cms_count += 1
|
||||
alertes.append(
|
||||
f"CMA probable : '{das.texte}' ({das.cim10_suggestion}) — "
|
||||
f"CMA niveau {info.niveau_cma} : '{das.texte}' ({das.cim10_suggestion}) — "
|
||||
f"sévérité {info.niveau_severite}"
|
||||
+ (f", marqueurs : {', '.join(info.marqueurs_trouves)}" if info.marqueurs_trouves else "")
|
||||
)
|
||||
|
||||
@@ -305,6 +305,13 @@ _SEVERITY_STYLES = {
|
||||
"leger": ("Léger", "#065f46", "#d1fae5"),
|
||||
}
|
||||
|
||||
# Badge rendering styles per CMA level: level -> (label, text colour, background colour).
# Consumed by cma_level_badge; level 1 doubles as the fallback style.
_CMA_LEVEL_STYLES = {
    1: ("1", "#6b7280", "#f3f4f6"),  # grey — not a CMA
    2: ("2", "#065f46", "#d1fae5"),  # green
    3: ("3", "#92400e", "#fef3c7"),  # yellow/orange
    4: ("4", "#dc2626", "#fee2e2"),  # red
}
|
||||
|
||||
|
||||
def format_duration(seconds: float | None) -> str:
|
||||
"""Formate une durée en secondes vers un format lisible (ex: 2min 30s)."""
|
||||
@@ -330,13 +337,24 @@ def severity_badge(value: str | None) -> Markup:
|
||||
)
|
||||
|
||||
|
||||
def cma_level_badge(value: int | None) -> Markup:
    """Render a CMA level as a colour-graded pill badge.

    Returns empty markup for ``None`` or sub-1 values; values above 4 are
    clamped down to level 4.
    """
    if value is None or value < 1:
        return Markup("")

    clamped = min(value, 4)
    label, fg, bg = _CMA_LEVEL_STYLES.get(clamped, _CMA_LEVEL_STYLES[1])
    titles = {1: "Pas CMA", 2: "CMA niveau 2", 3: "CMA niveau 3", 4: "CMA niveau 4"}
    title = titles.get(clamped, "")

    html = (
        f'<span title="{title}" style="display:inline-block;padding:2px 8px;border-radius:9999px;'
        f'font-size:0.75rem;font-weight:600;color:{fg};background:{bg}">'
        f'CMA {label}</span>'
    )
    return Markup(html)
|
||||
|
||||
|
||||
def format_dossier_name(name: str) -> str:
|
||||
"""Transforme un nom de dossier en nom lisible (ex: 15_23096332 → Dossier 23096332)."""
|
||||
"""Retourne le nom complet du dossier (ex: 1_23096332)."""
|
||||
if name == "racine":
|
||||
return "Non classés"
|
||||
m = re.match(r"^\d+_(\d+)$", name)
|
||||
if m:
|
||||
return f"Dossier {m.group(1)}"
|
||||
return name
|
||||
|
||||
|
||||
@@ -364,6 +382,7 @@ def create_app() -> Flask:
|
||||
app.jinja_env.filters["confidence_badge"] = confidence_badge
|
||||
app.jinja_env.filters["confidence_label"] = confidence_label
|
||||
app.jinja_env.filters["severity_badge"] = severity_badge
|
||||
app.jinja_env.filters["cma_level_badge"] = cma_level_badge
|
||||
app.jinja_env.filters["format_duration"] = format_duration
|
||||
app.jinja_env.filters["format_dossier_name"] = format_dossier_name
|
||||
app.jinja_env.filters["format_doc_name"] = format_doc_name
|
||||
@@ -445,13 +464,16 @@ def create_app() -> Flask:
|
||||
return jsonify({"error": f"PDF source '{source_file}' introuvable"}), 404
|
||||
|
||||
try:
|
||||
anonymized_text, new_dossier, report = process_pdf(pdf_path)
|
||||
pdf_results = process_pdf(pdf_path)
|
||||
stem = pdf_path.stem.replace(" ", "_")
|
||||
subdir = None
|
||||
if pdf_path.parent != input_dir:
|
||||
subdir = pdf_path.parent.name
|
||||
write_outputs(stem, anonymized_text, new_dossier, report, subdir=subdir)
|
||||
return jsonify({"ok": True, "message": "Traitement terminé"})
|
||||
multi = len(pdf_results) > 1
|
||||
for part_idx, (anonymized_text, new_dossier, report) in enumerate(pdf_results):
|
||||
part_stem = f"{stem}_part{part_idx + 1}" if multi else stem
|
||||
write_outputs(part_stem, anonymized_text, new_dossier, report, subdir=subdir)
|
||||
return jsonify({"ok": True, "message": f"Traitement terminé ({len(pdf_results)} dossier(s))"})
|
||||
except Exception as e:
|
||||
logger.exception("Erreur lors du retraitement")
|
||||
return jsonify({"error": str(e)}), 500
|
||||
|
||||
@@ -18,8 +18,8 @@
|
||||
.sidebar {
|
||||
width: 280px;
|
||||
min-width: 280px;
|
||||
background: #0f172a;
|
||||
color: #cbd5e1;
|
||||
background: #1e293b;
|
||||
color: #e2e8f0;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
position: fixed;
|
||||
@@ -30,16 +30,16 @@
|
||||
}
|
||||
.sidebar-header {
|
||||
padding: 1.25rem 1rem;
|
||||
border-bottom: 1px solid #1e293b;
|
||||
border-bottom: 1px solid #334155;
|
||||
}
|
||||
.sidebar-header h1 {
|
||||
font-size: 1.1rem;
|
||||
color: #e2e8f0;
|
||||
color: #f1f5f9;
|
||||
font-weight: 700;
|
||||
}
|
||||
.sidebar-header p {
|
||||
font-size: 0.75rem;
|
||||
color: #64748b;
|
||||
color: #94a3b8;
|
||||
margin-top: 0.25rem;
|
||||
}
|
||||
.sidebar-nav {
|
||||
@@ -52,15 +52,16 @@
|
||||
font-size: 0.65rem;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.08em;
|
||||
color: #475569;
|
||||
color: #94a3b8;
|
||||
font-weight: 700;
|
||||
}
|
||||
.sidebar-nav a {
|
||||
display: block;
|
||||
padding: 0.4rem 1rem;
|
||||
color: #94a3b8;
|
||||
color: #cbd5e1;
|
||||
text-decoration: none;
|
||||
font-size: 0.8rem;
|
||||
font-weight: 600;
|
||||
border-left: 3px solid transparent;
|
||||
transition: all 0.15s;
|
||||
white-space: nowrap;
|
||||
@@ -68,45 +69,45 @@
|
||||
text-overflow: ellipsis;
|
||||
}
|
||||
.sidebar-nav a:hover {
|
||||
color: #e2e8f0;
|
||||
background: #1e293b;
|
||||
color: #f8fafc;
|
||||
background: #334155;
|
||||
border-left-color: #3b82f6;
|
||||
}
|
||||
.sidebar-nav a.sidebar-fusionne {
|
||||
color: #60a5fa;
|
||||
font-weight: 600;
|
||||
font-weight: 700;
|
||||
}
|
||||
|
||||
/* Search */
|
||||
.sidebar-search {
|
||||
padding: 0.75rem 1rem 0.5rem;
|
||||
border-bottom: 1px solid #1e293b;
|
||||
border-bottom: 1px solid #334155;
|
||||
}
|
||||
.sidebar-search input {
|
||||
width: 100%;
|
||||
padding: 0.45rem 0.6rem;
|
||||
border-radius: 6px;
|
||||
border: 1px solid #334155;
|
||||
background: #1e293b;
|
||||
border: 1px solid #475569;
|
||||
background: #0f172a;
|
||||
color: #e2e8f0;
|
||||
font-size: 0.8rem;
|
||||
outline: none;
|
||||
transition: border-color 0.15s;
|
||||
}
|
||||
.sidebar-search input::placeholder { color: #475569; }
|
||||
.sidebar-search input::placeholder { color: #64748b; }
|
||||
.sidebar-search input:focus { border-color: #3b82f6; }
|
||||
|
||||
/* Admin section */
|
||||
.sidebar-admin {
|
||||
padding: 1rem;
|
||||
border-top: 1px solid #1e293b;
|
||||
border-top: 1px solid #334155;
|
||||
font-size: 0.8rem;
|
||||
}
|
||||
.sidebar-admin label {
|
||||
display: block;
|
||||
margin-bottom: 0.35rem;
|
||||
font-weight: 600;
|
||||
color: #94a3b8;
|
||||
color: #cbd5e1;
|
||||
font-size: 0.7rem;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.05em;
|
||||
@@ -115,8 +116,8 @@
|
||||
width: 100%;
|
||||
padding: 0.4rem;
|
||||
border-radius: 6px;
|
||||
border: 1px solid #334155;
|
||||
background: #1e293b;
|
||||
border: 1px solid #475569;
|
||||
background: #0f172a;
|
||||
color: #e2e8f0;
|
||||
font-size: 0.8rem;
|
||||
margin-bottom: 0.5rem;
|
||||
@@ -145,7 +146,6 @@
|
||||
margin-left: 280px;
|
||||
flex: 1;
|
||||
padding: 2rem;
|
||||
max-width: 1100px;
|
||||
}
|
||||
|
||||
/* Utilities */
|
||||
@@ -249,17 +249,17 @@
|
||||
<nav class="sidebar-nav" id="sidebar-nav">
|
||||
{% block sidebar %}{% endblock %}
|
||||
</nav>
|
||||
<div class="sidebar-admin" style="border-top:1px solid #1e293b;padding:0.5rem 1rem;">
|
||||
<a href="/dashboard" style="display:block;color:#94a3b8;text-decoration:none;font-size:0.8rem;padding:0.35rem 0;transition:color 0.15s;"
|
||||
onmouseover="this.style.color='#e2e8f0'" onmouseout="this.style.color='#94a3b8'">
|
||||
<div class="sidebar-admin" style="border-top:1px solid #334155;padding:0.5rem 1rem;">
|
||||
<a href="/dashboard" style="display:block;color:#cbd5e1;text-decoration:none;font-size:0.8rem;font-weight:600;padding:0.35rem 0;transition:color 0.15s;"
|
||||
onmouseover="this.style.color='#f8fafc'" onmouseout="this.style.color='#cbd5e1'">
|
||||
Dashboard
|
||||
</a>
|
||||
<a href="/cpam" style="display:block;color:#94a3b8;text-decoration:none;font-size:0.8rem;padding:0.35rem 0;transition:color 0.15s;"
|
||||
onmouseover="this.style.color='#e2e8f0'" onmouseout="this.style.color='#94a3b8'">
|
||||
<a href="/cpam" style="display:block;color:#cbd5e1;text-decoration:none;font-size:0.8rem;font-weight:600;padding:0.35rem 0;transition:color 0.15s;"
|
||||
onmouseover="this.style.color='#f8fafc'" onmouseout="this.style.color='#cbd5e1'">
|
||||
Contrôles CPAM
|
||||
</a>
|
||||
<a href="/admin/referentiels" style="display:block;color:#94a3b8;text-decoration:none;font-size:0.8rem;padding:0.35rem 0;transition:color 0.15s;"
|
||||
onmouseover="this.style.color='#e2e8f0'" onmouseout="this.style.color='#94a3b8'">
|
||||
<a href="/admin/referentiels" style="display:block;color:#cbd5e1;text-decoration:none;font-size:0.8rem;font-weight:600;padding:0.35rem 0;transition:color 0.15s;"
|
||||
onmouseover="this.style.color='#f8fafc'" onmouseout="this.style.color='#cbd5e1'">
|
||||
Référentiels RAG
|
||||
</a>
|
||||
</div>
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
{% if siblings %}
|
||||
<div class="group-title" style="margin-top:1rem;">{{ current_group }}</div>
|
||||
{% for sib in siblings %}
|
||||
<a href="/dossier/{{ sib.path_rel }}" {% if sib.path_rel == filepath %}style="color:#e2e8f0;border-left-color:#3b82f6;background:#1e293b;"{% endif %}>
|
||||
<a href="/dossier/{{ sib.path_rel }}" {% if sib.path_rel == filepath %}style="color:#f8fafc;border-left-color:#3b82f6;background:#334155;"{% endif %}>
|
||||
{{ sib.name }}
|
||||
</a>
|
||||
{% endfor %}
|
||||
@@ -213,7 +213,11 @@
|
||||
{% if dp.cim10_suggestion %}
|
||||
<span class="badge" style="background:#dbeafe;color:#1d4ed8;font-size:0.85rem;">{{ dp.cim10_suggestion }}</span>
|
||||
{{ dp.cim10_confidence | confidence_badge }}
|
||||
{% if dp.est_cma %}<span class="badge" style="background:#fee2e2;color:#dc2626;font-size:0.75rem;">CMA</span>{% endif %}
|
||||
{% if dp.niveau_cma and dp.niveau_cma > 1 %}
|
||||
{{ dp.niveau_cma | cma_level_badge }}
|
||||
{% elif dp.est_cma %}
|
||||
<span class="badge" style="background:#fee2e2;color:#dc2626;font-size:0.75rem;">CMA</span>
|
||||
{% endif %}
|
||||
{{ dp.niveau_severite | severity_badge }}
|
||||
{% endif %}
|
||||
{% if dp.justification %}
|
||||
@@ -242,22 +246,40 @@
|
||||
<div class="card section">
|
||||
<h3>Diagnostics associés ({{ dossier.diagnostics_associes|length }})</h3>
|
||||
<table>
|
||||
<thead><tr><th>Texte</th><th>CIM-10</th><th>Confiance</th><th>Sévérité</th><th>Justification</th></tr></thead>
|
||||
<thead><tr><th>Texte</th><th>CIM-10</th><th>Confiance</th><th>CMA</th><th>Source</th><th>Justification</th></tr></thead>
|
||||
<tbody>
|
||||
{% for das in dossier.diagnostics_associes %}
|
||||
<tr>
|
||||
<td>
|
||||
{{ das.texte }}
|
||||
{% if das.est_cma %}<span class="badge" style="background:#fee2e2;color:#dc2626;font-size:0.7rem;margin-left:0.3rem;">CMA</span>{% endif %}
|
||||
</td>
|
||||
<td>{{ das.texte }}</td>
|
||||
<td>{% if das.cim10_suggestion %}<span class="badge" style="background:#dbeafe;color:#1d4ed8;">{{ das.cim10_suggestion }}</span>{% endif %}</td>
|
||||
<td>{{ das.cim10_confidence | confidence_badge }}</td>
|
||||
<td>{{ das.niveau_severite | severity_badge }}</td>
|
||||
<td>
|
||||
{% if das.niveau_cma and das.niveau_cma > 1 %}
|
||||
{{ das.niveau_cma | cma_level_badge }}
|
||||
{% elif das.est_cma %}
|
||||
<span class="badge" style="background:#fee2e2;color:#dc2626;font-size:0.7rem;">CMA</span>
|
||||
{% else %}
|
||||
—
|
||||
{% endif %}
|
||||
</td>
|
||||
<td>
|
||||
{% if das.source %}
|
||||
<span class="badge" style="background:#e0e7ff;color:#3730a3;font-size:0.7rem;">{{ das.source }}</span>
|
||||
{% endif %}
|
||||
{% if das.source_page %}
|
||||
<span style="font-size:0.7rem;color:#64748b;">p.{{ das.source_page }}</span>
|
||||
{% endif %}
|
||||
{% if das.source_excerpt %}
|
||||
<details style="margin-top:0.2rem;"><summary style="font-size:0.7rem;color:#94a3b8;cursor:pointer;">extrait</summary>
|
||||
<pre style="font-size:0.7rem;white-space:pre-wrap;max-width:300px;color:#475569;">{{ das.source_excerpt }}</pre>
|
||||
</details>
|
||||
{% endif %}
|
||||
</td>
|
||||
<td style="font-size:0.8rem;color:#475569;">{{ das.justification or '' }}</td>
|
||||
</tr>
|
||||
{% if das.raisonnement %}
|
||||
<tr>
|
||||
<td colspan="5" style="padding:0 0.75rem 0.5rem;">
|
||||
<td colspan="6" style="padding:0 0.75rem 0.5rem;">
|
||||
<details>
|
||||
<summary>Raisonnement LLM</summary>
|
||||
<pre>{{ das.raisonnement }}</pre>
|
||||
@@ -267,7 +289,7 @@
|
||||
{% endif %}
|
||||
{% if das.sources_rag %}
|
||||
<tr>
|
||||
<td colspan="5" style="padding:0 0.75rem 0.5rem;">
|
||||
<td colspan="6" style="padding:0 0.75rem 0.5rem;">
|
||||
<details>
|
||||
<summary>Sources RAG ({{ das.sources_rag|length }})</summary>
|
||||
{% for src in das.sources_rag %}
|
||||
|
||||
@@ -19,6 +19,7 @@ from src.control.cpam_response import (
|
||||
_build_cpam_prompt,
|
||||
_format_response,
|
||||
_search_rag_for_control,
|
||||
_validate_references,
|
||||
generate_cpam_response,
|
||||
)
|
||||
|
||||
@@ -173,6 +174,31 @@ class TestBuildPrompt:
|
||||
assert "contre_arguments_asymetrie" in prompt
|
||||
assert "contre_arguments_reglementaires" in prompt
|
||||
|
||||
def test_prompt_contains_cite_exacts(self):
|
||||
"""Le prompt renforcé demande des preuves exactes."""
|
||||
dossier = _make_dossier()
|
||||
controle = _make_controle()
|
||||
prompt = _build_cpam_prompt(dossier, controle, [])
|
||||
|
||||
assert "CITE" in prompt
|
||||
assert "EXACTS" in prompt
|
||||
|
||||
def test_prompt_contains_interdiction(self):
|
||||
"""Le prompt interdit les références inventées."""
|
||||
dossier = _make_dossier()
|
||||
controle = _make_controle()
|
||||
prompt = _build_cpam_prompt(dossier, controle, [])
|
||||
|
||||
assert "INTERDICTION ABSOLUE" in prompt
|
||||
|
||||
def test_prompt_contains_preuves_dossier_field(self):
|
||||
"""Le format JSON demandé inclut preuves_dossier."""
|
||||
dossier = _make_dossier()
|
||||
controle = _make_controle()
|
||||
prompt = _build_cpam_prompt(dossier, controle, [])
|
||||
|
||||
assert "preuves_dossier" in prompt
|
||||
|
||||
|
||||
class TestFormatResponse:
|
||||
def test_full_response_new_format(self):
|
||||
@@ -236,11 +262,94 @@ class TestFormatResponse:
|
||||
text = _format_response({})
|
||||
assert text == ""
|
||||
|
||||
def test_preuves_dossier_formatting(self):
|
||||
"""Le nouveau champ preuves_dossier est formaté correctement."""
|
||||
parsed = {
|
||||
"contre_arguments_medicaux": "Arguments...",
|
||||
"preuves_dossier": [
|
||||
{"element": "biologie", "valeur": "CRP 180 mg/L", "signification": "inflammation sévère"},
|
||||
{"element": "imagerie", "valeur": "lithiase cholédocienne", "signification": "confirme le diagnostic"},
|
||||
],
|
||||
"conclusion": "Conclusion...",
|
||||
}
|
||||
text = _format_response(parsed)
|
||||
|
||||
assert "PREUVES DU DOSSIER" in text
|
||||
assert "CRP 180 mg/L" in text
|
||||
assert "[biologie]" in text
|
||||
assert "[imagerie]" in text
|
||||
|
||||
def test_structured_references_formatting(self):
|
||||
"""Les références structurées sont formatées correctement."""
|
||||
parsed = {
|
||||
"contre_arguments_medicaux": "Arguments...",
|
||||
"references": [
|
||||
{"document": "Guide Méthodologique MCO 2026", "page": "64", "citation": "Le DAS doit être..."},
|
||||
],
|
||||
"conclusion": "Conclusion...",
|
||||
}
|
||||
text = _format_response(parsed)
|
||||
|
||||
assert "REFERENCES" in text
|
||||
assert "Guide Méthodologique MCO 2026" in text
|
||||
assert "p.64" in text
|
||||
assert "Le DAS doit être..." in text
|
||||
|
||||
def test_ref_warnings_appended(self):
|
||||
"""Les avertissements de références non vérifiées apparaissent."""
|
||||
parsed = {"conclusion": "Conclusion..."}
|
||||
warnings = ["Référence non vérifiable : Manuel Imaginaire 2025"]
|
||||
text = _format_response(parsed, ref_warnings=warnings)
|
||||
|
||||
assert "AVERTISSEMENT" in text
|
||||
assert "Manuel Imaginaire 2025" in text
|
||||
|
||||
|
||||
class TestValidateReferences:
    """Behaviour of _validate_references (hallucinated-reference detection)."""

    def test_valid_reference_no_warning(self):
        """A reference matching a RAG source produces no warning."""
        refs = {
            "references": [
                {"document": "Guide Méthodologique MCO 2026", "page": "64", "citation": "..."},
            ]
        }
        rag_sources = [{"document": "guide_methodo", "page": 64, "extrait": "..."}]
        assert len(_validate_references(refs, rag_sources)) == 0

    def test_invented_reference_detected(self):
        """A reference absent from the RAG sources is flagged."""
        refs = {
            "references": [
                {"document": "Manuel Inventé 2025", "page": "12", "citation": "..."},
            ]
        }
        rag_sources = [{"document": "guide_methodo", "page": 64, "extrait": "..."}]
        warnings = _validate_references(refs, rag_sources)
        assert len(warnings) == 1
        assert "Manuel Inventé" in warnings[0]

    def test_old_format_string_no_crash(self):
        """The legacy string format for references must not crash validation."""
        warnings = _validate_references(
            {"references": "Guide méthodo p.64"},
            [{"document": "guide_methodo"}],
        )
        assert len(warnings) == 0  # no validation on the legacy format

    def test_no_sources_no_validation(self):
        """Without RAG sources, no validation is attempted."""
        refs = {
            "references": [
                {"document": "Quelque chose", "page": "1", "citation": "..."},
            ]
        }
        assert len(_validate_references(refs, [])) == 0
|
||||
|
||||
|
||||
class TestGenerateResponse:
|
||||
@patch("src.control.cpam_response.call_ollama")
|
||||
@patch("src.control.cpam_response.call_anthropic")
|
||||
@patch("src.control.cpam_response._search_rag_for_control")
|
||||
def test_generate_success(self, mock_rag, mock_ollama):
|
||||
def test_generate_success_ollama_cpam(self, mock_rag, mock_anthropic, mock_ollama):
|
||||
"""Mode hybride : Ollama CPAM (27b) disponible → utilisé en premier."""
|
||||
mock_rag.return_value = [
|
||||
{"document": "guide_methodo", "page": 64, "extrait": "Texte guide"},
|
||||
]
|
||||
@@ -259,12 +368,42 @@ class TestGenerateResponse:
|
||||
assert "Contre-arguments médicaux..." in text
|
||||
assert len(sources) == 1
|
||||
assert sources[0].document == "guide_methodo"
|
||||
# Ollama CPAM appelé en premier (avec model= et timeout=)
|
||||
mock_ollama.assert_called_once()
|
||||
mock_anthropic.assert_not_called()
|
||||
|
||||
@patch("src.control.cpam_response.call_ollama")
|
||||
@patch("src.control.cpam_response.call_anthropic")
|
||||
@patch("src.control.cpam_response._search_rag_for_control")
|
||||
def test_generate_ollama_unavailable(self, mock_rag, mock_ollama):
|
||||
def test_generate_fallback_haiku(self, mock_rag, mock_anthropic, mock_ollama):
|
||||
"""Ollama CPAM indisponible → fallback Haiku."""
|
||||
mock_rag.return_value = [
|
||||
{"document": "guide_methodo", "page": 64, "extrait": "Texte guide"},
|
||||
]
|
||||
mock_ollama.return_value = None
|
||||
mock_anthropic.return_value = {
|
||||
"analyse_contestation": "Analyse Haiku...",
|
||||
"contre_arguments_medicaux": "Contre-args Haiku...",
|
||||
"conclusion": "Conclusion Haiku...",
|
||||
}
|
||||
|
||||
dossier = _make_dossier()
|
||||
controle = _make_controle()
|
||||
|
||||
text, sources = generate_cpam_response(dossier, controle)
|
||||
|
||||
assert "Contre-args Haiku..." in text
|
||||
# Ollama CPAM appelé d'abord (échec), puis Haiku
|
||||
mock_ollama.assert_called_once()
|
||||
mock_anthropic.assert_called_once()
|
||||
|
||||
@patch("src.control.cpam_response.call_ollama")
|
||||
@patch("src.control.cpam_response.call_anthropic")
|
||||
@patch("src.control.cpam_response._search_rag_for_control")
|
||||
def test_generate_all_unavailable(self, mock_rag, mock_anthropic, mock_ollama):
|
||||
"""Ollama CPAM, Haiku et Ollama défaut tous indisponibles → texte vide."""
|
||||
mock_rag.return_value = []
|
||||
mock_anthropic.return_value = None
|
||||
mock_ollama.return_value = None
|
||||
|
||||
dossier = _make_dossier()
|
||||
|
||||
@@ -2,7 +2,12 @@
|
||||
|
||||
import pytest
|
||||
|
||||
from src.medical.das_filter import clean_diagnostic_text, is_valid_diagnostic_text, correct_known_miscodes
|
||||
from src.medical.das_filter import (
|
||||
clean_diagnostic_text,
|
||||
is_valid_diagnostic_text,
|
||||
correct_known_miscodes,
|
||||
SEMANTIC_REDUNDANCIES,
|
||||
)
|
||||
|
||||
|
||||
class TestCleanDiagnosticText:
|
||||
@@ -223,3 +228,24 @@ class TestCorrectKnownMiscodes:
|
||||
def test_d64_9_pas_corrige(self):
|
||||
"""D64.9 lui-même → pas de correction."""
|
||||
assert correct_known_miscodes("D64.9", "Anémie") is None
|
||||
|
||||
|
||||
class TestSemanticRedundanciesStructure:
    """Shape checks for the SEMANTIC_REDUNDANCIES constant."""

    def test_is_list_of_tuples(self):
        """Each entry is a (dominated_prefix, [dominant_prefixes]) pair of strings."""
        assert isinstance(SEMANTIC_REDUNDANCIES, list)
        for entry in SEMANTIC_REDUNDANCIES:
            assert isinstance(entry, tuple)
            assert len(entry) == 2
            dominated_prefix, dominant_prefixes = entry
            assert isinstance(dominated_prefix, str)
            assert isinstance(dominant_prefixes, list)
            assert all(isinstance(p, str) for p in dominant_prefixes)

    def test_has_known_rules(self):
        """The table covers the known HTA / cystitis / pneumonia rules."""
        dominated = {rule[0] for rule in SEMANTIC_REDUNDANCIES}
        for prefix in ("I10", "N30", "J18"):
            assert prefix in dominated
|
||||
|
||||
@@ -20,6 +20,7 @@ from src.medical.fusion import (
|
||||
_dedup_actes,
|
||||
_is_enriched,
|
||||
)
|
||||
from src.medical.das_filter import apply_semantic_dedup
|
||||
|
||||
|
||||
class TestCIM10Specificity:
|
||||
@@ -354,3 +355,139 @@ class TestDedupPreferEnriched:
|
||||
result = _dedup_diagnostics(das)
|
||||
assert len(result) == 1
|
||||
assert result[0].cim10_confidence == "high"
|
||||
|
||||
|
||||
class TestDasFamilyDpRemoved:
    """DAS-vs-DP dedup by CIM-10 family (first three characters of the code)."""

    @staticmethod
    def _merged_das_codes(*dossiers):
        """Merge the given dossiers and return the surviving DAS codes as a set."""
        merged = merge_dossiers(list(dossiers))
        return {d.cim10_suggestion for d in merged.diagnostics_associes}

    def test_same_family_removed(self):
        """DP=K85.1, DAS=[K85.0, K85.9, E66.0] -> only E66.0 survives."""
        dossier = DossierMedical(
            diagnostic_principal=Diagnostic(texte="Pancréatite biliaire", cim10_suggestion="K85.1"),
            diagnostics_associes=[
                Diagnostic(texte="Pancréatite SAI", cim10_suggestion="K85.0"),
                Diagnostic(texte="Pancréatite aiguë", cim10_suggestion="K85.9"),
                Diagnostic(texte="Obésité", cim10_suggestion="E66.0"),
            ],
        )
        codes = self._merged_das_codes(dossier)
        assert "K85.0" not in codes
        assert "K85.9" not in codes
        assert "E66.0" in codes

    def test_trauma_siblings_kept(self):
        """S/T chapter: distinct anatomical sites are all kept."""
        dossier = DossierMedical(
            diagnostic_principal=Diagnostic(texte="Fracture col fémoral", cim10_suggestion="S72.1"),
            diagnostics_associes=[
                Diagnostic(texte="Fracture trochanter", cim10_suggestion="S72.0"),
                Diagnostic(texte="Fracture sous-troch", cim10_suggestion="S72.3"),
            ],
        )
        codes = self._merged_das_codes(dossier)
        assert "S72.0" in codes
        assert "S72.3" in codes

    def test_diabetes_complications_kept(self):
        """E10-E14: distinct diabetic complications are all kept."""
        dossier = DossierMedical(
            diagnostic_principal=Diagnostic(texte="Diabète avec complications oculaires", cim10_suggestion="E11.6"),
            diagnostics_associes=[
                Diagnostic(texte="Diabète avec complications rénales", cim10_suggestion="E11.2"),
                Diagnostic(texte="HTA essentielle", cim10_suggestion="I10"),
            ],
        )
        codes = self._merged_das_codes(dossier)
        assert "E11.2" in codes
        assert "I10" in codes

    def test_parent_child_removed(self):
        """DP=K85.1, DAS=[K85] -> the K85 parent code is dropped."""
        dossier = DossierMedical(
            diagnostic_principal=Diagnostic(texte="Pancréatite biliaire", cim10_suggestion="K85.1"),
            diagnostics_associes=[
                Diagnostic(texte="Pancréatite", cim10_suggestion="K85"),
            ],
        )
        codes = self._merged_das_codes(dossier)
        assert len(codes) == 0

    def test_ocr_dp_not_promoted(self):
        """A merge whose second DP is the OCR artefact 'À 09' must not promote it to DAS."""
        main = DossierMedical(
            diagnostic_principal=Diagnostic(texte="Pancréatite biliaire", cim10_suggestion="K85.1"),
        )
        artefact = DossierMedical(
            diagnostic_principal=Diagnostic(texte="À 09", cim10_suggestion="A41.9"),
        )
        codes = self._merged_das_codes(main, artefact)
        assert "A41.9" not in codes
|
||||
|
||||
|
||||
class TestSemanticDedup:
    """Semantic-redundancy removal between DAS codes."""

    def test_i10_removed_when_i11_present(self):
        """I10 (essential hypertension) is dropped when I11.9 is present."""
        deduped = apply_semantic_dedup([
            Diagnostic(texte="HTA essentielle", cim10_suggestion="I10"),
            Diagnostic(texte="Cardiopathie hypertensive", cim10_suggestion="I11.9"),
            Diagnostic(texte="Obésité", cim10_suggestion="E66.0"),
        ])
        codes = {d.cim10_suggestion for d in deduped}
        assert "I10" not in codes
        assert "I11.9" in codes
        assert "E66.0" in codes

    def test_n30_removed_when_n39_present(self):
        """N30.9 (cystitis) is dropped when N39.0 (urinary infection) is present."""
        deduped = apply_semantic_dedup([
            Diagnostic(texte="Infection urinaire", cim10_suggestion="N39.0"),
            Diagnostic(texte="Cystite SAI", cim10_suggestion="N30.9"),
        ])
        codes = {d.cim10_suggestion for d in deduped}
        assert "N39.0" in codes
        assert "N30.9" not in codes

    def test_j18_removed_when_j15_present(self):
        """J18.9 (pneumonia NOS) is dropped when a specific J15.1 pneumonia is present."""
        deduped = apply_semantic_dedup([
            Diagnostic(texte="Pneumonie SAI", cim10_suggestion="J18.9"),
            Diagnostic(texte="Pneumonie à Klebsiella", cim10_suggestion="J15.1"),
        ])
        codes = {d.cim10_suggestion for d in deduped}
        assert "J15.1" in codes
        assert "J18.9" not in codes

    def test_no_removal_without_dominant(self):
        """I10 is kept when no dominant I11/I12/I13 code is present."""
        deduped = apply_semantic_dedup([
            Diagnostic(texte="HTA essentielle", cim10_suggestion="I10"),
            Diagnostic(texte="Obésité", cim10_suggestion="E66.0"),
        ])
        codes = {d.cim10_suggestion for d in deduped}
        assert "I10" in codes
        assert "E66.0" in codes

    def test_semantic_dedup_in_merge(self):
        """merge_dossiers applies the semantic dedup to the merged DAS list."""
        dossier = DossierMedical(
            diagnostic_principal=Diagnostic(texte="Sepsis", cim10_suggestion="A41.9"),
            diagnostics_associes=[
                Diagnostic(texte="HTA essentielle", cim10_suggestion="I10"),
                Diagnostic(texte="Cardiopathie hypertensive", cim10_suggestion="I11.9"),
            ],
        )
        merged = merge_dossiers([dossier])
        codes = {d.cim10_suggestion for d in merged.diagnostics_associes}
        assert "I10" not in codes
        assert "I11.9" in codes
|
||||
|
||||
95
tests/test_page_tracker.py
Normal file
95
tests/test_page_tracker.py
Normal file
@@ -0,0 +1,95 @@
|
||||
"""Tests pour le module PageTracker (traçabilité source)."""
|
||||
|
||||
import pytest
|
||||
|
||||
from src.extraction.page_tracker import PageTracker
|
||||
|
||||
|
||||
class TestCharToPage:
    """char_to_page maps a character offset onto a 1-based page number."""

    def test_first_page(self):
        tracker = PageTracker([(0, 100), (102, 200)])
        for offset in (0, 50, 99):
            assert tracker.char_to_page(offset) == 1

    def test_second_page(self):
        tracker = PageTracker([(0, 100), (102, 200)])
        assert tracker.char_to_page(102) == 2
        assert tracker.char_to_page(150) == 2

    def test_beyond_last_page(self):
        """Offsets past the last page resolve to the last page."""
        assert PageTracker([(0, 100), (102, 200)]).char_to_page(300) == 2

    def test_single_page(self):
        assert PageTracker([(0, 500)]).char_to_page(250) == 1

    def test_empty_offsets(self):
        """With no offsets at all the tracker defaults to page 1."""
        assert PageTracker([]).char_to_page(0) == 1
|
||||
|
||||
|
||||
class TestFindPageForText:
    """find_page_for_text locates the page containing a text snippet."""

    def _make_tracker(self):
        """Build a tracker simulating a 3-page document.

        Returns the (PageTracker, full_text) pair where full_text joins the
        pages with a blank-line separator, mirroring real PDF extraction.
        """
        pages = [
            "Pancréatite aiguë biliaire",
            "Cholécystectomie par coelioscopie",
            "TTT de sortie: Augmentin IV",
        ]
        separator = "\n\n"
        full_text = separator.join(pages)

        offsets = []
        cursor = 0
        for page_text in pages:
            offsets.append((cursor, cursor + len(page_text)))
            cursor += len(page_text) + len(separator)

        return PageTracker(offsets), full_text

    def test_exact_match_page1(self):
        tracker, text = self._make_tracker()
        assert tracker.find_page_for_text("Pancréatite", text) == 1

    def test_exact_match_page2(self):
        tracker, text = self._make_tracker()
        assert tracker.find_page_for_text("Cholécystectomie", text) == 2

    def test_exact_match_page3(self):
        tracker, text = self._make_tracker()
        assert tracker.find_page_for_text("Augmentin", text) == 3

    def test_case_insensitive(self):
        # A lowercase query still matches the capitalized page-1 text.
        tracker, text = self._make_tracker()
        assert tracker.find_page_for_text("pancréatite", text) == 1

    def test_not_found(self):
        tracker, text = self._make_tracker()
        assert tracker.find_page_for_text("inexistant", text) is None

    def test_empty_text(self):
        # An empty query never matches a page.
        tracker, text = self._make_tracker()
        assert tracker.find_page_for_text("", text) is None
|
||||
|
||||
|
||||
class TestExtractExcerpt:
    """extract_excerpt returns a snippet around a match, with ellipses."""

    def test_returns_excerpt(self):
        padded = "A" * 200 + "Pancréatite aiguë" + "B" * 200
        tracker = PageTracker([(0, len(padded))])
        snippet = tracker.extract_excerpt("Pancréatite aiguë", padded, context_chars=50)
        assert snippet is not None
        assert "Pancréatite aiguë" in snippet
        # Truncated on both sides, so both ellipses must be present.
        assert snippet.startswith("...")
        assert snippet.endswith("...")

    def test_at_start(self):
        text = "Pancréatite aiguë biliaire " + "X" * 200
        tracker = PageTracker([(0, len(text))])
        snippet = tracker.extract_excerpt("Pancréatite", text, context_chars=50)
        assert snippet is not None
        # No leading ellipsis when the match sits at the very beginning.
        assert not snippet.startswith("...")

    def test_not_found(self):
        text = "Texte sans rapport"
        tracker = PageTracker([(0, len(text))])
        assert tracker.extract_excerpt("inexistant", text) is None
|
||||
@@ -8,6 +8,7 @@ from src.medical.severity import (
|
||||
enrich_dossier_severity,
|
||||
_detect_severity_markers,
|
||||
_is_heuristic_cma,
|
||||
_load_cma_levels,
|
||||
)
|
||||
|
||||
|
||||
@@ -59,6 +60,49 @@ class TestHeuristicCMA:
|
||||
assert _is_heuristic_cma(None) is False
|
||||
|
||||
|
||||
class TestCMALevels:
    """Tests for the official ATIH CMA level lookup."""

    def test_load_cma_levels(self):
        levels = _load_cma_levels()
        assert levels
        # A01.0 is listed with severity 2 in cocoa_entries.
        assert levels.get("A01.0") == 2

    def test_official_level_4(self):
        """At least one code maps to CMA level 4."""
        levels = _load_cma_levels()
        assert any(level == 4 for level in levels.values())

    def test_official_level_propagated(self):
        """evaluate_severity propagates the official CMA level."""
        levels = _load_cma_levels()
        # Pick any level-3 code, if the table contains one.
        code_lv3 = next((code for code, level in levels.items() if level == 3), None)
        if code_lv3:
            info = evaluate_severity(
                Diagnostic(texte="Test diagnostic", cim10_suggestion=code_lv3)
            )
            assert info.niveau_cma == 3
            assert info.est_cma_probable is True

    def test_heuristic_fallback_level_2(self):
        """A heuristic CMA code without an official entry gets level >= 2."""
        # E11.9 is in the heuristic roots AND in the official file.
        # NOTE(review): ideally use a heuristic code absent from the official
        # table here — confirm against the heuristic root list.
        info = evaluate_severity(Diagnostic(texte="Test", cim10_suggestion="E11.9"))
        assert info.est_cma_probable is True
        assert info.niveau_cma >= 2

    def test_non_cma_remains_level_1(self):
        """A non-CMA code stays at level 1."""
        info = evaluate_severity(Diagnostic(texte="Grippe", cim10_suggestion="J11.1"))
        if not info.est_cma_probable:
            assert info.niveau_cma == 1
|
||||
|
||||
|
||||
class TestEvaluateSeverity:
|
||||
def test_cma_code_detected(self):
|
||||
diag = Diagnostic(texte="Diabète type 2", cim10_suggestion="E11.9")
|
||||
@@ -66,7 +110,8 @@ class TestEvaluateSeverity:
|
||||
assert info.est_cma_probable is True
|
||||
|
||||
def test_non_cma_code(self):
|
||||
diag = Diagnostic(texte="Pancréatite aiguë biliaire", cim10_suggestion="K85.1")
|
||||
"""Un code non CMA (J11.1 grippe) n'est pas détecté comme CMA."""
|
||||
diag = Diagnostic(texte="Grippe", cim10_suggestion="J11.1")
|
||||
info = evaluate_severity(diag)
|
||||
assert info.est_cma_probable is False
|
||||
|
||||
@@ -82,6 +127,12 @@ class TestEvaluateSeverity:
|
||||
info = evaluate_severity(diag)
|
||||
assert info.est_cma_probable is True
|
||||
|
||||
def test_niveau_cma_in_result(self):
|
||||
"""Le champ niveau_cma est toujours renseigné."""
|
||||
diag = Diagnostic(texte="Sepsis", cim10_suggestion="A41.9")
|
||||
info = evaluate_severity(diag)
|
||||
assert info.niveau_cma >= 1
|
||||
|
||||
|
||||
class TestEnrichDossierSeverity:
|
||||
def test_enriches_das_in_place(self):
|
||||
@@ -119,3 +170,22 @@ class TestEnrichDossierSeverity:
|
||||
assert das[0].est_cma is True
|
||||
assert das[0].est_cms is True
|
||||
assert cms_count == 1
|
||||
|
||||
def test_niveau_cma_set_on_das(self):
    """enrich_dossier_severity sets niveau_cma on each associated diagnosis."""
    principal = Diagnostic(texte="Pancréatite", cim10_suggestion="K85.1")
    associated = [
        Diagnostic(texte="Fibrillation auriculaire", cim10_suggestion="I48.9"),
    ]
    enrich_dossier_severity(principal, associated)
    niveau = associated[0].niveau_cma
    assert niveau is not None
    # I48.9 is a CMA, so its level must be at least 2.
    assert niveau >= 2
|
||||
|
||||
def test_alertes_contain_cma_level(self):
    """Generated alerts mention the CMA level."""
    principal = Diagnostic(texte="Test", cim10_suggestion="K85.1")
    associated = [Diagnostic(texte="Sepsis", cim10_suggestion="A41.9")]
    alertes, _, _ = enrich_dossier_severity(principal, associated)
    assert any("CMA niveau" in alerte for alerte in alertes)
|
||||
|
||||
@@ -192,7 +192,7 @@ class TestSplitDocuments:
|
||||
# --- Test intégration process_pdf ---
|
||||
|
||||
class TestProcessPdfMulti:
|
||||
@patch("src.main.extract_text")
|
||||
@patch("src.main.extract_text_with_pages")
|
||||
@patch("src.main.extract_medical_info")
|
||||
@patch("src.main._run_edsnlp", return_value=None)
|
||||
@patch("src.main._use_edsnlp", False)
|
||||
@@ -202,9 +202,10 @@ class TestProcessPdfMulti:
|
||||
from pathlib import Path
|
||||
from src.main import process_pdf
|
||||
from src.config import DossierMedical, Diagnostic
|
||||
from src.extraction.page_tracker import PageTracker
|
||||
|
||||
# Mock extract_text retournant un texte multi-épisodes Trackare
|
||||
mock_extract.return_value = TRACKARE_MULTI
|
||||
# Mock extract_text_with_pages retournant un texte multi-épisodes Trackare
|
||||
mock_extract.return_value = (TRACKARE_MULTI, PageTracker([(0, len(TRACKARE_MULTI))]))
|
||||
|
||||
# Mock extract_medical_info retournant un DossierMedical minimal
|
||||
mock_medical.return_value = DossierMedical(
|
||||
|
||||
Reference in New Issue
Block a user