feat: expert CPAM TIM method + extended rules engine

CPAM — TIM method (defence brief):
- Rewrote CPAM_ARGUMENTATION with the 5-pass TIM reasoning (administrative context → real motive → biology confrontation → hierarchy → defensive validation)
- _BIO_THRESHOLDS (19 entries) + _build_bio_confrontation() to confront biology against the diagnosis, with numeric thresholds and verdicts
- Dual-format _format_response(): new TIM layout (numbered pleas, biology table, non-defensible codes, operative conclusion) + legacy backward compatibility
- CPAM_ADVERSARIAL updated to check intellectual honesty
- Existing tests adapted + 12 new tests (bio confrontation, TIM format)

Rules engine:
- New YAML rules: demographic, diagnostic_conflicts, procedure_diagnosis, temporal, parcours
- FAISS bio extraction (vector synonym search)
- Enriched veto engine (citations, Trackare skip, demographic rules)
- Decision engine: _apply_bio_rules_gen() + analytic matchers

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
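The message only names _BIO_THRESHOLDS and _build_bio_confrontation(); neither appears in the hunks below. As a rough sketch of the idea — every threshold, field and verdict wording here is invented for illustration, not the committed code:

# Illustrative only — not the committed _BIO_THRESHOLDS / _build_bio_confrontation().
_BIO_THRESHOLDS_SKETCH = {
    # test -> (borne basse, borne haute, unité)
    "Sodium": (135.0, 145.0, "mmol/L"),
    "Potassium": (3.5, 5.0, "mmol/L"),
    "Hémoglobine": (12.0, 16.0, "g/dL"),
}

def _build_bio_confrontation_sketch(test, value):
    """Retourne un verdict court confrontant une valeur à sa plage de référence."""
    bounds = _BIO_THRESHOLDS_SKETCH.get(test)
    if bounds is None:
        return f"{test} = {value} : pas de seuil de référence"
    low, high, unit = bounds
    if value < low:
        return f"{test} = {value} {unit} : sous le seuil bas ({low})"
    if value > high:
        return f"{test} = {value} {unit} : au-dessus du seuil haut ({high})"
    return f"{test} = {value} {unit} : dans les normes ({low}-{high})"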
@@ -4,10 +4,14 @@ from __future__ import annotations
import re
import unicodedata
import logging

import numpy as np
from ..config import BiologieCle, DossierMedical, load_lab_value_sanity
from .bio_normals import BIO_NORMALS, _is_abnormal

logger = logging.getLogger(__name__)


def _norm_key(s: str) -> str:
    """Normalise une clé (minuscules, sans accents) pour index YAML."""
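The body of _norm_key falls outside this hunk; only the signature and docstring are visible. A plausible implementation matching the docstring (lowercase, accents stripped via unicodedata, already imported above) would be, as an assumption:

def _norm_key_sketch(s: str) -> str:
    # Décomposition NFKD puis suppression des marques combinantes (accents)
    s = unicodedata.normalize("NFKD", s)
    s = "".join(c for c in s if not unicodedata.combining(c))
    return s.lower().strip()

# _norm_key_sketch("Créatinine à jeun") -> "creatinine a jeun"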
@@ -68,6 +72,100 @@ def _sanitize_bio_value(test_name: str, raw_value: str, sanity_cfg: dict) -> tup
    return token, val, quality, reason


def _extract_biologie_faiss(text: str, dossier: DossierMedical) -> None:
    """Extraction biologique via recherche vectorielle FAISS pour les synonymes.

    Complète les regex pour les termes non prévus ou les variations complexes.
    """
    from .rag_index import get_index
    from .rag_search import _get_embed_model

    res = get_index(kind="bio")
    if not res:
        return
    faiss_index, metadata = res

    try:
        model = _get_embed_model()
    except Exception as e:
        logger.warning("FAISS Bio: modèle d'embedding indisponible (%s)", e)
        return

    # 1. Découpage du texte en segments glissants (phrases ou groupes de mots)
    lines = [l.strip() for l in text.split("\n") if len(l.strip()) > 5]
    if not lines:
        return

    segments = []
    for line in lines:
        if len(line.split()) > 15:
            words = line.split()
            for i in range(0, len(words), 10):
                segments.append(" ".join(words[i:i+12]))
        else:
            segments.append(line)

    if not segments:
        return

    # 2. Encodage des segments
    try:
        embeddings = model.encode(segments, normalize_embeddings=True, show_progress_bar=False)
        embeddings = np.array(embeddings, dtype=np.float32)
    except Exception as e:
        logger.warning("FAISS Bio: erreur encodage segments (%s)", e)
        return

    # 3. Recherche dans l'index bio
    MIN_SCORE_BIO = 0.82
    scores, indices = faiss_index.search(embeddings, 1)

    sanity_cfg = load_lab_value_sanity()
    seen_faiss = set()

    for i, (score, idx) in enumerate(zip(scores, indices)):
        s = float(score[0])
        if s < MIN_SCORE_BIO or idx[0] < 0:
            continue

        meta = metadata[idx[0]]
        concept_name = meta.get("code")
        synonym_matched = meta.get("extrait")
        segment = segments[i]

        # 4. Capture de la valeur numérique
        val_match = re.search(r"(?:[=àa:]\s*)?(\d+(?:[.,]\d+)?)\s*(?:[a-zA-Z/%/µ/mm3/G/L/U/I]+)?", segment)
        if not val_match:
            continue

        raw_value = val_match.group(1)
        entry_key = (concept_name, raw_value)
        if entry_key in seen_faiss:
            continue
        seen_faiss.add(entry_key)

        sanitized = _sanitize_bio_value(concept_name, raw_value, sanity_cfg)
        if sanitized:
            token, val_num, quality, reason = sanitized
            anomalie = _is_abnormal(concept_name, token)

            is_dup = any(b.test == concept_name and b.valeur == raw_value for b in dossier.biologie_cle)
            if is_dup:
                continue

            dossier.biologie_cle.append(
                BiologieCle(
                    test=concept_name,
                    valeur=raw_value,
                    valeur_num=val_num,
                    anomalie=anomalie,
                    quality=quality,
                    discard_reason=reason,
                )
            )
            logger.debug("FAISS Bio match: %s (%s) = %s dans '%s'", concept_name, synonym_matched, raw_value, segment)


def _extract_biologie(text: str, dossier: DossierMedical) -> None:
    """Extrait des résultats biologiques clés.
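A standalone illustration of the segmentation rule in _extract_biologie_faiss above: lines longer than 15 words are split into windows of up to 12 words that advance by 10, so consecutive windows overlap by 2 words and a lab value is less likely to be cut off from its test name (the 20-word line below is a made-up example):

words = [f"w{i}" for i in range(1, 21)]   # une ligne de 20 mots
segments = [" ".join(words[i:i + 12]) for i in range(0, len(words), 10)]
# -> ["w1 ... w12", "w11 ... w20"] : deux segments qui se chevauchent (w11, w12 présents dans les deux)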
@@ -90,12 +188,20 @@ def _extract_biologie(text: str, dossier: DossierMedical) -> None:
        # Ionogramme / électrolytes
        (r"(?:[Ss]odium|[Nn]atr[ée]mie|(?<![A-Za-z])Na\+?(?![A-Za-z]))\s*[=:àa]?\s*([0-9]{2,3}(?:[.,][0-9]+)?)\s*(?:mmol/L|mEq/L)?", "Sodium"),
        (r"(?:[Pp]otassium|[Kk]ali[ée]mie|(?<![A-Za-z])K\+?(?![A-Za-z]))\s*[=:àa]?\s*([0-9](?:[.,][0-9]+)?)\s*(?:mmol/L|mEq/L)?", "Potassium"),
        (r"(?:[Cc]hlore|[Cc]hlor[ée]mie|(?<![A-Za-z])Cl-?(?![A-Za-z]))\s*[=:àa]?\s*(\d+(?:[.,]\d+)?)\s*(?:mmol/L)?", "Chlore"),
        (r"(?:[Cc]alcium|[Cc]alci[ée]mie|(?<![A-Za-z])Ca\+?(?![A-Za-z]))\s*[=:àa]?\s*(\d+(?:[.,]\d+)?)\s*(?:mmol/L|mg/dL)?", "Calcium"),

        (r"[Tt]roponine\s+(?:us\s+)?(n[ée]gative|positive|normale)", "Troponine"),
        (r"(?:[Hh][ée]moglobine|\bHb\b)\s*[=:àa]?\s*(\d+(?:[.,]\d+)?)\s*(?:g/dL|g/L)?", "Hémoglobine"),
        (r"\bVGM\b\s*[=:àa]?\s*(\d+(?:[.,]\d+)?)\s*(?:fL)?", "VGM"),
        (r"\bFerritine\b\s*[=:àa]?\s*(\d+(?:[.,]\d+)?)\s*(?:µg/L|ng/mL)?", "Ferritine"),
        (r"[Pp]laquettes?\s*[=:àa]?\s*(\d+(?:[.,]\d+)?)\s*(?:/mm3|G/L)?", "Plaquettes"),
        (r"[Ll]eucocytes?\s*[=:àa]?\s*(\d+(?:[.,]\d+)?)\s*(?:/mm3|G/L)?", "Leucocytes"),
        (r"[Cc]r[ée]atinine?\s*[=:àa]?\s*(\d+(?:[.,]\d+)?)\s*(?:µmol/L|mg/dL)?", "Créatinine"),
        (r"\bUr[ée]e\b\s*[=:àa]?\s*(\d+(?:[.,]\d+)?)\s*(?:mmol/L|g/L)?", "Urée"),
        (r"(?:[Gg]lyc[ée]mie|[Gg]lucose)\s*[=:àa]?\s*(\d+(?:[.,]\d+)?)\s*(?:mmol/L|g/L)?", "Glycémie"),
        (r"\bHbA1c\b\s*[=:àa]?\s*(\d+(?:[.,]\d+)?)\s*(?:%)?", "HbA1c"),
        (r"\bTSH\b\s*[=:àa]?\s*(\d+(?:[.,]\d+)?)\s*(?:mUI/L)?", "TSH"),
    ]
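A quick check of how one of these patterns behaves on free text; the sentence is a made-up example, the regex is the "Sodium" entry above:

import re

sodium_re = r"(?:[Ss]odium|[Nn]atr[ée]mie|(?<![A-Za-z])Na\+?(?![A-Za-z]))\s*[=:àa]?\s*([0-9]{2,3}(?:[.,][0-9]+)?)\s*(?:mmol/L|mEq/L)?"
m = re.search(sodium_re, "Bilan d'entrée : natrémie à 128 mmol/L, kaliémie normale.")
print(m.group(1))  # -> "128", conservé comme valeur brute du test "Sodium"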
@@ -182,3 +288,6 @@ def _extract_biologie(text: str, dossier: DossierMedical) -> None:
                    discard_reason=reason,
                )
            )

    # --- Complément par recherche vectorielle (Synonymes) ---
    _extract_biologie_faiss(text, dossier)
@@ -96,6 +96,46 @@ def extract_medical_info(
    if use_rag:
        _enrich_with_rag(dossier)

    # NUKE-3 : sélection DP type DIM (CRH uniquement)
    if dossier.document_type != "trackare":
        try:
            from .dp_selector import select_dp, build_synthese

            synthese = build_synthese(dossier, parsed_data)
            selection = select_dp(
                dossier, synthese, config={"llm_enabled": use_rag},
            )
            dossier.dp_selection = selection

            if selection.chosen_code:
                current_code = (
                    dossier.diagnostic_principal.cim10_suggestion
                    if dossier.diagnostic_principal else None
                )
                has_multiple = len(selection.candidates) >= 2
                # MAJ DP si :
                # - DP existant et NUKE-3 sélectionne un code différent
                # - Pas de DP mais plusieurs candidats (choix non trivial)
                # Le cas "1 seul candidat, pas de DP" est géré par RULE-DAS-TO-DP
                should_update = (
                    (current_code and selection.chosen_code != current_code)
                    or (not current_code and has_multiple)
                )
                if should_update:
                    dossier.diagnostic_principal = Diagnostic(
                        texte=selection.chosen_term or "",
                        cim10_suggestion=selection.chosen_code,
                        cim10_confidence=selection.confidence,
                        source="nuke3",
                    )

            if selection.verdict == "REVIEW":
                dossier.alertes_codage.append(
                    f"NUKE-3 REVIEW: DP ambigu — {selection.reason}"
                )
        except Exception:
            logger.warning("NUKE-3: erreur sélection DP", exc_info=True)

    # Post-processing : validation des codes CCAM contre le dictionnaire
    _validate_ccam(dossier)
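The update rule spelled out in the comments above can be restated as a pure function (illustrative only; the ICD-10 codes below are placeholders):

def should_update_dp(current_code, chosen_code, n_candidates) -> bool:
    """MAJ du DP si NUKE-3 contredit un code existant, ou si aucun DP
    n'existe et que le choix était non trivial (>= 2 candidats)."""
    if current_code:
        return chosen_code != current_code
    return n_candidates >= 2

# should_update_dp("I10", "I10", 3)   -> False  (NUKE-3 confirme le DP existant)
# should_update_dp("I10", "E11.9", 1) -> True   (conflit : code différent)
# should_update_dp(None, "E11.9", 1)  -> False  (1 seul candidat : géré par RULE-DAS-TO-DP)
# should_update_dp(None, "E11.9", 2)  -> True   (pas de DP, choix ambigu)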
@@ -21,7 +21,7 @@ from typing import Optional
import pdfplumber

-from ..config import RAG_INDEX_DIR, CIM10_PDF, GUIDE_METHODO_PDF, CCAM_PDF, CCAM_DICT_PATH, REFERENTIELS_DIR, EMBEDDING_MODEL
+from ..config import RAG_INDEX_DIR, CIM10_PDF, GUIDE_METHODO_PDF, CCAM_PDF, CCAM_DICT_PATH, BIO_CONCEPTS_PATH, REFERENTIELS_DIR, EMBEDDING_MODEL

logger = logging.getLogger(__name__)
@@ -112,11 +112,14 @@ def _paths(kind: str) -> tuple[Path, Path]:
    kind:
        - "ref" : référentiels
        - "proc" : procédures
        - "bio" : concepts biologiques
        - "all" : legacy (faiss.index)
    """
    kind = (kind or "ref").lower()
    if kind == "proc":
        return (RAG_INDEX_DIR / "faiss_proc.index", RAG_INDEX_DIR / "metadata_proc.json")
    if kind == "bio":
        return (RAG_INDEX_DIR / "faiss_bio.index", RAG_INDEX_DIR / "metadata_bio.json")
    if kind == "all":
        return (RAG_INDEX_DIR / "faiss.index", RAG_INDEX_DIR / "metadata.json")
    # ref (default)
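Usage of the new "bio" kind, as already exercised by _extract_biologie_faiss earlier in this diff:

idx_path, meta_path = _paths("bio")
# -> (RAG_INDEX_DIR / "faiss_bio.index", RAG_INDEX_DIR / "metadata_bio.json")

res = get_index(kind="bio")      # None tant que l'index n'a pas été construit
if res is not None:
    faiss_index, metadata = res  # tuple consommé par _extract_biologie_faiss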
@@ -470,6 +473,25 @@ def _chunk_cim10_alpha(pdf_path: Path) -> list[Chunk]:
    return chunks


def _chunk_bio_concepts() -> list[Chunk]:
    """Génère des chunks à partir de bio_concepts.json pour la recherche sémantique de tests."""
    if not BIO_CONCEPTS_PATH.exists():
        return []
    with open(BIO_CONCEPTS_PATH, encoding="utf-8") as f:
        concepts = json.load(f)
    chunks = []
    for item in concepts:
        concept_name = item["concept"]
        # On indexe le nom du concept + tous les synonymes
        for syn in ([concept_name] + item.get("synonyms", [])):
            chunks.append(Chunk(
                text=syn,
                document="bio_concepts",
                code=concept_name,  # On stocke le nom du concept "pivot" dans 'code'
            ))
    return chunks
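The loop above implies the shape expected in bio_concepts.json: a list of objects with a "concept" field and an optional "synonyms" list. A small self-contained rerun of the same logic on a made-up entry, with plain dicts standing in for Chunk:

concepts = [{"concept": "Sodium", "synonyms": ["natrémie", "Na+"]}]
chunks = []
for item in concepts:
    concept_name = item["concept"]
    for syn in [concept_name] + item.get("synonyms", []):
        chunks.append({"text": syn, "document": "bio_concepts", "code": concept_name})
# -> 3 chunks ("Sodium", "natrémie", "Na+") portant tous code="Sodium",
#    donc un hit FAISS sur n'importe quel synonyme remonte au concept pivot.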


# ---------------------------------------------------------------------------
# Construction de l'index FAISS
# ---------------------------------------------------------------------------
@@ -489,18 +511,24 @@ def build_index(force: bool = False) -> None:
    ref_index_path, ref_meta_path = _paths("ref")
    proc_index_path, proc_meta_path = _paths("proc")
    bio_index_path, bio_meta_path = _paths("bio")

    # Si tout existe déjà et pas de force
    ref_ok = ref_index_path.exists() and ref_meta_path.exists()
    proc_ok = proc_index_path.exists() and proc_meta_path.exists()
    bio_ok = bio_index_path.exists() and bio_meta_path.exists()
    guide_expected = GUIDE_METHODO_PDF.exists()
-    if not force and ref_ok and ((not guide_expected) or proc_ok):
+    if not force and ref_ok and bio_ok and ((not guide_expected) or proc_ok):
        logger.info("Index FAISS déjà existants dans %s (use force=True pour reconstruire)", RAG_INDEX_DIR)
        return

    # Collecter les chunks
    ref_chunks: list[Chunk] = []
    proc_chunks: list[Chunk] = []
    bio_chunks: list[Chunk] = []

    # Concepts biologiques
    bio_chunks.extend(_chunk_bio_concepts())

    # CIM-10 (référentiel)
    if CIM10_PDF.exists():
@@ -560,6 +588,7 @@ def build_index(force: bool = False) -> None:
    _write_index(ref_chunks, ref_index_path, ref_meta_path, "ref")
    _write_index(proc_chunks, proc_index_path, proc_meta_path, "proc")
    _write_index(bio_chunks, bio_index_path, bio_meta_path, "bio")

    # Invalider les singletons
    reset_index()
@@ -569,7 +598,7 @@ def get_index(kind: str = "ref") -> tuple | None:
    """Charge un index FAISS et ses métadonnées (singleton lazy-loaded).

    Args:
-        kind: "ref" | "proc" | "all".
+        kind: "ref" | "proc" | "bio" | "all".

    Returns:
        Tuple (faiss_index, metadata_list) ou None si l'index n'existe pas.
@@ -586,8 +615,8 @@ def get_index(kind: str = "ref") -> tuple | None:
    index_path, meta_path = _paths(kind)

-    # Backwards compat : si ref/proc absent, fallback sur all
-    if kind in ("ref", "proc") and (not index_path.exists() or not meta_path.exists()):
+    # Backwards compat : si ref/proc/bio absent, fallback sur all
+    if kind in ("ref", "proc", "bio") and (not index_path.exists() or not meta_path.exists()):
        legacy_idx, legacy_meta = _paths("all")
        if legacy_idx.exists() and legacy_meta.exists():
            logger.warning("Index %s absent — fallback legacy faiss.index", kind)
@@ -561,7 +561,13 @@ def enrich_diagnostic(
    sources = search_similar(diagnostic.texte, top_k=10)

    if not sources:
-        logger.debug("Aucune source RAG trouvée pour : %s", diagnostic.texte)
+        # Toujours initialiser sources_rag (même vide) pour traçabilité
+        diagnostic.sources_rag = []
+        logger.debug("RAG: 0 résultat FAISS pour « %s »", diagnostic.texte)
+        # Si un cache hit existe, appliquer le résultat LLM malgré l'absence de sources
+        if cached is not None:
+            logger.info("Cache hit (sans sources FAISS) pour %s : « %s »", diag_type.upper(), diagnostic.texte)
+            _apply_llm_result_diagnostic(diagnostic, cached)
        return

    # 3. Stocker les sources RAG
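This branch now guarantees sources_rag is set and still applies a cached LLM result when FAISS returns nothing; a minimal restatement of that flow, where apply_cached is a hypothetical stand-in for _apply_llm_result_diagnostic:

def handle_no_sources(diagnostic, cached, apply_cached) -> None:
    diagnostic.sources_rag = []        # toujours initialisé, même vide (traçabilité)
    if cached is not None:             # résultat LLM en cache malgré 0 source FAISS
        apply_cached(diagnostic, cached)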