From 4d49d4e114d435f60c92d99ce7b01ec408ac8568 Mon Sep 17 00:00:00 2001
From: dom <dom@local>
Date: Fri, 20 Feb 2026 13:56:07 +0100
Subject: [PATCH] =?UTF-8?q?feat:=20grounding=20CPAM=20=E2=80=94=20tags=20D?=
 =?UTF-8?q?P/DAS/ANT/COMPL=20+=20fuzzy=20matching=20CIM-10=20+=20prompt=20?=
 =?UTF-8?q?renforc=C3=A9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cause racine du Tier C : le LLM inventait des tags ([C83.3], [Antécédents])
car _build_tagged_context() ne taguait que bio/img/trt/actes. Le DP, les DAS,
antécédents et complications n'avaient aucun tag citable.

- cpam_context: 4 nouveaux types de tags [DP], [DAS-N], [ANT-N], [COMPL-N]
- cpam_validation: fuzzy matching — résout les refs CIM-10 nues vers le tag contenant ce code
- templates: liste explicite des tags valides, interdiction d'inventer des tags
- tests: 18 nouveaux tests (tags, fuzzy match, grounding DAS/DP)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/control/cpam_context.py    |  34 +++++-
 src/control/cpam_response.py   |   2 +-
 src/control/cpam_validation.py |  24 ++++
 src/prompts/templates.py       |   6 +-
 tests/test_cpam_response.py    | 205 ++++++++++++++++++++++++++++++++-
 5 files changed, 261 insertions(+), 10 deletions(-)

diff --git a/src/control/cpam_context.py b/src/control/cpam_context.py
index 2d744a3..66a048c 100644
--- a/src/control/cpam_context.py
+++ b/src/control/cpam_context.py
@@ -100,7 +100,8 @@ def _get_cim10_definitions(
 def _build_tagged_context(dossier: DossierMedical) -> tuple[str, dict[str, str]]:
     """Construit un contexte clinique avec des tags de référence pour le grounding.
 
-    Chaque élément clinique reçoit un tag unique ([BIO-1], [IMG-1], [TRT-1], [ACTE-1])
+    Chaque élément clinique reçoit un tag unique :
+    [BIO-N], [IMG-N], [TRT-N], [ACTE-N], [DP], [DAS-N], [ANT-N], [COMPL-N]
     que le LLM doit citer dans ses preuves pour garantir la traçabilité.
 
     Returns:
@@ -156,6 +157,37 @@ def _build_tagged_context(dossier: DossierMedical) -> tuple[str, dict[str, str]]
         tag_map[tag] = content
         lines.append(f"  [{tag}] {content}")
 
+    # Diagnostic principal
+    if dossier.diagnostic_principal:
+        dp = dossier.diagnostic_principal
+        tag = "DP"
+        code = f" ({dp.cim10_suggestion})" if dp.cim10_suggestion else ""
+        content = f"{dp.texte}{code}"
+        tag_map[tag] = content
+        lines.append(f"  [{tag}] {content}")
+
+    # Diagnostics associés
+    for i, das in enumerate(dossier.diagnostics_associes, 1):
+        tag = f"DAS-{i}"
+        code = f" ({das.cim10_suggestion})" if das.cim10_suggestion else ""
+        content = f"{das.texte}{code}"
+        tag_map[tag] = content
+        lines.append(f"  [{tag}] {content}")
+
+    # Antécédents (top 10)
+    for i, ant in enumerate(dossier.antecedents[:10], 1):
+        tag = f"ANT-{i}"
+        content = ant.texte
+        tag_map[tag] = content
+        lines.append(f"  [{tag}] {content}")
+
+    # Complications
+    for i, compl in enumerate(dossier.complications, 1):
+        tag = f"COMPL-{i}"
+        content = compl.texte
+        tag_map[tag] = content
+        lines.append(f"  [{tag}] {content}")
+
     if not lines:
         return "", tag_map
 
diff --git a/src/control/cpam_response.py b/src/control/cpam_response.py
index 0640c3f..6bba558 100644
--- a/src/control/cpam_response.py
+++ b/src/control/cpam_response.py
@@ -39,7 +39,7 @@ from .cpam_context import (  # noqa: F401
     _build_bio_summary,
     _check_das_bio_coherence,
 )
-from .cpam_validation import _CIM10_CODE_RE, _validate_adversarial as _validate_adversarial, _assess_quality_tier as _assess_quality_tier  # noqa: F401
+from .cpam_validation import _CIM10_CODE_RE, _validate_adversarial as _validate_adversarial, _assess_quality_tier as _assess_quality_tier, _fuzzy_match_ref as _fuzzy_match_ref  # noqa: F401
 
 logger = logging.getLogger(__name__)
 
diff --git a/src/control/cpam_validation.py b/src/control/cpam_validation.py
index 8915efa..35bd672 100644
--- a/src/control/cpam_validation.py
+++ b/src/control/cpam_validation.py
@@ -14,9 +14,29 @@ from ..prompts import CPAM_ADVERSARIAL
 logger = logging.getLogger(__name__)
 
 
+def _fuzzy_match_ref(ref: str, tag_map: dict[str, str]) -> str | None:
+    """Tente de résoudre une ref inventée vers un tag réel.
+
+    Stratégie : si la ref ressemble à un code CIM-10 (ex: "C83.3"),
+    chercher dans tag_map un tag dont le contenu contient ce code.
+
+    Returns:
+        Le tag réel trouvé, ou None si aucun match.
+    """
+    ref_upper = ref.strip().upper()
+    # Match par code CIM-10 dans le contenu des tags
+    if re.match(r"^[A-Z]\d{2}\.?\d{0,2}$", ref_upper):
+        for tag, content in tag_map.items():
+            if ref_upper in content.upper() or ref in content:
+                return tag
+    return None
+
+
 def _validate_grounding(response_data: dict, tag_map: dict[str, str]) -> list[str]:
     """Vérifie que les références dans preuves_dossier correspondent à des tags existants.
 
+    Applique un fuzzy matching par code CIM-10 avant de flaguer un warning.
+
     Returns:
         Liste de warnings pour les références inventées.
     """
@@ -35,6 +55,10 @@ def _validate_grounding(response_data: dict, tag_map: dict[str, str]) -> list[st
         if not ref:
             continue
         if ref not in tag_map:
+            matched_tag = _fuzzy_match_ref(ref, tag_map)
+            if matched_tag:
+                logger.info("Grounding : ref [%s] résolue vers [%s]", ref, matched_tag)
+                continue  # pas de warning
             valeur = p.get("valeur", "?")
             warnings.append(f"Preuve [{ref}] non traçable (« {valeur} »)")
             logger.warning("Grounding : preuve [%s] introuvable dans les tags du dossier", ref)
diff --git a/src/prompts/templates.py b/src/prompts/templates.py
index 4760443..ab56113 100644
--- a/src/prompts/templates.py
+++ b/src/prompts/templates.py
@@ -267,7 +267,9 @@ CONTEXTE CLINIQUE :
 
 AXE MÉDICAL :
 - Analyse le bien-fondé médical du codage de l'établissement
-- CITE les éléments cliniques EXACTS du dossier en utilisant les tags [XX-N] fournis (ex: [BIO-1] CRP 180 mg/L)
+- CITE les éléments cliniques EXACTS du dossier en utilisant UNIQUEMENT les tags [XX-N] fournis dans la section ÉLÉMENTS CLINIQUES RÉFÉRENCÉS
+- Tags valides : [DP], [DAS-N], [BIO-N], [IMG-N], [TRT-N], [ACTE-N], [ANT-N], [COMPL-N]
+- N'invente JAMAIS un tag qui ne figure pas dans la liste ci-dessus. Si un élément n'a pas de tag, décris-le en texte libre SANS crochets.
 - Confronte l'argumentation CPAM aux sources CIM-10 et Guide Méthodologique fournies
 - Ne mentionne AUCUN élément qui ne figure pas dans les éléments référencés ci-dessus
 
@@ -295,7 +297,7 @@ Réponds UNIQUEMENT avec un objet JSON au format suivant :
   "points_accord": "Points CONCRETS où la CPAM a raison ou partiellement raison (JAMAIS 'Aucun' — il y a toujours au moins un point légitime à reconnaître)",
   "contre_arguments_medicaux": "Argumentation médicale en faveur du codage, en expliquant pourquoi les points d'accord ne suffisent pas à invalider le codage",
   "preuves_dossier": [
-    {{"ref": "BIO-1", "element": "biologie|imagerie|traitement|acte|clinique", "valeur": "valeur exacte du dossier", "signification": "explication clinique"}}
+    {{"ref": "BIO-1 ou DAS-3 ou DP (UNIQUEMENT un tag existant de la section ÉLÉMENTS CLINIQUES RÉFÉRENCÉS)", "element": "biologie|imagerie|traitement|acte|diagnostic|antécédent|complication", "valeur": "valeur exacte du dossier", "signification": "explication clinique"}}
   ],
   "contre_arguments_asymetrie": "Éléments cliniques que la CPAM n'avait pas et qui justifient le codage",
   "contre_arguments_reglementaires": "Erreurs d'interprétation réglementaire de la CPAM, avec citations verbatim des sources",
diff --git a/tests/test_cpam_response.py b/tests/test_cpam_response.py
index 0fd9ddb..37898eb 100644
--- a/tests/test_cpam_response.py
+++ b/tests/test_cpam_response.py
@@ -6,7 +6,9 @@ import pytest
 
 from src.config import (
     ActeCCAM,
+    Antecedent,
     BiologieCle,
+    Complication,
     ControleCPAM,
     Diagnostic,
     DossierMedical,
@@ -23,6 +25,7 @@ from src.control.cpam_response import (
     _check_das_bio_coherence,
     _extraction_pass,
     _format_response,
+    _fuzzy_match_ref,
     _get_cim10_definitions,
     _get_code_label,
     _search_rag_for_control,
@@ -854,15 +857,21 @@ class TestBuildTaggedContext:
         assert "BAS" in tag_map.get("BIO-3", "")
 
     def test_tagged_context_empty_dossier(self):
-        """Dossier sans données cliniques → texte vide, tag_map vide."""
+        """Dossier sans aucune donnée clinique → texte vide, tag_map vide."""
+        dossier = DossierMedical(source_file="test.pdf")
+        text, tag_map = _build_tagged_context(dossier)
+        assert text == ""
+        assert tag_map == {}
+
+    def test_tagged_context_dp_only_dossier(self):
+        """Dossier avec DP mais sans bio/img/trt → tag [DP] généré."""
         dossier = DossierMedical(
             source_file="test.pdf",
             diagnostic_principal=Diagnostic(texte="Test", cim10_suggestion="Z00"),
         )
         text, tag_map = _build_tagged_context(dossier)
-
-        assert text == ""
-        assert tag_map == {}
+        assert "DP" in tag_map
+        assert "[DP]" in text
 
     def test_tagged_context_in_prompt(self):
         """Le contexte tagué apparaît dans le prompt généré."""
@@ -876,10 +885,9 @@ class TestBuildTaggedContext:
         assert len(tag_map) > 0
 
     def test_poor_dossier_warning_in_prompt(self):
-        """Dossier sans bio/imagerie → avertissement dans le prompt."""
+        """Dossier totalement vide → avertissement DOSSIER PAUVRE dans le prompt."""
         dossier = DossierMedical(
             source_file="test.pdf",
-            diagnostic_principal=Diagnostic(texte="Test", cim10_suggestion="Z00"),
             sejour=Sejour(sexe="M", age=70),
         )
         controle = _make_controle()
@@ -889,6 +897,19 @@ class TestBuildTaggedContext:
         assert "Ne spécule PAS" in prompt
         assert len(tag_map) == 0
 
+    def test_dp_only_dossier_not_poor(self):
+        """Dossier avec DP mais sans bio/img → PAS de warning DOSSIER PAUVRE (DP génère un tag)."""
+        dossier = DossierMedical(
+            source_file="test.pdf",
+            diagnostic_principal=Diagnostic(texte="Test", cim10_suggestion="Z00"),
+            sejour=Sejour(sexe="M", age=70),
+        )
+        controle = _make_controle()
+        prompt, tag_map = _build_cpam_prompt(dossier, controle, [])
+
+        assert "DOSSIER PAUVRE" not in prompt
+        assert "DP" in tag_map
+
 
 class TestValidateGrounding:
     """Tests pour la validation des preuves grounded."""
@@ -1996,3 +2017,175 @@ class TestCodesAutorisesWhitelist:
         controle = _make_controle()
         prompt, _ = _build_cpam_prompt(dossier, controle, [])
         assert "Ne mentionne AUCUN code CIM-10 qui ne figure pas" in prompt
+
+
+class TestTaggedContextNewTags:
+    """Tests pour les tags DP, DAS-N, ANT-N, COMPL-N dans _build_tagged_context()."""
+
+    def test_dp_tag_generated(self):
+        """Le tag [DP] est généré pour le diagnostic principal."""
+        dossier = DossierMedical(
+            source_file="test.pdf",
+            diagnostic_principal=Diagnostic(texte="Cholécystite aiguë", cim10_suggestion="K81.0"),
+            biologie_cle=[BiologieCle(test="CRP", valeur="180 mg/L")],
+        )
+        text, tag_map = _build_tagged_context(dossier)
+        assert "DP" in tag_map
+        assert "Cholécystite aiguë (K81.0)" in tag_map["DP"]
+        assert "[DP]" in text
+
+    def test_dp_without_code(self):
+        """Le tag [DP] fonctionne même sans code CIM-10."""
+        dossier = DossierMedical(
+            source_file="test.pdf",
+            diagnostic_principal=Diagnostic(texte="Infection urinaire"),
+            biologie_cle=[BiologieCle(test="CRP", valeur="50 mg/L")],
+        )
+        text, tag_map = _build_tagged_context(dossier)
+        assert "DP" in tag_map
+        assert "Infection urinaire" in tag_map["DP"]
+        assert "()" not in tag_map["DP"]
+
+    def test_das_tags_generated(self):
+        """Les tags [DAS-1], [DAS-2] sont générés pour les diagnostics associés."""
+        dossier = DossierMedical(
+            source_file="test.pdf",
+            diagnostic_principal=Diagnostic(texte="DP test"),
+            diagnostics_associes=[
+                Diagnostic(texte="Iléus réflexe", cim10_suggestion="K56.0"),
+                Diagnostic(texte="HTA", cim10_suggestion="I10"),
+            ],
+        )
+        text, tag_map = _build_tagged_context(dossier)
+        assert "DAS-1" in tag_map
+        assert "DAS-2" in tag_map
+        assert "K56.0" in tag_map["DAS-1"]
+        assert "[DAS-1]" in text
+        assert "[DAS-2]" in text
+
+    def test_ant_tags_generated(self):
+        """Les tags [ANT-1], [ANT-2] sont générés pour les antécédents."""
+        dossier = DossierMedical(
+            source_file="test.pdf",
+            diagnostic_principal=Diagnostic(texte="DP test"),
+            antecedents=[
+                Antecedent(texte="Diabète type 2"),
+                Antecedent(texte="HTA"),
+            ],
+        )
+        text, tag_map = _build_tagged_context(dossier)
+        assert "ANT-1" in tag_map
+        assert "ANT-2" in tag_map
+        assert "Diabète type 2" in tag_map["ANT-1"]
+        assert "[ANT-1]" in text
+        assert "[ANT-2]" in text
+
+    def test_ant_tags_capped_at_10(self):
+        """Les antécédents sont limités à 10 tags maximum."""
+        dossier = DossierMedical(
+            source_file="test.pdf",
+            diagnostic_principal=Diagnostic(texte="DP test"),
+            antecedents=[Antecedent(texte=f"Antécédent {i}") for i in range(15)],
+        )
+        _, tag_map = _build_tagged_context(dossier)
+        assert "ANT-10" in tag_map
+        assert "ANT-11" not in tag_map
+
+    def test_compl_tags_generated(self):
+        """Les tags [COMPL-1] sont générés pour les complications."""
+        dossier = DossierMedical(
+            source_file="test.pdf",
+            diagnostic_principal=Diagnostic(texte="DP test"),
+            complications=[
+                Complication(texte="Infection de paroi"),
+                Complication(texte="Hémorragie post-op"),
+            ],
+        )
+        text, tag_map = _build_tagged_context(dossier)
+        assert "COMPL-1" in tag_map
+        assert "COMPL-2" in tag_map
+        assert "Infection de paroi" in tag_map["COMPL-1"]
+        assert "[COMPL-1]" in text
+
+    def test_all_new_tags_in_complet_dossier(self):
+        """Un dossier complet génère tous les types de tags."""
+        dossier = DossierMedical(
+            source_file="test.pdf",
+            diagnostic_principal=Diagnostic(texte="Cholécystite", cim10_suggestion="K81.0"),
+            diagnostics_associes=[Diagnostic(texte="Iléus", cim10_suggestion="K56.0")],
+            biologie_cle=[BiologieCle(test="CRP", valeur="180 mg/L")],
+            imagerie=[Imagerie(type="Scanner")],
+            traitements_sortie=[Traitement(medicament="Augmentin")],
+            actes_ccam=[ActeCCAM(texte="Cholécystectomie")],
+            antecedents=[Antecedent(texte="HTA")],
+            complications=[Complication(texte="Hémorragie")],
+        )
+        text, tag_map = _build_tagged_context(dossier)
+        for expected_tag in ["BIO-1", "IMG-1", "TRT-1", "ACTE-1", "DP", "DAS-1", "ANT-1", "COMPL-1"]:
+            assert expected_tag in tag_map, f"Tag {expected_tag} manquant"
+
+    def test_grounding_das_ref_valid(self):
+        """Ref DAS-1 dans preuves_dossier → pas de warning."""
+        tag_map = {"DAS-1": "Iléus réflexe (K56.0)", "DP": "Cholécystite (K81.0)"}
+        response_data = {
+            "preuves_dossier": [
+                {"ref": "DAS-1", "element": "diagnostic", "valeur": "Iléus réflexe", "signification": "DAS justifié"},
+                {"ref": "DP", "element": "diagnostic", "valeur": "Cholécystite", "signification": "DP confirmé"},
+            ]
+        }
+        warnings = _validate_grounding(response_data, tag_map)
+        assert len(warnings) == 0
+
+
+class TestFuzzyMatchRef:
+    """Tests pour le fuzzy matching de refs CIM-10 dans _fuzzy_match_ref()."""
+
+    def test_cim10_code_matches_das_content(self):
+        """Un code CIM-10 nu (C83.3) est résolu vers le DAS qui le contient."""
+        tag_map = {
+            "DAS-1": "Lymphome folliculaire (C83.3)",
+            "BIO-1": "CRP: 180 mg/L",
+        }
+        result = _fuzzy_match_ref("C83.3", tag_map)
+        assert result == "DAS-1"
+
+    def test_cim10_code_matches_dp(self):
+        """Un code CIM-10 résolu vers le tag DP."""
+        tag_map = {"DP": "Cholécystite aiguë (K81.0)"}
+        result = _fuzzy_match_ref("K81.0", tag_map)
+        assert result == "DP"
+
+    def test_cim10_code_no_match(self):
+        """Un code CIM-10 absent du tag_map → None."""
+        tag_map = {"BIO-1": "CRP: 180 mg/L", "DAS-1": "Iléus (K56.0)"}
+        result = _fuzzy_match_ref("Z45.8", tag_map)
+        assert result is None
+
+    def test_non_cim10_ref_no_match(self):
+        """Une ref non-CIM-10 (ex: 'Antécédents') → None."""
+        tag_map = {"ANT-1": "HTA", "DP": "Test (K81.0)"}
+        result = _fuzzy_match_ref("Antécédents", tag_map)
+        assert result is None
+
+    def test_grounding_fuzzy_resolves_cim10(self):
+        """_validate_grounding résout une ref CIM-10 via fuzzy matching → pas de warning."""
+        tag_map = {"DAS-1": "Lymphome (C83.3)", "BIO-1": "CRP: 180"}
+        response_data = {
+            "preuves_dossier": [
+                {"ref": "C83.3", "element": "clinique", "valeur": "Lymphome", "signification": "onco"},
+            ]
+        }
+        warnings = _validate_grounding(response_data, tag_map)
+        assert len(warnings) == 0
+
+    def test_grounding_category_name_still_warns(self):
+        """Une ref catégorielle ('Antécédents') n'est pas résolue → warning maintenu."""
+        tag_map = {"ANT-1": "HTA", "BIO-1": "CRP: 5"}
+        response_data = {
+            "preuves_dossier": [
+                {"ref": "Antécédents", "element": "clinique", "valeur": "HTA", "signification": "contexte"},
+            ]
+        }
+        warnings = _validate_grounding(response_data, tag_map)
+        assert len(warnings) == 1
+        assert "Antécédents" in warnings[0]