fix: filtre DAS décimaux, dédup parents CIM-10, tiebreak enrichissement
- Rule 3 das_filter étendue pour rejeter "K 3.6", "B 12,5" (valeurs labo)
- Suppression des codes parents dans la fusion (K85 retiré si K85.9 présent)
- Préférence du diagnostic enrichi RAG à confiance égale lors de la dédup

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -35,8 +35,8 @@ def is_valid_diagnostic_text(text: str) -> bool:
|
|||||||
if digits >= len(t) * 0.5:
|
if digits >= len(t) * 0.5:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# 3. Lettre + chiffres OCR : "H 51", "À 08", "H\n10"
|
# 3. Lettre + chiffres OCR : "H 51", "À 08", "H\n10", "K 3.6", "B 12,5"
|
||||||
if re.match(r"^[A-ZÀ-Ú]\s*\d{1,3}$", t):
|
if re.match(r"^[A-ZÀ-Ú]\s*\d{1,3}([.,]\d+)?$", t):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# 4. Mots concaténés : "Ventilationventilation"
|
# 4. Mots concaténés : "Ventilationventilation"
|
||||||
|
|||||||
@@ -85,6 +85,11 @@ def _merge_sejour(dossiers: list[DossierMedical]) -> Sejour:
|
|||||||
return merged
|
return merged
|
||||||
|
|
||||||
|
|
||||||
|
def _is_enriched(d: Diagnostic) -> bool:
|
||||||
|
"""Retourne True si le diagnostic a une justification RAG."""
|
||||||
|
return bool(d.justification or d.sources_rag)
|
||||||
|
|
||||||
|
|
||||||
def _dedup_diagnostics(all_das: list[Diagnostic]) -> list[Diagnostic]:
|
def _dedup_diagnostics(all_das: list[Diagnostic]) -> list[Diagnostic]:
|
||||||
"""Déduplique les diagnostics associés par code CIM-10, garde la meilleure confiance."""
|
"""Déduplique les diagnostics associés par code CIM-10, garde la meilleure confiance."""
|
||||||
conf_order = {"high": 0, "medium": 1, "low": 2}
|
conf_order = {"high": 0, "medium": 1, "low": 2}
|
||||||
@@ -100,12 +105,30 @@ def _dedup_diagnostics(all_das: list[Diagnostic]) -> list[Diagnostic]:
|
|||||||
seen[key] = d
|
seen[key] = d
|
||||||
else:
|
else:
|
||||||
existing = seen[key]
|
existing = seen[key]
|
||||||
# Garder celui avec la meilleure confiance
|
new_conf = conf_order.get(d.cim10_confidence or "", 3)
|
||||||
if conf_order.get(d.cim10_confidence or "", 3) < conf_order.get(
|
old_conf = conf_order.get(existing.cim10_confidence or "", 3)
|
||||||
existing.cim10_confidence or "", 3
|
# Garder celui avec la meilleure confiance, ou à confiance égale celui enrichi
|
||||||
):
|
if new_conf < old_conf or (new_conf == old_conf and _is_enriched(d) and not _is_enriched(existing)):
|
||||||
seen[key] = d
|
seen[key] = d
|
||||||
|
|
||||||
|
# Supprimer les codes parents quand un code plus spécifique existe
|
||||||
|
# Ex: K85 retiré si K85.9 présent (K85 est préfixe strict de K859)
|
||||||
|
codes = {k for k in seen if k and not k.startswith("__text__")}
|
||||||
|
normalized = {c: c.replace(".", "") for c in codes}
|
||||||
|
parents_to_remove: set[str] = set()
|
||||||
|
for code_a in codes:
|
||||||
|
norm_a = normalized[code_a]
|
||||||
|
for code_b in codes:
|
||||||
|
if code_a == code_b:
|
||||||
|
continue
|
||||||
|
norm_b = normalized[code_b]
|
||||||
|
if norm_b.startswith(norm_a) and len(norm_b) > len(norm_a):
|
||||||
|
parents_to_remove.add(code_a)
|
||||||
|
break
|
||||||
|
|
||||||
|
for parent in parents_to_remove:
|
||||||
|
del seen[parent]
|
||||||
|
|
||||||
return list(seen.values())
|
return list(seen.values())
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -49,6 +49,14 @@ class TestIsValidDiagnosticText:
|
|||||||
# Après clean, "H\n10" devient "H 10"
|
# Après clean, "H\n10" devient "H 10"
|
||||||
assert not is_valid_diagnostic_text("H 10")
|
assert not is_valid_diagnostic_text("H 10")
|
||||||
|
|
||||||
|
def test_reject_letter_decimal(self):
|
||||||
|
"""K 3.6 = kaliémie 3.6, pas un diagnostic."""
|
||||||
|
assert not is_valid_diagnostic_text("K 3.6")
|
||||||
|
|
||||||
|
def test_reject_letter_decimal_comma(self):
|
||||||
|
"""B 12,5 = valeur labo avec virgule, pas un diagnostic."""
|
||||||
|
assert not is_valid_diagnostic_text("B 12,5")
|
||||||
|
|
||||||
def test_reject_concatenated_words(self):
|
def test_reject_concatenated_words(self):
|
||||||
assert not is_valid_diagnostic_text("Ventilationventilation")
|
assert not is_valid_diagnostic_text("Ventilationventilation")
|
||||||
|
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ from src.medical.fusion import (
|
|||||||
_merge_sejour,
|
_merge_sejour,
|
||||||
_dedup_diagnostics,
|
_dedup_diagnostics,
|
||||||
_dedup_actes,
|
_dedup_actes,
|
||||||
|
_is_enriched,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -268,3 +269,88 @@ class TestFullMergeCROTrackare:
|
|||||||
|
|
||||||
# Type prioritaire : trackare
|
# Type prioritaire : trackare
|
||||||
assert result.document_type == "trackare"
|
assert result.document_type == "trackare"
|
||||||
|
|
||||||
|
|
||||||
|
class TestDedupParentCodes:
|
||||||
|
"""Vérifie que les codes CIM-10 parents sont retirés quand un code plus spécifique existe."""
|
||||||
|
|
||||||
|
def test_parent_removed(self):
|
||||||
|
"""K85 + K85.9 → seul K85.9 est gardé."""
|
||||||
|
das = [
|
||||||
|
Diagnostic(texte="Pancréatite", cim10_suggestion="K85", cim10_confidence="high"),
|
||||||
|
Diagnostic(texte="Pancréatite aiguë", cim10_suggestion="K85.9", cim10_confidence="medium"),
|
||||||
|
]
|
||||||
|
result = _dedup_diagnostics(das)
|
||||||
|
codes = {d.cim10_suggestion for d in result}
|
||||||
|
assert "K85.9" in codes
|
||||||
|
assert "K85" not in codes
|
||||||
|
assert len(result) == 1
|
||||||
|
|
||||||
|
def test_siblings_kept(self):
|
||||||
|
"""K85.1 + K85.9 → les deux gardés (aucun n'est préfixe de l'autre)."""
|
||||||
|
das = [
|
||||||
|
Diagnostic(texte="Pancréatite biliaire", cim10_suggestion="K85.1"),
|
||||||
|
Diagnostic(texte="Pancréatite SAI", cim10_suggestion="K85.9"),
|
||||||
|
]
|
||||||
|
result = _dedup_diagnostics(das)
|
||||||
|
codes = {d.cim10_suggestion for d in result}
|
||||||
|
assert "K85.1" in codes
|
||||||
|
assert "K85.9" in codes
|
||||||
|
assert len(result) == 2
|
||||||
|
|
||||||
|
def test_parent_removed_in_merge(self):
|
||||||
|
"""Test intégré via merge_dossiers : K85 + K85.9 → K85 retiré des DAS."""
|
||||||
|
d1 = DossierMedical(
|
||||||
|
diagnostic_principal=Diagnostic(texte="HTA", cim10_suggestion="I10"),
|
||||||
|
diagnostics_associes=[
|
||||||
|
Diagnostic(texte="Pancréatite", cim10_suggestion="K85"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
d2 = DossierMedical(
|
||||||
|
diagnostic_principal=Diagnostic(texte="HTA", cim10_suggestion="I10"),
|
||||||
|
diagnostics_associes=[
|
||||||
|
Diagnostic(texte="Pancréatite aiguë", cim10_suggestion="K85.9"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
result = merge_dossiers([d1, d2])
|
||||||
|
das_codes = {d.cim10_suggestion for d in result.diagnostics_associes}
|
||||||
|
assert "K85.9" in das_codes
|
||||||
|
assert "K85" not in das_codes
|
||||||
|
|
||||||
|
|
||||||
|
class TestDedupPreferEnriched:
|
||||||
|
"""Vérifie que la dédup préfère le diagnostic enrichi à confiance égale."""
|
||||||
|
|
||||||
|
def test_enriched_preferred(self):
|
||||||
|
"""Même code, même confiance None → celui avec justification gagne."""
|
||||||
|
das = [
|
||||||
|
Diagnostic(texte="Pancréatite", cim10_suggestion="K85.9"),
|
||||||
|
Diagnostic(texte="Pancréatite aiguë", cim10_suggestion="K85.9",
|
||||||
|
justification="Confirmé par RAG"),
|
||||||
|
]
|
||||||
|
result = _dedup_diagnostics(das)
|
||||||
|
assert len(result) == 1
|
||||||
|
assert result[0].justification == "Confirmé par RAG"
|
||||||
|
|
||||||
|
def test_enriched_preferred_reverse_order(self):
|
||||||
|
"""L'enrichi en premier, le non-enrichi en second → l'enrichi est gardé."""
|
||||||
|
das = [
|
||||||
|
Diagnostic(texte="Pancréatite aiguë", cim10_suggestion="K85.9",
|
||||||
|
justification="Confirmé par RAG"),
|
||||||
|
Diagnostic(texte="Pancréatite", cim10_suggestion="K85.9"),
|
||||||
|
]
|
||||||
|
result = _dedup_diagnostics(das)
|
||||||
|
assert len(result) == 1
|
||||||
|
assert result[0].justification == "Confirmé par RAG"
|
||||||
|
|
||||||
|
def test_higher_confidence_still_wins(self):
|
||||||
|
"""Confiance high > medium, même si medium est enrichi."""
|
||||||
|
das = [
|
||||||
|
Diagnostic(texte="Pancréatite", cim10_suggestion="K85.9",
|
||||||
|
cim10_confidence="medium", justification="RAG"),
|
||||||
|
Diagnostic(texte="Pancréatite aiguë", cim10_suggestion="K85.9",
|
||||||
|
cim10_confidence="high"),
|
||||||
|
]
|
||||||
|
result = _dedup_diagnostics(das)
|
||||||
|
assert len(result) == 1
|
||||||
|
assert result[0].cim10_confidence == "high"
|
||||||
|
|||||||
Reference in New Issue
Block a user