Files
t2a/tests/test_fusion.py
dom 01d47f3c4b feat: mode hybride Ollama — gemma3:27b pour CPAM, 12b pour codage
Le pipeline utilise désormais gemma3:12b (rapide) pour le codage CIM-10
et gemma3:27b (meilleur raisonnement) pour la contre-argumentation CPAM.
Configurable via OLLAMA_MODEL_CPAM et OLLAMA_TIMEOUT_CPAM.

Inclut aussi : traçabilité source/page DAS, niveaux CMA ATIH, sévérité,
page tracker PDF, améliorations fusion et filtres DAS.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-17 17:53:53 +01:00

494 lines
19 KiB
Python

"""Tests pour le module de fusion multi-PDFs."""
import pytest
from src.config import (
ActeCCAM,
Diagnostic,
DossierMedical,
Sejour,
Traitement,
BiologieCle,
Imagerie,
)
from src.medical.fusion import (
merge_dossiers,
_cim10_specificity,
_prefer_most_specific_dp,
_merge_sejour,
_dedup_diagnostics,
_dedup_actes,
_is_enriched,
)
from src.medical.das_filter import apply_semantic_dedup
class TestCIM10Specificity:
    """Checks the specificity score returned by ``_cim10_specificity``."""

    def test_none(self):
        """A missing code (``None``) scores 0."""
        score = _cim10_specificity(None)
        assert score == 0

    def test_short_code(self):
        """The code ``I10`` scores 3."""
        score = _cim10_specificity("I10")
        assert score == 3

    def test_long_code(self):
        """The code ``K85.1`` scores 4."""
        score = _cim10_specificity("K85.1")
        assert score == 4

    def test_longer_code(self):
        """The code ``K80.50`` scores 5."""
        score = _cim10_specificity("K80.50")
        assert score == 5
class TestSpecificityLongerCodeWins:
    """Principal-diagnosis selection when the candidate codes differ in length."""

    def test_specificity_longer_code_wins(self):
        """Given principal diagnoses coded K80 and K80.5, K80.5 is selected."""
        generic_dp = Diagnostic(texte="Calcul biliaire", cim10_suggestion="K80")
        detailed_dp = Diagnostic(texte="Calcul cholédoque", cim10_suggestion="K80.5")
        dossiers = [
            DossierMedical(diagnostic_principal=generic_dp),
            DossierMedical(diagnostic_principal=detailed_dp),
        ]

        chosen = _prefer_most_specific_dp(dossiers)

        assert chosen is not None
        assert chosen.cim10_suggestion == "K80.5"
class TestMergeSejourTrackarePriority:
    """Exercises ``_merge_sejour`` on a Trackare dossier combined with a CRH dossier."""

    def test_merge_sejour_trackare_priority(self):
        """Conflicting fields come from Trackare; a field it lacks comes from the CRH."""
        trackare_stay = Sejour(sexe="F", age=43, date_entree="25/02/2023")
        crh_stay = Sejour(
            sexe="M", age=45, date_entree="24/02/2023", mode_sortie="domicile"
        )
        dossiers = [
            DossierMedical(document_type="trackare", sejour=trackare_stay),
            DossierMedical(document_type="crh", sejour=crh_stay),
        ]

        stay = _merge_sejour(dossiers)

        # Trackare takes priority on every field both documents provide.
        assert stay.sexe == "F"
        assert stay.age == 43
        assert stay.date_entree == "25/02/2023"
        # Only the CRH carries mode_sortie, so it is completed from there.
        assert stay.mode_sortie == "domicile"

    def test_merge_sejour_fills_missing(self):
        """Fields absent from the Trackare stay are taken from the CRH stay."""
        dossiers = [
            DossierMedical(document_type="trackare", sejour=Sejour(sexe="F")),
            DossierMedical(document_type="crh", sejour=Sejour(age=50, poids=75.0)),
        ]

        stay = _merge_sejour(dossiers)

        assert stay.sexe == "F"
        assert stay.age == 50
        assert stay.poids == 75.0
class TestDedupDiagnostics:
    """Code-based deduplication performed by ``_dedup_diagnostics``."""

    def test_dedup_diagnostics_by_code(self):
        """Two I10 entries collapse to one; the ``high`` confidence entry is kept."""
        medium_entry = Diagnostic(
            texte="HTA", cim10_suggestion="I10", cim10_confidence="medium"
        )
        high_entry = Diagnostic(
            texte="Hypertension", cim10_suggestion="I10", cim10_confidence="high"
        )

        deduped = _dedup_diagnostics([medium_entry, high_entry])

        assert len(deduped) == 1
        assert deduped[0].cim10_confidence == "high"

    def test_dedup_keeps_distinct_codes(self):
        """Entries with different codes (I10, E11.9) are both retained."""
        entries = [
            Diagnostic(texte="HTA", cim10_suggestion="I10"),
            Diagnostic(texte="Diabète", cim10_suggestion="E11.9"),
        ]

        deduped = _dedup_diagnostics(entries)

        assert len(deduped) == 2
class TestDedupActes:
    """Code-based deduplication performed by ``_dedup_actes``."""

    def test_dedup_actes_by_code(self):
        """Two HMFC004 procedures collapse to one; the dated entry is kept."""
        undated = ActeCCAM(texte="Cholé", code_ccam_suggestion="HMFC004")
        dated = ActeCCAM(
            texte="Cholécystectomie", code_ccam_suggestion="HMFC004", date="01/03"
        )

        deduped = _dedup_actes([undated, dated])

        assert len(deduped) == 1
        # The entry that carries a date is the preferred one.
        assert deduped[0].date == "01/03"
class TestSingleDossierPassthrough:
    """``merge_dossiers`` called with a single dossier."""

    def test_single_dossier_passthrough(self):
        """The DP code is unchanged and ``source_files`` lists the one input file."""
        principal = Diagnostic(texte="HTA", cim10_suggestion="I10")
        only_dossier = DossierMedical(
            source_file="test.pdf",
            document_type="crh",
            diagnostic_principal=principal,
        )

        merged = merge_dossiers([only_dossier])

        assert merged.diagnostic_principal.cim10_suggestion == "I10"
        assert merged.source_files == ["test.pdf"]
class TestDpNonRetainedBecomesDas:
    """A principal diagnosis that loses the selection is kept as an associated one."""

    def test_dp_non_retained_becomes_das(self):
        """DPs I10 and K80.5: K80.5 becomes the DP and I10 appears among the DAS."""
        first = DossierMedical(
            diagnostic_principal=Diagnostic(texte="HTA", cim10_suggestion="I10"),
        )
        second = DossierMedical(
            diagnostic_principal=Diagnostic(
                texte="Calcul cholédoque", cim10_suggestion="K80.5"
            ),
        )

        merged = merge_dossiers([first, second])

        # K80.5 is the more specific code, so it is the DP.
        assert merged.diagnostic_principal.cim10_suggestion == "K80.5"

        # I10 (the former DP of the first dossier) must be among the DAS.
        associated_codes = {
            diag.cim10_suggestion for diag in merged.diagnostics_associes
        }
        assert "I10" in associated_codes
class TestFusionAlertAdded:
    """Coding alerts produced by ``merge_dossiers`` for two documents."""

    def test_fusion_alert_added(self):
        """A FUSION alert comes first and the alerts of both inputs are preserved."""
        dossiers = [
            DossierMedical(source_file="a.pdf", alertes_codage=["Alerte 1"]),
            DossierMedical(source_file="b.pdf", alertes_codage=["Alerte 2"]),
        ]

        alerts = merge_dossiers(dossiers).alertes_codage

        assert alerts[0] == "FUSION: 2 documents fusionnés"
        assert "Alerte 1" in alerts
        assert "Alerte 2" in alerts
class TestSourceFilesPopulated:
    """``source_files`` of a merged dossier."""

    def test_source_files_populated(self):
        """The merged dossier lists the input source files in input order."""
        dossiers = [
            DossierMedical(source_file="a.pdf"),
            DossierMedical(source_file="b.pdf"),
        ]

        merged = merge_dossiers(dossiers)

        assert merged.source_files == ["a.pdf", "b.pdf"]
class TestDasEqualDpRemoved:
    """Checks that DAS whose code is identical to the DP are removed after merging."""

    def test_das_same_code_as_dp_removed(self):
        """With DP I10 in both dossiers, the I10 DAS is dropped and E11.9 stays."""
        with_das = DossierMedical(
            diagnostic_principal=Diagnostic(texte="HTA", cim10_suggestion="I10"),
            diagnostics_associes=[
                Diagnostic(texte="Hypertension artérielle", cim10_suggestion="I10"),
                Diagnostic(texte="Diabète", cim10_suggestion="E11.9"),
            ],
        )
        dp_only = DossierMedical(
            diagnostic_principal=Diagnostic(
                texte="HTA essentielle", cim10_suggestion="I10"
            ),
        )

        merged = merge_dossiers([with_das, dp_only])

        associated_codes = [
            diag.cim10_suggestion for diag in merged.diagnostics_associes
        ]
        assert "I10" not in associated_codes, "DAS=DP doit être retiré"
        assert "E11.9" in associated_codes

    def test_das_different_code_kept(self):
        """A DAS (I10) whose code differs from the DP (K81.0) is kept."""
        dossier = DossierMedical(
            diagnostic_principal=Diagnostic(
                texte="Cholécystite", cim10_suggestion="K81.0"
            ),
            diagnostics_associes=[
                Diagnostic(texte="HTA", cim10_suggestion="I10"),
            ],
        )

        merged = merge_dossiers([dossier])

        associated_codes = [
            diag.cim10_suggestion for diag in merged.diagnostics_associes
        ]
        assert "I10" in associated_codes
class TestFullMergeCROTrackare:
    """End-to-end merge of a Trackare document with a CRO document."""

    def test_full_merge_cro_trackare(self):
        """Real-world case: Trackare + CRO merge."""
        trackare_doc = DossierMedical(
            source_file="trackare.pdf",
            document_type="trackare",
            sejour=Sejour(
                sexe="F",
                age=43,
                date_entree="25/02/2023",
                date_sortie="03/03/2023",
            ),
            diagnostic_principal=Diagnostic(
                texte="Calcul des canaux biliaires",
                cim10_suggestion="K80.5",
            ),
            diagnostics_associes=[
                Diagnostic(texte="HTA", cim10_suggestion="I10"),
            ],
            actes_ccam=[
                ActeCCAM(
                    texte="Cholécystectomie",
                    code_ccam_suggestion="HMFC004",
                    date="01/03",
                ),
            ],
            traitements_sortie=[
                Traitement(medicament="Paracétamol"),
            ],
            alertes_codage=["Alerte trackare"],
        )
        cro_doc = DossierMedical(
            source_file="cro.pdf",
            document_type="cro",
            sejour=Sejour(sexe="F"),
            diagnostic_principal=Diagnostic(
                texte="Pancréatite aiguë lithiasique",
                cim10_suggestion="K85.1",
                cim10_confidence="high",
            ),
            diagnostics_associes=[
                Diagnostic(texte="Obésité", cim10_suggestion="E66.0"),
                # Duplicate of the Trackare DAS.
                Diagnostic(texte="HTA", cim10_suggestion="I10"),
            ],
            actes_ccam=[
                ActeCCAM(texte="TDM", code_ccam_suggestion="ZCQK002"),
                # Duplicate of the Trackare procedure.
                ActeCCAM(texte="Cholécystectomie", code_ccam_suggestion="HMFC004"),
            ],
            traitements_sortie=[
                # Duplicate of the Trackare treatment.
                Traitement(medicament="Paracétamol"),
                Traitement(medicament="Cétirizine"),
            ],
            alertes_codage=["Alerte CRO"],
        )

        merged = merge_dossiers([trackare_doc, cro_doc])

        # DP: K85.1 is expected to win over K80.5.
        assert merged.diagnostic_principal.cim10_suggestion == "K85.1"

        # K80.5 (the former Trackare DP) must be among the DAS.
        associated_codes = {
            diag.cim10_suggestion for diag in merged.diagnostics_associes
        }
        assert "K80.5" in associated_codes
        assert "I10" in associated_codes
        assert "E66.0" in associated_codes

        # DAS are deduplicated: I10 must not appear twice.
        i10_entries = [
            diag
            for diag in merged.diagnostics_associes
            if diag.cim10_suggestion == "I10"
        ]
        assert len(i10_entries) == 1

        # Procedures are deduplicated.
        procedure_codes = [acte.code_ccam_suggestion for acte in merged.actes_ccam]
        assert procedure_codes.count("HMFC004") == 1
        assert "ZCQK002" in procedure_codes

        # Treatments are deduplicated.
        drug_names = [
            treatment.medicament for treatment in merged.traitements_sortie
        ]
        assert drug_names.count("Paracétamol") == 1
        assert "Cétirizine" in drug_names

        # Source files.
        assert merged.source_files == ["trackare.pdf", "cro.pdf"]

        # Alerts.
        alerts = merged.alertes_codage
        assert alerts[0].startswith("FUSION:")
        assert "Alerte trackare" in alerts
        assert "Alerte CRO" in alerts

        # Priority document type: trackare.
        assert merged.document_type == "trackare"
class TestDedupParentCodes:
    """Checks that parent CIM-10 codes are removed when a more specific code exists."""

    def test_parent_removed(self):
        """K85 + K85.9 -> only K85.9 is kept."""
        parent = Diagnostic(
            texte="Pancréatite", cim10_suggestion="K85", cim10_confidence="high"
        )
        child = Diagnostic(
            texte="Pancréatite aiguë",
            cim10_suggestion="K85.9",
            cim10_confidence="medium",
        )

        deduped = _dedup_diagnostics([parent, child])

        kept_codes = {diag.cim10_suggestion for diag in deduped}
        assert "K85.9" in kept_codes
        assert "K85" not in kept_codes
        assert len(deduped) == 1

    def test_siblings_kept(self):
        """K85.1 + K85.9 -> both kept (neither is a prefix of the other)."""
        siblings = [
            Diagnostic(texte="Pancréatite biliaire", cim10_suggestion="K85.1"),
            Diagnostic(texte="Pancréatite SAI", cim10_suggestion="K85.9"),
        ]

        deduped = _dedup_diagnostics(siblings)

        kept_codes = {diag.cim10_suggestion for diag in deduped}
        assert "K85.1" in kept_codes
        assert "K85.9" in kept_codes
        assert len(deduped) == 2

    def test_parent_removed_in_merge(self):
        """Integrated test via merge_dossiers: K85 + K85.9 -> K85 removed from the DAS."""
        with_parent = DossierMedical(
            diagnostic_principal=Diagnostic(texte="HTA", cim10_suggestion="I10"),
            diagnostics_associes=[
                Diagnostic(texte="Pancréatite", cim10_suggestion="K85"),
            ],
        )
        with_child = DossierMedical(
            diagnostic_principal=Diagnostic(texte="HTA", cim10_suggestion="I10"),
            diagnostics_associes=[
                Diagnostic(texte="Pancréatite aiguë", cim10_suggestion="K85.9"),
            ],
        )

        merged = merge_dossiers([with_parent, with_child])

        associated_codes = {
            diag.cim10_suggestion for diag in merged.diagnostics_associes
        }
        assert "K85.9" in associated_codes
        assert "K85" not in associated_codes
class TestDedupPreferEnriched:
    """Checks that dedup prefers the enriched diagnosis when confidence is equal."""

    def test_enriched_preferred(self):
        """Same code, no confidence set -> the entry with a justification wins."""
        plain = Diagnostic(texte="Pancréatite", cim10_suggestion="K85.9")
        enriched = Diagnostic(
            texte="Pancréatite aiguë",
            cim10_suggestion="K85.9",
            justification="Confirmé par RAG",
        )

        deduped = _dedup_diagnostics([plain, enriched])

        assert len(deduped) == 1
        assert deduped[0].justification == "Confirmé par RAG"

    def test_enriched_preferred_reverse_order(self):
        """Enriched entry first, plain entry second -> the enriched one is kept."""
        enriched = Diagnostic(
            texte="Pancréatite aiguë",
            cim10_suggestion="K85.9",
            justification="Confirmé par RAG",
        )
        plain = Diagnostic(texte="Pancréatite", cim10_suggestion="K85.9")

        deduped = _dedup_diagnostics([enriched, plain])

        assert len(deduped) == 1
        assert deduped[0].justification == "Confirmé par RAG"

    def test_higher_confidence_still_wins(self):
        """Confidence high beats medium, even when the medium entry is enriched."""
        medium_enriched = Diagnostic(
            texte="Pancréatite",
            cim10_suggestion="K85.9",
            cim10_confidence="medium",
            justification="RAG",
        )
        high_plain = Diagnostic(
            texte="Pancréatite aiguë",
            cim10_suggestion="K85.9",
            cim10_confidence="high",
        )

        deduped = _dedup_diagnostics([medium_enriched, high_plain])

        assert len(deduped) == 1
        assert deduped[0].cim10_confidence == "high"
class TestDasFamilyDpRemoved:
    """Checks DAS-vs-DP dedup by CIM-10 family (first 3 characters)."""

    @staticmethod
    def _das_codes(dossier):
        """Return the set of CIM-10 codes of the dossier's associated diagnoses."""
        return {diag.cim10_suggestion for diag in dossier.diagnostics_associes}

    def test_same_family_removed(self):
        """DP=K85.1, DAS=[K85.0, K85.9, E66.0] -> K85.0 and K85.9 go, E66.0 stays."""
        dossier = DossierMedical(
            diagnostic_principal=Diagnostic(
                texte="Pancréatite biliaire", cim10_suggestion="K85.1"
            ),
            diagnostics_associes=[
                Diagnostic(texte="Pancréatite SAI", cim10_suggestion="K85.0"),
                Diagnostic(texte="Pancréatite aiguë", cim10_suggestion="K85.9"),
                Diagnostic(texte="Obésité", cim10_suggestion="E66.0"),
            ],
        )

        remaining = self._das_codes(merge_dossiers([dossier]))

        assert "K85.0" not in remaining
        assert "K85.9" not in remaining
        assert "E66.0" in remaining

    def test_trauma_siblings_kept(self):
        """S/T codes: different anatomical sites -> all kept."""
        dossier = DossierMedical(
            diagnostic_principal=Diagnostic(
                texte="Fracture col fémoral", cim10_suggestion="S72.1"
            ),
            diagnostics_associes=[
                Diagnostic(texte="Fracture trochanter", cim10_suggestion="S72.0"),
                Diagnostic(texte="Fracture sous-troch", cim10_suggestion="S72.3"),
            ],
        )

        remaining = self._das_codes(merge_dossiers([dossier]))

        assert "S72.0" in remaining
        assert "S72.3" in remaining

    def test_diabetes_complications_kept(self):
        """E10-E14: distinct complications -> all kept."""
        dossier = DossierMedical(
            diagnostic_principal=Diagnostic(
                texte="Diabète avec complications oculaires",
                cim10_suggestion="E11.6",
            ),
            diagnostics_associes=[
                Diagnostic(
                    texte="Diabète avec complications rénales",
                    cim10_suggestion="E11.2",
                ),
                Diagnostic(texte="HTA essentielle", cim10_suggestion="I10"),
            ],
        )

        remaining = self._das_codes(merge_dossiers([dossier]))

        assert "E11.2" in remaining
        assert "I10" in remaining

    def test_parent_child_removed(self):
        """DP=K85.1, DAS=[K85] -> K85 (the parent) is removed."""
        dossier = DossierMedical(
            diagnostic_principal=Diagnostic(
                texte="Pancréatite biliaire", cim10_suggestion="K85.1"
            ),
            diagnostics_associes=[
                Diagnostic(texte="Pancréatite", cim10_suggestion="K85"),
            ],
        )

        remaining = self._das_codes(merge_dossiers([dossier]))

        # No associated diagnosis may be left.
        assert not remaining

    def test_ocr_dp_not_promoted(self):
        """Merge with an OCR-artefact DP ('À 09') -> it is not promoted to DAS."""
        genuine = DossierMedical(
            diagnostic_principal=Diagnostic(
                texte="Pancréatite biliaire", cim10_suggestion="K85.1"
            ),
        )
        artefact = DossierMedical(
            diagnostic_principal=Diagnostic(texte="À 09", cim10_suggestion="A41.9"),
        )

        remaining = self._das_codes(merge_dossiers([genuine, artefact]))

        assert "A41.9" not in remaining
class TestSemanticDedup:
    """Checks semantic redundancies between DAS."""

    @staticmethod
    def _codes(diagnostics):
        """Return the set of CIM-10 codes carried by the given diagnoses."""
        return {diag.cim10_suggestion for diag in diagnostics}

    def test_i10_removed_when_i11_present(self):
        """I10 (essential hypertension) is removed when I11.9 (hypertensive heart disease) is present."""
        entries = [
            Diagnostic(texte="HTA essentielle", cim10_suggestion="I10"),
            Diagnostic(texte="Cardiopathie hypertensive", cim10_suggestion="I11.9"),
            Diagnostic(texte="Obésité", cim10_suggestion="E66.0"),
        ]

        kept = self._codes(apply_semantic_dedup(entries))

        assert "I10" not in kept
        assert "I11.9" in kept
        assert "E66.0" in kept

    def test_n30_removed_when_n39_present(self):
        """N30.9 (cystitis) is removed when N39.0 (urinary tract infection) is present."""
        entries = [
            Diagnostic(texte="Infection urinaire", cim10_suggestion="N39.0"),
            Diagnostic(texte="Cystite SAI", cim10_suggestion="N30.9"),
        ]

        kept = self._codes(apply_semantic_dedup(entries))

        assert "N39.0" in kept
        assert "N30.9" not in kept

    def test_j18_removed_when_j15_present(self):
        """J18.9 (unspecified pneumonia) is removed when J15.1 (specific pneumonia) is present."""
        entries = [
            Diagnostic(texte="Pneumonie SAI", cim10_suggestion="J18.9"),
            Diagnostic(texte="Pneumonie à Klebsiella", cim10_suggestion="J15.1"),
        ]

        kept = self._codes(apply_semantic_dedup(entries))

        assert "J15.1" in kept
        assert "J18.9" not in kept

    def test_no_removal_without_dominant(self):
        """I10 is kept when no dominant I11/I12/I13 code is present."""
        entries = [
            Diagnostic(texte="HTA essentielle", cim10_suggestion="I10"),
            Diagnostic(texte="Obésité", cim10_suggestion="E66.0"),
        ]

        kept = self._codes(apply_semantic_dedup(entries))

        assert "I10" in kept
        assert "E66.0" in kept

    def test_semantic_dedup_in_merge(self):
        """Checks that semantic dedup is applied during the merge."""
        dossier = DossierMedical(
            diagnostic_principal=Diagnostic(texte="Sepsis", cim10_suggestion="A41.9"),
            diagnostics_associes=[
                Diagnostic(texte="HTA essentielle", cim10_suggestion="I10"),
                Diagnostic(
                    texte="Cardiopathie hypertensive", cim10_suggestion="I11.9"
                ),
            ],
        )

        merged = merge_dossiers([dossier])

        kept = self._codes(merged.diagnostics_associes)
        assert "I10" not in kept
        assert "I11.9" in kept