"""Tests pour le module de fusion multi-PDFs.""" import pytest from src.config import ( ActeCCAM, Diagnostic, DossierMedical, DPCandidate, DPSelection, Sejour, Traitement, BiologieCle, Imagerie, ) from src.medical.fusion import ( merge_dossiers, _cim10_specificity, _prefer_most_specific_dp, _merge_sejour, _dedup_diagnostics, _dedup_actes, _is_enriched, ) from src.medical.das_filter import apply_semantic_dedup class TestCIM10Specificity: def test_none(self): assert _cim10_specificity(None) == 0 def test_short_code(self): assert _cim10_specificity("I10") == 3 def test_long_code(self): assert _cim10_specificity("K85.1") == 4 def test_longer_code(self): assert _cim10_specificity("K80.50") == 5 class TestSpecificityLongerCodeWins: def test_specificity_longer_code_wins(self): d1 = DossierMedical( diagnostic_principal=Diagnostic(texte="Calcul biliaire", cim10_suggestion="K80"), ) d2 = DossierMedical( diagnostic_principal=Diagnostic(texte="Calcul cholédoque", cim10_suggestion="K80.5"), ) dp = _prefer_most_specific_dp([d1, d2]) assert dp is not None assert dp.cim10_suggestion == "K80.5" class TestMergeSejourTrackarePriority: def test_merge_sejour_trackare_priority(self): d1 = DossierMedical( document_type="trackare", sejour=Sejour(sexe="F", age=43, date_entree="25/02/2023"), ) d2 = DossierMedical( document_type="crh", sejour=Sejour(sexe="M", age=45, date_entree="24/02/2023", mode_sortie="domicile"), ) merged = _merge_sejour([d1, d2]) assert merged.sexe == "F" # Trackare prioritaire assert merged.age == 43 assert merged.date_entree == "25/02/2023" assert merged.mode_sortie == "domicile" # Complété depuis CRH def test_merge_sejour_fills_missing(self): d1 = DossierMedical( document_type="trackare", sejour=Sejour(sexe="F"), ) d2 = DossierMedical( document_type="crh", sejour=Sejour(age=50, poids=75.0), ) merged = _merge_sejour([d1, d2]) assert merged.sexe == "F" assert merged.age == 50 assert merged.poids == 75.0 class TestDedupDiagnostics: def test_dedup_diagnostics_by_code(self): das = [ Diagnostic(texte="HTA", cim10_suggestion="I10", cim10_confidence="medium"), Diagnostic(texte="Hypertension", cim10_suggestion="I10", cim10_confidence="high"), ] result = _dedup_diagnostics(das) assert len(result) == 1 assert result[0].cim10_confidence == "high" def test_dedup_keeps_distinct_codes(self): das = [ Diagnostic(texte="HTA", cim10_suggestion="I10"), Diagnostic(texte="Diabète", cim10_suggestion="E11.9"), ] result = _dedup_diagnostics(das) assert len(result) == 2 class TestDedupActes: def test_dedup_actes_by_code(self): actes = [ ActeCCAM(texte="Cholé", code_ccam_suggestion="HMFC004"), ActeCCAM(texte="Cholécystectomie", code_ccam_suggestion="HMFC004", date="01/03"), ] result = _dedup_actes(actes) assert len(result) == 1 assert result[0].date == "01/03" # Celui avec la date est préféré class TestSingleDossierPassthrough: def test_single_dossier_passthrough(self): d = DossierMedical( source_file="test.pdf", document_type="crh", diagnostic_principal=Diagnostic(texte="HTA", cim10_suggestion="I10"), ) result = merge_dossiers([d]) assert result.diagnostic_principal.cim10_suggestion == "I10" assert result.source_files == ["test.pdf"] class TestDpNonRetainedBecomesDas: def test_dp_non_retained_becomes_das(self): d1 = DossierMedical( diagnostic_principal=Diagnostic(texte="HTA", cim10_suggestion="I10"), ) d2 = DossierMedical( diagnostic_principal=Diagnostic(texte="Calcul cholédoque", cim10_suggestion="K80.5"), ) result = merge_dossiers([d1, d2]) # K80.5 est plus spécifique, donc DP assert result.diagnostic_principal.cim10_suggestion == "K80.5" # I10 (ancien DP de d1) doit être dans les DAS das_codes = {d.cim10_suggestion for d in result.diagnostics_associes} assert "I10" in das_codes class TestFusionAlertAdded: def test_fusion_alert_added(self): d1 = DossierMedical(source_file="a.pdf", alertes_codage=["Alerte 1"]) d2 = DossierMedical(source_file="b.pdf", alertes_codage=["Alerte 2"]) result = merge_dossiers([d1, d2]) assert result.alertes_codage[0] == "FUSION: 2 documents fusionnés" assert "Alerte 1" in result.alertes_codage assert "Alerte 2" in result.alertes_codage class TestSourceFilesPopulated: def test_source_files_populated(self): d1 = DossierMedical(source_file="a.pdf") d2 = DossierMedical(source_file="b.pdf") result = merge_dossiers([d1, d2]) assert result.source_files == ["a.pdf", "b.pdf"] class TestDasEqualDpRemoved: """Vérifie que les DAS dont le code est identique au DP sont retirés après fusion.""" def test_das_same_code_as_dp_removed(self): d1 = DossierMedical( diagnostic_principal=Diagnostic(texte="HTA", cim10_suggestion="I10"), diagnostics_associes=[ Diagnostic(texte="Hypertension artérielle", cim10_suggestion="I10"), Diagnostic(texte="Diabète", cim10_suggestion="E11.9"), ], ) d2 = DossierMedical( diagnostic_principal=Diagnostic(texte="HTA essentielle", cim10_suggestion="I10"), ) result = merge_dossiers([d1, d2]) das_codes = [d.cim10_suggestion for d in result.diagnostics_associes] assert "I10" not in das_codes, "DAS=DP doit être retiré" assert "E11.9" in das_codes def test_das_different_code_kept(self): d1 = DossierMedical( diagnostic_principal=Diagnostic(texte="Cholécystite", cim10_suggestion="K81.0"), diagnostics_associes=[ Diagnostic(texte="HTA", cim10_suggestion="I10"), ], ) result = merge_dossiers([d1]) das_codes = [d.cim10_suggestion for d in result.diagnostics_associes] assert "I10" in das_codes class TestFullMergeCROTrackare: def test_full_merge_cro_trackare(self): """Cas réel : fusion Trackare + CRO.""" trackare = DossierMedical( source_file="trackare.pdf", document_type="trackare", sejour=Sejour(sexe="F", age=43, date_entree="25/02/2023", date_sortie="03/03/2023"), diagnostic_principal=Diagnostic( texte="Calcul des canaux biliaires", cim10_suggestion="K80.5", ), diagnostics_associes=[ Diagnostic(texte="HTA", cim10_suggestion="I10"), ], actes_ccam=[ ActeCCAM(texte="Cholécystectomie", code_ccam_suggestion="HMFC004", date="01/03"), ], traitements_sortie=[ Traitement(medicament="Paracétamol"), ], alertes_codage=["Alerte trackare"], ) cro = DossierMedical( source_file="cro.pdf", document_type="cro", sejour=Sejour(sexe="F"), diagnostic_principal=Diagnostic( texte="Pancréatite aiguë lithiasique", cim10_suggestion="K85.1", cim10_confidence="high", ), diagnostics_associes=[ Diagnostic(texte="Obésité", cim10_suggestion="E66.0"), Diagnostic(texte="HTA", cim10_suggestion="I10"), # doublon ], actes_ccam=[ ActeCCAM(texte="TDM", code_ccam_suggestion="ZCQK002"), ActeCCAM(texte="Cholécystectomie", code_ccam_suggestion="HMFC004"), # doublon ], traitements_sortie=[ Traitement(medicament="Paracétamol"), # doublon Traitement(medicament="Cétirizine"), ], alertes_codage=["Alerte CRO"], ) result = merge_dossiers([trackare, cro]) # DP : K85.1 est plus spécifique que K80.5 assert result.diagnostic_principal.cim10_suggestion == "K85.1" # K80.5 (ancien DP trackare) doit être dans les DAS das_codes = {d.cim10_suggestion for d in result.diagnostics_associes} assert "K80.5" in das_codes assert "I10" in das_codes assert "E66.0" in das_codes # DAS dédupliqués : I10 ne doit pas être en double i10_count = sum(1 for d in result.diagnostics_associes if d.cim10_suggestion == "I10") assert i10_count == 1 # Actes dédupliqués acte_codes = [a.code_ccam_suggestion for a in result.actes_ccam] assert acte_codes.count("HMFC004") == 1 assert "ZCQK002" in acte_codes # Traitements dédupliqués meds = [t.medicament for t in result.traitements_sortie] assert meds.count("Paracétamol") == 1 assert "Cétirizine" in meds # Source files assert result.source_files == ["trackare.pdf", "cro.pdf"] # Alertes assert result.alertes_codage[0].startswith("FUSION:") assert "Alerte trackare" in result.alertes_codage assert "Alerte CRO" in result.alertes_codage # Type prioritaire : trackare assert result.document_type == "trackare" class TestDedupParentCodes: """Vérifie que les codes CIM-10 parents sont retirés quand un code plus spécifique existe.""" def test_parent_removed(self): """K85 + K85.9 → seul K85.9 est gardé.""" das = [ Diagnostic(texte="Pancréatite", cim10_suggestion="K85", cim10_confidence="high"), Diagnostic(texte="Pancréatite aiguë", cim10_suggestion="K85.9", cim10_confidence="medium"), ] result = _dedup_diagnostics(das) codes = {d.cim10_suggestion for d in result} assert "K85.9" in codes assert "K85" not in codes assert len(result) == 1 def test_siblings_kept(self): """K85.1 + K85.9 → les deux gardés (aucun n'est préfixe de l'autre).""" das = [ Diagnostic(texte="Pancréatite biliaire", cim10_suggestion="K85.1"), Diagnostic(texte="Pancréatite SAI", cim10_suggestion="K85.9"), ] result = _dedup_diagnostics(das) codes = {d.cim10_suggestion for d in result} assert "K85.1" in codes assert "K85.9" in codes assert len(result) == 2 def test_parent_removed_in_merge(self): """Test intégré via merge_dossiers : K85 + K85.9 → K85 retiré des DAS.""" d1 = DossierMedical( diagnostic_principal=Diagnostic(texte="HTA", cim10_suggestion="I10"), diagnostics_associes=[ Diagnostic(texte="Pancréatite", cim10_suggestion="K85"), ], ) d2 = DossierMedical( diagnostic_principal=Diagnostic(texte="HTA", cim10_suggestion="I10"), diagnostics_associes=[ Diagnostic(texte="Pancréatite aiguë", cim10_suggestion="K85.9"), ], ) result = merge_dossiers([d1, d2]) das_codes = {d.cim10_suggestion for d in result.diagnostics_associes} assert "K85.9" in das_codes assert "K85" not in das_codes class TestDedupPreferEnriched: """Vérifie que la dédup préfère le diagnostic enrichi à confiance égale.""" def test_enriched_preferred(self): """Même code, même confiance None → celui avec justification gagne.""" das = [ Diagnostic(texte="Pancréatite", cim10_suggestion="K85.9"), Diagnostic(texte="Pancréatite aiguë", cim10_suggestion="K85.9", justification="Confirmé par RAG"), ] result = _dedup_diagnostics(das) assert len(result) == 1 assert result[0].justification == "Confirmé par RAG" def test_enriched_preferred_reverse_order(self): """L'enrichi en premier, le non-enrichi en second → l'enrichi est gardé.""" das = [ Diagnostic(texte="Pancréatite aiguë", cim10_suggestion="K85.9", justification="Confirmé par RAG"), Diagnostic(texte="Pancréatite", cim10_suggestion="K85.9"), ] result = _dedup_diagnostics(das) assert len(result) == 1 assert result[0].justification == "Confirmé par RAG" def test_higher_confidence_still_wins(self): """Confiance high > medium, même si medium est enrichi.""" das = [ Diagnostic(texte="Pancréatite", cim10_suggestion="K85.9", cim10_confidence="medium", justification="RAG"), Diagnostic(texte="Pancréatite aiguë", cim10_suggestion="K85.9", cim10_confidence="high"), ] result = _dedup_diagnostics(das) assert len(result) == 1 assert result[0].cim10_confidence == "high" class TestDasFamilyDpRemoved: """Vérifie la dédup DAS vs DP par famille CIM-10 (3 premiers caractères).""" def test_same_family_removed(self): """DP=K85.1, DAS=[K85.0, K85.9, E66.0] → seul E66.0 reste.""" d1 = DossierMedical( diagnostic_principal=Diagnostic(texte="Pancréatite biliaire", cim10_suggestion="K85.1"), diagnostics_associes=[ Diagnostic(texte="Pancréatite SAI", cim10_suggestion="K85.0"), Diagnostic(texte="Pancréatite aiguë", cim10_suggestion="K85.9"), Diagnostic(texte="Obésité", cim10_suggestion="E66.0"), ], ) result = merge_dossiers([d1]) das_codes = {d.cim10_suggestion for d in result.diagnostics_associes} assert "K85.0" not in das_codes assert "K85.9" not in das_codes assert "E66.0" in das_codes def test_trauma_siblings_kept(self): """S/T : sites anatomiques différents → tous gardés.""" d1 = DossierMedical( diagnostic_principal=Diagnostic(texte="Fracture col fémoral", cim10_suggestion="S72.1"), diagnostics_associes=[ Diagnostic(texte="Fracture trochanter", cim10_suggestion="S72.0"), Diagnostic(texte="Fracture sous-troch", cim10_suggestion="S72.3"), ], ) result = merge_dossiers([d1]) das_codes = {d.cim10_suggestion for d in result.diagnostics_associes} assert "S72.0" in das_codes assert "S72.3" in das_codes def test_diabetes_complications_kept(self): """E10-E14 : complications distinctes → tous gardés.""" d1 = DossierMedical( diagnostic_principal=Diagnostic(texte="Diabète avec complications oculaires", cim10_suggestion="E11.6"), diagnostics_associes=[ Diagnostic(texte="Diabète avec complications rénales", cim10_suggestion="E11.2"), Diagnostic(texte="HTA essentielle", cim10_suggestion="I10"), ], ) result = merge_dossiers([d1]) das_codes = {d.cim10_suggestion for d in result.diagnostics_associes} assert "E11.2" in das_codes assert "I10" in das_codes def test_parent_child_removed(self): """DP=K85.1, DAS=[K85] → K85 (parent) retiré.""" d1 = DossierMedical( diagnostic_principal=Diagnostic(texte="Pancréatite biliaire", cim10_suggestion="K85.1"), diagnostics_associes=[ Diagnostic(texte="Pancréatite", cim10_suggestion="K85"), ], ) result = merge_dossiers([d1]) das_codes = {d.cim10_suggestion for d in result.diagnostics_associes} assert len(das_codes) == 0 def test_ocr_dp_not_promoted(self): """Fusion avec DP artefact OCR 'À 09' → pas promu en DAS.""" d1 = DossierMedical( diagnostic_principal=Diagnostic(texte="Pancréatite biliaire", cim10_suggestion="K85.1"), ) d2 = DossierMedical( diagnostic_principal=Diagnostic(texte="À 09", cim10_suggestion="A41.9"), ) result = merge_dossiers([d1, d2]) das_codes = {d.cim10_suggestion for d in result.diagnostics_associes} assert "A41.9" not in das_codes class TestSemanticDedup: """Vérifie les redondances sémantiques entre DAS.""" def test_i10_removed_when_i11_present(self): """I10 (HTA essentielle) retiré si I11.9 (cardiopathie hypertensive) présent.""" das = [ Diagnostic(texte="HTA essentielle", cim10_suggestion="I10"), Diagnostic(texte="Cardiopathie hypertensive", cim10_suggestion="I11.9"), Diagnostic(texte="Obésité", cim10_suggestion="E66.0"), ] result = apply_semantic_dedup(das) codes = {d.cim10_suggestion for d in result} assert "I10" not in codes assert "I11.9" in codes assert "E66.0" in codes def test_n30_removed_when_n39_present(self): """N30.9 (cystite) retiré si N39.0 (infection urinaire) présent.""" das = [ Diagnostic(texte="Infection urinaire", cim10_suggestion="N39.0"), Diagnostic(texte="Cystite SAI", cim10_suggestion="N30.9"), ] result = apply_semantic_dedup(das) codes = {d.cim10_suggestion for d in result} assert "N39.0" in codes assert "N30.9" not in codes def test_j18_removed_when_j15_present(self): """J18.9 (pneumonie SAI) retiré si J15.1 (pneumonie spécifique) présent.""" das = [ Diagnostic(texte="Pneumonie SAI", cim10_suggestion="J18.9"), Diagnostic(texte="Pneumonie à Klebsiella", cim10_suggestion="J15.1"), ] result = apply_semantic_dedup(das) codes = {d.cim10_suggestion for d in result} assert "J15.1" in codes assert "J18.9" not in codes def test_no_removal_without_dominant(self): """I10 conservé si aucun code dominant I11/I12/I13.""" das = [ Diagnostic(texte="HTA essentielle", cim10_suggestion="I10"), Diagnostic(texte="Obésité", cim10_suggestion="E66.0"), ] result = apply_semantic_dedup(das) codes = {d.cim10_suggestion for d in result} assert "I10" in codes assert "E66.0" in codes def test_semantic_dedup_in_merge(self): """Vérifie que la dédup sémantique est appliquée lors de la fusion.""" d1 = DossierMedical( diagnostic_principal=Diagnostic(texte="Sepsis", cim10_suggestion="A41.9"), diagnostics_associes=[ Diagnostic(texte="HTA essentielle", cim10_suggestion="I10"), Diagnostic(texte="Cardiopathie hypertensive", cim10_suggestion="I11.9"), ], ) result = merge_dossiers([d1]) das_codes = {d.cim10_suggestion for d in result.diagnostics_associes} assert "I10" not in das_codes assert "I11.9" in das_codes class TestDPSelectionPropagation: """Vérifie que dp_selection est propagée depuis le dossier source du DP retenu.""" def test_dp_selection_propagated_multi_dossier(self): """Fusion 2 dossiers : Trackare prioritaire à spécificité égale. K85.1 et K80.2 ont la même spécificité (4 chars), donc le bonus Trackare l'emporte. dp_selection est recréée pour le DP retenu. """ sel = DPSelection( chosen_index=0, chosen_term="Pancréatite aiguë biliaire", chosen_code="K85.1", verdict="CONFIRMED", confidence="high", evidence=["Score 8.0 — source: regex (section forte)"], reason="Écart net", candidates=[DPCandidate(index=0, term="Pancréatite", code="K85.1", section_strength=3, confidence="high")], ) d1 = DossierMedical( document_type="crh", diagnostic_principal=Diagnostic(texte="Pancréatite aiguë biliaire", cim10_suggestion="K85.1"), dp_selection=sel, ) d2 = DossierMedical( document_type="trackare", diagnostic_principal=Diagnostic(texte="Lithiase vésiculaire", cim10_suggestion="K80.2"), ) result = merge_dossiers([d1, d2]) # Trackare gagne à spécificité égale assert result.diagnostic_principal.cim10_suggestion == "K80.2" assert result.dp_selection is not None assert result.dp_selection.chosen_code == "K80.2" def test_dp_selection_synthetic_when_no_source(self): """Si aucun dossier n'a de dp_selection, la fusion en crée une synthétique.""" d1 = DossierMedical( diagnostic_principal=Diagnostic(texte="HTA", cim10_suggestion="I10"), ) d2 = DossierMedical( diagnostic_principal=Diagnostic(texte="HTA", cim10_suggestion="I10"), ) result = merge_dossiers([d1, d2]) assert result.dp_selection is not None assert result.dp_selection.chosen_code == "I10" assert result.dp_selection.reason == "DP fusion (synthétique)" def test_dp_selection_single_dossier(self): """Dossier unique : dp_selection est conservée via model_copy.""" sel = DPSelection( chosen_index=0, chosen_term="Pneumopathie", chosen_code="J18.9", verdict="REVIEW", confidence="medium", ) d1 = DossierMedical( diagnostic_principal=Diagnostic(texte="Pneumopathie", cim10_suggestion="J18.9"), dp_selection=sel, ) result = merge_dossiers([d1]) assert result.dp_selection is not None assert result.dp_selection.verdict == "REVIEW" def test_dp_selection_preserves_evidence_reason_verdict(self): """Fusion multi-docs : dp_selection du CRH préservée quand CRH est plus spécifique. I26.99 (5 chars) > I80.2 (4 chars) → CRH gagne par spécificité malgré le bonus Trackare. La dp_selection originale est alors propagée intacte. """ sel = DPSelection( chosen_index=0, chosen_term="Embolie pulmonaire", chosen_code="I26.99", verdict="CONFIRMED", confidence="high", evidence=[ "Score 9.0 — source: edsnlp", "Diagnostic de sortie: «EP massive bilatérale»", "Delta +5.0 vs Thrombose (I80.2)", ], reason="Écart score 5.0 >= seuil 3.0", candidates=[ DPCandidate(index=0, term="Embolie pulmonaire", code="I26.99", section_strength=2, confidence="high", score=9.0, score_details={"section": 2, "confidence": 3, "diag_section_bonus": 4}), DPCandidate(index=1, term="Thrombose veineuse", code="I80.2", section_strength=1, confidence="high", score=4.0), ], debug_scores={"top1": 9.0, "top2": 4.0, "delta": 5.0}, ) d1 = DossierMedical( document_type="crh", diagnostic_principal=Diagnostic(texte="Embolie pulmonaire", cim10_suggestion="I26.99"), dp_selection=sel, ) d2 = DossierMedical( document_type="trackare", diagnostic_principal=Diagnostic(texte="TVP", cim10_suggestion="I80.2"), ) result = merge_dossiers([d1, d2]) # CRH gagne par spécificité (I26.99 > I80.2) assert result.diagnostic_principal.cim10_suggestion == "I26.99" assert result.dp_selection is not None rs = result.dp_selection # Verdict/confidence/reason intacts assert rs.verdict == "CONFIRMED" assert rs.confidence == "high" assert "5.0" in rs.reason # Evidence complète (3 éléments) assert len(rs.evidence) == 3 assert any("Diagnostic de sortie" in e for e in rs.evidence) assert any("Delta" in e for e in rs.evidence) # Candidates préservés avec score_details assert len(rs.candidates) == 2 assert rs.candidates[0].score_details.get("diag_section_bonus") == 4 # Debug scores assert rs.debug_scores["delta"] == 5.0 def test_dp_selection_from_second_dossier(self): """Si le DP retenu vient du 2e dossier, sa dp_selection est prise.""" sel_d2 = DPSelection( chosen_index=0, chosen_term="Sepsis", chosen_code="A41.9", verdict="CONFIRMED", confidence="high", evidence=["Score 7.0"], reason="Candidat unique", ) d1 = DossierMedical( document_type="trackare", diagnostic_principal=Diagnostic(texte="HTA", cim10_suggestion="I10"), # Pas de dp_selection ) d2 = DossierMedical( document_type="crh", diagnostic_principal=Diagnostic(texte="Sepsis à staphylocoque", cim10_suggestion="A41.9"), dp_selection=sel_d2, ) result = merge_dossiers([d1, d2]) # A41.9 (5 chars) > I10 (3 chars) → DP = A41.9 venant de d2 assert result.diagnostic_principal.cim10_suggestion == "A41.9" assert result.dp_selection is not None assert result.dp_selection.chosen_code == "A41.9" assert result.dp_selection.verdict == "CONFIRMED" def test_dp_selection_no_crash_empty_dossiers(self): """Fusion de dossiers sans DP et sans dp_selection → pas de crash.""" d1 = DossierMedical( diagnostics_associes=[ Diagnostic(texte="HTA", cim10_suggestion="I10"), ], ) d2 = DossierMedical( diagnostics_associes=[ Diagnostic(texte="Diabète", cim10_suggestion="E11.9"), ], ) result = merge_dossiers([d1, d2]) assert result.dp_selection is None assert result.diagnostic_principal is None