Files
t2a/tests/test_fusion.py
dom 9d07894c6f feat: Phase 4 — viewer enrichi, non-cumul CCAM, fusion multi-PDFs + rebuild FAISS (21 141 vecteurs)
- Viewer : badges compteurs (DAS, actes, alertes, CMA), raisonnement LLM pliable, regroupement CCAM, navigation patient, alertes NON-CUMUL en rouge
- Non-cumul CCAM : 3 règles heuristiques (même base, même regroupement/jour, paires incompatibles)
- Fusion multi-PDFs : merge_dossiers() avec priorité Trackare, spécificité CIM-10, déduplication, champ source_files
- Index FAISS reconstruit : 21 141 vecteurs (CCAM dict 8 257 + CIM-10 alpha 306)
- 192 tests unitaires passent

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-11 12:43:34 +01:00

240 lines
8.2 KiB
Python

"""Tests pour le module de fusion multi-PDFs."""
import pytest
from src.config import (
ActeCCAM,
Diagnostic,
DossierMedical,
Sejour,
Traitement,
BiologieCle,
Imagerie,
)
from src.medical.fusion import (
merge_dossiers,
_cim10_specificity,
_prefer_most_specific_dp,
_merge_sejour,
_dedup_diagnostics,
_dedup_actes,
)
class TestCIM10Specificity:
    """Pins the score returned by ``_cim10_specificity`` for sample codes."""

    def test_none(self):
        """A missing code (``None``) is expected to score 0."""
        score = _cim10_specificity(None)
        assert score == 0

    def test_short_code(self):
        """The three-character code ``I10`` is expected to score 3."""
        score = _cim10_specificity("I10")
        assert score == 3

    def test_long_code(self):
        """``K85.1`` is expected to score 4."""
        # NOTE(review): presumably the dot is not counted — confirm in fusion.py.
        score = _cim10_specificity("K85.1")
        assert score == 4

    def test_longer_code(self):
        """``K80.50`` is expected to score 5."""
        score = _cim10_specificity("K80.50")
        assert score == 5
class TestSpecificityLongerCodeWins:
    """Checks which principal diagnosis ``_prefer_most_specific_dp`` keeps."""

    def test_specificity_longer_code_wins(self):
        """Between DPs coded ``K80`` and ``K80.5``, ``K80.5`` must be returned."""
        generic_dp = Diagnostic(texte="Calcul biliaire", cim10_suggestion="K80")
        precise_dp = Diagnostic(texte="Calcul cholédoque", cim10_suggestion="K80.5")
        dossiers = [
            DossierMedical(diagnostic_principal=generic_dp),
            DossierMedical(diagnostic_principal=precise_dp),
        ]

        chosen = _prefer_most_specific_dp(dossiers)

        assert chosen is not None
        assert chosen.cim10_suggestion == "K80.5"
class TestMergeSejourTrackarePriority:
    """Checks how ``_merge_sejour`` combines stay data from several dossiers."""

    def test_merge_sejour_trackare_priority(self):
        """Conflicting fields come from the Trackare dossier; gaps come from the CRH."""
        trackare_stay = Sejour(sexe="F", age=43, date_entree="25/02/2023")
        crh_stay = Sejour(sexe="M", age=45, date_entree="24/02/2023", mode_sortie="domicile")
        dossiers = [
            DossierMedical(document_type="trackare", sejour=trackare_stay),
            DossierMedical(document_type="crh", sejour=crh_stay),
        ]

        stay = _merge_sejour(dossiers)

        # Fields present in both documents: the Trackare value is expected.
        assert stay.sexe == "F"
        assert stay.age == 43
        assert stay.date_entree == "25/02/2023"
        # Field only present in the CRH: it is expected to be carried over.
        assert stay.mode_sortie == "domicile"

    def test_merge_sejour_fills_missing(self):
        """Fields set in only one dossier all appear in the merged stay."""
        dossiers = [
            DossierMedical(document_type="trackare", sejour=Sejour(sexe="F")),
            DossierMedical(document_type="crh", sejour=Sejour(age=50, poids=75.0)),
        ]

        stay = _merge_sejour(dossiers)

        assert stay.sexe == "F"
        assert stay.age == 50
        assert stay.poids == 75.0
class TestDedupDiagnostics:
    """Checks de-duplication of diagnoses by ``_dedup_diagnostics``."""

    def test_dedup_diagnostics_by_code(self):
        """Two diagnoses sharing ``I10`` collapse to one, keeping "high" confidence."""
        medium_hta = Diagnostic(texte="HTA", cim10_suggestion="I10", cim10_confidence="medium")
        high_hta = Diagnostic(
            texte="Hypertension", cim10_suggestion="I10", cim10_confidence="high"
        )

        deduped = _dedup_diagnostics([medium_hta, high_hta])

        assert len(deduped) == 1
        assert deduped[0].cim10_confidence == "high"

    def test_dedup_keeps_distinct_codes(self):
        """Diagnoses with different CIM-10 codes are both kept."""
        hta = Diagnostic(texte="HTA", cim10_suggestion="I10")
        diabetes = Diagnostic(texte="Diabète", cim10_suggestion="E11.9")

        deduped = _dedup_diagnostics([hta, diabetes])

        assert len(deduped) == 2
class TestDedupActes:
    """Checks de-duplication of CCAM procedures by ``_dedup_actes``."""

    def test_dedup_actes_by_code(self):
        """Two procedures sharing ``HMFC004`` collapse to one."""
        undated = ActeCCAM(texte="Cholé", code_ccam_suggestion="HMFC004")
        dated = ActeCCAM(texte="Cholécystectomie", code_ccam_suggestion="HMFC004", date="01/03")

        deduped = _dedup_actes([undated, dated])

        assert len(deduped) == 1
        # The entry that carries a date is the one expected to survive.
        assert deduped[0].date == "01/03"
class TestSingleDossierPassthrough:
    """Checks ``merge_dossiers`` when given a single dossier."""

    def test_single_dossier_passthrough(self):
        """The DP code is kept and ``source_files`` lists the single source file."""
        only_dp = Diagnostic(texte="HTA", cim10_suggestion="I10")
        only_dossier = DossierMedical(
            source_file="test.pdf",
            document_type="crh",
            diagnostic_principal=only_dp,
        )

        merged = merge_dossiers([only_dossier])

        assert merged.diagnostic_principal.cim10_suggestion == "I10"
        assert merged.source_files == ["test.pdf"]
class TestDpNonRetainedBecomesDas:
    """Checks what ``merge_dossiers`` does with a DP that is not retained."""

    def test_dp_non_retained_becomes_das(self):
        """The losing DP (``I10``) must show up among the associated diagnoses."""
        hta_dp = Diagnostic(texte="HTA", cim10_suggestion="I10")
        stone_dp = Diagnostic(texte="Calcul cholédoque", cim10_suggestion="K80.5")
        dossiers = [
            DossierMedical(diagnostic_principal=hta_dp),
            DossierMedical(diagnostic_principal=stone_dp),
        ]

        merged = merge_dossiers(dossiers)

        # K80.5 is the more specific code, so it is expected as the DP.
        assert merged.diagnostic_principal.cim10_suggestion == "K80.5"
        # I10 (the DP of the first dossier) must be listed in the DAS.
        associated_codes = [das.cim10_suggestion for das in merged.diagnostics_associes]
        assert "I10" in associated_codes
class TestFusionAlertAdded:
    """Checks the coding alerts produced by ``merge_dossiers``."""

    def test_fusion_alert_added(self):
        """A fusion alert comes first, and each dossier's own alert is kept."""
        first = DossierMedical(source_file="a.pdf", alertes_codage=["Alerte 1"])
        second = DossierMedical(source_file="b.pdf", alertes_codage=["Alerte 2"])

        merged = merge_dossiers([first, second])

        alerts = merged.alertes_codage
        assert alerts[0] == "FUSION: 2 documents fusionnés"
        assert "Alerte 1" in alerts
        assert "Alerte 2" in alerts
class TestSourceFilesPopulated:
    """Checks the ``source_files`` field set by ``merge_dossiers``."""

    def test_source_files_populated(self):
        """``source_files`` lists each input's ``source_file`` in input order."""
        dossiers = [DossierMedical(source_file=name) for name in ("a.pdf", "b.pdf")]

        merged = merge_dossiers(dossiers)

        assert merged.source_files == ["a.pdf", "b.pdf"]
class TestFullMergeCROTrackare:
    """End-to-end check of ``merge_dossiers`` on a Trackare + CRO pair."""

    def test_full_merge_cro_trackare(self):
        """Real-world case: merging a Trackare dossier with a CRO dossier."""
        trackare = DossierMedical(
            source_file="trackare.pdf",
            document_type="trackare",
            sejour=Sejour(
                sexe="F",
                age=43,
                date_entree="25/02/2023",
                date_sortie="03/03/2023",
            ),
            diagnostic_principal=Diagnostic(
                texte="Calcul des canaux biliaires", cim10_suggestion="K80.5"
            ),
            diagnostics_associes=[Diagnostic(texte="HTA", cim10_suggestion="I10")],
            actes_ccam=[
                ActeCCAM(
                    texte="Cholécystectomie",
                    code_ccam_suggestion="HMFC004",
                    date="01/03",
                ),
            ],
            traitements_sortie=[Traitement(medicament="Paracétamol")],
            alertes_codage=["Alerte trackare"],
        )
        cro = DossierMedical(
            source_file="cro.pdf",
            document_type="cro",
            sejour=Sejour(sexe="F"),
            diagnostic_principal=Diagnostic(
                texte="Pancréatite aiguë lithiasique",
                cim10_suggestion="K85.1",
                cim10_confidence="high",
            ),
            diagnostics_associes=[
                Diagnostic(texte="Obésité", cim10_suggestion="E66.0"),
                # Duplicate of the Trackare DAS.
                Diagnostic(texte="HTA", cim10_suggestion="I10"),
            ],
            actes_ccam=[
                ActeCCAM(texte="TDM", code_ccam_suggestion="ZCQK002"),
                # Duplicate of the Trackare procedure.
                ActeCCAM(texte="Cholécystectomie", code_ccam_suggestion="HMFC004"),
            ],
            traitements_sortie=[
                # Duplicate of the Trackare treatment.
                Traitement(medicament="Paracétamol"),
                Traitement(medicament="Cétirizine"),
            ],
            alertes_codage=["Alerte CRO"],
        )

        merged = merge_dossiers([trackare, cro])

        # DP: K85.1 is expected to be retained over K80.5.
        # NOTE(review): both codes have the same length, so the tie-break is
        # presumably not code length alone (confidence?) — confirm in fusion.py.
        assert merged.diagnostic_principal.cim10_suggestion == "K85.1"

        # K80.5 (the Trackare DP) must be demoted to the DAS, next to the
        # associated diagnoses of both documents.
        associated_codes = [das.cim10_suggestion for das in merged.diagnostics_associes]
        assert "K80.5" in associated_codes
        assert "I10" in associated_codes
        assert "E66.0" in associated_codes

        # De-duplicated DAS: I10 must appear exactly once.
        assert associated_codes.count("I10") == 1

        # De-duplicated procedures.
        procedure_codes = [acte.code_ccam_suggestion for acte in merged.actes_ccam]
        assert procedure_codes.count("HMFC004") == 1
        assert "ZCQK002" in procedure_codes

        # De-duplicated discharge treatments.
        drug_names = [treatment.medicament for treatment in merged.traitements_sortie]
        assert drug_names.count("Paracétamol") == 1
        assert "Cétirizine" in drug_names

        # Source files.
        assert merged.source_files == ["trackare.pdf", "cro.pdf"]

        # Alerts: the fusion alert first, then the per-document alerts.
        alerts = merged.alertes_codage
        assert alerts[0].startswith("FUSION:")
        assert "Alerte trackare" in alerts
        assert "Alerte CRO" in alerts

        # Document type of the merged dossier: trackare is expected.
        assert merged.document_type == "trackare"