Ajoute source_page/source_excerpt à tous les types (biologie, imagerie, traitements, actes CCAM, antécédents, complications). Convertit antecedents et complications en types structurés (Antecedent/Complication) avec validators backward-compat pour les vieux JSON. Étend _apply_source_tracking à tous les éléments du dossier. Ajoute un endpoint /api/source-text/ et un modal interactif dans le viewer avec surlignage du texte source. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
295 lines
10 KiB
Python
295 lines
10 KiB
Python
"""Fusion de dossiers médicaux multi-PDFs pour un même patient.
|
|
|
|
Combine les informations de plusieurs documents (Trackare, CRH, CRO) en un
|
|
dossier unique avec des règles de priorité et de déduplication.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
|
|
from ..config import (
|
|
ActeCCAM,
|
|
Antecedent,
|
|
BiologieCle,
|
|
Complication,
|
|
Diagnostic,
|
|
DossierMedical,
|
|
Imagerie,
|
|
Sejour,
|
|
Traitement,
|
|
)
|
|
from ..medical.das_filter import is_valid_diagnostic_text, apply_semantic_dedup
|
|
from ..medical.cim10_extractor import _is_dp_family_redundant
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Rank of each document type when choosing stay data and the merged document
# type: the lowest rank is preferred (Trackare, then CRH, then CRO).
_DOC_PRIORITY = dict(trackare=0, crh=1, cro=2)
|
|
|
|
|
|
def _cim10_specificity(code: str | None) -> int:
    """Return a specificity score for a CIM-10 code.

    The score is the number of characters left once every dot is removed.
    A missing or empty code scores 0.
    """
    return len(code.replace(".", "")) if code else 0
|
|
|
|
|
|
def _prefer_most_specific_dp(dossiers: list[DossierMedical]) -> Diagnostic | None:
    """Pick the most specific principal diagnosis (DP) across all dossiers.

    Candidates are the DPs of the dossiers that have one. The winner is the
    one with the highest CIM-10 specificity; ties are broken by confidence
    (high > medium > low > anything else), then by dossier order.

    Returns:
        The selected DP, or None when no dossier carries a DP.
    """
    confidence_rank = {"high": 0, "medium": 1, "low": 2}

    def ranking(dp: Diagnostic) -> tuple[int, int]:
        # Smaller tuples win: negate the specificity so longer codes come first.
        return (
            -_cim10_specificity(dp.cim10_suggestion),
            confidence_rank.get(dp.cim10_confidence or "", 3),
        )

    principals = [d.diagnostic_principal for d in dossiers if d.diagnostic_principal]
    if not principals:
        return None

    # min() returns the first minimal item, so earlier dossiers win full ties.
    return min(principals, key=ranking)
|
|
|
|
|
|
def _merge_sejour(dossiers: list[DossierMedical]) -> Sejour:
    """Merge stay information with priority Trackare > CRH > CRO.

    Dossiers are visited in document-type priority order (unknown types
    last). Each stay field is taken from the first dossier that provides it;
    later dossiers never overwrite a value that is already set.

    Returns:
        A new Sejour holding the merged fields.
    """
    # Filled when the candidate is truthy and the merged value is still falsy.
    truthy_fields = ("sexe", "date_entree", "date_sortie", "mode_entree", "mode_sortie")
    # Filled when the candidate is not None and the merged value is still
    # None, so a value of 0 counts as provided.
    not_none_fields = ("age", "duree_sejour", "imc", "poids", "taille")

    merged = Sejour()
    by_priority = sorted(dossiers, key=lambda d: _DOC_PRIORITY.get(d.document_type, 99))
    for dossier in by_priority:
        stay = dossier.sejour
        for name in truthy_fields:
            candidate = getattr(stay, name)
            if candidate and not getattr(merged, name):
                setattr(merged, name, candidate)
        for name in not_none_fields:
            candidate = getattr(stay, name)
            if candidate is not None and getattr(merged, name) is None:
                setattr(merged, name, candidate)

    return merged
|
|
|
|
|
|
def _is_enriched(d: Diagnostic) -> bool:
    """Tell whether the diagnostic carries a justification or RAG sources."""
    if d.justification:
        return True
    return bool(d.sources_rag)
|
|
|
|
|
|
def _dedup_diagnostics(all_das: list[Diagnostic]) -> list[Diagnostic]:
    """Deduplicate associated diagnoses (DAS) by CIM-10 code.

    Diagnoses are grouped by ``cim10_suggestion``. A diagnosis without a code
    (``None`` or empty string) is grouped by its lower-cased, stripped text
    instead. Within a group the entry with the best confidence
    (high > medium > low > anything else) is kept; at equal confidence an
    enriched entry (see ``_is_enriched``) replaces a non-enriched one,
    otherwise the first entry seen stays.

    A coded entry is then dropped when another kept code is strictly more
    specific, i.e. its dot-less form is a strict prefix of the other's
    (``K85`` is dropped when ``K85.9`` is present).

    Args:
        all_das: Diagnoses to deduplicate, in encounter order.

    Returns:
        The surviving diagnoses, ordered by first appearance of their group.
    """
    conf_order = {"high": 0, "medium": 1, "low": 2}
    text_prefix = "__text__"
    seen: dict[str, Diagnostic] = {}

    for d in all_das:
        # An empty code is treated like a missing one: keying on "" would
        # collapse every code-less diagnosis into a single entry.
        key = d.cim10_suggestion or f"{text_prefix}{d.texte.lower().strip()}"

        if key not in seen:
            seen[key] = d
            continue

        existing = seen[key]
        new_conf = conf_order.get(d.cim10_confidence or "", 3)
        old_conf = conf_order.get(existing.cim10_confidence or "", 3)
        # Keep the better confidence; at equal confidence prefer the enriched one.
        if new_conf < old_conf or (
            new_conf == old_conf and _is_enriched(d) and not _is_enriched(existing)
        ):
            seen[key] = d

    # Drop parent codes when a more specific code exists.
    # E.g. K85 is removed when K85.9 is present (K85 is a strict prefix of K859).
    normalized = {
        code: code.replace(".", "")
        for code in seen
        if not code.startswith(text_prefix)
    }
    parents = [
        code
        for code, norm in normalized.items()
        if any(
            other.startswith(norm) and len(other) > len(norm)
            for other in normalized.values()
        )
    ]
    for parent in parents:
        del seen[parent]

    return list(seen.values())
|
|
|
|
|
|
def _dedup_actes(all_actes: list[ActeCCAM]) -> list[ActeCCAM]:
    """Deduplicate CCAM procedures by code.

    Procedures are grouped by ``code_ccam_suggestion``. A procedure without a
    code (``None`` or empty string) is grouped by its lower-cased, stripped
    text instead. Within a group the first procedure seen is kept, unless it
    has no date and a later one does.

    Args:
        all_actes: Procedures to deduplicate, in encounter order.

    Returns:
        The surviving procedures, ordered by first appearance of their group.
    """
    seen: dict[str, ActeCCAM] = {}
    for acte in all_actes:
        # An empty code is treated like a missing one: keying on "" would
        # collapse every code-less procedure into a single entry.
        key = acte.code_ccam_suggestion or f"__text__{acte.texte.lower().strip()}"

        # Keep the first occurrence, but prefer one that carries a date.
        if key not in seen or (acte.date and not seen[key].date):
            seen[key] = acte

    return list(seen.values())
|
|
|
|
|
|
def merge_dossiers(dossiers: list[DossierMedical]) -> DossierMedical:
    """Merge several medical dossiers of the same patient into one.

    With a single dossier, a deep copy is returned with ``source_files`` set
    and its associated diagnoses (DAS) cleaned against the principal
    diagnosis (DP) family and semantic duplicates.

    With several dossiers, the merged dossier is built as follows:

    - ``source_files``: the non-empty ``source_file`` of every dossier.
    - ``sejour``: see ``_merge_sejour``.
    - ``diagnostic_principal``: see ``_prefer_most_specific_dp``.
    - ``diagnostics_associes``: every DAS, plus each DP that was not retained
      and whose text is a valid diagnosis, deduplicated, then cleaned against
      the DP family and semantic duplicates.
    - ``actes_ccam``: see ``_dedup_actes``.
    - ``biologie_cle``, ``imagerie``, ``traitements_sortie``, ``antecedents``,
      ``complications``: union keeping the first occurrence of each key.
      Items are shared with the input dossiers, not copied.
    - ``alertes_codage``: a fusion alert first, then the union of all alerts.
    - ``document_type``: the type of the highest-priority dossier.

    Args:
        dossiers: DossierMedical objects extracted from different PDFs.

    Returns:
        The merged DossierMedical.

    Raises:
        IndexError: If ``dossiers`` is empty.
    """
    if not dossiers:
        # An empty list used to fail late with an incidental IndexError when
        # selecting the document type; fail fast with the same exception type.
        raise IndexError("merge_dossiers: at least one dossier is required")

    if len(dossiers) == 1:
        result = dossiers[0].model_copy(deep=True)
        # Skip an empty source file, as the multi-dossier path does.
        result.source_files = [result.source_file] if result.source_file else []
        # DP-family and semantic dedup apply even to a single dossier.
        _finalize_das(result)
        return result

    merged = DossierMedical()
    merged.source_files = [d.source_file for d in dossiers if d.source_file]
    merged.sejour = _merge_sejour(dossiers)
    merged.diagnostic_principal = _prefer_most_specific_dp(dossiers)

    # Collect every DAS, plus the DPs that were not retained.
    all_das: list[Diagnostic] = []
    for d in dossiers:
        all_das.extend(d.diagnostics_associes)
        own_dp = d.diagnostic_principal
        # A DP that differs from the merged DP is demoted to a DAS, but only
        # when its text is a valid diagnosis (filters out OCR artefacts).
        if (
            own_dp
            and merged.diagnostic_principal
            and not _is_same_diagnosis(own_dp, merged.diagnostic_principal)
            and is_valid_diagnostic_text(own_dp.texte)
        ):
            all_das.append(own_dp)

    merged.diagnostics_associes = _dedup_diagnostics(all_das)
    _finalize_das(merged)

    merged.actes_ccam = _dedup_actes([a for d in dossiers for a in d.actes_ccam])

    # Unions: the first occurrence of each key wins.
    merged.biologie_cle.extend(
        _union_by_key((d.biologie_cle for d in dossiers), lambda b: (b.test, b.valeur))
    )
    merged.imagerie.extend(
        _union_by_key((d.imagerie for d in dossiers), lambda i: (i.type, i.conclusion))
    )
    merged.traitements_sortie.extend(
        _union_by_key(
            (d.traitements_sortie for d in dossiers),
            lambda t: t.medicament.lower().strip(),
        )
    )
    merged.antecedents.extend(
        _union_by_key((d.antecedents for d in dossiers), lambda a: a.texte.lower().strip())
    )
    merged.complications.extend(
        _union_by_key((d.complications for d in dossiers), lambda c: c.texte.lower().strip())
    )

    # Alerts: the fusion alert comes first, followed by the union of all alerts.
    merged.alertes_codage = [
        f"FUSION: {len(dossiers)} documents fusionnés",
        *_union_by_key((d.alertes_codage for d in dossiers), lambda alert: alert),
    ]

    # Document type: the highest-priority one; min() keeps the first dossier on ties.
    merged.document_type = min(
        dossiers,
        key=lambda d: _DOC_PRIORITY.get(d.document_type, 99),
    ).document_type

    logger.info(
        "Fusion de %d dossiers : DP=%s, %d DAS, %d actes",
        len(dossiers),
        merged.diagnostic_principal.cim10_suggestion if merged.diagnostic_principal else "aucun",
        len(merged.diagnostics_associes),
        len(merged.actes_ccam),
    )

    return merged


def _is_same_diagnosis(a: Diagnostic, b: Diagnostic) -> bool:
    """Compare two diagnoses by CIM-10 code, or by text when neither has one."""
    if a.cim10_suggestion or b.cim10_suggestion:
        return a.cim10_suggestion == b.cim10_suggestion
    # Two code-less diagnoses are only the same when their normalized text
    # matches; comparing codes alone would equate every code-less diagnosis.
    return (a.texte or "").lower().strip() == (b.texte or "").lower().strip()


def _finalize_das(dossier: DossierMedical) -> None:
    """Drop, in place, the DAS redundant with the DP family, then semantic duplicates."""
    dp = dossier.diagnostic_principal
    dp_code = dp.cim10_suggestion if dp else None
    if dp_code:
        dossier.diagnostics_associes = [
            d
            for d in dossier.diagnostics_associes
            if not d.cim10_suggestion
            or not _is_dp_family_redundant(d.cim10_suggestion, dp_code)
        ]
    dossier.diagnostics_associes = apply_semantic_dedup(dossier.diagnostics_associes)


def _union_by_key(groups, key_of) -> list:
    """Flatten ``groups`` in order, keeping the first item seen for each ``key_of(item)``."""
    seen = set()
    kept = []
    for group in groups:
        for item in group:
            key = key_of(item)
            if key not in seen:
                seen.add(key)
                kept.append(item)
    return kept
|