feat: traçabilité source systématique + viewer interactif
Ajoute source_page/source_excerpt à tous les types (biologie, imagerie, traitements, actes CCAM, antécédents, complications). Convertit antecedents et complications en types structurés (Antecedent/Complication) avec validators backward-compat pour les vieux JSON. Étend _apply_source_tracking à tous les éléments du dossier. Ajoute un endpoint /api/source-text/ et un modal interactif dans le viewer avec surlignage du texte source. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -14,7 +14,9 @@ from .ccam_dict import lookup as ccam_lookup, validate_code as ccam_validate
|
||||
from .das_filter import clean_diagnostic_text, is_valid_diagnostic_text, correct_known_miscodes
|
||||
from ..config import (
|
||||
ActeCCAM,
|
||||
Antecedent,
|
||||
BiologieCle,
|
||||
Complication,
|
||||
Diagnostic,
|
||||
DossierMedical,
|
||||
Imagerie,
|
||||
@@ -180,10 +182,10 @@ def _extract_das_llm(text: str, dossier: DossierMedical) -> None:
|
||||
"age": dossier.sejour.age,
|
||||
"duree_sejour": dossier.sejour.duree_sejour,
|
||||
"imc": dossier.sejour.imc,
|
||||
"antecedents": dossier.antecedents[:5],
|
||||
"antecedents": [a.texte for a in dossier.antecedents[:5]],
|
||||
"biologie_cle": [(b.test, b.valeur, b.anomalie) for b in dossier.biologie_cle],
|
||||
"imagerie": [(i.type, (i.conclusion or "")[:200]) for i in dossier.imagerie],
|
||||
"complications": dossier.complications,
|
||||
"complications": [c.texte for c in dossier.complications],
|
||||
}
|
||||
|
||||
# DAS existants (texte + code)
|
||||
@@ -532,7 +534,8 @@ _ANTECEDENT_NOISE = (
|
||||
"item de", "surveillance", "température", "signes vitaux",
|
||||
"pouls", "type de note", "aucune donnée", "renseignée",
|
||||
"habitudes de vie", "systolique", "diastolique", "saturation",
|
||||
"texte libre", "mode de vie", "n° rpps",
|
||||
"texte libre", "mode de vie", "n° rpps", "secrétariat",
|
||||
"aucune aide",
|
||||
)
|
||||
|
||||
_SURVEILLANCE_SINGLE_WORDS = frozenset({
|
||||
@@ -569,8 +572,14 @@ def _is_valid_antecedent(line: str) -> bool:
|
||||
# Deux mots identiques
|
||||
if len(words) == 2 and len(set(words)) == 1:
|
||||
return False
|
||||
# Identifiants administratifs isolés
|
||||
if re.match(r'^\[MEDECIN\]\s', line) and len(line) < 30:
|
||||
# Lignes commençant par un tag médecin (artefact colonne gauche CRH)
|
||||
if re.match(r'^\[MEDECIN', line):
|
||||
return False
|
||||
# Lignes commençant par "Dr [MEDECIN" ou "Dr[PERSONNE" (nom de médecin)
|
||||
if re.match(r'^Dr\s*\[', line):
|
||||
return False
|
||||
# Fragment de localisation : "de Bordeaux", "de Lyon", "de Paris"
|
||||
if re.match(r'^de [A-ZÀ-Ú]', line) and len(line) < 25:
|
||||
return False
|
||||
return True
|
||||
|
||||
@@ -578,7 +587,7 @@ def _is_valid_antecedent(line: str) -> bool:
|
||||
def _extract_antecedents(text: str, dossier: DossierMedical) -> None:
|
||||
"""Extrait les antécédents."""
|
||||
m = re.search(
|
||||
r"Antécédents?\s*[::]?\s*\n?(.*?)(?=\n\s*(?:Traitements?\s*[::]|Allergie|Histoire de la maladie|Examen clinique|Signes\s+[Vv]itaux|Observations?\s+m[eé]dicale|Passage aux|\n\n))",
|
||||
r"Antécédents?\s*[::]?\s*\n?(.*?)(?=\n\s*(?:Traitements?\s*[::]|Allergie|Histoire de la maladie|Examen clinique|Signes\s+[Vv]itaux|Observations?\s+m[eé]dicale|Passage aux|Mode de vie|\n\n))",
|
||||
text,
|
||||
re.DOTALL | re.IGNORECASE,
|
||||
)
|
||||
@@ -587,7 +596,7 @@ def _extract_antecedents(text: str, dossier: DossierMedical) -> None:
|
||||
for line in block.split("\n"):
|
||||
line = line.strip().lstrip("- •")
|
||||
if _is_valid_antecedent(line):
|
||||
dossier.antecedents.append(line)
|
||||
dossier.antecedents.append(Antecedent(texte=line))
|
||||
|
||||
|
||||
def _extract_traitements(
|
||||
@@ -778,7 +787,7 @@ def _extract_complications(
|
||||
# Fallback regex pour la négation
|
||||
pattern = rf"(?:pas de|sans|absence de|aucun[e]?)\s+{re.escape(term)}"
|
||||
if not re.search(pattern, text_lower):
|
||||
dossier.complications.append(term.capitalize())
|
||||
dossier.complications.append(Complication(texte=term.capitalize()))
|
||||
|
||||
|
||||
def _is_negated_by_edsnlp(term: str, negated_terms: set[str]) -> bool:
|
||||
@@ -1028,34 +1037,84 @@ def _is_abnormal(test: str, value: str) -> bool | None:
|
||||
return None
|
||||
|
||||
|
||||
def _apply_source_tracking(dossier: DossierMedical, page_tracker, search_text: str) -> None:
|
||||
"""Ajoute la traçabilité source (page + extrait) à chaque diagnostic.
|
||||
def _track_item(item, search_key: str, page_tracker, search_text: str) -> bool:
    """Locate the source page and excerpt for one item and store them on it.

    ``item`` is any object exposing ``source_page`` and ``source_excerpt``
    attributes. The lookup is delegated to ``page_tracker``.

    Args:
        item: Object to annotate in place.
        search_key: Text to look for in ``search_text``.
        page_tracker: Object providing ``find_page_for_text(key, text)`` and
            ``extract_excerpt(key, text)``.
        search_text: Text in which ``search_key`` is searched.

    Returns:
        ``True`` when a page was found and the item was updated by this call,
        ``False`` otherwise (already tracked, empty key, or no page found).
    """
    # Never overwrite a source that was already filled in.
    if item.source_page is not None:
        return False
    # Nothing to search for.
    if not search_key:
        return False

    page = page_tracker.find_page_for_text(search_key, search_text)
    # Any falsy result (None, 0) is treated as "not found".
    # NOTE(review): presumably pages are 1-based -- confirm against page_tracker.
    if page:
        item.source_page = page
        item.source_excerpt = page_tracker.extract_excerpt(search_key, search_text)
        return True
    return False
|
||||
|
||||
Cherche le texte du diagnostic dans le texte source pour retrouver
|
||||
|
||||
def _apply_source_tracking(dossier: DossierMedical, page_tracker, search_text: str) -> None:
    """Attach source traceability (page + excerpt) to every element of the dossier.

    Each element's text is looked up in ``search_text`` through ``_track_item``,
    which fills ``source_page`` / ``source_excerpt`` on the element when a page
    is found. Elements are modified in place; nothing is returned.

    Args:
        dossier: Medical record whose diagnoses, lab results, imaging,
            discharge treatments, CCAM procedures, history entries and
            complications are annotated.
        page_tracker: Lookup object forwarded unchanged to ``_track_item``.
        search_text: Source text in which the elements are searched.
    """
    tracked = 0  # elements localised by this call
    total = 0    # elements examined (including ones that already had a source)

    # Diagnoses: principal diagnosis first (when present), then associated ones.
    all_diags: list[Diagnostic] = []
    if dossier.diagnostic_principal:
        all_diags.append(dossier.diagnostic_principal)
    all_diags.extend(dossier.diagnostics_associes)

    for diag in all_diags:
        total += 1
        if _track_item(diag, diag.texte, page_tracker, search_text):
            tracked += 1

    # Lab results: try "test: value" first, then fall back to the test name alone.
    for b in dossier.biologie_cle:
        total += 1
        search_key = f"{b.test}: {b.valeur}" if b.valeur else b.test
        if _track_item(b, search_key, page_tracker, search_text):
            tracked += 1
        elif b.valeur and _track_item(b, b.test, page_tracker, search_text):
            tracked += 1

    # Imaging: try the exam type, then the first 50 characters of the conclusion.
    for img in dossier.imagerie:
        total += 1
        search_key = img.type
        if _track_item(img, search_key, page_tracker, search_text):
            tracked += 1
        elif img.conclusion and _track_item(img, img.conclusion[:50], page_tracker, search_text):
            tracked += 1

    # Discharge treatments, searched by drug name.
    for t in dossier.traitements_sortie:
        total += 1
        if _track_item(t, t.medicament, page_tracker, search_text):
            tracked += 1

    # CCAM procedures.
    for a in dossier.actes_ccam:
        total += 1
        if _track_item(a, a.texte, page_tracker, search_text):
            tracked += 1

    # Medical history entries.
    for ant in dossier.antecedents:
        total += 1
        if _track_item(ant, ant.texte, page_tracker, search_text):
            tracked += 1

    # Complications.
    for comp in dossier.complications:
        total += 1
        if _track_item(comp, comp.texte, page_tracker, search_text):
            tracked += 1

    # Only log when at least one element was localised by this call.
    if tracked:
        logger.info(" Traçabilité source : %d/%d éléments localisés", tracked, total)
|
||||
|
||||
|
||||
def _validate_justifications(dossier: DossierMedical) -> None:
|
||||
|
||||
Reference in New Issue
Block a user