feat: traçabilité source systématique + viewer interactif

Ajoute source_page/source_excerpt à tous les types (biologie, imagerie,
traitements, actes CCAM, antécédents, complications). Convertit antecedents
et complications en types structurés (Antecedent/Complication) avec
validators backward-compat pour les vieux JSON. Étend _apply_source_tracking
à tous les éléments du dossier. Ajoute un endpoint /api/source-text/ et un
modal interactif dans le viewer avec surlignage du texte source.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-02-18 20:59:50 +01:00
parent fe22c0f0f5
commit 40934fdc39
10 changed files with 500 additions and 47 deletions

View File

@@ -7,7 +7,7 @@ from pathlib import Path
from typing import Optional from typing import Optional
from dotenv import load_dotenv from dotenv import load_dotenv
from pydantic import BaseModel, Field from pydantic import BaseModel, Field, field_validator
load_dotenv() load_dotenv()
@@ -125,24 +125,44 @@ class ActeCCAM(BaseModel):
date: Optional[str] = None date: Optional[str] = None
validite: Optional[str] = None # "valide" | "obsolete" | "non_verifie" validite: Optional[str] = None # "valide" | "obsolete" | "non_verifie"
alertes: list[str] = Field(default_factory=list) alertes: list[str] = Field(default_factory=list)
source_page: Optional[int] = None
source_excerpt: Optional[str] = None
class Traitement(BaseModel): class Traitement(BaseModel):
medicament: str medicament: str
posologie: Optional[str] = None posologie: Optional[str] = None
code_atc: Optional[str] = None code_atc: Optional[str] = None
source_page: Optional[int] = None
source_excerpt: Optional[str] = None
class BiologieCle(BaseModel): class BiologieCle(BaseModel):
test: str test: str
valeur: Optional[str] = None valeur: Optional[str] = None
anomalie: Optional[bool] = None anomalie: Optional[bool] = None
source_page: Optional[int] = None
source_excerpt: Optional[str] = None
class Imagerie(BaseModel): class Imagerie(BaseModel):
type: str type: str
conclusion: Optional[str] = None conclusion: Optional[str] = None
score: Optional[str] = None score: Optional[str] = None
source_page: Optional[int] = None
source_excerpt: Optional[str] = None
class Antecedent(BaseModel):
    """A medical-history entry with an optional pointer to its source text.

    Only ``texte`` is required; both source fields default to ``None``.
    """

    texte: str  # free-text description of the entry
    source_page: int | None = None  # page number where the text was found
    source_excerpt: str | None = None  # passage of source text for the entry
class Complication(BaseModel):
    """A complication entry with an optional pointer to its source text.

    Only ``texte`` is required; both source fields default to ``None``.
    """

    texte: str  # free-text description of the complication
    source_page: int | None = None  # page number where the text was found
    source_excerpt: str | None = None  # passage of source text for the entry
class DossierMedical(BaseModel): class DossierMedical(BaseModel):
@@ -152,17 +172,45 @@ class DossierMedical(BaseModel):
diagnostic_principal: Optional[Diagnostic] = None diagnostic_principal: Optional[Diagnostic] = None
diagnostics_associes: list[Diagnostic] = Field(default_factory=list) diagnostics_associes: list[Diagnostic] = Field(default_factory=list)
actes_ccam: list[ActeCCAM] = Field(default_factory=list) actes_ccam: list[ActeCCAM] = Field(default_factory=list)
antecedents: list[str] = Field(default_factory=list) antecedents: list[Antecedent] = Field(default_factory=list)
traitements_sortie: list[Traitement] = Field(default_factory=list) traitements_sortie: list[Traitement] = Field(default_factory=list)
biologie_cle: list[BiologieCle] = Field(default_factory=list) biologie_cle: list[BiologieCle] = Field(default_factory=list)
imagerie: list[Imagerie] = Field(default_factory=list) imagerie: list[Imagerie] = Field(default_factory=list)
complications: list[str] = Field(default_factory=list) complications: list[Complication] = Field(default_factory=list)
alertes_codage: list[str] = Field(default_factory=list) alertes_codage: list[str] = Field(default_factory=list)
source_files: list[str] = Field(default_factory=list) source_files: list[str] = Field(default_factory=list)
ghm_estimation: Optional[GHMEstimation] = None ghm_estimation: Optional[GHMEstimation] = None
controles_cpam: list[ControleCPAM] = Field(default_factory=list) controles_cpam: list[ControleCPAM] = Field(default_factory=list)
processing_time_s: float | None = None processing_time_s: float | None = None
@field_validator("antecedents", mode="before")
@classmethod
def _coerce_antecedents(cls, v):
    """Accept the legacy ``list[str]`` shape for ``antecedents``.

    Runs before field validation. Each bare string in the list is wrapped
    as ``{"texte": <string>}``; every other entry is passed through
    untouched. A value that is not a list is returned unchanged.
    """
    if isinstance(v, list):
        return [
            {"texte": entry} if isinstance(entry, str) else entry
            for entry in v
        ]
    return v
@field_validator("complications", mode="before")
@classmethod
def _coerce_complications(cls, v):
    """Accept the legacy ``list[str]`` shape for ``complications``.

    Runs before field validation. Each bare string in the list is wrapped
    as ``{"texte": <string>}``; every other entry is passed through
    untouched. A value that is not a list is returned unchanged.
    """
    if isinstance(v, list):
        return [
            {"texte": entry} if isinstance(entry, str) else entry
            for entry in v
        ]
    return v
# --- Rapport d'anonymisation --- # --- Rapport d'anonymisation ---

View File

@@ -470,10 +470,10 @@ def _build_cpam_prompt(
dossier_lines.append(f"- Traitements de sortie : {', '.join(trt_parts)}") dossier_lines.append(f"- Traitements de sortie : {', '.join(trt_parts)}")
if dossier.antecedents: if dossier.antecedents:
dossier_lines.append(f"- Antécédents : {', '.join(dossier.antecedents[:10])}") dossier_lines.append(f"- Antécédents : {', '.join(a.texte for a in dossier.antecedents[:10])}")
if dossier.complications: if dossier.complications:
dossier_lines.append(f"- Complications : {', '.join(dossier.complications)}") dossier_lines.append(f"- Complications : {', '.join(c.texte for c in dossier.complications)}")
dossier_str = "\n".join(dossier_lines) if dossier_lines else "Non disponible" dossier_str = "\n".join(dossier_lines) if dossier_lines else "Non disponible"

View File

@@ -14,7 +14,9 @@ from .ccam_dict import lookup as ccam_lookup, validate_code as ccam_validate
from .das_filter import clean_diagnostic_text, is_valid_diagnostic_text, correct_known_miscodes from .das_filter import clean_diagnostic_text, is_valid_diagnostic_text, correct_known_miscodes
from ..config import ( from ..config import (
ActeCCAM, ActeCCAM,
Antecedent,
BiologieCle, BiologieCle,
Complication,
Diagnostic, Diagnostic,
DossierMedical, DossierMedical,
Imagerie, Imagerie,
@@ -180,10 +182,10 @@ def _extract_das_llm(text: str, dossier: DossierMedical) -> None:
"age": dossier.sejour.age, "age": dossier.sejour.age,
"duree_sejour": dossier.sejour.duree_sejour, "duree_sejour": dossier.sejour.duree_sejour,
"imc": dossier.sejour.imc, "imc": dossier.sejour.imc,
"antecedents": dossier.antecedents[:5], "antecedents": [a.texte for a in dossier.antecedents[:5]],
"biologie_cle": [(b.test, b.valeur, b.anomalie) for b in dossier.biologie_cle], "biologie_cle": [(b.test, b.valeur, b.anomalie) for b in dossier.biologie_cle],
"imagerie": [(i.type, (i.conclusion or "")[:200]) for i in dossier.imagerie], "imagerie": [(i.type, (i.conclusion or "")[:200]) for i in dossier.imagerie],
"complications": dossier.complications, "complications": [c.texte for c in dossier.complications],
} }
# DAS existants (texte + code) # DAS existants (texte + code)
@@ -532,7 +534,8 @@ _ANTECEDENT_NOISE = (
"item de", "surveillance", "température", "signes vitaux", "item de", "surveillance", "température", "signes vitaux",
"pouls", "type de note", "aucune donnée", "renseignée", "pouls", "type de note", "aucune donnée", "renseignée",
"habitudes de vie", "systolique", "diastolique", "saturation", "habitudes de vie", "systolique", "diastolique", "saturation",
"texte libre", "mode de vie", "n° rpps", "texte libre", "mode de vie", "n° rpps", "secrétariat",
"aucune aide",
) )
_SURVEILLANCE_SINGLE_WORDS = frozenset({ _SURVEILLANCE_SINGLE_WORDS = frozenset({
@@ -569,8 +572,14 @@ def _is_valid_antecedent(line: str) -> bool:
# Deux mots identiques # Deux mots identiques
if len(words) == 2 and len(set(words)) == 1: if len(words) == 2 and len(set(words)) == 1:
return False return False
# Identifiants administratifs isolés # Lignes commençant par un tag médecin (artefact colonne gauche CRH)
if re.match(r'^\[MEDECIN\]\s', line) and len(line) < 30: if re.match(r'^\[MEDECIN', line):
return False
# Lignes commençant par "Dr [MEDECIN" ou "Dr[PERSONNE" (nom de médecin)
if re.match(r'^Dr\s*\[', line):
return False
# Fragment de localisation : "de Bordeaux", "de Lyon", "de Paris"
if re.match(r'^de [A-ZÀ-Ú]', line) and len(line) < 25:
return False return False
return True return True
@@ -578,7 +587,7 @@ def _is_valid_antecedent(line: str) -> bool:
def _extract_antecedents(text: str, dossier: DossierMedical) -> None: def _extract_antecedents(text: str, dossier: DossierMedical) -> None:
"""Extrait les antécédents.""" """Extrait les antécédents."""
m = re.search( m = re.search(
r"Antécédents?\s*[:]?\s*\n?(.*?)(?=\n\s*(?:Traitements?\s*[:]|Allergie|Histoire de la maladie|Examen clinique|Signes\s+[Vv]itaux|Observations?\s+m[eé]dicale|Passage aux|\n\n))", r"Antécédents?\s*[:]?\s*\n?(.*?)(?=\n\s*(?:Traitements?\s*[:]|Allergie|Histoire de la maladie|Examen clinique|Signes\s+[Vv]itaux|Observations?\s+m[eé]dicale|Passage aux|Mode de vie|\n\n))",
text, text,
re.DOTALL | re.IGNORECASE, re.DOTALL | re.IGNORECASE,
) )
@@ -587,7 +596,7 @@ def _extract_antecedents(text: str, dossier: DossierMedical) -> None:
for line in block.split("\n"): for line in block.split("\n"):
line = line.strip().lstrip("- •") line = line.strip().lstrip("- •")
if _is_valid_antecedent(line): if _is_valid_antecedent(line):
dossier.antecedents.append(line) dossier.antecedents.append(Antecedent(texte=line))
def _extract_traitements( def _extract_traitements(
@@ -778,7 +787,7 @@ def _extract_complications(
# Fallback regex pour la négation # Fallback regex pour la négation
pattern = rf"(?:pas de|sans|absence de|aucun[e]?)\s+{re.escape(term)}" pattern = rf"(?:pas de|sans|absence de|aucun[e]?)\s+{re.escape(term)}"
if not re.search(pattern, text_lower): if not re.search(pattern, text_lower):
dossier.complications.append(term.capitalize()) dossier.complications.append(Complication(texte=term.capitalize()))
def _is_negated_by_edsnlp(term: str, negated_terms: set[str]) -> bool: def _is_negated_by_edsnlp(term: str, negated_terms: set[str]) -> bool:
@@ -1028,34 +1037,84 @@ def _is_abnormal(test: str, value: str) -> bool | None:
return None return None
def _apply_source_tracking(dossier: DossierMedical, page_tracker, search_text: str) -> None: def _track_item(item, search_key: str, page_tracker, search_text: str) -> bool:
"""Ajoute la traçabilité source (page + extrait) à chaque diagnostic. """Cherche la page source et l'extrait pour un item avec source_page/source_excerpt."""
if item.source_page is not None:
return False
if not search_key:
return False
page = page_tracker.find_page_for_text(search_key, search_text)
if page:
item.source_page = page
item.source_excerpt = page_tracker.extract_excerpt(search_key, search_text)
return True
return False
Cherche le texte du diagnostic dans le texte source pour retrouver
def _apply_source_tracking(dossier: DossierMedical, page_tracker, search_text: str) -> None:
"""Ajoute la traçabilité source (page + extrait) à tous les éléments du dossier.
Cherche le texte de chaque élément dans le texte source pour retrouver
la page d'origine et extraire un passage contextualisé. la page d'origine et extraire un passage contextualisé.
""" """
tracked = 0
total = 0
# Diagnostics (DP + DAS)
all_diags: list[Diagnostic] = [] all_diags: list[Diagnostic] = []
if dossier.diagnostic_principal: if dossier.diagnostic_principal:
all_diags.append(dossier.diagnostic_principal) all_diags.append(dossier.diagnostic_principal)
all_diags.extend(dossier.diagnostics_associes) all_diags.extend(dossier.diagnostics_associes)
tracked = 0
for diag in all_diags: for diag in all_diags:
if diag.source_page is not None: total += 1
continue # déjà renseigné if _track_item(diag, diag.texte, page_tracker, search_text):
tracked += 1
texte = diag.texte # Biologie
if not texte: for b in dossier.biologie_cle:
continue total += 1
search_key = f"{b.test}: {b.valeur}" if b.valeur else b.test
if _track_item(b, search_key, page_tracker, search_text):
tracked += 1
elif b.valeur and _track_item(b, b.test, page_tracker, search_text):
tracked += 1
page = page_tracker.find_page_for_text(texte, search_text) # Imagerie
if page: for img in dossier.imagerie:
diag.source_page = page total += 1
diag.source_excerpt = page_tracker.extract_excerpt(texte, search_text) search_key = img.type
if _track_item(img, search_key, page_tracker, search_text):
tracked += 1
elif img.conclusion and _track_item(img, img.conclusion[:50], page_tracker, search_text):
tracked += 1
# Traitements
for t in dossier.traitements_sortie:
total += 1
if _track_item(t, t.medicament, page_tracker, search_text):
tracked += 1
# Actes CCAM
for a in dossier.actes_ccam:
total += 1
if _track_item(a, a.texte, page_tracker, search_text):
tracked += 1
# Antécédents
for ant in dossier.antecedents:
total += 1
if _track_item(ant, ant.texte, page_tracker, search_text):
tracked += 1
# Complications
for comp in dossier.complications:
total += 1
if _track_item(comp, comp.texte, page_tracker, search_text):
tracked += 1 tracked += 1
if tracked: if tracked:
logger.info(" Traçabilité source : %d/%d diagnostics localisés", tracked, len(all_diags)) logger.info(" Traçabilité source : %d/%d éléments localisés", tracked, total)
def _validate_justifications(dossier: DossierMedical) -> None: def _validate_justifications(dossier: DossierMedical) -> None:

View File

@@ -166,10 +166,10 @@ def build_enriched_context(dossier: DossierMedical) -> dict:
"age": dossier.sejour.age, "age": dossier.sejour.age,
"duree_sejour": dossier.sejour.duree_sejour, "duree_sejour": dossier.sejour.duree_sejour,
"imc": dossier.sejour.imc, "imc": dossier.sejour.imc,
"antecedents": dossier.antecedents[:5], "antecedents": [a.texte for a in dossier.antecedents[:5]],
"biologie_cle": [(b.test, b.valeur, b.anomalie) for b in dossier.biologie_cle], "biologie_cle": [(b.test, b.valeur, b.anomalie) for b in dossier.biologie_cle],
"imagerie": [(i.type, (i.conclusion or "")[:200]) for i in dossier.imagerie], "imagerie": [(i.type, (i.conclusion or "")[:200]) for i in dossier.imagerie],
"complications": dossier.complications, "complications": [c.texte for c in dossier.complications],
} }
# Interprétations biologiques # Interprétations biologiques

View File

@@ -10,7 +10,9 @@ import logging
from ..config import ( from ..config import (
ActeCCAM, ActeCCAM,
Antecedent,
BiologieCle, BiologieCle,
Complication,
Diagnostic, Diagnostic,
DossierMedical, DossierMedical,
Imagerie, Imagerie,
@@ -251,7 +253,7 @@ def merge_dossiers(dossiers: list[DossierMedical]) -> DossierMedical:
ant_seen: set[str] = set() ant_seen: set[str] = set()
for d in dossiers: for d in dossiers:
for a in d.antecedents: for a in d.antecedents:
key = a.lower().strip() key = a.texte.lower().strip()
if key not in ant_seen: if key not in ant_seen:
merged.antecedents.append(a) merged.antecedents.append(a)
ant_seen.add(key) ant_seen.add(key)
@@ -260,7 +262,7 @@ def merge_dossiers(dossiers: list[DossierMedical]) -> DossierMedical:
comp_seen: set[str] = set() comp_seen: set[str] = set()
for d in dossiers: for d in dossiers:
for c in d.complications: for c in d.complications:
key = c.lower().strip() key = c.texte.lower().strip()
if key not in comp_seen: if key not in comp_seen:
merged.complications.append(c) merged.complications.append(c)
comp_seen.add(key) comp_seen.add(key)

View File

@@ -16,7 +16,7 @@ from werkzeug.utils import secure_filename
from collections import Counter from collections import Counter
from ..config import ( from ..config import (
STRUCTURED_DIR, OLLAMA_URL, CCAM_DICT_PATH, DossierMedical, ANONYMIZED_DIR, STRUCTURED_DIR, OLLAMA_URL, CCAM_DICT_PATH, DossierMedical,
ALLOWED_EXTENSIONS, UPLOAD_MAX_SIZE_MB, ALLOWED_EXTENSIONS, UPLOAD_MAX_SIZE_MB,
CIM10_PDF, GUIDE_METHODO_PDF, CCAM_PDF, CIM10_DICT_PATH, CIM10_SUPPLEMENTS_PATH, CIM10_PDF, GUIDE_METHODO_PDF, CCAM_PDF, CIM10_DICT_PATH, CIM10_SUPPLEMENTS_PATH,
) )
@@ -594,6 +594,27 @@ def create_app() -> Flask:
logger.exception("Erreur lors du retraitement") logger.exception("Erreur lors du retraitement")
return jsonify({"error": str(e)}), 500 return jsonify({"error": str(e)}), 500
# ------------------------------------------------------------------
# API texte source anonymisé
# ------------------------------------------------------------------
@app.route("/api/source-text/<path:dossier_id>")
def source_text(dossier_id: str):
    """Return the anonymized text of every file of a dossier as JSON.

    The response maps the name of each ``*_anonymized.txt`` file found
    directly in ``ANONYMIZED_DIR / dossier_id`` to its UTF-8 content.

    Aborts with 403 when the resolved directory is outside
    ``ANONYMIZED_DIR`` and with 404 when it is not an existing directory.
    Files that cannot be read or decoded are logged and left out of the
    response.
    """
    base_dir = ANONYMIZED_DIR.resolve()
    safe_dir = (ANONYMIZED_DIR / dossier_id).resolve()
    # Compare resolved paths so ".." segments cannot escape the base directory.
    if not safe_dir.is_relative_to(base_dir):
        abort(403)
    if not safe_dir.is_dir():
        abort(404)

    result = {}
    for txt_path in sorted(safe_dir.glob("*_anonymized.txt")):
        try:
            result[txt_path.name] = txt_path.read_text(encoding="utf-8")
        except (OSError, UnicodeError):
            # Best effort: one unreadable file must not hide the others,
            # but keep the traceback so the cause can be diagnosed.
            logger.warning("Impossible de lire %s", txt_path, exc_info=True)
    return jsonify(result)
# ------------------------------------------------------------------ # ------------------------------------------------------------------
# Routes admin référentiels # Routes admin référentiels
# ------------------------------------------------------------------ # ------------------------------------------------------------------

View File

@@ -233,6 +233,79 @@
border-radius: 50%; border-radius: 50%;
animation: spin 0.8s linear infinite; animation: spin 0.8s linear infinite;
} }
/* Source tracking badges */
.src-btn {
display: inline-block;
padding: 1px 6px;
border-radius: 9999px;
font-size: 0.65rem;
font-weight: 600;
background: #e0f2fe;
color: #0369a1;
border: 1px solid #bae6fd;
cursor: pointer;
margin-left: 0.3rem;
vertical-align: middle;
transition: background 0.15s;
}
.src-btn:hover { background: #bae6fd; }
/* Source modal */
#source-modal {
display: none;
position: fixed;
inset: 0;
z-index: 9999;
background: rgba(0,0,0,0.5);
padding: 2rem;
}
#source-modal-inner {
background: #fff;
border-radius: 12px;
max-width: 900px;
margin: 0 auto;
max-height: 90vh;
display: flex;
flex-direction: column;
box-shadow: 0 8px 30px rgba(0,0,0,0.2);
}
#source-header {
padding: 1rem 1.25rem;
border-bottom: 1px solid #e2e8f0;
font-weight: 700;
font-size: 0.9rem;
color: #0f172a;
display: flex;
justify-content: space-between;
align-items: center;
}
#source-content {
flex: 1;
overflow-y: auto;
padding: 1.25rem;
font-size: 0.85rem;
line-height: 1.6;
white-space: pre-wrap;
word-break: break-word;
color: #334155;
}
#source-content mark {
background: #fef08a;
padding: 2px 0;
border-radius: 2px;
}
#source-close-btn {
padding: 0.4rem 1rem;
background: #64748b;
color: #fff;
border: none;
border-radius: 6px;
cursor: pointer;
font-size: 0.8rem;
font-weight: 600;
}
#source-close-btn:hover { background: #475569; }
</style> </style>
</head> </head>
<body> <body>

View File

@@ -287,7 +287,10 @@
{% set dp = dossier.diagnostic_principal %} {% set dp = dossier.diagnostic_principal %}
<div class="card section"> <div class="card section">
<h3>Diagnostic principal</h3> <h3>Diagnostic principal</h3>
<div style="font-size:0.95rem;margin-bottom:0.5rem;">{{ dp.texte }}</div> <div style="font-size:0.95rem;margin-bottom:0.5rem;">
{{ dp.texte }}
{# tojson inside a single-quoted attribute keeps apostrophes and newlines of the excerpt inside the JS string #}
{% if dp.source_page %}<button class="src-btn" onclick='showSource({{ dp.source_excerpt|default("",true)|tojson }}, {{ dp.source_page }})'>p.{{ dp.source_page }}</button>{% endif %}
</div>
{% if dp.cim10_suggestion %} {% if dp.cim10_suggestion %}
<span class="badge" style="background:#dbeafe;color:#1d4ed8;font-size:0.85rem;">{{ dp.cim10_suggestion }}</span> <span class="badge" style="background:#dbeafe;color:#1d4ed8;font-size:0.85rem;">{{ dp.cim10_suggestion }}</span>
{{ dp.cim10_confidence | confidence_badge }} {{ dp.cim10_confidence | confidence_badge }}
@@ -355,12 +358,7 @@
<span class="badge" style="background:#e0e7ff;color:#3730a3;font-size:0.7rem;">{{ das.source }}</span> <span class="badge" style="background:#e0e7ff;color:#3730a3;font-size:0.7rem;">{{ das.source }}</span>
{% endif %} {% endif %}
{% if das.source_page %} {% if das.source_page %}
<span style="font-size:0.7rem;color:#64748b;">p.{{ das.source_page }}</span> <button class="src-btn" onclick="showSource('{{ das.source_excerpt|default('',true)|e }}', {{ das.source_page }})">p.{{ das.source_page }}</button>
{% endif %}
{% if das.source_excerpt %}
<details style="margin-top:0.2rem;"><summary style="font-size:0.7rem;color:#94a3b8;cursor:pointer;">extrait</summary>
<pre style="font-size:0.7rem;white-space:pre-wrap;max-width:300px;color:#475569;">{{ das.source_excerpt }}</pre>
</details>
{% endif %} {% endif %}
</td> </td>
<td style="font-size:0.8rem;color:#475569;"> <td style="font-size:0.8rem;color:#475569;">
@@ -410,7 +408,7 @@
<div class="card section"> <div class="card section">
<h3>Actes CCAM ({{ dossier.actes_ccam|length }})</h3> <h3>Actes CCAM ({{ dossier.actes_ccam|length }})</h3>
<table> <table>
<thead><tr><th>Texte</th><th>Code CCAM</th><th>Regroupement</th><th>Date</th><th>Validité</th></tr></thead> <thead><tr><th>Texte</th><th>Code CCAM</th><th>Regroupement</th><th>Date</th><th>Validité</th><th>Source</th></tr></thead>
<tbody> <tbody>
{% for a in dossier.actes_ccam %} {% for a in dossier.actes_ccam %}
<tr> <tr>
@@ -432,6 +430,7 @@
<div style="font-size:0.7rem;color:#dc2626;">{{ alerte }}</div> <div style="font-size:0.7rem;color:#dc2626;">{{ alerte }}</div>
{% endfor %} {% endfor %}
</td> </td>
{# tojson inside a single-quoted attribute keeps apostrophes and newlines of the excerpt inside the JS string #}
<td>{% if a.source_page %}<button class="src-btn" onclick='showSource({{ a.source_excerpt|default("",true)|tojson }}, {{ a.source_page }})'>p.{{ a.source_page }}</button>{% endif %}</td>
</tr> </tr>
{% endfor %} {% endfor %}
</tbody> </tbody>
@@ -444,13 +443,14 @@
<div class="card section"> <div class="card section">
<h3>Biologie clé ({{ dossier.biologie_cle|length }})</h3> <h3>Biologie clé ({{ dossier.biologie_cle|length }})</h3>
<table> <table>
<thead><tr><th>Test</th><th>Valeur</th><th>Anomalie</th></tr></thead> <thead><tr><th>Test</th><th>Valeur</th><th>Anomalie</th><th>Source</th></tr></thead>
<tbody> <tbody>
{% for b in dossier.biologie_cle %} {% for b in dossier.biologie_cle %}
<tr{% if b.anomalie %} class="anomalie"{% endif %}> <tr{% if b.anomalie %} class="anomalie"{% endif %}>
<td>{{ b.test }}</td> <td>{{ b.test }}</td>
<td>{{ b.valeur or '' }}</td> <td>{{ b.valeur or '' }}</td>
<td>{% if b.anomalie %}<span class="badge" style="background:#fee2e2;color:#dc2626;">Oui</span>{% else %}—{% endif %}</td> <td>{% if b.anomalie %}<span class="badge" style="background:#fee2e2;color:#dc2626;">Oui</span>{% else %}—{% endif %}</td>
{# tojson inside a single-quoted attribute keeps apostrophes and newlines of the excerpt inside the JS string #}
<td>{% if b.source_page %}<button class="src-btn" onclick='showSource({{ b.source_excerpt|default("",true)|tojson }}, {{ b.source_page }})'>p.{{ b.source_page }}</button>{% endif %}</td>
</tr> </tr>
{% endfor %} {% endfor %}
</tbody> </tbody>
@@ -466,6 +466,7 @@
<div style="margin-bottom:0.5rem;"> <div style="margin-bottom:0.5rem;">
<strong>{{ img.type }}</strong> <strong>{{ img.type }}</strong>
{% if img.score %} — Score : {{ img.score }}{% endif %} {% if img.score %} — Score : {{ img.score }}{% endif %}
{# tojson inside a single-quoted attribute keeps apostrophes and newlines of the excerpt inside the JS string #}
{% if img.source_page %}<button class="src-btn" onclick='showSource({{ img.source_excerpt|default("",true)|tojson }}, {{ img.source_page }})'>p.{{ img.source_page }}</button>{% endif %}
{% if img.conclusion %} {% if img.conclusion %}
<div style="font-size:0.85rem;color:#475569;">{{ img.conclusion }}</div> <div style="font-size:0.85rem;color:#475569;">{{ img.conclusion }}</div>
{% endif %} {% endif %}
@@ -479,13 +480,14 @@
<div class="card section"> <div class="card section">
<h3>Traitements de sortie ({{ dossier.traitements_sortie|length }})</h3> <h3>Traitements de sortie ({{ dossier.traitements_sortie|length }})</h3>
<table> <table>
<thead><tr><th>Médicament</th><th>Posologie</th><th>Code ATC</th></tr></thead> <thead><tr><th>Médicament</th><th>Posologie</th><th>Code ATC</th><th>Source</th></tr></thead>
<tbody> <tbody>
{% for t in dossier.traitements_sortie %} {% for t in dossier.traitements_sortie %}
<tr> <tr>
<td>{{ t.medicament }}</td> <td>{{ t.medicament }}</td>
<td>{{ t.posologie or '' }}</td> <td>{{ t.posologie or '' }}</td>
<td>{% if t.code_atc %}<span class="badge" style="background:#e0e7ff;color:#3730a3;">{{ t.code_atc }}</span>{% endif %}</td> <td>{% if t.code_atc %}<span class="badge" style="background:#e0e7ff;color:#3730a3;">{{ t.code_atc }}</span>{% endif %}</td>
{# tojson inside a single-quoted attribute keeps apostrophes and newlines of the excerpt inside the JS string #}
<td>{% if t.source_page %}<button class="src-btn" onclick='showSource({{ t.source_excerpt|default("",true)|tojson }}, {{ t.source_page }})'>p.{{ t.source_page }}</button>{% endif %}</td>
</tr> </tr>
{% endfor %} {% endfor %}
</tbody> </tbody>
@@ -499,7 +501,7 @@
<h3>Antécédents ({{ dossier.antecedents|length }})</h3> <h3>Antécédents ({{ dossier.antecedents|length }})</h3>
<ul class="bullet"> <ul class="bullet">
{% for a in dossier.antecedents %} {% for a in dossier.antecedents %}
<li>{{ a }}</li> <li>{{ a.texte }}{% if a.source_page %} <button class="src-btn" onclick="showSource('{{ a.source_excerpt|default('',true)|e }}', {{ a.source_page }})">p.{{ a.source_page }}</button>{% endif %}</li>
{% endfor %} {% endfor %}
</ul> </ul>
</div> </div>
@@ -511,16 +513,109 @@
<h3>Complications ({{ dossier.complications|length }})</h3> <h3>Complications ({{ dossier.complications|length }})</h3>
<ul class="bullet"> <ul class="bullet">
{% for c in dossier.complications %} {% for c in dossier.complications %}
<li>{{ c }}</li> <li>{{ c.texte }}{% if c.source_page %} <button class="src-btn" onclick="showSource('{{ c.source_excerpt|default('',true)|e }}', {{ c.source_page }})">p.{{ c.source_page }}</button>{% endif %}</li>
{% endfor %} {% endfor %}
</ul> </ul>
</div> </div>
{% endif %} {% endif %}
{# ---- Modal source ---- #}
<div id="source-modal">
<div id="source-modal-inner">
<div id="source-header">
<span id="source-title">Document source</span>
<button id="source-close-btn" onclick="closeSource()">Fermer</button>
</div>
<div id="source-content"></div>
</div>
</div>
{% endblock %} {% endblock %}
{% block scripts %} {% block scripts %}
<script> <script>
/* --- Source modal --- */
let _sourceCache = null;
function getDossierId() {
    // Returns everything before the last "/" of the template-provided path,
    // or '' when the path has no "/".
    // Example: "103_23056749/103_23056749_fusionne_cim10.json" -> "103_23056749"
    // NOTE(review): the path is interpolated into a quoted JS literal; a quote
    // or backslash in it would break this script. Consider the tojson filter.
    const fp = '{{ filepath }}';
    const lastSlash = fp.lastIndexOf('/');
    return lastSlash === -1 ? '' : fp.substring(0, lastSlash);
}
/**
 * Fetch the source texts of the current dossier once and memoize them.
 *
 * Resolves to the JSON object returned by /api/source-text/<dossier id>,
 * or to an empty object when there is no dossier id, the response is not
 * OK, or the request/parsing fails. The result (including the empty
 * fallback) is stored in _sourceCache and reused by later calls.
 */
async function loadSourceTexts() {
    if (_sourceCache !== null) return _sourceCache;
    const dossierId = getDossierId();
    if (!dossierId) { _sourceCache = {}; return _sourceCache; }
    // Encode each path segment so characters such as "#", "?", "%" or spaces
    // in a directory name cannot truncate or corrupt the request URL; the "/"
    // separators are kept for the server-side path converter.
    const encodedId = dossierId.split('/').map(encodeURIComponent).join('/');
    try {
        const resp = await fetch('/api/source-text/' + encodedId);
        _sourceCache = resp.ok ? await resp.json() : {};
    } catch (e) { _sourceCache = {}; }
    return _sourceCache;
}
/**
 * Open the source modal and display the dossier text, highlighting the
 * first exact occurrence of `excerpt` when it is longer than 10 characters.
 * `page` is only used in the modal title.
 */
async function showSource(excerpt, page) {
    const modalEl = document.getElementById('source-modal');
    const bodyEl = document.getElementById('source-content');
    const titleEl = document.getElementById('source-title');

    // Show the modal right away with a placeholder while the text loads.
    titleEl.textContent = 'Document source — Page ' + page;
    bodyEl.innerHTML = '<em style="color:#94a3b8;">Chargement...</em>';
    modalEl.style.display = 'block';

    const sources = await loadSourceTexts();
    const fullText = Object.values(sources).join('\n\n--- ---\n\n');
    if (!fullText) {
        bodyEl.innerHTML = '<em style="color:#94a3b8;">Texte source non disponible</em>';
        return;
    }

    // Locate the excerpt; short or empty excerpts are never searched for.
    const start = (excerpt && excerpt.length > 10) ? fullText.indexOf(excerpt) : -1;
    if (start < 0) {
        // Fallback: plain text without any highlight.
        bodyEl.textContent = fullText;
        return;
    }

    const end = start + excerpt.length;
    const highlight = document.createElement('mark');
    highlight.textContent = fullText.substring(start, end);
    highlight.id = 'source-highlight';

    // Text nodes (not innerHTML) so the source text is never parsed as markup.
    bodyEl.innerHTML = '';
    bodyEl.appendChild(document.createTextNode(fullText.substring(0, start)));
    bodyEl.appendChild(highlight);
    bodyEl.appendChild(document.createTextNode(fullText.substring(end)));

    // Scroll to the highlight after a short delay.
    setTimeout(() => {
        const target = document.getElementById('source-highlight');
        if (target) target.scrollIntoView({ behavior: 'smooth', block: 'center' });
    }, 100);
}
function closeSource() {
    // Hide the source modal; its content is left in place.
    const modalEl = document.getElementById('source-modal');
    modalEl.style.display = 'none';
}
// Fermer le modal en cliquant sur le fond
document.getElementById('source-modal').addEventListener('click', function(e) {
if (e.target === this) closeSource();
});
// Fermer avec Escape
document.addEventListener('keydown', function(e) {
if (e.key === 'Escape') closeSource();
});
/* --- Reprocess --- */
document.getElementById('reprocess-btn').addEventListener('click', async () => { document.getElementById('reprocess-btn').addEventListener('click', async () => {
const btn = document.getElementById('reprocess-btn'); const btn = document.getElementById('reprocess-btn');
const status = document.getElementById('reprocess-status'); const status = document.getElementById('reprocess-status');

View File

@@ -2,7 +2,7 @@
import pytest import pytest
from src.config import DossierMedical, Diagnostic from src.config import DossierMedical, Diagnostic, Antecedent, Complication
from src.medical.cim10_extractor import ( from src.medical.cim10_extractor import (
extract_medical_info, extract_medical_info,
_lookup_cim10, _lookup_cim10,
@@ -121,7 +121,7 @@ Devenir : sortie le 03/03."""
assert any("Balthazar" in (i.score or "") for i in dossier.imagerie) assert any("Balthazar" in (i.score or "") for i in dossier.imagerie)
# Complications # Complications
assert any("cutanée" in c.lower() for c in dossier.complications) assert any("cutanée" in c.texte.lower() for c in dossier.complications)
def test_extract_without_edsnlp(self): def test_extract_without_edsnlp(self):
"""Vérifie que l'extraction fonctionne sans résultat edsnlp.""" """Vérifie que l'extraction fonctionne sans résultat edsnlp."""
@@ -236,7 +236,7 @@ Devenir : sortie le 03/03."""
dossier = extract_medical_info(parsed, text, edsnlp_result=edsnlp_result) dossier = extract_medical_info(parsed, text, edsnlp_result=edsnlp_result)
# Fièvre et infection sont niées, ne doivent pas apparaître dans complications # Fièvre et infection sont niées, ne doivent pas apparaître dans complications
complication_terms = [c.lower() for c in dossier.complications] complication_terms = [c.texte.lower() for c in dossier.complications]
assert "fièvre" not in complication_terms assert "fièvre" not in complication_terms
assert "infection" not in complication_terms assert "infection" not in complication_terms
@@ -504,6 +504,44 @@ class TestIsValidAntecedent:
def test_reject_texte_libre(self): def test_reject_texte_libre(self):
assert not _is_valid_antecedent("(texte libre)") assert not _is_valid_antecedent("(texte libre)")
# --- Artefacts CRH colonne gauche (médecins) ---
def test_reject_medecin_tag_start(self):
assert not _is_valid_antecedent(
"[MEDECIN] hospitalier - Syndrome anxio depressif suivi Dr [MEDECIN_39]"
)
def test_reject_medecin_assistant(self):
assert not _is_valid_antecedent(
"[MEDECIN] Assistant des Hôpitaux de Lyon - Bilan neurologique"
)
def test_reject_medecin_contractuel(self):
assert not _is_valid_antecedent("[MEDECIN] hospitalier contractuel")
def test_reject_dr_medecin_tag(self):
assert not _is_valid_antecedent("Dr [MEDECIN_7] (Caradoc)")
def test_reject_dr_chef_clinique(self):
assert not _is_valid_antecedent(
"Dr [MEDECIN_37] Chef de Clinique des Hôpitaux aucune aide"
)
def test_reject_de_bordeaux(self):
assert not _is_valid_antecedent("de Bordeaux")
def test_reject_de_lyon(self):
assert not _is_valid_antecedent("de Lyon")
def test_reject_secretariat(self):
assert not _is_valid_antecedent("Secrétariat : [TEL_3] - fracture en 2017")
def test_reject_aucune_aide(self):
assert not _is_valid_antecedent("aucune aide, pas d'ide, pas d'aide ménagère")
def test_accept_de_long_medical(self):
    """A long, genuine medical description is accepted."""
    # NOTE(review): this input starts with "dé", not "de " followed by a
    # capital letter, so it never reaches the short "de <Place>" rejection
    # rule of _is_valid_antecedent; a case of 25+ characters starting with
    # "de " + capital would be needed to cover that rule's length bound.
    assert _is_valid_antecedent("dégénérescence maculaire liée à l'âge")
# --- Cas limites --- # --- Cas limites ---
def test_reject_too_short(self): def test_reject_too_short(self):
assert not _is_valid_antecedent("de Bo") assert not _is_valid_antecedent("de Bo")
@@ -541,3 +579,108 @@ class TestClassifierConfidence:
result = classify(text) result = classify(text)
assert isinstance(result, str) assert isinstance(result, str)
assert result in ("crh", "trackare") assert result in ("crh", "trackare")
class TestBackwardCompatAntecedent:
    """Backward-compatibility tests for antecedents and complications."""

    @staticmethod
    def _crh_stub():
        """Return a fresh minimal parsed-document dict of type "crh"."""
        return {
            "type": "crh",
            "patient": {"sexe": "M"},
            "sejour": {},
            "diagnostics": [],
        }

    def test_old_format_string_list(self):
        """Load a legacy JSON where antecedents is ["HTA", "Diabète"]."""
        payload = {
            "antecedents": ["HTA", "Diabète type 2"],
            "complications": ["Fièvre"],
        }
        dossier = DossierMedical.model_validate(payload)

        history = dossier.antecedents
        assert len(history) == 2
        assert isinstance(history[0], Antecedent)
        assert history[0].texte == "HTA"
        assert history[1].texte == "Diabète type 2"

        events = dossier.complications
        assert len(events) == 1
        assert isinstance(events[0], Complication)
        assert events[0].texte == "Fièvre"

    def test_new_format_object_list(self):
        """Load a new JSON where antecedents is [{texte: "HTA", source_page: 1}]."""
        payload = {
            "antecedents": [
                {"texte": "HTA", "source_page": 2, "source_excerpt": "contexte HTA"},
            ],
            "complications": [{"texte": "Fièvre", "source_page": 3}],
        }
        dossier = DossierMedical.model_validate(payload)

        entry = dossier.antecedents[0]
        assert entry.texte == "HTA"
        assert entry.source_page == 2
        assert entry.source_excerpt == "contexte HTA"
        assert dossier.complications[0].source_page == 3

    def test_mixed_format(self):
        """A mix of plain strings and objects is converted correctly."""
        payload = {
            "antecedents": ["HTA", {"texte": "Diabète", "source_page": 1}],
        }
        dossier = DossierMedical.model_validate(payload)

        history = dossier.antecedents
        assert len(history) == 2
        # The plain-string entry carries no page information.
        assert history[0].texte == "HTA"
        assert history[0].source_page is None
        # The object entry keeps the page it was given.
        assert history[1].texte == "Diabète"
        assert history[1].source_page == 1

    def test_empty_list(self):
        """Empty input lists stay empty after validation."""
        payload = {"antecedents": [], "complications": []}
        dossier = DossierMedical.model_validate(payload)
        assert dossier.antecedents == []
        assert dossier.complications == []

    def test_antecedent_extraction_produces_objects(self):
        """Extraction yields Antecedent objects."""
        text = (
            "Antécédents :\n"
            "- Diabète type 2\n"
            "- Hypertension artérielle\n"
            "\n"
            "Histoire de la maladie"
        )
        dossier = extract_medical_info(self._crh_stub(), text)

        assert len(dossier.antecedents) >= 1
        assert all(isinstance(item, Antecedent) for item in dossier.antecedents)
        labels = [item.texte for item in dossier.antecedents]
        assert "Diabète type 2" in labels

    def test_complication_extraction_produces_objects(self):
        """Extraction yields Complication objects."""
        text = "Patient avec fièvre post-opératoire."
        dossier = extract_medical_info(self._crh_stub(), text)

        # NOTE(review): all() is vacuously true on an empty list, so this
        # passes even when no complication is extracted from the text.
        assert all(isinstance(item, Complication) for item in dossier.complications)
class TestSourceTrackingFields:
    """Check that the source_page/source_excerpt fields exist on the models.

    Each test builds one model with explicit source-tracking values and
    verifies they are readable back from the instance.
    """

    def test_biologie_source_fields(self):
        """BiologieCle keeps both source_page and source_excerpt."""
        from src.config import BiologieCle

        b = BiologieCle(test="CRP", valeur="45", source_page=2, source_excerpt="CRP=45")
        assert b.source_page == 2
        assert b.source_excerpt == "CRP=45"

    def test_imagerie_source_fields(self):
        """Imagerie keeps the source_page it was built with."""
        from src.config import Imagerie

        i = Imagerie(type="TDM", source_page=3)
        assert i.source_page == 3

    def test_traitement_source_fields(self):
        """Traitement keeps the source_page it was built with."""
        from src.config import Traitement

        t = Traitement(medicament="Paracétamol", source_page=4)
        assert t.source_page == 4

    def test_acte_source_fields(self):
        """ActeCCAM keeps the source_page it was built with."""
        from src.config import ActeCCAM

        a = ActeCCAM(texte="Cholécystectomie", source_page=5)
        assert a.source_page == 5

    def test_antecedent_source_fields(self):
        """Antecedent keeps both source_page and source_excerpt."""
        a = Antecedent(texte="HTA", source_page=1, source_excerpt="Antécédents: HTA")
        assert a.source_page == 1
        # The excerpt is passed to the constructor, so verify it round-trips
        # too; previously it was set but never checked.
        assert a.source_excerpt == "Antécédents: HTA"

    def test_complication_source_fields(self):
        """Complication keeps the source_page it was built with."""
        c = Complication(texte="Fièvre", source_page=2)
        assert c.source_page == 2

View File

@@ -143,3 +143,15 @@ class TestDetailPageLoads:
"""Un fichier inexistant retourne 404.""" """Un fichier inexistant retourne 404."""
response = client.get("/dossier/nonexistent.json") response = client.get("/dossier/nonexistent.json")
assert response.status_code == 404 assert response.status_code == 404
class TestSourceTextEndpoint:
    """Checks on the /api/source-text/ route."""

    def test_source_text_404_nonexistent(self, client):
        """A dossier that does not exist returns 404."""
        url = "/api/source-text/nonexistent_dossier"
        reply = client.get(url)
        assert reply.status_code == 404

    def test_source_text_security_path_traversal(self, client):
        """Path traversal is blocked."""
        # NOTE(review): some test clients normalise "../" segments before
        # routing — confirm this request really reaches the handler's guard.
        url = "/api/source-text/../../etc"
        rejected_statuses = (403, 404)
        reply = client.get(url)
        assert reply.status_code in rejected_statuses