feat: mode Validation DIM dans le viewer Flask

Permet aux médecins DIM de valider/corriger les codes CIM-10 extraits
par le pipeline pour construire un gold standard (50 dossiers).

- ValidationManager : gestion annotations JSON dans data/gold_standard/
- Script sélection 50 dossiers (25 CPAM + 25 stratifiés CMD/confiance)
- Routes /validation, /api/cim10/search, /api/validation/save, /validation/metrics
- Formulaire avec autocomplete CIM-10, boutons Correct/Modifier/Supprimer
- Dashboard métriques : precision, recall, F1, hallucination par confiance/source

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-02-17 21:43:02 +01:00
parent aad925ebea
commit dbc5bdbaf4
7 changed files with 1488 additions and 0 deletions

View File

@@ -0,0 +1,231 @@
#!/usr/bin/env python3
"""Sélectionne 50 dossiers pour le gold standard de validation DIM.
- 25 dossiers CPAM (cas complexes, déjà contrôlés)
- 25 dossiers non-CPAM stratifiés par CMD, confiance DP, nombre de DAS
Crée data/gold_standard/_selection.json et initialise les annotations vides.
"""
from __future__ import annotations
import json
import random
import sys
from pathlib import Path
# Ajouter le répertoire racine au path
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from src.config import STRUCTURED_DIR, BASE_DIR, DossierMedical
# Directory receiving the selection file (_selection.json) and one annotation
# JSON file per selected dossier (see main()).
GOLD_DIR = BASE_DIR / "data" / "gold_standard"
# Total number of dossiers wanted in the gold-standard sample.
TARGET_TOTAL = 50
# Upper bound on the number of CPAM-controlled dossiers taken into the sample.
TARGET_CPAM = 25
def load_all_dossiers() -> list[dict]:
    """Load one dossier per sub-directory of ``STRUCTURED_DIR``.

    For each sub-directory the merged file (name matching ``*fusionne*.json``)
    is preferred; when there is none, the first JSON file in sorted order is
    used instead. Candidates are always sorted so that the file picked does
    not depend on the file-system enumeration order.

    Returns:
        A list of dicts with the keys ``dossier_id`` (``"<group>/<file stem>"``),
        ``group_name``, ``path_rel`` (path relative to ``STRUCTURED_DIR``) and
        ``dossier`` (the validated ``DossierMedical``). The list is empty when
        ``STRUCTURED_DIR`` does not exist.
    """
    if not STRUCTURED_DIR.is_dir():
        # No pipeline output yet: report "nothing found" to the caller
        # instead of crashing in iterdir().
        return []

    dossiers = []
    for subdir in sorted(STRUCTURED_DIR.iterdir()):
        if not subdir.is_dir():
            continue

        # Merged file first, any JSON file as a fallback.
        candidates = sorted(subdir.glob("*fusionne*.json")) or sorted(subdir.glob("*.json"))
        if not candidates:
            continue
        fusionne = candidates[0]

        try:
            data = json.loads(fusionne.read_text(encoding="utf-8"))
            dossier = DossierMedical.model_validate(data)
            rel_path = str(fusionne.relative_to(STRUCTURED_DIR))
        except Exception as e:
            # Deliberate best effort: an unreadable or invalid file is
            # reported and skipped so the remaining dossiers still load.
            print(f" Erreur chargement {fusionne.name}: {e}")
        else:
            group_name = subdir.name
            dossiers.append({
                "dossier_id": f"{group_name}/{fusionne.stem}",
                "group_name": group_name,
                "path_rel": rel_path,
                "dossier": dossier,
            })
    return dossiers
def select_dossiers(all_dossiers: list[dict]) -> list[dict]:
    """Pick the gold-standard sample: CPAM dossiers first, then a stratified rest.

    At most ``TARGET_CPAM`` CPAM-controlled dossiers are kept (in input order);
    the remainder of ``TARGET_TOTAL`` is drawn from the other dossiers with
    ``stratified_sample``. Progress is printed to stdout.
    """
    cpam_pool: list[dict] = []
    other_pool: list[dict] = []
    for entry in all_dossiers:
        bucket = cpam_pool if entry["dossier"].controles_cpam else other_pool
        bucket.append(entry)

    print(f"Dossiers CPAM disponibles : {len(cpam_pool)}")
    print(f"Dossiers non-CPAM disponibles : {len(other_pool)}")

    # Whatever the CPAM pool cannot provide is requested from the other pool.
    picked_cpam = cpam_pool[:TARGET_CPAM]
    picked_other = stratified_sample(other_pool, TARGET_TOTAL - len(picked_cpam))

    result = [*picked_cpam, *picked_other]
    print(f"\nSélection finale : {len(result)} dossiers")
    print(f" - CPAM : {len(picked_cpam)}")
    print(f" - Non-CPAM : {len(picked_other)}")
    return result
def stratified_sample(dossiers: list[dict], n: int) -> list[dict]:
    """Draw ``n`` dossiers stratified by CMD, DP confidence and DAS count.

    Phase 1 takes one randomly chosen dossier per CMD (in sorted CMD order)
    for maximum diversity. Phase 2 fills the remaining slots with the
    dossiers whose principal diagnosis has the lowest confidence first
    (low, medium, high, then unknown), breaking ties in favour of dossiers
    with many associated diagnoses.

    The draw is reproducible: it uses a private generator seeded with 42, so
    the process-wide ``random`` state is left untouched.

    Args:
        dossiers: entries with a ``dossier_id`` key and a ``dossier`` object
            exposing ``ghm_estimation``, ``diagnostic_principal`` and
            ``diagnostics_associes``.
        n: number of dossiers wanted.

    Returns:
        ``dossiers`` itself when it holds at most ``n`` entries, otherwise a
        new list of at most ``n`` entries.
    """
    if len(dossiers) <= n:
        return dossiers

    # Same seed and algorithm as random.seed(42), without reseeding the
    # module-level generator shared by the rest of the program.
    rng = random.Random(42)

    # Group by CMD; a missing GHM estimation or empty CMD goes to "inconnu".
    by_cmd: dict[str, list[dict]] = {}
    for d in dossiers:
        ghm = d["dossier"].ghm_estimation
        cmd = (ghm.cmd if ghm else None) or "inconnu"
        by_cmd.setdefault(cmd, []).append(d)

    selected: list[dict] = []
    seen_ids = set()

    # Phase 1: one dossier per CMD.
    for cmd in sorted(by_cmd):
        if len(selected) >= n:
            break
        candidates = by_cmd[cmd]
        rng.shuffle(candidates)
        pick = candidates[0]
        selected.append(pick)
        seen_ids.add(pick["dossier_id"])

    # Phase 2: over-represent the difficult cases among what is left.
    if len(selected) < n:
        conf_order = {"low": 0, "medium": 1, "high": 2}

        def difficulty(d: dict) -> tuple:
            dp = d["dossier"].diagnostic_principal
            confidence = dp.cim10_confidence if dp else None
            # Unknown or missing confidence sorts last (rank 3).
            return (conf_order.get(confidence, 3), -len(d["dossier"].diagnostics_associes))

        remaining = [d for d in dossiers if d["dossier_id"] not in seen_ids]
        remaining.sort(key=difficulty)
        selected.extend(remaining[: n - len(selected)])

    return selected[:n]
def create_empty_annotation(dossier_id: str, dossier: DossierMedical) -> dict:
    """Build the initial (not yet reviewed) annotation for a dossier.

    Every pipeline diagnosis starts with the status ``"correct"`` and no
    corrected code; the annotation itself starts as ``"non_commence"``.
    Missing pipeline codes and confidences are stored as empty strings for
    both the principal diagnosis and the associated diagnoses, so the
    annotation file never mixes ``null`` and ``""`` for the same field.

    Args:
        dossier_id: identifier stored in the annotation.
        dossier: dossier providing ``diagnostic_principal`` (may be empty)
            and ``diagnostics_associes``.

    Returns:
        A JSON-serialisable dict with the keys ``dossier_id``, ``validateur``,
        ``date_validation``, ``statut``, ``dp``, ``das``, ``das_ajoutes`` and
        ``commentaire_general``.
    """
    dp = dossier.diagnostic_principal

    das_list = [
        {
            "index": i,
            "texte_original": das.texte,
            "code_pipeline": das.cim10_suggestion or "",
            "confidence": das.cim10_confidence or "",
            "source": das.source or "",
            "statut": "correct",
            "code_corrige": None,
            "commentaire": "",
        }
        for i, das in enumerate(dossier.diagnostics_associes)
    ]

    return {
        "dossier_id": dossier_id,
        "validateur": "",
        "date_validation": "",
        "statut": "non_commence",
        "dp": {
            "texte_original": dp.texte if dp else "",
            # "or ''" mirrors the DAS entries: a DP without suggestion or
            # confidence is stored as "" rather than None.
            "code_pipeline": (dp.cim10_suggestion or "") if dp else "",
            "confidence": (dp.cim10_confidence or "") if dp else "",
            "statut": "correct",
            "code_corrige": None,
            "commentaire": "",
        },
        "das": das_list,
        "das_ajoutes": [],
        "commentaire_general": "",
    }
def main():
    """Select the gold-standard dossiers and initialise their annotations.

    Loads every dossier, selects the sample, writes ``_selection.json`` into
    ``GOLD_DIR`` and creates an empty annotation file for each selected
    dossier that does not have one yet (existing annotations are never
    overwritten). Exits with status 1 when no dossier could be loaded.
    """
    # Function-scope import replacing the former inline __import__("datetime").
    from datetime import datetime

    print("=== Sélection des dossiers pour validation DIM ===\n")
    all_dossiers = load_all_dossiers()
    print(f"Total dossiers chargés : {len(all_dossiers)}\n")
    if not all_dossiers:
        print("Aucun dossier trouvé dans output/structured/")
        sys.exit(1)

    selected = select_dossiers(all_dossiers)

    GOLD_DIR.mkdir(parents=True, exist_ok=True)

    # Persist the selection (ids only) with a few counters.
    selection = {
        "date_selection": datetime.now().isoformat(timespec="seconds"),
        "total": len(selected),
        "cpam": sum(1 for d in selected if d["dossier"].controles_cpam),
        "non_cpam": sum(1 for d in selected if not d["dossier"].controles_cpam),
        "dossiers": [d["dossier_id"] for d in selected],
    }
    selection_path = GOLD_DIR / "_selection.json"
    selection_path.write_text(
        json.dumps(selection, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )
    print(f"\nSélection sauvegardée : {selection_path}")

    # Create the missing annotation files; "/" in the id becomes "__" so the
    # id maps to a flat file name.
    created = 0
    for d in selected:
        dossier_id = d["dossier_id"]
        annot_path = GOLD_DIR / (dossier_id.replace("/", "__") + ".json")
        if annot_path.exists():
            continue
        annotation = create_empty_annotation(dossier_id, d["dossier"])
        annot_path.write_text(
            json.dumps(annotation, ensure_ascii=False, indent=2),
            encoding="utf-8",
        )
        created += 1
    print(f"Annotations vides créées : {created}")
    print(f"Annotations existantes préservées : {len(selected) - created}")

    # Summary table.
    print("\n--- Résumé ---")
    for i, d in enumerate(selected, 1):
        dos = d["dossier"]
        dp = dos.diagnostic_principal
        # "or '?'" is required: formatting None with "<6s" raises TypeError.
        dp_code = (dp.cim10_suggestion or "?") if dp else "?"
        dp_conf = (dp.cim10_confidence or "?") if dp else "?"
        n_das = len(dos.diagnostics_associes)
        cpam_flag = " [CPAM]" if dos.controles_cpam else ""
        ghm = dos.ghm_estimation
        cmd = (ghm.cmd or "?") if ghm else "?"
        print(f" {i:2d}. {d['group_name']:<20s} DP={dp_code:<6s} conf={dp_conf:<7s} DAS={n_das:2d} CMD={cmd}{cpam_flag}")


if __name__ == "__main__":
    main()

View File

@@ -22,6 +22,7 @@ from ..config import (
) )
from .. import config as cfg from .. import config as cfg
from .referentiels import ReferentielManager from .referentiels import ReferentielManager
from .validation import ValidationManager
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -539,4 +540,158 @@ def create_app() -> Flask:
logger.exception("Erreur lors du rebuild de l'index") logger.exception("Erreur lors du rebuild de l'index")
return jsonify({"error": str(e)}), 500 return jsonify({"error": str(e)}), 500
# ------------------------------------------------------------------
# DIM validation routes
# ------------------------------------------------------------------
# Manager instance used by all the validation routes defined below.
val_manager = ValidationManager()
@app.route("/validation")
def validation_list():
    """Render the list of dossiers selected for validation with their progress."""
    groups = scan_dossiers()
    selection = val_manager.load_selection()
    annotations = {a["dossier_id"]: a for a in val_manager.list_annotations()}

    rows = []
    for dossier_id in selection:
        annot = annotations.get(dossier_id, {})

        # The group is the first path component of the dossier id.
        group_name = dossier_id.split("/")[0]
        group_items = groups.get(group_name, [])

        # Pipeline data: merged file if the group has one, else its first item.
        pipeline = next((gi for gi in group_items if "fusionne" in gi["name"]), None)
        if not pipeline and group_items:
            pipeline = group_items[0]

        d = pipeline["dossier"] if pipeline else None
        dp = d.diagnostic_principal if d else None
        rows.append({
            "dossier_id": dossier_id,
            "group_name": group_name,
            "dp_code": dp.cim10_suggestion if dp else "",
            "dp_texte": dp.texte if dp else "",
            "dp_confidence": dp.cim10_confidence if dp else "",
            "nb_das": len(d.diagnostics_associes) if d else 0,
            "has_cpam": bool(d and d.controles_cpam),
            "statut": annot.get("statut", "non_commence"),
            "validateur": annot.get("validateur", ""),
            "date_validation": annot.get("date_validation", ""),
        })

    return render_template(
        "validation_list.html",
        items=rows,
        total=len(rows),
        valides=sum(row["statut"] == "valide" for row in rows),
        en_cours=sum(row["statut"] == "en_cours" for row in rows),
        groups=groups,
    )
@app.route("/validation/<path:dossier_id>")
def validation_detail(dossier_id: str):
    """Render the validation form of one dossier.

    Responds with 404 when no annotation exists for ``dossier_id``. The
    previous/next links follow the order of the selection; a dossier that is
    not part of the selection gets neither link.
    """
    groups = scan_dossiers()

    annotation = val_manager.load_annotation(dossier_id)
    if not annotation:
        abort(404)

    # Pipeline data: merged file if the group has one, else its first item.
    group_name = dossier_id.split("/")[0]
    group_items = groups.get(group_name, [])
    pipeline = next((gi for gi in group_items if "fusionne" in gi["name"]), None)
    if not pipeline and group_items:
        pipeline = group_items[0]
    dossier = pipeline["dossier"] if pipeline else None

    # Navigation. Only computed for dossiers found in the selection: with the
    # former sentinel index of -1, "next" pointed at the first selected
    # dossier for any dossier outside the selection.
    selection = val_manager.load_selection()
    prev_id = next_id = None
    if dossier_id in selection:
        current_idx = selection.index(dossier_id)
        if current_idx > 0:
            prev_id = selection[current_idx - 1]
        if current_idx < len(selection) - 1:
            next_id = selection[current_idx + 1]

    return render_template(
        "validation_detail.html",
        annotation=annotation,
        dossier=dossier,
        dossier_id=dossier_id,
        group_name=group_name,
        prev_id=prev_id,
        next_id=next_id,
        groups=groups,
    )
@app.route("/api/validation/save", methods=["POST"])
def api_validation_save():
    """Save the annotation posted as JSON by the validation form.

    Responses:
        200 ``{"ok": true}`` on success;
        400 when the body is not a JSON object or has no usable
        ``dossier_id`` string;
        403 when a selection exists and the dossier is not part of it;
        500 when saving fails.
    """
    data = request.get_json(silent=True)
    # Reject non-object payloads (lists, strings, ...) and non-string ids up
    # front; they used to raise an unhandled TypeError further down.
    dossier_id = data.get("dossier_id") if isinstance(data, dict) else None
    if not isinstance(dossier_id, str) or not dossier_id:
        return jsonify({"error": "dossier_id requis"}), 400

    # The membership check is skipped while no selection has been created.
    # NOTE(review): in that case any dossier_id reaches save_annotation —
    # confirm that ValidationManager sanitises the id before using it as a path.
    selection = val_manager.load_selection()
    if selection and dossier_id not in selection:
        return jsonify({"error": "Dossier non sélectionné pour validation"}), 403

    try:
        val_manager.save_annotation(dossier_id, data)
        return jsonify({"ok": True})
    except Exception as e:
        logger.exception("Erreur sauvegarde annotation %s", dossier_id)
        return jsonify({"error": str(e)}), 500
@app.route("/api/cim10/search")
def api_cim10_search():
    """Autocomplete endpoint for CIM-10 codes.

    Reads the ``q`` query parameter and returns ``{"results": [...]}`` with
    at most 20 ``{"code", "label"}`` entries: codes starting with the query
    first (case-insensitive), then labels containing the normalised query.
    Queries shorter than two characters return an empty list.
    """
    from ..medical.cim10_dict import load_dict, normalize_text

    max_results = 20

    q = request.args.get("q", "").strip()
    if len(q) < 2:
        return jsonify({"results": []})

    cim10 = load_dict()
    q_norm = normalize_text(q)
    q_upper = q.upper()

    results = []
    # Codes already returned, so the label pass can skip them in O(1)
    # instead of rescanning the result list for every dictionary entry.
    seen_codes = set()

    # Pass 1: code prefix match.
    for code, label in cim10.items():
        if code.upper().startswith(q_upper):
            results.append({"code": code, "label": label})
            seen_codes.add(code)
            if len(results) >= max_results:
                break

    # Pass 2: match on the normalised label. Skipped when normalisation
    # leaves nothing of the query, because "" is contained in every label.
    if q_norm and len(results) < max_results:
        for code, label in cim10.items():
            if code in seen_codes:
                continue
            if q_norm in normalize_text(label):
                results.append({"code": code, "label": label})
                if len(results) >= max_results:
                    break

    return jsonify({"results": results})
@app.route("/validation/metrics")
def validation_metrics():
    """Render the metrics dashboard computed from the saved annotations."""
    groups = scan_dossiers()
    context = {
        "metrics": val_manager.compute_metrics(groups),
        "total_selection": len(val_manager.load_selection()),
        "groups": groups,
    }
    return render_template("validation_metrics.html", **context)
return app return app

View File

@@ -262,6 +262,10 @@
onmouseover="this.style.color='#f8fafc'" onmouseout="this.style.color='#cbd5e1'"> onmouseover="this.style.color='#f8fafc'" onmouseout="this.style.color='#cbd5e1'">
Référentiels RAG Référentiels RAG
</a> </a>
<a href="/validation" style="display:block;color:#fbbf24;text-decoration:none;font-size:0.8rem;font-weight:600;padding:0.35rem 0;transition:color 0.15s;"
onmouseover="this.style.color='#fde68a'" onmouseout="this.style.color='#fbbf24'">
Validation DIM
</a>
</div> </div>
<div class="sidebar-admin"> <div class="sidebar-admin">
<label for="model-select">Modèle Ollama</label> <label for="model-select">Modèle Ollama</label>

View File

@@ -0,0 +1,404 @@
{% extends "base.html" %}
{% block title %}Validation — {{ group_name }}{% endblock %}
{% block sidebar %}
{% for gn, items in groups.items() %}
<div class="group-title">{{ gn | format_dossier_name }}</div>
{% for item in items %}
{% if 'fusionne' in item.name %}
<a href="/dossier/{{ item.path_rel }}" class="sidebar-fusionne">&#9733; Fusionné</a>
{% else %}
<a href="/dossier/{{ item.path_rel }}">{{ item.name | format_doc_name }}</a>
{% endif %}
{% endfor %}
{% endfor %}
{% endblock %}
{% block content %}
<style>
.seg-btn { display:inline-block;padding:4px 12px;border:1px solid #cbd5e1;font-size:0.8rem;font-weight:600;cursor:pointer;background:#fff;color:#475569;transition:all 0.15s; }
.seg-btn:first-child { border-radius:6px 0 0 6px; }
.seg-btn:last-child { border-radius:0 6px 6px 0; }
.seg-btn:not(:first-child) { border-left:none; }
.seg-btn.active-correct { background:#dcfce7;color:#16a34a;border-color:#16a34a; }
.seg-btn.active-modifie { background:#fef9c3;color:#ca8a04;border-color:#ca8a04; }
.seg-btn.active-supprime { background:#fee2e2;color:#dc2626;border-color:#dc2626; }
.code-input { padding:4px 8px;border:1px solid #cbd5e1;border-radius:6px;font-size:0.85rem;font-family:monospace;width:120px; }
.comment-input { padding:4px 8px;border:1px solid #e2e8f0;border-radius:6px;font-size:0.8rem;width:100%;max-width:300px; }
.autocomplete-wrapper { position:relative;display:inline-block; }
.autocomplete-dropdown { position:absolute;top:100%;left:0;z-index:100;background:#fff;border:1px solid #cbd5e1;border-radius:6px;box-shadow:0 4px 12px rgba(0,0,0,0.1);max-height:250px;overflow-y:auto;width:400px;display:none; }
.autocomplete-dropdown .ac-item { padding:6px 10px;cursor:pointer;font-size:0.8rem;border-bottom:1px solid #f1f5f9; }
.autocomplete-dropdown .ac-item:hover { background:#f1f5f9; }
.autocomplete-dropdown .ac-code { font-family:monospace;font-weight:700;color:#1e293b;margin-right:8px; }
.autocomplete-dropdown .ac-label { color:#64748b; }
.save-bar { position:sticky;bottom:0;background:#fff;border-top:2px solid #e2e8f0;padding:1rem;display:flex;align-items:center;gap:1rem;z-index:50; }
.btn-save { padding:8px 20px;border-radius:8px;border:none;font-size:0.85rem;font-weight:600;cursor:pointer;transition:all 0.15s; }
.btn-brouillon { background:#f1f5f9;color:#475569; }
.btn-brouillon:hover { background:#e2e8f0; }
.btn-valider { background:#16a34a;color:#fff; }
.btn-valider:hover { background:#15803d; }
.nav-link { font-size:0.85rem;color:#3b82f6;text-decoration:none;font-weight:600; }
.nav-link:hover { text-decoration:underline; }
.das-row-added { background:#f0fdf4; }
</style>
<!-- Navigation -->
<div style="display:flex;align-items:center;gap:1rem;margin-bottom:1rem;">
<a href="/validation" class="back">Retour à la liste</a>
<span style="color:#cbd5e1;">|</span>
{% if prev_id %}
<a href="/validation/{{ prev_id }}" class="nav-link">Précédent</a>
{% endif %}
{% if next_id %}
<a href="/validation/{{ next_id }}" class="nav-link">Suivant</a>
{% endif %}
<span style="flex:1;"></span>
<a href="/dossier/{{ group_name }}/{{ group_name }}_fusionne_cim10.json" class="nav-link" target="_blank">Voir le dossier complet</a>
</div>
<h2>Validation : {{ group_name }}</h2>
{% if dossier %}
<!-- Infos séjour (lecture seule) -->
<div class="card" style="margin-bottom:1rem;">
<h3>Séjour</h3>
<div class="info-grid">
{% if dossier.sejour.sexe %}<div class="info-item"><label>Sexe</label><span>{{ dossier.sejour.sexe }}</span></div>{% endif %}
{% if dossier.sejour.age is not none %}<div class="info-item"><label>Âge</label><span>{{ dossier.sejour.age }} ans</span></div>{% endif %}
{% if dossier.sejour.date_entree %}<div class="info-item"><label>Entrée</label><span>{{ dossier.sejour.date_entree }}</span></div>{% endif %}
{% if dossier.sejour.date_sortie %}<div class="info-item"><label>Sortie</label><span>{{ dossier.sejour.date_sortie }}</span></div>{% endif %}
{% if dossier.sejour.duree_sejour is not none %}<div class="info-item"><label>Durée</label><span>{{ dossier.sejour.duree_sejour }}j</span></div>{% endif %}
</div>
</div>
<!-- Principal diagnosis (DP) -->
<div class="card" style="margin-bottom:1rem;">
  <h3>Diagnostic Principal</h3>
  {% set dp = annotation.dp %}
  {# Restore the saved decision, as the DAS rows do. Without this the DP
     always reloaded as "Correct" with the corrected code hidden, and the
     next save silently discarded a previous "modifie"/"supprime" choice. #}
  {% set dp_statut = dp.statut if dp.statut in ('modifie', 'supprime') else 'correct' %}
  <div style="margin-bottom:0.75rem;">
    <span style="font-size:0.9rem;">{{ dp.texte_original }}</span>
  </div>
  <div style="display:flex;align-items:center;gap:1rem;flex-wrap:wrap;">
    <span style="font-family:monospace;font-size:1rem;font-weight:700;background:#dbeafe;color:#1d4ed8;padding:2px 10px;border-radius:6px;">{{ dp.code_pipeline }}</span>
    {{ dp.confidence | confidence_badge }}
    <!-- Segmented buttons -->
    <div class="seg-group" data-target="dp">
      <span class="seg-btn {% if dp_statut == 'correct' %}active-correct{% endif %}" data-value="correct" onclick="setStatut(this)">Correct</span>
      <span class="seg-btn {% if dp_statut == 'modifie' %}active-modifie{% endif %}" data-value="modifie" onclick="setStatut(this)">Modifier</span>
      <span class="seg-btn {% if dp_statut == 'supprime' %}active-supprime{% endif %}" data-value="supprime" onclick="setStatut(this)">Supprimer</span>
    </div>
    <!-- Replacement code field, visible only while the DP is marked as modified -->
    <div class="autocomplete-wrapper dp-code-field" style="{% if dp_statut != 'modifie' %}display:none;{% endif %}">
      <input type="text" class="code-input" placeholder="Code CIM-10" data-ac="dp"
             value="{{ dp.code_corrige or '' }}" autocomplete="off">
      <div class="autocomplete-dropdown"></div>
    </div>
    <input type="text" class="comment-input dp-comment" placeholder="Commentaire (optionnel)" value="{{ dp.commentaire or '' }}">
  </div>
</div>
<!-- DAS -->
<div class="card" style="margin-bottom:1rem;">
<h3>Diagnostics Associés ({{ annotation.das|length }})</h3>
<table id="das-table">
<thead>
<tr>
<th style="width:30px;">#</th>
<th>Texte</th>
<th>Code pipeline</th>
<th>Conf.</th>
<th>Source</th>
<th>Validation</th>
<th>Code corrigé</th>
<th>Commentaire</th>
</tr>
</thead>
<tbody>
{% for das in annotation.das %}
<tr class="das-row" data-index="{{ das.index }}">
<td style="color:#94a3b8;font-size:0.8rem;">{{ loop.index }}</td>
<td style="font-size:0.85rem;max-width:250px;">{{ das.texte_original }}</td>
<td><span style="font-family:monospace;font-weight:600;">{{ das.code_pipeline }}</span></td>
<td>{{ das.confidence | confidence_badge }}</td>
<td>
{% if das.source %}
<span style="font-size:0.7rem;padding:2px 6px;border-radius:4px;background:#f1f5f9;color:#475569;">{{ das.source }}</span>
{% endif %}
</td>
<td>
<div class="seg-group" data-target="das-{{ das.index }}">
<span class="seg-btn {% if das.statut == 'correct' %}active-correct{% endif %}" data-value="correct" onclick="setStatut(this)">OK</span>
<span class="seg-btn {% if das.statut == 'modifie' %}active-modifie{% endif %}" data-value="modifie" onclick="setStatut(this)">Mod</span>
<span class="seg-btn {% if das.statut == 'supprime' %}active-supprime{% endif %}" data-value="supprime" onclick="setStatut(this)">Sup</span>
</div>
</td>
<td>
<div class="autocomplete-wrapper das-code-field-{{ das.index }}" style="{% if das.statut != 'modifie' %}display:none;{% endif %}">
<input type="text" class="code-input" placeholder="CIM-10" data-ac="das-{{ das.index }}"
value="{{ das.code_corrige or '' }}" autocomplete="off">
<div class="autocomplete-dropdown"></div>
</div>
</td>
<td>
<input type="text" class="comment-input das-comment" data-index="{{ das.index }}" placeholder="" value="{{ das.commentaire or '' }}">
</td>
</tr>
{% endfor %}
</tbody>
</table>
<!-- DAS ajoutés -->
<div id="das-ajoutes" style="margin-top:1rem;">
<h3 style="font-size:0.85rem;color:#16a34a;">DAS manquants (ajoutés par le DIM)</h3>
<div id="das-ajoutes-list">
{% for aj in annotation.das_ajoutes %}
<div class="das-added-row das-row-added" style="display:flex;align-items:center;gap:0.5rem;padding:0.5rem 0;border-bottom:1px solid #e2e8f0;" data-aj-index="{{ loop.index0 }}">
<input type="text" class="comment-input aj-texte" placeholder="Texte du diagnostic" value="{{ aj.texte or '' }}" style="flex:1;max-width:300px;">
<div class="autocomplete-wrapper">
<input type="text" class="code-input aj-code" placeholder="CIM-10" value="{{ aj.code or '' }}" autocomplete="off" data-ac="aj-{{ loop.index0 }}">
<div class="autocomplete-dropdown"></div>
</div>
<input type="text" class="comment-input aj-comment" placeholder="Commentaire" value="{{ aj.commentaire or '' }}" style="max-width:200px;">
<button onclick="removeAjoute(this)" style="background:none;border:none;color:#dc2626;cursor:pointer;font-size:1.1rem;padding:4px 8px;">&times;</button>
</div>
{% endfor %}
</div>
<button id="btn-add-das" onclick="addDasManquant()" style="margin-top:0.5rem;padding:6px 14px;border-radius:6px;border:1px dashed #16a34a;background:#f0fdf4;color:#16a34a;font-size:0.8rem;font-weight:600;cursor:pointer;">
+ Ajouter un DAS manquant
</button>
</div>
</div>
<!-- Commentaire général -->
<div class="card" style="margin-bottom:5rem;">
<h3>Commentaire général</h3>
<textarea id="commentaire-general" rows="3"
style="width:100%;padding:8px;border:1px solid #cbd5e1;border-radius:6px;font-size:0.85rem;resize:vertical;">{{ annotation.commentaire_general or '' }}</textarea>
</div>
{% endif %}
<!-- Barre de sauvegarde sticky -->
<div class="save-bar">
<button class="btn-save btn-brouillon" onclick="saveAnnotation('en_cours')">Enregistrer (brouillon)</button>
<button class="btn-save btn-valider" onclick="saveAnnotation('valide')">Marquer comme validé</button>
<span id="save-status" style="font-size:0.8rem;color:#64748b;"></span>
<span style="flex:1;"></span>
<span style="font-size:0.8rem;color:#94a3b8;">Dossier : {{ dossier_id }}</span>
</div>
{% endblock %}
{% block scripts %}
<script>
const DOSSIER_ID = {{ dossier_id | tojson }};
let ajouteCounter = {{ annotation.das_ajoutes|length }};
// --- Boutons segmentés ---
/**
 * Activate the clicked segmented button and show the corrected-code field
 * of its target (DP or one DAS row) only for the "modifie" choice.
 */
function setStatut(btn) {
  const group = btn.parentElement;
  const { target } = group.dataset;
  const { value } = btn.dataset;

  // Clear the highlight of every button of the group, then mark the clicked one.
  for (const other of group.querySelectorAll('.seg-btn')) {
    other.className = 'seg-btn';
  }
  btn.classList.add('active-' + value);

  // The code field class is fixed for the DP and index-suffixed for a DAS row.
  const selector = target === 'dp'
    ? '.dp-code-field'
    : '.das-code-field-' + target.replace('das-', '');
  const codeField = document.querySelector(selector);
  if (codeField) {
    codeField.style.display = value === 'modifie' ? '' : 'none';
  }
}
// --- Autocomplete CIM-10 ---
// Debounce timer shared by all autocomplete inputs.
let acDebounceTimer = null;
// Sequence number of the latest lookup; responses of older lookups are dropped.
let acRequestSeq = 0;

// CIM-10 autocomplete: after 300 ms without typing, query the search API
// and list the matches under the input.
document.addEventListener('input', function(e) {
  if (!e.target.matches('[data-ac]') && !e.target.matches('.aj-code')) return;
  const input = e.target;
  const dropdown = input.parentElement.querySelector('.autocomplete-dropdown');
  const q = input.value.trim();

  // Cancel the pending lookup BEFORE the length check, otherwise a lookup
  // scheduled for a longer query still fires after the field was cleared.
  clearTimeout(acDebounceTimer);
  const seq = ++acRequestSeq;

  if (q.length < 2) {
    dropdown.style.display = 'none';
    return;
  }

  acDebounceTimer = setTimeout(function() {
    fetch('/api/cim10/search?q=' + encodeURIComponent(q))
      .then(function(r) { return r.json(); })
      .then(function(data) {
        // A newer keystroke superseded this lookup: ignore the late response.
        if (seq !== acRequestSeq) return;
        if (!data.results || !data.results.length) {
          dropdown.style.display = 'none';
          return;
        }
        dropdown.innerHTML = '';
        data.results.forEach(function(item) {
          const div = document.createElement('div');
          div.className = 'ac-item';

          // textContent instead of innerHTML: labels are inserted as plain
          // text, so "<" or "&" in a label cannot be interpreted as markup.
          const codeSpan = document.createElement('span');
          codeSpan.className = 'ac-code';
          codeSpan.textContent = item.code;
          const labelSpan = document.createElement('span');
          labelSpan.className = 'ac-label';
          labelSpan.textContent = String(item.label).substring(0, 80);
          div.appendChild(codeSpan);
          div.appendChild(labelSpan);

          div.addEventListener('click', function() {
            input.value = item.code;
            dropdown.style.display = 'none';
          });
          dropdown.appendChild(div);
        });
        dropdown.style.display = 'block';
      })
      .catch(function() {
        if (seq === acRequestSeq) dropdown.style.display = 'none';
      });
  }, 300);
});
// Fermer les dropdowns quand on clique ailleurs
document.addEventListener('click', function(e) {
  const clicked = e.target;
  // Clicks on an autocomplete input or inside a dropdown keep the lists open.
  const insideAutocomplete =
    clicked.matches('[data-ac]') ||
    clicked.matches('.aj-code') ||
    clicked.closest('.autocomplete-dropdown');
  if (insideAutocomplete) return;

  for (const dropdown of document.querySelectorAll('.autocomplete-dropdown')) {
    dropdown.style.display = 'none';
  }
});
// --- Ajouter DAS manquant ---
/** Append an empty "missing DAS" row (text, CIM-10 code, comment, remove button). */
function addDasManquant() {
  const list = document.getElementById('das-ajoutes-list');
  const idx = ajouteCounter++;

  const row = document.createElement('div');
  row.className = 'das-added-row das-row-added';
  row.style.cssText = 'display:flex;align-items:center;gap:0.5rem;padding:0.5rem 0;border-bottom:1px solid #e2e8f0;';
  row.dataset.ajIndex = idx;

  // Same markup as the server-rendered rows; idx is a number, so the
  // concatenation below cannot inject markup.
  row.innerHTML = [
    '<input type="text" class="comment-input aj-texte" placeholder="Texte du diagnostic" style="flex:1;max-width:300px;">',
    '<div class="autocomplete-wrapper">',
    '<input type="text" class="code-input aj-code" placeholder="CIM-10" autocomplete="off" data-ac="aj-' + idx + '">',
    '<div class="autocomplete-dropdown"></div>',
    '</div>',
    '<input type="text" class="comment-input aj-comment" placeholder="Commentaire" style="max-width:200px;">',
    '<button onclick="removeAjoute(this)" style="background:none;border:none;color:#dc2626;cursor:pointer;font-size:1.1rem;padding:4px 8px;">&times;</button>'
  ].join('');

  list.appendChild(row);
}
/** Remove the "missing DAS" row that contains the clicked button. */
function removeAjoute(btn) {
  const row = btn.closest('.das-added-row');
  row.remove();
}
// --- Collecte et sauvegarde ---
function collectAnnotation(statut) {
const validateur = document.getElementById('validateur-name');
const valName = validateur ? validateur.value : (localStorage.getItem('t2a_validateur') || '');
// DP
const dpGroup = document.querySelector('.seg-group[data-target="dp"]');
const dpActive = dpGroup ? dpGroup.querySelector('.seg-btn[class*="active-"]') : null;
let dpStatut = 'correct';
if (dpActive) {
if (dpActive.classList.contains('active-modifie')) dpStatut = 'modifie';
else if (dpActive.classList.contains('active-supprime')) dpStatut = 'supprime';
}
const dpCodeField = document.querySelector('.dp-code-field input');
const dpComment = document.querySelector('.dp-comment');
const dp = {
texte_original: {{ (annotation.dp.texte_original or '') | tojson }},
code_pipeline: {{ (annotation.dp.code_pipeline or '') | tojson }},
confidence: {{ (annotation.dp.confidence or '') | tojson }},
statut: dpStatut,
code_corrige: (dpStatut === 'modifie' && dpCodeField) ? dpCodeField.value : null,
commentaire: dpComment ? dpComment.value : ''
};
// DAS
const dasRows = document.querySelectorAll('.das-row');
const das = [];
dasRows.forEach(function(row) {
const idx = parseInt(row.dataset.index);
const group = row.querySelector('.seg-group');
const active = group ? group.querySelector('.seg-btn[class*="active-"]') : null;
let dasStatut = 'correct';
if (active) {
if (active.classList.contains('active-modifie')) dasStatut = 'modifie';
else if (active.classList.contains('active-supprime')) dasStatut = 'supprime';
}
const codeInput = row.querySelector('.code-input');
const commentInput = row.querySelector('.das-comment');
das.push({
index: idx,
texte_original: row.querySelector('td:nth-child(2)').textContent.trim(),
code_pipeline: row.querySelector('td:nth-child(3)').textContent.trim(),
confidence: {{ annotation.das | tojson }}.find(function(d) { return d.index === idx; })?.confidence || '',
source: {{ annotation.das | tojson }}.find(function(d) { return d.index === idx; })?.source || '',
statut: dasStatut,
code_corrige: (dasStatut === 'modifie' && codeInput) ? codeInput.value : null,
commentaire: commentInput ? commentInput.value : ''
});
});
// DAS ajoutés
const ajRows = document.querySelectorAll('.das-added-row');
const das_ajoutes = [];
ajRows.forEach(function(row) {
const texte = row.querySelector('.aj-texte').value.trim();
const code = row.querySelector('.aj-code').value.trim();
const comment = row.querySelector('.aj-comment').value.trim();
if (texte || code) {
das_ajoutes.push({ texte: texte, code: code, commentaire: comment });
}
});
return {
dossier_id: DOSSIER_ID,
validateur: valName,
statut: statut,
dp: dp,
das: das,
das_ajoutes: das_ajoutes,
commentaire_general: document.getElementById('commentaire-general').value
};
}
/**
 * Send the current form content to the server and report the outcome in
 * the save bar.
 *
 * @param {string} statut - 'en_cours' for a draft, 'valide' for a final validation.
 */
function saveAnnotation(statut) {
  const statusEl = document.getElementById('save-status');

  let data;
  try {
    data = collectAnnotation(statut);
  } catch (err) {
    // The form could not be read (for instance it was not rendered):
    // say so instead of failing silently, and send nothing.
    statusEl.textContent = 'Formulaire incomplet : sauvegarde impossible';
    statusEl.style.color = '#dc2626';
    return;
  }

  statusEl.textContent = 'Sauvegarde...';
  statusEl.style.color = '#64748b';

  fetch('/api/validation/save', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(data)
  })
    .then(function(r) {
      // An error page that is not JSON used to be reported as a network
      // error; turn it into an explicit server error message instead.
      return r.json().catch(function() {
        return { error: 'Erreur serveur (HTTP ' + r.status + ')' };
      });
    })
    .then(function(d) {
      if (d.ok) {
        statusEl.textContent = statut === 'valide' ? 'Validé !' : 'Brouillon enregistré';
        statusEl.style.color = '#16a34a';
      } else {
        statusEl.textContent = d.error || 'Erreur';
        statusEl.style.color = '#dc2626';
      }
    })
    .catch(function() {
      statusEl.textContent = 'Erreur réseau';
      statusEl.style.color = '#dc2626';
    });
}
// Raccourci clavier : Ctrl+S pour sauvegarder en brouillon
document.addEventListener('keydown', function(e) {
  // Ctrl+S / Cmd+S saves a draft instead of opening the browser's save dialog.
  const withModifier = e.ctrlKey || e.metaKey;
  if (!withModifier || e.key !== 's') return;
  e.preventDefault();
  saveAnnotation('en_cours');
});
</script>
{% endblock %}

View File

@@ -0,0 +1,179 @@
{% extends "base.html" %}
{% block title %}Validation DIM{% endblock %}
{% block sidebar %}
{#- Sidebar navigation: one titled section per entry of `groups`; an item whose name contains 'fusionne' gets the starred link, every other item a link labelled through the format_doc_name filter. #}
{% for group_name, items in groups.items() %}
<div class="group-title">{{ group_name | format_dossier_name }}</div>
{% for item in items %}
{% if 'fusionne' in item.name %}
<a href="/dossier/{{ item.path_rel }}" class="sidebar-fusionne">&#9733; Fusionné</a>
{% else %}
<a href="/dossier/{{ item.path_rel }}">{{ item.name | format_doc_name }}</a>
{% endif %}
{% endfor %}
{% endfor %}
{% endblock %}
{% block content %}
{#- Validation list page. Reads from the render context: the counters `valides`, `en_cours` and `total`, and `items` (one entry per table row, using dossier_id, group_name, dp_code, dp_texte, dp_confidence, nb_das, has_cpam, statut, validateur). #}
<div style="display:flex;align-items:center;gap:1rem;margin-bottom:1.5rem;">
<h2 style="margin:0;">Validation DIM</h2>
<a href="/validation/metrics" style="font-size:0.85rem;color:#3b82f6;text-decoration:none;font-weight:600;">Voir les métriques</a>
</div>
<!-- Barre de progression -->
<div class="card" style="margin-bottom:1.5rem;">
<div style="display:flex;align-items:center;gap:1rem;margin-bottom:0.75rem;">
<span style="font-weight:600;font-size:0.9rem;">Progression</span>
<span style="font-size:0.85rem;color:#64748b;">{{ valides }} / {{ total }} validés</span>
{% if en_cours > 0 %}
<span style="font-size:0.85rem;color:#ca8a04;">{{ en_cours }} en cours</span>
{% endif %}
</div>
<div style="background:#e2e8f0;border-radius:9999px;height:12px;overflow:hidden;">
{#- The `total > 0` guards make an empty selection render 0% widths instead of dividing by zero. #}
{% set pct_valide = (valides / total * 100) if total > 0 else 0 %}
{% set pct_encours = (en_cours / total * 100) if total > 0 else 0 %}
<div style="display:flex;height:100%;">
<div style="width:{{ pct_valide }}%;background:#16a34a;transition:width 0.3s;"></div>
<div style="width:{{ pct_encours }}%;background:#eab308;transition:width 0.3s;"></div>
</div>
</div>
</div>
<!-- Nom du validateur -->
<div class="card" style="margin-bottom:1rem;padding:0.75rem 1.25rem;">
<div style="display:flex;align-items:center;gap:0.75rem;">
<label for="validateur-name" style="font-size:0.8rem;font-weight:600;color:#475569;white-space:nowrap;">Nom du validateur :</label>
<input type="text" id="validateur-name" placeholder="Dr. X"
style="flex:1;padding:0.35rem 0.6rem;border:1px solid #cbd5e1;border-radius:6px;font-size:0.85rem;max-width:300px;">
</div>
</div>
<!-- Filtres -->
{#- Each button's data-filter value is compared with the rows' data-statut attribute by the page script; 'all' shows every row. #}
<div style="display:flex;gap:0.5rem;margin-bottom:1rem;">
<button class="filter-btn active" data-filter="all"
style="padding:0.35rem 0.75rem;border-radius:6px;border:1px solid #cbd5e1;background:#fff;font-size:0.8rem;font-weight:600;cursor:pointer;">
Tous ({{ total }})
</button>
<button class="filter-btn" data-filter="non_commence"
style="padding:0.35rem 0.75rem;border-radius:6px;border:1px solid #cbd5e1;background:#fff;font-size:0.8rem;font-weight:600;cursor:pointer;">
Non commencés ({{ total - valides - en_cours }})
</button>
<button class="filter-btn" data-filter="en_cours"
style="padding:0.35rem 0.75rem;border-radius:6px;border:1px solid #cbd5e1;background:#fff;font-size:0.8rem;font-weight:600;cursor:pointer;">
En cours ({{ en_cours }})
</button>
<button class="filter-btn" data-filter="valide"
style="padding:0.35rem 0.75rem;border-radius:6px;border:1px solid #cbd5e1;background:#fff;font-size:0.8rem;font-weight:600;cursor:pointer;">
Validés ({{ valides }})
</button>
</div>
<!-- Tableau -->
<div class="card" style="padding:0;">
<table>
<thead>
<tr>
<th>#</th>
<th>Dossier</th>
<th>DP</th>
<th>Conf. DP</th>
<th>DAS</th>
<th>CPAM</th>
<th>Statut</th>
<th>Validateur</th>
</tr>
</thead>
<tbody>
{% for item in items %}
{#- data-statut carries the raw status so the filter buttons can show or hide this row. #}
<tr class="validation-row" data-statut="{{ item.statut }}">
<td style="color:#94a3b8;font-size:0.8rem;">{{ loop.index }}</td>
<td>
<a href="/validation/{{ item.dossier_id }}" style="color:#1e293b;text-decoration:none;font-weight:600;font-size:0.85rem;">
{{ item.group_name }}
</a>
</td>
<td>
<span style="font-family:monospace;font-size:0.85rem;font-weight:600;">{{ item.dp_code or '—' }}</span>
{% if item.dp_texte %}
{#- The DP label is cut to 50 characters, with an ellipsis when something was cut. #}
<br><span style="font-size:0.75rem;color:#64748b;">{{ item.dp_texte[:50] }}{% if item.dp_texte|length > 50 %}…{% endif %}</span>
{% endif %}
</td>
<td>{{ item.dp_confidence | confidence_badge }}</td>
<td style="text-align:center;font-weight:600;">{{ item.nb_das }}</td>
<td style="text-align:center;">
{% if item.has_cpam %}
<span style="display:inline-block;padding:2px 8px;border-radius:9999px;font-size:0.7rem;font-weight:600;color:#7c3aed;background:#ede9fe;">CPAM</span>
{% endif %}
</td>
<td>
{#- Any status other than 'valide' or 'en_cours' is displayed as "Non commencé". #}
{% if item.statut == 'valide' %}
<span style="display:inline-block;padding:2px 8px;border-radius:9999px;font-size:0.7rem;font-weight:600;color:#16a34a;background:#dcfce7;">Validé</span>
{% elif item.statut == 'en_cours' %}
<span style="display:inline-block;padding:2px 8px;border-radius:9999px;font-size:0.7rem;font-weight:600;color:#ca8a04;background:#fef9c3;">En cours</span>
{% else %}
<span style="display:inline-block;padding:2px 8px;border-radius:9999px;font-size:0.7rem;font-weight:600;color:#6b7280;background:#f3f4f6;">Non commencé</span>
{% endif %}
</td>
<td style="font-size:0.8rem;color:#64748b;">{{ item.validateur }}</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
{#- Empty state: shown below the (empty) table and points to the selection script. #}
{% if not items %}
<div class="card" style="text-align:center;padding:2rem;">
<p style="color:#64748b;font-size:0.9rem;">Aucun dossier sélectionné pour validation.</p>
<p style="margin-top:0.5rem;font-size:0.85rem;color:#94a3b8;">
Lancez <code>python scripts/select_validation_dossiers.py</code> pour sélectionner les dossiers.
</p>
</div>
{% endif %}
{% endblock %}
{% block scripts %}
<script>
(() => {
    // Keep the validator's name in localStorage so it survives page changes.
    const nameField = document.getElementById('validateur-name');
    if (nameField) {
        const storedName = localStorage.getItem('t2a_validateur');
        if (storedName) {
            nameField.value = storedName;
        }
        nameField.addEventListener('input', () => {
            localStorage.setItem('t2a_validateur', nameField.value);
        });
    }

    // Apply the inline colours of a filter button.
    const paint = (button, background, color) => {
        button.style.background = background;
        button.style.color = color;
    };

    // Status filters: the clicked button becomes the only active one, then
    // rows whose data-statut does not match its data-filter are hidden.
    document.querySelectorAll('.filter-btn').forEach((button) => {
        button.addEventListener('click', () => {
            document.querySelectorAll('.filter-btn').forEach((other) => {
                other.classList.remove('active');
                paint(other, '#fff', '#1e293b');
            });
            button.classList.add('active');
            paint(button, '#1e293b', '#fff');

            const wanted = button.dataset.filter;
            document.querySelectorAll('.validation-row').forEach((row) => {
                const visible = wanted === 'all' || row.dataset.statut === wanted;
                row.style.display = visible ? '' : 'none';
            });
        });
    });

    // Give the button marked active in the markup its active colours.
    const initiallyActive = document.querySelector('.filter-btn.active');
    if (initiallyActive) {
        paint(initiallyActive, '#1e293b', '#fff');
    }
})();
</script>
{% endblock %}

View File

@@ -0,0 +1,243 @@
{% extends "base.html" %}
{% block title %}Métriques Validation DIM{% endblock %}
{% block sidebar %}
{#- Sidebar navigation: one titled section per entry of `groups`; an item whose name contains 'fusionne' gets the starred link, every other item a link labelled through the format_doc_name filter. #}
{% for group_name, items in groups.items() %}
<div class="group-title">{{ group_name | format_dossier_name }}</div>
{% for item in items %}
{% if 'fusionne' in item.name %}
<a href="/dossier/{{ item.path_rel }}" class="sidebar-fusionne">&#9733; Fusionné</a>
{% else %}
<a href="/dossier/{{ item.path_rel }}">{{ item.name | format_doc_name }}</a>
{% endif %}
{% endfor %}
{% endfor %}
{% endblock %}
{% block content %}
{#- Metrics dashboard. Reads `metrics` and `total_selection` from the render context. NOTE(review): the keys used here match the dict returned by ValidationManager.compute_metrics; confirm against the route that renders this template. #}
<div style="display:flex;align-items:center;gap:1rem;margin-bottom:1.5rem;">
<a href="/validation" class="back">Retour à la liste</a>
<h2 style="margin:0;">Métriques de qualité</h2>
</div>
{#- With no validated dossier only metrics.total_valides is read; every detailed section sits in the else branch. #}
{% if metrics.total_valides == 0 %}
<div class="card" style="text-align:center;padding:2rem;">
<p style="color:#64748b;font-size:0.9rem;">Aucun dossier validé pour le moment.</p>
<p style="margin-top:0.5rem;font-size:0.85rem;color:#94a3b8;">
Validez des dossiers depuis la <a href="/validation">liste de validation</a> pour voir les métriques.
</p>
</div>
{% else %}
<!-- Progression -->
<div class="card" style="margin-bottom:1.5rem;">
<div style="display:flex;align-items:center;gap:1rem;margin-bottom:0.75rem;">
<span style="font-weight:600;font-size:0.9rem;">Progression</span>
<span style="font-size:0.85rem;color:#64748b;">{{ metrics.total_valides }} / {{ total_selection }} dossiers validés</span>
</div>
<div style="background:#e2e8f0;border-radius:9999px;height:12px;overflow:hidden;">
{#- Guarded so that an empty selection renders a 0% bar instead of dividing by zero. #}
{% set pct = (metrics.total_valides / total_selection * 100) if total_selection > 0 else 0 %}
<div style="width:{{ pct }}%;background:#16a34a;transition:width 0.3s;height:100%;"></div>
</div>
</div>
<!-- Métriques DP + DAS côte à côte -->
<div style="display:grid;grid-template-columns:1fr 1fr;gap:1rem;margin-bottom:1.5rem;">
<!-- DP -->
<div class="card">
<h3>Diagnostic Principal</h3>
<div class="info-grid" style="margin-top:0.75rem;">
<div class="info-item">
<label>Accuracy</label>
<span style="font-size:1.3rem;font-weight:700;color:#16a34a;">{{ "%.1f" | format(metrics.dp.accuracy * 100) }}%</span>
</div>
<div class="info-item">
<label>Correct</label>
<span>{{ metrics.dp.correct }} / {{ metrics.dp.total }}</span>
</div>
<div class="info-item">
<label>Modifié</label>
<span style="color:#ca8a04;">{{ metrics.dp.modifie }}</span>
</div>
<div class="info-item">
<label>Supprimé</label>
<span style="color:#dc2626;">{{ metrics.dp.supprime }}</span>
</div>
</div>
<!-- Barre visuelle -->
<div style="margin-top:1rem;display:flex;height:20px;border-radius:6px;overflow:hidden;">
{#- `or 1` keeps the three width divisions below defined when metrics.dp.total is 0. #}
{% set dp_t = metrics.dp.total or 1 %}
<div style="width:{{ metrics.dp.correct / dp_t * 100 }}%;background:#16a34a;" title="Correct"></div>
<div style="width:{{ metrics.dp.modifie / dp_t * 100 }}%;background:#eab308;" title="Modifié"></div>
<div style="width:{{ metrics.dp.supprime / dp_t * 100 }}%;background:#dc2626;" title="Supprimé"></div>
</div>
<div style="display:flex;gap:1rem;margin-top:0.35rem;font-size:0.7rem;color:#64748b;">
<span><span style="display:inline-block;width:8px;height:8px;border-radius:50%;background:#16a34a;margin-right:3px;"></span>Correct</span>
<span><span style="display:inline-block;width:8px;height:8px;border-radius:50%;background:#eab308;margin-right:3px;"></span>Modifié</span>
<span><span style="display:inline-block;width:8px;height:8px;border-radius:50%;background:#dc2626;margin-right:3px;"></span>Supprimé</span>
</div>
</div>
<!-- DAS -->
<div class="card">
<h3>Diagnostics Associés</h3>
{#- The four rates below are read as fractions and multiplied by 100 for display. #}
<div class="info-grid" style="margin-top:0.75rem;">
<div class="info-item">
<label>Precision</label>
<span style="font-size:1.3rem;font-weight:700;color:#1d4ed8;">{{ "%.1f" | format(metrics.das.precision * 100) }}%</span>
</div>
<div class="info-item">
<label>Recall</label>
<span style="font-size:1.3rem;font-weight:700;color:#7c3aed;">{{ "%.1f" | format(metrics.das.recall * 100) }}%</span>
</div>
<div class="info-item">
<label>F1-score</label>
<span style="font-size:1.3rem;font-weight:700;color:#0f172a;">{{ "%.1f" | format(metrics.das.f1 * 100) }}%</span>
</div>
<div class="info-item">
<label>Hallucination</label>
<span style="font-size:1.3rem;font-weight:700;color:#dc2626;">{{ "%.1f" | format(metrics.das.hallucination_rate * 100) }}%</span>
</div>
</div>
<div style="margin-top:0.75rem;font-size:0.8rem;color:#64748b;">
<div style="display:flex;gap:1.5rem;">
<span>Pipeline : {{ metrics.das.total_pipeline }} DAS</span>
<span>Référence DIM : {{ metrics.das.reference }}</span>
<span style="color:#16a34a;">Correct : {{ metrics.das.correct }}</span>
<span style="color:#ca8a04;">Modifié : {{ metrics.das.modifie }}</span>
<span style="color:#dc2626;">Supprimé : {{ metrics.das.supprime }}</span>
<span style="color:#7c3aed;">Ajouté : {{ metrics.das.ajoutes }}</span>
</div>
</div>
<!-- Taux manqués -->
<div style="margin-top:0.5rem;font-size:0.8rem;">
<span style="color:#64748b;">Taux DAS manqués : </span>
<span style="font-weight:600;color:#7c3aed;">{{ "%.1f" | format(metrics.das.miss_rate * 100) }}%</span>
<span style="color:#94a3b8;font-size:0.75rem;"> ({{ metrics.das.ajoutes }} ajoutés / {{ metrics.das.reference }} référence)</span>
</div>
</div>
</div>
<!-- Ventilation par confiance -->
{#- Each breakdown section is rendered only when its mapping or list is non-empty. #}
{% if metrics.by_confidence %}
<div class="card" style="margin-bottom:1rem;">
<h3>Par niveau de confiance</h3>
<table style="margin-top:0.75rem;">
<thead>
<tr>
<th>Confiance</th>
<th>Total DAS</th>
<th>Correct</th>
<th>Modifié</th>
<th>Supprimé</th>
<th>Precision</th>
<th>Hallucination</th>
</tr>
</thead>
<tbody>
{% for conf, bucket in metrics.by_confidence.items() %}
<tr>
<td>{{ conf | confidence_badge }}</td>
<td>{{ bucket.total }}</td>
<td style="color:#16a34a;">{{ bucket.correct }}</td>
<td style="color:#ca8a04;">{{ bucket.modifie }}</td>
<td style="color:#dc2626;">{{ bucket.supprime }}</td>
<td style="font-weight:600;">{{ "%.1f" | format(bucket.precision * 100) }}%</td>
<td style="font-weight:600;color:#dc2626;">{{ "%.1f" | format(bucket.hallucination * 100) }}%</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
{% endif %}
<!-- Ventilation par source -->
{% if metrics.by_source %}
<div class="card" style="margin-bottom:1rem;">
<h3>Par source d'extraction</h3>
<table style="margin-top:0.75rem;">
<thead>
<tr>
<th>Source</th>
<th>Total DAS</th>
<th>Correct</th>
<th>Modifié</th>
<th>Supprimé</th>
<th>Precision</th>
<th>Hallucination</th>
</tr>
</thead>
<tbody>
{% for source, bucket in metrics.by_source.items() %}
<tr>
<td><span style="font-size:0.8rem;padding:2px 8px;border-radius:4px;background:#f1f5f9;font-weight:600;">{{ source }}</span></td>
<td>{{ bucket.total }}</td>
<td style="color:#16a34a;">{{ bucket.correct }}</td>
<td style="color:#ca8a04;">{{ bucket.modifie }}</td>
<td style="color:#dc2626;">{{ bucket.supprime }}</td>
<td style="font-weight:600;">{{ "%.1f" | format(bucket.precision * 100) }}%</td>
<td style="font-weight:600;color:#dc2626;">{{ "%.1f" | format(bucket.hallucination * 100) }}%</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
{% endif %}
<!-- Top corrections DAS -->
{% if metrics.top_corrections %}
<div class="card" style="margin-bottom:1rem;">
<h3>Top corrections DAS (code pipeline &rarr; code DIM)</h3>
<table style="margin-top:0.75rem;">
<thead>
<tr>
<th>Code pipeline</th>
<th></th>
<th>Code corrigé</th>
<th>Occurrences</th>
</tr>
</thead>
<tbody>
{#- Each entry is unpacked as ((pipeline code, corrected code), occurrence count). #}
{% for (code_from, code_to), count in metrics.top_corrections %}
<tr>
<td><span style="font-family:monospace;font-weight:600;color:#dc2626;">{{ code_from }}</span></td>
<td style="color:#94a3b8;">&rarr;</td>
<td><span style="font-family:monospace;font-weight:600;color:#16a34a;">{{ code_to }}</span></td>
<td>{{ count }}</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
{% endif %}
<!-- Top corrections DP -->
{% if metrics.dp_corrections %}
<div class="card" style="margin-bottom:1rem;">
<h3>Top corrections DP (code pipeline &rarr; code DIM)</h3>
<table style="margin-top:0.75rem;">
<thead>
<tr>
<th>Code pipeline</th>
<th></th>
<th>Code corrigé</th>
<th>Occurrences</th>
</tr>
</thead>
<tbody>
{#- Each entry is unpacked as ((pipeline code, corrected code), occurrence count). #}
{% for (code_from, code_to), count in metrics.dp_corrections %}
<tr>
<td><span style="font-family:monospace;font-weight:600;color:#dc2626;">{{ code_from }}</span></td>
<td style="color:#94a3b8;">&rarr;</td>
<td><span style="font-family:monospace;font-weight:600;color:#16a34a;">{{ code_to }}</span></td>
<td>{{ count }}</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
{% endif %}
{% endif %}
{% endblock %}

272
src/viewer/validation.py Normal file
View File

@@ -0,0 +1,272 @@
"""Gestionnaire de données pour la validation DIM (gold standard)."""
from __future__ import annotations
import json
import logging
import os
import tempfile
from datetime import datetime
from pathlib import Path
from ..config import BASE_DIR
logger = logging.getLogger(__name__)
GOLD_DIR = BASE_DIR / "data" / "gold_standard"
class ValidationManager:
"""Gère les annotations de validation DIM (fichiers JSON par dossier)."""
def __init__(self, gold_dir: Path | None = None):
self.gold_dir = gold_dir or GOLD_DIR
self.gold_dir.mkdir(parents=True, exist_ok=True)
# ------------------------------------------------------------------
# Helpers
# ------------------------------------------------------------------
def _annotation_path(self, dossier_id: str) -> Path:
"""Chemin du fichier annotation pour un dossier donné."""
safe_name = dossier_id.replace("/", "__") + ".json"
return self.gold_dir / safe_name
# ------------------------------------------------------------------
# CRUD
# ------------------------------------------------------------------
def load_annotation(self, dossier_id: str) -> dict | None:
"""Charge l'annotation existante pour un dossier."""
path = self._annotation_path(dossier_id)
if not path.exists():
return None
try:
return json.loads(path.read_text(encoding="utf-8"))
except Exception:
logger.warning("Impossible de charger l'annotation %s", path)
return None
def save_annotation(self, dossier_id: str, data: dict) -> None:
"""Sauvegarde atomique (write temp + rename) d'une annotation."""
path = self._annotation_path(dossier_id)
data["dossier_id"] = dossier_id
data.setdefault("date_validation", datetime.now().isoformat(timespec="seconds"))
fd, tmp_path = tempfile.mkstemp(
dir=str(self.gold_dir), suffix=".tmp", prefix=".annot_"
)
try:
with os.fdopen(fd, "w", encoding="utf-8") as f:
json.dump(data, f, ensure_ascii=False, indent=2)
os.replace(tmp_path, path)
except Exception:
# Nettoyage en cas d'erreur
try:
os.unlink(tmp_path)
except OSError:
pass
raise
def list_annotations(self) -> list[dict]:
"""Liste toutes les annotations avec métadonnées (statut, validateur, date)."""
results = []
for path in sorted(self.gold_dir.glob("*.json")):
if path.name.startswith("_"):
continue
try:
data = json.loads(path.read_text(encoding="utf-8"))
results.append({
"dossier_id": data.get("dossier_id", path.stem.replace("__", "/")),
"statut": data.get("statut", "non_commence"),
"validateur": data.get("validateur", ""),
"date_validation": data.get("date_validation", ""),
})
except Exception:
logger.warning("Annotation illisible : %s", path)
return results
def load_selection(self) -> list[str]:
"""Charge la liste des dossiers sélectionnés depuis _selection.json."""
selection_path = self.gold_dir / "_selection.json"
if not selection_path.exists():
return []
try:
data = json.loads(selection_path.read_text(encoding="utf-8"))
return data.get("dossiers", [])
except Exception:
return []
# ------------------------------------------------------------------
# Métriques
# ------------------------------------------------------------------
def compute_metrics(self, groups: dict) -> dict:
"""Calcule precision, recall, F1 et hallucination rate depuis les annotations.
Args:
groups: résultat de scan_dossiers() pour accéder aux données pipeline.
Returns:
Dictionnaire de métriques globales et ventilées.
"""
annotations = []
for path in sorted(self.gold_dir.glob("*.json")):
if path.name.startswith("_"):
continue
try:
data = json.loads(path.read_text(encoding="utf-8"))
if data.get("statut") == "valide":
annotations.append(data)
except Exception:
continue
total = len(annotations)
if total == 0:
return {"total_valides": 0}
# --- Métriques DP ---
dp_correct = 0
dp_modifie = 0
dp_supprime = 0
dp_corrections: list[tuple[str, str]] = [] # (pipeline, corrige)
# --- Métriques DAS ---
das_correct = 0
das_modifie = 0
das_supprime = 0
das_ajoutes = 0
das_total_pipeline = 0
das_corrections: list[tuple[str, str]] = []
# --- Ventilation par confiance ---
by_confidence: dict[str, dict] = {}
# --- Ventilation par source ---
by_source: dict[str, dict] = {}
for annot in annotations:
dossier_id = annot.get("dossier_id", "")
# Trouver les données pipeline correspondantes
pipeline_data = self._find_pipeline_data(dossier_id, groups)
# DP
dp = annot.get("dp", {})
dp_statut = dp.get("statut", "correct")
if dp_statut == "correct":
dp_correct += 1
elif dp_statut == "modifie":
dp_modifie += 1
code_orig = dp.get("code_pipeline", "")
code_corr = dp.get("code_corrige", "")
if code_orig and code_corr:
dp_corrections.append((code_orig, code_corr))
elif dp_statut == "supprime":
dp_supprime += 1
# DAS
das_list = annot.get("das", [])
das_aj = annot.get("das_ajoutes", [])
das_total_pipeline += len(das_list)
das_ajoutes += len(das_aj)
for das in das_list:
das_statut = das.get("statut", "correct")
conf = das.get("confidence", "")
source = das.get("source", "")
if das_statut == "correct":
das_correct += 1
elif das_statut == "modifie":
das_modifie += 1
code_orig = das.get("code_pipeline", "")
code_corr = das.get("code_corrige", "")
if code_orig and code_corr:
das_corrections.append((code_orig, code_corr))
elif das_statut == "supprime":
das_supprime += 1
# Ventilation par confiance
if conf:
bucket = by_confidence.setdefault(conf, {
"correct": 0, "modifie": 0, "supprime": 0, "total": 0
})
bucket["total"] += 1
bucket[das_statut] = bucket.get(das_statut, 0) + 1
# Ventilation par source
if source:
bucket = by_source.setdefault(source, {
"correct": 0, "modifie": 0, "supprime": 0, "total": 0
})
bucket["total"] += 1
bucket[das_statut] = bucket.get(das_statut, 0) + 1
# --- Calculs ---
# DAS reference = correct + modifié + ajoutés (les vrais DAS selon le DIM)
das_reference = das_correct + das_modifie + das_ajoutes
# DAS pipeline valides = correct + modifié (non supprimés)
das_pipeline_valides = das_correct + das_modifie
precision = das_pipeline_valides / das_total_pipeline if das_total_pipeline > 0 else 0
recall = das_pipeline_valides / das_reference if das_reference > 0 else 0
f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0
hallucination_rate = das_supprime / das_total_pipeline if das_total_pipeline > 0 else 0
miss_rate = das_ajoutes / das_reference if das_reference > 0 else 0
# Top corrections DAS
from collections import Counter
correction_counter = Counter(das_corrections)
top_corrections = correction_counter.most_common(10)
# Ventilation par confiance : calculer precision par bucket
for bucket in by_confidence.values():
t = bucket["total"]
valides = bucket.get("correct", 0) + bucket.get("modifie", 0)
bucket["precision"] = valides / t if t > 0 else 0
bucket["hallucination"] = bucket.get("supprime", 0) / t if t > 0 else 0
for bucket in by_source.values():
t = bucket["total"]
valides = bucket.get("correct", 0) + bucket.get("modifie", 0)
bucket["precision"] = valides / t if t > 0 else 0
bucket["hallucination"] = bucket.get("supprime", 0) / t if t > 0 else 0
return {
"total_valides": total,
"dp": {
"total": total,
"correct": dp_correct,
"modifie": dp_modifie,
"supprime": dp_supprime,
"accuracy": dp_correct / total if total > 0 else 0,
},
"das": {
"total_pipeline": das_total_pipeline,
"correct": das_correct,
"modifie": das_modifie,
"supprime": das_supprime,
"ajoutes": das_ajoutes,
"reference": das_reference,
"precision": precision,
"recall": recall,
"f1": f1,
"hallucination_rate": hallucination_rate,
"miss_rate": miss_rate,
},
"by_confidence": by_confidence,
"by_source": by_source,
"top_corrections": top_corrections,
"dp_corrections": Counter(dp_corrections).most_common(10),
}
def _find_pipeline_data(self, dossier_id: str, groups: dict) -> dict | None:
"""Trouve les données pipeline pour un dossier_id donné."""
# dossier_id est de la forme "45_23183041/fusionne"
parts = dossier_id.split("/")
group_name = parts[0] if parts else ""
items = groups.get(group_name, [])
for item in items:
if "fusionne" in item["name"]:
return item
return items[0] if items else None