feat: enrichissement contre-argumentation CPAM — libellés CIM-10, RAG ciblé, reprocess complet

- Résolution des libellés CIM-10 pour les codes contestés (dp_ucr, da_ucr, dr_ucr)
- Fallback DP depuis dp_ucr quand le pipeline n'extrait pas de diagnostic principal
- Troncature arg_ucr augmentée de 200 à 500 chars pour conserver les citations de règles
- Requête RAG 4 : définitions CIM-10 (inclusion/exclusion) des codes contestés
- Requête RAG 5 : extraction et recherche des règles nommées (RègleT7, Annexe, etc.)
- Cap résultats RAG de 10 à 12 pour absorber les nouvelles requêtes
- Reprocess viewer : pipeline complet (fusion + GHM + CPAM) pour dossiers multi-PDF
- Affichage structuré response_data dans le viewer (analyse, preuves, références)
- 7 nouveaux tests CPAM, 6 nouveaux tests viewer

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-02-17 23:24:10 +01:00
parent 94fa4e5f3b
commit bc0ccbef7c
7 changed files with 464 additions and 51 deletions

View File

@@ -373,6 +373,37 @@ def format_doc_name(name: str) -> str:
return name
def format_cpam_text(text: str | None) -> Markup:
    """Render a CPAM text section as HTML made of bullet lists and paragraphs.

    The input is split on newlines and each line is stripped:

    * a line starting with ``"- "`` becomes an ``<li>`` inside a ``<ul>``
      (consecutive bullet lines share the same list);
    * an empty line closes any open list and emits a ``<br>``;
    * any other line closes any open list and becomes a ``<p>``.

    Line content is passed through ``markupsafe.escape`` before being
    interpolated into the tags.

    Args:
        text: Raw section text; ``None`` or an empty value yields empty markup.

    Returns:
        The generated HTML fragments joined by newlines, wrapped in ``Markup``.
    """
    if not text:
        return Markup("")

    from markupsafe import escape

    list_open_tag = "<ul style='margin:0.3rem 0;padding-left:1.2rem;'>"
    fragments: list[str] = []
    inside_list = False

    def close_list() -> None:
        """Emit the closing ``</ul>`` if a bullet list is currently open."""
        nonlocal inside_list
        if inside_list:
            fragments.append("</ul>")
            inside_list = False

    for raw_line in str(text).split("\n"):
        content = raw_line.strip()

        if content.startswith("- "):
            # Bullet item: open the list lazily on the first item of a run.
            if not inside_list:
                fragments.append(list_open_tag)
                inside_list = True
            fragments.append(f"<li>{escape(content[2:])}</li>")
            continue

        # Anything that is not a bullet terminates the current list.
        close_list()
        if content:
            fragments.append(f"<p style='margin:0.2rem 0;'>{escape(content)}</p>")
        else:
            fragments.append("<br>")

    # The text may end while a list is still open.
    close_list()
    return Markup("\n".join(fragments))
# ---------------------------------------------------------------------------
# App factory
# ---------------------------------------------------------------------------
@@ -387,6 +418,7 @@ def create_app() -> Flask:
app.jinja_env.filters["format_duration"] = format_duration
app.jinja_env.filters["format_dossier_name"] = format_dossier_name
app.jinja_env.filters["format_doc_name"] = format_doc_name
app.jinja_env.filters["format_cpam_text"] = format_cpam_text
ccam_dict = load_ccam_dict()
@@ -445,36 +477,119 @@ def create_app() -> Flask:
@app.route("/reprocess/<path:filepath>", methods=["POST"])
def reprocess(filepath: str):
# NOTE(review): this span looks like a rendered diff hunk whose indentation
# and +/- markers were lost: lines from the previous single-PDF version
# (`source_file` / `pdf_path`) and from the new multi-PDF version
# (`source_names` / `pdf_paths`) appear side by side, including two
# docstrings and two `input_dir` assignments. Read it as before/after, not
# as one runnable body.
"""Re-run the processing of a dossier."""
"""Re-run the full pipeline: process PDFs + merge + GHM + CPAM."""
from ..main import process_pdf, write_outputs
from ..medical.ghm import estimate_ghm
dossier = load_dossier(filepath)
source_file = dossier.source_file
if not source_file:
input_dir = Path(__file__).parent.parent.parent / "input"
# Collect the source PDFs (merged dossier -> source_files, single dossier -> source_file)
source_names = []
if dossier.source_files:
source_names = list(dossier.source_files)
elif dossier.source_file:
source_names = [dossier.source_file]
if not source_names:
# No source name recorded on the dossier: answer 400.
return jsonify({"error": "Fichier source introuvable"}), 400
# Look up the source PDF under input/
input_dir = Path(__file__).parent.parent.parent / "input"
pdf_path = None
for p in input_dir.rglob(source_file):
if p.is_file():
pdf_path = p
break
# Resolve the PDF paths under input/
pdf_paths = []
missing = []
for name in source_names:
found = None
# Recursive search; the first match that is a regular file wins.
for p in input_dir.rglob(name):
if p.is_file():
found = p
break
if found:
pdf_paths.append(found)
else:
missing.append(name)
if not pdf_path:
return jsonify({"error": f"PDF source '{source_file}' introuvable"}), 404
# 404 only when none of the sources was found; a partial set proceeds and
# the missing names are reported in the final message.
if not pdf_paths:
return jsonify({"error": f"PDF sources introuvables : {', '.join(missing)}"}), 404
try:
pdf_results = process_pdf(pdf_path)
stem = pdf_path.stem.replace(" ", "_")
# Derive the subdir from the first PDF found
subdir = None
if pdf_path.parent != input_dir:
subdir = pdf_path.parent.name
multi = len(pdf_results) > 1
for part_idx, (anonymized_text, new_dossier, report) in enumerate(pdf_results):
part_stem = f"{stem}_part{part_idx + 1}" if multi else stem
write_outputs(part_stem, anonymized_text, new_dossier, report, subdir=subdir)
return jsonify({"ok": True, "message": f"Traitement terminé ({len(pdf_results)} dossier(s))"})
# subdir stays None when the first PDF sits directly in input/.
if pdf_paths[0].parent != input_dir:
subdir = pdf_paths[0].parent.name
# 1. Process each PDF
group_dossiers = []
for pdf_path in pdf_paths:
pdf_results = process_pdf(pdf_path)
stem = pdf_path.stem.replace(" ", "_")
# A PDF yielding several results gets a "_partN" suffix per result.
multi = len(pdf_results) > 1
for part_idx, (anonymized_text, new_dossier, report) in enumerate(pdf_results):
part_stem = f"{stem}_part{part_idx + 1}" if multi else stem
write_outputs(part_stem, anonymized_text, new_dossier, report, subdir=subdir)
group_dossiers.append(new_dossier)
# 2. Multi-PDF merge
merged = None
# Merge only when more than one dossier was produced and a subdir is known.
if len(group_dossiers) > 1 and subdir:
try:
from ..medical.fusion import merge_dossiers
merged = merge_dossiers(group_dossiers)
try:
ghm = estimate_ghm(merged)
merged.ghm_estimation = ghm
except Exception:
# GHM estimation failure is logged as a warning; the merged dossier is kept.
logger.warning("Erreur estimation GHM fusionné", exc_info=True)
except Exception:
logger.exception("Erreur fusion groupe %s", subdir)
# 3. CPAM control (auto-detected Excel file)
# Target is the merged dossier when there is one, else the last processed dossier.
target = merged if merged else (group_dossiers[-1] if group_dossiers else None)
if target and subdir:
cpam_dir = input_dir / "Control_cpam"
cpam_path = None
if cpam_dir.is_dir():
# First *.xlsx in sorted name order is used.
xlsx_files = sorted(cpam_dir.glob("*.xlsx"))
if xlsx_files:
cpam_path = xlsx_files[0]
if cpam_path:
try:
from ..control.cpam_parser import parse_cpam_excel, match_dossier_ogc
from ..control.cpam_response import generate_cpam_response
cpam_data = parse_cpam_excel(str(cpam_path))
if cpam_data:
controles = match_dossier_ogc(subdir, cpam_data)
if controles:
logger.info("CPAM reprocess : %d contrôle(s) pour %s",
len(controles), subdir)
# Attach the generated response to each matched control.
for ctrl in controles:
text, response_data, sources = generate_cpam_response(target, ctrl)
ctrl.contre_argumentation = text
ctrl.response_data = response_data
ctrl.sources_reponse = sources
target.controles_cpam = controles
except Exception:
# CPAM errors are logged; the request still completes.
logger.exception("Erreur CPAM reprocess pour %s", subdir)
# 4. Write the merged dossier (after CPAM)
# NOTE(review): only the merged dossier is written here. When `target` is a
# non-merged dossier, its write_outputs() call happened in step 1, before
# controles_cpam was attached - confirm whether that is intended.
if merged is not None and subdir:
struct_dir = STRUCTURED_DIR / subdir
struct_dir.mkdir(parents=True, exist_ok=True)
merged_path = struct_dir / f"{subdir}_fusionne_cim10.json"
merged_path.write_text(
merged.model_dump_json(indent=2, exclude_none=True),
encoding="utf-8",
)
logger.info("Dossier fusionné réécrit : %s", merged_path)
# Build the summary message: dossier count, merge flag, CPAM control count, missing PDFs.
msg = f"Traitement terminé ({len(group_dossiers)} dossier(s)"
if merged:
msg += ", fusionné"
if target and getattr(target, "controles_cpam", None):
msg += f", {len(target.controles_cpam)} contrôle(s) CPAM"
if missing:
msg += f", {len(missing)} PDF(s) manquant(s)"
msg += ")"
return jsonify({"ok": True, "message": msg})
except Exception as e:
# Catch-all boundary: log the traceback and answer 500 with the error text.
logger.exception("Erreur lors du retraitement")
return jsonify({"error": str(e)}), 500