feat: enrichissement contre-argumentation CPAM — libellés CIM-10, RAG ciblé, reprocess complet
- Résolution des libellés CIM-10 pour les codes contestés (dp_ucr, da_ucr, dr_ucr)
- Fallback DP depuis dp_ucr quand le pipeline n'extrait pas de diagnostic principal
- Troncature arg_ucr augmentée de 200 à 500 chars pour conserver les citations de règles
- Requête RAG 4 : définitions CIM-10 (inclusion/exclusion) des codes contestés
- Requête RAG 5 : extraction et recherche des règles nommées (RègleT7, Annexe, etc.)
- Cap résultats RAG de 10 à 12 pour absorber les nouvelles requêtes
- Reprocess viewer : pipeline complet (fusion + GHM + CPAM) pour dossiers multi-PDF
- Affichage structuré response_data dans le viewer (analyse, preuves, références)
- 7 nouveaux tests CPAM, 6 nouveaux tests viewer

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -373,6 +373,37 @@ def format_doc_name(name: str) -> str:
|
||||
return name
|
||||
|
||||
|
||||
def format_cpam_text(text: str | None) -> Markup:
    """Render a CPAM text section as HTML paragraphs and bullet lists.

    The text is split on newlines and each line is stripped:

    * a blank line becomes ``<br>``;
    * a line starting with ``"- "`` becomes an ``<li>`` inside a ``<ul>``
      (consecutive bullet lines share the same list);
    * any other line becomes its own ``<p>``.

    Line content is HTML-escaped before being wrapped in markup.

    Args:
        text: Raw section text; ``None`` or an empty string yields empty markup.

    Returns:
        The generated HTML fragments joined with newlines, wrapped in ``Markup``.
    """
    if not text:
        return Markup("")

    from markupsafe import escape

    list_open = "<ul style='margin:0.3rem 0;padding-left:1.2rem;'>"
    list_close = "</ul>"
    fragments: list[str] = []
    list_is_open = False

    for raw_line in str(text).split("\n"):
        content = raw_line.strip()
        is_bullet = content.startswith("- ")

        # Anything that is not a bullet (blank line or paragraph) ends the
        # list currently being built.
        if list_is_open and not is_bullet:
            fragments.append(list_close)
            list_is_open = False

        if not content:
            fragments.append("<br>")
        elif is_bullet:
            if not list_is_open:
                fragments.append(list_open)
                list_is_open = True
            # Drop the leading "- " marker before escaping the item text.
            fragments.append(f"<li>{escape(content[2:])}</li>")
        else:
            fragments.append(f"<p style='margin:0.2rem 0;'>{escape(content)}</p>")

    # The text may end while a list is still open.
    if list_is_open:
        fragments.append(list_close)

    return Markup("\n".join(fragments))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# App factory
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -387,6 +418,7 @@ def create_app() -> Flask:
|
||||
app.jinja_env.filters["format_duration"] = format_duration
|
||||
app.jinja_env.filters["format_dossier_name"] = format_dossier_name
|
||||
app.jinja_env.filters["format_doc_name"] = format_doc_name
|
||||
app.jinja_env.filters["format_cpam_text"] = format_cpam_text
|
||||
|
||||
ccam_dict = load_ccam_dict()
|
||||
|
||||
@@ -445,36 +477,119 @@ def create_app() -> Flask:
|
||||
|
||||
@app.route("/reprocess/<path:filepath>", methods=["POST"])
def reprocess(filepath: str):
    """Re-run the full pipeline for a dossier: PDFs, merge, GHM and CPAM.

    Steps:
        1. Load the dossier and collect its source PDF names
           (``source_files`` if set, otherwise ``source_file``).
        2. Locate each PDF recursively under the ``input/`` directory.
        3. Run ``process_pdf`` on every PDF found and write each resulting
           part with ``write_outputs``.
        4. When more than one dossier was produced inside a sub-directory,
           merge them and estimate the GHM of the merged dossier.
        5. If an ``.xlsx`` file exists in ``input/Control_cpam``, parse it,
           match controls against the sub-directory name and generate a
           response for each matching control.
        6. Write the merged dossier (if any) to
           ``STRUCTURED_DIR/<subdir>/<subdir>_fusionne_cim10.json``.

    Args:
        filepath: Dossier path taken from the URL, passed to ``load_dossier``.

    Returns:
        A Flask JSON response:
        ``{"ok": True, "message": ...}`` on success,
        ``({"error": ...}, 400)`` when the dossier names no source file,
        ``({"error": ...}, 404)`` when none of the source PDFs is found,
        ``({"error": ...}, 500)`` when processing raises.
    """
    from glob import escape as glob_escape

    from ..main import process_pdf, write_outputs
    from ..medical.ghm import estimate_ghm

    dossier = load_dossier(filepath)
    input_dir = Path(__file__).parent.parent.parent / "input"

    # Collect source PDFs: source_files takes precedence, source_file is the
    # fallback (presumably merged vs. single-PDF dossiers).
    if dossier.source_files:
        source_names = list(dossier.source_files)
    elif dossier.source_file:
        source_names = [dossier.source_file]
    else:
        source_names = []

    if not source_names:
        return jsonify({"error": "Fichier source introuvable"}), 400

    # Resolve every source name to a real file under input/.
    pdf_paths = []
    missing = []
    for name in source_names:
        # Escape glob metacharacters so a file name containing "[", "*" or
        # "?" is matched literally instead of being read as a pattern.
        pattern = glob_escape(str(name))
        found = next((p for p in input_dir.rglob(pattern) if p.is_file()), None)
        if found:
            pdf_paths.append(found)
        else:
            missing.append(name)

    if not pdf_paths:
        return jsonify({"error": f"PDF sources introuvables : {', '.join(missing)}"}), 404

    try:
        # The sub-directory is derived from the first PDF found; PDFs located
        # directly in input/ have no sub-directory.
        subdir = None
        if pdf_paths[0].parent != input_dir:
            subdir = pdf_paths[0].parent.name

        # 1. Process each PDF (a single PDF may yield several parts).
        group_dossiers = []
        for pdf_path in pdf_paths:
            pdf_results = process_pdf(pdf_path)
            stem = pdf_path.stem.replace(" ", "_")
            multi = len(pdf_results) > 1
            for part_idx, (anonymized_text, new_dossier, report) in enumerate(pdf_results):
                part_stem = f"{stem}_part{part_idx + 1}" if multi else stem
                write_outputs(part_stem, anonymized_text, new_dossier, report, subdir=subdir)
                group_dossiers.append(new_dossier)

        # 2. Multi-PDF merge. Best effort: a failure is logged and the
        #    request carries on with the individual dossiers.
        merged = None
        if len(group_dossiers) > 1 and subdir:
            try:
                from ..medical.fusion import merge_dossiers

                merged = merge_dossiers(group_dossiers)
                try:
                    ghm = estimate_ghm(merged)
                    merged.ghm_estimation = ghm
                except Exception:
                    logger.warning("Erreur estimation GHM fusionné", exc_info=True)
            except Exception:
                logger.exception("Erreur fusion groupe %s", subdir)

        # 3. CPAM control (auto-detected Excel file). The target is the
        #    merged dossier when there is one, else the last dossier produced.
        target = merged if merged else (group_dossiers[-1] if group_dossiers else None)
        if target and subdir:
            cpam_dir = input_dir / "Control_cpam"
            cpam_path = None
            if cpam_dir.is_dir():
                # First .xlsx in sorted order is used.
                xlsx_files = sorted(cpam_dir.glob("*.xlsx"))
                if xlsx_files:
                    cpam_path = xlsx_files[0]
            if cpam_path:
                try:
                    from ..control.cpam_parser import parse_cpam_excel, match_dossier_ogc
                    from ..control.cpam_response import generate_cpam_response

                    cpam_data = parse_cpam_excel(str(cpam_path))
                    if cpam_data:
                        controles = match_dossier_ogc(subdir, cpam_data)
                        if controles:
                            logger.info("CPAM reprocess : %d contrôle(s) pour %s",
                                        len(controles), subdir)
                            for ctrl in controles:
                                text, response_data, sources = generate_cpam_response(target, ctrl)
                                ctrl.contre_argumentation = text
                                ctrl.response_data = response_data
                                ctrl.sources_reponse = sources
                            target.controles_cpam = controles
                except Exception:
                    logger.exception("Erreur CPAM reprocess pour %s", subdir)

        # 4. Write the merged dossier after CPAM so the controls are included.
        # NOTE(review): when no merge happened, the CPAM controls attached to
        # `target` above are not written back by this function (write_outputs
        # already ran in step 1) although the message below counts them —
        # confirm whether they should be persisted.
        if merged is not None and subdir:
            struct_dir = STRUCTURED_DIR / subdir
            struct_dir.mkdir(parents=True, exist_ok=True)
            merged_path = struct_dir / f"{subdir}_fusionne_cim10.json"
            merged_path.write_text(
                merged.model_dump_json(indent=2, exclude_none=True),
                encoding="utf-8",
            )
            logger.info("Dossier fusionné réécrit : %s", merged_path)

        # Build the human-readable summary returned to the caller.
        msg = f"Traitement terminé ({len(group_dossiers)} dossier(s)"
        if merged:
            msg += ", fusionné"
        if target and getattr(target, "controles_cpam", None):
            msg += f", {len(target.controles_cpam)} contrôle(s) CPAM"
        if missing:
            msg += f", {len(missing)} PDF(s) manquant(s)"
        msg += ")"
        return jsonify({"ok": True, "message": msg})
    except Exception as e:
        logger.exception("Erreur lors du retraitement")
        return jsonify({"error": str(e)}), 500
|
||||
|
||||
Reference in New Issue
Block a user