- Réorganisation data/referentiels/ : pdfs/, dicts/, user/ (structure unifiée) - Fix badges "Source absente" sur page admin référentiels - Ré-indexation COCOA 2025 (555 → 1451 chunks, couverture 94%) - Fix VRAM OOM : embeddings forcés CPU via T2A_EMBED_CPU - Nouveaux modules : document_router, docx_extractor, image_extractor, ocr_engine - Module complétude (quality/completude.py + config YAML) - Template DIM (synthèse dimensionnelle) - Gunicorn config + systemd service t2a-viewer - Suppression t2a_install_rag_cleanup/ (copie obsolète) - Suppression scripts/ et scripts_t2a_v2/ (anciens benchmarks) - Suppression 81 fichiers _doc.txt de test - Cache Ollama : TTL configurable, corrections loader YAML - Dashboard : améliorations templates (base, index, detail, cpam, validation) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
363 lines
14 KiB
Python
363 lines
14 KiB
Python
"""Génération de contre-argumentation pour les contrôles CPAM via RAG + Ollama.
|
|
|
|
Orchestrateur principal — délègue aux sous-modules :
|
|
- cpam_rag : _search_rag_for_control(), _search_rag_queries()
|
|
- cpam_context : _build_cpam_prompt(), _build_tagged_context(), _build_bio_summary(), etc.
|
|
- cpam_validation : _validate_adversarial(), _validate_grounding(), _format_response(), etc.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
import os
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
from ..config import ControleCPAM, DossierMedical, RAGSource, STRUCTURED_DIR, rule_enabled
|
|
from ..medical.ollama_client import call_anthropic, call_ollama
|
|
from ..prompts import CPAM_EXTRACTION
|
|
|
|
# --- Imports depuis les sous-modules ---
|
|
from .cpam_rag import _search_rag_for_control
|
|
from .cpam_context import (
|
|
_assess_dossier_strength,
|
|
_build_cpam_prompt,
|
|
_build_tagged_context,
|
|
)
|
|
from .cpam_validation import (
|
|
_validate_adversarial,
|
|
_validate_grounding,
|
|
_validate_references,
|
|
_validate_codes_in_response,
|
|
_sanitize_unauthorized_codes,
|
|
_build_correction_prompt,
|
|
_format_response,
|
|
_assess_quality_tier,
|
|
_guardian_deterministic,
|
|
)
|
|
|
|
# Backward compat — sera retiré dans un commit futur
|
|
from .cpam_rag import _search_rag_queries # noqa: F401
|
|
from .cpam_context import ( # noqa: F401
|
|
_get_code_label,
|
|
_get_cim10_definitions,
|
|
_BIO_INTERPRETATION,
|
|
_BIO_THRESHOLDS,
|
|
_assess_dossier_strength,
|
|
_build_bio_summary,
|
|
_build_bio_confrontation,
|
|
_check_das_bio_coherence,
|
|
)
|
|
from .cpam_validation import _CIM10_CODE_RE, _validate_adversarial as _validate_adversarial, _assess_quality_tier as _assess_quality_tier, _fuzzy_match_ref as _fuzzy_match_ref, _sanitize_unauthorized_codes as _sanitize_unauthorized_codes # noqa: F401
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def _save_version(
    dossier: DossierMedical,
    controle: ControleCPAM,
) -> None:
    """Archive the current counter-argumentation before it is regenerated.

    Writes a JSON snapshot to
    output/structured/{dossier}/_cpam_versions/{ogc}_{timestamp}_v{n}.json.
    Silently skips when there is nothing to version yet, or when no
    structured directory can be matched to the dossier's source files.
    """
    # Nothing generated yet → nothing to archive.
    if not (controle.contre_argumentation or controle.response_data):
        return

    if not STRUCTURED_DIR.is_dir():
        logger.debug("Versioning : STRUCTURED_DIR inexistant, skip")
        return

    subdirs = [entry for entry in STRUCTURED_DIR.iterdir() if entry.is_dir()]

    # Candidate source filenames: source_files first, then source_file.
    names = list(dossier.source_files or [])
    if dossier.source_file and dossier.source_file not in names:
        names.append(dossier.source_file)

    # Match each candidate stem (spaces → underscores) against subdir names.
    target = None
    for name in names:
        stem = Path(name).stem.replace(" ", "_")
        target = next((entry for entry in subdirs if stem in entry.name), None)
        if target is not None:
            break

    if not target:
        logger.debug("Versioning : pas de dossier structuré trouvé, skip")
        return

    versions_dir = target / "_cpam_versions"
    versions_dir.mkdir(exist_ok=True)

    # Version number = count of snapshots already stored for this OGC + 1.
    previous = sorted(versions_dir.glob(f"{controle.numero_ogc}_*.json"))
    version_num = len(previous) + 1

    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{controle.numero_ogc}_{stamp}_v{version_num}.json"

    payload = {
        "numero_ogc": controle.numero_ogc,
        "version": version_num,
        "timestamp": stamp,
        "contre_argumentation": controle.contre_argumentation,
        "response_data": controle.response_data,
        "quality_tier": controle.quality_tier,
        "validation_dim": controle.validation_dim,
    }

    (versions_dir / filename).write_text(
        json.dumps(payload, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )
    logger.info(" Version %d sauvegardée : %s", version_num, filename)
|
|
|
|
|
|
def _extraction_pass(
    dossier: DossierMedical,
    controle: ControleCPAM,
) -> dict | None:
    """Pass 1 — structured extraction of context before argumentation.

    Short prompt focused on understanding the contestation and extracting
    the relevant clinical elements. No argumentative writing happens here.

    Returns:
        Structured dict, or None when both LLM backends fail.
    """
    # Compact principal-diagnosis summary: extracted text first, UCR code fallback.
    dp = dossier.diagnostic_principal
    dp_summary = ""
    if dp:
        suffix = f" ({dp.cim10_suggestion})" if dp.cim10_suggestion else ""
        dp_summary = f"{dp.texte}{suffix}"
    elif controle.dp_ucr:
        dp_summary = f"code {controle.dp_ucr} (codé par l'établissement)"

    # Associated diagnoses, with their CIM-10 suggestion when available.
    das_parts: list[str] = []
    for diag in dossier.diagnostics_associes:
        if diag.cim10_suggestion:
            das_parts.append(f"{diag.texte} ({diag.cim10_suggestion})")
        else:
            das_parts.append(diag.texte)

    # Tagged context — same helper as pass 2, so tags stay consistent.
    tagged_text, _ = _build_tagged_context(dossier)

    prompt = CPAM_EXTRACTION.format(
        dp_str=dp_summary or "Non extrait",
        das_str=", ".join(das_parts) or "Aucun",
        tagged_text=tagged_text,
        titre=controle.titre,
        arg_ucr=controle.arg_ucr,
        decision_ucr=controle.decision_ucr,
        dp_ucr_line=f"DP proposé UCR : {controle.dp_ucr}" if controle.dp_ucr else "",
        da_ucr_line=f"DA proposés UCR : {controle.da_ucr}" if controle.da_ucr else "",
    )

    logger.debug(" Passe 1 — extraction structurée")
    # Local Ollama first; Anthropic as a fallback when it is unavailable.
    extracted = call_ollama(prompt, temperature=0.0, max_tokens=3000, role="cpam")
    if extracted is None:
        extracted = call_anthropic(prompt, temperature=0.0, max_tokens=3000)

    if extracted is None:
        logger.warning(" Passe 1 échouée — fallback single-pass")
    else:
        logger.info(" Passe 1 OK : %d éléments cliniques extraits",
                    len(extracted.get("elements_cliniques_pertinents", [])))
    return extracted
|
|
|
|
|
|
def generate_cpam_response(
    dossier: DossierMedical,
    controle: ControleCPAM,
) -> tuple[str, dict | None, list[RAGSource]]:
    """Generate a counter-argumentation for a CPAM control.

    Full pipeline: version the previous answer, run the structured-extraction
    pass, search the RAG, call the LLM (Ollama, Anthropic fallback), then
    sanitize / validate / optionally correct the result before formatting it.

    Args:
        dossier: The analyzed medical record.
        controle: The CPAM control to contest.

    Returns:
        Tuple (counter-argumentation text, structured LLM dict or None,
        RAG sources used). Text is "" and dict is None when no LLM is
        available; RAG sources are returned regardless.
    """
    logger.info("CPAM : génération contre-argumentation pour OGC %d — %s",
                controle.numero_ogc, controle.titre)

    # 0. Versioning — snapshot the previous answer before overwriting it.
    _save_version(dossier, controle)

    # 1. Pass 1 — structured extraction (understanding before arguing).
    #    A failure is non-fatal: the pipeline continues in degraded mode.
    extraction = _extraction_pass(dossier, controle)
    degraded_pass1 = extraction is None
    if degraded_pass1:
        dossier.alertes_codage.append(
            "CPAM: passe 1 (extraction structurée) échouée → mode dégradé"
        )

    # 2. Targeted RAG search for this control.
    sources = _search_rag_for_control(controle, dossier)
    logger.info(" RAG : %d sources trouvées", len(sources))

    # 3. Build the pass-2 (argumentation) prompt; tag_map lets the validators
    #    trace evidence back to the dossier context.
    prompt, tag_map = _build_cpam_prompt(dossier, controle, sources, extraction)

    # 4. LLM call — Ollama (role "cpam") first, Anthropic Haiku as fallback.
    result = call_ollama(prompt, temperature=0.1, max_tokens=8000, role="cpam")
    if result is not None:
        logger.info(" Contre-argumentation via Ollama")
    else:
        logger.info(" Ollama indisponible → fallback Anthropic Haiku")
        result = call_anthropic(prompt, temperature=0.1, max_tokens=8000)
        if result is not None:
            logger.info(" Contre-argumentation via Anthropic Haiku")

    # 5. Convert raw RAG hits into RAGSource records (excerpt capped at 200 chars).
    rag_sources = [
        RAGSource(
            document=s.get("document", ""),
            page=s.get("page"),
            code=s.get("code"),
            extrait=s.get("extrait", "")[:200],
        )
        for s in sources
    ]

    # Both backends failed → return the RAG sources anyway so the caller can
    # still display what was found.
    if result is None:
        logger.warning(" LLM non disponible — contre-argumentation non générée")
        return "", None, rag_sources

    # 5b. LOGIC-2 — flag degraded mode in the result for downstream consumers.
    if degraded_pass1:
        result.setdefault("quality_flags", {})
        result["quality_flags"]["cpam_pass1_failed"] = True
        result["quality_flags"]["degraded_mode"] = True

    # 6. Deterministic sanitization — strips CIM-10 codes outside the
    #    dossier/UCR perimeter (mutates `result` in place, returns the list
    #    of removed codes).
    sanitized = _sanitize_unauthorized_codes(result, dossier, controle)
    if sanitized:
        logger.info(" CPAM : %d code(s) hors périmètre supprimé(s) : %s",
                    len(sanitized), ", ".join(sanitized))

    # 6b. Deterministic guardian — fixes bio hallucinations, forces R3.
    result = _guardian_deterministic(result, dossier, controle, tag_map)

    # 7. Validate RAG references cited by the answer.
    ref_warnings = _validate_references(result, sources)
    if ref_warnings:
        logger.warning(" CPAM : %d référence(s) non vérifiable(s)", len(ref_warnings))

    # 7b. Grounding validation — every piece of evidence must trace back to
    #     a tag in the dossier context.
    grounding_warnings = _validate_grounding(result, tag_map)
    if grounding_warnings:
        logger.warning(" CPAM : %d preuve(s) non traçable(s)", len(grounding_warnings))

    # 7c. Closed code-set validation (dossier + UCR perimeter) — run after
    #     sanitization so only codes it could not remove are reported.
    code_warnings = _validate_codes_in_response(result, dossier, controle)
    if code_warnings:
        logger.warning(" CPAM : %d code(s) hors périmètre", len(code_warnings))

    # 8. Adversarial validation (factual coherence).
    # LOGIC-3: detect identical generator/validator models BEFORE the call —
    # a model cannot meaningfully audit its own output.
    from ..config import check_adversarial_model_config
    same_model, model_msg = check_adversarial_model_config()  # model_msg unused here
    if same_model:
        result.setdefault("quality_flags", {})
        result["quality_flags"]["adversarial_disabled_same_model"] = True
        dossier.alertes_codage.append(
            "Validation adversariale désactivée (modèles identiques)"
        )

    adversarial_warnings: list[str] = []
    validation = _validate_adversarial(result, tag_map, controle)
    if validation and not validation.get("coherent", True):
        erreurs = validation.get("erreurs", [])
        score = validation.get("score_confiance", "?")
        for e in erreurs:
            if isinstance(e, str) and e.strip():
                adversarial_warnings.append(f"Incohérence détectée : {e}")
        if adversarial_warnings:
            adversarial_warnings.append(f"Score de confiance : {score}/10")

    # 8b. Correction loop (bounded by T2A_CPAM_MAX_CORRECTIONS, default 2).
    #     Triggered only for incoherent answers with confidence <= 5 and when
    #     the RULE-CPAM-CORRECTION-LOOP feature rule is enabled.
    max_corrections = int(os.environ.get("T2A_CPAM_MAX_CORRECTIONS", "2"))
    for attempt in range(max_corrections):
        if not (validation
                and not validation.get("coherent", True)
                and validation.get("score_confiance", 10) <= 5
                and rule_enabled("RULE-CPAM-CORRECTION-LOOP")):
            break

        erreurs_v = validation.get("erreurs", [])
        logger.warning(" Score adversarial %s/10 — correction %d/%d (%d erreur(s))",
                       validation.get("score_confiance"), attempt + 1, max_corrections, len(erreurs_v))

        correction_prompt = _build_correction_prompt(prompt, result, validation)
        corrected = call_ollama(correction_prompt, temperature=0.0, max_tokens=16000, role="cpam")
        if corrected is None:
            corrected = call_anthropic(correction_prompt, temperature=0.0, max_tokens=16000)

        if not corrected:
            break

        # Accept the correction only if the adversarial score strictly improves.
        validation2 = _validate_adversarial(corrected, tag_map, controle)
        score2 = validation2.get("score_confiance", 0) if validation2 else 0
        score1 = validation.get("score_confiance", 0)

        if score2 > score1:
            logger.info(" Correction %d acceptée (score %s → %s)", attempt + 1, score1, score2)
            result = corrected
            validation = validation2
            # Re-run the whole post-processing chain on the corrected answer.
            _sanitize_unauthorized_codes(result, dossier, controle)
            result = _guardian_deterministic(result, dossier, controle, tag_map)
            ref_warnings = _validate_references(result, sources)
            grounding_warnings = _validate_grounding(result, tag_map)
            code_warnings = _validate_codes_in_response(result, dossier, controle)
            adversarial_warnings = []
            if validation and not validation.get("coherent", True):
                for e in validation.get("erreurs", []):
                    if isinstance(e, str) and e.strip():
                        adversarial_warnings.append(f"Incohérence détectée : {e}")
                if adversarial_warnings:
                    adversarial_warnings.append(
                        f"Score de confiance : {validation.get('score_confiance', '?')}/10"
                    )
        else:
            logger.warning(" Correction %d rejetée (score %s → %s)", attempt + 1, score1, score2)
            break

    all_warnings = ref_warnings + grounding_warnings + code_warnings + adversarial_warnings

    # 8c. Evaluate the dossier's evidentiary strength (weak dossiers relax
    #     the quality thresholds below).
    strength = _assess_dossier_strength(dossier)
    if strength["is_weak"]:
        logger.info(" Dossier à preuves limitées (score %d/10) : %s",
                    strength["score"], ", ".join(strength["missing"]))

    # 8d. Quality classification (tier A/B/C) — stored on the control.
    tier, needs_review, cat_warnings = _assess_quality_tier(
        result, ref_warnings, grounding_warnings, code_warnings, validation,
        is_weak_dossier=strength["is_weak"],
    )
    controle.quality_tier = tier
    controle.requires_review = needs_review
    controle.quality_warnings = cat_warnings
    logger.info(" Qualité CPAM : tier %s, requires_review=%s, %d warnings",
                tier, needs_review, len(cat_warnings))

    # 9. Format the final response text (warnings and tier are embedded).
    text = _format_response(
        result,
        ref_warnings=all_warnings,
        quality_tier=tier,
        categorized_warnings=cat_warnings,
    )
    logger.info(" Contre-argumentation générée (%d caractères)", len(text))

    return text, result, rag_sources
|