Files
t2a_v2/test_quality_tier_live.py
2026-03-05 00:37:41 +01:00

147 lines
4.6 KiB
Python

#!/usr/bin/env python3
"""Test live du quality_tier CPAM sur 3 dossiers existants.
Force l'embedding SentenceTransformer sur CPU pour libérer la VRAM à Ollama Cloud.
"""
import os
# Forcer CPU pour SentenceTransformer — la VRAM reste disponible pour Ollama
os.environ["CUDA_VISIBLE_DEVICES"] = ""
import json
import sys
import time
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent))
from src.config import DossierMedical, ControleCPAM
from src.control.cpam_parser import parse_cpam_excel
from src.control.cpam_response import generate_cpam_response
# Root folder of the structured output; each dossier is looked up in a
# sub-folder named after it.
STRUCTURED_DIR: Path = Path("output/structured")

# Excel workbook handed to parse_cpam_excel to obtain the CPAM controls.
CPAM_EXCEL: Path = Path("input/Control_cpam/SPHO-FINANC26020915121_ogc_structure.xlsx")

# Three dossiers with known CPAM controls. The part before the first
# underscore is parsed as the OGC number by main().
DOSSIERS: list[str] = [
    "116_23065570",
    "132_23080179",
    "134_23050890",
]

# Pause between dossiers, in seconds, to avoid HTTP 429 responses from
# Ollama Cloud.
INTER_DOSSIER_DELAY: int = 5
def load_dossier(subdir: str) -> DossierMedical | None:
    """Load the structured record stored under ``STRUCTURED_DIR / subdir``.

    The merged file ``<subdir>_fusionne_cim10.json`` is preferred. When it is
    missing, the lexicographically first ``*_cim10.json`` file of the folder
    is used instead, so the fallback choice is reproducible across runs and
    file systems.

    Args:
        subdir: Name of the dossier folder, also used as the file-name prefix
            of the merged JSON.

    Returns:
        A ``DossierMedical`` built from the JSON payload, or ``None`` when the
        folder holds no matching JSON file.

    Errors raised while reading, decoding the JSON or constructing the model
    are not caught here and propagate to the caller.
    """
    dossier_dir = STRUCTURED_DIR / subdir
    merged = dossier_dir / f"{subdir}_fusionne_cim10.json"

    if merged.exists():
        source = merged
    else:
        # glob() yields entries in arbitrary directory order; sorting makes
        # the "first JSON found" fallback deterministic.
        candidates = sorted(dossier_dir.glob("*_cim10.json"))
        if not candidates:
            return None
        source = candidates[0]

    data = json.loads(source.read_text(encoding="utf-8"))
    return DossierMedical(**data)
def main():
    """Run the live quality-tier check on every dossier in ``DOSSIERS``.

    Steps:
      1. Parse the CPAM workbook (``CPAM_EXCEL``); abort with a message when
         the file is missing.
      2. For each dossier: load it, pick the first CPAM control registered
         for its OGC number, call ``generate_cpam_response`` and print the
         timing, the quality tier, the review flag and the warnings read
         from the control object afterwards.
      3. Print a one-line-per-dossier summary.

    A failure of ``generate_cpam_response`` on one dossier is reported and
    recorded in the summary; the remaining dossiers are still processed.

    Returns:
        None. All results are written to standard output.
    """
    banner = "=" * 70
    print(banner)
    print("TEST QUALITY_TIER CPAM — Mode Cloud (embedding CPU)")
    print(banner)
    print()

    # Load the CPAM controls; nothing can be tested without the workbook.
    if not CPAM_EXCEL.exists():
        print(f"Fichier CPAM introuvable : {CPAM_EXCEL}")
        return
    cpam_data = parse_cpam_excel(str(CPAM_EXCEL))
    print(f"Contrôles CPAM chargés : {len(cpam_data)} OGC\n")

    # Entries are (subdir, tier, requires_review) on success and
    # (subdir, "ERREUR", message) on failure.
    results_summary = []

    for i, subdir in enumerate(DOSSIERS):
        if i > 0:
            # Throttle between dossiers to stay under the remote rate limit.
            print(f" [pause {INTER_DOSSIER_DELAY}s entre les dossiers...]\n")
            time.sleep(INTER_DOSSIER_DELAY)

        # The folder name starts with the OGC number: "<ogc>_<id>".
        ogc_num = int(subdir.split("_")[0])

        dossier = load_dossier(subdir)
        if not dossier:
            print(f" [{subdir}] Dossier introuvable")
            continue

        # Look up the matching CPAM controls (per the original note, the
        # parser returns a mapping OGC number -> list of ControleCPAM).
        ctrls = cpam_data.get(ogc_num)
        if not ctrls:
            print(f" [{subdir}] Aucun contrôle OGC {ogc_num} trouvé")
            continue
        ctrl = ctrls[0]  # first control for this OGC

        dp_code = dossier.diagnostic_principal.cim10_suggestion if dossier.diagnostic_principal else ""
        n_das = len(dossier.diagnostics_associes)
        n_bio = len(dossier.biologie_cle)

        print(banner)
        print(f"Dossier {subdir} — OGC {ogc_num}")
        print(f" DP: {dp_code}, DAS: {n_das}, Bio: {n_bio}")
        print(f" Titre: {ctrl.titre}")
        print(f" Décision UCR: {ctrl.decision_ucr}")
        if ctrl.dp_ucr:
            print(f" DP UCR: {ctrl.dp_ucr}")
        if ctrl.da_ucr:
            print(f" DA UCR: {ctrl.da_ucr}")
        print()

        # perf_counter() is monotonic, so the measured duration cannot be
        # distorted by system clock adjustments during the call.
        t0 = time.perf_counter()
        try:
            text, result, sources = generate_cpam_response(dossier, ctrl)
        except Exception as e:
            # Best-effort runner: report the failure and keep going.
            print(f" ERREUR: {e}")
            results_summary.append((subdir, "ERREUR", str(e)))
            continue
        elapsed = time.perf_counter() - t0

        print(f" Temps: {elapsed:.1f}s")
        print(f" Sources RAG: {len(sources)}")
        print(f" Longueur texte: {len(text)} chars")
        print()

        # Quality fields are read from the control after the call (the
        # original note says generate_cpam_response enriches it).
        print(f" >>> QUALITY TIER: {ctrl.quality_tier}")
        print(f" >>> REQUIRES REVIEW: {ctrl.requires_review}")

        # Normalise once so a missing (None) warning list cannot break the
        # iteration below.
        warnings = ctrl.quality_warnings or []
        if warnings:
            print(f" >>> WARNINGS ({len(warnings)}):")
            for w in warnings:
                print(f" {w}")
        else:
            print(" >>> 0 warnings")

        # Adversarial score, if available: only report when no warning
        # mentions it.
        if result and not any("Score adversarial" in w for w in warnings):
            print(" >>> Score adversarial: non extrait des warnings (tier A implicite)")
        print()

        results_summary.append((subdir, ctrl.quality_tier, ctrl.requires_review))

    # Final summary.
    print("\n" + banner)
    print("RÉSUMÉ")
    print(banner)
    for subdir, tier, *rest in results_summary:
        if tier == "ERREUR":
            # Error messages are truncated to keep the summary on one line.
            print(f" {subdir}: ERREUR — {rest[0][:80]}")
        else:
            review = rest[0] if rest else "?"
            print(f" {subdir}: Tier {tier} | requires_review={review}")
# Run the live test only when the file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    main()