chore: add .gitignore
This commit is contained in:
163
scripts/regenerate_tier_c.py
Normal file
163
scripts/regenerate_tier_c.py
Normal file
@@ -0,0 +1,163 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Régénération ciblée des contrôles CPAM classés Tier C ou sans response_data.
|
||||
|
||||
Usage :
|
||||
cd /home/dom/ai/t2a_v2
|
||||
.venv/bin/python3 scripts/regenerate_tier_c.py [--dry-run]
|
||||
|
||||
Le script :
|
||||
1. Scanne output/structured/ pour trouver les contrôles Tier C + ceux sans response_data
|
||||
2. Pour chaque contrôle, relance generate_cpam_response() avec le pipeline corrigé
|
||||
3. Sauvegarde le JSON mis à jour (backup automatique .bak)
|
||||
|
||||
Options :
|
||||
--dry-run Affiche les contrôles ciblés sans régénérer
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import shutil
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
# Put the repository root (the parent of this script's directory) at the front
# of the import path so that the `src` package can be imported below when this
# file is executed directly as a script.
ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(ROOT))

from src.config import DossierMedical
from src.control.cpam_response import generate_cpam_response

# Root logger configuration: INFO level, time-only timestamps.
logging.basicConfig(
    format="%(asctime)s %(levelname)-7s %(message)s",
    datefmt="%H:%M:%S",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Directory scanned for the per-dossier sub-directories holding the JSON files.
STRUCTURED_DIR = ROOT.joinpath("output", "structured")
|
||||
|
||||
|
||||
def find_targets(structured_dir: Path | None = None) -> list[tuple[Path, int]]:
    """Locate the CPAM controls that need to be regenerated.

    Every immediate sub-directory of the scanned directory is searched for
    ``*_fusionne_cim10.json`` files. A control (an entry of the file's
    ``controles_cpam`` list) is selected when its ``quality_tier`` equals
    ``"C"`` or when its ``response_data`` is missing / ``None``.

    Args:
        structured_dir: Directory to scan. Defaults to the module-level
            ``STRUCTURED_DIR`` when omitted.

    Returns:
        A list of ``(json_path, index)`` pairs, ``index`` being the position
        of the control inside ``controles_cpam``. The order is deterministic:
        sub-directories and files are visited in sorted order. An empty list
        is returned when the directory does not exist.
    """
    base_dir = STRUCTURED_DIR if structured_dir is None else structured_dir
    log = logging.getLogger(__name__)
    targets: list[tuple[Path, int]] = []

    if not base_dir.is_dir():
        log.warning("Répertoire introuvable : %s", base_dir)
        return targets

    for sub in sorted(base_dir.iterdir()):
        if not sub.is_dir():
            continue
        # glob() yields entries in arbitrary order; sort for reproducible runs.
        for jf in sorted(sub.glob("*_fusionne_cim10.json")):
            try:
                data = json.loads(jf.read_text(encoding="utf-8"))
            except (OSError, ValueError):
                # ValueError covers both JSONDecodeError and UnicodeDecodeError.
                # One unreadable file must not abort the whole scan.
                log.warning("Fichier illisible ignoré : %s", jf, exc_info=True)
                continue

            # Tolerate a non-object document and an explicit null list.
            controles = data.get("controles_cpam") if isinstance(data, dict) else None
            for i, ctrl in enumerate(controles or []):
                is_tier_c = ctrl.get("quality_tier") == "C"
                lacks_response = ctrl.get("response_data") is None
                if is_tier_c or lacks_response:
                    targets.append((jf, i))

    return targets
|
||||
|
||||
|
||||
def regenerate(targets: list[tuple[Path, int]]) -> dict[str, int]:
    """Regenerate the targeted CPAM controls and save the updated dossiers.

    Targets are grouped by file so that each dossier is loaded and written at
    most once. For every targeted control, ``generate_cpam_response`` is called
    and its three results are stored on the control. A file is rewritten (after
    copying it to ``<name>.json.bak``) only if at least one of its controls was
    regenerated successfully.

    A failure never aborts the batch: a dossier that cannot be loaded or
    validated, a stale index, or an exception raised during generation is
    logged and counted under ``"errors"``.

    Args:
        targets: ``(json_path, index)`` pairs, ``index`` being the position of
            the control in the dossier's ``controles_cpam`` list.

    Returns:
        Counters keyed by the resulting quality tier (``"A"``, ``"B"``, ``"C"``,
        or ``"?"`` when the control reports no tier) plus ``"errors"``.
    """
    stats = {"A": 0, "B": 0, "C": 0, "errors": 0}

    # Group indices per file so each dossier is loaded/saved only once.
    by_file: dict[Path, list[int]] = {}
    for path, idx in targets:
        by_file.setdefault(path, []).append(idx)

    total = len(targets)
    done = 0

    for json_path, indices in by_file.items():
        dossier_id = json_path.parent.name
        logger.info("=== Dossier %s (%d contrôle(s) à régénérer) ===", dossier_id, len(indices))

        # Load the dossier. A broken file only costs its own controls, in line
        # with the per-control error handling below.
        try:
            data = json.loads(json_path.read_text(encoding="utf-8"))
            dossier = DossierMedical.model_validate(data)
        except Exception:
            logger.exception(" ERREUR au chargement du dossier %s", dossier_id)
            stats["errors"] += len(indices)
            done += len(indices)
            continue

        modified = False

        for idx in indices:
            done += 1
            try:
                ctrl = dossier.controles_cpam[idx]
            except IndexError:
                # The file changed between the scan and this run.
                logger.error(" ERREUR : contrôle #%d introuvable dans %s", idx, json_path.name)
                stats["errors"] += 1
                continue

            old_tier = ctrl.quality_tier or "?"
            logger.info("[%d/%d] OGC %d — %s (ancien tier: %s)",
                        done, total, ctrl.numero_ogc, ctrl.titre[:60], old_tier)

            t0 = time.time()
            try:
                text, response_data, sources = generate_cpam_response(dossier, ctrl)
                elapsed = time.time() - t0

                ctrl.contre_argumentation = text
                ctrl.response_data = response_data
                ctrl.sources_reponse = sources

                # Read the tier again after the update.
                # NOTE(review): presumably quality_tier is derived from
                # response_data — confirm against the model definition.
                new_tier = ctrl.quality_tier or "?"
                stats[new_tier] = stats.get(new_tier, 0) + 1
                modified = True

                logger.info(" Résultat : tier %s → %s (%d chars, %.1fs)",
                            old_tier, new_tier, len(text), elapsed)
            except Exception:
                logger.exception(" ERREUR sur OGC %d", ctrl.numero_ogc)
                stats["errors"] += 1

        if modified:
            # Backup, then save. copy2 overwrites any backup left by a
            # previous run.
            backup_path = json_path.with_suffix(".json.bak")
            shutil.copy2(json_path, backup_path)
            json_path.write_text(
                dossier.model_dump_json(indent=2, exclude_none=True),
                encoding="utf-8",
            )
            logger.info(" Sauvegardé : %s (backup: %s)", json_path.name, backup_path.name)

    return stats
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> None:
    """Command-line entry point: list the targeted controls, then regenerate.

    The targets returned by ``find_targets`` are logged one per line. Unless
    ``--dry-run`` is given, they are then passed to ``regenerate`` and the
    resulting tier distribution is logged.

    Args:
        argv: Command-line arguments without the program name. Defaults to
            ``sys.argv[1:]`` when omitted.
    """
    # Imported locally: only the CLI entry point needs it.
    import argparse

    # A real parser rejects unknown options. With a plain membership test on
    # sys.argv, a typo such as "--dryrun" would silently start a full
    # regeneration and overwrite files.
    parser = argparse.ArgumentParser(
        description="Régénération ciblée des contrôles CPAM classés Tier C ou sans response_data.",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Affiche les contrôles ciblés sans régénérer",
    )
    args = parser.parse_args(argv)

    logger.info("Recherche des contrôles Tier C et sans response_data...")
    targets = find_targets()

    if not targets:
        logger.info("Aucun contrôle à régénérer.")
        return

    logger.info("Trouvé %d contrôle(s) à régénérer :", len(targets))
    # Targets of the same file are consecutive, so remembering the last parsed
    # file avoids re-reading it for every control.
    loaded_path: Path | None = None
    loaded_data: dict = {}
    for path, idx in targets:
        if path != loaded_path:
            loaded_data = json.loads(path.read_text(encoding="utf-8"))
            loaded_path = path
        ctrl = loaded_data["controles_cpam"][idx]
        # Same conventions as the selection/regeneration code: a null tier is
        # shown as "?", and response_data counts as present unless it is None.
        tier = ctrl.get("quality_tier") or "?"
        has_resp = "oui" if ctrl.get("response_data") is not None else "NON"
        logger.info(" %s OGC %d — tier %s, response_data: %s",
                    path.parent.name, ctrl["numero_ogc"], tier, has_resp)

    if args.dry_run:
        logger.info("Mode dry-run — aucune régénération effectuée.")
        return

    t0 = time.time()
    stats = regenerate(targets)
    elapsed = time.time() - t0

    logger.info("=== TERMINÉ en %.1f min ===", elapsed / 60)
    logger.info("Distribution : A=%d, B=%d, C=%d, erreurs=%d",
                stats.get("A", 0), stats.get("B", 0), stats.get("C", 0), stats["errors"])


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user