refactor: réorganisation référentiels, nouveaux modules extraction, nettoyage code obsolète
- Réorganisation data/referentiels/ : pdfs/, dicts/, user/ (structure unifiée) - Fix badges "Source absente" sur page admin référentiels - Ré-indexation COCOA 2025 (555 → 1451 chunks, couverture 94%) - Fix VRAM OOM : embeddings forcés CPU via T2A_EMBED_CPU - Nouveaux modules : document_router, docx_extractor, image_extractor, ocr_engine - Module complétude (quality/completude.py + config YAML) - Template DIM (synthèse dimensionnelle) - Gunicorn config + systemd service t2a-viewer - Suppression t2a_install_rag_cleanup/ (copie obsolète) - Suppression scripts/ et scripts_t2a_v2/ (anciens benchmarks) - Suppression 81 fichiers _doc.txt de test - Cache Ollama : TTL configurable, corrections loader YAML - Dashboard : améliorations templates (base, index, detail, cpam, validation) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -8,9 +8,13 @@ Orchestrateur principal — délègue aux sous-modules :
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
from ..config import ControleCPAM, DossierMedical, RAGSource, rule_enabled
|
||||
from ..config import ControleCPAM, DossierMedical, RAGSource, STRUCTURED_DIR, rule_enabled
|
||||
from ..medical.ollama_client import call_anthropic, call_ollama
|
||||
from ..prompts import CPAM_EXTRACTION
|
||||
|
||||
@@ -50,6 +54,70 @@ from .cpam_validation import _CIM10_CODE_RE, _validate_adversarial as _validate_
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _save_version(
    dossier: DossierMedical,
    controle: ControleCPAM,
) -> None:
    """Save the current counter-argument version before regeneration.

    Snapshots are written to
    output/structured/{dossier}/_cpam_versions/{ogc}_{timestamp}.json
    so that regenerating an argument never silently discards the previous one.
    Best-effort: silently skips when there is nothing to version or when no
    matching structured directory can be located.
    """
    # Nothing generated yet -> nothing worth versioning.
    if not controle.contre_argumentation and not controle.response_data:
        return

    if not STRUCTURED_DIR.is_dir():
        logger.debug("Versioning : STRUCTURED_DIR inexistant, skip")
        return

    subdirs = [entry for entry in STRUCTURED_DIR.iterdir() if entry.is_dir()]

    # Candidate source filenames: source_files first, then source_file
    # (deduplicated) — either may identify the structured directory.
    names = list(dossier.source_files or [])
    if dossier.source_file and dossier.source_file not in names:
        names.append(dossier.source_file)

    # First sub-directory whose name contains a candidate's sanitized stem wins.
    target = None
    for name in names:
        stem = Path(name).stem.replace(" ", "_")
        target = next((entry for entry in subdirs if stem in entry.name), None)
        if target is not None:
            break

    if not target:
        logger.debug("Versioning : pas de dossier structuré trouvé, skip")
        return

    versions_dir = target / "_cpam_versions"
    versions_dir.mkdir(exist_ok=True)

    # Version number = number of existing snapshots for this OGC, plus one.
    prior = sorted(versions_dir.glob(f"{controle.numero_ogc}_*.json"))
    version_num = len(prior) + 1

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{controle.numero_ogc}_{timestamp}_v{version_num}.json"

    payload = {
        "numero_ogc": controle.numero_ogc,
        "version": version_num,
        "timestamp": timestamp,
        "contre_argumentation": controle.contre_argumentation,
        "response_data": controle.response_data,
        "quality_tier": controle.quality_tier,
        "validation_dim": controle.validation_dim,
    }

    (versions_dir / filename).write_text(
        json.dumps(payload, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )
    logger.info(" Version %d sauvegardée : %s", version_num, filename)
|
||||
|
||||
|
||||
def _extraction_pass(
|
||||
dossier: DossierMedical,
|
||||
controle: ControleCPAM,
|
||||
@@ -121,6 +189,9 @@ def generate_cpam_response(
|
||||
logger.info("CPAM : génération contre-argumentation pour OGC %d — %s",
|
||||
controle.numero_ogc, controle.titre)
|
||||
|
||||
# 0. Versioning — sauvegarder la version précédente avant d'écraser
|
||||
_save_version(dossier, controle)
|
||||
|
||||
# 1. Passe 1 — Extraction structurée (compréhension avant argumentation)
|
||||
extraction = _extraction_pass(dossier, controle)
|
||||
degraded_pass1 = extraction is None
|
||||
@@ -137,12 +208,12 @@ def generate_cpam_response(
|
||||
prompt, tag_map = _build_cpam_prompt(dossier, controle, sources, extraction)
|
||||
|
||||
# 4. Appel LLM — Ollama (rôle cpam) > Haiku fallback
|
||||
result = call_ollama(prompt, temperature=0.1, max_tokens=16000, role="cpam")
|
||||
result = call_ollama(prompt, temperature=0.1, max_tokens=8000, role="cpam")
|
||||
if result is not None:
|
||||
logger.info(" Contre-argumentation via Ollama")
|
||||
else:
|
||||
logger.info(" Ollama indisponible → fallback Anthropic Haiku")
|
||||
result = call_anthropic(prompt, temperature=0.1, max_tokens=16000)
|
||||
result = call_anthropic(prompt, temperature=0.1, max_tokens=8000)
|
||||
if result is not None:
|
||||
logger.info(" Contre-argumentation via Anthropic Haiku")
|
||||
|
||||
@@ -213,8 +284,8 @@ def generate_cpam_response(
|
||||
if adversarial_warnings:
|
||||
adversarial_warnings.append(f"Score de confiance : {score}/10")
|
||||
|
||||
# 8b. Boucle de correction (max 2 retries)
|
||||
max_corrections = 2
|
||||
# 8b. Boucle de correction (configurable via T2A_CPAM_MAX_CORRECTIONS, défaut 2)
|
||||
max_corrections = int(os.environ.get("T2A_CPAM_MAX_CORRECTIONS", "2"))
|
||||
for attempt in range(max_corrections):
|
||||
if not (validation
|
||||
and not validation.get("coherent", True)
|
||||
|
||||
Reference in New Issue
Block a user