feat: timings appels LLM CPAM (génération, validation, correction)

Ajoute des mesures time.time() autour de chaque appel Ollama dans le flux CPAM :
- [CPAM-EXTRACT] : extraction structurée (passe 1, role=cpam)
- [CPAM-GEN] : génération argumentation (passe 2, role=cpam)
- [CPAM-VALID] : validation adversariale (role=validation)
- [CPAM-CORR] : correction post-validation (role=cpam)

Permet de mesurer le temps réel de chaque phase et d'identifier
les coûts de swap de modèle VRAM entre les rôles cpam/validation.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-03-08 14:09:42 +01:00
parent 0bfc1a9d6e
commit d6b4e48989
2 changed files with 24 additions and 8 deletions

View File

@@ -11,6 +11,7 @@ from __future__ import annotations
 import json
 import logging
 import os
+import time
 from datetime import datetime
 from pathlib import Path
@@ -149,14 +150,18 @@ def _extraction_pass(
     )
     logger.debug(" Passe 1 — extraction structurée")
+    t0 = time.time()
     result = call_ollama(prompt, temperature=0.0, max_tokens=3000, role="cpam")
     if result is None:
         result = call_anthropic(prompt, temperature=0.0, max_tokens=3000)
+    elapsed = time.time() - t0
     if result is not None:
-        logger.info(" Passe 1 OK : %d éléments cliniques extraits",
+        logger.info(" [CPAM-EXTRACT] %.1fs — OGC %s %d éléments cliniques extraits",
+                    elapsed, controle.numero_ogc,
                     len(result.get("elements_cliniques_pertinents", [])))
     else:
-        logger.warning(" Passe 1 échouée — fallback single-pass")
+        logger.warning(" [CPAM-EXTRACT] %.1fs — OGC %s — passe 1 échouée",
+                       elapsed, controle.numero_ogc)
     return result
@@ -195,14 +200,17 @@ def generate_cpam_response(
     prompt, tag_map = _build_cpam_prompt(dossier, controle, sources, extraction)
     # 4. Appel LLM — Ollama (rôle cpam) > Haiku fallback
+    t_gen = time.time()
     result = call_ollama(prompt, temperature=0.1, max_tokens=8000, role="cpam")
     if result is not None:
-        logger.info(" Contre-argumentation via Ollama")
+        logger.info(" [CPAM-GEN] %.1fs — OGC %s — contre-argumentation via Ollama",
+                    time.time() - t_gen, controle.numero_ogc)
     else:
         logger.info(" Ollama indisponible → fallback Anthropic Haiku")
         result = call_anthropic(prompt, temperature=0.1, max_tokens=8000)
         if result is not None:
-            logger.info(" Contre-argumentation via Anthropic Haiku")
+            logger.info(" [CPAM-GEN] %.1fs — OGC %s — contre-argumentation via Anthropic",
+                        time.time() - t_gen, controle.numero_ogc)
     # 5. Conversion des sources RAG
     rag_sources = [
@@ -285,9 +293,12 @@ def generate_cpam_response(
                        validation.get("score_confiance"), attempt + 1, max_corrections, len(erreurs_v))
         correction_prompt = _build_correction_prompt(prompt, result, validation)
+        t_corr = time.time()
         corrected = call_ollama(correction_prompt, temperature=0.0, max_tokens=16000, role="cpam")
         if corrected is None:
             corrected = call_anthropic(correction_prompt, temperature=0.0, max_tokens=16000)
+        logger.info(" [CPAM-CORR] %.1fs — OGC %s — correction %d/%d",
+                    time.time() - t_corr, controle.numero_ogc, attempt + 1, max_corrections)
         if not corrected:
             break

View File

@@ -4,6 +4,7 @@ from __future__ import annotations
 import logging
 import re
+import time
 from ..config import ControleCPAM, DossierMedical
 from ..medical.bio_normals import BIO_NORMALS
@@ -477,11 +478,14 @@ def _validate_adversarial(
     )
     logger.debug(" Validation adversariale")
+    t_val = time.time()
     result = call_ollama(prompt, temperature=0.0, max_tokens=6000, role="validation")
     if result is None:
         result = call_anthropic(prompt, temperature=0.0, max_tokens=6000)
+    elapsed = time.time() - t_val
     if result is None:
-        logger.warning(" Validation adversariale échouée — LLM indisponible")
+        logger.warning(" [CPAM-VALID] %.1fs — OGC %s — validation adversariale échouée",
+                       elapsed, controle.numero_ogc)
         return None
     coherent = result.get("coherent", True)
@@ -489,12 +493,13 @@ def _validate_adversarial(
     score = result.get("score_confiance", -1)
     if not coherent and erreurs:
-        logger.warning(" Validation adversariale : %d incohérence(s) détectée(s) (score %s/10)",
-                       len(erreurs), score)
+        logger.warning(" [CPAM-VALID] %.1fs — OGC %s %d incohérence(s) (score %s/10)",
+                       elapsed, controle.numero_ogc, len(erreurs), score)
         for e in erreurs:
             logger.warning(" - %s", e)
     else:
-        logger.info(" Validation adversariale OK (score %s/10)", score)
+        logger.info(" [CPAM-VALID] %.1fs — OGC %s OK (score %s/10)",
+                    elapsed, controle.numero_ogc, score)
     return result