feat: timings appels LLM CPAM (extraction, génération, validation, correction)
Ajoute des mesures time.time() autour de chaque appel Ollama (fallback Anthropic inclus le cas échéant) dans le flux CPAM :

- [CPAM-EXTRACT] : extraction structurée (passe 1, role=cpam)
- [CPAM-GEN] : génération de l'argumentation (passe 2, role=cpam)
- [CPAM-VALID] : validation adversariale (role=validation)
- [CPAM-CORR] : correction post-validation (role=cpam)

Permet de mesurer le temps réel de chaque phase et d'identifier les coûts de swap de modèle en VRAM entre les rôles cpam/validation.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -11,6 +11,7 @@ from __future__ import annotations
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
@@ -149,14 +150,18 @@ def _extraction_pass(
|
||||
)
|
||||
|
||||
logger.debug(" Passe 1 — extraction structurée")
|
||||
t0 = time.time()
|
||||
result = call_ollama(prompt, temperature=0.0, max_tokens=3000, role="cpam")
|
||||
if result is None:
|
||||
result = call_anthropic(prompt, temperature=0.0, max_tokens=3000)
|
||||
elapsed = time.time() - t0
|
||||
if result is not None:
|
||||
logger.info(" Passe 1 OK : %d éléments cliniques extraits",
|
||||
logger.info(" [CPAM-EXTRACT] %.1fs — OGC %s — %d éléments cliniques extraits",
|
||||
elapsed, controle.numero_ogc,
|
||||
len(result.get("elements_cliniques_pertinents", [])))
|
||||
else:
|
||||
logger.warning(" Passe 1 échouée — fallback single-pass")
|
||||
logger.warning(" [CPAM-EXTRACT] %.1fs — OGC %s — passe 1 échouée",
|
||||
elapsed, controle.numero_ogc)
|
||||
return result
|
||||
|
||||
|
||||
@@ -195,14 +200,17 @@ def generate_cpam_response(
|
||||
prompt, tag_map = _build_cpam_prompt(dossier, controle, sources, extraction)
|
||||
|
||||
# 4. Appel LLM — Ollama (rôle cpam) > Haiku fallback
|
||||
t_gen = time.time()
|
||||
result = call_ollama(prompt, temperature=0.1, max_tokens=8000, role="cpam")
|
||||
if result is not None:
|
||||
logger.info(" Contre-argumentation via Ollama")
|
||||
logger.info(" [CPAM-GEN] %.1fs — OGC %s — contre-argumentation via Ollama",
|
||||
time.time() - t_gen, controle.numero_ogc)
|
||||
else:
|
||||
logger.info(" Ollama indisponible → fallback Anthropic Haiku")
|
||||
result = call_anthropic(prompt, temperature=0.1, max_tokens=8000)
|
||||
if result is not None:
|
||||
logger.info(" Contre-argumentation via Anthropic Haiku")
|
||||
logger.info(" [CPAM-GEN] %.1fs — OGC %s — contre-argumentation via Anthropic",
|
||||
time.time() - t_gen, controle.numero_ogc)
|
||||
|
||||
# 5. Conversion des sources RAG
|
||||
rag_sources = [
|
||||
@@ -285,9 +293,12 @@ def generate_cpam_response(
|
||||
validation.get("score_confiance"), attempt + 1, max_corrections, len(erreurs_v))
|
||||
|
||||
correction_prompt = _build_correction_prompt(prompt, result, validation)
|
||||
t_corr = time.time()
|
||||
corrected = call_ollama(correction_prompt, temperature=0.0, max_tokens=16000, role="cpam")
|
||||
if corrected is None:
|
||||
corrected = call_anthropic(correction_prompt, temperature=0.0, max_tokens=16000)
|
||||
logger.info(" [CPAM-CORR] %.1fs — OGC %s — correction %d/%d",
|
||||
time.time() - t_corr, controle.numero_ogc, attempt + 1, max_corrections)
|
||||
|
||||
if not corrected:
|
||||
break
|
||||
|
||||
@@ -4,6 +4,7 @@ from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
|
||||
from ..config import ControleCPAM, DossierMedical
|
||||
from ..medical.bio_normals import BIO_NORMALS
|
||||
@@ -477,11 +478,14 @@ def _validate_adversarial(
|
||||
)
|
||||
|
||||
logger.debug(" Validation adversariale")
|
||||
t_val = time.time()
|
||||
result = call_ollama(prompt, temperature=0.0, max_tokens=6000, role="validation")
|
||||
if result is None:
|
||||
result = call_anthropic(prompt, temperature=0.0, max_tokens=6000)
|
||||
elapsed = time.time() - t_val
|
||||
if result is None:
|
||||
logger.warning(" Validation adversariale échouée — LLM indisponible")
|
||||
logger.warning(" [CPAM-VALID] %.1fs — OGC %s — validation adversariale échouée",
|
||||
elapsed, controle.numero_ogc)
|
||||
return None
|
||||
|
||||
coherent = result.get("coherent", True)
|
||||
@@ -489,12 +493,13 @@ def _validate_adversarial(
|
||||
score = result.get("score_confiance", -1)
|
||||
|
||||
if not coherent and erreurs:
|
||||
logger.warning(" Validation adversariale : %d incohérence(s) détectée(s) (score %s/10)",
|
||||
len(erreurs), score)
|
||||
logger.warning(" [CPAM-VALID] %.1fs — OGC %s — %d incohérence(s) (score %s/10)",
|
||||
elapsed, controle.numero_ogc, len(erreurs), score)
|
||||
for e in erreurs:
|
||||
logger.warning(" - %s", e)
|
||||
else:
|
||||
logger.info(" Validation adversariale OK (score %s/10)", score)
|
||||
logger.info(" [CPAM-VALID] %.1fs — OGC %s — OK (score %s/10)",
|
||||
elapsed, controle.numero_ogc, score)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
Reference in New Issue
Block a user