From 8e0ed1220d68f95f404181659c76582bd6374d01 Mon Sep 17 00:00:00 2001
From: dom
Date: Fri, 20 Feb 2026 15:00:08 +0100
Subject: [PATCH] fix: max_tokens CPAM 6000→16000 + Ollama truncation diagnostic
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Argumentation and correction passes: max_tokens raised to 16000 (num_predict)
- ollama_client: log done_reason=length to detect server-side truncation
- Live result: 1/3 Tier B (dossier 132 moved from C to B, score 5/10)
- The 2 remaining Tier C dossiers are blocked by code hallucination and
  missing lab data, not by max_tokens

Co-Authored-By: Claude Opus 4.6
---
 src/control/cpam_response.py |  8 ++++----
 src/medical/ollama_client.py | 10 +++++++++-
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/src/control/cpam_response.py b/src/control/cpam_response.py
index 6bba558..c5985a7 100644
--- a/src/control/cpam_response.py
+++ b/src/control/cpam_response.py
@@ -126,12 +126,12 @@ def generate_cpam_response(
     prompt, tag_map = _build_cpam_prompt(dossier, controle, sources, extraction)
 
     # 4. Appel LLM — Ollama (rôle cpam) > Haiku fallback
-    result = call_ollama(prompt, temperature=0.1, max_tokens=6000, role="cpam")
+    result = call_ollama(prompt, temperature=0.1, max_tokens=16000, role="cpam")
     if result is not None:
         logger.info(" Contre-argumentation via Ollama")
     else:
         logger.info(" Ollama indisponible → fallback Anthropic Haiku")
-        result = call_anthropic(prompt, temperature=0.1, max_tokens=6000)
+        result = call_anthropic(prompt, temperature=0.1, max_tokens=16000)
         if result is not None:
             logger.info(" Contre-argumentation via Anthropic Haiku")
 
@@ -188,9 +188,9 @@ def generate_cpam_response(
                        validation.get("score_confiance"), len(erreurs_v))
 
         correction_prompt = _build_correction_prompt(prompt, result, validation)
-        corrected = call_ollama(correction_prompt, temperature=0.0, max_tokens=6000, role="cpam")
+        corrected = call_ollama(correction_prompt, temperature=0.0, max_tokens=16000, role="cpam")
         if corrected is None:
-            corrected = call_anthropic(correction_prompt, temperature=0.0, max_tokens=6000)
+            corrected = call_anthropic(correction_prompt, temperature=0.0, max_tokens=16000)
 
         if corrected:
             # Re-valider la correction
diff --git a/src/medical/ollama_client.py b/src/medical/ollama_client.py
index f3d47de..933aa7d 100644
--- a/src/medical/ollama_client.py
+++ b/src/medical/ollama_client.py
@@ -189,7 +189,15 @@ def call_ollama(
                 time.sleep(delay)
                 continue
             response.raise_for_status()
-            raw = response.json().get("response", "")
+            resp_data = response.json()
+            raw = resp_data.get("response", "")
+            done_reason = resp_data.get("done_reason", "")
+            eval_count = resp_data.get("eval_count", 0)
+            if done_reason == "length":
+                logger.warning("Ollama : réponse tronquée (done_reason=length, %d tokens, %d chars)",
+                               eval_count, len(raw))
+            else:
+                logger.debug("Ollama : réponse complète (%d tokens, %d chars)", eval_count, len(raw))
             result = parse_json_response(raw)
             if result is not None:
                 return result
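
Reviewer note: the done_reason check added above can be exercised outside the
pipeline with a single non-streaming request. Below is a minimal sketch,
assuming a local Ollama server on its default port (11434); probe_truncation
and the model name are illustrative placeholders, not part of this codebase.
It sends one /api/generate call with the same num_predict budget and reports
whether the server stopped on "length" (i.e. truncated the output).

# probe_truncation.py -- standalone truncation probe (illustrative sketch)
import requests

OLLAMA_URL = "http://localhost:11434/api/generate"

def probe_truncation(prompt: str, num_predict: int = 16000) -> dict:
    """Send a non-streaming generate request and report truncation status."""
    payload = {
        "model": "llama3.1",            # placeholder, not the pipeline's model
        "prompt": prompt,
        "stream": False,                # get a single final JSON object
        "options": {
            "temperature": 0.1,
            "num_predict": num_predict,  # hard cap on generated tokens
        },
    }
    resp = requests.post(OLLAMA_URL, json=payload, timeout=600)
    resp.raise_for_status()
    data = resp.json()
    return {
        # done_reason == "length" means generation hit num_predict: truncated
        "truncated": data.get("done_reason") == "length",
        "eval_count": data.get("eval_count", 0),   # tokens actually generated
        "chars": len(data.get("response", "")),
    }

if __name__ == "__main__":
    print(probe_truncation("Summarize the dossier in exhaustive detail."))

If "truncated" stays False at num_predict=16000 on representative prompts,
remaining Tier C failures are not a token-budget problem, which matches the
live result reported in the commit message.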