diff --git a/src/control/cpam_response.py b/src/control/cpam_response.py
index 6bba558..c5985a7 100644
--- a/src/control/cpam_response.py
+++ b/src/control/cpam_response.py
@@ -126,12 +126,12 @@ def generate_cpam_response(
     prompt, tag_map = _build_cpam_prompt(dossier, controle, sources, extraction)

     # 4. Appel LLM — Ollama (rôle cpam) > Haiku fallback
-    result = call_ollama(prompt, temperature=0.1, max_tokens=6000, role="cpam")
+    result = call_ollama(prompt, temperature=0.1, max_tokens=16000, role="cpam")
     if result is not None:
         logger.info(" Contre-argumentation via Ollama")
     else:
         logger.info(" Ollama indisponible → fallback Anthropic Haiku")
-        result = call_anthropic(prompt, temperature=0.1, max_tokens=6000)
+        result = call_anthropic(prompt, temperature=0.1, max_tokens=16000)
         if result is not None:
             logger.info(" Contre-argumentation via Anthropic Haiku")

@@ -188,9 +188,9 @@ def generate_cpam_response(
                     validation.get("score_confiance"), len(erreurs_v))

        correction_prompt = _build_correction_prompt(prompt, result, validation)
-       corrected = call_ollama(correction_prompt, temperature=0.0, max_tokens=6000, role="cpam")
+       corrected = call_ollama(correction_prompt, temperature=0.0, max_tokens=16000, role="cpam")
        if corrected is None:
-           corrected = call_anthropic(correction_prompt, temperature=0.0, max_tokens=6000)
+           corrected = call_anthropic(correction_prompt, temperature=0.0, max_tokens=16000)

        if corrected:
            # Re-valider la correction
diff --git a/src/medical/ollama_client.py b/src/medical/ollama_client.py
index f3d47de..933aa7d 100644
--- a/src/medical/ollama_client.py
+++ b/src/medical/ollama_client.py
@@ -189,7 +189,15 @@ def call_ollama(
                time.sleep(delay)
                continue
            response.raise_for_status()
-           raw = response.json().get("response", "")
+           resp_data = response.json()
+           raw = resp_data.get("response", "")
+           done_reason = resp_data.get("done_reason", "")
+           eval_count = resp_data.get("eval_count", 0)
+           if done_reason == "length":
+               logger.warning("Ollama : réponse tronquée (done_reason=length, %d tokens, %d chars)",
+                              eval_count, len(raw))
+           else:
+               logger.debug("Ollama : réponse complète (%d tokens, %d chars)", eval_count, len(raw))
            result = parse_json_response(raw)
            if result is not None:
                return result