fix: CPAM max_tokens 6000→16000 + Ollama truncation diagnostic

- Argumentation + correction: max_tokens raised to 16000 (num_predict)
- ollama_client: log done_reason=length to detect server-side truncation
- Live result: 1/3 Tier B (dossier 132 moved from C to B, score 5/10)
- The two remaining Tier C dossiers are blocked by hallucinated codes and missing bio data, not by max_tokens

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
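For context, the change hinges on two details of Ollama's generate API: the client-side max_tokens budget is passed to the server as options.num_predict, and the non-streaming response reports done_reason and eval_count. A minimal sketch of that mapping, assuming a local /api/generate endpoint and a hypothetical model name (the project's real call_ollama wrapper is not part of this diff):

```python
# Sketch only: endpoint URL and model name are assumptions, not project code.
import requests

def generate(prompt: str, max_tokens: int = 16000) -> dict:
    # Ollama's /api/generate takes the token budget as options.num_predict;
    # "max_tokens" is the wrapper-level name used in this codebase.
    resp = requests.post(
        "http://localhost:11434/api/generate",
        json={
            "model": "qwen2.5:14b",  # hypothetical model name
            "prompt": prompt,
            "stream": False,
            "options": {"num_predict": max_tokens, "temperature": 0.1},
        },
        timeout=300,
    )
    resp.raise_for_status()
    data = resp.json()
    # done_reason == "length" means the server stopped at the num_predict
    # limit (a truncated answer); eval_count is the generated token count.
    return {
        "text": data.get("response", ""),
        "done_reason": data.get("done_reason", ""),
        "eval_count": data.get("eval_count", 0),
    }
```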
@@ -126,12 +126,12 @@ def generate_cpam_response(
     prompt, tag_map = _build_cpam_prompt(dossier, controle, sources, extraction)
 
     # 4. Appel LLM — Ollama (rôle cpam) > Haiku fallback
-    result = call_ollama(prompt, temperature=0.1, max_tokens=6000, role="cpam")
+    result = call_ollama(prompt, temperature=0.1, max_tokens=16000, role="cpam")
     if result is not None:
         logger.info(" Contre-argumentation via Ollama")
     else:
         logger.info(" Ollama indisponible → fallback Anthropic Haiku")
-        result = call_anthropic(prompt, temperature=0.1, max_tokens=6000)
+        result = call_anthropic(prompt, temperature=0.1, max_tokens=16000)
         if result is not None:
             logger.info(" Contre-argumentation via Anthropic Haiku")
 
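On the fallback leg, max_tokens is the native parameter of the Anthropic Messages API, so no renaming is needed there. A hedged sketch of what a call_anthropic wrapper could look like (the model id and error handling are assumptions, not the project's actual code):

```python
# Sketch only: model id is a hypothetical alias; the real wrapper may differ.
import anthropic

def call_anthropic(prompt: str, temperature: float = 0.1,
                   max_tokens: int = 16000) -> str | None:
    client = anthropic.Anthropic()  # reads ANTHROPIC_API_KEY from the env
    try:
        msg = client.messages.create(
            model="claude-3-5-haiku-latest",  # hypothetical model id
            max_tokens=max_tokens,            # native parameter name here
            temperature=temperature,
            messages=[{"role": "user", "content": prompt}],
        )
        return msg.content[0].text
    except anthropic.APIError:
        return None  # caller treats None as "fallback also failed"
```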
@@ -188,9 +188,9 @@ def generate_cpam_response(
                 validation.get("score_confiance"), len(erreurs_v))
 
     correction_prompt = _build_correction_prompt(prompt, result, validation)
-    corrected = call_ollama(correction_prompt, temperature=0.0, max_tokens=6000, role="cpam")
+    corrected = call_ollama(correction_prompt, temperature=0.0, max_tokens=16000, role="cpam")
     if corrected is None:
-        corrected = call_anthropic(correction_prompt, temperature=0.0, max_tokens=6000)
+        corrected = call_anthropic(correction_prompt, temperature=0.0, max_tokens=16000)
 
     if corrected:
         # Re-valider la correction
@@ -189,7 +189,15 @@ def call_ollama(
                 time.sleep(delay)
                 continue
             response.raise_for_status()
-            raw = response.json().get("response", "")
+            resp_data = response.json()
+            raw = resp_data.get("response", "")
+            done_reason = resp_data.get("done_reason", "")
+            eval_count = resp_data.get("eval_count", 0)
+            if done_reason == "length":
+                logger.warning("Ollama : réponse tronquée (done_reason=length, %d tokens, %d chars)",
+                               eval_count, len(raw))
+            else:
+                logger.debug("Ollama : réponse complète (%d tokens, %d chars)", eval_count, len(raw))
             result = parse_json_response(raw)
             if result is not None:
                 return result
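One way to exercise the new diagnostic is to force a truncation with a deliberately tiny budget and check the log output. A sketch, assuming the module is importable as ollama_client (import path is an assumption):

```python
# Sketch only: import path is assumed; the warning text comes from the diff.
import logging

logging.basicConfig(level=logging.DEBUG)  # surface the new debug/warning logs

from ollama_client import call_ollama  # assumed module path

# A budget of 8 tokens should trigger done_reason=length on any real prompt;
# the parsed-JSON result will likely be None, but the warning still fires.
call_ollama("Liste 50 codes CCAM fictifs.", temperature=0.1,
            max_tokens=8, role="cpam")
# Expected log: "Ollama : réponse tronquée (done_reason=length, 8 tokens, ...)"
```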