fix: réparation JSON tronqué + retry 429 + whitelist codes CPAM anti-hallucination
- parse_json_response : réparation JSON tronqué par max_tokens (fermeture auto des structures ouvertes), meilleur stripping des blocs fencés avec texte superflu après la fermeture ```
- call_ollama : retry avec backoff exponentiel (1s/2s/4s) pour les erreurs 429 rate limit, 3 tentatives au lieu de 2
- Validation adversariale : max_tokens 800 → 1500
- Prompt CPAM : whitelist PÉRIMÈTRE DE CODES AUTORISÉS (dossier DP+DAS + UCR) avec interdiction explicite des codes hors périmètre
- Tests : 19 tests parse_json/_repair_truncated_json, 6 tests whitelist

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
128
tests/test_ollama_client.py
Normal file
128
tests/test_ollama_client.py
Normal file
@@ -0,0 +1,128 @@
|
||||
"""Tests unitaires pour le client Ollama (parsing JSON, réparation tronqué)."""
|
||||
|
||||
import pytest
|
||||
|
||||
from src.medical.ollama_client import parse_json_response, _repair_truncated_json
|
||||
|
||||
|
||||
class TestParseJsonResponse:
    """Tests for parse_json_response()."""

    def test_valid_json(self):
        """A bare JSON object is decoded to the matching dict."""
        result = parse_json_response('{"key": "value"}')
        assert result == {"key": "value"}

    def test_fenced_json(self):
        """JSON wrapped in a complete ```json fence is decoded."""
        raw = '```json\n{"key": "value"}\n```'
        assert parse_json_response(raw) == {"key": "value"}

    def test_fenced_no_closing(self):
        """JSON after an opening fence with no closing fence is decoded."""
        raw = '```json\n{"key": "value"}'
        assert parse_json_response(raw) == {"key": "value"}

    def test_whitespace(self):
        """Leading and trailing whitespace around the JSON is ignored."""
        assert parse_json_response(' \n {"a": 1} \n') == {"a": 1}

    def test_invalid_json_returns_none(self):
        """Free text that is not JSON yields None."""
        assert parse_json_response("pas du json") is None

    def test_fenced_with_trailing_text(self):
        """Fenced JSON followed by extra LLM text after the closing fence."""
        raw = '```json\n{"coherent": true, "erreurs": [], "score_confiance": 9}\n```\n\n**Justification de la vérification :**\n1. OK'
        result = parse_json_response(raw)
        assert result is not None
        assert result["coherent"] is True
        # The fixture carries an empty error list; check it survives too.
        assert result["erreurs"] == []
        assert result["score_confiance"] == 9

    def test_empty_string(self):
        """An empty response yields None."""
        assert parse_json_response("") is None
|
||||
|
||||
|
||||
class TestRepairTruncatedJson:
    """Tests for _repair_truncated_json() — repair of JSON cut off by max_tokens."""

    def test_truncated_object(self):
        """Top-level object left unclosed."""
        text = '{"coherent": false, "erreurs": ["erreur 1"]'
        result = _repair_truncated_json(text)
        assert result is not None
        assert result["coherent"] is False
        assert result["erreurs"] == ["erreur 1"]

    def test_truncated_array_and_object(self):
        """Both the array and the enclosing object left unclosed."""
        text = '{"coherent": false, "erreurs": ["erreur 1", "erreur 2"'
        result = _repair_truncated_json(text)
        assert result is not None
        assert result["coherent"] is False
        assert len(result["erreurs"]) == 2

    def test_truncated_string_in_array(self):
        """String cut off in the middle, inside an array."""
        text = '{"coherent": false, "erreurs": ["erreur longue qui se term'
        result = _repair_truncated_json(text)
        assert result is not None
        assert result["coherent"] is False
        assert len(result["erreurs"]) == 1
        assert "erreur longue" in result["erreurs"][0]

    def test_deeply_nested_truncation(self):
        """Truncation inside a nested object."""
        text = '{"data": {"inner": [1, 2'
        result = _repair_truncated_json(text)
        assert result is not None
        assert result["data"]["inner"] == [1, 2]

    def test_valid_json_passthrough(self):
        """Already-valid JSON is returned unchanged."""
        text = '{"a": 1}'
        result = _repair_truncated_json(text)
        assert result == {"a": 1}

    def test_complete_adversarial_format(self):
        """Exact, complete payload shape of the adversarial validation."""
        text = '{"coherent": false, "erreurs": ["Incohérence bio CRP"], "score_confiance": 4}'
        result = _repair_truncated_json(text)
        assert result is not None
        assert result["score_confiance"] == 4

    def test_adversarial_truncated_at_score(self):
        """Truncation right before the score_confiance field."""
        text = '{"coherent": false, "erreurs": ["Incohérence bio"]'
        result = _repair_truncated_json(text)
        assert result is not None
        assert result["coherent"] is False
        # score_confiance is missing from the input; the caller is the one
        # that falls back to -1, so the repaired dict must not contain it.
        assert "score_confiance" not in result

    def test_hopelessly_broken(self):
        """Text that cannot be repaired at all yields None."""
        assert _repair_truncated_json("juste du texte libre") is None

    def test_escaped_quotes(self):
        """Strings containing escaped double quotes."""
        text = '{"msg": "il a dit \\"bonjour\\""}'
        result = _repair_truncated_json(text)
        assert result is not None
        assert "bonjour" in result["msg"]

    def test_truncated_after_escaped_quote(self):
        """Truncation after an escaped quote inside a string."""
        text = '{"msg": "valeur avec \\"guillemet'
        result = _repair_truncated_json(text)
        assert result is not None

    def test_parse_json_uses_repair(self):
        """parse_json_response() falls back to the repair step."""
        # Truncated JSON (object never closed).
        raw = '{"coherent": true, "erreurs": [], "score_confiance": 8'
        result = parse_json_response(raw)
        assert result is not None
        assert result["coherent"] is True
        assert result["score_confiance"] == 8

    def test_parse_json_repair_fenced_truncated(self):
        """JSON that is both fenced and truncated."""
        raw = '```json\n{"coherent": false, "erreurs": ["erreur"'
        result = parse_json_response(raw)
        assert result is not None
        assert result["coherent"] is False
|
||||
Reference in New Issue
Block a user