feat: architecture multi-modèles LLM + externalisation des prompts
- Ajout de OLLAMA_MODELS (coding/cpam/validation/qc) dans config.py avec get_model()
- Paramètre role= dans call_ollama() pour le dispatch par rôle
- Cache Ollama : modèle stocké par entrée (migration automatique de l'ancien format)
- 7 prompts externalisés dans src/prompts/templates.py (format str.format)
- Viewer : admin multi-modèles, endpoint PDF avec redaction, source texte
- Documentation des prompts dans docs/prompts.md

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -407,7 +407,7 @@ class TestGenerateResponse:
|
||||
]
|
||||
call_count = {"n": 0}
|
||||
|
||||
def ollama_side_effect(prompt, temperature=0.1, max_tokens=4000):
|
||||
def ollama_side_effect(prompt, temperature=0.1, max_tokens=4000, **kwargs):
|
||||
call_count["n"] += 1
|
||||
if call_count["n"] == 1:
|
||||
return {"comprehension_contestation": "Extraction...", "elements_cliniques_pertinents": [], "points_accord_potentiels": [], "codes_en_jeu": {}}
|
||||
@@ -1155,7 +1155,7 @@ class TestExtractionPass:
|
||||
"""L'orchestrateur appelle extraction + argumentation + validation."""
|
||||
call_count = {"n": 0}
|
||||
|
||||
def ollama_side_effect(prompt, temperature=0.1, max_tokens=4000):
|
||||
def ollama_side_effect(prompt, temperature=0.1, max_tokens=4000, **kwargs):
|
||||
call_count["n"] += 1
|
||||
if call_count["n"] == 1:
|
||||
return {
|
||||
@@ -1249,7 +1249,7 @@ class TestValidateAdversarial:
|
||||
"""Incohérences détectées → avertissements dans le texte formaté."""
|
||||
call_count = {"n": 0}
|
||||
|
||||
def ollama_side_effect(prompt, temperature=0.1, max_tokens=4000):
|
||||
def ollama_side_effect(prompt, temperature=0.1, max_tokens=4000, **kwargs):
|
||||
call_count["n"] += 1
|
||||
if call_count["n"] == 1:
|
||||
return {"comprehension_contestation": "Extraction", "elements_cliniques_pertinents": [], "points_accord_potentiels": [], "codes_en_jeu": {}}
|
||||
|
||||
@@ -49,7 +49,30 @@ class TestOllamaCache:
|
||||
cache.save()
|
||||
assert not path.exists()
|
||||
|
||||
def test_model_change_invalidates(self, tmp_path):
|
||||
def test_model_per_entry_different_model_miss(self, tmp_path):
    """A lookup naming a model other than the one used for the put misses."""
    store = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
    store.put("HTA", "das", {"code": "I10"})

    # Same cache instance, but the lookup asks for another model.
    looked_up = store.get("HTA", "das", model="llama3:8b")
    assert looked_up is None
|
||||
|
||||
def test_model_per_entry_same_model_hit(self, tmp_path):
    """A lookup naming the same model as the put returns the stored result."""
    expected = {"code": "I10"}
    store = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
    store.put("HTA", "das", {"code": "I10"})

    assert store.get("HTA", "das", model="gemma3:12b") == expected
|
||||
|
||||
def test_model_per_entry_explicit_put_model(self, tmp_path):
    """put() with an explicit model= stores the entry under that model."""
    store = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
    store.put("HTA", "das", {"code": "I10"}, model="llama3:8b")

    # A lookup without model= (constructor model "gemma3:12b") must miss.
    default_lookup = store.get("HTA", "das")
    assert default_lookup is None

    # A lookup naming the model given to put() must hit.
    explicit_lookup = store.get("HTA", "das", model="llama3:8b")
    assert explicit_lookup == {"code": "I10"}
|
||||
|
||||
def test_save_reload_different_model_miss(self, tmp_path):
|
||||
"""Après save/reload, les entrées gardent leur modèle."""
|
||||
path = tmp_path / "cache.json"
|
||||
cache = OllamaCache(path, "gemma3:12b")
|
||||
cache.put("HTA", "das", {"code": "I10"})
|
||||
@@ -57,7 +80,16 @@ class TestOllamaCache:
|
||||
|
||||
cache2 = OllamaCache(path, "llama3:8b")
|
||||
assert cache2.get("HTA", "das") is None
|
||||
assert len(cache2) == 0
|
||||
|
||||
def test_save_reload_same_model_hit(self, tmp_path):
    """After save and reload with the same model, the lookup still hits."""
    cache_file = tmp_path / "cache.json"

    writer = OllamaCache(cache_file, "gemma3:12b")
    writer.put("HTA", "das", {"code": "I10"})
    writer.save()

    # A fresh instance on the same file, constructed with the same model.
    reader = OllamaCache(cache_file, "gemma3:12b")
    reloaded = reader.get("HTA", "das")
    assert reloaded == {"code": "I10"}
|
||||
|
||||
def test_corrupted_file(self, tmp_path):
|
||||
path = tmp_path / "cache.json"
|
||||
@@ -95,14 +127,60 @@ class TestOllamaCache:
|
||||
assert not errors
|
||||
assert len(cache) == 20
|
||||
|
||||
def test_json_format(self, tmp_path):
|
||||
"""Le fichier JSON contient le modèle et les entrées."""
|
||||
def test_json_format_new(self, tmp_path):
    """The saved JSON has no global model; each entry carries its own.

    Writes one entry, saves the cache, then inspects the raw file: there
    must be no top-level "model" key, and the single item under "entries"
    must hold both the model it was stored with and the result payload.
    """
    path = tmp_path / "cache.json"
    cache = OllamaCache(path, "gemma3:12b")
    cache.put("HTA", "das", {"code": "I10"})
    cache.save()

    raw = json.loads(path.read_text(encoding="utf-8"))

    # The old-format check `raw["model"] == "gemma3:12b"` was dropped here:
    # it cannot hold together with the assertion below, so the test could
    # never pass while both were present.
    assert "model" not in raw  # no global model any more
    assert "entries" in raw
    assert len(raw["entries"]) == 1

    # Only one entry exists, so its key does not matter for this check.
    entry = next(iter(raw["entries"].values()))
    assert entry["model"] == "gemma3:12b"
    assert entry["result"] == {"code": "I10"}
|
||||
|
||||
def test_backward_compat_old_format_migration(self, tmp_path):
    """The old format (global model, entries without a model) is migrated."""
    cache_file = tmp_path / "cache.json"

    # Seed the file with the old layout: one global model, bare results.
    legacy_entries = {
        "das::hta": {"code": "I10"},
        "dp::diabète type 2": {"code": "E11.9"},
    }
    legacy_payload = {"model": "gemma3:12b", "entries": legacy_entries}
    cache_file.write_text(json.dumps(legacy_payload), encoding="utf-8")

    # Loading with the same model must keep both entries reachable.
    migrated = OllamaCache(cache_file, "gemma3:12b")
    assert len(migrated) == 2
    assert migrated.get("HTA", "das") == {"code": "I10"}
    assert migrated.get("diabète type 2", "dp") == {"code": "E11.9"}

    # Saving must write the new layout.
    migrated.save()
    on_disk = json.loads(cache_file.read_text(encoding="utf-8"))
    assert "model" not in on_disk  # no global model any more
    hta_entry = on_disk["entries"]["das::hta"]
    assert hta_entry["model"] == "gemma3:12b"
    assert hta_entry["result"] == {"code": "I10"}
|
||||
|
||||
def test_backward_compat_old_format_wrong_model(self, tmp_path):
    """Migrated old-format entries keep the file's model, not the constructor's."""
    cache_file = tmp_path / "cache.json"
    legacy_payload = {
        "model": "gemma3:12b",
        "entries": {"das::hta": {"code": "I10"}},
    }
    cache_file.write_text(json.dumps(legacy_payload), encoding="utf-8")

    # Load with a different model: the entry stays tied to its original model.
    migrated = OllamaCache(cache_file, "llama3:8b")

    default_lookup = migrated.get("HTA", "das")
    assert default_lookup is None  # llama3:8b != gemma3:12b

    original_model_lookup = migrated.get("HTA", "das", model="gemma3:12b")
    assert original_model_lookup == {"code": "I10"}
|
||||
|
||||
@@ -1,8 +1,12 @@
|
||||
"""Tests pour le viewer Flask."""
|
||||
|
||||
import json
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
from src.viewer.app import create_app, compute_group_stats, severity_badge, format_duration, format_cpam_text
|
||||
from src.viewer.pdf_redactor import load_entities_from_report, redact_pdf, highlight_text
|
||||
from src.config import DossierMedical, Diagnostic, ActeCCAM
|
||||
|
||||
|
||||
@@ -155,3 +159,141 @@ class TestSourceTextEndpoint:
|
||||
"""Path traversal bloqué."""
|
||||
response = client.get("/api/source-text/../../etc")
|
||||
assert response.status_code in (403, 404)
|
||||
|
||||
|
||||
class TestPdfRedactorUnit:
    """Unit tests for load_entities_from_report, redact_pdf and highlight_text."""

    def test_load_entities_from_report(self, tmp_path):
        """Entities are loaded from a JSON report file."""
        found = [
            {"original": "Jean Dupont", "replacement": "[NOM_1]", "source": "ner", "category": "person"},
            {"original": "12345678901", "replacement": "[RPPS_1]", "source": "regex", "category": "rpps"},
            # Expected to be left out: too short.
            {"original": "A", "replacement": "[X]", "source": "ner", "category": "person"},
            # Expected to be left out: already a pseudonym.
            {"original": "[NOM_1]", "replacement": "[NOM_1]", "source": "ner", "category": "person"},
        ]
        report_path = tmp_path / "test_report.json"
        report_path.write_text(
            json.dumps({"source_file": "test.pdf", "entities_found": found}),
            encoding="utf-8",
        )

        entities = load_entities_from_report(report_path)

        for kept in ("Jean Dupont", "12345678901"):
            assert kept in entities
        for dropped in ("A", "[NOM_1]"):
            assert dropped not in entities

    def test_redact_pdf_produces_bytes(self, tmp_path):
        """redact_pdf returns PDF bytes that no longer contain the entity."""
        import fitz

        # Build a one-page PDF on disk holding the text to redact.
        pdf_path = tmp_path / "test.pdf"
        source_doc = fitz.open()
        first_page = source_doc.new_page()
        first_page.insert_text((72, 72), "Jean Dupont est le patient.", fontsize=12)
        source_doc.save(str(pdf_path))
        source_doc.close()

        redacted = redact_pdf(pdf_path, {"Jean Dupont"})

        assert isinstance(redacted, bytes)
        assert len(redacted) > 0
        # The output must start with the PDF magic header.
        assert redacted.startswith(b"%PDF-")

        # The redacted text must be gone from the extracted page text.
        reopened = fitz.open(stream=redacted, filetype="pdf")
        extracted = reopened[0].get_text()
        reopened.close()
        assert "Jean Dupont" not in extracted

    def test_highlight_text_adds_annotation(self, tmp_path):
        """highlight_text adds at least one annotation to the page."""
        import fitz

        source_doc = fitz.open()
        first_page = source_doc.new_page()
        first_page.insert_text((72, 72), "CRP elevee a 180 mg/L", fontsize=12)
        pdf_bytes = source_doc.tobytes()
        source_doc.close()

        highlighted = highlight_text(pdf_bytes, "CRP elevee", page_num=1)

        assert isinstance(highlighted, bytes)
        # The highlighted PDF must differ from the input bytes.
        assert highlighted != pdf_bytes

        # Count the annotations present on the first page of the result.
        reopened = fitz.open(stream=highlighted, filetype="pdf")
        annot_count = sum(1 for _ in reopened[0].annots())
        reopened.close()
        assert annot_count >= 1

    def test_highlight_text_empty_excerpt(self, tmp_path):
        """highlight_text with an empty excerpt returns the PDF unchanged."""
        import fitz

        blank_doc = fitz.open()
        blank_doc.new_page()
        pdf_bytes = blank_doc.tobytes()
        blank_doc.close()

        assert highlight_text(pdf_bytes, "") == pdf_bytes

    def test_highlight_text_ellipsis_cleaned(self, tmp_path):
        """highlight_text still matches when the excerpt is wrapped in '...'."""
        import fitz

        source_doc = fitz.open()
        first_page = source_doc.new_page()
        first_page.insert_text((72, 72), "Patient present une infection urinaire", fontsize=12)
        pdf_bytes = source_doc.tobytes()
        source_doc.close()

        highlighted = highlight_text(pdf_bytes, "...infection urinaire...", page_num=1)

        reopened = fitz.open(stream=highlighted, filetype="pdf")
        annot_total = len(list(reopened[0].annots()))
        reopened.close()
        assert annot_total >= 1

    def test_highlight_text_multiline_excerpt(self, tmp_path):
        """highlight_text works with a multi-line excerpt (real-world case)."""
        import fitz

        # Simulate a PDF page carrying several lines of text.
        page_lines = (
            (72, "Motif d'hospitalisation: Lombofessalgie"),
            (92, "chez patiente suivie pour spondylarthrite"),
            (112, "Praticien hospitalier"),
            (132, "Antecedents medicaux importants"),
        )
        source_doc = fitz.open()
        first_page = source_doc.new_page()
        for y_pos, line_text in page_lines:
            first_page.insert_text((72, y_pos), line_text, fontsize=12)
        pdf_bytes = source_doc.tobytes()
        source_doc.close()

        # Typical multi-line excerpt, as found in real dossiers.
        multiline_excerpt = (
            "...Motif d'hospitalisation: Lombofessalgie\n"
            "chez patiente suivie pour spondylarthrite\n"
            "Praticien hospitalier\n"
            "Antecedents medicaux importants..."
        )

        highlighted = highlight_text(pdf_bytes, multiline_excerpt, page_num=1)
        assert highlighted != pdf_bytes

        reopened = fitz.open(stream=highlighted, filetype="pdf")
        annot_count = len(list(reopened[0].annots()))
        reopened.close()
        assert annot_count >= 1
|
||||
|
||||
|
||||
class TestPdfEndpoint:
    """Status-code checks for GET requests under /api/pdf/."""

    def test_pdf_404_nonexistent(self, client):
        """A PDF that does not exist yields 404."""
        status = client.get("/api/pdf/nonexistent_dossier/nonexistent.pdf").status_code
        assert status == 404

    def test_pdf_security_path_traversal(self, client):
        """A path-traversal attempt is rejected with 403 or 404."""
        status = client.get("/api/pdf/../../etc/passwd.pdf").status_code
        assert status in (403, 404)

    def test_pdf_non_pdf_extension(self, client):
        """A request for a non-PDF file yields 404."""
        status = client.get("/api/pdf/some_dossier/file.txt").status_code
        assert status == 404
|
||||
|
||||
Reference in New Issue
Block a user