- Multi-modèles : 4 rôles LLM (coding=gemma3:27b-cloud, cpam=gemma3:27b-cloud, validation=deepseek-v3.2:cloud, qc=gemma3:12b) avec get_model(role) - Prompts externalisés : 7 templates dans src/prompts/templates.py - Cache Ollama : modèle stocké par entrée (migration auto ancien format) - call_ollama() : paramètre role= (priorité: model > role > global) - Quality engine : veto_engine + decision_engine + rules_router (YAML) - Benchmark qualité : scripts/benchmark_quality.py (A/B, métriques CIM-10) - Fix biologie : valeurs qualitatives (troponine négative) non filtrées - Fix CPAM : gemma3:27b-cloud au lieu de deepseek (JSON tronqué par thinking) - CPAM max_tokens 4000→6000, viewer admin multi-modèles - Benchmark 10 dossiers : 100% DAS valides, 10/10 CPAM, 243s/dossier Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
300 lines
10 KiB
Python
300 lines
10 KiB
Python
"""Tests pour le viewer Flask."""
|
|
|
|
import json
|
|
import pytest
|
|
from pathlib import Path
|
|
from unittest.mock import patch
|
|
|
|
from src.viewer.app import create_app, compute_group_stats, severity_badge, format_duration, format_cpam_text
|
|
from src.viewer.pdf_redactor import load_entities_from_report, redact_pdf, highlight_text
|
|
from src.config import DossierMedical, Diagnostic, ActeCCAM
|
|
|
|
|
|
@pytest.fixture
def app():
    """Create the viewer application with Flask's TESTING flag switched on."""
    flask_app = create_app()
    flask_app.config.update(TESTING=True)
    return flask_app
|
|
|
|
|
|
@pytest.fixture
def client(app):
    """Provide a test client bound to the ``app`` fixture."""
    test_client = app.test_client()
    return test_client
|
|
|
|
|
|
class TestGroupStats:
    """Checks for the counters returned by ``compute_group_stats``."""

    def test_group_stats(self):
        """Counters are accumulated over every dossier of the group."""
        first_dossier = DossierMedical(
            diagnostics_associes=[
                Diagnostic(texte="HTA", cim10_suggestion="I10"),
                Diagnostic(texte="Diabète", cim10_suggestion="E11.9", est_cma=True),
            ],
            actes_ccam=[ActeCCAM(texte="Cholé", code_ccam_suggestion="HMFC004")],
            alertes_codage=["Alerte 1", "Alerte 2"],
        )
        second_dossier = DossierMedical(
            diagnostics_associes=[
                Diagnostic(texte="Obésité", cim10_suggestion="E66.0"),
            ],
            actes_ccam=[ActeCCAM(texte="TDM", code_ccam_suggestion="ZCQK002")],
            alertes_codage=[],
        )
        group = [{"dossier": first_dossier}, {"dossier": second_dossier}]

        stats = compute_group_stats(group)

        # 2 + 1 associated diagnoses, 1 + 1 procedures, 2 + 0 alerts and a
        # single diagnosis flagged with est_cma=True.
        expected = {
            "das_count": 3,
            "actes_count": 2,
            "alertes_count": 2,
            "cma_count": 1,
        }
        observed = {key: stats[key] for key in expected}
        assert observed == expected

    def test_group_stats_empty(self):
        """An empty group yields zero diagnoses and zero alerts."""
        empty_stats = compute_group_stats([])
        for key in ("das_count", "alertes_count"):
            assert empty_stats[key] == 0
|
|
|
|
|
|
class TestSeverityBadgeFilter:
    """Checks for the markup produced by ``severity_badge``."""

    def test_severe(self):
        """The "severe" level shows its label and the #dc2626 colour."""
        badge = severity_badge("severe")
        for fragment in ("Sévère", "#dc2626"):
            assert fragment in badge

    def test_modere(self):
        """The "modere" level shows its accented label."""
        assert "Modéré" in severity_badge("modere")

    def test_leger(self):
        """The "leger" level shows its accented label."""
        assert "Léger" in severity_badge("leger")

    def test_none(self):
        """``None`` produces an empty string."""
        assert severity_badge(None) == ""

    def test_unknown(self):
        """An unrecognised level produces an empty string."""
        assert severity_badge("inconnu") == ""
|
|
|
|
|
|
class TestFormatDuration:
    """Checks for the human-readable output of ``format_duration``."""

    def test_none(self):
        """``None`` is rendered as an empty string."""
        formatted = format_duration(None)
        assert formatted == ""

    def test_seconds_only(self):
        """Durations under a minute keep one decimal and an "s" suffix."""
        formatted = format_duration(45.3)
        assert formatted == "45.3s"

    def test_minutes(self):
        """Minutes and leftover seconds are both displayed."""
        formatted = format_duration(150.0)
        assert formatted == "2min 30s"

    def test_exact_minutes(self):
        """A whole number of minutes omits the seconds part."""
        formatted = format_duration(120.0)
        assert formatted == "2min"

    def test_large_duration(self):
        """1257.65 seconds is displayed as 20 minutes 57 seconds."""
        formatted = format_duration(1257.65)
        assert formatted == "20min 57s"
|
|
|
|
|
|
class TestIndexPageLoads:
    """Smoke test for the index route."""

    def test_index_page_loads(self, client):
        """GET / answers 200 and the body contains "Dossiers"."""
        index_response = client.get("/")
        status = index_response.status_code
        assert status == 200
        body = index_response.data
        assert b"Dossiers" in body
|
|
|
|
|
|
class TestFormatCpamText:
    """Checks for the HTML produced by ``format_cpam_text``."""

    def test_plain_text(self):
        """A plain sentence is wrapped in a paragraph element."""
        result = format_cpam_text("Un simple paragraphe.")
        assert "<p" in result
        assert "Un simple paragraphe." in result

    def test_bullet_list(self):
        """Lines starting with "- " become items of an unordered list."""
        result = format_cpam_text("- Premier argument\n- Deuxième argument")
        assert "<ul" in result
        assert "<li>Premier argument</li>" in result
        assert "<li>Deuxième argument</li>" in result

    def test_mixed_text_and_bullets(self):
        """Paragraph text and bullet items can be mixed in one input."""
        text = "Introduction\n- Point A\n- Point B\nConclusion"
        result = format_cpam_text(text)
        assert "<p" in result
        assert "<ul" in result
        assert "<li>Point A</li>" in result
        assert "Conclusion" in result

    def test_none_input(self):
        """``None`` yields an empty string."""
        result = format_cpam_text(None)
        assert result == ""

    def test_empty_input(self):
        """An empty string yields an empty string."""
        result = format_cpam_text("")
        assert result == ""

    def test_html_escaping(self):
        """Markup in the input is escaped instead of being emitted raw."""
        result = format_cpam_text("Test <script>alert('xss')</script>")
        # The raw tag must be gone ...
        assert "<script>" not in result
        # ... and its entity-escaped form must be present. The previous
        # version compared against the raw tag a second time, which
        # contradicted the assertion above and could never pass.
        assert "&lt;script&gt;" in result
|
|
|
|
|
|
class TestDetailPageLoads:
    """Checks for the dossier detail route."""

    def test_detail_page_404(self, client):
        """A dossier file that does not exist answers 404."""
        missing = client.get("/dossier/nonexistent.json")
        status = missing.status_code
        assert status == 404
|
|
|
|
|
|
class TestSourceTextEndpoint:
    """Checks for the ``/api/source-text/`` endpoint."""

    def test_source_text_404_nonexistent(self, client):
        """An unknown dossier answers 404."""
        missing = client.get("/api/source-text/nonexistent_dossier")
        assert missing.status_code == 404

    def test_source_text_security_path_traversal(self, client):
        """A path-traversal attempt is rejected with 403 or 404."""
        traversal = client.get("/api/source-text/../../etc")
        rejected_codes = (403, 404)
        assert traversal.status_code in rejected_codes
|
|
|
|
|
|
class TestPdfRedactorUnit:
    """Unit checks for the entity loading, redaction and highlighting helpers."""

    @staticmethod
    def _build_pdf(lines=()):
        """Return an open one-page PyMuPDF document with ``lines`` written on it.

        ``lines`` is an iterable of ``(point, text)`` pairs; each text is
        inserted at its point with a 12 pt font. The caller owns the returned
        document and is responsible for closing it.
        """
        import fitz

        document = fitz.open()
        page = document.new_page()
        for point, text in lines:
            page.insert_text(point, text, fontsize=12)
        return document

    @classmethod
    def _pdf_bytes(cls, lines=()):
        """Serialise a one-page PDF holding ``lines`` and return its bytes."""
        document = cls._build_pdf(lines)
        payload = document.tobytes()
        document.close()
        return payload

    @staticmethod
    def _count_annotations(pdf_payload):
        """Count the annotations found on the first page of ``pdf_payload``."""
        import fitz

        document = fitz.open(stream=pdf_payload, filetype="pdf")
        total = sum(1 for _ in document[0].annots())
        document.close()
        return total

    def test_load_entities_from_report(self, tmp_path):
        """Entities are loaded from a JSON report file."""
        found = [
            {"original": "Jean Dupont", "replacement": "[NOM_1]", "source": "ner", "category": "person"},
            {"original": "12345678901", "replacement": "[RPPS_1]", "source": "regex", "category": "rpps"},
            # too short
            {"original": "A", "replacement": "[X]", "source": "ner", "category": "person"},
            # pseudonym
            {"original": "[NOM_1]", "replacement": "[NOM_1]", "source": "ner", "category": "person"},
        ]
        payload = {"source_file": "test.pdf", "entities_found": found}
        report_file = tmp_path / "test_report.json"
        report_file.write_text(json.dumps(payload), encoding="utf-8")

        loaded = load_entities_from_report(report_file)

        for kept in ("Jean Dupont", "12345678901"):
            assert kept in loaded
        # "A" is the too-short entry, "[NOM_1]" the pseudonym entry.
        for dropped in ("A", "[NOM_1]"):
            assert dropped not in loaded

    def test_redact_pdf_produces_bytes(self, tmp_path):
        """``redact_pdf`` returns valid PDF bytes without the redacted text."""
        import fitz

        # Write a test PDF containing some text to disk.
        source_path = tmp_path / "test.pdf"
        source_doc = self._build_pdf([((72, 72), "Jean Dupont est le patient.")])
        source_doc.save(str(source_path))
        source_doc.close()

        redacted = redact_pdf(source_path, {"Jean Dupont"})
        assert isinstance(redacted, bytes)
        assert len(redacted) > 0
        # The output must carry the PDF magic header.
        assert redacted.startswith(b"%PDF-")

        # The redacted text must no longer be extractable.
        reopened = fitz.open(stream=redacted, filetype="pdf")
        remaining_text = reopened[0].get_text()
        reopened.close()
        assert "Jean Dupont" not in remaining_text

    def test_highlight_text_adds_annotation(self, tmp_path):
        """``highlight_text`` adds a highlight annotation."""
        original = self._pdf_bytes([((72, 72), "CRP elevee a 180 mg/L")])

        highlighted = highlight_text(original, "CRP elevee", page_num=1)
        assert isinstance(highlighted, bytes)
        # The highlighted PDF must differ from the original.
        assert highlighted != original
        # At least one annotation must exist on the page.
        assert self._count_annotations(highlighted) >= 1

    def test_highlight_text_empty_excerpt(self, tmp_path):
        """``highlight_text`` with an empty excerpt returns the PDF unchanged."""
        original = self._pdf_bytes()
        unchanged = highlight_text(original, "")
        assert unchanged == original

    def test_highlight_text_ellipsis_cleaned(self, tmp_path):
        """``highlight_text`` strips the "..." markers from the excerpt."""
        original = self._pdf_bytes(
            [((72, 72), "Patient present une infection urinaire")]
        )
        highlighted = highlight_text(
            original, "...infection urinaire...", page_num=1
        )
        assert self._count_annotations(highlighted) >= 1

    def test_highlight_text_multiline_excerpt(self, tmp_path):
        """``highlight_text`` handles a multi-line excerpt (real-world case)."""
        # Simulate a PDF holding several lines of text.
        page_lines = [
            "Motif d'hospitalisation: Lombofessalgie",
            "chez patiente suivie pour spondylarthrite",
            "Praticien hospitalier",
            "Antecedents medicaux importants",
        ]
        positions = [(72, 72), (72, 92), (72, 112), (72, 132)]
        original = self._pdf_bytes(list(zip(positions, page_lines)))

        # Typical multi-line excerpt: the page lines joined by newlines and
        # wrapped in "..." markers, as found in real dossiers.
        multiline_excerpt = "..." + "\n".join(page_lines) + "..."

        highlighted = highlight_text(original, multiline_excerpt, page_num=1)
        assert highlighted != original
        assert self._count_annotations(highlighted) >= 1
|
|
|
|
|
|
class TestPdfEndpoint:
    """Checks for the ``/api/pdf/`` endpoint."""

    def test_pdf_404_nonexistent(self, client):
        """A PDF that does not exist answers 404."""
        missing = client.get("/api/pdf/nonexistent_dossier/nonexistent.pdf")
        assert missing.status_code == 404

    def test_pdf_security_path_traversal(self, client):
        """A path-traversal attempt is rejected with 403 or 404."""
        traversal = client.get("/api/pdf/../../etc/passwd.pdf")
        rejected_codes = (403, 404)
        assert traversal.status_code in rejected_codes

    def test_pdf_non_pdf_extension(self, client):
        """A request for a non-PDF file answers 404."""
        wrong_type = client.get("/api/pdf/some_dossier/file.txt")
        assert wrong_type.status_code == 404
|