feat: pass LLM hybride pour DAS + interface admin référentiels RAG
Chantier 1 — Extraction DAS par LLM :
- Nouveau prompt expert DIM dans rag_search.py (extract_das_llm)
- Phase 4 dans cim10_extractor.py : détection de DAS supplémentaires avant l'enrichissement RAG
- Cache persistant (clé = hash du texte), validation CIM-10, déduplication
- Activé uniquement avec use_rag=True (--no-rag le désactive)

Chantier 2 — Admin référentiels :
- Config : REFERENTIELS_DIR, UPLOAD_MAX_SIZE_MB, ALLOWED_EXTENSIONS
- Chunking générique (PDF/CSV/Excel/TXT) + ajout incrémental FAISS dans rag_index.py
- ReferentielManager CRUD dans viewer/referentiels.py
- 5 routes Flask (listing, upload, indexation, suppression, rebuild)
- Template admin avec tableau interactif + lien sidebar

Fix : `if cache` → `if cache is not None` (un OllamaCache vide était évalué à False)

410 tests passent (27 nouveaux, 0 régression).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
179
tests/test_referentiels.py
Normal file
179
tests/test_referentiels.py
Normal file
@@ -0,0 +1,179 @@
|
||||
"""Tests pour le gestionnaire de référentiels et les routes Flask associées."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from src.viewer.referentiels import ReferentielManager
|
||||
from src.config import ALLOWED_EXTENSIONS, UPLOAD_MAX_SIZE_MB
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests ReferentielManager
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestReferentielManager:
    """Tests for ``ReferentielManager``: add, list, get, remove, validation and storage."""

    @pytest.fixture
    def manager(self, tmp_path):
        """Return a manager rooted at a ``refs`` folder inside pytest's temp directory."""
        refs_dir = tmp_path / "refs"
        return ReferentielManager(refs_dir)

    def test_add_file(self, manager):
        """Adding a file returns an entry carrying the expected upload metadata."""
        payload = b"fake pdf content"
        entry = manager.add_file("guide.pdf", payload)

        expected = {
            "filename": "guide.pdf",
            "extension": ".pdf",
            "status": "uploaded",
            "size_bytes": len(payload),
            "chunks_count": 0,
        }
        # Compare only the fields under test; the entry holds more keys (e.g. "id").
        observed = {key: entry[key] for key in expected}
        assert observed == expected

    def test_list_all(self, manager):
        """Every added file shows up in ``list_all``."""
        uploads = (("a.txt", b"hello"), ("b.csv", b"col1,col2"))
        for name, content in uploads:
            manager.add_file(name, content)

        assert len(manager.list_all()) == 2

    def test_get(self, manager):
        """An entry can be fetched back by the id returned from ``add_file``."""
        created = manager.add_file("guide.pdf", b"content")

        fetched = manager.get(created["id"])

        assert fetched is not None
        assert fetched["filename"] == "guide.pdf"

    def test_get_not_found(self, manager):
        """Looking up an unknown id yields ``None``."""
        missing = manager.get("nonexistent")
        assert missing is None

    def test_remove(self, manager):
        """Removing an existing entry returns ``True`` and drops it from the index."""
        ref_id = manager.add_file("guide.pdf", b"content")["id"]

        removed = manager.remove(ref_id)

        assert removed is True
        assert len(manager.list_all()) == 0
        assert manager.get(ref_id) is None

    def test_remove_not_found(self, manager):
        """Removing an unknown id returns ``False``."""
        removed = manager.remove("nonexistent")
        assert removed is False

    def test_add_file_invalid_extension(self, manager):
        """A file with a rejected extension raises ``ValueError`` mentioning "Extension"."""
        with pytest.raises(ValueError, match="Extension"):
            manager.add_file("malware.exe", b"evil")

    def test_add_file_too_large(self, manager):
        """A payload one byte over the configured limit raises ``ValueError``."""
        limit_bytes = UPLOAD_MAX_SIZE_MB * 1024 * 1024
        oversized = b"x" * (limit_bytes + 1)

        with pytest.raises(ValueError, match="volumineux"):
            manager.add_file("big.pdf", oversized)

    def test_persistence(self, tmp_path):
        """The index persists between manager instances sharing a directory."""
        refs_dir = tmp_path / "refs"

        writer = ReferentielManager(refs_dir)
        writer.add_file("a.txt", b"hello")

        # A second instance on the same directory must see the earlier upload.
        reader = ReferentielManager(refs_dir)
        entries = reader.list_all()
        assert len(entries) == 1
        assert reader.list_all()[0]["filename"] == "a.txt"

    def test_file_stored_on_disk(self, manager, tmp_path):
        """The uploaded bytes are written under the manager directory as ``stored_name``."""
        content = b"file content here"
        entry = manager.add_file("test.txt", content)

        on_disk = manager._dir / entry["stored_name"]

        assert on_disk.exists()
        assert on_disk.read_bytes() == content

    def test_remove_deletes_file(self, manager):
        """Removing an entry also deletes its stored file."""
        entry = manager.add_file("test.txt", b"content")
        on_disk = manager._dir / entry["stored_name"]
        assert on_disk.exists()

        manager.remove(entry["id"])

        assert not on_disk.exists()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests chunking générique
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestChunking:
    """Tests for ``chunk_user_file`` on TXT, CSV and unsupported inputs."""

    def test_chunk_txt(self, tmp_path):
        """A text file yields at least two chunks, all tagged with the document name."""
        from src.medical.rag_index import chunk_user_file

        # Three long paragraphs plus one very short one ("Court").
        content = (
            "Premier paragraphe avec assez de mots pour le seuil.\n\n"
            "Deuxième paragraphe avec encore plus de mots pour dépasser le minimum.\n\n"
            "Court\n\n"
            "Troisième paragraphe qui devrait aussi être un chunk valide."
        )
        source = tmp_path / "test.txt"
        source.write_text(content, encoding="utf-8")

        chunks = chunk_user_file(source, "test_doc")

        # At least two of the paragraphs are long enough to become chunks.
        assert len(chunks) >= 2
        assert all(chunk.document == "test_doc" for chunk in chunks)

    def test_chunk_csv(self, tmp_path):
        """A CSV with a header and two data rows yields two chunks, in row order."""
        from src.medical.rag_index import chunk_user_file

        content = (
            "code,description,note\n"
            "K85.1,Pancréatite aiguë biliaire,diagnostic fréquent\n"
            "I10,Hypertension essentielle,comorbidité courante\n"
        )
        source = tmp_path / "test.csv"
        source.write_text(content, encoding="utf-8")

        chunks = chunk_user_file(source, "csv_doc")

        assert len(chunks) == 2
        first_row, second_row = chunks[0], chunks[1]
        assert "K85.1" in first_row.text
        assert "I10" in second_row.text

    def test_chunk_unsupported_extension(self, tmp_path):
        """An unsupported extension produces an empty chunk list."""
        from src.medical.rag_index import chunk_user_file

        source = tmp_path / "test.xyz"
        source.write_text("content")

        result = chunk_user_file(source, "bad")

        assert result == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests routes Flask
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestReferentielRoutes:
    """Tests for the Flask admin routes under ``/admin/referentiels``."""

    @pytest.fixture
    def app(self, tmp_path):
        """Create a Flask application in testing mode.

        NOTE(review): ``tmp_path`` is requested but not used here, and
        ``create_app()`` is called without arguments, so nothing in this
        fixture points the referentiel storage at a temporary directory.
        Confirm that the upload test does not write into the real
        referentiels directory.
        """
        from src.viewer.app import create_app

        flask_app = create_app()
        flask_app.config["TESTING"] = True
        return flask_app

    @pytest.fixture
    def client(self, app):
        """Return a Flask test client bound to the test application."""
        return app.test_client()

    def test_admin_page_loads(self, client):
        """The admin page responds 200 and contains its title."""
        resp = client.get("/admin/referentiels")

        assert resp.status_code == 200
        body = resp.data.decode()
        assert "Référentiels RAG" in body

    def test_upload_no_file(self, client):
        """Posting without a file part is rejected with 400 and a JSON error."""
        resp = client.post("/admin/referentiels/upload")

        assert resp.status_code == 400
        payload = resp.get_json()
        assert "error" in payload

    def test_upload_valid_file(self, client):
        """Uploading a valid text file returns 200 with the created entry."""
        from io import BytesIO

        form = {"file": (BytesIO(b"test content"), "doc.txt")}

        resp = client.post(
            "/admin/referentiels/upload",
            data=form,
            content_type="multipart/form-data",
        )
        payload = resp.get_json()

        assert resp.status_code == 200
        assert payload["ok"] is True
        assert payload["referentiel"]["filename"] == "doc.txt"

    def test_delete_nonexistent(self, client):
        """Deleting an unknown referentiel id returns 404."""
        resp = client.delete("/admin/referentiels/nonexistent")
        assert resp.status_code == 404
|
||||
Reference in New Issue
Block a user