Le pipeline utilise désormais gemma3:12b (rapide) pour le codage CIM-10 et gemma3:27b (meilleur raisonnement) pour la contre-argumentation CPAM. Configurable via OLLAMA_MODEL_CPAM et OLLAMA_TIMEOUT_CPAM. Inclut aussi : traçabilité source/page DAS, niveaux CMA ATIH, sévérité, page tracker PDF, améliorations fusion et filtres DAS. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
96 lines
3.0 KiB
Python
96 lines
3.0 KiB
Python
"""Tests pour le module PageTracker (traçabilité source)."""
|
|
|
|
import pytest
|
|
|
|
from src.extraction.page_tracker import PageTracker
|
|
|
|
|
|
class TestCharToPage:
    """Checks for ``PageTracker.char_to_page`` using small hand-written offset lists."""

    @staticmethod
    def _two_page_tracker():
        """Return a fresh tracker built from the ranges (0, 100) and (102, 200)."""
        return PageTracker([(0, 100), (102, 200)])

    def test_first_page(self):
        """Positions 0, 50 and 99 are expected to resolve to page 1."""
        tracker = self._two_page_tracker()
        for position in (0, 50, 99):
            assert tracker.char_to_page(position) == 1

    def test_second_page(self):
        """Positions 102 and 150 are expected to resolve to page 2."""
        tracker = self._two_page_tracker()
        for position in (102, 150):
            assert tracker.char_to_page(position) == 2

    def test_beyond_last_page(self):
        """A position past every listed range (300) is expected to resolve to page 2."""
        tracker = self._two_page_tracker()
        assert tracker.char_to_page(300) == 2

    def test_single_page(self):
        """With the single range (0, 500), position 250 is expected on page 1."""
        tracker = PageTracker([(0, 500)])
        assert tracker.char_to_page(250) == 1

    def test_empty_offsets(self):
        """With an empty offset list, position 0 is still expected on page 1."""
        tracker = PageTracker([])
        assert tracker.char_to_page(0) == 1
|
|
|
|
|
|
class TestFindPageForText:
    """Checks for ``PageTracker.find_page_for_text`` on a simulated 3-page document."""

    def _make_tracker(self):
        """Simulate a three-page document.

        Returns a ``(tracker, full_text)`` pair. ``full_text`` is the three
        page strings joined by a blank-line separator, and the tracker is
        built from one ``(start, start + len(page))`` tuple per page, with
        positions measured in ``full_text``.
        """
        pages = [
            "Pancréatite aiguë biliaire",
            "Cholécystectomie par coelioscopie",
            "TTT de sortie: Augmentin IV",
        ]
        separator = "\n\n"

        spans = []
        cursor = 0
        for page in pages:
            end = cursor + len(page)
            spans.append((cursor, end))
            # The next page begins after the separator that follows this one.
            cursor = end + len(separator)

        return PageTracker(spans), separator.join(pages)

    def _page_of(self, needle):
        """Look up ``needle`` in a freshly built simulated document."""
        tracker, document = self._make_tracker()
        return tracker.find_page_for_text(needle, document)

    def test_exact_match_page1(self):
        """A word taken from the first page is expected to yield page 1."""
        assert self._page_of("Pancréatite") == 1

    def test_exact_match_page2(self):
        """A word taken from the second page is expected to yield page 2."""
        assert self._page_of("Cholécystectomie") == 2

    def test_exact_match_page3(self):
        """A word taken from the third page is expected to yield page 3."""
        assert self._page_of("Augmentin") == 3

    def test_case_insensitive(self):
        """A lower-cased query is expected to match the capitalised word on page 1."""
        assert self._page_of("pancréatite") == 1

    def test_not_found(self):
        """A word absent from the document is expected to yield ``None``."""
        assert self._page_of("inexistant") is None

    def test_empty_text(self):
        """An empty query is expected to yield ``None``."""
        assert self._page_of("") is None
|
|
|
|
|
|
class TestExtractExcerpt:
    """Checks for ``PageTracker.extract_excerpt`` on single-page documents."""

    def test_returns_excerpt(self):
        """A term buried mid-document is expected back with ``...`` on both sides.

        The term has 200 filler characters on each side and the call asks
        for ``context_chars=50``.
        """
        needle = "Pancréatite aiguë"
        document = "A" * 200 + needle + "B" * 200
        tracker = PageTracker([(0, len(document))])

        snippet = tracker.extract_excerpt(needle, document, context_chars=50)

        assert snippet is not None
        assert needle in snippet
        assert snippet.startswith("...")
        assert snippet.endswith("...")

    def test_at_start(self):
        """When the term opens the document, no leading ``...`` is expected."""
        document = "Pancréatite aiguë biliaire " + "X" * 200
        tracker = PageTracker([(0, len(document))])

        snippet = tracker.extract_excerpt("Pancréatite", document, context_chars=50)

        assert snippet is not None
        assert not snippet.startswith("...")

    def test_not_found(self):
        """A term absent from the document is expected to yield ``None``."""
        document = "Texte sans rapport"
        tracker = PageTracker([(0, len(document))])
        assert tracker.extract_excerpt("inexistant", document) is None
|