Files
t2a_v2/tests/test_page_tracker.py
dom 01d47f3c4b feat: mode hybride Ollama — gemma3:27b pour CPAM, 12b pour codage
Le pipeline utilise désormais gemma3:12b (rapide) pour le codage CIM-10
et gemma3:27b (meilleur raisonnement) pour la contre-argumentation CPAM.
Configurable via OLLAMA_MODEL_CPAM et OLLAMA_TIMEOUT_CPAM.

Inclut aussi : traçabilité source/page DAS, niveaux CMA ATIH, sévérité,
page tracker PDF, améliorations fusion et filtres DAS.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-17 17:53:53 +01:00

96 lines
3.0 KiB
Python

"""Tests pour le module PageTracker (traçabilité source)."""
import pytest
from src.extraction.page_tracker import PageTracker
class TestCharToPage:
    """Checks for ``PageTracker.char_to_page``: character offset -> page number."""

    @staticmethod
    def _two_page_tracker():
        """Return a tracker built from the spans (0, 100) and (102, 200)."""
        return PageTracker([(0, 100), (102, 200)])

    def test_first_page(self):
        """Offsets 0, 50 and 99 are all reported as page 1."""
        tracker = self._two_page_tracker()
        for position in (0, 50, 99):
            assert tracker.char_to_page(position) == 1

    def test_second_page(self):
        """Offsets 102 and 150 are both reported as page 2."""
        tracker = self._two_page_tracker()
        for position in (102, 150):
            assert tracker.char_to_page(position) == 2

    def test_beyond_last_page(self):
        """An offset past the final span (300 > 200) is reported as page 2."""
        tracker = self._two_page_tracker()
        page = tracker.char_to_page(300)
        assert page == 2

    def test_single_page(self):
        """With a single span (0, 500), offset 250 is reported as page 1."""
        tracker = PageTracker([(0, 500)])
        page = tracker.char_to_page(250)
        assert page == 1

    def test_empty_offsets(self):
        """With no spans at all, offset 0 is still reported as page 1."""
        tracker = PageTracker([])
        page = tracker.char_to_page(0)
        assert page == 1
class TestFindPageForText:
    """Checks for ``PageTracker.find_page_for_text`` on a three-page document."""

    def _make_tracker(self):
        """Build a tracker over a simulated three-page document.

        Returns:
            A ``(tracker, full_text)`` pair. ``full_text`` is the three pages
            joined by a blank-line separator, and the tracker receives one
            ``(start, start + len(page))`` span per page, where ``start`` is
            the page's position inside ``full_text``.
        """
        pages = [
            "Pancréatite aiguë biliaire",
            "Cholécystectomie par coelioscopie",
            "TTT de sortie: Augmentin IV",
        ]
        separator = "\n\n"
        spans = []
        cursor = 0
        for page in pages:
            end = cursor + len(page)
            spans.append((cursor, end))
            # Skip over the separator so the next span starts on the next page.
            cursor = end + len(separator)
        document = separator.join(pages)
        return PageTracker(spans), document

    def test_exact_match_page1(self):
        """A term that appears on the first page resolves to page 1."""
        tracker, document = self._make_tracker()
        page = tracker.find_page_for_text("Pancréatite", document)
        assert page == 1

    def test_exact_match_page2(self):
        """A term that appears on the second page resolves to page 2."""
        tracker, document = self._make_tracker()
        page = tracker.find_page_for_text("Cholécystectomie", document)
        assert page == 2

    def test_exact_match_page3(self):
        """A term that appears on the third page resolves to page 3."""
        tracker, document = self._make_tracker()
        page = tracker.find_page_for_text("Augmentin", document)
        assert page == 3

    def test_case_insensitive(self):
        """A lower-cased query still matches the capitalised page-1 text."""
        tracker, document = self._make_tracker()
        page = tracker.find_page_for_text("pancréatite", document)
        assert page == 1

    def test_not_found(self):
        """A term absent from the document yields ``None``."""
        tracker, document = self._make_tracker()
        page = tracker.find_page_for_text("inexistant", document)
        assert page is None

    def test_empty_text(self):
        """An empty query yields ``None``."""
        tracker, document = self._make_tracker()
        page = tracker.find_page_for_text("", document)
        assert page is None
class TestExtractExcerpt:
    """Checks for ``PageTracker.extract_excerpt`` on single-page documents."""

    def test_returns_excerpt(self):
        """A match with 200 characters on each side is wrapped in "..." markers."""
        needle = "Pancréatite aiguë"
        document = "A" * 200 + needle + "B" * 200
        tracker = PageTracker([(0, len(document))])
        snippet = tracker.extract_excerpt(needle, document, context_chars=50)
        assert snippet is not None
        assert needle in snippet
        assert snippet.startswith("...")
        assert snippet.endswith("...")

    def test_at_start(self):
        """A match at the very beginning of the text has no leading "..."."""
        document = "Pancréatite aiguë biliaire " + "X" * 200
        tracker = PageTracker([(0, len(document))])
        snippet = tracker.extract_excerpt("Pancréatite", document, context_chars=50)
        assert snippet is not None
        assert not snippet.startswith("...")

    def test_not_found(self):
        """A term absent from the text yields ``None``."""
        document = "Texte sans rapport"
        tracker = PageTracker([(0, len(document))])
        snippet = tracker.extract_excerpt("inexistant", document)
        assert snippet is None