"""Tests pour le module PageTracker (traçabilité source).""" import pytest from src.extraction.page_tracker import PageTracker class TestCharToPage: def test_first_page(self): pt = PageTracker([(0, 100), (102, 200)]) assert pt.char_to_page(0) == 1 assert pt.char_to_page(50) == 1 assert pt.char_to_page(99) == 1 def test_second_page(self): pt = PageTracker([(0, 100), (102, 200)]) assert pt.char_to_page(102) == 2 assert pt.char_to_page(150) == 2 def test_beyond_last_page(self): pt = PageTracker([(0, 100), (102, 200)]) assert pt.char_to_page(300) == 2 def test_single_page(self): pt = PageTracker([(0, 500)]) assert pt.char_to_page(250) == 1 def test_empty_offsets(self): pt = PageTracker([]) assert pt.char_to_page(0) == 1 class TestFindPageForText: def _make_tracker(self): """Simule un document 3 pages.""" page1 = "Pancréatite aiguë biliaire" page2 = "Cholécystectomie par coelioscopie" page3 = "TTT de sortie: Augmentin IV" sep = "\n\n" full = sep.join([page1, page2, page3]) offsets = [] offset = 0 for text in [page1, page2, page3]: offsets.append((offset, offset + len(text))) offset += len(text) + len(sep) return PageTracker(offsets), full def test_exact_match_page1(self): pt, full = self._make_tracker() assert pt.find_page_for_text("Pancréatite", full) == 1 def test_exact_match_page2(self): pt, full = self._make_tracker() assert pt.find_page_for_text("Cholécystectomie", full) == 2 def test_exact_match_page3(self): pt, full = self._make_tracker() assert pt.find_page_for_text("Augmentin", full) == 3 def test_case_insensitive(self): pt, full = self._make_tracker() assert pt.find_page_for_text("pancréatite", full) == 1 def test_not_found(self): pt, full = self._make_tracker() assert pt.find_page_for_text("inexistant", full) is None def test_empty_text(self): pt, full = self._make_tracker() assert pt.find_page_for_text("", full) is None class TestExtractExcerpt: def test_returns_excerpt(self): text = "A" * 200 + "Pancréatite aiguë" + "B" * 200 pt = PageTracker([(0, len(text))]) excerpt = pt.extract_excerpt("Pancréatite aiguë", text, context_chars=50) assert excerpt is not None assert "Pancréatite aiguë" in excerpt assert excerpt.startswith("...") assert excerpt.endswith("...") def test_at_start(self): text = "Pancréatite aiguë biliaire " + "X" * 200 pt = PageTracker([(0, len(text))]) excerpt = pt.extract_excerpt("Pancréatite", text, context_chars=50) assert excerpt is not None assert not excerpt.startswith("...") def test_not_found(self): text = "Texte sans rapport" pt = PageTracker([(0, len(text))]) assert pt.extract_excerpt("inexistant", text) is None