feat: mode hybride Ollama — gemma3:27b pour CPAM, 12b pour codage
Le pipeline utilise désormais gemma3:12b (rapide) pour le codage CIM-10 et gemma3:27b (meilleur raisonnement) pour la contre-argumentation CPAM. Configurable via OLLAMA_MODEL_CPAM et OLLAMA_TIMEOUT_CPAM. Inclut aussi : traçabilité source/page DAS, niveaux CMA ATIH, sévérité, page tracker PDF, améliorations fusion et filtres DAS. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
95
tests/test_page_tracker.py
Normal file
95
tests/test_page_tracker.py
Normal file
@@ -0,0 +1,95 @@
|
||||
"""Tests pour le module PageTracker (traçabilité source)."""
|
||||
|
||||
import pytest
|
||||
|
||||
from src.extraction.page_tracker import PageTracker
|
||||
|
||||
|
||||
class TestCharToPage:
    """Exercise ``PageTracker.char_to_page`` with hand-written offset lists."""

    def _two_page_tracker(self):
        """Return a fresh tracker built from the offsets (0, 100) and (102, 200)."""
        return PageTracker([(0, 100), (102, 200)])

    def test_first_page(self):
        """Positions 0, 50 and 99 are all expected to resolve to page 1."""
        tracker = self._two_page_tracker()
        for position in (0, 50, 99):
            assert tracker.char_to_page(position) == 1

    def test_second_page(self):
        """Positions 102 and 150 are expected to resolve to page 2."""
        tracker = self._two_page_tracker()
        for position in (102, 150):
            assert tracker.char_to_page(position) == 2

    def test_beyond_last_page(self):
        """Position 300 lies past the last offset and is expected to give page 2."""
        tracker = self._two_page_tracker()
        page = tracker.char_to_page(300)
        assert page == 2

    def test_single_page(self):
        """With a single (0, 500) entry, position 250 is expected on page 1."""
        tracker = PageTracker([(0, 500)])
        page = tracker.char_to_page(250)
        assert page == 1

    def test_empty_offsets(self):
        """With an empty offset list, position 0 is still expected on page 1."""
        tracker = PageTracker([])
        page = tracker.char_to_page(0)
        assert page == 1
|
||||
|
||||
|
||||
class TestFindPageForText:
    """Exercise ``PageTracker.find_page_for_text`` on a simulated document."""

    def _make_tracker(self):
        """Simulate a three-page document.

        The three page strings are joined with a blank-line separator, and
        each page contributes one ``(start, end)`` pair giving its position
        inside the joined text.

        Returns:
            A ``(tracker, document)`` tuple: the ``PageTracker`` built from
            the offset pairs, and the full joined text.
        """
        pages = [
            "Pancréatite aiguë biliaire",
            "Cholécystectomie par coelioscopie",
            "TTT de sortie: Augmentin IV",
        ]
        joiner = "\n\n"
        document = joiner.join(pages)

        spans = []
        start = 0
        for page_text in pages:
            end = start + len(page_text)
            spans.append((start, end))
            # The next page begins right after the separator.
            start = end + len(joiner)

        return PageTracker(spans), document

    def test_exact_match_page1(self):
        """A word taken from the first page is expected to be located on page 1."""
        tracker, document = self._make_tracker()
        page = tracker.find_page_for_text("Pancréatite", document)
        assert page == 1

    def test_exact_match_page2(self):
        """A word taken from the second page is expected to be located on page 2."""
        tracker, document = self._make_tracker()
        page = tracker.find_page_for_text("Cholécystectomie", document)
        assert page == 2

    def test_exact_match_page3(self):
        """A word taken from the third page is expected to be located on page 3."""
        tracker, document = self._make_tracker()
        page = tracker.find_page_for_text("Augmentin", document)
        assert page == 3

    def test_case_insensitive(self):
        """A lower-cased query is expected to match the capitalised page-1 text."""
        tracker, document = self._make_tracker()
        page = tracker.find_page_for_text("pancréatite", document)
        assert page == 1

    def test_not_found(self):
        """A query absent from the document is expected to yield ``None``."""
        tracker, document = self._make_tracker()
        page = tracker.find_page_for_text("inexistant", document)
        assert page is None

    def test_empty_text(self):
        """An empty query is expected to yield ``None``."""
        tracker, document = self._make_tracker()
        page = tracker.find_page_for_text("", document)
        assert page is None
|
||||
|
||||
|
||||
class TestExtractExcerpt:
    """Exercise ``PageTracker.extract_excerpt`` on single-page texts."""

    def test_returns_excerpt(self):
        """A match surrounded by long padding is expected to give an excerpt
        that contains the match and has ``...`` at both ends."""
        needle = "Pancréatite aiguë"
        document = "A" * 200 + needle + "B" * 200
        tracker = PageTracker([(0, len(document))])

        snippet = tracker.extract_excerpt(needle, document, context_chars=50)

        assert snippet is not None
        assert needle in snippet
        assert snippet.startswith("...")
        assert snippet.endswith("...")

    def test_at_start(self):
        """A match at the very start of the text is expected to give an
        excerpt that does not begin with ``...``."""
        document = "Pancréatite aiguë biliaire " + "X" * 200
        tracker = PageTracker([(0, len(document))])

        snippet = tracker.extract_excerpt("Pancréatite", document, context_chars=50)

        assert snippet is not None
        assert not snippet.startswith("...")

    def test_not_found(self):
        """A query absent from the text is expected to yield ``None``."""
        document = "Texte sans rapport"
        tracker = PageTracker([(0, len(document))])

        snippet = tracker.extract_excerpt("inexistant", document)

        assert snippet is None
|
||||
Reference in New Issue
Block a user