feat: mode hybride Ollama — gemma3:27b pour CPAM, 12b pour codage
Le pipeline utilise désormais gemma3:12b (rapide) pour le codage CIM-10 et gemma3:27b (meilleur raisonnement) pour la contre-argumentation CPAM. Le modèle et le délai d'attente utilisés pour la CPAM sont configurables via OLLAMA_MODEL_CPAM et OLLAMA_TIMEOUT_CPAM.

Inclut aussi : traçabilité source/page des DAS, niveaux CMA ATIH, sévérité, suivi des pages PDF (page tracker), améliorations de la fusion et des filtres DAS.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
11
src/main.py
11
src/main.py
@@ -14,7 +14,7 @@ from .config import ANONYMIZED_DIR, INPUT_DIR, OUTPUT_DIR, REPORTS_DIR, STRUCTUR
|
||||
from .extraction.document_classifier import classify
|
||||
from .extraction.crh_parser import parse_crh
|
||||
from .extraction.document_splitter import split_documents
|
||||
-from .extraction.pdf_extractor import extract_text
|
||||
+from .extraction.pdf_extractor import extract_text, extract_text_with_pages
|
||||
from .extraction.trackare_parser import parse_trackare
|
||||
from .medical.cim10_extractor import extract_medical_info
|
||||
from .medical.ghm import estimate_ghm
|
||||
@@ -38,8 +38,8 @@ def process_pdf(pdf_path: Path) -> list[tuple[str, DossierMedical, Anonymization
|
||||
t0 = time.time()
|
||||
logger.info("Traitement de %s", pdf_path.name)
|
||||
|
||||
-# 1. Extraction texte
|
||||
-raw_text = extract_text(pdf_path)
|
||||
+# 1. Extraction texte avec pages
|
||||
+raw_text, page_tracker = extract_text_with_pages(pdf_path)
|
||||
logger.info(" Texte extrait : %d caractères", len(raw_text))
|
||||
|
||||
# 2. Classification
|
||||
@@ -82,7 +82,10 @@ def process_pdf(pdf_path: Path) -> list[tuple[str, DossierMedical, Anonymization
|
||||
edsnlp_result = _run_edsnlp(anonymized_text)
|
||||
|
||||
# 7. Extraction médicale CIM-10
|
||||
-dossier = extract_medical_info(parsed, anonymized_text, edsnlp_result, use_rag=_use_rag)
|
||||
+dossier = extract_medical_info(
|
||||
+parsed, anonymized_text, edsnlp_result, use_rag=_use_rag,
|
||||
+page_tracker=page_tracker, raw_text=raw_text,
|
||||
+)
|
||||
dossier.source_file = pdf_path.name
|
||||
dossier.document_type = doc_type
|
||||
logger.info(" DP%s : %s", part_label, dossier.diagnostic_principal)
|
||||
|
||||
Reference in New Issue
Block a user