feat: parallélisation pipeline --workers N (ThreadPoolExecutor)

- Fix thread-safety FAISS index (Lock + double-check sur _loaded)
- Fix thread-safety reranker (Lock + double-check sur _reranker_model)
- main.py : flag --workers, extraction _process_group(), ThreadPoolExecutor
- benchmark_quality.py : flag --workers, subprocess en parallèle
- Validé sur 10 dossiers gold standard --workers 3 : 0 crash, codes identiques

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-02-20 01:30:51 +01:00
parent 0b94299975
commit 5cf7d74fa3
4 changed files with 109 additions and 37 deletions

View File

@@ -28,6 +28,7 @@ _embed_failed = False # Sentinelle pour éviter les retries infinis
# Singleton for the re-ranking cross-encoder (CPU only)
_reranker_model = None
# Lock held by _get_reranker() while it re-checks and loads _reranker_model
_reranker_lock = threading.Lock()
# Minimum FAISS similarity score for a result to be kept
_MIN_SCORE = 0.3
@@ -84,12 +85,17 @@ def _get_embed_model():
def _get_reranker():
"""Charge le cross-encoder de re-ranking (singleton, CPU uniquement).
"""Charge le cross-encoder de re-ranking (singleton thread-safe, CPU uniquement).
Forcé sur CPU pour ne pas interférer avec Ollama sur GPU.
"""
global _reranker_model
if _reranker_model is None:
if _reranker_model is not None:
return _reranker_model
with _reranker_lock:
# Double-check après acquisition du lock
if _reranker_model is not None:
return _reranker_model
from sentence_transformers import CrossEncoder
logger.info("Chargement du cross-encoder de re-ranking (cpu)...")
_reranker_model = CrossEncoder(RERANKER_MODEL, device="cpu")