feat: parallélisation pipeline --workers N (ThreadPoolExecutor)
- Fix thread-safety FAISS index (Lock + double-check sur _loaded)
- Fix thread-safety reranker (Lock + double-check sur _reranker_model)
- main.py : flag --workers, extraction _process_group(), ThreadPoolExecutor
- benchmark_quality.py : flag --workers, subprocess en parallèle
- Validé sur 10 dossiers gold standard --workers 3 : 0 crash, codes identiques

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
27
src/main.py
27
src/main.py
@@ -399,6 +399,12 @@ def main(input_path: str | None = None) -> None:
|
||||
metavar="PATH",
|
||||
help="Fichier Excel de contrôle CPAM (enrichit les dossiers avec contre-argumentation)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--workers",
|
||||
type=int,
|
||||
default=1,
|
||||
help="Nombre de dossiers traités en parallèle (défaut: 1)",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.build_dict:
|
||||
@@ -501,7 +507,8 @@ def main(input_path: str | None = None) -> None:
|
||||
|
||||
logger.info("Traitement de %d PDF(s)...", total)
|
||||
|
||||
for pdfs, subdir in groups:
|
||||
def _process_group(pdfs: list[Path], subdir: str | None) -> None:
|
||||
"""Traite un groupe de PDFs (un dossier patient)."""
|
||||
if subdir:
|
||||
logger.info("--- Dossier %s (%d PDFs) ---", subdir, len(pdfs))
|
||||
|
||||
@@ -633,6 +640,24 @@ def main(input_path: str | None = None) -> None:
|
||||
except Exception:
|
||||
logger.exception("Erreur écriture dossier fusionné %s", subdir)
|
||||
|
||||
# Exécution séquentielle ou parallèle selon --workers
|
||||
if args.workers > 1:
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
logger.info("Mode parallèle : %d workers", args.workers)
|
||||
with ThreadPoolExecutor(max_workers=args.workers) as executor:
|
||||
futures = {
|
||||
executor.submit(_process_group, pdfs, subdir): subdir
|
||||
for pdfs, subdir in groups
|
||||
}
|
||||
for future in as_completed(futures):
|
||||
try:
|
||||
future.result()
|
||||
except Exception:
|
||||
logger.exception("Erreur groupe %s", futures[future])
|
||||
else:
|
||||
for pdfs, subdir in groups:
|
||||
_process_group(pdfs, subdir)
|
||||
|
||||
logger.info("Terminé.")
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user