"""CLI: process a single PDF or a directory of PDFs.

Usage:
    python -m pipeline.cli INPUT [--out output/v2] [--quiet]
"""
import argparse
import glob
import sys
import time
import traceback
from pathlib import Path
from typing import List

from .extract import extract_dossier
from .persist import save_result


def _collect_pdfs(raw_input: str) -> List[Path]:
    """Resolve the ``input`` CLI argument into a sorted list of PDF paths.

    Resolution order:
      1. an existing directory -> every regular file directly inside it
         whose name ends in ``.pdf`` (case-insensitive, non-recursive);
      2. an existing file with a ``.pdf`` extension -> that single file;
      3. anything else -> treated as a glob pattern, keeping only matches
         whose name ends in ``.pdf``.

    Args:
        raw_input: The path or pattern given on the command line.

    Returns:
        The matching paths, sorted; empty when nothing matches.
    """
    input_path = Path(raw_input)

    if input_path.is_dir():
        # Path.glob("*.pdf") is case-sensitive on POSIX and would skip
        # files such as "SCAN.PDF", unlike the other two branches which
        # compare the extension case-insensitively.
        return sorted(
            entry
            for entry in input_path.iterdir()
            if entry.is_file() and entry.name.lower().endswith(".pdf")
        )

    if input_path.is_file() and input_path.suffix.lower() == ".pdf":
        return [input_path]

    # Fallback: the argument may be an unexpanded glob pattern.
    return [
        Path(match)
        for match in sorted(glob.glob(str(input_path)))
        if match.lower().endswith(".pdf")
    ]


def main() -> int:
    """Parse the command line, then extract and save every selected PDF.

    Each PDF is passed to ``extract_dossier`` and the result is handed to
    ``save_result`` together with the ``--out`` directory. A failure on one
    PDF is reported (message plus traceback) and does not stop the batch.

    Returns:
        Process exit code: 0 when every PDF was processed successfully,
        1 when no PDF was found or at least one PDF failed.
    """
    parser = argparse.ArgumentParser(description="Pipeline OGC v1 (GLM-OCR)")
    parser.add_argument("input", help="PDF unique ou répertoire contenant des PDFs")
    parser.add_argument("--out", default="output/v2", help="Répertoire de sortie JSON")
    parser.add_argument("--quiet", action="store_true")
    args = parser.parse_args()

    pdfs = _collect_pdfs(args.input)
    if not pdfs:
        print(f"Aucun PDF trouvé pour : {args.input}")
        return 1

    print(f"{len(pdfs)} PDF(s) à traiter → {args.out}")
    failures = 0
    t0 = time.time()
    for pdf in pdfs:
        t_pdf = time.time()
        try:
            result = extract_dossier(pdf, verbose=not args.quiet)
            out_path = save_result(result, args.out)
            print(f" ✓ {pdf.name} → {out_path} ({time.time()-t_pdf:.1f}s)")
        except Exception as e:
            # Top-level boundary: keep going with the remaining PDFs, but
            # remember the failure so the exit code reflects it.
            failures += 1
            print(f" ✗ {pdf.name} : {e}")
            traceback.print_exc()
    print(f"Terminé en {time.time()-t0:.1f}s")

    # A batch with failures must not report success to the calling shell.
    return 1 if failures else 0


if __name__ == "__main__":
    sys.exit(main())