"""Test GOT-OCR2.0 sur une page OGC isolée.""" import sys import time import torch from transformers import AutoModel, AutoTokenizer from pdf2image import convert_from_path import os PDF_PATH = sys.argv[1] if len(sys.argv) > 1 else "2018 CARC/OGC 7.pdf" PAGE_NUM = int(sys.argv[2]) if len(sys.argv) > 2 else 1 OCR_TYPE = sys.argv[3] if len(sys.argv) > 3 else "format" # "ocr" ou "format" OUTPUT_MD = sys.argv[4] if len(sys.argv) > 4 else "test_got_result.md" print(f"PDF: {PDF_PATH} page: {PAGE_NUM} type: {OCR_TYPE}") print(f"--- Chargement GOT-OCR2.0 ---") t0 = time.time() model_name = "ucaslcl/GOT-OCR2_0" tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) model = AutoModel.from_pretrained( model_name, trust_remote_code=True, low_cpu_mem_usage=True, device_map="cuda", use_safetensors=True, pad_token_id=tokenizer.eos_token_id, ).eval().cuda() print(f"Modèle chargé en {time.time()-t0:.1f}s") print(f"--- Conversion PDF page {PAGE_NUM} ---") pages = convert_from_path(PDF_PATH, 300, first_page=PAGE_NUM, last_page=PAGE_NUM) tmp = f"/tmp/got_page_{PAGE_NUM}.png" pages[0].save(tmp, "PNG") print(f"Image: {tmp} ({pages[0].size})") print(f"--- OCR (type={OCR_TYPE}) ---") t0 = time.time() res = model.chat(tokenizer, tmp, ocr_type=OCR_TYPE) print(f"OCR terminé en {time.time()-t0:.1f}s ({len(res)} chars)") with open(OUTPUT_MD, "w", encoding="utf-8") as f: f.write(f"# {os.path.basename(PDF_PATH)} — page {PAGE_NUM} — type={OCR_TYPE}\n\n") f.write(res) print(f"--- Résultat sauvegardé : {OUTPUT_MD} ---") print("\n--- Aperçu (500 premiers chars) ---\n") print(res[:500])