"""Test GLM-OCR 0.9B sur une page OGC isolée.""" import sys import time import torch from transformers import AutoProcessor, AutoModelForImageTextToText from pdf2image import convert_from_path import os PDF_PATH = sys.argv[1] if len(sys.argv) > 1 else "2018 CARC/OGC 7.pdf" PAGE_NUM = int(sys.argv[2]) if len(sys.argv) > 2 else 1 MODE = sys.argv[3] if len(sys.argv) > 3 else "text" # text | table | json OUTPUT_MD = sys.argv[4] if len(sys.argv) > 4 else "test_glm_result.md" # Prompt selon mode JSON_SCHEMA_OGC = """Extrais les informations de cette fiche médicale OGC et réponds en JSON strict : { "etablissement": "", "finess": "", "date_debut_controle": "", "n_ogc": "", "n_champ": "", "dates_sejour": "", "codage_etablissement": { "dp": "", "dr": "", "das": [{"code": "", "position": ""}] }, "codage_recodage": { "dp": "", "dr": "", "das": [{"code": "", "position": ""}] }, "actes_etablissement": [{"code": "", "position": ""}], "actes_recodage": [{"code": "", "position": ""}], "ghm_etablissement": "", "ghs_etablissement": "", "ghm_recodage": "", "ghs_recodage": "", "accord_desaccord": "", "praticien_conseil": "" }""" PROMPTS = { "text": "Text Recognition:", "table": "Table Recognition:", "json": JSON_SCHEMA_OGC, } prompt_text = PROMPTS[MODE] print(f"PDF: {PDF_PATH} page: {PAGE_NUM} mode: {MODE}") print(f"--- Chargement GLM-OCR 0.9B ---") t0 = time.time() MODEL_PATH = "zai-org/GLM-OCR" processor = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True) model = AutoModelForImageTextToText.from_pretrained( MODEL_PATH, torch_dtype="auto", device_map="auto", trust_remote_code=True, ) print(f"Modèle chargé en {time.time()-t0:.1f}s") print(f"VRAM utilisée : {torch.cuda.memory_allocated()/1e9:.2f} Go") print(f"--- Conversion PDF page {PAGE_NUM} ---") pages = convert_from_path(PDF_PATH, 300, first_page=PAGE_NUM, last_page=PAGE_NUM) tmp = f"/tmp/glm_page_{PAGE_NUM}.png" pages[0].save(tmp, "PNG") print(f"Image: {tmp} ({pages[0].size})") messages = [{ "role": "user", "content": [ {"type": "image", "url": tmp}, {"type": "text", "text": prompt_text}, ], }] print(f"--- Génération (mode={MODE}) ---") t0 = time.time() inputs = processor.apply_chat_template( messages, tokenize=True, add_generation_prompt=True, return_dict=True, return_tensors="pt", ).to(model.device) inputs.pop("token_type_ids", None) generated_ids = model.generate(**inputs, max_new_tokens=8192) output_text = processor.decode( generated_ids[0][inputs["input_ids"].shape[1]:], skip_special_tokens=False, ) print(f"Génération en {time.time()-t0:.1f}s ({len(output_text)} chars)") with open(OUTPUT_MD, "w", encoding="utf-8") as f: f.write(f"# {os.path.basename(PDF_PATH)} — page {PAGE_NUM} — mode={MODE}\n\n") f.write(output_text) print(f"--- Sauvé dans : {OUTPUT_MD} ---") print("\n--- Aperçu (1000 premiers chars) ---\n") print(output_text[:1000])