"""Test Qwen2.5-VL-7B on the 4 reference case folders.

Loads the Qwen2.5-VL-7B-Instruct vision-language model, runs a structured
JSON-extraction prompt against the first page of each reference OGC PDF,
prints a preview of each answer, and saves the full raw output per case
under ``test_qwen_out/``.
"""
import sys
import time
from pathlib import Path

import torch
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration
from qwen_vl_utils import process_vision_info

from pipeline.ingest import pdf_to_images

MODEL = "Qwen/Qwen2.5-VL-7B-Instruct"

# Extraction prompt (French, matches the documents' language). Runtime string:
# must stay exactly as the model was evaluated with.
PROMPT = """Lis cette fiche médicale OGC (contrôle T2A Assurance Maladie) et renvoie STRICTEMENT le JSON suivant, sans commentaire ni markdown.
Les codes CIM-10 sont au format lettre + 2 à 4 chiffres (ex: K650, T814).
Les codes CCAM sont au format 4 lettres + 3 chiffres (ex: EBFA012).
Les codes GHM sont au format 2 chiffres + lettre + 3 chiffres (ex: 11M122).
Les codes GHS sont des nombres à 3-5 chiffres (ex: 4323).
Si un champ est illisible, laisse une chaîne vide. Ne devine pas.
{
"n_ogc": "",
"codage_etab": {"dp": "", "dp_libelle": "", "dr": "", "das": [{"code": "", "position": ""}]},
"codage_reco": {"dp": "", "dr": "", "das": [{"code": "", "position": ""}]},
"ghm_etab": "",
"ghs_etab": "",
"ghm_reco": "",
"ghs_reco": "",
"praticien_conseil": ""
}"""

# Reference PDFs used as the evaluation set.
CASES = [
    "2018 CARC/OGC 7.pdf",
    "2018 CARC/OGC 27.pdf",
    "2018 CARC/OGC 55.pdf",
    "2018 CARC/OGC 86.pdf",
]

# Single place for the output directory (it is both created and written to).
OUT_DIR = Path("test_qwen_out")


def _load_model():
    """Load and return ``(processor, model)`` for MODEL.

    ``max_pixels`` caps the number of visual patches; Qwen2.5-VL's default
    goes up to ~12M pixels, which blows the 12 GB VRAM of an RTX 5070.
    We target ~1.25M pixels (about 1120x1120) -> roughly 1600 visual tokens.
    """
    processor = AutoProcessor.from_pretrained(
        MODEL,
        min_pixels=256 * 28 * 28,
        max_pixels=1280 * 28 * 28,
    )
    model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
        MODEL,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )
    return processor, model


def _run_case(processor, model, pdf):
    """Extract JSON from the first page of *pdf*.

    Returns ``(output, gen_seconds)`` where ``output`` is the raw decoded
    model text and ``gen_seconds`` covers only the ``generate`` call (same
    timing boundary as the original script).
    """
    # Only the first page of each OGC PDF carries the fiche.
    img = str(pdf_to_images(pdf)[0])
    messages = [{
        "role": "user",
        "content": [
            {"type": "image", "image": img},
            {"type": "text", "text": PROMPT},
        ],
    }]
    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    ).to(model.device)

    t0 = time.time()
    with torch.no_grad():
        gen = model.generate(**inputs, max_new_tokens=2048)
    elapsed = time.time() - t0

    # Drop the prompt tokens; decode only the newly generated ids.
    out_ids = gen[:, inputs.input_ids.shape[1]:]
    output = processor.batch_decode(out_ids, skip_special_tokens=True)[0]
    return output, elapsed


def main():
    """Run the 4 reference cases and dump each raw answer to OUT_DIR."""
    print(f"--- Chargement {MODEL} ---")
    t0 = time.time()
    processor, model = _load_model()
    print(f"Modèle chargé en {time.time()-t0:.1f}s")
    # device_map="auto" may land on CPU on a CUDA-less machine; the original
    # unconditional call would raise there.
    if torch.cuda.is_available():
        print(f"VRAM = {torch.cuda.memory_allocated()/1e9:.2f} Go")

    OUT_DIR.mkdir(exist_ok=True)
    for pdf in CASES:
        name = Path(pdf).stem
        output, elapsed = _run_case(processor, model, pdf)
        print(f"\n=== {name} ({elapsed:.1f}s) ===")
        print(output[:1200])
        # Explicit UTF-8: the platform default codec (e.g. cp1252 on
        # Windows) can fail on characters in the model output.
        (OUT_DIR / f"{name}.txt").write_text(output, encoding="utf-8")


if __name__ == "__main__":
    main()