""" Dump runtime des attributs et comportement effectif du processor Qwen3-VL-8B-Instruct. Script jetable, à supprimer après usage. Usage : python tools/probe_qwen3vl_processor.py """ from transformers import AutoProcessor from PIL import Image import torch MODEL_ID = "Qwen/Qwen3-VL-8B-Instruct" FIXTURE = "data/training/live_sessions/bg_DESKTOP-58D5CAC_windows/shots/heartbeat_1773792436.png" print("=" * 70) print("DUMP PROCESSOR :", MODEL_ID) print("=" * 70) proc = AutoProcessor.from_pretrained(MODEL_ID) ip = proc.image_processor # Section 1 — Attributs bruts print("\n--- ATTRIBUTS BRUTS ---") print("class:", type(ip).__name__) print("size:", ip.size) print("patch_size:", ip.patch_size) print("merge_size:", ip.merge_size) for attr in ['min_pixels', 'max_pixels', 'temporal_patch_size', 'image_mean', 'image_std', 'do_resize', 'do_rescale', 'rescale_factor', 'do_normalize', 'do_convert_rgb']: print(f"{attr}:", getattr(ip, attr, '')) # Section 2 — Comportement effectif sur fixture print("\n--- COMPORTEMENT EFFECTIF SUR FIXTURE ---") img = Image.open(FIXTURE) print(f"Image source : {img.size} (W×H)") out = ip(images=img, return_tensors='pt') print(f"Keys retournées : {list(out.keys())}") print(f"pixel_values shape : {out['pixel_values'].shape}") print(f"image_grid_thw : {out.get('image_grid_thw')}") # Section 3 — Reconstruction des dimensions resize print("\n--- RECONSTRUCTION DIMS RESIZE ---") grid = out.get('image_grid_thw') if grid is not None: grid = grid[0].tolist() # [t, h, w] factor = ip.patch_size * ip.merge_size H_resized = grid[1] * factor W_resized = grid[2] * factor print(f"grid_thw : t={grid[0]}, h={grid[1]}, w={grid[2]}") print(f"factor calculé (patch_size × merge_size) : {factor}") print(f"Dims resize reconstruites : {W_resized}×{H_resized} (W×H)") print(f"Dims source : {img.size}") print(f"Ratio resize : {W_resized / img.size[0]:.4f} (W), " f"{H_resized / img.size[1]:.4f} (H)") # Section 4 — Test borne haute pour comprendre min/max_pixels print("\n--- TEST BORNE HAUTE (image grande) ---") big_img = Image.new('RGB', (4096, 2560), color='white') big_out = ip(images=big_img, return_tensors='pt') big_grid = big_out['image_grid_thw'][0].tolist() factor = ip.patch_size * ip.merge_size print(f"Image source : {big_img.size}") print(f"grid_thw : {big_grid}") print(f"Dims resize : {big_grid[2] * factor}×{big_grid[1] * factor}") print(f"Pixels totaux après resize : " f"{big_grid[1] * factor * big_grid[2] * factor}") # Section 5 — Test borne basse (image petite) print("\n--- TEST BORNE BASSE (image petite) ---") small_img = Image.new('RGB', (128, 64), color='white') small_out = ip(images=small_img, return_tensors='pt') small_grid = small_out['image_grid_thw'][0].tolist() print(f"Image source : {small_img.size}") print(f"grid_thw : {small_grid}") print(f"Dims resize : {small_grid[2] * factor}×{small_grid[1] * factor}") print("\n" + "=" * 70) print("FIN DUMP") print("=" * 70)