# File: core/grounding/smart_resize.py (new file in this patch)
"""
Official Qwen3-VL smart resize (algorithm shared by Qwen2-VL and Qwen3-VL for images).

Reference source: transformers.models.qwen2_vl.image_processing_qwen2_vl.smart_resize
(transformers 4.57.3). Qwen3-VL uses Qwen2VLImageProcessor for images via
Qwen3VLProcessor.image_processor_class, so the formula is common to
Qwen2-VL and Qwen3-VL on the image pipeline.

Conditions targeted by smart_resize:
1. Returned height and width are divisible by `factor` (default 28).
2. Total pixel count lies within [min_pixels, max_pixels].
3. Aspect ratio is preserved as closely as possible.

Image-only module. For Qwen3-VL video processing (factor=32, other bounds),
a dedicated module should be created if the need arises.
"""

# DETTE-007 (tech debt) — Three smart_resize implementations coexist in the repo
# (core/grounding/server.py:15, core/grounding/infigui_worker.py:99, this module).
# Unification is planned after the Kerella demo.

import math


FACTOR_DEFAULT = 28
MIN_PIXELS_DEFAULT = 56 * 56  # 3136
MAX_PIXELS_DEFAULT = 14 * 14 * 4 * 1280  # 1_003_520
MAX_RATIO_DEFAULT = 200


def _round_by_factor(number: float, factor: int) -> int:
    """Return the multiple of `factor` closest to `number`.

    Ties follow the built-in ``round`` (round-half-to-even): with factor 28,
    14 maps to 0 and 42 maps to 56.
    """
    return round(number / factor) * factor


def _floor_by_factor(number: float, factor: int) -> int:
    """Return the largest multiple of `factor` that is <= `number`."""
    return math.floor(number / factor) * factor


def _ceil_by_factor(number: float, factor: int) -> int:
    """Return the smallest multiple of `factor` that is >= `number`."""
    return math.ceil(number / factor) * factor


def smart_resize(
    height: int,
    width: int,
    factor: int = FACTOR_DEFAULT,
    min_pixels: int = MIN_PIXELS_DEFAULT,
    max_pixels: int = MAX_PIXELS_DEFAULT,
) -> tuple[int, int]:
    """Rescale (height, width) to multiples of `factor` within the pixel budget.

    Both sides are first rounded to the nearest multiple of `factor`. If the
    rounded area exceeds `max_pixels`, both sides are shrunk by a common scale
    and floored to a multiple of `factor` (never below `factor` itself). If
    the rounded area is below `min_pixels`, both sides are enlarged by a
    common scale and ceiled to a multiple of `factor`.

    Args:
        height: Source height in pixels; must be positive.
        width: Source width in pixels; must be positive.
        factor: Divisor that both returned sides must honour; must be positive.
        min_pixels: Lower bound on the returned area.
        max_pixels: Upper bound on the returned area.

    Raises:
        ValueError: if `height`, `width` or `factor` is not positive, or if
            max(height, width) / min(height, width) > MAX_RATIO_DEFAULT
            (aspect ratio outside the supported domain).

    Returns:
        (resized_height, resized_width).
    """
    # Reject degenerate sizes up front: a zero side would otherwise surface as
    # a bare ZeroDivisionError, and negative sides yield negative outputs.
    if height <= 0 or width <= 0:
        raise ValueError(
            f"height and width must be positive, got height={height}, width={width}"
        )
    if factor <= 0:
        raise ValueError(f"factor must be positive, got {factor}")

    ratio = max(height, width) / min(height, width)
    if ratio > MAX_RATIO_DEFAULT:
        raise ValueError(
            f"absolute aspect ratio must be smaller than {MAX_RATIO_DEFAULT}, "
            f"got {ratio}"
        )

    h_bar = _round_by_factor(height, factor)
    w_bar = _round_by_factor(width, factor)
    area = h_bar * w_bar

    if area > max_pixels:
        # Scale is derived from the *original* area, not the rounded one.
        beta = math.sqrt((height * width) / max_pixels)
        h_bar = max(factor, _floor_by_factor(height / beta, factor))
        w_bar = max(factor, _floor_by_factor(width / beta, factor))
    elif area < min_pixels:
        # Also reached when a side rounded to 0 (side shorter than factor / 2).
        beta = math.sqrt(min_pixels / (height * width))
        h_bar = _ceil_by_factor(height * beta, factor)
        w_bar = _ceil_by_factor(width * beta, factor)
    return h_bar, w_bar
# File: tests/unit/test_smart_resize.py (new file in this patch)
"""
Unit tests for core.grounding.smart_resize.

Reference: transformers.models.qwen2_vl.image_processing_qwen2_vl.smart_resize
(transformers 4.57.3). Image-only module (no video).

Test plan:
- A. Module-level constants (3 cases)
- B. _round_by_factor (8 cases — focus on banker's rounding)
- C. _floor_by_factor (4 cases)
- D. _ceil_by_factor (4 cases)
- E. Public smart_resize (11 cases, including the May 8 golden bench and the E.11 edge case)
- F. smart_resize compatibility with server.py through explicit parameters (2 cases)

Total: 32 cases.
"""

import pytest

from core.grounding.smart_resize import (
    FACTOR_DEFAULT,
    MAX_PIXELS_DEFAULT,
    MAX_RATIO_DEFAULT,
    MIN_PIXELS_DEFAULT,
    _ceil_by_factor,
    _floor_by_factor,
    _round_by_factor,
    smart_resize,
)


# =====================================================================
# A. Module-level constants
# =====================================================================


class TestConstants:
    def test_factor_default_is_28(self):
        assert FACTOR_DEFAULT == 28

    def test_min_pixels_default_is_3136(self):
        # 56 * 56 — transformers Qwen2VLImageProcessor default
        assert MIN_PIXELS_DEFAULT == 3136

    def test_max_pixels_default_is_1_003_520(self):
        # 14 * 14 * 4 * 1280 — transformers Qwen2VLImageProcessor default
        # (used by Qwen3VLProcessor for images)
        assert MAX_PIXELS_DEFAULT == 1_003_520


# =====================================================================
# B. _round_by_factor — focus on banker's rounding (round-half-to-even)
# =====================================================================


class TestRoundByFactor:
    def test_zero(self):
        assert _round_by_factor(0, 28) == 0

    def test_half_below_factor_rounds_to_zero(self):
        # 14/28 = 0.5 → banker's rounding goes to the even neighbour (0)
        assert _round_by_factor(14, 28) == 0

    def test_just_above_half_rounds_up(self):
        # 15/28 ≈ 0.535 → 1 → 28
        assert _round_by_factor(15, 28) == 28

    def test_exact_factor(self):
        assert _round_by_factor(28, 28) == 28

    def test_one_and_half_factor_banker(self):
        # 42/28 = 1.5 → banker's rounding goes to the even neighbour (2) → 56
        assert _round_by_factor(42, 28) == 56

    def test_two_and_half_factor_banker(self):
        # 70/28 = 2.5 → banker's rounding goes to the even neighbour (2) → 56
        assert _round_by_factor(70, 28) == 56

    def test_three_and_half_factor_banker(self):
        # 98/28 = 3.5 → banker's rounding goes to the even neighbour (4) → 112
        assert _round_by_factor(98, 28) == 112

    def test_fourteen_and_half_factor_banker(self):
        # 406/28 = 14.5 → banker's rounding goes to the even neighbour (14) → 392
        # Classic pitfall of Python's round — this pins the behaviour.
        assert _round_by_factor(406, 28) == 392


# =====================================================================
# C. _floor_by_factor
# =====================================================================


class TestFloorByFactor:
    def test_zero(self):
        assert _floor_by_factor(0, 28) == 0

    def test_below_factor_floors_to_zero(self):
        assert _floor_by_factor(27, 28) == 0

    def test_exact_factor(self):
        assert _floor_by_factor(28, 28) == 28

    def test_just_below_two_factor(self):
        assert _floor_by_factor(55, 28) == 28


# =====================================================================
# D. _ceil_by_factor
# =====================================================================


class TestCeilByFactor:
    def test_zero(self):
        assert _ceil_by_factor(0, 28) == 0

    def test_one_ceils_to_factor(self):
        assert _ceil_by_factor(1, 28) == 28

    def test_exact_factor(self):
        assert _ceil_by_factor(28, 28) == 28

    def test_just_above_factor(self):
        assert _ceil_by_factor(29, 28) == 56


# =====================================================================
# E. smart_resize — public API
# =====================================================================


class TestSmartResizePublic:
    def test_idempotence_square(self):
        # Image already a multiple of 28 and within bounds: returned unchanged.
        assert smart_resize(280, 280) == (280, 280)

    def test_idempotence_rectangle(self):
        # 560*1120 = 627_200 ∈ [3136, 1_003_520] and both are multiples of 28.
        assert smart_resize(560, 1120) == (560, 1120)

    def test_round_down(self):
        # 290/28 ≈ 10.357 → round = 10 → 280
        assert smart_resize(290, 290) == (280, 280)

    def test_round_up(self):
        # 295/28 ≈ 10.535 → round = 11 → 308
        assert smart_resize(295, 295) == (308, 308)

    def test_golden_bench_8_mai(self):
        # May 8 bench fixture: 2560×1600 (heartbeat_1773792436.png).
        # h=1600, w=2560, official Qwen3-VL image defaults (max=1_003_520).
        # h_bar_init=1596, w_bar_init=2548 ; product=4_066_608 > max
        # → resize down via beta = sqrt(4_096_000/1_003_520) ≈ 2.0203
        # → h_bar=floor(1600/beta/28)*28 = 28*28 = 784
        # → w_bar=floor(2560/beta/28)*28 = 45*28 = 1260
        # → 784*1260 = 987_840 ≤ 1_003_520 ✓
        assert smart_resize(1600, 2560) == (784, 1260)

    def test_clamp_min_pixels(self):
        # 28*28 = 784 < 3136 → resize up.
        h, w = smart_resize(28, 28)
        assert h * w >= MIN_PIXELS_DEFAULT
        assert h % FACTOR_DEFAULT == 0
        assert w % FACTOR_DEFAULT == 0

    def test_clamp_max_pixels(self):
        # 8000*8000 = 64M >> 1_003_520 → resize down.
        h, w = smart_resize(8000, 8000)
        assert h * w <= MAX_PIXELS_DEFAULT
        assert h % FACTOR_DEFAULT == 0
        assert w % FACTOR_DEFAULT == 0

    def test_extreme_ratio_raises(self):
        # ratio = 5601/28 ≈ 200.04 > 200 → ValueError.
        with pytest.raises(ValueError):
            smart_resize(28, 5601)

    def test_ratio_at_limit_passes(self):
        # ratio = 5600/28 = 200 exactly → does not raise (limit is inclusive).
        result = smart_resize(28, 5600)
        assert isinstance(result, tuple)

    def test_return_type(self):
        result = smart_resize(560, 1120)
        assert isinstance(result, tuple)
        assert len(result) == 2
        assert all(isinstance(x, int) for x in result)

    def test_e11_very_small_image_clamped_up_to_min_pixels(self):
        """Very small image: behaviour is defined by the official formula.

        The module's design notes (2026-05-09) started from a hypothesis about
        images with h*w < min_pixels combined with a condition on h.

        NOTE(review): the remainder of this docstring and the exact input of
        the test were lost when the patch text was extracted. The input
        (10, 12) below is a reconstruction: both sides round to 0 on the first
        pass, so the min_pixels branch must scale them back up. Confirm
        against the repository copy.
        """
        h_bar, w_bar = smart_resize(10, 12)
        assert h_bar * w_bar >= MIN_PIXELS_DEFAULT
        assert h_bar % FACTOR_DEFAULT == 0
        assert w_bar % FACTOR_DEFAULT == 0


# =====================================================================
# F. smart_resize — server.py compatibility through explicit parameters
# =====================================================================


class TestSmartResizeServerCompat:
    def test_bench_8_mai_with_server_bounds(self):
        # With server.py production defaults: min=78400, max=4_390_400.
        # h_bar_init=1596, w_bar_init=2548 ; product=4_066_608 ≤ 4_390_400
        # → no rescale → (1596, 2548)
        assert smart_resize(
            1600, 2560, min_pixels=78_400, max_pixels=4_390_400
        ) == (1596, 2548)

    def test_large_image_with_server_bounds(self):
        # With the tight server.py defaults (max=4_390_400): 2560×2560 = 6.55M > max.
        # → resize down under the tight clamp.
        h, w = smart_resize(
            2560, 2560, min_pixels=78_400, max_pixels=4_390_400
        )
        assert h * w <= 4_390_400
        assert h % FACTOR_DEFAULT == 0
        assert w % FACTOR_DEFAULT == 0