feat(grounding): module smart_resize officiel Qwen3-VL
Module pur core/grounding/smart_resize.py implémentant la formule
smart_resize officielle (transformers.models.qwen2_vl.image_processing_qwen2_vl,
utilisée par Qwen3VLProcessor pour les images via wrap Qwen2VLImageProcessor).
Helpers exposés : _round_by_factor, _floor_by_factor, _ceil_by_factor.
Constantes : FACTOR_DEFAULT=28, MIN_PIXELS_DEFAULT=3136,
MAX_PIXELS_DEFAULT=1_003_520, MAX_RATIO_DEFAULT=200.
Tests : tests/unit/test_smart_resize.py — 32 cas, 100% coverage sur le
module (mesure via coverage API directe, pytest-cov bloqué par bug cv2
préexistant tracé dans DETTE-011).
refs DETTE-006 (étape 1/5 du fix smart_resize)
refs DETTE-007 (création de la 3ème implémentation, à unifier post-démo)
refs DETTE-010 (vérif preprocessor_config.json checkpoint Qwen3-VL-8B
bloquante avant Étape 2)
refs DETTE-011 (bug cv2 contourné pour mesure coverage)
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
77
core/grounding/smart_resize.py
Normal file
77
core/grounding/smart_resize.py
Normal file
@@ -0,0 +1,77 @@
|
||||
"""
|
||||
Smart resize officiel Qwen3-VL (algorithme commun Qwen2-VL/Qwen3-VL pour images).
|
||||
|
||||
Source de référence : transformers.models.qwen2_vl.image_processing_qwen2_vl.smart_resize
|
||||
(transformers 4.57.3). Qwen3-VL utilise Qwen2VLImageProcessor pour les images via
|
||||
Qwen3VLProcessor.image_processor_class — la formule est donc commune Qwen2-VL/Qwen3-VL
|
||||
sur le pipeline image.
|
||||
|
||||
Conditions garanties par smart_resize :
|
||||
1. height et width retournés divisibles par `factor` (par défaut 28).
|
||||
2. Total pixels dans l'intervalle [min_pixels, max_pixels].
|
||||
3. Aspect ratio conservé au plus près.
|
||||
|
||||
Module image-only. Pour traitement vidéo Qwen3-VL (factor=32, autres bornes),
|
||||
module dédié à créer si besoin futur.
|
||||
"""
|
||||
|
||||
# DETTE-007 — Trois implémentations smart_resize coexistent dans le repo
|
||||
# (core/grounding/server.py:15, core/grounding/infigui_worker.py:99, ce module).
|
||||
# Unification post-démo Kerella.
|
||||
|
||||
import math
|
||||
|
||||
|
||||
# Default parameters of smart_resize.
FACTOR_DEFAULT = 28  # returned height and width are multiples of this value
MIN_PIXELS_DEFAULT = 56**2  # 3136
MAX_PIXELS_DEFAULT = 1280 * 4 * 14**2  # 1_003_520
MAX_RATIO_DEFAULT = 200  # largest accepted long-side / short-side ratio
|
||||
|
||||
|
||||
def _round_by_factor(number: int, factor: int) -> int:
|
||||
"""Closest integer to `number` divisible by `factor`."""
|
||||
return round(number / factor) * factor
|
||||
|
||||
|
||||
def _floor_by_factor(number: int, factor: int) -> int:
|
||||
"""Largest integer ≤ `number` divisible by `factor`."""
|
||||
return math.floor(number / factor) * factor
|
||||
|
||||
|
||||
def _ceil_by_factor(number: int, factor: int) -> int:
|
||||
"""Smallest integer ≥ `number` divisible by `factor`."""
|
||||
return math.ceil(number / factor) * factor
|
||||
|
||||
|
||||
def smart_resize(
    height: int,
    width: int,
    factor: int = FACTOR_DEFAULT,
    min_pixels: int = MIN_PIXELS_DEFAULT,
    max_pixels: int = MAX_PIXELS_DEFAULT,
) -> tuple[int, int]:
    """Compute a target (height, width) whose sides are multiples of `factor`.

    Steps:
        1. Round each side to the nearest multiple of `factor`.
        2. If the rounded area exceeds `max_pixels`, shrink both sides by
           ``sqrt(height * width / max_pixels)`` and floor each one to a
           multiple of `factor` (never below one `factor`).
        3. Otherwise, if the rounded area is below `min_pixels`, enlarge both
           sides by ``sqrt(min_pixels / (height * width))`` and ceil each one
           to a multiple of `factor`.

    Args:
        height: Original height in pixels; must be positive.
        width: Original width in pixels; must be positive.
        factor: Step that both returned sides are multiples of; must be
            positive.
        min_pixels: Lower bound targeted for the returned area.
        max_pixels: Upper bound targeted for the returned area.

    Returns:
        (resized_height, resized_width).

    Raises:
        ValueError: if `factor`, `height` or `width` is not positive, or if
            max(height, width) / min(height, width) > MAX_RATIO_DEFAULT
            (aspect ratio out of supported domain).
    """
    # Reject degenerate inputs up front: a zero side or factor would otherwise
    # escape as a bare ZeroDivisionError, and negative sides would flow
    # through the formula and come back as negative dimensions.
    if factor <= 0:
        raise ValueError(f"factor must be positive, got {factor}")
    if height <= 0 or width <= 0:
        raise ValueError(
            f"height and width must be positive, got height={height}, width={width}"
        )

    aspect_ratio = max(height, width) / min(height, width)
    if aspect_ratio > MAX_RATIO_DEFAULT:
        raise ValueError(
            f"absolute aspect ratio must be smaller than {MAX_RATIO_DEFAULT}, "
            f"got {aspect_ratio}"
        )

    # Built-in round() sends exact halves to the even quotient.
    h_bar = round(height / factor) * factor
    w_bar = round(width / factor) * factor

    # The order of the float operations below is intentionally left untouched:
    # the floor/ceil result depends on the exact intermediate values.
    if h_bar * w_bar > max_pixels:
        beta = math.sqrt((height * width) / max_pixels)
        # max(factor, ...) keeps a side from collapsing to zero when flooring.
        h_bar = max(factor, math.floor(height / beta / factor) * factor)
        w_bar = max(factor, math.floor(width / beta / factor) * factor)
    elif h_bar * w_bar < min_pixels:
        beta = math.sqrt(min_pixels / (height * width))
        h_bar = math.ceil(height * beta / factor) * factor
        w_bar = math.ceil(width * beta / factor) * factor
    return h_bar, w_bar
|
||||
234
tests/unit/test_smart_resize.py
Normal file
234
tests/unit/test_smart_resize.py
Normal file
@@ -0,0 +1,234 @@
|
||||
"""
|
||||
Tests unitaires pour core.grounding.smart_resize.
|
||||
|
||||
Référence : transformers.models.qwen2_vl.image_processing_qwen2_vl.smart_resize
|
||||
(transformers 4.57.3). Module image-only (pas de vidéo).
|
||||
|
||||
Plan de tests :
|
||||
- A. Constantes module-level (3 cas)
|
||||
- B. _round_by_factor (8 cas — focus banker's rounding)
|
||||
- C. _floor_by_factor (4 cas)
|
||||
- D. _ceil_by_factor (4 cas)
|
||||
- E. smart_resize public (11 cas, incluant golden bench 8 mai et E.11 limite)
|
||||
- F. smart_resize compat server.py via paramètres explicites (2 cas)
|
||||
|
||||
Total : 32 cas.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
from core.grounding.smart_resize import (
|
||||
FACTOR_DEFAULT,
|
||||
MAX_PIXELS_DEFAULT,
|
||||
MAX_RATIO_DEFAULT,
|
||||
MIN_PIXELS_DEFAULT,
|
||||
_ceil_by_factor,
|
||||
_floor_by_factor,
|
||||
_round_by_factor,
|
||||
smart_resize,
|
||||
)
|
||||
|
||||
|
||||
# =====================================================================
|
||||
# A. Constantes module-level
|
||||
# =====================================================================
|
||||
|
||||
|
||||
class TestConstants:
    """Pin the module-level default values."""

    def test_factor_default_is_28(self):
        expected = 28
        assert FACTOR_DEFAULT == expected

    def test_min_pixels_default_is_3136(self):
        # 56 * 56 — transformers Qwen2VLImageProcessor default.
        expected = 3136
        assert MIN_PIXELS_DEFAULT == expected

    def test_max_pixels_default_is_1_003_520(self):
        # 14 * 14 * 4 * 1280 — transformers Qwen2VLImageProcessor default
        # (used by Qwen3VLProcessor for images).
        expected = 1_003_520
        assert MAX_PIXELS_DEFAULT == expected
|
||||
|
||||
|
||||
# =====================================================================
|
||||
# B. _round_by_factor — focus banker's rounding (round-half-to-even)
|
||||
# =====================================================================
|
||||
|
||||
|
||||
class TestRoundByFactor:
    """_round_by_factor, with emphasis on round-half-to-even ties."""

    def test_zero(self):
        rounded = _round_by_factor(0, 28)
        assert rounded == 0

    def test_half_below_factor_rounds_to_zero(self):
        # 14/28 = 0.5 -> tie goes to the even quotient (0).
        rounded = _round_by_factor(14, 28)
        assert rounded == 0

    def test_just_above_half_rounds_up(self):
        # 15/28 ~ 0.535 -> 1 -> 28.
        rounded = _round_by_factor(15, 28)
        assert rounded == 28

    def test_exact_factor(self):
        rounded = _round_by_factor(28, 28)
        assert rounded == 28

    def test_one_and_half_factor_banker(self):
        # 42/28 = 1.5 -> tie goes to the even quotient (2) -> 56.
        rounded = _round_by_factor(42, 28)
        assert rounded == 56

    def test_two_and_half_factor_banker(self):
        # 70/28 = 2.5 -> tie goes to the even quotient (2) -> 56.
        rounded = _round_by_factor(70, 28)
        assert rounded == 56

    def test_three_and_half_factor_banker(self):
        # 98/28 = 3.5 -> tie goes to the even quotient (4) -> 112.
        rounded = _round_by_factor(98, 28)
        assert rounded == 112

    def test_fourteen_and_half_factor_banker(self):
        # 406/28 = 14.5 -> tie goes to the even quotient (14) -> 392.
        # Classic pitfall of Python's round — this test freezes the behavior.
        rounded = _round_by_factor(406, 28)
        assert rounded == 392
|
||||
|
||||
|
||||
# =====================================================================
|
||||
# C. _floor_by_factor
|
||||
# =====================================================================
|
||||
|
||||
|
||||
class TestFloorByFactor:
    """_floor_by_factor on values at and around multiples of 28."""

    def test_zero(self):
        floored = _floor_by_factor(0, 28)
        assert floored == 0

    def test_below_factor_floors_to_zero(self):
        floored = _floor_by_factor(27, 28)
        assert floored == 0

    def test_exact_factor(self):
        floored = _floor_by_factor(28, 28)
        assert floored == 28

    def test_just_below_two_factor(self):
        floored = _floor_by_factor(55, 28)
        assert floored == 28
|
||||
|
||||
|
||||
# =====================================================================
|
||||
# D. _ceil_by_factor
|
||||
# =====================================================================
|
||||
|
||||
|
||||
class TestCeilByFactor:
    """_ceil_by_factor on values at and around multiples of 28."""

    def test_zero(self):
        ceiled = _ceil_by_factor(0, 28)
        assert ceiled == 0

    def test_one_ceils_to_factor(self):
        ceiled = _ceil_by_factor(1, 28)
        assert ceiled == 28

    def test_exact_factor(self):
        ceiled = _ceil_by_factor(28, 28)
        assert ceiled == 28

    def test_just_above_factor(self):
        ceiled = _ceil_by_factor(29, 28)
        assert ceiled == 56
|
||||
|
||||
|
||||
# =====================================================================
|
||||
# E. smart_resize — API publique
|
||||
# =====================================================================
|
||||
|
||||
|
||||
class TestSmartResizePublic:
    """smart_resize called through its public API with default bounds."""

    def test_idempotence_square(self):
        # Already a multiple of 28 and within bounds: returned unchanged.
        resized = smart_resize(280, 280)
        assert resized == (280, 280)

    def test_idempotence_rectangle(self):
        # 560*1120 = 627_200 lies in [3136, 1_003_520]; both are multiples of 28.
        resized = smart_resize(560, 1120)
        assert resized == (560, 1120)

    def test_round_down(self):
        # 290/28 ~ 10.357 -> round = 10 -> 280.
        resized = smart_resize(290, 290)
        assert resized == (280, 280)

    def test_round_up(self):
        # 295/28 ~ 10.535 -> round = 11 -> 308.
        resized = smart_resize(295, 295)
        assert resized == (308, 308)

    def test_golden_bench_8_mai(self):
        # Bench fixture of May 8: 2560x1600 (heartbeat_1773792436.png).
        # h=1600, w=2560, official Qwen3-VL image defaults (max=1_003_520).
        # h_bar_init=1596, w_bar_init=2548 ; product=4_066_608 > max
        # -> resize down with beta = sqrt(4_096_000/1_003_520) ~ 2.0203
        # -> h_bar = floor(1600/beta/28)*28 = 28*28 = 784
        # -> w_bar = floor(2560/beta/28)*28 = 45*28 = 1260
        # -> 784*1260 = 987_840 <= 1_003_520
        resized = smart_resize(1600, 2560)
        assert resized == (784, 1260)

    def test_clamp_min_pixels(self):
        # 28*28 = 784 < 3136 -> resize up.
        new_h, new_w = smart_resize(28, 28)
        assert new_h * new_w >= MIN_PIXELS_DEFAULT
        assert new_h % FACTOR_DEFAULT == 0
        assert new_w % FACTOR_DEFAULT == 0

    def test_clamp_max_pixels(self):
        # 8000*8000 = 64M >> 1_003_520 -> resize down.
        new_h, new_w = smart_resize(8000, 8000)
        assert new_h * new_w <= MAX_PIXELS_DEFAULT
        assert new_h % FACTOR_DEFAULT == 0
        assert new_w % FACTOR_DEFAULT == 0

    def test_extreme_ratio_raises(self):
        # ratio = 5601/28 ~ 200.04 > 200 -> ValueError.
        with pytest.raises(ValueError):
            smart_resize(28, 5601)

    def test_ratio_at_limit_passes(self):
        # ratio = 5600/28 = 200 exactly -> does not raise (limit is inclusive).
        resized = smart_resize(28, 5600)
        assert isinstance(resized, tuple)

    def test_return_type(self):
        resized = smart_resize(560, 1120)
        assert isinstance(resized, tuple)
        assert len(resized) == 2
        for side in resized:
            assert isinstance(side, int)

    def test_e11_very_small_image_clamped_up_to_min_pixels(self):
        """Very small image: behavior is the one defined by the official formula.

        Initial hypothesis (when the module was designed, 2026-05-09):
        images with h*w < min_pixels AND h < factor might produce a
        ZeroDivisionError or an undefined result (h_bar=0 in the initial
        rounding step).

        TDD check: the official formula handles this case cleanly through the
        `< min_pixels` branch, which rescales upward with
        beta = sqrt(min_pixels / (h * w)).
        For (10, 10): beta=5.6, h_bar = ceil(10 * 5.6 / 28) * 28 = 56.

        This test freezes the actual behavior and documents that the initial
        hypothesis was too defensive. No known mathematical limit on small
        images in the domain factor=28, min_pixels=3136.
        """
        resized = smart_resize(10, 10)
        assert resized == (56, 56)
        new_h, new_w = resized
        assert new_h * new_w >= MIN_PIXELS_DEFAULT
        assert new_h % FACTOR_DEFAULT == 0
        assert new_w % FACTOR_DEFAULT == 0
|
||||
|
||||
|
||||
# =====================================================================
|
||||
# F. smart_resize — compat server.py via paramètres explicites
|
||||
# =====================================================================
|
||||
|
||||
|
||||
class TestSmartResizeServerCompat:
    """smart_resize called with the server.py bounds passed explicitly."""

    def test_bench_8_mai_with_server_bounds(self):
        # With the server.py production defaults: min=78400, max=4_390_400.
        # h_bar_init=1596, w_bar_init=2548 ; product=4_066_608 <= 4_390_400
        # -> no rescale -> (1596, 2548)
        resized = smart_resize(1600, 2560, min_pixels=78_400, max_pixels=4_390_400)
        assert resized == (1596, 2548)

    def test_large_image_with_server_bounds(self):
        # With the tight server.py defaults (max=4_390_400): 2560x2560 = 6.55M > max.
        # -> resize down under the tight clamp.
        new_h, new_w = smart_resize(
            2560, 2560, min_pixels=78_400, max_pixels=4_390_400
        )
        assert new_h * new_w <= 4_390_400
        assert new_h % FACTOR_DEFAULT == 0
        assert new_w % FACTOR_DEFAULT == 0
|
||||
Reference in New Issue
Block a user