Architecture grounding complète :
- core/grounding/server.py : serveur FastAPI (port 8200) avec UI-TARS-1.5-7B en 4-bit NF4
Process séparé avec son propre contexte CUDA (résout le crash Flask/CUDA)
- core/grounding/pipeline.py : orchestrateur cascade template→OCR→UI-TARS→static
- core/grounding/template_matcher.py : TemplateMatcher centralisé (remplace 5 copies)
- core/grounding/ui_tars_grounder.py : client HTTP vers le serveur de grounding
- core/grounding/target.py : GroundingTarget + GroundingResult
ORA modifié :
- _act_click() : capture unique de l'écran envoyée au serveur de grounding
- Pre-check VLM skippé pour ui_tars (redondant, et Ollama n'a plus de VRAM)
- verify_level='none' par défaut (vérification titre OCR prévue en Phase 2)
- Détection réponses négatives UI-TARS ("I don't see it" → fallback OCR)
Nettoyage :
- 9 fichiers morts archivés dans _archive/ (~6300 lignes supprimées)
- 21 tests ajoutés pour TemplateMatcher
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
312 lines
12 KiB
Python
312 lines
12 KiB
Python
"""Tests pour core/grounding/template_matcher.py"""
|
|
|
|
import base64
|
|
import io
|
|
import time
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
import cv2
|
|
import numpy as np
|
|
import pytest
|
|
from PIL import Image
|
|
|
|
from core.grounding.template_matcher import MatchResult, TemplateMatcher
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _make_image(w: int, h: int, color: tuple = (128, 128, 128)) -> Image.Image:
    """Return a solid-colour RGB PIL image.

    Args:
        w: Image width in pixels.
        h: Image height in pixels.
        color: Fill colour passed straight to ``Image.new`` (mid-grey by default).
    """
    size = (w, h)
    return Image.new('RGB', size, color)
|
|
|
|
|
|
def _pil_to_b64(img: Image.Image) -> str:
    """Serialize ``img`` to PNG in memory and return it as a base64 string.

    Args:
        img: The PIL image to encode.

    Returns:
        The base64 text of the PNG bytes (no ``data:`` prefix).
    """
    with io.BytesIO() as png_buffer:
        img.save(png_buffer, format='PNG')
        png_bytes = png_buffer.getvalue()
    encoded = base64.b64encode(png_bytes)
    return encoded.decode()
|
|
|
|
|
|
def _make_screen_with_target(
    screen_w: int = 800,
    screen_h: int = 600,
    target_x: int = 300,
    target_y: int = 200,
    target_w: int = 60,
    target_h: int = 40,
):
    """Build a noisy screen holding one unique pattern, plus the matching anchor.

    The screen background is seeded random noise, and a one-pixel red/blue
    checkerboard is pasted at ``(target_x, target_y)``. The intent is that
    template matching can only succeed at the location of the injected pattern.

    Args:
        screen_w: Screen width in pixels.
        screen_h: Screen height in pixels.
        target_x: Left edge of the injected pattern.
        target_y: Top edge of the injected pattern.
        target_w: Pattern width in pixels.
        target_h: Pattern height in pixels.

    Returns:
        A tuple ``(screen_pil, anchor_pil, expected_cx, expected_cy)`` where
        the two images are PIL images and the two integers are the centre of
        the injected pattern in screen coordinates.
    """
    # Fixed seed so every call produces the same background.
    rng = np.random.RandomState(42)
    screen = rng.randint(0, 256, (screen_h, screen_w, 3), dtype=np.uint8)

    # Deterministic checkerboard: cells where (row + col) is even are red,
    # the others are blue. Built with a boolean mask instead of a per-pixel
    # Python loop.
    rows, cols = np.indices((target_h, target_w))
    even_cell = (rows + cols) % 2 == 0
    target = np.zeros((target_h, target_w, 3), dtype=np.uint8)
    target[even_cell] = (255, 0, 0)  # red
    target[~even_cell] = (0, 0, 255)  # blue

    screen[target_y:target_y + target_h, target_x:target_x + target_w] = target
    screen_pil = Image.fromarray(screen)

    # The anchor is exactly the injected pattern.
    anchor_pil = Image.fromarray(target)

    expected_cx = target_x + target_w // 2
    expected_cy = target_y + target_h // 2

    return screen_pil, anchor_pil, expected_cx, expected_cy
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests MatchResult
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestMatchResult:
    """Checks that ``MatchResult`` exposes the values it was built with."""

    def test_fields(self):
        """Explicit fields round-trip; ``scale`` is 1.0 when not supplied."""
        given = {
            'x': 100,
            'y': 200,
            'score': 0.85,
            'method': 'template',
            'time_ms': 5.0,
        }
        match = MatchResult(**given)
        for field_name, expected in given.items():
            assert getattr(match, field_name) == expected
        assert match.scale == 1.0  # not passed above, so this is the default

    def test_with_scale(self):
        """An explicit ``scale`` overrides the default."""
        match = MatchResult(
            x=10,
            y=20,
            score=0.9,
            method='template_multiscale',
            time_ms=12.0,
            scale=0.95,
        )
        assert match.scale == 0.95
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests TemplateMatcher — init
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestTemplateMatcherInit:
    """Checks the attributes set by the ``TemplateMatcher`` constructor."""

    def test_defaults(self):
        """A matcher built without arguments uses threshold 0.75, colour, single scale."""
        matcher = TemplateMatcher()
        assert matcher.threshold == 0.75
        assert matcher.multiscale is False
        assert matcher.grayscale is False

    def test_custom_params(self):
        """Constructor keyword arguments are stored on the instance unchanged."""
        matcher = TemplateMatcher(
            threshold=0.5,
            multiscale=True,
            grayscale=True,
            scales=[1.0, 0.8],
        )
        assert matcher.threshold == 0.5
        assert matcher.multiscale is True
        assert matcher.grayscale is True
        assert matcher.scales == [1.0, 0.8]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests TemplateMatcher — _decode_anchor
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestDecodeAnchor:
    """Tests for ``TemplateMatcher._decode_anchor(anchor_b64, anchor_pil)``."""

    def test_pil_passthrough(self):
        """A PIL image given as second argument is returned as the same object."""
        img = _make_image(50, 50)
        result = TemplateMatcher._decode_anchor(None, img)
        assert result is img

    def test_b64_decode(self):
        """A base64 PNG string is decoded into an image of the original size."""
        img = _make_image(50, 50, (255, 0, 0))
        b64 = _pil_to_b64(img)
        result = TemplateMatcher._decode_anchor(b64, None)
        assert result is not None
        assert result.size == (50, 50)

    def test_b64_with_data_prefix(self):
        """A ``data:image/png;base64,`` prefix in front of the payload is accepted."""
        img = _make_image(30, 30)
        b64 = "data:image/png;base64," + _pil_to_b64(img)
        result = TemplateMatcher._decode_anchor(b64, None)
        assert result is not None
        # Same check as test_b64_decode: the payload itself must have been
        # decoded, not merely "something" returned.
        assert result.size == (30, 30)

    def test_none_inputs(self):
        """With neither a base64 string nor a PIL image, the result is None."""
        result = TemplateMatcher._decode_anchor(None, None)
        assert result is None

    def test_invalid_b64(self):
        """A string that cannot be decoded yields None."""
        result = TemplateMatcher._decode_anchor("not-valid-base64!!!", None)
        assert result is None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests TemplateMatcher — match_screen avec screen_pil fourni
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestMatchScreenWithPIL:
    """Tests for ``TemplateMatcher.match_screen`` with an explicit ``screen_pil``."""

    def test_exact_match(self):
        """A pixel-identical anchor is located at the centre of the injected pattern."""
        screen, anchor, cx, cy = _make_screen_with_target()
        m = TemplateMatcher(threshold=0.75)
        result = m.match_screen(anchor_pil=anchor, screen_pil=screen)
        assert result is not None
        assert abs(result.x - cx) <= 1
        assert abs(result.y - cy) <= 1
        assert result.score > 0.9
        assert result.method == 'template'
        assert result.time_ms >= 0

    def test_no_match(self):
        """An anchor that is absent from a noisy screen produces no result."""
        rng = np.random.RandomState(123)
        screen_np = rng.randint(0, 256, (600, 800, 3), dtype=np.uint8)
        screen = Image.fromarray(screen_np)

        # Anchor: regular yellow/cyan checkerboard that is not in the noise.
        # Built with a boolean mask instead of a per-pixel Python loop.
        rows, cols = np.indices((40, 60))
        even_cell = (rows + cols) % 2 == 0
        anchor_np = np.zeros((40, 60, 3), dtype=np.uint8)
        anchor_np[even_cell] = (255, 255, 0)
        anchor_np[~even_cell] = (0, 255, 255)
        anchor = Image.fromarray(anchor_np)

        m = TemplateMatcher(threshold=0.75)
        result = m.match_screen(anchor_pil=anchor, screen_pil=screen)
        assert result is None

    def test_b64_anchor(self):
        """The anchor may be supplied as a base64 PNG instead of a PIL image."""
        screen, anchor, cx, cy = _make_screen_with_target()
        b64 = _pil_to_b64(anchor)
        m = TemplateMatcher(threshold=0.75)
        result = m.match_screen(anchor_b64=b64, screen_pil=screen)
        assert result is not None
        assert abs(result.x - cx) <= 1
        # PNG is lossless, so the location must agree with test_exact_match
        # on both axes, not only x.
        assert abs(result.y - cy) <= 1

    def test_anchor_bigger_than_screen(self):
        """An anchor larger than the screen yields None."""
        screen = _make_image(100, 100)
        anchor = _make_image(200, 200)
        m = TemplateMatcher()
        result = m.match_screen(anchor_pil=anchor, screen_pil=screen)
        assert result is None

    def test_threshold_configurable(self):
        """With a 0.999 threshold, any match that is returned scores at least 0.999."""
        screen, anchor, _, _ = _make_screen_with_target()
        m = TemplateMatcher(threshold=0.999)
        result = m.match_screen(anchor_pil=anchor, screen_pil=screen)
        # A pixel-perfect match may score exactly 1.0 or marginally below it,
        # so a rejected match (None) is deliberately tolerated here; only a
        # returned match is checked against the threshold.
        if result is not None:
            assert result.score >= 0.999
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests TemplateMatcher — multi-scale
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestMultiscale:
    """Tests for ``TemplateMatcher`` built with ``multiscale=True``."""

    def test_multiscale_exact(self):
        """A pixel-identical anchor is still found when multi-scale search is on."""
        screen, anchor, cx, cy = _make_screen_with_target()
        m = TemplateMatcher(threshold=0.75, multiscale=True)
        result = m.match_screen(anchor_pil=anchor, screen_pil=screen)
        assert result is not None
        assert abs(result.x - cx) <= 2
        assert abs(result.y - cy) <= 2
        assert result.score > 0.9

    def test_multiscale_scaled_anchor(self):
        """The anchor was captured at a slightly different scale.

        A coarse pattern (a solid colour block) is used so that resizing does
        not destroy it the way it would destroy a fine checkerboard.
        """
        # Noisy screen + large red block.
        rng = np.random.RandomState(42)
        screen_np = rng.randint(50, 200, (600, 800, 3), dtype=np.uint8)
        target = np.full((80, 120, 3), (220, 30, 30), dtype=np.uint8)  # bright red
        # Green bands on the top and bottom rows make the block more distinctive.
        target[:5, :] = [30, 220, 30]
        target[-5:, :] = [30, 220, 30]
        screen_np[200:280, 300:420] = target
        screen = Image.fromarray(screen_np)

        # Original anchor, then the same anchor enlarged to 105% (a modest,
        # realistic scale difference).
        anchor_original = Image.fromarray(target)
        w, h = anchor_original.size
        scaled_anchor = anchor_original.resize((int(w * 1.05), int(h * 1.05)), Image.BILINEAR)

        m_multi = TemplateMatcher(threshold=0.60, multiscale=True)
        result_multi = m_multi.match_screen(anchor_pil=scaled_anchor, screen_pil=screen)
        assert result_multi is not None
        assert result_multi.method == 'template_multiscale'

    def test_multiscale_anchor_too_small(self):
        """Very small anchor: scales that shrink it further must not crash."""
        screen = _make_image(800, 600)
        anchor = _make_image(5, 5, (255, 0, 0))
        m = TemplateMatcher(threshold=0.99, multiscale=True, scales=[0.5, 0.3])
        result = m.match_screen(anchor_pil=anchor, screen_pil=screen)
        # The point of the test is the absence of an exception. Either outcome
        # (no match, or a match) is acceptable, but it must be a well-formed one.
        assert result is None or isinstance(result, MatchResult)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests TemplateMatcher — match_in_region
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestMatchInRegion:
    """Tests for ``TemplateMatcher.match_in_region(region, anchor)`` on NumPy arrays."""

    def test_region_match(self):
        """A checkerboard pasted into a noisy region is found at its centre."""
        rng = np.random.RandomState(77)
        region = rng.randint(0, 256, (200, 300, 3), dtype=np.uint8)

        # Two-colour one-pixel checkerboard, built with a boolean mask instead
        # of a per-pixel Python loop.
        rows, cols = np.indices((40, 60))
        even_cell = (rows + cols) % 2 == 0
        anchor = np.zeros((40, 60, 3), dtype=np.uint8)
        anchor[even_cell] = (255, 0, 0)
        anchor[~even_cell] = (0, 0, 255)
        region[50:90, 100:160] = anchor

        m = TemplateMatcher(threshold=0.75)
        result = m.match_in_region(region, anchor)
        assert result is not None
        assert abs(result.x - 130) <= 1  # 100 + 60//2
        assert abs(result.y - 70) <= 1  # 50 + 40//2

    def test_region_no_match(self):
        """A checkerboard that was never pasted into the region is not found."""
        rng = np.random.RandomState(88)
        region = rng.randint(0, 256, (200, 300, 3), dtype=np.uint8)

        rows, cols = np.indices((40, 60))
        even_cell = (rows + cols) % 2 == 0
        anchor = np.zeros((40, 60, 3), dtype=np.uint8)
        anchor[even_cell] = (255, 255, 0)
        anchor[~even_cell] = (0, 255, 255)

        m = TemplateMatcher(threshold=0.75)
        result = m.match_in_region(region, anchor)
        assert result is None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests grayscale mode
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestGrayscale:
    """Tests for ``TemplateMatcher`` built with ``grayscale=True``."""

    def test_grayscale_match(self):
        """The injected pattern is still located when matching in grayscale."""
        screen, anchor, cx, cy = _make_screen_with_target()
        m = TemplateMatcher(threshold=0.75, grayscale=True)
        result = m.match_screen(anchor_pil=anchor, screen_pil=screen)
        assert result is not None
        assert abs(result.x - cx) <= 1
        # Check the vertical coordinate too, so a match on the right column
        # but the wrong row cannot pass.
        assert abs(result.y - cy) <= 1
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests _capture_screen (mocké)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestCaptureScreen:
    """Tests for ``TemplateMatcher._capture_screen`` with the module patched."""

    def test_no_mss(self):
        """With the module-level ``_MSS`` flag forced to False, capture returns None."""
        # NOTE(review): _MSS presumably records whether the `mss` package could
        # be imported — confirm in core/grounding/template_matcher.py.
        with patch('core.grounding.template_matcher._MSS', False):
            captured = TemplateMatcher._capture_screen()
        assert captured is None
|