Files
rpa_vision_v3/tests/unit/test_template_matcher.py
Dom 9da589c8c2 feat(grounding): pipeline centralisé + serveur UI-TARS transformers + nettoyage code mort
Architecture grounding complète :
- core/grounding/server.py : serveur FastAPI (port 8200) avec UI-TARS-1.5-7B en 4-bit NF4
  Process séparé avec son propre contexte CUDA (résout le crash Flask/CUDA)
- core/grounding/pipeline.py : orchestrateur cascade template→OCR→UI-TARS→static
- core/grounding/template_matcher.py : TemplateMatcher centralisé (remplace 5 copies)
- core/grounding/ui_tars_grounder.py : client HTTP vers le serveur de grounding
- core/grounding/target.py : GroundingTarget + GroundingResult

ORA modifié :
- _act_click() : capture unique de l'écran envoyée au serveur de grounding
- Pre-check VLM skippé pour ui_tars (redondant, et Ollama n'a plus de VRAM)
- verify_level='none' par défaut (vérification titre OCR prévue en Phase 2)
- Détection réponses négatives UI-TARS ("I don't see it" → fallback OCR)

Nettoyage :
- 9 fichiers morts archivés dans _archive/ (~6300 lignes supprimées)
- 21 tests ajoutés pour TemplateMatcher

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-25 17:48:18 +02:00

312 lines
12 KiB
Python

"""Tests pour core/grounding/template_matcher.py"""
import base64
import io
import time
from unittest.mock import MagicMock, patch
import cv2
import numpy as np
import pytest
from PIL import Image
from core.grounding.template_matcher import MatchResult, TemplateMatcher
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _make_image(w: int, h: int, color: tuple = (128, 128, 128)) -> Image.Image:
    """Build a solid-colour RGB PIL image.

    Args:
        w: Width in pixels.
        h: Height in pixels.
        color: RGB fill triple; defaults to mid-grey.

    Returns:
        A new ``w`` x ``h`` image filled with ``color``.
    """
    return Image.new('RGB', (w, h), color)
def _pil_to_b64(img: Image.Image) -> str:
    """Serialise a PIL image to a base64-encoded PNG string.

    Args:
        img: Image to encode.

    Returns:
        The PNG bytes of ``img`` as base64 text, without any ``data:`` prefix.
    """
    with io.BytesIO() as png_stream:
        img.save(png_stream, format='PNG')
        png_bytes = png_stream.getvalue()
    return base64.b64encode(png_bytes).decode()
def _make_screen_with_target(
    screen_w: int = 800,
    screen_h: int = 600,
    target_x: int = 300,
    target_y: int = 200,
    target_w: int = 60,
    target_h: int = 40,
) -> tuple:
    """Build a noisy screenshot containing one checkerboard target, plus its anchor.

    The background is seeded random noise, so the injected pattern is meant to
    be the only place where the anchor correlates strongly with the screen.

    Args:
        screen_w: Screenshot width in pixels.
        screen_h: Screenshot height in pixels.
        target_x: Left edge of the injected pattern.
        target_y: Top edge of the injected pattern.
        target_w: Pattern width in pixels.
        target_h: Pattern height in pixels.

    Returns:
        ``(screen_pil, anchor_pil, expected_cx, expected_cy)``: the screenshot,
        the anchor (pixel-identical to the injected pattern) and the centre of
        the pattern in screen coordinates.
    """
    # Fixed seed: every call produces the same background.
    rng = np.random.RandomState(42)
    screen = rng.randint(0, 256, (screen_h, screen_w, 3), dtype=np.uint8)

    # One-pixel red/blue checkerboard, filled through a boolean mask instead of
    # a per-pixel Python loop. Cells where (row + col) is even are red.
    rows, cols = np.indices((target_h, target_w))
    red_cells = (rows + cols) % 2 == 0
    target = np.zeros((target_h, target_w, 3), dtype=np.uint8)
    target[red_cells] = (255, 0, 0)
    target[~red_cells] = (0, 0, 255)

    screen[target_y:target_y + target_h, target_x:target_x + target_w] = target

    screen_pil = Image.fromarray(screen)
    # The anchor is exactly the pattern that was pasted into the screen.
    anchor_pil = Image.fromarray(target)

    expected_cx = target_x + target_w // 2
    expected_cy = target_y + target_h // 2
    return screen_pil, anchor_pil, expected_cx, expected_cy
# ---------------------------------------------------------------------------
# Tests MatchResult
# ---------------------------------------------------------------------------
class TestMatchResult:
    """Attribute-level checks on the MatchResult value object."""

    def test_fields(self):
        """Constructor arguments come back as attributes; scale defaults to 1.0."""
        given = {
            'x': 100,
            'y': 200,
            'score': 0.85,
            'method': 'template',
            'time_ms': 5.0,
        }
        match = MatchResult(**given)
        for field_name, expected in given.items():
            assert getattr(match, field_name) == expected
        # scale was not passed, so this is the default value.
        assert match.scale == 1.0

    def test_with_scale(self):
        """An explicit scale overrides the default."""
        match = MatchResult(
            x=10,
            y=20,
            score=0.9,
            method='template_multiscale',
            time_ms=12.0,
            scale=0.95,
        )
        assert match.scale == 0.95
# ---------------------------------------------------------------------------
# Tests TemplateMatcher — init
# ---------------------------------------------------------------------------
class TestTemplateMatcherInit:
    """Constructor defaults and overrides of TemplateMatcher."""

    def test_defaults(self):
        """Without arguments: threshold 0.75, multiscale and grayscale off."""
        matcher = TemplateMatcher()
        assert matcher.threshold == 0.75
        assert matcher.multiscale is False
        assert matcher.grayscale is False

    def test_custom_params(self):
        """Every keyword argument is stored on the instance unchanged."""
        overrides = dict(
            threshold=0.5,
            multiscale=True,
            grayscale=True,
            scales=[1.0, 0.8],
        )
        matcher = TemplateMatcher(**overrides)
        assert matcher.threshold == 0.5
        assert matcher.multiscale is True
        assert matcher.grayscale is True
        assert matcher.scales == [1.0, 0.8]
# ---------------------------------------------------------------------------
# Tests TemplateMatcher — _decode_anchor
# ---------------------------------------------------------------------------
class TestDecodeAnchor:
    """TemplateMatcher._decode_anchor(b64, pil), called directly on the class."""

    def test_pil_passthrough(self):
        """A PIL anchor is handed back as the very same object."""
        source = _make_image(50, 50)
        assert TemplateMatcher._decode_anchor(None, source) is source

    def test_b64_decode(self):
        """A base64 PNG decodes to an image of the original size."""
        encoded = _pil_to_b64(_make_image(50, 50, (255, 0, 0)))
        decoded = TemplateMatcher._decode_anchor(encoded, None)
        assert decoded is not None
        assert decoded.size == (50, 50)

    def test_b64_with_data_prefix(self):
        """A ``data:image/png;base64,`` prefix does not prevent decoding."""
        data_uri = "data:image/png;base64," + _pil_to_b64(_make_image(30, 30))
        decoded = TemplateMatcher._decode_anchor(data_uri, None)
        assert decoded is not None

    def test_none_inputs(self):
        """With neither a base64 string nor a PIL image, the result is None."""
        assert TemplateMatcher._decode_anchor(None, None) is None

    def test_invalid_b64(self):
        """Undecodable input yields None."""
        assert TemplateMatcher._decode_anchor("not-valid-base64!!!", None) is None
# ---------------------------------------------------------------------------
# Tests TemplateMatcher — match_screen avec screen_pil fourni
# ---------------------------------------------------------------------------
class TestMatchScreenWithPIL:
    """match_screen() called with an explicit ``screen_pil``."""

    def test_exact_match(self):
        """A pixel-identical anchor is found at the injected target's centre."""
        screen, anchor, cx, cy = _make_screen_with_target()
        m = TemplateMatcher(threshold=0.75)
        result = m.match_screen(anchor_pil=anchor, screen_pil=screen)
        assert result is not None
        assert abs(result.x - cx) <= 1
        assert abs(result.y - cy) <= 1
        assert result.score > 0.9
        assert result.method == 'template'
        assert result.time_ms >= 0

    def test_no_match(self):
        """An anchor that is absent from a noise-only screen gives None."""
        rng = np.random.RandomState(123)
        screen = Image.fromarray(
            rng.randint(0, 256, (600, 800, 3), dtype=np.uint8)
        )
        # Regular two-colour checkerboard that was never pasted into the
        # noise; built with a boolean mask rather than a per-pixel loop.
        rows, cols = np.indices((40, 60))
        even_cells = (rows + cols) % 2 == 0
        anchor_np = np.zeros((40, 60, 3), dtype=np.uint8)
        anchor_np[even_cells] = (255, 255, 0)
        anchor_np[~even_cells] = (0, 255, 255)
        anchor = Image.fromarray(anchor_np)

        m = TemplateMatcher(threshold=0.75)
        result = m.match_screen(anchor_pil=anchor, screen_pil=screen)
        assert result is None

    def test_b64_anchor(self):
        """The anchor may be supplied as base64 instead of a PIL image."""
        screen, anchor, cx, cy = _make_screen_with_target()
        b64 = _pil_to_b64(anchor)
        m = TemplateMatcher(threshold=0.75)
        result = m.match_screen(anchor_b64=b64, screen_pil=screen)
        assert result is not None
        assert abs(result.x - cx) <= 1
        # Check both axes, as test_exact_match does: a hit in the right column
        # but the wrong row must not pass.
        assert abs(result.y - cy) <= 1

    def test_anchor_bigger_than_screen(self):
        """An anchor larger than the screen gives None."""
        screen = _make_image(100, 100)
        anchor = _make_image(200, 200)
        m = TemplateMatcher()
        result = m.match_screen(anchor_pil=anchor, screen_pil=screen)
        assert result is None

    def test_threshold_configurable(self):
        """A custom threshold is stored and respected by match_screen()."""
        screen, anchor, _, _ = _make_screen_with_target()
        m = TemplateMatcher(threshold=0.999)
        # Unconditional check so the test can never pass without asserting.
        assert m.threshold == 0.999
        result = m.match_screen(anchor_pil=anchor, screen_pil=screen)
        # A pixel-perfect match may score exactly 1.0 or marginally below it,
        # so both "match" and "no match" are accepted here. What must hold is
        # that any returned match honours the threshold. Compare against None
        # explicitly instead of relying on the truthiness of the result.
        if result is not None:
            assert result.score >= 0.999
# ---------------------------------------------------------------------------
# Tests TemplateMatcher — multi-scale
# ---------------------------------------------------------------------------
class TestMultiscale:
    """match_screen() with ``multiscale=True``."""

    def test_multiscale_exact(self):
        """An unscaled anchor is still located when multiscale is enabled."""
        screen, anchor, cx, cy = _make_screen_with_target()
        m = TemplateMatcher(threshold=0.75, multiscale=True)
        result = m.match_screen(anchor_pil=anchor, screen_pil=screen)
        assert result is not None
        assert abs(result.x - cx) <= 2
        assert abs(result.y - cy) <= 2
        assert result.score > 0.9

    def test_multiscale_scaled_anchor(self):
        """The anchor was captured at a slightly different scale.

        A large block of solid colour is used so that resizing does not
        destroy the pattern, as it would with a fine checkerboard.
        """
        # Noisy screen with a restricted value range (50..199).
        rng = np.random.RandomState(42)
        screen_np = rng.randint(50, 200, (600, 800, 3), dtype=np.uint8)

        # Bright red block with green bands along the top and bottom edges.
        target = np.empty((80, 120, 3), dtype=np.uint8)
        target[:, :] = [220, 30, 30]
        target[:5, :] = [30, 220, 30]
        target[-5:, :] = [30, 220, 30]
        screen_np[200:280, 300:420] = target
        screen = Image.fromarray(screen_np)

        # Anchor enlarged to 105 % (a modest, realistic scale difference).
        anchor_original = Image.fromarray(target)
        w, h = anchor_original.size
        scaled_anchor = anchor_original.resize(
            (int(w * 1.05), int(h * 1.05)), Image.BILINEAR
        )

        m_multi = TemplateMatcher(threshold=0.60, multiscale=True)
        result_multi = m_multi.match_screen(
            anchor_pil=scaled_anchor, screen_pil=screen
        )
        assert result_multi is not None
        assert result_multi.method == 'template_multiscale'

    def test_multiscale_anchor_too_small(self):
        """A tiny anchor with small scale factors must not crash."""
        screen = _make_image(800, 600)
        anchor = _make_image(5, 5, (255, 0, 0))
        m = TemplateMatcher(threshold=0.99, multiscale=True, scales=[0.5, 0.3])
        result = m.match_screen(anchor_pil=anchor, screen_pil=screen)
        # Either outcome is acceptable depending on the content; what matters
        # is that the call returns normally with a well-formed value.
        assert result is None or isinstance(result, MatchResult)
# ---------------------------------------------------------------------------
# Tests TemplateMatcher — match_in_region
# ---------------------------------------------------------------------------
class TestMatchInRegion:
    """match_in_region() called with NumPy arrays for region and anchor."""

    @staticmethod
    def _checkerboard(height, width, even_color, odd_color):
        """Return a ``height`` x ``width`` x 3 uint8 one-pixel checkerboard.

        Cells where ``(row + col)`` is even get ``even_color``, the others
        ``odd_color``. Built with a boolean mask rather than a Python loop.
        """
        rows, cols = np.indices((height, width))
        even_cells = (rows + cols) % 2 == 0
        board = np.zeros((height, width, 3), dtype=np.uint8)
        board[even_cells] = even_color
        board[~even_cells] = odd_color
        return board

    def test_region_match(self):
        """A pattern pasted into a noisy region is found at its centre."""
        anchor_h, anchor_w = 40, 60
        top, left = 50, 100

        rng = np.random.RandomState(77)
        region = rng.randint(0, 256, (200, 300, 3), dtype=np.uint8)
        anchor = self._checkerboard(anchor_h, anchor_w, (255, 0, 0), (0, 0, 255))
        region[top:top + anchor_h, left:left + anchor_w] = anchor

        m = TemplateMatcher(threshold=0.75)
        result = m.match_in_region(region, anchor)
        assert result is not None
        assert abs(result.x - (left + anchor_w // 2)) <= 1  # 130
        assert abs(result.y - (top + anchor_h // 2)) <= 1  # 70

    def test_region_no_match(self):
        """A pattern that is absent from the noisy region gives None."""
        rng = np.random.RandomState(88)
        region = rng.randint(0, 256, (200, 300, 3), dtype=np.uint8)
        anchor = self._checkerboard(40, 60, (255, 255, 0), (0, 255, 255))

        m = TemplateMatcher(threshold=0.75)
        result = m.match_in_region(region, anchor)
        assert result is None
# ---------------------------------------------------------------------------
# Tests grayscale mode
# ---------------------------------------------------------------------------
class TestGrayscale:
    """match_screen() with ``grayscale=True``."""

    def test_grayscale_match(self):
        """The injected target is still located horizontally in grayscale mode."""
        fixture = _make_screen_with_target()
        screen, anchor, expected_x = fixture[0], fixture[1], fixture[2]
        matcher = TemplateMatcher(threshold=0.75, grayscale=True)
        found = matcher.match_screen(anchor_pil=anchor, screen_pil=screen)
        assert found is not None
        assert abs(found.x - expected_x) <= 1
# ---------------------------------------------------------------------------
# Tests _capture_screen (mocké)
# ---------------------------------------------------------------------------
class TestCaptureScreen:
    """TemplateMatcher._capture_screen() with its module flag patched."""

    def test_no_mss(self):
        """With the module's ``_MSS`` flag set to False, the result is None."""
        with patch('core.grounding.template_matcher._MSS', False):
            captured = TemplateMatcher._capture_screen()
        assert captured is None