feat: intégration SomEngine dans build_replay (Phase 1) et resolve_target (Phase 2)
Phase 1 : enrichit chaque clic avec som_element (id, label, bbox) via YOLO+docTR.
Phase 2 : nouvelle résolution SoM+VLM — SomEngine numérote, le VLM identifie le mark.
10 tests unitaires ajoutés ; conftest unit/ pour forcer le bon chemin agent_v0.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
26
tests/unit/conftest.py
Normal file
26
tests/unit/conftest.py
Normal file
@@ -0,0 +1,26 @@
|
||||
"""Conftest pour les tests unitaires.
|
||||
|
||||
Force le bon chemin agent_v0 (rpa_vision_v3) pour éviter les conflits
|
||||
avec ~/ai/agent_v0 (standalone).
|
||||
"""
|
||||
import sys
from pathlib import Path

# This file lives at tests/unit/conftest.py, so parents[2] is the repository
# root, i.e. the directory that contains the ``agent_v0`` package to test.
ROOT = str(Path(__file__).resolve().parents[2])

# Put ROOT first on sys.path, moving it there if it is already listed lower
# down, so that it wins over any other location providing ``agent_v0``.
if ROOT in sys.path:
    sys.path.remove(ROOT)
sys.path.insert(0, ROOT)

# If agent_v0 was already imported from somewhere other than ROOT, forget it
# (and all of its submodules) so that the next import picks up the right one.
_agent_mod = sys.modules.get("agent_v0")
if _agent_mod is not None:
    # ``__file__`` may be missing, or present but None (namespace packages);
    # both cases are treated as "not loaded from ROOT".
    _agent_file = getattr(_agent_mod, "__file__", None) or ""
    # Compare real path components rather than string prefixes, so that a
    # sibling directory such as "<ROOT>_backup" is not mistaken for ROOT.
    _loaded_from_root = bool(_agent_file) and (
        Path(ROOT) in Path(_agent_file).resolve().parents
    )
    if not _loaded_from_root:
        _stale_names = [
            name
            for name in sys.modules
            if name == "agent_v0" or name.startswith("agent_v0.")
        ]
        for _name in _stale_names:
            del sys.modules[_name]

# Pre-import the right agent_v0.server_v1 so later imports reuse it.
try:
    import agent_v0.server_v1  # noqa: F401
except ImportError:
    # Deliberate best effort: tests that need the package will fail with
    # their own, more specific import error.
    pass
|
||||
301
tests/unit/test_som_integration.py
Normal file
301
tests/unit/test_som_integration.py
Normal file
@@ -0,0 +1,301 @@
|
||||
"""Tests unitaires pour l'intégration SomEngine dans build_replay et resolve_target.
|
||||
|
||||
Vérifie :
|
||||
- Phase 1 : _som_identify_clicked_element enrichit target_spec avec som_element
|
||||
- Phase 2 : _resolve_by_som utilise SomEngine + VLM pour résoudre une cible
|
||||
- Fallbacks gracieux quand SomEngine ou VLM indisponible
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ── Phase 1 : Enrichissement build_replay ──
|
||||
|
||||
|
||||
class TestSomIdentifyClickedElement:
    """Unit tests for ``_som_identify_clicked_element`` (Phase 1)."""

    # Dotted path of the engine factory that the tests replace with a stub.
    ENGINE_FACTORY = "agent_v0.server_v1.stream_processor._get_som_engine"

    @staticmethod
    def _click_event():
        """Return a fresh click event at (500, 300) on ``shot_0001``."""
        return {"screenshot_id": "shot_0001", "pos": [500, 300]}

    @staticmethod
    def _write_blank_screenshot(session_dir):
        """Save a white 1920x1080 PNG as ``shots/shot_0001_full.png``."""
        from PIL import Image

        shots = session_dir / "shots"
        shots.mkdir()
        blank = Image.new("RGB", (1920, 1080), color="white")
        blank.save(shots / "shot_0001_full.png")

    def test_returns_none_when_engine_unavailable(self):
        """The result is None when the engine factory yields None."""
        from agent_v0.server_v1.stream_processor import _som_identify_clicked_element

        with patch(self.ENGINE_FACTORY, return_value=None):
            outcome = _som_identify_clicked_element(
                self._click_event(), Path("/fake/dir"), 1920, 1080
            )

        assert outcome is None

    def test_returns_none_when_no_session_dir(self):
        """The result is None when the session directory is None."""
        from agent_v0.server_v1.stream_processor import _som_identify_clicked_element

        outcome = _som_identify_clicked_element(
            self._click_event(), None, 1920, 1080
        )

        assert outcome is None

    def test_returns_none_when_no_screenshot_id(self):
        """The result is None when the event carries no ``screenshot_id``."""
        from agent_v0.server_v1.stream_processor import _som_identify_clicked_element

        event_without_shot = {"pos": [500, 300]}
        outcome = _som_identify_clicked_element(
            event_without_shot, Path("/fake/dir"), 1920, 1080
        )

        assert outcome is None

    def test_returns_element_when_found(self, tmp_path):
        """An element located under the click is reported with its details."""
        from core.detection.som_engine import SomElement, SomResult
        from agent_v0.server_v1.stream_processor import _som_identify_clicked_element

        self._write_blank_screenshot(tmp_path)

        # Stub engine reporting a single element whose bbox contains the click.
        clicked = SomElement(
            id=5,
            bbox=(480, 280, 520, 320),
            bbox_norm=(0.25, 0.259, 0.271, 0.296),
            center=(500, 300),
            center_norm=(0.2604, 0.2778),
            source="yolo",
            label="Enregistrer",
            confidence=0.92,
        )
        engine = MagicMock()
        engine.analyze.return_value = SomResult(
            elements=[clicked], width=1920, height=1080
        )

        with patch(self.ENGINE_FACTORY, return_value=engine):
            outcome = _som_identify_clicked_element(
                self._click_event(), tmp_path, 1920, 1080
            )

        assert outcome is not None
        expected_fields = {
            "id": 5,
            "label": "Enregistrer",
            "source": "yolo",
            "confidence": 0.92,
            "element_count": 1,
        }
        for key, expected in expected_fields.items():
            assert outcome[key] == expected

    def test_returns_none_when_no_element_at_click(self, tmp_path):
        """The result is None when the engine reports no element at all."""
        from core.detection.som_engine import SomResult
        from agent_v0.server_v1.stream_processor import _som_identify_clicked_element

        self._write_blank_screenshot(tmp_path)

        # The stub engine returns an empty element list, so nothing can match
        # the click position.
        engine = MagicMock()
        engine.analyze.return_value = SomResult(elements=[], width=1920, height=1080)

        with patch(self.ENGINE_FACTORY, return_value=engine):
            outcome = _som_identify_clicked_element(
                self._click_event(), tmp_path, 1920, 1080
            )

        assert outcome is None
|
||||
|
||||
|
||||
# ── Phase 2 : Résolution SoM + VLM ──
|
||||
|
||||
|
||||
class TestResolveBySom:
    """Unit tests for ``_resolve_by_som`` (Phase 2)."""

    # Dotted paths of the two factories that the tests replace with stubs.
    ENGINE_FACTORY = "agent_v0.server_v1.api_stream._get_som_engine_api"
    VLM_FACTORY = "agent_v0.server_v1.api_stream._get_vlm_client"

    @staticmethod
    def _blank_screenshot(directory):
        """Save a white 1920x1080 ``screen.jpg``; return ``(image, path)``."""
        from PIL import Image

        image = Image.new("RGB", (1920, 1080), color="white")
        path = str(directory / "screen.jpg")
        image.save(path)
        return image, path

    @staticmethod
    def _vlm_client(raw_response, parsed_response):
        """Build a stub VLM client with canned raw and parsed answers."""
        client = MagicMock()
        client.generate.return_value = {"success": True, "response": raw_response}
        client._extract_json_from_response.return_value = parsed_response
        return client

    def test_returns_none_when_engine_unavailable(self):
        """The result is None when the engine factory yields None."""
        from agent_v0.server_v1.api_stream import _resolve_by_som

        with patch(self.ENGINE_FACTORY, return_value=None):
            outcome = _resolve_by_som(
                "/fake/path.jpg", {"vlm_description": "un bouton"}, 1920, 1080
            )

        assert outcome is None

    def test_returns_none_when_vlm_unavailable(self):
        """The result is None when the VLM client factory yields None."""
        from agent_v0.server_v1.api_stream import _resolve_by_som

        engine = MagicMock()

        with patch(self.ENGINE_FACTORY, return_value=engine), patch(
            self.VLM_FACTORY, return_value=None
        ):
            outcome = _resolve_by_som(
                "/fake/path.jpg", {"vlm_description": "un bouton"}, 1920, 1080
            )

        assert outcome is None

    def test_returns_none_without_description(self):
        """The result is None for an empty target spec."""
        from agent_v0.server_v1.api_stream import _resolve_by_som

        engine = MagicMock()
        client = MagicMock()
        empty_spec = {}  # neither a description nor a som_element

        with patch(self.ENGINE_FACTORY, return_value=engine), patch(
            self.VLM_FACTORY, return_value=client
        ):
            outcome = _resolve_by_som("/fake/path.jpg", empty_spec, 1920, 1080)

        assert outcome is None

    def test_resolve_success(self, tmp_path):
        """The engine detects one element and the VLM picks its mark."""
        from core.detection.som_engine import SomElement, SomResult
        from agent_v0.server_v1.api_stream import _resolve_by_som

        image, screenshot_path = self._blank_screenshot(tmp_path)

        # Stub engine reporting a single OCR element labelled "Ouvrir".
        target = SomElement(
            id=9,
            bbox=(960, 540, 1000, 570),
            bbox_norm=(0.5, 0.5, 0.521, 0.528),
            center=(980, 555),
            center_norm=(0.5104, 0.5139),
            source="ocr",
            label="Ouvrir",
            confidence=0.88,
        )
        engine = MagicMock()
        engine.analyze.return_value = SomResult(
            elements=[target],
            som_image=image.copy(),
            som_image_b64="fake_b64",
            width=1920,
            height=1080,
        )

        # Stub VLM answering with mark 9 at 0.95 confidence.
        client = self._vlm_client(
            '{"mark_id": 9, "confidence": 0.95}',
            {"mark_id": 9, "confidence": 0.95},
        )

        target_spec = {
            "vlm_description": "le bouton Ouvrir",
            "som_element": {"id": 9, "label": "Ouvrir"},
        }
        with patch(self.ENGINE_FACTORY, return_value=engine), patch(
            self.VLM_FACTORY, return_value=client
        ):
            outcome = _resolve_by_som(screenshot_path, target_spec, 1920, 1080)

        assert outcome is not None
        assert outcome["resolved"] is True
        assert outcome["method"] == "som_vlm"
        # The resolved position must match the element's normalised centre.
        assert abs(outcome["x_pct"] - 0.5104) < 0.001
        assert abs(outcome["y_pct"] - 0.5139) < 0.001
        assert outcome["matched_element"]["som_id"] == 9

    def test_resolve_vlm_low_confidence(self, tmp_path):
        """A VLM answer with a confidence of 0.1 is expected to yield None."""
        from core.detection.som_engine import SomResult
        from agent_v0.server_v1.api_stream import _resolve_by_som

        image, screenshot_path = self._blank_screenshot(tmp_path)

        engine = MagicMock()
        engine.analyze.return_value = SomResult(
            elements=[MagicMock(id=1, label="test", source="ocr")],
            som_image=image.copy(),
            width=1920,
            height=1080,
        )

        client = self._vlm_client(
            '{"mark_id": 1, "confidence": 0.1}',
            {"mark_id": 1, "confidence": 0.1},
        )

        with patch(self.ENGINE_FACTORY, return_value=engine), patch(
            self.VLM_FACTORY, return_value=client
        ):
            outcome = _resolve_by_som(
                screenshot_path, {"vlm_description": "un bouton"}, 1920, 1080
            )

        assert outcome is None
|
||||
Reference in New Issue
Block a user