"""Tests unitaires pour l'intégration SomEngine dans build_replay et resolve_target. Vérifie : - Phase 1 : _som_identify_clicked_element enrichit target_spec avec som_element - Phase 2 : _resolve_by_som utilise SomEngine + VLM pour résoudre une cible - Fallbacks gracieux quand SomEngine ou VLM indisponible """ from pathlib import Path from unittest.mock import MagicMock, patch import pytest # ── Phase 1 : Enrichissement build_replay ── class TestSomIdentifyClickedElement: """Tests pour _som_identify_clicked_element (Phase 1).""" def test_returns_none_when_engine_unavailable(self): """Si SomEngine n'est pas disponible, retourne None sans erreur.""" from agent_v0.server_v1.stream_processor import _som_identify_clicked_element with patch( "agent_v0.server_v1.stream_processor._get_som_engine", return_value=None, ): result = _som_identify_clicked_element( {"screenshot_id": "shot_0001", "pos": [500, 300]}, Path("/fake/dir"), 1920, 1080, ) assert result is None def test_returns_none_when_no_session_dir(self): """Sans session_dir, retourne None.""" from agent_v0.server_v1.stream_processor import _som_identify_clicked_element result = _som_identify_clicked_element( {"screenshot_id": "shot_0001", "pos": [500, 300]}, None, 1920, 1080, ) assert result is None def test_returns_none_when_no_screenshot_id(self): """Sans screenshot_id, retourne None.""" from agent_v0.server_v1.stream_processor import _som_identify_clicked_element result = _som_identify_clicked_element( {"pos": [500, 300]}, Path("/fake/dir"), 1920, 1080, ) assert result is None def test_returns_element_when_found(self, tmp_path): """Quand SomEngine trouve un élément sous le clic, retourne ses infos.""" from core.detection.som_engine import SomElement, SomResult from agent_v0.server_v1.stream_processor import _som_identify_clicked_element # Créer un faux screenshot shots_dir = tmp_path / "shots" shots_dir.mkdir() from PIL import Image img = Image.new("RGB", (1920, 1080), color="white") img.save(shots_dir / "shot_0001_full.png") # Mock SomEngine mock_elem = SomElement( id=5, bbox=(480, 280, 520, 320), bbox_norm=(0.25, 0.259, 0.271, 0.296), center=(500, 300), center_norm=(0.2604, 0.2778), source="yolo", label="Enregistrer", confidence=0.92, ) mock_result = SomResult( elements=[mock_elem], width=1920, height=1080, ) mock_engine = MagicMock() mock_engine.analyze.return_value = mock_result with patch( "agent_v0.server_v1.stream_processor._get_som_engine", return_value=mock_engine, ): result = _som_identify_clicked_element( {"screenshot_id": "shot_0001", "pos": [500, 300]}, tmp_path, 1920, 1080, ) assert result is not None assert result["id"] == 5 assert result["label"] == "Enregistrer" assert result["source"] == "yolo" assert result["confidence"] == 0.92 assert result["element_count"] == 1 def test_returns_none_when_no_element_at_click(self, tmp_path): """Quand aucun élément n'est sous le clic, retourne None.""" from core.detection.som_engine import SomResult from agent_v0.server_v1.stream_processor import _som_identify_clicked_element shots_dir = tmp_path / "shots" shots_dir.mkdir() from PIL import Image img = Image.new("RGB", (1920, 1080), color="white") img.save(shots_dir / "shot_0001_full.png") # Résultat avec des éléments mais pas au point du clic mock_result = SomResult(elements=[], width=1920, height=1080) mock_engine = MagicMock() mock_engine.analyze.return_value = mock_result with patch( "agent_v0.server_v1.stream_processor._get_som_engine", return_value=mock_engine, ): result = _som_identify_clicked_element( {"screenshot_id": "shot_0001", "pos": [500, 300]}, tmp_path, 1920, 1080, ) assert result is None # ── Phase 2 : Résolution SoM + VLM ── class TestResolveBySom: """Tests pour _resolve_by_som (Phase 2).""" def test_returns_none_when_engine_unavailable(self): """Sans SomEngine, retourne None.""" from agent_v0.server_v1.api_stream import _resolve_by_som with patch( "agent_v0.server_v1.api_stream._get_som_engine_api", return_value=None, ): result = _resolve_by_som( "/fake/path.jpg", {"vlm_description": "un bouton"}, 1920, 1080, ) assert result is None def test_returns_none_when_vlm_unavailable(self): """Sans VLM, retourne None.""" from agent_v0.server_v1.api_stream import _resolve_by_som mock_engine = MagicMock() with patch( "agent_v0.server_v1.api_stream._get_som_engine_api", return_value=mock_engine, ), patch( "agent_v0.server_v1.api_stream._get_vlm_client", return_value=None, ): result = _resolve_by_som( "/fake/path.jpg", {"vlm_description": "un bouton"}, 1920, 1080, ) assert result is None def test_returns_none_without_description(self): """Sans description ni som_element, retourne None.""" from agent_v0.server_v1.api_stream import _resolve_by_som mock_engine = MagicMock() mock_client = MagicMock() with patch( "agent_v0.server_v1.api_stream._get_som_engine_api", return_value=mock_engine, ), patch( "agent_v0.server_v1.api_stream._get_vlm_client", return_value=mock_client, ): result = _resolve_by_som( "/fake/path.jpg", {}, # Pas de description 1920, 1080, ) assert result is None def test_resolve_success(self, tmp_path): """Résolution réussie : SomEngine détecte, VLM identifie le mark.""" from core.detection.som_engine import SomElement, SomResult from agent_v0.server_v1.api_stream import _resolve_by_som # Créer un faux screenshot from PIL import Image img = Image.new("RGB", (1920, 1080), color="white") screenshot_path = str(tmp_path / "screen.jpg") img.save(screenshot_path) # Mock SomEngine mock_elem = SomElement( id=9, bbox=(960, 540, 1000, 570), bbox_norm=(0.5, 0.5, 0.521, 0.528), center=(980, 555), center_norm=(0.5104, 0.5139), source="ocr", label="Ouvrir", confidence=0.88, ) mock_result = SomResult( elements=[mock_elem], som_image=img.copy(), som_image_b64="fake_b64", width=1920, height=1080, ) mock_engine = MagicMock() mock_engine.analyze.return_value = mock_result # Mock VLM client mock_client = MagicMock() mock_client.generate.return_value = { "success": True, "response": '{"mark_id": 9, "confidence": 0.95}', } mock_client._extract_json_from_response.return_value = { "mark_id": 9, "confidence": 0.95, } with patch( "agent_v0.server_v1.api_stream._get_som_engine_api", return_value=mock_engine, ), patch( "agent_v0.server_v1.api_stream._get_vlm_client", return_value=mock_client, ): result = _resolve_by_som( screenshot_path, { "vlm_description": "le bouton Ouvrir", "som_element": {"id": 9, "label": "Ouvrir"}, }, 1920, 1080, ) assert result is not None assert result["resolved"] is True assert result["method"] in ("som_vlm", "som_text_match") assert abs(result["x_pct"] - 0.5104) < 0.001 assert abs(result["y_pct"] - 0.5139) < 0.001 assert result["matched_element"]["som_id"] == 9 def test_resolve_vlm_low_confidence(self, tmp_path): """VLM retourne une confiance trop basse → None.""" from core.detection.som_engine import SomResult from agent_v0.server_v1.api_stream import _resolve_by_som from PIL import Image img = Image.new("RGB", (1920, 1080), color="white") screenshot_path = str(tmp_path / "screen.jpg") img.save(screenshot_path) mock_result = SomResult( elements=[MagicMock(id=1, label="test", source="ocr")], som_image=img.copy(), width=1920, height=1080, ) mock_engine = MagicMock() mock_engine.analyze.return_value = mock_result mock_client = MagicMock() mock_client.generate.return_value = { "success": True, "response": '{"mark_id": 1, "confidence": 0.1}', } mock_client._extract_json_from_response.return_value = { "mark_id": 1, "confidence": 0.1, } with patch( "agent_v0.server_v1.api_stream._get_som_engine_api", return_value=mock_engine, ), patch( "agent_v0.server_v1.api_stream._get_vlm_client", return_value=mock_client, ): result = _resolve_by_som( screenshot_path, {"vlm_description": "un bouton"}, 1920, 1080, ) assert result is None