""" Tests unitaires de l'enrichissement visuel dans GraphBuilder (chantier C2). Couvre : - `_create_screen_states` : enrichit `ui_elements` via ScreenAnalyzer - `_find_clicked_element` : association spatiale clic → UIElement - `_build_single_action` : TargetSpec avec `by_role`/`by_text` quand ancre - Fallback `by_role="unknown_element"` quand aucun ancrage n'est possible - `_extract_common_ui_elements` : required_roles extrait du cluster - Analyzer qui crash → ScreenState vide, pas de propagation d'exception - Singleton partagé entre deux GraphBuilder (C1) """ from __future__ import annotations from datetime import datetime, timedelta from pathlib import Path from unittest.mock import MagicMock, patch import numpy as np import pytest from PIL import Image from core.graph.graph_builder import GraphBuilder from core.models.base_models import BBox from core.models.raw_session import ( Event, RawSession, RawWindowContext, Screenshot, ) from core.models.screen_state import ( ContextLevel, EmbeddingRef, PerceptionLevel, RawLevel, ScreenState, WindowContext, ) from core.models.ui_element import ( UIElement, UIElementEmbeddings, VisualFeatures, ) from core.pipeline import ( reset_screen_analyzer, reset_screen_state_cache, ) # ----------------------------------------------------------------------------- # Fixtures # ----------------------------------------------------------------------------- @pytest.fixture(autouse=True) def _reset_singletons(): """Isole chaque test des singletons globaux.""" reset_screen_analyzer() reset_screen_state_cache() yield reset_screen_analyzer() reset_screen_state_cache() def _make_click_event(pos, t: float = 1.0, button: str = "left") -> Event: """Event mouse_click minimal (window est requis par le dataclass).""" return Event( t=t, type="mouse_click", window=RawWindowContext(title="Test", app_name="test_app"), data={"button": button, "pos": list(pos)}, ) def _make_key_event(t: float = 1.0, keys=None, text: str = None, ev_type: str = "key_press") -> Event: """Event clavier (key_press ou text_input).""" data = {} if keys is not None: data["keys"] = keys if text is not None: data["text"] = text return Event( t=t, type=ev_type, window=RawWindowContext(title="Test", app_name="test_app"), data=data, ) def _make_ui_element( element_id: str, role: str, label: str, bbox: tuple, el_type: str = "button", ) -> UIElement: """Construire un UIElement minimal pour les tests.""" return UIElement( element_id=element_id, type=el_type, role=role, bbox=BBox.from_tuple(bbox), center=(bbox[0] + bbox[2] // 2, bbox[1] + bbox[3] // 2), label=label, label_confidence=0.95, embeddings=UIElementEmbeddings(), visual_features=VisualFeatures( dominant_color="blue", has_icon=False, shape="rectangle", size_category="medium", ), confidence=0.9, ) def _make_screen_state( session_id: str, index: int, ui_elements: list, title: str = "Test App", detected_text: list = None, ) -> ScreenState: """ScreenState minimal utilisable par _extract_common_ui_elements.""" return ScreenState( screen_state_id=f"{session_id}_state_{index:04d}", timestamp=datetime(2026, 4, 13, 10, 0, index), session_id=session_id, window=WindowContext( app_name="test_app", window_title=title, screen_resolution=[1920, 1080], ), raw=RawLevel( screenshot_path=f"/tmp/shot_{index}.png", capture_method="mss", file_size_bytes=1024, ), perception=PerceptionLevel( embedding=EmbeddingRef( provider="test", vector_id=f"v_{index}", dimensions=512 ), detected_text=detected_text or [], text_detection_method="test", confidence_avg=0.8, ), context=ContextLevel(), metadata={}, ui_elements=ui_elements, ) @pytest.fixture def synthetic_session(tmp_path): """RawSession synthétique avec 2 screenshots alternés.""" session_id = "ui_enrich_session" screens_dir = ( tmp_path / "data" / "training" / "sessions" / session_id / session_id / "screenshots" ) screens_dir.mkdir(parents=True) screenshots = [] events = [] for i in range(4): ts = datetime(2026, 4, 13, 10, 0, i) color = (200, 50, 50) if i % 2 == 0 else (50, 50, 200) img = Image.new("RGB", (400, 300), color) fname = f"screen_{i:03d}.png" img.save(str(screens_dir / fname)) screenshots.append(Screenshot( screenshot_id=f"ss_{i:03d}", relative_path=f"screenshots/{fname}", captured_at=ts.isoformat(), )) events.append(Event( t=float(i), type="mouse_click", window=RawWindowContext( title="App A" if i % 2 == 0 else "App B", app_name="app", ), screenshot_id=f"ss_{i:03d}", data={"button": "left", "pos": [150, 120]}, )) session = RawSession( session_id=session_id, agent_version="test", environment={"screen": {"primary_resolution": [1920, 1080]}}, user={"id": "tester"}, context={}, started_at=datetime(2026, 4, 13, 10, 0, 0), events=events, screenshots=screenshots, ) return session, tmp_path # ----------------------------------------------------------------------------- # Enrichissement des ScreenState via ScreenAnalyzer # ----------------------------------------------------------------------------- class TestCreateScreenStatesEnrichment: """_create_screen_states doit déléguer au ScreenAnalyzer.""" def test_build_from_session_enriches_screen_states( self, synthetic_session, monkeypatch ): """Avec un analyzer mocké, les ui_elements sont propagés aux ScreenState.""" session, tmp_path = synthetic_session monkeypatch.chdir(tmp_path) # Analyzer mocké : renvoie un ScreenState avec 3 UIElement canoniques. fake_elements = [ _make_ui_element("el_1", "primary_action", "Valider", (100, 100, 80, 30)), _make_ui_element("el_2", "cancel", "Annuler", (200, 100, 80, 30)), _make_ui_element("el_3", "form_input", "Nom", (100, 50, 200, 30)), ] def fake_analyze(path, **kwargs): # On renvoie un ScreenState avec le bon nombre d'éléments + OCR. return _make_screen_state( session.session_id, index=0, ui_elements=list(fake_elements), detected_text=["Nom", "Valider", "Annuler"], ) analyzer = MagicMock() analyzer.analyze.side_effect = fake_analyze builder = GraphBuilder( screen_analyzer=analyzer, enable_ui_enrichment=True, enable_quality_validation=False, ) states = builder._create_screen_states(session) assert len(states) == 4 for st in states: assert len(st.ui_elements) == 3 roles = {e.role for e in st.ui_elements} assert {"primary_action", "cancel", "form_input"}.issubset(roles) assert "Valider" in st.perception.detected_text def test_enrichment_disabled_leaves_ui_elements_empty( self, synthetic_session, monkeypatch ): """enable_ui_enrichment=False → ui_elements vide, analyzer jamais appelé.""" session, tmp_path = synthetic_session monkeypatch.chdir(tmp_path) analyzer = MagicMock() builder = GraphBuilder( screen_analyzer=analyzer, enable_ui_enrichment=False, enable_quality_validation=False, ) states = builder._create_screen_states(session) assert len(states) == 4 for st in states: assert st.ui_elements == [] assert st.perception.detected_text == [] # L'analyzer ne doit pas avoir été appelé. analyzer.analyze.assert_not_called() def test_analyzer_failure_falls_back_to_empty( self, synthetic_session, monkeypatch, caplog ): """Un analyzer qui crash → ScreenState vide, log warning, pas d'exception.""" session, tmp_path = synthetic_session monkeypatch.chdir(tmp_path) analyzer = MagicMock() analyzer.analyze.side_effect = RuntimeError("boom (GPU OOM)") builder = GraphBuilder( screen_analyzer=analyzer, enable_ui_enrichment=True, enable_quality_validation=False, ) with caplog.at_level("WARNING"): states = builder._create_screen_states(session) assert len(states) == 4 for st in states: assert st.ui_elements == [] # La metadata trace l'erreur pour le diagnostic assert "analyzer_error" in st.metadata # Un log warning a bien été émis assert any("Enrichissement visuel échoué" in r.getMessage() for r in caplog.records) def test_shared_analyzer_singleton(self, monkeypatch): """Deux GraphBuilder créés sans analyzer explicite partagent le singleton C1.""" fake_analyzer = MagicMock(name="singleton_analyzer") # Ne jamais appeler analyze (pas de screenshots dans ce test) with patch( "core.pipeline.get_screen_analyzer", return_value=fake_analyzer ) as getter: b1 = GraphBuilder(enable_quality_validation=False) b2 = GraphBuilder(enable_quality_validation=False) a1 = b1._get_screen_analyzer() a2 = b2._get_screen_analyzer() assert a1 is fake_analyzer assert a2 is fake_analyzer # get_screen_analyzer appelé deux fois (une par builder), mais # la vraie mutualisation passe par le singleton interne de C1. assert getter.call_count >= 1 # ----------------------------------------------------------------------------- # Association spatiale clic → UIElement # ----------------------------------------------------------------------------- class TestFindClickedElement: """Logique de proximité _find_clicked_element.""" def _builder(self, max_px: float = 50.0) -> GraphBuilder: return GraphBuilder( enable_quality_validation=False, enable_ui_enrichment=False, element_proximity_max_px=max_px, ) def test_find_clicked_element_inside_bbox(self): """Clic strictement dans un bbox → match exact.""" builder = self._builder() elements = [ _make_ui_element("e1", "primary_action", "OK", (50, 50, 150, 150)), _make_ui_element("e2", "cancel", "Annuler", (300, 300, 100, 50)), ] event = _make_click_event([100, 100]) result = builder._find_clicked_element(event, elements) assert result is not None assert result.element_id == "e1" def test_find_clicked_element_nearest_proximity(self): """Clic hors de tout bbox mais à <50px → match au plus proche.""" builder = self._builder(max_px=50.0) elements = [ # bbox à (50,50,100,40) → bord droit = 150, bord bas = 90 _make_ui_element("e_near", "primary_action", "Valider", (50, 50, 100, 40)), # bbox loin (distance >> 50px du clic) _make_ui_element("e_far", "cancel", "Annuler", (500, 500, 80, 30)), ] # Clic à (170, 70) → bord droit de e_near = 150, dx = 20, dy = 0 → 20px event = _make_click_event([170, 70]) result = builder._find_clicked_element(event, elements) assert result is not None assert result.element_id == "e_near" def test_find_clicked_element_too_far_returns_none(self): """Clic à >50px du bbox le plus proche → None.""" builder = self._builder(max_px=50.0) elements = [ _make_ui_element("e1", "primary_action", "OK", (50, 50, 100, 40)), ] # Clic à (300, 300), bbox à (50,50,100,40) → distance ~ 280px event = _make_click_event([300, 300]) result = builder._find_clicked_element(event, elements) assert result is None def test_find_clicked_element_prefers_smallest_containing(self): """Deux bbox contiennent le clic → retourne le plus spécifique (petit).""" builder = self._builder() elements = [ # Grand container _make_ui_element( "container", "data_display", "Form", (0, 0, 800, 600), el_type="container", ), # Petit bouton à l'intérieur _make_ui_element("btn", "primary_action", "OK", (100, 100, 80, 30)), ] event = _make_click_event([120, 110]) result = builder._find_clicked_element(event, elements) assert result is not None assert result.element_id == "btn" def test_find_clicked_element_empty_list(self): builder = self._builder() event = _make_click_event([100, 100]) assert builder._find_clicked_element(event, []) is None def test_find_clicked_element_non_click_event(self): """Un événement non-clic → None (pas d'ancrage spatial pertinent).""" builder = self._builder() elements = [ _make_ui_element("e1", "form_input", "Nom", (100, 100, 100, 30)), ] event = _make_key_event(keys=["Enter"]) assert builder._find_clicked_element(event, elements) is None # ----------------------------------------------------------------------------- # TargetSpec enrichi par _build_single_action # ----------------------------------------------------------------------------- class TestTargetSpecEnrichment: """_build_single_action doit produire des TargetSpec discriminants.""" def test_target_spec_uses_element_role(self): """Clic ancré sur un élément → by_role + by_text + context_hints.""" builder = GraphBuilder( enable_quality_validation=False, enable_ui_enrichment=False, ) elements = [ _make_ui_element("el_ok", "primary_action", "Valider", (100, 100, 120, 40)), ] event = _make_click_event([150, 120]) action = builder._build_single_action(event, source_ui_elements=elements) assert action.type == "mouse_click" assert action.target.by_role == "primary_action" assert action.target.by_text == "Valider" assert action.target.selection_policy == "by_similarity" # Traçabilité dans context_hints assert action.target.context_hints.get("anchor_element_id") == "el_ok" assert "anchor_bbox" in action.target.context_hints assert action.target.context_hints["anchor_bbox"]["x"] == 100 def test_target_spec_fallback_when_no_element(self): """Aucun UIElement → legacy by_role=unknown_element.""" builder = GraphBuilder( enable_quality_validation=False, enable_ui_enrichment=False, ) event = _make_click_event([400, 400]) action = builder._build_single_action(event, source_ui_elements=[]) assert action.target.by_role == "unknown_element" assert action.target.by_text is None # Pas de context_hints d'ancrage assert not action.target.context_hints.get("anchor_element_id") def test_target_spec_fallback_when_click_too_far(self): """Clic loin de tout bbox → fallback unknown_element.""" builder = GraphBuilder( enable_quality_validation=False, enable_ui_enrichment=False, element_proximity_max_px=30.0, ) elements = [ _make_ui_element("far", "cancel", "X", (50, 50, 20, 20)), ] event = _make_click_event([800, 800]) action = builder._build_single_action(event, source_ui_elements=elements) assert action.target.by_role == "unknown_element" def test_keyboard_event_target_unchanged(self): """Les events non-clic conservent leur target_role legacy.""" builder = GraphBuilder( enable_quality_validation=False, enable_ui_enrichment=False, ) event = _make_key_event(text="hello", ev_type="text_input") action = builder._build_single_action(event, source_ui_elements=[]) assert action.target.by_role == "text_field" # ----------------------------------------------------------------------------- # UIConstraint.required_roles depuis _extract_common_ui_elements # ----------------------------------------------------------------------------- class TestRequiredRolesExtraction: def test_required_roles_extracted_from_common_elements(self): """3 ScreenState avec rôle commun → required_roles le contient.""" builder = GraphBuilder( enable_quality_validation=False, enable_ui_enrichment=False, ) # 3 écrans, tous avec "primary_action" (Valider) et 2 avec "cancel" states = [ _make_screen_state( "sid", i, ui_elements=[ _make_ui_element( f"ok_{i}", "primary_action", "Valider", (100, 100, 80, 30), ), _make_ui_element( f"cancel_{i}", "cancel", "Annuler", (200, 100, 80, 30), ) if i < 2 else _make_ui_element( f"other_{i}", "navigation", "Menu", (300, 100, 80, 30), ), ], ) for i in range(3) ] prototype = np.zeros(512, dtype=np.float32) prototype[0] = 1.0 template = builder._create_screen_template(states, prototype) assert template.ui is not None # primary_action présent dans 3/3 écrans → inclus assert "primary_action" in template.ui.required_roles # cancel présent dans 2/3 → ratio 0.66 >= 0.5 → inclus assert "cancel" in template.ui.required_roles # navigation présent dans 1/3 → ratio 0.33 < 0.5 → exclu assert "navigation" not in template.ui.required_roles