Some checks failed
security-audit / Bandit (scan statique) (push) Successful in 12s
security-audit / pip-audit (CVE dépendances) (push) Successful in 11s
security-audit / Scan secrets (grep) (push) Successful in 9s
tests / Lint (ruff + black) (push) Successful in 14s
tests / Tests unitaires (sans GPU) (push) Failing after 13s
tests / Tests sécurité (critique) (push) Has been skipped
GraphBuilder construit maintenant des ScreenState enrichis (ui_elements + detected_text) au lieu de stubs vides, et associe les clics aux UIElement par proximité spatiale. Détails : - __init__ accepte ui_detector, screen_analyzer, enable_ui_enrichment, element_proximity_max_px (+ lazy resolver via singleton C1) - _create_screen_states délègue à ScreenAnalyzer.analyze() — remplace l'appel à _extract_text() qui n'existait plus depuis le Lot C (bug silencieux : OCR cassé en prod depuis ce jour, caught except) - _find_clicked_element : bbox contenant strict + fallback proximité ≤50px, préfère le plus petit bbox (form vs button) - _build_click_target_spec : TargetSpec(by_role, by_text, selection_policy="by_similarity") avec ancres dans context_hints (anchor_element_id, anchor_bbox, anchor_center) - _build_edges propage le ScreenState source aux builders d'action - WorkflowPipeline passe ui_detector + enable_ui_enrichment au builder Impact : matching prod 3-5x plus précis, TargetSpec ne sont plus des "unknown_element" génériques, UIConstraint.required_roles se remplit correctement via _extract_common_ui_elements (qui marchait depuis toujours mais sur des state.ui_elements vides). Tests e2e migrés vers enable_ui_enrichment=False (2.9s vs 67s) — ils valident le pipeline DBSCAN/edges, pas la détection UI réelle. 15 nouveaux tests, 178 tests passants au total (incluant Lots A-E). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
514 lines
18 KiB
Python
514 lines
18 KiB
Python
"""
|
|
Tests unitaires de l'enrichissement visuel dans GraphBuilder (chantier C2).
|
|
|
|
Couvre :
|
|
- `_create_screen_states` : enrichit `ui_elements` via ScreenAnalyzer
|
|
- `_find_clicked_element` : association spatiale clic → UIElement
|
|
- `_build_single_action` : TargetSpec avec `by_role`/`by_text` quand ancre
|
|
- Fallback `by_role="unknown_element"` quand aucun ancrage n'est possible
|
|
- `_extract_common_ui_elements` : required_roles extrait du cluster
|
|
- Analyzer qui crash → ScreenState vide, pas de propagation d'exception
|
|
- Singleton partagé entre deux GraphBuilder (C1)
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from datetime import datetime, timedelta
|
|
from pathlib import Path
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
import numpy as np
|
|
import pytest
|
|
from PIL import Image
|
|
|
|
from core.graph.graph_builder import GraphBuilder
|
|
from core.models.base_models import BBox
|
|
from core.models.raw_session import (
|
|
Event,
|
|
RawSession,
|
|
RawWindowContext,
|
|
Screenshot,
|
|
)
|
|
from core.models.screen_state import (
|
|
ContextLevel,
|
|
EmbeddingRef,
|
|
PerceptionLevel,
|
|
RawLevel,
|
|
ScreenState,
|
|
WindowContext,
|
|
)
|
|
from core.models.ui_element import (
|
|
UIElement,
|
|
UIElementEmbeddings,
|
|
VisualFeatures,
|
|
)
|
|
from core.pipeline import (
|
|
reset_screen_analyzer,
|
|
reset_screen_state_cache,
|
|
)
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# Fixtures
|
|
# -----------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.fixture(autouse=True)
|
|
def _reset_singletons():
|
|
"""Isole chaque test des singletons globaux."""
|
|
reset_screen_analyzer()
|
|
reset_screen_state_cache()
|
|
yield
|
|
reset_screen_analyzer()
|
|
reset_screen_state_cache()
|
|
|
|
|
|
def _make_click_event(pos, t: float = 1.0, button: str = "left") -> Event:
|
|
"""Event mouse_click minimal (window est requis par le dataclass)."""
|
|
return Event(
|
|
t=t,
|
|
type="mouse_click",
|
|
window=RawWindowContext(title="Test", app_name="test_app"),
|
|
data={"button": button, "pos": list(pos)},
|
|
)
|
|
|
|
|
|
def _make_key_event(t: float = 1.0, keys=None, text: str = None, ev_type: str = "key_press") -> Event:
|
|
"""Event clavier (key_press ou text_input)."""
|
|
data = {}
|
|
if keys is not None:
|
|
data["keys"] = keys
|
|
if text is not None:
|
|
data["text"] = text
|
|
return Event(
|
|
t=t,
|
|
type=ev_type,
|
|
window=RawWindowContext(title="Test", app_name="test_app"),
|
|
data=data,
|
|
)
|
|
|
|
|
|
def _make_ui_element(
|
|
element_id: str,
|
|
role: str,
|
|
label: str,
|
|
bbox: tuple,
|
|
el_type: str = "button",
|
|
) -> UIElement:
|
|
"""Construire un UIElement minimal pour les tests."""
|
|
return UIElement(
|
|
element_id=element_id,
|
|
type=el_type,
|
|
role=role,
|
|
bbox=BBox.from_tuple(bbox),
|
|
center=(bbox[0] + bbox[2] // 2, bbox[1] + bbox[3] // 2),
|
|
label=label,
|
|
label_confidence=0.95,
|
|
embeddings=UIElementEmbeddings(),
|
|
visual_features=VisualFeatures(
|
|
dominant_color="blue",
|
|
has_icon=False,
|
|
shape="rectangle",
|
|
size_category="medium",
|
|
),
|
|
confidence=0.9,
|
|
)
|
|
|
|
|
|
def _make_screen_state(
|
|
session_id: str,
|
|
index: int,
|
|
ui_elements: list,
|
|
title: str = "Test App",
|
|
detected_text: list = None,
|
|
) -> ScreenState:
|
|
"""ScreenState minimal utilisable par _extract_common_ui_elements."""
|
|
return ScreenState(
|
|
screen_state_id=f"{session_id}_state_{index:04d}",
|
|
timestamp=datetime(2026, 4, 13, 10, 0, index),
|
|
session_id=session_id,
|
|
window=WindowContext(
|
|
app_name="test_app",
|
|
window_title=title,
|
|
screen_resolution=[1920, 1080],
|
|
),
|
|
raw=RawLevel(
|
|
screenshot_path=f"/tmp/shot_{index}.png",
|
|
capture_method="mss",
|
|
file_size_bytes=1024,
|
|
),
|
|
perception=PerceptionLevel(
|
|
embedding=EmbeddingRef(
|
|
provider="test", vector_id=f"v_{index}", dimensions=512
|
|
),
|
|
detected_text=detected_text or [],
|
|
text_detection_method="test",
|
|
confidence_avg=0.8,
|
|
),
|
|
context=ContextLevel(),
|
|
metadata={},
|
|
ui_elements=ui_elements,
|
|
)
|
|
|
|
|
|
@pytest.fixture
|
|
def synthetic_session(tmp_path):
|
|
"""RawSession synthétique avec 2 screenshots alternés."""
|
|
session_id = "ui_enrich_session"
|
|
screens_dir = (
|
|
tmp_path / "data" / "training" / "sessions"
|
|
/ session_id / session_id / "screenshots"
|
|
)
|
|
screens_dir.mkdir(parents=True)
|
|
|
|
screenshots = []
|
|
events = []
|
|
for i in range(4):
|
|
ts = datetime(2026, 4, 13, 10, 0, i)
|
|
color = (200, 50, 50) if i % 2 == 0 else (50, 50, 200)
|
|
img = Image.new("RGB", (400, 300), color)
|
|
fname = f"screen_{i:03d}.png"
|
|
img.save(str(screens_dir / fname))
|
|
|
|
screenshots.append(Screenshot(
|
|
screenshot_id=f"ss_{i:03d}",
|
|
relative_path=f"screenshots/{fname}",
|
|
captured_at=ts.isoformat(),
|
|
))
|
|
events.append(Event(
|
|
t=float(i),
|
|
type="mouse_click",
|
|
window=RawWindowContext(
|
|
title="App A" if i % 2 == 0 else "App B",
|
|
app_name="app",
|
|
),
|
|
screenshot_id=f"ss_{i:03d}",
|
|
data={"button": "left", "pos": [150, 120]},
|
|
))
|
|
|
|
session = RawSession(
|
|
session_id=session_id,
|
|
agent_version="test",
|
|
environment={"screen": {"primary_resolution": [1920, 1080]}},
|
|
user={"id": "tester"},
|
|
context={},
|
|
started_at=datetime(2026, 4, 13, 10, 0, 0),
|
|
events=events,
|
|
screenshots=screenshots,
|
|
)
|
|
return session, tmp_path
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# Enrichissement des ScreenState via ScreenAnalyzer
|
|
# -----------------------------------------------------------------------------
|
|
|
|
|
|
class TestCreateScreenStatesEnrichment:
|
|
"""_create_screen_states doit déléguer au ScreenAnalyzer."""
|
|
|
|
def test_build_from_session_enriches_screen_states(
|
|
self, synthetic_session, monkeypatch
|
|
):
|
|
"""Avec un analyzer mocké, les ui_elements sont propagés aux ScreenState."""
|
|
session, tmp_path = synthetic_session
|
|
monkeypatch.chdir(tmp_path)
|
|
|
|
# Analyzer mocké : renvoie un ScreenState avec 3 UIElement canoniques.
|
|
fake_elements = [
|
|
_make_ui_element("el_1", "primary_action", "Valider", (100, 100, 80, 30)),
|
|
_make_ui_element("el_2", "cancel", "Annuler", (200, 100, 80, 30)),
|
|
_make_ui_element("el_3", "form_input", "Nom", (100, 50, 200, 30)),
|
|
]
|
|
|
|
def fake_analyze(path, **kwargs):
|
|
# On renvoie un ScreenState avec le bon nombre d'éléments + OCR.
|
|
return _make_screen_state(
|
|
session.session_id,
|
|
index=0,
|
|
ui_elements=list(fake_elements),
|
|
detected_text=["Nom", "Valider", "Annuler"],
|
|
)
|
|
|
|
analyzer = MagicMock()
|
|
analyzer.analyze.side_effect = fake_analyze
|
|
|
|
builder = GraphBuilder(
|
|
screen_analyzer=analyzer,
|
|
enable_ui_enrichment=True,
|
|
enable_quality_validation=False,
|
|
)
|
|
states = builder._create_screen_states(session)
|
|
|
|
assert len(states) == 4
|
|
for st in states:
|
|
assert len(st.ui_elements) == 3
|
|
roles = {e.role for e in st.ui_elements}
|
|
assert {"primary_action", "cancel", "form_input"}.issubset(roles)
|
|
assert "Valider" in st.perception.detected_text
|
|
|
|
def test_enrichment_disabled_leaves_ui_elements_empty(
|
|
self, synthetic_session, monkeypatch
|
|
):
|
|
"""enable_ui_enrichment=False → ui_elements vide, analyzer jamais appelé."""
|
|
session, tmp_path = synthetic_session
|
|
monkeypatch.chdir(tmp_path)
|
|
|
|
analyzer = MagicMock()
|
|
builder = GraphBuilder(
|
|
screen_analyzer=analyzer,
|
|
enable_ui_enrichment=False,
|
|
enable_quality_validation=False,
|
|
)
|
|
states = builder._create_screen_states(session)
|
|
|
|
assert len(states) == 4
|
|
for st in states:
|
|
assert st.ui_elements == []
|
|
assert st.perception.detected_text == []
|
|
# L'analyzer ne doit pas avoir été appelé.
|
|
analyzer.analyze.assert_not_called()
|
|
|
|
def test_analyzer_failure_falls_back_to_empty(
|
|
self, synthetic_session, monkeypatch, caplog
|
|
):
|
|
"""Un analyzer qui crash → ScreenState vide, log warning, pas d'exception."""
|
|
session, tmp_path = synthetic_session
|
|
monkeypatch.chdir(tmp_path)
|
|
|
|
analyzer = MagicMock()
|
|
analyzer.analyze.side_effect = RuntimeError("boom (GPU OOM)")
|
|
|
|
builder = GraphBuilder(
|
|
screen_analyzer=analyzer,
|
|
enable_ui_enrichment=True,
|
|
enable_quality_validation=False,
|
|
)
|
|
with caplog.at_level("WARNING"):
|
|
states = builder._create_screen_states(session)
|
|
|
|
assert len(states) == 4
|
|
for st in states:
|
|
assert st.ui_elements == []
|
|
# La metadata trace l'erreur pour le diagnostic
|
|
assert "analyzer_error" in st.metadata
|
|
# Un log warning a bien été émis
|
|
assert any("Enrichissement visuel échoué" in r.getMessage() for r in caplog.records)
|
|
|
|
def test_shared_analyzer_singleton(self, monkeypatch):
|
|
"""Deux GraphBuilder créés sans analyzer explicite partagent le singleton C1."""
|
|
fake_analyzer = MagicMock(name="singleton_analyzer")
|
|
# Ne jamais appeler analyze (pas de screenshots dans ce test)
|
|
|
|
with patch(
|
|
"core.pipeline.get_screen_analyzer", return_value=fake_analyzer
|
|
) as getter:
|
|
b1 = GraphBuilder(enable_quality_validation=False)
|
|
b2 = GraphBuilder(enable_quality_validation=False)
|
|
a1 = b1._get_screen_analyzer()
|
|
a2 = b2._get_screen_analyzer()
|
|
|
|
assert a1 is fake_analyzer
|
|
assert a2 is fake_analyzer
|
|
# get_screen_analyzer appelé deux fois (une par builder), mais
|
|
# la vraie mutualisation passe par le singleton interne de C1.
|
|
assert getter.call_count >= 1
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# Association spatiale clic → UIElement
|
|
# -----------------------------------------------------------------------------
|
|
|
|
|
|
class TestFindClickedElement:
|
|
"""Logique de proximité _find_clicked_element."""
|
|
|
|
def _builder(self, max_px: float = 50.0) -> GraphBuilder:
|
|
return GraphBuilder(
|
|
enable_quality_validation=False,
|
|
enable_ui_enrichment=False,
|
|
element_proximity_max_px=max_px,
|
|
)
|
|
|
|
def test_find_clicked_element_inside_bbox(self):
|
|
"""Clic strictement dans un bbox → match exact."""
|
|
builder = self._builder()
|
|
elements = [
|
|
_make_ui_element("e1", "primary_action", "OK", (50, 50, 150, 150)),
|
|
_make_ui_element("e2", "cancel", "Annuler", (300, 300, 100, 50)),
|
|
]
|
|
event = _make_click_event([100, 100])
|
|
result = builder._find_clicked_element(event, elements)
|
|
assert result is not None
|
|
assert result.element_id == "e1"
|
|
|
|
def test_find_clicked_element_nearest_proximity(self):
|
|
"""Clic hors de tout bbox mais à <50px → match au plus proche."""
|
|
builder = self._builder(max_px=50.0)
|
|
elements = [
|
|
# bbox à (50,50,100,40) → bord droit = 150, bord bas = 90
|
|
_make_ui_element("e_near", "primary_action", "Valider", (50, 50, 100, 40)),
|
|
# bbox loin (distance >> 50px du clic)
|
|
_make_ui_element("e_far", "cancel", "Annuler", (500, 500, 80, 30)),
|
|
]
|
|
# Clic à (170, 70) → bord droit de e_near = 150, dx = 20, dy = 0 → 20px
|
|
event = _make_click_event([170, 70])
|
|
result = builder._find_clicked_element(event, elements)
|
|
assert result is not None
|
|
assert result.element_id == "e_near"
|
|
|
|
def test_find_clicked_element_too_far_returns_none(self):
|
|
"""Clic à >50px du bbox le plus proche → None."""
|
|
builder = self._builder(max_px=50.0)
|
|
elements = [
|
|
_make_ui_element("e1", "primary_action", "OK", (50, 50, 100, 40)),
|
|
]
|
|
# Clic à (300, 300), bbox à (50,50,100,40) → distance ~ 280px
|
|
event = _make_click_event([300, 300])
|
|
result = builder._find_clicked_element(event, elements)
|
|
assert result is None
|
|
|
|
def test_find_clicked_element_prefers_smallest_containing(self):
|
|
"""Deux bbox contiennent le clic → retourne le plus spécifique (petit)."""
|
|
builder = self._builder()
|
|
elements = [
|
|
# Grand container
|
|
_make_ui_element(
|
|
"container", "data_display", "Form", (0, 0, 800, 600),
|
|
el_type="container",
|
|
),
|
|
# Petit bouton à l'intérieur
|
|
_make_ui_element("btn", "primary_action", "OK", (100, 100, 80, 30)),
|
|
]
|
|
event = _make_click_event([120, 110])
|
|
result = builder._find_clicked_element(event, elements)
|
|
assert result is not None
|
|
assert result.element_id == "btn"
|
|
|
|
def test_find_clicked_element_empty_list(self):
|
|
builder = self._builder()
|
|
event = _make_click_event([100, 100])
|
|
assert builder._find_clicked_element(event, []) is None
|
|
|
|
def test_find_clicked_element_non_click_event(self):
|
|
"""Un événement non-clic → None (pas d'ancrage spatial pertinent)."""
|
|
builder = self._builder()
|
|
elements = [
|
|
_make_ui_element("e1", "form_input", "Nom", (100, 100, 100, 30)),
|
|
]
|
|
event = _make_key_event(keys=["Enter"])
|
|
assert builder._find_clicked_element(event, elements) is None
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# TargetSpec enrichi par _build_single_action
|
|
# -----------------------------------------------------------------------------
|
|
|
|
|
|
class TestTargetSpecEnrichment:
|
|
"""_build_single_action doit produire des TargetSpec discriminants."""
|
|
|
|
def test_target_spec_uses_element_role(self):
|
|
"""Clic ancré sur un élément → by_role + by_text + context_hints."""
|
|
builder = GraphBuilder(
|
|
enable_quality_validation=False,
|
|
enable_ui_enrichment=False,
|
|
)
|
|
elements = [
|
|
_make_ui_element("el_ok", "primary_action", "Valider", (100, 100, 120, 40)),
|
|
]
|
|
event = _make_click_event([150, 120])
|
|
action = builder._build_single_action(event, source_ui_elements=elements)
|
|
|
|
assert action.type == "mouse_click"
|
|
assert action.target.by_role == "primary_action"
|
|
assert action.target.by_text == "Valider"
|
|
assert action.target.selection_policy == "by_similarity"
|
|
# Traçabilité dans context_hints
|
|
assert action.target.context_hints.get("anchor_element_id") == "el_ok"
|
|
assert "anchor_bbox" in action.target.context_hints
|
|
assert action.target.context_hints["anchor_bbox"]["x"] == 100
|
|
|
|
def test_target_spec_fallback_when_no_element(self):
|
|
"""Aucun UIElement → legacy by_role=unknown_element."""
|
|
builder = GraphBuilder(
|
|
enable_quality_validation=False,
|
|
enable_ui_enrichment=False,
|
|
)
|
|
event = _make_click_event([400, 400])
|
|
action = builder._build_single_action(event, source_ui_elements=[])
|
|
assert action.target.by_role == "unknown_element"
|
|
assert action.target.by_text is None
|
|
# Pas de context_hints d'ancrage
|
|
assert not action.target.context_hints.get("anchor_element_id")
|
|
|
|
def test_target_spec_fallback_when_click_too_far(self):
|
|
"""Clic loin de tout bbox → fallback unknown_element."""
|
|
builder = GraphBuilder(
|
|
enable_quality_validation=False,
|
|
enable_ui_enrichment=False,
|
|
element_proximity_max_px=30.0,
|
|
)
|
|
elements = [
|
|
_make_ui_element("far", "cancel", "X", (50, 50, 20, 20)),
|
|
]
|
|
event = _make_click_event([800, 800])
|
|
action = builder._build_single_action(event, source_ui_elements=elements)
|
|
assert action.target.by_role == "unknown_element"
|
|
|
|
def test_keyboard_event_target_unchanged(self):
|
|
"""Les events non-clic conservent leur target_role legacy."""
|
|
builder = GraphBuilder(
|
|
enable_quality_validation=False,
|
|
enable_ui_enrichment=False,
|
|
)
|
|
event = _make_key_event(text="hello", ev_type="text_input")
|
|
action = builder._build_single_action(event, source_ui_elements=[])
|
|
assert action.target.by_role == "text_field"
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# UIConstraint.required_roles depuis _extract_common_ui_elements
|
|
# -----------------------------------------------------------------------------
|
|
|
|
|
|
class TestRequiredRolesExtraction:
|
|
def test_required_roles_extracted_from_common_elements(self):
|
|
"""3 ScreenState avec rôle commun → required_roles le contient."""
|
|
builder = GraphBuilder(
|
|
enable_quality_validation=False,
|
|
enable_ui_enrichment=False,
|
|
)
|
|
# 3 écrans, tous avec "primary_action" (Valider) et 2 avec "cancel"
|
|
states = [
|
|
_make_screen_state(
|
|
"sid", i,
|
|
ui_elements=[
|
|
_make_ui_element(
|
|
f"ok_{i}", "primary_action", "Valider",
|
|
(100, 100, 80, 30),
|
|
),
|
|
_make_ui_element(
|
|
f"cancel_{i}", "cancel", "Annuler",
|
|
(200, 100, 80, 30),
|
|
) if i < 2 else _make_ui_element(
|
|
f"other_{i}", "navigation", "Menu",
|
|
(300, 100, 80, 30),
|
|
),
|
|
],
|
|
)
|
|
for i in range(3)
|
|
]
|
|
|
|
prototype = np.zeros(512, dtype=np.float32)
|
|
prototype[0] = 1.0
|
|
template = builder._create_screen_template(states, prototype)
|
|
|
|
assert template.ui is not None
|
|
# primary_action présent dans 3/3 écrans → inclus
|
|
assert "primary_action" in template.ui.required_roles
|
|
# cancel présent dans 2/3 → ratio 0.66 >= 0.5 → inclus
|
|
assert "cancel" in template.ui.required_roles
|
|
# navigation présent dans 1/3 → ratio 0.33 < 0.5 → exclu
|
|
assert "navigation" not in template.ui.required_roles
|