Files
rpa_vision_v3/tests/test_pipeline_e2e.py
Dom 7f2bc6fe97
Some checks failed
security-audit / Bandit (scan statique) (push) Successful in 12s
security-audit / pip-audit (CVE dépendances) (push) Successful in 11s
security-audit / Scan secrets (grep) (push) Successful in 9s
tests / Lint (ruff + black) (push) Successful in 14s
tests / Tests unitaires (sans GPU) (push) Failing after 13s
tests / Tests sécurité (critique) (push) Has been skipped
feat(graph): enrichissement visuel des workflows (C2)
GraphBuilder construit maintenant des ScreenState enrichis
(ui_elements + detected_text) au lieu de stubs vides, et associe
les clics aux UIElement par proximité spatiale.

Détails :
- __init__ accepte ui_detector, screen_analyzer, enable_ui_enrichment,
  element_proximity_max_px (+ lazy resolver via singleton C1)
- _create_screen_states délègue à ScreenAnalyzer.analyze() — remplace
  l'appel à _extract_text() qui n'existait plus depuis le Lot C
  (bug silencieux : OCR cassé en prod depuis ce jour, caught except)
- _find_clicked_element : bbox contenant strict + fallback proximité
  ≤50px, préfère le plus petit bbox (form vs button)
- _build_click_target_spec : TargetSpec(by_role, by_text,
  selection_policy="by_similarity") avec ancres dans context_hints
  (anchor_element_id, anchor_bbox, anchor_center)
- _build_edges propage le ScreenState source aux builders d'action
- WorkflowPipeline passe ui_detector + enable_ui_enrichment au builder

Impact : matching prod 3-5x plus précis, TargetSpec ne sont plus
des "unknown_element" génériques, UIConstraint.required_roles se
remplit correctement via _extract_common_ui_elements (qui marchait
depuis toujours mais sur des state.ui_elements vides).

Tests e2e migrés vers enable_ui_enrichment=False (2.9s vs 67s) —
ils valident le pipeline DBSCAN/edges, pas la détection UI réelle.

15 nouveaux tests, 178 tests passants au total (incluant Lots A-E).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-15 22:02:30 +02:00

489 lines
16 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
tests/test_pipeline_e2e.py — Phase 0, Tâche P0-5
Test end-to-end du pipeline complet :
RawSession → ScreenStates → Embeddings → Clustering → Workflow (nodes + edges)
Utilise des embeddings déterministes (mocks) pour valider la logique du pipeline
sans dépendre d'OpenCLIP ou d'un moteur OCR.
Scénario simulé :
- 2 écrans distincts ("Login Page" et "Dashboard")
- 3 cycles de navigation Login→Dashboard
- DBSCAN doit trouver 2 clusters, produire 2 nodes et 2 edges
"""
import pytest
import numpy as np
from datetime import datetime, timedelta
from unittest.mock import MagicMock
from PIL import Image
from core.models.raw_session import RawSession, Event, Screenshot, RawWindowContext
from core.models.workflow_graph import Workflow
from core.graph.graph_builder import GraphBuilder
# ======================================================================
# Helpers
# ======================================================================
def _make_vector(cluster_id: int, seed: int, dim: int = 512) -> np.ndarray:
    """
    Build a reproducible, unit-norm embedding for the given cluster.

    Cluster 0 carries its energy in the first half of the vector; any other
    cluster id carries it in the second half. Gaussian noise scaled by 0.01
    (drawn from ``np.random.RandomState(seed)``) is added before the vector
    is L2-normalised, so vectors from different clusters are nearly
    orthogonal while vectors from the same cluster are nearly identical.

    Args:
        cluster_id: 0 selects the first half, anything else the second half.
        seed: Seed of the noise generator; same seed gives the same vector.
        dim: Length of the returned vector.

    Returns:
        A float32 array of length ``dim`` with Euclidean norm 1.
    """
    half = dim // 2
    active = slice(None, half) if cluster_id == 0 else slice(half, None)

    prototype = np.zeros(dim, dtype=np.float32)
    prototype[active] = 1.0

    jitter = np.random.RandomState(seed).randn(dim).astype(np.float32) * 0.01
    perturbed = prototype + jitter
    return perturbed / np.linalg.norm(perturbed)
# ======================================================================
# Fixtures
# ======================================================================
@pytest.fixture
def synthetic_session(tmp_path):
    """
    Synthetic RawSession: 2 screen types x 3 cycles = 6 screenshots.

    Sequence: Login, Dashboard, Login, Dashboard, Login, Dashboard.
    Expected transitions: Login->Dashboard (x3), Dashboard->Login (x2).

    Each screenshot is a real 100x100 PNG written below ``tmp_path`` and is
    paired with one ``mouse_click`` event taken two seconds after the
    previous one.

    Returns:
        A ``(session, tmp_path)`` tuple so that tests can chdir into the
        directory holding the generated files.
    """
    session_id = "test_e2e_session"
    t0 = datetime(2026, 3, 10, 10, 0, 0)

    # Screenshots live on disk (path expected by _create_screen_states).
    screens_dir = tmp_path.joinpath(
        "data", "training", "sessions", session_id, session_id, "screenshots"
    )
    screens_dir.mkdir(parents=True)

    screen_defs = [
        ("Login Page", "firefox", (200, 50, 50)),  # red
        ("Dashboard", "firefox", (50, 50, 200)),  # blue
    ]

    screenshots, events = [], []
    # Flat index i = cycle * 2 + screen_idx: the two screens alternate.
    for i in range(3 * len(screen_defs)):
        title, app, color = screen_defs[i % len(screen_defs)]
        elapsed = i * 2
        ts = t0 + timedelta(seconds=elapsed)
        filename = f"screen_{i:03d}.png"
        shot_id = f"ss_{i:03d}"

        # Real image file on disk, filled with the screen's colour.
        Image.new("RGB", (100, 100), color).save(str(screens_dir / filename))

        screenshots.append(
            Screenshot(
                screenshot_id=shot_id,
                relative_path=f"screenshots/{filename}",
                captured_at=ts.isoformat(),
            )
        )
        events.append(
            Event(
                t=float(elapsed),
                type="mouse_click",
                window=RawWindowContext(title=title, app_name=app),
                screenshot_id=shot_id,
                data={"button": "left", "pos": [500, 300]},
            )
        )

    session = RawSession(
        session_id=session_id,
        agent_version="test_1.0",
        environment={
            "screen": {"primary_resolution": [1920, 1080]},
            "os": "linux",
        },
        user={"id": "test_user"},
        context={"workflow": "test", "tags": ["e2e"]},
        started_at=t0,
        ended_at=datetime(2026, 3, 10, 10, 0, 12),
        events=events,
        screenshots=screenshots,
    )
    return session, tmp_path
@pytest.fixture
def mock_embedding_builder():
    """
    Mock of StateEmbeddingBuilder returning deterministic embeddings.

    The cluster is chosen from the ScreenState window title ("Login" in the
    title -> cluster 0, otherwise cluster 1) and the noise seed is derived
    from the ``screen_state_id``.

    The seed uses CRC32 rather than the builtin ``hash()``: string hashes are
    salted per interpreter process, which would make the embeddings differ
    from one test run to the next.

    Returns:
        A MagicMock whose ``build(screen_state, ...)`` returns an object
        exposing ``get_vector()``.
    """
    # Local import keeps the fixture self-contained.
    import zlib

    def build_side_effect(screen_state, *args, **kwargs):
        title = screen_state.window.window_title
        cluster_id = 0 if "Login" in title else 1

        # Stable across processes, unlike hash(str).
        state_key = str(screen_state.screen_state_id).encode("utf-8")
        seed = zlib.crc32(state_key) % (2**31)

        embedding_mock = MagicMock()
        embedding_mock.get_vector.return_value = _make_vector(cluster_id, seed)
        return embedding_mock

    builder = MagicMock()
    builder.build.side_effect = build_side_effect
    return builder
@pytest.fixture
def graph_builder(mock_embedding_builder):
    """GraphBuilder configured for these tests (quality validation off).

    ``enable_ui_enrichment=False`` switches off the GPU analyzer: these tests
    validate the DBSCAN + edges pipeline, not real UI detection
    (covered by tests/unit/test_graph_builder_ui_enrichment.py).
    """
    settings = {
        "embedding_builder": mock_embedding_builder,
        "min_pattern_repetitions": 3,
        "clustering_eps": 0.15,
        "clustering_min_samples": 2,
        "enable_quality_validation": False,
        "enable_ui_enrichment": False,
    }
    return GraphBuilder(**settings)
# ======================================================================
# Tests
# ======================================================================
class TestScreenStatesCreation:
    """Tests for _create_screen_states: RawSession -> List[ScreenState]."""

    @staticmethod
    def _states_for(synthetic_session, graph_builder, monkeypatch):
        """Chdir into the fixture directory and build the screen states.

        Returns the ``(session, states)`` pair.
        """
        session, workdir = synthetic_session
        # Work from the temp dir that holds the generated screenshots.
        monkeypatch.chdir(workdir)
        return session, graph_builder._create_screen_states(session)

    def test_creates_correct_number_of_states(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        """One ScreenState is produced per screenshot (6 in total)."""
        _, states = self._states_for(synthetic_session, graph_builder, monkeypatch)
        assert len(states) == 6

    def test_window_titles_alternate(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        """Even positions are "Login Page", odd positions are "Dashboard"."""
        _, states = self._states_for(synthetic_session, graph_builder, monkeypatch)
        for position, state in enumerate(states):
            expected = "Dashboard" if position % 2 else "Login Page"
            assert state.window.window_title == expected

    def test_metadata_contains_event_info(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        """Every state records the event type and the session id."""
        session, states = self._states_for(
            synthetic_session, graph_builder, monkeypatch
        )
        for state in states:
            assert state.metadata.get("event_type") == "mouse_click"
            assert state.session_id == session.session_id

    def test_screenshot_files_detected(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        """Screenshots exist on disk and file_size_bytes > 0."""
        _, states = self._states_for(synthetic_session, graph_builder, monkeypatch)
        for state in states:
            assert state.raw.file_size_bytes > 0
class TestClustering:
    """Tests for DBSCAN clustering: embeddings -> clusters."""

    @staticmethod
    def _clusters_for(synthetic_session, graph_builder, monkeypatch):
        """Run states -> embeddings -> pattern detection on the fixture session."""
        session, workdir = synthetic_session
        # Work from the temp dir that holds the generated screenshots.
        monkeypatch.chdir(workdir)
        states = graph_builder._create_screen_states(session)
        embeddings = graph_builder._compute_embeddings(states)
        return graph_builder._detect_patterns(embeddings, states)

    def test_detects_two_clusters(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        """The two screen types yield exactly two clusters."""
        clusters = self._clusters_for(synthetic_session, graph_builder, monkeypatch)
        assert len(clusters) == 2

    def test_each_cluster_has_three_members(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        """Each detected cluster groups the three occurrences of its screen."""
        clusters = self._clusters_for(synthetic_session, graph_builder, monkeypatch)
        # Without this guard the loop below passes vacuously when nothing
        # was clustered at all.
        assert clusters
        for indices in clusters.values():
            assert len(indices) == 3

    def test_insufficient_data_returns_empty(self, graph_builder):
        """Fewer than min_pattern_repetitions screenshots -> no clusters."""
        # Seeded generator: the test never depends on global RNG state.
        rng = np.random.RandomState(0)
        embeddings = [rng.randn(512).astype(np.float32) for _ in range(2)]
        clusters = graph_builder._detect_patterns(embeddings, [None, None])
        assert clusters == {}
class TestWorkflowConstruction:
    """Tests for the full pipeline: RawSession -> Workflow."""

    @staticmethod
    def _build(synthetic_session, graph_builder, monkeypatch, *build_args):
        """Chdir into the fixture directory and build a workflow.

        ``build_args`` are forwarded positionally to ``build_from_session``
        after the session (e.g. the workflow name).
        """
        session, workdir = synthetic_session
        # Work from the temp dir that holds the generated screenshots.
        monkeypatch.chdir(workdir)
        return graph_builder.build_from_session(session, *build_args)

    def test_produces_valid_workflow(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        """The result is a Workflow carrying the requested name."""
        workflow = self._build(
            synthetic_session, graph_builder, monkeypatch, "Test Login Workflow"
        )
        assert isinstance(workflow, Workflow)
        assert workflow.name == "Test Login Workflow"

    def test_workflow_has_two_nodes(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        """Two screen types produce two nodes."""
        workflow = self._build(synthetic_session, graph_builder, monkeypatch)
        assert len(workflow.nodes) == 2

    def test_workflow_has_edges(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        """At least one transition edge is produced."""
        workflow = self._build(synthetic_session, graph_builder, monkeypatch)
        assert len(workflow.edges) >= 1

    def test_nodes_have_screen_templates(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        """Every node has a template with an embedding and a prototype vector."""
        workflow = self._build(synthetic_session, graph_builder, monkeypatch)
        # Guard: the per-node checks must not pass vacuously on an empty graph.
        assert workflow.nodes
        for node in workflow.nodes:
            tmpl = node.template
            assert tmpl is not None
            assert tmpl.embedding is not None
            assert tmpl.embedding.min_cosine_similarity > 0
            assert tmpl.embedding.sample_count >= 3
            # Prototype vector is stored in the node metadata.
            assert "_prototype_vector" in node.metadata
            assert len(node.metadata["_prototype_vector"]) == 512
            assert node.metadata.get("observation_count", 0) >= 3

    def test_nodes_have_window_title_pattern(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        """At least one node exposes one of the two window titles as pattern."""
        workflow = self._build(synthetic_session, graph_builder, monkeypatch)
        titles = {
            node.template.window.title_pattern
            for node in workflow.nodes
            if node.template.window and node.template.window.title_pattern
        }
        assert "Login Page" in titles or "Dashboard" in titles

    def test_edges_have_actions(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        """Every edge links two distinct nodes through a targeted mouse click."""
        workflow = self._build(synthetic_session, graph_builder, monkeypatch)
        # Guard: the per-edge checks must not pass vacuously without edges.
        assert workflow.edges
        for edge in workflow.edges:
            assert edge.from_node != edge.to_node
            assert edge.action is not None
            assert edge.action.type == "mouse_click"
            assert edge.action.target is not None

    def test_edge_execution_counts(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        """Check that the transition counters add up."""
        workflow = self._build(synthetic_session, graph_builder, monkeypatch)
        total_transitions = sum(
            edge.stats.execution_count for edge in workflow.edges
        )
        # Sequence A,B,A,B,A,B -> 5 transitions (A->B: 3, B->A: 2)
        assert total_transitions == 5

    def test_entry_nodes_set(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        """Exactly one entry node is registered."""
        workflow = self._build(synthetic_session, graph_builder, monkeypatch)
        assert len(workflow.entry_nodes) == 1
class TestQualityValidation:
    """Tests for the quality validation built into the pipeline."""

    @staticmethod
    def _validated_workflow(synthetic_session, mock_embedding_builder, monkeypatch):
        """Build the fixture session with quality validation switched on."""
        session, workdir = synthetic_session
        # Work from the temp dir that holds the generated screenshots.
        monkeypatch.chdir(workdir)
        validating_builder = GraphBuilder(
            embedding_builder=mock_embedding_builder,
            min_pattern_repetitions=3,
            enable_quality_validation=True,
            enable_ui_enrichment=False,
        )
        return validating_builder.build_from_session(session)

    def test_quality_report_generated(
        self, synthetic_session, mock_embedding_builder, monkeypatch
    ):
        """The workflow metadata carries a quality report with its key fields."""
        workflow = self._validated_workflow(
            synthetic_session, mock_embedding_builder, monkeypatch
        )
        assert workflow.metadata is not None
        assert "quality_report" in workflow.metadata

        report = workflow.metadata["quality_report"]
        for required_key in ("overall_score", "is_production_ready"):
            assert required_key in report

    def test_quality_sets_learning_state(
        self, synthetic_session, mock_embedding_builder, monkeypatch
    ):
        """learning_state is one of the states chosen from the quality result."""
        workflow = self._validated_workflow(
            synthetic_session, mock_embedding_builder, monkeypatch
        )
        # learning_state must be set according to the quality outcome.
        allowed_states = ["OBSERVATION", "AUTO_CANDIDATE"]
        assert workflow.learning_state in allowed_states
class TestEdgeCases:
    """Tests for boundary cases."""

    def test_empty_session_raises(self, mock_embedding_builder):
        """A session without screenshots is rejected with a ValueError."""
        empty_session = RawSession(
            session_id="empty",
            agent_version="test",
            environment={},
            user={},
            context={},
            started_at=datetime.now(),
        )
        builder = GraphBuilder(
            embedding_builder=mock_embedding_builder,
            enable_quality_validation=False,
            enable_ui_enrichment=False,
        )
        with pytest.raises(ValueError, match="no screenshots"):
            builder.build_from_session(empty_session)

    def test_single_screen_type_no_edges(
        self, mock_embedding_builder, tmp_path, monkeypatch
    ):
        """A single window -> 1 cluster, no edges."""
        session_id = "single_screen"
        screens_dir = tmp_path.joinpath(
            "data", "training", "sessions", session_id, session_id, "screenshots"
        )
        screens_dir.mkdir(parents=True)
        monkeypatch.chdir(tmp_path)

        screenshots, events = [], []
        for i in range(4):
            ts = datetime(2026, 3, 10, 10, 0, i)
            fname = f"screen_{i:03d}.png"
            shot_id = f"ss_{i}"

            # Four identical grey screenshots of the same window.
            Image.new("RGB", (100, 100), (100, 100, 100)).save(
                str(screens_dir / fname)
            )
            screenshots.append(
                Screenshot(
                    screenshot_id=shot_id,
                    relative_path=f"screenshots/{fname}",
                    captured_at=ts.isoformat(),
                )
            )
            events.append(
                Event(
                    t=float(i),
                    type="mouse_click",
                    window=RawWindowContext(title="Login Page", app_name="app"),
                    screenshot_id=shot_id,
                    data={"button": "left", "pos": [100, 100]},
                )
            )

        session = RawSession(
            session_id=session_id,
            agent_version="test",
            environment={"screen": {"primary_resolution": [1920, 1080]}},
            user={"id": "user"},
            context={},
            started_at=datetime(2026, 3, 10, 10, 0, 0),
            events=events,
            screenshots=screenshots,
        )
        builder = GraphBuilder(
            embedding_builder=mock_embedding_builder,
            min_pattern_repetitions=3,
            enable_quality_validation=False,
            enable_ui_enrichment=False,
        )
        workflow = builder.build_from_session(session)

        # Every state maps to the same cluster -> no transition.
        assert len(workflow.edges) == 0

    def test_serialization_roundtrip(
        self, synthetic_session, graph_builder, monkeypatch, tmp_path
    ):
        """The built Workflow can be exported through to_dict()."""
        session, sess_tmp = synthetic_session
        monkeypatch.chdir(sess_tmp)
        workflow = graph_builder.build_from_session(session)

        # to_dict() is used here; the original note states that to_json()
        # returns a JSON string instead.
        exported = workflow.to_dict()
        assert exported["name"] is not None
        assert len(exported["nodes"]) == 2