Refonte majeure du système Agent Chat et ajout de nombreux modules : - Chat unifié : suppression du dual Workflows/Agent Libre, tout passe par /api/chat avec résolution en 3 niveaux (workflow → geste → "montre-moi") - GestureCatalog : 38 raccourcis clavier universels Windows avec matching sémantique, substitution automatique dans les replays, et endpoint /api/gestures - Mode Copilot : exécution pas-à-pas des workflows avec validation humaine via WebSocket (approve/skip/abort) avant chaque action - Léa UI (agent_v0/lea_ui/) : interface PyQt5 pour Windows avec overlay transparent pour feedback visuel pendant le replay - Data Extraction (core/extraction/) : moteur d'extraction visuelle de données (OCR + VLM → SQLite), avec schémas YAML et export CSV/Excel - ReplayVerifier (agent_v0/server_v1/) : vérification post-action par comparaison de screenshots, avec logique de retry (max 3) - IntentParser durci : meilleur fallback regex, type GREETING, patterns améliorés - Dashboard : nouvelles pages gestures, streaming, extractions - Tests : 63 tests GestureCatalog, 47 tests extraction, corrections tests existants - Dépréciation : /api/agent/plan et /api/agent/execute retournent HTTP 410, suppression du code hardcodé _plan_to_replay_actions Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
479 lines
16 KiB
Python
479 lines
16 KiB
Python
"""
|
||
tests/test_pipeline_e2e.py — Phase 0, Task P0-5

End-to-end test of the full pipeline:
RawSession → ScreenStates → Embeddings → Clustering → Workflow (nodes + edges)

Uses deterministic (mock) embeddings to validate the pipeline logic
without depending on OpenCLIP or an OCR engine.

Simulated scenario:
- 2 distinct screens ("Login Page" and "Dashboard")
- 3 Login→Dashboard navigation cycles
- DBSCAN must find 2 clusters and produce 2 nodes and 2 edges
|
||
"""
|
||
|
||
import zlib
from datetime import datetime, timedelta
from unittest.mock import MagicMock

import numpy as np
import pytest
from PIL import Image

from core.graph.graph_builder import GraphBuilder
from core.models.raw_session import RawSession, Event, Screenshot, RawWindowContext
from core.models.workflow_graph import Workflow


# ======================================================================
# Helpers
# ======================================================================

def _make_vector(cluster_id: int, seed: int, dim: int = 512) -> np.ndarray:
    """
    Create a deterministic unit vector for a given cluster.

    Cluster 0 puts its energy in the first half of the vector; any other
    ``cluster_id`` puts it in the second half. A small seeded Gaussian noise
    (standard deviation 0.01) is added before normalising, so vectors of the
    same cluster are nearly identical (cosine distance close to 0) while
    vectors of different clusters are nearly orthogonal (cosine distance
    close to 1).

    Args:
        cluster_id: 0 selects the first half; every other value the second.
        seed: Seed of the ``np.random.RandomState`` that generates the noise.
        dim: Length of the returned vector.

    Returns:
        A float32 array of shape ``(dim,)`` with L2 norm 1.
    """
    half = dim // 2
    base = np.zeros(dim, dtype=np.float32)
    if cluster_id == 0:
        base[:half] = 1.0
    else:
        base[half:] = 1.0

    # Same seed -> same noise, which is what makes the vector reproducible.
    rng = np.random.RandomState(seed)
    noise = rng.randn(dim).astype(np.float32) * 0.01
    vector = base + noise
    return vector / np.linalg.norm(vector)


# ======================================================================
# Fixtures
# ======================================================================

@pytest.fixture
def synthetic_session(tmp_path):
    """
    Build a synthetic RawSession: 2 screen types × 3 cycles = 6 screenshots.

    Sequence: Login, Dashboard, Login, Dashboard, Login, Dashboard.
    Expected transitions: Login→Dashboard (×3), Dashboard→Login (×2).

    Each screenshot is a real 100×100 PNG written under ``tmp_path`` and is
    paired with one ``mouse_click`` event carrying the matching window title.

    Returns:
        A ``(session, tmp_path)`` tuple, so that tests can ``chdir`` into the
        directory that holds the screenshot files.
    """
    session_id = "test_e2e_session"
    started_at = datetime(2026, 3, 10, 10, 0, 0)

    # Write the screenshots to disk. Per the original author's note, this is
    # the path layout that _create_screen_states expects.
    screens_dir = (
        tmp_path / "data" / "training" / "sessions"
        / session_id / session_id / "screenshots"
    )
    screens_dir.mkdir(parents=True)

    # (window title, application name, solid fill colour of the image)
    screen_defs = [
        ("Login Page", "firefox", (200, 50, 50)),  # red
        ("Dashboard", "firefox", (50, 50, 200)),  # blue
    ]

    screenshots = []
    events = []

    for cycle in range(3):
        for screen_idx, (title, app, color) in enumerate(screen_defs):
            # Global index of this capture within the whole session (0..5).
            i = cycle * len(screen_defs) + screen_idx
            ts = started_at + timedelta(seconds=i * 2)

            # Real screenshot file on disk.
            filename = f"screen_{i:03d}.png"
            img = Image.new("RGB", (100, 100), color)
            img.save(str(screens_dir / filename))

            screenshots.append(Screenshot(
                screenshot_id=f"ss_{i:03d}",
                relative_path=f"screenshots/{filename}",
                captured_at=ts.isoformat(),
            ))

            events.append(Event(
                t=float(i * 2),
                type="mouse_click",
                window=RawWindowContext(title=title, app_name=app),
                screenshot_id=f"ss_{i:03d}",
                data={"button": "left", "pos": [500, 300]},
            ))

    session = RawSession(
        session_id=session_id,
        agent_version="test_1.0",
        environment={
            "screen": {"primary_resolution": [1920, 1080]},
            "os": "linux",
        },
        user={"id": "test_user"},
        context={"workflow": "test", "tags": ["e2e"]},
        started_at=started_at,
        ended_at=datetime(2026, 3, 10, 10, 0, 12),
        events=events,
        screenshots=screenshots,
    )

    return session, tmp_path


@pytest.fixture
def mock_embedding_builder():
    """
    Mock of StateEmbeddingBuilder returning deterministic embeddings.

    The returned ``MagicMock`` exposes ``build(screen_state, ...)``. Each call
    yields another mock whose ``get_vector()`` returns a unit vector chosen
    from the ScreenState's window title: titles containing "Login" map to
    cluster 0, everything else to cluster 1.
    """
    builder = MagicMock()

    def build_side_effect(screen_state, *args, **kwargs):
        title = screen_state.window.window_title
        cluster_id = 0 if "Login" in title else 1

        # The built-in hash() of a str is salted per process, so it would
        # yield a different seed on every test run. CRC32 of the identifier
        # is stable across runs and keeps the embeddings truly deterministic.
        state_id = str(screen_state.screen_state_id).encode("utf-8")
        seed = zlib.crc32(state_id) % (2**31)
        vector = _make_vector(cluster_id, seed)

        embedding_mock = MagicMock()
        embedding_mock.get_vector.return_value = vector
        return embedding_mock

    builder.build.side_effect = build_side_effect
    return builder


@pytest.fixture
def graph_builder(mock_embedding_builder):
    """GraphBuilder configured for the tests (quality validation disabled)."""
    return GraphBuilder(
        embedding_builder=mock_embedding_builder,
        min_pattern_repetitions=3,
        clustering_eps=0.15,
        clustering_min_samples=2,
        enable_quality_validation=False,
    )


# ======================================================================
# Tests
# ======================================================================

class TestScreenStatesCreation:
    """Tests of _create_screen_states: RawSession → List[ScreenState]."""

    def test_creates_correct_number_of_states(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        """One ScreenState is produced per screenshot of the session (6)."""
        session, tmp_path = synthetic_session
        # The screenshots were written under tmp_path; run from there.
        monkeypatch.chdir(tmp_path)

        states = graph_builder._create_screen_states(session)
        assert len(states) == 6

    def test_window_titles_alternate(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        """States keep the Login / Dashboard alternation of the session."""
        session, tmp_path = synthetic_session
        monkeypatch.chdir(tmp_path)

        states = graph_builder._create_screen_states(session)
        for i, state in enumerate(states):
            expected = "Login Page" if i % 2 == 0 else "Dashboard"
            assert state.window.window_title == expected

    def test_metadata_contains_event_info(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        """Each state carries the event type and the session identifier."""
        session, tmp_path = synthetic_session
        monkeypatch.chdir(tmp_path)

        states = graph_builder._create_screen_states(session)
        for state in states:
            assert state.metadata.get("event_type") == "mouse_click"
            assert state.session_id == session.session_id

    def test_screenshot_files_detected(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        """The screenshots exist on disk and file_size_bytes > 0."""
        session, tmp_path = synthetic_session
        monkeypatch.chdir(tmp_path)

        states = graph_builder._create_screen_states(session)
        for state in states:
            assert state.raw.file_size_bytes > 0


class TestClustering:
    """Tests of the DBSCAN clustering step: embeddings → clusters."""

    def test_detects_two_clusters(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        """The two screen types yield exactly two clusters."""
        session, tmp_path = synthetic_session
        # The screenshots were written under tmp_path; run from there.
        monkeypatch.chdir(tmp_path)

        states = graph_builder._create_screen_states(session)
        embeddings = graph_builder._compute_embeddings(states)
        clusters = graph_builder._detect_patterns(embeddings, states)

        assert len(clusters) == 2

    def test_each_cluster_has_three_members(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        """Each screen type was captured three times, so 3 members each."""
        session, tmp_path = synthetic_session
        monkeypatch.chdir(tmp_path)

        states = graph_builder._create_screen_states(session)
        embeddings = graph_builder._compute_embeddings(states)
        clusters = graph_builder._detect_patterns(embeddings, states)

        for indices in clusters.values():
            assert len(indices) == 3

    def test_insufficient_data_returns_empty(self, graph_builder):
        """Fewer than min_pattern_repetitions screenshots → no clusters."""
        # Seeded generator so the test does not depend on global RNG state.
        rng = np.random.RandomState(0)
        embeddings = [rng.randn(512).astype(np.float32) for _ in range(2)]
        clusters = graph_builder._detect_patterns(embeddings, [None, None])
        assert clusters == {}


class TestWorkflowConstruction:
    """Tests of the complete pipeline: RawSession → Workflow."""

    def test_produces_valid_workflow(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        """build_from_session returns a Workflow carrying the given name."""
        session, tmp_path = synthetic_session
        # The screenshots were written under tmp_path; run from there.
        monkeypatch.chdir(tmp_path)

        workflow = graph_builder.build_from_session(session, "Test Login Workflow")

        assert isinstance(workflow, Workflow)
        assert workflow.name == "Test Login Workflow"

    def test_workflow_has_two_nodes(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        """One node per screen type."""
        session, tmp_path = synthetic_session
        monkeypatch.chdir(tmp_path)

        workflow = graph_builder.build_from_session(session)
        assert len(workflow.nodes) == 2

    def test_workflow_has_edges(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        """At least one transition is extracted from the session."""
        session, tmp_path = synthetic_session
        monkeypatch.chdir(tmp_path)

        workflow = graph_builder.build_from_session(session)
        assert len(workflow.edges) >= 1

    def test_nodes_have_screen_templates(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        """Every node has a template with embedding info and a prototype."""
        session, tmp_path = synthetic_session
        monkeypatch.chdir(tmp_path)

        workflow = graph_builder.build_from_session(session)

        for node in workflow.nodes:
            tmpl = node.template
            assert tmpl is not None
            assert tmpl.embedding is not None
            assert tmpl.embedding.min_cosine_similarity > 0
            assert tmpl.embedding.sample_count >= 3
            # The prototype vector is stored in the node metadata.
            assert "_prototype_vector" in node.metadata
            assert len(node.metadata["_prototype_vector"]) == 512
            assert node.metadata.get("observation_count", 0) >= 3

    def test_nodes_have_window_title_pattern(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        """At least one node exposes a known window title as its pattern."""
        session, tmp_path = synthetic_session
        monkeypatch.chdir(tmp_path)

        workflow = graph_builder.build_from_session(session)

        titles = {
            node.template.window.title_pattern
            for node in workflow.nodes
            if node.template.window and node.template.window.title_pattern
        }
        assert "Login Page" in titles or "Dashboard" in titles

    def test_edges_have_actions(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        """Every edge links two distinct nodes through a mouse_click action."""
        session, tmp_path = synthetic_session
        monkeypatch.chdir(tmp_path)

        workflow = graph_builder.build_from_session(session)

        for edge in workflow.edges:
            assert edge.from_node != edge.to_node
            assert edge.action is not None
            assert edge.action.type == "mouse_click"
            assert edge.action.target is not None

    def test_edge_execution_counts(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        """Check that the transition counters are correct."""
        session, tmp_path = synthetic_session
        monkeypatch.chdir(tmp_path)

        workflow = graph_builder.build_from_session(session)

        total_transitions = sum(
            edge.stats.execution_count for edge in workflow.edges
        )
        # Sequence A,B,A,B,A,B → 5 transitions (A→B: 3, B→A: 2)
        assert total_transitions == 5

    def test_entry_nodes_set(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        """The workflow declares exactly one entry node."""
        session, tmp_path = synthetic_session
        monkeypatch.chdir(tmp_path)

        workflow = graph_builder.build_from_session(session)
        assert len(workflow.entry_nodes) == 1


class TestQualityValidation:
    """Tests of the quality validation integrated into the pipeline."""

    def test_quality_report_generated(
        self, synthetic_session, mock_embedding_builder, monkeypatch
    ):
        """With validation enabled, the workflow metadata holds a report."""
        session, tmp_path = synthetic_session
        # The screenshots were written under tmp_path; run from there.
        monkeypatch.chdir(tmp_path)

        builder = GraphBuilder(
            embedding_builder=mock_embedding_builder,
            min_pattern_repetitions=3,
            enable_quality_validation=True,
        )

        workflow = builder.build_from_session(session)

        assert workflow.metadata is not None
        assert "quality_report" in workflow.metadata

        report = workflow.metadata["quality_report"]
        assert "overall_score" in report
        assert "is_production_ready" in report

    def test_quality_sets_learning_state(
        self, synthetic_session, mock_embedding_builder, monkeypatch
    ):
        """With validation enabled, learning_state is one of the known values."""
        session, tmp_path = synthetic_session
        monkeypatch.chdir(tmp_path)

        builder = GraphBuilder(
            embedding_builder=mock_embedding_builder,
            min_pattern_repetitions=3,
            enable_quality_validation=True,
        )

        workflow = builder.build_from_session(session)

        # learning_state must be set according to the measured quality.
        assert workflow.learning_state in [
            "OBSERVATION", "AUTO_CANDIDATE",
        ]


class TestEdgeCases:
    """Tests of edge cases."""

    def test_empty_session_raises(self, mock_embedding_builder):
        """A session without screenshots makes build_from_session raise."""
        session = RawSession(
            session_id="empty",
            agent_version="test",
            environment={},
            user={},
            context={},
            started_at=datetime.now(),
        )

        builder = GraphBuilder(
            embedding_builder=mock_embedding_builder,
            enable_quality_validation=False,
        )

        with pytest.raises(ValueError, match="no screenshots"):
            builder.build_from_session(session)

    def test_single_screen_type_no_edges(
        self, mock_embedding_builder, tmp_path, monkeypatch
    ):
        """A single window → 1 cluster, no edges."""
        session_id = "single_screen"
        screens_dir = (
            tmp_path / "data" / "training" / "sessions"
            / session_id / session_id / "screenshots"
        )
        screens_dir.mkdir(parents=True)
        # The screenshots are written under tmp_path; run from there.
        monkeypatch.chdir(tmp_path)

        screenshots = []
        events = []
        for i in range(4):
            ts = datetime(2026, 3, 10, 10, 0, i)
            fname = f"screen_{i:03d}.png"
            img = Image.new("RGB", (100, 100), (100, 100, 100))
            img.save(str(screens_dir / fname))

            screenshots.append(Screenshot(
                screenshot_id=f"ss_{i}",
                relative_path=f"screenshots/{fname}",
                captured_at=ts.isoformat(),
            ))
            events.append(Event(
                t=float(i),
                type="mouse_click",
                window=RawWindowContext(title="Login Page", app_name="app"),
                screenshot_id=f"ss_{i}",
                data={"button": "left", "pos": [100, 100]},
            ))

        session = RawSession(
            session_id=session_id,
            agent_version="test",
            environment={"screen": {"primary_resolution": [1920, 1080]}},
            user={"id": "user"},
            context={},
            started_at=datetime(2026, 3, 10, 10, 0, 0),
            events=events,
            screenshots=screenshots,
        )

        builder = GraphBuilder(
            embedding_builder=mock_embedding_builder,
            min_pattern_repetitions=3,
            enable_quality_validation=False,
        )

        workflow = builder.build_from_session(session)

        # Every state maps to the same cluster → no transition.
        assert len(workflow.edges) == 0

    def test_serialization_roundtrip(
        self, synthetic_session, graph_builder, monkeypatch, tmp_path
    ):
        """The built Workflow can be converted to a dict for serialization."""
        session, sess_tmp = synthetic_session
        monkeypatch.chdir(sess_tmp)

        workflow = graph_builder.build_from_session(session)

        # Author's note: to_json returns a JSON string, to_dict returns a dict.
        json_dict = workflow.to_dict()
        assert json_dict["name"] is not None
        assert len(json_dict["nodes"]) == 2