feat: chat unifié, GestureCatalog, Copilot, Léa UI, extraction données, vérification replay
Refonte majeure du système Agent Chat et ajout de nombreux modules : - Chat unifié : suppression du dual Workflows/Agent Libre, tout passe par /api/chat avec résolution en 3 niveaux (workflow → geste → "montre-moi") - GestureCatalog : 38 raccourcis clavier universels Windows avec matching sémantique, substitution automatique dans les replays, et endpoint /api/gestures - Mode Copilot : exécution pas-à-pas des workflows avec validation humaine via WebSocket (approve/skip/abort) avant chaque action - Léa UI (agent_v0/lea_ui/) : interface PyQt5 pour Windows avec overlay transparent pour feedback visuel pendant le replay - Data Extraction (core/extraction/) : moteur d'extraction visuelle de données (OCR + VLM → SQLite), avec schémas YAML et export CSV/Excel - ReplayVerifier (agent_v0/server_v1/) : vérification post-action par comparaison de screenshots, avec logique de retry (max 3) - IntentParser durci : meilleur fallback regex, type GREETING, patterns améliorés - Dashboard : nouvelles pages gestures, streaming, extractions - Tests : 63 tests GestureCatalog, 47 tests extraction, corrections tests existants - Dépréciation : /api/agent/plan et /api/agent/execute retournent HTTP 410, suppression du code hardcodé _plan_to_replay_actions Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
478
tests/test_pipeline_e2e.py
Normal file
478
tests/test_pipeline_e2e.py
Normal file
@@ -0,0 +1,478 @@
|
||||
"""
|
||||
tests/test_pipeline_e2e.py — Phase 0, Tâche P0-5
|
||||
|
||||
Test end-to-end du pipeline complet :
|
||||
RawSession → ScreenStates → Embeddings → Clustering → Workflow (nodes + edges)
|
||||
|
||||
Utilise des embeddings déterministes (mocks) pour valider la logique du pipeline
|
||||
sans dépendre d'OpenCLIP ou d'un moteur OCR.
|
||||
|
||||
Scénario simulé :
|
||||
- 2 écrans distincts ("Login Page" et "Dashboard")
|
||||
- 3 cycles de navigation Login→Dashboard
|
||||
- DBSCAN doit trouver 2 clusters, produire 2 nodes et 2 edges
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import numpy as np
|
||||
from datetime import datetime, timedelta
|
||||
from unittest.mock import MagicMock
|
||||
from PIL import Image
|
||||
|
||||
from core.models.raw_session import RawSession, Event, Screenshot, RawWindowContext
|
||||
from core.models.workflow_graph import Workflow
|
||||
from core.graph.graph_builder import GraphBuilder
|
||||
|
||||
|
||||
# ======================================================================
|
||||
# Helpers
|
||||
# ======================================================================
|
||||
|
||||
def _make_vector(cluster_id: int, seed: int, dim: int = 512) -> np.ndarray:
    """
    Build a reproducible unit-norm float32 vector for a given cluster.

    Cluster 0 carries its energy in the first half of the vector; any other
    ``cluster_id`` carries it in the second half. The two prototypes do not
    overlap, so vectors from different clusters are nearly orthogonal, while
    vectors from the same cluster differ only by Gaussian noise of scale 0.01.

    Args:
        cluster_id: 0 selects the first half, anything else the second half.
        seed: Seed for the noise generator; same seed -> same vector.
        dim: Length of the returned vector.

    Returns:
        The noisy prototype divided by its L2 norm.
    """
    half = dim // 2
    active = slice(None, half) if cluster_id == 0 else slice(half, None)

    prototype = np.zeros(dim, dtype=np.float32)
    prototype[active] = 1.0

    # Small seeded jitter so members of one cluster are close but not equal.
    jitter = np.random.RandomState(seed).randn(dim).astype(np.float32) * 0.01

    noisy = prototype + jitter
    return noisy / np.linalg.norm(noisy)
|
||||
|
||||
|
||||
# ======================================================================
|
||||
# Fixtures
|
||||
# ======================================================================
|
||||
|
||||
@pytest.fixture
def synthetic_session(tmp_path):
    """
    Synthetic RawSession: 2 screen types x 3 cycles = 6 screenshots.

    Sequence: Login, Dashboard, Login, Dashboard, Login, Dashboard
    Expected transitions: Login->Dashboard (x3), Dashboard->Login (x2)

    Returns:
        A ``(session, tmp_path)`` tuple; the PNG files are written under
        ``tmp_path`` so tests can chdir there before running the pipeline.
    """
    session_id = "test_e2e_session"
    started = datetime(2026, 3, 10, 10, 0, 0)

    # Real screenshots on disk (layout presumably expected by
    # _create_screen_states -- the session id appears twice in the path).
    screens_dir = tmp_path.joinpath(
        "data", "training", "sessions", session_id, session_id, "screenshots"
    )
    screens_dir.mkdir(parents=True)

    screen_defs = [
        ("Login Page", "firefox", (200, 50, 50)),  # red
        ("Dashboard", "firefox", (50, 50, 200)),  # blue
    ]

    screenshots = []
    events = []

    # One flat index over 3 cycles of the two screens: 0..5, alternating.
    for idx in range(3 * len(screen_defs)):
        title, app, color = screen_defs[idx % len(screen_defs)]
        offset_s = idx * 2
        shot_id = f"ss_{idx:03d}"
        filename = f"screen_{idx:03d}.png"

        # Solid-colour image actually written to disk.
        Image.new("RGB", (100, 100), color).save(str(screens_dir / filename))

        captured = started + timedelta(seconds=offset_s)
        screenshots.append(Screenshot(
            screenshot_id=shot_id,
            relative_path=f"screenshots/{filename}",
            captured_at=captured.isoformat(),
        ))

        events.append(Event(
            t=float(offset_s),
            type="mouse_click",
            window=RawWindowContext(title=title, app_name=app),
            screenshot_id=shot_id,
            data={"button": "left", "pos": [500, 300]},
        ))

    session = RawSession(
        session_id=session_id,
        agent_version="test_1.0",
        environment={
            "screen": {"primary_resolution": [1920, 1080]},
            "os": "linux",
        },
        user={"id": "test_user"},
        context={"workflow": "test", "tags": ["e2e"]},
        started_at=started,
        ended_at=datetime(2026, 3, 10, 10, 0, 12),
        events=events,
        screenshots=screenshots,
    )

    return session, tmp_path
|
||||
|
||||
|
||||
@pytest.fixture
def mock_embedding_builder():
    """
    Mock of StateEmbeddingBuilder returning reproducible embeddings.

    ``build`` chooses the cluster from the ScreenState's window title
    ("Login" in the title -> cluster 0, anything else -> cluster 1) and
    derives the noise seed from a CRC32 of the screen state id, so the same
    state always gets the same vector, in every test run.

    Returns:
        A MagicMock whose ``build(screen_state, ...)`` returns another mock
        exposing ``get_vector()``.
    """
    # Local import keeps this fixture self-contained.
    import zlib

    builder = MagicMock()

    def build_side_effect(screen_state, *args, **kwargs):
        title = screen_state.window.window_title
        cluster_id = 0 if "Login" in title else 1

        # The builtin hash() of a str is salted per interpreter process
        # (PYTHONHASHSEED), which would make the "deterministic" embeddings
        # change from run to run. CRC32 is stable across processes.
        state_id = str(screen_state.screen_state_id).encode("utf-8")
        seed = zlib.crc32(state_id) % (2**31)
        vector = _make_vector(cluster_id, seed)

        embedding_mock = MagicMock()
        embedding_mock.get_vector.return_value = vector
        return embedding_mock

    builder.build.side_effect = build_side_effect
    return builder
|
||||
|
||||
|
||||
@pytest.fixture
def graph_builder(mock_embedding_builder):
    """GraphBuilder wired to the mock embedder, with quality validation off."""
    settings = {
        "embedding_builder": mock_embedding_builder,
        "min_pattern_repetitions": 3,
        "clustering_eps": 0.15,
        "clustering_min_samples": 2,
        "enable_quality_validation": False,
    }
    return GraphBuilder(**settings)
|
||||
|
||||
|
||||
# ======================================================================
|
||||
# Tests
|
||||
# ======================================================================
|
||||
|
||||
class TestScreenStatesCreation:
    """Tests of ``_create_screen_states``: RawSession -> list of ScreenState."""

    @staticmethod
    def _states_for(synthetic_session, graph_builder, monkeypatch):
        """Chdir into the fixture's temp root and build the screen states.

        Returns the ``(session, states)`` pair.
        """
        session, root = synthetic_session
        monkeypatch.chdir(root)
        return session, graph_builder._create_screen_states(session)

    def test_creates_correct_number_of_states(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        _, states = self._states_for(
            synthetic_session, graph_builder, monkeypatch
        )
        assert len(states) == 6

    def test_window_titles_alternate(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        _, states = self._states_for(
            synthetic_session, graph_builder, monkeypatch
        )
        # Even positions are the login screen, odd positions the dashboard.
        cycle = ("Login Page", "Dashboard")
        for position, state in enumerate(states):
            assert state.window.window_title == cycle[position % 2]

    def test_metadata_contains_event_info(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        session, states = self._states_for(
            synthetic_session, graph_builder, monkeypatch
        )
        for state in states:
            event_type = state.metadata.get("event_type")
            assert event_type == "mouse_click"
            assert state.session_id == session.session_id

    def test_screenshot_files_detected(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        """Screenshots exist on disk, so file_size_bytes must be > 0."""
        _, states = self._states_for(
            synthetic_session, graph_builder, monkeypatch
        )
        for state in states:
            assert state.raw.file_size_bytes > 0
|
||||
|
||||
|
||||
class TestClustering:
    """Tests of the DBSCAN clustering step: embeddings -> clusters."""

    def test_detects_two_clusters(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        """The alternating two-screen session must yield exactly 2 clusters."""
        session, tmp_path = synthetic_session
        monkeypatch.chdir(tmp_path)

        states = graph_builder._create_screen_states(session)
        embeddings = graph_builder._compute_embeddings(states)
        clusters = graph_builder._detect_patterns(embeddings, states)

        assert len(clusters) == 2

    def test_each_cluster_has_three_members(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        """Each screen was visited 3 times, so each cluster holds 3 indices."""
        session, tmp_path = synthetic_session
        monkeypatch.chdir(tmp_path)

        states = graph_builder._create_screen_states(session)
        embeddings = graph_builder._compute_embeddings(states)
        clusters = graph_builder._detect_patterns(embeddings, states)

        # Without this guard an empty result would let the loop below pass
        # without checking anything.
        assert len(clusters) > 0

        # Only the member lists matter here; cluster ids are not inspected.
        for indices in clusters.values():
            assert len(indices) == 3

    def test_insufficient_data_returns_empty(self, graph_builder):
        """Fewer screenshots than min_pattern_repetitions -> no clusters."""
        # Seeded local generator: keeps the input reproducible and leaves the
        # global NumPy RNG state untouched.
        rng = np.random.RandomState(0)
        embeddings = [rng.randn(512).astype(np.float32) for _ in range(2)]
        clusters = graph_builder._detect_patterns(embeddings, [None, None])
        assert clusters == {}
|
||||
|
||||
|
||||
class TestWorkflowConstruction:
    """Tests of the full pipeline: RawSession -> Workflow."""

    @staticmethod
    def _build(synthetic_session, graph_builder, monkeypatch, *build_args):
        """Chdir into the fixture's temp root and run ``build_from_session``.

        Extra positional arguments are forwarded after the session.
        """
        session, root = synthetic_session
        monkeypatch.chdir(root)
        return graph_builder.build_from_session(session, *build_args)

    def test_produces_valid_workflow(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        workflow = self._build(
            synthetic_session, graph_builder, monkeypatch, "Test Login Workflow"
        )

        assert isinstance(workflow, Workflow)
        assert workflow.name == "Test Login Workflow"

    def test_workflow_has_two_nodes(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        workflow = self._build(synthetic_session, graph_builder, monkeypatch)
        assert len(workflow.nodes) == 2

    def test_workflow_has_edges(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        workflow = self._build(synthetic_session, graph_builder, monkeypatch)
        assert len(workflow.edges) >= 1

    def test_nodes_have_screen_templates(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        workflow = self._build(synthetic_session, graph_builder, monkeypatch)

        for node in workflow.nodes:
            template = node.template
            assert template is not None

            embedding = template.embedding
            assert embedding is not None
            assert embedding.min_cosine_similarity > 0
            assert embedding.sample_count >= 3

            # The prototype vector is stored in the node metadata.
            metadata = node.metadata
            assert "_prototype_vector" in metadata
            assert len(metadata["_prototype_vector"]) == 512
            assert metadata.get("observation_count", 0) >= 3

    def test_nodes_have_window_title_pattern(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        workflow = self._build(synthetic_session, graph_builder, monkeypatch)

        # Collect every non-empty title pattern carried by a node template.
        titles = set()
        for node in workflow.nodes:
            window = node.template.window
            if window and window.title_pattern:
                titles.add(window.title_pattern)

        # At least one of the two known screen titles must be present.
        assert not titles.isdisjoint({"Login Page", "Dashboard"})

    def test_edges_have_actions(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        workflow = self._build(synthetic_session, graph_builder, monkeypatch)

        for edge in workflow.edges:
            # No self-loops: an edge always links two different nodes.
            assert edge.from_node != edge.to_node

            action = edge.action
            assert action is not None
            assert action.type == "mouse_click"
            assert action.target is not None

    def test_edge_execution_counts(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        """Check that the transition counters add up correctly."""
        workflow = self._build(synthetic_session, graph_builder, monkeypatch)

        total_transitions = 0
        for edge in workflow.edges:
            total_transitions += edge.stats.execution_count

        # Sequence A,B,A,B,A,B -> 5 transitions (A->B: 3, B->A: 2)
        assert total_transitions == 5

    def test_entry_nodes_set(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        workflow = self._build(synthetic_session, graph_builder, monkeypatch)
        assert len(workflow.entry_nodes) == 1
|
||||
|
||||
|
||||
class TestQualityValidation:
    """Tests of the quality validation step built into the pipeline."""

    @staticmethod
    def _validated_workflow(synthetic_session, mock_embedding_builder, monkeypatch):
        """Build a workflow with ``enable_quality_validation=True``.

        The working directory is moved to the fixture's temp root first.
        """
        session, root = synthetic_session
        monkeypatch.chdir(root)

        validating_builder = GraphBuilder(
            embedding_builder=mock_embedding_builder,
            min_pattern_repetitions=3,
            enable_quality_validation=True,
        )
        return validating_builder.build_from_session(session)

    def test_quality_report_generated(
        self, synthetic_session, mock_embedding_builder, monkeypatch
    ):
        workflow = self._validated_workflow(
            synthetic_session, mock_embedding_builder, monkeypatch
        )

        metadata = workflow.metadata
        assert metadata is not None
        assert "quality_report" in metadata

        report = metadata["quality_report"]
        assert "overall_score" in report
        assert "is_production_ready" in report

    def test_quality_sets_learning_state(
        self, synthetic_session, mock_embedding_builder, monkeypatch
    ):
        workflow = self._validated_workflow(
            synthetic_session, mock_embedding_builder, monkeypatch
        )

        # learning_state must be one of the states checked below.
        accepted_states = ("OBSERVATION", "AUTO_CANDIDATE")
        assert workflow.learning_state in accepted_states
|
||||
|
||||
|
||||
class TestEdgeCases:
    """Tests of boundary cases."""

    def test_empty_session_raises(self, mock_embedding_builder):
        """A session without events or screenshots is rejected with ValueError."""
        empty_session = RawSession(
            session_id="empty",
            agent_version="test",
            environment={},
            user={},
            context={},
            started_at=datetime.now(),
        )
        builder = GraphBuilder(
            embedding_builder=mock_embedding_builder,
            enable_quality_validation=False,
        )

        with pytest.raises(ValueError, match="no screenshots"):
            builder.build_from_session(empty_session)

    def test_single_screen_type_no_edges(
        self, mock_embedding_builder, tmp_path, monkeypatch
    ):
        """A single window -> one cluster, no edges."""
        session_id = "single_screen"
        screens_dir = tmp_path.joinpath(
            "data", "training", "sessions", session_id, session_id, "screenshots"
        )
        screens_dir.mkdir(parents=True)
        monkeypatch.chdir(tmp_path)

        screenshots = []
        events = []
        for idx in range(4):
            shot_id = f"ss_{idx}"
            fname = f"screen_{idx:03d}.png"
            captured = datetime(2026, 3, 10, 10, 0, idx)

            # Same grey image every time: only one screen type exists.
            Image.new("RGB", (100, 100), (100, 100, 100)).save(
                str(screens_dir / fname)
            )

            screenshots.append(Screenshot(
                screenshot_id=shot_id,
                relative_path=f"screenshots/{fname}",
                captured_at=captured.isoformat(),
            ))
            events.append(Event(
                t=float(idx),
                type="mouse_click",
                window=RawWindowContext(title="Login Page", app_name="app"),
                screenshot_id=shot_id,
                data={"button": "left", "pos": [100, 100]},
            ))

        session = RawSession(
            session_id=session_id,
            agent_version="test",
            environment={"screen": {"primary_resolution": [1920, 1080]}},
            user={"id": "user"},
            context={},
            started_at=datetime(2026, 3, 10, 10, 0, 0),
            events=events,
            screenshots=screenshots,
        )

        builder = GraphBuilder(
            embedding_builder=mock_embedding_builder,
            min_pattern_repetitions=3,
            enable_quality_validation=False,
        )
        workflow = builder.build_from_session(session)

        # Every state maps to the same cluster -> no transition.
        assert len(workflow.edges) == 0

    def test_serialization_roundtrip(
        self, synthetic_session, graph_builder, monkeypatch, tmp_path
    ):
        """The built Workflow can be exported through ``to_dict``."""
        session, session_root = synthetic_session
        monkeypatch.chdir(session_root)

        workflow = graph_builder.build_from_session(session)

        # Only the dict export is checked here; no JSON string is produced
        # or parsed back in this test.
        exported = workflow.to_dict()
        assert exported["name"] is not None
        assert len(exported["nodes"]) == 2
|
||||
Reference in New Issue
Block a user