feat: chat unifié, GestureCatalog, Copilot, Léa UI, extraction données, vérification replay

Refonte majeure du système Agent Chat et ajout de nombreux modules :

- Chat unifié : suppression du dual Workflows/Agent Libre, tout passe par /api/chat
  avec résolution en 3 niveaux (workflow → geste → "montre-moi")
- GestureCatalog : 38 raccourcis clavier universels Windows avec matching sémantique,
  substitution automatique dans les replays, et endpoint /api/gestures
- Mode Copilot : exécution pas-à-pas des workflows avec validation humaine via WebSocket
  (approve/skip/abort) avant chaque action
- Léa UI (agent_v0/lea_ui/) : interface PyQt5 pour Windows avec overlay transparent
  pour feedback visuel pendant le replay
- Data Extraction (core/extraction/) : moteur d'extraction visuelle de données
  (OCR + VLM → SQLite), avec schémas YAML et export CSV/Excel
- ReplayVerifier (agent_v0/server_v1/) : vérification post-action par comparaison
  de screenshots, avec logique de retry (max 3)
- IntentParser durci : meilleur fallback regex, type GREETING, patterns améliorés
- Dashboard : nouvelles pages gestures, streaming, extractions
- Tests : 63 tests GestureCatalog, 47 tests extraction, corrections tests existants
- Dépréciation : /api/agent/plan et /api/agent/execute retournent HTTP 410,
  suppression du code hardcodé _plan_to_replay_actions

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Dom
2026-03-15 10:02:09 +01:00
parent 74a1cb4e03
commit cf495dd82f
93 changed files with 12463 additions and 1080 deletions

478
tests/test_pipeline_e2e.py Normal file
View File

@@ -0,0 +1,478 @@
"""
tests/test_pipeline_e2e.py — Phase 0, Tâche P0-5
Test end-to-end du pipeline complet :
RawSession → ScreenStates → Embeddings → Clustering → Workflow (nodes + edges)
Utilise des embeddings déterministes (mocks) pour valider la logique du pipeline
sans dépendre d'OpenCLIP ou d'un moteur OCR.
Scénario simulé :
- 2 écrans distincts ("Login Page" et "Dashboard")
- 3 cycles de navigation Login→Dashboard
- DBSCAN doit trouver 2 clusters, produire 2 nodes et 2 edges
"""
import pytest
import numpy as np
from datetime import datetime, timedelta
from unittest.mock import MagicMock
from PIL import Image
from core.models.raw_session import RawSession, Event, Screenshot, RawWindowContext
from core.models.workflow_graph import Workflow
from core.graph.graph_builder import GraphBuilder
# ======================================================================
# Helpers
# ======================================================================
def _make_vector(cluster_id: int, seed: int, dim: int = 512) -> np.ndarray:
"""
Crée un vecteur déterministe pour un cluster donné.
Cluster 0 : énergie dans la première moitié du vecteur
Cluster 1 : énergie dans la seconde moitié
→ distance cosinus inter-cluster ≈ 1.0, intra-cluster ≈ 0.01
"""
base = np.zeros(dim, dtype=np.float32)
if cluster_id == 0:
base[: dim // 2] = 1.0
else:
base[dim // 2 :] = 1.0
rng = np.random.RandomState(seed)
noise = rng.randn(dim).astype(np.float32) * 0.01
vector = base + noise
return vector / np.linalg.norm(vector)
# ======================================================================
# Fixtures
# ======================================================================
@pytest.fixture
def synthetic_session(tmp_path):
    """
    Provide a synthetic RawSession: 2 screen types x 3 cycles = 6 screenshots.

    Capture order: Login, Dashboard, Login, Dashboard, Login, Dashboard,
    i.e. Login->Dashboard three times and Dashboard->Login twice.

    Returns:
        A ``(session, tmp_path)`` tuple, ``tmp_path`` being the directory
        under which the PNG files were written.
    """
    session_id = "test_e2e_session"
    session_start = datetime(2026, 3, 10, 10, 0, 0)

    # Real PNG files go to disk; per the original note this nested layout is
    # the path _create_screen_states expects to find them under.
    screenshot_root = tmp_path.joinpath(
        "data", "training", "sessions", session_id, session_id, "screenshots"
    )
    screenshot_root.mkdir(parents=True)

    # (window title, application name, RGB fill colour)
    screen_defs = [
        ("Login Page", "firefox", (200, 50, 50)),  # red
        ("Dashboard", "firefox", (50, 50, 200)),  # blue
    ]

    screenshots = []
    events = []
    # Three passes over the two screens, flattened into one running index.
    for i in range(3 * len(screen_defs)):
        title, app, color = screen_defs[i % len(screen_defs)]
        offset_seconds = i * 2
        captured = session_start + timedelta(seconds=offset_seconds)
        shot_id = f"ss_{i:03d}"

        # Solid-colour image written as an actual file
        filename = f"screen_{i:03d}.png"
        Image.new("RGB", (100, 100), color).save(str(screenshot_root / filename))

        screenshots.append(
            Screenshot(
                screenshot_id=shot_id,
                relative_path=f"screenshots/{filename}",
                captured_at=captured.isoformat(),
            )
        )
        events.append(
            Event(
                t=float(offset_seconds),
                type="mouse_click",
                window=RawWindowContext(title=title, app_name=app),
                screenshot_id=shot_id,
                data={"button": "left", "pos": [500, 300]},
            )
        )

    session = RawSession(
        session_id=session_id,
        agent_version="test_1.0",
        environment={
            "screen": {"primary_resolution": [1920, 1080]},
            "os": "linux",
        },
        user={"id": "test_user"},
        context={"workflow": "test", "tags": ["e2e"]},
        started_at=session_start,
        ended_at=session_start + timedelta(seconds=12),
        events=events,
        screenshots=screenshots,
    )
    return session, tmp_path
@pytest.fixture
def mock_embedding_builder():
    """
    Provide a MagicMock standing in for StateEmbeddingBuilder.

    ``builder.build(screen_state, ...)`` returns a mock whose ``get_vector()``
    yields a vector from ``_make_vector``: cluster 0 when the window title
    contains "Login", cluster 1 otherwise.

    The jitter seed is derived from ``screen_state_id`` with CRC32 instead of
    the built-in ``hash()``: string hashes are salted per interpreter process
    (see PYTHONHASHSEED), so ``hash()`` would make these "deterministic"
    embeddings differ from one test run to the next.
    """
    import zlib  # local import: only this fixture needs it

    builder = MagicMock()

    def build_side_effect(screen_state, *args, **kwargs):
        title = screen_state.window.window_title
        cluster_id = 0 if "Login" in title else 1
        # Stable across processes, unlike hash(); str() tolerates non-str ids.
        state_key = str(screen_state.screen_state_id).encode("utf-8")
        seed = zlib.crc32(state_key) % (2**31)
        vector = _make_vector(cluster_id, seed)
        embedding_mock = MagicMock()
        embedding_mock.get_vector.return_value = vector
        return embedding_mock

    builder.build.side_effect = build_side_effect
    return builder
@pytest.fixture
def graph_builder(mock_embedding_builder):
    """Provide a GraphBuilder using the mock embedder, quality validation off."""
    settings = {
        "embedding_builder": mock_embedding_builder,
        "min_pattern_repetitions": 3,
        "clustering_eps": 0.15,
        "clustering_min_samples": 2,
        "enable_quality_validation": False,
    }
    return GraphBuilder(**settings)
# ======================================================================
# Tests
# ======================================================================
class TestScreenStatesCreation:
    """Tests for _create_screen_states: RawSession -> list of ScreenState."""

    @staticmethod
    def _states_for(synthetic_session, graph_builder, monkeypatch):
        """Create the screen states from inside the session's temp directory.

        Returns the ``(session, states)`` pair.
        """
        session, workdir = synthetic_session
        monkeypatch.chdir(workdir)
        return session, graph_builder._create_screen_states(session)

    def test_creates_correct_number_of_states(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        _, states = self._states_for(synthetic_session, graph_builder, monkeypatch)
        assert len(states) == 6

    def test_window_titles_alternate(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        _, states = self._states_for(synthetic_session, graph_builder, monkeypatch)
        observed = [state.window.window_title for state in states]
        # Even positions are the login screen, odd positions the dashboard.
        expected = [
            "Dashboard" if position % 2 else "Login Page"
            for position in range(len(observed))
        ]
        assert observed == expected

    def test_metadata_contains_event_info(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        session, states = self._states_for(
            synthetic_session, graph_builder, monkeypatch
        )
        expected_session_id = session.session_id
        for screen_state in states:
            assert screen_state.metadata.get("event_type") == "mouse_click"
            assert screen_state.session_id == expected_session_id

    def test_screenshot_files_detected(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        """Every state reports a file_size_bytes greater than zero."""
        _, states = self._states_for(synthetic_session, graph_builder, monkeypatch)
        sizes = [screen_state.raw.file_size_bytes for screen_state in states]
        assert all(size > 0 for size in sizes)
class TestClustering:
    """Tests for the DBSCAN clustering step: embeddings -> clusters."""

    @staticmethod
    def _clusters_for(synthetic_session, graph_builder, monkeypatch):
        """Run states -> embeddings -> pattern detection and return the clusters."""
        session, workdir = synthetic_session
        monkeypatch.chdir(workdir)
        states = graph_builder._create_screen_states(session)
        embeddings = graph_builder._compute_embeddings(states)
        return graph_builder._detect_patterns(embeddings, states)

    def test_detects_two_clusters(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        clusters = self._clusters_for(synthetic_session, graph_builder, monkeypatch)
        assert len(clusters) == 2

    def test_each_cluster_has_three_members(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        clusters = self._clusters_for(synthetic_session, graph_builder, monkeypatch)
        for member_indices in clusters.values():
            assert len(member_indices) == 3

    def test_insufficient_data_returns_empty(self, graph_builder):
        """Two embeddings with min_pattern_repetitions=3 yield no clusters."""
        too_few = [np.random.randn(512).astype(np.float32) for _ in range(2)]
        placeholder_states = [None, None]
        assert graph_builder._detect_patterns(too_few, placeholder_states) == {}
class TestWorkflowConstruction:
    """Tests for the full pipeline: RawSession -> Workflow."""

    @staticmethod
    def _build(synthetic_session, graph_builder, monkeypatch, *build_args):
        """Build a workflow from the synthetic session in its temp directory.

        Extra positional arguments are forwarded unchanged to
        ``build_from_session``.
        """
        session, workdir = synthetic_session
        monkeypatch.chdir(workdir)
        return graph_builder.build_from_session(session, *build_args)

    def test_produces_valid_workflow(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        workflow = self._build(
            synthetic_session, graph_builder, monkeypatch, "Test Login Workflow"
        )
        assert isinstance(workflow, Workflow)
        assert workflow.name == "Test Login Workflow"

    def test_workflow_has_two_nodes(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        workflow = self._build(synthetic_session, graph_builder, monkeypatch)
        assert len(workflow.nodes) == 2

    def test_workflow_has_edges(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        workflow = self._build(synthetic_session, graph_builder, monkeypatch)
        assert len(workflow.edges) >= 1

    def test_nodes_have_screen_templates(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        workflow = self._build(synthetic_session, graph_builder, monkeypatch)
        for node in workflow.nodes:
            template = node.template
            assert template is not None
            embedding = template.embedding
            assert embedding is not None
            assert embedding.min_cosine_similarity > 0
            assert embedding.sample_count >= 3
            # The prototype vector is kept in the node metadata.
            node_meta = node.metadata
            assert "_prototype_vector" in node_meta
            assert len(node_meta["_prototype_vector"]) == 512
            assert node_meta.get("observation_count", 0) >= 3

    def test_nodes_have_window_title_pattern(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        workflow = self._build(synthetic_session, graph_builder, monkeypatch)
        titles = set()
        for node in workflow.nodes:
            window = node.template.window
            if window and window.title_pattern:
                titles.add(window.title_pattern)
        # At least one of the two known titles must have been captured.
        assert any(name in titles for name in ("Login Page", "Dashboard"))

    def test_edges_have_actions(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        workflow = self._build(synthetic_session, graph_builder, monkeypatch)
        for transition in workflow.edges:
            assert transition.from_node != transition.to_node
            action = transition.action
            assert action is not None
            assert action.type == "mouse_click"
            assert action.target is not None

    def test_edge_execution_counts(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        """The per-edge execution counters add up to the observed transitions."""
        workflow = self._build(synthetic_session, graph_builder, monkeypatch)
        total_transitions = 0
        for transition in workflow.edges:
            total_transitions += transition.stats.execution_count
        # Sequence A,B,A,B,A,B -> 5 transitions (A->B: 3, B->A: 2)
        assert total_transitions == 5

    def test_entry_nodes_set(
        self, synthetic_session, graph_builder, monkeypatch
    ):
        workflow = self._build(synthetic_session, graph_builder, monkeypatch)
        assert len(workflow.entry_nodes) == 1
class TestQualityValidation:
    """Tests for the quality validation built into the pipeline."""

    @staticmethod
    def _build_validated(synthetic_session, mock_embedding_builder, monkeypatch):
        """Build the synthetic workflow with quality validation switched on."""
        session, workdir = synthetic_session
        monkeypatch.chdir(workdir)
        validating_builder = GraphBuilder(
            embedding_builder=mock_embedding_builder,
            min_pattern_repetitions=3,
            enable_quality_validation=True,
        )
        return validating_builder.build_from_session(session)

    def test_quality_report_generated(
        self, synthetic_session, mock_embedding_builder, monkeypatch
    ):
        workflow = self._build_validated(
            synthetic_session, mock_embedding_builder, monkeypatch
        )
        assert workflow.metadata is not None
        assert "quality_report" in workflow.metadata
        report = workflow.metadata["quality_report"]
        for required_key in ("overall_score", "is_production_ready"):
            assert required_key in report

    def test_quality_sets_learning_state(
        self, synthetic_session, mock_embedding_builder, monkeypatch
    ):
        workflow = self._build_validated(
            synthetic_session, mock_embedding_builder, monkeypatch
        )
        # Only these two learning states are accepted for this session.
        accepted_states = ("OBSERVATION", "AUTO_CANDIDATE")
        assert workflow.learning_state in accepted_states
class TestEdgeCases:
    """Tests for boundary cases."""

    def test_empty_session_raises(self, mock_embedding_builder):
        builder = GraphBuilder(
            embedding_builder=mock_embedding_builder,
            enable_quality_validation=False,
        )
        # No events and no screenshots are supplied.
        empty_session = RawSession(
            session_id="empty",
            agent_version="test",
            environment={},
            user={},
            context={},
            started_at=datetime.now(),
        )
        with pytest.raises(ValueError, match="no screenshots"):
            builder.build_from_session(empty_session)

    def test_single_screen_type_no_edges(
        self, mock_embedding_builder, tmp_path, monkeypatch
    ):
        """A single window title gives one cluster and therefore no edges."""
        session_id = "single_screen"
        screenshot_root = tmp_path.joinpath(
            "data", "training", "sessions", session_id, session_id, "screenshots"
        )
        screenshot_root.mkdir(parents=True)
        monkeypatch.chdir(tmp_path)

        screenshots = []
        events = []
        for second in range(4):
            captured = datetime(2026, 3, 10, 10, 0, second)
            shot_id = f"ss_{second}"
            fname = f"screen_{second:03d}.png"
            grey = Image.new("RGB", (100, 100), (100, 100, 100))
            grey.save(str(screenshot_root / fname))
            screenshots.append(
                Screenshot(
                    screenshot_id=shot_id,
                    relative_path=f"screenshots/{fname}",
                    captured_at=captured.isoformat(),
                )
            )
            events.append(
                Event(
                    t=float(second),
                    type="mouse_click",
                    window=RawWindowContext(title="Login Page", app_name="app"),
                    screenshot_id=shot_id,
                    data={"button": "left", "pos": [100, 100]},
                )
            )

        session = RawSession(
            session_id=session_id,
            agent_version="test",
            environment={"screen": {"primary_resolution": [1920, 1080]}},
            user={"id": "user"},
            context={},
            started_at=datetime(2026, 3, 10, 10, 0, 0),
            events=events,
            screenshots=screenshots,
        )
        builder = GraphBuilder(
            embedding_builder=mock_embedding_builder,
            min_pattern_repetitions=3,
            enable_quality_validation=False,
        )
        workflow = builder.build_from_session(session)
        # Every state maps to the same cluster, so there is no transition.
        assert len(workflow.edges) == 0

    def test_serialization_roundtrip(
        self, synthetic_session, graph_builder, monkeypatch, tmp_path
    ):
        """The built Workflow can be converted to a dict via to_dict()."""
        session, session_dir = synthetic_session
        monkeypatch.chdir(session_dir)
        workflow = graph_builder.build_from_session(session)
        # Per the original note: to_json yields a JSON string, to_dict a dict;
        # only the dict form is checked here.
        as_dict = workflow.to_dict()
        assert as_dict["name"] is not None
        assert len(as_dict["nodes"]) == 2