Files
rpa_vision_v3/tests/unit/test_precision_metrics.py
Dom cf495dd82f feat: unified chat, GestureCatalog, Copilot, Léa UI, data extraction, replay verification
Major overhaul of the Agent Chat system plus several new modules:

- Unified chat: the dual Workflows/Agent Libre split is removed, everything goes through /api/chat
  with three-level resolution (workflow → gesture → "montre-moi" / "show me"); see the request
  sketch after this list
- GestureCatalog: 38 universal Windows keyboard shortcuts with semantic matching,
  automatic substitution in replays, and an /api/gestures endpoint
- Copilot mode: step-by-step workflow execution with human validation over WebSocket
  (approve/skip/abort) before each action
- Léa UI (agent_v0/lea_ui/): PyQt5 interface for Windows with a transparent overlay
  for visual feedback during replay
- Data Extraction (core/extraction/): visual data-extraction engine
  (OCR + VLM → SQLite), with YAML schemas and CSV/Excel export
- ReplayVerifier (agent_v0/server_v1/): post-action verification by screenshot
  comparison, with retry logic (max 3)
- Hardened IntentParser: better regex fallback, GREETING type, improved patterns
- Dashboard: new gestures, streaming, and extractions pages
- Tests: 63 GestureCatalog tests, 47 extraction tests, fixes to existing tests
- Deprecation: /api/agent/plan and /api/agent/execute now return HTTP 410;
  the hardcoded _plan_to_replay_actions code is removed
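
A minimal request sketch (not part of this diff): only the endpoint paths and the
HTTP 410 behaviour come from this change; the base URL and the JSON payload shape
are assumptions for illustration.

    import requests

    BASE = "http://localhost:8000"  # hypothetical dev server URL (assumption)

    # Single entry point: the server resolves the message in three levels
    # (workflow -> gesture -> "montre-moi" / "show me").
    reply = requests.post(f"{BASE}/api/chat", json={"message": "open the file explorer"})
    print(reply.status_code, reply.json())

    # Gesture catalog (38 universal Windows keyboard shortcuts).
    print(requests.get(f"{BASE}/api/gestures").json())

    # Deprecated agent endpoints now answer HTTP 410 Gone.
    assert requests.post(f"{BASE}/api/agent/plan", json={}).status_code == 410
    assert requests.post(f"{BASE}/api/agent/execute", json={}).status_code == 410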

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-15 10:02:09 +01:00


"""
Tests Precision Metrics Engine - Fiche #10 Patch E
Tests unitaires pour validation du système de métriques
avec vérification overhead <1ms et fonctionnalités.
Auteur: Dom, Alice Kiro - 15 décembre 2024
"""
import pytest
import time
import threading
from unittest.mock import Mock, patch
from dataclasses import dataclass

from core.precision.metrics_engine import MetricsEngine, initialize_global_metrics_engine
from core.precision.models.metric_models import (
    MetricType,
    ResolutionMetric,
    PerformanceMetric,
    ErrorMetric,
    generate_target_spec_hash,
    generate_screen_state_hash,
    generate_environment_hash
)
from core.precision.api.metrics_api import MetricsAPI


# Mock objects for tests
@dataclass
class MockTargetSpec:
    by_role: str = "button"
    by_text: str = "Submit"
    by_position: tuple = None
    context_hints: dict = None


@dataclass
class MockUIElement:
    element_type: str = "button"
    text: str = "Submit"
    bbox: tuple = (100, 200, 50, 30)


@dataclass
class MockScreenState:
    ui_elements: list = None
    screenshot_path: str = "/tmp/test.png"

    def __post_init__(self):
        if self.ui_elements is None:
            self.ui_elements = [MockUIElement()]


@dataclass
class MockResolvedTarget:
    success: bool = True
    confidence: float = 0.95
    strategy: str = "sniper_mode"
    error_type: str = None
    candidates_count: int = 3
    sniper_score: float = 0.87
    anchor_element_id: str = "elem_123"
    context_hints_used: list = None


class TestMetricsEngine:
    """Tests for MetricsEngine"""

    def setup_method(self):
        """Per-test setup"""
        self.engine = MetricsEngine(buffer_size=100, flush_interval=0.1)
        self.target_spec = MockTargetSpec()
        self.screen_state = MockScreenState()
        self.resolved_target = MockResolvedTarget()

    def teardown_method(self):
        """Per-test cleanup"""
        if hasattr(self, 'engine') and hasattr(self.engine, 'shutdown'):
            self.engine.shutdown()

    def test_metrics_collection_overhead(self):
        """Checks that metric collection overhead stays below 1 ms"""
        # Measure record_resolution overhead
        start_time = time.perf_counter()
        for _ in range(100):  # 100 collections to average out noise
            self.engine.record_resolution(
                target_spec=self.target_spec,
                result=self.resolved_target,
                duration_ms=45.2,
                screen_state=self.screen_state
            )
        total_time = (time.perf_counter() - start_time) * 1000
        avg_overhead = total_time / 100

        # Overhead must stay below 1 ms
        assert avg_overhead < 1.0, f"Average overhead {avg_overhead:.2f}ms > 1ms target"

        # Collection stats must reflect the 100 calls
        stats = self.engine.get_stats()
        assert stats['metrics_collected'][MetricType.RESOLUTION] == 100
        assert stats['collection_performance']['avg_time_ms'] < 1.0

    def test_resolution_metrics_accuracy(self):
        """Checks accuracy of resolution metrics"""
        # Record one metric
        self.engine.record_resolution(
            target_spec=self.target_spec,
            result=self.resolved_target,
            duration_ms=42.5,
            screen_state=self.screen_state
        )

        # Check buffer content
        resolution_buffer = self.engine._buffers[MetricType.RESOLUTION]
        assert len(resolution_buffer) == 1

        metric = resolution_buffer[0]
        assert isinstance(metric, ResolutionMetric)
        assert metric.success == True
        assert metric.confidence_score == 0.95
        assert metric.resolution_strategy == "sniper_mode"
        assert metric.duration_ms == 42.5
        assert metric.sniper_score == 0.87
        assert metric.anchor_element_id == "elem_123"
        assert metric.candidates_count == 3

    def test_performance_metrics_collection(self):
        """Checks collection of performance metrics"""
        # Record a performance metric
        self.engine.record_performance(
            operation_type="resolve",
            duration_ms=35.7,
            memory_usage_mb=128.5,
            cpu_usage_percent=15.2,
            cache_hit=True
        )

        # Check buffer content
        performance_buffer = self.engine._buffers[MetricType.PERFORMANCE]
        assert len(performance_buffer) == 1

        metric = performance_buffer[0]
        assert isinstance(metric, PerformanceMetric)
        assert metric.operation_type == "resolve"
        assert metric.duration_ms == 35.7
        assert metric.memory_usage_mb == 128.5
        assert metric.cpu_usage_percent == 15.2
        assert metric.cache_hit == True

    def test_error_metrics_capture(self):
        """Checks capture of error metrics"""
        # Record an error metric
        self.engine.record_error(
            error_type="TargetNotFound",
            error_message="No matching element found",
            component="target_resolver",
            severity="high",
            context={"target_spec": "button[Submit]"}
        )

        # Check buffer content
        error_buffer = self.engine._buffers[MetricType.ERROR]
        assert len(error_buffer) == 1

        metric = error_buffer[0]
        assert isinstance(metric, ErrorMetric)
        assert metric.error_type == "TargetNotFound"
        assert metric.error_message == "No matching element found"
        assert metric.component == "target_resolver"
        assert metric.severity == "high"
        assert metric.context["target_spec"] == "button[Submit]"

    def test_thread_safety(self):
        """Checks thread safety of the metrics engine"""
        results = []

        def collect_metrics(thread_id):
            """Collection worker run by each thread"""
            for i in range(50):
                self.engine.record_resolution(
                    target_spec=self.target_spec,
                    result=self.resolved_target,
                    duration_ms=float(i),
                    screen_state=self.screen_state
                )
            results.append(thread_id)

        # Start 4 threads concurrently
        threads = []
        for i in range(4):
            thread = threading.Thread(target=collect_metrics, args=(i,))
            threads.append(thread)
            thread.start()

        # Wait for all threads to finish
        for thread in threads:
            thread.join()

        # All threads completed and every metric was counted
        assert len(results) == 4
        stats = self.engine.get_stats()
        assert stats['metrics_collected'][MetricType.RESOLUTION] == 200  # 4 * 50

    def test_buffer_overflow_handling(self):
        """Checks buffer overflow handling"""
        # Fill the buffer beyond its capacity
        buffer_size = self.engine.buffer_size
        for i in range(buffer_size + 50):
            self.engine.record_resolution(
                target_spec=self.target_spec,
                result=self.resolved_target,
                duration_ms=float(i),
                screen_state=self.screen_state
            )

        # Buffer size stays capped
        resolution_buffer = self.engine._buffers[MetricType.RESOLUTION]
        assert len(resolution_buffer) == buffer_size

        # Counters still account for every call
        stats = self.engine.get_stats()
        assert stats['metrics_collected'][MetricType.RESOLUTION] == buffer_size + 50


class TestMetricsAPI:
    """Tests for MetricsAPI"""

    def setup_method(self):
        """Per-test setup"""
        self.engine = MetricsEngine()
        self.api = MetricsAPI(self.engine)

    def teardown_method(self):
        """Per-test cleanup"""
        if hasattr(self, 'engine') and hasattr(self.engine, 'shutdown'):
            self.engine.shutdown()

    def test_precision_stats_empty(self):
        """Checks precision stats with no data"""
        stats = self.api.get_precision_stats("1h")

        assert stats['precision']['overall_rate'] == 0.0
        assert stats['precision']['total_resolutions'] == 0
        assert stats['precision']['successful_resolutions'] == 0
        assert stats['precision']['failed_resolutions'] == 0
        assert stats['by_strategy'] == {}

    def test_precision_stats_with_data(self):
        """Checks precision stats with data"""
        # Seed test metrics
        target_spec = MockTargetSpec()
        screen_state = MockScreenState()

        # 3 successes, 1 failure
        for i in range(3):
            success_result = MockResolvedTarget(success=True, strategy="sniper_mode")
            self.engine.record_resolution(target_spec, success_result, 40.0, screen_state)

        failure_result = MockResolvedTarget(success=False, strategy="text_search", error_type="NotFound")
        self.engine.record_resolution(target_spec, failure_result, 80.0, screen_state)

        # Fetch stats
        stats = self.api.get_precision_stats("1h")

        assert stats['precision']['overall_rate'] == 0.75  # 3/4
        assert stats['precision']['total_resolutions'] == 4
        assert stats['precision']['successful_resolutions'] == 3
        assert stats['precision']['failed_resolutions'] == 1

        # Per-strategy breakdown
        assert 'sniper_mode' in stats['by_strategy']
        assert stats['by_strategy']['sniper_mode']['precision_rate'] == 1.0  # 3/3
        assert 'text_search' in stats['by_strategy']
        assert stats['by_strategy']['text_search']['precision_rate'] == 0.0  # 0/1

    def test_performance_stats(self):
        """Checks performance stats"""
        # Seed performance metrics
        self.engine.record_performance("resolve", 45.2, 128.0, 12.5, True)
        self.engine.record_performance("execute", 67.8, 135.2, 18.3, False)

        stats = self.api.get_performance_stats("1h")

        assert 'engine_stats' in stats
        assert stats['timestamp'] > 0

    def test_export_metrics(self):
        """Checks metrics export"""
        # Seed test data
        target_spec = MockTargetSpec()
        screen_state = MockScreenState()
        result = MockResolvedTarget()

        self.engine.record_resolution(target_spec, result, 42.0, screen_state)
        self.engine.record_performance("resolve", 42.0)
        self.engine.record_error("TestError", "Test message", "test_component")

        # Export
        export_data = self.api.export_metrics("json", "1h")

        assert 'precision' in export_data
        assert 'performance' in export_data
        assert 'errors' in export_data

        # Precision data must reflect the single resolution
        assert export_data['precision']['precision']['total_resolutions'] == 1


class TestMetricModels:
    """Tests for metric models"""

    def test_resolution_metric_creation(self):
        """Checks ResolutionMetric creation"""
        metric = ResolutionMetric(
            timestamp=time.time(),
            target_spec_hash="abc123",
            resolution_strategy="sniper_mode",
            success=True,
            duration_ms=42.5,
            confidence_score=0.95,
            environment_hash="env123",
            screen_state_hash="screen123",
            sniper_score=0.87,
            candidates_count=3
        )

        assert metric.success == True
        assert metric.duration_ms == 42.5
        assert metric.confidence_score == 0.95
        assert metric.sniper_score == 0.87

        # Serialization check
        data = metric.to_dict()
        assert data['success'] == True
        assert data['duration_ms'] == 42.5

    def test_hash_generation(self):
        """Checks hash generation"""
        target_spec = MockTargetSpec()
        screen_state = MockScreenState()

        # target_spec hash: deterministic and expected length
        hash1 = generate_target_spec_hash(target_spec)
        hash2 = generate_target_spec_hash(target_spec)
        assert hash1 == hash2
        assert len(hash1) == 16

        # screen_state hash
        hash3 = generate_screen_state_hash(screen_state)
        assert len(hash3) == 16

        # environment hash
        hash4 = generate_environment_hash()
        assert len(hash4) == 16


class TestGlobalMetricsEngine:
    """Tests for the global MetricsEngine instance"""

    def test_global_engine_initialization(self):
        """Checks initialization of the global instance"""
        # Initialize the global engine
        engine = initialize_global_metrics_engine(buffer_size=500)
        assert engine is not None
        assert engine.buffer_size == 500

        # The getter must return the same instance
        from core.precision.metrics_engine import get_global_metrics_engine
        global_engine = get_global_metrics_engine()
        assert global_engine is engine

        # Cleanup
        if hasattr(engine, 'shutdown'):
            engine.shutdown()


# Pytest markers for test organization
pytestmark = [
    pytest.mark.unit,
    pytest.mark.fiche10
]


if __name__ == "__main__":
    pytest.main([__file__, "-v"])