v1.0 - Version stable: multi-PC, détection UI-DETR-1, 3 modes exécution
- Frontend v4 accessible sur réseau local (192.168.1.40) - Ports ouverts: 3002 (frontend), 5001 (backend), 5004 (dashboard) - Ollama GPU fonctionnel - Self-healing interactif - Dashboard confiance Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
391
tests/unit/test_precision_metrics.py
Normal file
391
tests/unit/test_precision_metrics.py
Normal file
@@ -0,0 +1,391 @@
|
||||
"""
|
||||
Tests Precision Metrics Engine - Fiche #10 Patch E
|
||||
|
||||
Tests unitaires pour validation du système de métriques
|
||||
avec vérification overhead <1ms et fonctionnalités.
|
||||
|
||||
Auteur: Dom, Alice Kiro - 15 décembre 2024
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import time
|
||||
import threading
|
||||
from unittest.mock import Mock, patch
|
||||
from dataclasses import dataclass
|
||||
|
||||
from core.precision.metrics_engine import MetricsEngine, initialize_global_metrics_engine
|
||||
from core.precision.models.metric_models import (
|
||||
MetricType,
|
||||
ResolutionMetric,
|
||||
PerformanceMetric,
|
||||
ErrorMetric,
|
||||
generate_target_spec_hash,
|
||||
generate_screen_state_hash,
|
||||
generate_environment_hash
|
||||
)
|
||||
from core.precision.api.metrics_api import MetricsAPI
|
||||
|
||||
|
||||
# Mock objects pour tests
|
||||
@dataclass
class MockTargetSpec:
    """Minimal stand-in for a target specification used by the metrics tests.

    Fields mirror the attributes the metrics engine reads; optional fields
    default to None and are annotated as Optional (the originals declared
    `tuple = None` / `dict = None`, which contradicts the annotation).
    """
    by_role: str = "button"
    by_text: str = "Submit"
    by_position: Optional[tuple] = None
    context_hints: Optional[dict] = None
|
||||
|
||||
|
||||
@dataclass
class MockUIElement:
    """Minimal stand-in for a detected UI element used by the metrics tests."""
    # Element role/type as the detector would report it.
    element_type: str = "button"
    # Visible label text.
    text: str = "Submit"
    # Bounding box; presumably (x, y, width, height) — TODO confirm against detector output.
    bbox: tuple = (100, 200, 50, 30)
|
||||
|
||||
|
||||
@dataclass
class MockScreenState:
    """Minimal stand-in for a captured screen state.

    `ui_elements` defaults to a single MockUIElement; the annotation is
    Optional because None is the sentinel for "use the default" (the
    original declared `list = None`, contradicting the annotation).
    """
    ui_elements: Optional[list] = None
    screenshot_path: str = "/tmp/test.png"

    def __post_init__(self):
        # `is None` (not truthiness) so an explicitly empty list is preserved.
        if self.ui_elements is None:
            self.ui_elements = [MockUIElement()]
|
||||
|
||||
|
||||
@dataclass
class MockResolvedTarget:
    """Minimal stand-in for a target-resolution result.

    Optional fields default to None and are annotated as Optional (the
    originals declared `str = None` / `list = None`, contradicting the
    annotations).
    """
    success: bool = True
    confidence: float = 0.95
    strategy: str = "sniper_mode"
    error_type: Optional[str] = None
    candidates_count: int = 3
    sniper_score: float = 0.87
    anchor_element_id: str = "elem_123"
    context_hints_used: Optional[list] = None
|
||||
|
||||
|
||||
class TestMetricsEngine:
    """Unit tests for MetricsEngine (collection overhead, accuracy, safety)."""

    def setup_method(self):
        """Create a fresh engine and mock fixtures before each test."""
        self.engine = MetricsEngine(buffer_size=100, flush_interval=0.1)
        self.target_spec = MockTargetSpec()
        self.screen_state = MockScreenState()
        self.resolved_target = MockResolvedTarget()

    def teardown_method(self):
        """Shut the engine down after each test to stop background flushing."""
        if hasattr(self, 'engine'):
            self.engine.shutdown()

    def test_metrics_collection_overhead(self):
        """Average collection overhead must stay below 1ms per call."""
        start_time = time.perf_counter()

        for _ in range(100):  # 100 samples for a stable average
            self.engine.record_resolution(
                target_spec=self.target_spec,
                result=self.resolved_target,
                duration_ms=45.2,
                screen_state=self.screen_state
            )

        total_time = (time.perf_counter() - start_time) * 1000
        avg_overhead = total_time / 100

        # Overhead budget: <1ms per recorded metric
        assert avg_overhead < 1.0, f"Overhead moyen {avg_overhead:.2f}ms > 1ms target"

        # Engine-side accounting must match what was recorded
        stats = self.engine.get_stats()
        assert stats['metrics_collected'][MetricType.RESOLUTION] == 100
        assert stats['collection_performance']['avg_time_ms'] < 1.0

    def test_resolution_metrics_accuracy(self):
        """A recorded resolution metric must mirror its input values exactly."""
        self.engine.record_resolution(
            target_spec=self.target_spec,
            result=self.resolved_target,
            duration_ms=42.5,
            screen_state=self.screen_state
        )

        # Exactly one metric buffered
        resolution_buffer = self.engine._buffers[MetricType.RESOLUTION]
        assert len(resolution_buffer) == 1

        metric = resolution_buffer[0]
        assert isinstance(metric, ResolutionMetric)
        assert metric.success is True
        assert metric.confidence_score == 0.95
        assert metric.resolution_strategy == "sniper_mode"
        assert metric.duration_ms == 42.5
        assert metric.sniper_score == 0.87
        assert metric.anchor_element_id == "elem_123"
        assert metric.candidates_count == 3

    def test_performance_metrics_collection(self):
        """Performance metrics are buffered with all fields preserved."""
        self.engine.record_performance(
            operation_type="resolve",
            duration_ms=35.7,
            memory_usage_mb=128.5,
            cpu_usage_percent=15.2,
            cache_hit=True
        )

        performance_buffer = self.engine._buffers[MetricType.PERFORMANCE]
        assert len(performance_buffer) == 1

        metric = performance_buffer[0]
        assert isinstance(metric, PerformanceMetric)
        assert metric.operation_type == "resolve"
        assert metric.duration_ms == 35.7
        assert metric.memory_usage_mb == 128.5
        assert metric.cpu_usage_percent == 15.2
        assert metric.cache_hit is True

    def test_error_metrics_capture(self):
        """Error metrics keep type, message, component, severity and context."""
        self.engine.record_error(
            error_type="TargetNotFound",
            error_message="No matching element found",
            component="target_resolver",
            severity="high",
            context={"target_spec": "button[Submit]"}
        )

        error_buffer = self.engine._buffers[MetricType.ERROR]
        assert len(error_buffer) == 1

        metric = error_buffer[0]
        assert isinstance(metric, ErrorMetric)
        assert metric.error_type == "TargetNotFound"
        assert metric.error_message == "No matching element found"
        assert metric.component == "target_resolver"
        assert metric.severity == "high"
        assert metric.context["target_spec"] == "button[Submit]"

    def test_thread_safety(self):
        """Concurrent recording from 4 threads must not lose any metric."""
        results = []

        def collect_metrics(thread_id):
            """Record 50 resolutions from one worker thread."""
            for i in range(50):
                self.engine.record_resolution(
                    target_spec=self.target_spec,
                    result=self.resolved_target,
                    duration_ms=float(i),
                    screen_state=self.screen_state
                )
            results.append(thread_id)

        # Launch 4 concurrent recorders
        threads = []
        for i in range(4):
            thread = threading.Thread(target=collect_metrics, args=(i,))
            threads.append(thread)
            thread.start()

        for thread in threads:
            thread.join()

        assert len(results) == 4  # every worker finished

        stats = self.engine.get_stats()
        assert stats['metrics_collected'][MetricType.RESOLUTION] == 200  # 4 * 50

    def test_buffer_overflow_handling(self):
        """Buffer is bounded while the collected counter keeps the true total."""
        buffer_size = self.engine.buffer_size

        # Fill past capacity
        for i in range(buffer_size + 50):
            self.engine.record_resolution(
                target_spec=self.target_spec,
                result=self.resolved_target,
                duration_ms=float(i),
                screen_state=self.screen_state
            )

        # Buffer never grows past its configured size
        resolution_buffer = self.engine._buffers[MetricType.RESOLUTION]
        assert len(resolution_buffer) == buffer_size

        # ...but the collected counter still reflects every call
        stats = self.engine.get_stats()
        assert stats['metrics_collected'][MetricType.RESOLUTION] == buffer_size + 50
|
||||
|
||||
|
||||
class TestMetricsAPI:
    """Unit tests for the MetricsAPI query layer."""

    def setup_method(self):
        """Build a fresh engine/API pair for each test."""
        self.engine = MetricsEngine()
        self.api = MetricsAPI(self.engine)

    def teardown_method(self):
        """Stop the engine's background work after each test."""
        if hasattr(self, 'engine'):
            self.engine.shutdown()

    def test_precision_stats_empty(self):
        """With no recorded data, every precision counter is zero."""
        stats = self.api.get_precision_stats("1h")

        precision = stats['precision']
        assert precision['overall_rate'] == 0.0
        assert precision['total_resolutions'] == 0
        assert precision['successful_resolutions'] == 0
        assert precision['failed_resolutions'] == 0
        assert stats['by_strategy'] == {}

    def test_precision_stats_with_data(self):
        """Precision rates reflect recorded successes and failures."""
        spec = MockTargetSpec()
        screen = MockScreenState()

        # Record three successes, then one failure.
        for _ in range(3):
            ok = MockResolvedTarget(success=True, strategy="sniper_mode")
            self.engine.record_resolution(spec, ok, 40.0, screen)

        ko = MockResolvedTarget(success=False, strategy="text_search", error_type="NotFound")
        self.engine.record_resolution(spec, ko, 80.0, screen)

        stats = self.api.get_precision_stats("1h")

        precision = stats['precision']
        assert precision['overall_rate'] == 0.75  # 3 of 4
        assert precision['total_resolutions'] == 4
        assert precision['successful_resolutions'] == 3
        assert precision['failed_resolutions'] == 1

        # Per-strategy breakdown
        by_strategy = stats['by_strategy']
        assert 'sniper_mode' in by_strategy
        assert by_strategy['sniper_mode']['precision_rate'] == 1.0  # 3 of 3
        assert 'text_search' in by_strategy
        assert by_strategy['text_search']['precision_rate'] == 0.0  # 0 of 1

    def test_performance_stats(self):
        """Performance stats expose engine stats and a positive timestamp."""
        self.engine.record_performance("resolve", 45.2, 128.0, 12.5, True)
        self.engine.record_performance("execute", 67.8, 135.2, 18.3, False)

        stats = self.api.get_performance_stats("1h")

        assert 'engine_stats' in stats
        assert stats['timestamp'] > 0

    def test_export_metrics(self):
        """JSON export contains precision, performance and error sections."""
        spec = MockTargetSpec()
        screen = MockScreenState()
        resolved = MockResolvedTarget()

        self.engine.record_resolution(spec, resolved, 42.0, screen)
        self.engine.record_performance("resolve", 42.0)
        self.engine.record_error("TestError", "Test message", "test_component")

        export_data = self.api.export_metrics("json", "1h")

        assert 'precision' in export_data
        assert 'performance' in export_data
        assert 'errors' in export_data

        # Exactly one resolution was recorded above.
        assert export_data['precision']['precision']['total_resolutions'] == 1
|
||||
|
||||
|
||||
class TestMetricModels:
    """Unit tests for the metric data models and hash helpers."""

    def test_resolution_metric_creation(self):
        """ResolutionMetric keeps its fields and serializes via to_dict()."""
        metric = ResolutionMetric(
            timestamp=time.time(),
            target_spec_hash="abc123",
            resolution_strategy="sniper_mode",
            success=True,
            duration_ms=42.5,
            confidence_score=0.95,
            environment_hash="env123",
            screen_state_hash="screen123",
            sniper_score=0.87,
            candidates_count=3
        )

        assert metric.success is True
        assert metric.duration_ms == 42.5
        assert metric.confidence_score == 0.95
        assert metric.sniper_score == 0.87

        # Serialization keeps the same values
        data = metric.to_dict()
        assert data['success'] is True
        assert data['duration_ms'] == 42.5

    def test_hash_generation(self):
        """Hash helpers are deterministic and 16 characters long."""
        target_spec = MockTargetSpec()
        screen_state = MockScreenState()

        # Target-spec hash: deterministic for identical input
        hash1 = generate_target_spec_hash(target_spec)
        hash2 = generate_target_spec_hash(target_spec)
        assert hash1 == hash2
        assert len(hash1) == 16  # expected digest length

        # Screen-state hash
        hash3 = generate_screen_state_hash(screen_state)
        assert len(hash3) == 16

        # Environment hash
        hash4 = generate_environment_hash()
        assert len(hash4) == 16
|
||||
|
||||
|
||||
class TestGlobalMetricsEngine:
    """Tests for the global MetricsEngine singleton helpers."""

    def test_global_engine_initialization(self):
        """initialize_global_metrics_engine() creates and registers the singleton."""
        engine = initialize_global_metrics_engine(buffer_size=500)
        try:
            assert engine is not None
            assert engine.buffer_size == 500

            # The accessor must return the exact instance created above.
            from core.precision.metrics_engine import get_global_metrics_engine
            global_engine = get_global_metrics_engine()
            assert global_engine is engine
        finally:
            # Always stop the background flusher — the original leaked the
            # engine (and its thread) into later tests when an assert failed.
            engine.shutdown()
|
||||
|
||||
|
||||
# Pytest markers applied to every test in this module.
pytestmark = [
    pytest.mark.unit,
    pytest.mark.fiche10
]
|
||||
|
||||
|
||||
# Allow running this test module directly, outside the pytest CLI.
if __name__ == "__main__":
    pytest.main([__file__, "-v"])
|
||||
Reference in New Issue
Block a user