rpa_vision_v3/tests/unit/test_precision_metrics.py

"""
Tests Precision Metrics Engine - Fiche #10 Patch E

Tests unitaires pour validation du système de métriques
avec vérification overhead <1ms et fonctionnalités.

Auteur: Dom, Alice Kiro - 15 décembre 2024
"""

import pytest
import time
import threading
from unittest.mock import Mock, patch
from dataclasses import dataclass

from core.precision.metrics_engine import MetricsEngine, initialize_global_metrics_engine
from core.precision.models.metric_models import (
    MetricType,
    ResolutionMetric,
    PerformanceMetric,
    ErrorMetric,
    generate_target_spec_hash,
    generate_screen_state_hash,
    generate_environment_hash
)
from core.precision.api.metrics_api import MetricsAPI


# Mock objects pour tests
@dataclass
class MockTargetSpec:
    by_role: str = "button"
    by_text: str = "Submit"
    by_position: tuple = None
    context_hints: dict = None


@dataclass
class MockUIElement:
    element_type: str = "button"
    text: str = "Submit"
    bbox: tuple = (100, 200, 50, 30)


@dataclass
class MockScreenState:
    ui_elements: list = None
    screenshot_path: str = "/tmp/test.png"

    def __post_init__(self):
        if self.ui_elements is None:
            self.ui_elements = [MockUIElement()]


@dataclass
class MockResolvedTarget:
    success: bool = True
    confidence: float = 0.95
    strategy: str = "sniper_mode"
    error_type: str = None
    candidates_count: int = 3
    sniper_score: float = 0.87
    anchor_element_id: str = "elem_123"
    context_hints_used: list = None


class TestMetricsEngine:
    """Tests pour MetricsEngine"""

    def setup_method(self):
        """Setup pour chaque test"""
        self.engine = MetricsEngine(buffer_size=100, flush_interval=0.1)
        self.target_spec = MockTargetSpec()
        self.screen_state = MockScreenState()
        self.resolved_target = MockResolvedTarget()

    def teardown_method(self):
        """Cleanup après chaque test"""
        if hasattr(self, 'engine'):
            self.engine.shutdown()

    def test_metrics_collection_overhead(self):
        """Vérifie overhead <1ms pour collecte métriques"""
        # Test overhead record_resolution
        start_time = time.perf_counter()

        for _ in range(100):  # 100 collectes pour moyenne
            self.engine.record_resolution(
                target_spec=self.target_spec,
                result=self.resolved_target,
                duration_ms=45.2,
                screen_state=self.screen_state
            )

        total_time = (time.perf_counter() - start_time) * 1000
        avg_overhead = total_time / 100

        # Vérification overhead <1ms
        assert avg_overhead < 1.0, f"Overhead moyen {avg_overhead:.2f}ms > 1ms target"

        # Vérification stats collecte
        stats = self.engine.get_stats()
        assert stats['metrics_collected'][MetricType.RESOLUTION] == 100
        assert stats['collection_performance']['avg_time_ms'] < 1.0

    def test_resolution_metrics_accuracy(self):
        """Vérifie précision métriques résolution"""
        # Enregistrement métrique
        self.engine.record_resolution(
            target_spec=self.target_spec,
            result=self.resolved_target,
            duration_ms=42.5,
            screen_state=self.screen_state
        )

        # Vérification buffer
        resolution_buffer = self.engine._buffers[MetricType.RESOLUTION]
        assert len(resolution_buffer) == 1

        metric = resolution_buffer[0]
        assert isinstance(metric, ResolutionMetric)
        assert metric.success == True
        assert metric.confidence_score == 0.95
        assert metric.resolution_strategy == "sniper_mode"
        assert metric.duration_ms == 42.5
        assert metric.sniper_score == 0.87
        assert metric.anchor_element_id == "elem_123"
        assert metric.candidates_count == 3

    def test_performance_metrics_collection(self):
        """Vérifie collecte métriques performance"""
        # Enregistrement métrique performance
        self.engine.record_performance(
            operation_type="resolve",
            duration_ms=35.7,
            memory_usage_mb=128.5,
            cpu_usage_percent=15.2,
            cache_hit=True
        )

        # Vérification buffer
        performance_buffer = self.engine._buffers[MetricType.PERFORMANCE]
        assert len(performance_buffer) == 1

        metric = performance_buffer[0]
        assert isinstance(metric, PerformanceMetric)
        assert metric.operation_type == "resolve"
        assert metric.duration_ms == 35.7
        assert metric.memory_usage_mb == 128.5
        assert metric.cpu_usage_percent == 15.2
        assert metric.cache_hit == True

    def test_error_metrics_capture(self):
        """Vérifie capture métriques erreurs"""
        # Enregistrement métrique erreur
        self.engine.record_error(
            error_type="TargetNotFound",
            error_message="No matching element found",
            component="target_resolver",
            severity="high",
            context={"target_spec": "button[Submit]"}
        )

        # Vérification buffer
        error_buffer = self.engine._buffers[MetricType.ERROR]
        assert len(error_buffer) == 1

        metric = error_buffer[0]
        assert isinstance(metric, ErrorMetric)
        assert metric.error_type == "TargetNotFound"
        assert metric.error_message == "No matching element found"
        assert metric.component == "target_resolver"
        assert metric.severity == "high"
        assert metric.context["target_spec"] == "button[Submit]"

    def test_thread_safety(self):
        """Vérifie thread safety du moteur métriques"""
        results = []

        def collect_metrics(thread_id):
            """Fonction collecte pour thread"""
            for i in range(50):
                self.engine.record_resolution(
                    target_spec=self.target_spec,
                    result=self.resolved_target,
                    duration_ms=float(i),
                    screen_state=self.screen_state
                )
            results.append(thread_id)

        # Lancement 4 threads simultanés
        threads = []
        for i in range(4):
            thread = threading.Thread(target=collect_metrics, args=(i,))
            threads.append(thread)
            thread.start()

        # Attente fin threads
        for thread in threads:
            thread.join()

        # Vérification résultats
        assert len(results) == 4  # Tous threads terminés

        stats = self.engine.get_stats()
        assert stats['metrics_collected'][MetricType.RESOLUTION] == 200  # 4 * 50

    def test_buffer_overflow_handling(self):
        """Vérifie gestion overflow buffer"""
        # Remplissage buffer au-delà capacité
        buffer_size = self.engine.buffer_size

        for i in range(buffer_size + 50):
            self.engine.record_resolution(
                target_spec=self.target_spec,
                result=self.resolved_target,
                duration_ms=float(i),
                screen_state=self.screen_state
            )

        # Vérification taille buffer limitée
        resolution_buffer = self.engine._buffers[MetricType.RESOLUTION]
        assert len(resolution_buffer) == buffer_size

        # Vérification stats correctes
        stats = self.engine.get_stats()
        assert stats['metrics_collected'][MetricType.RESOLUTION] == buffer_size + 50


class TestMetricsAPI:
    """Tests pour MetricsAPI"""

    def setup_method(self):
        """Setup pour chaque test"""
        self.engine = MetricsEngine()
        self.api = MetricsAPI(self.engine)

    def teardown_method(self):
        """Cleanup après chaque test"""
        if hasattr(self, 'engine'):
            self.engine.shutdown()

    def test_precision_stats_empty(self):
        """Vérifie stats précision avec données vides"""
        stats = self.api.get_precision_stats("1h")

        assert stats['precision']['overall_rate'] == 0.0
        assert stats['precision']['total_resolutions'] == 0
        assert stats['precision']['successful_resolutions'] == 0
        assert stats['precision']['failed_resolutions'] == 0
        assert stats['by_strategy'] == {}

    def test_precision_stats_with_data(self):
        """Vérifie stats précision avec données"""
        # Ajout métriques test
        target_spec = MockTargetSpec()
        screen_state = MockScreenState()

        # 3 succès, 1 échec
        for i in range(3):
            success_result = MockResolvedTarget(success=True, strategy="sniper_mode")
            self.engine.record_resolution(target_spec, success_result, 40.0, screen_state)

        failure_result = MockResolvedTarget(success=False, strategy="text_search", error_type="NotFound")
        self.engine.record_resolution(target_spec, failure_result, 80.0, screen_state)

        # Récupération stats
        stats = self.api.get_precision_stats("1h")

        assert stats['precision']['overall_rate'] == 0.75  # 3/4
        assert stats['precision']['total_resolutions'] == 4
        assert stats['precision']['successful_resolutions'] == 3
        assert stats['precision']['failed_resolutions'] == 1

        # Vérification par stratégie
        assert 'sniper_mode' in stats['by_strategy']
        assert stats['by_strategy']['sniper_mode']['precision_rate'] == 1.0  # 3/3
        assert 'text_search' in stats['by_strategy']
        assert stats['by_strategy']['text_search']['precision_rate'] == 0.0  # 0/1

    def test_performance_stats(self):
        """Vérifie stats performance"""
        # Ajout métriques performance
        self.engine.record_performance("resolve", 45.2, 128.0, 12.5, True)
        self.engine.record_performance("execute", 67.8, 135.2, 18.3, False)

        stats = self.api.get_performance_stats("1h")

        assert 'engine_stats' in stats
        assert stats['timestamp'] > 0

    def test_export_metrics(self):
        """Vérifie export métriques"""
        # Ajout données test
        target_spec = MockTargetSpec()
        screen_state = MockScreenState()
        result = MockResolvedTarget()

        self.engine.record_resolution(target_spec, result, 42.0, screen_state)
        self.engine.record_performance("resolve", 42.0)
        self.engine.record_error("TestError", "Test message", "test_component")

        # Export
        export_data = self.api.export_metrics("json", "1h")

        assert 'precision' in export_data
        assert 'performance' in export_data
        assert 'errors' in export_data

        # Vérification données précision
        assert export_data['precision']['precision']['total_resolutions'] == 1


class TestMetricModels:
    """Tests pour modèles métriques"""

    def test_resolution_metric_creation(self):
        """Vérifie création ResolutionMetric"""
        metric = ResolutionMetric(
            timestamp=time.time(),
            target_spec_hash="abc123",
            resolution_strategy="sniper_mode",
            success=True,
            duration_ms=42.5,
            confidence_score=0.95,
            environment_hash="env123",
            screen_state_hash="screen123",
            sniper_score=0.87,
            candidates_count=3
        )

        assert metric.success == True
        assert metric.duration_ms == 42.5
        assert metric.confidence_score == 0.95
        assert metric.sniper_score == 0.87

        # Test sérialisation
        data = metric.to_dict()
        assert data['success'] == True
        assert data['duration_ms'] == 42.5

    def test_hash_generation(self):
        """Vérifie génération hash"""
        target_spec = MockTargetSpec()
        screen_state = MockScreenState()

        # Test hash target_spec
        hash1 = generate_target_spec_hash(target_spec)
        hash2 = generate_target_spec_hash(target_spec)
        assert hash1 == hash2  # Déterministe
        assert len(hash1) == 16  # Longueur attendue

        # Test hash screen_state
        hash3 = generate_screen_state_hash(screen_state)
        assert len(hash3) == 16

        # Test hash environnement
        hash4 = generate_environment_hash()
        assert len(hash4) == 16


class TestGlobalMetricsEngine:
    """Tests pour instance globale MetricsEngine"""

    def test_global_engine_initialization(self):
        """Vérifie initialisation instance globale"""
        # Initialisation
        engine = initialize_global_metrics_engine(buffer_size=500)

        assert engine is not None
        assert engine.buffer_size == 500

        # Vérification récupération
        from core.precision.metrics_engine import get_global_metrics_engine
        global_engine = get_global_metrics_engine()

        assert global_engine is engine

        # Cleanup
        engine.shutdown()


# Markers pytest pour organisation
pytestmark = [
    pytest.mark.unit,
    pytest.mark.fiche10
]


if __name__ == "__main__":
    pytest.main([__file__, "-v"])