rpa_vision_v3/core/precision/metrics_engine.py
Dom a27b74cf22 v1.0 - Stable version: multi-PC, UI-DETR-1 detection, 3 execution modes
- Frontend v4 reachable on the local network (192.168.1.40)
- Open ports: 3002 (frontend), 5001 (backend), 5004 (dashboard)
- Ollama GPU working
- Interactive self-healing
- Confidence dashboard

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-29 11:23:51 +01:00

"""
Precision Metrics Engine - Fiche #10 Patch B
Moteur principal de collecte métriques temps réel avec <1ms overhead
et support 1000+ métriques/seconde.
Auteur: Dom, Alice Kiro - 15 décembre 2024
"""

import time
import threading
import logging
from typing import Optional, List, Dict, Any
from collections import deque, defaultdict

from .models.metric_models import (
    MetricType,
    ResolutionMetric,
    PerformanceMetric,
    ErrorMetric,
    generate_target_spec_hash,
    generate_screen_state_hash,
    generate_environment_hash,
)

logger = logging.getLogger(__name__)


class MetricsEngine:
    """
    Core real-time metrics collection engine.

    Features:
    - <1ms collection overhead per metric
    - Supports 1000+ metrics/second
    - Buffered collection for performance (buffers are flushed to storage on read)
    - Thread-safe for concurrent use
    """

    def __init__(self, storage_adapter=None, buffer_size=10000, flush_interval=1.0):
        """
        Initialize the metrics engine.

        Args:
            storage_adapter: Storage adapter (default: in-memory)
            buffer_size: Buffer size per metric type
            flush_interval: Buffer flush interval in seconds (currently only
                stored; this class itself flushes buffers on read)
        """
        self.storage = storage_adapter or self._create_memory_storage()
        self.buffer_size = buffer_size
        self.flush_interval = flush_interval

        # Thread-safe buffers, one per metric type
        self._buffers = {
            MetricType.RESOLUTION: deque(maxlen=buffer_size),
            MetricType.PERFORMANCE: deque(maxlen=buffer_size),
            MetricType.ERROR: deque(maxlen=buffer_size)
        }

        # Locks for thread safety
        self._locks = {
            metric_type: threading.Lock()
            for metric_type in MetricType
        }

        # Internal statistics
        self._stats = {
            'metrics_collected': defaultdict(int),
            'collection_time_ms': deque(maxlen=1000)
        }

        # Environment hash cache (hashing the environment is the slow path)
        self._env_hash_cache = None
        self._env_cache_time = 0
        self._env_cache_ttl = 60.0  # 1 minute

        logger.info("MetricsEngine initialized with buffer_size=%d", buffer_size)

    def record_resolution(self, target_spec, result, duration_ms: float, screen_state) -> None:
        """
        Record a resolution metric with <1ms overhead.

        Args:
            target_spec: Target specification
            result: Resolution result
            duration_ms: Resolution duration in ms
            screen_state: Screen state
        """
        start_time = time.perf_counter()
        try:
            # Optimized hash generation (the environment hash is cached)
            target_hash = generate_target_spec_hash(target_spec)
            screen_hash = generate_screen_state_hash(screen_state)
            env_hash = self._get_cached_environment_hash()

            # Extract result data; getattr keeps this tolerant of partial results
            success = getattr(result, 'success', False)
            confidence = getattr(result, 'confidence', 0.0)
            strategy = getattr(result, 'strategy', 'unknown')
            error_type = getattr(result, 'error_type', None) if not success else None

            # Advanced context (Fiches #6-8)
            sniper_score = getattr(result, 'sniper_score', None)
            anchor_id = getattr(result, 'anchor_element_id', None)
            context_hints = getattr(result, 'context_hints_used', None)
            candidates_count = getattr(result, 'candidates_count', 0)

            # Build the metric
            metric = ResolutionMetric(
                timestamp=time.time(),
                target_spec_hash=target_hash,
                resolution_strategy=strategy,
                success=success,
                duration_ms=duration_ms,
                confidence_score=confidence,
                environment_hash=env_hash,
                screen_state_hash=screen_hash,
                error_type=error_type,
                candidates_count=candidates_count,
                sniper_score=sniper_score,
                anchor_element_id=anchor_id,
                context_hints_used=context_hints
            )

            # Thread-safe buffer append
            with self._locks[MetricType.RESOLUTION]:
                self._buffers[MetricType.RESOLUTION].append(metric)

            # Statistics
            self._stats['metrics_collected'][MetricType.RESOLUTION] += 1
        except Exception as e:
            logger.warning("Failed to record resolution metric: %s", e)
        finally:
            # Measure collection overhead
            collection_time = (time.perf_counter() - start_time) * 1000
            self._stats['collection_time_ms'].append(collection_time)

            # Warn if overhead exceeds the 1ms target
            if collection_time > 1.0:
                logger.warning("Resolution metric collection took %.2fms (>1ms target)",
                               collection_time)

    def record_performance(self, operation_type: str, duration_ms: float,
                           memory_usage_mb: Optional[float] = None,
                           cpu_usage_percent: Optional[float] = None,
                           cache_hit: bool = False) -> None:
        """
        Record a performance metric.

        Args:
            operation_type: Operation type
            duration_ms: Duration in ms
            memory_usage_mb: Memory usage in MB
            cpu_usage_percent: CPU usage in percent
            cache_hit: Whether a cache hit occurred
        """
        start_time = time.perf_counter()
        try:
            # Build the metric (missing resource readings default to 0.0)
            metric = PerformanceMetric(
                timestamp=time.time(),
                operation_type=operation_type,
                duration_ms=duration_ms,
                memory_usage_mb=memory_usage_mb or 0.0,
                cpu_usage_percent=cpu_usage_percent or 0.0,
                cache_hit=cache_hit
            )

            # Thread-safe buffer append
            with self._locks[MetricType.PERFORMANCE]:
                self._buffers[MetricType.PERFORMANCE].append(metric)

            self._stats['metrics_collected'][MetricType.PERFORMANCE] += 1
        except Exception as e:
            logger.warning("Failed to record performance metric: %s", e)
        finally:
            collection_time = (time.perf_counter() - start_time) * 1000
            self._stats['collection_time_ms'].append(collection_time)

    def record_error(self, error_type: str, error_message: str,
                     component: str = "unknown", severity: str = "medium",
                     context: Optional[Dict[str, Any]] = None) -> None:
        """
        Record an error metric.

        Args:
            error_type: Error type
            error_message: Error message
            component: Source component
            severity: Severity (low/medium/high/critical)
            context: Additional context
        """
        start_time = time.perf_counter()
        try:
            # Build the metric
            metric = ErrorMetric(
                timestamp=time.time(),
                error_type=error_type,
                error_message=error_message,
                component=component,
                severity=severity,
                context=context
            )

            # Thread-safe buffer append
            with self._locks[MetricType.ERROR]:
                self._buffers[MetricType.ERROR].append(metric)

            self._stats['metrics_collected'][MetricType.ERROR] += 1
        except Exception as e:
            logger.warning("Failed to record error metric: %s", e)
        finally:
            collection_time = (time.perf_counter() - start_time) * 1000
            self._stats['collection_time_ms'].append(collection_time)

    def get_stats(self) -> Dict[str, Any]:
        """
        Return engine statistics.

        Returns:
            Statistics dictionary
        """
        collection_times = list(self._stats['collection_time_ms'])
        return {
            'metrics_collected': dict(self._stats['metrics_collected']),
            'buffer_sizes': {
                metric_type.value: len(self._buffers[metric_type])
                for metric_type in MetricType
                if metric_type in self._buffers
            },
            'collection_performance': {
                'avg_time_ms': sum(collection_times) / len(collection_times) if collection_times else 0,
                'max_time_ms': max(collection_times) if collection_times else 0,
                'p95_time_ms': sorted(collection_times)[int(len(collection_times) * 0.95)] if collection_times else 0
            }
        }

    def get_metrics(self, metric_type: MetricType, limit: int = 1000,
                    time_range_seconds: Optional[float] = None) -> List[Dict[str, Any]]:
        """
        Retrieve metrics with filtering.

        Args:
            metric_type: Metric type
            limit: Maximum number of results
            time_range_seconds: Time window in seconds (None = all; currently
                not applied by this implementation)

        Returns:
            List of serialized metrics
        """
        try:
            # Flush the buffer so recent data is included
            self._flush_buffer_to_storage(metric_type)

            # Read back from storage
            metrics = self.storage.get_metrics(metric_type, limit)
            return [metric.to_dict() if hasattr(metric, 'to_dict') else metric
                    for metric in metrics]
        except Exception as e:
            logger.error("Failed to get metrics: %s", e)
            return []

    def _flush_buffer_to_storage(self, metric_type: MetricType):
        """Flush one metric type's buffer to storage."""
        try:
            # Drain the buffer under the lock, then store outside it so
            # writers are not blocked by storage I/O
            with self._locks[metric_type]:
                if not self._buffers[metric_type]:
                    return
                metrics_to_flush = list(self._buffers[metric_type])
                self._buffers[metric_type].clear()

            if metrics_to_flush:
                self.storage.store_metrics(metric_type, metrics_to_flush)
        except Exception as e:
            logger.error("Failed to flush buffer: %s", e)

    def _get_cached_environment_hash(self) -> str:
        """Return the environment hash, cached with a TTL."""
        current_time = time.time()
        if (self._env_hash_cache is None or
                current_time - self._env_cache_time > self._env_cache_ttl):
            self._env_hash_cache = generate_environment_hash()
            self._env_cache_time = current_time
        return self._env_hash_cache

    def _create_memory_storage(self):
        """Create a simple in-memory storage backend."""
        return SimpleMemoryStorage()
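

# --- Sketch: periodic background flusher (an assumption, not part of the
# original engine). MetricsEngine stores `flush_interval` but nothing in this
# file drains the buffers on a timer; a daemon thread like the one below is
# one way to wire it up. The name MetricsFlusher is hypothetical.
class MetricsFlusher:
    """Daemon thread that periodically flushes all metric buffers to storage."""

    def __init__(self, engine: MetricsEngine):
        self._engine = engine
        self._stop_event = threading.Event()
        self._thread = threading.Thread(target=self._run, daemon=True)

    def start(self) -> None:
        self._thread.start()

    def stop(self) -> None:
        self._stop_event.set()
        self._thread.join()

    def _run(self) -> None:
        # Wake up every flush_interval seconds and drain each buffer;
        # wait() returns True once stop() is called, ending the loop.
        while not self._stop_event.wait(self._engine.flush_interval):
            for metric_type in MetricType:
                self._engine._flush_buffer_to_storage(metric_type)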


class SimpleMemoryStorage:
    """Simple in-memory storage for development."""

    def __init__(self):
        self.metrics = defaultdict(list)

    def store_metrics(self, metric_type: MetricType, metrics: List):
        """Store metrics in memory."""
        self.metrics[metric_type].extend(metrics)

    def get_metrics(self, metric_type: MetricType, limit: int = 1000):
        """Retrieve metrics from memory."""
        return self.metrics[metric_type][-limit:]
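

# --- Sketch: a persistent storage adapter (an assumption; not part of the
# original file). Any object exposing store_metrics(metric_type, metrics) and
# get_metrics(metric_type, limit) can be passed as storage_adapter. This
# JSONL-backed example is hypothetical and assumes metrics expose to_dict().
import json


class JsonlFileStorage:
    """Append-only JSONL storage, one file per metric type."""

    def __init__(self, directory: str = "."):
        self.directory = directory

    def _path(self, metric_type: MetricType) -> str:
        return f"{self.directory}/metrics_{metric_type.value}.jsonl"

    def store_metrics(self, metric_type: MetricType, metrics: List):
        # Serialize each metric to one JSON line and append
        with open(self._path(metric_type), "a", encoding="utf-8") as f:
            for metric in metrics:
                record = metric.to_dict() if hasattr(metric, 'to_dict') else metric
                f.write(json.dumps(record) + "\n")

    def get_metrics(self, metric_type: MetricType, limit: int = 1000):
        # Return the most recent `limit` records, oldest first
        try:
            with open(self._path(metric_type), "r", encoding="utf-8") as f:
                lines = f.readlines()
        except FileNotFoundError:
            return []
        return [json.loads(line) for line in lines[-limit:]]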


# Optional global instance for simple usage
_global_metrics_engine: Optional[MetricsEngine] = None


def get_global_metrics_engine() -> Optional[MetricsEngine]:
    """Return the global MetricsEngine instance, if initialized."""
    return _global_metrics_engine


def initialize_global_metrics_engine(**kwargs) -> MetricsEngine:
    """Initialize and set the global MetricsEngine instance."""
    global _global_metrics_engine
    _global_metrics_engine = MetricsEngine(**kwargs)
    return _global_metrics_engine
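

# --- Sketch: minimal usage example (an assumption; assumes the relative
# metric-model imports resolve, i.e. the module is run via its package).
if __name__ == "__main__":
    engine = initialize_global_metrics_engine(buffer_size=1000)

    # Record one performance metric, then read stats and data back.
    start = time.perf_counter()
    time.sleep(0.01)  # stand-in for real work
    engine.record_performance(
        operation_type="demo_operation",
        duration_ms=(time.perf_counter() - start) * 1000,
        cache_hit=False,
    )

    print(engine.get_stats())
    print(engine.get_metrics(MetricType.PERFORMANCE, limit=10))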