v1.0 - Stable version: multi-PC, UI-DETR-1 detection, 3 execution modes

- Frontend v4 reachable on the local network (192.168.1.40)
- Open ports: 3002 (frontend), 5001 (backend), 5004 (dashboard)
- Ollama GPU working
- Interactive self-healing
- Confidence dashboard

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
core/analytics/query/__init__.py (new file, +5)
@@ -0,0 +1,5 @@
"""Query engine for analytics data."""

from .query_engine import QueryEngine

__all__ = ['QueryEngine']
core/analytics/query/query_engine.py (new file, +312)
@@ -0,0 +1,312 @@
"""Query engine for analytics data with caching."""

import logging
import hashlib
import json
import statistics
from typing import List, Dict, Any, Optional, Tuple, Union
from datetime import datetime
from collections import OrderedDict

from ..storage.timeseries_store import TimeSeriesStore
from ..storage.archive_storage import ArchiveStorage

logger = logging.getLogger(__name__)

class LRUCache:
    """Simple LRU cache implementation."""

    def __init__(self, capacity: int = 100):
        """Initialize LRU cache."""
        self.capacity = capacity
        self.cache: OrderedDict = OrderedDict()

    def get(self, key: str) -> Optional[Any]:
        """Get value from cache."""
        if key not in self.cache:
            return None
        # Move to end (most recently used)
        self.cache.move_to_end(key)
        return self.cache[key]

    def put(self, key: str, value: Any) -> None:
        """Put value in cache."""
        if key in self.cache:
            self.cache.move_to_end(key)
        self.cache[key] = value
        # Remove oldest if over capacity
        if len(self.cache) > self.capacity:
            self.cache.popitem(last=False)

    def clear(self) -> None:
        """Clear cache."""
        self.cache.clear()

    def size(self) -> int:
        """Get cache size."""
        return len(self.cache)
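
The eviction order above is worth pinning down: get and put both mark a key as most recently used, so the entry dropped by popitem(last=False) is always the least recently touched one. A minimal illustration (standalone snippet, not part of the committed module):

cache = LRUCache(capacity=2)
cache.put('a', 1)
cache.put('b', 2)
cache.get('a')             # touching 'a' makes it most recently used
cache.put('c', 3)          # over capacity: evicts 'b', not 'a'
assert cache.get('b') is None
assert cache.get('a') == 1
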
class QueryEngine:
    """Query engine for analytics data with caching."""

    def __init__(
        self,
        time_series_store: TimeSeriesStore,
        archive_storage: Optional[ArchiveStorage] = None,
        cache_size: int = 100
    ):
        """
        Initialize query engine.

        Args:
            time_series_store: Time series storage
            archive_storage: Optional archive storage
            cache_size: Size of query cache
        """
        self.ts_store = time_series_store
        self.archive = archive_storage
        self.cache = LRUCache(cache_size)

        logger.info(f"QueryEngine initialized (cache_size={cache_size})")
    def query(
        self,
        query: Dict[str, Any],
        use_cache: bool = True
    ) -> Union[Dict[str, List[Dict]], List[Dict]]:
        """
        Execute a query against analytics data.

        Args:
            query: Query specification with filters, time range, etc.
            use_cache: Whether to use cache

        Returns:
            Dict mapping metric type to matching records, or a flat
            list of records if the query sets 'flatten'
        """
        # Generate cache key
        cache_key = self._generate_cache_key(query)

        # Check cache
        if use_cache:
            cached = self.cache.get(cache_key)
            if cached is not None:
                logger.debug(f"Cache hit for query: {cache_key[:8]}")
                return cached

        # Execute query
        start_time = query.get('start_time')
        end_time = query.get('end_time')
        workflow_id = query.get('workflow_id')
        metric_types = query.get('metric_types', ['execution', 'step', 'resource'])

        if not start_time or not end_time:
            raise ValueError("start_time and end_time are required")

        # Convert to datetime if strings
        if isinstance(start_time, str):
            start_time = datetime.fromisoformat(start_time)
        if isinstance(end_time, str):
            end_time = datetime.fromisoformat(end_time)

        # Query time series store
        results = self.ts_store.query_range(
            start_time=start_time,
            end_time=end_time,
            workflow_id=workflow_id,
            metric_types=metric_types
        )

        # Apply additional filters
        filters = query.get('filters', {})
        if filters:
            for metric_type, records in results.items():
                results[metric_type] = self._apply_filters(records, filters)

        # Flatten if requested
        if query.get('flatten', False):
            flattened = []
            for records in results.values():
                flattened.extend(records)
            results = flattened

        # Cache result
        if use_cache:
            self.cache.put(cache_key, results)

        return results
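
A sketch of the query dict this method accepts (values are hypothetical; constructing the TimeSeriesStore behind ts_store is out of scope here):

engine = QueryEngine(time_series_store=ts_store)
records = engine.query({
    'start_time': '2025-01-01T00:00:00',   # ISO strings are converted above
    'end_time': '2025-01-02T00:00:00',
    'workflow_id': 'wf-123',               # hypothetical workflow ID
    'metric_types': ['execution'],
    'filters': {'status': 'success'},      # assumed record field
    'flatten': True,                       # flat list instead of per-type dict
})
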
    def aggregate(
        self,
        metric: str,
        aggregation: str,
        group_by: List[str],
        filters: Dict[str, Any],
        time_range: Tuple[datetime, datetime],
        use_cache: bool = True
    ) -> List[Dict]:
        """
        Aggregate metrics with grouping.

        Args:
            metric: Metric field to aggregate
            aggregation: Aggregation function (avg, sum, count, min, max)
            group_by: Fields to group by
            filters: Filter criteria
            time_range: (start_time, end_time)
            use_cache: Whether to use cache

        Returns:
            List of aggregated results
        """
        # Generate cache key
        cache_key = self._generate_cache_key({
            'type': 'aggregate',
            'metric': metric,
            'aggregation': aggregation,
            'group_by': group_by,
            'filters': filters,
            'time_range': [t.isoformat() for t in time_range]
        })

        # Check cache
        if use_cache:
            cached = self.cache.get(cache_key)
            if cached is not None:
                return cached

        # Execute aggregation
        start_time, end_time = time_range
        results = self.ts_store.aggregate(
            metric=metric,
            aggregation=aggregation,
            group_by=group_by,
            start_time=start_time,
            end_time=end_time,
            filters=filters
        )

        # Cache result
        if use_cache:
            self.cache.put(cache_key, results)

        return results
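
For example, averaging a per-execution duration metric per workflow over one day could look like this (duration_ms and status are assumed field names, not guaranteed by the store schema):

from datetime import datetime

rows = engine.aggregate(
    metric='duration_ms',                  # assumed metric field
    aggregation='avg',
    group_by=['workflow_id'],
    filters={'status': 'success'},         # assumed record field
    time_range=(datetime(2025, 1, 1), datetime(2025, 1, 2)),
)
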
    def compare(
        self,
        workflow_ids: List[str],
        metrics: List[str],
        time_range: Tuple[datetime, datetime]
    ) -> Dict[str, Dict]:
        """
        Compare metrics across workflows.

        Args:
            workflow_ids: List of workflow IDs to compare
            metrics: List of metrics to compare
            time_range: (start_time, end_time)

        Returns:
            Dictionary mapping workflow_id to metrics
        """
        results = {}
        start_time, end_time = time_range

        for workflow_id in workflow_ids:
            workflow_metrics = {}

            # Query metrics for this workflow
            data = self.ts_store.query_range(
                start_time=start_time,
                end_time=end_time,
                workflow_id=workflow_id
            )

            # Calculate requested metrics
            executions = data.get('execution', [])
            if executions:
                for metric in metrics:
                    values = [e.get(metric) for e in executions if e.get(metric) is not None]
                    if values:
                        workflow_metrics[metric] = {
                            'avg': statistics.mean(values),
                            'min': min(values),
                            'max': max(values),
                            'count': len(values)
                        }

            results[workflow_id] = workflow_metrics

        # Calculate differences (only well-defined for exactly two workflows)
        if len(workflow_ids) == 2:
            results['comparison'] = self._calculate_differences(
                results[workflow_ids[0]],
                results[workflow_ids[1]]
            )

        return results
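
When exactly two workflows are passed, the returned dict carries their per-metric stats plus a 'comparison' entry; note that this key shares the namespace with workflow IDs. A usage sketch (IDs and metric name are hypothetical):

report = engine.compare(
    workflow_ids=['wf-a', 'wf-b'],
    metrics=['duration_ms'],               # assumed metric field
    time_range=(datetime(2025, 1, 1), datetime(2025, 1, 2)),
)
# Present only if both workflows recorded values for the metric:
delta = report['comparison']['duration_ms']['diff_percent']
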
    def invalidate_cache(self, pattern: Optional[str] = None) -> int:
        """
        Invalidate cache entries.

        Args:
            pattern: Optional pattern to match (None = clear all)

        Returns:
            Number of entries invalidated
        """
        if pattern is None:
            size = self.cache.size()
            self.cache.clear()
            logger.info(f"Cleared entire cache ({size} entries)")
            return size

        # Pattern-based invalidation not implemented yet; for now,
        # fall back to clearing the whole cache.
        return self.invalidate_cache(None)
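
Because results are cached whole, a full clear after writing new data is the only way to keep reads consistent for now; passing a pattern currently falls back to the same full clear:

engine.invalidate_cache()              # clears everything, returns entry count
engine.invalidate_cache('wf-123:*')    # hypothetical pattern; unsupported yet, also clears all
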
    def _apply_filters(self, records: List[Dict], filters: Dict[str, Any]) -> List[Dict]:
        """Apply filters to records."""
        filtered = []

        for record in records:
            match = True
            for key, value in filters.items():
                if record.get(key) != value:
                    match = False
                    break
            if match:
                filtered.append(record)

        return filtered
    def _calculate_differences(
        self,
        metrics1: Dict[str, Dict],
        metrics2: Dict[str, Dict]
    ) -> Dict[str, Dict]:
        """Calculate differences between two metric sets."""
        differences = {}

        for metric in metrics1.keys():
            if metric in metrics2:
                m1 = metrics1[metric]
                m2 = metrics2[metric]

                differences[metric] = {
                    'diff_avg': m2['avg'] - m1['avg'],
                    'diff_percent': ((m2['avg'] - m1['avg']) / m1['avg'] * 100) if m1['avg'] != 0 else 0,
                    'workflow1_avg': m1['avg'],
                    'workflow2_avg': m2['avg']
                }

        return differences
    def _generate_cache_key(self, query: Dict[str, Any]) -> str:
        """Generate cache key from query."""
        # Sort keys for consistent hashing
        query_str = json.dumps(query, sort_keys=True, default=str)
        return hashlib.md5(query_str.encode()).hexdigest()
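
Since the key is built from a sort_keys JSON dump, logically identical queries hash to the same cache entry regardless of dict insertion order (MD5 serves as a fast fingerprint here, not as a security measure):

k1 = engine._generate_cache_key({'a': 1, 'b': 2})
k2 = engine._generate_cache_key({'b': 2, 'a': 1})
assert k1 == k2   # key order does not change the cache key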