v1.0 - Stable version: multi-PC, UI-DETR-1 detection, 3 execution modes

- Frontend v4 reachable on the local network (192.168.1.40)
- Open ports: 3002 (frontend), 5001 (backend), 5004 (dashboard)
- Ollama GPU working
- Interactive self-healing
- Confidence dashboard

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
core/analytics/query/__init__.py (new file, +5)
@@ -0,0 +1,5 @@
"""Query engine for analytics data."""

from .query_engine import QueryEngine

__all__ = ['QueryEngine']
core/analytics/query/query_engine.py (new file, +312)
@@ -0,0 +1,312 @@
"""Query engine for analytics data with caching."""

import logging
import hashlib
import json
import statistics
from typing import List, Dict, Any, Optional, Tuple, Union
from datetime import datetime
from collections import OrderedDict

from ..storage.timeseries_store import TimeSeriesStore
from ..storage.archive_storage import ArchiveStorage

logger = logging.getLogger(__name__)

class LRUCache:
    """Simple LRU cache implementation."""

    def __init__(self, capacity: int = 100):
        """Initialize LRU cache."""
        self.capacity = capacity
        self.cache: OrderedDict = OrderedDict()

    def get(self, key: str) -> Optional[Any]:
        """Get value from cache."""
        if key not in self.cache:
            return None
        # Move to end (most recently used)
        self.cache.move_to_end(key)
        return self.cache[key]

    def put(self, key: str, value: Any) -> None:
        """Put value in cache."""
        if key in self.cache:
            self.cache.move_to_end(key)
        self.cache[key] = value
        # Remove oldest if over capacity
        if len(self.cache) > self.capacity:
            self.cache.popitem(last=False)

    def clear(self) -> None:
        """Clear cache."""
        self.cache.clear()

    def size(self) -> int:
        """Get cache size."""
        return len(self.cache)
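
The eviction order above is worth pinning down: get and put both mark a key as most recently used, so the entry dropped by popitem(last=False) is always the least recently touched one. A minimal illustration (standalone snippet, not part of the committed module):

cache = LRUCache(capacity=2)
cache.put('a', 1)
cache.put('b', 2)
cache.get('a')             # touching 'a' makes it most recently used
cache.put('c', 3)          # over capacity: evicts 'b', not 'a'
assert cache.get('b') is None
assert cache.get('a') == 1
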
class QueryEngine:
    """Query engine for analytics data with caching."""

    def __init__(
        self,
        time_series_store: TimeSeriesStore,
        archive_storage: Optional[ArchiveStorage] = None,
        cache_size: int = 100
    ):
        """
        Initialize query engine.

        Args:
            time_series_store: Time series storage
            archive_storage: Optional archive storage
            cache_size: Size of query cache
        """
        self.ts_store = time_series_store
        self.archive = archive_storage
        self.cache = LRUCache(cache_size)

        logger.info(f"QueryEngine initialized (cache_size={cache_size})")
    def query(
        self,
        query: Dict[str, Any],
        use_cache: bool = True
    ) -> Union[Dict[str, List[Dict]], List[Dict]]:
        """
        Execute a query against analytics data.

        Args:
            query: Query specification with filters, time range, etc.
            use_cache: Whether to use cache

        Returns:
            Dict mapping metric type to matching records, or a flat
            list of records if the query sets 'flatten'
        """
        # Generate cache key
        cache_key = self._generate_cache_key(query)

        # Check cache
        if use_cache:
            cached = self.cache.get(cache_key)
            if cached is not None:
                logger.debug(f"Cache hit for query: {cache_key[:8]}")
                return cached

        # Execute query
        start_time = query.get('start_time')
        end_time = query.get('end_time')
        workflow_id = query.get('workflow_id')
        metric_types = query.get('metric_types', ['execution', 'step', 'resource'])

        if not start_time or not end_time:
            raise ValueError("start_time and end_time are required")

        # Convert to datetime if strings
        if isinstance(start_time, str):
            start_time = datetime.fromisoformat(start_time)
        if isinstance(end_time, str):
            end_time = datetime.fromisoformat(end_time)

        # Query time series store
        results = self.ts_store.query_range(
            start_time=start_time,
            end_time=end_time,
            workflow_id=workflow_id,
            metric_types=metric_types
        )

        # Apply additional filters
        filters = query.get('filters', {})
        if filters:
            for metric_type, records in results.items():
                results[metric_type] = self._apply_filters(records, filters)

        # Flatten if requested
        if query.get('flatten', False):
            flattened = []
            for records in results.values():
                flattened.extend(records)
            results = flattened

        # Cache result
        if use_cache:
            self.cache.put(cache_key, results)

        return results
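
A sketch of the query dict this method accepts (values are hypothetical; constructing the TimeSeriesStore behind ts_store is out of scope here):

engine = QueryEngine(time_series_store=ts_store)
records = engine.query({
    'start_time': '2025-01-01T00:00:00',   # ISO strings are converted above
    'end_time': '2025-01-02T00:00:00',
    'workflow_id': 'wf-123',               # hypothetical workflow ID
    'metric_types': ['execution'],
    'filters': {'status': 'success'},      # assumed record field
    'flatten': True,                       # flat list instead of per-type dict
})
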
    def aggregate(
        self,
        metric: str,
        aggregation: str,
        group_by: List[str],
        filters: Dict[str, Any],
        time_range: Tuple[datetime, datetime],
        use_cache: bool = True
    ) -> List[Dict]:
        """
        Aggregate metrics with grouping.

        Args:
            metric: Metric field to aggregate
            aggregation: Aggregation function (avg, sum, count, min, max)
            group_by: Fields to group by
            filters: Filter criteria
            time_range: (start_time, end_time)
            use_cache: Whether to use cache

        Returns:
            List of aggregated results
        """
        # Generate cache key
        cache_key = self._generate_cache_key({
            'type': 'aggregate',
            'metric': metric,
            'aggregation': aggregation,
            'group_by': group_by,
            'filters': filters,
            'time_range': [t.isoformat() for t in time_range]
        })

        # Check cache
        if use_cache:
            cached = self.cache.get(cache_key)
            if cached is not None:
                return cached

        # Execute aggregation
        start_time, end_time = time_range
        results = self.ts_store.aggregate(
            metric=metric,
            aggregation=aggregation,
            group_by=group_by,
            start_time=start_time,
            end_time=end_time,
            filters=filters
        )

        # Cache result
        if use_cache:
            self.cache.put(cache_key, results)

        return results
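
For example, averaging a per-execution duration metric per workflow over one day could look like this (duration_ms and status are assumed field names, not guaranteed by the store schema):

from datetime import datetime

rows = engine.aggregate(
    metric='duration_ms',                  # assumed metric field
    aggregation='avg',
    group_by=['workflow_id'],
    filters={'status': 'success'},         # assumed record field
    time_range=(datetime(2025, 1, 1), datetime(2025, 1, 2)),
)
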
    def compare(
        self,
        workflow_ids: List[str],
        metrics: List[str],
        time_range: Tuple[datetime, datetime]
    ) -> Dict[str, Dict]:
        """
        Compare metrics across workflows.

        Args:
            workflow_ids: List of workflow IDs to compare
            metrics: List of metrics to compare
            time_range: (start_time, end_time)

        Returns:
            Dictionary mapping workflow_id to metrics
        """
        results = {}
        start_time, end_time = time_range

        for workflow_id in workflow_ids:
            workflow_metrics = {}

            # Query metrics for this workflow
            data = self.ts_store.query_range(
                start_time=start_time,
                end_time=end_time,
                workflow_id=workflow_id
            )

            # Calculate requested metrics
            executions = data.get('execution', [])
            if executions:
                for metric in metrics:
                    values = [e.get(metric) for e in executions if e.get(metric) is not None]
                    if values:
                        workflow_metrics[metric] = {
                            'avg': statistics.mean(values),
                            'min': min(values),
                            'max': max(values),
                            'count': len(values)
                        }

            results[workflow_id] = workflow_metrics

        # Calculate differences (only well-defined for exactly two workflows)
        if len(workflow_ids) == 2:
            results['comparison'] = self._calculate_differences(
                results[workflow_ids[0]],
                results[workflow_ids[1]]
            )

        return results
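
When exactly two workflows are passed, the returned dict carries their per-metric stats plus a 'comparison' entry; note that this key shares the namespace with workflow IDs. A usage sketch (IDs and metric name are hypothetical):

report = engine.compare(
    workflow_ids=['wf-a', 'wf-b'],
    metrics=['duration_ms'],               # assumed metric field
    time_range=(datetime(2025, 1, 1), datetime(2025, 1, 2)),
)
# Present only if both workflows recorded values for the metric:
delta = report['comparison']['duration_ms']['diff_percent']
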
    def invalidate_cache(self, pattern: Optional[str] = None) -> int:
        """
        Invalidate cache entries.

        Args:
            pattern: Optional pattern to match (None = clear all)

        Returns:
            Number of entries invalidated
        """
        if pattern is None:
            size = self.cache.size()
            self.cache.clear()
            logger.info(f"Cleared entire cache ({size} entries)")
            return size

        # Pattern-based invalidation not implemented yet; for now,
        # fall back to clearing the whole cache.
        return self.invalidate_cache(None)
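
Because results are cached whole, a full clear after writing new data is the only way to keep reads consistent for now; passing a pattern currently falls back to the same full clear:

engine.invalidate_cache()              # clears everything, returns entry count
engine.invalidate_cache('wf-123:*')    # hypothetical pattern; unsupported yet, also clears all
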
    def _apply_filters(self, records: List[Dict], filters: Dict[str, Any]) -> List[Dict]:
        """Apply filters to records."""
        filtered = []

        for record in records:
            match = True
            for key, value in filters.items():
                if record.get(key) != value:
                    match = False
                    break
            if match:
                filtered.append(record)

        return filtered
    def _calculate_differences(
        self,
        metrics1: Dict[str, Dict],
        metrics2: Dict[str, Dict]
    ) -> Dict[str, Dict]:
        """Calculate differences between two metric sets."""
        differences = {}

        for metric in metrics1.keys():
            if metric in metrics2:
                m1 = metrics1[metric]
                m2 = metrics2[metric]

                differences[metric] = {
                    'diff_avg': m2['avg'] - m1['avg'],
                    'diff_percent': ((m2['avg'] - m1['avg']) / m1['avg'] * 100) if m1['avg'] != 0 else 0,
                    'workflow1_avg': m1['avg'],
                    'workflow2_avg': m2['avg']
                }

        return differences
    def _generate_cache_key(self, query: Dict[str, Any]) -> str:
        """Generate cache key from query."""
        # Sort keys for consistent hashing
        query_str = json.dumps(query, sort_keys=True, default=str)
        return hashlib.md5(query_str.encode()).hexdigest()
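
Since the key is built from a sort_keys JSON dump, logically identical queries hash to the same cache entry regardless of dict insertion order (MD5 serves as a fast fingerprint here, not as a security measure):

k1 = engine._generate_cache_key({'a': 1, 'b': 2})
k2 = engine._generate_cache_key({'b': 2, 'a': 1})
assert k1 == k2   # key order does not change the cache key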