v1.0 - Stable release: multi-PC, UI-DETR-1 detection, 3 execution modes
- Frontend v4 accessible on the local network (192.168.1.40)
- Open ports: 3002 (frontend), 5001 (backend), 5004 (dashboard)
- Ollama GPU functional
- Interactive self-healing
- Confidence dashboard

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
core/analytics/__init__.py  (new file, 52 lines)
@@ -0,0 +1,52 @@
"""
RPA Analytics & Insights Module

This module provides comprehensive analytics and insights for RPA workflows,
including performance analysis, anomaly detection, and automated recommendations.
"""

from .collection.metrics_collector import MetricsCollector, ExecutionMetrics, StepMetrics
from .collection.resource_collector import ResourceCollector, ResourceMetrics
from .storage.timeseries_store import TimeSeriesStore
from .storage.archive_storage import ArchiveStorage, RetentionPolicyEngine, RetentionPolicy
from .engine.performance_analyzer import PerformanceAnalyzer, PerformanceStats
from .engine.anomaly_detector import AnomalyDetector, Anomaly
from .engine.insight_generator import InsightGenerator, Insight
from .engine.success_rate_calculator import SuccessRateCalculator, SuccessRateStats, ReliabilityRanking
from .query.query_engine import QueryEngine
from .realtime.realtime_analytics import RealtimeAnalytics, LiveExecution
from .reporting.report_generator import ReportGenerator, ReportConfig, ScheduledReport
from .dashboard.dashboard_manager import DashboardManager, Dashboard, DashboardWidget, DashboardTemplate
from .api.analytics_api import AnalyticsAPI

__all__ = [
    'MetricsCollector',
    'ExecutionMetrics',
    'StepMetrics',
    'ResourceCollector',
    'ResourceMetrics',
    'TimeSeriesStore',
    'ArchiveStorage',
    'RetentionPolicyEngine',
    'RetentionPolicy',
    'PerformanceAnalyzer',
    'PerformanceStats',
    'AnomalyDetector',
    'Anomaly',
    'InsightGenerator',
    'Insight',
    'SuccessRateCalculator',
    'SuccessRateStats',
    'ReliabilityRanking',
    'QueryEngine',
    'RealtimeAnalytics',
    'LiveExecution',
    'ReportGenerator',
    'ReportConfig',
    'ScheduledReport',
    'DashboardManager',
    'Dashboard',
    'DashboardWidget',
    'DashboardTemplate',
    'AnalyticsAPI',
]
core/analytics/analytics_system.py  (new file, 197 lines)
@@ -0,0 +1,197 @@
"""Integrated analytics system."""

import logging
from typing import Optional
from pathlib import Path

from .collection.metrics_collector import MetricsCollector
from .collection.resource_collector import ResourceCollector
from .storage.timeseries_store import TimeSeriesStore
from .storage.archive_storage import ArchiveStorage, RetentionPolicyEngine
from .engine.performance_analyzer import PerformanceAnalyzer
from .engine.anomaly_detector import AnomalyDetector
from .engine.insight_generator import InsightGenerator
from .engine.success_rate_calculator import SuccessRateCalculator
from .query.query_engine import QueryEngine
from .realtime.realtime_analytics import RealtimeAnalytics
from .reporting.report_generator import ReportGenerator
from .dashboard.dashboard_manager import DashboardManager
from .api.analytics_api import AnalyticsAPI

logger = logging.getLogger(__name__)


class AnalyticsSystem:
    """Integrated analytics system."""

    def __init__(
        self,
        db_path: str = "data/analytics/metrics.db",
        archive_dir: str = "data/analytics/archive",
        reports_dir: str = "data/analytics/reports",
        dashboards_dir: str = "data/analytics/dashboards"
    ):
        """
        Initialize analytics system.

        Args:
            db_path: Path to metrics database
            archive_dir: Directory for archived data
            reports_dir: Directory for reports
            dashboards_dir: Directory for dashboards
        """
        logger.info("Initializing AnalyticsSystem...")

        # Storage layer
        self.store = TimeSeriesStore(db_path)
        self.archive = ArchiveStorage(archive_dir)
        self.retention_engine = RetentionPolicyEngine(self.archive)

        # Collection layer
        self.metrics_collector = MetricsCollector(self.store)
        self.resource_collector = ResourceCollector(self.store)

        # Analysis layer
        self.performance_analyzer = PerformanceAnalyzer(self.store)
        self.anomaly_detector = AnomalyDetector(self.store)
        self.insight_generator = InsightGenerator(
            self.performance_analyzer,
            self.anomaly_detector
        )
        self.success_rate_calculator = SuccessRateCalculator(self.store)

        # Query layer
        self.query_engine = QueryEngine(self.store)
        self.realtime_analytics = RealtimeAnalytics(self.metrics_collector)

        # Reporting layer
        self.report_generator = ReportGenerator(
            self.query_engine,
            self.performance_analyzer,
            self.insight_generator,
            reports_dir
        )

        # Dashboard layer
        self.dashboard_manager = DashboardManager(dashboards_dir)

        # API layer
        self.api = AnalyticsAPI(
            self.query_engine,
            self.performance_analyzer,
            self.anomaly_detector,
            self.insight_generator,
            self.success_rate_calculator,
            self.report_generator,
            self.dashboard_manager
        )

        logger.info("AnalyticsSystem initialized successfully")

    def start_resource_monitoring(
        self,
        interval_seconds: int = 60
    ) -> None:
        """
        Start resource monitoring.

        Args:
            interval_seconds: Monitoring interval in seconds
        """
        # ResourceCollector exposes start()/stop() and reads its sampling
        # interval from sample_interval, so set it before starting.
        self.resource_collector.sample_interval = interval_seconds
        self.resource_collector.start()
        logger.info(f"Resource monitoring started (interval: {interval_seconds}s)")

    def stop_resource_monitoring(self) -> None:
        """Stop resource monitoring."""
        self.resource_collector.stop()
        logger.info("Resource monitoring stopped")

    def apply_retention_policies(self, dry_run: bool = False) -> dict:
        """
        Apply retention policies.

        Args:
            dry_run: If True, don't actually delete data

        Returns:
            Dictionary with application results
        """
        results = self.retention_engine.apply_policies(self.store, dry_run)
        logger.info(f"Retention policies applied (dry_run={dry_run})")
        return results

    def get_system_stats(self) -> dict:
        """
        Get system statistics.

        Returns:
            Dictionary with system stats
        """
        return {
            'storage': {
                'metrics_count': self.store.get_metrics_count(),
                'database_size': Path(self.store.db_path).stat().st_size if Path(self.store.db_path).exists() else 0
            },
            'archive': self.archive.get_archive_stats(),
            'collectors': {
                'metrics_buffer_size': self.metrics_collector.get_buffer_size(),
                'resource_monitoring_active': self.resource_collector.monitoring_active
            },
            'dashboards': {
                'total': len(self.dashboard_manager.dashboards)
            },
            'reports': {
                'scheduled': len(self.report_generator.scheduled_reports)
            }
        }

    def shutdown(self) -> None:
        """Shutdown analytics system."""
        logger.info("Shutting down AnalyticsSystem...")

        # Stop monitoring
        if self.resource_collector.monitoring_active:
            self.stop_resource_monitoring()

        # Flush any pending metrics
        self.metrics_collector.flush()

        # Close database connection
        self.store.close()

        logger.info("AnalyticsSystem shutdown complete")


# Global instance
_analytics_system: Optional[AnalyticsSystem] = None


def get_analytics_system(
    db_path: str = "data/analytics/metrics.db",
    archive_dir: str = "data/analytics/archive",
    reports_dir: str = "data/analytics/reports",
    dashboards_dir: str = "data/analytics/dashboards"
) -> AnalyticsSystem:
    """
    Get or create global analytics system instance.

    Args:
        db_path: Path to metrics database
        archive_dir: Directory for archived data
        reports_dir: Directory for reports
        dashboards_dir: Directory for dashboards

    Returns:
        AnalyticsSystem instance
    """
    global _analytics_system

    if _analytics_system is None:
        _analytics_system = AnalyticsSystem(
            db_path=db_path,
            archive_dir=archive_dir,
            reports_dir=reports_dir,
            dashboards_dir=dashboards_dir
        )

    return _analytics_system
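For orientation, a minimal usage sketch of the accessor above. It assumes the storage and engine modules referenced by the imports (not all shown in this diff) are importable and that the default data paths are writable:

import time
from core.analytics.analytics_system import get_analytics_system

analytics = get_analytics_system()               # creates the global instance on first call
analytics.start_resource_monitoring(interval_seconds=30)

time.sleep(5)                                     # ... workflows run, collectors sample in the background ...

print(analytics.get_system_stats())               # storage, archive, collector, dashboard and report counters
analytics.shutdown()                              # stops monitoring, flushes buffers, closes the database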
core/analytics/api/__init__.py  (new file, 5 lines)
@@ -0,0 +1,5 @@
"""Analytics API module."""

from .analytics_api import AnalyticsAPI

__all__ = ['AnalyticsAPI']
core/analytics/api/analytics_api.py  (new file, 387 lines)
@@ -0,0 +1,387 @@
"""REST API for analytics."""

import logging
from typing import Dict, List, Optional, Any
from datetime import datetime, timedelta

try:
    from flask import Blueprint, request, jsonify, send_file
    FLASK_AVAILABLE = True
except ImportError:
    FLASK_AVAILABLE = False
    Blueprint = None

logger = logging.getLogger(__name__)


class AnalyticsAPI:
    """REST API for analytics."""

    def __init__(
        self,
        query_engine,
        performance_analyzer,
        anomaly_detector,
        insight_generator,
        success_rate_calculator,
        report_generator,
        dashboard_manager
    ):
        """
        Initialize analytics API.

        Args:
            query_engine: Query engine instance
            performance_analyzer: Performance analyzer instance
            anomaly_detector: Anomaly detector instance
            insight_generator: Insight generator instance
            success_rate_calculator: Success rate calculator instance
            report_generator: Report generator instance
            dashboard_manager: Dashboard manager instance
        """
        if not FLASK_AVAILABLE:
            logger.warning("Flask not available - API endpoints will not be registered")
            self.blueprint = None
            return

        self.query_engine = query_engine
        self.performance_analyzer = performance_analyzer
        self.anomaly_detector = anomaly_detector
        self.insight_generator = insight_generator
        self.success_rate_calculator = success_rate_calculator
        self.report_generator = report_generator
        self.dashboard_manager = dashboard_manager

        self.blueprint = Blueprint('analytics', __name__, url_prefix='/api/analytics')
        self._register_routes()

        logger.info("AnalyticsAPI initialized")

    def _register_routes(self) -> None:
        """Register API routes."""
        if not FLASK_AVAILABLE or not self.blueprint:
            return

        @self.blueprint.route('/metrics', methods=['GET'])
        def get_metrics():
            """Get metrics with filters."""
            try:
                metric_type = request.args.get('type', 'execution')
                workflow_id = request.args.get('workflow_id')
                hours = int(request.args.get('hours', 24))

                end_time = datetime.now()
                start_time = end_time - timedelta(hours=hours)

                filters = {}
                if workflow_id:
                    filters['workflow_id'] = workflow_id

                metrics = self.query_engine.query(
                    metric_type=metric_type,
                    start_time=start_time,
                    end_time=end_time,
                    filters=filters
                )

                return jsonify({
                    'success': True,
                    'count': len(metrics),
                    'metrics': metrics
                })
            except Exception as e:
                logger.error(f"Error getting metrics: {e}")
                return jsonify({'success': False, 'error': str(e)}), 500

        @self.blueprint.route('/performance', methods=['GET'])
        def get_performance():
            """Get performance analysis."""
            try:
                workflow_id = request.args.get('workflow_id')
                if not workflow_id:
                    return jsonify({'success': False, 'error': 'workflow_id required'}), 400

                hours = int(request.args.get('hours', 24))
                end_time = datetime.now()
                start_time = end_time - timedelta(hours=hours)

                stats = self.performance_analyzer.analyze_performance(
                    workflow_id=workflow_id,
                    start_time=start_time,
                    end_time=end_time
                )

                return jsonify({
                    'success': True,
                    'performance': stats.to_dict()
                })
            except Exception as e:
                logger.error(f"Error getting performance: {e}")
                return jsonify({'success': False, 'error': str(e)}), 500

        @self.blueprint.route('/performance/bottlenecks', methods=['GET'])
        def get_bottlenecks():
            """Get performance bottlenecks."""
            try:
                workflow_id = request.args.get('workflow_id')
                if not workflow_id:
                    return jsonify({'success': False, 'error': 'workflow_id required'}), 400

                hours = int(request.args.get('hours', 24))
                end_time = datetime.now()
                start_time = end_time - timedelta(hours=hours)

                bottlenecks = self.performance_analyzer.identify_bottlenecks(
                    workflow_id=workflow_id,
                    start_time=start_time,
                    end_time=end_time
                )

                return jsonify({
                    'success': True,
                    'bottlenecks': [b.to_dict() for b in bottlenecks]
                })
            except Exception as e:
                logger.error(f"Error getting bottlenecks: {e}")
                return jsonify({'success': False, 'error': str(e)}), 500

        @self.blueprint.route('/anomalies', methods=['GET'])
        def get_anomalies():
            """Get detected anomalies."""
            try:
                workflow_id = request.args.get('workflow_id')
                hours = int(request.args.get('hours', 24))

                end_time = datetime.now()
                start_time = end_time - timedelta(hours=hours)

                anomalies = self.anomaly_detector.detect_anomalies(
                    workflow_id=workflow_id,
                    start_time=start_time,
                    end_time=end_time
                )

                return jsonify({
                    'success': True,
                    'count': len(anomalies),
                    'anomalies': [a.to_dict() for a in anomalies]
                })
            except Exception as e:
                logger.error(f"Error getting anomalies: {e}")
                return jsonify({'success': False, 'error': str(e)}), 500

        @self.blueprint.route('/insights', methods=['GET'])
        def get_insights():
            """Get generated insights."""
            try:
                hours = int(request.args.get('hours', 168))  # 1 week default

                end_time = datetime.now()
                start_time = end_time - timedelta(hours=hours)

                insights = self.insight_generator.generate_insights(
                    start_time=start_time,
                    end_time=end_time
                )

                return jsonify({
                    'success': True,
                    'count': len(insights),
                    'insights': [i.to_dict() for i in insights]
                })
            except Exception as e:
                logger.error(f"Error getting insights: {e}")
                return jsonify({'success': False, 'error': str(e)}), 500

        @self.blueprint.route('/success-rate', methods=['GET'])
        def get_success_rate():
            """Get success rate statistics."""
            try:
                workflow_id = request.args.get('workflow_id')
                if not workflow_id:
                    return jsonify({'success': False, 'error': 'workflow_id required'}), 400

                hours = int(request.args.get('hours', 24))

                stats = self.success_rate_calculator.calculate_success_rate(
                    workflow_id=workflow_id,
                    time_window_hours=hours
                )

                return jsonify({
                    'success': True,
                    'stats': stats.to_dict()
                })
            except Exception as e:
                logger.error(f"Error getting success rate: {e}")
                return jsonify({'success': False, 'error': str(e)}), 500

        @self.blueprint.route('/reliability-ranking', methods=['GET'])
        def get_reliability_ranking():
            """Get workflow reliability rankings."""
            try:
                hours = int(request.args.get('hours', 168))  # 1 week default

                rankings = self.success_rate_calculator.rank_workflows_by_reliability(
                    time_window_hours=hours
                )

                return jsonify({
                    'success': True,
                    'rankings': [r.to_dict() for r in rankings]
                })
            except Exception as e:
                logger.error(f"Error getting reliability ranking: {e}")
                return jsonify({'success': False, 'error': str(e)}), 500

        @self.blueprint.route('/reports', methods=['POST'])
        def generate_report():
            """Generate a report."""
            try:
                data = request.json

                from ..reporting.report_generator import ReportConfig
                config = ReportConfig(
                    title=data.get('title', 'Analytics Report'),
                    metric_types=data.get('metric_types', ['execution']),
                    start_time=datetime.fromisoformat(data['start_time']),
                    end_time=datetime.fromisoformat(data['end_time']),
                    workflow_ids=data.get('workflow_ids'),
                    include_charts=data.get('include_charts', True),
                    include_insights=data.get('include_insights', True),
                    format=data.get('format', 'json')
                )

                report_data = self.report_generator.generate_report(config)

                # Export based on format
                if config.format == 'json':
                    filepath = self.report_generator.export_json(report_data)
                elif config.format == 'csv':
                    filepath = self.report_generator.export_csv(report_data)
                elif config.format == 'html':
                    filepath = self.report_generator.export_html(report_data)
                elif config.format == 'pdf':
                    filepath = self.report_generator.export_pdf(report_data)
                else:
                    filepath = self.report_generator.export_json(report_data)

                return jsonify({
                    'success': True,
                    'filepath': filepath
                })
            except Exception as e:
                logger.error(f"Error generating report: {e}")
                return jsonify({'success': False, 'error': str(e)}), 500

        @self.blueprint.route('/reports/<path:filename>', methods=['GET'])
        def download_report(filename):
            """Download a generated report."""
            try:
                filepath = self.report_generator.output_dir / filename
                if not filepath.exists():
                    return jsonify({'success': False, 'error': 'Report not found'}), 404

                return send_file(str(filepath), as_attachment=True)
            except Exception as e:
                logger.error(f"Error downloading report: {e}")
                return jsonify({'success': False, 'error': str(e)}), 500

        @self.blueprint.route('/dashboards', methods=['GET'])
        def list_dashboards():
            """List dashboards."""
            try:
                owner = request.args.get('owner')
                dashboards = self.dashboard_manager.list_dashboards(owner=owner)

                return jsonify({
                    'success': True,
                    'dashboards': [d.to_dict() for d in dashboards]
                })
            except Exception as e:
                logger.error(f"Error listing dashboards: {e}")
                return jsonify({'success': False, 'error': str(e)}), 500

        @self.blueprint.route('/dashboards', methods=['POST'])
        def create_dashboard():
            """Create a dashboard."""
            try:
                data = request.json
                dashboard = self.dashboard_manager.create_dashboard(
                    name=data['name'],
                    description=data.get('description', ''),
                    owner=data['owner'],
                    template_id=data.get('template_id')
                )

                return jsonify({
                    'success': True,
                    'dashboard': dashboard.to_dict()
                })
            except Exception as e:
                logger.error(f"Error creating dashboard: {e}")
                return jsonify({'success': False, 'error': str(e)}), 500

        @self.blueprint.route('/dashboards/<dashboard_id>', methods=['GET'])
        def get_dashboard(dashboard_id):
            """Get dashboard by ID."""
            try:
                dashboard = self.dashboard_manager.get_dashboard(dashboard_id)
                if not dashboard:
                    return jsonify({'success': False, 'error': 'Dashboard not found'}), 404

                return jsonify({
                    'success': True,
                    'dashboard': dashboard.to_dict()
                })
            except Exception as e:
                logger.error(f"Error getting dashboard: {e}")
                return jsonify({'success': False, 'error': str(e)}), 500

        @self.blueprint.route('/dashboards/<dashboard_id>', methods=['PUT'])
        def update_dashboard(dashboard_id):
            """Update dashboard."""
            try:
                data = request.json
                dashboard = self.dashboard_manager.update_dashboard(dashboard_id, data)
                if not dashboard:
                    return jsonify({'success': False, 'error': 'Dashboard not found'}), 404

                return jsonify({
                    'success': True,
                    'dashboard': dashboard.to_dict()
                })
            except Exception as e:
                logger.error(f"Error updating dashboard: {e}")
                return jsonify({'success': False, 'error': str(e)}), 500

        @self.blueprint.route('/dashboards/<dashboard_id>', methods=['DELETE'])
        def delete_dashboard(dashboard_id):
            """Delete dashboard."""
            try:
                success = self.dashboard_manager.delete_dashboard(dashboard_id)
                if not success:
                    return jsonify({'success': False, 'error': 'Dashboard not found'}), 404

                return jsonify({'success': True})
            except Exception as e:
                logger.error(f"Error deleting dashboard: {e}")
                return jsonify({'success': False, 'error': str(e)}), 500

        @self.blueprint.route('/dashboard-templates', methods=['GET'])
        def get_dashboard_templates():
            """Get dashboard templates."""
            try:
                templates = self.dashboard_manager.get_templates()

                return jsonify({
                    'success': True,
                    'templates': [t.to_dict() for t in templates]
                })
            except Exception as e:
                logger.error(f"Error getting templates: {e}")
                return jsonify({'success': False, 'error': str(e)}), 500

    def get_blueprint(self) -> Blueprint:
        """Get Flask blueprint."""
        return self.blueprint
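The blueprint is designed to be mounted on an existing Flask app. A hedged wiring sketch (the `app` object, host, and port are assumptions; port 5001 mirrors the backend port in the commit message):

from flask import Flask
from core.analytics.analytics_system import get_analytics_system

app = Flask(__name__)
api = get_analytics_system().api                  # AnalyticsAPI built by AnalyticsSystem

if api.get_blueprint() is not None:               # blueprint is None when Flask is missing
    app.register_blueprint(api.get_blueprint())   # routes mounted under /api/analytics

# e.g. GET /api/analytics/metrics?type=execution&workflow_id=wf-1&hours=24
app.run(host="0.0.0.0", port=5001)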
core/analytics/collection/__init__.py  (new file, 12 lines)
@@ -0,0 +1,12 @@
"""Data collection components for analytics."""

from .metrics_collector import MetricsCollector, ExecutionMetrics, StepMetrics
from .resource_collector import ResourceCollector, ResourceMetrics

__all__ = [
    'MetricsCollector',
    'ExecutionMetrics',
    'StepMetrics',
    'ResourceCollector',
    'ResourceMetrics',
]
core/analytics/collection/metrics_collector.py  (new file, 348 lines)
@@ -0,0 +1,348 @@
"""Metrics collection for workflow executions."""

import threading
import time
import logging
from dataclasses import dataclass, field
from typing import List, Dict, Any, Optional, Union, Callable
from datetime import datetime

logger = logging.getLogger(__name__)


@dataclass
class ExecutionMetrics:
    """Metrics for a workflow execution."""
    execution_id: str
    workflow_id: str
    started_at: datetime
    completed_at: Optional[datetime] = None
    duration_ms: Optional[float] = None
    status: str = 'running'  # 'running', 'completed', 'failed'
    steps_total: int = 0
    steps_completed: int = 0
    steps_failed: int = 0
    error_message: Optional[str] = None
    context: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for storage."""
        return {
            'execution_id': self.execution_id,
            'workflow_id': self.workflow_id,
            'started_at': self.started_at.isoformat(),
            'completed_at': self.completed_at.isoformat() if self.completed_at else None,
            'duration_ms': self.duration_ms,
            'status': self.status,
            'steps_total': self.steps_total,
            'steps_completed': self.steps_completed,
            'steps_failed': self.steps_failed,
            'error_message': self.error_message,
            'context': self.context
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'ExecutionMetrics':
        """Create from dictionary."""
        return cls(
            execution_id=data['execution_id'],
            workflow_id=data['workflow_id'],
            started_at=datetime.fromisoformat(data['started_at']),
            completed_at=datetime.fromisoformat(data['completed_at']) if data.get('completed_at') else None,
            duration_ms=data.get('duration_ms'),
            status=data.get('status', 'running'),
            steps_total=data.get('steps_total', 0),
            steps_completed=data.get('steps_completed', 0),
            steps_failed=data.get('steps_failed', 0),
            error_message=data.get('error_message'),
            context=data.get('context', {})
        )


@dataclass
class StepMetrics:
    """Metrics for a workflow step."""
    step_id: str
    execution_id: str
    workflow_id: str
    node_id: str
    action_type: str
    target_element: str
    started_at: datetime
    completed_at: datetime
    duration_ms: float
    status: str
    confidence_score: float
    retry_count: int = 0
    error_details: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for storage."""
        return {
            'step_id': self.step_id,
            'execution_id': self.execution_id,
            'workflow_id': self.workflow_id,
            'node_id': self.node_id,
            'action_type': self.action_type,
            'target_element': self.target_element,
            'started_at': self.started_at.isoformat(),
            'completed_at': self.completed_at.isoformat(),
            'duration_ms': self.duration_ms,
            'status': self.status,
            'confidence_score': self.confidence_score,
            'retry_count': self.retry_count,
            'error_details': self.error_details
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'StepMetrics':
        """Create from dictionary."""
        return cls(
            step_id=data['step_id'],
            execution_id=data['execution_id'],
            workflow_id=data['workflow_id'],
            node_id=data['node_id'],
            action_type=data['action_type'],
            target_element=data['target_element'],
            started_at=datetime.fromisoformat(data['started_at']),
            completed_at=datetime.fromisoformat(data['completed_at']),
            duration_ms=data['duration_ms'],
            status=data['status'],
            confidence_score=data['confidence_score'],
            retry_count=data.get('retry_count', 0),
            error_details=data.get('error_details')
        )


class MetricsCollector:
    """Collects metrics from workflow executions."""

    def __init__(
        self,
        storage_callback: Optional[Callable] = None,
        buffer_size: int = 1000,
        flush_interval_sec: float = 5.0
    ):
        """
        Initialize metrics collector.

        Args:
            storage_callback: Callback to persist metrics (receives list of metrics)
            buffer_size: Maximum buffer size before forcing flush
            flush_interval_sec: Interval between automatic flushes
        """
        self.storage_callback = storage_callback
        self.buffer_size = buffer_size
        self.flush_interval = flush_interval_sec

        # Recovery attempts are recorded as plain dicts, hence the Dict member.
        self._buffer: List[Union[ExecutionMetrics, StepMetrics, Dict[str, Any]]] = []
        self._lock = threading.Lock()
        self._flush_thread: Optional[threading.Thread] = None
        self._running = False

        # Track active executions
        self._active_executions: Dict[str, ExecutionMetrics] = {}

        logger.info(f"MetricsCollector initialized (buffer_size={buffer_size}, flush_interval={flush_interval_sec}s)")

    def start(self) -> None:
        """Start automatic flushing."""
        if self._running:
            return

        self._running = True
        self._flush_thread = threading.Thread(target=self._auto_flush, daemon=True)
        self._flush_thread.start()
        logger.info("MetricsCollector started")

    def stop(self) -> None:
        """Stop automatic flushing and flush remaining metrics."""
        self._running = False
        if self._flush_thread:
            self._flush_thread.join(timeout=5.0)
        self.flush()
        logger.info("MetricsCollector stopped")

    def record_execution_start(
        self,
        execution_id: str,
        workflow_id: str,
        context: Optional[Dict[str, Any]] = None
    ) -> None:
        """
        Record the start of a workflow execution.

        Args:
            execution_id: Unique execution identifier
            workflow_id: Workflow identifier
            context: Additional context information
        """
        metrics = ExecutionMetrics(
            execution_id=execution_id,
            workflow_id=workflow_id,
            started_at=datetime.now(),
            status='running',
            context=context or {}
        )

        with self._lock:
            self._active_executions[execution_id] = metrics

        logger.debug(f"Recorded execution start: {execution_id}")

    def record_execution_complete(
        self,
        execution_id: str,
        status: str,
        steps_total: int = 0,
        steps_completed: int = 0,
        steps_failed: int = 0,
        error_message: Optional[str] = None
    ) -> None:
        """
        Record the completion of a workflow execution.

        Args:
            execution_id: Execution identifier
            status: Final status ('completed' or 'failed')
            steps_total: Total number of steps
            steps_completed: Number of completed steps
            steps_failed: Number of failed steps
            error_message: Error message if failed
        """
        with self._lock:
            if execution_id not in self._active_executions:
                logger.warning(f"Execution not found: {execution_id}")
                return

            metrics = self._active_executions[execution_id]
            metrics.completed_at = datetime.now()
            metrics.duration_ms = (metrics.completed_at - metrics.started_at).total_seconds() * 1000
            metrics.status = status
            metrics.steps_total = steps_total
            metrics.steps_completed = steps_completed
            metrics.steps_failed = steps_failed
            metrics.error_message = error_message

            # Move to buffer
            self._buffer.append(metrics)
            del self._active_executions[execution_id]

            # Check if buffer is full
            if len(self._buffer) >= self.buffer_size:
                self._flush_unlocked()

        logger.debug(f"Recorded execution complete: {execution_id} ({status})")

    def record_step(self, step_metrics: StepMetrics) -> None:
        """
        Record metrics for a completed step.

        Args:
            step_metrics: Step metrics to record
        """
        with self._lock:
            self._buffer.append(step_metrics)

            # Check if buffer is full
            if len(self._buffer) >= self.buffer_size:
                self._flush_unlocked()

        logger.debug(f"Recorded step: {step_metrics.step_id}")

    def flush(self) -> int:
        """
        Flush buffered metrics to storage.

        Returns:
            Number of metrics flushed
        """
        with self._lock:
            return self._flush_unlocked()

    def _flush_unlocked(self) -> int:
        """Flush without acquiring lock (must be called with lock held)."""
        if not self._buffer:
            return 0

        if not self.storage_callback:
            logger.warning("No storage callback configured, discarding metrics")
            count = len(self._buffer)
            self._buffer.clear()
            return count

        # Copy buffer before handing it to the callback so it can be
        # restored on failure.
        metrics_to_flush = self._buffer.copy()
        self._buffer.clear()

        try:
            # Persist. Note this runs with the lock held, so the storage
            # callback should return quickly.
            self.storage_callback(metrics_to_flush)

            logger.debug(f"Flushed {len(metrics_to_flush)} metrics")
            return len(metrics_to_flush)

        except Exception as e:
            logger.error(f"Error flushing metrics: {e}")
            # Put metrics back in buffer
            self._buffer.extend(metrics_to_flush)
            return 0

    def _auto_flush(self) -> None:
        """Automatic flush thread."""
        while self._running:
            time.sleep(self.flush_interval)
            if self._running:
                self.flush()

    def get_active_executions(self) -> Dict[str, ExecutionMetrics]:
        """Get currently active executions."""
        with self._lock:
            return self._active_executions.copy()

    def get_buffer_size(self) -> int:
        """Get current buffer size."""
        with self._lock:
            return len(self._buffer)

    def record_recovery_attempt(
        self,
        workflow_id: str,
        node_id: str,
        failure_reason: str,
        recovery_success: bool,
        strategy_used: Optional[str] = None,
        confidence: float = 0.0
    ) -> None:
        """
        Record a self-healing recovery attempt.

        Args:
            workflow_id: Workflow identifier
            node_id: Node where failure occurred
            failure_reason: Reason for the failure
            recovery_success: Whether recovery was successful
            strategy_used: Strategy used for recovery
            confidence: Confidence score of recovery
        """
        # Create a custom metrics entry for recovery
        recovery_metrics = {
            'type': 'recovery_attempt',
            'timestamp': datetime.now().isoformat(),
            'workflow_id': workflow_id,
            'node_id': node_id,
            'failure_reason': failure_reason,
            'recovery_success': recovery_success,
            'strategy_used': strategy_used,
            'confidence': confidence
        }

        with self._lock:
            self._buffer.append(recovery_metrics)

            # Check if buffer is full
            if len(self._buffer) >= self.buffer_size:
                self._flush_unlocked()

        logger.debug(f"Recorded recovery attempt: {workflow_id}/{node_id} - {'success' if recovery_success else 'failed'}")
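A small self-contained sketch of the collector's lifecycle, using only methods defined in this file; the list-based storage_callback stands in for a real store, and all IDs are illustrative:

import uuid
from datetime import datetime
from core.analytics.collection.metrics_collector import MetricsCollector, StepMetrics

collected = []
collector = MetricsCollector(storage_callback=collected.extend, buffer_size=100)
collector.start()                                 # background auto-flush thread

exec_id = str(uuid.uuid4())
collector.record_execution_start(exec_id, workflow_id="wf-demo")
step_started = datetime.now()
# ... perform the step ...
collector.record_step(StepMetrics(
    step_id=str(uuid.uuid4()), execution_id=exec_id, workflow_id="wf-demo",
    node_id="node-1", action_type="click", target_element="btn-submit",
    started_at=step_started, completed_at=datetime.now(),
    duration_ms=12.5, status="completed", confidence_score=0.97,
))
collector.record_execution_complete(exec_id, status="completed",
                                    steps_total=1, steps_completed=1)
collector.stop()                                  # joins the thread and flushes the buffer
print(len(collected))                             # step + execution metrics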
core/analytics/collection/resource_collector.py  (new file, 209 lines)
@@ -0,0 +1,209 @@
"""Resource usage collection for analytics."""

import psutil
import threading
import time
import logging
from dataclasses import dataclass
from typing import Optional, Dict, Any, Callable
from datetime import datetime

logger = logging.getLogger(__name__)


@dataclass
class ResourceMetrics:
    """System resource usage metrics."""
    timestamp: datetime
    workflow_id: Optional[str] = None
    execution_id: Optional[str] = None
    cpu_percent: float = 0.0
    memory_mb: float = 0.0
    gpu_utilization: float = 0.0
    gpu_memory_mb: float = 0.0
    disk_io_mb: float = 0.0

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for storage."""
        return {
            'timestamp': self.timestamp.isoformat(),
            'workflow_id': self.workflow_id,
            'execution_id': self.execution_id,
            'cpu_percent': self.cpu_percent,
            'memory_mb': self.memory_mb,
            'gpu_utilization': self.gpu_utilization,
            'gpu_memory_mb': self.gpu_memory_mb,
            'disk_io_mb': self.disk_io_mb
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'ResourceMetrics':
        """Create from dictionary."""
        return cls(
            timestamp=datetime.fromisoformat(data['timestamp']),
            workflow_id=data.get('workflow_id'),
            execution_id=data.get('execution_id'),
            cpu_percent=data.get('cpu_percent', 0.0),
            memory_mb=data.get('memory_mb', 0.0),
            gpu_utilization=data.get('gpu_utilization', 0.0),
            gpu_memory_mb=data.get('gpu_memory_mb', 0.0),
            disk_io_mb=data.get('disk_io_mb', 0.0)
        )


class ResourceCollector:
    """Collects system resource usage metrics."""

    def __init__(
        self,
        storage_callback: Optional[Callable] = None,
        sample_interval_sec: float = 1.0
    ):
        """
        Initialize resource collector.

        Args:
            storage_callback: Callback to persist metrics
            sample_interval_sec: Interval between samples
        """
        self.storage_callback = storage_callback
        self.sample_interval = sample_interval_sec

        self._running = False
        self._thread: Optional[threading.Thread] = None
        self._current_context: Dict[str, Optional[str]] = {
            'workflow_id': None,
            'execution_id': None
        }
        self._context_lock = threading.Lock()

        # Initialize psutil
        self._process = psutil.Process()
        self._last_disk_io = None

        # Try to import GPU monitoring
        self._gpu_available = False
        try:
            import pynvml
            pynvml.nvmlInit()
            self._gpu_handle = pynvml.nvmlDeviceGetHandleByIndex(0)
            self._gpu_available = True
            logger.info("GPU monitoring enabled")
        except Exception:
            logger.info("GPU monitoring not available")

        logger.info(f"ResourceCollector initialized (sample_interval={sample_interval_sec}s)")

    @property
    def monitoring_active(self) -> bool:
        """Check if resource monitoring is active."""
        return self._running

    def start(self) -> None:
        """Start collecting resource metrics."""
        if self._running:
            return

        self._running = True
        self._thread = threading.Thread(target=self._collect_loop, daemon=True)
        self._thread.start()
        logger.info("ResourceCollector started")

    def stop(self) -> None:
        """Stop collecting resource metrics."""
        self._running = False
        if self._thread:
            self._thread.join(timeout=5.0)
        logger.info("ResourceCollector stopped")

    def set_context(
        self,
        workflow_id: Optional[str] = None,
        execution_id: Optional[str] = None
    ) -> None:
        """
        Set current execution context for resource tracking.

        Args:
            workflow_id: Current workflow ID
            execution_id: Current execution ID
        """
        with self._context_lock:
            self._current_context['workflow_id'] = workflow_id
            self._current_context['execution_id'] = execution_id

    def clear_context(self) -> None:
        """Clear execution context."""
        with self._context_lock:
            self._current_context['workflow_id'] = None
            self._current_context['execution_id'] = None

    def get_current_metrics(self) -> ResourceMetrics:
        """
        Get current resource usage.

        Returns:
            ResourceMetrics with current usage
        """
        with self._context_lock:
            workflow_id = self._current_context['workflow_id']
            execution_id = self._current_context['execution_id']

        # CPU usage
        cpu_percent = self._process.cpu_percent(interval=0.1)

        # Memory usage
        memory_info = self._process.memory_info()
        memory_mb = memory_info.rss / (1024 * 1024)

        # Disk I/O (delta since the previous sample)
        disk_io_mb = 0.0
        try:
            disk_io = self._process.io_counters()
            if self._last_disk_io:
                bytes_read = disk_io.read_bytes - self._last_disk_io.read_bytes
                bytes_written = disk_io.write_bytes - self._last_disk_io.write_bytes
                disk_io_mb = (bytes_read + bytes_written) / (1024 * 1024)
            self._last_disk_io = disk_io
        except Exception:
            # io_counters() is unavailable on some platforms
            pass

        # GPU usage
        gpu_utilization = 0.0
        gpu_memory_mb = 0.0
        if self._gpu_available:
            try:
                import pynvml
                util = pynvml.nvmlDeviceGetUtilizationRates(self._gpu_handle)
                gpu_utilization = float(util.gpu)

                mem_info = pynvml.nvmlDeviceGetMemoryInfo(self._gpu_handle)
                gpu_memory_mb = mem_info.used / (1024 * 1024)
            except Exception:
                pass

        return ResourceMetrics(
            timestamp=datetime.now(),
            workflow_id=workflow_id,
            execution_id=execution_id,
            cpu_percent=cpu_percent,
            memory_mb=memory_mb,
            gpu_utilization=gpu_utilization,
            gpu_memory_mb=gpu_memory_mb,
            disk_io_mb=disk_io_mb
        )

    def _collect_loop(self) -> None:
        """Collection loop running in background thread."""
        while self._running:
            try:
                metrics = self.get_current_metrics()

                # Persist if callback is configured
                if self.storage_callback:
                    self.storage_callback([metrics])

            except Exception as e:
                logger.error(f"Error collecting resource metrics: {e}")

            time.sleep(self.sample_interval)
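A similar sketch for the resource collector; the 0.5 s interval, the list callback, and the context IDs are illustrative:

import time
from core.analytics.collection.resource_collector import ResourceCollector

samples = []
collector = ResourceCollector(storage_callback=samples.extend, sample_interval_sec=0.5)
collector.start()

collector.set_context(workflow_id="wf-demo", execution_id="exec-1")
time.sleep(2)                                     # samples taken now are attributed to wf-demo/exec-1
collector.clear_context()

collector.stop()
print(samples[-1].to_dict())                      # cpu_percent, memory_mb, gpu_*, disk_io_mb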
core/analytics/dashboard/__init__.py  (new file, 15 lines)
@@ -0,0 +1,15 @@
"""Analytics dashboard module."""

from .dashboard_manager import (
    DashboardManager,
    Dashboard,
    DashboardWidget,
    DashboardTemplate
)

__all__ = [
    'DashboardManager',
    'Dashboard',
    'DashboardWidget',
    'DashboardTemplate'
]
core/analytics/dashboard/dashboard_manager.py  (new file, 468 lines)
@@ -0,0 +1,468 @@
"""Dashboard management for analytics."""

import logging
import json
import uuid
from typing import Dict, List, Optional, Any
from datetime import datetime
from pathlib import Path
from dataclasses import dataclass, field

logger = logging.getLogger(__name__)


@dataclass
class DashboardWidget:
    """Dashboard widget configuration."""
    widget_id: str
    widget_type: str  # chart, table, metric, insight
    title: str
    config: Dict[str, Any]
    position: Dict[str, int]  # x, y, width, height

    def to_dict(self) -> Dict:
        """Convert to dictionary."""
        return {
            'widget_id': self.widget_id,
            'widget_type': self.widget_type,
            'title': self.title,
            'config': self.config,
            'position': self.position
        }

    @classmethod
    def from_dict(cls, data: Dict) -> 'DashboardWidget':
        """Create from dictionary."""
        return cls(**data)


@dataclass
class Dashboard:
    """Dashboard configuration."""
    dashboard_id: str
    name: str
    description: str
    owner: str
    widgets: List[DashboardWidget] = field(default_factory=list)
    layout: str = 'grid'  # grid, flex
    refresh_interval: int = 30  # seconds
    is_public: bool = False
    shared_with: List[str] = field(default_factory=list)
    created_at: datetime = field(default_factory=datetime.now)
    updated_at: datetime = field(default_factory=datetime.now)

    def to_dict(self) -> Dict:
        """Convert to dictionary."""
        return {
            'dashboard_id': self.dashboard_id,
            'name': self.name,
            'description': self.description,
            'owner': self.owner,
            'widgets': [w.to_dict() for w in self.widgets],
            'layout': self.layout,
            'refresh_interval': self.refresh_interval,
            'is_public': self.is_public,
            'shared_with': self.shared_with,
            'created_at': self.created_at.isoformat(),
            'updated_at': self.updated_at.isoformat()
        }

    @classmethod
    def from_dict(cls, data: Dict) -> 'Dashboard':
        """Create from dictionary."""
        data = data.copy()
        data['widgets'] = [DashboardWidget.from_dict(w) for w in data.get('widgets', [])]
        data['created_at'] = datetime.fromisoformat(data['created_at'])
        data['updated_at'] = datetime.fromisoformat(data['updated_at'])
        return cls(**data)


@dataclass
class DashboardTemplate:
    """Pre-built dashboard template."""
    template_id: str
    name: str
    description: str
    category: str
    widgets: List[DashboardWidget]

    def to_dict(self) -> Dict:
        """Convert to dictionary."""
        return {
            'template_id': self.template_id,
            'name': self.name,
            'description': self.description,
            'category': self.category,
            'widgets': [w.to_dict() for w in self.widgets]
        }


class DashboardManager:
    """Manage analytics dashboards."""

    def __init__(self, storage_dir: str = "data/analytics/dashboards"):
        """
        Initialize dashboard manager.

        Args:
            storage_dir: Directory for dashboard storage
        """
        self.storage_dir = Path(storage_dir)
        self.storage_dir.mkdir(parents=True, exist_ok=True)
        self.dashboards: Dict[str, Dashboard] = {}
        self.templates: Dict[str, DashboardTemplate] = {}
        self._load_dashboards()
        self._init_templates()
        logger.info("DashboardManager initialized")

    def create_dashboard(
        self,
        name: str,
        description: str,
        owner: str,
        template_id: Optional[str] = None
    ) -> Dashboard:
        """
        Create a new dashboard.

        Args:
            name: Dashboard name
            description: Dashboard description
            owner: Owner username
            template_id: Optional template to use

        Returns:
            Created dashboard
        """
        dashboard_id = str(uuid.uuid4())

        # Create from template if specified
        if template_id and template_id in self.templates:
            template = self.templates[template_id]
            widgets = [
                DashboardWidget(
                    widget_id=str(uuid.uuid4()),
                    widget_type=w.widget_type,
                    title=w.title,
                    config=w.config.copy(),
                    position=w.position.copy()
                )
                for w in template.widgets
            ]
        else:
            widgets = []

        dashboard = Dashboard(
            dashboard_id=dashboard_id,
            name=name,
            description=description,
            owner=owner,
            widgets=widgets
        )

        self.dashboards[dashboard_id] = dashboard
        self._save_dashboard(dashboard)

        logger.info(f"Created dashboard: {dashboard_id}")
        return dashboard

    def get_dashboard(self, dashboard_id: str) -> Optional[Dashboard]:
        """Get dashboard by ID."""
        return self.dashboards.get(dashboard_id)

    def list_dashboards(
        self,
        owner: Optional[str] = None,
        include_shared: bool = True
    ) -> List[Dashboard]:
        """
        List dashboards.

        Args:
            owner: Filter by owner (None = all)
            include_shared: Include dashboards shared with owner

        Returns:
            List of dashboards
        """
        dashboards = list(self.dashboards.values())

        if owner:
            dashboards = [
                d for d in dashboards
                if d.owner == owner or
                (include_shared and (d.is_public or owner in d.shared_with))
            ]

        return dashboards

    def update_dashboard(
        self,
        dashboard_id: str,
        updates: Dict[str, Any]
    ) -> Optional[Dashboard]:
        """
        Update dashboard configuration.

        Args:
            dashboard_id: Dashboard identifier
            updates: Dictionary of updates

        Returns:
            Updated dashboard or None
        """
        dashboard = self.dashboards.get(dashboard_id)
        if not dashboard:
            return None

        # Apply updates
        for key, value in updates.items():
            if hasattr(dashboard, key):
                setattr(dashboard, key, value)

        dashboard.updated_at = datetime.now()
        self._save_dashboard(dashboard)

        logger.info(f"Updated dashboard: {dashboard_id}")
        return dashboard

    def delete_dashboard(self, dashboard_id: str) -> bool:
        """
        Delete a dashboard.

        Args:
            dashboard_id: Dashboard identifier

        Returns:
            True if deleted, False if not found
        """
        if dashboard_id not in self.dashboards:
            return False

        del self.dashboards[dashboard_id]

        # Delete file
        filepath = self.storage_dir / f"{dashboard_id}.json"
        if filepath.exists():
            filepath.unlink()

        logger.info(f"Deleted dashboard: {dashboard_id}")
        return True

    def add_widget(
        self,
        dashboard_id: str,
        widget_type: str,
        title: str,
        config: Dict[str, Any],
        position: Dict[str, int]
    ) -> Optional[DashboardWidget]:
        """
        Add widget to dashboard.

        Args:
            dashboard_id: Dashboard identifier
            widget_type: Widget type
            title: Widget title
            config: Widget configuration
            position: Widget position

        Returns:
            Created widget or None
        """
        dashboard = self.dashboards.get(dashboard_id)
        if not dashboard:
            return None

        widget = DashboardWidget(
            widget_id=str(uuid.uuid4()),
            widget_type=widget_type,
            title=title,
            config=config,
            position=position
        )

        dashboard.widgets.append(widget)
        dashboard.updated_at = datetime.now()
        self._save_dashboard(dashboard)

        logger.info(f"Added widget to dashboard {dashboard_id}")
        return widget

    def remove_widget(
        self,
        dashboard_id: str,
        widget_id: str
    ) -> bool:
        """
        Remove widget from dashboard.

        Args:
            dashboard_id: Dashboard identifier
            widget_id: Widget identifier

        Returns:
            True if removed, False if not found
        """
        dashboard = self.dashboards.get(dashboard_id)
        if not dashboard:
            return False

        dashboard.widgets = [w for w in dashboard.widgets if w.widget_id != widget_id]
        dashboard.updated_at = datetime.now()
        self._save_dashboard(dashboard)

        logger.info(f"Removed widget from dashboard {dashboard_id}")
        return True

    def share_dashboard(
        self,
        dashboard_id: str,
        username: str
    ) -> bool:
        """
        Share dashboard with a user.

        Args:
            dashboard_id: Dashboard identifier
            username: Username to share with

        Returns:
            True if shared, False if not found
        """
        dashboard = self.dashboards.get(dashboard_id)
        if not dashboard:
            return False

        if username not in dashboard.shared_with:
            dashboard.shared_with.append(username)
            dashboard.updated_at = datetime.now()
            self._save_dashboard(dashboard)

        logger.info(f"Shared dashboard {dashboard_id} with {username}")
        return True

    def make_public(
        self,
        dashboard_id: str,
        is_public: bool = True
    ) -> bool:
        """
        Make dashboard public or private.

        Args:
            dashboard_id: Dashboard identifier
            is_public: Whether dashboard should be public

        Returns:
            True if updated, False if not found
        """
        dashboard = self.dashboards.get(dashboard_id)
        if not dashboard:
            return False

        dashboard.is_public = is_public
        dashboard.updated_at = datetime.now()
        self._save_dashboard(dashboard)

        logger.info(f"Dashboard {dashboard_id} public: {is_public}")
        return True

    def get_templates(self) -> List[DashboardTemplate]:
        """Get all dashboard templates."""
        return list(self.templates.values())

    def _load_dashboards(self) -> None:
        """Load dashboards from storage."""
        for filepath in self.storage_dir.glob('*.json'):
            try:
                with open(filepath, 'r') as f:
                    data = json.load(f)
                dashboard = Dashboard.from_dict(data)
                self.dashboards[dashboard.dashboard_id] = dashboard
            except Exception as e:
                logger.error(f"Error loading dashboard {filepath}: {e}")

        logger.info(f"Loaded {len(self.dashboards)} dashboards")

    def _save_dashboard(self, dashboard: Dashboard) -> None:
        """Save dashboard to storage."""
        filepath = self.storage_dir / f"{dashboard.dashboard_id}.json"
        with open(filepath, 'w') as f:
            json.dump(dashboard.to_dict(), f, indent=2)

    def _init_templates(self) -> None:
        """Initialize default dashboard templates."""
        # Performance Overview Template
        self.templates['performance'] = DashboardTemplate(
            template_id='performance',
            name='Performance Overview',
            description='Overview of workflow performance metrics',
            category='performance',
            widgets=[
                DashboardWidget(
                    widget_id='perf_chart',
                    widget_type='chart',
                    title='Execution Duration Trend',
                    config={
                        'chart_type': 'line',
                        'metric': 'duration',
                        'time_range': '7d'
                    },
                    position={'x': 0, 'y': 0, 'width': 6, 'height': 4}
                ),
                DashboardWidget(
                    widget_id='success_rate',
                    widget_type='metric',
                    title='Success Rate',
                    config={
                        'metric': 'success_rate',
                        'format': 'percentage'
                    },
                    position={'x': 6, 'y': 0, 'width': 3, 'height': 2}
                ),
                DashboardWidget(
                    widget_id='bottlenecks',
                    widget_type='table',
                    title='Top Bottlenecks',
                    config={
                        'metric': 'bottlenecks',
                        'limit': 10
                    },
                    position={'x': 0, 'y': 4, 'width': 9, 'height': 4}
                )
            ]
        )

        # Anomaly Detection Template
        self.templates['anomalies'] = DashboardTemplate(
            template_id='anomalies',
            name='Anomaly Detection',
            description='Real-time anomaly detection and alerts',
            category='monitoring',
            widgets=[
                DashboardWidget(
                    widget_id='anomaly_chart',
                    widget_type='chart',
                    title='Anomalies Over Time',
                    config={
                        'chart_type': 'scatter',
                        'metric': 'anomalies',
                        'time_range': '24h'
                    },
                    position={'x': 0, 'y': 0, 'width': 8, 'height': 4}
                ),
                DashboardWidget(
                    widget_id='anomaly_list',
                    widget_type='table',
                    title='Recent Anomalies',
                    config={
                        'metric': 'anomalies',
                        'limit': 20
                    },
                    position={'x': 0, 'y': 4, 'width': 12, 'height': 4}
                )
            ]
        )

        logger.info(f"Initialized {len(self.templates)} dashboard templates")
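A short sketch of the template flow above, using only methods defined in this file; the usernames, widget values, and /tmp path are illustrative:

from core.analytics.dashboard.dashboard_manager import DashboardManager

manager = DashboardManager(storage_dir="/tmp/dashboards")

# Instantiate the built-in 'performance' template for a user; widget IDs
# are regenerated so the template can be reused.
dash = manager.create_dashboard(
    name="Ops Overview",
    description="Daily workflow health",
    owner="alice",
    template_id="performance",
)

manager.add_widget(
    dash.dashboard_id,
    widget_type="metric",
    title="Anomalies (24h)",
    config={"metric": "anomalies", "format": "count"},
    position={"x": 9, "y": 0, "width": 3, "height": 2},
)
manager.share_dashboard(dash.dashboard_id, "bob")  # bob now sees it via list_dashboards(owner="bob")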
core/analytics/engine/__init__.py  (new file, 14 lines)
@@ -0,0 +1,14 @@
"""Analytics engine components."""

from .performance_analyzer import PerformanceAnalyzer, PerformanceStats
from .anomaly_detector import AnomalyDetector, Anomaly
from .insight_generator import InsightGenerator, Insight

__all__ = [
    'PerformanceAnalyzer',
    'PerformanceStats',
    'AnomalyDetector',
    'Anomaly',
    'InsightGenerator',
    'Insight',
]
core/analytics/engine/anomaly_detector.py  (new file, 311 lines; listing truncated below)
@@ -0,0 +1,311 @@
"""Anomaly detection for workflow execution."""

import logging
import statistics
from dataclasses import dataclass, field
from typing import List, Dict, Any, Optional
from datetime import datetime, timedelta
import hashlib

from ..storage.timeseries_store import TimeSeriesStore

logger = logging.getLogger(__name__)


@dataclass
class Anomaly:
    """Detected anomaly."""
    anomaly_id: str
    workflow_id: str
    metric_name: str
    detected_at: datetime
    severity: float  # 0.0 to 1.0
    deviation: float
    baseline_value: float
    actual_value: float
    description: str
    recommended_action: Optional[str] = None
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary."""
        return {
            'anomaly_id': self.anomaly_id,
            'workflow_id': self.workflow_id,
            'metric_name': self.metric_name,
            'detected_at': self.detected_at.isoformat(),
            'severity': self.severity,
            'deviation': self.deviation,
            'baseline_value': self.baseline_value,
            'actual_value': self.actual_value,
            'description': self.description,
            'recommended_action': self.recommended_action,
            'metadata': self.metadata
        }


class AnomalyDetector:
    """Detects anomalies in workflow execution using statistical methods."""

    def __init__(
        self,
        time_series_store: TimeSeriesStore,
        sensitivity: float = 2.0  # Standard deviations
    ):
        """
        Initialize anomaly detector.

        Args:
            time_series_store: Time series storage
            sensitivity: Number of standard deviations for anomaly threshold
        """
        self.store = time_series_store
        self.sensitivity = sensitivity
        self.baselines: Dict[str, Dict] = {}

        logger.info(f"AnomalyDetector initialized (sensitivity={sensitivity})")

    def detect_anomalies(
        self,
        workflow_id: str,
        metrics: List[Dict],
        metric_name: str = 'duration_ms'
    ) -> List[Anomaly]:
        """
        Detect anomalies in metrics.

        Args:
            workflow_id: Workflow identifier
            metrics: List of metric dictionaries
            metric_name: Name of metric to analyze

        Returns:
            List of detected anomalies
        """
        if not metrics:
            return []

        # Get or create baseline
        baseline = self._get_baseline(workflow_id, metric_name)
        if not baseline:
            # Not enough data for baseline
            return []

        anomalies = []

        for metric in metrics:
            value = metric.get(metric_name)
            if value is None:
                continue

            # Calculate deviation from baseline (z-score)
            deviation = abs(value - baseline['mean']) / baseline['std_dev'] if baseline['std_dev'] > 0 else 0

            # Check if anomaly
            if deviation > self.sensitivity:
                severity = min(deviation / (self.sensitivity * 2), 1.0)

                anomaly = Anomaly(
                    anomaly_id=self._generate_anomaly_id(workflow_id, metric_name, metric),
                    workflow_id=workflow_id,
                    metric_name=metric_name,
                    detected_at=datetime.now(),
                    severity=severity,
                    deviation=deviation,
                    baseline_value=baseline['mean'],
                    actual_value=value,
                    description=self._generate_description(metric_name, value, baseline['mean'], deviation),
                    recommended_action=self._generate_recommendation(metric_name, value, baseline['mean']),
                    metadata=metric
                )

                anomalies.append(anomaly)
                logger.info(f"Anomaly detected: {anomaly.description}")

        return anomalies

    def update_baseline(
        self,
        workflow_id: str,
        stable_period_days: int = 7,
        metric_name: str = 'duration_ms'
    ) -> None:
        """
Update baseline from stable period.
|
||||
|
||||
Args:
|
||||
workflow_id: Workflow identifier
|
||||
stable_period_days: Number of days for baseline calculation
|
||||
metric_name: Metric to calculate baseline for
|
||||
"""
|
||||
end_time = datetime.now()
|
||||
start_time = end_time - timedelta(days=stable_period_days)
|
||||
|
||||
# Query metrics
|
||||
metrics = self.store.query_range(
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
workflow_id=workflow_id,
|
||||
metric_types=['execution']
|
||||
)
|
||||
|
||||
executions = metrics.get('execution', [])
|
||||
if not executions:
|
||||
logger.warning(f"No data for baseline calculation: {workflow_id}")
|
||||
return
|
||||
|
||||
# Extract values
|
||||
values = [e.get(metric_name) for e in executions if e.get(metric_name) is not None]
|
||||
|
||||
if len(values) < 10: # Minimum sample size
|
||||
logger.warning(f"Insufficient data for baseline: {workflow_id} ({len(values)} samples)")
|
||||
return
|
||||
|
||||
# Calculate baseline statistics
|
||||
mean = statistics.mean(values)
|
||||
std_dev = statistics.stdev(values) if len(values) > 1 else 0.0
|
||||
median = statistics.median(values)
|
||||
|
||||
baseline_key = f"{workflow_id}:{metric_name}"
|
||||
self.baselines[baseline_key] = {
|
||||
'mean': mean,
|
||||
'std_dev': std_dev,
|
||||
'median': median,
|
||||
'sample_size': len(values),
|
||||
'updated_at': datetime.now(),
|
||||
'period_days': stable_period_days
|
||||
}
|
||||
|
||||
logger.info(f"Baseline updated for {workflow_id}: mean={mean:.2f}, std_dev={std_dev:.2f}")
|
||||
|
||||
def correlate_anomalies(
|
||||
self,
|
||||
anomalies: List[Anomaly],
|
||||
time_window_minutes: int = 30
|
||||
) -> List[List[Anomaly]]:
|
||||
"""
|
||||
Correlate related anomalies within a time window.
|
||||
|
||||
Args:
|
||||
anomalies: List of anomalies to correlate
|
||||
time_window_minutes: Time window for correlation
|
||||
|
||||
Returns:
|
||||
List of correlated anomaly groups
|
||||
"""
|
||||
if not anomalies:
|
||||
return []
|
||||
|
||||
# Sort by detection time
|
||||
sorted_anomalies = sorted(anomalies, key=lambda a: a.detected_at)
|
||||
|
||||
groups = []
|
||||
current_group = [sorted_anomalies[0]]
|
||||
|
||||
for anomaly in sorted_anomalies[1:]:
|
||||
# Check if within time window of last anomaly in current group
|
||||
time_diff = (anomaly.detected_at - current_group[-1].detected_at).total_seconds() / 60
|
||||
|
||||
if time_diff <= time_window_minutes:
|
||||
current_group.append(anomaly)
|
||||
else:
|
||||
# Start new group
|
||||
if len(current_group) > 1: # Only keep groups with multiple anomalies
|
||||
groups.append(current_group)
|
||||
current_group = [anomaly]
|
||||
|
||||
# Add last group if it has multiple anomalies
|
||||
if len(current_group) > 1:
|
||||
groups.append(current_group)
|
||||
|
||||
return groups
|
||||
|
||||
def escalate_anomaly(
|
||||
self,
|
||||
anomaly: Anomaly,
|
||||
duration_minutes: int,
|
||||
impact_score: float
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Escalate an anomaly based on duration and impact.
|
||||
|
||||
Args:
|
||||
anomaly: Anomaly to escalate
|
||||
duration_minutes: How long the anomaly has persisted
|
||||
impact_score: Impact score (0.0 to 1.0)
|
||||
|
||||
Returns:
|
||||
Escalation information
|
||||
"""
|
||||
# Calculate escalation level
|
||||
escalation_score = (anomaly.severity + impact_score) / 2
|
||||
escalation_score *= min(duration_minutes / 60, 2.0) # Cap at 2x for duration
|
||||
|
||||
if escalation_score > 0.8:
|
||||
level = 'critical'
|
||||
elif escalation_score > 0.5:
|
||||
level = 'high'
|
||||
elif escalation_score > 0.3:
|
||||
level = 'medium'
|
||||
else:
|
||||
level = 'low'
|
||||
|
||||
return {
|
||||
'anomaly_id': anomaly.anomaly_id,
|
||||
'escalation_level': level,
|
||||
'escalation_score': min(escalation_score, 1.0),
|
||||
'duration_minutes': duration_minutes,
|
||||
'impact_score': impact_score,
|
||||
'requires_immediate_action': escalation_score > 0.8
|
||||
}
|
||||
|
||||
def _get_baseline(self, workflow_id: str, metric_name: str) -> Optional[Dict]:
|
||||
"""Get baseline for workflow and metric."""
|
||||
baseline_key = f"{workflow_id}:{metric_name}"
|
||||
|
||||
if baseline_key not in self.baselines:
|
||||
# Try to calculate baseline
|
||||
self.update_baseline(workflow_id, metric_name=metric_name)
|
||||
|
||||
return self.baselines.get(baseline_key)
|
||||
|
||||
def _generate_anomaly_id(self, workflow_id: str, metric_name: str, metric: Dict) -> str:
|
||||
"""Generate unique anomaly ID."""
|
||||
data = f"{workflow_id}:{metric_name}:{metric.get('execution_id', '')}:{datetime.now().isoformat()}"
|
||||
return hashlib.md5(data.encode()).hexdigest()[:16]
|
||||
|
||||
def _generate_description(
|
||||
self,
|
||||
metric_name: str,
|
||||
actual_value: float,
|
||||
baseline_value: float,
|
||||
deviation: float
|
||||
) -> str:
|
||||
"""Generate human-readable anomaly description."""
|
||||
percent_diff = abs((actual_value - baseline_value) / baseline_value * 100) if baseline_value > 0 else 0
|
||||
direction = "higher" if actual_value > baseline_value else "lower"
|
||||
|
||||
return (
|
||||
f"{metric_name} is {percent_diff:.1f}% {direction} than baseline "
|
||||
f"({actual_value:.2f} vs {baseline_value:.2f}, {deviation:.1f} std devs)"
|
||||
)
|
||||
|
||||
def _generate_recommendation(
|
||||
self,
|
||||
metric_name: str,
|
||||
actual_value: float,
|
||||
baseline_value: float
|
||||
) -> str:
|
||||
"""Generate recommended action for anomaly."""
|
||||
if actual_value > baseline_value:
|
||||
if metric_name == 'duration_ms':
|
||||
return "Investigate performance degradation. Check for resource constraints or code changes."
|
||||
elif metric_name == 'error_rate':
|
||||
return "Investigate error spike. Check logs and recent deployments."
|
||||
elif metric_name in ['cpu_percent', 'memory_mb']:
|
||||
return "Investigate resource usage spike. Check for memory leaks or inefficient operations."
|
||||
else:
|
||||
if metric_name == 'success_rate':
|
||||
return "Investigate success rate drop. Check for system issues or data quality problems."
|
||||
|
||||
return "Monitor the situation and investigate if anomaly persists."
|
||||
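The detection rule above reduces to a plain z-score test; a minimal standalone sketch with hypothetical baseline numbers (in practice update_baseline() derives them from recent execution metrics):

baseline = {'mean': 1200.0, 'std_dev': 150.0}  # hypothetical values
sensitivity = 2.0  # same default as AnomalyDetector

value = 1800.0  # observed duration_ms
deviation = abs(value - baseline['mean']) / baseline['std_dev']  # 4.0 std devs

if deviation > sensitivity:
    # severity is capped at 1.0, exactly as in detect_anomalies()
    severity = min(deviation / (sensitivity * 2), 1.0)  # -> 1.0 here
    print(f"anomaly: {deviation:.1f} std devs, severity={severity:.2f}")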
301 core/analytics/engine/insight_generator.py Normal file
@@ -0,0 +1,301 @@
"""Automated insight generation for workflows."""
|
||||
|
||||
import logging
|
||||
import hashlib
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List, Dict, Any, Optional
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from .performance_analyzer import PerformanceAnalyzer, PerformanceStats
|
||||
from .anomaly_detector import AnomalyDetector, Anomaly
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class Insight:
|
||||
"""Generated insight with recommendation."""
|
||||
insight_id: str
|
||||
workflow_id: str
|
||||
category: str # 'performance', 'reliability', 'resource', 'best_practice'
|
||||
title: str
|
||||
description: str
|
||||
recommendation: str
|
||||
expected_impact: str
|
||||
ease_of_implementation: str # 'easy', 'medium', 'hard'
|
||||
priority_score: float
|
||||
supporting_data: Dict[str, Any]
|
||||
created_at: datetime
|
||||
implemented: bool = False
|
||||
actual_impact: Optional[Dict] = None
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert to dictionary."""
|
||||
return {
|
||||
'insight_id': self.insight_id,
|
||||
'workflow_id': self.workflow_id,
|
||||
'category': self.category,
|
||||
'title': self.title,
|
||||
'description': self.description,
|
||||
'recommendation': self.recommendation,
|
||||
'expected_impact': self.expected_impact,
|
||||
'ease_of_implementation': self.ease_of_implementation,
|
||||
'priority_score': self.priority_score,
|
||||
'supporting_data': self.supporting_data,
|
||||
'created_at': self.created_at.isoformat(),
|
||||
'implemented': self.implemented,
|
||||
'actual_impact': self.actual_impact
|
||||
}
|
||||
|
||||
|
||||
class InsightGenerator:
|
||||
"""Generates automated insights and recommendations."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
performance_analyzer: PerformanceAnalyzer,
|
||||
anomaly_detector: AnomalyDetector
|
||||
):
|
||||
"""
|
||||
Initialize insight generator.
|
||||
|
||||
Args:
|
||||
performance_analyzer: Performance analyzer instance
|
||||
anomaly_detector: Anomaly detector instance
|
||||
"""
|
||||
self.performance_analyzer = performance_analyzer
|
||||
self.anomaly_detector = anomaly_detector
|
||||
self._insight_implementations: Dict[str, Dict] = {}
|
||||
|
||||
logger.info("InsightGenerator initialized")
|
||||
|
||||
def generate_insights(
|
||||
self,
|
||||
workflow_id: str,
|
||||
analysis_period_days: int = 30
|
||||
) -> List[Insight]:
|
||||
"""
|
||||
Generate insights for a workflow.
|
||||
|
||||
Args:
|
||||
workflow_id: Workflow identifier
|
||||
analysis_period_days: Number of days to analyze
|
||||
|
||||
Returns:
|
||||
List of generated insights
|
||||
"""
|
||||
insights = []
|
||||
|
||||
end_time = datetime.now()
|
||||
start_time = end_time - timedelta(days=analysis_period_days)
|
||||
|
||||
# Analyze performance
|
||||
perf_stats = self.performance_analyzer.analyze_workflow(
|
||||
workflow_id,
|
||||
start_time,
|
||||
end_time
|
||||
)
|
||||
|
||||
if perf_stats:
|
||||
# Generate performance insights
|
||||
insights.extend(self._generate_performance_insights(perf_stats))
|
||||
|
||||
# Generate bottleneck insights
|
||||
insights.extend(self._generate_bottleneck_insights(perf_stats))
|
||||
|
||||
# Check for performance degradation
|
||||
degradation = self.performance_analyzer.detect_performance_degradation(
|
||||
workflow_id,
|
||||
baseline_period=timedelta(days=7),
|
||||
current_period=timedelta(days=1)
|
||||
)
|
||||
|
||||
if degradation:
|
||||
insights.append(self._generate_degradation_insight(degradation))
|
||||
|
||||
# Prioritize insights
|
||||
insights = self.prioritize_insights(insights)
|
||||
|
||||
return insights
|
||||
|
||||
def prioritize_insights(self, insights: List[Insight]) -> List[Insight]:
|
||||
"""
|
||||
Prioritize insights by impact and ease.
|
||||
|
||||
Args:
|
||||
insights: List of insights to prioritize
|
||||
|
||||
Returns:
|
||||
Sorted list of insights
|
||||
"""
|
||||
# Calculate priority scores
|
||||
for insight in insights:
|
||||
impact_score = self._calculate_impact_score(insight.expected_impact)
|
||||
ease_score = self._calculate_ease_score(insight.ease_of_implementation)
|
||||
|
||||
# Priority = Impact * Ease (higher is better)
|
||||
insight.priority_score = impact_score * ease_score
|
||||
|
||||
# Sort by priority (descending)
|
||||
return sorted(insights, key=lambda i: i.priority_score, reverse=True)
|
||||
|
||||
def track_insight_implementation(
|
||||
self,
|
||||
insight_id: str,
|
||||
implemented: bool,
|
||||
actual_impact: Optional[Dict] = None
|
||||
) -> None:
|
||||
"""
|
||||
Track insight implementation and measure impact.
|
||||
|
||||
Args:
|
||||
insight_id: Insight identifier
|
||||
implemented: Whether insight was implemented
|
||||
actual_impact: Measured impact after implementation
|
||||
"""
|
||||
self._insight_implementations[insight_id] = {
|
||||
'implemented': implemented,
|
||||
'actual_impact': actual_impact,
|
||||
'tracked_at': datetime.now()
|
||||
}
|
||||
|
||||
logger.info(f"Tracked implementation for insight {insight_id}")
|
||||
|
||||
def _generate_performance_insights(self, stats: PerformanceStats) -> List[Insight]:
|
||||
"""Generate insights from performance statistics."""
|
||||
insights = []
|
||||
|
||||
# High variability insight
|
||||
if stats.std_dev_ms > stats.avg_duration_ms * 0.5:
|
||||
insights.append(Insight(
|
||||
insight_id=self._generate_id(stats.workflow_id, 'high_variability'),
|
||||
workflow_id=stats.workflow_id,
|
||||
category='performance',
|
||||
title='High Performance Variability',
|
||||
description=(
|
||||
f"Execution time varies significantly (std dev: {stats.std_dev_ms:.0f}ms, "
|
||||
f"avg: {stats.avg_duration_ms:.0f}ms). This indicates inconsistent performance."
|
||||
),
|
||||
recommendation=(
|
||||
"Investigate causes of variability. Check for: "
|
||||
"1) Resource contention, 2) Network latency, 3) Data size variations, "
|
||||
"4) External service dependencies."
|
||||
),
|
||||
expected_impact="Reduce execution time variability by 30-50%",
|
||||
ease_of_implementation='medium',
|
||||
priority_score=0.0,
|
||||
supporting_data={'stats': stats.to_dict()},
|
||||
created_at=datetime.now()
|
||||
))
|
||||
|
||||
# Slow p99 insight
|
||||
if stats.p99_duration_ms > stats.median_duration_ms * 3:
|
||||
insights.append(Insight(
|
||||
insight_id=self._generate_id(stats.workflow_id, 'slow_p99'),
|
||||
workflow_id=stats.workflow_id,
|
||||
category='performance',
|
||||
title='Slow 99th Percentile Performance',
|
||||
description=(
|
||||
f"99th percentile ({stats.p99_duration_ms:.0f}ms) is 3x slower than median "
|
||||
f"({stats.median_duration_ms:.0f}ms). Some executions are significantly slower."
|
||||
),
|
||||
recommendation=(
|
||||
"Analyze slowest executions to identify outliers. "
|
||||
"Consider adding timeouts or optimizing worst-case scenarios."
|
||||
),
|
||||
expected_impact="Improve worst-case performance by 40-60%",
|
||||
ease_of_implementation='medium',
|
||||
priority_score=0.0,
|
||||
supporting_data={'stats': stats.to_dict()},
|
||||
created_at=datetime.now()
|
||||
))
|
||||
|
||||
return insights
|
||||
|
||||
def _generate_bottleneck_insights(self, stats: PerformanceStats) -> List[Insight]:
|
||||
"""Generate insights from bottleneck analysis."""
|
||||
insights = []
|
||||
|
||||
if not stats.slowest_steps:
|
||||
return insights
|
||||
|
||||
# Top bottleneck
|
||||
top_bottleneck = stats.slowest_steps[0]
|
||||
|
||||
insights.append(Insight(
|
||||
insight_id=self._generate_id(stats.workflow_id, 'top_bottleneck'),
|
||||
workflow_id=stats.workflow_id,
|
||||
category='performance',
|
||||
title=f"Bottleneck: {top_bottleneck['action_type']} on {top_bottleneck['node_id']}",
|
||||
description=(
|
||||
f"Step '{top_bottleneck['action_type']}' takes {top_bottleneck['avg_duration_ms']:.0f}ms "
|
||||
f"on average (p95: {top_bottleneck['p95_duration_ms']:.0f}ms). "
|
||||
f"This is the slowest step in the workflow."
|
||||
),
|
||||
recommendation=(
|
||||
f"Optimize the '{top_bottleneck['action_type']}' action. "
|
||||
"Consider: 1) Caching results, 2) Parallel execution, "
|
||||
"3) Reducing wait times, 4) Optimizing selectors."
|
||||
),
|
||||
expected_impact=f"Reduce overall workflow time by {(top_bottleneck['avg_duration_ms'] / stats.avg_duration_ms * 100 * 0.5):.0f}%",
|
||||
ease_of_implementation='easy',
|
||||
priority_score=0.0,
|
||||
supporting_data={'bottleneck': top_bottleneck},
|
||||
created_at=datetime.now()
|
||||
))
|
||||
|
||||
return insights
|
||||
|
||||
def _generate_degradation_insight(self, degradation: Dict) -> Insight:
|
||||
"""Generate insight from performance degradation."""
|
||||
return Insight(
|
||||
insight_id=self._generate_id(degradation['workflow_id'], 'degradation'),
|
||||
workflow_id=degradation['workflow_id'],
|
||||
category='performance',
|
||||
title='Performance Degradation Detected',
|
||||
description=(
|
||||
f"Performance has degraded by {degradation['percent_change']:.1f}% "
|
||||
f"(from {degradation['baseline_avg_ms']:.0f}ms to {degradation['current_avg_ms']:.0f}ms)."
|
||||
),
|
||||
recommendation=(
|
||||
"Investigate recent changes: 1) Code deployments, 2) Data volume increases, "
|
||||
"3) Infrastructure changes, 4) External service degradation."
|
||||
),
|
||||
expected_impact="Restore baseline performance",
|
||||
ease_of_implementation='medium',
|
||||
priority_score=0.0,
|
||||
supporting_data=degradation,
|
||||
created_at=datetime.now()
|
||||
)
|
||||
|
||||
def _calculate_impact_score(self, expected_impact: str) -> float:
|
||||
"""Calculate impact score from expected impact description."""
|
||||
impact_lower = expected_impact.lower()
|
||||
|
||||
# Look for percentage improvements
|
||||
if '50%' in impact_lower or '60%' in impact_lower:
|
||||
return 1.0
|
||||
elif '30%' in impact_lower or '40%' in impact_lower:
|
||||
return 0.8
|
||||
elif '20%' in impact_lower:
|
||||
return 0.6
|
||||
elif '10%' in impact_lower:
|
||||
return 0.4
|
||||
else:
|
||||
return 0.5 # Default
|
||||
|
||||
def _calculate_ease_score(self, ease: str) -> float:
|
||||
"""Calculate ease score from ease of implementation."""
|
||||
if ease == 'easy':
|
||||
return 1.0
|
||||
elif ease == 'medium':
|
||||
return 0.6
|
||||
elif ease == 'hard':
|
||||
return 0.3
|
||||
else:
|
||||
return 0.5
|
||||
|
||||
def _generate_id(self, workflow_id: str, insight_type: str) -> str:
|
||||
"""Generate unique insight ID."""
|
||||
data = f"{workflow_id}:{insight_type}:{datetime.now().date().isoformat()}"
|
||||
return hashlib.md5(data.encode()).hexdigest()[:16]
|
||||
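To make the prioritization concrete: priority is the product of the parsed impact and ease scores, so a cheap medium-impact fix can outrank an expensive high-impact one. A worked sketch using the score tables above (existing analyzer and detector instances assumed):

gen = InsightGenerator(performance_analyzer, anomaly_detector)
easy_fix = gen._calculate_impact_score("Reduce overall workflow time by 30%") \
    * gen._calculate_ease_score('easy')   # 0.8 * 1.0 = 0.80
hard_fix = gen._calculate_impact_score("Improve worst-case performance by 40-60%") \
    * gen._calculate_ease_score('hard')   # 1.0 * 0.3 = 0.30 ('60%' matches first)
# prioritize_insights() would therefore rank the easy fix above the hard one.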
359 core/analytics/engine/performance_analyzer.py Normal file
@@ -0,0 +1,359 @@
"""Performance analysis for workflows."""
|
||||
|
||||
import logging
|
||||
import statistics
|
||||
from dataclasses import dataclass
|
||||
from typing import List, Dict, Any, Optional
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from ..storage.timeseries_store import TimeSeriesStore
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class PerformanceStats:
|
||||
"""Performance statistics for a workflow."""
|
||||
workflow_id: str
|
||||
time_period: str
|
||||
execution_count: int
|
||||
avg_duration_ms: float
|
||||
median_duration_ms: float
|
||||
p95_duration_ms: float
|
||||
p99_duration_ms: float
|
||||
min_duration_ms: float
|
||||
max_duration_ms: float
|
||||
std_dev_ms: float
|
||||
slowest_steps: List[Dict]
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert to dictionary."""
|
||||
return {
|
||||
'workflow_id': self.workflow_id,
|
||||
'time_period': self.time_period,
|
||||
'execution_count': self.execution_count,
|
||||
'avg_duration_ms': self.avg_duration_ms,
|
||||
'median_duration_ms': self.median_duration_ms,
|
||||
'p95_duration_ms': self.p95_duration_ms,
|
||||
'p99_duration_ms': self.p99_duration_ms,
|
||||
'min_duration_ms': self.min_duration_ms,
|
||||
'max_duration_ms': self.max_duration_ms,
|
||||
'std_dev_ms': self.std_dev_ms,
|
||||
'slowest_steps': self.slowest_steps
|
||||
}
|
||||
|
||||
|
||||
class PerformanceAnalyzer:
|
||||
"""Analyzes workflow performance metrics."""
|
||||
|
||||
def __init__(self, time_series_store: TimeSeriesStore):
|
||||
"""
|
||||
Initialize performance analyzer.
|
||||
|
||||
Args:
|
||||
time_series_store: Time series storage for metrics
|
||||
"""
|
||||
self.store = time_series_store
|
||||
logger.info("PerformanceAnalyzer initialized")
|
||||
|
||||
def analyze_workflow(
|
||||
self,
|
||||
workflow_id: str,
|
||||
start_time: datetime,
|
||||
end_time: datetime
|
||||
) -> Optional[PerformanceStats]:
|
||||
"""
|
||||
Analyze performance for a workflow.
|
||||
|
||||
Args:
|
||||
workflow_id: Workflow identifier
|
||||
start_time: Start of analysis period
|
||||
end_time: End of analysis period
|
||||
|
||||
Returns:
|
||||
PerformanceStats or None if no data
|
||||
"""
|
||||
# Query execution metrics
|
||||
metrics = self.store.query_range(
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
workflow_id=workflow_id,
|
||||
metric_types=['execution']
|
||||
)
|
||||
|
||||
executions = metrics.get('execution', [])
|
||||
if not executions:
|
||||
logger.warning(f"No execution data for workflow {workflow_id}")
|
||||
return None
|
||||
|
||||
# Filter completed executions with duration
|
||||
completed = [
|
||||
e for e in executions
|
||||
if e.get('status') == 'completed' and e.get('duration_ms') is not None
|
||||
]
|
||||
|
||||
if not completed:
|
||||
logger.warning(f"No completed executions for workflow {workflow_id}")
|
||||
return None
|
||||
|
||||
# Extract durations
|
||||
durations = [e['duration_ms'] for e in completed]
|
||||
|
||||
# Calculate statistics
|
||||
avg_duration = statistics.mean(durations)
|
||||
median_duration = statistics.median(durations)
|
||||
min_duration = min(durations)
|
||||
max_duration = max(durations)
|
||||
std_dev = statistics.stdev(durations) if len(durations) > 1 else 0.0
|
||||
|
||||
# Calculate percentiles
|
||||
sorted_durations = sorted(durations)
|
||||
p95_duration = self._percentile(sorted_durations, 0.95)
|
||||
p99_duration = self._percentile(sorted_durations, 0.99)
|
||||
|
||||
# Identify slowest steps
|
||||
slowest_steps = self.identify_bottlenecks(
|
||||
workflow_id,
|
||||
start_time,
|
||||
end_time,
|
||||
threshold_percentile=0.95
|
||||
)
|
||||
|
||||
time_period = f"{start_time.isoformat()} to {end_time.isoformat()}"
|
||||
|
||||
return PerformanceStats(
|
||||
workflow_id=workflow_id,
|
||||
time_period=time_period,
|
||||
execution_count=len(completed),
|
||||
avg_duration_ms=avg_duration,
|
||||
median_duration_ms=median_duration,
|
||||
p95_duration_ms=p95_duration,
|
||||
p99_duration_ms=p99_duration,
|
||||
min_duration_ms=min_duration,
|
||||
max_duration_ms=max_duration,
|
||||
std_dev_ms=std_dev,
|
||||
slowest_steps=slowest_steps[:5] # Top 5 slowest
|
||||
)
|
||||
|
||||
def identify_bottlenecks(
|
||||
self,
|
||||
workflow_id: str,
|
||||
start_time: datetime,
|
||||
end_time: datetime,
|
||||
threshold_percentile: float = 0.95
|
||||
) -> List[Dict]:
|
||||
"""
|
||||
Identify bottleneck steps in a workflow.
|
||||
|
||||
Args:
|
||||
workflow_id: Workflow identifier
|
||||
start_time: Start of analysis period
|
||||
end_time: End of analysis period
|
||||
threshold_percentile: Percentile threshold for bottlenecks
|
||||
|
||||
Returns:
|
||||
List of bottleneck steps sorted by duration
|
||||
"""
|
||||
# Query step metrics
|
||||
metrics = self.store.query_range(
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
workflow_id=workflow_id,
|
||||
metric_types=['step']
|
||||
)
|
||||
|
||||
steps = metrics.get('step', [])
|
||||
if not steps:
|
||||
return []
|
||||
|
||||
# Group by node_id and action_type
|
||||
step_groups: Dict[tuple, List[float]] = {}
|
||||
for step in steps:
|
||||
key = (step['node_id'], step['action_type'])
|
||||
if key not in step_groups:
|
||||
step_groups[key] = []
|
||||
step_groups[key].append(step['duration_ms'])
|
||||
|
||||
# Calculate statistics for each group
|
||||
bottlenecks = []
|
||||
for (node_id, action_type), durations in step_groups.items():
|
||||
if not durations:
|
||||
continue
|
||||
|
||||
avg_duration = statistics.mean(durations)
|
||||
p95_duration = self._percentile(sorted(durations), threshold_percentile)
|
||||
|
||||
bottlenecks.append({
|
||||
'node_id': node_id,
|
||||
'action_type': action_type,
|
||||
'avg_duration_ms': avg_duration,
|
||||
'p95_duration_ms': p95_duration,
|
||||
'execution_count': len(durations),
|
||||
'max_duration_ms': max(durations)
|
||||
})
|
||||
|
||||
# Sort by p95 duration (descending)
|
||||
bottlenecks.sort(key=lambda x: x['p95_duration_ms'], reverse=True)
|
||||
|
||||
return bottlenecks
|
||||
|
||||
def detect_performance_degradation(
|
||||
self,
|
||||
workflow_id: str,
|
||||
baseline_period: timedelta,
|
||||
current_period: timedelta,
|
||||
threshold_percent: float = 20.0
|
||||
) -> Optional[Dict]:
|
||||
"""
|
||||
Detect performance degradation compared to baseline.
|
||||
|
||||
Args:
|
||||
workflow_id: Workflow identifier
|
||||
baseline_period: Duration of baseline period (e.g., last 7 days)
|
||||
current_period: Duration of current period (e.g., last 24 hours)
|
||||
threshold_percent: Threshold for degradation alert (%)
|
||||
|
||||
Returns:
|
||||
Degradation info dict or None if no degradation
|
||||
"""
|
||||
now = datetime.now()
|
||||
|
||||
# Baseline period (older)
|
||||
baseline_end = now - current_period
|
||||
baseline_start = baseline_end - baseline_period
|
||||
|
||||
# Current period (recent)
|
||||
current_start = now - current_period
|
||||
current_end = now
|
||||
|
||||
# Analyze both periods
|
||||
baseline_stats = self.analyze_workflow(
|
||||
workflow_id,
|
||||
baseline_start,
|
||||
baseline_end
|
||||
)
|
||||
|
||||
current_stats = self.analyze_workflow(
|
||||
workflow_id,
|
||||
current_start,
|
||||
current_end
|
||||
)
|
||||
|
||||
if not baseline_stats or not current_stats:
|
||||
logger.warning(f"Insufficient data for degradation detection: {workflow_id}")
|
||||
return None
|
||||
|
||||
# Calculate percentage change
|
||||
baseline_avg = baseline_stats.avg_duration_ms
|
||||
current_avg = current_stats.avg_duration_ms
|
||||
|
||||
if baseline_avg == 0:
|
||||
return None
|
||||
|
||||
percent_change = ((current_avg - baseline_avg) / baseline_avg) * 100
|
||||
|
||||
# Check if degradation exceeds threshold
|
||||
if percent_change > threshold_percent:
|
||||
return {
|
||||
'workflow_id': workflow_id,
|
||||
'degradation_detected': True,
|
||||
'baseline_avg_ms': baseline_avg,
|
||||
'current_avg_ms': current_avg,
|
||||
'percent_change': percent_change,
|
||||
'threshold_percent': threshold_percent,
|
||||
'baseline_period': str(baseline_period),
|
||||
'current_period': str(current_period),
|
||||
'severity': 'high' if percent_change > threshold_percent * 2 else 'medium'
|
||||
}
|
||||
|
||||
return None
|
||||
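A quick arithmetic check of the threshold logic above, with hypothetical averages (a 7-day baseline of 2000ms against a last-24h average of 2600ms):

baseline_avg, current_avg, threshold = 2000.0, 2600.0, 20.0  # hypothetical values
percent_change = ((current_avg - baseline_avg) / baseline_avg) * 100  # 30.0
# 30.0 > 20.0, so degradation is reported; 30.0 <= 40.0 (threshold * 2),
# so the severity is 'medium', not 'high'.
severity = 'high' if percent_change > threshold * 2 else 'medium'  # 'medium'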
    def compare_workflows(
        self,
        workflow_ids: List[str],
        start_time: datetime,
        end_time: datetime
    ) -> Dict[str, PerformanceStats]:
        """
        Compare performance across multiple workflows.

        Args:
            workflow_ids: List of workflow identifiers
            start_time: Start of analysis period
            end_time: End of analysis period

        Returns:
            Dictionary mapping workflow_id to PerformanceStats
        """
        results = {}

        for workflow_id in workflow_ids:
            stats = self.analyze_workflow(workflow_id, start_time, end_time)
            if stats:
                results[workflow_id] = stats

        return results

    def get_performance_trend(
        self,
        workflow_id: str,
        start_time: datetime,
        end_time: datetime,
        bucket_size: timedelta = timedelta(hours=1)
    ) -> List[Dict]:
        """
        Get performance trend over time with bucketing.

        Args:
            workflow_id: Workflow identifier
            start_time: Start of analysis period
            end_time: End of analysis period
            bucket_size: Size of time buckets

        Returns:
            List of performance data points over time
        """
        trend = []
        current = start_time

        while current < end_time:
            bucket_end = min(current + bucket_size, end_time)

            stats = self.analyze_workflow(workflow_id, current, bucket_end)
            if stats:
                trend.append({
                    'timestamp': current.isoformat(),
                    'avg_duration_ms': stats.avg_duration_ms,
                    'median_duration_ms': stats.median_duration_ms,
                    'execution_count': stats.execution_count
                })

            current = bucket_end

        return trend

    @staticmethod
    def _percentile(sorted_data: List[float], percentile: float) -> float:
        """
        Calculate percentile from sorted data.

        Args:
            sorted_data: Sorted list of values
            percentile: Percentile to calculate (0.0 to 1.0)

        Returns:
            Percentile value
        """
        if not sorted_data:
            return 0.0

        if len(sorted_data) == 1:
            return sorted_data[0]

        # Linear interpolation
        index = percentile * (len(sorted_data) - 1)
        lower = int(index)
        upper = min(lower + 1, len(sorted_data) - 1)
        weight = index - lower

        return sorted_data[lower] * (1 - weight) + sorted_data[upper] * weight
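Since _percentile is a staticmethod, the interpolation can be exercised directly; a small check:

data = sorted([100.0, 200.0, 300.0, 400.0, 500.0])
# index = 0.95 * (5 - 1) = 3.8 -> interpolate between data[3] and data[4]:
# 400 * 0.2 + 500 * 0.8 = 480.0
print(PerformanceAnalyzer._percentile(data, 0.95))  # 480.0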
334 core/analytics/engine/success_rate_calculator.py Normal file
@@ -0,0 +1,334 @@
"""Success rate analytics for workflows."""
|
||||
|
||||
import logging
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
from datetime import datetime, timedelta
|
||||
from dataclasses import dataclass
|
||||
from collections import defaultdict
|
||||
|
||||
from ..storage.timeseries_store import TimeSeriesStore
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class SuccessRateStats:
|
||||
"""Success rate statistics."""
|
||||
workflow_id: str
|
||||
total_executions: int
|
||||
successful_executions: int
|
||||
failed_executions: int
|
||||
success_rate: float
|
||||
failure_categories: Dict[str, int]
|
||||
reliability_score: float
|
||||
time_window_start: datetime
|
||||
time_window_end: datetime
|
||||
|
||||
def to_dict(self) -> Dict:
|
||||
"""Convert to dictionary."""
|
||||
return {
|
||||
'workflow_id': self.workflow_id,
|
||||
'total_executions': self.total_executions,
|
||||
'successful_executions': self.successful_executions,
|
||||
'failed_executions': self.failed_executions,
|
||||
'success_rate': self.success_rate,
|
||||
'failure_categories': self.failure_categories,
|
||||
'reliability_score': self.reliability_score,
|
||||
'time_window_start': self.time_window_start.isoformat(),
|
||||
'time_window_end': self.time_window_end.isoformat()
|
||||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class ReliabilityRanking:
|
||||
"""Workflow reliability ranking."""
|
||||
workflow_id: str
|
||||
reliability_score: float
|
||||
success_rate: float
|
||||
stability_score: float
|
||||
total_executions: int
|
||||
rank: int
|
||||
|
||||
def to_dict(self) -> Dict:
|
||||
"""Convert to dictionary."""
|
||||
return {
|
||||
'workflow_id': self.workflow_id,
|
||||
'reliability_score': self.reliability_score,
|
||||
'success_rate': self.success_rate,
|
||||
'stability_score': self.stability_score,
|
||||
'total_executions': self.total_executions,
|
||||
'rank': self.rank
|
||||
}
|
||||
|
||||
|
||||
class SuccessRateCalculator:
|
||||
"""Calculate success rates and reliability metrics."""
|
||||
|
||||
def __init__(self, store: TimeSeriesStore):
|
||||
"""
|
||||
Initialize success rate calculator.
|
||||
|
||||
Args:
|
||||
store: Time-series storage instance
|
||||
"""
|
||||
self.store = store
|
||||
logger.info("SuccessRateCalculator initialized")
|
||||
|
||||
def calculate_success_rate(
|
||||
self,
|
||||
workflow_id: str,
|
||||
time_window_hours: int = 24
|
||||
) -> SuccessRateStats:
|
||||
"""
|
||||
Calculate success rate for a workflow.
|
||||
|
||||
Args:
|
||||
workflow_id: Workflow identifier
|
||||
time_window_hours: Time window in hours
|
||||
|
||||
Returns:
|
||||
Success rate statistics
|
||||
"""
|
||||
end_time = datetime.now()
|
||||
start_time = end_time - timedelta(hours=time_window_hours)
|
||||
|
||||
# Query execution metrics
|
||||
metrics = self.store.query_range(
|
||||
metric_type='execution',
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
filters={'workflow_id': workflow_id}
|
||||
)
|
||||
|
||||
total = len(metrics)
|
||||
successful = sum(1 for m in metrics if m.get('status') == 'success')
|
||||
failed = total - successful
|
||||
success_rate = (successful / total * 100) if total > 0 else 0.0
|
||||
|
||||
# Categorize failures
|
||||
failure_categories = self._categorize_failures(
|
||||
[m for m in metrics if m.get('status') != 'success']
|
||||
)
|
||||
|
||||
# Calculate reliability score
|
||||
reliability_score = self._calculate_reliability_score(
|
||||
success_rate=success_rate,
|
||||
total_executions=total,
|
||||
failure_categories=failure_categories
|
||||
)
|
||||
|
||||
return SuccessRateStats(
|
||||
workflow_id=workflow_id,
|
||||
total_executions=total,
|
||||
successful_executions=successful,
|
||||
failed_executions=failed,
|
||||
success_rate=success_rate,
|
||||
failure_categories=failure_categories,
|
||||
reliability_score=reliability_score,
|
||||
time_window_start=start_time,
|
||||
time_window_end=end_time
|
||||
)
|
||||
|
||||
def categorize_failures(
|
||||
self,
|
||||
workflow_id: str,
|
||||
time_window_hours: int = 24
|
||||
) -> Dict[str, int]:
|
||||
"""
|
||||
Categorize failures by type.
|
||||
|
||||
Args:
|
||||
workflow_id: Workflow identifier
|
||||
time_window_hours: Time window in hours
|
||||
|
||||
Returns:
|
||||
Dictionary of failure categories and counts
|
||||
"""
|
||||
end_time = datetime.now()
|
||||
start_time = end_time - timedelta(hours=time_window_hours)
|
||||
|
||||
# Query failed executions
|
||||
metrics = self.store.query_range(
|
||||
metric_type='execution',
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
filters={'workflow_id': workflow_id}
|
||||
)
|
||||
|
||||
failed_metrics = [m for m in metrics if m.get('status') != 'success']
|
||||
return self._categorize_failures(failed_metrics)
|
||||
|
||||
def _categorize_failures(self, failed_metrics: List[Dict]) -> Dict[str, int]:
|
||||
"""
|
||||
Categorize failures by error type.
|
||||
|
||||
Args:
|
||||
failed_metrics: List of failed execution metrics
|
||||
|
||||
Returns:
|
||||
Dictionary of categories and counts
|
||||
"""
|
||||
categories = defaultdict(int)
|
||||
|
||||
for metric in failed_metrics:
|
||||
error_msg = metric.get('error_message', '').lower()
|
||||
|
||||
# Categorize by error type
|
||||
if 'timeout' in error_msg:
|
||||
categories['timeout'] += 1
|
||||
elif 'not found' in error_msg or 'element' in error_msg:
|
||||
categories['element_not_found'] += 1
|
||||
elif 'permission' in error_msg or 'access' in error_msg:
|
||||
categories['permission_error'] += 1
|
||||
elif 'network' in error_msg or 'connection' in error_msg:
|
||||
categories['network_error'] += 1
|
||||
elif 'validation' in error_msg:
|
||||
categories['validation_error'] += 1
|
||||
else:
|
||||
categories['other'] += 1
|
||||
|
||||
return dict(categories)
|
||||
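Because the keyword matching is first-match-wins, an error mentioning both 'timeout' and 'element' counts as a timeout. A small sketch (a configured TimeSeriesStore is assumed for the constructor):

calc = SuccessRateCalculator(store)
failed = [
    {'error_message': 'Timeout waiting for element #submit'},  # -> 'timeout'
    {'error_message': 'Element not found: #login'},            # -> 'element_not_found'
    {'error_message': 'Disk quota exceeded'},                  # -> 'other'
]
print(calc._categorize_failures(failed))
# {'timeout': 1, 'element_not_found': 1, 'other': 1}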
    def rank_workflows_by_reliability(
        self,
        workflow_ids: Optional[List[str]] = None,
        time_window_hours: int = 168  # 1 week
    ) -> List[ReliabilityRanking]:
        """
        Rank workflows by reliability score.

        Args:
            workflow_ids: List of workflow IDs (None = all)
            time_window_hours: Time window in hours

        Returns:
            List of reliability rankings sorted by score
        """
        end_time = datetime.now()
        start_time = end_time - timedelta(hours=time_window_hours)

        # Get all workflows if not specified
        if workflow_ids is None:
            metrics = self.store.query_range(
                metric_type='execution',
                start_time=start_time,
                end_time=end_time
            )
            workflow_ids = list(set(m.get('workflow_id') for m in metrics if m.get('workflow_id')))

        # Calculate reliability for each workflow
        rankings = []
        for workflow_id in workflow_ids:
            stats = self.calculate_success_rate(workflow_id, time_window_hours)

            # Calculate stability score (consistency over time)
            stability_score = self._calculate_stability_score(
                workflow_id, start_time, end_time
            )

            rankings.append(ReliabilityRanking(
                workflow_id=workflow_id,
                reliability_score=stats.reliability_score,
                success_rate=stats.success_rate,
                stability_score=stability_score,
                total_executions=stats.total_executions,
                rank=0  # Will be set after sorting
            ))

        # Sort by reliability score (descending)
        rankings.sort(key=lambda r: r.reliability_score, reverse=True)

        # Assign ranks
        for i, ranking in enumerate(rankings, 1):
            ranking.rank = i

        return rankings

    def _calculate_reliability_score(
        self,
        success_rate: float,
        total_executions: int,
        failure_categories: Dict[str, int]
    ) -> float:
        """
        Calculate overall reliability score.

        Args:
            success_rate: Success rate percentage
            total_executions: Total number of executions
            failure_categories: Failure categories

        Returns:
            Reliability score (0-100)
        """
        # Base score from success rate (70% weight)
        base_score = success_rate * 0.7

        # Execution volume bonus (up to 15% for 100+ executions)
        volume_bonus = min(total_executions / 100 * 15, 15)

        # Failure diversity penalty (up to -15% for many failure types)
        num_failure_types = len(failure_categories)
        diversity_penalty = min(num_failure_types * 3, 15)

        # Calculate final score
        reliability_score = base_score + volume_bonus - diversity_penalty

        # Clamp to 0-100
        return max(0.0, min(100.0, reliability_score))

    def _calculate_stability_score(
        self,
        workflow_id: str,
        start_time: datetime,
        end_time: datetime
    ) -> float:
        """
        Calculate stability score (consistency over time).

        Args:
            workflow_id: Workflow identifier
            start_time: Start of time window
            end_time: End of time window

        Returns:
            Stability score (0-100)
        """
        # Split time window into buckets
        num_buckets = 7  # Weekly buckets
        bucket_duration = (end_time - start_time) / num_buckets

        bucket_success_rates = []
        for i in range(num_buckets):
            bucket_start = start_time + (bucket_duration * i)
            bucket_end = bucket_start + bucket_duration

            metrics = self.store.query_range(
                metric_type='execution',
                start_time=bucket_start,
                end_time=bucket_end,
                filters={'workflow_id': workflow_id}
            )

            if metrics:
                successful = sum(1 for m in metrics if m.get('status') == 'success')
                success_rate = (successful / len(metrics)) * 100
                bucket_success_rates.append(success_rate)

        if not bucket_success_rates:
            return 0.0

        # Calculate coefficient of variation (lower = more stable)
        mean = statistics.mean(bucket_success_rates)
        if mean == 0:
            return 0.0

        stdev = statistics.stdev(bucket_success_rates) if len(bucket_success_rates) > 1 else 0
        cv = (stdev / mean) * 100

        # Convert to stability score (lower CV = higher stability)
        # CV of 0 = 100 stability, CV of 50+ = 0 stability
        stability_score = max(0.0, 100.0 - (cv * 2))

        return stability_score
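A worked example of the reliability formula with hypothetical numbers, say 90% success over 50 executions with two failure categories:

success_rate, total_executions = 90.0, 50
failure_categories = {'timeout': 3, 'other': 2}           # hypothetical breakdown
base_score = success_rate * 0.7                           # 63.0
volume_bonus = min(total_executions / 100 * 15, 15)       # 7.5
diversity_penalty = min(len(failure_categories) * 3, 15)  # 6.0
score = max(0.0, min(100.0, base_score + volume_bonus - diversity_penalty))  # 64.5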
11 core/analytics/integration/__init__.py Normal file
@@ -0,0 +1,11 @@
"""Analytics integration module."""
|
||||
|
||||
from .execution_integration import (
|
||||
AnalyticsExecutionIntegration,
|
||||
get_analytics_integration
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
'AnalyticsExecutionIntegration',
|
||||
'get_analytics_integration'
|
||||
]
|
||||
370 core/analytics/integration/execution_integration.py Normal file
@@ -0,0 +1,370 @@
"""Integration of analytics with ExecutionLoop."""
|
||||
|
||||
import logging
|
||||
from typing import Optional
|
||||
from datetime import datetime
|
||||
import uuid
|
||||
|
||||
from ..analytics_system import get_analytics_system
|
||||
from ..collection.metrics_collector import ExecutionMetrics, StepMetrics
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class AnalyticsExecutionIntegration:
|
||||
"""Integrate analytics collection with workflow execution."""
|
||||
|
||||
def __init__(self, enabled: bool = True):
|
||||
"""
|
||||
Initialize analytics integration.
|
||||
|
||||
Args:
|
||||
enabled: Whether analytics collection is enabled
|
||||
"""
|
||||
self.enabled = enabled
|
||||
self.analytics = None
|
||||
|
||||
if enabled:
|
||||
try:
|
||||
self.analytics = get_analytics_system()
|
||||
logger.info("Analytics integration enabled")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to initialize analytics: {e}")
|
||||
self.enabled = False
|
||||
|
||||
def on_execution_start(
|
||||
self,
|
||||
workflow_id: str,
|
||||
execution_id: Optional[str] = None,
|
||||
total_steps: int = 0
|
||||
) -> str:
|
||||
"""
|
||||
Called when workflow execution starts.
|
||||
|
||||
Args:
|
||||
workflow_id: Workflow identifier
|
||||
execution_id: Execution identifier (generated if None)
|
||||
total_steps: Total number of steps
|
||||
|
||||
Returns:
|
||||
Execution ID
|
||||
"""
|
||||
if not self.enabled or not self.analytics:
|
||||
return execution_id or str(uuid.uuid4())
|
||||
|
||||
if execution_id is None:
|
||||
execution_id = str(uuid.uuid4())
|
||||
|
||||
try:
|
||||
# Start real-time tracking
|
||||
self.analytics.realtime_analytics.track_execution(
|
||||
execution_id=execution_id,
|
||||
workflow_id=workflow_id,
|
||||
total_steps=total_steps
|
||||
)
|
||||
|
||||
logger.debug(f"Started tracking execution: {execution_id}")
|
||||
except Exception as e:
|
||||
logger.error(f"Error starting execution tracking: {e}")
|
||||
|
||||
return execution_id
|
||||
|
||||
def on_step_start(
|
||||
self,
|
||||
execution_id: str,
|
||||
node_id: str,
|
||||
step_number: int
|
||||
) -> None:
|
||||
"""
|
||||
Called when a step starts.
|
||||
|
||||
Args:
|
||||
execution_id: Execution identifier
|
||||
node_id: Node identifier
|
||||
step_number: Step number
|
||||
"""
|
||||
if not self.enabled or not self.analytics:
|
||||
return
|
||||
|
||||
try:
|
||||
# Update progress
|
||||
self.analytics.realtime_analytics.update_progress(
|
||||
execution_id=execution_id,
|
||||
current_step=step_number,
|
||||
current_node_id=node_id
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Error updating step progress: {e}")
|
||||
|
||||
def on_step_complete(
|
||||
self,
|
||||
execution_id: str,
|
||||
workflow_id: str,
|
||||
node_id: str,
|
||||
action_type: str,
|
||||
started_at: datetime,
|
||||
completed_at: datetime,
|
||||
duration: float,
|
||||
success: bool,
|
||||
error_message: Optional[str] = None
|
||||
) -> None:
|
||||
"""
|
||||
Called when a step completes.
|
||||
|
||||
Args:
|
||||
execution_id: Execution identifier
|
||||
workflow_id: Workflow identifier
|
||||
node_id: Node identifier
|
||||
action_type: Type of action
|
||||
started_at: Start timestamp
|
||||
completed_at: Completion timestamp
|
||||
duration: Duration in seconds
|
||||
success: Whether step succeeded
|
||||
error_message: Error message if failed
|
||||
"""
|
||||
if not self.enabled or not self.analytics:
|
||||
return
|
||||
|
||||
try:
|
||||
# Record step metrics
|
||||
step_metrics = StepMetrics(
|
||||
execution_id=execution_id,
|
||||
workflow_id=workflow_id,
|
||||
node_id=node_id,
|
||||
action_type=action_type,
|
||||
started_at=started_at,
|
||||
completed_at=completed_at,
|
||||
duration=duration,
|
||||
success=success,
|
||||
error_message=error_message
|
||||
)
|
||||
|
||||
self.analytics.metrics_collector.record_step(step_metrics)
|
||||
|
||||
# Update real-time tracking
|
||||
self.analytics.realtime_analytics.record_step_complete(
|
||||
execution_id=execution_id,
|
||||
success=success
|
||||
)
|
||||
|
||||
logger.debug(f"Recorded step: {node_id} ({'success' if success else 'failed'})")
|
||||
except Exception as e:
|
||||
logger.error(f"Error recording step completion: {e}")
|
||||
|
||||
def on_execution_complete(
|
||||
self,
|
||||
execution_id: str,
|
||||
workflow_id: str,
|
||||
started_at: datetime,
|
||||
completed_at: datetime,
|
||||
duration: float,
|
||||
status: str,
|
||||
error_message: Optional[str] = None,
|
||||
steps_completed: int = 0,
|
||||
steps_failed: int = 0
|
||||
) -> None:
|
||||
"""
|
||||
Called when workflow execution completes.
|
||||
|
||||
Args:
|
||||
execution_id: Execution identifier
|
||||
workflow_id: Workflow identifier
|
||||
started_at: Start timestamp
|
||||
completed_at: Completion timestamp
|
||||
duration: Duration in seconds
|
||||
status: Final status (success, failed, timeout)
|
||||
error_message: Error message if failed
|
||||
steps_completed: Number of steps completed
|
||||
steps_failed: Number of steps failed
|
||||
"""
|
||||
if not self.enabled or not self.analytics:
|
||||
return
|
||||
|
||||
try:
|
||||
# Record execution metrics
|
||||
execution_metrics = ExecutionMetrics(
|
||||
execution_id=execution_id,
|
||||
workflow_id=workflow_id,
|
||||
started_at=started_at,
|
||||
completed_at=completed_at,
|
||||
duration=duration,
|
||||
status=status,
|
||||
error_message=error_message,
|
||||
steps_completed=steps_completed,
|
||||
steps_failed=steps_failed
|
||||
)
|
||||
|
||||
self.analytics.metrics_collector.record_execution(execution_metrics)
|
||||
|
||||
# Flush to ensure persistence
|
||||
self.analytics.metrics_collector.flush()
|
||||
|
||||
# Complete real-time tracking
|
||||
self.analytics.realtime_analytics.complete_execution(
|
||||
execution_id=execution_id,
|
||||
status=status
|
||||
)
|
||||
|
||||
logger.info(f"Recorded execution: {execution_id} ({status})")
|
||||
except Exception as e:
|
||||
logger.error(f"Error recording execution completion: {e}")
|
||||
|
||||
def on_recovery_attempt(
|
||||
self,
|
||||
execution_id: str,
|
||||
workflow_id: str,
|
||||
node_id: str,
|
||||
strategy: str,
|
||||
success: bool,
|
||||
duration: float
|
||||
) -> None:
|
||||
"""
|
||||
Called when self-healing attempts recovery.
|
||||
|
||||
Args:
|
||||
execution_id: Execution identifier
|
||||
workflow_id: Workflow identifier
|
||||
node_id: Node identifier
|
||||
strategy: Recovery strategy used
|
||||
success: Whether recovery succeeded
|
||||
duration: Recovery duration
|
||||
"""
|
||||
if not self.enabled or not self.analytics:
|
||||
return
|
||||
|
||||
try:
|
||||
# Record as a special step metric
|
||||
recovery_metrics = StepMetrics(
|
||||
execution_id=execution_id,
|
||||
workflow_id=workflow_id,
|
||||
node_id=f"{node_id}_recovery",
|
||||
action_type=f"recovery_{strategy}",
|
||||
started_at=datetime.now(),
|
||||
completed_at=datetime.now(),
|
||||
duration=duration,
|
||||
success=success,
|
||||
error_message=None if success else f"Recovery failed: {strategy}"
|
||||
)
|
||||
|
||||
self.analytics.metrics_collector.record_step(recovery_metrics)
|
||||
|
||||
logger.debug(f"Recorded recovery: {strategy} ({'success' if success else 'failed'})")
|
||||
except Exception as e:
|
||||
logger.error(f"Error recording recovery attempt: {e}")
|
||||
|
||||
def get_live_metrics(self, execution_id: str) -> Optional[dict]:
|
||||
"""
|
||||
Get live metrics for an execution.
|
||||
|
||||
Args:
|
||||
execution_id: Execution identifier
|
||||
|
||||
Returns:
|
||||
Live metrics dictionary or None
|
||||
"""
|
||||
if not self.enabled or not self.analytics:
|
||||
return None
|
||||
|
||||
try:
|
||||
return self.analytics.realtime_analytics.get_live_metrics(execution_id)
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting live metrics: {e}")
|
||||
return None
|
||||
|
||||
def get_workflow_stats(self, workflow_id: str, hours: int = 24) -> Optional[dict]:
|
||||
"""
|
||||
Get statistics for a workflow.
|
||||
|
||||
Args:
|
||||
workflow_id: Workflow identifier
|
||||
hours: Time window in hours
|
||||
|
||||
Returns:
|
||||
Statistics dictionary or None
|
||||
"""
|
||||
if not self.enabled or not self.analytics:
|
||||
return None
|
||||
|
||||
try:
|
||||
from datetime import timedelta
|
||||
|
||||
end_time = datetime.now()
|
||||
start_time = end_time - timedelta(hours=hours)
|
||||
|
||||
# Get performance stats
|
||||
perf_stats = self.analytics.performance_analyzer.analyze_performance(
|
||||
workflow_id=workflow_id,
|
||||
start_time=start_time,
|
||||
end_time=end_time
|
||||
)
|
||||
|
||||
# Get success rate
|
||||
success_stats = self.analytics.success_rate_calculator.calculate_success_rate(
|
||||
workflow_id=workflow_id,
|
||||
time_window_hours=hours
|
||||
)
|
||||
|
||||
return {
|
||||
'performance': perf_stats.to_dict(),
|
||||
'success_rate': success_stats.to_dict()
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting workflow stats: {e}")
|
||||
return None
|
||||
|
||||
def start_resource_monitoring(self, execution_id: str) -> None:
|
||||
"""
|
||||
Start monitoring resources for an execution.
|
||||
|
||||
Args:
|
||||
execution_id: Execution identifier
|
||||
"""
|
||||
if not self.enabled or not self.analytics:
|
||||
return
|
||||
|
||||
try:
|
||||
# Tag resource metrics with execution ID
|
||||
self.analytics.collectors.resource.start_monitoring(
|
||||
context={'execution_id': execution_id}
|
||||
)
|
||||
logger.debug(f"Started resource monitoring for: {execution_id}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Error starting resource monitoring: {e}")
|
||||
|
||||
def stop_resource_monitoring(self, execution_id: str) -> None:
|
||||
"""
|
||||
Stop monitoring resources for an execution.
|
||||
|
||||
Args:
|
||||
execution_id: Execution identifier
|
||||
"""
|
||||
if not self.enabled or not self.analytics:
|
||||
return
|
||||
|
||||
try:
|
||||
self.analytics.collectors.resource.stop_monitoring()
|
||||
logger.debug(f"Stopped resource monitoring for: {execution_id}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Error stopping resource monitoring: {e}")
|
||||
|
||||
|
||||
# Global instance
|
||||
_analytics_integration: Optional[AnalyticsExecutionIntegration] = None
|
||||
|
||||
|
||||
def get_analytics_integration(enabled: bool = True) -> AnalyticsExecutionIntegration:
|
||||
"""
|
||||
Get or create global analytics integration instance.
|
||||
|
||||
Args:
|
||||
enabled: Whether analytics is enabled
|
||||
|
||||
Returns:
|
||||
AnalyticsExecutionIntegration instance
|
||||
"""
|
||||
global _analytics_integration
|
||||
|
||||
if _analytics_integration is None:
|
||||
_analytics_integration = AnalyticsExecutionIntegration(enabled=enabled)
|
||||
|
||||
return _analytics_integration
|
||||
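How the hooks are meant to be wired into an execution loop; a minimal sketch with hypothetical workflow and node IDs:

from datetime import datetime

integration = get_analytics_integration(enabled=True)

exec_id = integration.on_execution_start('invoice_workflow', total_steps=3)
t0 = datetime.now()
integration.on_step_start(exec_id, node_id='open_app', step_number=1)
# ... perform the step ...
t1 = datetime.now()
integration.on_step_complete(
    exec_id, 'invoice_workflow', 'open_app', 'click',
    started_at=t0, completed_at=t1,
    duration=(t1 - t0).total_seconds(), success=True
)
integration.on_execution_complete(
    exec_id, 'invoice_workflow',
    started_at=t0, completed_at=datetime.now(),
    duration=(datetime.now() - t0).total_seconds(),
    status='success', steps_completed=1
)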
5 core/analytics/query/__init__.py Normal file
@@ -0,0 +1,5 @@
"""Query engine for analytics data."""
|
||||
|
||||
from .query_engine import QueryEngine
|
||||
|
||||
__all__ = ['QueryEngine']
|
||||
312 core/analytics/query/query_engine.py Normal file
@@ -0,0 +1,312 @@
"""Query engine for analytics data with caching."""
|
||||
|
||||
import logging
|
||||
import hashlib
|
||||
import json
|
||||
from typing import List, Dict, Any, Optional, Tuple
|
||||
from datetime import datetime
|
||||
from collections import OrderedDict
|
||||
|
||||
from ..storage.timeseries_store import TimeSeriesStore
|
||||
from ..storage.archive_storage import ArchiveStorage
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class LRUCache:
|
||||
"""Simple LRU cache implementation."""
|
||||
|
||||
def __init__(self, capacity: int = 100):
|
||||
"""Initialize LRU cache."""
|
||||
self.capacity = capacity
|
||||
self.cache: OrderedDict = OrderedDict()
|
||||
|
||||
def get(self, key: str) -> Optional[Any]:
|
||||
"""Get value from cache."""
|
||||
if key not in self.cache:
|
||||
return None
|
||||
# Move to end (most recently used)
|
||||
self.cache.move_to_end(key)
|
||||
return self.cache[key]
|
||||
|
||||
def put(self, key: str, value: Any) -> None:
|
||||
"""Put value in cache."""
|
||||
if key in self.cache:
|
||||
self.cache.move_to_end(key)
|
||||
self.cache[key] = value
|
||||
# Remove oldest if over capacity
|
||||
if len(self.cache) > self.capacity:
|
||||
self.cache.popitem(last=False)
|
||||
|
||||
def clear(self) -> None:
|
||||
"""Clear cache."""
|
||||
self.cache.clear()
|
||||
|
||||
def size(self) -> int:
|
||||
"""Get cache size."""
|
||||
return len(self.cache)
|
||||
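A quick sketch of the eviction order, which follows directly from the OrderedDict implementation:

cache = LRUCache(capacity=2)
cache.put('a', 1)
cache.put('b', 2)
cache.get('a')         # touch 'a' so 'b' becomes least recently used
cache.put('c', 3)      # over capacity: evicts 'b'
print(cache.get('b'))  # None
print(cache.size())    # 2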
class QueryEngine:
    """Query engine for analytics data with caching."""

    def __init__(
        self,
        time_series_store: TimeSeriesStore,
        archive_storage: Optional[ArchiveStorage] = None,
        cache_size: int = 100
    ):
        """
        Initialize query engine.

        Args:
            time_series_store: Time series storage
            archive_storage: Optional archive storage
            cache_size: Size of query cache
        """
        self.ts_store = time_series_store
        self.archive = archive_storage
        self.cache = LRUCache(cache_size)

        logger.info(f"QueryEngine initialized (cache_size={cache_size})")

    def query(
        self,
        query: Dict[str, Any],
        use_cache: bool = True
    ) -> Union[Dict[str, List[Dict]], List[Dict]]:
        """
        Execute a query against analytics data.

        Args:
            query: Query specification with filters, time range, etc.
            use_cache: Whether to use cache

        Returns:
            Records grouped by metric type, or a flat list when 'flatten' is set
        """
        # Generate cache key
        cache_key = self._generate_cache_key(query)

        # Check cache
        if use_cache:
            cached = self.cache.get(cache_key)
            if cached is not None:
                logger.debug(f"Cache hit for query: {cache_key[:8]}")
                return cached

        # Execute query
        start_time = query.get('start_time')
        end_time = query.get('end_time')
        workflow_id = query.get('workflow_id')
        metric_types = query.get('metric_types', ['execution', 'step', 'resource'])

        if not start_time or not end_time:
            raise ValueError("start_time and end_time are required")

        # Convert to datetime if strings
        if isinstance(start_time, str):
            start_time = datetime.fromisoformat(start_time)
        if isinstance(end_time, str):
            end_time = datetime.fromisoformat(end_time)

        # Query time series store
        results = self.ts_store.query_range(
            start_time=start_time,
            end_time=end_time,
            workflow_id=workflow_id,
            metric_types=metric_types
        )

        # Apply additional filters
        filters = query.get('filters', {})
        if filters:
            for metric_type, records in results.items():
                results[metric_type] = self._apply_filters(records, filters)

        # Flatten if requested
        if query.get('flatten', False):
            flattened = []
            for records in results.values():
                flattened.extend(records)
            results = flattened

        # Cache result
        if use_cache:
            self.cache.put(cache_key, results)

        return results

    def aggregate(
        self,
        metric: str,
        aggregation: str,
        group_by: List[str],
        filters: Dict[str, Any],
        time_range: Tuple[datetime, datetime],
        use_cache: bool = True
    ) -> List[Dict]:
        """
        Aggregate metrics with grouping.

        Args:
            metric: Metric field to aggregate
            aggregation: Aggregation function (avg, sum, count, min, max)
            group_by: Fields to group by
            filters: Filter criteria
            time_range: (start_time, end_time)
            use_cache: Whether to use cache

        Returns:
            List of aggregated results
        """
        # Generate cache key
        cache_key = self._generate_cache_key({
            'type': 'aggregate',
            'metric': metric,
            'aggregation': aggregation,
            'group_by': group_by,
            'filters': filters,
            'time_range': [t.isoformat() for t in time_range]
        })

        # Check cache
        if use_cache:
            cached = self.cache.get(cache_key)
            if cached is not None:
                return cached

        # Execute aggregation
        start_time, end_time = time_range
        results = self.ts_store.aggregate(
            metric=metric,
            aggregation=aggregation,
            group_by=group_by,
            start_time=start_time,
            end_time=end_time,
            filters=filters
        )

        # Cache result
        if use_cache:
            self.cache.put(cache_key, results)

        return results

    def compare(
        self,
        workflow_ids: List[str],
        metrics: List[str],
        time_range: Tuple[datetime, datetime]
    ) -> Dict[str, Dict]:
        """
        Compare metrics across workflows.

        Args:
            workflow_ids: List of workflow IDs to compare
            metrics: List of metrics to compare
            time_range: (start_time, end_time)

        Returns:
            Dictionary mapping workflow_id to metrics
        """
        results = {}
        start_time, end_time = time_range

        for workflow_id in workflow_ids:
|
||||
workflow_metrics = {}
|
||||
|
||||
# Query metrics for this workflow
|
||||
data = self.ts_store.query_range(
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
workflow_id=workflow_id
|
||||
)
|
||||
|
||||
# Calculate requested metrics
|
||||
executions = data.get('execution', [])
|
||||
if executions:
|
||||
for metric in metrics:
|
||||
values = [e.get(metric) for e in executions if e.get(metric) is not None]
|
||||
if values:
|
||||
import statistics
|
||||
workflow_metrics[metric] = {
|
||||
'avg': statistics.mean(values),
|
||||
'min': min(values),
|
||||
'max': max(values),
|
||||
'count': len(values)
|
||||
}
|
||||
|
||||
results[workflow_id] = workflow_metrics
|
||||
|
||||
# Calculate differences
|
||||
if len(workflow_ids) == 2:
|
||||
results['comparison'] = self._calculate_differences(
|
||||
results[workflow_ids[0]],
|
||||
results[workflow_ids[1]]
|
||||
)
|
||||
|
||||
return results
|
||||
|
||||
def invalidate_cache(self, pattern: Optional[str] = None) -> int:
|
||||
"""
|
||||
Invalidate cache entries.
|
||||
|
||||
Args:
|
||||
pattern: Optional pattern to match (None = clear all)
|
||||
|
||||
Returns:
|
||||
Number of entries invalidated
|
||||
"""
|
||||
if pattern is None:
|
||||
size = self.cache.size()
|
||||
self.cache.clear()
|
||||
logger.info(f"Cleared entire cache ({size} entries)")
|
||||
return size
|
||||
|
||||
# Pattern-based invalidation not implemented yet
|
||||
# For now, just clear all
|
||||
return self.invalidate_cache(None)
|
||||
|
||||
def _apply_filters(self, records: List[Dict], filters: Dict[str, Any]) -> List[Dict]:
|
||||
"""Apply filters to records."""
|
||||
filtered = []
|
||||
|
||||
for record in records:
|
||||
match = True
|
||||
for key, value in filters.items():
|
||||
if record.get(key) != value:
|
||||
match = False
|
||||
break
|
||||
if match:
|
||||
filtered.append(record)
|
||||
|
||||
return filtered
|
||||
|
||||
def _calculate_differences(
|
||||
self,
|
||||
metrics1: Dict[str, Dict],
|
||||
metrics2: Dict[str, Dict]
|
||||
) -> Dict[str, Dict]:
|
||||
"""Calculate differences between two metric sets."""
|
||||
differences = {}
|
||||
|
||||
for metric in metrics1.keys():
|
||||
if metric in metrics2:
|
||||
m1 = metrics1[metric]
|
||||
m2 = metrics2[metric]
|
||||
|
||||
differences[metric] = {
|
||||
'diff_avg': m2['avg'] - m1['avg'],
|
||||
'diff_percent': ((m2['avg'] - m1['avg']) / m1['avg'] * 100) if m1['avg'] != 0 else 0,
|
||||
'workflow1_avg': m1['avg'],
|
||||
'workflow2_avg': m2['avg']
|
||||
}
|
||||
|
||||
return differences
|
||||
|
||||
def _generate_cache_key(self, query: Dict[str, Any]) -> str:
|
||||
"""Generate cache key from query."""
|
||||
# Sort keys for consistent hashing
|
||||
query_str = json.dumps(query, sort_keys=True, default=str)
|
||||
return hashlib.md5(query_str.encode()).hexdigest()
|
||||
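A minimal usage sketch for the query engine above (not part of the commit; the storage path, workflow ID, and 'status' filter value are illustrative):

# Illustrative only: query a week of execution records, then an aggregate.
from datetime import datetime, timedelta
from pathlib import Path

from core.analytics.storage.timeseries_store import TimeSeriesStore
from core.analytics.query.query_engine import QueryEngine

store = TimeSeriesStore(Path("data/analytics"))   # illustrative path
engine = QueryEngine(store, cache_size=50)

now = datetime.now()
records = engine.query({
    'start_time': now - timedelta(days=7),
    'end_time': now,
    'workflow_id': 'wf-001',                      # illustrative ID
    'metric_types': ['execution'],
    'filters': {'status': 'success'},             # illustrative filter
    'flatten': True,
})
averages = engine.aggregate(
    metric='duration_ms',
    aggregation='avg',
    group_by=['workflow_id'],
    filters={},
    time_range=(now - timedelta(days=7), now),
)
engine.invalidate_cache()  # drop cached results after new writes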
5
core/analytics/realtime/__init__.py
Normal file
@@ -0,0 +1,5 @@
"""Real-time analytics components."""

from .realtime_analytics import RealtimeAnalytics

__all__ = ['RealtimeAnalytics']
283
core/analytics/realtime/realtime_analytics.py
Normal file
@@ -0,0 +1,283 @@
"""Real-time analytics for active workflows."""

import logging
import threading
from typing import Dict, Any, Optional, List, Callable
from datetime import datetime, timedelta
from dataclasses import dataclass, field

from ..collection.metrics_collector import MetricsCollector

logger = logging.getLogger(__name__)


@dataclass
class LiveExecution:
    """Live execution tracking."""
    execution_id: str
    workflow_id: str
    started_at: datetime
    current_step: int = 0
    total_steps: int = 0
    steps_completed: int = 0
    steps_failed: int = 0
    current_node_id: Optional[str] = None
    last_update: datetime = field(default_factory=datetime.now)

    @property
    def progress_percent(self) -> float:
        """Calculate progress percentage."""
        if self.total_steps == 0:
            return 0.0
        return (self.steps_completed / self.total_steps) * 100

    @property
    def estimated_completion(self) -> Optional[datetime]:
        """Estimate completion time from the average time per completed step."""
        if self.steps_completed == 0 or self.total_steps == 0:
            return None

        elapsed = (datetime.now() - self.started_at).total_seconds()
        avg_time_per_step = elapsed / self.steps_completed
        remaining_steps = self.total_steps - self.steps_completed
        estimated_remaining = avg_time_per_step * remaining_steps

        return datetime.now() + timedelta(seconds=estimated_remaining)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary."""
        return {
            'execution_id': self.execution_id,
            'workflow_id': self.workflow_id,
            'started_at': self.started_at.isoformat(),
            'current_step': self.current_step,
            'total_steps': self.total_steps,
            'steps_completed': self.steps_completed,
            'steps_failed': self.steps_failed,
            'current_node_id': self.current_node_id,
            'progress_percent': self.progress_percent,
            'estimated_completion': self.estimated_completion.isoformat() if self.estimated_completion else None,
            'last_update': self.last_update.isoformat()
        }


class RealtimeAnalytics:
    """Real-time analytics for active workflows."""

    def __init__(self, metrics_collector: Optional[MetricsCollector] = None):
        """
        Initialize real-time analytics.

        Args:
            metrics_collector: Metrics collector instance
        """
        self.collector = metrics_collector
        self.active_executions: Dict[str, LiveExecution] = {}
        self.subscribers: Dict[str, List[Callable]] = {}
        self._lock = threading.Lock()

        logger.info("RealtimeAnalytics initialized")

    def track_execution(
        self,
        execution_id: str,
        workflow_id: str,
        total_steps: int = 0
    ) -> None:
        """
        Start tracking an execution in real-time.

        Args:
            execution_id: Execution identifier
            workflow_id: Workflow identifier
            total_steps: Total number of steps
        """
        with self._lock:
            self.active_executions[execution_id] = LiveExecution(
                execution_id=execution_id,
                workflow_id=workflow_id,
                started_at=datetime.now(),
                total_steps=total_steps
            )

        # Notify subscribers (outside the lock: _notify_subscribers re-acquires it)
        self._notify_subscribers(execution_id, 'started')

        logger.info(f"Tracking execution: {execution_id}")

    def update_progress(
        self,
        execution_id: str,
        current_step: int,
        total_steps: Optional[int] = None,
        current_node_id: Optional[str] = None
    ) -> None:
        """
        Update execution progress.

        Args:
            execution_id: Execution identifier
            current_step: Current step number
            total_steps: Total steps (updates if provided)
            current_node_id: Current node ID
        """
        with self._lock:
            if execution_id not in self.active_executions:
                logger.warning(f"Execution not tracked: {execution_id}")
                return

            execution = self.active_executions[execution_id]
            execution.current_step = current_step
            if total_steps is not None:
                execution.total_steps = total_steps
            if current_node_id is not None:
                execution.current_node_id = current_node_id
            execution.last_update = datetime.now()

        # Notify subscribers (outside the lock)
        self._notify_subscribers(execution_id, 'progress')

    def record_step_complete(
        self,
        execution_id: str,
        success: bool
    ) -> None:
        """
        Record step completion.

        Args:
            execution_id: Execution identifier
            success: Whether step succeeded
        """
        with self._lock:
            if execution_id not in self.active_executions:
                return

            execution = self.active_executions[execution_id]
            if success:
                execution.steps_completed += 1
            else:
                execution.steps_failed += 1
            execution.last_update = datetime.now()

        # Notify subscribers (outside the lock)
        self._notify_subscribers(execution_id, 'step_complete')

    def complete_execution(
        self,
        execution_id: str,
        status: str
    ) -> None:
        """
        Mark execution as complete.

        Args:
            execution_id: Execution identifier
            status: Final status
        """
        with self._lock:
            if execution_id in self.active_executions:
                del self.active_executions[execution_id]

        # Notify subscribers (outside the lock)
        self._notify_subscribers(execution_id, 'completed', {'status': status})

        logger.info(f"Execution completed: {execution_id} ({status})")

    def get_live_metrics(self, execution_id: str) -> Optional[Dict[str, Any]]:
        """
        Get live metrics for an execution.

        Args:
            execution_id: Execution identifier

        Returns:
            Live metrics dictionary or None
        """
        with self._lock:
            execution = self.active_executions.get(execution_id)
            if not execution:
                return None
            return execution.to_dict()

    def get_all_active(self) -> List[Dict[str, Any]]:
        """
        Get all active executions.

        Returns:
            List of active execution metrics
        """
        with self._lock:
            return [e.to_dict() for e in self.active_executions.values()]

    def subscribe(
        self,
        execution_id: str,
        callback: Callable[[str, Dict], None]
    ) -> None:
        """
        Subscribe to real-time updates for an execution.

        Args:
            execution_id: Execution identifier
            callback: Callback function (event_type, data)
        """
        with self._lock:
            if execution_id not in self.subscribers:
                self.subscribers[execution_id] = []
            self.subscribers[execution_id].append(callback)

        logger.debug(f"Subscriber added for {execution_id}")

    def unsubscribe(
        self,
        execution_id: str,
        callback: Optional[Callable] = None
    ) -> None:
        """
        Unsubscribe from updates.

        Args:
            execution_id: Execution identifier
            callback: Specific callback to remove (None = remove all)
        """
        with self._lock:
            if execution_id not in self.subscribers:
                return

            if callback is None:
                del self.subscribers[execution_id]
            else:
                self.subscribers[execution_id] = [
                    cb for cb in self.subscribers[execution_id] if cb != callback
                ]

    def _notify_subscribers(
        self,
        execution_id: str,
        event_type: str,
        data: Optional[Dict] = None
    ) -> None:
        """Notify subscribers of an event."""
        with self._lock:
            callbacks = self.subscribers.get(execution_id, []).copy()

        if not callbacks:
            return

        # Get current metrics (get_live_metrics takes the lock itself)
        metrics = self.get_live_metrics(execution_id)
        event_data = {
            'event_type': event_type,
            'execution_id': execution_id,
            'metrics': metrics,
            **(data or {})
        }

        # Call subscribers (outside lock)
        for callback in callbacks:
            try:
                callback(event_type, event_data)
            except Exception as e:
                logger.error(f"Subscriber callback error: {e}")
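A minimal usage sketch for the real-time tracker above (not part of the commit; the execution and workflow IDs are illustrative):

# Illustrative only: track one execution and print its progress events.
from core.analytics.realtime.realtime_analytics import RealtimeAnalytics

rt = RealtimeAnalytics()

def on_event(event_type, event_data):
    # event_data carries the live metrics snapshot built by _notify_subscribers
    print(event_type, event_data['metrics'])

rt.track_execution('exec-001', 'wf-001', total_steps=3)  # illustrative IDs
rt.subscribe('exec-001', on_event)
for step in range(1, 4):
    rt.update_progress('exec-001', current_step=step)
    rt.record_step_complete('exec-001', success=True)
rt.complete_execution('exec-001', status='success')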
13
core/analytics/reporting/__init__.py
Normal file
@@ -0,0 +1,13 @@
"""Analytics reporting module."""

from .report_generator import (
    ReportGenerator,
    ReportConfig,
    ScheduledReport
)

__all__ = [
    'ReportGenerator',
    'ReportConfig',
    'ScheduledReport'
]
443
core/analytics/reporting/report_generator.py
Normal file
@@ -0,0 +1,443 @@
"""Report generation for analytics data."""

import logging
import json
import csv
from typing import Dict, List, Optional, Any
from datetime import datetime
from pathlib import Path
from dataclasses import dataclass

logger = logging.getLogger(__name__)


@dataclass
class ReportConfig:
    """Report configuration."""
    title: str
    metric_types: List[str]
    start_time: datetime
    end_time: datetime
    workflow_ids: Optional[List[str]] = None
    include_charts: bool = True
    include_insights: bool = True
    format: str = 'json'  # json, csv, html, pdf

    def to_dict(self) -> Dict:
        """Convert to dictionary."""
        return {
            'title': self.title,
            'metric_types': self.metric_types,
            'start_time': self.start_time.isoformat(),
            'end_time': self.end_time.isoformat(),
            'workflow_ids': self.workflow_ids,
            'include_charts': self.include_charts,
            'include_insights': self.include_insights,
            'format': self.format
        }


@dataclass
class ScheduledReport:
    """Scheduled report configuration."""
    report_id: str
    config: ReportConfig
    schedule_cron: str  # Cron expression
    delivery_method: str  # email, webhook, file
    delivery_config: Dict[str, Any]
    enabled: bool = True
    last_run: Optional[datetime] = None
    next_run: Optional[datetime] = None

    def to_dict(self) -> Dict:
        """Convert to dictionary."""
        return {
            'report_id': self.report_id,
            'config': self.config.to_dict(),
            'schedule_cron': self.schedule_cron,
            'delivery_method': self.delivery_method,
            'delivery_config': self.delivery_config,
            'enabled': self.enabled,
            'last_run': self.last_run.isoformat() if self.last_run else None,
            'next_run': self.next_run.isoformat() if self.next_run else None
        }


class ReportGenerator:
    """Generate analytics reports in various formats."""

    def __init__(
        self,
        query_engine,  # QueryEngine
        performance_analyzer,  # PerformanceAnalyzer
        insight_generator,  # InsightGenerator
        output_dir: str = "data/analytics/reports"
    ):
        """
        Initialize report generator.

        Args:
            query_engine: Query engine instance
            performance_analyzer: Performance analyzer instance
            insight_generator: Insight generator instance
            output_dir: Output directory for reports
        """
        self.query_engine = query_engine
        self.performance_analyzer = performance_analyzer
        self.insight_generator = insight_generator
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.scheduled_reports: Dict[str, ScheduledReport] = {}
        logger.info("ReportGenerator initialized")

    def generate_report(
        self,
        config: ReportConfig
    ) -> Dict[str, Any]:
        """
        Generate a report based on configuration.

        Args:
            config: Report configuration

        Returns:
            Report data dictionary
        """
        logger.info(f"Generating report: {config.title}")

        # Collect data
        report_data = {
            'title': config.title,
            'generated_at': datetime.now().isoformat(),
            'time_range': {
                'start': config.start_time.isoformat(),
                'end': config.end_time.isoformat()
            },
            'metrics': {},
            'performance': {},
            'insights': []
        }

        # Query metrics (QueryEngine expects a query dict; flatten so each
        # metric type maps to a flat list of records)
        for metric_type in config.metric_types:
            filters = {}
            if config.workflow_ids:
                filters['workflow_id'] = config.workflow_ids[0]  # Simplified

            metrics = self.query_engine.query({
                'metric_types': [metric_type],
                'start_time': config.start_time,
                'end_time': config.end_time,
                'filters': filters,
                'flatten': True
            })
            report_data['metrics'][metric_type] = metrics

        # Add performance analysis
        if config.workflow_ids:
            for workflow_id in config.workflow_ids:
                perf_stats = self.performance_analyzer.analyze_performance(
                    workflow_id=workflow_id,
                    start_time=config.start_time,
                    end_time=config.end_time
                )
                report_data['performance'][workflow_id] = perf_stats.to_dict()

        # Add insights
        if config.include_insights:
            insights = self.insight_generator.generate_insights(
                start_time=config.start_time,
                end_time=config.end_time
            )
            report_data['insights'] = [i.to_dict() for i in insights]

        return report_data

    def export_json(
        self,
        report_data: Dict[str, Any],
        filename: Optional[str] = None
    ) -> str:
        """
        Export report as JSON.

        Args:
            report_data: Report data
            filename: Output filename (auto-generated if None)

        Returns:
            Path to exported file
        """
        if filename is None:
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            filename = f"report_{timestamp}.json"

        filepath = self.output_dir / filename

        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(report_data, f, indent=2)

        logger.info(f"Exported JSON report: {filepath}")
        return str(filepath)

    def export_csv(
        self,
        report_data: Dict[str, Any],
        filename: Optional[str] = None
    ) -> str:
        """
        Export report as CSV.

        Args:
            report_data: Report data
            filename: Output filename (auto-generated if None)

        Returns:
            Path to exported file
        """
        if filename is None:
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            filename = f"report_{timestamp}.csv"

        filepath = self.output_dir / filename

        # Flatten metrics for CSV export
        rows = []
        for metric_type, metrics in report_data.get('metrics', {}).items():
            for metric in metrics:
                row = {
                    'metric_type': metric_type,
                    **metric
                }
                rows.append(row)

        if rows:
            # Get all unique keys
            fieldnames = set()
            for row in rows:
                fieldnames.update(row.keys())
            fieldnames = sorted(fieldnames)

            with open(filepath, 'w', newline='', encoding='utf-8') as f:
                writer = csv.DictWriter(f, fieldnames=fieldnames)
                writer.writeheader()
                writer.writerows(rows)

        logger.info(f"Exported CSV report: {filepath}")
        return str(filepath)

    def export_html(
        self,
        report_data: Dict[str, Any],
        filename: Optional[str] = None
    ) -> str:
        """
        Export report as HTML.

        Args:
            report_data: Report data
            filename: Output filename (auto-generated if None)

        Returns:
            Path to exported file
        """
        if filename is None:
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            filename = f"report_{timestamp}.html"

        filepath = self.output_dir / filename

        # Generate HTML
        html = self._generate_html(report_data)

        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(html)

        logger.info(f"Exported HTML report: {filepath}")
        return str(filepath)

    def _generate_html(self, report_data: Dict[str, Any]) -> str:
        """Generate HTML report."""
        html = f"""<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>{report_data['title']}</title>
    <style>
        body {{ font-family: Arial, sans-serif; margin: 20px; }}
        h1 {{ color: #333; }}
        h2 {{ color: #666; margin-top: 30px; }}
        table {{ border-collapse: collapse; width: 100%; margin: 20px 0; }}
        th, td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
        th {{ background-color: #4CAF50; color: white; }}
        .insight {{ background-color: #f9f9f9; padding: 15px; margin: 10px 0; border-left: 4px solid #4CAF50; }}
        .metric-section {{ margin: 20px 0; }}
    </style>
</head>
<body>
    <h1>{report_data['title']}</h1>
    <p><strong>Generated:</strong> {report_data['generated_at']}</p>
    <p><strong>Time Range:</strong> {report_data['time_range']['start']} to {report_data['time_range']['end']}</p>
"""

        # Add performance section
        if report_data.get('performance'):
            html += "<h2>Performance Analysis</h2>\n"
            for workflow_id, perf in report_data['performance'].items():
                html += "<div class='metric-section'>\n"
                html += f"<h3>Workflow: {workflow_id}</h3>\n"
                html += f"<p>Average Duration: {perf.get('avg_duration', 0):.2f}s</p>\n"
                html += f"<p>Success Rate: {perf.get('success_rate', 0):.1f}%</p>\n"
                html += "</div>\n"

        # Add insights section
        if report_data.get('insights'):
            html += "<h2>Insights</h2>\n"
            for insight in report_data['insights']:
                html += "<div class='insight'>\n"
                html += f"<strong>{insight.get('title', 'Insight')}</strong>\n"
                html += f"<p>{insight.get('description', '')}</p>\n"
                html += "</div>\n"

        html += "</body>\n</html>"
        return html

    def export_pdf(
        self,
        report_data: Dict[str, Any],
        filename: Optional[str] = None
    ) -> str:
        """
        Export report as PDF.

        Note: Requires reportlab library. Falls back to HTML if not available.

        Args:
            report_data: Report data
            filename: Output filename (auto-generated if None)

        Returns:
            Path to exported file
        """
        try:
            from reportlab.lib.pagesizes import letter
            from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table
            from reportlab.lib.styles import getSampleStyleSheet
            from reportlab.lib.units import inch

            if filename is None:
                timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
                filename = f"report_{timestamp}.pdf"

            filepath = self.output_dir / filename

            # Create PDF
            doc = SimpleDocTemplate(str(filepath), pagesize=letter)
            styles = getSampleStyleSheet()
            story = []

            # Title
            title = Paragraph(report_data['title'], styles['Title'])
            story.append(title)
            story.append(Spacer(1, 0.2*inch))

            # Metadata
            meta = Paragraph(f"Generated: {report_data['generated_at']}", styles['Normal'])
            story.append(meta)
            story.append(Spacer(1, 0.3*inch))

            # Performance section
            if report_data.get('performance'):
                heading = Paragraph("Performance Analysis", styles['Heading2'])
                story.append(heading)
                story.append(Spacer(1, 0.1*inch))

                for workflow_id, perf in report_data['performance'].items():
                    text = f"<b>Workflow:</b> {workflow_id}<br/>"
                    text += f"Average Duration: {perf.get('avg_duration', 0):.2f}s<br/>"
                    text += f"Success Rate: {perf.get('success_rate', 0):.1f}%"
                    para = Paragraph(text, styles['Normal'])
                    story.append(para)
                    story.append(Spacer(1, 0.2*inch))

            # Build PDF
            doc.build(story)

            logger.info(f"Exported PDF report: {filepath}")
            return str(filepath)

        except ImportError:
            logger.warning("reportlab not available, falling back to HTML")
            return self.export_html(report_data, filename.replace('.pdf', '.html') if filename else None)

    def schedule_report(
        self,
        report: ScheduledReport
    ) -> None:
        """
        Schedule a report for automatic generation.

        Args:
            report: Scheduled report configuration
        """
        self.scheduled_reports[report.report_id] = report
        logger.info(f"Scheduled report: {report.report_id}")

    def get_scheduled_reports(self) -> List[ScheduledReport]:
        """Get all scheduled reports."""
        return list(self.scheduled_reports.values())

    def run_scheduled_report(self, report_id: str) -> Optional[str]:
        """
        Run a scheduled report.

        Args:
            report_id: Report identifier

        Returns:
            Path to generated report or None
        """
        report = self.scheduled_reports.get(report_id)
        if not report or not report.enabled:
            return None

        # Generate report
        report_data = self.generate_report(report.config)

        # Export based on format
        if report.config.format == 'json':
            filepath = self.export_json(report_data)
        elif report.config.format == 'csv':
            filepath = self.export_csv(report_data)
        elif report.config.format == 'html':
            filepath = self.export_html(report_data)
        elif report.config.format == 'pdf':
            filepath = self.export_pdf(report_data)
        else:
            filepath = self.export_json(report_data)

        # Update last run
        report.last_run = datetime.now()

        # Deliver report
        self._deliver_report(report, filepath)

        return filepath

    def _deliver_report(
        self,
        report: ScheduledReport,
        filepath: str
    ) -> None:
        """Deliver report via configured method."""
        if report.delivery_method == 'file':
            # Already saved to file
            logger.info(f"Report saved to: {filepath}")

        elif report.delivery_method == 'email':
            # TODO: Implement email delivery
            logger.info(f"Email delivery not yet implemented: {filepath}")

        elif report.delivery_method == 'webhook':
            # TODO: Implement webhook delivery
            logger.info(f"Webhook delivery not yet implemented: {filepath}")
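A minimal usage sketch for the generator above (not part of the commit; it assumes query_engine, performance_analyzer, and insight_generator instances are already constructed as in the module __init__, and the workflow ID is illustrative):

# Illustrative only: generate and export a weekly HTML report.
from datetime import datetime, timedelta
from core.analytics.reporting.report_generator import ReportGenerator, ReportConfig

config = ReportConfig(
    title="Weekly RPA Report",
    metric_types=['execution'],
    start_time=datetime.now() - timedelta(days=7),
    end_time=datetime.now(),
    workflow_ids=['wf-001'],   # illustrative ID
    format='html',
)
generator = ReportGenerator(query_engine, performance_analyzer, insight_generator)
report = generator.generate_report(config)
path = generator.export_html(report)
print(f"Report written to {path}")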
9
core/analytics/storage/__init__.py
Normal file
@@ -0,0 +1,9 @@
"""Storage components for analytics data."""

from .timeseries_store import TimeSeriesStore
from .archive_storage import ArchiveStorage

__all__ = [
    'TimeSeriesStore',
    'ArchiveStorage',
]
393
core/analytics/storage/archive_storage.py
Normal file
@@ -0,0 +1,393 @@
"""Archive storage for old metrics with compression."""

import logging
import gzip
import json
from typing import Dict, List, Optional, Any
from datetime import datetime, timedelta
from pathlib import Path
from dataclasses import dataclass

logger = logging.getLogger(__name__)


@dataclass
class RetentionPolicy:
    """Retention policy configuration."""
    metric_type: str
    hot_retention_days: int  # Keep in main storage
    archive_retention_days: int  # Keep in archive
    compression_enabled: bool = True

    def to_dict(self) -> Dict:
        """Convert to dictionary."""
        return {
            'metric_type': self.metric_type,
            'hot_retention_days': self.hot_retention_days,
            'archive_retention_days': self.archive_retention_days,
            'compression_enabled': self.compression_enabled
        }

    @classmethod
    def from_dict(cls, data: Dict) -> 'RetentionPolicy':
        """Create from dictionary."""
        return cls(**data)


class ArchiveStorage:
    """Archive storage for old metrics."""

    def __init__(self, archive_dir: str = "data/analytics/archive"):
        """
        Initialize archive storage.

        Args:
            archive_dir: Directory for archived data
        """
        self.archive_dir = Path(archive_dir)
        self.archive_dir.mkdir(parents=True, exist_ok=True)
        logger.info(f"ArchiveStorage initialized: {archive_dir}")

    def archive_metrics(
        self,
        metrics: List[Dict[str, Any]],
        metric_type: str,
        archive_date: datetime,
        compress: bool = True
    ) -> str:
        """
        Archive metrics to compressed storage.

        Args:
            metrics: List of metrics to archive
            metric_type: Type of metrics
            archive_date: Date for archive file
            compress: Whether to compress

        Returns:
            Path to archive file
        """
        # Create archive filename
        date_str = archive_date.strftime('%Y%m%d')
        filename = f"{metric_type}_{date_str}.json"
        if compress:
            filename += ".gz"

        filepath = self.archive_dir / filename

        # Serialize metrics
        data = {
            'metric_type': metric_type,
            'archive_date': archive_date.isoformat(),
            'count': len(metrics),
            'metrics': metrics
        }
        json_data = json.dumps(data, indent=2)

        # Write to file (compressed or not)
        if compress:
            with gzip.open(filepath, 'wt', encoding='utf-8') as f:
                f.write(json_data)
        else:
            with open(filepath, 'w', encoding='utf-8') as f:
                f.write(json_data)

        logger.info(f"Archived {len(metrics)} {metric_type} metrics to {filepath}")
        return str(filepath)

    def query_archive(
        self,
        metric_type: str,
        start_date: datetime,
        end_date: datetime,
        filters: Optional[Dict[str, Any]] = None
    ) -> List[Dict[str, Any]]:
        """
        Query archived metrics.

        Args:
            metric_type: Type of metrics
            start_date: Start date
            end_date: End date
            filters: Optional filters

        Returns:
            List of matching metrics
        """
        results = []

        # Iterate through date range
        current_date = start_date
        while current_date <= end_date:
            date_str = current_date.strftime('%Y%m%d')

            # Try both compressed and uncompressed
            for ext in ['.json.gz', '.json']:
                filename = f"{metric_type}_{date_str}{ext}"
                filepath = self.archive_dir / filename

                if filepath.exists():
                    metrics = self._read_archive_file(filepath)

                    # Apply filters
                    if filters:
                        metrics = self._apply_filters(metrics, filters)

                    results.extend(metrics)
                    break

            current_date += timedelta(days=1)

        logger.debug(f"Query returned {len(results)} archived metrics")
        return results

    def _read_archive_file(self, filepath: Path) -> List[Dict[str, Any]]:
        """Read archive file (compressed or not)."""
        try:
            if filepath.suffix == '.gz':
                with gzip.open(filepath, 'rt', encoding='utf-8') as f:
                    data = json.load(f)
            else:
                with open(filepath, 'r', encoding='utf-8') as f:
                    data = json.load(f)

            return data.get('metrics', [])
        except Exception as e:
            logger.error(f"Error reading archive {filepath}: {e}")
            return []

    def _apply_filters(
        self,
        metrics: List[Dict[str, Any]],
        filters: Dict[str, Any]
    ) -> List[Dict[str, Any]]:
        """Apply filters to metrics."""
        filtered = []
        for metric in metrics:
            match = True
            for key, value in filters.items():
                if metric.get(key) != value:
                    match = False
                    break
            if match:
                filtered.append(metric)
        return filtered

    def delete_archive(
        self,
        metric_type: str,
        before_date: datetime
    ) -> int:
        """
        Delete archived data before a date.

        Args:
            metric_type: Type of metrics
            before_date: Delete archives before this date

        Returns:
            Number of files deleted
        """
        deleted = 0

        # Find matching archive files
        pattern = f"{metric_type}_*.json*"
        for filepath in self.archive_dir.glob(pattern):
            # Extract date from filename
            try:
                date_str = filepath.stem.split('_')[1]
                if filepath.suffix == '.gz':
                    date_str = date_str.replace('.json', '')

                file_date = datetime.strptime(date_str, '%Y%m%d')

                if file_date < before_date:
                    filepath.unlink()
                    deleted += 1
                    logger.info(f"Deleted archive: {filepath}")
            except Exception as e:
                logger.error(f"Error processing {filepath}: {e}")

        return deleted

    def get_archive_stats(self) -> Dict[str, Any]:
        """
        Get archive storage statistics.

        Returns:
            Dictionary with archive stats
        """
        stats = {
            'total_files': 0,
            'total_size_bytes': 0,
            'by_metric_type': {},
            'oldest_archive': None,
            'newest_archive': None
        }

        for filepath in self.archive_dir.glob('*.json*'):
            stats['total_files'] += 1
            stats['total_size_bytes'] += filepath.stat().st_size

            # Extract metric type
            metric_type = filepath.stem.split('_')[0]
            if metric_type not in stats['by_metric_type']:
                stats['by_metric_type'][metric_type] = {
                    'count': 0,
                    'size_bytes': 0
                }

            stats['by_metric_type'][metric_type]['count'] += 1
            stats['by_metric_type'][metric_type]['size_bytes'] += filepath.stat().st_size

            # Track oldest/newest
            mtime = datetime.fromtimestamp(filepath.stat().st_mtime)
            if stats['oldest_archive'] is None or mtime < stats['oldest_archive']:
                stats['oldest_archive'] = mtime
            if stats['newest_archive'] is None or mtime > stats['newest_archive']:
                stats['newest_archive'] = mtime

        # Convert to ISO format
        if stats['oldest_archive']:
            stats['oldest_archive'] = stats['oldest_archive'].isoformat()
        if stats['newest_archive']:
            stats['newest_archive'] = stats['newest_archive'].isoformat()

        return stats


class RetentionPolicyEngine:
    """Engine for applying retention policies."""

    def __init__(
        self,
        archive_storage: ArchiveStorage,
        policies: Optional[List[RetentionPolicy]] = None
    ):
        """
        Initialize retention policy engine.

        Args:
            archive_storage: Archive storage instance
            policies: List of retention policies
        """
        self.archive = archive_storage
        self.policies = policies or self._default_policies()
        self.policy_file = Path("data/analytics/retention_policies.json")
        self._load_policies()
        logger.info("RetentionPolicyEngine initialized")

    def _default_policies(self) -> List[RetentionPolicy]:
        """Get default retention policies."""
        return [
            RetentionPolicy(
                metric_type='execution',
                hot_retention_days=30,
                archive_retention_days=365
            ),
            RetentionPolicy(
                metric_type='step',
                hot_retention_days=7,
                archive_retention_days=90
            ),
            RetentionPolicy(
                metric_type='resource',
                hot_retention_days=7,
                archive_retention_days=30
            )
        ]

    def _load_policies(self) -> None:
        """Load policies from file."""
        if self.policy_file.exists():
            try:
                with open(self.policy_file, 'r') as f:
                    data = json.load(f)
                self.policies = [RetentionPolicy.from_dict(p) for p in data]
                logger.info(f"Loaded {len(self.policies)} retention policies")
            except Exception as e:
                logger.error(f"Error loading policies: {e}")

    def save_policies(self) -> None:
        """Save policies to file."""
        self.policy_file.parent.mkdir(parents=True, exist_ok=True)
        with open(self.policy_file, 'w') as f:
            json.dump([p.to_dict() for p in self.policies], f, indent=2)
        logger.info("Retention policies saved")

    def add_policy(self, policy: RetentionPolicy) -> None:
        """Add or update a retention policy."""
        # Remove existing policy for same metric type
        self.policies = [p for p in self.policies if p.metric_type != policy.metric_type]
        self.policies.append(policy)
        self.save_policies()
        logger.info(f"Added policy for {policy.metric_type}")

    def get_policy(self, metric_type: str) -> Optional[RetentionPolicy]:
        """Get policy for a metric type."""
        for policy in self.policies:
            if policy.metric_type == metric_type:
                return policy
        return None

    def apply_policies(
        self,
        store,  # TimeSeriesStore
        dry_run: bool = False
    ) -> Dict[str, Any]:
        """
        Apply retention policies to storage.

        Args:
            store: TimeSeriesStore instance
            dry_run: If True, don't actually delete data

        Returns:
            Dictionary with application results
        """
        results = {
            'archived': {},
            'deleted': {},
            'errors': []
        }

        now = datetime.now()

        for policy in self.policies:
            try:
                # Archive old hot data (query_range returns a dict keyed by metric type)
                hot_cutoff = now - timedelta(days=policy.hot_retention_days)
                data = store.query_range(
                    start_time=datetime.min,
                    end_time=hot_cutoff,
                    metric_types=[policy.metric_type]
                )
                metrics_to_archive = data.get(policy.metric_type, [])

                if metrics_to_archive and not dry_run:
                    archive_path = self.archive.archive_metrics(
                        metrics=metrics_to_archive,
                        metric_type=policy.metric_type,
                        archive_date=hot_cutoff,
                        compress=policy.compression_enabled
                    )
                    results['archived'][policy.metric_type] = {
                        'count': len(metrics_to_archive),
                        'path': archive_path
                    }

                # Delete old archived data
                archive_cutoff = now - timedelta(days=policy.archive_retention_days)
                if not dry_run:
                    deleted_count = self.archive.delete_archive(
                        metric_type=policy.metric_type,
                        before_date=archive_cutoff
                    )
                    results['deleted'][policy.metric_type] = deleted_count

            except Exception as e:
                error_msg = f"Error applying policy for {policy.metric_type}: {e}"
                logger.error(error_msg)
                results['errors'].append(error_msg)

        return results
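A minimal usage sketch for the retention machinery above (not part of the commit; 'store' stands for an already-constructed TimeSeriesStore, and the day counts are illustrative):

# Illustrative only: register a policy, then do a safe dry run.
from core.analytics.storage.archive_storage import (
    ArchiveStorage, RetentionPolicy, RetentionPolicyEngine
)

archive = ArchiveStorage("data/analytics/archive")
engine = RetentionPolicyEngine(archive)
engine.add_policy(RetentionPolicy(
    metric_type='execution',
    hot_retention_days=14,       # illustrative values
    archive_retention_days=180,
))
# dry_run=True reports nothing archived or deleted, but exercises the queries
results = engine.apply_policies(store, dry_run=True)
print(results['archived'], results['deleted'], results['errors'])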
374
core/analytics/storage/timeseries_store.py
Normal file
@@ -0,0 +1,374 @@
"""Time-series storage for analytics metrics."""

import sqlite3
import json
import logging
from pathlib import Path
from typing import List, Dict, Any, Optional
from datetime import datetime
from contextlib import contextmanager

from ..collection.metrics_collector import ExecutionMetrics, StepMetrics
from ..collection.resource_collector import ResourceMetrics

logger = logging.getLogger(__name__)


class TimeSeriesStore:
    """Store for time-series metrics data using SQLite."""

    # Database schema
    SCHEMA = """
    -- Execution metrics table
    CREATE TABLE IF NOT EXISTS execution_metrics (
        execution_id TEXT PRIMARY KEY,
        workflow_id TEXT NOT NULL,
        started_at TIMESTAMP NOT NULL,
        completed_at TIMESTAMP,
        duration_ms REAL,
        status TEXT NOT NULL,
        steps_total INTEGER DEFAULT 0,
        steps_completed INTEGER DEFAULT 0,
        steps_failed INTEGER DEFAULT 0,
        error_message TEXT,
        context JSON
    );

    CREATE INDEX IF NOT EXISTS idx_workflow_time
        ON execution_metrics(workflow_id, started_at);
    CREATE INDEX IF NOT EXISTS idx_status
        ON execution_metrics(status);
    CREATE INDEX IF NOT EXISTS idx_started_at
        ON execution_metrics(started_at);

    -- Step metrics table
    CREATE TABLE IF NOT EXISTS step_metrics (
        step_id TEXT PRIMARY KEY,
        execution_id TEXT NOT NULL,
        workflow_id TEXT NOT NULL,
        node_id TEXT NOT NULL,
        action_type TEXT NOT NULL,
        target_element TEXT,
        started_at TIMESTAMP NOT NULL,
        completed_at TIMESTAMP NOT NULL,
        duration_ms REAL NOT NULL,
        status TEXT NOT NULL,
        confidence_score REAL,
        retry_count INTEGER DEFAULT 0,
        error_details TEXT,
        FOREIGN KEY (execution_id) REFERENCES execution_metrics(execution_id)
    );

    CREATE INDEX IF NOT EXISTS idx_execution
        ON step_metrics(execution_id);
    CREATE INDEX IF NOT EXISTS idx_workflow_action
        ON step_metrics(workflow_id, action_type);
    CREATE INDEX IF NOT EXISTS idx_step_time
        ON step_metrics(started_at);

    -- Resource metrics table
    CREATE TABLE IF NOT EXISTS resource_metrics (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        timestamp TIMESTAMP NOT NULL,
        workflow_id TEXT,
        execution_id TEXT,
        cpu_percent REAL NOT NULL,
        memory_mb REAL NOT NULL,
        gpu_utilization REAL DEFAULT 0.0,
        gpu_memory_mb REAL DEFAULT 0.0,
        disk_io_mb REAL DEFAULT 0.0
    );

    CREATE INDEX IF NOT EXISTS idx_resource_time
        ON resource_metrics(timestamp);
    CREATE INDEX IF NOT EXISTS idx_resource_workflow
        ON resource_metrics(workflow_id, timestamp);
    """

    def __init__(self, storage_path: Path):
        """
        Initialize time-series store.

        Args:
            storage_path: Path to storage directory
        """
        self.storage_path = Path(storage_path)
        self.storage_path.mkdir(parents=True, exist_ok=True)
        self.db_path = self.storage_path / 'timeseries.db'

        # Initialize database
        self._init_database()

        logger.info(f"TimeSeriesStore initialized at {self.db_path}")

    def _init_database(self) -> None:
        """Initialize database schema."""
        with self._get_connection() as conn:
            conn.executescript(self.SCHEMA)
            conn.commit()

    @contextmanager
    def _get_connection(self):
        """Get database connection context manager."""
        conn = sqlite3.connect(str(self.db_path))
        conn.row_factory = sqlite3.Row
        try:
            yield conn
        finally:
            conn.close()

    def write_metrics(
        self,
        metrics: List[Any]  # Union[ExecutionMetrics, StepMetrics, ResourceMetrics]
    ) -> None:
        """
        Write metrics to time-series storage.

        Args:
            metrics: List of metrics to write
        """
        if not metrics:
            return

        with self._get_connection() as conn:
            for metric in metrics:
                if isinstance(metric, ExecutionMetrics):
                    self._write_execution_metric(conn, metric)
                elif isinstance(metric, StepMetrics):
                    self._write_step_metric(conn, metric)
                elif isinstance(metric, ResourceMetrics):
                    self._write_resource_metric(conn, metric)
            conn.commit()

        logger.debug(f"Wrote {len(metrics)} metrics to storage")

    def _write_execution_metric(self, conn: sqlite3.Connection, metric: ExecutionMetrics) -> None:
        """Write execution metric."""
        conn.execute("""
            INSERT OR REPLACE INTO execution_metrics
            (execution_id, workflow_id, started_at, completed_at, duration_ms,
             status, steps_total, steps_completed, steps_failed, error_message, context)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, (
            metric.execution_id,
            metric.workflow_id,
            metric.started_at.isoformat(),
            metric.completed_at.isoformat() if metric.completed_at else None,
            metric.duration_ms,
            metric.status,
            metric.steps_total,
            metric.steps_completed,
            metric.steps_failed,
            metric.error_message,
            json.dumps(metric.context)
        ))

    def _write_step_metric(self, conn: sqlite3.Connection, metric: StepMetrics) -> None:
        """Write step metric."""
        conn.execute("""
            INSERT OR REPLACE INTO step_metrics
            (step_id, execution_id, workflow_id, node_id, action_type, target_element,
             started_at, completed_at, duration_ms, status, confidence_score,
             retry_count, error_details)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, (
            metric.step_id,
            metric.execution_id,
            metric.workflow_id,
            metric.node_id,
            metric.action_type,
            metric.target_element,
            metric.started_at.isoformat(),
            metric.completed_at.isoformat(),
            metric.duration_ms,
            metric.status,
            metric.confidence_score,
            metric.retry_count,
            metric.error_details
        ))

    def _write_resource_metric(self, conn: sqlite3.Connection, metric: ResourceMetrics) -> None:
        """Write resource metric."""
        conn.execute("""
            INSERT INTO resource_metrics
            (timestamp, workflow_id, execution_id, cpu_percent, memory_mb,
             gpu_utilization, gpu_memory_mb, disk_io_mb)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        """, (
            metric.timestamp.isoformat(),
            metric.workflow_id,
            metric.execution_id,
            metric.cpu_percent,
            metric.memory_mb,
            metric.gpu_utilization,
            metric.gpu_memory_mb,
            metric.disk_io_mb
        ))

    def query_range(
        self,
        start_time: datetime,
        end_time: datetime,
        workflow_id: Optional[str] = None,
        metric_types: Optional[List[str]] = None
    ) -> Dict[str, List[Dict]]:
        """
        Query metrics within a time range.

        Args:
            start_time: Start of time range
            end_time: End of time range
            workflow_id: Optional workflow ID filter
            metric_types: Optional list of metric types ('execution', 'step', 'resource')

        Returns:
            Dictionary with metric type as key and list of metrics as value
        """
        results = {}
        metric_types = metric_types or ['execution', 'step', 'resource']

        with self._get_connection() as conn:
            if 'execution' in metric_types:
                results['execution'] = self._query_execution_metrics(
                    conn, start_time, end_time, workflow_id
                )

            if 'step' in metric_types:
                results['step'] = self._query_step_metrics(
                    conn, start_time, end_time, workflow_id
                )

            if 'resource' in metric_types:
                results['resource'] = self._query_resource_metrics(
                    conn, start_time, end_time, workflow_id
                )

        return results

    def _query_execution_metrics(
        self,
        conn: sqlite3.Connection,
        start_time: datetime,
        end_time: datetime,
        workflow_id: Optional[str]
    ) -> List[Dict]:
        """Query execution metrics."""
        query = """
            SELECT * FROM execution_metrics
            WHERE started_at >= ? AND started_at <= ?
        """
        params = [start_time.isoformat(), end_time.isoformat()]

        if workflow_id:
            query += " AND workflow_id = ?"
            params.append(workflow_id)

        query += " ORDER BY started_at"

        cursor = conn.execute(query, params)
        return [dict(row) for row in cursor.fetchall()]

    def _query_step_metrics(
        self,
        conn: sqlite3.Connection,
        start_time: datetime,
        end_time: datetime,
        workflow_id: Optional[str]
    ) -> List[Dict]:
        """Query step metrics."""
        query = """
            SELECT * FROM step_metrics
            WHERE started_at >= ? AND started_at <= ?
        """
        params = [start_time.isoformat(), end_time.isoformat()]

        if workflow_id:
            query += " AND workflow_id = ?"
            params.append(workflow_id)

        query += " ORDER BY started_at"

        cursor = conn.execute(query, params)
        return [dict(row) for row in cursor.fetchall()]

    def _query_resource_metrics(
        self,
        conn: sqlite3.Connection,
        start_time: datetime,
        end_time: datetime,
        workflow_id: Optional[str]
    ) -> List[Dict]:
        """Query resource metrics."""
        query = """
            SELECT * FROM resource_metrics
            WHERE timestamp >= ? AND timestamp <= ?
        """
        params = [start_time.isoformat(), end_time.isoformat()]

        if workflow_id:
            query += " AND workflow_id = ?"
            params.append(workflow_id)

        query += " ORDER BY timestamp"

        cursor = conn.execute(query, params)
        return [dict(row) for row in cursor.fetchall()]

    def aggregate(
        self,
        metric: str,
        aggregation: str,  # 'avg', 'sum', 'count', 'min', 'max'
        group_by: List[str],
        start_time: datetime,
        end_time: datetime,
        filters: Optional[Dict] = None
    ) -> List[Dict]:
        """
        Aggregate metrics with grouping.

        Args:
            metric: Metric field to aggregate
            aggregation: Aggregation function
            group_by: Fields to group by
            start_time: Start of time range
            end_time: End of time range
            filters: Optional filters

        Returns:
            List of aggregated results
        """
        # Determine table based on metric
        if metric in ['duration_ms', 'steps_total', 'steps_completed', 'steps_failed']:
            table = 'execution_metrics'
            time_field = 'started_at'
        elif metric in ['confidence_score', 'retry_count']:
            table = 'step_metrics'
            time_field = 'started_at'
        elif metric in ['cpu_percent', 'memory_mb', 'gpu_utilization']:
            table = 'resource_metrics'
            time_field = 'timestamp'
        else:
            raise ValueError(f"Unknown metric: {metric}")

        # Build query. NOTE: metric and table are whitelisted above; group_by
        # and filter keys are expected to be trusted column names from internal
        # callers, and only filter values are parameterized.
        agg_func = aggregation.upper()
        group_fields = ', '.join(group_by)

        query = f"""
            SELECT {group_fields}, {agg_func}({metric}) as value
            FROM {table}
            WHERE {time_field} >= ? AND {time_field} <= ?
        """
        params = [start_time.isoformat(), end_time.isoformat()]

        # Add filters
        if filters:
            for key, value in filters.items():
                query += f" AND {key} = ?"
                params.append(value)

        query += f" GROUP BY {group_fields}"

        with self._get_connection() as conn:
            cursor = conn.execute(query, params)
            return [dict(row) for row in cursor.fetchall()]
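A minimal usage sketch for the store above (not part of the commit; it assumes ExecutionMetrics accepts keyword fields matching the columns in the schema, with defaults for the optional ones, since the dataclass itself is defined in metrics_collector):

# Illustrative only: write one execution metric, then query it back.
from datetime import datetime, timedelta
from pathlib import Path

from core.analytics.collection.metrics_collector import ExecutionMetrics
from core.analytics.storage.timeseries_store import TimeSeriesStore

store = TimeSeriesStore(Path("data/analytics"))   # illustrative path
metric = ExecutionMetrics(
    execution_id='exec-001',                      # field names assumed from the schema
    workflow_id='wf-001',
    started_at=datetime.now(),
    status='success',
)
store.write_metrics([metric])
window = store.query_range(
    start_time=datetime.now() - timedelta(hours=1),
    end_time=datetime.now(),
    metric_types=['execution'],
)
print(len(window['execution']), "execution records")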