v1.0 - Stable release: multi-PC, UI-DETR-1 detection, 3 execution modes

- Frontend v4 accessible on the local network (192.168.1.40)
- Open ports: 3002 (frontend), 5001 (backend), 5004 (dashboard)
- Ollama GPU support functional
- Interactive self-healing
- Confidence dashboard

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Author: Dom
Date: 2026-01-29 11:23:51 +01:00
parent 21bfa3b337
commit a27b74cf22
1595 changed files with 412691 additions and 400 deletions


@@ -0,0 +1,14 @@
"""Analytics engine components."""
from .performance_analyzer import PerformanceAnalyzer, PerformanceStats
from .anomaly_detector import AnomalyDetector, Anomaly
from .insight_generator import InsightGenerator, Insight
__all__ = [
'PerformanceAnalyzer',
'PerformanceStats',
'AnomalyDetector',
'Anomaly',
'InsightGenerator',
'Insight',
]
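
A minimal wiring sketch (not part of the commit) showing how these components compose. The import paths and the TimeSeriesStore constructor are assumptions; the package layout and the store module are not shown in this diff.

from analytics import PerformanceAnalyzer, AnomalyDetector, InsightGenerator
from analytics.storage.timeseries_store import TimeSeriesStore  # hypothetical path

store = TimeSeriesStore()  # constructor arguments are an assumption
analyzer = PerformanceAnalyzer(store)  # per-workflow statistics
detector = AnomalyDetector(store, sensitivity=2.0)  # flags values beyond 2 std devs
generator = InsightGenerator(analyzer, detector)

for insight in generator.generate_insights('wf-001', analysis_period_days=30):
    print(f"[{insight.priority_score:.2f}] {insight.title}")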


@@ -0,0 +1,311 @@
"""Anomaly detection for workflow execution."""
import logging
import statistics
from dataclasses import dataclass, field
from typing import List, Dict, Any, Optional
from datetime import datetime, timedelta
import hashlib
from ..storage.timeseries_store import TimeSeriesStore
logger = logging.getLogger(__name__)
@dataclass
class Anomaly:
"""Detected anomaly."""
anomaly_id: str
workflow_id: str
metric_name: str
detected_at: datetime
severity: float # 0.0 to 1.0
deviation: float
baseline_value: float
actual_value: float
description: str
recommended_action: Optional[str] = None
metadata: Dict[str, Any] = field(default_factory=dict)
def to_dict(self) -> Dict[str, Any]:
"""Convert to dictionary."""
return {
'anomaly_id': self.anomaly_id,
'workflow_id': self.workflow_id,
'metric_name': self.metric_name,
'detected_at': self.detected_at.isoformat(),
'severity': self.severity,
'deviation': self.deviation,
'baseline_value': self.baseline_value,
'actual_value': self.actual_value,
'description': self.description,
'recommended_action': self.recommended_action,
'metadata': self.metadata
}
class AnomalyDetector:
"""Detects anomalies in workflow execution using statistical methods."""
def __init__(
self,
time_series_store: TimeSeriesStore,
sensitivity: float = 2.0 # Standard deviations
):
"""
Initialize anomaly detector.
Args:
time_series_store: Time series storage
sensitivity: Number of standard deviations for anomaly threshold
"""
self.store = time_series_store
self.sensitivity = sensitivity
self.baselines: Dict[str, Dict] = {}
logger.info(f"AnomalyDetector initialized (sensitivity={sensitivity})")
def detect_anomalies(
self,
workflow_id: str,
metrics: List[Dict],
metric_name: str = 'duration_ms'
) -> List[Anomaly]:
"""
Detect anomalies in metrics.
Args:
workflow_id: Workflow identifier
metrics: List of metric dictionaries
metric_name: Name of metric to analyze
Returns:
List of detected anomalies
"""
if not metrics:
return []
# Get or create baseline
baseline = self._get_baseline(workflow_id, metric_name)
if not baseline:
# Not enough data for baseline
return []
anomalies = []
for metric in metrics:
value = metric.get(metric_name)
if value is None:
continue
# Calculate deviation from baseline
deviation = abs(value - baseline['mean']) / baseline['std_dev'] if baseline['std_dev'] > 0 else 0
# Check if anomaly
if deviation > self.sensitivity:
severity = min(deviation / (self.sensitivity * 2), 1.0)
anomaly = Anomaly(
anomaly_id=self._generate_anomaly_id(workflow_id, metric_name, metric),
workflow_id=workflow_id,
metric_name=metric_name,
detected_at=datetime.now(),
severity=severity,
deviation=deviation,
baseline_value=baseline['mean'],
actual_value=value,
description=self._generate_description(metric_name, value, baseline['mean'], deviation),
recommended_action=self._generate_recommendation(metric_name, value, baseline['mean']),
metadata=metric
)
anomalies.append(anomaly)
logger.info(f"Anomaly detected: {anomaly.description}")
return anomalies
def update_baseline(
self,
workflow_id: str,
stable_period_days: int = 7,
metric_name: str = 'duration_ms'
) -> None:
"""
Update baseline from stable period.
Args:
workflow_id: Workflow identifier
stable_period_days: Number of days for baseline calculation
metric_name: Metric to calculate baseline for
"""
end_time = datetime.now()
start_time = end_time - timedelta(days=stable_period_days)
# Query metrics
metrics = self.store.query_range(
start_time=start_time,
end_time=end_time,
workflow_id=workflow_id,
metric_types=['execution']
)
executions = metrics.get('execution', [])
if not executions:
logger.warning(f"No data for baseline calculation: {workflow_id}")
return
# Extract values
values = [e.get(metric_name) for e in executions if e.get(metric_name) is not None]
if len(values) < 10: # Minimum sample size
logger.warning(f"Insufficient data for baseline: {workflow_id} ({len(values)} samples)")
return
# Calculate baseline statistics
mean = statistics.mean(values)
std_dev = statistics.stdev(values) if len(values) > 1 else 0.0
median = statistics.median(values)
baseline_key = f"{workflow_id}:{metric_name}"
self.baselines[baseline_key] = {
'mean': mean,
'std_dev': std_dev,
'median': median,
'sample_size': len(values),
'updated_at': datetime.now(),
'period_days': stable_period_days
}
logger.info(f"Baseline updated for {workflow_id}: mean={mean:.2f}, std_dev={std_dev:.2f}")
def correlate_anomalies(
self,
anomalies: List[Anomaly],
time_window_minutes: int = 30
) -> List[List[Anomaly]]:
"""
Correlate related anomalies within a time window.
Args:
anomalies: List of anomalies to correlate
time_window_minutes: Time window for correlation
Returns:
List of correlated anomaly groups
"""
if not anomalies:
return []
# Sort by detection time
sorted_anomalies = sorted(anomalies, key=lambda a: a.detected_at)
groups = []
current_group = [sorted_anomalies[0]]
for anomaly in sorted_anomalies[1:]:
# Check if within time window of last anomaly in current group
time_diff = (anomaly.detected_at - current_group[-1].detected_at).total_seconds() / 60
if time_diff <= time_window_minutes:
current_group.append(anomaly)
else:
# Start new group
if len(current_group) > 1: # Only keep groups with multiple anomalies
groups.append(current_group)
current_group = [anomaly]
# Add last group if it has multiple anomalies
if len(current_group) > 1:
groups.append(current_group)
return groups
def escalate_anomaly(
self,
anomaly: Anomaly,
duration_minutes: int,
impact_score: float
) -> Dict[str, Any]:
"""
Escalate an anomaly based on duration and impact.
Args:
anomaly: Anomaly to escalate
duration_minutes: How long the anomaly has persisted
impact_score: Impact score (0.0 to 1.0)
Returns:
Escalation information
"""
# Calculate escalation level
escalation_score = (anomaly.severity + impact_score) / 2
escalation_score *= min(duration_minutes / 60, 2.0)  # Scale by duration in hours, capped at 2x after two hours
if escalation_score > 0.8:
level = 'critical'
elif escalation_score > 0.5:
level = 'high'
elif escalation_score > 0.3:
level = 'medium'
else:
level = 'low'
return {
'anomaly_id': anomaly.anomaly_id,
'escalation_level': level,
'escalation_score': min(escalation_score, 1.0),
'duration_minutes': duration_minutes,
'impact_score': impact_score,
'requires_immediate_action': escalation_score > 0.8
}
def _get_baseline(self, workflow_id: str, metric_name: str) -> Optional[Dict]:
"""Get baseline for workflow and metric."""
baseline_key = f"{workflow_id}:{metric_name}"
if baseline_key not in self.baselines:
# Try to calculate baseline
self.update_baseline(workflow_id, metric_name=metric_name)
return self.baselines.get(baseline_key)
def _generate_anomaly_id(self, workflow_id: str, metric_name: str, metric: Dict) -> str:
"""Generate unique anomaly ID."""
data = f"{workflow_id}:{metric_name}:{metric.get('execution_id', '')}:{datetime.now().isoformat()}"
return hashlib.md5(data.encode()).hexdigest()[:16]
def _generate_description(
self,
metric_name: str,
actual_value: float,
baseline_value: float,
deviation: float
) -> str:
"""Generate human-readable anomaly description."""
percent_diff = abs((actual_value - baseline_value) / baseline_value * 100) if baseline_value > 0 else 0
direction = "higher" if actual_value > baseline_value else "lower"
return (
f"{metric_name} is {percent_diff:.1f}% {direction} than baseline "
f"({actual_value:.2f} vs {baseline_value:.2f}, {deviation:.1f} std devs)"
)
def _generate_recommendation(
self,
metric_name: str,
actual_value: float,
baseline_value: float
) -> str:
"""Generate recommended action for anomaly."""
if actual_value > baseline_value:
if metric_name == 'duration_ms':
return "Investigate performance degradation. Check for resource constraints or code changes."
elif metric_name == 'error_rate':
return "Investigate error spike. Check logs and recent deployments."
elif metric_name in ['cpu_percent', 'memory_mb']:
return "Investigate resource usage spike. Check for memory leaks or inefficient operations."
else:
if metric_name == 'success_rate':
return "Investigate success rate drop. Check for system issues or data quality problems."
return "Monitor the situation and investigate if anomaly persists."


@@ -0,0 +1,301 @@
"""Automated insight generation for workflows."""
import logging
import hashlib
from dataclasses import dataclass, field
from typing import List, Dict, Any, Optional
from datetime import datetime, timedelta
from .performance_analyzer import PerformanceAnalyzer, PerformanceStats
from .anomaly_detector import AnomalyDetector
logger = logging.getLogger(__name__)
@dataclass
class Insight:
"""Generated insight with recommendation."""
insight_id: str
workflow_id: str
category: str # 'performance', 'reliability', 'resource', 'best_practice'
title: str
description: str
recommendation: str
expected_impact: str
ease_of_implementation: str # 'easy', 'medium', 'hard'
priority_score: float
supporting_data: Dict[str, Any]
created_at: datetime
implemented: bool = False
actual_impact: Optional[Dict] = None
def to_dict(self) -> Dict[str, Any]:
"""Convert to dictionary."""
return {
'insight_id': self.insight_id,
'workflow_id': self.workflow_id,
'category': self.category,
'title': self.title,
'description': self.description,
'recommendation': self.recommendation,
'expected_impact': self.expected_impact,
'ease_of_implementation': self.ease_of_implementation,
'priority_score': self.priority_score,
'supporting_data': self.supporting_data,
'created_at': self.created_at.isoformat(),
'implemented': self.implemented,
'actual_impact': self.actual_impact
}
class InsightGenerator:
"""Generates automated insights and recommendations."""
def __init__(
self,
performance_analyzer: PerformanceAnalyzer,
anomaly_detector: AnomalyDetector
):
"""
Initialize insight generator.
Args:
performance_analyzer: Performance analyzer instance
anomaly_detector: Anomaly detector instance
"""
self.performance_analyzer = performance_analyzer
self.anomaly_detector = anomaly_detector
self._insight_implementations: Dict[str, Dict] = {}
logger.info("InsightGenerator initialized")
def generate_insights(
self,
workflow_id: str,
analysis_period_days: int = 30
) -> List[Insight]:
"""
Generate insights for a workflow.
Args:
workflow_id: Workflow identifier
analysis_period_days: Number of days to analyze
Returns:
List of generated insights
"""
insights = []
end_time = datetime.now()
start_time = end_time - timedelta(days=analysis_period_days)
# Analyze performance
perf_stats = self.performance_analyzer.analyze_workflow(
workflow_id,
start_time,
end_time
)
if perf_stats:
# Generate performance insights
insights.extend(self._generate_performance_insights(perf_stats))
# Generate bottleneck insights
insights.extend(self._generate_bottleneck_insights(perf_stats))
# Check for performance degradation
degradation = self.performance_analyzer.detect_performance_degradation(
workflow_id,
baseline_period=timedelta(days=7),
current_period=timedelta(days=1)
)
if degradation:
insights.append(self._generate_degradation_insight(degradation))
# Prioritize insights
insights = self.prioritize_insights(insights)
return insights
def prioritize_insights(self, insights: List[Insight]) -> List[Insight]:
"""
Prioritize insights by impact and ease.
Args:
insights: List of insights to prioritize
Returns:
Sorted list of insights
"""
# Calculate priority scores
for insight in insights:
impact_score = self._calculate_impact_score(insight.expected_impact)
ease_score = self._calculate_ease_score(insight.ease_of_implementation)
# Priority = Impact * Ease (higher is better)
insight.priority_score = impact_score * ease_score
# Sort by priority (descending)
return sorted(insights, key=lambda i: i.priority_score, reverse=True)
def track_insight_implementation(
self,
insight_id: str,
implemented: bool,
actual_impact: Optional[Dict] = None
) -> None:
"""
Track insight implementation and measure impact.
Args:
insight_id: Insight identifier
implemented: Whether insight was implemented
actual_impact: Measured impact after implementation
"""
self._insight_implementations[insight_id] = {
'implemented': implemented,
'actual_impact': actual_impact,
'tracked_at': datetime.now()
}
logger.info(f"Tracked implementation for insight {insight_id}")
def _generate_performance_insights(self, stats: PerformanceStats) -> List[Insight]:
"""Generate insights from performance statistics."""
insights = []
# High variability insight
if stats.std_dev_ms > stats.avg_duration_ms * 0.5:
insights.append(Insight(
insight_id=self._generate_id(stats.workflow_id, 'high_variability'),
workflow_id=stats.workflow_id,
category='performance',
title='High Performance Variability',
description=(
f"Execution time varies significantly (std dev: {stats.std_dev_ms:.0f}ms, "
f"avg: {stats.avg_duration_ms:.0f}ms). This indicates inconsistent performance."
),
recommendation=(
"Investigate causes of variability. Check for: "
"1) Resource contention, 2) Network latency, 3) Data size variations, "
"4) External service dependencies."
),
expected_impact="Reduce execution time variability by 30-50%",
ease_of_implementation='medium',
priority_score=0.0,
supporting_data={'stats': stats.to_dict()},
created_at=datetime.now()
))
# Slow p99 insight
if stats.p99_duration_ms > stats.median_duration_ms * 3:
insights.append(Insight(
insight_id=self._generate_id(stats.workflow_id, 'slow_p99'),
workflow_id=stats.workflow_id,
category='performance',
title='Slow 99th Percentile Performance',
description=(
f"99th percentile ({stats.p99_duration_ms:.0f}ms) is 3x slower than median "
f"({stats.median_duration_ms:.0f}ms). Some executions are significantly slower."
),
recommendation=(
"Analyze slowest executions to identify outliers. "
"Consider adding timeouts or optimizing worst-case scenarios."
),
expected_impact="Improve worst-case performance by 40-60%",
ease_of_implementation='medium',
priority_score=0.0,
supporting_data={'stats': stats.to_dict()},
created_at=datetime.now()
))
return insights
def _generate_bottleneck_insights(self, stats: PerformanceStats) -> List[Insight]:
"""Generate insights from bottleneck analysis."""
insights = []
if not stats.slowest_steps:
return insights
# Top bottleneck
top_bottleneck = stats.slowest_steps[0]
insights.append(Insight(
insight_id=self._generate_id(stats.workflow_id, 'top_bottleneck'),
workflow_id=stats.workflow_id,
category='performance',
title=f"Bottleneck: {top_bottleneck['action_type']} on {top_bottleneck['node_id']}",
description=(
f"Step '{top_bottleneck['action_type']}' takes {top_bottleneck['avg_duration_ms']:.0f}ms "
f"on average (p95: {top_bottleneck['p95_duration_ms']:.0f}ms). "
f"This is the slowest step in the workflow."
),
recommendation=(
f"Optimize the '{top_bottleneck['action_type']}' action. "
"Consider: 1) Caching results, 2) Parallel execution, "
"3) Reducing wait times, 4) Optimizing selectors."
),
expected_impact=f"Reduce overall workflow time by {(top_bottleneck['avg_duration_ms'] / stats.avg_duration_ms * 100 * 0.5):.0f}%",
ease_of_implementation='easy',
priority_score=0.0,
supporting_data={'bottleneck': top_bottleneck},
created_at=datetime.now()
))
return insights
def _generate_degradation_insight(self, degradation: Dict) -> Insight:
"""Generate insight from performance degradation."""
return Insight(
insight_id=self._generate_id(degradation['workflow_id'], 'degradation'),
workflow_id=degradation['workflow_id'],
category='performance',
title='Performance Degradation Detected',
description=(
f"Performance has degraded by {degradation['percent_change']:.1f}% "
f"(from {degradation['baseline_avg_ms']:.0f}ms to {degradation['current_avg_ms']:.0f}ms)."
),
recommendation=(
"Investigate recent changes: 1) Code deployments, 2) Data volume increases, "
"3) Infrastructure changes, 4) External service degradation."
),
expected_impact="Restore baseline performance",
ease_of_implementation='medium',
priority_score=0.0,
supporting_data=degradation,
created_at=datetime.now()
)
def _calculate_impact_score(self, expected_impact: str) -> float:
"""Calculate impact score from expected impact description."""
impact_lower = expected_impact.lower()
# Look for percentage improvements
if '50%' in impact_lower or '60%' in impact_lower:
return 1.0
elif '30%' in impact_lower or '40%' in impact_lower:
return 0.8
elif '20%' in impact_lower:
return 0.6
elif '10%' in impact_lower:
return 0.4
else:
return 0.5 # Default
def _calculate_ease_score(self, ease: str) -> float:
"""Calculate ease score from ease of implementation."""
if ease == 'easy':
return 1.0
elif ease == 'medium':
return 0.6
elif ease == 'hard':
return 0.3
else:
return 0.5
def _generate_id(self, workflow_id: str, insight_type: str) -> str:
"""Generate unique insight ID."""
data = f"{workflow_id}:{insight_type}:{datetime.now().date().isoformat()}"
return hashlib.md5(data.encode()).hexdigest()[:16]
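
A worked sketch of the impact x ease prioritization above, assuming the classes in this file are in scope. The collaborators are passed as None, which holds only because prioritize_insights() never touches them.

from datetime import datetime

def make_insight(title, impact, ease):
    # Helper for illustration only; fills the required Insight fields minimally.
    return Insight(
        insight_id=title, workflow_id='wf-001', category='performance',
        title=title, description='', recommendation='',
        expected_impact=impact, ease_of_implementation=ease,
        priority_score=0.0, supporting_data={}, created_at=datetime.now(),
    )

gen = InsightGenerator(None, None)
ranked = gen.prioritize_insights([
    make_insight('big-but-hard', 'Reduce execution time by 50%', 'hard'),   # 1.0 * 0.3
    make_insight('small-but-easy', 'Reduce execution time by 20%', 'easy'), # 0.6 * 1.0
])
print([(i.title, i.priority_score) for i in ranked])
# [('small-but-easy', 0.6), ('big-but-hard', 0.3)]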


@@ -0,0 +1,359 @@
"""Performance analysis for workflows."""
import logging
import statistics
from dataclasses import dataclass
from typing import List, Dict, Any, Optional
from datetime import datetime, timedelta
from ..storage.timeseries_store import TimeSeriesStore
logger = logging.getLogger(__name__)
@dataclass
class PerformanceStats:
"""Performance statistics for a workflow."""
workflow_id: str
time_period: str
execution_count: int
avg_duration_ms: float
median_duration_ms: float
p95_duration_ms: float
p99_duration_ms: float
min_duration_ms: float
max_duration_ms: float
std_dev_ms: float
slowest_steps: List[Dict]
def to_dict(self) -> Dict[str, Any]:
"""Convert to dictionary."""
return {
'workflow_id': self.workflow_id,
'time_period': self.time_period,
'execution_count': self.execution_count,
'avg_duration_ms': self.avg_duration_ms,
'median_duration_ms': self.median_duration_ms,
'p95_duration_ms': self.p95_duration_ms,
'p99_duration_ms': self.p99_duration_ms,
'min_duration_ms': self.min_duration_ms,
'max_duration_ms': self.max_duration_ms,
'std_dev_ms': self.std_dev_ms,
'slowest_steps': self.slowest_steps
}
class PerformanceAnalyzer:
"""Analyzes workflow performance metrics."""
def __init__(self, time_series_store: TimeSeriesStore):
"""
Initialize performance analyzer.
Args:
time_series_store: Time series storage for metrics
"""
self.store = time_series_store
logger.info("PerformanceAnalyzer initialized")
def analyze_workflow(
self,
workflow_id: str,
start_time: datetime,
end_time: datetime
) -> Optional[PerformanceStats]:
"""
Analyze performance for a workflow.
Args:
workflow_id: Workflow identifier
start_time: Start of analysis period
end_time: End of analysis period
Returns:
PerformanceStats or None if no data
"""
# Query execution metrics
metrics = self.store.query_range(
start_time=start_time,
end_time=end_time,
workflow_id=workflow_id,
metric_types=['execution']
)
executions = metrics.get('execution', [])
if not executions:
logger.warning(f"No execution data for workflow {workflow_id}")
return None
# Filter completed executions with duration
completed = [
e for e in executions
if e.get('status') == 'completed' and e.get('duration_ms') is not None
]
if not completed:
logger.warning(f"No completed executions for workflow {workflow_id}")
return None
# Extract durations
durations = [e['duration_ms'] for e in completed]
# Calculate statistics
avg_duration = statistics.mean(durations)
median_duration = statistics.median(durations)
min_duration = min(durations)
max_duration = max(durations)
std_dev = statistics.stdev(durations) if len(durations) > 1 else 0.0
# Calculate percentiles
sorted_durations = sorted(durations)
p95_duration = self._percentile(sorted_durations, 0.95)
p99_duration = self._percentile(sorted_durations, 0.99)
# Identify slowest steps
slowest_steps = self.identify_bottlenecks(
workflow_id,
start_time,
end_time,
threshold_percentile=0.95
)
time_period = f"{start_time.isoformat()} to {end_time.isoformat()}"
return PerformanceStats(
workflow_id=workflow_id,
time_period=time_period,
execution_count=len(completed),
avg_duration_ms=avg_duration,
median_duration_ms=median_duration,
p95_duration_ms=p95_duration,
p99_duration_ms=p99_duration,
min_duration_ms=min_duration,
max_duration_ms=max_duration,
std_dev_ms=std_dev,
slowest_steps=slowest_steps[:5] # Top 5 slowest
)
def identify_bottlenecks(
self,
workflow_id: str,
start_time: datetime,
end_time: datetime,
threshold_percentile: float = 0.95
) -> List[Dict]:
"""
Identify bottleneck steps in a workflow.
Args:
workflow_id: Workflow identifier
start_time: Start of analysis period
end_time: End of analysis period
threshold_percentile: Percentile threshold for bottlenecks
Returns:
List of bottleneck steps sorted by duration
"""
# Query step metrics
metrics = self.store.query_range(
start_time=start_time,
end_time=end_time,
workflow_id=workflow_id,
metric_types=['step']
)
steps = metrics.get('step', [])
if not steps:
return []
# Group by node_id and action_type
step_groups: Dict[tuple, List[float]] = {}
for step in steps:
key = (step['node_id'], step['action_type'])
if key not in step_groups:
step_groups[key] = []
step_groups[key].append(step['duration_ms'])
# Calculate statistics for each group
bottlenecks = []
for (node_id, action_type), durations in step_groups.items():
if not durations:
continue
avg_duration = statistics.mean(durations)
p95_duration = self._percentile(sorted(durations), threshold_percentile)
bottlenecks.append({
'node_id': node_id,
'action_type': action_type,
'avg_duration_ms': avg_duration,
'p95_duration_ms': p95_duration,
'execution_count': len(durations),
'max_duration_ms': max(durations)
})
# Sort by p95 duration (descending)
bottlenecks.sort(key=lambda x: x['p95_duration_ms'], reverse=True)
return bottlenecks
def detect_performance_degradation(
self,
workflow_id: str,
baseline_period: timedelta,
current_period: timedelta,
threshold_percent: float = 20.0
) -> Optional[Dict]:
"""
Detect performance degradation compared to baseline.
Args:
workflow_id: Workflow identifier
baseline_period: Duration of baseline period (e.g., last 7 days)
current_period: Duration of current period (e.g., last 24 hours)
threshold_percent: Threshold for degradation alert (%)
Returns:
Degradation info dict or None if no degradation
"""
now = datetime.now()
# Baseline period (older)
baseline_end = now - current_period
baseline_start = baseline_end - baseline_period
# Current period (recent)
current_start = now - current_period
current_end = now
# Analyze both periods
baseline_stats = self.analyze_workflow(
workflow_id,
baseline_start,
baseline_end
)
current_stats = self.analyze_workflow(
workflow_id,
current_start,
current_end
)
if not baseline_stats or not current_stats:
logger.warning(f"Insufficient data for degradation detection: {workflow_id}")
return None
# Calculate percentage change
baseline_avg = baseline_stats.avg_duration_ms
current_avg = current_stats.avg_duration_ms
if baseline_avg == 0:
return None
percent_change = ((current_avg - baseline_avg) / baseline_avg) * 100
# Check if degradation exceeds threshold
if percent_change > threshold_percent:
return {
'workflow_id': workflow_id,
'degradation_detected': True,
'baseline_avg_ms': baseline_avg,
'current_avg_ms': current_avg,
'percent_change': percent_change,
'threshold_percent': threshold_percent,
'baseline_period': str(baseline_period),
'current_period': str(current_period),
'severity': 'high' if percent_change > threshold_percent * 2 else 'medium'
}
return None
def compare_workflows(
self,
workflow_ids: List[str],
start_time: datetime,
end_time: datetime
) -> Dict[str, PerformanceStats]:
"""
Compare performance across multiple workflows.
Args:
workflow_ids: List of workflow identifiers
start_time: Start of analysis period
end_time: End of analysis period
Returns:
Dictionary mapping workflow_id to PerformanceStats
"""
results = {}
for workflow_id in workflow_ids:
stats = self.analyze_workflow(workflow_id, start_time, end_time)
if stats:
results[workflow_id] = stats
return results
def get_performance_trend(
self,
workflow_id: str,
start_time: datetime,
end_time: datetime,
bucket_size: timedelta = timedelta(hours=1)
) -> List[Dict]:
"""
Get performance trend over time with bucketing.
Args:
workflow_id: Workflow identifier
start_time: Start of analysis period
end_time: End of analysis period
bucket_size: Size of time buckets
Returns:
List of performance data points over time
"""
trend = []
current = start_time
while current < end_time:
bucket_end = min(current + bucket_size, end_time)
stats = self.analyze_workflow(workflow_id, current, bucket_end)
if stats:
trend.append({
'timestamp': current.isoformat(),
'avg_duration_ms': stats.avg_duration_ms,
'median_duration_ms': stats.median_duration_ms,
'execution_count': stats.execution_count
})
current = bucket_end
return trend
@staticmethod
def _percentile(sorted_data: List[float], percentile: float) -> float:
"""
Calculate percentile from sorted data.
Args:
sorted_data: Sorted list of values
percentile: Percentile to calculate (0.0 to 1.0)
Returns:
Percentile value
"""
if not sorted_data:
return 0.0
if len(sorted_data) == 1:
return sorted_data[0]
# Linear interpolation
index = percentile * (len(sorted_data) - 1)
lower = int(index)
upper = min(lower + 1, len(sorted_data) - 1)
weight = index - lower
return sorted_data[lower] * (1 - weight) + sorted_data[upper] * weight
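
Because _percentile() is a staticmethod, its linear interpolation is easy to check directly (assuming the class above is in scope):

data = [100.0, 200.0, 300.0, 400.0, 500.0]
# p50: index = 0.5 * 4 = 2.0, which falls exactly on data[2]
print(PerformanceAnalyzer._percentile(data, 0.50))  # 300.0
# p90: index = 0.9 * 4 = 3.6 -> 400 * 0.4 + 500 * 0.6
print(PerformanceAnalyzer._percentile(data, 0.90))  # ~460.0, up to float rounding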


@@ -0,0 +1,334 @@
"""Success rate analytics for workflows."""
import logging
import statistics
from typing import Dict, List, Optional
from datetime import datetime, timedelta
from dataclasses import dataclass
from collections import defaultdict
from ..storage.timeseries_store import TimeSeriesStore
logger = logging.getLogger(__name__)
@dataclass
class SuccessRateStats:
"""Success rate statistics."""
workflow_id: str
total_executions: int
successful_executions: int
failed_executions: int
success_rate: float
failure_categories: Dict[str, int]
reliability_score: float
time_window_start: datetime
time_window_end: datetime
def to_dict(self) -> Dict:
"""Convert to dictionary."""
return {
'workflow_id': self.workflow_id,
'total_executions': self.total_executions,
'successful_executions': self.successful_executions,
'failed_executions': self.failed_executions,
'success_rate': self.success_rate,
'failure_categories': self.failure_categories,
'reliability_score': self.reliability_score,
'time_window_start': self.time_window_start.isoformat(),
'time_window_end': self.time_window_end.isoformat()
}
@dataclass
class ReliabilityRanking:
"""Workflow reliability ranking."""
workflow_id: str
reliability_score: float
success_rate: float
stability_score: float
total_executions: int
rank: int
def to_dict(self) -> Dict:
"""Convert to dictionary."""
return {
'workflow_id': self.workflow_id,
'reliability_score': self.reliability_score,
'success_rate': self.success_rate,
'stability_score': self.stability_score,
'total_executions': self.total_executions,
'rank': self.rank
}
class SuccessRateCalculator:
"""Calculate success rates and reliability metrics."""
def __init__(self, store: TimeSeriesStore):
"""
Initialize success rate calculator.
Args:
store: Time-series storage instance
"""
self.store = store
logger.info("SuccessRateCalculator initialized")
def calculate_success_rate(
self,
workflow_id: str,
time_window_hours: int = 24
) -> SuccessRateStats:
"""
Calculate success rate for a workflow.
Args:
workflow_id: Workflow identifier
time_window_hours: Time window in hours
Returns:
Success rate statistics
"""
end_time = datetime.now()
start_time = end_time - timedelta(hours=time_window_hours)
# Query execution metrics
metrics = self.store.query_range(
metric_type='execution',
start_time=start_time,
end_time=end_time,
filters={'workflow_id': workflow_id}
)
total = len(metrics)
successful = sum(1 for m in metrics if m.get('status') == 'success')
failed = total - successful
success_rate = (successful / total * 100) if total > 0 else 0.0
# Categorize failures
failure_categories = self._categorize_failures(
[m for m in metrics if m.get('status') != 'success']
)
# Calculate reliability score
reliability_score = self._calculate_reliability_score(
success_rate=success_rate,
total_executions=total,
failure_categories=failure_categories
)
return SuccessRateStats(
workflow_id=workflow_id,
total_executions=total,
successful_executions=successful,
failed_executions=failed,
success_rate=success_rate,
failure_categories=failure_categories,
reliability_score=reliability_score,
time_window_start=start_time,
time_window_end=end_time
)
def categorize_failures(
self,
workflow_id: str,
time_window_hours: int = 24
) -> Dict[str, int]:
"""
Categorize failures by type.
Args:
workflow_id: Workflow identifier
time_window_hours: Time window in hours
Returns:
Dictionary of failure categories and counts
"""
end_time = datetime.now()
start_time = end_time - timedelta(hours=time_window_hours)
# Query failed executions
metrics = self.store.query_range(
metric_type='execution',
start_time=start_time,
end_time=end_time,
filters={'workflow_id': workflow_id}
)
failed_metrics = [m for m in metrics if m.get('status') != 'success']
return self._categorize_failures(failed_metrics)
def _categorize_failures(self, failed_metrics: List[Dict]) -> Dict[str, int]:
"""
Categorize failures by error type.
Args:
failed_metrics: List of failed execution metrics
Returns:
Dictionary of categories and counts
"""
categories = defaultdict(int)
for metric in failed_metrics:
error_msg = metric.get('error_message', '').lower()
# Categorize by error type
if 'timeout' in error_msg:
categories['timeout'] += 1
elif 'not found' in error_msg or 'element' in error_msg:
categories['element_not_found'] += 1
elif 'permission' in error_msg or 'access' in error_msg:
categories['permission_error'] += 1
elif 'network' in error_msg or 'connection' in error_msg:
categories['network_error'] += 1
elif 'validation' in error_msg:
categories['validation_error'] += 1
else:
categories['other'] += 1
return dict(categories)
def rank_workflows_by_reliability(
self,
workflow_ids: Optional[List[str]] = None,
time_window_hours: int = 168 # 1 week
) -> List[ReliabilityRanking]:
"""
Rank workflows by reliability score.
Args:
workflow_ids: List of workflow IDs (None = all)
time_window_hours: Time window in hours
Returns:
List of reliability rankings sorted by score
"""
end_time = datetime.now()
start_time = end_time - timedelta(hours=time_window_hours)
# Get all workflows if not specified
if workflow_ids is None:
metrics = self.store.query_range(
metric_type='execution',
start_time=start_time,
end_time=end_time
)
workflow_ids = list(set(m.get('workflow_id') for m in metrics if m.get('workflow_id')))
# Calculate reliability for each workflow
rankings = []
for workflow_id in workflow_ids:
stats = self.calculate_success_rate(workflow_id, time_window_hours)
# Calculate stability score (consistency over time)
stability_score = self._calculate_stability_score(
workflow_id, start_time, end_time
)
rankings.append(ReliabilityRanking(
workflow_id=workflow_id,
reliability_score=stats.reliability_score,
success_rate=stats.success_rate,
stability_score=stability_score,
total_executions=stats.total_executions,
rank=0 # Will be set after sorting
))
# Sort by reliability score (descending)
rankings.sort(key=lambda r: r.reliability_score, reverse=True)
# Assign ranks
for i, ranking in enumerate(rankings, 1):
ranking.rank = i
return rankings
def _calculate_reliability_score(
self,
success_rate: float,
total_executions: int,
failure_categories: Dict[str, int]
) -> float:
"""
Calculate overall reliability score.
Args:
success_rate: Success rate percentage
total_executions: Total number of executions
failure_categories: Failure categories
Returns:
Reliability score (0-100)
"""
# Base score from success rate (70% weight)
base_score = success_rate * 0.7
# Execution volume bonus (up to 15 points at 100+ executions)
volume_bonus = min(total_executions / 100 * 15, 15)
# Failure diversity penalty (3 points per distinct failure type, capped at 15)
num_failure_types = len(failure_categories)
diversity_penalty = min(num_failure_types * 3, 15)
# Calculate final score
reliability_score = base_score + volume_bonus - diversity_penalty
# Clamp to 0-100
return max(0.0, min(100.0, reliability_score))
def _calculate_stability_score(
self,
workflow_id: str,
start_time: datetime,
end_time: datetime
) -> float:
"""
Calculate stability score (consistency over time).
Args:
workflow_id: Workflow identifier
start_time: Start of time window
end_time: End of time window
Returns:
Stability score (0-100)
"""
# Split time window into buckets
num_buckets = 7  # Seven buckets: daily granularity for the default one-week window
bucket_duration = (end_time - start_time) / num_buckets
bucket_success_rates = []
for i in range(num_buckets):
bucket_start = start_time + (bucket_duration * i)
bucket_end = bucket_start + bucket_duration
metrics = self.store.query_range(
metric_type='execution',
start_time=bucket_start,
end_time=bucket_end,
filters={'workflow_id': workflow_id}
)
if metrics:
successful = sum(1 for m in metrics if m.get('status') == 'success')
success_rate = (successful / len(metrics)) * 100
bucket_success_rates.append(success_rate)
if not bucket_success_rates:
return 0.0
# Calculate coefficient of variation (lower = more stable)
mean = statistics.mean(bucket_success_rates)
if mean == 0:
return 0.0
stdev = statistics.stdev(bucket_success_rates) if len(bucket_success_rates) > 1 else 0
cv = (stdev / mean) * 100
# Convert to stability score (lower CV = higher stability)
# CV of 0 = 100 stability, CV of 50+ = 0 stability
stability_score = max(0.0, 100.0 - (cv * 2))
return stability_score
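
A worked check of the reliability score arithmetic above, assuming the class is in scope. Calling the private _calculate_reliability_score() directly is for illustration only, and the store is passed as None because the scoring math never touches it.

calc = SuccessRateCalculator(None)
score = calc._calculate_reliability_score(
    success_rate=95.0,        # base score: 95 * 0.7 = 66.5
    total_executions=80,      # volume bonus: 80 / 100 * 15 = 12
    failure_categories={'timeout': 3, 'network_error': 1},  # penalty: 2 types * 3 = 6
)
print(score)  # ~72.5 (66.5 + 12 - 6), up to float rounding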