v1.0 - Stable release: multi-PC, UI-DETR-1 detection, 3 execution modes

- Frontend v4 accessible on the local network (192.168.1.40)
- Open ports: 3002 (frontend), 5001 (backend), 5004 (dashboard)
- Ollama GPU support functional
- Interactive self-healing
- Confidence dashboard

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Author: Dom
Date: 2026-01-29 11:23:51 +01:00
parent 21bfa3b337
commit a27b74cf22
1595 changed files with 412691 additions and 400 deletions


@@ -0,0 +1,14 @@
"""Analytics engine components."""
from .performance_analyzer import PerformanceAnalyzer, PerformanceStats
from .anomaly_detector import AnomalyDetector, Anomaly
from .insight_generator import InsightGenerator, Insight
__all__ = [
'PerformanceAnalyzer',
'PerformanceStats',
'AnomalyDetector',
'Anomaly',
'InsightGenerator',
'Insight',
]
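
A minimal wiring sketch (not part of the commit) showing how these components compose. The import paths and the TimeSeriesStore constructor are assumptions; the package layout and the store module are not shown in this diff.

from analytics import PerformanceAnalyzer, AnomalyDetector, InsightGenerator
from analytics.storage.timeseries_store import TimeSeriesStore  # hypothetical path

store = TimeSeriesStore()  # constructor arguments are an assumption
analyzer = PerformanceAnalyzer(store)  # per-workflow statistics
detector = AnomalyDetector(store, sensitivity=2.0)  # flags values beyond 2 std devs
generator = InsightGenerator(analyzer, detector)

for insight in generator.generate_insights('wf-001', analysis_period_days=30):
    print(f"[{insight.priority_score:.2f}] {insight.title}")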


@@ -0,0 +1,311 @@
"""Anomaly detection for workflow execution."""
import logging
import statistics
from dataclasses import dataclass, field
from typing import List, Dict, Any, Optional
from datetime import datetime, timedelta
import hashlib
from ..storage.timeseries_store import TimeSeriesStore
logger = logging.getLogger(__name__)
@dataclass
class Anomaly:
"""Detected anomaly."""
anomaly_id: str
workflow_id: str
metric_name: str
detected_at: datetime
severity: float # 0.0 to 1.0
deviation: float
baseline_value: float
actual_value: float
description: str
recommended_action: Optional[str] = None
metadata: Dict[str, Any] = field(default_factory=dict)
def to_dict(self) -> Dict[str, Any]:
"""Convert to dictionary."""
return {
'anomaly_id': self.anomaly_id,
'workflow_id': self.workflow_id,
'metric_name': self.metric_name,
'detected_at': self.detected_at.isoformat(),
'severity': self.severity,
'deviation': self.deviation,
'baseline_value': self.baseline_value,
'actual_value': self.actual_value,
'description': self.description,
'recommended_action': self.recommended_action,
'metadata': self.metadata
}
class AnomalyDetector:
"""Detects anomalies in workflow execution using statistical methods."""
def __init__(
self,
time_series_store: TimeSeriesStore,
sensitivity: float = 2.0 # Standard deviations
):
"""
Initialize anomaly detector.
Args:
time_series_store: Time series storage
sensitivity: Number of standard deviations for anomaly threshold
"""
self.store = time_series_store
self.sensitivity = sensitivity
self.baselines: Dict[str, Dict] = {}
logger.info(f"AnomalyDetector initialized (sensitivity={sensitivity})")
def detect_anomalies(
self,
workflow_id: str,
metrics: List[Dict],
metric_name: str = 'duration_ms'
) -> List[Anomaly]:
"""
Detect anomalies in metrics.
Args:
workflow_id: Workflow identifier
metrics: List of metric dictionaries
metric_name: Name of metric to analyze
Returns:
List of detected anomalies
"""
if not metrics:
return []
# Get or create baseline
baseline = self._get_baseline(workflow_id, metric_name)
if not baseline:
# Not enough data for baseline
return []
anomalies = []
for metric in metrics:
value = metric.get(metric_name)
if value is None:
continue
# Calculate deviation from baseline
deviation = abs(value - baseline['mean']) / baseline['std_dev'] if baseline['std_dev'] > 0 else 0
# Check if anomaly
if deviation > self.sensitivity:
severity = min(deviation / (self.sensitivity * 2), 1.0)
anomaly = Anomaly(
anomaly_id=self._generate_anomaly_id(workflow_id, metric_name, metric),
workflow_id=workflow_id,
metric_name=metric_name,
detected_at=datetime.now(),
severity=severity,
deviation=deviation,
baseline_value=baseline['mean'],
actual_value=value,
description=self._generate_description(metric_name, value, baseline['mean'], deviation),
recommended_action=self._generate_recommendation(metric_name, value, baseline['mean']),
metadata=metric
)
anomalies.append(anomaly)
logger.info(f"Anomaly detected: {anomaly.description}")
return anomalies
def update_baseline(
self,
workflow_id: str,
stable_period_days: int = 7,
metric_name: str = 'duration_ms'
) -> None:
"""
Update baseline from stable period.
Args:
workflow_id: Workflow identifier
stable_period_days: Number of days for baseline calculation
metric_name: Metric to calculate baseline for
"""
end_time = datetime.now()
start_time = end_time - timedelta(days=stable_period_days)
# Query metrics
metrics = self.store.query_range(
start_time=start_time,
end_time=end_time,
workflow_id=workflow_id,
metric_types=['execution']
)
executions = metrics.get('execution', [])
if not executions:
logger.warning(f"No data for baseline calculation: {workflow_id}")
return
# Extract values
values = [e.get(metric_name) for e in executions if e.get(metric_name) is not None]
if len(values) < 10: # Minimum sample size
logger.warning(f"Insufficient data for baseline: {workflow_id} ({len(values)} samples)")
return
# Calculate baseline statistics
mean = statistics.mean(values)
std_dev = statistics.stdev(values) if len(values) > 1 else 0.0
median = statistics.median(values)
baseline_key = f"{workflow_id}:{metric_name}"
self.baselines[baseline_key] = {
'mean': mean,
'std_dev': std_dev,
'median': median,
'sample_size': len(values),
'updated_at': datetime.now(),
'period_days': stable_period_days
}
logger.info(f"Baseline updated for {workflow_id}: mean={mean:.2f}, std_dev={std_dev:.2f}")
def correlate_anomalies(
self,
anomalies: List[Anomaly],
time_window_minutes: int = 30
) -> List[List[Anomaly]]:
"""
Correlate related anomalies within a time window.
Args:
anomalies: List of anomalies to correlate
time_window_minutes: Time window for correlation
Returns:
List of correlated anomaly groups
"""
if not anomalies:
return []
# Sort by detection time
sorted_anomalies = sorted(anomalies, key=lambda a: a.detected_at)
groups = []
current_group = [sorted_anomalies[0]]
for anomaly in sorted_anomalies[1:]:
# Check if within time window of last anomaly in current group
time_diff = (anomaly.detected_at - current_group[-1].detected_at).total_seconds() / 60
if time_diff <= time_window_minutes:
current_group.append(anomaly)
else:
# Start new group
if len(current_group) > 1: # Only keep groups with multiple anomalies
groups.append(current_group)
current_group = [anomaly]
# Add last group if it has multiple anomalies
if len(current_group) > 1:
groups.append(current_group)
return groups
def escalate_anomaly(
self,
anomaly: Anomaly,
duration_minutes: int,
impact_score: float
) -> Dict[str, Any]:
"""
Escalate an anomaly based on duration and impact.
Args:
anomaly: Anomaly to escalate
duration_minutes: How long the anomaly has persisted
impact_score: Impact score (0.0 to 1.0)
Returns:
Escalation information
"""
# Calculate escalation level
escalation_score = (anomaly.severity + impact_score) / 2
escalation_score *= min(duration_minutes / 60, 2.0)  # Scale by duration in hours, capped at 2x after two hours
if escalation_score > 0.8:
level = 'critical'
elif escalation_score > 0.5:
level = 'high'
elif escalation_score > 0.3:
level = 'medium'
else:
level = 'low'
return {
'anomaly_id': anomaly.anomaly_id,
'escalation_level': level,
'escalation_score': min(escalation_score, 1.0),
'duration_minutes': duration_minutes,
'impact_score': impact_score,
'requires_immediate_action': escalation_score > 0.8
}
def _get_baseline(self, workflow_id: str, metric_name: str) -> Optional[Dict]:
"""Get baseline for workflow and metric."""
baseline_key = f"{workflow_id}:{metric_name}"
if baseline_key not in self.baselines:
# Try to calculate baseline
self.update_baseline(workflow_id, metric_name=metric_name)
return self.baselines.get(baseline_key)
def _generate_anomaly_id(self, workflow_id: str, metric_name: str, metric: Dict) -> str:
"""Generate unique anomaly ID."""
data = f"{workflow_id}:{metric_name}:{metric.get('execution_id', '')}:{datetime.now().isoformat()}"
return hashlib.md5(data.encode()).hexdigest()[:16]
def _generate_description(
self,
metric_name: str,
actual_value: float,
baseline_value: float,
deviation: float
) -> str:
"""Generate human-readable anomaly description."""
percent_diff = abs((actual_value - baseline_value) / baseline_value * 100) if baseline_value > 0 else 0
direction = "higher" if actual_value > baseline_value else "lower"
return (
f"{metric_name} is {percent_diff:.1f}% {direction} than baseline "
f"({actual_value:.2f} vs {baseline_value:.2f}, {deviation:.1f} std devs)"
)
def _generate_recommendation(
self,
metric_name: str,
actual_value: float,
baseline_value: float
) -> str:
"""Generate recommended action for anomaly."""
if actual_value > baseline_value:
if metric_name == 'duration_ms':
return "Investigate performance degradation. Check for resource constraints or code changes."
elif metric_name == 'error_rate':
return "Investigate error spike. Check logs and recent deployments."
elif metric_name in ['cpu_percent', 'memory_mb']:
return "Investigate resource usage spike. Check for memory leaks or inefficient operations."
else:
if metric_name == 'success_rate':
return "Investigate success rate drop. Check for system issues or data quality problems."
return "Monitor the situation and investigate if anomaly persists."


@@ -0,0 +1,301 @@
"""Automated insight generation for workflows."""
import logging
import hashlib
from dataclasses import dataclass, field
from typing import List, Dict, Any, Optional
from datetime import datetime, timedelta
from .performance_analyzer import PerformanceAnalyzer, PerformanceStats
from .anomaly_detector import AnomalyDetector
logger = logging.getLogger(__name__)
@dataclass
class Insight:
"""Generated insight with recommendation."""
insight_id: str
workflow_id: str
category: str # 'performance', 'reliability', 'resource', 'best_practice'
title: str
description: str
recommendation: str
expected_impact: str
ease_of_implementation: str # 'easy', 'medium', 'hard'
priority_score: float
supporting_data: Dict[str, Any]
created_at: datetime
implemented: bool = False
actual_impact: Optional[Dict] = None
def to_dict(self) -> Dict[str, Any]:
"""Convert to dictionary."""
return {
'insight_id': self.insight_id,
'workflow_id': self.workflow_id,
'category': self.category,
'title': self.title,
'description': self.description,
'recommendation': self.recommendation,
'expected_impact': self.expected_impact,
'ease_of_implementation': self.ease_of_implementation,
'priority_score': self.priority_score,
'supporting_data': self.supporting_data,
'created_at': self.created_at.isoformat(),
'implemented': self.implemented,
'actual_impact': self.actual_impact
}
class InsightGenerator:
"""Generates automated insights and recommendations."""
def __init__(
self,
performance_analyzer: PerformanceAnalyzer,
anomaly_detector: AnomalyDetector
):
"""
Initialize insight generator.
Args:
performance_analyzer: Performance analyzer instance
anomaly_detector: Anomaly detector instance
"""
self.performance_analyzer = performance_analyzer
self.anomaly_detector = anomaly_detector
self._insight_implementations: Dict[str, Dict] = {}
logger.info("InsightGenerator initialized")
def generate_insights(
self,
workflow_id: str,
analysis_period_days: int = 30
) -> List[Insight]:
"""
Generate insights for a workflow.
Args:
workflow_id: Workflow identifier
analysis_period_days: Number of days to analyze
Returns:
List of generated insights
"""
insights = []
end_time = datetime.now()
start_time = end_time - timedelta(days=analysis_period_days)
# Analyze performance
perf_stats = self.performance_analyzer.analyze_workflow(
workflow_id,
start_time,
end_time
)
if perf_stats:
# Generate performance insights
insights.extend(self._generate_performance_insights(perf_stats))
# Generate bottleneck insights
insights.extend(self._generate_bottleneck_insights(perf_stats))
# Check for performance degradation
degradation = self.performance_analyzer.detect_performance_degradation(
workflow_id,
baseline_period=timedelta(days=7),
current_period=timedelta(days=1)
)
if degradation:
insights.append(self._generate_degradation_insight(degradation))
# Prioritize insights
insights = self.prioritize_insights(insights)
return insights
def prioritize_insights(self, insights: List[Insight]) -> List[Insight]:
"""
Prioritize insights by impact and ease.
Args:
insights: List of insights to prioritize
Returns:
Sorted list of insights
"""
# Calculate priority scores
for insight in insights:
impact_score = self._calculate_impact_score(insight.expected_impact)
ease_score = self._calculate_ease_score(insight.ease_of_implementation)
# Priority = Impact * Ease (higher is better)
insight.priority_score = impact_score * ease_score
# Sort by priority (descending)
return sorted(insights, key=lambda i: i.priority_score, reverse=True)
def track_insight_implementation(
self,
insight_id: str,
implemented: bool,
actual_impact: Optional[Dict] = None
) -> None:
"""
Track insight implementation and measure impact.
Args:
insight_id: Insight identifier
implemented: Whether insight was implemented
actual_impact: Measured impact after implementation
"""
self._insight_implementations[insight_id] = {
'implemented': implemented,
'actual_impact': actual_impact,
'tracked_at': datetime.now()
}
logger.info(f"Tracked implementation for insight {insight_id}")
def _generate_performance_insights(self, stats: PerformanceStats) -> List[Insight]:
"""Generate insights from performance statistics."""
insights = []
# High variability insight
if stats.std_dev_ms > stats.avg_duration_ms * 0.5:
insights.append(Insight(
insight_id=self._generate_id(stats.workflow_id, 'high_variability'),
workflow_id=stats.workflow_id,
category='performance',
title='High Performance Variability',
description=(
f"Execution time varies significantly (std dev: {stats.std_dev_ms:.0f}ms, "
f"avg: {stats.avg_duration_ms:.0f}ms). This indicates inconsistent performance."
),
recommendation=(
"Investigate causes of variability. Check for: "
"1) Resource contention, 2) Network latency, 3) Data size variations, "
"4) External service dependencies."
),
expected_impact="Reduce execution time variability by 30-50%",
ease_of_implementation='medium',
priority_score=0.0,
supporting_data={'stats': stats.to_dict()},
created_at=datetime.now()
))
# Slow p99 insight
if stats.p99_duration_ms > stats.median_duration_ms * 3:
insights.append(Insight(
insight_id=self._generate_id(stats.workflow_id, 'slow_p99'),
workflow_id=stats.workflow_id,
category='performance',
title='Slow 99th Percentile Performance',
description=(
f"99th percentile ({stats.p99_duration_ms:.0f}ms) is 3x slower than median "
f"({stats.median_duration_ms:.0f}ms). Some executions are significantly slower."
),
recommendation=(
"Analyze slowest executions to identify outliers. "
"Consider adding timeouts or optimizing worst-case scenarios."
),
expected_impact="Improve worst-case performance by 40-60%",
ease_of_implementation='medium',
priority_score=0.0,
supporting_data={'stats': stats.to_dict()},
created_at=datetime.now()
))
return insights
def _generate_bottleneck_insights(self, stats: PerformanceStats) -> List[Insight]:
"""Generate insights from bottleneck analysis."""
insights = []
if not stats.slowest_steps:
return insights
# Top bottleneck
top_bottleneck = stats.slowest_steps[0]
insights.append(Insight(
insight_id=self._generate_id(stats.workflow_id, 'top_bottleneck'),
workflow_id=stats.workflow_id,
category='performance',
title=f"Bottleneck: {top_bottleneck['action_type']} on {top_bottleneck['node_id']}",
description=(
f"Step '{top_bottleneck['action_type']}' takes {top_bottleneck['avg_duration_ms']:.0f}ms "
f"on average (p95: {top_bottleneck['p95_duration_ms']:.0f}ms). "
f"This is the slowest step in the workflow."
),
recommendation=(
f"Optimize the '{top_bottleneck['action_type']}' action. "
"Consider: 1) Caching results, 2) Parallel execution, "
"3) Reducing wait times, 4) Optimizing selectors."
),
expected_impact=f"Reduce overall workflow time by {(top_bottleneck['avg_duration_ms'] / stats.avg_duration_ms * 100 * 0.5):.0f}%",
ease_of_implementation='easy',
priority_score=0.0,
supporting_data={'bottleneck': top_bottleneck},
created_at=datetime.now()
))
return insights
def _generate_degradation_insight(self, degradation: Dict) -> Insight:
"""Generate insight from performance degradation."""
return Insight(
insight_id=self._generate_id(degradation['workflow_id'], 'degradation'),
workflow_id=degradation['workflow_id'],
category='performance',
title='Performance Degradation Detected',
description=(
f"Performance has degraded by {degradation['percent_change']:.1f}% "
f"(from {degradation['baseline_avg_ms']:.0f}ms to {degradation['current_avg_ms']:.0f}ms)."
),
recommendation=(
"Investigate recent changes: 1) Code deployments, 2) Data volume increases, "
"3) Infrastructure changes, 4) External service degradation."
),
expected_impact="Restore baseline performance",
ease_of_implementation='medium',
priority_score=0.0,
supporting_data=degradation,
created_at=datetime.now()
)
def _calculate_impact_score(self, expected_impact: str) -> float:
"""Calculate impact score from expected impact description."""
impact_lower = expected_impact.lower()
# Look for percentage improvements
if '50%' in impact_lower or '60%' in impact_lower:
return 1.0
elif '30%' in impact_lower or '40%' in impact_lower:
return 0.8
elif '20%' in impact_lower:
return 0.6
elif '10%' in impact_lower:
return 0.4
else:
return 0.5 # Default
def _calculate_ease_score(self, ease: str) -> float:
"""Calculate ease score from ease of implementation."""
if ease == 'easy':
return 1.0
elif ease == 'medium':
return 0.6
elif ease == 'hard':
return 0.3
else:
return 0.5
def _generate_id(self, workflow_id: str, insight_type: str) -> str:
"""Generate unique insight ID."""
data = f"{workflow_id}:{insight_type}:{datetime.now().date().isoformat()}"
return hashlib.md5(data.encode()).hexdigest()[:16]
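
A worked sketch of the impact x ease prioritization above, assuming the classes in this file are in scope. The collaborators are passed as None, which holds only because prioritize_insights() never touches them.

from datetime import datetime

def make_insight(title, impact, ease):
    # Helper for illustration only; fills the required Insight fields minimally.
    return Insight(
        insight_id=title, workflow_id='wf-001', category='performance',
        title=title, description='', recommendation='',
        expected_impact=impact, ease_of_implementation=ease,
        priority_score=0.0, supporting_data={}, created_at=datetime.now(),
    )

gen = InsightGenerator(None, None)
ranked = gen.prioritize_insights([
    make_insight('big-but-hard', 'Reduce execution time by 50%', 'hard'),   # 1.0 * 0.3
    make_insight('small-but-easy', 'Reduce execution time by 20%', 'easy'), # 0.6 * 1.0
])
print([(i.title, i.priority_score) for i in ranked])
# [('small-but-easy', 0.6), ('big-but-hard', 0.3)]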


@@ -0,0 +1,359 @@
"""Performance analysis for workflows."""
import logging
import statistics
from dataclasses import dataclass
from typing import List, Dict, Any, Optional
from datetime import datetime, timedelta
from ..storage.timeseries_store import TimeSeriesStore
logger = logging.getLogger(__name__)
@dataclass
class PerformanceStats:
"""Performance statistics for a workflow."""
workflow_id: str
time_period: str
execution_count: int
avg_duration_ms: float
median_duration_ms: float
p95_duration_ms: float
p99_duration_ms: float
min_duration_ms: float
max_duration_ms: float
std_dev_ms: float
slowest_steps: List[Dict]
def to_dict(self) -> Dict[str, Any]:
"""Convert to dictionary."""
return {
'workflow_id': self.workflow_id,
'time_period': self.time_period,
'execution_count': self.execution_count,
'avg_duration_ms': self.avg_duration_ms,
'median_duration_ms': self.median_duration_ms,
'p95_duration_ms': self.p95_duration_ms,
'p99_duration_ms': self.p99_duration_ms,
'min_duration_ms': self.min_duration_ms,
'max_duration_ms': self.max_duration_ms,
'std_dev_ms': self.std_dev_ms,
'slowest_steps': self.slowest_steps
}
class PerformanceAnalyzer:
"""Analyzes workflow performance metrics."""
def __init__(self, time_series_store: TimeSeriesStore):
"""
Initialize performance analyzer.
Args:
time_series_store: Time series storage for metrics
"""
self.store = time_series_store
logger.info("PerformanceAnalyzer initialized")
def analyze_workflow(
self,
workflow_id: str,
start_time: datetime,
end_time: datetime
) -> Optional[PerformanceStats]:
"""
Analyze performance for a workflow.
Args:
workflow_id: Workflow identifier
start_time: Start of analysis period
end_time: End of analysis period
Returns:
PerformanceStats or None if no data
"""
# Query execution metrics
metrics = self.store.query_range(
start_time=start_time,
end_time=end_time,
workflow_id=workflow_id,
metric_types=['execution']
)
executions = metrics.get('execution', [])
if not executions:
logger.warning(f"No execution data for workflow {workflow_id}")
return None
# Filter completed executions with duration
completed = [
e for e in executions
if e.get('status') == 'completed' and e.get('duration_ms') is not None
]
if not completed:
logger.warning(f"No completed executions for workflow {workflow_id}")
return None
# Extract durations
durations = [e['duration_ms'] for e in completed]
# Calculate statistics
avg_duration = statistics.mean(durations)
median_duration = statistics.median(durations)
min_duration = min(durations)
max_duration = max(durations)
std_dev = statistics.stdev(durations) if len(durations) > 1 else 0.0
# Calculate percentiles
sorted_durations = sorted(durations)
p95_duration = self._percentile(sorted_durations, 0.95)
p99_duration = self._percentile(sorted_durations, 0.99)
# Identify slowest steps
slowest_steps = self.identify_bottlenecks(
workflow_id,
start_time,
end_time,
threshold_percentile=0.95
)
time_period = f"{start_time.isoformat()} to {end_time.isoformat()}"
return PerformanceStats(
workflow_id=workflow_id,
time_period=time_period,
execution_count=len(completed),
avg_duration_ms=avg_duration,
median_duration_ms=median_duration,
p95_duration_ms=p95_duration,
p99_duration_ms=p99_duration,
min_duration_ms=min_duration,
max_duration_ms=max_duration,
std_dev_ms=std_dev,
slowest_steps=slowest_steps[:5] # Top 5 slowest
)
def identify_bottlenecks(
self,
workflow_id: str,
start_time: datetime,
end_time: datetime,
threshold_percentile: float = 0.95
) -> List[Dict]:
"""
Identify bottleneck steps in a workflow.
Args:
workflow_id: Workflow identifier
start_time: Start of analysis period
end_time: End of analysis period
threshold_percentile: Percentile threshold for bottlenecks
Returns:
List of bottleneck steps sorted by duration
"""
# Query step metrics
metrics = self.store.query_range(
start_time=start_time,
end_time=end_time,
workflow_id=workflow_id,
metric_types=['step']
)
steps = metrics.get('step', [])
if not steps:
return []
# Group by node_id and action_type
step_groups: Dict[tuple, List[float]] = {}
for step in steps:
key = (step['node_id'], step['action_type'])
if key not in step_groups:
step_groups[key] = []
step_groups[key].append(step['duration_ms'])
# Calculate statistics for each group
bottlenecks = []
for (node_id, action_type), durations in step_groups.items():
if not durations:
continue
avg_duration = statistics.mean(durations)
p95_duration = self._percentile(sorted(durations), threshold_percentile)
bottlenecks.append({
'node_id': node_id,
'action_type': action_type,
'avg_duration_ms': avg_duration,
'p95_duration_ms': p95_duration,
'execution_count': len(durations),
'max_duration_ms': max(durations)
})
# Sort by p95 duration (descending)
bottlenecks.sort(key=lambda x: x['p95_duration_ms'], reverse=True)
return bottlenecks
def detect_performance_degradation(
self,
workflow_id: str,
baseline_period: timedelta,
current_period: timedelta,
threshold_percent: float = 20.0
) -> Optional[Dict]:
"""
Detect performance degradation compared to baseline.
Args:
workflow_id: Workflow identifier
baseline_period: Duration of baseline period (e.g., last 7 days)
current_period: Duration of current period (e.g., last 24 hours)
threshold_percent: Threshold for degradation alert (%)
Returns:
Degradation info dict or None if no degradation
"""
now = datetime.now()
# Baseline period (older)
baseline_end = now - current_period
baseline_start = baseline_end - baseline_period
# Current period (recent)
current_start = now - current_period
current_end = now
# Analyze both periods
baseline_stats = self.analyze_workflow(
workflow_id,
baseline_start,
baseline_end
)
current_stats = self.analyze_workflow(
workflow_id,
current_start,
current_end
)
if not baseline_stats or not current_stats:
logger.warning(f"Insufficient data for degradation detection: {workflow_id}")
return None
# Calculate percentage change
baseline_avg = baseline_stats.avg_duration_ms
current_avg = current_stats.avg_duration_ms
if baseline_avg == 0:
return None
percent_change = ((current_avg - baseline_avg) / baseline_avg) * 100
# Check if degradation exceeds threshold
if percent_change > threshold_percent:
return {
'workflow_id': workflow_id,
'degradation_detected': True,
'baseline_avg_ms': baseline_avg,
'current_avg_ms': current_avg,
'percent_change': percent_change,
'threshold_percent': threshold_percent,
'baseline_period': str(baseline_period),
'current_period': str(current_period),
'severity': 'high' if percent_change > threshold_percent * 2 else 'medium'
}
return None
def compare_workflows(
self,
workflow_ids: List[str],
start_time: datetime,
end_time: datetime
) -> Dict[str, PerformanceStats]:
"""
Compare performance across multiple workflows.
Args:
workflow_ids: List of workflow identifiers
start_time: Start of analysis period
end_time: End of analysis period
Returns:
Dictionary mapping workflow_id to PerformanceStats
"""
results = {}
for workflow_id in workflow_ids:
stats = self.analyze_workflow(workflow_id, start_time, end_time)
if stats:
results[workflow_id] = stats
return results
def get_performance_trend(
self,
workflow_id: str,
start_time: datetime,
end_time: datetime,
bucket_size: timedelta = timedelta(hours=1)
) -> List[Dict]:
"""
Get performance trend over time with bucketing.
Args:
workflow_id: Workflow identifier
start_time: Start of analysis period
end_time: End of analysis period
bucket_size: Size of time buckets
Returns:
List of performance data points over time
"""
trend = []
current = start_time
while current < end_time:
bucket_end = min(current + bucket_size, end_time)
stats = self.analyze_workflow(workflow_id, current, bucket_end)
if stats:
trend.append({
'timestamp': current.isoformat(),
'avg_duration_ms': stats.avg_duration_ms,
'median_duration_ms': stats.median_duration_ms,
'execution_count': stats.execution_count
})
current = bucket_end
return trend
@staticmethod
def _percentile(sorted_data: List[float], percentile: float) -> float:
"""
Calculate percentile from sorted data.
Args:
sorted_data: Sorted list of values
percentile: Percentile to calculate (0.0 to 1.0)
Returns:
Percentile value
"""
if not sorted_data:
return 0.0
if len(sorted_data) == 1:
return sorted_data[0]
# Linear interpolation
index = percentile * (len(sorted_data) - 1)
lower = int(index)
upper = min(lower + 1, len(sorted_data) - 1)
weight = index - lower
return sorted_data[lower] * (1 - weight) + sorted_data[upper] * weight
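
Because _percentile() is a staticmethod, its linear interpolation is easy to check directly (assuming the class above is in scope):

data = [100.0, 200.0, 300.0, 400.0, 500.0]
# p50: index = 0.5 * 4 = 2.0, which falls exactly on data[2]
print(PerformanceAnalyzer._percentile(data, 0.50))  # 300.0
# p90: index = 0.9 * 4 = 3.6 -> 400 * 0.4 + 500 * 0.6
print(PerformanceAnalyzer._percentile(data, 0.90))  # ~460.0, up to float rounding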


@@ -0,0 +1,334 @@
"""Success rate analytics for workflows."""
import logging
import statistics
from typing import Dict, List, Optional
from datetime import datetime, timedelta
from dataclasses import dataclass
from collections import defaultdict
from ..storage.timeseries_store import TimeSeriesStore
logger = logging.getLogger(__name__)
@dataclass
class SuccessRateStats:
"""Success rate statistics."""
workflow_id: str
total_executions: int
successful_executions: int
failed_executions: int
success_rate: float
failure_categories: Dict[str, int]
reliability_score: float
time_window_start: datetime
time_window_end: datetime
def to_dict(self) -> Dict:
"""Convert to dictionary."""
return {
'workflow_id': self.workflow_id,
'total_executions': self.total_executions,
'successful_executions': self.successful_executions,
'failed_executions': self.failed_executions,
'success_rate': self.success_rate,
'failure_categories': self.failure_categories,
'reliability_score': self.reliability_score,
'time_window_start': self.time_window_start.isoformat(),
'time_window_end': self.time_window_end.isoformat()
}
@dataclass
class ReliabilityRanking:
"""Workflow reliability ranking."""
workflow_id: str
reliability_score: float
success_rate: float
stability_score: float
total_executions: int
rank: int
def to_dict(self) -> Dict:
"""Convert to dictionary."""
return {
'workflow_id': self.workflow_id,
'reliability_score': self.reliability_score,
'success_rate': self.success_rate,
'stability_score': self.stability_score,
'total_executions': self.total_executions,
'rank': self.rank
}
class SuccessRateCalculator:
"""Calculate success rates and reliability metrics."""
def __init__(self, store: TimeSeriesStore):
"""
Initialize success rate calculator.
Args:
store: Time-series storage instance
"""
self.store = store
logger.info("SuccessRateCalculator initialized")
def calculate_success_rate(
self,
workflow_id: str,
time_window_hours: int = 24
) -> SuccessRateStats:
"""
Calculate success rate for a workflow.
Args:
workflow_id: Workflow identifier
time_window_hours: Time window in hours
Returns:
Success rate statistics
"""
end_time = datetime.now()
start_time = end_time - timedelta(hours=time_window_hours)
# Query execution metrics
metrics = self.store.query_range(
metric_type='execution',
start_time=start_time,
end_time=end_time,
filters={'workflow_id': workflow_id}
)
total = len(metrics)
successful = sum(1 for m in metrics if m.get('status') == 'success')
failed = total - successful
success_rate = (successful / total * 100) if total > 0 else 0.0
# Categorize failures
failure_categories = self._categorize_failures(
[m for m in metrics if m.get('status') != 'success']
)
# Calculate reliability score
reliability_score = self._calculate_reliability_score(
success_rate=success_rate,
total_executions=total,
failure_categories=failure_categories
)
return SuccessRateStats(
workflow_id=workflow_id,
total_executions=total,
successful_executions=successful,
failed_executions=failed,
success_rate=success_rate,
failure_categories=failure_categories,
reliability_score=reliability_score,
time_window_start=start_time,
time_window_end=end_time
)
def categorize_failures(
self,
workflow_id: str,
time_window_hours: int = 24
) -> Dict[str, int]:
"""
Categorize failures by type.
Args:
workflow_id: Workflow identifier
time_window_hours: Time window in hours
Returns:
Dictionary of failure categories and counts
"""
end_time = datetime.now()
start_time = end_time - timedelta(hours=time_window_hours)
# Query failed executions
metrics = self.store.query_range(
metric_type='execution',
start_time=start_time,
end_time=end_time,
filters={'workflow_id': workflow_id}
)
failed_metrics = [m for m in metrics if m.get('status') != 'success']
return self._categorize_failures(failed_metrics)
def _categorize_failures(self, failed_metrics: List[Dict]) -> Dict[str, int]:
"""
Categorize failures by error type.
Args:
failed_metrics: List of failed execution metrics
Returns:
Dictionary of categories and counts
"""
categories = defaultdict(int)
for metric in failed_metrics:
error_msg = metric.get('error_message', '').lower()
# Categorize by error type
if 'timeout' in error_msg:
categories['timeout'] += 1
elif 'not found' in error_msg or 'element' in error_msg:
categories['element_not_found'] += 1
elif 'permission' in error_msg or 'access' in error_msg:
categories['permission_error'] += 1
elif 'network' in error_msg or 'connection' in error_msg:
categories['network_error'] += 1
elif 'validation' in error_msg:
categories['validation_error'] += 1
else:
categories['other'] += 1
return dict(categories)
def rank_workflows_by_reliability(
self,
workflow_ids: Optional[List[str]] = None,
time_window_hours: int = 168 # 1 week
) -> List[ReliabilityRanking]:
"""
Rank workflows by reliability score.
Args:
workflow_ids: List of workflow IDs (None = all)
time_window_hours: Time window in hours
Returns:
List of reliability rankings sorted by score
"""
end_time = datetime.now()
start_time = end_time - timedelta(hours=time_window_hours)
# Get all workflows if not specified
if workflow_ids is None:
metrics = self.store.query_range(
metric_type='execution',
start_time=start_time,
end_time=end_time
)
workflow_ids = list(set(m.get('workflow_id') for m in metrics if m.get('workflow_id')))
# Calculate reliability for each workflow
rankings = []
for workflow_id in workflow_ids:
stats = self.calculate_success_rate(workflow_id, time_window_hours)
# Calculate stability score (consistency over time)
stability_score = self._calculate_stability_score(
workflow_id, start_time, end_time
)
rankings.append(ReliabilityRanking(
workflow_id=workflow_id,
reliability_score=stats.reliability_score,
success_rate=stats.success_rate,
stability_score=stability_score,
total_executions=stats.total_executions,
rank=0 # Will be set after sorting
))
# Sort by reliability score (descending)
rankings.sort(key=lambda r: r.reliability_score, reverse=True)
# Assign ranks
for i, ranking in enumerate(rankings, 1):
ranking.rank = i
return rankings
def _calculate_reliability_score(
self,
success_rate: float,
total_executions: int,
failure_categories: Dict[str, int]
) -> float:
"""
Calculate overall reliability score.
Args:
success_rate: Success rate percentage
total_executions: Total number of executions
failure_categories: Failure categories
Returns:
Reliability score (0-100)
"""
# Base score from success rate (70% weight)
base_score = success_rate * 0.7
# Execution volume bonus (up to 15 points at 100+ executions)
volume_bonus = min(total_executions / 100 * 15, 15)
# Failure diversity penalty (3 points per distinct failure type, capped at 15)
num_failure_types = len(failure_categories)
diversity_penalty = min(num_failure_types * 3, 15)
# Calculate final score
reliability_score = base_score + volume_bonus - diversity_penalty
# Clamp to 0-100
return max(0.0, min(100.0, reliability_score))
def _calculate_stability_score(
self,
workflow_id: str,
start_time: datetime,
end_time: datetime
) -> float:
"""
Calculate stability score (consistency over time).
Args:
workflow_id: Workflow identifier
start_time: Start of time window
end_time: End of time window
Returns:
Stability score (0-100)
"""
# Split time window into buckets
num_buckets = 7  # Seven buckets: daily granularity for the default one-week window
bucket_duration = (end_time - start_time) / num_buckets
bucket_success_rates = []
for i in range(num_buckets):
bucket_start = start_time + (bucket_duration * i)
bucket_end = bucket_start + bucket_duration
metrics = self.store.query_range(
metric_type='execution',
start_time=bucket_start,
end_time=bucket_end,
filters={'workflow_id': workflow_id}
)
if metrics:
successful = sum(1 for m in metrics if m.get('status') == 'success')
success_rate = (successful / len(metrics)) * 100
bucket_success_rates.append(success_rate)
if not bucket_success_rates:
return 0.0
# Calculate coefficient of variation (lower = more stable)
mean = statistics.mean(bucket_success_rates)
if mean == 0:
return 0.0
stdev = statistics.stdev(bucket_success_rates) if len(bucket_success_rates) > 1 else 0
cv = (stdev / mean) * 100
# Convert to stability score (lower CV = higher stability)
# CV of 0 = 100 stability, CV of 50+ = 0 stability
stability_score = max(0.0, 100.0 - (cv * 2))
return stability_score
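
A worked check of the reliability score arithmetic above, assuming the class is in scope. Calling the private _calculate_reliability_score() directly is for illustration only, and the store is passed as None because the scoring math never touches it.

calc = SuccessRateCalculator(None)
score = calc._calculate_reliability_score(
    success_rate=95.0,        # base score: 95 * 0.7 = 66.5
    total_executions=80,      # volume bonus: 80 / 100 * 15 = 12
    failure_categories={'timeout': 3, 'network_error': 1},  # penalty: 2 types * 3 = 6
)
print(score)  # ~72.5 (66.5 + 12 - 6), up to float rounding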