""" COACHING Metrics Module Provides comprehensive metrics and monitoring for COACHING mode: - Session statistics aggregation - Learning progress tracking - Performance analytics - Recommendations for mode transitions """ from dataclasses import dataclass, field from datetime import datetime, timedelta from typing import Dict, List, Optional, Any, Tuple from enum import Enum from .session_persistence import ( CoachingSessionPersistence, CoachingSessionState, SessionStatus, get_coaching_persistence ) class LearningProgress(str, Enum): """Learning progress levels for workflow.""" NOT_STARTED = "not_started" OBSERVATION = "observation" # Still collecting data LEARNING = "learning" # Actively learning from corrections COACHING = "coaching" # User coaching mode READY_FOR_AUTO = "ready" # Ready for autonomous mode AUTONOMOUS = "autonomous" # Running autonomously @dataclass class WorkflowLearningMetrics: """Metrics for a single workflow's learning progress.""" workflow_id: str total_sessions: int = 0 completed_sessions: int = 0 total_steps_coached: int = 0 total_decisions: int = 0 accepted: int = 0 rejected: int = 0 corrected: int = 0 manual_executions: int = 0 skipped: int = 0 # Computed metrics acceptance_rate: float = 0.0 correction_rate: float = 0.0 completion_rate: float = 0.0 # Time metrics avg_session_duration_seconds: float = 0.0 avg_decision_time_seconds: float = 0.0 # Learning progress learning_progress: LearningProgress = LearningProgress.NOT_STARTED confidence_score: float = 0.0 ready_for_auto: bool = False # Recommendations recommendations: List[str] = field(default_factory=list) def to_dict(self) -> Dict[str, Any]: return { 'workflow_id': self.workflow_id, 'total_sessions': self.total_sessions, 'completed_sessions': self.completed_sessions, 'total_steps_coached': self.total_steps_coached, 'total_decisions': self.total_decisions, 'accepted': self.accepted, 'rejected': self.rejected, 'corrected': self.corrected, 'manual_executions': self.manual_executions, 'skipped': self.skipped, 'acceptance_rate': self.acceptance_rate, 'correction_rate': self.correction_rate, 'completion_rate': self.completion_rate, 'avg_session_duration_seconds': self.avg_session_duration_seconds, 'avg_decision_time_seconds': self.avg_decision_time_seconds, 'learning_progress': self.learning_progress.value, 'confidence_score': self.confidence_score, 'ready_for_auto': self.ready_for_auto, 'recommendations': self.recommendations } @dataclass class GlobalCoachingMetrics: """Global metrics across all workflows.""" total_workflows: int = 0 total_sessions: int = 0 active_sessions: int = 0 completed_sessions: int = 0 failed_sessions: int = 0 total_decisions: int = 0 total_accepted: int = 0 total_rejected: int = 0 total_corrected: int = 0 overall_acceptance_rate: float = 0.0 overall_correction_rate: float = 0.0 workflows_ready_for_auto: int = 0 workflows_in_learning: int = 0 # Time-based metrics sessions_last_24h: int = 0 decisions_last_24h: int = 0 # Top workflows top_workflows_by_sessions: List[Tuple[str, int]] = field(default_factory=list) top_workflows_by_corrections: List[Tuple[str, int]] = field(default_factory=list) def to_dict(self) -> Dict[str, Any]: return { 'total_workflows': self.total_workflows, 'total_sessions': self.total_sessions, 'active_sessions': self.active_sessions, 'completed_sessions': self.completed_sessions, 'failed_sessions': self.failed_sessions, 'total_decisions': self.total_decisions, 'total_accepted': self.total_accepted, 'total_rejected': self.total_rejected, 'total_corrected': 
            'total_corrected': self.total_corrected,
            'overall_acceptance_rate': self.overall_acceptance_rate,
            'overall_correction_rate': self.overall_correction_rate,
            'workflows_ready_for_auto': self.workflows_ready_for_auto,
            'workflows_in_learning': self.workflows_in_learning,
            'sessions_last_24h': self.sessions_last_24h,
            'decisions_last_24h': self.decisions_last_24h,
            'top_workflows_by_sessions': self.top_workflows_by_sessions,
            'top_workflows_by_corrections': self.top_workflows_by_corrections
        }


class CoachingMetricsCollector:
    """
    Collector and analyzer for COACHING metrics.

    Provides methods to:
    - Calculate workflow-specific learning metrics
    - Determine readiness for autonomous mode
    - Generate recommendations for improvement
    - Track global system health
    """

    # Thresholds for auto mode readiness
    MIN_SESSIONS_FOR_AUTO = 5
    MIN_ACCEPTANCE_RATE_FOR_AUTO = 0.85
    MAX_CORRECTION_RATE_FOR_AUTO = 0.10
    MIN_CONFIDENCE_FOR_AUTO = 0.80

    def __init__(self, persistence: Optional[CoachingSessionPersistence] = None):
        """
        Initialize metrics collector.

        Args:
            persistence: Session persistence instance
        """
        self.persistence = persistence or get_coaching_persistence()

    def get_workflow_metrics(self, workflow_id: str) -> WorkflowLearningMetrics:
        """
        Calculate comprehensive metrics for a workflow.

        Args:
            workflow_id: Workflow ID

        Returns:
            WorkflowLearningMetrics with all computed values
        """
        # Get all sessions for this workflow
        sessions = self.persistence.list_sessions(workflow_id=workflow_id, limit=1000)

        metrics = WorkflowLearningMetrics(workflow_id=workflow_id)
        metrics.total_sessions = len(sessions)

        if not sessions:
            metrics.learning_progress = LearningProgress.NOT_STARTED
            metrics.recommendations = ["Start a first COACHING session"]
            return metrics

        # Load full sessions for detailed analysis
        full_sessions: List[CoachingSessionState] = []
        for session_info in sessions:
            session = self.persistence.load_session(session_info['session_id'])
            if session:
                full_sessions.append(session)

        # Calculate basic stats
        total_duration = 0.0
        for session in full_sessions:
            if session.status == SessionStatus.COMPLETED:
                metrics.completed_sessions += 1

            # Aggregate decision stats
            metrics.total_steps_coached += len(session.decisions)
            metrics.total_decisions += session.stats.get('suggestions_made', 0)
            metrics.accepted += session.stats.get('accepted', 0)
            metrics.rejected += session.stats.get('rejected', 0)
            metrics.corrected += session.stats.get('corrected', 0)
            metrics.manual_executions += session.stats.get('manual_executions', 0)
            metrics.skipped += session.stats.get('skipped', 0)

            # Calculate duration (skip sessions with missing or malformed timestamps)
            if session.started_at and session.completed_at:
                try:
                    start = datetime.fromisoformat(session.started_at)
                    end = datetime.fromisoformat(session.completed_at)
                    total_duration += (end - start).total_seconds()
                except (TypeError, ValueError):
                    pass

        # Calculate rates
        total_decisions = metrics.accepted + metrics.rejected + metrics.corrected
        if total_decisions > 0:
            metrics.acceptance_rate = metrics.accepted / total_decisions
            metrics.correction_rate = metrics.corrected / total_decisions

        if metrics.total_sessions > 0:
            metrics.completion_rate = metrics.completed_sessions / metrics.total_sessions

        if metrics.completed_sessions > 0:
            metrics.avg_session_duration_seconds = total_duration / metrics.completed_sessions

        if metrics.total_decisions > 0 and total_duration > 0:
            metrics.avg_decision_time_seconds = total_duration / metrics.total_decisions

        # Determine learning progress
        metrics.learning_progress = self._determine_learning_progress(metrics)

        # Calculate confidence score
        metrics.confidence_score = self._calculate_confidence_score(metrics)
        # Check if ready for auto
        metrics.ready_for_auto = self._check_ready_for_auto(metrics)

        # Generate recommendations
        metrics.recommendations = self._generate_recommendations(metrics)

        return metrics

    def get_global_metrics(self) -> GlobalCoachingMetrics:
        """
        Calculate global metrics across all workflows.

        Returns:
            GlobalCoachingMetrics with aggregated data
        """
        metrics = GlobalCoachingMetrics()

        # Get all sessions
        all_sessions = self.persistence.list_sessions(limit=10000)
        metrics.total_sessions = len(all_sessions)

        # Track unique workflows
        workflow_stats: Dict[str, Dict] = {}
        now = datetime.now()
        last_24h = now - timedelta(hours=24)

        for session_info in all_sessions:
            workflow_id = session_info.get('workflow_id', 'unknown')
            status = session_info.get('status', 'unknown')

            # Initialize workflow stats
            if workflow_id not in workflow_stats:
                workflow_stats[workflow_id] = {
                    'sessions': 0,
                    'corrections': 0
                }
            workflow_stats[workflow_id]['sessions'] += 1

            # Count by status
            if status == 'active':
                metrics.active_sessions += 1
            elif status == 'completed':
                metrics.completed_sessions += 1
            elif status == 'failed':
                metrics.failed_sessions += 1

            # Check last 24h (skip entries with missing or malformed timestamps)
            try:
                updated_at = datetime.fromisoformat(session_info.get('updated_at', ''))
                if updated_at > last_24h:
                    metrics.sessions_last_24h += 1
            except (TypeError, ValueError):
                pass

            # Load full session for decision stats
            session = self.persistence.load_session(session_info['session_id'])
            if session:
                metrics.total_decisions += session.stats.get('suggestions_made', 0)
                metrics.total_accepted += session.stats.get('accepted', 0)
                metrics.total_rejected += session.stats.get('rejected', 0)
                metrics.total_corrected += session.stats.get('corrected', 0)
                workflow_stats[workflow_id]['corrections'] += session.stats.get('corrected', 0)

                # Decisions in last 24h
                for decision in session.decisions:
                    try:
                        decision_time = datetime.fromisoformat(decision.timestamp)
                        if decision_time > last_24h:
                            metrics.decisions_last_24h += 1
                    except (TypeError, ValueError):
                        pass

        metrics.total_workflows = len(workflow_stats)

        # Calculate overall rates
        total_decided = metrics.total_accepted + metrics.total_rejected + metrics.total_corrected
        if total_decided > 0:
            metrics.overall_acceptance_rate = metrics.total_accepted / total_decided
            metrics.overall_correction_rate = metrics.total_corrected / total_decided

        # Count workflows by learning state
        for workflow_id in workflow_stats:
            wf_metrics = self.get_workflow_metrics(workflow_id)
            if wf_metrics.ready_for_auto:
                metrics.workflows_ready_for_auto += 1
            elif wf_metrics.learning_progress in [LearningProgress.LEARNING, LearningProgress.COACHING]:
                metrics.workflows_in_learning += 1

        # Top workflows
        sorted_by_sessions = sorted(
            workflow_stats.items(),
            key=lambda x: x[1]['sessions'],
            reverse=True
        )[:5]
        metrics.top_workflows_by_sessions = [
            (wf_id, stats['sessions']) for wf_id, stats in sorted_by_sessions
        ]

        sorted_by_corrections = sorted(
            workflow_stats.items(),
            key=lambda x: x[1]['corrections'],
            reverse=True
        )[:5]
        metrics.top_workflows_by_corrections = [
            (wf_id, stats['corrections']) for wf_id, stats in sorted_by_corrections
        ]

        return metrics

    def _determine_learning_progress(self, metrics: WorkflowLearningMetrics) -> LearningProgress:
        """Determine the learning progress level."""
        if metrics.total_sessions == 0:
            return LearningProgress.NOT_STARTED

        if metrics.total_sessions < 3:
            return LearningProgress.OBSERVATION

        if metrics.acceptance_rate < 0.5:
            return LearningProgress.LEARNING

        if metrics.acceptance_rate >= self.MIN_ACCEPTANCE_RATE_FOR_AUTO and \
           metrics.correction_rate <= self.MAX_CORRECTION_RATE_FOR_AUTO and \
           metrics.total_sessions >= self.MIN_SESSIONS_FOR_AUTO:
            return LearningProgress.READY_FOR_AUTO

        return LearningProgress.COACHING

    def _calculate_confidence_score(self, metrics: WorkflowLearningMetrics) -> float:
        """Calculate overall confidence score (0-1)."""
        if metrics.total_decisions == 0:
            return 0.0

        # Weighted factors
        acceptance_weight = 0.4
        correction_weight = 0.3
        completion_weight = 0.2
        volume_weight = 0.1

        # Acceptance component (higher is better)
        acceptance_score = metrics.acceptance_rate

        # Correction component (lower is better)
        correction_score = max(0, 1 - metrics.correction_rate * 2)

        # Completion component
        completion_score = metrics.completion_rate

        # Volume component (normalized, caps at 10 sessions)
        volume_score = min(1, metrics.total_sessions / 10)

        # Example: acceptance 0.9, correction 0.05, completion 1.0, 8 sessions
        # -> 0.4*0.9 + 0.3*0.9 + 0.2*1.0 + 0.1*0.8 = 0.91
        confidence = (
            acceptance_weight * acceptance_score +
            correction_weight * correction_score +
            completion_weight * completion_score +
            volume_weight * volume_score
        )

        return round(confidence, 3)

    def _check_ready_for_auto(self, metrics: WorkflowLearningMetrics) -> bool:
        """Check if workflow is ready for autonomous mode."""
        return (
            metrics.total_sessions >= self.MIN_SESSIONS_FOR_AUTO and
            metrics.acceptance_rate >= self.MIN_ACCEPTANCE_RATE_FOR_AUTO and
            metrics.correction_rate <= self.MAX_CORRECTION_RATE_FOR_AUTO and
            metrics.confidence_score >= self.MIN_CONFIDENCE_FOR_AUTO
        )

    def _generate_recommendations(self, metrics: WorkflowLearningMetrics) -> List[str]:
        """Generate actionable recommendations."""
        recommendations = []

        if metrics.total_sessions == 0:
            recommendations.append("Start your first COACHING session to begin learning")
            return recommendations

        if metrics.total_sessions < self.MIN_SESSIONS_FOR_AUTO:
            remaining = self.MIN_SESSIONS_FOR_AUTO - metrics.total_sessions
            recommendations.append(
                f"Complete {remaining} more session(s) to reach the required minimum"
            )

        if metrics.acceptance_rate < self.MIN_ACCEPTANCE_RATE_FOR_AUTO:
            current_pct = round(metrics.acceptance_rate * 100, 1)
            target_pct = round(self.MIN_ACCEPTANCE_RATE_FOR_AUTO * 100, 1)
            recommendations.append(
                f"Improve the acceptance rate from {current_pct}% to {target_pct}% "
                "by adjusting the element selectors"
            )

        if metrics.correction_rate > self.MAX_CORRECTION_RATE_FOR_AUTO:
            recommendations.append(
                "The correction rate is high. Review the visual elements "
                "that frequently require corrections"
            )

        if metrics.rejected > metrics.total_sessions * 2:
            recommendations.append(
                "Many actions were rejected. Revise the workflow to remove "
                "the incorrect steps"
            )

        if metrics.manual_executions > metrics.total_decisions * 0.1:
            recommendations.append(
                "Several manual executions detected. Consider automating "
                "these frequent actions"
            )

        if metrics.ready_for_auto:
            recommendations.append(
                "This workflow is ready for autonomous mode! "
                "You can switch it to AUTO mode"
            )

        return recommendations


# Singleton instance
_metrics_collector: Optional[CoachingMetricsCollector] = None


def get_metrics_collector(persistence: Optional[CoachingSessionPersistence] = None) -> CoachingMetricsCollector:
    """Get or create the global metrics collector."""
    global _metrics_collector
    if _metrics_collector is None:
        _metrics_collector = CoachingMetricsCollector(persistence)
    return _metrics_collector
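
# ---------------------------------------------------------------------------
# Usage sketch (illustrative only): shows how a caller might consume this
# module. It assumes the default persistence backend returned by
# get_coaching_persistence() is already configured, and the workflow ID
# "invoice_processing" is a hypothetical placeholder, not a real workflow.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    collector = get_metrics_collector()

    # Per-workflow learning metrics and AUTO-mode readiness
    wf = collector.get_workflow_metrics("invoice_processing")
    print(f"Progress: {wf.learning_progress.value}")
    print(f"Confidence: {wf.confidence_score:.3f}")
    print(f"Ready for AUTO: {wf.ready_for_auto}")
    for rec in wf.recommendations:
        print(f"  - {rec}")

    # Global health across all workflows
    global_metrics = collector.get_global_metrics()
    print(f"Sessions (last 24h): {global_metrics.sessions_last_24h}")
    print(f"Workflows ready for AUTO: {global_metrics.workflows_ready_for_auto}")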