Add comprehensive COACHING mode system with: Backend: - core/coaching module with session persistence and metrics - CoachingSessionPersistence for pause/resume sessions - CoachingMetricsCollector with learning progress tracking - REST API blueprint for coaching sessions management - Execution integration with COACHING mode support Frontend: - CoachingPanel component with keyboard shortcuts - Decision buttons (accept/reject/correct/manual/skip) - Real-time stats display and correction editor - CorrectionPacksDashboard for pack visualization - WebSocket hooks for real-time COACHING events Metrics & Monitoring: - WorkflowLearningMetrics with confidence scoring - GlobalCoachingMetrics for system-wide analytics - AUTO mode readiness detection (85% acceptance threshold) - Learning progress levels (OBSERVATION → COACHING → AUTO) Tests: - E2E tests for complete OBSERVATION → AUTO journey - Session persistence and recovery tests - Metrics threshold validation tests Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
463 lines
17 KiB
Python
463 lines
17 KiB
Python
"""
|
|
COACHING Metrics Module
|
|
|
|
Provides comprehensive metrics and monitoring for COACHING mode:
|
|
- Session statistics aggregation
|
|
- Learning progress tracking
|
|
- Performance analytics
|
|
- Recommendations for mode transitions
|
|
"""
|
|
|
|
from dataclasses import dataclass, field
|
|
from datetime import datetime, timedelta
|
|
from typing import Dict, List, Optional, Any, Tuple
|
|
from enum import Enum
|
|
|
|
from .session_persistence import (
|
|
CoachingSessionPersistence,
|
|
CoachingSessionState,
|
|
SessionStatus,
|
|
get_coaching_persistence
|
|
)
|
|
|
|
|
|
class LearningProgress(str, Enum):
    """Stages of a workflow's learning lifecycle, from no data to autonomy."""

    NOT_STARTED = "not_started"   # no coaching data collected yet
    OBSERVATION = "observation"   # still collecting data
    LEARNING = "learning"         # actively learning from corrections
    COACHING = "coaching"         # user coaching mode
    READY_FOR_AUTO = "ready"      # thresholds met; eligible for autonomous mode
    AUTONOMOUS = "autonomous"     # running autonomously
|
|
|
|
|
|
@dataclass
class WorkflowLearningMetrics:
    """Aggregated learning metrics for a single workflow."""

    workflow_id: str
    # Raw counters
    total_sessions: int = 0
    completed_sessions: int = 0
    total_steps_coached: int = 0
    total_decisions: int = 0
    accepted: int = 0
    rejected: int = 0
    corrected: int = 0
    manual_executions: int = 0
    skipped: int = 0

    # Derived rates (0.0-1.0)
    acceptance_rate: float = 0.0
    correction_rate: float = 0.0
    completion_rate: float = 0.0

    # Timing averages
    avg_session_duration_seconds: float = 0.0
    avg_decision_time_seconds: float = 0.0

    # Learning state
    learning_progress: LearningProgress = LearningProgress.NOT_STARTED
    confidence_score: float = 0.0
    ready_for_auto: bool = False

    # Actionable suggestions for the user
    recommendations: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict; the progress enum becomes its string value."""
        return dict(
            workflow_id=self.workflow_id,
            total_sessions=self.total_sessions,
            completed_sessions=self.completed_sessions,
            total_steps_coached=self.total_steps_coached,
            total_decisions=self.total_decisions,
            accepted=self.accepted,
            rejected=self.rejected,
            corrected=self.corrected,
            manual_executions=self.manual_executions,
            skipped=self.skipped,
            acceptance_rate=self.acceptance_rate,
            correction_rate=self.correction_rate,
            completion_rate=self.completion_rate,
            avg_session_duration_seconds=self.avg_session_duration_seconds,
            avg_decision_time_seconds=self.avg_decision_time_seconds,
            learning_progress=self.learning_progress.value,
            confidence_score=self.confidence_score,
            ready_for_auto=self.ready_for_auto,
            recommendations=self.recommendations,
        )
|
|
|
|
|
|
@dataclass
class GlobalCoachingMetrics:
    """System-wide metrics aggregated across every workflow."""

    total_workflows: int = 0
    total_sessions: int = 0
    active_sessions: int = 0
    completed_sessions: int = 0
    failed_sessions: int = 0

    total_decisions: int = 0
    total_accepted: int = 0
    total_rejected: int = 0
    total_corrected: int = 0

    overall_acceptance_rate: float = 0.0
    overall_correction_rate: float = 0.0

    workflows_ready_for_auto: int = 0
    workflows_in_learning: int = 0

    # Rolling 24-hour activity counters
    sessions_last_24h: int = 0
    decisions_last_24h: int = 0

    # Leaderboards: (workflow_id, count) pairs, highest first
    top_workflows_by_sessions: List[Tuple[str, int]] = field(default_factory=list)
    top_workflows_by_corrections: List[Tuple[str, int]] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize every metric field to a plain dict."""
        keys = (
            'total_workflows', 'total_sessions', 'active_sessions',
            'completed_sessions', 'failed_sessions', 'total_decisions',
            'total_accepted', 'total_rejected', 'total_corrected',
            'overall_acceptance_rate', 'overall_correction_rate',
            'workflows_ready_for_auto', 'workflows_in_learning',
            'sessions_last_24h', 'decisions_last_24h',
            'top_workflows_by_sessions', 'top_workflows_by_corrections',
        )
        return {key: getattr(self, key) for key in keys}
|
|
|
|
|
|
class CoachingMetricsCollector:
    """
    Collector and analyzer for COACHING metrics.

    Provides methods to:
    - Calculate workflow-specific learning metrics
    - Determine readiness for autonomous mode
    - Generate recommendations for improvement
    - Track global system health
    """

    # Thresholds a workflow must meet before AUTO mode is recommended.
    MIN_SESSIONS_FOR_AUTO = 5
    MIN_ACCEPTANCE_RATE_FOR_AUTO = 0.85
    MAX_CORRECTION_RATE_FOR_AUTO = 0.10
    MIN_CONFIDENCE_FOR_AUTO = 0.80

    def __init__(self, persistence: Optional[CoachingSessionPersistence] = None):
        """
        Initialize metrics collector.

        Args:
            persistence: Session persistence instance; defaults to the
                process-wide instance from get_coaching_persistence().
        """
        self.persistence = persistence or get_coaching_persistence()

    def get_workflow_metrics(self, workflow_id: str) -> WorkflowLearningMetrics:
        """
        Calculate comprehensive metrics for a workflow.

        Args:
            workflow_id: Workflow ID

        Returns:
            WorkflowLearningMetrics with all computed values
        """
        # Get all sessions for this workflow
        sessions = self.persistence.list_sessions(workflow_id=workflow_id, limit=1000)

        metrics = WorkflowLearningMetrics(workflow_id=workflow_id)
        metrics.total_sessions = len(sessions)

        if not sessions:
            metrics.learning_progress = LearningProgress.NOT_STARTED
            metrics.recommendations = ["Demarrez une premiere session COACHING"]
            return metrics

        # Load full sessions for detailed analysis.
        # NOTE(review): sessions that fail to load are silently dropped, so the
        # counters below may cover fewer sessions than total_sessions.
        full_sessions: List[CoachingSessionState] = []
        for session_info in sessions:
            session = self.persistence.load_session(session_info['session_id'])
            if session:
                full_sessions.append(session)

        # Aggregate per-session counters and completed-session durations.
        total_duration = 0.0
        for session in full_sessions:
            if session.status == SessionStatus.COMPLETED:
                metrics.completed_sessions += 1

            # Aggregate decision stats
            metrics.total_steps_coached += len(session.decisions)
            metrics.total_decisions += session.stats.get('suggestions_made', 0)
            metrics.accepted += session.stats.get('accepted', 0)
            metrics.rejected += session.stats.get('rejected', 0)
            metrics.corrected += session.stats.get('corrected', 0)
            metrics.manual_executions += session.stats.get('manual_executions', 0)
            metrics.skipped += session.stats.get('skipped', 0)

            # Accumulate wall-clock duration from ISO-8601 timestamps.
            if session.started_at and session.completed_at:
                try:
                    start = datetime.fromisoformat(session.started_at)
                    end = datetime.fromisoformat(session.completed_at)
                    total_duration += (end - start).total_seconds()
                except (ValueError, TypeError):
                    # Malformed timestamp: skip this session's duration only.
                    pass

        # Rates are computed over explicit verdicts only (accept/reject/
        # correct); manual executions and skips are excluded on purpose.
        total_decisions = metrics.accepted + metrics.rejected + metrics.corrected
        if total_decisions > 0:
            metrics.acceptance_rate = metrics.accepted / total_decisions
            metrics.correction_rate = metrics.corrected / total_decisions

        if metrics.total_sessions > 0:
            metrics.completion_rate = metrics.completed_sessions / metrics.total_sessions
        if metrics.completed_sessions > 0:
            metrics.avg_session_duration_seconds = total_duration / metrics.completed_sessions

        if metrics.total_decisions > 0 and total_duration > 0:
            metrics.avg_decision_time_seconds = total_duration / metrics.total_decisions

        # Determine learning progress
        metrics.learning_progress = self._determine_learning_progress(metrics)

        # Calculate confidence score
        metrics.confidence_score = self._calculate_confidence_score(metrics)

        # Check if ready for auto
        metrics.ready_for_auto = self._check_ready_for_auto(metrics)

        # Generate recommendations
        metrics.recommendations = self._generate_recommendations(metrics)

        return metrics

    def get_global_metrics(self) -> GlobalCoachingMetrics:
        """
        Calculate global metrics across all workflows.

        Returns:
            GlobalCoachingMetrics with aggregated data
        """
        metrics = GlobalCoachingMetrics()

        # Get all sessions
        all_sessions = self.persistence.list_sessions(limit=10000)
        metrics.total_sessions = len(all_sessions)

        # Track unique workflows
        workflow_stats: Dict[str, Dict] = {}
        # NOTE(review): datetime.now() is naive; if stored timestamps are
        # timezone-aware the comparisons below raise TypeError and the
        # 24h counters are skipped for that record — confirm timestamp format.
        now = datetime.now()
        last_24h = now - timedelta(hours=24)

        for session_info in all_sessions:
            workflow_id = session_info.get('workflow_id', 'unknown')
            status = session_info.get('status', 'unknown')

            # Initialize workflow stats
            if workflow_id not in workflow_stats:
                workflow_stats[workflow_id] = {
                    'sessions': 0,
                    'corrections': 0
                }
            workflow_stats[workflow_id]['sessions'] += 1

            # Count by status
            if status == 'active':
                metrics.active_sessions += 1
            elif status == 'completed':
                metrics.completed_sessions += 1
            elif status == 'failed':
                metrics.failed_sessions += 1

            # Check last 24h
            try:
                updated_at = datetime.fromisoformat(session_info.get('updated_at', ''))
                if updated_at > last_24h:
                    metrics.sessions_last_24h += 1
            except (ValueError, TypeError):
                # Missing or malformed updated_at: not counted as recent.
                pass

            # Load full session for decision stats
            session = self.persistence.load_session(session_info['session_id'])
            if session:
                metrics.total_decisions += session.stats.get('suggestions_made', 0)
                metrics.total_accepted += session.stats.get('accepted', 0)
                metrics.total_rejected += session.stats.get('rejected', 0)
                metrics.total_corrected += session.stats.get('corrected', 0)

                workflow_stats[workflow_id]['corrections'] += session.stats.get('corrected', 0)

                # Decisions in last 24h
                for decision in session.decisions:
                    try:
                        decision_time = datetime.fromisoformat(decision.timestamp)
                        if decision_time > last_24h:
                            metrics.decisions_last_24h += 1
                    except (ValueError, TypeError):
                        # Malformed decision timestamp: not counted as recent.
                        pass

        metrics.total_workflows = len(workflow_stats)

        # Calculate overall rates
        total_decided = metrics.total_accepted + metrics.total_rejected + metrics.total_corrected
        if total_decided > 0:
            metrics.overall_acceptance_rate = metrics.total_accepted / total_decided
            metrics.overall_correction_rate = metrics.total_corrected / total_decided

        # Count workflows by learning state.
        # NOTE(review): this re-loads every session per workflow (N+1 pattern);
        # acceptable for small stores, consider batching if it gets slow.
        for workflow_id in workflow_stats:
            wf_metrics = self.get_workflow_metrics(workflow_id)
            if wf_metrics.ready_for_auto:
                metrics.workflows_ready_for_auto += 1
            elif wf_metrics.learning_progress in [LearningProgress.LEARNING, LearningProgress.COACHING]:
                metrics.workflows_in_learning += 1

        # Top workflows
        sorted_by_sessions = sorted(
            workflow_stats.items(),
            key=lambda x: x[1]['sessions'],
            reverse=True
        )[:5]
        metrics.top_workflows_by_sessions = [
            (wf_id, stats['sessions']) for wf_id, stats in sorted_by_sessions
        ]

        sorted_by_corrections = sorted(
            workflow_stats.items(),
            key=lambda x: x[1]['corrections'],
            reverse=True
        )[:5]
        metrics.top_workflows_by_corrections = [
            (wf_id, stats['corrections']) for wf_id, stats in sorted_by_corrections
        ]

        return metrics

    def _determine_learning_progress(self, metrics: WorkflowLearningMetrics) -> LearningProgress:
        """Determine the learning progress level from aggregated metrics."""
        if metrics.total_sessions == 0:
            return LearningProgress.NOT_STARTED

        # Fewer than 3 sessions: still just observing.
        if metrics.total_sessions < 3:
            return LearningProgress.OBSERVATION

        # Majority of suggestions not accepted: still learning.
        if metrics.acceptance_rate < 0.5:
            return LearningProgress.LEARNING

        if (metrics.acceptance_rate >= self.MIN_ACCEPTANCE_RATE_FOR_AUTO
                and metrics.correction_rate <= self.MAX_CORRECTION_RATE_FOR_AUTO
                and metrics.total_sessions >= self.MIN_SESSIONS_FOR_AUTO):
            return LearningProgress.READY_FOR_AUTO

        return LearningProgress.COACHING

    def _calculate_confidence_score(self, metrics: WorkflowLearningMetrics) -> float:
        """Calculate overall confidence score in [0, 1], rounded to 3 decimals."""
        if metrics.total_decisions == 0:
            return 0.0

        # Weighted factors (sum to 1.0)
        acceptance_weight = 0.4
        correction_weight = 0.3
        completion_weight = 0.2
        volume_weight = 0.1

        # Acceptance component (higher is better)
        acceptance_score = metrics.acceptance_rate

        # Correction component (lower is better; 50% corrections -> score 0)
        correction_score = max(0, 1 - metrics.correction_rate * 2)

        # Completion component
        completion_score = metrics.completion_rate

        # Volume component (normalized, caps at 10 sessions)
        volume_score = min(1, metrics.total_sessions / 10)

        confidence = (
            acceptance_weight * acceptance_score +
            correction_weight * correction_score +
            completion_weight * completion_score +
            volume_weight * volume_score
        )

        return round(confidence, 3)

    def _check_ready_for_auto(self, metrics: WorkflowLearningMetrics) -> bool:
        """Check if workflow meets every threshold for autonomous mode."""
        return (
            metrics.total_sessions >= self.MIN_SESSIONS_FOR_AUTO and
            metrics.acceptance_rate >= self.MIN_ACCEPTANCE_RATE_FOR_AUTO and
            metrics.correction_rate <= self.MAX_CORRECTION_RATE_FOR_AUTO and
            metrics.confidence_score >= self.MIN_CONFIDENCE_FOR_AUTO
        )

    def _generate_recommendations(self, metrics: WorkflowLearningMetrics) -> List[str]:
        """Generate actionable, user-facing recommendations (French strings)."""
        recommendations = []

        if metrics.total_sessions == 0:
            recommendations.append("Demarrez votre premiere session COACHING pour commencer l'apprentissage")
            return recommendations

        if metrics.total_sessions < self.MIN_SESSIONS_FOR_AUTO:
            remaining = self.MIN_SESSIONS_FOR_AUTO - metrics.total_sessions
            recommendations.append(f"Completez {remaining} session(s) supplementaire(s) pour atteindre le minimum requis")

        if metrics.acceptance_rate < self.MIN_ACCEPTANCE_RATE_FOR_AUTO:
            current_pct = round(metrics.acceptance_rate * 100, 1)
            target_pct = round(self.MIN_ACCEPTANCE_RATE_FOR_AUTO * 100, 1)
            recommendations.append(
                f"Ameliorez le taux d'acceptation de {current_pct}% a {target_pct}% "
                "en ajustant les selecteurs d'elements"
            )

        if metrics.correction_rate > self.MAX_CORRECTION_RATE_FOR_AUTO:
            recommendations.append(
                "Le taux de correction est eleve. Verifiez les elements visuels "
                "qui necessitent souvent des corrections"
            )

        # More than 2 rejections per session on average.
        if metrics.rejected > metrics.total_sessions * 2:
            recommendations.append(
                "Beaucoup d'actions rejetees. Revisez le workflow pour supprimer "
                "les etapes incorrectes"
            )

        # Manual executions above 10% of suggestions made.
        if metrics.manual_executions > metrics.total_decisions * 0.1:
            recommendations.append(
                "Plusieurs executions manuelles detectees. Considerez automatiser "
                "ces actions frequentes"
            )

        if metrics.ready_for_auto:
            recommendations.append(
                "Ce workflow est pret pour le mode autonome ! "
                "Vous pouvez le passer en mode AUTO"
            )

        return recommendations
|
|
|
|
|
|
# Module-level singleton, created lazily on first access.
_metrics_collector: Optional[CoachingMetricsCollector] = None


def get_metrics_collector(persistence: Optional[CoachingSessionPersistence] = None) -> CoachingMetricsCollector:
    """Return the shared metrics collector, creating it on first call.

    Args:
        persistence: Optional persistence backend; only honored when the
            singleton is first created.
    """
    global _metrics_collector
    collector = _metrics_collector
    if collector is None:
        collector = CoachingMetricsCollector(persistence)
        _metrics_collector = collector
    return collector
|