v1.0 - Version stable: multi-PC, détection UI-DETR-1, 3 modes exécution
- Frontend v4 accessible sur réseau local (192.168.1.40) - Ports ouverts: 3002 (frontend), 5001 (backend), 5004 (dashboard) - Ollama GPU fonctionnel - Self-healing interactif - Dashboard confiance Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
286
core/healing/recovery_logger.py
Normal file
286
core/healing/recovery_logger.py
Normal file
@@ -0,0 +1,286 @@
|
||||
"""Logging and monitoring for self-healing operations."""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
from datetime import datetime
|
||||
from .models import RecoveryContext, RecoveryResult
|
||||
|
||||
|
||||
class RecoveryLogger:
    """Logger for self-healing recovery operations.

    Writes structured (JSON-serialized) recovery events to a file log and
    maintains aggregate metrics — totals, per-strategy performance and
    per-workflow counts — persisted to a JSON file in the same directory.
    """

    def __init__(self, log_path: Optional[Path] = None):
        """
        Initialize recovery logger.

        Args:
            log_path: Directory for storing recovery logs. Defaults to
                'logs/healing'; created (with parents) if missing.
        """
        self.log_path = log_path or Path('logs/healing')
        self.log_path.mkdir(parents=True, exist_ok=True)

        # Setup file logger
        self.logger = logging.getLogger('healing')
        self.logger.setLevel(logging.INFO)

        # File handler.
        # BUGFIX: logging.getLogger('healing') returns a process-wide
        # singleton, so attaching a handler unconditionally meant every new
        # RecoveryLogger instance stacked another FileHandler and each record
        # was written multiple times. Attach only if not configured yet.
        if not self.logger.handlers:
            log_file = self.log_path / 'recovery.log'
            handler = logging.FileHandler(log_file)
            handler.setFormatter(logging.Formatter(
                '%(asctime)s - %(levelname)s - %(message)s'
            ))
            self.logger.addHandler(handler)

        # Metrics storage (loaded eagerly so counters survive restarts)
        self.metrics_file = self.log_path / 'metrics.json'
        self.metrics = self._load_metrics()

    def log_recovery_attempt(
        self,
        context: RecoveryContext,
        result: RecoveryResult
    ):
        """
        Log a recovery attempt with full details.

        Emits one JSON line to the file log (INFO on success, WARNING on
        failure) and updates the persisted aggregate metrics.

        Args:
            context: Recovery context
            result: Recovery result
        """
        log_entry = {
            'timestamp': datetime.now().isoformat(),
            'workflow_id': context.workflow_id,
            'node_id': context.node_id,
            'original_action': context.original_action,
            'target_element': context.target_element,
            'failure_reason': context.failure_reason,
            'attempt_count': context.attempt_count,
            'strategy_used': result.strategy_used,
            'success': result.success,
            'confidence_score': result.confidence_score,
            'execution_time': result.execution_time,
            'new_element': result.new_element,
            'requires_user_input': result.requires_user_input,
            'error_message': result.error_message
        }

        # Log to file
        if result.success:
            self.logger.info(f"Recovery SUCCESS: {json.dumps(log_entry)}")
        else:
            self.logger.warning(f"Recovery FAILED: {json.dumps(log_entry)}")

        # Update metrics
        self._update_metrics(context, result)

    def log_user_intervention(
        self,
        context: RecoveryContext,
        user_action: str,
        details: Dict
    ):
        """
        Log user intervention in recovery process.

        Args:
            context: Recovery context
            user_action: Action taken by user
            details: Additional details (must be JSON-serializable)
        """
        log_entry = {
            'timestamp': datetime.now().isoformat(),
            'workflow_id': context.workflow_id,
            'node_id': context.node_id,
            'user_action': user_action,
            'details': details
        }

        self.logger.info(f"User intervention: {json.dumps(log_entry)}")

    def get_recovery_statistics(
        self,
        workflow_id: Optional[str] = None
    ) -> Dict:
        """
        Get recovery statistics.

        Args:
            workflow_id: Optional workflow ID to filter by

        Returns:
            The per-workflow stats dict when workflow_id is known,
            otherwise a shallow copy of all metrics. NOTE: the copy is
            shallow — nested dicts are shared with live metrics.
        """
        metrics = self.metrics.copy()

        if workflow_id and workflow_id in metrics.get('by_workflow', {}):
            return metrics['by_workflow'][workflow_id]

        return metrics

    def generate_insights(self) -> List[str]:
        """
        Generate insights and recommendations from recovery patterns.

        Returns:
            List of insight strings (success rate, best strategy,
            estimated time saved, repeated-failure warnings)
        """
        insights = []
        metrics = self.metrics

        # Overall success rate
        total = metrics.get('total_attempts', 0)
        successes = metrics.get('successful_recoveries', 0)
        if total > 0:
            success_rate = (successes / total) * 100
            insights.append(f"Overall recovery success rate: {success_rate:.1f}%")

        # Strategy performance
        strategy_perf = metrics.get('strategy_performance', {})
        if strategy_perf:
            best_strategy = max(
                strategy_perf.items(),
                key=lambda x: x[1].get('success_rate', 0)
            )
            insights.append(
                f"Best performing strategy: {best_strategy[0]} "
                f"({best_strategy[1].get('success_rate', 0):.1f}% success)"
            )

        # Time savings
        time_saved = metrics.get('time_saved_hours', 0)
        if time_saved > 0:
            insights.append(f"Estimated time saved: {time_saved:.1f} hours")

        # Repeated failures
        repeated_failures = self._detect_repeated_failures()
        if repeated_failures:
            insights.append(
                f"Warning: {len(repeated_failures)} workflows have repeated failures"
            )

        return insights

    def check_for_alerts(self) -> List[Dict]:
        """
        Check for conditions that require administrator attention.

        Returns:
            List of alert dictionaries with 'severity', 'type',
            'message' and condition-specific keys
        """
        alerts = []

        # Check for repeated failures (>= 5 failures escalates to an alert)
        repeated_failures = self._detect_repeated_failures()
        for workflow_id, count in repeated_failures.items():
            if count >= 5:
                alerts.append({
                    'severity': 'high',
                    'type': 'repeated_failures',
                    'workflow_id': workflow_id,
                    'count': count,
                    'message': f'Workflow {workflow_id} has {count} repeated failures'
                })

        # Check for low success rates (only meaningful with >= 10 attempts)
        strategy_perf = self.metrics.get('strategy_performance', {})
        for strategy, perf in strategy_perf.items():
            success_rate = perf.get('success_rate', 0)
            attempts = perf.get('attempts', 0)
            if attempts >= 10 and success_rate < 50:
                alerts.append({
                    'severity': 'medium',
                    'type': 'low_success_rate',
                    'strategy': strategy,
                    'success_rate': success_rate,
                    'message': f'Strategy {strategy} has low success rate: {success_rate:.1f}%'
                })

        return alerts

    def _update_metrics(self, context: RecoveryContext, result: RecoveryResult):
        """Update aggregate metrics with a recovery result and persist them."""
        # Total attempts
        self.metrics['total_attempts'] = self.metrics.get('total_attempts', 0) + 1

        # Successful recoveries
        if result.success:
            self.metrics['successful_recoveries'] = \
                self.metrics.get('successful_recoveries', 0) + 1

            # Estimate time saved (assume 5 minutes per manual intervention)
            time_saved_hours = self.metrics.get('time_saved_hours', 0.0)
            self.metrics['time_saved_hours'] = time_saved_hours + (5.0 / 60.0)

        # Strategy performance
        if 'strategy_performance' not in self.metrics:
            self.metrics['strategy_performance'] = {}

        strategy = result.strategy_used
        if strategy not in self.metrics['strategy_performance']:
            self.metrics['strategy_performance'][strategy] = {
                'attempts': 0,
                'successes': 0,
                'success_rate': 0.0
            }

        perf = self.metrics['strategy_performance'][strategy]
        perf['attempts'] += 1
        if result.success:
            perf['successes'] += 1
        perf['success_rate'] = (perf['successes'] / perf['attempts']) * 100

        # By workflow
        if 'by_workflow' not in self.metrics:
            self.metrics['by_workflow'] = {}

        workflow_id = context.workflow_id
        if workflow_id not in self.metrics['by_workflow']:
            self.metrics['by_workflow'][workflow_id] = {
                'attempts': 0,
                'successes': 0,
                'failures': 0
            }

        wf_metrics = self.metrics['by_workflow'][workflow_id]
        wf_metrics['attempts'] += 1
        if result.success:
            wf_metrics['successes'] += 1
        else:
            wf_metrics['failures'] += 1

        # Save metrics
        self._save_metrics()

    def _detect_repeated_failures(self) -> Dict[str, int]:
        """Detect workflows with repeated failures (>= 3 recorded failures)."""
        repeated = {}
        by_workflow = self.metrics.get('by_workflow', {})

        for workflow_id, metrics in by_workflow.items():
            failures = metrics.get('failures', 0)
            if failures >= 3:
                repeated[workflow_id] = failures

        return repeated

    def _load_metrics(self) -> Dict:
        """Load metrics from storage; return empty dict if missing/corrupt."""
        if not self.metrics_file.exists():
            return {}

        try:
            with open(self.metrics_file, 'r') as f:
                return json.load(f)
        except Exception as e:
            # Best-effort: a corrupt metrics file should not break healing.
            self.logger.error(f"Error loading metrics: {e}")
            return {}

    def _save_metrics(self):
        """Save metrics to storage (best-effort; errors are logged only)."""
        try:
            with open(self.metrics_file, 'w') as f:
                json.dump(self.metrics, f, indent=2)
        except Exception as e:
            self.logger.error(f"Error saving metrics: {e}")
|
||||
Reference in New Issue
Block a user