- Frontend v4 accessible sur réseau local (192.168.1.40) - Ports ouverts: 3002 (frontend), 5001 (backend), 5004 (dashboard) - Ollama GPU fonctionnel - Self-healing interactif - Dashboard confiance Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
287 lines
9.5 KiB
Python
287 lines
9.5 KiB
Python
"""Logging and monitoring for self-healing operations."""
|
|
|
|
import json
|
|
import logging
|
|
from pathlib import Path
|
|
from typing import Dict, List, Optional
|
|
from datetime import datetime
|
|
from .models import RecoveryContext, RecoveryResult
|
|
|
|
|
|
class RecoveryLogger:
    """Logger for self-healing recovery operations.

    Records every recovery attempt and user intervention to a text log
    under ``log_path`` and maintains aggregate metrics (overall, per
    strategy, per workflow) persisted as JSON in ``metrics.json``.
    """

    def __init__(self, log_path: Optional[Path] = None):
        """
        Initialize recovery logger.

        Args:
            log_path: Path for storing recovery logs. Defaults to
                ``logs/healing``; created (with parents) if missing.
        """
        self.log_path = log_path or Path('logs/healing')
        self.log_path.mkdir(parents=True, exist_ok=True)

        # Setup file logger.
        # NOTE: logging.getLogger('healing') returns a process-wide
        # singleton, so we must guard against attaching a duplicate
        # FileHandler — otherwise every RecoveryLogger instantiation
        # would add another handler and each record would be written
        # multiple times.
        self.logger = logging.getLogger('healing')
        self.logger.setLevel(logging.INFO)

        log_file = self.log_path / 'recovery.log'
        already_attached = any(
            isinstance(h, logging.FileHandler)
            and Path(h.baseFilename).resolve() == log_file.resolve()
            for h in self.logger.handlers
        )
        if not already_attached:
            handler = logging.FileHandler(log_file, encoding='utf-8')
            handler.setFormatter(logging.Formatter(
                '%(asctime)s - %(levelname)s - %(message)s'
            ))
            self.logger.addHandler(handler)

        # Metrics storage (loaded eagerly so counters accumulate across runs)
        self.metrics_file = self.log_path / 'metrics.json'
        self.metrics = self._load_metrics()

    def log_recovery_attempt(
        self,
        context: RecoveryContext,
        result: RecoveryResult
    ):
        """
        Log a recovery attempt with full details and update metrics.

        Args:
            context: Recovery context (workflow/node/action that failed)
            result: Recovery result (strategy outcome)
        """
        log_entry = {
            'timestamp': datetime.now().isoformat(),
            'workflow_id': context.workflow_id,
            'node_id': context.node_id,
            'original_action': context.original_action,
            'target_element': context.target_element,
            'failure_reason': context.failure_reason,
            'attempt_count': context.attempt_count,
            'strategy_used': result.strategy_used,
            'success': result.success,
            'confidence_score': result.confidence_score,
            'execution_time': result.execution_time,
            'new_element': result.new_element,
            'requires_user_input': result.requires_user_input,
            'error_message': result.error_message
        }

        # Log to file (lazy %-args per logging convention)
        if result.success:
            self.logger.info("Recovery SUCCESS: %s", json.dumps(log_entry))
        else:
            self.logger.warning("Recovery FAILED: %s", json.dumps(log_entry))

        # Update metrics
        self._update_metrics(context, result)

    def log_user_intervention(
        self,
        context: RecoveryContext,
        user_action: str,
        details: Dict
    ):
        """
        Log user intervention in recovery process.

        Args:
            context: Recovery context
            user_action: Action taken by user
            details: Additional details (must be JSON-serializable)
        """
        log_entry = {
            'timestamp': datetime.now().isoformat(),
            'workflow_id': context.workflow_id,
            'node_id': context.node_id,
            'user_action': user_action,
            'details': details
        }

        self.logger.info("User intervention: %s", json.dumps(log_entry))

    def get_recovery_statistics(
        self,
        workflow_id: Optional[str] = None
    ) -> Dict:
        """
        Get recovery statistics.

        Args:
            workflow_id: Optional workflow ID to filter by

        Returns:
            Dictionary with statistics. When ``workflow_id`` is given and
            known, only that workflow's counters are returned; otherwise
            a shallow copy of the full metrics dict.
        """
        metrics = self.metrics.copy()

        if workflow_id and workflow_id in metrics.get('by_workflow', {}):
            return metrics['by_workflow'][workflow_id]

        return metrics

    def generate_insights(self) -> List[str]:
        """
        Generate insights and recommendations from recovery patterns.

        Returns:
            List of human-readable insight strings
        """
        insights = []
        metrics = self.metrics

        # Overall success rate
        total = metrics.get('total_attempts', 0)
        successes = metrics.get('successful_recoveries', 0)
        if total > 0:
            success_rate = (successes / total) * 100
            insights.append(f"Overall recovery success rate: {success_rate:.1f}%")

        # Strategy performance: report the best strategy by success rate
        strategy_perf = metrics.get('strategy_performance', {})
        if strategy_perf:
            best_strategy = max(
                strategy_perf.items(),
                key=lambda x: x[1].get('success_rate', 0)
            )
            insights.append(
                f"Best performing strategy: {best_strategy[0]} "
                f"({best_strategy[1].get('success_rate', 0):.1f}% success)"
            )

        # Time savings (accumulated by _update_metrics on each success)
        time_saved = metrics.get('time_saved_hours', 0)
        if time_saved > 0:
            insights.append(f"Estimated time saved: {time_saved:.1f} hours")

        # Repeated failures
        repeated_failures = self._detect_repeated_failures()
        if repeated_failures:
            insights.append(
                f"Warning: {len(repeated_failures)} workflows have repeated failures"
            )

        return insights

    def check_for_alerts(self) -> List[Dict]:
        """
        Check for conditions that require administrator attention.

        Returns:
            List of alert dictionaries with 'severity', 'type' and
            'message' keys (plus context-specific fields).
        """
        alerts = []

        # Check for repeated failures (5+ failures => high-severity alert)
        repeated_failures = self._detect_repeated_failures()
        for workflow_id, count in repeated_failures.items():
            if count >= 5:
                alerts.append({
                    'severity': 'high',
                    'type': 'repeated_failures',
                    'workflow_id': workflow_id,
                    'count': count,
                    'message': f'Workflow {workflow_id} has {count} repeated failures'
                })

        # Check for low success rates (only after 10+ attempts, to avoid
        # flagging strategies on small samples)
        strategy_perf = self.metrics.get('strategy_performance', {})
        for strategy, perf in strategy_perf.items():
            success_rate = perf.get('success_rate', 0)
            attempts = perf.get('attempts', 0)
            if attempts >= 10 and success_rate < 50:
                alerts.append({
                    'severity': 'medium',
                    'type': 'low_success_rate',
                    'strategy': strategy,
                    'success_rate': success_rate,
                    'message': f'Strategy {strategy} has low success rate: {success_rate:.1f}%'
                })

        return alerts

    def _update_metrics(self, context: RecoveryContext, result: RecoveryResult):
        """Update aggregate metrics with a recovery result and persist them."""
        # Total attempts
        self.metrics['total_attempts'] = self.metrics.get('total_attempts', 0) + 1

        # Successful recoveries
        if result.success:
            self.metrics['successful_recoveries'] = \
                self.metrics.get('successful_recoveries', 0) + 1

            # Estimate time saved (assume 5 minutes per manual intervention)
            time_saved_hours = self.metrics.get('time_saved_hours', 0.0)
            self.metrics['time_saved_hours'] = time_saved_hours + (5.0 / 60.0)

        # Strategy performance (setdefault keeps first-seen initialization)
        strategy_perf = self.metrics.setdefault('strategy_performance', {})
        perf = strategy_perf.setdefault(result.strategy_used, {
            'attempts': 0,
            'successes': 0,
            'success_rate': 0.0
        })
        perf['attempts'] += 1
        if result.success:
            perf['successes'] += 1
        perf['success_rate'] = (perf['successes'] / perf['attempts']) * 100

        # Per-workflow counters
        by_workflow = self.metrics.setdefault('by_workflow', {})
        wf_metrics = by_workflow.setdefault(context.workflow_id, {
            'attempts': 0,
            'successes': 0,
            'failures': 0
        })
        wf_metrics['attempts'] += 1
        if result.success:
            wf_metrics['successes'] += 1
        else:
            wf_metrics['failures'] += 1

        # Persist after every update so metrics survive a crash/restart
        self._save_metrics()

    def _detect_repeated_failures(self) -> Dict[str, int]:
        """Detect workflows with repeated failures (3 or more)."""
        by_workflow = self.metrics.get('by_workflow', {})
        return {
            workflow_id: metrics.get('failures', 0)
            for workflow_id, metrics in by_workflow.items()
            if metrics.get('failures', 0) >= 3
        }

    def _load_metrics(self) -> Dict:
        """Load metrics from storage; return {} when absent or unreadable."""
        if not self.metrics_file.exists():
            return {}

        try:
            with open(self.metrics_file, 'r', encoding='utf-8') as f:
                return json.load(f)
        # Narrow catch: file errors or corrupt JSON; anything else is a bug
        # that should surface.
        except (OSError, json.JSONDecodeError) as e:
            self.logger.error("Error loading metrics: %s", e)
            return {}

    def _save_metrics(self):
        """Persist metrics to JSON storage; log (but don't raise) on failure."""
        try:
            with open(self.metrics_file, 'w', encoding='utf-8') as f:
                json.dump(self.metrics, f, indent=2)
        # TypeError/ValueError cover non-serializable metric values;
        # OSError covers disk/permission issues.
        except (OSError, TypeError, ValueError) as e:
            self.logger.error("Error saving metrics: %s", e)