feat(corrections): Add Correction Packs system for cross-workflow learning
Implement a complete system for capitalizing user corrections across multiple workflows and sessions. This enables automatic application of learned fixes when similar failures occur in different contexts. New components: - core/corrections/models.py: CorrectionKey, Correction, CorrectionPack models - core/corrections/correction_repository.py: JSON storage with atomic writes - core/corrections/aggregator.py: Aggregation by hash and quality filtering - core/corrections/correction_pack_service.py: CRUD, export/import, versioning - backend/api/correction_packs.py: REST API with 15 endpoints Features: - MD5-based key hashing for correction deduplication - Export/import in JSON and YAML formats - Version history with rollback support - Cross-workflow pattern detection - Integration with SelfHealingEngine for automatic application - 29 unit tests (all passing) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
443
core/healing/healing_engine.py
Normal file
443
core/healing/healing_engine.py
Normal file
@@ -0,0 +1,443 @@
|
||||
"""Main self-healing engine for workflow recovery."""
|
||||
|
||||
import time
|
||||
import logging
|
||||
from typing import List, Optional, Dict, Any, Tuple
|
||||
from pathlib import Path
|
||||
from .models import RecoveryContext, RecoveryResult, RecoverySuggestion
|
||||
from .learning_repository import LearningRepository
|
||||
from .confidence_scorer import ConfidenceScorer
|
||||
from .strategies import (
|
||||
RecoveryStrategy,
|
||||
SemanticVariantStrategy,
|
||||
SpatialFallbackStrategy,
|
||||
TimingAdaptationStrategy,
|
||||
FormatTransformationStrategy
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SelfHealingEngine:
|
||||
"""Main engine for self-healing workflows."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
learning_repo: Optional[LearningRepository] = None,
|
||||
storage_path: Optional[Path] = None,
|
||||
correction_packs_enabled: bool = True
|
||||
):
|
||||
"""
|
||||
Initialize the self-healing engine.
|
||||
|
||||
Args:
|
||||
learning_repo: Learning repository instance
|
||||
storage_path: Path for storing learned patterns
|
||||
correction_packs_enabled: Whether to consult correction packs
|
||||
"""
|
||||
# Initialize learning repository
|
||||
if learning_repo:
|
||||
self.learning_repo = learning_repo
|
||||
else:
|
||||
storage_path = storage_path or Path('data/healing')
|
||||
self.learning_repo = LearningRepository(storage_path)
|
||||
|
||||
# Initialize confidence scorer
|
||||
self.confidence_scorer = ConfidenceScorer()
|
||||
|
||||
# Initialize recovery strategies
|
||||
self.recovery_strategies: List[RecoveryStrategy] = [
|
||||
SemanticVariantStrategy(),
|
||||
SpatialFallbackStrategy(),
|
||||
TimingAdaptationStrategy(),
|
||||
FormatTransformationStrategy()
|
||||
]
|
||||
|
||||
# Configuration
|
||||
self.max_recovery_time = 30.0 # seconds
|
||||
self.parallel_execution = True
|
||||
|
||||
# Correction packs integration
|
||||
self.correction_packs_enabled = correction_packs_enabled
|
||||
self._correction_pack_service = None
|
||||
|
||||
@property
|
||||
def correction_pack_service(self):
|
||||
"""Lazy-load correction pack service."""
|
||||
if self._correction_pack_service is None and self.correction_packs_enabled:
|
||||
try:
|
||||
from core.corrections import CorrectionPackService
|
||||
self._correction_pack_service = CorrectionPackService()
|
||||
logger.info("CorrectionPackService initialized for healing engine")
|
||||
except ImportError as e:
|
||||
logger.warning(f"CorrectionPackService not available: {e}")
|
||||
self.correction_packs_enabled = False
|
||||
return self._correction_pack_service
|
||||
|
||||
def attempt_recovery(self, context: RecoveryContext) -> RecoveryResult:
|
||||
"""
|
||||
Attempt to recover from a workflow failure.
|
||||
|
||||
Args:
|
||||
context: Recovery context with failure information
|
||||
|
||||
Returns:
|
||||
RecoveryResult with outcome
|
||||
"""
|
||||
start_time = time.time()
|
||||
|
||||
# Check if we've exceeded max attempts
|
||||
if context.attempt_count >= context.max_attempts:
|
||||
return RecoveryResult(
|
||||
success=False,
|
||||
strategy_used='none',
|
||||
error_message=f'Max recovery attempts ({context.max_attempts}) exceeded'
|
||||
)
|
||||
|
||||
# Try correction packs first (if enabled)
|
||||
if self.correction_packs_enabled and self.correction_pack_service:
|
||||
pack_result = self._try_correction_packs(context)
|
||||
if pack_result and pack_result.success:
|
||||
return pack_result
|
||||
|
||||
# Get learned patterns for this context
|
||||
learned_patterns = self.learning_repo.get_matching_patterns(context)
|
||||
|
||||
# Get prioritized strategies
|
||||
strategies = self._prioritize_strategies(context, learned_patterns)
|
||||
|
||||
# Try each strategy
|
||||
for strategy in strategies:
|
||||
# Check time limit
|
||||
elapsed = time.time() - start_time
|
||||
if elapsed >= self.max_recovery_time:
|
||||
break
|
||||
|
||||
# Skip if strategy can't handle this failure
|
||||
if not strategy.can_handle(context):
|
||||
continue
|
||||
|
||||
# Attempt recovery
|
||||
result = strategy.attempt_recovery(context)
|
||||
|
||||
# If successful, learn from it
|
||||
if result.success:
|
||||
# Calculate final confidence
|
||||
historical_success = self._get_historical_success_rate(
|
||||
strategy.name, context, learned_patterns
|
||||
)
|
||||
result.confidence_score = self.confidence_scorer.calculate_recovery_confidence(
|
||||
result.strategy_used,
|
||||
context,
|
||||
historical_success
|
||||
)
|
||||
|
||||
# Check if safe to proceed
|
||||
if self.confidence_scorer.is_safe_to_proceed(
|
||||
result.confidence_score,
|
||||
context.confidence_threshold,
|
||||
involves_data_modification=self._involves_data_modification(context)
|
||||
):
|
||||
# Learn from success
|
||||
self.learn_from_success(context, result)
|
||||
return result
|
||||
else:
|
||||
# Confidence too low, mark as requiring user input
|
||||
result.requires_user_input = True
|
||||
return result
|
||||
|
||||
# All strategies failed
|
||||
total_time = time.time() - start_time
|
||||
return RecoveryResult(
|
||||
success=False,
|
||||
strategy_used='all_failed',
|
||||
execution_time=total_time,
|
||||
error_message='All recovery strategies failed',
|
||||
requires_user_input=True
|
||||
)
|
||||
|
||||
def learn_from_success(self, context: RecoveryContext, result: RecoveryResult):
|
||||
"""
|
||||
Learn from successful recovery for future use.
|
||||
|
||||
Args:
|
||||
context: Recovery context
|
||||
result: Successful recovery result
|
||||
"""
|
||||
self.learning_repo.store_pattern(context, result)
|
||||
|
||||
def get_recovery_suggestions(self, context: RecoveryContext) -> List[RecoverySuggestion]:
|
||||
"""
|
||||
Get ranked recovery suggestions based on learned patterns.
|
||||
|
||||
Args:
|
||||
context: Recovery context
|
||||
|
||||
Returns:
|
||||
List of recovery suggestions sorted by confidence
|
||||
"""
|
||||
suggestions = []
|
||||
|
||||
# Get learned patterns
|
||||
learned_patterns = self.learning_repo.get_matching_patterns(context)
|
||||
|
||||
# Get suggestions from each strategy
|
||||
for strategy in self.recovery_strategies:
|
||||
if not strategy.can_handle(context):
|
||||
continue
|
||||
|
||||
# Calculate confidence based on historical success
|
||||
historical_success = self._get_historical_success_rate(
|
||||
strategy.name, context, learned_patterns
|
||||
)
|
||||
confidence = self.confidence_scorer.calculate_recovery_confidence(
|
||||
strategy.name,
|
||||
context,
|
||||
historical_success
|
||||
)
|
||||
|
||||
suggestion = RecoverySuggestion(
|
||||
strategy=strategy.name,
|
||||
confidence=confidence,
|
||||
description=self._get_strategy_description(strategy),
|
||||
estimated_time=self._estimate_strategy_time(strategy, context)
|
||||
)
|
||||
suggestions.append(suggestion)
|
||||
|
||||
# Sort by confidence
|
||||
suggestions.sort(key=lambda s: s.confidence, reverse=True)
|
||||
|
||||
return suggestions
|
||||
|
||||
def prune_learned_patterns(
|
||||
self,
|
||||
max_age_days: int = 90,
|
||||
min_confidence: float = 0.3
|
||||
):
|
||||
"""
|
||||
Prune outdated learned patterns.
|
||||
|
||||
Args:
|
||||
max_age_days: Maximum age for patterns
|
||||
min_confidence: Minimum confidence threshold
|
||||
"""
|
||||
self.learning_repo.prune_outdated_patterns(max_age_days, min_confidence)
|
||||
|
||||
def _prioritize_strategies(
|
||||
self,
|
||||
context: RecoveryContext,
|
||||
learned_patterns: List
|
||||
) -> List[RecoveryStrategy]:
|
||||
"""Prioritize strategies based on context and learned patterns."""
|
||||
# Score each strategy
|
||||
scored_strategies = []
|
||||
for strategy in self.recovery_strategies:
|
||||
# Base priority from strategy
|
||||
priority = strategy.get_priority(context)
|
||||
|
||||
# Boost priority if we have successful patterns
|
||||
historical_success = self._get_historical_success_rate(
|
||||
strategy.name, context, learned_patterns
|
||||
)
|
||||
if historical_success > 0:
|
||||
priority *= (1.0 + historical_success)
|
||||
|
||||
scored_strategies.append((priority, strategy))
|
||||
|
||||
# Sort by priority (highest first)
|
||||
scored_strategies.sort(key=lambda x: x[0], reverse=True)
|
||||
|
||||
return [strategy for _, strategy in scored_strategies]
|
||||
|
||||
def _get_historical_success_rate(
|
||||
self,
|
||||
strategy_name: str,
|
||||
context: RecoveryContext,
|
||||
learned_patterns: List
|
||||
) -> float:
|
||||
"""Get historical success rate for a strategy."""
|
||||
matching_patterns = [
|
||||
p for p in learned_patterns
|
||||
if p.recovery_strategy == strategy_name
|
||||
]
|
||||
|
||||
if not matching_patterns:
|
||||
return 0.0
|
||||
|
||||
# Return average success rate
|
||||
return sum(p.success_rate for p in matching_patterns) / len(matching_patterns)
|
||||
|
||||
def _involves_data_modification(self, context: RecoveryContext) -> bool:
|
||||
"""Check if the action involves data modification."""
|
||||
data_modification_actions = [
|
||||
'input', 'type', 'submit', 'delete', 'update', 'save'
|
||||
]
|
||||
return any(action in context.original_action.lower()
|
||||
for action in data_modification_actions)
|
||||
|
||||
def _get_strategy_description(self, strategy: RecoveryStrategy) -> str:
|
||||
"""Get human-readable description of strategy."""
|
||||
descriptions = {
|
||||
'SemanticVariantStrategy': 'Try semantic variants of the element (e.g., Submit → Send)',
|
||||
'SpatialFallbackStrategy': 'Search in expanded area around original position',
|
||||
'TimingAdaptationStrategy': 'Increase wait time for element to appear',
|
||||
'FormatTransformationStrategy': 'Transform input format to match validation'
|
||||
}
|
||||
return descriptions.get(strategy.name, strategy.name)
|
||||
|
||||
def _estimate_strategy_time(
|
||||
self,
|
||||
strategy: RecoveryStrategy,
|
||||
context: RecoveryContext
|
||||
) -> float:
|
||||
"""Estimate execution time for strategy."""
|
||||
# Simple estimates in seconds
|
||||
estimates = {
|
||||
'SemanticVariantStrategy': 2.0,
|
||||
'SpatialFallbackStrategy': 5.0,
|
||||
'TimingAdaptationStrategy': 10.0,
|
||||
'FormatTransformationStrategy': 1.0
|
||||
}
|
||||
return estimates.get(strategy.name, 3.0)
|
||||
|
||||
def _try_correction_packs(self, context: RecoveryContext) -> Optional[RecoveryResult]:
|
||||
"""
|
||||
Try to find and apply a correction from correction packs.
|
||||
|
||||
Args:
|
||||
context: Recovery context
|
||||
|
||||
Returns:
|
||||
RecoveryResult if a correction was successfully applied, None otherwise
|
||||
"""
|
||||
try:
|
||||
# Get element type from metadata
|
||||
element_type = context.metadata.get('element_type', 'unknown')
|
||||
|
||||
# Search for applicable corrections
|
||||
corrections = self.correction_pack_service.find_applicable_corrections(
|
||||
action_type=context.original_action,
|
||||
element_type=element_type,
|
||||
failure_context=context.failure_reason,
|
||||
min_confidence=0.5 # Only use high-confidence corrections
|
||||
)
|
||||
|
||||
if not corrections:
|
||||
return None
|
||||
|
||||
# Try corrections in order of confidence
|
||||
for correction_info in corrections:
|
||||
pack_id = correction_info['pack_id']
|
||||
correction = correction_info['correction']
|
||||
|
||||
try:
|
||||
# Apply the correction
|
||||
applied = self._apply_correction(context, correction)
|
||||
|
||||
if applied:
|
||||
# Record successful application
|
||||
self.correction_pack_service.apply_correction(
|
||||
pack_id=pack_id,
|
||||
correction_id=correction['id'],
|
||||
success=True
|
||||
)
|
||||
|
||||
# Propagate to learning repository
|
||||
result = RecoveryResult(
|
||||
success=True,
|
||||
strategy_used=f"correction_pack:{correction['correction_type']}",
|
||||
confidence_score=correction['confidence_score'],
|
||||
modified_action=correction.get('corrected_params'),
|
||||
modified_target=correction.get('corrected_target')
|
||||
)
|
||||
|
||||
# Learn from this for future reference
|
||||
self.learning_repo.store_pattern(context, result)
|
||||
|
||||
logger.info(
|
||||
f"Applied correction from pack {pack_id}: "
|
||||
f"{correction['id']} (confidence: {correction['confidence_score']:.2f})"
|
||||
)
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to apply correction {correction['id']}: {e}")
|
||||
# Record failed application
|
||||
self.correction_pack_service.apply_correction(
|
||||
pack_id=pack_id,
|
||||
correction_id=correction['id'],
|
||||
success=False
|
||||
)
|
||||
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in correction pack lookup: {e}")
|
||||
return None
|
||||
|
||||
def _apply_correction(
|
||||
self,
|
||||
context: RecoveryContext,
|
||||
correction: Dict[str, Any]
|
||||
) -> bool:
|
||||
"""
|
||||
Apply a correction to the current context.
|
||||
|
||||
Args:
|
||||
context: Recovery context
|
||||
correction: Correction data dict
|
||||
|
||||
Returns:
|
||||
True if correction was applied successfully
|
||||
"""
|
||||
correction_type = correction.get('correction_type', 'other')
|
||||
|
||||
# Handle different correction types
|
||||
if correction_type == 'target_change':
|
||||
# Update target in context
|
||||
if correction.get('corrected_target'):
|
||||
context.metadata['corrected_target'] = correction['corrected_target']
|
||||
return True
|
||||
|
||||
elif correction_type == 'parameter_change':
|
||||
# Update parameters in context
|
||||
if correction.get('corrected_params'):
|
||||
context.metadata['corrected_params'] = correction['corrected_params']
|
||||
return True
|
||||
|
||||
elif correction_type == 'wait_added':
|
||||
# Add wait time
|
||||
wait_time = correction.get('corrected_params', {}).get('wait_time', 2.0)
|
||||
context.metadata['additional_wait'] = wait_time
|
||||
return True
|
||||
|
||||
elif correction_type == 'timing_adjust':
|
||||
# Adjust timing
|
||||
timing = correction.get('corrected_params', {}).get('timing_multiplier', 1.5)
|
||||
context.metadata['timing_multiplier'] = timing
|
||||
return True
|
||||
|
||||
elif correction_type == 'coordinates_adjust':
|
||||
# Adjust coordinates
|
||||
offset = correction.get('corrected_params', {}).get('offset', {})
|
||||
context.metadata['coordinate_offset'] = offset
|
||||
return True
|
||||
|
||||
# For other types, just mark as applied if we have any correction data
|
||||
return bool(correction.get('corrected_target') or correction.get('corrected_params'))
|
||||
|
||||
def get_correction_pack_statistics(self) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Get statistics from correction packs.
|
||||
|
||||
Returns:
|
||||
Statistics dict or None if not available
|
||||
"""
|
||||
if not self.correction_packs_enabled or not self.correction_pack_service:
|
||||
return None
|
||||
|
||||
try:
|
||||
return self.correction_pack_service.get_global_statistics()
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting correction pack statistics: {e}")
|
||||
return None
|
||||
Reference in New Issue
Block a user