"""Property-based tests for self-healing workflows."""

import pytest
from hypothesis import given, strategies as st, settings
from pathlib import Path
import tempfile
import shutil

from core.healing.healing_engine import SelfHealingEngine
from core.healing.learning_repository import LearningRepository
from core.healing.confidence_scorer import ConfidenceScorer
from core.healing.models import RecoveryContext, RecoveryResult, RecoveryPattern
from datetime import datetime, timedelta


# Strategy for generating recovery contexts
@st.composite
def recovery_context_strategy(draw):
    """Draw a randomized ``RecoveryContext``.

    Every field is drawn independently: the action and failure reason come
    from fixed vocabularies, the identifiers are short non-empty strings,
    ``attempt_count`` lies in 1..3 while ``max_attempts`` is pinned to 3, and
    ``confidence_threshold`` lies in [0.5, 0.9]. ``metadata`` maps short
    non-empty keys to text, integers or unconstrained floats.
    """
    actions = st.sampled_from(['click', 'input', 'type', 'submit'])
    failure_reasons = st.sampled_from([
        'element_not_found', 'timeout', 'validation_failed', 'element_moved'
    ])
    metadata_keys = st.text(min_size=1, max_size=20)
    metadata_values = st.one_of(st.text(), st.integers(), st.floats())

    # Draw in the same order the constructor fields are listed.
    action = draw(actions)
    element = draw(st.text(min_size=1, max_size=50))
    reason = draw(failure_reasons)
    screenshot = draw(st.text(min_size=1, max_size=100))
    workflow = draw(st.text(min_size=1, max_size=20))
    node = draw(st.text(min_size=1, max_size=20))
    attempts = draw(st.integers(min_value=1, max_value=3))
    threshold = draw(st.floats(min_value=0.5, max_value=0.9))
    extra = draw(st.dictionaries(metadata_keys, metadata_values))

    return RecoveryContext(
        original_action=action,
        target_element=element,
        failure_reason=reason,
        screenshot_path=screenshot,
        workflow_id=workflow,
        node_id=node,
        attempt_count=attempts,
        max_attempts=3,
        confidence_threshold=threshold,
        metadata=extra,
    )


@st.composite
def recovery_result_strategy(draw):
    """Draw a randomized ``RecoveryResult``.

    ``success`` is drawn first because it constrains ``requires_user_input``:
    a successful result never requires user input, whereas a failed one may
    or may not. All remaining fields are drawn independently.
    """
    succeeded = draw(st.booleans())
    strategy_name = draw(st.sampled_from([
        'semantic_variant', 'spatial_fallback',
        'timing_adaptation', 'format_transformation'
    ]))
    replacement = draw(st.one_of(st.none(), st.text(min_size=1, max_size=50)))
    confidence = draw(st.floats(min_value=0.0, max_value=1.0))
    duration = draw(st.floats(min_value=0.0, max_value=30.0))
    pattern = draw(st.one_of(
        st.none(),
        st.dictionaries(
            st.text(min_size=1, max_size=20),
            st.text(min_size=1, max_size=50)
        )
    ))
    # Only a failed recovery draws a value here; success always yields False.
    needs_user = False if succeeded else draw(st.booleans())

    return RecoveryResult(
        success=succeeded,
        strategy_used=strategy_name,
        new_element=replacement,
        confidence_score=confidence,
        execution_time=duration,
        learned_pattern=pattern,
        requires_user_input=needs_user,
    )

class TestConfidenceScorer:
    """Property tests for confidence scorer."""

    @given(
        strategy=st.sampled_from([
            'semantic_variant', 'spatial_fallback',
            'timing_adaptation', 'format_transformation'
        ]),
        context=recovery_context_strategy(),
        historical_success=st.floats(min_value=0.0, max_value=1.0)
    )
    @settings(max_examples=50)
    def test_property_confidence_score_validity(self, strategy, context, historical_success):
        """
        Property 3: Confidence score validity

        For any recovery action proposed, the confidence score SHALL be
        a valid float between 0.0 and 1.0.
        """
        scorer = ConfidenceScorer()

        confidence = scorer.calculate_recovery_confidence(strategy, context, historical_success)

        # Confidence must be valid float
        assert isinstance(confidence, float)

        # Confidence must be in valid range
        assert 0.0 <= confidence <= 1.0

    @given(
        original=st.text(min_size=1, max_size=50),
        candidate=st.text(min_size=1, max_size=50)
    )
    @settings(max_examples=50)
    def test_element_similarity_score_validity(self, original, candidate):
        """Element similarity scores must be floats within [0.0, 1.0]."""
        scorer = ConfidenceScorer()

        similarity = scorer.calculate_element_similarity_score(original, candidate)

        assert isinstance(similarity, float)
        assert 0.0 <= similarity <= 1.0


class TestLearningRepository:
    """Property tests for learning repository."""

    def setup_method(self):
        """Create a repository backed by a fresh temporary directory."""
        self.temp_dir = tempfile.mkdtemp()
        self.repo = LearningRepository(Path(self.temp_dir))

    def teardown_method(self):
        """Remove the temporary directory; cleanup errors are ignored."""
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    @given(
        context=recovery_context_strategy(),
        result=recovery_result_strategy()
    )
    @settings(max_examples=30)
    def test_property_learning_pattern_storage(self, context, result):
        """
        Property 2: Learning pattern storage

        For any successful recovery action, the system SHALL store the
        recovery pattern in the learning repository with complete context metadata.
        """
        # The property only concerns successful recoveries.
        if not result.success:
            return

        # Clear repo for clean test: the repository is shared by every
        # example generated for this test.
        self.repo.patterns.clear()

        # Store pattern
        self.repo.store_pattern(context, result)

        # Pattern should be stored
        patterns = self.repo.get_all_patterns()
        assert len(patterns) > 0

        # Pattern should have complete metadata
        pattern = patterns[0]
        # Pattern groups similar contexts, so check metadata
        assert pattern.context_metadata is not None
        assert 'original_action' in pattern.context_metadata
        assert pattern.context_metadata['original_action'] == context.original_action

    @given(
        context=recovery_context_strategy(),
        result=recovery_result_strategy()
    )
    @settings(max_examples=30)
    def test_property_pattern_reuse_consistency(self, context, result):
        """
        Property 5: Pattern reuse consistency

        For any failure that matches a previously learned pattern, the system
        SHALL apply the learned recovery strategy before trying new approaches.
        """
        # The property only concerns successful recoveries.
        if not result.success:
            return

        # Clear repo for clean test
        self.repo.patterns.clear()

        # Store a successful pattern
        self.repo.store_pattern(context, result)

        # Create similar context: same action, failure reason, workflow and
        # metadata, but a different element, screenshot and node.
        similar_context = RecoveryContext(
            original_action=context.original_action,
            target_element="different_element",
            failure_reason=context.failure_reason,
            screenshot_path="different_path",
            workflow_id=context.workflow_id,
            node_id="different_node",
            attempt_count=1,
            metadata=context.metadata.copy()
        )

        # Should find matching pattern
        matching = self.repo.get_matching_patterns(similar_context)

        # Pattern should be found since contexts match
        assert len(matching) > 0

    @given(
        max_age_days=st.integers(min_value=1, max_value=365),
        min_confidence=st.floats(min_value=0.0, max_value=0.85)  # Keep below 0.9
    )
    @settings(max_examples=20)
    def test_property_repository_pruning_correctness(self, max_age_days, min_confidence):
        """
        Property 10: Repository pruning correctness

        For any pruning operation, only patterns that meet the removal criteria
        (age, confidence, success rate) SHALL be deleted.
        """
        # Use a single reference instant so both patterns are dated
        # relative to the same "now".
        now = datetime.now()
        stale_time = now - timedelta(days=max_age_days + 10)

        # Create patterns with different characteristics
        old_pattern = RecoveryPattern(
            pattern_id="old",
            original_failure="test",
            recovery_strategy="test",
            success_count=1,
            failure_count=0,
            confidence_score=0.8,
            context_metadata={},
            created_at=stale_time,
            last_used=stale_time
        )

        recent_pattern = RecoveryPattern(
            pattern_id="recent",
            original_failure="test",
            recovery_strategy="test",
            success_count=5,
            failure_count=0,
            confidence_score=0.95,  # High confidence to ensure it stays
            context_metadata={},
            created_at=now,
            last_used=now
        )

        self.repo.patterns["old"] = old_pattern
        self.repo.patterns["recent"] = recent_pattern

        # Prune
        self.repo.prune_outdated_patterns(max_age_days, min_confidence)

        # Recent high-confidence pattern should remain
        assert "recent" in self.repo.patterns

        # Old pattern should be removed
        assert "old" not in self.repo.patterns


class TestSelfHealingEngine:
    """Property tests for self-healing engine."""

    def setup_method(self):
        """Create an engine whose storage lives in a fresh temporary directory."""
        self.temp_dir = tempfile.mkdtemp()
        self.engine = SelfHealingEngine(storage_path=Path(self.temp_dir))

    def teardown_method(self):
        """Remove the temporary directory; cleanup errors are ignored."""
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    @given(context=recovery_context_strategy())
    @settings(max_examples=20, deadline=5000)
    def test_property_recovery_time_limits(self, context):
        """
        Property 4: Recovery time limits

        For any recovery attempt, the total time spent SHALL not exceed
        3x the original action timeout.
        """
        import time

        # Set a short max recovery time for testing
        self.engine.max_recovery_time = 5.0

        # A monotonic clock is used so that system clock adjustments cannot
        # distort the measured duration.
        started = time.monotonic()
        self.engine.attempt_recovery(context)
        elapsed = time.monotonic() - started

        # Should not exceed max recovery time
        assert elapsed <= self.engine.max_recovery_time + 1.0  # 1s buffer for overhead

    @given(
        context=recovery_context_strategy(),
        result=recovery_result_strategy()
    )
    @settings(max_examples=20)
    def test_property_workflow_definition_updates(self, context, result):
        """
        Property 6: Workflow definition updates

        For any successful recovery that finds an alternative element,
        the workflow definition SHALL be updated with the new element information.

        Only the storage of a learned pattern is asserted here; the workflow
        definition itself is not inspected.
        """
        # The property only concerns successes that found a replacement element.
        if not (result.success and result.new_element):
            return

        # setup_method runs once per test function, not once per generated
        # example, so patterns learned from earlier examples would otherwise
        # make the assertion below trivially true.
        # NOTE(review): assumes engine.learning_repo exposes the same
        # ``patterns`` mapping used in TestLearningRepository - confirm.
        self.engine.learning_repo.patterns.clear()

        # Learn from success
        self.engine.learn_from_success(context, result)

        # Pattern should be stored
        patterns = self.engine.learning_repo.get_all_patterns()
        assert len(patterns) > 0

    @given(context=recovery_context_strategy())
    @settings(max_examples=20, deadline=2000)  # Increase deadline for slow operations
    def test_property_recovery_logging_completeness(self, context):
        """
        Property 8: Recovery logging completeness

        For any recovery attempt, detailed log information SHALL be recorded
        including original failure, strategy used, and outcome.
        """
        result = self.engine.attempt_recovery(context)

        # Result should have all required fields
        assert result.strategy_used is not None
        assert isinstance(result.success, bool)
        assert isinstance(result.execution_time, float)
        assert result.execution_time >= 0.0


class TestSafetyThresholds:
    """Property tests for safety thresholds."""

    @given(
        confidence=st.floats(min_value=0.0, max_value=1.0),
        threshold=st.floats(min_value=0.5, max_value=0.9),
        involves_data=st.booleans()
    )
    @settings(max_examples=50)
    def test_property_safety_threshold_enforcement(self, confidence, threshold, involves_data):
        """
        Property 7: Safety threshold enforcement

        For any recovery action with confidence below the safety threshold,
        user confirmation SHALL be requested before proceeding.
        """
        scorer = ConfidenceScorer()

        is_safe = scorer.is_safe_to_proceed(confidence, threshold, involves_data)

        # If involves data modification, threshold should be at least 0.8
        effective_threshold = max(threshold, 0.8) if involves_data else threshold

        # Safety check should match threshold
        if confidence >= effective_threshold:
            assert is_safe
        else:
            assert not is_safe


if __name__ == '__main__':
    # Propagate pytest's exit code so a direct run reports failures to the shell.
    raise SystemExit(pytest.main([__file__, '-v']))