v1.0 - Version stable: multi-PC, détection UI-DETR-1, 3 modes exécution
- Frontend v4 accessible sur le réseau local (192.168.1.40)
- Ports ouverts : 3002 (frontend), 5001 (backend), 5004 (dashboard)
- Ollama GPU fonctionnel
- Self-healing interactif
- Dashboard de confiance

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
321
tests/property/test_self_healing_properties.py
Normal file
321
tests/property/test_self_healing_properties.py
Normal file
@@ -0,0 +1,321 @@
|
||||
"""Property-based tests for self-healing workflows."""
|
||||
|
||||
import pytest
|
||||
from hypothesis import given, strategies as st, settings
|
||||
from pathlib import Path
|
||||
import tempfile
|
||||
import shutil
|
||||
|
||||
from core.healing.healing_engine import SelfHealingEngine
|
||||
from core.healing.learning_repository import LearningRepository
|
||||
from core.healing.confidence_scorer import ConfidenceScorer
|
||||
from core.healing.models import RecoveryContext, RecoveryResult, RecoveryPattern
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
|
||||
# Strategy for generating recovery contexts
|
||||
@st.composite
def recovery_context_strategy(draw):
    """Build an arbitrary ``RecoveryContext`` for the property tests.

    Every field is drawn from a Hypothesis strategy except ``max_attempts``,
    which is pinned to 3. Values are drawn in the same order in which they
    are passed to the ``RecoveryContext`` constructor.
    """
    action = draw(st.sampled_from(['click', 'input', 'type', 'submit']))
    element = draw(st.text(min_size=1, max_size=50))
    reason = draw(st.sampled_from([
        'element_not_found', 'timeout', 'validation_failed', 'element_moved'
    ]))
    screenshot = draw(st.text(min_size=1, max_size=100))
    workflow = draw(st.text(min_size=1, max_size=20))
    node = draw(st.text(min_size=1, max_size=20))
    attempts = draw(st.integers(min_value=1, max_value=3))
    threshold = draw(st.floats(min_value=0.5, max_value=0.9))

    # Free-form metadata: short text keys mapped to text, int or float values.
    metadata_keys = st.text(min_size=1, max_size=20)
    metadata_values = st.one_of(st.text(), st.integers(), st.floats())
    extra = draw(st.dictionaries(metadata_keys, metadata_values))

    return RecoveryContext(
        original_action=action,
        target_element=element,
        failure_reason=reason,
        screenshot_path=screenshot,
        workflow_id=workflow,
        node_id=node,
        attempt_count=attempts,
        max_attempts=3,
        confidence_threshold=threshold,
        metadata=extra,
    )
|
||||
|
||||
|
||||
@st.composite
def recovery_result_strategy(draw):
    """Build an arbitrary ``RecoveryResult`` for the property tests.

    A successful result never requires user input; for a failed result the
    ``requires_user_input`` flag is drawn at random.
    """
    succeeded = draw(st.booleans())
    strategy_name = draw(st.sampled_from([
        'semantic_variant', 'spatial_fallback', 'timing_adaptation', 'format_transformation'
    ]))
    replacement = draw(st.one_of(st.none(), st.text(min_size=1, max_size=50)))
    confidence = draw(st.floats(min_value=0.0, max_value=1.0))
    duration = draw(st.floats(min_value=0.0, max_value=30.0))

    # Either no learned pattern at all, or a small text-to-text mapping.
    pattern_dicts = st.dictionaries(
        st.text(min_size=1, max_size=20),
        st.text(min_size=1, max_size=50),
    )
    pattern = draw(st.one_of(st.none(), pattern_dicts))

    # The extra boolean is only drawn for failed results.
    if succeeded:
        needs_user = False
    else:
        needs_user = draw(st.booleans())

    return RecoveryResult(
        success=succeeded,
        strategy_used=strategy_name,
        new_element=replacement,
        confidence_score=confidence,
        execution_time=duration,
        learned_pattern=pattern,
        requires_user_input=needs_user,
    )
|
||||
|
||||
|
||||
class TestConfidenceScorer:
    """Property tests checking the range and type of ``ConfidenceScorer`` outputs."""

    @given(
        strategy=st.sampled_from([
            'semantic_variant', 'spatial_fallback', 'timing_adaptation', 'format_transformation'
        ]),
        context=recovery_context_strategy(),
        historical_success=st.floats(min_value=0.0, max_value=1.0)
    )
    @settings(max_examples=50)
    def test_property_confidence_score_validity(self, strategy, context, historical_success):
        """
        Property 3: Confidence score validity

        Whatever recovery action is proposed, its confidence score SHALL be
        a float lying in the closed interval [0.0, 1.0].
        """
        score = ConfidenceScorer().calculate_recovery_confidence(
            strategy, context, historical_success
        )

        # Type first, then range.
        assert isinstance(score, float)
        assert 0.0 <= score <= 1.0

    @given(
        original=st.text(min_size=1, max_size=50),
        candidate=st.text(min_size=1, max_size=50)
    )
    @settings(max_examples=50)
    def test_element_similarity_score_validity(self, original, candidate):
        """Similarity between two element names is a float in [0.0, 1.0]."""
        score = ConfidenceScorer().calculate_element_similarity_score(original, candidate)

        assert isinstance(score, float)
        assert 0.0 <= score <= 1.0
|
||||
|
||||
|
||||
class TestLearningRepository:
    """Property tests for learning repository."""

    def setup_method(self):
        """Create a repository backed by a fresh temporary directory."""
        self.temp_dir = tempfile.mkdtemp()
        self.repo = LearningRepository(Path(self.temp_dir))

    def teardown_method(self):
        """Remove the temporary directory created in ``setup_method``."""
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    @given(
        context=recovery_context_strategy(),
        # Only successful recoveries are relevant to this property. Filtering
        # in the strategy (rather than guarding the body with ``if``) means
        # no generated example passes vacuously.
        result=recovery_result_strategy().filter(lambda r: r.success)
    )
    @settings(max_examples=30)
    def test_property_learning_pattern_storage(self, context, result):
        """
        Property 2: Learning pattern storage
        For any successful recovery action, the system SHALL store the recovery
        pattern in the learning repository with complete context metadata.
        """
        # The repository instance is reused across generated examples, so
        # start each example from an empty in-memory pattern set.
        self.repo.patterns.clear()

        # Store pattern
        self.repo.store_pattern(context, result)

        # Pattern should be stored
        patterns = self.repo.get_all_patterns()
        assert len(patterns) > 0

        # Pattern should have complete metadata
        pattern = patterns[0]
        # Pattern groups similar contexts, so check metadata
        assert pattern.context_metadata is not None
        assert 'original_action' in pattern.context_metadata
        assert pattern.context_metadata['original_action'] == context.original_action

    @given(
        context=recovery_context_strategy(),
        # See test_property_learning_pattern_storage: restrict generation to
        # successful results so every example exercises the assertions.
        result=recovery_result_strategy().filter(lambda r: r.success)
    )
    @settings(max_examples=30)
    def test_property_pattern_reuse_consistency(self, context, result):
        """
        Property 5: Pattern reuse consistency
        For any failure that matches a previously learned pattern, the system
        SHALL apply the learned recovery strategy before trying new approaches.
        """
        # The repository instance is reused across generated examples, so
        # start each example from an empty in-memory pattern set.
        self.repo.patterns.clear()

        # Store a successful pattern
        self.repo.store_pattern(context, result)

        # Same action, failure reason, workflow and metadata as the stored
        # context, but a different element, screenshot and node.
        similar_context = RecoveryContext(
            original_action=context.original_action,
            target_element="different_element",
            failure_reason=context.failure_reason,
            screenshot_path="different_path",
            workflow_id=context.workflow_id,
            node_id="different_node",
            attempt_count=1,
            metadata=context.metadata.copy()
        )

        # Pattern should be found since contexts match
        matching = self.repo.get_matching_patterns(similar_context)
        assert len(matching) > 0

    @given(
        max_age_days=st.integers(min_value=1, max_value=365),
        min_confidence=st.floats(min_value=0.0, max_value=0.85)  # Keep below 0.9
    )
    @settings(max_examples=20)
    def test_property_repository_pruning_correctness(self, max_age_days, min_confidence):
        """
        Property 10: Repository pruning correctness
        For any pruning operation, only patterns that meet the removal criteria
        (age, confidence, success rate) SHALL be deleted.
        """
        now = datetime.now()
        # Ten days past the age limit, so the old pattern is always stale.
        stale = now - timedelta(days=max_age_days + 10)

        old_pattern = RecoveryPattern(
            pattern_id="old",
            original_failure="test",
            recovery_strategy="test",
            success_count=1,
            failure_count=0,
            confidence_score=0.8,
            context_metadata={},
            created_at=stale,
            last_used=stale
        )

        recent_pattern = RecoveryPattern(
            pattern_id="recent",
            original_failure="test",
            recovery_strategy="test",
            success_count=5,
            failure_count=0,
            confidence_score=0.95,  # High confidence to ensure it stays
            context_metadata={},
            created_at=now,
            last_used=now
        )

        self.repo.patterns["old"] = old_pattern
        self.repo.patterns["recent"] = recent_pattern

        # Prune
        self.repo.prune_outdated_patterns(max_age_days, min_confidence)

        # Recent high-confidence pattern should remain
        assert "recent" in self.repo.patterns
        # Old pattern should be removed
        assert "old" not in self.repo.patterns
|
||||
|
||||
|
||||
class TestSelfHealingEngine:
    """Property tests for self-healing engine."""

    def setup_method(self):
        """Create an engine whose storage lives in a fresh temporary directory."""
        self.temp_dir = tempfile.mkdtemp()
        self.engine = SelfHealingEngine(storage_path=Path(self.temp_dir))

    def teardown_method(self):
        """Remove the temporary directory created in ``setup_method``."""
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    @given(context=recovery_context_strategy())
    # No Hypothesis deadline: the test asserts its own time budget below.
    # A 5000 ms deadline would reject runs between 5 s and 6 s that the
    # assertion explicitly allows.
    @settings(max_examples=20, deadline=None)
    def test_property_recovery_time_limits(self, context):
        """
        Property 4: Recovery time limits
        For any recovery attempt, the total time spent SHALL not exceed
        3x the original action timeout.
        """
        import time

        # Set a short max recovery time for testing
        self.engine.max_recovery_time = 5.0

        # Monotonic clock: immune to wall-clock adjustments during the run.
        started = time.monotonic()
        self.engine.attempt_recovery(context)
        elapsed = time.monotonic() - started

        # Should not exceed max recovery time
        assert elapsed <= self.engine.max_recovery_time + 1.0  # 1s buffer for overhead

    @given(
        context=recovery_context_strategy(),
        # Only successful results that found a replacement element are
        # relevant. Filtering here means no example passes vacuously.
        result=recovery_result_strategy().filter(lambda r: r.success and r.new_element)
    )
    @settings(max_examples=20)
    def test_property_workflow_definition_updates(self, context, result):
        """
        Property 6: Workflow definition updates
        For any successful recovery that finds an alternative element,
        the workflow definition SHALL be updated with the new element information.
        """
        # Learn from success
        self.engine.learn_from_success(context, result)

        # Pattern should be stored
        patterns = self.engine.learning_repo.get_all_patterns()
        assert len(patterns) > 0

    @given(context=recovery_context_strategy())
    @settings(max_examples=20, deadline=2000)  # Increase deadline for slow operations
    def test_property_recovery_logging_completeness(self, context):
        """
        Property 8: Recovery logging completeness
        For any recovery attempt, detailed log information SHALL be recorded
        including original failure, strategy used, and outcome.
        """
        result = self.engine.attempt_recovery(context)

        # Result should have all required fields
        assert result.strategy_used is not None
        assert isinstance(result.success, bool)
        assert isinstance(result.execution_time, float)
        assert result.execution_time >= 0.0
|
||||
|
||||
|
||||
class TestSafetyThresholds:
    """Property tests for the safety-threshold decision of ``ConfidenceScorer``."""

    @given(
        confidence=st.floats(min_value=0.0, max_value=1.0),
        threshold=st.floats(min_value=0.5, max_value=0.9),
        involves_data=st.booleans()
    )
    @settings(max_examples=50)
    def test_property_safety_threshold_enforcement(self, confidence, threshold, involves_data):
        """
        Property 7: Safety threshold enforcement

        A recovery action whose confidence is below the safety threshold
        SHALL NOT be considered safe to proceed without user confirmation.
        """
        # Data-modifying actions are held to a floor of 0.8.
        effective_threshold = max(threshold, 0.8) if involves_data else threshold
        expected_safe = confidence >= effective_threshold

        is_safe = ConfidenceScorer().is_safe_to_proceed(confidence, threshold, involves_data)

        # The scorer's verdict must agree with the threshold comparison.
        assert bool(is_safe) == expected_safe
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Run this module's tests when executed directly. Propagate pytest's
    # exit code so a failing run yields a non-zero process status.
    raise SystemExit(pytest.main([__file__, '-v']))
|
||||
Reference in New Issue
Block a user