"""Property-based tests for analytics system.""" import pytest from hypothesis import given, strategies as st, settings from datetime import datetime, timedelta import tempfile import os from core.analytics.collection.metrics_collector import ( MetricsCollector, ExecutionMetrics, StepMetrics ) from core.analytics.storage.timeseries_store import TimeSeriesStore from core.analytics.storage.archive_storage import ( ArchiveStorage, RetentionPolicyEngine, RetentionPolicy ) from core.analytics.engine.performance_analyzer import PerformanceAnalyzer from core.analytics.engine.anomaly_detector import AnomalyDetector from core.analytics.engine.success_rate_calculator import SuccessRateCalculator from core.analytics.reporting.report_generator import ReportGenerator, ReportConfig from core.analytics.query.query_engine import QueryEngine # Fixtures @pytest.fixture def temp_db(): """Create temporary database.""" fd, path = tempfile.mkstemp(suffix='.db') os.close(fd) yield path if os.path.exists(path): os.unlink(path) @pytest.fixture def temp_archive_dir(): """Create temporary archive directory.""" import tempfile import shutil dirpath = tempfile.mkdtemp() yield dirpath shutil.rmtree(dirpath, ignore_errors=True) @pytest.fixture def store(temp_db): """Create TimeSeriesStore instance.""" return TimeSeriesStore(temp_db) @pytest.fixture def collector(store): """Create MetricsCollector instance.""" return MetricsCollector(store) # Property 1: Metrics completeness # **Feature: rpa-analytics, Property 1: Metrics completeness** # **Validates: Requirements 1.1, 1.4** @given( execution_id=st.text(min_size=1, max_size=50), workflow_id=st.text(min_size=1, max_size=50), duration=st.floats(min_value=0.1, max_value=1000.0), status=st.sampled_from(['success', 'failed', 'timeout']) ) @settings(max_examples=100, deadline=None) def test_metrics_completeness(temp_db, execution_id, workflow_id, duration, status): """ Property: For any execution metrics recorded, all required fields must be present when queried back from storage. """ store = TimeSeriesStore(temp_db) collector = MetricsCollector(store) # Record execution now = datetime.now() execution = ExecutionMetrics( execution_id=execution_id, workflow_id=workflow_id, started_at=now, completed_at=now + timedelta(seconds=duration), duration=duration, status=status ) collector.record_execution(execution) collector.flush() # Query back metrics = store.query_range( metric_type='execution', start_time=now - timedelta(seconds=1), end_time=now + timedelta(seconds=duration + 1) ) # Verify completeness assert len(metrics) > 0 metric = metrics[0] assert 'execution_id' in metric assert 'workflow_id' in metric assert 'duration' in metric assert 'status' in metric assert metric['workflow_id'] == workflow_id assert metric['status'] == status # Property 3: Failure recording completeness # **Feature: rpa-analytics, Property 3: Failure recording completeness** # **Validates: Requirements 1.3** @given( workflow_id=st.text(min_size=1, max_size=50), error_message=st.text(min_size=1, max_size=200) ) @settings(max_examples=50, deadline=None) def test_failure_recording_completeness(temp_db, workflow_id, error_message): """ Property: For any failed execution, the error message must be recorded and retrievable. """ store = TimeSeriesStore(temp_db) collector = MetricsCollector(store) now = datetime.now() execution = ExecutionMetrics( execution_id="failed_exec", workflow_id=workflow_id, started_at=now, completed_at=now + timedelta(seconds=10), duration=10.0, status="failed", error_message=error_message ) collector.record_execution(execution) collector.flush() # Query failed executions metrics = store.query_range( metric_type='execution', start_time=now - timedelta(seconds=1), end_time=now + timedelta(seconds=11), filters={'status': 'failed'} ) assert len(metrics) > 0 assert metrics[0].get('error_message') is not None # Property 5: Statistical accuracy # **Feature: rpa-analytics, Property 5: Statistical accuracy** # **Validates: Requirements 2.1** @given( durations=st.lists( st.floats(min_value=1.0, max_value=100.0), min_size=10, max_size=50 ) ) @settings(max_examples=50, deadline=None) def test_statistical_accuracy(temp_db, durations): """ Property: For any list of durations, calculated statistics (avg, median) must match expected values within tolerance. """ store = TimeSeriesStore(temp_db) collector = MetricsCollector(store) analyzer = PerformanceAnalyzer(store) workflow_id = "test_workflow" now = datetime.now() # Record executions for i, duration in enumerate(durations): execution = ExecutionMetrics( execution_id=f"exec_{i}", workflow_id=workflow_id, started_at=now + timedelta(seconds=i*10), completed_at=now + timedelta(seconds=i*10 + duration), duration=duration, status="success" ) collector.record_execution(execution) collector.flush() # Analyze stats = analyzer.analyze_performance( workflow_id=workflow_id, start_time=now - timedelta(seconds=1), end_time=now + timedelta(seconds=len(durations)*10 + 100) ) # Verify statistics import statistics expected_avg = statistics.mean(durations) expected_median = statistics.median(durations) assert abs(stats.avg_duration - expected_avg) < 0.1 assert abs(stats.median_duration - expected_median) < 0.1 # Property 8: Success rate calculation accuracy # **Feature: rpa-analytics, Property 8: Success rate calculation accuracy** # **Validates: Requirements 3.1** @given( num_success=st.integers(min_value=0, max_value=50), num_failed=st.integers(min_value=0, max_value=50) ) @settings(max_examples=50, deadline=None) def test_success_rate_accuracy(temp_db, num_success, num_failed): """ Property: For any combination of successful and failed executions, the calculated success rate must match the expected percentage. """ if num_success + num_failed == 0: return # Skip empty case store = TimeSeriesStore(temp_db) collector = MetricsCollector(store) calculator = SuccessRateCalculator(store) workflow_id = "test_workflow" now = datetime.now() # Record successful executions for i in range(num_success): execution = ExecutionMetrics( execution_id=f"success_{i}", workflow_id=workflow_id, started_at=now + timedelta(seconds=i), completed_at=now + timedelta(seconds=i+1), duration=1.0, status="success" ) collector.record_execution(execution) # Record failed executions for i in range(num_failed): execution = ExecutionMetrics( execution_id=f"failed_{i}", workflow_id=workflow_id, started_at=now + timedelta(seconds=num_success+i), completed_at=now + timedelta(seconds=num_success+i+1), duration=1.0, status="failed" ) collector.record_execution(execution) collector.flush() # Calculate success rate stats = calculator.calculate_success_rate( workflow_id=workflow_id, time_window_hours=1 ) # Verify total = num_success + num_failed expected_rate = (num_success / total) * 100 assert abs(stats.success_rate - expected_rate) < 0.1 assert stats.total_executions == total assert stats.successful_executions == num_success assert stats.failed_executions == num_failed # Property 15: Filter application correctness # **Feature: rpa-analytics, Property 15: Filter application correctness** # **Validates: Requirements 7.1** @given( workflow_ids=st.lists( st.text(min_size=1, max_size=20), min_size=2, max_size=5, unique=True ), target_workflow=st.integers(min_value=0, max_value=4) ) @settings(max_examples=50, deadline=None) def test_filter_correctness(temp_db, workflow_ids, target_workflow): """ Property: For any set of workflows, filtering by a specific workflow_id must return only metrics for that workflow. """ if target_workflow >= len(workflow_ids): target_workflow = 0 store = TimeSeriesStore(temp_db) collector = MetricsCollector(store) engine = QueryEngine(store) target_id = workflow_ids[target_workflow] now = datetime.now() # Record executions for different workflows for i, wf_id in enumerate(workflow_ids): execution = ExecutionMetrics( execution_id=f"exec_{i}", workflow_id=wf_id, started_at=now + timedelta(seconds=i), completed_at=now + timedelta(seconds=i+1), duration=1.0, status="success" ) collector.record_execution(execution) collector.flush() # Query with filter results = engine.query( metric_type='execution', start_time=now - timedelta(seconds=1), end_time=now + timedelta(seconds=len(workflow_ids)+1), filters={'workflow_id': target_id} ) # Verify all results match filter assert len(results) > 0 for result in results: assert result['workflow_id'] == target_id # Property 16: Export format validity # **Feature: rpa-analytics, Property 16: Export format validity** # **Validates: Requirements 7.3** @given( title=st.text(min_size=1, max_size=100), format_type=st.sampled_from(['json', 'csv', 'html']) ) @settings(max_examples=30, deadline=None) def test_export_format_validity(temp_db, temp_archive_dir, title, format_type): """ Property: For any report configuration, the exported file must be valid and readable in the specified format. """ store = TimeSeriesStore(temp_db) collector = MetricsCollector(store) # Create some test data now = datetime.now() execution = ExecutionMetrics( execution_id="test_exec", workflow_id="test_workflow", started_at=now, completed_at=now + timedelta(seconds=10), duration=10.0, status="success" ) collector.record_execution(execution) collector.flush() # Generate report from core.analytics.engine.performance_analyzer import PerformanceAnalyzer from core.analytics.engine.insight_generator import InsightGenerator from core.analytics.engine.anomaly_detector import AnomalyDetector analyzer = PerformanceAnalyzer(store) detector = AnomalyDetector(store) insight_gen = InsightGenerator(analyzer, detector) engine = QueryEngine(store) generator = ReportGenerator( engine, analyzer, insight_gen, temp_archive_dir ) config = ReportConfig( title=title, metric_types=['execution'], start_time=now - timedelta(hours=1), end_time=now + timedelta(hours=1), format=format_type ) report_data = generator.generate_report(config) # Export and verify if format_type == 'json': filepath = generator.export_json(report_data) assert os.path.exists(filepath) import json with open(filepath, 'r') as f: data = json.load(f) assert 'title' in data elif format_type == 'csv': filepath = generator.export_csv(report_data) assert os.path.exists(filepath) import csv with open(filepath, 'r') as f: reader = csv.reader(f) rows = list(reader) assert len(rows) > 0 # At least header elif format_type == 'html': filepath = generator.export_html(report_data) assert os.path.exists(filepath) with open(filepath, 'r') as f: content = f.read() assert '' in content.lower() assert title in content # Property 19: Retention policy enforcement # **Feature: rpa-analytics, Property 19: Retention policy enforcement** # **Validates: Requirements 10.2** @given( hot_days=st.integers(min_value=1, max_value=30), archive_days=st.integers(min_value=31, max_value=365) ) @settings(max_examples=30, deadline=None) def test_retention_policy_enforcement(temp_db, temp_archive_dir, hot_days, archive_days): """ Property: For any retention policy, data older than hot_retention_days must be archived, and data older than archive_retention_days must be deleted. """ store = TimeSeriesStore(temp_db) archive = ArchiveStorage(temp_archive_dir) engine = RetentionPolicyEngine(archive) # Create policy policy = RetentionPolicy( metric_type='execution', hot_retention_days=hot_days, archive_retention_days=archive_days, compression_enabled=True ) engine.add_policy(policy) # Verify policy is stored retrieved_policy = engine.get_policy('execution') assert retrieved_policy is not None assert retrieved_policy.hot_retention_days == hot_days assert retrieved_policy.archive_retention_days == archive_days # Property 20: Archive data integrity # **Feature: rpa-analytics, Property 20: Archive data integrity** # **Validates: Requirements 10.3** @given( num_metrics=st.integers(min_value=1, max_value=50) ) @settings(max_examples=30, deadline=None) def test_archive_data_integrity(temp_archive_dir, num_metrics): """ Property: For any metrics archived, querying the archive must return the same data that was archived. """ archive = ArchiveStorage(temp_archive_dir) # Create test metrics now = datetime.now() metrics = [] for i in range(num_metrics): metrics.append({ 'execution_id': f'exec_{i}', 'workflow_id': 'test_workflow', 'duration': float(i + 1), 'status': 'success', 'timestamp': (now + timedelta(seconds=i)).isoformat() }) # Archive metrics archive.archive_metrics( metrics=metrics, metric_type='execution', archive_date=now, compress=True ) # Query back retrieved = archive.query_archive( metric_type='execution', start_date=now - timedelta(days=1), end_date=now + timedelta(days=1) ) # Verify integrity assert len(retrieved) == num_metrics for original, retrieved_metric in zip(metrics, retrieved): assert original['execution_id'] == retrieved_metric['execution_id'] assert original['workflow_id'] == retrieved_metric['workflow_id'] if __name__ == '__main__': pytest.main([__file__, '-v', '--tb=short'])