Files
rpa_vision_v3/test_simple_validation.py
Dom a27b74cf22 v1.0 - Version stable: multi-PC, détection UI-DETR-1, 3 modes exécution
- Frontend v4 accessible sur réseau local (192.168.1.40)
- Ports ouverts: 3002 (frontend), 5001 (backend), 5004 (dashboard)
- Ollama GPU fonctionnel
- Self-healing interactif
- Dashboard confiance

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-29 11:23:51 +01:00

376 lines
14 KiB
Python

"""
Real functionality tests for input validation system.
Tests the actual RealInputValidator implementation with real data scenarios,
security configurations, and integration with the security config system.
No mocks or simulations - tests real behavior.
"""
import pytest
import os
import sys
import tempfile
import json
from pathlib import Path
from unittest import mock
# Make the project root importable before any local package imports.
sys.path.insert(0, str(Path(__file__).parent))

# Load the real implementations. Prefer loading the validator module straight
# from its file on disk so the tests exercise the actual implementation even
# when the package is not installed; fall back to a regular package import.
try:
    import importlib.util

    _spec = importlib.util.spec_from_file_location(
        "input_validator",
        Path(__file__).parent / "core" / "security" / "input_validator.py",
    )
    _module = importlib.util.module_from_spec(_spec)
    # Executing the module runs its top level and populates its namespace.
    _spec.loader.exec_module(_module)

    # Bind the names this test suite relies on.
    RealInputValidator = _module.RealInputValidator
    ValidationResult = _module.ValidationResult
    InputValidationError = _module.InputValidationError
    SecurityViolationError = _module.SecurityViolationError
    validate_string_input = _module.validate_string_input
    validate_file_path_input = _module.validate_file_path_input
    validate_json_input = _module.validate_json_input
    sanitize_for_logging = _module.sanitize_for_logging

    from core.security.security_config import SecurityConfig, get_security_config
except Exception as e:
    print(f"Import error: {e}")
    # Fallback - try direct import
    from core.security.input_validator import *
    from core.security.security_config import SecurityConfig, get_security_config
class TestRealInputValidation:
    """Test real input validation functionality without mocks.

    Every test drives the actual ``RealInputValidator`` implementation and the
    module-level helper functions with genuine attack payloads and real data;
    nothing is stubbed or simulated.
    """

    def setup_method(self):
        """Create fresh strict and lenient validator instances for each test."""
        self.strict_validator = RealInputValidator(strict_mode=True)
        self.lenient_validator = RealInputValidator(strict_mode=False)

    def test_real_sql_injection_detection(self):
        """Test detection of real SQL injection patterns."""
        # Real SQL injection attempts from security research
        real_sql_attacks = [
            "'; DROP TABLE users; --",
            "1' OR '1'='1",
            "admin'--",
            "UNION SELECT username, password FROM users",
            "1; EXEC xp_cmdshell('dir')",
            "' OR 1=1 --",
            "'; INSERT INTO users VALUES ('hacker', 'password'); --",
            "1' AND (SELECT COUNT(*) FROM users) > 0 --",
        ]
        for attack in real_sql_attacks:
            result = self.strict_validator.validate_string(attack)
            assert not result.is_valid, f"Failed to detect SQL injection: {attack}"
            assert any("SQL injection" in error for error in result.errors)

    def test_real_nosql_injection_detection(self):
        """Test detection of real NoSQL injection patterns."""
        # Real NoSQL injection attempts
        real_nosql_attacks = [
            '{"$where": "this.username == this.password"}',
            '{"$regex": ".*"}',
            'function() { return true; }',
            '{"$ne": null}',
            'this.username',
            '{"$gt": ""}',
            '{"$exists": true}',
            'db.users.find()',
        ]
        for attack in real_nosql_attacks:
            result = self.strict_validator.validate_string(attack)
            assert not result.is_valid, f"Failed to detect NoSQL injection: {attack}"
            assert any("injection" in error.lower() for error in result.errors)

    def test_real_file_path_validation(self):
        """Test file path validation with real filesystem scenarios."""
        with tempfile.TemporaryDirectory() as temp_dir:
            # Safe relative paths should come back normalized.
            valid_paths = [
                "document.txt",
                "folder/file.pdf",
                "data/session.json",
            ]
            for path in valid_paths:
                result = validate_file_path_input(path, allowed_dirs=[temp_dir])
                assert result == os.path.normpath(path)
            # Path-traversal attempts must raise, even without an allow-list.
            dangerous_paths = [
                "../../../etc/passwd",
                "..\\..\\windows\\system32\\config\\sam",
                "folder/../../../secret.txt",
            ]
            for path in dangerous_paths:
                with pytest.raises(SecurityViolationError):
                    validate_file_path_input(path)

    def test_real_json_validation(self):
        """Test JSON validation with real JSON data."""
        valid_json_data = [
            {"name": "John", "age": 30},
            {"workflow": {"steps": [{"action": "click", "target": "button"}]}},
            {"config": {"timeout": 5000, "retries": 3}},
        ]
        for data in valid_json_data:
            # Test dict input
            result = validate_json_input(data)
            assert result == data
            # Test string input
            json_str = json.dumps(data)
            result = validate_json_input(json_str)
            assert result == data
        # Malformed JSON must be rejected with a validation error.
        invalid_json = '{"invalid": json, "missing": quotes}'
        with pytest.raises(InputValidationError):
            validate_json_input(invalid_json)

    def test_real_html_sanitization(self):
        """Test HTML sanitization with real XSS payloads."""
        # Real XSS payloads from security research
        xss_payloads = [
            '<script>alert("xss")</script>',
            '<img src="x" onerror="alert(1)">',
            '<svg onload="alert(1)">',
            '<iframe src="javascript:alert(1)"></iframe>',
            '<body onload="alert(1)">',
            '<div onclick="alert(1)">Click me</div>',
        ]
        for payload in xss_payloads:
            result = self.strict_validator.validate_string(payload, allow_html=False)
            # The payload is accepted but escaped, never passed through raw.
            assert result.is_valid
            assert "<script>" not in result.sanitized_value
            assert "&lt;script&gt;" in result.sanitized_value or "&lt;" in result.sanitized_value

    def test_real_logging_sanitization(self):
        """Test logging sanitization with real sensitive data patterns."""
        sensitive_data = [
            "password123",
            "sk-1234567890abcdef",  # API key pattern
            "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9",  # JWT token
            "4532-1234-5678-9012",  # Credit card pattern
            "user@example.com:password123",  # Credentials
        ]
        for data in sensitive_data:
            sanitized = sanitize_for_logging(data, "sensitive_field")
            # Long secrets must never survive verbatim in log output.
            if len(data) > 20:
                assert data not in sanitized
            # Output should be truncated or hashed to a reasonable log length.
            assert len(sanitized) <= 250

    def test_real_security_config_integration(self):
        """Test integration with real security configuration."""
        # Exercise different environment-backed configurations.
        test_configs = [
            {"ENVIRONMENT": "development", "STRICT_INPUT_VALIDATION": "false"},
            {"ENVIRONMENT": "production", "STRICT_INPUT_VALIDATION": "true"},
            {"LOG_SENSITIVE_DATA": "false"},
        ]
        for config in test_configs:
            with mock.patch.dict(os.environ, config, clear=False):
                # Create a validator that reads the real (patched) config.
                validator = RealInputValidator()
                long_string = "a" * 1500
                result = validator.validate_string(long_string, max_length=1000)
                if config.get("STRICT_INPUT_VALIDATION") == "true":
                    assert not result.is_valid
                else:
                    # In lenient mode, should truncate (or at least warn).
                    assert result.is_valid or len(result.warnings) > 0

    def test_real_performance_with_large_data(self):
        """Test performance with real large data sets."""
        large_text = "Lorem ipsum " * 10000  # ~120KB of text
        large_json = {"data": ["item"] * 1000}  # Large JSON structure
        # Should handle large data without crashing.
        result = self.lenient_validator.validate_string(large_text, max_length=50000)
        assert result.is_valid or len(result.warnings) > 0
        # JSON validation should work with reasonable size limits.
        json_result = validate_json_input(large_json, max_size=100000)
        assert json_result == large_json

    def test_real_edge_cases(self):
        """Test real edge cases and boundary conditions."""
        edge_cases = [
            "",  # Empty string
            " ",  # Whitespace only
            "\n\t\r",  # Control characters
            "🚀🎯✅",  # Unicode/emoji
            "a" * 999,  # Just under limit
            "a" * 1000,  # Exactly at limit
            "a" * 1001,  # Just over limit
        ]
        for case in edge_cases:
            # No input should ever crash the validator.
            result = self.lenient_validator.validate_string(case, max_length=1000)
            assert isinstance(result, ValidationResult)
            assert isinstance(result.is_valid, bool)
            assert isinstance(result.sanitized_value, (str, type(None)))

    def test_real_concurrent_validation(self):
        """Test concurrent validation scenarios."""
        import threading
        import time

        results = []
        errors = []

        def validate_worker(validator, data, worker_id):
            # Hammer the shared validator, recording outcomes and any failures.
            try:
                for i in range(10):
                    result = validator.validate_string(f"{data}_{worker_id}_{i}")
                    results.append((worker_id, i, result.is_valid))
                    time.sleep(0.001)  # Small delay to encourage race conditions
            except Exception as e:
                errors.append((worker_id, str(e)))

        # Create multiple threads using the same validator instance.
        threads = []
        for i in range(5):
            thread = threading.Thread(
                target=validate_worker,
                args=(self.strict_validator, "test_data", i),
            )
            threads.append(thread)
            thread.start()
        # Wait for all threads to finish.
        for thread in threads:
            thread.join()

        # Concurrent access must neither raise nor drop results.
        assert len(errors) == 0, f"Concurrent validation errors: {errors}"
        assert len(results) == 50  # 5 threads * 10 iterations each
class TestRealSecurityIntegration:
    """Integration tests against the real security configuration system."""

    def test_real_production_mode_behavior(self):
        """A production-style environment must yield strict, secure behavior."""
        prod_env = {
            "ENVIRONMENT": "production",
            "STRICT_INPUT_VALIDATION": "true",
            "LOG_SENSITIVE_DATA": "false",
        }
        with mock.patch.dict(os.environ, prod_env, clear=True):
            validator = RealInputValidator()

            # Production configuration implies strict rejection of attacks.
            malicious_input = "'; DROP TABLE users; --"
            outcome = validator.validate_string(malicious_input)
            assert not outcome.is_valid

            # Logging must scrub the sensitive value entirely.
            sensitive_data = "password123" * 10
            scrubbed = validator.sanitize_for_logging(sensitive_data, "password")
            assert "password123" not in scrubbed

    def test_real_file_system_integration(self):
        """Path validation is checked against files that really exist on disk."""
        with tempfile.TemporaryDirectory() as temp_dir:
            base = Path(temp_dir)
            (base / "test.txt").write_text("test content")
            (base / "dangerous.exe").write_text("fake executable")

            # A plain text file inside the allow-list passes through.
            assert validate_file_path_input("test.txt", [temp_dir]) == "test.txt"
            # An executable is refused even when it lives in an allowed dir.
            with pytest.raises(SecurityViolationError):
                validate_file_path_input("dangerous.exe", [temp_dir])
def run_real_validation_demo():
    """Print a short interactive demonstration of the real validation stack."""
    print("=== RPA Vision V3 - Real Input Validation Demo ===\n")
    validator = RealInputValidator(strict_mode=True)

    # --- 1: SQL injection vectors -------------------------------------
    print("1. Testing real SQL injection vectors:")
    for attack in (
        "'; DROP TABLE users; --",
        "1' OR '1'='1",
        "UNION SELECT * FROM passwords",
    ):
        verdict = validator.validate_string(attack)
        status = "✅ BLOCKED" if not verdict.is_valid else "❌ ALLOWED"
        print(f" {status}: {attack}")

    # --- 2: file-path validation --------------------------------------
    print("\n2. Testing real file path validation:")
    with tempfile.TemporaryDirectory() as temp_dir:
        try:
            safe_path = validate_file_path_input("document.txt", [temp_dir])
            print(f" ✅ ALLOWED: document.txt -> {safe_path}")
        except Exception as e:
            print(f" ❌ REJECTED: document.txt -> {e}")
        try:
            validate_file_path_input("../../../etc/passwd")
            print(" ❌ ALLOWED: ../../../etc/passwd (SECURITY ISSUE!)")
        except SecurityViolationError:
            print(" ✅ BLOCKED: ../../../etc/passwd")

    # --- 3: JSON validation -------------------------------------------
    print("\n3. Testing real JSON validation:")
    real_json = {"workflow": {"steps": [{"action": "click"}]}}
    try:
        result = validate_json_input(real_json)
        print(f" ✅ VALID: {result}")
    except Exception as e:
        print(f" ❌ INVALID: {e}")

    print("\n=== Real Demo Complete ===")
if __name__ == "__main__":
    # Running this file directly launches the demo rather than the tests.
    run_real_validation_demo()
    # To run the test suite instead:
    # pytest.main([__file__, "-v"])