#!/usr/bin/env python3
"""
Integration tests for input validation using real functionality.
This demonstrates how to test real functionality without mocking core components,
using actual security patterns and realistic data scenarios.
"""
import pytest
import re
import html
import json
import logging
from typing import Any, List, Dict
from dataclasses import dataclass
@dataclass
class ValidationResult:
    """Outcome of a single validation call.

    Fields are positional, in this order: validity flag, sanitized value,
    error messages, warning messages.
    """

    # True when validation produced no errors.
    is_valid: bool
    # Cleaned-up form of the input; the validator passes None when the
    # input was not a string.
    sanitized_value: Any
    # Human-readable reasons the input was rejected.
    errors: List[str]
    # Non-fatal findings reported alongside the result.
    warnings: List[str]
class RealInputValidator:
    """
    Pattern-based validator and sanitizer for untrusted string input.

    The validator scans a value with case-insensitive regular expressions for
    SQL and NoSQL injection shapes, enforces a maximum length, HTML-escapes
    the value (unless HTML is allowed) and strips control characters.

    In strict mode findings are reported as errors (and logged); in lenient
    mode they are reported as warnings and over-long values are truncated.

    The patterns are heuristics: they can still produce false positives
    (e.g. any quote character or ``#`` is flagged) and are not a substitute
    for parameterized queries.
    """

    # SQL patterns. Keywords are matched as statement *shapes*
    # (``SELECT ... FROM``, ``DROP TABLE``) rather than as bare words, so
    # that ordinary prose such as "SELECT good options" is not rejected.
    SQL_INJECTION_PATTERNS = [
        r"(\bUNION\s+(ALL\s+)?SELECT\b)",
        r"(\bSELECT\b[\s\S]+?\bFROM\b)",
        r"(\bINSERT\s+INTO\b)",
        r"(\bUPDATE\b[\s\S]+?\bSET\b)",
        r"(\bDELETE\s+FROM\b)",
        r"(\b(DROP|CREATE|ALTER)\s+"
        r"(TABLE|DATABASE|SCHEMA|INDEX|VIEW|PROCEDURE|FUNCTION|TRIGGER|USER)\b)",
        r"(\bEXEC\b|\bEXECUTE\s+(IMMEDIATE\b|\w+\s*\(|(sp|xp)_\w+))",
        r"(\b(UNION|OR|AND)\s+\d+\s*=\s*\d+)",
        r"(--|#|/\*|\*/)",
        r"(\b(SCRIPT|JAVASCRIPT|VBSCRIPT|ONLOAD|ONERROR)\b)",
        r"([\'\";])",
        r"(\bxp_cmdshell\b)",
        r"(\bsp_executesql\b)",
    ]

    # NoSQL patterns. The operator list includes the logical operators
    # ($or/$and/...), and the object pattern tolerates a quoted key so that
    # JSON such as {"$or": [...]} is recognised.
    NOSQL_INJECTION_PATTERNS = [
        r"(\$where|\$regex|\$ne|\$gt|\$lt|\$in|\$nin"
        r"|\$or|\$and|\$nor|\$not|\$exists|\$expr)",
        r"(function\s*\(|\beval\b|\bsetTimeout\b)",
        r"(\{\s*[\"']?\$.*\})",
        r"(this\.|db\.)",
    ]

    # Control characters removed from sanitized output; tab, LF and CR
    # (\x09, \x0A, \x0D) are deliberately kept.
    _CONTROL_CHARS = re.compile(r"[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]")

    # Characters that could forge or corrupt a log line (all C0 controls,
    # DEL, NEL and the Unicode line/paragraph separators).
    _LOG_UNSAFE_CHARS = re.compile(r"[\x00-\x1F\x7F\x85\u2028\u2029]")

    # Maximum number of characters of a value that is written to the log.
    _LOG_VALUE_LIMIT = 200

    def __init__(self, strict_mode: bool = True):
        """
        Create a validator.

        Args:
            strict_mode: When True, findings become errors and are logged;
                when False, they become warnings.
        """
        self.strict_mode = strict_mode
        self.logger = logging.getLogger(__name__)
        # Compile once per instance so validate_string does not re-parse
        # the pattern strings on every call.
        self._sql_patterns = [
            re.compile(pattern, re.IGNORECASE)
            for pattern in self.SQL_INJECTION_PATTERNS
        ]
        self._nosql_patterns = [
            re.compile(pattern, re.IGNORECASE)
            for pattern in self.NOSQL_INJECTION_PATTERNS
        ]

    def validate_string(self, value: str, max_length: int = 1000,
                        allow_html: bool = False,
                        field_name: str = "input") -> "ValidationResult":
        """
        Validate and sanitize a single string value.

        Args:
            value: The input to check. Non-string input is rejected.
            max_length: Maximum accepted length in characters.
            allow_html: When False the sanitized value is HTML-escaped.
            field_name: Name used as the prefix of every message.

        Returns:
            A ValidationResult. ``is_valid`` is True when no errors were
            recorded. ``sanitized_value`` is None for non-string input;
            otherwise it is the (possibly truncated, escaped) value with
            control characters removed. Each injection category contributes
            at most one error or warning.
        """
        errors = []
        warnings = []

        if not isinstance(value, str):
            errors.append(f"{field_name} must be a string")
            return ValidationResult(False, None, errors, warnings)

        sanitized = value
        if len(value) > max_length:
            if self.strict_mode:
                errors.append(f"{field_name} exceeds maximum length of {max_length}")
            else:
                # Lenient mode keeps going with a shortened value.
                warnings.append(f"{field_name} truncated to {max_length} characters")
                sanitized = value[:max_length]

        # Injection scanning always looks at the original value, not the
        # truncated one, so a payload beyond max_length is still reported.
        self._check_injection(value, self._sql_patterns, "SQL",
                              field_name, errors, warnings)
        self._check_injection(value, self._nosql_patterns, "NoSQL",
                              field_name, errors, warnings)

        if not allow_html:
            sanitized = html.escape(sanitized)
        sanitized = self._CONTROL_CHARS.sub("", sanitized)

        return ValidationResult(not errors, sanitized, errors, warnings)

    def _check_injection(self, value, patterns, label, field_name,
                         errors, warnings) -> None:
        """Record one finding for ``label`` if any pattern matches ``value``."""
        if not any(pattern.search(value) for pattern in patterns):
            return
        if self.strict_mode:
            errors.append(f"{field_name} contains potential {label} injection pattern")
            self._log_security_violation(f"{label} injection attempt", field_name, value)
        else:
            warnings.append(f"{field_name} contains suspicious {label} pattern")

    def sanitize_for_logging(self, data: Any, field_name: str = "data") -> str:
        """
        Render ``data`` as a short, single-line, HTML-escaped string.

        Dicts and lists are serialized as compact JSON; everything else goes
        through ``str()``. Control characters and line separators are
        replaced by their backslash escapes so a value cannot start a new
        log line. The text is cut to 200 characters (plus ``...``) before
        HTML escaping.

        Returns:
            The sanitized text, or ``"<field_name>[unprintable:<type>]"``
            when the value cannot be converted to a string.
        """
        try:
            if isinstance(data, (dict, list)):
                data_str = json.dumps(data, ensure_ascii=True, separators=(',', ':'))
            else:
                data_str = str(data)
        except Exception:
            # Best effort: an arbitrary __str__ or an unserializable
            # container must never break logging.
            return f"{field_name}[unprintable:{type(data).__name__}]"

        data_str = self._LOG_UNSAFE_CHARS.sub(
            lambda match: match.group().encode("unicode_escape").decode("ascii"),
            data_str,
        )
        if len(data_str) > self._LOG_VALUE_LIMIT:
            data_str = data_str[:self._LOG_VALUE_LIMIT] + "..."
        return html.escape(data_str)

    def _log_security_violation(self, violation_type: str, field_name: str, value: Any) -> None:
        """Log a warning describing the violation, with the value sanitized."""
        sanitized_value = self.sanitize_for_logging(value, field_name)
        # Lazy %-style arguments: the message is only formatted if the
        # warning is actually emitted.
        self.logger.warning(
            "Security violation detected: %s in %s. Value: %s",
            violation_type, field_name, sanitized_value,
        )
class TestRealInputValidationFunctionality:
"""Test real input validation functionality without mocking core components."""
def setup_method(self):
    """Build one strict and one lenient validator for the test about to run."""
    self.lenient_validator = RealInputValidator(strict_mode=False)
    self.strict_validator = RealInputValidator(strict_mode=True)
def test_real_sql_injection_detection(self):
    """Every SQL injection payload must be rejected with a SQL-specific error."""
    # Payloads covering quote breaking, comment truncation, UNION-based
    # extraction, stacked statements and tautologies.
    real_sql_attacks = [
        "'; DROP TABLE users; --",
        "1' OR '1'='1",
        "admin'--",
        "UNION SELECT username, password FROM users",
        "1; EXEC xp_cmdshell('dir')",
        "' OR 1=1 --",
        "'; INSERT INTO users VALUES ('hacker', 'password'); --",
        "1' UNION SELECT null, username, password FROM admin_users --",
    ]
    for attack in real_sql_attacks:
        result = self.strict_validator.validate_string(attack, field_name="user_input")
        assert not result.is_valid, f"Failed to detect SQL injection: {attack}"
        # Match "potential SQL injection", not just "SQL injection": the
        # shorter text is also a substring of the "NoSQL injection" error
        # and would let this check pass for the wrong category.
        assert any("potential SQL injection" in error for error in result.errors), \
            f"SQL injection not properly identified: {attack}"
def test_real_nosql_injection_detection(self):
    """Every NoSQL injection payload must be rejected with a NoSQL error."""
    # Operator objects, JavaScript snippets and collection access strings.
    nosql_payloads = (
        '{"$where": "this.username == this.password"}',
        '{"$regex": ".*"}',
        'function() { return true; }',
        '{"$ne": null}',
        'this.username',
        '{"$gt": ""}',
        'db.users.find()',
        '{"$or": [{"username": "admin"}, {"role": "admin"}]}',
    )
    validate = self.strict_validator.validate_string
    for attack in nosql_payloads:
        outcome = validate(attack, field_name="query_param")
        # The payload must be blocked ...
        assert not outcome.is_valid, f"Failed to detect NoSQL injection: {attack}"
        # ... and at least one error must name the NoSQL category.
        flagged = any("NoSQL injection" in message for message in outcome.errors)
        assert flagged, f"NoSQL injection not properly identified: {attack}"
def test_legitimate_user_inputs_pass_validation(self):
    """Ordinary user input must be accepted by the strict validator."""
    # Typical values: e-mail, file names, free text, Unicode, paths,
    # prose that merely mentions SQL keywords, and versioned names.
    harmless_values = (
        "john.doe@example.com",
        "My Important Document.pdf",
        "User input with spaces and numbers 123",
        "Unicode text: café, naïve, résumé, 中文",
        "File path: /home/user/documents/report.xlsx",
        "Normal SQL-like text: SELECT good options WHERE valid = true",
        "Workflow name: Invoice_Processing_v2.1",
    )
    validate = self.strict_validator.validate_string
    for input_data in harmless_values:
        outcome = validate(input_data, field_name="legitimate_input")
        # Accepted, and with an empty error list.
        assert outcome.is_valid, f"Legitimate input incorrectly rejected: {input_data}"
        assert not outcome.errors, f"Unexpected errors for legitimate input: {input_data}"
def test_real_xss_sanitization(self):
"""Test real XSS attack sanitization."""
# These are actual XSS payloads from security research
real_xss_attacks = [
'',
'',
'