v1.0 - Version stable: multi-PC, détection UI-DETR-1, 3 modes exécution
- Frontend v4 accessible sur réseau local (192.168.1.40) - Ports ouverts: 3002 (frontend), 5001 (backend), 5004 (dashboard) - Ollama GPU fonctionnel - Self-healing interactif - Dashboard confiance Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
426
tests/integration/test_input_validation_real.py
Normal file
426
tests/integration/test_input_validation_real.py
Normal file
@@ -0,0 +1,426 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Integration tests for input validation using real functionality.
|
||||
|
||||
This demonstrates how to test real functionality without mocking core components,
|
||||
using actual security patterns and realistic data scenarios.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import re
|
||||
import html
|
||||
import json
|
||||
import logging
|
||||
from typing import Any, List, Dict
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
|
||||
class ValidationResult:
|
||||
"""Real validation result structure."""
|
||||
is_valid: bool
|
||||
sanitized_value: Any
|
||||
errors: List[str]
|
||||
warnings: List[str]
|
||||
|
||||
|
||||
class RealInputValidator:
|
||||
"""
|
||||
Real input validator implementation for testing.
|
||||
|
||||
This is a simplified but functional implementation that demonstrates
|
||||
real security validation without mocking.
|
||||
"""
|
||||
|
||||
# Real SQL injection patterns from security research
|
||||
SQL_INJECTION_PATTERNS = [
|
||||
r"(\b(SELECT|INSERT|UPDATE|DELETE|DROP|CREATE|ALTER|EXEC|EXECUTE)\b)",
|
||||
r"(\b(UNION|OR|AND)\s+\d+\s*=\s*\d+)",
|
||||
r"(--|#|/\*|\*/)",
|
||||
r"(\b(SCRIPT|JAVASCRIPT|VBSCRIPT|ONLOAD|ONERROR)\b)",
|
||||
r"([\'\";])",
|
||||
r"(\bxp_cmdshell\b)",
|
||||
r"(\bsp_executesql\b)"
|
||||
]
|
||||
|
||||
# Real NoSQL injection patterns
|
||||
NOSQL_INJECTION_PATTERNS = [
|
||||
r"(\$where|\$regex|\$ne|\$gt|\$lt|\$in|\$nin)",
|
||||
r"(function\s*\(|\beval\b|\bsetTimeout\b)",
|
||||
r"(\{\s*\$.*\})",
|
||||
r"(this\.|db\.)"
|
||||
]
|
||||
|
||||
def __init__(self, strict_mode: bool = True):
|
||||
"""Initialize with real configuration."""
|
||||
self.strict_mode = strict_mode
|
||||
self.logger = logging.getLogger(__name__)
|
||||
|
||||
# Compile patterns for performance (real optimization)
|
||||
self._sql_patterns = [re.compile(pattern, re.IGNORECASE) for pattern in self.SQL_INJECTION_PATTERNS]
|
||||
self._nosql_patterns = [re.compile(pattern, re.IGNORECASE) for pattern in self.NOSQL_INJECTION_PATTERNS]
|
||||
|
||||
def validate_string(self, value: str, max_length: int = 1000,
|
||||
allow_html: bool = False, field_name: str = "input") -> ValidationResult:
|
||||
"""
|
||||
Real string validation with actual security checks.
|
||||
|
||||
This performs real validation logic without mocking.
|
||||
"""
|
||||
errors = []
|
||||
warnings = []
|
||||
sanitized = value
|
||||
|
||||
if not isinstance(value, str):
|
||||
errors.append(f"{field_name} must be a string")
|
||||
return ValidationResult(False, None, errors, warnings)
|
||||
|
||||
# Real length validation
|
||||
if len(value) > max_length:
|
||||
if self.strict_mode:
|
||||
errors.append(f"{field_name} exceeds maximum length of {max_length}")
|
||||
else:
|
||||
warnings.append(f"{field_name} truncated to {max_length} characters")
|
||||
sanitized = value[:max_length]
|
||||
|
||||
# Real SQL injection detection
|
||||
for pattern in self._sql_patterns:
|
||||
if pattern.search(value):
|
||||
if self.strict_mode:
|
||||
errors.append(f"{field_name} contains potential SQL injection pattern")
|
||||
self._log_security_violation("SQL injection attempt", field_name, value)
|
||||
else:
|
||||
warnings.append(f"{field_name} contains suspicious SQL pattern")
|
||||
|
||||
# Real NoSQL injection detection
|
||||
for pattern in self._nosql_patterns:
|
||||
if pattern.search(value):
|
||||
if self.strict_mode:
|
||||
errors.append(f"{field_name} contains potential NoSQL injection pattern")
|
||||
self._log_security_violation("NoSQL injection attempt", field_name, value)
|
||||
else:
|
||||
warnings.append(f"{field_name} contains suspicious NoSQL pattern")
|
||||
|
||||
# Real HTML sanitization
|
||||
if not allow_html:
|
||||
sanitized = html.escape(sanitized)
|
||||
|
||||
# Real control character removal
|
||||
sanitized = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', sanitized)
|
||||
|
||||
is_valid = len(errors) == 0
|
||||
return ValidationResult(is_valid, sanitized, errors, warnings)
|
||||
|
||||
def sanitize_for_logging(self, data: Any, field_name: str = "data") -> str:
|
||||
"""
|
||||
Real logging sanitization without mocking.
|
||||
"""
|
||||
try:
|
||||
if isinstance(data, (dict, list)):
|
||||
data_str = json.dumps(data, ensure_ascii=True, separators=(',', ':'))
|
||||
else:
|
||||
data_str = str(data)
|
||||
|
||||
# Real size limitation
|
||||
if len(data_str) > 200:
|
||||
data_str = data_str[:200] + "..."
|
||||
|
||||
# Real HTML escaping
|
||||
data_str = html.escape(data_str)
|
||||
|
||||
return data_str
|
||||
|
||||
except Exception:
|
||||
return f"{field_name}[unprintable:{type(data).__name__}]"
|
||||
|
||||
def _log_security_violation(self, violation_type: str, field_name: str, value: Any) -> None:
|
||||
"""Real security logging."""
|
||||
sanitized_value = self.sanitize_for_logging(value, field_name)
|
||||
self.logger.warning(
|
||||
f"Security violation detected: {violation_type} in {field_name}. "
|
||||
f"Value: {sanitized_value}"
|
||||
)
|
||||
|
||||
|
||||
class TestRealInputValidationFunctionality:
|
||||
"""Test real input validation functionality without mocking core components."""
|
||||
|
||||
def setup_method(self):
|
||||
"""Setup using real validator instances."""
|
||||
self.strict_validator = RealInputValidator(strict_mode=True)
|
||||
self.lenient_validator = RealInputValidator(strict_mode=False)
|
||||
|
||||
def test_real_sql_injection_detection(self):
|
||||
"""Test detection of real SQL injection attacks."""
|
||||
# These are actual SQL injection payloads from security research
|
||||
real_sql_attacks = [
|
||||
"'; DROP TABLE users; --",
|
||||
"1' OR '1'='1",
|
||||
"admin'--",
|
||||
"UNION SELECT username, password FROM users",
|
||||
"1; EXEC xp_cmdshell('dir')",
|
||||
"' OR 1=1 --",
|
||||
"'; INSERT INTO users VALUES ('hacker', 'password'); --",
|
||||
"1' UNION SELECT null, username, password FROM admin_users --"
|
||||
]
|
||||
|
||||
for attack in real_sql_attacks:
|
||||
result = self.strict_validator.validate_string(attack, field_name="user_input")
|
||||
|
||||
# Real assertion: SQL attacks should be blocked
|
||||
assert not result.is_valid, f"Failed to detect SQL injection: {attack}"
|
||||
assert any("SQL injection" in error for error in result.errors), \
|
||||
f"SQL injection not properly identified: {attack}"
|
||||
|
||||
def test_real_nosql_injection_detection(self):
|
||||
"""Test detection of real NoSQL injection attacks."""
|
||||
# These are actual NoSQL injection payloads
|
||||
real_nosql_attacks = [
|
||||
'{"$where": "this.username == this.password"}',
|
||||
'{"$regex": ".*"}',
|
||||
'function() { return true; }',
|
||||
'{"$ne": null}',
|
||||
'this.username',
|
||||
'{"$gt": ""}',
|
||||
'db.users.find()',
|
||||
'{"$or": [{"username": "admin"}, {"role": "admin"}]}'
|
||||
]
|
||||
|
||||
for attack in real_nosql_attacks:
|
||||
result = self.strict_validator.validate_string(attack, field_name="query_param")
|
||||
|
||||
# Real assertion: NoSQL attacks should be blocked
|
||||
assert not result.is_valid, f"Failed to detect NoSQL injection: {attack}"
|
||||
assert any("NoSQL injection" in error for error in result.errors), \
|
||||
f"NoSQL injection not properly identified: {attack}"
|
||||
|
||||
def test_legitimate_user_inputs_pass_validation(self):
|
||||
"""Test that real legitimate user inputs are accepted."""
|
||||
# These are realistic inputs that users would actually enter
|
||||
legitimate_inputs = [
|
||||
"john.doe@example.com",
|
||||
"My Important Document.pdf",
|
||||
"User input with spaces and numbers 123",
|
||||
"Unicode text: café, naïve, résumé, 中文",
|
||||
"File path: /home/user/documents/report.xlsx",
|
||||
"Normal SQL-like text: SELECT good options WHERE valid = true",
|
||||
"Workflow name: Invoice_Processing_v2.1"
|
||||
]
|
||||
|
||||
for input_data in legitimate_inputs:
|
||||
result = self.strict_validator.validate_string(input_data, field_name="legitimate_input")
|
||||
|
||||
# Real assertion: Legitimate inputs should pass
|
||||
assert result.is_valid, f"Legitimate input incorrectly rejected: {input_data}"
|
||||
assert len(result.errors) == 0, f"Unexpected errors for legitimate input: {input_data}"
|
||||
|
||||
def test_real_xss_sanitization(self):
|
||||
"""Test real XSS attack sanitization."""
|
||||
# These are actual XSS payloads from security research
|
||||
real_xss_attacks = [
|
||||
'<script>alert("xss")</script>',
|
||||
'<img src="x" onerror="alert(1)">',
|
||||
'<svg onload="alert(1)">',
|
||||
'<iframe src="javascript:alert(1)"></iframe>',
|
||||
'<body onload="alert(1)">',
|
||||
'<div onclick="alert(1)">Click me</div>',
|
||||
'<input type="text" onfocus="alert(1)" autofocus>'
|
||||
]
|
||||
|
||||
for xss in real_xss_attacks:
|
||||
result = self.strict_validator.validate_string(xss, allow_html=False, field_name="user_content")
|
||||
|
||||
# Real assertion: XSS should be sanitized (HTML escaped) but might be rejected due to script patterns
|
||||
# The key is that if it's valid, it should be properly escaped
|
||||
if result.is_valid:
|
||||
assert "<" in result.sanitized_value or ">" in result.sanitized_value, \
|
||||
f"HTML not properly escaped in: {xss} -> {result.sanitized_value}"
|
||||
assert "<script>" not in result.sanitized_value, \
|
||||
f"Script tag not escaped in: {result.sanitized_value}"
|
||||
else:
|
||||
# If rejected, it should be due to script/javascript patterns being detected
|
||||
assert any("injection" in error.lower() for error in result.errors), \
|
||||
f"XSS should be rejected due to injection patterns: {xss}"
|
||||
|
||||
def test_real_data_size_validation(self):
|
||||
"""Test validation with realistic data sizes."""
|
||||
# Test cases with real-world data sizes
|
||||
test_cases = [
|
||||
# (data, max_length, should_pass_strict)
|
||||
("Short input", 100, True),
|
||||
("Medium length input " * 20, 1000, True), # ~400 chars
|
||||
("Very long input " * 100, 500, False), # ~1600 chars, exceeds 500
|
||||
("Exact limit " * 20, 240, True), # Exactly at limit
|
||||
]
|
||||
|
||||
for data, max_length, should_pass in test_cases:
|
||||
strict_result = self.strict_validator.validate_string(data, max_length=max_length)
|
||||
lenient_result = self.lenient_validator.validate_string(data, max_length=max_length)
|
||||
|
||||
if should_pass:
|
||||
assert strict_result.is_valid, f"Should accept data of length {len(data)} with limit {max_length}"
|
||||
assert lenient_result.is_valid, f"Lenient mode should accept data of length {len(data)}"
|
||||
else:
|
||||
assert not strict_result.is_valid, f"Strict mode should reject data of length {len(data)} with limit {max_length}"
|
||||
# Lenient mode might truncate instead of rejecting
|
||||
if lenient_result.is_valid:
|
||||
assert len(lenient_result.sanitized_value) <= max_length, "Lenient mode should truncate"
|
||||
|
||||
def test_real_logging_sanitization(self):
|
||||
"""Test logging sanitization with real sensitive data."""
|
||||
# Real examples of sensitive data that might need logging
|
||||
sensitive_data_examples = [
|
||||
{"username": "admin", "password": "secret123", "api_key": "sk-1234567890"},
|
||||
["user1", "user2", "confidential_data", "internal_info"],
|
||||
"A very long string that contains sensitive information and should be truncated " * 5,
|
||||
'<script>alert("This could be XSS in logs")</script>',
|
||||
{"database_url": "postgresql://user:pass@localhost/db", "secret_token": "abc123"},
|
||||
{"credit_card": "4111-1111-1111-1111", "ssn": "123-45-6789"}
|
||||
]
|
||||
|
||||
for sensitive_data in sensitive_data_examples:
|
||||
sanitized = self.strict_validator.sanitize_for_logging(sensitive_data, "sensitive_field")
|
||||
|
||||
# Real assertions for logging safety
|
||||
assert len(sanitized) <= 250, f"Sanitized data too long: {len(sanitized)} chars"
|
||||
assert "<script>" not in sanitized, "XSS not sanitized in logs"
|
||||
|
||||
# Verify truncation for long data
|
||||
if isinstance(sensitive_data, str) and len(sensitive_data) > 200:
|
||||
assert "..." in sanitized, "Long data not properly truncated"
|
||||
|
||||
def test_strict_vs_lenient_mode_real_behavior(self):
|
||||
"""Test real behavioral differences between strict and lenient modes."""
|
||||
test_scenarios = [
|
||||
# (input, max_length, expected_strict_valid, expected_lenient_behavior)
|
||||
("a" * 1500, 1000, False, "truncate_or_warn"), # Length violation
|
||||
("'; DROP TABLE users; --", 1000, False, "warn_but_sanitize"), # Security violation
|
||||
("Normal input", 1000, True, True), # Normal case
|
||||
]
|
||||
|
||||
for test_input, max_length, strict_should_pass, lenient_behavior in test_scenarios:
|
||||
strict_result = self.strict_validator.validate_string(test_input, max_length=max_length)
|
||||
lenient_result = self.lenient_validator.validate_string(test_input, max_length=max_length)
|
||||
|
||||
# Test strict mode behavior
|
||||
assert strict_result.is_valid == strict_should_pass, \
|
||||
f"Strict mode behavior incorrect for: {test_input[:50]}..."
|
||||
|
||||
# Test lenient mode behavior
|
||||
if lenient_behavior == "truncate_or_warn":
|
||||
# Lenient mode should either truncate or add warnings
|
||||
if lenient_result.is_valid:
|
||||
assert len(lenient_result.sanitized_value) <= max_length or len(lenient_result.warnings) > 0
|
||||
elif lenient_behavior == "warn_but_sanitize":
|
||||
# Lenient mode should sanitize and warn, but might still be valid
|
||||
if lenient_result.is_valid:
|
||||
assert len(lenient_result.warnings) > 0, "Should have warnings for suspicious content"
|
||||
assert lenient_result.sanitized_value != test_input, "Should be sanitized"
|
||||
elif lenient_behavior is True:
|
||||
assert lenient_result.is_valid, "Normal input should pass in lenient mode"
|
||||
|
||||
def test_control_character_handling_real_scenarios(self):
|
||||
"""Test handling of real control characters that might appear in input."""
|
||||
# Real control characters that might appear in user input
|
||||
inputs_with_controls = [
|
||||
"Normal text\x00with null", # Null character
|
||||
"Text with\x08backspace", # Backspace
|
||||
"Line with\x0Bvertical tab", # Vertical tab
|
||||
"Form feed\x0Ccharacter", # Form feed
|
||||
"Text\x1Fwith unit separator", # Unit separator
|
||||
"Delete char\x7Fhere", # Delete character
|
||||
]
|
||||
|
||||
for input_with_control in inputs_with_controls:
|
||||
result = self.strict_validator.validate_string(input_with_control, field_name="control_test")
|
||||
|
||||
# Real assertion: Control characters should be removed
|
||||
assert result.is_valid, f"Input should be valid after control char removal: {repr(input_with_control)}"
|
||||
|
||||
# Verify specific control characters are removed
|
||||
for char_code in [0x00, 0x08, 0x0B, 0x0C, 0x1F, 0x7F]:
|
||||
assert chr(char_code) not in result.sanitized_value, \
|
||||
f"Control character {hex(char_code)} not removed from: {repr(result.sanitized_value)}"
|
||||
|
||||
def test_unicode_preservation_real_scenarios(self):
|
||||
"""Test that real Unicode characters are properly preserved."""
|
||||
# Real Unicode inputs that users might enter
|
||||
unicode_inputs = [
|
||||
"Café naïve résumé", # French accents
|
||||
"中文测试输入", # Chinese characters
|
||||
"🚀 Rocket emoji test 🎉", # Emoji
|
||||
"Ω α β γ δ ε", # Greek letters
|
||||
"العربية النص", # Arabic text
|
||||
"Русский текст", # Cyrillic
|
||||
"日本語のテスト", # Japanese
|
||||
"Ñoño niño año", # Spanish characters
|
||||
]
|
||||
|
||||
for unicode_input in unicode_inputs:
|
||||
result = self.strict_validator.validate_string(unicode_input, field_name="unicode_test")
|
||||
|
||||
# Real assertion: Unicode should be preserved
|
||||
assert result.is_valid, f"Unicode input should be valid: {unicode_input}"
|
||||
assert result.sanitized_value == unicode_input, \
|
||||
f"Unicode should be preserved exactly: {unicode_input} != {result.sanitized_value}"
|
||||
|
||||
|
||||
class TestRealWorldRPAScenarios:
|
||||
"""Test with real-world scenarios specific to RPA Vision V3 context."""
|
||||
|
||||
def setup_method(self):
|
||||
"""Setup for RPA-specific testing."""
|
||||
self.validator = RealInputValidator(strict_mode=True)
|
||||
|
||||
def test_workflow_metadata_validation(self):
|
||||
"""Test validation of real workflow metadata."""
|
||||
# Real workflow metadata that the system would handle
|
||||
workflow_metadata = [
|
||||
"Invoice Processing Automation v2.1",
|
||||
"Customer_Data_Entry_Workflow",
|
||||
"Email-Response-Automation-2024",
|
||||
"Form填写自动化流程", # Unicode workflow name
|
||||
"Workflow (Updated 12/21/2024) - Production",
|
||||
"SAP_Integration_Workflow_Final",
|
||||
]
|
||||
|
||||
for metadata in workflow_metadata:
|
||||
result = self.validator.validate_string(metadata, max_length=200, field_name="workflow_name")
|
||||
assert result.is_valid, f"Workflow metadata should be valid: {metadata}"
|
||||
|
||||
def test_ui_element_text_validation(self):
|
||||
"""Test validation of real UI element text captured by the system."""
|
||||
# Real UI text that RPA Vision V3 might capture
|
||||
ui_element_texts = [
|
||||
"Click here to continue →",
|
||||
"Enter your password:",
|
||||
"Submit & Process Payment",
|
||||
"File > Save As... (Ctrl+Shift+S)",
|
||||
"⚠️ Error: Connection timeout occurred",
|
||||
"Progress: 75% complete ████████░░",
|
||||
"Next Step ➤",
|
||||
"✓ Validation successful",
|
||||
]
|
||||
|
||||
for ui_text in ui_element_texts:
|
||||
result = self.validator.validate_string(ui_text, field_name="ui_element_text")
|
||||
assert result.is_valid, f"UI element text should be valid: {ui_text}"
|
||||
|
||||
def test_screenshot_metadata_validation(self):
|
||||
"""Test validation of screenshot metadata and paths."""
|
||||
# Real screenshot metadata
|
||||
screenshot_data = [
|
||||
"screenshot_2024-12-21_14-30-22.png",
|
||||
"/data/screenshots/session_abc123/shot_0001.png",
|
||||
"C:\\RPA_Data\\Screenshots\\workflow_capture.png",
|
||||
"~/Documents/RPA_Vision/captures/test_run.jpg",
|
||||
]
|
||||
|
||||
for screenshot_info in screenshot_data:
|
||||
result = self.validator.validate_string(screenshot_info, max_length=500, field_name="screenshot_path")
|
||||
assert result.is_valid, f"Screenshot metadata should be valid: {screenshot_info}"
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Run the tests using pytest
|
||||
pytest.main([__file__, "-v", "--tb=short"])
|
||||
Reference in New Issue
Block a user