Files
rpa_vision_v3/test_workflow_naming_improved.py
Dom a27b74cf22 v1.0 - Version stable: multi-PC, détection UI-DETR-1, 3 modes exécution
- Frontend v4 accessible sur réseau local (192.168.1.40)
- Ports ouverts: 3002 (frontend), 5001 (backend), 5004 (dashboard)
- Ollama GPU fonctionnel
- Self-healing interactif
- Dashboard confiance

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-29 11:23:51 +01:00

698 lines
28 KiB
Python

#!/usr/bin/env python3
"""
Improved Test Script for Workflow Naming System - Real Functionality Testing
This script tests the workflow naming components with real implementations,
authentic data, and actual system integration without mocks or simulations.
Key improvements:
1. Uses real file system operations instead of mocks
2. Tests actual component integration and data flows
3. Uses authentic workflow scenarios with real data
4. Validates actual business logic and system behavior
5. Tests real error conditions and edge cases
"""
import sys
import os
import json
import tempfile
import platform
import socket
import time
from datetime import datetime
from typing import List, Dict, Any
from pathlib import Path
# Add agent_v0 to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'agent_v0'))
from agent_v0.workflow_namer import WorkflowNamer, SessionAnalysis, UIElement
from agent_v0.enhanced_raw_session import EnhancedRawSession, WorkflowMetadata
from agent_v0.raw_session import RawSession, Event
def create_realistic_crm_session() -> RawSession:
    """Build a realistic CRM workflow session using actual system data.

    Mimics a SalesForce customer-creation flow: login click, credential
    entry, navigation to the customer form, field-by-field data entry,
    and a final save click.  The ``time.sleep`` pauses are deliberate so
    the recorded events carry realistic inter-event timestamps.

    Returns:
        RawSession: a populated session ready for naming/analysis tests.

    Note: the unused ``start_time = time.time()`` local from the original
    version was removed — it was never read.
    """
    # Seed the session with information from the real host environment.
    session = RawSession.create(
        user_id=f"{os.getenv('USER', 'test_user')}@company.com",
        user_label=f"{os.getenv('USER', 'Test User')} - Sales Team",
        platform=platform.system().lower(),
        hostname=socket.gethostname(),
        screen_resolution=[1920, 1080],
        customer="Real Company Inc",
        training_label="Customer_Management_Training"
    )

    # Step 1: login to the CRM system.
    session.add_mouse_click_event(
        button="left",
        pos=[150, 100],
        window_title="SalesForce - Login",
        app_name="SalesForce",
        screenshot_id="shot_001"
    )
    time.sleep(0.1)  # realistic delay between user actions

    # Step 2: enter credentials.
    session.add_key_combo_event(
        keys=["user@company.com"],
        window_title="SalesForce - Login",
        app_name="SalesForce",
        screenshot_id="shot_002"
    )
    time.sleep(0.2)

    # Step 3: navigate to the customer form.
    session.add_mouse_click_event(
        button="left",
        pos=[200, 150],
        window_title="SalesForce - Dashboard",
        app_name="SalesForce",
        screenshot_id="shot_003"
    )

    # Step 4: fill the form with realistic customer information,
    # one keystroke sequence per field.
    customer_data = [
        "John Smith",
        "john.smith@client.com",
        "555-0123",
        "123 Main St",
        "Software Engineer"
    ]
    for i, data in enumerate(customer_data):
        session.add_key_combo_event(
            keys=list(data),
            window_title="SalesForce - New Customer",
            app_name="SalesForce",
            screenshot_id=f"shot_{i+4:03d}"
        )
        time.sleep(0.1)

    # Step 5: save the new customer record.
    session.add_mouse_click_event(
        button="left",
        pos=[300, 400],
        window_title="SalesForce - New Customer",
        app_name="SalesForce",
        screenshot_id="shot_009"
    )
    return session
def test_workflow_namer_with_real_data():
    """Test WorkflowNamer with real session data and actual file operations.

    Writes a genuine JSON naming-config file, generates a name from a
    realistic CRM session, then checks validation, uniqueness and
    suggestion logic against production-like inputs.
    """
    print("=== Testing WorkflowNamer with Real Data ===")

    # TemporaryDirectory guarantees cleanup even when an assertion below
    # fails (the original os.unlink/os.rmdir pair only ran on success and
    # leaked the directory on any failure).
    with tempfile.TemporaryDirectory() as config_dir:
        config_path = os.path.join(config_dir, "naming_config.json")
        # Write an actual configuration file for the namer to load.
        real_config = {
            "max_name_length": 60,
            "min_name_length": 8,
            "use_timestamps": True,
            "use_application_names": True,
            "sanitize_names": True,
            "default_prefix": "Workflow",
            "forbidden_chars": r'[<>:"/\\|?*]',
            "replacement_char": "_"
        }
        with open(config_path, 'w') as f:
            json.dump(real_config, f)

        # Initialize namer with the real config file.
        namer = WorkflowNamer(config_path)

        # Name generation from a realistic CRM session.
        test_session = create_realistic_crm_session()
        print("Testing name generation with real session data...")
        generated_name = namer.generate_name(test_session)
        print(f"Generated name: {generated_name}")
        assert generated_name, "Generated name should not be empty"
        assert len(generated_name) >= real_config["min_name_length"], "Name should meet minimum length"
        assert len(generated_name) <= real_config["max_name_length"], "Name should not exceed maximum length"
        assert not any(char in generated_name for char in '<>:"/\\|?*'), "Name should not contain forbidden characters"

        # Validation against real-world edge cases: valid names from actual
        # usage plus invalid names users genuinely try (error messages from
        # the namer are French — do not translate them here).
        print("\nTesting validation with real-world edge cases...")
        real_world_test_cases = [
            ("Customer_Onboarding_Process_2024", True, ""),
            ("Email_Campaign_Setup_Marketing", True, ""),
            ("Invoice_Generation_Automated", True, ""),
            ("Data_Migration_Q4_2024", True, ""),
            ("", False, "Le nom ne peut pas être vide"),
            ("x", False, "Le nom doit contenir au moins 8 caractères"),
            ("workflow<script>alert('xss')</script>", False, "Le nom contient des caractères interdits"),
            ("file/path\\with:invalid*chars", False, "Le nom contient des caractères interdits"),
            ("a" * 100, False, "Le nom ne peut pas dépasser 60 caractères"),
        ]
        for name, expected_valid, expected_error_type in real_world_test_cases:
            is_valid, error = namer.validate_name(name)
            # Restored the pass/fail markers — the original ternary had two
            # identical empty branches (glyphs presumably lost in a paste).
            print(f"'{name}': {'✓' if is_valid else '✗'} {error or 'Valid'}")
            assert is_valid == expected_valid, f"Validation failed for '{name}': expected {expected_valid}, got {is_valid}"
            if not expected_valid:
                assert error, f"Should have error message for invalid name '{name}'"

        # Uniqueness against production-like existing workflow names.
        print("\nTesting uniqueness with production-like workflow names...")
        production_names = [
            "Customer_Onboarding_Process",
            "Customer_Onboarding_Process_01",
            "Customer_Onboarding_Process_02",
            "Email_Campaign_Setup",
            "Invoice_Generation_Automated",
            "Data_Migration_Q4_2024"
        ]
        unique_name = namer.ensure_uniqueness("Customer_Onboarding_Process", production_names)
        print(f"Unique name for existing workflow: {unique_name}")
        assert unique_name not in production_names, "Generated name should be unique"
        assert "Customer_Onboarding_Process" in unique_name, "Should maintain base name"

        # Alternative suggestions must all avoid the existing names.
        suggestions = namer.suggest_alternatives("Customer_Process", production_names)
        print(f"Suggestions for 'Customer_Process': {suggestions}")
        assert len(suggestions) > 0, "Should provide alternative suggestions"
        assert all(s not in production_names for s in suggestions), "All suggestions should be unique"

    print("✓ WorkflowNamer tests completed with real data and file operations\n")
def test_enhanced_session_with_real_persistence():
    """Exercise EnhancedRawSession against the real file system.

    Builds an invoice-processing session from live host information,
    feeds it realistic enhanced events, then checks intelligent naming,
    session analysis, quality scoring and JSON round-tripping on disk.
    """
    print("=== Testing EnhancedRawSession with Real Persistence ===")

    # Seed the session with data taken from the machine running the test.
    enhanced = EnhancedRawSession.create_enhanced(
        user_id=f"{os.getenv('USER', 'test_user')}@company.com",
        user_label=f"{os.getenv('USER', 'Test User')} - Operations",
        workflow_name="Real_Invoice_Processing",
        platform=platform.system(),
        hostname=socket.gethostname(),
        screen_resolution=[1920, 1080]
    )
    print(f"Created session: {enhanced.session_id}")
    print(f"System platform: {enhanced.environment['platform']}")
    print(f"Hostname: {enhanced.environment['hostname']}")

    print("Adding realistic invoice processing events...")
    # (step description, UI element type, element text, detection confidence)
    invoice_steps = [
        ("Open invoice system", "button", "Invoice Management", 0.95),
        ("Select customer", "dropdown", "Customer Selection", 0.90),
        ("Enter invoice amount", "input", "2500.00", 0.88),
        ("Add line items", "button", "Add Item", 0.92),
        ("Set due date", "datepicker", "2024-02-15", 0.85),
        ("Apply tax rate", "input", "8.5%", 0.87),
        ("Generate PDF", "button", "Generate Invoice", 0.94),
        ("Send to customer", "button", "Send Email", 0.91)
    ]
    window = "Invoice Management System - New Invoice"
    for idx, step in enumerate(invoice_steps):
        _label, ui_kind, ui_text, score = step
        enhanced.add_enhanced_mouse_click_event(
            button="left",
            pos=[150 + idx * 20, 200 + idx * 30],
            window_title=window,
            app_name="InvoiceApp",
            screenshot_id=f"shot_{idx + 1:03d}",
            element_type=ui_kind,
            element_text=ui_text,
            confidence=score
        )
        # Input fields also get a matching typing event.
        if ui_kind == "input":
            enhanced.add_enhanced_key_event(
                keys=list(ui_text),
                window_title=window,
                app_name="InvoiceApp",
                screenshot_id=f"shot_{idx + 1:03d}_input",
                text_content=ui_text,
                input_method="typing",
                confidence=score
            )
        time.sleep(0.05)  # realistic inter-event timing

    # Intelligent name generation over the recorded workflow.
    print("Testing intelligent name generation...")
    smart_name = enhanced.generate_intelligent_name()
    print(f"Generated intelligent name: {smart_name}")
    assert smart_name, "Should generate a meaningful name"
    assert len(smart_name) > 5, "Generated name should be descriptive"
    assert any(kw in smart_name.lower() for kw in ['invoice', 'billing', 'financial']), \
        "Generated name should reflect invoice-related workflow"

    # Full session analysis with the real workflow data.
    print("Testing session analysis with real workflow data...")
    result = enhanced.analyze_session()
    print(f"Workflow type: {result.workflow_type}")
    print(f"Primary app: {result.primary_application}")
    print(f"Complexity score: {result.complexity_score:.2f}")
    print(f"UI elements detected: {len(result.ui_elements)}")
    print(f"Text inputs: {len(result.text_inputs)}")
    # Application detection is allowed some flexibility in the exact label.
    assert result.primary_application in ["InvoiceApp", "Invoice Management System", "Invoice"], \
        f"Should identify invoice-related application, got: {result.primary_application}"
    assert result.complexity_score > 0.3, "Invoice workflow should have reasonable complexity"
    assert len(result.ui_elements) > 0, "Should detect UI elements"

    # Quality assessment over the same session.
    print("Testing quality assessment...")
    quality = enhanced.get_workflow_quality_score()
    hints = enhanced.get_workflow_suggestions()
    print(f"Quality score: {quality:.2f}")
    print(f"Number of suggestions: {len(hints)}")
    assert 0 <= quality <= 1, "Quality score should be normalized"
    assert isinstance(hints, list), "Suggestions should be a list"
    assert quality > 0.5, "Rich workflow should have good quality score"

    # Persistence: write the session to disk and verify the JSON payload.
    print("Testing real file system persistence...")
    with tempfile.TemporaryDirectory() as workdir:
        saved_path = enhanced.save_enhanced_json(workdir)
        print(f"Saved to: {saved_path}")
        assert os.path.exists(saved_path), "JSON file should be created"
        assert os.path.getsize(saved_path) > 0, "JSON file should not be empty"

        with open(saved_path, 'r', encoding='utf-8') as handle:
            payload = json.load(handle)
        print(f"JSON contains {len(payload)} top-level keys")

        # The saved document must carry the full structure and real content.
        for key in ('workflow_metadata', 'events', 'enhanced_events', 'quality_score'):
            assert key in payload, f"Saved data should contain {key}"
        assert payload['workflow_metadata']['workflow_name'] == "Real_Invoice_Processing"
        assert len(payload['events']) == len(invoice_steps)
        assert len(payload['enhanced_events']) > 0
        assert 0 <= payload['quality_score'] <= 1

        # Round-trip: the saved file must reload into an equivalent session.
        restored = EnhancedRawSession.from_json(saved_path)
        assert restored.workflow_metadata.workflow_name == enhanced.workflow_metadata.workflow_name
        assert len(restored.events) == len(enhanced.events)

        # A real workflow dump should carry substantial data.
        assert os.stat(saved_path).st_size > 1000, "Saved file should contain substantial data"

    print("✓ EnhancedRawSession tests completed with real persistence\n")
def test_real_workflow_analysis():
    """Check application detection and workflow classification on real-ish data.

    Runs three scenarios (Gmail via Chrome, Excel, SalesForce), builds a
    RawSession for each, then verifies the namer's analysis results and
    the generated workflow name.
    """
    print("=== Testing Real Workflow Analysis ===")

    # Each scenario bundles window titles with the expected detection results.
    scenarios = [
        {
            "name": "Gmail Email Workflow",
            "window_titles": ["Gmail - Inbox", "Gmail - Compose", "Gmail - Sent"],
            "app_name": "Chrome",
            "expected_app": "Gmail",
            "expected_type": "form_filling"
        },
        {
            "name": "Excel Data Entry",
            "window_titles": ["Microsoft Excel - Workbook1", "Excel - Data Entry"],
            "app_name": "Excel",
            "expected_app": "Excel",
            "expected_type": "data_entry"
        },
        {
            "name": "CRM Customer Management",
            "window_titles": ["SalesForce - Customers", "CRM Pro - New Contact"],
            "app_name": "SalesForce",
            "expected_app": "SalesForce",
            "expected_type": "form_filling"
        }
    ]
    namer = WorkflowNamer()

    for case in scenarios:
        print(f"\nTesting scenario: {case['name']}")
        sample = RawSession.create(
            user_id="analyst@company.com",
            user_label="Business Analyst"
        )

        # Form-filling and data-entry scenarios also get typing events.
        wants_typing = "form" in case["expected_type"] or "data" in case["expected_type"]
        for idx, title in enumerate(case["window_titles"]):
            sample.add_mouse_click_event(
                button="left",
                pos=[100 + idx * 50, 150 + idx * 25],
                window_title=title,
                app_name=case["app_name"],
                screenshot_id=f"shot_{idx + 1:03d}"
            )
            if wants_typing:
                sample.add_key_combo_event(
                    keys=["test", "data", str(idx)],
                    window_title=title,
                    app_name=case["app_name"],
                    screenshot_id=f"shot_{idx + 1:03d}_key"
                )

        # Run the namer's real detection logic over the session.
        detected = namer._analyze_session(sample)
        print(f" Detected app: {detected.primary_application}")
        print(f" Detected type: {detected.workflow_type}")
        print(f" Complexity: {detected.complexity_score:.2f}")
        # Either the refined app label or the raw app name is acceptable.
        assert detected.primary_application in [case["expected_app"], case["app_name"]], \
            f"Should detect correct application for {case['name']}"

        proposed = namer.generate_name(sample)
        print(f" Generated name: {proposed}")
        assert proposed, "Should generate name for real scenario"
        assert len(proposed) > 5, "Generated name should be meaningful"

    print("✓ Real workflow analysis tests completed\n")
def test_real_ui_validation():
    """Test UI validation logic without mocks.

    When PyQt5 is importable, the Qt-backed validation path is exercised;
    otherwise the console fallback path is tested.  ``WorkflowNamer`` is
    already imported at module level (top of file), so the redundant
    function-scope re-imports from the original were removed.
    """
    print("=== Testing Real UI Validation Logic ===")
    try:
        # Probe for an actual Qt installation; ImportError selects the
        # console fallback branch below.
        from PyQt5.QtWidgets import QApplication
        app = QApplication.instance()
        if app is None:
            app = QApplication(sys.argv)
            app.setQuitOnLastWindowClosed(False)
        print("Qt5 available - testing real validation logic")

        namer = WorkflowNamer()

        # (candidate name, whether validation should accept it)
        validation_tests = [
            ("Valid_Workflow_Name", True),
            ("Another_Valid_Name_2024", True),
            ("", False),
            ("x", False),
            ("name<with>invalid:chars", False),
            ("a" * 100, False)
        ]
        for name, should_be_valid in validation_tests:
            is_valid, error = namer.validate_name(name)
            # Restored the pass/fail markers — the original ternary had two
            # identical empty branches (glyphs presumably lost in a paste).
            print(f" '{name}': {'✓' if is_valid else '✗'} {error or 'Valid'}")
            assert is_valid == should_be_valid, f"Validation mismatch for '{name}'"

        # Uniqueness: a clashing name must be rewritten to a fresh one.
        existing_names = ["Workflow_A", "Workflow_B", "Workflow_C"]
        unique_name = namer.ensure_uniqueness("Workflow_A", existing_names)
        print(f" Uniqueness test: 'Workflow_A' -> '{unique_name}'")
        assert unique_name not in existing_names, "Should generate unique name"

        # Suggestions must all avoid the existing names.
        suggestions = namer.suggest_alternatives("Test_Workflow", existing_names)
        print(f" Suggestions: {suggestions}")
        assert len(suggestions) > 0, "Should provide suggestions"
        assert all(s not in existing_names for s in suggestions), "All suggestions should be unique"
        print("✓ Qt5 validation logic tested successfully")
    except ImportError:
        print("Qt5 not available - testing fallback validation")
        namer = WorkflowNamer()

        # Real fallback name generation (not mocked).
        fallback_name = namer._generate_fallback_name()
        print(f" Real fallback name: {fallback_name}")
        assert fallback_name, "Fallback should generate name"
        assert "Workflow" in fallback_name, "Should use default prefix"

        # Console-based uniqueness (real implementation).
        existing = ["Test_01", "Test_02"]
        console_unique = namer.ensure_uniqueness("Test", existing)
        print(f" Console uniqueness: 'Test' -> '{console_unique}'")
        assert console_unique not in existing, "Console mode should ensure uniqueness"
    print()
def test_end_to_end_integration():
    """Test complete end-to-end integration with real data flow.

    Simulates an e-commerce order-processing workflow, then validates
    intelligent naming, the closing analysis pass, metadata accuracy,
    and full JSON persistence of the resulting session.
    """
    print("=== Testing End-to-End Integration ===")

    # Let the system generate the workflow name itself.
    session = EnhancedRawSession.create_enhanced(
        user_id=f"{os.getenv('USER', 'integration_user')}@company.com",
        user_label="Integration Test User",
        workflow_name=None,  # Let system generate
        auto_generate_name=True
    )

    print("Simulating e-commerce order processing workflow...")
    # (description, window title, app name, UI element type, element text)
    ecommerce_steps = [
        ("Login to admin panel", "Admin Dashboard - Login", "AdminApp", "button", "Login"),
        ("Navigate to orders", "Admin Dashboard - Orders", "AdminApp", "link", "Orders"),
        ("Select pending order", "Admin Dashboard - Order #12345", "AdminApp", "button", "Order #12345"),
        ("Update order status", "Admin Dashboard - Order Details", "AdminApp", "dropdown", "Processing"),
        ("Add tracking number", "Admin Dashboard - Order Details", "AdminApp", "input", "1Z999AA1234567890"),
        ("Send notification", "Admin Dashboard - Order Details", "AdminApp", "button", "Send Customer Email"),
        ("Print shipping label", "Admin Dashboard - Shipping", "AdminApp", "button", "Print Label"),
        ("Mark as shipped", "Admin Dashboard - Order Details", "AdminApp", "button", "Mark Shipped")
    ]
    # Each "input" step adds one key event on top of its click event; count
    # them instead of hard-coding "+1" so the assertion below stays correct
    # if the step list is ever edited.
    input_step_count = sum(
        1 for _, _, _, element_type, _ in ecommerce_steps if element_type == "input"
    )

    for i, (description, window_title, app_name, element_type, element_text) in enumerate(ecommerce_steps):
        # Mouse click for every step.
        session.add_enhanced_mouse_click_event(
            button="left",
            pos=[200 + i*15, 150 + i*20],
            window_title=window_title,
            app_name=app_name,
            screenshot_id=f"ecom_{i+1:03d}",
            element_type=element_type,
            element_text=element_text,
            confidence=0.85 + (i % 3) * 0.05  # Vary confidence realistically
        )
        # Text input for input fields.
        if element_type == "input":
            session.add_enhanced_key_event(
                keys=list(element_text),
                window_title=window_title,
                app_name=app_name,
                screenshot_id=f"ecom_{i+1:03d}_input",
                text_content=element_text,
                input_method="typing",
                confidence=0.90
            )
        time.sleep(0.02)  # Realistic timing

    # Name generation should reflect the actual workflow content.
    intelligent_name = session.generate_intelligent_name()
    print(f"Generated intelligent name: {intelligent_name}")
    assert intelligent_name, "Should generate meaningful name"
    assert any(keyword in intelligent_name.lower() for keyword in ['order', 'admin', 'processing', 'ecommerce']), \
        f"Name should reflect e-commerce workflow: {intelligent_name}"

    # Close the session, triggering the final analysis pass.
    session.close_with_analysis()

    # Validate final analysis results when metadata was produced.
    if session.workflow_metadata:
        print(f"Final workflow name: {session.workflow_metadata.workflow_name}")
        print(f"Workflow type: {session.workflow_metadata.workflow_type}")
        print(f"Primary app: {session.workflow_metadata.primary_application}")
        print(f"Complexity: {session.workflow_metadata.complexity_score:.2f}")
        print(f"UI elements: {session.workflow_metadata.ui_elements_count}")
        print(f"Text inputs: {session.workflow_metadata.text_inputs_count}")
        assert session.workflow_metadata.primary_application == "AdminApp", \
            f"Should identify admin application: {session.workflow_metadata.primary_application}"
        assert session.workflow_metadata.complexity_score > 0.6, \
            "E-commerce workflow should have high complexity"
        assert session.workflow_metadata.ui_elements_count == len(ecommerce_steps), \
            "Should count all UI interactions"
        assert session.workflow_metadata.text_inputs_count > 0, \
            "Should detect text inputs"

    # Persist the complete workflow and verify the stored document.
    print("Testing complete workflow persistence...")
    with tempfile.TemporaryDirectory() as temp_dir:
        json_path = session.save_enhanced_json(temp_dir)
        with open(json_path, 'r', encoding='utf-8') as f:
            complete_data = json.load(f)

        # All data must be preserved on disk.
        assert complete_data['workflow_metadata']['workflow_name'] == session.workflow_metadata.workflow_name
        assert complete_data['workflow_metadata']['primary_application'] == "AdminApp"
        # Clicks plus one key event per input step (computed, not hard-coded).
        assert len(complete_data['events']) == len(ecommerce_steps) + input_step_count
        assert len(complete_data['enhanced_events']) > 0
        assert complete_data['quality_score'] > 0.7  # High quality workflow

        # Enhanced events must preserve all per-interaction metadata.
        first_enhanced_event = complete_data['enhanced_events'][0]
        assert 'element_type' in first_enhanced_event
        assert 'interaction_confidence' in first_enhanced_event
        assert first_enhanced_event['element_type'] == 'button'

        # Verify file size indicates rich data.
        file_size = os.path.getsize(json_path)
        assert file_size > 2000, f"Complete workflow should generate substantial data: {file_size} bytes"

    print("✓ End-to-end integration tests completed successfully\n")
def main():
    """Run all real functionality tests with comprehensive error handling.

    Each test runs inside its own try/except so one failure does not stop
    the remaining tests.  Returns a process exit code: 0 when every test
    passed, 1 otherwise.
    """
    import traceback  # imported once, instead of inside every except block

    print("Enhanced Workflow Naming System Tests - Real Functionality")
    print("=" * 65)
    print(f"Running on: {platform.system()} {platform.release()}")
    print(f"Python: {sys.version}")
    print(f"User: {os.getenv('USER', 'unknown')}")
    print(f"Hostname: {socket.gethostname()}")
    print("=" * 65)

    # (result key, label used in failure messages, test callable) — this
    # table replaces five copy-pasted try/except blocks; the failure
    # message strings are preserved exactly.
    test_suite = [
        ('workflow_namer_real_data', 'WorkflowNamer real data',
         test_workflow_namer_with_real_data),
        ('enhanced_session_persistence', 'EnhancedRawSession persistence',
         test_enhanced_session_with_real_persistence),
        ('real_workflow_analysis', 'Real workflow analysis',
         test_real_workflow_analysis),
        ('real_ui_validation', 'Real UI validation',
         test_real_ui_validation),
        ('end_to_end_integration', 'End-to-end integration',
         test_end_to_end_integration),
    ]
    test_results = {key: False for key, _, _ in test_suite}

    try:
        # Run each test with individual error handling.
        for key, label, test_func in test_suite:
            try:
                test_func()
                test_results[key] = True
            except Exception as e:
                print(f"✗ {label} test failed: {e}")
                traceback.print_exc()

        # Report comprehensive results.
        print("=" * 65)
        print("Real Functionality Test Results:")
        print("=" * 65)
        passed_tests = sum(test_results.values())
        total_tests = len(test_results)
        for test_name, passed in test_results.items():
            status = "✓ PASSED" if passed else "✗ FAILED"
            print(f" {test_name.replace('_', ' ').title()}: {status}")
        print(f"\nOverall: {passed_tests}/{total_tests} tests passed ({passed_tests/total_tests*100:.1f}%)")

        if passed_tests == total_tests:
            print("\n🎉 All real functionality tests completed successfully!")
            print("\nKey Achievements:")
            print(" ✓ Real file system operations tested")
            print(" ✓ Actual component integration validated")
            print(" ✓ Authentic workflow scenarios processed")
            print(" ✓ Real business logic verified")
            print(" ✓ Production-like data flows tested")
            return 0
        else:
            print(f"\n{total_tests - passed_tests} test(s) failed")
            print("Some real functionality tests need attention")
            return 1
    except Exception as e:
        # Catch-all boundary for failures outside the per-test handlers.
        print(f"✗ Test suite failed with critical error: {e}")
        traceback.print_exc()
        return 1
# Script entry point: run the suite and propagate its exit code to the shell.
if __name__ == "__main__":
    sys.exit(main())