#!/usr/bin/env python3
"""
Test of the detailed feedback reported when workflow matching fails.
"""

import shutil
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import List, Optional

import numpy as np

from geniusia2.core.enhanced_workflow_matcher import EnhancedWorkflowMatcher, MatchDifference
from geniusia2.core.multimodal_embedding_manager import MultiModalEmbeddingManager
from geniusia2.core.ui_element_models import (
    UIElement, UIElementType, VisualData, TextData,
    ElementProperties, ElementContext, EnrichedScreenState,
    WindowInfo, RawData, PerceptionData
)
from geniusia2.core.logger import Logger

print('Test du feedback détaillé sur échec de matching')
print('=' * 60)

# Build the components under test. The logger writes under 'test_logs' and
# the embedding manager is pointed at 'test_data'; both directories are
# removed again by the cleanup step at the end of this script.
logger = Logger(log_dir='test_logs')
mm_manager = MultiModalEmbeddingManager(logger=logger, data_dir='test_data')
matcher = EnhancedWorkflowMatcher(mm_manager, logger=logger)

# Workflow structures used by the scenarios in this script
@dataclass
class WorkflowStep:
    """One step of a workflow, as handed to the matcher by this test script."""

    step_id: int
    action_type: str  # 'click' or 'type' in the scenarios of this script
    target_description: str
    position: tuple  # two-number tuple in this script, e.g. (140, 215)
    window: str
    # Optional embedding vector for the step; several scenarios leave it unset.
    embedding: Optional[np.ndarray] = None

@dataclass
class Workflow:
    """A named, ordered list of WorkflowStep objects handed to the matcher."""

    workflow_id: str
    name: str
    steps: List[WorkflowStep]
    # NOTE(review): presumably the number of times the workflow was observed
    # and how much it is trusted -- confirm against the matcher's expectations.
    repetitions: int
    confidence: float

# Scenario 1: perfect match (no feedback expected)
print('\n1. Test Match Parfait (pas de feedback):')
print('-' * 60)

# Single detected UI element, reported with high confidence.
button = UIElement(
    element_id='btn_001',
    type=UIElementType.BUTTON,
    role='primary_action',
    bbox=(100, 200, 80, 30),
    label='Submit',
    visual=VisualData('test.png', 'test', 'test.npy'),
    text=TextData('Submit', 'submit', 'test', 'test.npy'),
    properties=ElementProperties(is_clickable=True),
    context=ElementContext('TestApp', 'Test Window'),
    confidence=0.95,
)

screen_state = EnrichedScreenState(
    screen_state_id='screen_001',
    timestamp=datetime.now(),
    session_id='session_001',
    window=WindowInfo('TestApp', 'Test Window', True),
    raw=RawData('test.png'),
    perception=PerceptionData(['Extracted text']),
    ui_elements=[button],
    state_embedding=None,
    context=None,
)

# Seeded generator so that repeated runs feed the matcher the same vector.
rng = np.random.default_rng(42)
reference_embedding = rng.random(512)

step1 = WorkflowStep(1, 'click', 'submit button', (140, 215), 'test', reference_embedding)
workflow_perfect = Workflow('wf_perfect', 'Perfect Match', [step1], 5, 0.95)

# Simulate a perfect embedding: the current state carries the same vector as
# the workflow step. Two independently drawn random vectors would not model
# a perfect match. A copy is passed so the step's own array stays untouched.
current_embedding = reference_embedding.copy()

match_perfect = matcher._compute_workflow_match(
    screen_state, current_embedding, workflow_perfect
)

print(f'✓ Score composite: {match_perfect.composite_score:.3f}')
print(f'✓ Confiance: {match_perfect.confidence:.3f}')
# `differences` may be None or empty; both count as "no feedback".
perfect_differences = match_perfect.differences or []
print(f'✓ Différences: {len(perfect_differences)}')
if perfect_differences:
    print('⚠ Feedback inattendu pour un match parfait!')
else:
    print('✓ Pas de feedback (comme attendu pour un bon match)')

# Scenario 2: partial match with missing elements
print('\n2. Test Match Partiel (éléments manquants):')
print('-' * 60)

# Workflow with 3 steps while only 1 element is detected on screen.
# None of these steps carries an embedding.
step1 = WorkflowStep(1, 'type', 'username field', (150, 162), 'test')
step2 = WorkflowStep(2, 'type', 'password field', (150, 200), 'test')
step3 = WorkflowStep(3, 'click', 'submit button', (140, 250), 'test')
workflow_partial = Workflow('wf_partial', 'Partial Match', [step1, step2, step3], 5, 0.8)

# Only 1 element detected: the `button` built in scenario 1 is reused.
screen_state_partial = EnrichedScreenState(
    screen_state_id='screen_002',
    timestamp=datetime.now(),
    session_id='session_001',
    window=WindowInfo('TestApp', 'Test Window', True),
    raw=RawData('test.png'),
    perception=PerceptionData(['Extracted text']),
    ui_elements=[button],  # only 1 element instead of 3
    state_embedding=None,
    context=None,
)

match_partial = matcher._compute_workflow_match(
    screen_state_partial, current_embedding, workflow_partial
)

print(f'✓ Score composite: {match_partial.composite_score:.3f}')
print(f'✓ Confiance: {match_partial.confidence:.3f}')
# `differences` may be None or empty; both are reported as 0.
partial_differences = match_partial.differences or []
print(f'✓ Différences détectées: {len(partial_differences)}')

if partial_differences:
    print('\n📋 Feedback détaillé:')
    for i, diff in enumerate(partial_differences, 1):
        print(f'\n {i}. [{diff.severity.upper()}] {diff.difference_type}')
        print(f' Description: {diff.description}')
        # The remaining fields are optional and printed only when set.
        if diff.expected:
            print(f' Attendu: {diff.expected}')
        if diff.actual:
            print(f' Actuel: {diff.actual}')
        if diff.suggestion:
            print(f' 💡 Suggestion: {diff.suggestion}')

# Scenario 3: feedback summary
print('\n3. Test Résumé de Feedback:')
print('-' * 60)

# Reuses the scenario 2 result; the summary is printed only when that match
# actually reported differences.
if match_partial.differences:
    summary = match_partial.get_feedback_summary()
    print(summary)

# Scenario 4: match with low confidence
print('\n4. Test Match avec Faible Confiance:')
print('-' * 60)

# Same geometry and role as the scenario 1 button, but with a different label
# and a much lower detection confidence.
button_low_conf = UIElement(
    element_id='btn_002',
    type=UIElementType.BUTTON,
    role='primary_action',
    bbox=(100, 200, 80, 30),
    label='Maybe Submit',
    visual=VisualData('test.png', 'test', 'test.npy'),
    text=TextData('Maybe Submit', 'maybe submit', 'test', 'test.npy'),
    properties=ElementProperties(is_clickable=True),
    context=ElementContext('TestApp', 'Test Window'),
    confidence=0.35,  # low confidence
)

screen_state_low_conf = EnrichedScreenState(
    screen_state_id='screen_003',
    timestamp=datetime.now(),
    session_id='session_001',
    window=WindowInfo('TestApp', 'Test Window', True),
    raw=RawData('test.png'),
    perception=PerceptionData(['Extracted text']),
    ui_elements=[button_low_conf],
    state_embedding=None,
    context=None,
)

# Matched against the scenario 1 workflow with the same current embedding.
match_low_conf = matcher._compute_workflow_match(
    screen_state_low_conf, current_embedding, workflow_perfect
)

print(f'✓ Score composite: {match_low_conf.composite_score:.3f}')
print(f'✓ Confiance: {match_low_conf.confidence:.3f}')
# `differences` may be None or empty; both are reported as 0.
low_conf_differences = match_low_conf.differences or []
print(f'✓ Différences détectées: {len(low_conf_differences)}')

if low_conf_differences:
    print('\n📋 Résumé:')
    print(match_low_conf.get_feedback_summary())

# Scenario 5: conversion to a dictionary (JSON serialization)
print('\n5. Test Sérialisation JSON:')
print('-' * 60)

match_dict = match_partial.to_dict()
print(f'✓ Workflow ID: {match_dict["workflow_id"]}')
print(f'✓ Score composite: {match_dict["composite_score"]:.3f}')

has_differences = "differences" in match_dict
print(f'✓ Différences incluses: {has_differences}')
if has_differences:
    # The key can be present with an empty (or None) value; indexing [0]
    # unconditionally would then raise instead of reporting zero differences.
    serialized_differences = match_dict["differences"] or []
    print(f'✓ Nombre de différences: {len(serialized_differences)}')
    if serialized_differences:
        print(f'✓ Première différence: {serialized_differences[0]["type"]}')

# NOTE(review): this banner is printed unconditionally; nothing above asserts
# on the match results, so it only shows the script ran to completion.
print('\n' + '=' * 60)
print('✅ Tous les tests de feedback détaillé réussis!')
print('=' * 60)

# Cleanup: remove the directories handed to the logger and the embedding
# manager at the top of the script, if they were created.
for leftover_dir in (Path('test_data'), Path('test_logs')):
    if leftover_dir.exists():
        shutil.rmtree(leftover_dir)