#!/usr/bin/env python3
"""End-to-end check of the state-embedding pipeline backed by CLIP.

Run as a script: it builds a few synthetic ``ScreenState`` objects, embeds
them with ``StateEmbeddingBuilder``, compares the resulting vectors, saves one
embedding to disk, and exits with status 0 on success or 1 on failure.
"""

import logging
import os
import sys
import traceback
from pathlib import Path

# Make the project root (parent of this file's directory) importable so the
# ``core`` package resolves when the script is launched directly.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import numpy as np  # noqa: E402
from PIL import Image  # noqa: E402,F401  (not used in this file; kept as-is)

from core.models.screen_state import ScreenState  # noqa: E402
from core.models.ui_element import (  # noqa: E402
    UIElement,
    UIElementEmbeddings,
    VisualFeatures,
)
from core.embedding.state_embedding_builder import StateEmbeddingBuilder  # noqa: E402
from core.embedding.clip_embedder import create_clip_embedder  # noqa: E402,F401

# Logging configuration
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def create_test_screen_state() -> ScreenState:
    """Build a synthetic login-page ``ScreenState`` with four UI elements.

    Returns:
        A ``ScreenState`` whose ``screenshot_path`` is the first candidate
        image found next to this file, or ``None`` when none of them exists.
    """
    # A handful of hand-written UI elements describing a login form.
    elements = [
        UIElement(
            element_id="login_btn",
            type="button",
            role="primary_action",
            bbox=(50, 50, 150, 90),
            center=(100, 70),
            label="Login",
            label_confidence=0.95,
            embeddings=UIElementEmbeddings(),
            visual_features=VisualFeatures(
                dominant_color="#0066cc",
                has_icon=False,
                shape="rounded_rectangle",
                size_category="medium",
            ),
            confidence=0.9,
        ),
        UIElement(
            element_id="username_field",
            type="text_input",
            role="form_input",
            bbox=(50, 120, 300, 150),
            center=(175, 135),
            label="Username",
            label_confidence=0.90,
            embeddings=UIElementEmbeddings(),
            visual_features=VisualFeatures(
                dominant_color="#ffffff",
                has_icon=False,
                shape="rectangle",
                size_category="large",
            ),
            confidence=0.85,
        ),
        UIElement(
            element_id="password_field",
            type="text_input",
            role="form_input",
            bbox=(50, 160, 300, 190),
            center=(175, 175),
            label="Password",
            label_confidence=0.92,
            embeddings=UIElementEmbeddings(),
            visual_features=VisualFeatures(
                dominant_color="#ffffff",
                has_icon=False,
                shape="rectangle",
                size_category="large",
            ),
            confidence=0.88,
        ),
        UIElement(
            element_id="nav_menu",
            type="menu_item",
            role="navigation",
            bbox=(50, 200, 350, 240),
            center=(200, 220),
            label="Navigation",
            label_confidence=0.88,
            embeddings=UIElementEmbeddings(),
            visual_features=VisualFeatures(
                dominant_color="#f0f0f0",
                has_icon=True,
                shape="rectangle",
                size_category="large",
            ),
            tags=["menu", "navigation"],
            confidence=0.92,
        ),
    ]

    # Use the first existing screenshot next to this file, if any.
    # NOTE(review): when none exists, ``screenshot_path`` stays ``None``;
    # confirm that ScreenState / the builder accept that.
    script_dir = os.path.dirname(__file__)
    test_images = ["test_screenshot.png", "real_world_screenshot.png", "synthetic_ui.png"]
    candidates = (os.path.join(script_dir, img_name) for img_name in test_images)
    screenshot_path = next((p for p in candidates if os.path.exists(p)), None)

    return ScreenState(
        timestamp=1700000000.0,
        window_title="Test Application - Login Page",
        screenshot_path=screenshot_path,
        ui_elements=elements,
        screen_size=(800, 600),
    )


def _create_similar_state(screenshot_path) -> ScreenState:
    """Build a state resembling the login page (close title, shifted elements)."""
    similar_elements = [
        UIElement(
            element_id="signin_btn",
            type="button",
            role="primary_action",
            bbox=(60, 60, 160, 100),
            center=(110, 80),
            label="Sign In",
            label_confidence=0.93,
            embeddings=UIElementEmbeddings(),
            visual_features=VisualFeatures(
                dominant_color="#0066cc",
                has_icon=False,
                shape="rounded_rectangle",
                size_category="medium",
            ),
            confidence=0.9,
        ),
        UIElement(
            element_id="user_field",
            type="text_input",
            role="form_input",
            bbox=(60, 130, 310, 160),
            center=(185, 145),
            label="User",
            label_confidence=0.88,
            embeddings=UIElementEmbeddings(),
            visual_features=VisualFeatures(
                dominant_color="#ffffff",
                has_icon=False,
                shape="rectangle",
                size_category="large",
            ),
            confidence=0.85,
        ),
    ]
    return ScreenState(
        timestamp=1700000060.0,
        window_title="Test Application - Sign In Page",
        screenshot_path=screenshot_path,  # same screenshot as the reference state
        ui_elements=similar_elements,
        screen_size=(800, 600),
    )


def _create_different_state(screenshot_path) -> ScreenState:
    """Build a state unrelated to the login page (search-engine home page)."""
    different_elements = [
        UIElement(
            element_id="search_box",
            type="text_input",
            role="search_field",
            bbox=(100, 50, 400, 80),
            center=(250, 65),
            label="Search",
            label_confidence=0.91,
            embeddings=UIElementEmbeddings(),
            visual_features=VisualFeatures(
                dominant_color="#ffffff",
                has_icon=True,
                shape="rounded_rectangle",
                size_category="large",
            ),
            confidence=0.9,
        ),
    ]
    return ScreenState(
        timestamp=1700000120.0,
        window_title="Search Engine - Main Page",
        screenshot_path=screenshot_path,
        ui_elements=different_elements,
        screen_size=(1200, 800),
    )


def test_embedding_pipeline():
    """Run the full embedding pipeline and report progress on stdout.

    Steps: build a test state, create the builder, embed the state, list the
    component vectors, compare against a similar and a different state, save
    the embedding under ``test_embeddings/`` (relative to the current working
    directory), then print a summary.

    Returns:
        ``True`` when every step completed and the saved files exist;
        ``False`` when an exception was raised or the saved files are missing.
    """
    banner = "=" * 70
    print(banner)
    print(" Test Pipeline Embedding Complet - RPA Vision V3")
    print(banner)
    print()

    # Top-level boundary of the script: any failure is reported and turned
    # into a ``False`` result so the caller can choose the exit status.
    try:
        # 1. Build a test ScreenState
        print("1. Création du ScreenState de test...")
        screen_state = create_test_screen_state()
        print(f" ✓ ScreenState créé avec {len(screen_state.ui_elements)} éléments")
        print(f" ✓ Titre: '{screen_state.window_title}'")
        print(f" ✓ Screenshot: {screen_state.screenshot_path}")
        print()

        # 2. Create the StateEmbeddingBuilder with CLIP
        print("2. Création du StateEmbeddingBuilder avec CLIP...")
        builder = StateEmbeddingBuilder(use_real_embedders=True)
        print(" ✓ Builder créé avec embedder CLIP")
        print()

        # 3. Generate the state embedding
        print("3. Génération de l'embedding d'état...")
        state_embedding = builder.build_embedding(screen_state)
        print(" ✓ StateEmbedding généré")
        print(f" ✓ ID: {state_embedding.state_id}")
        print(f" ✓ Timestamp: {state_embedding.timestamp}")
        print(f" ✓ Vecteur fusionné: {state_embedding.fused_vector.shape}")
        print(f" ✓ Norme L2: {np.linalg.norm(state_embedding.fused_vector):.3f}")
        print()

        # 4. Inspect the per-component vectors
        print("4. Analyse des composants d'embedding...")
        components = state_embedding.component_vectors
        for component, vector in components.items():
            norm = np.linalg.norm(vector)
            print(f" {component:>8}: {vector.shape} (norm: {norm:.3f})")
        print()

        # 5. Similarity against other states
        print("5. Test de similarité...")

        # NOTE(review): a raw dot product equals cosine similarity only if the
        # fused vectors are unit-norm; step 3 prints the norm so this can be
        # checked — confirm the builder normalizes.
        similar_state = _create_similar_state(screen_state.screenshot_path)
        similar_embedding = builder.build_embedding(similar_state)
        similarity = np.dot(
            state_embedding.fused_vector, similar_embedding.fused_vector
        )
        print(f" Similarité entre états similaires: {similarity:.3f}")

        different_state = _create_different_state(screen_state.screenshot_path)
        different_embedding = builder.build_embedding(different_state)
        similarity_diff = np.dot(
            state_embedding.fused_vector, different_embedding.fused_vector
        )
        print(f" Similarité entre états différents: {similarity_diff:.3f}")
        print()

        # 6. Persistence
        print("6. Test de sauvegarde...")
        output_dir = Path("test_embeddings")
        output_dir.mkdir(exist_ok=True)

        saved_path = builder.save_embedding(state_embedding, output_dir)
        print(f" ✓ Embedding sauvegardé: {saved_path}")

        # Check that both expected files were actually written.
        vector_file = output_dir / f"{state_embedding.state_id}.npy"
        metadata_file = output_dir / f"{state_embedding.state_id}_metadata.json"

        if vector_file.exists() and metadata_file.exists():
            print(f" ✓ Fichiers créés: .npy ({vector_file.stat().st_size} bytes)")
            print(f" ✓ Fichiers créés: .json ({metadata_file.stat().st_size} bytes)")
        else:
            print(" ❌ Erreur: fichiers manquants")
            # Missing output is a failure: do not fall through to the
            # success banner below.
            return False
        print()

        # 7. Summary
        print("7. Résumé des performances...")
        print(f" Dimension des embeddings: {state_embedding.fused_vector.shape[0]}")
        print(f" Nombre de composants: {len(state_embedding.component_vectors)}")
        print(f" Similarité états similaires: {similarity:.3f}")
        print(f" Similarité états différents: {similarity_diff:.3f}")
        print()

        print(banner)
        print("🎉 Test Pipeline Embedding Complet RÉUSSI !")
        print(banner)
        print()
        print("Prochaines étapes:")
        print(" 1. ✅ CLIP embedders fonctionnels")
        print(" 2. ✅ StateEmbeddingBuilder intégré")
        print(" 3. ⏳ Finaliser Phase 2 (tests)")
        print(" 4. ⏳ Phase 3.5 (Optimisation Asynchrone)")
        print(" 5. ⏳ Phase 4 (Workflow Graphs)")
        print()

        return True

    except Exception as e:
        print(f"❌ Erreur lors du test du pipeline: {e}")
        traceback.print_exc()
        return False


if __name__ == "__main__":
    success = test_embedding_pipeline()

    if not success:
        print()
        print("=" * 70)
        print("❌ Test échoué - Vérifications:")
        print(" 1. OpenCLIP est-il installé ? (bash rpa_vision_v3/install_clip.sh)")
        print(" 2. PyTorch est-il installé ?")
        print(" 3. Les modèles sont-ils téléchargés ?")
        print("=" * 70)
        # sys.exit rather than the site-provided ``exit`` helper, which is
        # not guaranteed to exist outside interactive sessions.
        sys.exit(1)

    sys.exit(0)