Files
rpa_vision_v3/examples/test_embedding_pipeline.py
Dom a27b74cf22 v1.0 - Version stable: multi-PC, détection UI-DETR-1, 3 modes exécution
- Frontend v4 accessible sur réseau local (192.168.1.40)
- Ports ouverts: 3002 (frontend), 5001 (backend), 5004 (dashboard)
- Ollama GPU fonctionnel
- Self-healing interactif
- Dashboard confiance

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-29 11:23:51 +01:00

312 lines
11 KiB
Python
Executable File

#!/usr/bin/env python3
"""Test of the complete embedding pipeline with CLIP."""
import sys
import os
# Append the parent of this file's directory to the import search path.
# NOTE(review): presumably this is what lets the `core.*` imports below
# resolve when the file is run directly as a script -- confirm against the
# repository layout. It must stay above those imports.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import numpy as np
from PIL import Image
import logging
from pathlib import Path
from core.models.screen_state import ScreenState
from core.models.ui_element import UIElement, UIElementEmbeddings, VisualFeatures
from core.embedding.state_embedding_builder import StateEmbeddingBuilder
from core.embedding.clip_embedder import create_clip_embedder
# Logging configuration: INFO level on the root logger, plus a module logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def create_test_screen_state() -> ScreenState:
    """Build a sample ScreenState describing a small login page.

    The state holds four UI elements (a login button, username and password
    inputs, and a navigation menu item). Its screenshot path is the first of
    a few known image names found next to this file, or None when none of
    them exists.

    Returns:
        ScreenState: the populated test state.
    """

    def make_text_input(element_id, bbox, center, label, label_confidence, confidence):
        # Both form inputs share the same type, role and visual appearance;
        # a fresh embeddings / visual-features object is built for each one.
        return UIElement(
            element_id=element_id,
            type="text_input",
            role="form_input",
            bbox=bbox,
            center=center,
            label=label,
            label_confidence=label_confidence,
            embeddings=UIElementEmbeddings(),
            visual_features=VisualFeatures(
                dominant_color="#ffffff",
                has_icon=False,
                shape="rectangle",
                size_category="large",
            ),
            confidence=confidence,
        )

    login_button = UIElement(
        element_id="login_btn",
        type="button",
        role="primary_action",
        bbox=(50, 50, 150, 90),
        center=(100, 70),
        label="Login",
        label_confidence=0.95,
        embeddings=UIElementEmbeddings(),
        visual_features=VisualFeatures(
            dominant_color="#0066cc",
            has_icon=False,
            shape="rounded_rectangle",
            size_category="medium",
        ),
        confidence=0.9,
    )
    username_input = make_text_input(
        "username_field", (50, 120, 300, 150), (175, 135), "Username", 0.90, 0.85
    )
    password_input = make_text_input(
        "password_field", (50, 160, 300, 190), (175, 175), "Password", 0.92, 0.88
    )
    navigation_item = UIElement(
        element_id="nav_menu",
        type="menu_item",
        role="navigation",
        bbox=(50, 200, 350, 240),
        center=(200, 220),
        label="Navigation",
        label_confidence=0.88,
        embeddings=UIElementEmbeddings(),
        visual_features=VisualFeatures(
            dominant_color="#f0f0f0",
            has_icon=True,
            shape="rectangle",
            size_category="large",
        ),
        tags=["menu", "navigation"],
        confidence=0.92,
    )

    # Pick the first candidate screenshot that exists beside this script;
    # fall back to None when no candidate is present.
    script_dir = os.path.dirname(__file__)
    candidate_paths = (
        os.path.join(script_dir, file_name)
        for file_name in ("test_screenshot.png", "real_world_screenshot.png", "synthetic_ui.png")
    )
    found_screenshot = next(
        (path for path in candidate_paths if os.path.exists(path)),
        None,
    )

    return ScreenState(
        timestamp=1700000000.0,
        window_title="Test Application - Login Page",
        screenshot_path=found_screenshot,
        ui_elements=[login_button, username_input, password_input, navigation_item],
        screen_size=(800, 600),
    )
def test_embedding_pipeline():
    """Run the end-to-end embedding pipeline check and report the outcome.

    The steps, each reported on stdout, are:
      1. build a test ScreenState,
      2. create a StateEmbeddingBuilder with real embedders enabled,
      3. build the state embedding and print its basic properties,
      4. print the shape and norm of every component vector,
      5. compare the embedding with a similar and a different state,
      6. save the embedding under ``test_embeddings`` and verify the files,
      7. print a short summary.

    Returns:
        bool: True when every step completed and both expected output files
        exist; False when any exception was raised (its traceback is
        printed) or when a saved file is missing.
    """
    print("=" * 70)
    print(" Test Pipeline Embedding Complet - RPA Vision V3")
    print("=" * 70)
    print()
    try:
        # 1. Build a test ScreenState
        print("1. Création du ScreenState de test...")
        screen_state = create_test_screen_state()
        print(f" ✓ ScreenState créé avec {len(screen_state.ui_elements)} éléments")
        print(f" ✓ Titre: '{screen_state.window_title}'")
        print(f" ✓ Screenshot: {screen_state.screenshot_path}")
        print()

        # 2. Create the StateEmbeddingBuilder with real embedders
        print("2. Création du StateEmbeddingBuilder avec CLIP...")
        builder = StateEmbeddingBuilder(use_real_embedders=True)
        print(" ✓ Builder créé avec embedder CLIP")
        print()

        # 3. Generate the state embedding
        print("3. Génération de l'embedding d'état...")
        state_embedding = builder.build_embedding(screen_state)
        print(" ✓ StateEmbedding généré")
        print(f" ✓ ID: {state_embedding.state_id}")
        print(f" ✓ Timestamp: {state_embedding.timestamp}")
        print(f" ✓ Vecteur fusionné: {state_embedding.fused_vector.shape}")
        print(f" ✓ Norme L2: {np.linalg.norm(state_embedding.fused_vector):.3f}")
        print()

        # 4. Inspect each component vector
        print("4. Analyse des composants d'embedding...")
        components = state_embedding.component_vectors
        for component, vector in components.items():
            norm = np.linalg.norm(vector)
            print(f" {component:>8}: {vector.shape} (norm: {norm:.3f})")
        print()

        # 5. Similarity against other states
        print("5. Test de similarité...")
        # A similar state: comparable title, slightly different elements.
        similar_elements = [
            UIElement(
                element_id="signin_btn",
                type="button",
                role="primary_action",
                bbox=(60, 60, 160, 100),
                center=(110, 80),
                label="Sign In",
                label_confidence=0.93,
                embeddings=UIElementEmbeddings(),
                visual_features=VisualFeatures(
                    dominant_color="#0066cc",
                    has_icon=False,
                    shape="rounded_rectangle",
                    size_category="medium",
                ),
                confidence=0.9,
            ),
            UIElement(
                element_id="user_field",
                type="text_input",
                role="form_input",
                bbox=(60, 130, 310, 160),
                center=(185, 145),
                label="User",
                label_confidence=0.88,
                embeddings=UIElementEmbeddings(),
                visual_features=VisualFeatures(
                    dominant_color="#ffffff",
                    has_icon=False,
                    shape="rectangle",
                    size_category="large",
                ),
                confidence=0.85,
            ),
        ]
        similar_state = ScreenState(
            timestamp=1700000060.0,
            window_title="Test Application - Sign In Page",
            screenshot_path=screen_state.screenshot_path,  # same screenshot
            ui_elements=similar_elements,
            screen_size=(800, 600),
        )
        similar_embedding = builder.build_embedding(similar_state)
        # The dot product is used as the similarity score.
        # NOTE(review): this equals cosine similarity only if the fused
        # vectors are unit-norm (the norm is printed in step 3) -- confirm.
        similarity = np.dot(state_embedding.fused_vector, similar_embedding.fused_vector)
        print(f" Similarité entre états similaires: {similarity:.3f}")

        # A very different state: other title, other element, other size.
        different_elements = [
            UIElement(
                element_id="search_box",
                type="text_input",
                role="search_field",
                bbox=(100, 50, 400, 80),
                center=(250, 65),
                label="Search",
                label_confidence=0.91,
                embeddings=UIElementEmbeddings(),
                visual_features=VisualFeatures(
                    dominant_color="#ffffff",
                    has_icon=True,
                    shape="rounded_rectangle",
                    size_category="large",
                ),
                confidence=0.9,
            )
        ]
        different_state = ScreenState(
            timestamp=1700000120.0,
            window_title="Search Engine - Main Page",
            screenshot_path=screen_state.screenshot_path,
            ui_elements=different_elements,
            screen_size=(1200, 800),
        )
        different_embedding = builder.build_embedding(different_state)
        similarity_diff = np.dot(state_embedding.fused_vector, different_embedding.fused_vector)
        print(f" Similarité entre états différents: {similarity_diff:.3f}")
        print()

        # 6. Save the embedding and verify the output files
        print("6. Test de sauvegarde...")
        output_dir = Path("test_embeddings")
        output_dir.mkdir(exist_ok=True)
        saved_path = builder.save_embedding(state_embedding, output_dir)
        print(f" ✓ Embedding sauvegardé: {saved_path}")
        # Check that the expected files exist.
        vector_file = output_dir / f"{state_embedding.state_id}.npy"
        metadata_file = output_dir / f"{state_embedding.state_id}_metadata.json"
        if not (vector_file.exists() and metadata_file.exists()):
            # A missing output file is a real failure: stop here instead of
            # going on to print the success banner and returning True.
            print(" ❌ Erreur: fichiers manquants")
            return False
        print(f" ✓ Fichiers créés: .npy ({vector_file.stat().st_size} bytes)")
        print(f" ✓ Fichiers créés: .json ({metadata_file.stat().st_size} bytes)")
        print()

        # 7. Summary
        print("7. Résumé des performances...")
        print(f" Dimension des embeddings: {state_embedding.fused_vector.shape[0]}")
        print(f" Nombre de composants: {len(state_embedding.component_vectors)}")
        print(f" Similarité états similaires: {similarity:.3f}")
        print(f" Similarité états différents: {similarity_diff:.3f}")
        print()
        print("=" * 70)
        print("🎉 Test Pipeline Embedding Complet RÉUSSI !")
        print("=" * 70)
        print()
        print("Prochaines étapes:")
        print(" 1. ✅ CLIP embedders fonctionnels")
        print(" 2. ✅ StateEmbeddingBuilder intégré")
        print(" 3. ⏳ Finaliser Phase 2 (tests)")
        print(" 4. ⏳ Phase 3.5 (Optimisation Asynchrone)")
        print(" 5. ⏳ Phase 4 (Workflow Graphs)")
        print()
        return True
    except Exception as e:
        # Top-level boundary of the script: report the failure with its
        # traceback and signal it to the caller through the return value.
        print(f"❌ Erreur lors du test du pipeline: {e}")
        import traceback
        traceback.print_exc()
        return False
if __name__ == "__main__":
    # Script entry point: run the pipeline check and turn its boolean result
    # into the process exit status (0 on success, 1 on failure).
    # sys.exit is used rather than the exit() builtin, which is injected by
    # the `site` module for interactive use and is not always available.
    success = test_embedding_pipeline()
    if not success:
        # Print a troubleshooting checklist before exiting with an error.
        print()
        print("=" * 70)
        print("❌ Test échoué - Vérifications:")
        print(" 1. OpenCLIP est-il installé ? (bash rpa_vision_v3/install_clip.sh)")
        print(" 2. PyTorch est-il installé ?")
        print(" 3. Les modèles sont-ils téléchargés ?")
        print("=" * 70)
        sys.exit(1)
    sys.exit(0)