#!/usr/bin/env python3
"""
CLIP embedder test script.

Exercises loading of the CLIP embedder and its image, text, similarity and
batch embedding entry points, printing a PASS/FAIL summary at the end.
The process exit code is 0 when every test passes and 1 otherwise.
"""

import sys
import traceback
from pathlib import Path

# Add the parent directory to the import path so that ``core`` can be
# imported when this script is run directly.
sys.path.insert(0, str(Path(__file__).parent.parent))

from core.embedding import CLIPEmbedder, create_clip_embedder, get_default_embedder
from PIL import Image
import numpy as np

_SEPARATOR = "=" * 80


def _print_banner(title, *, leading_blank=True):
    """Print a section title framed by two separator lines."""
    print(("\n" if leading_blank else "") + _SEPARATOR)
    print(title)
    print(_SEPARATOR)


def test_clip_loading():
    """Load the default CLIP embedder and print its basic properties.

    Returns:
        The object returned by ``get_default_embedder()``, or ``None`` when
        loading it (or reading its properties) raised an exception.
    """
    _print_banner("TEST 1: Chargement du modèle CLIP", leading_blank=False)

    try:
        embedder = get_default_embedder()
        print(f"✓ Modèle chargé: {embedder.get_model_name()}")
        print(f"✓ Dimension: {embedder.get_dimension()}")
        print(f"✓ Device: {embedder.device}")
        return embedder
    except Exception as e:
        print(f"❌ Erreur de chargement: {e}")
        # Loading failures are the hardest to diagnose from the message
        # alone, so show the traceback like the other tests do.
        traceback.print_exc()
        return None


def test_image_embedding(embedder):
    """Embed the first available test image and print statistics about it.

    Args:
        embedder: Object exposing ``embed_image(image)``.

    Returns:
        The embedding returned by ``embedder.embed_image``, or ``None`` when
        no test image exists next to this script or an exception was raised.
    """
    _print_banner("TEST 2: Embedding d'image")

    # Candidate test images, looked up next to this script in priority order.
    test_images = [
        "test_ui_screenshot.png",
        "real_world_screenshot.png",
        "test_screenshot.png",
    ]
    script_dir = Path(__file__).parent
    image_path = next(
        (script_dir / name for name in test_images if (script_dir / name).exists()),
        None,
    )

    if image_path is None:
        print("⚠ Aucune image de test trouvée")
        return None

    try:
        print(f"📸 Chargement de l'image: {image_path.name}")
        # Image.open keeps the underlying file open; the context manager
        # guarantees it is closed once the embedding has been computed.
        with Image.open(image_path) as image:
            print(f" Taille: {image.size}")

            print("🔄 Génération de l'embedding...")
            embedding = embedder.embed_image(image)

        norm = np.linalg.norm(embedding)

        print("✓ Embedding généré:")
        print(f" Shape: {embedding.shape}")
        print(f" Type: {embedding.dtype}")
        print(f" Norm L2: {norm:.4f}")
        print(f" Min: {embedding.min():.4f}, Max: {embedding.max():.4f}")
        print(f" Mean: {embedding.mean():.4f}, Std: {embedding.std():.4f}")

        # Check normalization: the L2 norm should be close to 1.0.
        if abs(norm - 1.0) < 0.01:
            print("✓ Vecteur normalisé (L2 norm ≈ 1.0)")
        else:
            print(f"⚠ Vecteur non normalisé (L2 norm = {norm:.4f})")

        return embedding

    except Exception as e:
        print(f"❌ Erreur d'embedding: {e}")
        traceback.print_exc()
        return None


def test_text_embedding(embedder) -> bool:
    """Embed a few sample texts, including an empty one, and print results.

    Args:
        embedder: Object exposing ``embed_text(text)``.

    Returns:
        True when every text was embedded without raising, False otherwise.
        A non-zero vector for the empty text only produces a warning.
    """
    _print_banner("TEST 3: Embedding de texte")

    test_texts = [
        "A button to submit a form",
        "Text input field for username",
        "Navigation menu with links",
        "",  # Empty text
    ]

    try:
        for i, text in enumerate(test_texts, 1):
            print(f"\n{i}. Texte: '{text}'")
            embedding = embedder.embed_text(text)
            print(f" Shape: {embedding.shape}")
            print(f" Norm L2: {np.linalg.norm(embedding):.4f}")

            if not text.strip():
                if np.allclose(embedding, 0):
                    print(" ✓ Vecteur zéro pour texte vide")
                else:
                    print(" ⚠ Vecteur non-zéro pour texte vide")

        print("\n✓ Tous les embeddings de texte générés")
        return True

    except Exception as e:
        print(f"❌ Erreur d'embedding de texte: {e}")
        traceback.print_exc()
        return False


def test_similarity(embedder) -> bool:
    """Compare pairwise similarities of three short texts.

    Two paraphrases and one unrelated sentence are embedded; the paraphrases
    are expected to be closer to each other than to the unrelated sentence.
    An unexpected ordering only produces a warning.

    Args:
        embedder: Object exposing ``embed_text(text)``.

    Returns:
        True when the similarities were computed without raising, False
        otherwise.
    """
    _print_banner("TEST 4: Similarité entre embeddings")

    try:
        # Texts 1 and 2 are paraphrases; text 3 is unrelated.
        text1 = "A blue button"
        text2 = "A button that is blue"
        text3 = "A red car"

        print(f"Texte 1: '{text1}'")
        print(f"Texte 2: '{text2}'")
        print(f"Texte 3: '{text3}'")

        emb1 = embedder.embed_text(text1)
        emb2 = embedder.embed_text(text2)
        emb3 = embedder.embed_text(text3)

        # The dot product is used as cosine similarity; this assumes the
        # embedder returns L2-normalized vectors.
        sim_1_2 = np.dot(emb1, emb2)
        sim_1_3 = np.dot(emb1, emb3)
        sim_2_3 = np.dot(emb2, emb3)

        print("\nSimilarités (cosinus):")
        print(f" Texte 1 ↔ Texte 2: {sim_1_2:.4f}")
        print(f" Texte 1 ↔ Texte 3: {sim_1_3:.4f}")
        print(f" Texte 2 ↔ Texte 3: {sim_2_3:.4f}")

        if sim_1_2 > sim_1_3:
            print("✓ Textes similaires plus proches (1-2 > 1-3)")
        else:
            print("⚠ Similarité inattendue")

        return True

    except Exception as e:
        print(f"❌ Erreur de similarité: {e}")
        traceback.print_exc()
        return False


def test_batch_processing(embedder) -> bool:
    """Embed several texts in one batch and validate the result.

    Args:
        embedder: Object exposing ``embed_text_batch(texts)`` and
            ``get_dimension()``.

    Returns:
        True when the batch was embedded without raising and the result has
        shape ``(len(texts), embedder.get_dimension())``; False otherwise.
        Non-normalized rows only produce a warning.
    """
    _print_banner("TEST 5: Traitement par batch")

    try:
        texts = [
            "First text",
            "Second text",
            "Third text",
        ]

        print(f"📝 Embedding de {len(texts)} textes en batch...")
        embeddings = embedder.embed_text_batch(texts)

        expected_shape = (len(texts), embedder.get_dimension())

        print("✓ Batch embeddings générés:")
        print(f" Shape: {embeddings.shape}")
        print(f" Expected: ({expected_shape[0]}, {expected_shape[1]})")

        # A wrong shape is a real failure and must be reflected in the
        # return value, not only in the printed message.
        shape_ok = embeddings.shape == expected_shape
        if shape_ok:
            print("✓ Shape correcte")
        else:
            print("❌ Shape incorrecte")

        # Check normalization of every row.
        norms = np.linalg.norm(embeddings, axis=1)
        print(f" Normes L2: {norms}")

        if np.allclose(norms, 1.0, atol=0.01):
            print("✓ Tous les vecteurs normalisés")
        else:
            print("⚠ Certains vecteurs non normalisés")

        return shape_ok

    except Exception as e:
        print(f"❌ Erreur de batch processing: {e}")
        traceback.print_exc()
        return False


def main() -> bool:
    """Run every test in order and print a summary.

    Returns:
        True when all tests passed, False otherwise. The remaining tests are
        skipped when the embedder cannot be loaded.
    """
    print("\n🚀 Test du CLIP Embedder\n")

    # Test 1: loading
    embedder = test_clip_loading()
    if embedder is None:
        print("\n❌ Échec du chargement, arrêt des tests")
        return False

    # Test 2: image embedding
    image_emb = test_image_embedding(embedder)

    # Test 3: text embedding
    text_ok = test_text_embedding(embedder)

    # Test 4: similarity
    sim_ok = test_similarity(embedder)

    # Test 5: batch processing
    batch_ok = test_batch_processing(embedder)

    # Summary
    image_ok = image_emb is not None
    _print_banner("RÉSUMÉ DES TESTS")
    print(f"Chargement: {'✓ PASS' if embedder is not None else '❌ FAIL'}")
    print(f"Image embedding: {'✓ PASS' if image_ok else '❌ FAIL'}")
    print(f"Text embedding: {'✓ PASS' if text_ok else '❌ FAIL'}")
    print(f"Similarité: {'✓ PASS' if sim_ok else '❌ FAIL'}")
    print(f"Batch processing: {'✓ PASS' if batch_ok else '❌ FAIL'}")
    print(_SEPARATOR)

    if image_ok and text_ok and sim_ok and batch_ok:
        print("\n🎉 Tous les tests sont passés!")
        return True

    print("\n⚠ Certains tests ont échoué")
    return False


if __name__ == "__main__":
    success = main()
    sys.exit(0 if success else 1)