- Frontend v4 accessible sur réseau local (192.168.1.40) - Ports ouverts: 3002 (frontend), 5001 (backend), 5004 (dashboard) - Ollama GPU fonctionnel - Self-healing interactif - Dashboard confiance Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
259 lines
7.5 KiB
Python
259 lines
7.5 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Test du CLIP Embedder
|
|
|
|
Ce script teste le chargement et l'utilisation du CLIP embedder.
|
|
"""
|
|
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Ajouter le répertoire parent au path
|
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
|
|
from core.embedding import CLIPEmbedder, create_clip_embedder, get_default_embedder
|
|
from PIL import Image
|
|
import numpy as np
|
|
|
|
|
|
def test_clip_loading():
    """Load the default CLIP embedder and print its basic properties.

    Calls ``get_default_embedder()`` and prints the model name, the
    embedding dimension and the device reported by the returned object.

    Returns:
        The embedder object on success, or ``None`` if loading the model or
        reading any of its properties raised an exception.
    """
    print("=" * 80)
    print("TEST 1: Chargement du modèle CLIP")
    print("=" * 80)

    try:
        embedder = get_default_embedder()
        print(f"✓ Modèle chargé: {embedder.get_model_name()}")
        print(f"✓ Dimension: {embedder.get_dimension()}")
        print(f"✓ Device: {embedder.device}")
        return embedder
    except Exception as e:
        print(f"❌ Erreur de chargement: {e}")
        # Show the full stack trace so a failing model load can be diagnosed;
        # the message alone rarely says where the failure happened.
        import traceback

        traceback.print_exc()
        return None
|
|
|
|
|
|
def test_image_embedding(embedder):
|
|
"""Tester l'embedding d'une image"""
|
|
print("\n" + "=" * 80)
|
|
print("TEST 2: Embedding d'image")
|
|
print("=" * 80)
|
|
|
|
# Charger une image de test
|
|
test_images = [
|
|
"test_ui_screenshot.png",
|
|
"real_world_screenshot.png",
|
|
"test_screenshot.png"
|
|
]
|
|
|
|
image_path = None
|
|
for img_name in test_images:
|
|
path = Path(__file__).parent / img_name
|
|
if path.exists():
|
|
image_path = path
|
|
break
|
|
|
|
if not image_path:
|
|
print("⚠ Aucune image de test trouvée")
|
|
return None
|
|
|
|
try:
|
|
print(f"📸 Chargement de l'image: {image_path.name}")
|
|
image = Image.open(image_path)
|
|
print(f" Taille: {image.size}")
|
|
|
|
print("🔄 Génération de l'embedding...")
|
|
embedding = embedder.embed_image(image)
|
|
|
|
print(f"✓ Embedding généré:")
|
|
print(f" Shape: {embedding.shape}")
|
|
print(f" Type: {embedding.dtype}")
|
|
print(f" Norm L2: {np.linalg.norm(embedding):.4f}")
|
|
print(f" Min: {embedding.min():.4f}, Max: {embedding.max():.4f}")
|
|
print(f" Mean: {embedding.mean():.4f}, Std: {embedding.std():.4f}")
|
|
|
|
# Vérifier la normalisation
|
|
norm = np.linalg.norm(embedding)
|
|
if abs(norm - 1.0) < 0.01:
|
|
print(f"✓ Vecteur normalisé (L2 norm ≈ 1.0)")
|
|
else:
|
|
print(f"⚠ Vecteur non normalisé (L2 norm = {norm:.4f})")
|
|
|
|
return embedding
|
|
|
|
except Exception as e:
|
|
print(f"❌ Erreur d'embedding: {e}")
|
|
import traceback
|
|
traceback.print_exc()
|
|
return None
|
|
|
|
|
|
def test_text_embedding(embedder):
|
|
"""Tester l'embedding de texte"""
|
|
print("\n" + "=" * 80)
|
|
print("TEST 3: Embedding de texte")
|
|
print("=" * 80)
|
|
|
|
test_texts = [
|
|
"A button to submit a form",
|
|
"Text input field for username",
|
|
"Navigation menu with links",
|
|
"" # Texte vide
|
|
]
|
|
|
|
try:
|
|
for i, text in enumerate(test_texts, 1):
|
|
print(f"\n{i}. Texte: '{text}'")
|
|
embedding = embedder.embed_text(text)
|
|
|
|
print(f" Shape: {embedding.shape}")
|
|
print(f" Norm L2: {np.linalg.norm(embedding):.4f}")
|
|
|
|
if not text.strip():
|
|
if np.allclose(embedding, 0):
|
|
print(f" ✓ Vecteur zéro pour texte vide")
|
|
else:
|
|
print(f" ⚠ Vecteur non-zéro pour texte vide")
|
|
|
|
print("\n✓ Tous les embeddings de texte générés")
|
|
return True
|
|
|
|
except Exception as e:
|
|
print(f"❌ Erreur d'embedding de texte: {e}")
|
|
import traceback
|
|
traceback.print_exc()
|
|
return False
|
|
|
|
|
|
def test_similarity(embedder):
|
|
"""Tester la similarité entre embeddings"""
|
|
print("\n" + "=" * 80)
|
|
print("TEST 4: Similarité entre embeddings")
|
|
print("=" * 80)
|
|
|
|
try:
|
|
# Textes similaires
|
|
text1 = "A blue button"
|
|
text2 = "A button that is blue"
|
|
text3 = "A red car"
|
|
|
|
print(f"Texte 1: '{text1}'")
|
|
print(f"Texte 2: '{text2}'")
|
|
print(f"Texte 3: '{text3}'")
|
|
|
|
emb1 = embedder.embed_text(text1)
|
|
emb2 = embedder.embed_text(text2)
|
|
emb3 = embedder.embed_text(text3)
|
|
|
|
# Similarité cosinus (produit scalaire car normalisés)
|
|
sim_1_2 = np.dot(emb1, emb2)
|
|
sim_1_3 = np.dot(emb1, emb3)
|
|
sim_2_3 = np.dot(emb2, emb3)
|
|
|
|
print(f"\nSimilarités (cosinus):")
|
|
print(f" Texte 1 ↔ Texte 2: {sim_1_2:.4f}")
|
|
print(f" Texte 1 ↔ Texte 3: {sim_1_3:.4f}")
|
|
print(f" Texte 2 ↔ Texte 3: {sim_2_3:.4f}")
|
|
|
|
if sim_1_2 > sim_1_3:
|
|
print(f"✓ Textes similaires plus proches (1-2 > 1-3)")
|
|
else:
|
|
print(f"⚠ Similarité inattendue")
|
|
|
|
return True
|
|
|
|
except Exception as e:
|
|
print(f"❌ Erreur de similarité: {e}")
|
|
return False
|
|
|
|
|
|
def test_batch_processing(embedder):
|
|
"""Tester le traitement par batch"""
|
|
print("\n" + "=" * 80)
|
|
print("TEST 5: Traitement par batch")
|
|
print("=" * 80)
|
|
|
|
try:
|
|
texts = [
|
|
"First text",
|
|
"Second text",
|
|
"Third text"
|
|
]
|
|
|
|
print(f"📝 Embedding de {len(texts)} textes en batch...")
|
|
embeddings = embedder.embed_text_batch(texts)
|
|
|
|
print(f"✓ Batch embeddings générés:")
|
|
print(f" Shape: {embeddings.shape}")
|
|
print(f" Expected: ({len(texts)}, {embedder.get_dimension()})")
|
|
|
|
if embeddings.shape == (len(texts), embedder.get_dimension()):
|
|
print(f"✓ Shape correcte")
|
|
else:
|
|
print(f"❌ Shape incorrecte")
|
|
|
|
# Vérifier normalisation
|
|
norms = np.linalg.norm(embeddings, axis=1)
|
|
print(f" Normes L2: {norms}")
|
|
|
|
if np.allclose(norms, 1.0, atol=0.01):
|
|
print(f"✓ Tous les vecteurs normalisés")
|
|
else:
|
|
print(f"⚠ Certains vecteurs non normalisés")
|
|
|
|
return True
|
|
|
|
except Exception as e:
|
|
print(f"❌ Erreur de batch processing: {e}")
|
|
import traceback
|
|
traceback.print_exc()
|
|
return False
|
|
|
|
|
|
def main():
    """Run all CLIP embedder checks in order and print a summary.

    The model is loaded first; if that fails the remaining checks are
    skipped. Otherwise the image, text, similarity and batch checks run in
    sequence and a PASS/FAIL line is printed for each.

    Returns:
        bool: ``True`` only if the model loaded and every check succeeded,
        ``False`` otherwise.
    """
    print("\n🚀 Test du CLIP Embedder\n")

    # Test 1: loading. Failure is signalled by None, so compare by identity
    # rather than relying on the truthiness of the embedder object.
    embedder = test_clip_loading()
    if embedder is None:
        print("\n❌ Échec du chargement, arrêt des tests")
        return False

    # Test 2: image embedding (returns the embedding, or None on failure)
    image_emb = test_image_embedding(embedder)

    # Test 3: text embedding
    text_ok = test_text_embedding(embedder)

    # Test 4: similarity
    sim_ok = test_similarity(embedder)

    # Test 5: batch processing
    batch_ok = test_batch_processing(embedder)

    # One (label, passed) entry per check, in execution order.
    results = [
        ("Chargement", embedder is not None),
        ("Image embedding", image_emb is not None),
        ("Text embedding", bool(text_ok)),
        ("Similarité", bool(sim_ok)),
        ("Batch processing", bool(batch_ok)),
    ]

    # Summary
    print("\n" + "=" * 80)
    print("RÉSUMÉ DES TESTS")
    print("=" * 80)
    for label, passed in results:
        print(f"{label}: {'✓ PASS' if passed else '❌ FAIL'}")
    print("=" * 80)

    all_pass = all(passed for _, passed in results)

    if all_pass:
        print("\n🎉 Tous les tests sont passés!")
    else:
        print("\n⚠ Certains tests ont échoué")
    return all_pass
|
|
|
|
|
|
if __name__ == "__main__":
    # Exit status 0 when main() reports overall success, 1 otherwise.
    exit_code = 0 if main() else 1
    sys.exit(exit_code)
|