Initial commit

2026-03-05 00:20:25 +01:00
commit dcd4de9945
1954 changed files with 669380 additions and 0 deletions
--- a/geniusia2/core/ui_element_models.py
+++ b/geniusia2/core/ui_element_models.py
@@ -0,0 +1,827 @@
+"""
+Modèles de données pour la détection d'éléments UI et l'état d'écran enrichi.
+Implémente les structures UIElement et EnrichedScreenState pour le système RPA Vision V2.
+
+Phase 1 - Mode Light: Structures de base avec compatibilité arrière complète.
+"""
+
+from dataclasses import dataclass, field, asdict
+from datetime import datetime
+from typing import List, Dict, Any, Tuple, Optional
+from enum import Enum
+import json
+import hashlib
+import numpy as np
+
+
+class UIElementType(Enum):
+    """Supported UI element types.
+
+    ``UIElement.to_dict`` writes a member's ``value`` under the ``"type"`` key
+    and ``UIElement.from_dict`` looks the member up again from that value.
+    """
+    BUTTON = "button"
+    TEXT_INPUT = "text_input"
+    DROPDOWN = "dropdown"
+    TAB = "tab"
+    CHECKBOX = "checkbox"
+    RADIO_BUTTON = "radio_button"
+    LINK = "link"
+    GENERIC_INTERACTIVE = "generic_interactive"
+
+
+@dataclass
+class VisualData:
+    """Visual payload attached to a UI element.
+
+    Attributes:
+        screenshot_path: Path of the element screenshot.
+        embedding_provider: Name of the embedding provider,
+            e.g. "openclip_ViT-B-32".
+        embedding_vector_id: Path to the .npy file holding the vector.
+    """
+    screenshot_path: str
+    embedding_provider: str
+    embedding_vector_id: str
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return a JSON-ready dict using the nested ``embedding`` layout."""
+        embedding = {
+            "provider": self.embedding_provider,
+            "vector_id": self.embedding_vector_id,
+        }
+        return {"screenshot_path": self.screenshot_path, "embedding": embedding}
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> 'VisualData':
+        """Build an instance from the nested layout or the flat legacy layout."""
+        if "embedding" not in data:
+            # Legacy layout: flat keys, absent ones fall back to "".
+            return cls(
+                screenshot_path=data["screenshot_path"],
+                embedding_provider=data.get("embedding_provider", ""),
+                embedding_vector_id=data.get("embedding_vector_id", ""),
+            )
+
+        # Current layout: provider and vector id live under "embedding".
+        embedding = data["embedding"]
+        return cls(
+            screenshot_path=data["screenshot_path"],
+            embedding_provider=embedding["provider"],
+            embedding_vector_id=embedding["vector_id"],
+        )
+
+
+@dataclass
+class TextData:
+    """Textual payload attached to a UI element.
+
+    Attributes:
+        raw: Text as captured.
+        normalized: Normalized form of the text.
+        embedding_provider: Name of the embedding provider, e.g. "clip_text".
+        embedding_vector_id: Path to the .npy file holding the vector.
+    """
+    raw: str
+    normalized: str
+    embedding_provider: str
+    embedding_vector_id: str
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return a JSON-ready dict using the nested ``embedding`` layout."""
+        embedding = {
+            "provider": self.embedding_provider,
+            "vector_id": self.embedding_vector_id,
+        }
+        return {
+            "raw": self.raw,
+            "normalized": self.normalized,
+            "embedding": embedding,
+        }
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> 'TextData':
+        """Build an instance from the nested layout or the flat legacy layout."""
+        if "embedding" not in data:
+            # Legacy layout: every key is optional and falls back to "".
+            return cls(
+                raw=data.get("raw", ""),
+                normalized=data.get("normalized", ""),
+                embedding_provider=data.get("embedding_provider", ""),
+                embedding_vector_id=data.get("embedding_vector_id", ""),
+            )
+
+        # Current layout: "raw" and "normalized" are required here.
+        embedding = data["embedding"]
+        return cls(
+            raw=data["raw"],
+            normalized=data["normalized"],
+            embedding_provider=embedding["provider"],
+            embedding_vector_id=embedding["vector_id"],
+        )
+
+
+@dataclass
+class ElementProperties:
+    """Boolean interaction flags of a UI element; each one defaults to False."""
+    is_clickable: bool = False
+    is_focusable: bool = False
+    is_dangerous: bool = False
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the three flags as a JSON-ready dict."""
+        flags: Dict[str, Any] = {}
+        flags["is_clickable"] = self.is_clickable
+        flags["is_focusable"] = self.is_focusable
+        flags["is_dangerous"] = self.is_dangerous
+        return flags
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> 'ElementProperties':
+        """Build an instance from a dict; a missing flag is read as False."""
+        flag_names = ("is_clickable", "is_focusable", "is_dangerous")
+        return cls(**{name: data.get(name, False) for name in flag_names})
+
+
+@dataclass
+class ElementContext:
+    """Context in which a UI element was seen.
+
+    Attributes:
+        app_name: Name of the application.
+        window_title: Title of the window.
+        workflow_hint: Optional workflow hint; None when not provided.
+    """
+    app_name: str
+    window_title: str
+    workflow_hint: Optional[str] = None
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the context as a JSON-ready dict."""
+        return dict(
+            app_name=self.app_name,
+            window_title=self.window_title,
+            workflow_hint=self.workflow_hint,
+        )
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> 'ElementContext':
+        """Build an instance from a dict; only ``workflow_hint`` is optional."""
+        app_name = data["app_name"]
+        window_title = data["window_title"]
+        hint = data.get("workflow_hint")
+        return cls(app_name=app_name, window_title=window_title, workflow_hint=hint)
+
+
+@dataclass
+class UIElement:
+    """
+    A user-interface element detected on screen.
+
+    Attributes:
+        element_id: Identifier of the element. ``generate_element_id`` builds a
+            stable one from the app name, the bbox center and the label.
+        type: Kind of element (button, text_input, ...).
+        role: Semantic role (validate_invoice, search_field, ...).
+        bbox: Bounding box as (x1, y1, x2, y2).
+        label: Visible text of the element.
+        visual: Visual data (screenshot, embedding).
+        text: Textual data (raw, normalized, embedding).
+        properties: Flags (is_clickable, is_focusable, is_dangerous).
+        context: Context (app_name, window_title, workflow_hint).
+        tags: Additional tags.
+        confidence: Detection confidence score, documented as 0.0-1.0
+            (the range is not validated here).
+        detection_method: Name of the detection method used.
+    """
+    element_id: str
+    type: UIElementType
+    role: str
+    bbox: Tuple[int, int, int, int]  # (x1, y1, x2, y2)
+    label: str
+    visual: VisualData
+    text: TextData
+    properties: ElementProperties
+    context: ElementContext
+    tags: List[str] = field(default_factory=list)
+    confidence: float = 1.0
+    detection_method: str = "unknown"
+
+    @staticmethod
+    def generate_element_id(app_name: str, bbox: Tuple[int, int, int, int], label: str) -> str:
+        """
+        Build a stable identifier for a UI element.
+
+        The identifier is "el_" followed by the first 16 hex characters of the
+        SHA-256 of "{app_name}_{center_x}_{center_y}_{label}", where the center
+        is computed with integer division and the label is lower-cased and
+        then stripped of surrounding whitespace.
+
+        Args:
+            app_name: Name of the application.
+            bbox: Bounding box (x1, y1, x2, y2).
+            label: Label of the element.
+
+        Returns:
+            The hash-based identifier.
+        """
+        center_x = (bbox[0] + bbox[2]) // 2
+        center_y = (bbox[1] + bbox[3]) // 2
+
+        label_normalized = label.lower().strip()
+
+        hash_input = f"{app_name}_{center_x}_{center_y}_{label_normalized}"
+        digest = hashlib.sha256(hash_input.encode('utf-8')).hexdigest()
+
+        # Only the first 16 hex characters are kept.
+        return f"el_{digest[:16]}"
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return a JSON-ready dict tagged with schema version "uielement_v1"."""
+        return {
+            "schema_version": "uielement_v1",
+            "element_id": self.element_id,
+            "type": self.type.value,
+            "role": self.role,
+            "bbox": list(self.bbox),
+            "label": self.label,
+            "confidence": float(self.confidence),
+            "detection_method": self.detection_method,
+            "visual": self.visual.to_dict(),
+            "text": self.text.to_dict(),
+            "properties": self.properties.to_dict(),
+            "context": self.context.to_dict(),
+            "tags": self.tags
+        }
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> 'UIElement':
+        """
+        Build an instance from a dict such as the one ``to_dict`` produces.
+
+        "tags", "confidence" and "detection_method" are optional and fall back
+        to [], 1.0 and "unknown"; every other key is required.
+
+        Raises:
+            KeyError: If a required key is missing.
+            ValueError: If "type" is not a value of ``UIElementType``.
+        """
+        # NOTE: "schema_version" is not consulted; only one schema is handled.
+        element_type = UIElementType(data["type"])
+
+        # Rebuild the nested structures.
+        visual = VisualData.from_dict(data["visual"])
+        text = TextData.from_dict(data["text"])
+        properties = ElementProperties.from_dict(data["properties"])
+        context = ElementContext.from_dict(data["context"])
+
+        return cls(
+            element_id=data["element_id"],
+            type=element_type,
+            role=data["role"],
+            bbox=tuple(data["bbox"]),
+            label=data["label"],
+            visual=visual,
+            text=text,
+            properties=properties,
+            context=context,
+            tags=data.get("tags", []),
+            confidence=data.get("confidence", 1.0),
+            detection_method=data.get("detection_method", "unknown")
+        )
+
+    def to_json(self) -> str:
+        """Serialize to indented JSON, keeping non-ASCII characters as-is."""
+        return json.dumps(self.to_dict(), indent=2, ensure_ascii=False)
+
+    @classmethod
+    def from_json(cls, json_str: str) -> 'UIElement':
+        """Deserialize from a JSON string."""
+        return cls.from_dict(json.loads(json_str))
+
+
+if __name__ == "__main__":
+    # Basic self-checks for UIElement: each step prints what it did and
+    # asserts the property it is meant to demonstrate.
+    print("Test des modèles UIElement")
+    print("=" * 50)
+
+    # element_id generation
+    print("\n1. Test génération d'element_id:")
+    element_id = UIElement.generate_element_id(
+        app_name="test_app",
+        bbox=(100, 200, 300, 250),
+        label="Valider"
+    )
+    print(f"   Element ID: {element_id}")
+    assert element_id.startswith("el_")
+
+    # UIElement construction
+    print("\n2. Test création UIElement:")
+    element = UIElement(
+        element_id=element_id,
+        type=UIElementType.BUTTON,
+        role="validate_action",
+        bbox=(100, 200, 300, 250),
+        label="Valider",
+        visual=VisualData(
+            screenshot_path="data/elements/el_001.png",
+            embedding_provider="openclip_ViT-B-32",
+            embedding_vector_id="data/embeddings/el_001.npy"
+        ),
+        text=TextData(
+            raw="Valider",
+            normalized="valider",
+            embedding_provider="clip_text",
+            embedding_vector_id="data/embeddings/el_001_text.npy"
+        ),
+        properties=ElementProperties(
+            is_clickable=True,
+            is_focusable=True,
+            is_dangerous=False
+        ),
+        context=ElementContext(
+            app_name="test_app",
+            window_title="Test Window",
+            workflow_hint="WF_test"
+        ),
+        tags=["primary_action"],
+        confidence=0.95,
+        detection_method="heuristic_rectangle"
+    )
+
+    print(f"   Element ID: {element.element_id}")
+    print(f"   Type: {element.type.value}")
+    print(f"   Role: {element.role}")
+    print(f"   Label: {element.label}")
+    print(f"   Confidence: {element.confidence}")
+
+    # Serialization
+    print("\n3. Test sérialisation JSON:")
+    json_str = element.to_json()
+    print(f"   JSON length: {len(json_str)} chars")
+    # Report the version actually written by to_dict instead of a literal.
+    print(f"   Schema version: {element.to_dict()['schema_version']}")
+
+    # Deserialization
+    print("\n4. Test désérialisation:")
+    element_restored = UIElement.from_json(json_str)
+    print(f"   Restored element_id: {element_restored.element_id}")
+    print(f"   Restored type: {element_restored.type.value}")
+    print(f"   Restored label: {element_restored.label}")
+    assert element_restored == element, "JSON round trip altered the element"
+
+    # Identifier stability: same inputs must give the same id
+    print("\n5. Test stabilité de l'element_id:")
+    element_id_2 = UIElement.generate_element_id(
+        app_name="test_app",
+        bbox=(100, 200, 300, 250),
+        label="Valider"
+    )
+    print(f"   ID 1: {element_id}")
+    print(f"   ID 2: {element_id_2}")
+    print(f"   IDs identiques: {element_id == element_id_2}")
+    assert element_id == element_id_2, "element_id is not stable"
+
+    print("\n✓ Tous les tests basiques réussis!")
+
+
+# ============================================================================
+# EnrichedScreenState and related structures
+# ============================================================================
+
+
+@dataclass
+class WindowInfo:
+    """Information about the active window.
+
+    Attributes:
+        app_name: Name of the application.
+        window_title: Title of the window.
+        screen_resolution: Screen size as (width, height).
+    """
+    app_name: str
+    window_title: str
+    screen_resolution: Tuple[int, int]
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return a JSON-ready dict; the resolution is emitted as a list."""
+        resolution = [*self.screen_resolution]
+        return dict(
+            app_name=self.app_name,
+            window_title=self.window_title,
+            screen_resolution=resolution,
+        )
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> 'WindowInfo':
+        """Build an instance from a dict; the resolution becomes a tuple."""
+        app_name = data["app_name"]
+        window_title = data["window_title"]
+        resolution = tuple(data["screen_resolution"])
+        return cls(
+            app_name=app_name,
+            window_title=window_title,
+            screen_resolution=resolution,
+        )
+
+
+@dataclass
+class RawData:
+    """Raw capture data: the path of the screenshot."""
+    screenshot_path: str
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return a JSON-ready dict with the single ``screenshot_path`` key."""
+        return dict(screenshot_path=self.screenshot_path)
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> 'RawData':
+        """Build an instance from a dict; ``screenshot_path`` is required."""
+        path = data["screenshot_path"]
+        return cls(screenshot_path=path)
+
+
+@dataclass
+class PerceptionData:
+    """Perception data: detected text strings and optional OCR results."""
+    detected_text: List[str] = field(default_factory=list)
+    ocr_results: Optional[Dict[str, Any]] = None
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return both fields as a JSON-ready dict."""
+        return dict(
+            detected_text=self.detected_text,
+            ocr_results=self.ocr_results,
+        )
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> 'PerceptionData':
+        """Build an instance from a dict; both keys are optional."""
+        texts = data.get("detected_text", [])
+        ocr = data.get("ocr_results")
+        return cls(detected_text=texts, ocr_results=ocr)
+
+
+@dataclass
+class ComponentInfo:
+    """Reference to one embedding component: its provider and vector id."""
+    provider: str
+    vector_id: str
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the provider and vector id as a JSON-ready dict."""
+        return dict(provider=self.provider, vector_id=self.vector_id)
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> 'ComponentInfo':
+        """Build an instance from a dict; both keys are required."""
+        provider = data["provider"]
+        vector_id = data["vector_id"]
+        return cls(provider=provider, vector_id=vector_id)
+
+
+@dataclass
+class EmbeddingComponents:
+    """Individual components of a multi-modal state embedding.
+
+    Every component is optional; an unset component is None and is left out
+    of the serialized dict.
+    """
+    image_embedding: Optional[ComponentInfo] = None
+    text_embedding: Optional[ComponentInfo] = None
+    title_embedding: Optional[ComponentInfo] = None
+    ui_embedding: Optional[ComponentInfo] = None
+    context_embedding: Optional[ComponentInfo] = None
+
+    # Serialization order of the components. Deliberately unannotated so the
+    # dataclass machinery does not treat it as a field.
+    _COMPONENT_NAMES = (
+        "image_embedding",
+        "text_embedding",
+        "title_embedding",
+        "ui_embedding",
+        "context_embedding",
+    )
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return a JSON-ready dict containing only the components that are set."""
+        result = {}
+        for name in self._COMPONENT_NAMES:
+            component = getattr(self, name)
+            if component:
+                result[name] = component.to_dict()
+        return result
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> 'EmbeddingComponents':
+        """Build an instance from a dict.
+
+        A component key that is absent, or present with a null value, yields
+        None for that component instead of failing.
+        """
+        def _load(name: str) -> Optional[ComponentInfo]:
+            raw = data.get(name)
+            return ComponentInfo.from_dict(raw) if raw is not None else None
+
+        return cls(**{name: _load(name) for name in cls._COMPONENT_NAMES})
+
+
+@dataclass
+class StateEmbedding:
+    """Unified state embedding, with optional per-modality components.
+
+    Attributes:
+        provider: Name of the embedding provider.
+        vector_id: Identifier of the stored vector.
+        components: Individual components, or None when there are none.
+    """
+    provider: str
+    vector_id: str
+    components: Optional[EmbeddingComponents] = None
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return a JSON-ready dict; "components" is added only when set."""
+        payload = dict(provider=self.provider, vector_id=self.vector_id)
+        if not self.components:
+            return payload
+        payload["components"] = self.components.to_dict()
+        return payload
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> 'StateEmbedding':
+        """Build an instance; a missing or empty "components" entry gives None."""
+        raw_components = data.get("components")
+        if raw_components:
+            components = EmbeddingComponents.from_dict(raw_components)
+        else:
+            components = None
+
+        return cls(
+            provider=data["provider"],
+            vector_id=data["vector_id"],
+            components=components,
+        )
+
+
+@dataclass
+class ContextData:
+    """Workflow context data.
+
+    Attributes:
+        current_workflow_candidate: Candidate workflow, or None.
+        tags: List of tags; empty by default.
+        metadata: Free-form metadata dict; empty by default.
+    """
+    current_workflow_candidate: Optional[str] = None
+    tags: List[str] = field(default_factory=list)
+    metadata: Dict[str, Any] = field(default_factory=dict)
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the three fields as a JSON-ready dict."""
+        return dict(
+            current_workflow_candidate=self.current_workflow_candidate,
+            tags=self.tags,
+            metadata=self.metadata,
+        )
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> 'ContextData':
+        """Build an instance from a dict; every key is optional."""
+        candidate = data.get("current_workflow_candidate")
+        tags = data.get("tags", [])
+        metadata = data.get("metadata", {})
+        return cls(
+            current_workflow_candidate=candidate,
+            tags=tags,
+            metadata=metadata,
+        )
+
+
+@dataclass
+class EnrichedScreenState:
+    """
+    Screen state enriched with UI elements and a state embedding.
+
+    Attributes:
+        screen_state_id: Unique identifier of the screen state.
+        timestamp: Time of the capture.
+        session_id: Session identifier.
+        window: Information about the window.
+        raw: Raw data (screenshot_path).
+        perception: Perception data (detected text).
+        ui_elements: Detected UI elements.
+        state_embedding: Unified state embedding.
+        context: Workflow context.
+        mode: Processing mode ("light", "enriched", "complete"); stored as a
+            plain string and not validated here.
+        processing_metadata: Optional processing metadata.
+    """
+    screen_state_id: str
+    timestamp: datetime
+    session_id: str
+    window: WindowInfo
+    raw: RawData
+    perception: PerceptionData
+    ui_elements: List[UIElement]
+    state_embedding: StateEmbedding
+    context: ContextData
+    mode: str = "light"
+    processing_metadata: Optional[Dict[str, Any]] = None
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return a JSON-ready dict tagged with schema version "screenstate_v1".
+
+        The timestamp is written in ISO 8601 form. "processing_metadata" is
+        included whenever it is not None, so an empty dict survives a
+        round trip through ``from_dict``.
+        """
+        result = {
+            "schema_version": "screenstate_v1",
+            "mode": self.mode,
+            "screen_state_id": self.screen_state_id,
+            "timestamp": self.timestamp.isoformat(),
+            "session_id": self.session_id,
+            "window": self.window.to_dict(),
+            "raw": self.raw.to_dict(),
+            "perception": self.perception.to_dict(),
+            "ui_elements": [elem.to_dict() for elem in self.ui_elements],
+            "state_embedding": self.state_embedding.to_dict(),
+            "context": self.context.to_dict()
+        }
+
+        # A truthiness test would drop an explicitly empty metadata dict.
+        if self.processing_metadata is not None:
+            result["processing_metadata"] = self.processing_metadata
+
+        return result
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> 'EnrichedScreenState':
+        """
+        Build an instance from a dict such as the one ``to_dict`` produces.
+
+        "ui_elements", "mode" and "processing_metadata" are optional and fall
+        back to [], "light" and None; every other key is required.
+
+        Raises:
+            KeyError: If a required key is missing.
+            ValueError: If "timestamp" is not a valid ISO 8601 string.
+        """
+        # NOTE: "schema_version" is not consulted; only one schema is handled.
+        timestamp = datetime.fromisoformat(data["timestamp"])
+
+        # Rebuild the nested structures.
+        window = WindowInfo.from_dict(data["window"])
+        raw = RawData.from_dict(data["raw"])
+        perception = PerceptionData.from_dict(data["perception"])
+        ui_elements = [UIElement.from_dict(elem_data) for elem_data in data.get("ui_elements", [])]
+        state_embedding = StateEmbedding.from_dict(data["state_embedding"])
+        context = ContextData.from_dict(data["context"])
+
+        return cls(
+            screen_state_id=data["screen_state_id"],
+            timestamp=timestamp,
+            session_id=data["session_id"],
+            window=window,
+            raw=raw,
+            perception=perception,
+            ui_elements=ui_elements,
+            state_embedding=state_embedding,
+            context=context,
+            mode=data.get("mode", "light"),
+            processing_metadata=data.get("processing_metadata")
+        )
+
+    def to_json(self) -> str:
+        """Serialize to indented JSON, keeping non-ASCII characters as-is."""
+        return json.dumps(self.to_dict(), indent=2, ensure_ascii=False)
+
+    @classmethod
+    def from_json(cls, json_str: str) -> 'EnrichedScreenState':
+        """Deserialize from a JSON string."""
+        return cls.from_dict(json.loads(json_str))
+
+    @classmethod
+    def create_light_mode(
+        cls,
+        screen_state_id: str,
+        session_id: str,
+        window: WindowInfo,
+        screenshot_path: str,
+        image_embedding_provider: str,
+        image_embedding_vector_id: str
+    ) -> 'EnrichedScreenState':
+        """
+        Create an EnrichedScreenState in light mode.
+
+        The result has no UI elements, no detected text, no embedding
+        components and a default context; its timestamp is the current
+        local time (``datetime.now()``, timezone-naive).
+
+        Args:
+            screen_state_id: ID of the screen state.
+            session_id: ID of the session.
+            window: Information about the window.
+            screenshot_path: Path to the screenshot.
+            image_embedding_provider: Provider of the image embedding.
+            image_embedding_vector_id: ID of the image embedding vector.
+
+        Returns:
+            The new EnrichedScreenState with mode "light".
+        """
+        return cls(
+            screen_state_id=screen_state_id,
+            timestamp=datetime.now(),
+            session_id=session_id,
+            window=window,
+            raw=RawData(screenshot_path=screenshot_path),
+            perception=PerceptionData(detected_text=[]),
+            ui_elements=[],  # empty in light mode
+            state_embedding=StateEmbedding(
+                provider=image_embedding_provider,
+                vector_id=image_embedding_vector_id,
+                components=None  # no components in light mode
+            ),
+            context=ContextData(),
+            mode="light"
+        )
+
+
+# Tests for EnrichedScreenState
+def test_enriched_screen_state():
+    """Exercise EnrichedScreenState in light, enriched and complete modes.
+
+    Each step prints a short report and asserts the property it demonstrates,
+    so the final success message is only reached when the checks hold.
+    """
+    print("\n" + "=" * 50)
+    print("Test des modèles EnrichedScreenState")
+    print("=" * 50)
+
+    # Light mode creation
+    print("\n1. Test création en mode light:")
+    window = WindowInfo(
+        app_name="test_app",
+        window_title="Test Window",
+        screen_resolution=(1920, 1080)
+    )
+
+    screen_state_light = EnrichedScreenState.create_light_mode(
+        screen_state_id="screen_001",
+        session_id="session_001",
+        window=window,
+        screenshot_path="data/screens/screen_001.png",
+        image_embedding_provider="openclip_ViT-B-32",
+        image_embedding_vector_id="data/embeddings/screen_001.npy"
+    )
+
+    print(f"   Screen State ID: {screen_state_light.screen_state_id}")
+    print(f"   Mode: {screen_state_light.mode}")
+    print(f"   UI Elements: {len(screen_state_light.ui_elements)}")
+    print(f"   State Embedding Provider: {screen_state_light.state_embedding.provider}")
+    print(f"   Has Components: {screen_state_light.state_embedding.components is not None}")
+    assert screen_state_light.mode == "light"
+    assert screen_state_light.ui_elements == []
+    assert screen_state_light.state_embedding.components is None
+
+    # Light mode serialization
+    print("\n2. Test sérialisation JSON (mode light):")
+    json_str = screen_state_light.to_json()
+    print(f"   JSON length: {len(json_str)} chars")
+
+    # Light mode deserialization
+    print("\n3. Test désérialisation (mode light):")
+    screen_state_restored = EnrichedScreenState.from_json(json_str)
+    print(f"   Restored screen_state_id: {screen_state_restored.screen_state_id}")
+    print(f"   Restored mode: {screen_state_restored.mode}")
+    print(f"   Restored UI elements count: {len(screen_state_restored.ui_elements)}")
+    assert screen_state_restored == screen_state_light
+
+    # Enriched mode with one element
+    print("\n4. Test création en mode enriched:")
+    element = UIElement(
+        element_id="el_test_001",
+        type=UIElementType.BUTTON,
+        role="validate_action",
+        bbox=(100, 200, 300, 250),
+        label="Valider",
+        visual=VisualData(
+            screenshot_path="data/elements/el_001.png",
+            embedding_provider="openclip_ViT-B-32",
+            embedding_vector_id="data/embeddings/el_001.npy"
+        ),
+        text=TextData(
+            raw="Valider",
+            normalized="valider",
+            embedding_provider="clip_text",
+            embedding_vector_id="data/embeddings/el_001_text.npy"
+        ),
+        properties=ElementProperties(is_clickable=True),
+        context=ElementContext(
+            app_name="test_app",
+            window_title="Test Window"
+        ),
+        tags=["primary_action"],
+        confidence=0.95
+    )
+
+    screen_state_enriched = EnrichedScreenState(
+        screen_state_id="screen_002",
+        timestamp=datetime.now(),
+        session_id="session_001",
+        window=window,
+        raw=RawData(screenshot_path="data/screens/screen_002.png"),
+        perception=PerceptionData(detected_text=["Valider", "Annuler"]),
+        ui_elements=[element],
+        state_embedding=StateEmbedding(
+            provider="openclip_ViT-B-32",
+            vector_id="data/embeddings/screen_002.npy",
+            components=None
+        ),
+        context=ContextData(tags=["test"]),
+        mode="enriched"
+    )
+
+    print(f"   Screen State ID: {screen_state_enriched.screen_state_id}")
+    print(f"   Mode: {screen_state_enriched.mode}")
+    print(f"   UI Elements: {len(screen_state_enriched.ui_elements)}")
+    print(f"   Detected Text: {screen_state_enriched.perception.detected_text}")
+    assert screen_state_enriched.mode == "enriched"
+    assert len(screen_state_enriched.ui_elements) == 1
+
+    # Complete mode with embedding components
+    print("\n5. Test création en mode complete:")
+    components = EmbeddingComponents(
+        image_embedding=ComponentInfo(
+            provider="openclip_ViT-B-32",
+            vector_id="data/embeddings/screen_003_image.npy"
+        ),
+        text_embedding=ComponentInfo(
+            provider="clip_text",
+            vector_id="data/embeddings/screen_003_text.npy"
+        ),
+        title_embedding=ComponentInfo(
+            provider="clip_text",
+            vector_id="data/embeddings/screen_003_title.npy"
+        )
+    )
+
+    screen_state_complete = EnrichedScreenState(
+        screen_state_id="screen_003",
+        timestamp=datetime.now(),
+        session_id="session_001",
+        window=window,
+        raw=RawData(screenshot_path="data/screens/screen_003.png"),
+        perception=PerceptionData(detected_text=["Valider", "Annuler"]),
+        ui_elements=[element],
+        state_embedding=StateEmbedding(
+            provider="multimodal_fusion_v1",
+            vector_id="data/embeddings/screen_003_fused.npy",
+            components=components
+        ),
+        context=ContextData(tags=["test"]),
+        mode="complete"
+    )
+
+    print(f"   Screen State ID: {screen_state_complete.screen_state_id}")
+    print(f"   Mode: {screen_state_complete.mode}")
+    print(f"   State Embedding Provider: {screen_state_complete.state_embedding.provider}")
+    print(f"   Has Components: {screen_state_complete.state_embedding.components is not None}")
+    assert screen_state_complete.state_embedding.components is not None
+
+    # Complete mode serialization
+    print("\n6. Test sérialisation JSON (mode complete):")
+    json_str_complete = screen_state_complete.to_json()
+    print(f"   JSON length: {len(json_str_complete)} chars")
+
+    # Complete mode deserialization
+    print("\n7. Test désérialisation (mode complete):")
+    screen_state_complete_restored = EnrichedScreenState.from_json(json_str_complete)
+    print(f"   Restored screen_state_id: {screen_state_complete_restored.screen_state_id}")
+    print(f"   Restored mode: {screen_state_complete_restored.mode}")
+    print(f"   Restored components: {screen_state_complete_restored.state_embedding.components is not None}")
+    assert screen_state_complete_restored == screen_state_complete
+
+    print("\n✓ Tous les tests EnrichedScreenState réussis!")
+
+
+if __name__ == "__main__":
+    # Run the EnrichedScreenState checks when the module is executed as a script
+    test_enriched_screen_state()