Initial commit
This commit is contained in:
827
geniusia2/core/ui_element_models.py
Normal file
827
geniusia2/core/ui_element_models.py
Normal file
@@ -0,0 +1,827 @@
|
||||
"""
|
||||
Modèles de données pour la détection d'éléments UI et l'état d'écran enrichi.
|
||||
Implémente les structures UIElement et EnrichedScreenState pour le système RPA Vision V2.
|
||||
|
||||
Phase 1 - Mode Light: Structures de base avec compatibilité arrière complète.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field, asdict
|
||||
from datetime import datetime
|
||||
from typing import List, Dict, Any, Tuple, Optional
|
||||
from enum import Enum
|
||||
import json
|
||||
import hashlib
|
||||
import numpy as np
|
||||
|
||||
|
||||
class UIElementType(Enum):
    """Supported kinds of UI element.

    Each member's value is the string stored in the ``"type"`` field of a
    serialized UI element and used to look the member up again on load.
    """

    BUTTON = "button"
    TEXT_INPUT = "text_input"
    DROPDOWN = "dropdown"
    TAB = "tab"
    CHECKBOX = "checkbox"
    RADIO_BUTTON = "radio_button"
    LINK = "link"
    GENERIC_INTERACTIVE = "generic_interactive"
|
||||
|
||||
|
||||
@dataclass
class VisualData:
    """Visual data of a UI element: its screenshot and embedding reference."""

    screenshot_path: str
    embedding_provider: str  # e.g. "openclip_ViT-B-32"
    embedding_vector_id: str  # path to the .npy file

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dict using the nested ``embedding`` layout."""
        embedding = {
            "provider": self.embedding_provider,
            "vector_id": self.embedding_vector_id,
        }
        return {"screenshot_path": self.screenshot_path, "embedding": embedding}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'VisualData':
        """Build an instance from either the nested or the legacy flat layout.

        The nested layout is recognised by the presence of an ``"embedding"``
        key; otherwise the flat ``embedding_provider`` / ``embedding_vector_id``
        keys are read, each defaulting to an empty string.
        """
        if "embedding" not in data:
            # Legacy flat layout.
            return cls(
                screenshot_path=data["screenshot_path"],
                embedding_provider=data.get("embedding_provider", ""),
                embedding_vector_id=data.get("embedding_vector_id", ""),
            )

        # Current nested layout.
        path = data["screenshot_path"]
        embedding = data["embedding"]
        return cls(
            screenshot_path=path,
            embedding_provider=embedding["provider"],
            embedding_vector_id=embedding["vector_id"],
        )
|
||||
|
||||
|
||||
@dataclass
class TextData:
    """Text data of a UI element: raw and normalized text plus embedding reference."""

    raw: str
    normalized: str
    embedding_provider: str  # e.g. "clip_text"
    embedding_vector_id: str  # path to the .npy file

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dict using the nested ``embedding`` layout."""
        embedding = {
            "provider": self.embedding_provider,
            "vector_id": self.embedding_vector_id,
        }
        return {
            "raw": self.raw,
            "normalized": self.normalized,
            "embedding": embedding,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'TextData':
        """Build an instance from either the nested or the legacy flat layout.

        With an ``"embedding"`` key present, ``raw``, ``normalized`` and both
        embedding sub-keys are required. Without it, every field is optional
        and defaults to an empty string.
        """
        if "embedding" not in data:
            # Legacy flat layout: everything is optional.
            return cls(
                raw=data.get("raw", ""),
                normalized=data.get("normalized", ""),
                embedding_provider=data.get("embedding_provider", ""),
                embedding_vector_id=data.get("embedding_vector_id", ""),
            )

        # Current nested layout.
        raw_text = data["raw"]
        normalized_text = data["normalized"]
        embedding = data["embedding"]
        return cls(
            raw=raw_text,
            normalized=normalized_text,
            embedding_provider=embedding["provider"],
            embedding_vector_id=embedding["vector_id"],
        )
|
||||
|
||||
|
||||
@dataclass
class ElementProperties:
    """Boolean interaction flags of a UI element; every flag defaults to False."""

    is_clickable: bool = False
    is_focusable: bool = False
    is_dangerous: bool = False

    def to_dict(self) -> Dict[str, Any]:
        """Return the three flags as a JSON-serializable dict."""
        flag_names = ("is_clickable", "is_focusable", "is_dangerous")
        return {flag: getattr(self, flag) for flag in flag_names}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'ElementProperties':
        """Build an instance from a dict; a missing flag is read as False."""
        flag_names = ("is_clickable", "is_focusable", "is_dangerous")
        return cls(**{flag: data.get(flag, False) for flag in flag_names})
|
||||
|
||||
|
||||
@dataclass
class ElementContext:
    """Context of a UI element: owning application, window title, optional workflow hint."""

    app_name: str
    window_title: str
    workflow_hint: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dict; ``workflow_hint`` is emitted even when None."""
        return dict(
            app_name=self.app_name,
            window_title=self.window_title,
            workflow_hint=self.workflow_hint,
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'ElementContext':
        """Build an instance from a dict.

        ``app_name`` and ``window_title`` are required (KeyError if absent);
        ``workflow_hint`` falls back to None.
        """
        app = data["app_name"]
        title = data["window_title"]
        hint = data.get("workflow_hint")
        return cls(app_name=app, window_title=title, workflow_hint=hint)
|
||||
|
||||
|
||||
@dataclass
class UIElement:
    """A detected user-interface element.

    Attributes:
        element_id: Stable identifier; ``generate_element_id`` derives one from
            the application name, the bbox centre and the normalized label.
        type: Kind of element (button, text_input, ...).
        role: Semantic role (e.g. validate_invoice, search_field).
        bbox: Bounding box as (x1, y1, x2, y2).
        label: Visible text of the element.
        visual: Visual data (screenshot, embedding).
        text: Text data (raw, normalized, embedding).
        properties: Flags (is_clickable, is_focusable, is_dangerous).
        context: Context (app_name, window_title, workflow_hint).
        tags: Additional tags.
        confidence: Detection confidence score (0.0-1.0).
        detection_method: Name of the detection method used.
    """

    element_id: str
    type: UIElementType
    role: str
    bbox: Tuple[int, int, int, int]  # (x1, y1, x2, y2)
    label: str
    visual: VisualData
    text: TextData
    properties: ElementProperties
    context: ElementContext
    tags: List[str] = field(default_factory=list)
    confidence: float = 1.0
    detection_method: str = "unknown"

    @staticmethod
    def generate_element_id(app_name: str, bbox: Tuple[int, int, int, int], label: str) -> str:
        """Generate a stable identifier for a UI element.

        Args:
            app_name: Name of the application.
            bbox: Bounding box (x1, y1, x2, y2).
            label: Label of the element.

        Returns:
            ``"el_"`` followed by the first 16 hex characters of the SHA-256
            digest of ``"{app_name}_{center_x}_{center_y}_{normalized_label}"``.
        """
        # Integer centre of the bounding box.
        center_x = (bbox[0] + bbox[2]) // 2
        center_y = (bbox[1] + bbox[3]) // 2

        # Normalize the label: lowercase, then strip surrounding whitespace.
        label_normalized = label.lower().strip()

        hash_input = f"{app_name}_{center_x}_{center_y}_{label_normalized}"
        hash_hex = hashlib.sha256(hash_input.encode('utf-8')).hexdigest()[:16]
        return f"el_{hash_hex}"

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dict tagged with schema ``uielement_v1``.

        ``tags`` is copied so that later changes to the returned dict do not
        leak back into this element (and vice versa).
        """
        return {
            "schema_version": "uielement_v1",
            "element_id": self.element_id,
            "type": self.type.value,
            "role": self.role,
            "bbox": list(self.bbox),
            "label": self.label,
            "confidence": float(self.confidence),
            "detection_method": self.detection_method,
            "visual": self.visual.to_dict(),
            "text": self.text.to_dict(),
            "properties": self.properties.to_dict(),
            "context": self.context.to_dict(),
            "tags": list(self.tags),
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'UIElement':
        """Build an instance from a dict produced by ``to_dict``.

        ``schema_version`` is not consulted: only one schema exists so far.
        ``tags`` (missing or null becomes an empty list), ``confidence`` and
        ``detection_method`` are optional; every other key is required.

        Raises:
            KeyError: If a required key is missing.
            ValueError: If ``data["type"]`` is not a known ``UIElementType`` value.
        """
        element_type = UIElementType(data["type"])

        # Rebuild the nested structures.
        visual = VisualData.from_dict(data["visual"])
        text = TextData.from_dict(data["text"])
        properties = ElementProperties.from_dict(data["properties"])
        context = ElementContext.from_dict(data["context"])

        return cls(
            element_id=data["element_id"],
            type=element_type,
            role=data["role"],
            bbox=tuple(data["bbox"]),
            label=data["label"],
            visual=visual,
            text=text,
            properties=properties,
            context=context,
            # Copy so the element does not share its list with the input dict.
            tags=list(data.get("tags") or []),
            confidence=data.get("confidence", 1.0),
            detection_method=data.get("detection_method", "unknown"),
        )

    def to_json(self) -> str:
        """Serialize to a JSON string (2-space indent, non-ASCII kept as-is)."""
        return json.dumps(self.to_dict(), indent=2, ensure_ascii=False)

    @classmethod
    def from_json(cls, json_str: str) -> 'UIElement':
        """Deserialize from a JSON string produced by ``to_json``."""
        return cls.from_dict(json.loads(json_str))
|
||||
|
||||
|
||||
def test_ui_element():
    """Smoke-test UIElement: ID generation, construction and JSON round trip.

    Each step prints a short report and asserts the property it claims to
    check, so the final success message is only printed when the checks hold.
    """
    print("Test des modèles UIElement")
    print("=" * 50)

    # Element ID generation.
    print("\n1. Test génération d'element_id:")
    element_id = UIElement.generate_element_id(
        app_name="test_app",
        bbox=(100, 200, 300, 250),
        label="Valider"
    )
    print(f" Element ID: {element_id}")
    # "el_" prefix followed by 16 hex characters.
    assert element_id.startswith("el_") and len(element_id) == 19

    # UIElement construction.
    print("\n2. Test création UIElement:")
    element = UIElement(
        element_id=element_id,
        type=UIElementType.BUTTON,
        role="validate_action",
        bbox=(100, 200, 300, 250),
        label="Valider",
        visual=VisualData(
            screenshot_path="data/elements/el_001.png",
            embedding_provider="openclip_ViT-B-32",
            embedding_vector_id="data/embeddings/el_001.npy"
        ),
        text=TextData(
            raw="Valider",
            normalized="valider",
            embedding_provider="clip_text",
            embedding_vector_id="data/embeddings/el_001_text.npy"
        ),
        properties=ElementProperties(
            is_clickable=True,
            is_focusable=True,
            is_dangerous=False
        ),
        context=ElementContext(
            app_name="test_app",
            window_title="Test Window",
            workflow_hint="WF_test"
        ),
        tags=["primary_action"],
        confidence=0.95,
        detection_method="heuristic_rectangle"
    )

    print(f" Element ID: {element.element_id}")
    print(f" Type: {element.type.value}")
    print(f" Role: {element.role}")
    print(f" Label: {element.label}")
    print(f" Confidence: {element.confidence}")

    # Serialization.
    print("\n3. Test sérialisation JSON:")
    json_str = element.to_json()
    print(f" JSON length: {len(json_str)} chars")
    print(f" Schema version: uielement_v1")
    assert json.loads(json_str)["schema_version"] == "uielement_v1"

    # Deserialization.
    print("\n4. Test désérialisation:")
    element_restored = UIElement.from_json(json_str)
    print(f" Restored element_id: {element_restored.element_id}")
    print(f" Restored type: {element_restored.type.value}")
    print(f" Restored label: {element_restored.label}")
    assert element_restored == element, "JSON round trip altered the element"

    # ID stability: identical inputs must give identical IDs.
    print("\n5. Test stabilité de l'element_id:")
    element_id_2 = UIElement.generate_element_id(
        app_name="test_app",
        bbox=(100, 200, 300, 250),
        label="Valider"
    )
    print(f" ID 1: {element_id}")
    print(f" ID 2: {element_id_2}")
    print(f" IDs identiques: {element_id == element_id_2}")
    assert element_id == element_id_2, "element_id is not stable"

    print("\n✓ Tous les tests basiques réussis!")


if __name__ == "__main__":
    test_ui_element()
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# EnrichedScreenState and related structures
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@dataclass
class WindowInfo:
    """Information about the active window."""

    app_name: str
    window_title: str
    screen_resolution: Tuple[int, int]  # (width, height)

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dict; the resolution is emitted as a list."""
        resolution = list(self.screen_resolution)
        return dict(
            app_name=self.app_name,
            window_title=self.window_title,
            screen_resolution=resolution,
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'WindowInfo':
        """Build an instance from a dict; all three keys are required.

        The resolution sequence is converted back to a tuple.
        """
        app = data["app_name"]
        title = data["window_title"]
        resolution = tuple(data["screen_resolution"])
        return cls(app_name=app, window_title=title, screen_resolution=resolution)
|
||||
|
||||
|
||||
@dataclass
class RawData:
    """Raw screen-capture data; currently only the screenshot path."""

    screenshot_path: str

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dict holding the screenshot path."""
        return dict(screenshot_path=self.screenshot_path)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'RawData':
        """Build an instance from a dict; ``screenshot_path`` is required."""
        path = data["screenshot_path"]
        return cls(path)
|
||||
|
||||
|
||||
@dataclass
class PerceptionData:
    """Perception data (detected text, OCR results, ...)."""

    detected_text: List[str] = field(default_factory=list)
    ocr_results: Optional[Dict[str, Any]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dict.

        ``detected_text`` is copied so that mutating the returned dict does not
        change this object. ``ocr_results`` is an opaque payload and is passed
        through by reference.
        """
        return {
            "detected_text": list(self.detected_text),
            "ocr_results": self.ocr_results,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'PerceptionData':
        """Build an instance from a dict; both keys are optional.

        A missing or null ``detected_text`` becomes an empty list, and the
        list is copied so the instance does not share it with ``data``.
        """
        return cls(
            detected_text=list(data.get("detected_text") or []),
            ocr_results=data.get("ocr_results"),
        )
|
||||
|
||||
|
||||
@dataclass
class ComponentInfo:
    """Reference to one embedding component: its provider and vector id."""

    provider: str
    vector_id: str

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dict with ``provider`` and ``vector_id``."""
        return dict(provider=self.provider, vector_id=self.vector_id)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'ComponentInfo':
        """Build an instance from a dict; both keys are required."""
        provider = data["provider"]
        vector_id = data["vector_id"]
        return cls(provider, vector_id)
|
||||
|
||||
|
||||
@dataclass
class EmbeddingComponents:
    """Individual components of a multi-modal state embedding.

    Every component is optional; absent components are omitted from the
    serialized form.
    """

    image_embedding: Optional[ComponentInfo] = None
    text_embedding: Optional[ComponentInfo] = None
    title_embedding: Optional[ComponentInfo] = None
    ui_embedding: Optional[ComponentInfo] = None
    context_embedding: Optional[ComponentInfo] = None

    # Component names in serialization order. Un-annotated on purpose so the
    # dataclass machinery does not treat it as a field.
    _COMPONENT_NAMES = (
        "image_embedding",
        "text_embedding",
        "title_embedding",
        "ui_embedding",
        "context_embedding",
    )

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dict containing only the components that are set."""
        result = {}
        for name in self._COMPONENT_NAMES:
            component = getattr(self, name)
            if component is not None:
                result[name] = component.to_dict()
        return result

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'EmbeddingComponents':
        """Build an instance from a dict.

        A component whose key is missing, or present with a null value, is
        left as None instead of being handed to ``ComponentInfo.from_dict``.
        """
        components = {}
        for name in cls._COMPONENT_NAMES:
            payload = data.get(name)
            components[name] = (
                ComponentInfo.from_dict(payload) if payload is not None else None
            )
        return cls(**components)
|
||||
|
||||
|
||||
@dataclass
class StateEmbedding:
    """Unified state embedding (multi-modal or single)."""

    provider: str
    vector_id: str
    components: Optional[EmbeddingComponents] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dict; ``components`` is included only when set."""
        serialized = dict(provider=self.provider, vector_id=self.vector_id)
        if not self.components:
            return serialized
        serialized["components"] = self.components.to_dict()
        return serialized

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'StateEmbedding':
        """Build an instance from a dict.

        ``provider`` and ``vector_id`` are required. A missing or falsy
        ``components`` entry (including an empty dict) yields ``components=None``.
        """
        raw_components = data.get("components")
        parsed = EmbeddingComponents.from_dict(raw_components) if raw_components else None
        return cls(
            provider=data["provider"],
            vector_id=data["vector_id"],
            components=parsed,
        )
|
||||
|
||||
|
||||
@dataclass
class ContextData:
    """Workflow context data."""

    current_workflow_candidate: Optional[str] = None
    tags: List[str] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dict.

        ``tags`` and ``metadata`` are shallow-copied so that mutating the
        returned dict does not change this object.
        """
        return {
            "current_workflow_candidate": self.current_workflow_candidate,
            "tags": list(self.tags),
            "metadata": dict(self.metadata),
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'ContextData':
        """Build an instance from a dict; every key is optional.

        Missing or null ``tags`` / ``metadata`` become empty containers, and
        both are shallow-copied so the instance does not share them with ``data``.
        """
        return cls(
            current_workflow_candidate=data.get("current_workflow_candidate"),
            tags=list(data.get("tags") or []),
            metadata=dict(data.get("metadata") or {}),
        )
|
||||
|
||||
|
||||
@dataclass
class EnrichedScreenState:
    """Screen state enriched with UI elements and a multi-modal embedding.

    Attributes:
        screen_state_id: Unique identifier of the screen state.
        timestamp: Capture timestamp.
        session_id: Session identifier.
        window: Information about the window.
        raw: Raw data (screenshot_path).
        perception: Perception data (detected text).
        ui_elements: List of detected UI elements.
        state_embedding: Unified state embedding.
        context: Workflow context.
        mode: Processing mode ("light", "enriched", "complete").
        processing_metadata: Processing metadata (optional).
    """

    screen_state_id: str
    timestamp: datetime
    session_id: str
    window: WindowInfo
    raw: RawData
    perception: PerceptionData
    ui_elements: List[UIElement]
    state_embedding: StateEmbedding
    context: ContextData
    mode: str = "light"
    processing_metadata: Optional[Dict[str, Any]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dict tagged with schema ``screenstate_v1``.

        The timestamp is written in ISO-8601 form. ``processing_metadata`` is
        omitted only when it is None, so an empty dict survives a round trip.
        """
        result = {
            "schema_version": "screenstate_v1",
            "mode": self.mode,
            "screen_state_id": self.screen_state_id,
            "timestamp": self.timestamp.isoformat(),
            "session_id": self.session_id,
            "window": self.window.to_dict(),
            "raw": self.raw.to_dict(),
            "perception": self.perception.to_dict(),
            "ui_elements": [elem.to_dict() for elem in self.ui_elements],
            "state_embedding": self.state_embedding.to_dict(),
            "context": self.context.to_dict(),
        }

        if self.processing_metadata is not None:
            result["processing_metadata"] = self.processing_metadata

        return result

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'EnrichedScreenState':
        """Build an instance from a dict produced by ``to_dict``.

        ``schema_version`` is not consulted: only one schema exists so far.
        ``ui_elements`` (missing or null becomes an empty list), ``mode`` and
        ``processing_metadata`` are optional; every other key is required.

        Raises:
            KeyError: If a required key is missing.
            ValueError: If ``timestamp`` is not a valid ISO-8601 string.
        """
        timestamp = datetime.fromisoformat(data["timestamp"])

        # Rebuild the nested structures.
        window = WindowInfo.from_dict(data["window"])
        raw = RawData.from_dict(data["raw"])
        perception = PerceptionData.from_dict(data["perception"])
        ui_elements = [
            UIElement.from_dict(elem_data)
            for elem_data in (data.get("ui_elements") or [])
        ]
        state_embedding = StateEmbedding.from_dict(data["state_embedding"])
        context = ContextData.from_dict(data["context"])

        return cls(
            screen_state_id=data["screen_state_id"],
            timestamp=timestamp,
            session_id=data["session_id"],
            window=window,
            raw=raw,
            perception=perception,
            ui_elements=ui_elements,
            state_embedding=state_embedding,
            context=context,
            mode=data.get("mode", "light"),
            processing_metadata=data.get("processing_metadata"),
        )

    def to_json(self) -> str:
        """Serialize to a JSON string (2-space indent, non-ASCII kept as-is)."""
        return json.dumps(self.to_dict(), indent=2, ensure_ascii=False)

    @classmethod
    def from_json(cls, json_str: str) -> 'EnrichedScreenState':
        """Deserialize from a JSON string produced by ``to_json``."""
        return cls.from_dict(json.loads(json_str))

    @classmethod
    def create_light_mode(
        cls,
        screen_state_id: str,
        session_id: str,
        window: WindowInfo,
        screenshot_path: str,
        image_embedding_provider: str,
        image_embedding_vector_id: str
    ) -> 'EnrichedScreenState':
        """Create an EnrichedScreenState in light mode (backward compatibility).

        The state is stamped with ``datetime.now()``, has no UI elements, no
        detected text, a default context, and a state embedding without
        components.

        Args:
            screen_state_id: ID of the screen state.
            session_id: ID of the session.
            window: Information about the window.
            screenshot_path: Path to the screenshot.
            image_embedding_provider: Provider of the image embedding.
            image_embedding_vector_id: ID of the image embedding vector.

        Returns:
            An EnrichedScreenState with ``mode="light"``.
        """
        return cls(
            screen_state_id=screen_state_id,
            timestamp=datetime.now(),
            session_id=session_id,
            window=window,
            raw=RawData(screenshot_path=screenshot_path),
            perception=PerceptionData(detected_text=[]),
            ui_elements=[],  # empty in light mode
            state_embedding=StateEmbedding(
                provider=image_embedding_provider,
                vector_id=image_embedding_vector_id,
                components=None,  # no components in light mode
            ),
            context=ContextData(),
            mode="light",
        )
|
||||
|
||||
|
||||
# Tests pour EnrichedScreenState
|
||||
def test_enriched_screen_state():
    """Smoke-test EnrichedScreenState in light, enriched and complete modes.

    Each step prints a short report and asserts the property it claims to
    check, so the final success message is only printed when the checks hold.
    """
    print("\n" + "=" * 50)
    print("Test des modèles EnrichedScreenState")
    print("=" * 50)

    # Light mode.
    print("\n1. Test création en mode light:")
    window = WindowInfo(
        app_name="test_app",
        window_title="Test Window",
        screen_resolution=(1920, 1080)
    )

    screen_state_light = EnrichedScreenState.create_light_mode(
        screen_state_id="screen_001",
        session_id="session_001",
        window=window,
        screenshot_path="data/screens/screen_001.png",
        image_embedding_provider="openclip_ViT-B-32",
        image_embedding_vector_id="data/embeddings/screen_001.npy"
    )

    print(f" Screen State ID: {screen_state_light.screen_state_id}")
    print(f" Mode: {screen_state_light.mode}")
    print(f" UI Elements: {len(screen_state_light.ui_elements)}")
    print(f" State Embedding Provider: {screen_state_light.state_embedding.provider}")
    print(f" Has Components: {screen_state_light.state_embedding.components is not None}")
    assert screen_state_light.mode == "light"
    assert screen_state_light.ui_elements == []
    assert screen_state_light.state_embedding.components is None

    # Light-mode serialization.
    print("\n2. Test sérialisation JSON (mode light):")
    json_str = screen_state_light.to_json()
    print(f" JSON length: {len(json_str)} chars")

    # Light-mode deserialization.
    print("\n3. Test désérialisation (mode light):")
    screen_state_restored = EnrichedScreenState.from_json(json_str)
    print(f" Restored screen_state_id: {screen_state_restored.screen_state_id}")
    print(f" Restored mode: {screen_state_restored.mode}")
    print(f" Restored UI elements count: {len(screen_state_restored.ui_elements)}")
    assert screen_state_restored == screen_state_light, "light-mode round trip altered the state"

    # Enriched mode, with one UI element.
    print("\n4. Test création en mode enriched:")
    element = UIElement(
        element_id="el_test_001",
        type=UIElementType.BUTTON,
        role="validate_action",
        bbox=(100, 200, 300, 250),
        label="Valider",
        visual=VisualData(
            screenshot_path="data/elements/el_001.png",
            embedding_provider="openclip_ViT-B-32",
            embedding_vector_id="data/embeddings/el_001.npy"
        ),
        text=TextData(
            raw="Valider",
            normalized="valider",
            embedding_provider="clip_text",
            embedding_vector_id="data/embeddings/el_001_text.npy"
        ),
        properties=ElementProperties(is_clickable=True),
        context=ElementContext(
            app_name="test_app",
            window_title="Test Window"
        ),
        tags=["primary_action"],
        confidence=0.95
    )

    screen_state_enriched = EnrichedScreenState(
        screen_state_id="screen_002",
        timestamp=datetime.now(),
        session_id="session_001",
        window=window,
        raw=RawData(screenshot_path="data/screens/screen_002.png"),
        perception=PerceptionData(detected_text=["Valider", "Annuler"]),
        ui_elements=[element],
        state_embedding=StateEmbedding(
            provider="openclip_ViT-B-32",
            vector_id="data/embeddings/screen_002.npy",
            components=None
        ),
        context=ContextData(tags=["test"]),
        mode="enriched"
    )

    print(f" Screen State ID: {screen_state_enriched.screen_state_id}")
    print(f" Mode: {screen_state_enriched.mode}")
    print(f" UI Elements: {len(screen_state_enriched.ui_elements)}")
    print(f" Detected Text: {screen_state_enriched.perception.detected_text}")
    assert screen_state_enriched.mode == "enriched"
    assert len(screen_state_enriched.ui_elements) == 1

    # Complete mode, with embedding components.
    print("\n5. Test création en mode complete:")
    components = EmbeddingComponents(
        image_embedding=ComponentInfo(
            provider="openclip_ViT-B-32",
            vector_id="data/embeddings/screen_003_image.npy"
        ),
        text_embedding=ComponentInfo(
            provider="clip_text",
            vector_id="data/embeddings/screen_003_text.npy"
        ),
        title_embedding=ComponentInfo(
            provider="clip_text",
            vector_id="data/embeddings/screen_003_title.npy"
        )
    )

    screen_state_complete = EnrichedScreenState(
        screen_state_id="screen_003",
        timestamp=datetime.now(),
        session_id="session_001",
        window=window,
        raw=RawData(screenshot_path="data/screens/screen_003.png"),
        perception=PerceptionData(detected_text=["Valider", "Annuler"]),
        ui_elements=[element],
        state_embedding=StateEmbedding(
            provider="multimodal_fusion_v1",
            vector_id="data/embeddings/screen_003_fused.npy",
            components=components
        ),
        context=ContextData(tags=["test"]),
        mode="complete"
    )

    print(f" Screen State ID: {screen_state_complete.screen_state_id}")
    print(f" Mode: {screen_state_complete.mode}")
    print(f" State Embedding Provider: {screen_state_complete.state_embedding.provider}")
    print(f" Has Components: {screen_state_complete.state_embedding.components is not None}")
    assert screen_state_complete.state_embedding.components is not None

    # Complete-mode serialization.
    print("\n6. Test sérialisation JSON (mode complete):")
    json_str_complete = screen_state_complete.to_json()
    print(f" JSON length: {len(json_str_complete)} chars")

    # Complete-mode deserialization.
    print("\n7. Test désérialisation (mode complete):")
    screen_state_complete_restored = EnrichedScreenState.from_json(json_str_complete)
    print(f" Restored screen_state_id: {screen_state_complete_restored.screen_state_id}")
    print(f" Restored mode: {screen_state_complete_restored.mode}")
    print(f" Restored components: {screen_state_complete_restored.state_embedding.components is not None}")
    assert screen_state_complete_restored == screen_state_complete, (
        "complete-mode round trip altered the state"
    )

    print("\n✓ Tous les tests EnrichedScreenState réussis!")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the EnrichedScreenState smoke test.
    test_enriched_screen_state()
|
||||
Reference in New Issue
Block a user