Files
rpa_vision_v3/core/models/ui_element.py
Dom a27b74cf22 v1.0 - Version stable: multi-PC, détection UI-DETR-1, 3 modes exécution
- Frontend v4 accessible sur réseau local (192.168.1.40)
- Ports ouverts: 3002 (frontend), 5001 (backend), 5004 (dashboard)
- Ollama GPU fonctionnel
- Self-healing interactif
- Dashboard confiance

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-29 11:23:51 +01:00

240 lines
7.5 KiB
Python

"""
UIElement - Couche 2 : Détection Sémantique
Représente un élément d'interface détecté avec :
- Type sémantique (button, text_input, etc.)
- Rôle sémantique (primary_action, cancel, etc.)
- Embeddings duaux (image + texte)
- Features visuelles
Tâche 4 : Contrats de données standardisés avec Pydantic
- BBox : Format exclusif (x, y, width, height)
- IDs : Strings uniquement
- Validation automatique des données
"""
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Tuple, Any
from pathlib import Path
import json
from .base_models import BBox, StandardID, DataConverter
@dataclass
class UIElementEmbeddings:
    """Dual embeddings (image + text) attached to a UI element.

    Both slots are optional and default to ``None``.
    """

    # Embedding of the cropped image
    image: Optional[Dict[str, Any]] = None
    # Embedding of the detected text
    text: Optional[Dict[str, Any]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return both embeddings as a plain dict keyed ``image`` / ``text``."""
        payload: Dict[str, Any] = {}
        payload["image"] = self.image
        payload["text"] = self.text
        return payload

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'UIElementEmbeddings':
        """Build an instance from a dict; absent keys become ``None``."""
        image_embedding = data.get("image")
        text_embedding = data.get("text")
        return cls(image=image_embedding, text=text_embedding)
@dataclass
class VisualFeatures:
    """Visual features of a UI element."""

    dominant_color: str
    has_icon: bool
    shape: str  # "rectangle", "circle", "rounded_rectangle"
    size_category: str  # "small", "medium", "large"

    def to_dict(self) -> Dict[str, Any]:
        """Return the four features as a plain dict, in field order."""
        names = ("dominant_color", "has_icon", "shape", "size_category")
        return {name: getattr(self, name) for name in names}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'VisualFeatures':
        """Build an instance from a dict.

        All four keys are mandatory; a missing one raises ``KeyError``.
        """
        names = ("dominant_color", "has_icon", "shape", "size_category")
        # Keys are read in field order, so the first missing key is the one
        # reported by the KeyError.
        values = {name: data[name] for name in names}
        return cls(**values)
@dataclass
class UIElement:
    """
    Detected interface element with a semantic type and a semantic role.

    Supported types:
    - button, text_input, checkbox, radio, dropdown
    - tab, link, icon, table_row, menu_item

    Semantic roles:
    - primary_action, cancel, submit, form_input
    - search_field, navigation, etc.

    Task 4: standardized contracts
    - element_id: string only (non-string values are converted in
      ``__post_init__``)
    - bbox: standardized BBox (x, y, width, height)
    """

    element_id: str  # Non-string values are converted via DataConverter
    type: str  # Semantic type
    role: str  # Semantic role
    bbox: BBox  # Standardized BBox (x, y, width, height)
    center: Tuple[int, int]  # (x, y) - kept in sync with bbox.center()
    label: str
    label_confidence: float
    embeddings: UIElementEmbeddings
    visual_features: VisualFeatures
    tags: List[str] = field(default_factory=list)
    confidence: float = 0.0
    metadata: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        """Normalize and validate the fields after dataclass initialization.

        Raises:
            ValueError: if ``confidence`` or ``label_confidence`` is outside
                the closed interval [0, 1].
        """
        # Convert element_id to a string when something else was supplied
        if not isinstance(self.element_id, str):
            self.element_id = str(DataConverter.ensure_id(self.element_id))

        # Convert bbox to a BBox instance when something else was supplied
        if not isinstance(self.bbox, BBox):
            self.bbox = DataConverter.ensure_bbox(self.bbox)

        # The bbox is authoritative: overwrite center whenever it disagrees
        bbox_center = self.bbox.center()
        if self.center != bbox_center:
            self.center = bbox_center

        # Both confidence values must lie between 0 and 1 inclusive
        if not 0.0 <= self.confidence <= 1.0:
            raise ValueError(f"Confidence must be between 0 and 1, got {self.confidence}")
        if not 0.0 <= self.label_confidence <= 1.0:
            raise ValueError(f"Label confidence must be between 0 and 1, got {self.label_confidence}")

    @classmethod
    def create_with_bbox_tuple(cls, element_id: str, type: str, role: str,
                               bbox_tuple: Tuple[int, int, int, int], **kwargs) -> 'UIElement':
        """
        Compatibility constructor that accepts the bbox as a plain tuple.

        Args:
            element_id: identifier of the element
            type: semantic type
            role: semantic role
            bbox_tuple: (x, y, width, height)
            **kwargs: remaining UIElement fields (label, label_confidence,
                embeddings, visual_features, ...), forwarded unchanged

        Returns:
            A UIElement whose ``center`` is taken from the built bbox.
        """
        bbox = BBox.from_tuple(bbox_tuple)
        center = bbox.center()

        return cls(
            element_id=element_id,
            type=type,
            role=role,
            bbox=bbox,
            center=center,
            **kwargs
        )

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the element to a plain dict suitable for ``json.dumps``."""
        # Pydantic v2 renamed BaseModel.dict() to model_dump() and deprecated
        # the old name; prefer the new method and fall back to the v1 one so
        # both major versions are supported.
        dump_bbox = getattr(self.bbox, "model_dump", None)
        if dump_bbox is None:
            dump_bbox = self.bbox.dict

        return {
            "element_id": self.element_id,
            "type": self.type,
            "role": self.role,
            "bbox": dump_bbox(),
            "center": list(self.center),
            "label": self.label,
            "label_confidence": self.label_confidence,
            "embeddings": self.embeddings.to_dict(),
            "visual_features": self.visual_features.to_dict(),
            "tags": self.tags,
            "confidence": self.confidence,
            "metadata": self.metadata
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'UIElement':
        """Deserialize an element from a dict, migrating legacy data first.

        Required keys: ``element_id``, ``type``, ``role``, ``bbox``,
        ``label``, ``label_confidence``, ``embeddings``, ``visual_features``
        (a missing one raises ``KeyError``). ``center``, ``tags``,
        ``confidence`` and ``metadata`` are optional.
        """
        # Bring bbox / element_id up to the current data contracts
        migrated_data = DataConverter.migrate_bbox_dict(data, ['bbox'])
        migrated_data = DataConverter.migrate_id_dict(migrated_data, ['element_id'])

        embeddings = UIElementEmbeddings.from_dict(migrated_data["embeddings"])
        visual_features = VisualFeatures.from_dict(migrated_data["visual_features"])

        # bbox may be a dict of BBox fields or a legacy value
        bbox_data = migrated_data["bbox"]
        if isinstance(bbox_data, dict):
            bbox = BBox(**bbox_data)
        else:
            bbox = DataConverter.ensure_bbox(bbox_data)

        # Use the stored center when present, otherwise derive it from bbox
        center_data = migrated_data.get("center")
        if center_data:
            center = tuple(center_data)
        else:
            center = bbox.center()

        # A key that is present but null (JSON ``null``) is treated like a
        # missing key, so tags / metadata never end up as None.
        tags = migrated_data.get("tags") or []
        metadata = migrated_data.get("metadata") or {}

        return cls(
            element_id=migrated_data["element_id"],
            type=migrated_data["type"],
            role=migrated_data["role"],
            bbox=bbox,
            center=center,
            label=migrated_data["label"],
            label_confidence=migrated_data["label_confidence"],
            embeddings=embeddings,
            visual_features=visual_features,
            tags=tags,
            confidence=migrated_data.get("confidence", 0.0),
            metadata=metadata
        )

    def to_json(self) -> str:
        """Serialize the element to a JSON string (2-space indentation)."""
        return json.dumps(self.to_dict(), indent=2)

    @classmethod
    def from_json(cls, json_str: str) -> 'UIElement':
        """Deserialize an element from a JSON string."""
        data = json.loads(json_str)
        return cls.from_dict(data)
# Supported element types
UI_ELEMENT_TYPES = [
    "button", "text_input", "checkbox", "radio", "dropdown",
    "tab", "link", "icon", "table_row", "menu_item",
    "label", "image", "container",
]
# Supported semantic roles
UI_ELEMENT_ROLES = [
    "primary_action", "secondary_action", "cancel", "submit",
    "form_input", "search_field", "navigation", "data_display",
    "selectable_item", "action_trigger", "status_indicator",
    "delete_action", "dangerous_action",
]