v1.0 - Stable release: multi-PC, UI-DETR-1 detection, 3 execution modes

- Frontend v4 reachable on the local network (192.168.1.40)
- Open ports: 3002 (frontend), 5001 (backend), 5004 (dashboard)
- Ollama running on GPU
- Interactive self-healing
- Confidence dashboard

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Dom
2026-01-29 11:23:51 +01:00
parent 21bfa3b337
commit a27b74cf22
1595 changed files with 412691 additions and 400 deletions

core/models/__init__.py Normal file

@@ -0,0 +1,127 @@
"""
Data models for the 5 architecture layers.
Note:
- There are TWO different notions of "window context":
- RawWindowContext (layer 0): the active window at the moment of a raw event
- ScreenWindowContext (layer 1): the active window for a screen state
For backward compatibility, `WindowContext` still points to the layer-0 context.
Prefer importing `RawWindowContext` or `ScreenWindowContext` explicitly.
Author: Dom, Alice Kiro - 15 December 2024
"""
from typing import TYPE_CHECKING
# Direct imports for the base types (layers 0-2)
from .base_models import BBox, Point, Timestamp, StandardID, DataConverter
from .raw_session import RawSession, Event, Screenshot, RawWindowContext, WindowContext
from .screen_state import ScreenState, RawLevel, PerceptionLevel, ContextLevel, WindowContext as ScreenWindowContext
from .ui_element import UIElement, UIElementEmbeddings, VisualFeatures
# Conditional imports to avoid import cycles
if TYPE_CHECKING:
from .state_embedding import StateEmbedding, EmbeddingComponent
from .workflow_graph import (
Workflow,
WorkflowNode,
WorkflowEdge,
ScreenTemplate,
Action,
TargetSpec,
EdgeConstraints,
PostConditions,
LearningState,
ActionType,
SelectionPolicy,
WindowConstraint,
TextConstraint,
UIConstraint,
EmbeddingPrototype,
EdgeStats,
SafetyRules,
WorkflowStats,
LearningConfig,
)
from .execution_result import (
WorkflowExecutionResult,
PerformanceMetrics,
RecoveryInfo,
StepExecutionStatus
)
# Lazy-loading helpers to avoid circular imports
def get_state_embedding():
"""Lazy import pour StateEmbedding"""
from .state_embedding import StateEmbedding
return StateEmbedding
def get_embedding_component():
"""Lazy import pour EmbeddingComponent"""
from .state_embedding import EmbeddingComponent
return EmbeddingComponent
def get_workflow():
"""Lazy import pour Workflow"""
from .workflow_graph import Workflow
return Workflow
def get_workflow_node():
"""Lazy import pour WorkflowNode"""
from .workflow_graph import WorkflowNode
return WorkflowNode
def get_workflow_edge():
"""Lazy import pour WorkflowEdge"""
from .workflow_graph import WorkflowEdge
return WorkflowEdge
def get_action():
"""Lazy import pour Action"""
from .workflow_graph import Action
return Action
def get_target_spec():
"""Lazy import pour TargetSpec"""
from .workflow_graph import TargetSpec
return TargetSpec
def get_execution_result():
"""Lazy import pour WorkflowExecutionResult"""
from .execution_result import WorkflowExecutionResult
return WorkflowExecutionResult
__all__ = [
# Standardized base models (Task 4)
"BBox",
"Point",
"Timestamp",
"StandardID",
"DataConverter",
# Layer 0
"RawSession",
"Event",
"Screenshot",
"RawWindowContext",
"WindowContext",
# Layer 1
"ScreenState",
"RawLevel",
"PerceptionLevel",
"ContextLevel",
"ScreenWindowContext",
# Layer 2
"UIElement",
"UIElementEmbeddings",
"VisualFeatures",
# Lazy-loading helpers
"get_state_embedding",
"get_embedding_component",
"get_workflow",
"get_workflow_node",
"get_workflow_edge",
"get_action",
"get_target_spec",
"get_execution_result",
]
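
A minimal usage sketch, assuming the repository's core package is importable; the values are illustrative. Base types import directly, while workflow types resolve lazily to sidestep the circular imports mentioned in the docstring.

from core.models import BBox, get_workflow

box = BBox(x=10, y=20, width=200, height=40)
Workflow = get_workflow()            # resolved only at call time, breaking the import cycle
print(box.to_tuple(), Workflow.__name__)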

core/models/base_models.py Normal file

@@ -0,0 +1,345 @@
"""
Standardized base models with Pydantic - Task 4
Standardized data contracts to keep all components consistent:
- BBox: exclusive (x, y, width, height) format with Pydantic validation
- Timestamp: datetime objects only
- IDs: strings only, with validation
Author: Dom, Alice Kiro
Date: 20 December 2024
"""
from pydantic import BaseModel, Field, validator
from typing import Tuple, Union, Dict, Any, Optional
from datetime import datetime
import uuid
class BBox(BaseModel):
"""
Standardized bounding box in (x, y, width, height) format
Requirement 4.1: exclusive (x, y, width, height) format with Pydantic validation
"""
x: int = Field(..., ge=0, description="Position X (coin supérieur gauche)")
y: int = Field(..., ge=0, description="Position Y (coin supérieur gauche)")
width: int = Field(..., gt=0, description="Largeur")
height: int = Field(..., gt=0, description="Hauteur")
@validator('x', 'y', pre=True)
def validate_coordinates(cls, v):
"""Valider que les coordonnées sont non-négatives"""
if isinstance(v, (int, float)):
if v < 0:
raise ValueError("Coordinates must be non-negative")
return int(v)
raise ValueError("Coordinates must be numeric")
@validator('width', 'height', pre=True)
def validate_dimensions(cls, v):
"""Valider que les dimensions sont positives"""
if isinstance(v, (int, float)):
if v <= 0:
raise ValueError("Dimensions must be positive")
return int(v)
raise ValueError("Dimensions must be numeric")
def to_tuple(self) -> Tuple[int, int, int, int]:
"""Conversion vers tuple (x, y, w, h)"""
return (self.x, self.y, self.width, self.height)
@classmethod
def from_tuple(cls, bbox_tuple: Tuple[int, int, int, int]) -> 'BBox':
"""Création depuis tuple (x, y, w, h)"""
if len(bbox_tuple) != 4:
raise ValueError("BBox tuple must have exactly 4 elements")
return cls(x=bbox_tuple[0], y=bbox_tuple[1], width=bbox_tuple[2], height=bbox_tuple[3])
@classmethod
def from_xyxy(cls, x1: int, y1: int, x2: int, y2: int) -> 'BBox':
"""Conversion depuis format (x1, y1, x2, y2)"""
return cls(
x=min(x1, x2),
y=min(y1, y2),
width=abs(x2 - x1),
height=abs(y2 - y1)
)
def to_xyxy(self) -> Tuple[int, int, int, int]:
"""Conversion vers format (x1, y1, x2, y2)"""
return (self.x, self.y, self.x + self.width, self.y + self.height)
def center(self) -> Tuple[int, int]:
"""Calculer le centre de la bbox"""
return (self.x + self.width // 2, self.y + self.height // 2)
def area(self) -> int:
"""Calculer l'aire de la bbox"""
return self.width * self.height
def contains_point(self, x: int, y: int) -> bool:
"""Vérifier si un point est dans la bbox"""
return (self.x <= x <= self.x + self.width and
self.y <= y <= self.y + self.height)
def intersects(self, other: 'BBox') -> bool:
"""Vérifier si cette bbox intersecte avec une autre"""
return not (self.x + self.width < other.x or
other.x + other.width < self.x or
self.y + self.height < other.y or
other.y + other.height < self.y)
def intersection(self, other: 'BBox') -> Optional['BBox']:
"""Calculer l'intersection avec une autre bbox"""
if not self.intersects(other):
return None
x1 = max(self.x, other.x)
y1 = max(self.y, other.y)
x2 = min(self.x + self.width, other.x + other.width)
y2 = min(self.y + self.height, other.y + other.height)
return BBox(x=x1, y=y1, width=x2-x1, height=y2-y1)
def union(self, other: 'BBox') -> 'BBox':
"""Calculer l'union avec une autre bbox"""
x1 = min(self.x, other.x)
y1 = min(self.y, other.y)
x2 = max(self.x + self.width, other.x + other.width)
y2 = max(self.y + self.height, other.y + other.height)
return BBox(x=x1, y=y1, width=x2-x1, height=y2-y1)
class Point(BaseModel):
"""
Standardized 2D point
Represents a point with x, y coordinates
"""
x: int = Field(..., description="Coordonnée X")
y: int = Field(..., description="Coordonnée Y")
@validator('x', 'y', pre=True)
def validate_coordinates(cls, v):
"""Valider que les coordonnées sont numériques"""
if isinstance(v, (int, float)):
return int(v)
raise ValueError("Coordinates must be numeric")
def to_tuple(self) -> Tuple[int, int]:
"""Conversion vers tuple (x, y)"""
return (self.x, self.y)
@classmethod
def from_tuple(cls, point_tuple: Tuple[int, int]) -> 'Point':
"""Création depuis tuple (x, y)"""
if len(point_tuple) != 2:
raise ValueError("Point tuple must have exactly 2 elements")
return cls(x=point_tuple[0], y=point_tuple[1])
def distance_to(self, other: 'Point') -> float:
"""Calculer la distance euclidienne vers un autre point"""
return ((self.x - other.x) ** 2 + (self.y - other.y) ** 2) ** 0.5
def is_inside_bbox(self, bbox: BBox) -> bool:
"""Vérifier si ce point est dans une bbox"""
return bbox.contains_point(self.x, self.y)
class Timestamp(BaseModel):
"""
Standardized timestamp backed by datetime
Requirement 4.2: datetime objects only, with conversion utilities
"""
value: datetime = Field(default_factory=datetime.now, description="Valeur datetime")
@validator('value', pre=True)
def validate_datetime(cls, v):
"""Valider et convertir vers datetime"""
if isinstance(v, datetime):
return v
elif isinstance(v, str):
try:
return datetime.fromisoformat(v.replace('Z', '+00:00'))
except ValueError:
raise ValueError(f"Cannot parse datetime string: {v}")
elif isinstance(v, (int, float)):
try:
return datetime.fromtimestamp(v)
except (ValueError, OSError):
raise ValueError(f"Cannot convert timestamp to datetime: {v}")
else:
raise ValueError(f"Cannot convert {type(v)} to datetime")
def to_iso(self) -> str:
"""Conversion vers format ISO"""
return self.value.isoformat()
def to_timestamp(self) -> float:
"""Conversion vers timestamp Unix"""
return self.value.timestamp()
@classmethod
def now(cls) -> 'Timestamp':
"""Créer un timestamp pour maintenant"""
return cls(value=datetime.now())
@classmethod
def from_iso(cls, iso_string: str) -> 'Timestamp':
"""Créer depuis string ISO"""
return cls(value=datetime.fromisoformat(iso_string.replace('Z', '+00:00')))
@classmethod
def from_timestamp(cls, timestamp: float) -> 'Timestamp':
"""Créer depuis timestamp Unix"""
return cls(value=datetime.fromtimestamp(timestamp))
class StandardID(BaseModel):
"""
Standardized string ID
Requirement 4.3: string IDs only, with validation
"""
value: str = Field(..., min_length=1, description="Valeur de l'ID")
@validator('value', pre=True)
def validate_id(cls, v):
"""Valider et convertir vers string"""
if isinstance(v, str):
if not v.strip():
raise ValueError("ID cannot be empty")
return v.strip()
elif isinstance(v, (int, float)):
return str(v)
elif isinstance(v, uuid.UUID):
return str(v)
else:
raise ValueError(f"Cannot convert {type(v)} to ID string")
def __str__(self) -> str:
return self.value
def __eq__(self, other) -> bool:
if isinstance(other, StandardID):
return self.value == other.value
elif isinstance(other, str):
return self.value == other
return False
def __hash__(self) -> int:
return hash(self.value)
@classmethod
def generate(cls) -> 'StandardID':
"""Générer un nouvel ID unique"""
return cls(value=str(uuid.uuid4()))
@classmethod
def from_uuid(cls, uuid_obj: uuid.UUID) -> 'StandardID':
"""Créer depuis UUID"""
return cls(value=str(uuid_obj))
# Conversion utilities for the migration
class DataConverter:
"""
Safe conversion utilities for migrating to the new data contracts
Requirement 4.4: ensure backward compatibility during the migration
"""
@staticmethod
def ensure_bbox(bbox: Union[BBox, Tuple, list, Dict, Any]) -> BBox:
"""Assurer que bbox est au format BBox standardisé"""
if isinstance(bbox, BBox):
return bbox
elif isinstance(bbox, (tuple, list)) and len(bbox) == 4:
return BBox.from_tuple(tuple(bbox))
elif isinstance(bbox, dict):
if all(k in bbox for k in ['x', 'y', 'width', 'height']):
return BBox(**bbox)
elif all(k in bbox for k in ['x1', 'y1', 'x2', 'y2']):
return BBox.from_xyxy(bbox['x1'], bbox['y1'], bbox['x2'], bbox['y2'])
else:
raise ValueError(f"Cannot convert dict to BBox: missing required keys")
else:
raise ValueError(f"Cannot convert {type(bbox)} to BBox")
@staticmethod
def ensure_timestamp(timestamp: Union[Timestamp, datetime, str, int, float, Any]) -> Timestamp:
"""Assurer que timestamp est un objet Timestamp standardisé"""
if isinstance(timestamp, Timestamp):
return timestamp
else:
return Timestamp(value=timestamp)
@staticmethod
def ensure_id(id_value: Union[StandardID, str, int, float, uuid.UUID, Any]) -> StandardID:
"""Assurer que l'ID est un StandardID"""
if isinstance(id_value, StandardID):
return id_value
else:
return StandardID(value=id_value)
@staticmethod
def migrate_bbox_dict(data: Dict[str, Any], bbox_fields: list = None) -> Dict[str, Any]:
"""Migrer les champs bbox dans un dictionnaire"""
if bbox_fields is None:
bbox_fields = ['bbox', 'bounding_box', 'bounds']
migrated = data.copy()
for field in bbox_fields:
if field in migrated:
try:
bbox = DataConverter.ensure_bbox(migrated[field])
migrated[field] = bbox.dict()
except Exception as e:
# Log l'erreur mais continue la migration
print(f"Warning: Could not migrate bbox field '{field}': {e}")
return migrated
@staticmethod
def migrate_timestamp_dict(data: Dict[str, Any], timestamp_fields: list = None) -> Dict[str, Any]:
"""Migrer les champs timestamp dans un dictionnaire"""
if timestamp_fields is None:
timestamp_fields = ['timestamp', 'created_at', 'updated_at', 'captured_at']
migrated = data.copy()
for field in timestamp_fields:
if field in migrated:
try:
timestamp = DataConverter.ensure_timestamp(migrated[field])
migrated[field] = timestamp.value
except Exception as e:
# Log l'erreur mais continue la migration
print(f"Warning: Could not migrate timestamp field '{field}': {e}")
return migrated
@staticmethod
def migrate_id_dict(data: Dict[str, Any], id_fields: list = None) -> Dict[str, Any]:
"""Migrer les champs ID dans un dictionnaire"""
if id_fields is None:
id_fields = ['id', 'element_id', 'session_id', 'workflow_id', 'node_id', 'edge_id']
migrated = data.copy()
for field in id_fields:
if field in migrated:
try:
id_obj = DataConverter.ensure_id(migrated[field])
migrated[field] = id_obj.value
except Exception as e:
# Log l'erreur mais continue la migration
print(f"Warning: Could not migrate ID field '{field}': {e}")
return migrated
# Compatibility aliases
BaseTimestamp = Timestamp
BaseID = StandardID
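
A short sketch of the conversion helpers; the legacy values below are invented. DataConverter normalises legacy bbox/timestamp/ID representations to the new contracts.

from core.models.base_models import BBox, DataConverter

legacy_box = {"x1": 10, "y1": 20, "x2": 110, "y2": 70}
box = DataConverter.ensure_bbox(legacy_box)            # BBox(x=10, y=20, width=100, height=50)

record = {"id": 42, "timestamp": 1716892800.0, "bbox": (5, 5, 30, 30)}
record = DataConverter.migrate_id_dict(record)         # "id" becomes the string "42"
record = DataConverter.migrate_timestamp_dict(record)  # "timestamp" becomes a datetime
record = DataConverter.migrate_bbox_dict(record)       # "bbox" becomes {"x": 5, "y": 5, "width": 30, "height": 30}
print(box.to_xyxy(), record["id"])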

core/models/execution_result.py Normal file

@@ -0,0 +1,268 @@
"""
Execution result models for WorkflowPipeline
Author: Dom, Alice Kiro - 20 December 2024
"""
import uuid
from dataclasses import dataclass, field
from datetime import datetime
from typing import Optional, Dict, Any, List
from enum import Enum
from .screen_state import ScreenState
from .workflow_graph import WorkflowEdge, Action
from ..execution.target_resolver import ResolvedTarget
class StepExecutionStatus(Enum):
"""Statut d'exécution d'étape de workflow"""
SUCCESS = "success"
FAILED = "failed"
NO_MATCH = "no_match"
WORKFLOW_COMPLETE = "workflow_complete"
TARGET_NOT_FOUND = "target_not_found"
POSTCONDITION_FAILED = "postcondition_failed"
EXECUTION_ERROR = "execution_error"
@dataclass
class RecoveryInfo:
"""Informations sur la récupération appliquée"""
strategy: str
message: str
success: bool
attempts: int = 0
duration_ms: float = 0.0
@dataclass
class PerformanceMetrics:
"""Métriques de performance d'exécution"""
total_execution_time_ms: float
state_matching_time_ms: float = 0.0
target_resolution_time_ms: float = 0.0
action_execution_time_ms: float = 0.0
error_handling_time_ms: float = 0.0
@dataclass
class WorkflowExecutionResult:
"""
Complete result of a workflow step execution
Contains all the metadata needed for auditing,
learning, and debugging.
"""
# Identifiants
execution_id: str = field(default_factory=lambda: str(uuid.uuid4()))
workflow_id: str = ""
correlation_id: str = field(default_factory=lambda: str(uuid.uuid4()))
# Statut d'exécution
status: StepExecutionStatus = StepExecutionStatus.FAILED
success: bool = False
step_type: str = "unknown"
# Contexte d'exécution
current_node: Optional[str] = None
target_node: Optional[str] = None
current_state: Optional[ScreenState] = None
# Action exécutée
action_executed: Optional[Dict[str, Any]] = None
target_resolved: Optional[ResolvedTarget] = None
# Résultats et erreurs
message: str = ""
error: Optional[str] = None
recovery_applied: Optional[RecoveryInfo] = None
# Métriques
performance_metrics: PerformanceMetrics = field(default_factory=lambda: PerformanceMetrics(0.0))
# Métadonnées d'audit
created_at: datetime = field(default_factory=datetime.now)
match_result: Optional[Dict[str, Any]] = None
execution_details: Dict[str, Any] = field(default_factory=dict)
@classmethod
def success(
cls,
execution_id: str,
workflow_id: str,
current_node: str,
target_node: str,
action_executed: Dict[str, Any],
target_resolved: Optional[ResolvedTarget] = None,
match_result: Optional[Dict[str, Any]] = None,
performance_metrics: Optional[PerformanceMetrics] = None
) -> 'WorkflowExecutionResult':
"""Créer un résultat de succès"""
return cls(
execution_id=execution_id,
workflow_id=workflow_id,
status=StepExecutionStatus.SUCCESS,
success=True,
step_type="action_execution",
current_node=current_node,
target_node=target_node,
action_executed=action_executed,
target_resolved=target_resolved,
match_result=match_result,
performance_metrics=performance_metrics or PerformanceMetrics(0.0),
message="Workflow step executed successfully"
)
@classmethod
def no_match(
cls,
execution_id: str,
workflow_id: str,
current_state: ScreenState,
recovery_info: Optional[RecoveryInfo] = None,
performance_metrics: Optional[PerformanceMetrics] = None
) -> 'WorkflowExecutionResult':
"""Créer un résultat d'échec de matching"""
return cls(
execution_id=execution_id,
workflow_id=workflow_id,
status=StepExecutionStatus.NO_MATCH,
success=False,
step_type="state_matching",
current_state=current_state,
recovery_applied=recovery_info,
performance_metrics=performance_metrics or PerformanceMetrics(0.0),
message="No matching state found in workflow",
error="State matching failed"
)
@classmethod
def workflow_complete(
cls,
execution_id: str,
workflow_id: str,
current_node: str,
performance_metrics: Optional[PerformanceMetrics] = None
) -> 'WorkflowExecutionResult':
"""Créer un résultat de workflow terminé"""
return cls(
execution_id=execution_id,
workflow_id=workflow_id,
status=StepExecutionStatus.WORKFLOW_COMPLETE,
success=True,
step_type="workflow_complete",
current_node=current_node,
performance_metrics=performance_metrics or PerformanceMetrics(0.0),
message="Workflow completed - no more actions"
)
@classmethod
def error(
cls,
execution_id: str,
workflow_id: str,
error_message: str,
step_type: str = "execution_error",
current_node: Optional[str] = None,
recovery_info: Optional[RecoveryInfo] = None,
performance_metrics: Optional[PerformanceMetrics] = None
) -> 'WorkflowExecutionResult':
"""Créer un résultat d'erreur"""
return cls(
execution_id=execution_id,
workflow_id=workflow_id,
status=StepExecutionStatus.EXECUTION_ERROR,
success=False,
step_type=step_type,
current_node=current_node,
recovery_applied=recovery_info,
performance_metrics=performance_metrics or PerformanceMetrics(0.0),
message=f"Execution failed: {error_message}",
error=error_message
)
def to_dict(self) -> Dict[str, Any]:
"""Convertir en dictionnaire pour sérialisation"""
result = {
"execution_id": self.execution_id,
"workflow_id": self.workflow_id,
"correlation_id": self.correlation_id,
"status": self.status.value,
"success": self.success,
"step_type": self.step_type,
"message": self.message,
"created_at": self.created_at.isoformat(),
"performance_metrics": {
"total_execution_time_ms": self.performance_metrics.total_execution_time_ms,
"state_matching_time_ms": self.performance_metrics.state_matching_time_ms,
"target_resolution_time_ms": self.performance_metrics.target_resolution_time_ms,
"action_execution_time_ms": self.performance_metrics.action_execution_time_ms,
"error_handling_time_ms": self.performance_metrics.error_handling_time_ms
}
}
# Ajouter les champs optionnels s'ils existent
if self.current_node:
result["current_node"] = self.current_node
if self.target_node:
result["target_node"] = self.target_node
if self.action_executed:
result["action_executed"] = self.action_executed
if self.target_resolved:
# Gérer la sérialisation de bbox qui peut être un objet BBox
bbox_data = self.target_resolved.element.bbox
if hasattr(bbox_data, 'to_tuple'):
# Si c'est un objet BBox avec méthode to_tuple
bbox_serialized = {
"x": bbox_data.x,
"y": bbox_data.y,
"width": bbox_data.width,
"height": bbox_data.height
}
elif isinstance(bbox_data, dict):
bbox_serialized = bbox_data
elif isinstance(bbox_data, (list, tuple)) and len(bbox_data) >= 4:
bbox_serialized = {
"x": bbox_data[0],
"y": bbox_data[1],
"width": bbox_data[2],
"height": bbox_data[3]
}
else:
bbox_serialized = str(bbox_data) # Fallback to string
result["target_resolved"] = {
"element_id": self.target_resolved.element.element_id,
"confidence": self.target_resolved.confidence,
"method": getattr(self.target_resolved, 'method', 'standard'),
"bbox": bbox_serialized
}
if self.error:
result["error"] = str(self.error) # Forcer la conversion en string
if self.recovery_applied:
result["recovery_applied"] = {
"strategy": self.recovery_applied.strategy,
"message": self.recovery_applied.message,
"success": self.recovery_applied.success,
"attempts": self.recovery_applied.attempts,
"duration_ms": self.recovery_applied.duration_ms
}
if self.match_result:
result["match_result"] = self.match_result
if self.execution_details:
result["execution_details"] = self.execution_details
return result
def add_execution_detail(self, key: str, value: Any) -> None:
"""Ajouter un détail d'exécution"""
self.execution_details[key] = value
def set_performance_metric(self, metric_name: str, value: float) -> None:
"""Définir une métrique de performance"""
if hasattr(self.performance_metrics, metric_name):
setattr(self.performance_metrics, metric_name, value)
else:
# Ajouter comme détail d'exécution si pas une métrique standard
self.execution_details[f"metric_{metric_name}"] = value

core/models/model_cache.py Normal file

@@ -0,0 +1,420 @@
"""
ModelCache - persistent cache for ML models
Task 5.3: caches ML models to avoid repeated reloads.
Handles loading, caching and eviction of ML models.
Author: Dom, Alice Kiro - 20 December 2024
"""
import logging
import time
import threading
from typing import Dict, Any, Optional, Callable, Tuple
from dataclasses import dataclass, field
from pathlib import Path
import weakref
import gc
logger = logging.getLogger(__name__)
@dataclass
class ModelCacheEntry:
"""Entrée du cache de modèles"""
model: Any
load_time: float
last_access: float
access_count: int = 0
memory_size_mb: float = 0.0
model_type: str = "unknown"
def update_access(self):
"""Mettre à jour les stats d'accès"""
self.last_access = time.time()
self.access_count += 1
@dataclass
class ModelCacheConfig:
"""Configuration du cache de modèles"""
max_models: int = 5 # Nombre max de modèles en cache
max_memory_mb: float = 2048.0 # Mémoire max en MB
ttl_seconds: float = 3600.0 # TTL par défaut (1h)
enable_weak_refs: bool = True # Utiliser WeakValueDictionary
auto_cleanup: bool = True # Nettoyage automatique
cleanup_interval: float = 300.0 # Intervalle de nettoyage (5min)
class ModelCache:
"""
Persistent ML model cache with memory-aware management.
Task 5.3: avoids reloading expensive models multiple times.
Features:
- LRU cache with a memory limit
- Per-model configurable TTL
- Automatic cleanup
- WeakValueDictionary support
- Thread-safe
"""
def __init__(self, config: Optional[ModelCacheConfig] = None):
"""
Initialiser le cache de modèles.
Args:
config: Configuration du cache
"""
self.config = config or ModelCacheConfig()
# Cache principal avec ou sans weak references
if self.config.enable_weak_refs:
self._cache: weakref.WeakValueDictionary = weakref.WeakValueDictionary()
else:
self._cache: Dict[str, ModelCacheEntry] = {}
# Métadonnées du cache (toujours dict normal)
self._metadata: Dict[str, Dict[str, Any]] = {}
# Thread safety
self._lock = threading.RLock()
# Stats
self._stats = {
'hits': 0,
'misses': 0,
'loads': 0,
'evictions': 0,
'cleanups': 0,
'memory_freed_mb': 0.0
}
# Nettoyage automatique
self._cleanup_timer: Optional[threading.Timer] = None
if self.config.auto_cleanup:
self._start_cleanup_timer()
logger.info(f"ModelCache initialized (max_models={self.config.max_models}, "
f"max_memory={self.config.max_memory_mb}MB)")
def get_model(self,
model_key: str,
loader_func: Callable[[], Any],
model_type: str = "unknown",
ttl_seconds: Optional[float] = None) -> Any:
"""
Obtenir un modèle depuis le cache ou le charger.
Args:
model_key: Clé unique du modèle
loader_func: Fonction pour charger le modèle si absent du cache
model_type: Type de modèle (pour logging/stats)
ttl_seconds: TTL spécifique (utilise config par défaut si None)
Returns:
Modèle chargé
"""
with self._lock:
# Vérifier le cache
if model_key in self._cache:
entry = self._cache[model_key]
# Vérifier TTL
ttl = ttl_seconds or self.config.ttl_seconds
if time.time() - entry.load_time < ttl:
entry.update_access()
self._stats['hits'] += 1
logger.debug(f"Model cache hit: {model_key} ({model_type})")
return entry.model
else:
# TTL expiré
logger.debug(f"Model TTL expired: {model_key}")
self._remove_model(model_key)
# Cache miss - charger le modèle
self._stats['misses'] += 1
logger.info(f"Loading model: {model_key} ({model_type})")
start_time = time.time()
try:
model = loader_func()
load_time = time.time() - start_time
# Estimer la taille mémoire (approximation)
memory_size = self._estimate_model_size(model)
# Créer l'entrée de cache
entry = ModelCacheEntry(
model=model,
load_time=time.time(),
last_access=time.time(),
access_count=1,
memory_size_mb=memory_size,
model_type=model_type
)
# Vérifier les limites avant d'ajouter
self._ensure_cache_limits(memory_size)
# Ajouter au cache
self._cache[model_key] = entry
self._metadata[model_key] = {
'ttl_seconds': ttl_seconds or self.config.ttl_seconds,
'model_type': model_type,
'load_time_seconds': load_time
}
self._stats['loads'] += 1
logger.info(f"Model loaded and cached: {model_key} "
f"({memory_size:.1f}MB, {load_time:.2f}s)")
return model
except Exception as e:
logger.error(f"Failed to load model {model_key}: {e}")
raise
def remove_model(self, model_key: str) -> bool:
"""
Supprimer un modèle du cache.
Args:
model_key: Clé du modèle à supprimer
Returns:
True si supprimé, False si non trouvé
"""
with self._lock:
return self._remove_model(model_key)
def _remove_model(self, model_key: str) -> bool:
"""Version interne de remove_model (sans lock)"""
if model_key in self._cache:
entry = self._cache[model_key]
memory_freed = entry.memory_size_mb
del self._cache[model_key]
self._metadata.pop(model_key, None)
self._stats['evictions'] += 1
self._stats['memory_freed_mb'] += memory_freed
logger.debug(f"Model evicted: {model_key} ({memory_freed:.1f}MB freed)")
return True
return False
def _ensure_cache_limits(self, new_model_size_mb: float) -> None:
"""
S'assurer que les limites du cache sont respectées.
Args:
new_model_size_mb: Taille du nouveau modèle à ajouter
"""
current_memory = self.get_memory_usage()
target_memory = current_memory + new_model_size_mb
# Éviction par mémoire
if target_memory > self.config.max_memory_mb:
logger.info(f"Memory limit would be exceeded ({target_memory:.1f}MB > "
f"{self.config.max_memory_mb}MB), evicting models...")
self._evict_lru_models(target_memory - self.config.max_memory_mb)
# Éviction par nombre de modèles
if len(self._cache) >= self.config.max_models:
logger.info(f"Model count limit reached ({len(self._cache)} >= "
f"{self.config.max_models}), evicting oldest...")
self._evict_oldest_model()
def _evict_lru_models(self, memory_to_free_mb: float) -> None:
"""Éviction LRU pour libérer de la mémoire"""
if not self._cache:
return
# Trier par dernier accès (LRU)
models_by_access = sorted(
self._cache.items(),
key=lambda x: x[1].last_access
)
freed_memory = 0.0
for model_key, entry in models_by_access:
if freed_memory >= memory_to_free_mb:
break
freed_memory += entry.memory_size_mb
self._remove_model(model_key)
logger.info(f"LRU eviction freed {freed_memory:.1f}MB")
def _evict_oldest_model(self) -> None:
"""Éviction du modèle le plus ancien"""
if not self._cache:
return
oldest_key = min(self._cache.keys(),
key=lambda k: self._cache[k].load_time)
self._remove_model(oldest_key)
def _estimate_model_size(self, model: Any) -> float:
"""
Estimer la taille mémoire d'un modèle (approximation).
Args:
model: Modèle à analyser
Returns:
Taille estimée en MB
"""
try:
# Pour les modèles PyTorch
if hasattr(model, 'parameters'):
total_params = sum(p.numel() for p in model.parameters())
# Approximation: 4 bytes par paramètre (float32)
return (total_params * 4) / (1024 * 1024)
# Pour les modèles scikit-learn
if hasattr(model, '__sizeof__'):
return model.__sizeof__() / (1024 * 1024)
# Fallback générique
import sys
return sys.getsizeof(model) / (1024 * 1024)
except Exception:
# Estimation par défaut si échec
return 50.0 # 50MB par défaut
def cleanup_expired(self) -> int:
"""
Nettoyer les modèles expirés.
Returns:
Nombre de modèles supprimés
"""
with self._lock:
current_time = time.time()
expired_keys = []
for model_key, entry in self._cache.items():
metadata = self._metadata.get(model_key, {})
ttl = metadata.get('ttl_seconds', self.config.ttl_seconds)
if current_time - entry.load_time > ttl:
expired_keys.append(model_key)
for key in expired_keys:
self._remove_model(key)
if expired_keys:
self._stats['cleanups'] += 1
logger.info(f"Cleanup removed {len(expired_keys)} expired models")
return len(expired_keys)
def _start_cleanup_timer(self) -> None:
"""Démarrer le timer de nettoyage automatique"""
def cleanup_task():
try:
self.cleanup_expired()
# Force garbage collection après nettoyage
gc.collect()
except Exception as e:
logger.error(f"Error in cleanup task: {e}")
finally:
# Reprogrammer le prochain nettoyage
if self.config.auto_cleanup:
self._cleanup_timer = threading.Timer(
self.config.cleanup_interval,
cleanup_task
)
self._cleanup_timer.daemon = True
self._cleanup_timer.start()
self._cleanup_timer = threading.Timer(
self.config.cleanup_interval,
cleanup_task
)
self._cleanup_timer.daemon = True
self._cleanup_timer.start()
def get_memory_usage(self) -> float:
"""
Obtenir l'utilisation mémoire actuelle du cache.
Returns:
Mémoire utilisée en MB
"""
with self._lock:
return sum(entry.memory_size_mb for entry in self._cache.values())
def get_stats(self) -> Dict[str, Any]:
"""Obtenir les statistiques du cache"""
with self._lock:
return {
**self._stats,
'cache_size': len(self._cache),
'memory_usage_mb': self.get_memory_usage(),
'memory_limit_mb': self.config.max_memory_mb,
'model_limit': self.config.max_models
}
def clear(self) -> None:
"""Vider complètement le cache"""
with self._lock:
cache_size = len(self._cache)
memory_freed = self.get_memory_usage()
self._cache.clear()
self._metadata.clear()
self._stats['evictions'] += cache_size
self._stats['memory_freed_mb'] += memory_freed
logger.info(f"Cache cleared: {cache_size} models, {memory_freed:.1f}MB freed")
def shutdown(self) -> None:
"""Arrêter le cache et nettoyer les ressources"""
if self._cleanup_timer:
self._cleanup_timer.cancel()
self._cleanup_timer = None
self.clear()
logger.info("ModelCache shutdown complete")
def __del__(self):
"""Nettoyage automatique à la destruction"""
try:
self.shutdown()
except Exception:
pass
# Instance globale du cache de modèles
_global_model_cache: Optional[ModelCache] = None
def get_global_model_cache() -> ModelCache:
"""
Obtenir l'instance globale du cache de modèles.
Returns:
Instance globale du ModelCache
"""
global _global_model_cache
if _global_model_cache is None:
_global_model_cache = ModelCache()
return _global_model_cache
def set_global_model_cache(cache: ModelCache) -> None:
"""
Définir l'instance globale du cache de modèles.
Args:
cache: Nouvelle instance de ModelCache
"""
global _global_model_cache
if _global_model_cache:
_global_model_cache.shutdown()
_global_model_cache = cache
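
An illustrative sketch of the cache wrapping an expensive loader. The loader below is a stand-in, weak references are disabled so the cached entry survives between calls, and the limits and TTL are arbitrary.

from core.models.model_cache import ModelCache, ModelCacheConfig

def load_detector():
    # Stand-in for an expensive ML model load (e.g. a UI detector).
    return {"name": "ui-detr-1", "weights": [0.0] * 1000}

cache = ModelCache(ModelCacheConfig(max_models=2, max_memory_mb=512.0, enable_weak_refs=False))
model = cache.get_model("ui_detr_1", load_detector, model_type="detector", ttl_seconds=1800)
model = cache.get_model("ui_detr_1", load_detector)                  # second call served from the cache
print(cache.get_stats()["hits"], cache.get_stats()["cache_size"])    # 1 1
cache.shutdown()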

core/models/raw_session.py Normal file

@@ -0,0 +1,200 @@
"""
RawSession - Layer 0: Raw Capture
Faithfully records every user interaction with precise timestamps
and full context. This is the foundation of the RPA Vision V3 system.
"""
from dataclasses import dataclass, field
from datetime import datetime
from typing import Dict, List, Optional, Any
from pathlib import Path
import json
@dataclass
class RawWindowContext:
"""
Window context for an event (RawSession)
Renamed from WindowContext to avoid a collision with ScreenState.WindowContext
Author: Dom, Alice Kiro - 15 December 2024
"""
title: str
app_name: str
def to_dict(self) -> Dict[str, str]:
return {
"title": self.title,
"app_name": self.app_name
}
@classmethod
def from_dict(cls, data: Dict[str, str]) -> 'RawWindowContext':
return cls(
title=data["title"],
app_name=data["app_name"]
)
# Compatibility alias for a smooth migration
WindowContext = RawWindowContext
@dataclass
class Event:
"""
Captured user event
Supported types:
- mouse_click, mouse_move, mouse_scroll
- key_press, key_release, text_input
- window_change, screen_change
"""
t: float # Timestamp relatif en secondes depuis début session
type: str # Type d'événement
window: RawWindowContext
screenshot_id: Optional[str] = None
data: Dict[str, Any] = field(default_factory=dict) # Données spécifiques au type
def to_dict(self) -> Dict[str, Any]:
result = {
"t": self.t,
"type": self.type,
"window": self.window.to_dict(),
}
if self.screenshot_id:
result["screenshot_id"] = self.screenshot_id
# Ajouter les données spécifiques
result.update(self.data)
return result
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> 'Event':
# Extraire les champs de base
t = data["t"]
event_type = data["type"]
window = RawWindowContext.from_dict(data["window"])
screenshot_id = data.get("screenshot_id")
# Le reste va dans data
event_data = {k: v for k, v in data.items()
if k not in ["t", "type", "window", "screenshot_id"]}
return cls(
t=t,
type=event_type,
window=window,
screenshot_id=screenshot_id,
data=event_data
)
@dataclass
class Screenshot:
"""Référence à un screenshot capturé"""
screenshot_id: str
relative_path: str
captured_at: str # ISO format timestamp
def to_dict(self) -> Dict[str, str]:
return {
"screenshot_id": self.screenshot_id,
"relative_path": self.relative_path,
"captured_at": self.captured_at
}
@classmethod
def from_dict(cls, data: Dict[str, str]) -> 'Screenshot':
return cls(
screenshot_id=data["screenshot_id"],
relative_path=data["relative_path"],
captured_at=data["captured_at"]
)
@dataclass
class RawSession:
"""
Raw session capturing every user event
Format: rawsession_v1
"""
session_id: str
agent_version: str
environment: Dict[str, Any]
user: Dict[str, str]
context: Dict[str, str]
started_at: datetime
ended_at: Optional[datetime] = None
events: List[Event] = field(default_factory=list)
screenshots: List[Screenshot] = field(default_factory=list)
schema_version: str = "rawsession_v1"
def add_event(self, event: Event) -> None:
"""Ajouter un événement à la session"""
self.events.append(event)
def add_screenshot(self, screenshot: Screenshot) -> None:
"""Ajouter un screenshot à la session"""
self.screenshots.append(screenshot)
def to_json(self) -> Dict[str, Any]:
"""Sérialiser en JSON"""
return {
"schema_version": self.schema_version,
"session_id": self.session_id,
"agent_version": self.agent_version,
"environment": self.environment,
"user": self.user,
"context": self.context,
"started_at": self.started_at.isoformat(),
"ended_at": self.ended_at.isoformat() if self.ended_at else None,
"events": [event.to_dict() for event in self.events],
"screenshots": [screenshot.to_dict() for screenshot in self.screenshots]
}
@classmethod
def from_json(cls, data: Dict[str, Any]) -> 'RawSession':
"""Désérialiser depuis JSON"""
# Valider schéma
schema_version = data.get("schema_version")
if schema_version != "rawsession_v1":
raise ValueError(
f"Unsupported schema version: {schema_version}. "
f"Expected: rawsession_v1"
)
# Parser dates
started_at = datetime.fromisoformat(data["started_at"])
ended_at = datetime.fromisoformat(data["ended_at"]) if data.get("ended_at") else None
# Parser events et screenshots
events = [Event.from_dict(e) for e in data.get("events", [])]
screenshots = [Screenshot.from_dict(s) for s in data.get("screenshots", [])]
return cls(
schema_version=schema_version,
session_id=data["session_id"],
agent_version=data["agent_version"],
environment=data["environment"],
user=data["user"],
context=data["context"],
started_at=started_at,
ended_at=ended_at,
events=events,
screenshots=screenshots
)
def save_to_file(self, filepath: Path) -> None:
"""Sauvegarder dans un fichier JSON"""
filepath.parent.mkdir(parents=True, exist_ok=True)
with open(filepath, 'w', encoding='utf-8') as f:
json.dump(self.to_json(), f, indent=2, ensure_ascii=False)
@classmethod
def load_from_file(cls, filepath: Path) -> 'RawSession':
"""Charger depuis un fichier JSON"""
with open(filepath, 'r', encoding='utf-8') as f:
data = json.load(f)
return cls.from_json(data)
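
A minimal capture sketch showing how a RawSession is filled and persisted; the session ID, window title, paths and coordinates are invented.

from datetime import datetime
from pathlib import Path
from core.models.raw_session import RawSession, Event, Screenshot, RawWindowContext

session = RawSession(
    session_id="sess-001",
    agent_version="3.0.0",
    environment={"os": "Windows 11", "screen": [1920, 1080]},
    user={"user_id": "dom"},
    context={"task": "demo"},
    started_at=datetime.now(),
)
window = RawWindowContext(title="Invoices - Chrome", app_name="chrome.exe")
session.add_screenshot(Screenshot("shot_0001", "screenshots/shot_0001.png", datetime.now().isoformat()))
session.add_event(Event(t=0.42, type="mouse_click", window=window, screenshot_id="shot_0001",
                        data={"x": 640, "y": 360, "button": "left"}))
session.save_to_file(Path("sessions/sess-001.json"))
restored = RawSession.load_from_file(Path("sessions/sess-001.json"))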

core/models/screen_state.py Normal file

@@ -0,0 +1,310 @@
"""
ScreenState - Layer 1: Multi-Modal Analysis
Turns a raw screenshot into a structured four-level representation:
- Level 1: Raw (what the machine sees)
- Level 2: Perception (what the vision models infer)
- Level 3: UI Semantics (what the system understands)
- Level 4: Business Context (session/application)
Task 4: standardized data contracts
- Timestamps: datetime objects only
- IDs: strings only
"""
from dataclasses import dataclass, field
from datetime import datetime
from typing import Dict, List, Optional, Any, TYPE_CHECKING
from pathlib import Path
import json
from .base_models import Timestamp, StandardID, DataConverter
if TYPE_CHECKING:
from .ui_element import UIElement
@dataclass
class EmbeddingRef:
"""Référence à un embedding stocké"""
provider: str # e.g., "openclip_ViT-B-32"
vector_id: str # Chemin vers fichier .npy
dimensions: int
def to_dict(self) -> Dict[str, Any]:
return {
"provider": self.provider,
"vector_id": self.vector_id,
"dimensions": self.dimensions
}
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> 'EmbeddingRef':
return cls(
provider=data["provider"],
vector_id=data["vector_id"],
dimensions=data["dimensions"]
)
@dataclass
class RawLevel:
"""Niveau 1 : Raw - Ce que la machine voit"""
screenshot_path: str
capture_method: str # e.g., "mss", "pillow"
file_size_bytes: int
def to_dict(self) -> Dict[str, Any]:
return {
"screenshot_path": self.screenshot_path,
"capture_method": self.capture_method,
"file_size_bytes": self.file_size_bytes
}
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> 'RawLevel':
return cls(
screenshot_path=data["screenshot_path"],
capture_method=data["capture_method"],
file_size_bytes=data["file_size_bytes"]
)
@dataclass
class PerceptionLevel:
"""Niveau 2 : Perception - Ce que la vision déduit"""
embedding: EmbeddingRef
detected_text: List[str]
text_detection_method: str # e.g., "qwen_vl", "tesseract"
confidence_avg: float
def to_dict(self) -> Dict[str, Any]:
return {
"embedding": self.embedding.to_dict(),
"detected_text": self.detected_text,
"text_detection_method": self.text_detection_method,
"confidence_avg": self.confidence_avg
}
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> 'PerceptionLevel':
return cls(
embedding=EmbeddingRef.from_dict(data["embedding"]),
detected_text=data["detected_text"],
text_detection_method=data["text_detection_method"],
confidence_avg=data["confidence_avg"]
)
@dataclass
class ContextLevel:
"""Niveau 4 : Contexte Métier - Session/Application"""
current_workflow_candidate: Optional[str] = None
workflow_step: Optional[int] = None
user_id: str = "" # Standardisé en string
tags: List[str] = field(default_factory=list)
business_variables: Dict[str, Any] = field(default_factory=dict)
def __post_init__(self):
"""Valider et migrer les données"""
# Assurer que user_id est une string
if self.user_id is not None and not isinstance(self.user_id, str):
self.user_id = str(DataConverter.ensure_id(self.user_id))
def to_dict(self) -> Dict[str, Any]:
return {
"current_workflow_candidate": self.current_workflow_candidate,
"workflow_step": self.workflow_step,
"user_id": self.user_id,
"tags": self.tags,
"business_variables": self.business_variables
}
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> 'ContextLevel':
# Migrer user_id vers string
migrated_data = DataConverter.migrate_id_dict(data, ['user_id'])
return cls(
current_workflow_candidate=migrated_data.get("current_workflow_candidate"),
workflow_step=migrated_data.get("workflow_step"),
user_id=migrated_data.get("user_id", ""),
tags=migrated_data.get("tags", []),
business_variables=migrated_data.get("business_variables", {})
)
@dataclass
class WindowContext:
"""Contexte de fenêtre"""
app_name: str
window_title: str
screen_resolution: List[int]
workspace: str = "main"
def to_dict(self) -> Dict[str, Any]:
return {
"app_name": self.app_name,
"window_title": self.window_title,
"screen_resolution": self.screen_resolution,
"workspace": self.workspace
}
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> 'WindowContext':
return cls(
app_name=data["app_name"],
window_title=data["window_title"],
screen_resolution=data["screen_resolution"],
workspace=data.get("workspace", "main")
)
@dataclass
class ScreenState:
"""
Four-level structured screen state
Represents an analysed screenshot with:
- Raw: raw image
- Perception: embeddings + detected text
- UI Semantics: UI elements (added separately)
- Context: business metadata
Task 4: standardized contracts
- screen_state_id, session_id: standardized strings
- timestamp: datetime object only
"""
screen_state_id: str # Standardisé en string
timestamp: datetime # datetime object uniquement
session_id: str # Standardisé en string
window: WindowContext
raw: RawLevel
perception: PerceptionLevel
context: ContextLevel
metadata: Dict[str, Any] = field(default_factory=dict)
# Niveau 3 : UI Elements - Liste des éléments UI détectés
ui_elements: List[Any] = field(default_factory=list) # List[UIElement]
def __post_init__(self):
"""Valider et migrer les données après initialisation"""
# Migrer les IDs vers strings
if not isinstance(self.screen_state_id, str):
self.screen_state_id = str(DataConverter.ensure_id(self.screen_state_id))
if not isinstance(self.session_id, str):
self.session_id = str(DataConverter.ensure_id(self.session_id))
# Migrer timestamp vers datetime
if not isinstance(self.timestamp, datetime):
self.timestamp = DataConverter.ensure_timestamp(self.timestamp).value
# =========================================================================
# COMPATIBILITY ALIASES (Fiche #1 - smooth migration)
# Author: Dom, Alice Kiro - 15 December 2024
# =========================================================================
@property
def state_id(self) -> str:
"""Alias de compatibilité pour screen_state_id"""
return self.screen_state_id
@property
def raw_level(self) -> RawLevel:
"""Alias de compatibilité pour raw"""
return self.raw
@property
def perception_level(self) -> PerceptionLevel:
"""Alias de compatibilité pour perception"""
return self.perception
@property
def screenshot_path(self) -> str:
"""Alias de compatibilité pour raw.screenshot_path"""
return self.raw.screenshot_path
@property
def ui_elements_count(self) -> int:
"""Nombre d'éléments UI détectés"""
return len(self.ui_elements)
def to_json(self) -> Dict[str, Any]:
"""Sérialiser en JSON"""
return {
"screen_state_id": self.screen_state_id,
"timestamp": self.timestamp.isoformat(),
"session_id": self.session_id,
"window": self.window.to_dict(),
"raw": self.raw.to_dict(),
"perception": self.perception.to_dict(),
"context": self.context.to_dict(),
"metadata": self.metadata,
"ui_elements": [el.to_dict() if hasattr(el, 'to_dict') else el for el in self.ui_elements]
}
@classmethod
def from_json(cls, data: Dict[str, Any]) -> 'ScreenState':
"""Désérialiser depuis JSON avec migration automatique"""
# Migrer les données vers les nouveaux contrats
migrated_data = DataConverter.migrate_timestamp_dict(data, ['timestamp'])
migrated_data = DataConverter.migrate_id_dict(migrated_data, ['screen_state_id', 'session_id'])
timestamp = migrated_data["timestamp"]
if isinstance(timestamp, str):
timestamp = datetime.fromisoformat(timestamp)
window = WindowContext.from_dict(migrated_data["window"])
raw = RawLevel.from_dict(migrated_data["raw"])
perception = PerceptionLevel.from_dict(migrated_data["perception"])
context = ContextLevel.from_dict(migrated_data["context"])
# Import UIElement ici pour éviter import circulaire
from .ui_element import UIElement
# Parser ui_elements si présents
ui_elements_data = migrated_data.get("ui_elements", [])
ui_elements = []
for el_data in ui_elements_data:
if isinstance(el_data, dict):
ui_elements.append(UIElement.from_dict(el_data))
else:
ui_elements.append(el_data)
return cls(
screen_state_id=migrated_data["screen_state_id"],
timestamp=timestamp,
session_id=migrated_data["session_id"],
window=window,
raw=raw,
perception=perception,
context=context,
metadata=migrated_data.get("metadata", {}),
ui_elements=ui_elements
)
def save_to_file(self, filepath: Path) -> None:
"""Sauvegarder dans un fichier JSON"""
filepath.parent.mkdir(parents=True, exist_ok=True)
with open(filepath, 'w', encoding='utf-8') as f:
json.dump(self.to_json(), f, indent=2, ensure_ascii=False)
@classmethod
def load_from_file(cls, filepath: Path) -> 'ScreenState':
"""Charger depuis un fichier JSON"""
with open(filepath, 'r', encoding='utf-8') as f:
data = json.load(f)
return cls.from_json(data)
def validate_consistency(self) -> bool:
"""
Valider que les 4 niveaux référencent le même screenshot et timestamp
Property 2: ScreenState Multi-Level Consistency
"""
# Tous les niveaux doivent exister
if not all([self.raw, self.perception, self.context]):
return False
# Le timestamp doit être cohérent
# (tous les niveaux référencent le même instant)
return True
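
A construction sketch showing the four levels being assembled and the JSON round trip; every value below is illustrative.

from datetime import datetime
from core.models.screen_state import (
    ScreenState, WindowContext, RawLevel, PerceptionLevel, ContextLevel, EmbeddingRef,
)

state = ScreenState(
    screen_state_id="state_0001",
    timestamp=datetime.now(),
    session_id="sess-001",
    window=WindowContext(app_name="chrome.exe", window_title="Invoices - Chrome",
                         screen_resolution=[1920, 1080]),
    raw=RawLevel(screenshot_path="screenshots/shot_0001.png",
                 capture_method="mss", file_size_bytes=284512),
    perception=PerceptionLevel(
        embedding=EmbeddingRef(provider="openclip_ViT-B-32",
                               vector_id="embeddings/state_0001.npy", dimensions=512),
        detected_text=["Login", "Password"],
        text_detection_method="tesseract",
        confidence_avg=0.91,
    ),
    context=ContextLevel(user_id="dom", tags=["demo"]),
)
payload = state.to_json()                 # plain dict, ready for json.dump
restored = ScreenState.from_json(payload)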

core/models/state_embedding.py Normal file

@@ -0,0 +1,192 @@
"""
StateEmbedding - Layer 3: Multi-Modal Fusion
Builds a unique "fingerprint" of the screen by fusing:
- image embedding (full screenshot)
- text embedding (detected text)
- title embedding (window title)
- UI embedding (UI elements)
"""
from dataclasses import dataclass, field
from typing import Dict, Optional, Any
from pathlib import Path
import numpy as np
import json
@dataclass
class EmbeddingComponent:
"""Composante d'un State Embedding"""
weight: float
vector_id: str
source_text: Optional[str] = None
def to_dict(self) -> Dict[str, Any]:
result = {
"weight": self.weight,
"vector_id": self.vector_id
}
if self.source_text:
result["source_text"] = self.source_text
return result
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> 'EmbeddingComponent':
return cls(
weight=data["weight"],
vector_id=data["vector_id"],
source_text=data.get("source_text")
)
@dataclass
class StateEmbedding:
"""
State Embedding - single vector representing a screen state
Multi-modal fusion:
- 50% image (full screenshot)
- 30% text (detected text)
- 10% title (window)
- 10% UI (detected elements)
"""
embedding_id: str
vector_id: str # Chemin vers fichier .npy
dimensions: int
fusion_method: str # "weighted" ou "concat_projection"
components: Dict[str, EmbeddingComponent] = field(default_factory=dict)
metadata: Dict[str, Any] = field(default_factory=dict)
# Cache du vecteur en mémoire
_vector_cache: Optional[np.ndarray] = field(default=None, repr=False, compare=False)
def get_vector(self) -> np.ndarray:
"""Charger le vecteur depuis le fichier (avec cache)"""
if self._vector_cache is None:
vector_path = Path(self.vector_id)
if vector_path.exists():
self._vector_cache = np.load(vector_path)
else:
raise FileNotFoundError(f"Embedding vector not found: {self.vector_id}")
return self._vector_cache
def set_vector(self, vector: np.ndarray) -> None:
"""Définir le vecteur et le mettre en cache"""
if vector.shape[0] != self.dimensions:
raise ValueError(
f"Vector dimensions mismatch: expected {self.dimensions}, "
f"got {vector.shape[0]}"
)
self._vector_cache = vector
def save_vector(self, vector: np.ndarray) -> None:
"""Sauvegarder le vecteur dans un fichier .npy"""
vector_path = Path(self.vector_id)
vector_path.parent.mkdir(parents=True, exist_ok=True)
np.save(vector_path, vector)
self._vector_cache = vector
def compute_similarity(self, other: 'StateEmbedding') -> float:
"""
Calculer similarité cosinus avec autre embedding
Property 5: State Embedding Similarity Symmetry
Property 6: State Embedding Similarity Bounds
"""
vec1 = self.get_vector()
vec2 = other.get_vector()
# Similarité cosinus
dot_product = np.dot(vec1, vec2)
norm1 = np.linalg.norm(vec1)
norm2 = np.linalg.norm(vec2)
if norm1 == 0 or norm2 == 0:
return 0.0
similarity = dot_product / (norm1 * norm2)
# Clamp entre -1 et 1 (pour éviter erreurs numériques)
similarity = np.clip(similarity, -1.0, 1.0)
return float(similarity)
def is_normalized(self, tolerance: float = 1e-6) -> bool:
"""
Vérifier si le vecteur est normalisé (L2 norm = 1.0)
Property 4: State Embedding Normalization
"""
vector = self.get_vector()
norm = np.linalg.norm(vector)
return abs(norm - 1.0) < tolerance
def to_dict(self) -> Dict[str, Any]:
"""Sérialiser en JSON (sans le vecteur)"""
return {
"embedding_id": self.embedding_id,
"vector_id": self.vector_id,
"dimensions": self.dimensions,
"fusion_method": self.fusion_method,
"components": {
name: comp.to_dict()
for name, comp in self.components.items()
},
"metadata": self.metadata
}
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> 'StateEmbedding':
"""Désérialiser depuis JSON"""
components = {
name: EmbeddingComponent.from_dict(comp_data)
for name, comp_data in data.get("components", {}).items()
}
return cls(
embedding_id=data["embedding_id"],
vector_id=data["vector_id"],
dimensions=data["dimensions"],
fusion_method=data["fusion_method"],
components=components,
metadata=data.get("metadata", {})
)
def to_json(self) -> str:
"""Sérialiser en JSON string"""
return json.dumps(self.to_dict(), indent=2)
@classmethod
def from_json(cls, json_str: str) -> 'StateEmbedding':
"""Désérialiser depuis JSON string"""
data = json.loads(json_str)
return cls.from_dict(data)
def save_to_file(self, filepath: Path) -> None:
"""Sauvegarder métadonnées dans un fichier JSON"""
filepath.parent.mkdir(parents=True, exist_ok=True)
with open(filepath, 'w', encoding='utf-8') as f:
json.dump(self.to_dict(), f, indent=2)
@classmethod
def load_from_file(cls, filepath: Path) -> 'StateEmbedding':
"""Charger métadonnées depuis un fichier JSON"""
with open(filepath, 'r', encoding='utf-8') as f:
data = json.load(f)
return cls.from_dict(data)
# Default fusion weight configuration
DEFAULT_FUSION_WEIGHTS = {
"image": 0.5, # 50% - full screenshot
"text": 0.3, # 30% - detected text
"title": 0.1, # 10% - window title
"ui": 0.1 # 10% - UI elements
}
# Supported fusion methods
FUSION_METHODS = [
"weighted", # simple weighted fusion
"concat_projection" # concatenation + projection
]
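
A sketch of the weighted fusion: the per-modality vectors are random placeholders of an assumed 512 dimensions, fused with DEFAULT_FUSION_WEIGHTS and re-normalised before being attached to a StateEmbedding; the file paths are illustrative.

import numpy as np
from core.models.state_embedding import StateEmbedding, EmbeddingComponent, DEFAULT_FUSION_WEIGHTS

parts = {name: np.random.rand(512) for name in DEFAULT_FUSION_WEIGHTS}
parts = {name: vec / np.linalg.norm(vec) for name, vec in parts.items()}   # L2-normalise each modality

fused = sum(DEFAULT_FUSION_WEIGHTS[name] * vec for name, vec in parts.items())
fused = fused / np.linalg.norm(fused)                                      # Property 4: unit norm

embedding = StateEmbedding(
    embedding_id="emb_0001",
    vector_id="embeddings/emb_0001.npy",
    dimensions=512,
    fusion_method="weighted",
    components={name: EmbeddingComponent(weight=DEFAULT_FUSION_WEIGHTS[name],
                                         vector_id=f"embeddings/emb_0001_{name}.npy")
                for name in parts},
)
embedding.save_vector(fused)                       # writes the .npy file and fills the in-memory cache
print(embedding.is_normalized(), embedding.compute_similarity(embedding))  # True 1.0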

core/models/ui_element.py Normal file

@@ -0,0 +1,239 @@
"""
UIElement - Layer 2: Semantic Detection
Represents a detected interface element with:
- semantic type (button, text_input, etc.)
- semantic role (primary_action, cancel, etc.)
- dual embeddings (image + text)
- visual features
Task 4: standardized data contracts with Pydantic
- BBox: exclusive (x, y, width, height) format
- IDs: strings only
- automatic data validation
"""
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Tuple, Any
from pathlib import Path
import json
from .base_models import BBox, StandardID, DataConverter
@dataclass
class UIElementEmbeddings:
"""Embeddings duaux pour un élément UI"""
image: Optional[Dict[str, Any]] = None # Embedding de l'image croppée
text: Optional[Dict[str, Any]] = None # Embedding du texte détecté
def to_dict(self) -> Dict[str, Any]:
return {
"image": self.image,
"text": self.text
}
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> 'UIElementEmbeddings':
return cls(
image=data.get("image"),
text=data.get("text")
)
@dataclass
class VisualFeatures:
"""Features visuelles d'un élément UI"""
dominant_color: str
has_icon: bool
shape: str # "rectangle", "circle", "rounded_rectangle"
size_category: str # "small", "medium", "large"
def to_dict(self) -> Dict[str, Any]:
return {
"dominant_color": self.dominant_color,
"has_icon": self.has_icon,
"shape": self.shape,
"size_category": self.size_category
}
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> 'VisualFeatures':
return cls(
dominant_color=data["dominant_color"],
has_icon=data["has_icon"],
shape=data["shape"],
size_category=data["size_category"]
)
@dataclass
class UIElement:
"""
Detected interface element with a semantic type and role
Supported types:
- button, text_input, checkbox, radio, dropdown
- tab, link, icon, table_row, menu_item
Semantic roles:
- primary_action, cancel, submit, form_input
- search_field, navigation, etc.
Task 4: standardized contracts
- element_id: StandardID (string only)
- bbox: standardized BBox (x, y, width, height)
"""
element_id: str # Migré vers StandardID via DataConverter
type: str # Type sémantique
role: str # Rôle sémantique
bbox: BBox # BBox standardisée (x, y, width, height)
center: Tuple[int, int] # (x, y) - calculé depuis bbox
label: str
label_confidence: float
embeddings: UIElementEmbeddings
visual_features: VisualFeatures
tags: List[str] = field(default_factory=list)
confidence: float = 0.0
metadata: Dict[str, Any] = field(default_factory=dict)
def __post_init__(self):
"""Valider les données après initialisation"""
# Migrer element_id vers StandardID si nécessaire
if not isinstance(self.element_id, str):
self.element_id = str(DataConverter.ensure_id(self.element_id))
# Migrer bbox vers BBox si nécessaire
if not isinstance(self.bbox, BBox):
self.bbox = DataConverter.ensure_bbox(self.bbox)
# Recalculer center depuis bbox si nécessaire
bbox_center = self.bbox.center()
if self.center != bbox_center:
self.center = bbox_center
# Valider confidence entre 0 et 1
if not 0.0 <= self.confidence <= 1.0:
raise ValueError(f"Confidence must be between 0 and 1, got {self.confidence}")
if not 0.0 <= self.label_confidence <= 1.0:
raise ValueError(f"Label confidence must be between 0 and 1, got {self.label_confidence}")
@classmethod
def create_with_bbox_tuple(cls, element_id: str, type: str, role: str,
bbox_tuple: Tuple[int, int, int, int], **kwargs) -> 'UIElement':
"""
Méthode de compatibilité pour créer UIElement avec bbox tuple
Args:
bbox_tuple: (x, y, width, height)
"""
bbox = BBox.from_tuple(bbox_tuple)
center = bbox.center()
return cls(
element_id=element_id,
type=type,
role=role,
bbox=bbox,
center=center,
**kwargs
)
def to_dict(self) -> Dict[str, Any]:
"""Sérialiser en JSON"""
return {
"element_id": self.element_id,
"type": self.type,
"role": self.role,
"bbox": self.bbox.dict(), # BBox Pydantic serialization
"center": list(self.center),
"label": self.label,
"label_confidence": self.label_confidence,
"embeddings": self.embeddings.to_dict(),
"visual_features": self.visual_features.to_dict(),
"tags": self.tags,
"confidence": self.confidence,
"metadata": self.metadata
}
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> 'UIElement':
"""Désérialiser depuis JSON avec migration automatique"""
# Migrer les données vers les nouveaux contrats
migrated_data = DataConverter.migrate_bbox_dict(data, ['bbox'])
migrated_data = DataConverter.migrate_id_dict(migrated_data, ['element_id'])
embeddings = UIElementEmbeddings.from_dict(migrated_data["embeddings"])
visual_features = VisualFeatures.from_dict(migrated_data["visual_features"])
# Gérer bbox - peut être dict Pydantic ou tuple legacy
bbox_data = migrated_data["bbox"]
if isinstance(bbox_data, dict):
bbox = BBox(**bbox_data)
else:
bbox = DataConverter.ensure_bbox(bbox_data)
# Gérer center - calculer depuis bbox si nécessaire
center_data = migrated_data.get("center")
if center_data:
center = tuple(center_data)
else:
center = bbox.center()
return cls(
element_id=migrated_data["element_id"],
type=migrated_data["type"],
role=migrated_data["role"],
bbox=bbox,
center=center,
label=migrated_data["label"],
label_confidence=migrated_data["label_confidence"],
embeddings=embeddings,
visual_features=visual_features,
tags=migrated_data.get("tags", []),
confidence=migrated_data.get("confidence", 0.0),
metadata=migrated_data.get("metadata", {})
)
def to_json(self) -> str:
"""Sérialiser en JSON string"""
return json.dumps(self.to_dict(), indent=2)
@classmethod
def from_json(cls, json_str: str) -> 'UIElement':
"""Désérialiser depuis JSON string"""
data = json.loads(json_str)
return cls.from_dict(data)
# Supported element types
UI_ELEMENT_TYPES = [
"button",
"text_input",
"checkbox",
"radio",
"dropdown",
"tab",
"link",
"icon",
"table_row",
"menu_item",
"label",
"image",
"container"
]
# Supported semantic roles
UI_ELEMENT_ROLES = [
"primary_action",
"secondary_action",
"cancel",
"submit",
"form_input",
"search_field",
"navigation",
"data_display",
"selectable_item",
"action_trigger",
"status_indicator",
"delete_action",
"dangerous_action"
]
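
A hypothetical element built through the tuple-compatibility constructor and round-tripped through the dict serialisation; all values below are invented.

from core.models.ui_element import UIElement, UIElementEmbeddings, VisualFeatures

button = UIElement.create_with_bbox_tuple(
    element_id="el_0042",
    type="button",
    role="primary_action",
    bbox_tuple=(860, 540, 120, 36),        # (x, y, width, height)
    label="Log in",
    label_confidence=0.97,
    embeddings=UIElementEmbeddings(),      # dual embeddings filled in later by the pipeline
    visual_features=VisualFeatures(dominant_color="#1a73e8", has_icon=False,
                                   shape="rounded_rectangle", size_category="medium"),
    confidence=0.93,
)
assert button.center == (920, 558)         # recomputed from the bbox
restored = UIElement.from_dict(button.to_dict())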

File diff suppressed because it is too large