v1.0 - Version stable: multi-PC, détection UI-DETR-1, 3 modes exécution
- Frontend v4 accessible sur réseau local (192.168.1.40)
- Ports ouverts: 3002 (frontend), 5001 (backend), 5004 (dashboard)
- Ollama GPU fonctionnel
- Self-healing interactif
- Dashboard confiance

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
310
core/models/screen_state.py
Normal file
310
core/models/screen_state.py
Normal file
@@ -0,0 +1,310 @@
|
||||
"""
|
||||
ScreenState - Couche 1 : Analyse Multi-Modale

Transforme un screenshot brut en représentation structurée à 4 niveaux :
- Niveau 1 : Raw (Ce que la machine voit)
- Niveau 2 : Perception (Ce que la vision déduit)
- Niveau 3 : Sémantique UI (Ce que le système comprend)
- Niveau 4 : Contexte Métier (Session/Application)

Tâche 4 : Contrats de données standardisés
- Timestamps : datetime objects uniquement
- IDs : Strings uniquement
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional, Any, TYPE_CHECKING
|
||||
from pathlib import Path
|
||||
import json
|
||||
from .base_models import Timestamp, StandardID, DataConverter
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .ui_element import UIElement
|
||||
|
||||
|
||||
@dataclass
class EmbeddingRef:
    """Reference to a stored embedding."""

    provider: str  # e.g. "openclip_ViT-B-32"
    vector_id: str  # path to the .npy file
    dimensions: int

    def to_dict(self) -> Dict[str, Any]:
        """Return the reference as a plain dictionary.

        Returns:
            A dict with the keys ``provider``, ``vector_id`` and
            ``dimensions``.
        """
        return {
            "provider": self.provider,
            "vector_id": self.vector_id,
            "dimensions": self.dimensions,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'EmbeddingRef':
        """Build an ``EmbeddingRef`` from a dictionary.

        Args:
            data: Mapping holding ``provider``, ``vector_id`` and
                ``dimensions``.

        Raises:
            KeyError: If any of the three keys is missing.
        """
        return cls(
            provider=data["provider"],
            vector_id=data["vector_id"],
            dimensions=data["dimensions"],
        )
|
||||
|
||||
|
||||
@dataclass
class RawLevel:
    """Level 1: Raw - what the machine sees."""

    screenshot_path: str
    capture_method: str  # e.g. "mss", "pillow"
    file_size_bytes: int

    def to_dict(self) -> Dict[str, Any]:
        """Return the raw level as a plain dictionary.

        Returns:
            A dict with the keys ``screenshot_path``, ``capture_method``
            and ``file_size_bytes``.
        """
        return {
            "screenshot_path": self.screenshot_path,
            "capture_method": self.capture_method,
            "file_size_bytes": self.file_size_bytes,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'RawLevel':
        """Build a ``RawLevel`` from a dictionary.

        Args:
            data: Mapping holding ``screenshot_path``, ``capture_method``
                and ``file_size_bytes``.

        Raises:
            KeyError: If any of the three keys is missing.
        """
        return cls(
            screenshot_path=data["screenshot_path"],
            capture_method=data["capture_method"],
            file_size_bytes=data["file_size_bytes"],
        )
|
||||
|
||||
|
||||
@dataclass
class PerceptionLevel:
    """Level 2: Perception - what vision infers."""

    embedding: EmbeddingRef
    detected_text: List[str]
    text_detection_method: str  # e.g. "qwen_vl", "tesseract"
    confidence_avg: float

    def to_dict(self) -> Dict[str, Any]:
        """Return the perception level as a plain dictionary.

        The nested embedding is serialized through ``EmbeddingRef.to_dict``.
        """
        return {
            "embedding": self.embedding.to_dict(),
            "detected_text": self.detected_text,
            "text_detection_method": self.text_detection_method,
            "confidence_avg": self.confidence_avg,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'PerceptionLevel':
        """Build a ``PerceptionLevel`` from a dictionary.

        Args:
            data: Mapping holding ``embedding`` (itself a dict accepted by
                ``EmbeddingRef.from_dict``), ``detected_text``,
                ``text_detection_method`` and ``confidence_avg``.

        Raises:
            KeyError: If any required key is missing.
        """
        return cls(
            embedding=EmbeddingRef.from_dict(data["embedding"]),
            detected_text=data["detected_text"],
            text_detection_method=data["text_detection_method"],
            confidence_avg=data["confidence_avg"],
        )
|
||||
|
||||
|
||||
@dataclass
class ContextLevel:
    """Level 4: Business context - session/application."""

    current_workflow_candidate: Optional[str] = None
    workflow_step: Optional[int] = None
    user_id: str = ""  # standardized as a string
    tags: List[str] = field(default_factory=list)
    business_variables: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        """Validate and migrate the data after initialisation.

        A ``user_id`` that is neither ``None`` nor a string is passed through
        ``DataConverter.ensure_id`` and stored as a string. ``None`` is left
        untouched.
        """
        if self.user_id is not None and not isinstance(self.user_id, str):
            self.user_id = str(DataConverter.ensure_id(self.user_id))

    def to_dict(self) -> Dict[str, Any]:
        """Return the context level as a plain dictionary."""
        return {
            "current_workflow_candidate": self.current_workflow_candidate,
            "workflow_step": self.workflow_step,
            "user_id": self.user_id,
            "tags": self.tags,
            "business_variables": self.business_variables,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'ContextLevel':
        """Build a ``ContextLevel`` from a dictionary.

        ``user_id`` is first migrated with ``DataConverter.migrate_id_dict``.
        Every key is optional.

        Args:
            data: Mapping that may hold ``current_workflow_candidate``,
                ``workflow_step``, ``user_id``, ``tags`` and
                ``business_variables``.
        """
        migrated = DataConverter.migrate_id_dict(data, ['user_id'])

        return cls(
            current_workflow_candidate=migrated.get("current_workflow_candidate"),
            workflow_step=migrated.get("workflow_step"),
            user_id=migrated.get("user_id", ""),
            # ``.get(key, default)`` only covers a missing key; an explicit
            # JSON null would otherwise leave None where a list/dict is
            # expected.
            tags=migrated.get("tags") or [],
            business_variables=migrated.get("business_variables") or {},
        )
|
||||
|
||||
|
||||
@dataclass
class WindowContext:
    """Window context."""

    app_name: str
    window_title: str
    screen_resolution: List[int]
    workspace: str = "main"

    def to_dict(self) -> Dict[str, Any]:
        """Return the window context as a plain dictionary."""
        return {
            "app_name": self.app_name,
            "window_title": self.window_title,
            "screen_resolution": self.screen_resolution,
            "workspace": self.workspace,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'WindowContext':
        """Build a ``WindowContext`` from a dictionary.

        Args:
            data: Mapping holding ``app_name``, ``window_title`` and
                ``screen_resolution``; ``workspace`` is optional and
                defaults to ``"main"``.

        Raises:
            KeyError: If one of the three required keys is missing.
        """
        return cls(
            app_name=data["app_name"],
            window_title=data["window_title"],
            screen_resolution=data["screen_resolution"],
            workspace=data.get("workspace", "main"),
        )
|
||||
|
||||
|
||||
@dataclass
class ScreenState:
    """
    Structured four-level screen state.

    Represents an analysed screenshot with:
    - Raw: the raw image
    - Perception: embeddings + detected text
    - UI semantics: UI elements (added separately)
    - Context: business metadata

    Task 4: standardized contracts
    - screen_state_id, session_id: standardized strings
    - timestamp: datetime object only
    """

    screen_state_id: str  # standardized as a string
    timestamp: datetime  # datetime object only
    session_id: str  # standardized as a string
    window: WindowContext
    raw: RawLevel
    perception: PerceptionLevel
    context: ContextLevel
    metadata: Dict[str, Any] = field(default_factory=dict)

    # Level 3: UI elements - list of the detected UI elements
    ui_elements: List[Any] = field(default_factory=list)  # List[UIElement]

    def __post_init__(self):
        """Validate and migrate the data after initialisation.

        Non-string IDs go through ``DataConverter.ensure_id`` and are stored
        as strings. A non-``datetime`` timestamp is replaced by the ``.value``
        of ``DataConverter.ensure_timestamp``.
        """
        # Migrate the IDs to strings
        if not isinstance(self.screen_state_id, str):
            self.screen_state_id = str(DataConverter.ensure_id(self.screen_state_id))
        if not isinstance(self.session_id, str):
            self.session_id = str(DataConverter.ensure_id(self.session_id))

        # Migrate the timestamp to datetime
        if not isinstance(self.timestamp, datetime):
            self.timestamp = DataConverter.ensure_timestamp(self.timestamp).value

    # =========================================================================
    # COMPATIBILITY ALIASES (Fiche #1 - soft migration)
    # Author: Dom, Alice Kiro - 15 December 2024
    # =========================================================================

    @property
    def state_id(self) -> str:
        """Compatibility alias for ``screen_state_id``."""
        return self.screen_state_id

    @property
    def raw_level(self) -> RawLevel:
        """Compatibility alias for ``raw``."""
        return self.raw

    @property
    def perception_level(self) -> PerceptionLevel:
        """Compatibility alias for ``perception``."""
        return self.perception

    @property
    def screenshot_path(self) -> str:
        """Compatibility alias for ``raw.screenshot_path``."""
        return self.raw.screenshot_path

    @property
    def ui_elements_count(self) -> int:
        """Number of detected UI elements."""
        return len(self.ui_elements)

    def to_json(self) -> Dict[str, Any]:
        """Serialize the state to a JSON-ready dictionary.

        The timestamp is emitted in ISO format. UI elements exposing
        ``to_dict`` are serialized through it; any other element is emitted
        unchanged.

        Returns:
            A dictionary (not a JSON string).
        """
        return {
            "screen_state_id": self.screen_state_id,
            "timestamp": self.timestamp.isoformat(),
            "session_id": self.session_id,
            "window": self.window.to_dict(),
            "raw": self.raw.to_dict(),
            "perception": self.perception.to_dict(),
            "context": self.context.to_dict(),
            "metadata": self.metadata,
            "ui_elements": [
                el.to_dict() if hasattr(el, 'to_dict') else el
                for el in self.ui_elements
            ],
        }

    @classmethod
    def from_json(cls, data: Dict[str, Any]) -> 'ScreenState':
        """Deserialize from a dictionary, with automatic migration.

        The timestamp and ID fields are first migrated through
        ``DataConverter``. A timestamp that is still a string afterwards is
        parsed with ``datetime.fromisoformat``.

        Args:
            data: Dictionary in the shape produced by ``to_json``.
                ``metadata`` and ``ui_elements`` are optional.

        Raises:
            KeyError: If a required key is missing.
        """
        # Migrate the data to the new contracts
        migrated = DataConverter.migrate_timestamp_dict(data, ['timestamp'])
        migrated = DataConverter.migrate_id_dict(migrated, ['screen_state_id', 'session_id'])

        timestamp = migrated["timestamp"]
        if isinstance(timestamp, str):
            timestamp = datetime.fromisoformat(timestamp)

        window = WindowContext.from_dict(migrated["window"])
        raw = RawLevel.from_dict(migrated["raw"])
        perception = PerceptionLevel.from_dict(migrated["perception"])
        context = ContextLevel.from_dict(migrated["context"])

        # Import UIElement here to avoid a circular import
        from .ui_element import UIElement

        # Dict entries are parsed into UIElement; anything else is kept as-is.
        # ``or []`` / ``or {}`` below also cover an explicit JSON null, which
        # ``.get(key, default)`` would pass through as None.
        ui_elements = [
            UIElement.from_dict(item) if isinstance(item, dict) else item
            for item in (migrated.get("ui_elements") or [])
        ]

        return cls(
            screen_state_id=migrated["screen_state_id"],
            timestamp=timestamp,
            session_id=migrated["session_id"],
            window=window,
            raw=raw,
            perception=perception,
            context=context,
            metadata=migrated.get("metadata") or {},
            ui_elements=ui_elements,
        )

    def save_to_file(self, filepath: Path) -> None:
        """Save the state to a JSON file, creating parent directories.

        Args:
            filepath: Destination path; a plain string is accepted as well.
        """
        target = Path(filepath)
        # Serialize before touching the disk so that a serialization error
        # cannot leave a truncated file behind.
        serialized = json.dumps(self.to_json(), indent=2, ensure_ascii=False)
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(serialized, encoding='utf-8')

    @classmethod
    def load_from_file(cls, filepath: Path) -> 'ScreenState':
        """Load a state from a JSON file.

        Args:
            filepath: Path of a file containing the ``to_json`` output.
        """
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)
        return cls.from_json(data)

    def validate_consistency(self) -> bool:
        """
        Check that the raw, perception and context levels are all present.

        Property 2: ScreenState Multi-Level Consistency

        NOTE(review): only the presence (truthiness) of the three levels is
        checked. No cross-level comparison of screenshot or timestamp is
        performed here.

        Returns:
            False if any of the three levels is missing, True otherwise.
        """
        return all((self.raw, self.perception, self.context))
|
||||
Reference in New Issue
Block a user