- Frontend v4 accessible sur réseau local (192.168.1.40) - Ports ouverts: 3002 (frontend), 5001 (backend), 5004 (dashboard) - Ollama GPU fonctionnel - Self-healing interactif - Dashboard confiance Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
311 lines
11 KiB
Python
311 lines
11 KiB
Python
"""
ScreenState - Layer 1: Multi-Modal Analysis

Transforms a raw screenshot into a structured representation with 4 levels:
- Level 1: Raw (what the machine sees)
- Level 2: Perception (what vision infers)
- Level 3: UI Semantics (what the system understands)
- Level 4: Business Context (session/application)

Task 4: Standardized data contracts
- Timestamps: datetime objects only
- IDs: strings only
"""
|
|
|
|
from dataclasses import dataclass, field
|
|
from datetime import datetime
|
|
from typing import Dict, List, Optional, Any, TYPE_CHECKING
|
|
from pathlib import Path
|
|
import json
|
|
from .base_models import Timestamp, StandardID, DataConverter
|
|
|
|
if TYPE_CHECKING:
|
|
from .ui_element import UIElement
|
|
|
|
|
|
@dataclass
class EmbeddingRef:
    """Reference to a stored embedding."""

    provider: str  # e.g., "openclip_ViT-B-32"
    vector_id: str  # Path to the .npy file
    dimensions: int

    def to_dict(self) -> Dict[str, Any]:
        """Return the three fields as a plain dictionary."""
        return dict(
            provider=self.provider,
            vector_id=self.vector_id,
            dimensions=self.dimensions,
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'EmbeddingRef':
        """Build an instance from a mapping shaped like ``to_dict`` output.

        Raises:
            KeyError: if one of the three fields is missing from ``data``.
        """
        required = ("provider", "vector_id", "dimensions")
        return cls(**{name: data[name] for name in required})
|
|
|
|
|
|
@dataclass
class RawLevel:
    """Level 1: Raw - what the machine sees."""

    screenshot_path: str
    capture_method: str  # e.g., "mss", "pillow"
    file_size_bytes: int

    def to_dict(self) -> Dict[str, Any]:
        """Return the three fields as a plain dictionary."""
        payload: Dict[str, Any] = {}
        payload["screenshot_path"] = self.screenshot_path
        payload["capture_method"] = self.capture_method
        payload["file_size_bytes"] = self.file_size_bytes
        return payload

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'RawLevel':
        """Build an instance from a mapping shaped like ``to_dict`` output.

        Raises:
            KeyError: if one of the three fields is missing from ``data``.
        """
        path = data["screenshot_path"]
        method = data["capture_method"]
        size = data["file_size_bytes"]
        return cls(
            screenshot_path=path,
            capture_method=method,
            file_size_bytes=size,
        )
|
|
|
|
|
|
@dataclass
class PerceptionLevel:
    """Level 2: Perception - what vision infers."""

    embedding: EmbeddingRef
    detected_text: List[str]
    text_detection_method: str  # e.g., "qwen_vl", "tesseract"
    confidence_avg: float

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a dictionary; the embedding is nested via its own ``to_dict``."""
        serialized: Dict[str, Any] = {"embedding": self.embedding.to_dict()}
        serialized["detected_text"] = self.detected_text
        serialized["text_detection_method"] = self.text_detection_method
        serialized["confidence_avg"] = self.confidence_avg
        return serialized

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'PerceptionLevel':
        """Build an instance from a mapping shaped like ``to_dict`` output.

        The nested ``"embedding"`` entry is rebuilt with ``EmbeddingRef.from_dict``.

        Raises:
            KeyError: if a field is missing from ``data``.
        """
        embedding_ref = EmbeddingRef.from_dict(data["embedding"])
        texts = data["detected_text"]
        method = data["text_detection_method"]
        average = data["confidence_avg"]
        return cls(
            embedding=embedding_ref,
            detected_text=texts,
            text_detection_method=method,
            confidence_avg=average,
        )
|
|
|
|
|
|
@dataclass
class ContextLevel:
    """Level 4: Business context - session/application."""

    current_workflow_candidate: Optional[str] = None
    workflow_step: Optional[int] = None
    user_id: str = ""  # Standardized as a string
    tags: List[str] = field(default_factory=list)
    business_variables: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        """Validate and migrate the data: coerce a non-string ``user_id`` to ``str``."""
        # None and existing strings are left untouched.
        if self.user_id is None or isinstance(self.user_id, str):
            return
        self.user_id = str(DataConverter.ensure_id(self.user_id))

    def to_dict(self) -> Dict[str, Any]:
        """Return the five fields as a plain dictionary."""
        return dict(
            current_workflow_candidate=self.current_workflow_candidate,
            workflow_step=self.workflow_step,
            user_id=self.user_id,
            tags=self.tags,
            business_variables=self.business_variables,
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'ContextLevel':
        """Build an instance from a mapping; every key is optional.

        ``data`` first goes through ``DataConverter.migrate_id_dict`` for
        ``user_id`` (migration to string). Missing keys fall back to the
        same values as the field defaults.
        """
        migrated = DataConverter.migrate_id_dict(data, ['user_id'])
        lookup = migrated.get

        return cls(
            current_workflow_candidate=lookup("current_workflow_candidate"),
            workflow_step=lookup("workflow_step"),
            user_id=lookup("user_id", ""),
            tags=lookup("tags", []),
            business_variables=lookup("business_variables", {}),
        )
|
|
|
|
|
|
@dataclass
class WindowContext:
    """Window context."""

    app_name: str
    window_title: str
    screen_resolution: List[int]
    workspace: str = "main"

    def to_dict(self) -> Dict[str, Any]:
        """Return the four fields as a plain dictionary."""
        names = ("app_name", "window_title", "screen_resolution", "workspace")
        return {name: getattr(self, name) for name in names}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'WindowContext':
        """Build an instance from a mapping shaped like ``to_dict`` output.

        ``"workspace"`` is optional and defaults to ``"main"``.

        Raises:
            KeyError: if one of the three required fields is missing.
        """
        required = ("app_name", "window_title", "screen_resolution")
        kwargs = {name: data[name] for name in required}
        kwargs["workspace"] = data.get("workspace", "main")
        return cls(**kwargs)
|
|
|
|
|
|
@dataclass
class ScreenState:
    """
    Structured screen state with 4 levels.

    Represents an analysed screenshot with:
    - Raw: raw image
    - Perception: embeddings + detected text
    - UI semantics: UI elements (to be added separately)
    - Context: business metadata

    Task 4: standardized contracts
    - screen_state_id, session_id: standardized strings
    - timestamp: datetime object only
    """

    screen_state_id: str  # Standardized as a string
    timestamp: datetime  # datetime object only
    session_id: str  # Standardized as a string
    window: WindowContext
    raw: RawLevel
    perception: PerceptionLevel
    context: ContextLevel
    metadata: Dict[str, Any] = field(default_factory=dict)

    # Level 3: UI elements - list of detected UI elements
    ui_elements: List[Any] = field(default_factory=list)  # List[UIElement]

    def __post_init__(self):
        """Validate and migrate the data after initialization."""
        # Migrate the IDs to strings
        if not isinstance(self.screen_state_id, str):
            self.screen_state_id = str(DataConverter.ensure_id(self.screen_state_id))
        if not isinstance(self.session_id, str):
            self.session_id = str(DataConverter.ensure_id(self.session_id))

        # Migrate the timestamp to a datetime
        if not isinstance(self.timestamp, datetime):
            self.timestamp = DataConverter.ensure_timestamp(self.timestamp).value

    # =========================================================================
    # COMPATIBILITY ALIASES (Sheet #1 - Soft migration)
    # Author: Dom, Alice Kiro - 15 December 2024
    # =========================================================================

    @property
    def state_id(self) -> str:
        """Compatibility alias for screen_state_id."""
        return self.screen_state_id

    @property
    def raw_level(self) -> RawLevel:
        """Compatibility alias for raw."""
        return self.raw

    @property
    def perception_level(self) -> PerceptionLevel:
        """Compatibility alias for perception."""
        return self.perception

    @property
    def screenshot_path(self) -> str:
        """Compatibility alias for raw.screenshot_path."""
        return self.raw.screenshot_path

    @property
    def ui_elements_count(self) -> int:
        """Number of detected UI elements."""
        return len(self.ui_elements)

    def to_json(self) -> Dict[str, Any]:
        """Serialize to a dictionary ready for ``json.dump``.

        The timestamp is written with ``isoformat()``, nested levels with
        their own ``to_dict``. UI elements that expose ``to_dict`` are
        converted; any other element is passed through unchanged.
        """
        return {
            "screen_state_id": self.screen_state_id,
            "timestamp": self.timestamp.isoformat(),
            "session_id": self.session_id,
            "window": self.window.to_dict(),
            "raw": self.raw.to_dict(),
            "perception": self.perception.to_dict(),
            "context": self.context.to_dict(),
            "metadata": self.metadata,
            "ui_elements": [
                el.to_dict() if hasattr(el, 'to_dict') else el
                for el in self.ui_elements
            ],
        }

    @classmethod
    def from_json(cls, data: Dict[str, Any]) -> 'ScreenState':
        """Deserialize from a dictionary, with automatic migration.

        ``data`` goes through ``DataConverter.migrate_timestamp_dict`` and
        ``DataConverter.migrate_id_dict`` before the fields are read.
        ``"metadata"`` and ``"ui_elements"`` are optional.

        Raises:
            KeyError: if a required key is missing.
        """
        # Migrate the data to the new contracts
        migrated_data = DataConverter.migrate_timestamp_dict(data, ['timestamp'])
        migrated_data = DataConverter.migrate_id_dict(
            migrated_data, ['screen_state_id', 'session_id']
        )

        timestamp = migrated_data["timestamp"]
        if isinstance(timestamp, str):
            timestamp = datetime.fromisoformat(timestamp)

        window = WindowContext.from_dict(migrated_data["window"])
        raw = RawLevel.from_dict(migrated_data["raw"])
        perception = PerceptionLevel.from_dict(migrated_data["perception"])
        context = ContextLevel.from_dict(migrated_data["context"])

        # Import UIElement here to avoid a circular import
        from .ui_element import UIElement

        # Parse ui_elements when present; non-dict entries are kept unchanged
        ui_elements = [
            UIElement.from_dict(el_data) if isinstance(el_data, dict) else el_data
            for el_data in migrated_data.get("ui_elements", [])
        ]

        return cls(
            screen_state_id=migrated_data["screen_state_id"],
            timestamp=timestamp,
            session_id=migrated_data["session_id"],
            window=window,
            raw=raw,
            perception=perception,
            context=context,
            metadata=migrated_data.get("metadata", {}),
            ui_elements=ui_elements,
        )

    def save_to_file(self, filepath: Path) -> None:
        """Save to a JSON file, creating parent directories as needed.

        A plain ``str`` path is accepted as well and converted to ``Path``,
        so this method takes the same inputs as ``load_from_file``.
        """
        # A str has no ``.parent``; coerce it instead of raising AttributeError.
        if isinstance(filepath, str):
            filepath = Path(filepath)
        filepath.parent.mkdir(parents=True, exist_ok=True)
        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(self.to_json(), f, indent=2, ensure_ascii=False)

    @classmethod
    def load_from_file(cls, filepath: Path) -> 'ScreenState':
        """Load from a JSON file and rebuild the instance with ``from_json``."""
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)
        return cls.from_json(data)

    def validate_consistency(self) -> bool:
        """
        Validate that the levels are present.

        Property 2: ScreenState Multi-Level Consistency

        Returns:
            False when ``raw``, ``perception`` or ``context`` is falsy
            (e.g. None), True otherwise.
        """
        # All levels must exist
        levels = (self.raw, self.perception, self.context)
        if not all(levels):
            return False

        # NOTE(review): the intent is that every level refers to the same
        # instant, but no timestamp comparison is performed here.
        return True
|