v1.0 - Version stable: multi-PC, détection UI-DETR-1, 3 modes exécution

- Frontend v4 accessible sur réseau local (192.168.1.40)
- Ports ouverts : 3002 (frontend), 5001 (backend), 5004 (dashboard)
- Ollama GPU fonctionnel
- Self-healing interactif
- Dashboard confiance

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-29 11:23:51 +01:00
parent 21bfa3b337
commit a27b74cf22
1595 changed files with 412691 additions and 400 deletions
--- a/core/models/screen_state.py
+++ b/core/models/screen_state.py
@@ -0,0 +1,310 @@
+"""
+ScreenState - Couche 1 : Analyse Multi-Modale
+
+Transforme un screenshot brut en représentation structurée à 4 niveaux :
+- Niveau 1 : Raw (Ce que la machine voit)
+- Niveau 2 : Perception (Ce que la vision déduit)
+- Niveau 3 : Sémantique UI (Ce que le système comprend)
+- Niveau 4 : Contexte Métier (Session/Application)
+
+Tâche 4 : Contrats de données standardisés
+- Timestamps : datetime objects uniquement
+- IDs : Strings uniquement
+"""
+
+from dataclasses import dataclass, field
+from datetime import datetime
+from typing import Dict, List, Optional, Any, TYPE_CHECKING
+from pathlib import Path
+import json
+from .base_models import Timestamp, StandardID, DataConverter
+
+if TYPE_CHECKING:
+    from .ui_element import UIElement
+
+
+@dataclass
+class EmbeddingRef:
+    """Reference to a stored embedding (provider, vector location, dimensionality)."""
+    provider: str  # e.g., "openclip_ViT-B-32"
+    vector_id: str  # Path to the .npy file
+    dimensions: int
+    
+    def to_dict(self) -> Dict[str, Any]:  # Plain dict keyed by the three field names
+        return {
+            "provider": self.provider,
+            "vector_id": self.vector_id,
+            "dimensions": self.dimensions
+        }
+    
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> 'EmbeddingRef':  # All three keys are required
+        return cls(
+            provider=data["provider"],
+            vector_id=data["vector_id"],
+            dimensions=data["dimensions"]
+        )
+
+
+@dataclass
+class RawLevel:
+    """Level 1: Raw - what the machine sees (screenshot file and how it was captured)."""
+    screenshot_path: str
+    capture_method: str  # e.g., "mss", "pillow"
+    file_size_bytes: int
+    
+    def to_dict(self) -> Dict[str, Any]:  # Plain dict keyed by the three field names
+        return {
+            "screenshot_path": self.screenshot_path,
+            "capture_method": self.capture_method,
+            "file_size_bytes": self.file_size_bytes
+        }
+    
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> 'RawLevel':  # All three keys are required
+        return cls(
+            screenshot_path=data["screenshot_path"],
+            capture_method=data["capture_method"],
+            file_size_bytes=data["file_size_bytes"]
+        )
+
+
+@dataclass
+class PerceptionLevel:
+    """Level 2: Perception - what vision infers (embedding reference and detected text)."""
+    embedding: EmbeddingRef
+    detected_text: List[str]
+    text_detection_method: str  # e.g., "qwen_vl", "tesseract"
+    confidence_avg: float
+    
+    def to_dict(self) -> Dict[str, Any]:  # The embedding is serialized through EmbeddingRef.to_dict
+        return {
+            "embedding": self.embedding.to_dict(),
+            "detected_text": self.detected_text,
+            "text_detection_method": self.text_detection_method,
+            "confidence_avg": self.confidence_avg
+        }
+    
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> 'PerceptionLevel':  # All four keys are required
+        return cls(
+            embedding=EmbeddingRef.from_dict(data["embedding"]),
+            detected_text=data["detected_text"],
+            text_detection_method=data["text_detection_method"],
+            confidence_avg=data["confidence_avg"]
+        )
+
+
+@dataclass
+class ContextLevel:
+    """Level 4: Business Context - session/application metadata."""
+    current_workflow_candidate: Optional[str] = None
+    workflow_step: Optional[int] = None
+    user_id: str = ""  # Standardized as a string
+    tags: List[str] = field(default_factory=list)
+    business_variables: Dict[str, Any] = field(default_factory=dict)
+    
+    def __post_init__(self):
+        """Validate and migrate the data after dataclass initialization."""
+        # Ensure user_id is a string. NOTE(review): a None user_id skips this coercion and stays None.
+        if self.user_id is not None and not isinstance(self.user_id, str):
+            self.user_id = str(DataConverter.ensure_id(self.user_id))
+    
+    def to_dict(self) -> Dict[str, Any]:  # tags / business_variables are returned by reference, not copied
+        return {
+            "current_workflow_candidate": self.current_workflow_candidate,
+            "workflow_step": self.workflow_step,
+            "user_id": self.user_id,
+            "tags": self.tags,
+            "business_variables": self.business_variables
+        }
+    
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> 'ContextLevel':  # Missing keys fall back to the field defaults
+        # Migrate user_id to a string (presumably returns a migrated dict; helper is defined in base_models)
+        migrated_data = DataConverter.migrate_id_dict(data, ['user_id'])
+        
+        return cls(
+            current_workflow_candidate=migrated_data.get("current_workflow_candidate"),
+            workflow_step=migrated_data.get("workflow_step"),
+            user_id=migrated_data.get("user_id", ""),
+            tags=migrated_data.get("tags", []),
+            business_variables=migrated_data.get("business_variables", {})
+        )
+
+
+@dataclass
+class WindowContext:
+    """Window context: application name, window title, screen resolution and workspace."""
+    app_name: str
+    window_title: str
+    screen_resolution: List[int]
+    workspace: str = "main"
+    
+    def to_dict(self) -> Dict[str, Any]:  # Plain dict keyed by the four field names
+        return {
+            "app_name": self.app_name,
+            "window_title": self.window_title,
+            "screen_resolution": self.screen_resolution,
+            "workspace": self.workspace
+        }
+    
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> 'WindowContext':  # Only "workspace" is optional
+        return cls(
+            app_name=data["app_name"],
+            window_title=data["window_title"],
+            screen_resolution=data["screen_resolution"],
+            workspace=data.get("workspace", "main")
+        )
+
+
+@dataclass
+class ScreenState:
+    """
+    Screen state structured in 4 levels
+    
+    Represents an analyzed screenshot with:
+    - Raw: the raw image
+    - Perception: embeddings + detected text
+    - UI Semantics: UI elements (held in ui_elements)
+    - Context: business metadata
+    
+    Task 4: Standardized contracts
+    - screen_state_id, session_id: standardized strings
+    - timestamp: datetime object only
+    """
+    screen_state_id: str  # Standardized as a string
+    timestamp: datetime  # datetime object only
+    session_id: str  # Standardized as a string
+    window: WindowContext
+    raw: RawLevel
+    perception: PerceptionLevel
+    context: ContextLevel
+    metadata: Dict[str, Any] = field(default_factory=dict)
+    
+    # Level 3: UI Elements - list of the detected UI elements
+    ui_elements: List[Any] = field(default_factory=list)  # List[UIElement]
+    
+    def __post_init__(self):
+        """Validate and migrate the data after initialization."""
+        # Migrate the IDs to strings
+        if not isinstance(self.screen_state_id, str):
+            self.screen_state_id = str(DataConverter.ensure_id(self.screen_state_id))
+        if not isinstance(self.session_id, str):
+            self.session_id = str(DataConverter.ensure_id(self.session_id))
+        
+        # Migrate timestamp to datetime (reads .value from the ensure_timestamp result)
+        if not isinstance(self.timestamp, datetime):
+            self.timestamp = DataConverter.ensure_timestamp(self.timestamp).value
+    
+    # =========================================================================
+    # COMPATIBILITY ALIASES (Fiche #1 - soft migration)
+    # Author: Dom, Alice Kiro - 15 December 2024
+    # =========================================================================
+    
+    @property
+    def state_id(self) -> str:
+        """Compatibility alias for screen_state_id."""
+        return self.screen_state_id
+    
+    @property
+    def raw_level(self) -> RawLevel:
+        """Compatibility alias for raw."""
+        return self.raw
+    
+    @property
+    def perception_level(self) -> PerceptionLevel:
+        """Compatibility alias for perception."""
+        return self.perception
+    
+    @property
+    def screenshot_path(self) -> str:
+        """Compatibility alias for raw.screenshot_path."""
+        return self.raw.screenshot_path
+    
+    @property
+    def ui_elements_count(self) -> int:
+        """Number of detected UI elements."""
+        return len(self.ui_elements)
+    
+    def to_json(self) -> Dict[str, Any]:
+        """Serialize to a dict (timestamp as an ISO 8601 string); UI elements without to_dict pass through as-is."""
+        return {
+            "screen_state_id": self.screen_state_id,
+            "timestamp": self.timestamp.isoformat(),
+            "session_id": self.session_id,
+            "window": self.window.to_dict(),
+            "raw": self.raw.to_dict(),
+            "perception": self.perception.to_dict(),
+            "context": self.context.to_dict(),
+            "metadata": self.metadata,
+            "ui_elements": [el.to_dict() if hasattr(el, 'to_dict') else el for el in self.ui_elements]
+        }
+    
+    @classmethod
+    def from_json(cls, data: Dict[str, Any]) -> 'ScreenState':
+        """Deserialize from a dict, with automatic migration of the timestamp and IDs."""
+        # Migrate the data to the new contracts
+        migrated_data = DataConverter.migrate_timestamp_dict(data, ['timestamp'])
+        migrated_data = DataConverter.migrate_id_dict(migrated_data, ['screen_state_id', 'session_id'])
+        
+        timestamp = migrated_data["timestamp"]
+        if isinstance(timestamp, str):  # still a string after migration: parse it as ISO format
+            timestamp = datetime.fromisoformat(timestamp)
+        
+        window = WindowContext.from_dict(migrated_data["window"])
+        raw = RawLevel.from_dict(migrated_data["raw"])
+        perception = PerceptionLevel.from_dict(migrated_data["perception"])
+        context = ContextLevel.from_dict(migrated_data["context"])
+        
+        # Import UIElement here to avoid a circular import
+        from .ui_element import UIElement
+        
+        # Parse ui_elements if present
+        ui_elements_data = migrated_data.get("ui_elements", [])
+        ui_elements = []
+        for el_data in ui_elements_data:
+            if isinstance(el_data, dict):
+                ui_elements.append(UIElement.from_dict(el_data))
+            else:
+                ui_elements.append(el_data)  # non-dict entries are kept unchanged
+        
+        return cls(
+            screen_state_id=migrated_data["screen_state_id"],
+            timestamp=timestamp,
+            session_id=migrated_data["session_id"],
+            window=window,
+            raw=raw,
+            perception=perception,
+            context=context,
+            metadata=migrated_data.get("metadata", {}),
+            ui_elements=ui_elements
+        )
+    
+    def save_to_file(self, filepath: Path) -> None:
+        """Save to a JSON file (UTF-8, indent 2), creating parent directories as needed."""
+        filepath.parent.mkdir(parents=True, exist_ok=True)  # NOTE(review): needs a Path-like object; a plain str has no .parent
+        with open(filepath, 'w', encoding='utf-8') as f:
+            json.dump(self.to_json(), f, indent=2, ensure_ascii=False)
+    
+    @classmethod
+    def load_from_file(cls, filepath: Path) -> 'ScreenState':
+        """Load from a JSON file (UTF-8) and rebuild the instance through from_json."""
+        with open(filepath, 'r', encoding='utf-8') as f:
+            data = json.load(f)
+        return cls.from_json(data)
+    
+    def validate_consistency(self) -> bool:
+        """
+        Intended to validate that the 4 levels reference the same screenshot and timestamp
+        
+        Property 2: ScreenState Multi-Level Consistency
+        """
+        # All levels must exist (truthiness check on raw, perception and context only)
+        if not all([self.raw, self.perception, self.context]):
+            return False
+        
+        # The timestamp must be consistent
+        # (all levels reference the same instant)
+        return True  # NOTE(review): no timestamp/screenshot cross-check is actually performed here