# rpa_vision_v3/core/models/screen_state.py

"""
ScreenState - Couche 1 : Analyse Multi-Modale

Transforme un screenshot brut en représentation structurée à 4 niveaux :
- Niveau 1 : Raw (Ce que la machine voit)
- Niveau 2 : Perception (Ce que la vision déduit)
- Niveau 3 : Sémantique UI (Ce que le système comprend)
- Niveau 4 : Contexte Métier (Session/Application)

Tâche 4 : Contrats de données standardisés
- Timestamps : datetime objects uniquement
- IDs : Strings uniquement
"""

from dataclasses import dataclass, field
from datetime import datetime
from typing import Dict, List, Optional, Any, TYPE_CHECKING
from pathlib import Path
import json
from .base_models import Timestamp, StandardID, DataConverter

if TYPE_CHECKING:
    from .ui_element import UIElement


@dataclass
class EmbeddingRef:
    """Reference to a stored embedding."""
    provider: str  # e.g., "openclip_ViT-B-32"
    vector_id: str  # Path to the .npy file
    dimensions: int

    def to_dict(self) -> Dict[str, Any]:
        """Return the three fields as a plain dictionary."""
        return dict(
            provider=self.provider,
            vector_id=self.vector_id,
            dimensions=self.dimensions,
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'EmbeddingRef':
        """Build an instance from a mapping.

        All three keys are required; a missing one raises ``KeyError``.
        """
        provider = data["provider"]
        vector_id = data["vector_id"]
        dimensions = data["dimensions"]
        return cls(provider=provider, vector_id=vector_id, dimensions=dimensions)


@dataclass
class RawLevel:
    """Level 1: Raw - what the machine sees."""
    screenshot_path: str
    capture_method: str  # e.g., "mss", "pillow"
    file_size_bytes: int

    def to_dict(self) -> Dict[str, Any]:
        """Return the three fields as a plain dictionary, in declaration order."""
        names = ("screenshot_path", "capture_method", "file_size_bytes")
        return {name: getattr(self, name) for name in names}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'RawLevel':
        """Build an instance from a mapping.

        All three keys are required; a missing one raises ``KeyError``.
        """
        names = ("screenshot_path", "capture_method", "file_size_bytes")
        return cls(**{name: data[name] for name in names})


@dataclass
class PerceptionLevel:
    """Level 2: Perception - what the vision layer infers."""
    embedding: EmbeddingRef
    detected_text: List[str]
    text_detection_method: str  # e.g., "qwen_vl", "tesseract"
    confidence_avg: float

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain dictionary; the embedding is serialised via its own ``to_dict``."""
        embedding_payload = self.embedding.to_dict()
        return dict(
            embedding=embedding_payload,
            detected_text=self.detected_text,
            text_detection_method=self.text_detection_method,
            confidence_avg=self.confidence_avg,
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'PerceptionLevel':
        """Build an instance from a mapping.

        All four keys are required (``KeyError`` otherwise); the nested
        ``"embedding"`` mapping is parsed with ``EmbeddingRef.from_dict``.
        """
        embedding = EmbeddingRef.from_dict(data["embedding"])
        detected_text = data["detected_text"]
        method = data["text_detection_method"]
        confidence = data["confidence_avg"]
        return cls(
            embedding=embedding,
            detected_text=detected_text,
            text_detection_method=method,
            confidence_avg=confidence,
        )


@dataclass
class ContextLevel:
    """Level 4: Business context - session/application."""
    current_workflow_candidate: Optional[str] = None
    workflow_step: Optional[int] = None
    user_id: str = ""  # Standardised as a string
    tags: List[str] = field(default_factory=list)
    business_variables: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        """Coerce ``user_id`` to a string when it is neither ``None`` nor a string."""
        current = self.user_id
        if current is None or isinstance(current, str):
            # Already acceptable: leave untouched.
            return
        self.user_id = str(DataConverter.ensure_id(current))

    def to_dict(self) -> Dict[str, Any]:
        """Return the five fields as a plain dictionary, in declaration order."""
        names = (
            "current_workflow_candidate",
            "workflow_step",
            "user_id",
            "tags",
            "business_variables",
        )
        return {name: getattr(self, name) for name in names}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'ContextLevel':
        """Build an instance from a mapping; every key is optional.

        The mapping is first passed through ``DataConverter.migrate_id_dict``
        for the ``user_id`` key, then missing keys fall back to the field
        defaults.
        """
        migrated = DataConverter.migrate_id_dict(data, ['user_id'])
        lookup = migrated.get

        return cls(
            current_workflow_candidate=lookup("current_workflow_candidate"),
            workflow_step=lookup("workflow_step"),
            user_id=lookup("user_id", ""),
            tags=lookup("tags", []),
            business_variables=lookup("business_variables", {}),
        )


@dataclass
class WindowContext:
    """Window context."""
    app_name: str
    window_title: str
    screen_resolution: List[int]
    workspace: str = "main"

    def to_dict(self) -> Dict[str, Any]:
        """Return the four fields as a plain dictionary, in declaration order."""
        payload: Dict[str, Any] = {}
        payload["app_name"] = self.app_name
        payload["window_title"] = self.window_title
        payload["screen_resolution"] = self.screen_resolution
        payload["workspace"] = self.workspace
        return payload

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'WindowContext':
        """Build an instance from a mapping.

        ``app_name``, ``window_title`` and ``screen_resolution`` are required
        (``KeyError`` otherwise); ``workspace`` falls back to ``"main"``.
        """
        app_name = data["app_name"]
        window_title = data["window_title"]
        screen_resolution = data["screen_resolution"]
        workspace = data.get("workspace", "main")
        return cls(
            app_name=app_name,
            window_title=window_title,
            screen_resolution=screen_resolution,
            workspace=workspace,
        )


@dataclass
class ScreenState:
    """
    Structured screen state organised in 4 levels.

    Represents an analysed screenshot with:
    - Raw: raw image
    - Perception: embeddings + detected text
    - UI semantics: UI elements (added separately)
    - Context: business metadata

    Task 4: standardised contracts
    - screen_state_id, session_id: standardised strings
    - timestamp: datetime object only
    """
    screen_state_id: str  # Standardised as a string
    timestamp: datetime  # datetime object only
    session_id: str  # Standardised as a string
    window: WindowContext
    raw: RawLevel
    perception: PerceptionLevel
    context: ContextLevel
    metadata: Dict[str, Any] = field(default_factory=dict)

    # Level 3: UI elements - list of detected UI elements
    ui_elements: List[Any] = field(default_factory=list)  # List[UIElement]

    def __post_init__(self):
        """Validate and migrate the data after initialisation.

        Non-string IDs are passed through ``DataConverter.ensure_id`` and
        ``str()``; a non-datetime timestamp is replaced by the ``.value`` of
        ``DataConverter.ensure_timestamp``.
        """
        # Migrate IDs to strings
        if not isinstance(self.screen_state_id, str):
            self.screen_state_id = str(DataConverter.ensure_id(self.screen_state_id))
        if not isinstance(self.session_id, str):
            self.session_id = str(DataConverter.ensure_id(self.session_id))

        # Migrate timestamp to datetime
        if not isinstance(self.timestamp, datetime):
            self.timestamp = DataConverter.ensure_timestamp(self.timestamp).value

    # =========================================================================
    # COMPATIBILITY ALIASES (Sheet #1 - Soft migration)
    # Author: Dom, Alice Kiro - 15 December 2024
    # =========================================================================

    @property
    def state_id(self) -> str:
        """Compatibility alias for ``screen_state_id``."""
        return self.screen_state_id

    @property
    def raw_level(self) -> RawLevel:
        """Compatibility alias for ``raw``."""
        return self.raw

    @property
    def perception_level(self) -> PerceptionLevel:
        """Compatibility alias for ``perception``."""
        return self.perception

    @property
    def screenshot_path(self) -> str:
        """Compatibility alias for ``raw.screenshot_path``."""
        return self.raw.screenshot_path

    @property
    def ui_elements_count(self) -> int:
        """Number of detected UI elements."""
        return len(self.ui_elements)

    def to_json(self) -> Dict[str, Any]:
        """Serialise to a dictionary suitable for ``json.dump``.

        The timestamp is written in ISO format. UI elements exposing a
        ``to_dict`` method are serialised with it; any other element is
        emitted unchanged. ``metadata`` is emitted as-is.
        """
        return {
            "screen_state_id": self.screen_state_id,
            "timestamp": self.timestamp.isoformat(),
            "session_id": self.session_id,
            "window": self.window.to_dict(),
            "raw": self.raw.to_dict(),
            "perception": self.perception.to_dict(),
            "context": self.context.to_dict(),
            "metadata": self.metadata,
            "ui_elements": [el.to_dict() if hasattr(el, 'to_dict') else el for el in self.ui_elements]
        }

    @classmethod
    def from_json(cls, data: Dict[str, Any]) -> 'ScreenState':
        """Deserialise from a dictionary, with automatic migration.

        Required keys: ``screen_state_id``, ``timestamp``, ``session_id``,
        ``window``, ``raw``, ``perception`` and ``context`` (``KeyError``
        otherwise). ``metadata`` and ``ui_elements`` are optional; a missing
        or null value yields an empty dict / list.
        """
        # Migrate the data to the new contracts
        migrated_data = DataConverter.migrate_timestamp_dict(data, ['timestamp'])
        migrated_data = DataConverter.migrate_id_dict(migrated_data, ['screen_state_id', 'session_id'])

        timestamp = migrated_data["timestamp"]
        if isinstance(timestamp, str):
            timestamp = datetime.fromisoformat(timestamp)

        window = WindowContext.from_dict(migrated_data["window"])
        raw = RawLevel.from_dict(migrated_data["raw"])
        perception = PerceptionLevel.from_dict(migrated_data["perception"])
        context = ContextLevel.from_dict(migrated_data["context"])

        # Import UIElement here to avoid a circular import
        from .ui_element import UIElement

        # Parse ui_elements when present. ``or []`` also covers an explicit
        # JSON null, which would otherwise fail when iterated.
        ui_elements_data = migrated_data.get("ui_elements") or []
        ui_elements = [
            UIElement.from_dict(el_data) if isinstance(el_data, dict) else el_data
            for el_data in ui_elements_data
        ]

        return cls(
            screen_state_id=migrated_data["screen_state_id"],
            timestamp=timestamp,
            session_id=migrated_data["session_id"],
            window=window,
            raw=raw,
            perception=perception,
            context=context,
            # A null metadata entry is normalised to an empty dict.
            metadata=migrated_data.get("metadata") or {},
            ui_elements=ui_elements
        )

    def save_to_file(self, filepath: Path) -> None:
        """Save to a JSON file, creating parent directories as needed.

        ``filepath`` may also be given as a plain string path; it is coerced
        to ``Path`` so both forms work, as they do for ``load_from_file``.
        """
        filepath = Path(filepath)
        filepath.parent.mkdir(parents=True, exist_ok=True)
        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(self.to_json(), f, indent=2, ensure_ascii=False)

    @classmethod
    def load_from_file(cls, filepath: Path) -> 'ScreenState':
        """Load from a JSON file and deserialise with ``from_json``."""
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)
        return cls.from_json(data)

    def validate_consistency(self) -> bool:
        """
        Check multi-level consistency.

        Property 2: ScreenState Multi-Level Consistency

        Currently this only verifies that ``raw``, ``perception`` and
        ``context`` are all truthy. No comparison of screenshot paths or
        timestamps across levels is performed yet.
        """
        # Every level must exist
        if not all((self.raw, self.perception, self.context)):
            return False

        # NOTE(review): timestamp coherence across levels is not checked
        # here; the method returns True as soon as the levels exist.
        return True