# rpa_vision_v3/agent_v0/agent_v1/vision/capturer.py

# agent_v1/vision/capturer.py
"""
Gestionnaire de vision avancé pour Agent V1.
Optimisé pour le streaming fibre avec détection de changement.

Captures disponibles :
- Plein écran (full) : contexte global 1920x1080+
- Crop ciblé (crop) : 80x80 autour du clic (apprentissage VLM)
- Fenêtre active (window) : image isolée de la fenêtre + métadonnées
  (titre, rect, coordonnées clic relatives) — cross-platform
"""

import os
import time
import logging
import hashlib
import platform
from typing import Any, Dict, Optional
from PIL import Image, ImageFilter, ImageStat
import mss
from ..config import TARGETED_CROP_SIZE, SCREENSHOT_QUALITY, BLUR_SENSITIVE
from .blur_sensitive import blur_sensitive_regions

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Current operating system name (detected only once, at import time).
_SYSTEM = platform.system()

class VisionCapturer:
    def __init__(self, session_dir: str):
        self.session_dir = session_dir
        self.shots_dir = os.path.join(session_dir, "shots")
        os.makedirs(self.shots_dir, exist_ok=True)
        # On ne crée plus self.sct ici car mss n'est pas thread-safe sous Windows
        self.last_img_hash = None

    def capture_full_context(self, name_suffix: str, force=False) -> str:
        """Capture the full screen and save it as a PNG in the shots folder.

        When ``force`` is False, the frame is first compared with the hash of
        the last saved frame; an unchanged screen is skipped so heartbeats do
        not upload duplicate images.

        The change-detection hash is committed only after the file has been
        written, so a failed blur/save cannot make the next heartbeat skip a
        frame that was never delivered. A forced capture refreshes the hash
        too (as ``capture_dual`` does), so the heartbeat that follows does not
        re-send an identical screen.

        This method does not attach the active window title itself; callers
        that need it can use ``get_active_window_title``.

        Args:
            name_suffix: Label appended to the generated file name.
            force: When True, skip the change check and always save.

        Returns:
            Path of the saved PNG, or "" when the screen has not changed or
            the capture failed (the error is logged).
        """
        try:
            # A fresh mss handle is opened per call instead of being shared
            # on the instance.
            with mss.mss() as sct:
                monitor = sct.monitors[1]
                sct_img = sct.grab(monitor)
                img = Image.frombytes("RGB", sct_img.size, sct_img.bgra, "raw", "BGRX")

                # Hash the raw frame, before any blurring, so the value is
                # comparable from one call to the next.
                current_hash = self._compute_quick_hash(img)
                if not force and current_hash == self.last_img_hash:
                    return ""  # Nothing changed: save bandwidth

                # Blur sensitive data (AI Act compliance)
                if BLUR_SENSITIVE:
                    blur_sensitive_regions(img)

                path = os.path.join(self.shots_dir, f"context_{int(time.time())}_{name_suffix}.png")
                # NOTE(review): Pillow's PNG writer does not appear to use
                # `quality` (it takes `compress_level` / `optimize`) — confirm
                # whether SCREENSHOT_QUALITY has any effect here.
                img.save(path, "PNG", quality=SCREENSHOT_QUALITY)

                # Commit the hash only once the frame is safely on disk.
                self.last_img_hash = current_hash
                return path
        except Exception as e:
            logger.error(f"Erreur Context Capture: {e}")
            return ""

    def get_active_window_title(self) -> str:
        """Retourne le titre de la fenêtre active (pour enrichir les heartbeats).

        Fallback gracieux : retourne une chaîne vide si indisponible.
        """
        try:
            from ..window_info_crossplatform import get_active_window_info
            info = get_active_window_info()
            return info.get("title", "")
        except Exception:
            return ""

    def capture_dual(self, x: int, y: int, screenshot_id: str, anonymize=False) -> dict:
        """Capture full screen + click crop + active window for one click.

        Despite its name, this produces up to three images. The active-window
        capture is an add-on: if it fails, the full screenshot and the crop
        are still returned.

        The crop is a ``TARGETED_CROP_SIZE`` box centred on the click and
        shifted as needed to stay inside the frame on every side, so it never
        extends past a screen edge.

        Args:
            x, y: Click position in screen pixels.
            screenshot_id: Identifier used as the file-name prefix.
            anonymize: When True, apply a Gaussian blur to the crop.

        Returns:
            ``{"full": path, "crop": path}`` plus ``"window_capture"`` (the
            dict returned by ``capture_active_window``) when available, or an
            empty dict when the capture failed (the error is logged).
        """
        try:
            with mss.mss() as sct:
                full_path = os.path.join(self.shots_dir, f"{screenshot_id}_full.png")
                monitor = sct.monitors[1]
                sct_img = sct.grab(monitor)
                img = Image.frombytes("RGB", sct_img.size, sct_img.bgra, "raw", "BGRX")

                # Hash the raw frame before any blurring, as
                # capture_full_context does, so that the next heartbeat
                # compares like with like.
                frame_hash = self._compute_quick_hash(img)

                # Click crop (core input for VLM learning)
                crop_path = os.path.join(self.shots_dir, f"{screenshot_id}_crop.png")
                w, h = TARGETED_CROP_SIZE
                img_w, img_h = img.size
                # Clamp on all four sides: the low bound wins when the frame
                # is smaller than the crop box.
                left = max(0, min(x - w // 2, img_w - w))
                top = max(0, min(y - h // 2, img_h - h))
                crop_img = img.crop((left, top, left + w, top + h))

                if anonymize:
                    crop_img = crop_img.filter(ImageFilter.GaussianBlur(radius=4))

                # Blur sensitive data (AI Act compliance)
                if BLUR_SENSITIVE:
                    blur_sensitive_regions(img)
                    blur_sensitive_regions(crop_img)

                img.save(full_path, "PNG", quality=SCREENSHOT_QUALITY)
                crop_img.save(crop_path, "PNG", quality=SCREENSHOT_QUALITY)

                # Remember this frame for the next heartbeat's change check.
                self.last_img_hash = frame_hash

                result = {"full": full_path, "crop": crop_path}

                # --- Active window capture ---
                # Optional enrichment: image of the window alone + metadata
                # (title, rect, relative click). Returns None on failure.
                window_info = self.capture_active_window(x, y, screenshot_id, full_img=img)
                if window_info:
                    result["window_capture"] = window_info

                return result
        except Exception as e:
            logger.error(f"Erreur Dual Capture: {e}")
            return {}

    def capture_active_window(
        self,
        x: int,
        y: int,
        screenshot_id: str,
        full_img: Optional[Image.Image] = None,
    ) -> Optional[Dict[str, Any]]:
        """Save an image of the active window alone and return its metadata.

        Strategy:
        1. Get the window rectangle from ``get_active_window_rect`` (helper
           from ``window_info_crossplatform``).
        2. Crop that rectangle out of the full-screen screenshot.
        3. Compute the click coordinates relative to the window.

        Args:
            x, y: Click position in screen pixels.
            screenshot_id: Identifier used as the file-name prefix.
            full_img: Full-screen screenshot already taken (optional; avoids a
                second grab when called from ``capture_dual``).

        Returns:
            Dict with ``window_image`` (file path), ``window_title``,
            ``app_name``, ``window_rect``, ``window_size``,
            ``click_in_window`` and ``click_inside_window`` — or None when the
            window is not found, is smaller than 50 px on a side, lies fully
            off-screen, the helper module is unavailable, or any error occurs.
        """
        try:
            from ..window_info_crossplatform import get_active_window_rect

            rect_info = get_active_window_rect()
            if not rect_info:
                logger.debug("Fenêtre active introuvable — skip capture fenêtre")
                return None

            win_rect = rect_info["rect"]  # [left, top, right, bottom]
            win_left, win_top, win_right, win_bottom = win_rect
            win_w, win_h = rect_info["size"]  # [width, height]
            title = rect_info.get("title", "unknown_window")
            app_name = rect_info.get("app_name", "unknown_app")

            # Skip windows that are too small (task bars, system popups)
            if win_w < 50 or win_h < 50:
                logger.debug(f"Fenêtre trop petite ({win_w}x{win_h}) — skip")
                return None

            # Click coordinates relative to the window's top-left corner
            click_rel_x = x - win_left
            click_rel_y = y - win_top

            # A click outside the window is reported but does not abort.
            # Right/bottom are exclusive (same convention as the crop box
            # below), so a click at offset == width/height is outside.
            click_inside = 0 <= click_rel_x < win_w and 0 <= click_rel_y < win_h

            # --- Crop the window out of the full-screen image ---
            if full_img is None:
                # No screenshot supplied: grab one (standalone use)
                try:
                    with mss.mss() as sct:
                        monitor = sct.monitors[1]
                        sct_img = sct.grab(monitor)
                        full_img = Image.frombytes(
                            "RGB", sct_img.size, sct_img.bgra, "raw", "BGRX"
                        )
                except Exception as e:
                    logger.error(f"Erreur capture plein écran pour fenêtre : {e}")
                    return None

            # Clip the crop box to the full-screen image bounds
            img_w, img_h = full_img.size
            crop_left = max(0, win_left)
            crop_top = max(0, win_top)
            crop_right = min(img_w, win_right)
            crop_bottom = min(img_h, win_bottom)

            if crop_right <= crop_left or crop_bottom <= crop_top:
                logger.debug("Fenêtre hors écran — skip capture fenêtre")
                return None

            # NOTE(review): when the window is partly off-screen the saved
            # image starts at (crop_left, crop_top), not (win_left, win_top),
            # so click_in_window is not a pixel position in that image —
            # confirm what consumers of this dict expect.
            window_img = full_img.crop((crop_left, crop_top, crop_right, crop_bottom))

            # Blur sensitive data (AI Act compliance)
            if BLUR_SENSITIVE:
                blur_sensitive_regions(window_img)

            # Save to disk
            window_path = os.path.join(
                self.shots_dir, f"{screenshot_id}_window.png"
            )
            window_img.save(window_path, "PNG", quality=SCREENSHOT_QUALITY)

            result = {
                "window_image": window_path,
                "window_title": title,
                "app_name": app_name,
                "window_rect": win_rect,
                "window_size": [win_w, win_h],
                "click_in_window": [click_rel_x, click_rel_y],
                "click_inside_window": click_inside,
            }

            logger.debug(
                f"Fenêtre capturée : {title} ({win_w}x{win_h}) — "
                f"clic relatif ({click_rel_x}, {click_rel_y})"
            )
            return result

        except ImportError as e:
            # Helper module missing: the feature is simply unavailable.
            logger.debug(f"Module fenêtre indisponible : {e}")
            return None
        except Exception as e:
            logger.error(f"Erreur capture fenêtre active : {e}")
            return None

    def _compute_quick_hash(self, img: Image.Image) -> str:
        """Return a quick fingerprint of ``img`` for change detection.

        The image is reduced to a 64x64 grayscale thumbnail with
        nearest-neighbour sampling, and the MD5 hex digest of its raw bytes is
        returned. Two frames with the same digest are treated as unchanged by
        the callers.

        Args:
            img: Image to fingerprint.

        Returns:
            Hexadecimal MD5 digest of the thumbnail bytes.
        """
        # Shrink first, then convert: only the 64x64 thumbnail is converted
        # to grayscale, which keeps the check very cheap.
        small_img = img.resize((64, 64), Image.NEAREST).convert("L")
        return hashlib.md5(small_img.tobytes()).hexdigest()