711 lines
27 KiB
Python
711 lines
27 KiB
Python
# agent_v1/vision/capturer.py
|
||
"""
|
||
Gestionnaire de vision avancé pour Agent V1.
|
||
Optimisé pour le streaming fibre avec détection de changement.
|
||
|
||
Captures disponibles :
|
||
- Plein écran (full) : contexte global 1920x1080+
|
||
- Crop ciblé (crop) : 80x80 autour du clic (apprentissage VLM)
|
||
- Fenêtre active (window) : image isolée de la fenêtre + métadonnées
|
||
(titre, rect, coordonnées clic relatives) — cross-platform
|
||
"""
|
||
|
||
import os
|
||
import time
|
||
import logging
|
||
import hashlib
|
||
import platform
|
||
from typing import Any, Dict, List, Optional, Tuple
|
||
from PIL import Image, ImageFilter, ImageStat
|
||
import mss
|
||
from ..config import TARGETED_CROP_SIZE, BLUR_SENSITIVE
|
||
from .blur_sensitive import blur_sensitive_regions
|
||
from .capture_io import save_capture
|
||
|
||
logger = logging.getLogger(__name__)

# Name of the host operating system, resolved once at import time.
_SYSTEM = platform.system()

# QW1 — multi-monitor detection. screeninfo is an optional dependency: when it
# cannot be imported the flag below is False and the monitor helpers degrade
# to empty / None results instead of failing.
try:
    from screeninfo import get_monitors as _screeninfo_get_monitors
except ImportError:
    _SCREENINFO_AVAILABLE = False
else:
    _SCREENINFO_AVAILABLE = True
|
||
|
||
|
||
def _get_monitors_geometry() -> List[Dict[str, Any]]:
    """Describe every monitor reported by screeninfo.

    Returns:
        One dict per monitor with the keys ``idx`` (enumeration position),
        ``x``, ``y`` (offsets), ``w``, ``h`` (size) and ``primary``.
        The list is empty when screeninfo could not be imported or when
        enumerating the monitors raises.
    """
    if not _SCREENINFO_AVAILABLE:
        return []

    geometry: List[Dict[str, Any]] = []
    try:
        for position, screen in enumerate(_screeninfo_get_monitors()):
            geometry.append(
                {
                    "idx": position,
                    "x": int(screen.x),
                    "y": int(screen.y),
                    "w": int(screen.width),
                    "h": int(screen.height),
                    # Not every monitor object exposes is_primary.
                    "primary": bool(getattr(screen, "is_primary", False)),
                }
            )
    except Exception:
        # Best effort: a partial description is never returned.
        return []
    return geometry
|
||
|
||
|
||
def _get_active_monitor_index() -> Optional[int]:
    """Return the enumeration index of the monitor under the mouse cursor.

    Returns:
        The position of the first screeninfo monitor whose rectangle contains
        the cursor, or None when screeninfo is unavailable, the cursor lies in
        no monitor, or anything raises while looking it up.
    """
    if not _SCREENINFO_AVAILABLE:
        return None
    try:
        # Imported lazily so pyautogui never becomes a hard dependency.
        import pyautogui

        pointer_x, pointer_y = pyautogui.position()
        return next(
            (
                position
                for position, screen in enumerate(_screeninfo_get_monitors())
                if screen.x <= pointer_x < screen.x + screen.width
                and screen.y <= pointer_y < screen.y + screen.height
            ),
            None,
        )
    except Exception:
        return None
|
||
|
||
|
||
def _enrich_with_monitor_info(payload: dict) -> dict:
    """Attach ``monitor_index`` and ``monitors_geometry`` to *payload*.

    The dict is modified in place and also returned. Anything that is not a
    dict is handed back untouched.
    """
    if not isinstance(payload, dict):
        return payload
    payload.update(
        monitor_index=_get_active_monitor_index(),
        monitors_geometry=_get_monitors_geometry(),
    )
    return payload
|
||
|
||
|
||
# Monitor dimension guard (GHT demo, 19 May 2026): mss.monitors[1] can
# intermittently report truncated dimensions (2560x60 was observed). Using such
# dimensions to normalise coordinates poisons the memory (TargetMemoryStore).

# Smallest width / height (pixels) accepted by _is_monitor_sane.
MIN_MONITOR_WIDTH = 200
MIN_MONITOR_HEIGHT = 200

# Default retry policy of _acquire_safe_grab: attempt count and pause (seconds).
MONITOR_MAX_ATTEMPTS = 2
MONITOR_RETRY_DELAY_S = 0.05

# Luminance ceilings below which _is_effectively_black treats a frame as black.
BLACK_FRAME_MAX_LUMA = 3
BLACK_FRAME_MEAN_MAX = 1.0
BLACK_FRAME_STDDEV_MAX = 1.0
|
||
|
||
|
||
def _is_monitor_sane(monitor) -> bool:
    """Tell whether *monitor* reports dimensions at or above the minimum size.

    Anything that is not a dict is rejected. Missing or falsy ``width`` /
    ``height`` entries count as 0.
    """
    if isinstance(monitor, dict):
        width = monitor.get("width", 0) or 0
        height = monitor.get("height", 0) or 0
        return width >= MIN_MONITOR_WIDTH and height >= MIN_MONITOR_HEIGHT
    return False
|
||
|
||
|
||
def _dim_str(monitor) -> str:
    """Format a monitor dict as ``WxH`` for log messages.

    Missing keys are rendered as ``?``; a non-dict value (including None)
    yields ``"?x?"``.
    """
    if isinstance(monitor, dict):
        width = monitor.get("width", "?")
        height = monitor.get("height", "?")
        return f"{width}x{height}"
    return "?x?"
|
||
|
||
|
||
def _acquire_safe_grab(max_attempts: int = MONITOR_MAX_ATTEMPTS,
                       retry_delay_s: float = MONITOR_RETRY_DELAY_S,
                       allow_secondary_fallback: bool = True):
    """Open mss and grab a monitor whose dimensions are plausible.

    Cascade:
      1. Each attempt opens a fresh ``mss.mss()`` (which may refresh its
         internal cache) and inspects monitors[1..n].
      2. monitors[1] is preferred. If it is implausible AND
         ``allow_secondary_fallback`` is True, the first sane monitors[2..n]
         is used, with an explicit WARNING.
      3. With ``allow_secondary_fallback=False`` only monitors[1] is accepted.
         This is for callers holding (x, y) coordinates in the composite
         screen system: a secondary monitor would give a sane image that is
         shifted relative to those coordinates.
      4. When no monitor is acceptable, or when mss itself raises, wait
         ``retry_delay_s`` and try again.
      5. After ``max_attempts`` failures an ERROR is logged and (None, None)
         is returned so the caller can take an explicit error path.

    Args:
        max_attempts: number of mss attempts before giving up.
        retry_delay_s: pause between attempts, in seconds.
        allow_secondary_fallback: if False, refuse monitors[2..n]
            (fail-closed for coordinate-bearing callers).

    Returns:
        Tuple (monitor_dict, PIL.Image) on success, (None, None) otherwise.
        Exceptions raised by mss are logged and treated as a failed attempt;
        they never propagate to the caller.
    """
    last_aberrant = None
    secondary_seen = False  # a sane secondary monitor was seen but refused
    for attempt in range(max_attempts):
        try:
            with mss.mss() as sct:
                monitors = list(sct.monitors) if sct.monitors else []
                chosen = None
                chosen_idx = None
                # Index 0 is skipped: only monitors[1..n] are candidates.
                for idx, candidate in enumerate(monitors[1:], start=1):
                    if not _is_monitor_sane(candidate):
                        last_aberrant = candidate
                        logger.warning(
                            "Monitor[%d] dims aberrantes (%s, seuil %dx%d) "
                            "— attempt %d/%d",
                            idx, _dim_str(candidate),
                            MIN_MONITOR_WIDTH, MIN_MONITOR_HEIGHT,
                            attempt + 1, max_attempts,
                        )
                        continue
                    # Sane monitor found.
                    if idx == 1 or allow_secondary_fallback:
                        chosen = candidate
                        chosen_idx = idx
                        break
                    # Sane, but secondary monitors are forbidden here.
                    secondary_seen = True
                    logger.warning(
                        "Monitor[%d] sain (%s) mais fallback secondaire refusé "
                        "(allow_secondary_fallback=False) — capture cohérente "
                        "des coords impossible",
                        idx, _dim_str(candidate),
                    )
                if chosen is not None:
                    if chosen_idx != 1 or attempt > 0:
                        logger.warning(
                            "Capture fallback : monitor[%d] dim=%s, attempt=%d",
                            chosen_idx, _dim_str(chosen), attempt + 1,
                        )
                    sct_img = sct.grab(chosen)
                    img = Image.frombytes(
                        "RGB", sct_img.size, sct_img.bgra, "raw", "BGRX",
                    )
                    return chosen, img
        except Exception as exc:
            # A failing mss backend must count as a failed attempt rather
            # than abort the caller: the callers' fallback chain only runs
            # when this function returns (None, None).
            logger.warning(
                "Backend mss en échec (%s) — attempt %d/%d",
                exc, attempt + 1, max_attempts,
            )
        if attempt < max_attempts - 1:
            time.sleep(retry_delay_s)

    if secondary_seen and not allow_secondary_fallback:
        logger.error(
            "Capture abandonnée : monitor[1] aberrant après %d tentatives "
            "(dernier vu %s) et fallback secondaire désactivé "
            "pour préserver la cohérence des coordonnées",
            max_attempts, _dim_str(last_aberrant),
        )
    else:
        logger.error(
            "Aucun monitor avec dims plausibles trouvé après %d tentatives "
            "(dernier vu : %s, seuil %dx%d) — capture abandonnée",
            max_attempts, _dim_str(last_aberrant),
            MIN_MONITOR_WIDTH, MIN_MONITOR_HEIGHT,
        )
    return None, None
|
||
|
||
|
||
def _compute_luma_stats(img: Image.Image) -> Dict[str, float]:
    """Return simple luminance statistics used to diagnose a black frame.

    The image is converted to 8-bit greyscale (mode ``"L"``) first.

    Returns:
        Dict with ``mean`` and ``stddev`` (floats rounded to 2 decimals) and
        ``min`` / ``max`` (ints, the greyscale extrema).
    """
    # The annotation is Dict[str, float] rather than ``float | int``: the
    # latter is evaluated at definition time and raises TypeError before
    # Python 3.10. An int is an acceptable value wherever float is annotated.
    gray = img.convert("L")
    stat = ImageStat.Stat(gray)
    min_luma, max_luma = gray.getextrema()
    return {
        "mean": round(float(stat.mean[0]) if stat.mean else 0.0, 2),
        "stddev": round(float(stat.stddev[0]) if stat.stddev else 0.0, 2),
        "min": int(min_luma),
        "max": int(max_luma),
    }
|
||
|
||
|
||
def _is_effectively_black(img: Image.Image) -> bool:
    """Fail-closed heuristic that flags a practically black screenshot.

    A frame is black only when its maximum luminance, mean luminance and
    luminance standard deviation are all at or below their ceilings.
    """
    luma = _compute_luma_stats(img)
    ceilings = (
        ("max", BLACK_FRAME_MAX_LUMA),
        ("mean", BLACK_FRAME_MEAN_MAX),
        ("stddev", BLACK_FRAME_STDDEV_MAX),
    )
    return all(luma[key] <= ceiling for key, ceiling in ceilings)
|
||
|
||
|
||
def _capture_via_imagegrab() -> Tuple[Optional[Dict[str, int]], Optional[Image.Image], Dict[str, Any]]:
    """Windows-only fallback capture through Pillow's ImageGrab.

    Meant for the case where ``mss`` hands back a black frame although the
    user's graphical session is still visible.

    Returns:
        (monitor, image, meta). On failure monitor and image are None and
        ``meta["error"]`` holds the reason; on success meta carries the
        luminance statistics of the grabbed image.
    """
    def _failure(reason: str):
        return None, None, {"backend": "imagegrab", "error": reason}

    if _SYSTEM != "Windows":
        return _failure("unsupported_platform")

    try:
        from PIL import ImageGrab
    except ImportError as exc:
        return _failure(str(exc))

    try:
        shot = ImageGrab.grab(all_screens=True)
    except Exception as exc:
        logger.warning("ImageGrab indisponible pour le fallback capture : %s", exc)
        return _failure(str(exc))

    # The grabbed image is described as a monitor anchored at (0, 0).
    bounds = {"left": 0, "top": 0, "width": shot.width, "height": shot.height}
    details = {
        "backend": "imagegrab",
        "luma": _compute_luma_stats(shot),
    }
    return bounds, shot, details
|
||
|
||
|
||
def capture_screen_image(
    allow_secondary_fallback: bool = True,
) -> Tuple[Optional[Dict[str, int]], Optional[Image.Image], Dict[str, Any]]:
    """Full-screen capture with black-frame diagnosis and a Windows fallback.

    mss is tried first; when it yields nothing or a practically black frame,
    ImageGrab is tried next.

    Args:
        allow_secondary_fallback: forwarded to ``_acquire_safe_grab``.

    Returns:
        (monitor, image, meta). Image (and monitor) are None when no
        full-screen backend produced a usable picture; meta then records why.
    """
    monitor, img = _acquire_safe_grab(
        allow_secondary_fallback=allow_secondary_fallback
    )
    meta: Dict[str, Any] = {"backend": "mss"}

    if img is None:
        meta["mss_unavailable"] = True
    else:
        meta["luma"] = _compute_luma_stats(img)
        if not _is_effectively_black(img):
            return monitor, img, meta
        logger.warning(
            "Capture mss quasi noire (%s) — tentative de fallback",
            meta["luma"],
        )
        meta["mss_black_frame"] = True

    # Second chance: ImageGrab.
    alt_monitor, alt_img, alt_meta = _capture_via_imagegrab()
    if alt_img is not None:
        if not _is_effectively_black(alt_img):
            logger.warning(
                "Capture fallback via ImageGrab (%sx%s)",
                alt_img.width,
                alt_img.height,
            )
            return alt_monitor, alt_img, alt_meta
        logger.warning(
            "Capture ImageGrab quasi noire (%s)",
            alt_meta.get("luma"),
        )
        meta["imagegrab_black_frame"] = True

    meta["imagegrab_error"] = alt_meta.get("error")
    return None, None, meta
|
||
|
||
|
||
def _capture_window_image_windows(
    hwnd: int,
    width: int,
    height: int,
) -> Tuple[Optional[Image.Image], Dict[str, Any]]:
    """Capture one Windows window through the Win32 PrintWindow call.

    Useful as a fallback when the full-screen capture is black but the active
    window can still be printed by the Win32 API. PrintWindow is tried with
    the flags 3, 2 and 0, in that order; the first frame that PrintWindow
    reports as successful, or that is not practically black, is returned.

    Args:
        hwnd: handle of the window to capture.
        width: bitmap width in pixels.
        height: bitmap height in pixels.

    Returns:
        (image, meta). Image is None off Windows, when pywin32 cannot be
        imported, or when no flag produced a usable frame; ``meta["error"]``
        then gives the reason.
    """
    if _SYSTEM != "Windows":
        return None, {"backend": "printwindow", "error": "unsupported_platform"}

    try:
        import ctypes
        import win32gui
        import win32ui
    except ImportError as exc:
        return None, {"backend": "printwindow", "error": str(exc)}

    failure_reason = None
    for print_flag in (3, 2, 0):
        window_dc = source_dc = memory_dc = bitmap = None
        try:
            window_dc = win32gui.GetWindowDC(hwnd)
            if not window_dc:
                raise RuntimeError("GetWindowDC a retourné 0")
            source_dc = win32ui.CreateDCFromHandle(window_dc)
            memory_dc = source_dc.CreateCompatibleDC()
            bitmap = win32ui.CreateBitmap()
            bitmap.CreateCompatibleBitmap(source_dc, width, height)
            memory_dc.SelectObject(bitmap)
            outcome = ctypes.windll.user32.PrintWindow(
                hwnd, memory_dc.GetSafeHdc(), print_flag
            )
            raw_bits = bitmap.GetBitmapBits(True)
            frame = Image.frombuffer(
                "RGB", (width, height), raw_bits, "raw", "BGRX", 0, 1
            )
            luma = _compute_luma_stats(frame)
            usable = bool(outcome) or not _is_effectively_black(frame)
            if usable:
                return frame, {
                    "backend": f"printwindow:{print_flag}",
                    "printwindow_result": int(outcome),
                    "luma": luma,
                }
        except Exception as exc:
            failure_reason = str(exc)
        finally:
            # Release in reverse order of acquisition. Each step is best
            # effort so one failing release cannot block the others.
            release_steps = (
                (bitmap, lambda: win32gui.DeleteObject(bitmap.GetHandle())),
                (memory_dc, lambda: memory_dc.DeleteDC()),
                (source_dc, lambda: source_dc.DeleteDC()),
                (window_dc, lambda: win32gui.ReleaseDC(hwnd, window_dc)),
            )
            for resource, release in release_steps:
                if resource is None:
                    continue
                try:
                    release()
                except Exception:
                    pass

    return None, {
        "backend": "printwindow",
        "error": failure_reason or "no_usable_frame",
    }
|
||
|
||
|
||
def capture_foreground_window_image() -> Tuple[Optional[Image.Image], Dict[str, Any]]:
    """Capture the focused window through the native API when available.

    Returns:
        (image, meta). Image is None when the active window, its handle or a
        positive size cannot be obtained, or when the native capture fails;
        meta then carries an ``error`` entry. On success meta is the native
        capture metadata extended with title, app name, rect, size and hwnd.
    """
    try:
        from ..window_info_crossplatform import get_active_window_rect

        active = get_active_window_rect()
    except Exception as exc:
        return None, {"backend": "printwindow", "error": str(exc)}

    if not active:
        return None, {"backend": "printwindow", "error": "active_window_unavailable"}

    win_w, win_h = active.get("size", [0, 0])
    handle = active.get("hwnd")
    if not handle or win_w <= 0 or win_h <= 0:
        return None, {
            "backend": "printwindow",
            "error": "active_window_handle_unavailable",
            "title": active.get("title", "unknown_window"),
        }

    frame, details = _capture_window_image_windows(handle, win_w, win_h)
    if frame is None:
        return None, details

    details.update(
        title=active.get("title", "unknown_window"),
        app_name=active.get("app_name", "unknown_app"),
        rect=active.get("rect"),
        window_size=active.get("size"),
        hwnd=handle,
    )
    return frame, details
|
||
|
||
|
||
class VisionCapturer:
    """Takes screen captures and stores them under ``<session_dir>/shots``.

    Three kinds of capture are offered: the full screen
    (``capture_full_context``), full screen + crop around a click + active
    window (``capture_dual``) and the active window alone
    (``capture_active_window``). ``last_img_hash`` remembers a quick hash of
    the last raw (un-blurred) full-screen frame so that unchanged heartbeat
    frames can be skipped.
    """

    def __init__(self, session_dir: str):
        """Remember the session directory and create its ``shots`` folder."""
        self.session_dir = session_dir
        self.shots_dir = os.path.join(session_dir, "shots")
        os.makedirs(self.shots_dir, exist_ok=True)
        # No long-lived mss handle is created here: mss is not thread-safe
        # on Windows.
        self.last_img_hash = None

    def _ensure_shots_dir(self) -> None:
        """Make sure ``shots/`` exists before any write.

        The folder is created in ``__init__``, but the auto-cleanup of
        ``SessionStorage`` (``shutil.rmtree`` by age/size) can delete the
        whole session directory, including the permanent ``_background``
        session. Without this guard the next capture fails with
        ``[Errno 2] No such file or directory`` (observed in the field), so
        the target directory is recreated right before each save.
        """
        os.makedirs(self.shots_dir, exist_ok=True)

    def capture_full_context(self, name_suffix: str, force: bool = False) -> str:
        """Capture the full screen and save it.

        When the full-screen backends fail, the focused window is captured
        instead (degraded mode).

        Args:
            name_suffix: appended to the file name; a suffix containing
                ``"heartbeat"`` selects the heartbeat write policy.
            force: when False, the capture is dropped if the quick hash of
                the frame equals the previous one.

        Returns:
            What ``save_capture`` returns for the written image, or ``""``
            when nothing was captured, the screen did not change, or an
            error occurred.
        """
        try:
            _monitor, img, meta = capture_screen_image()
            if img is None:
                img, win_meta = capture_foreground_window_image()
                if img is None:
                    logger.error(
                        "Capture plein contexte indisponible (meta=%s, window=%s)",
                        meta,
                        win_meta,
                    )
                    return ""
                logger.warning(
                    "Capture plein contexte dégradée via fenêtre active (%s)",
                    win_meta.get("backend"),
                )

            # Change detection (for heartbeats), on the raw frame.
            if not force:
                current_hash = self._compute_quick_hash(img)
                if current_hash == self.last_img_hash:
                    return ""  # Nothing changed: save the bandwidth.
                self.last_img_hash = current_hash

            # Blur sensitive data (AI Act compliance).
            if BLUR_SENSITIVE:
                blur_sensitive_regions(img)

            # Write policy: heartbeats are pure liveness (the server only
            # checks that the screen changed) -> downscaled JPEG. Other
            # contexts (focus_change, result_of_*) -> JPEG q85.
            kind = "heartbeat" if "heartbeat" in name_suffix else "context"
            self._ensure_shots_dir()
            path_base = os.path.join(
                self.shots_dir, f"context_{int(time.time())}_{name_suffix}"
            )
            return save_capture(img, path_base, kind)
        except Exception as e:
            logger.error("Erreur Context Capture: %s", e)
            return ""

    def get_active_window_title(self) -> str:
        """Return the active window title (used to enrich heartbeats).

        Graceful fallback: an empty string when the title is unavailable.
        """
        try:
            from ..window_info_crossplatform import get_active_window_info
            info = get_active_window_info()
            return info.get("title", "")
        except Exception:
            return ""

    def capture_dual(self, x: int, y: int, screenshot_id: str, anonymize: bool = False) -> dict:
        """Systematic triple capture: full screen + crop + active window.

        The active window is an ADDITION: when it fails, full + crop are
        still returned.

        Args:
            x, y: click coordinates in composite-screen pixels.
            screenshot_id: prefix of the written file names.
            anonymize: when True, the crop is Gaussian-blurred.

        Returns:
            Dict with ``full`` and ``crop`` (values returned by
            ``save_capture``), optionally ``window_capture``, plus the
            monitor information. Window-only dict in degraded mode; ``{}``
            on failure.
        """
        try:
            # (x, y) are in the composite-screen system; cropping from a
            # secondary monitor (offset != 0) would give a sane but shifted
            # image -> fail-closed on the secondary fallback.
            _monitor, img, meta = capture_screen_image(
                allow_secondary_fallback=False
            )
            if img is None:
                window_info = self.capture_active_window(
                    x, y, screenshot_id, full_img=None
                )
                if window_info:
                    result = {"window_capture": window_info}
                    _enrich_with_monitor_info(result)
                    logger.warning(
                        "capture_dual dégradée: fenêtre active seule (%s)",
                        meta,
                    )
                    return result
                return {}

            # Hash the RAW frame, before any sensitive-region blurring:
            # capture_full_context compares against the hash of an
            # un-blurred frame, so hashing the blurred image here would make
            # the next unchanged heartbeat look like a change.
            raw_frame_hash = self._compute_quick_hash(img)

            full_base = os.path.join(self.shots_dir, f"{screenshot_id}_full")

            # Crop around the click (core of the qwen3-vl learning).
            crop_base = os.path.join(self.shots_dir, f"{screenshot_id}_crop")
            w, h = TARGETED_CROP_SIZE
            left = max(0, x - w // 2)
            top = max(0, y - h // 2)
            crop_img = img.crop((left, top, left + w, top + h))

            if anonymize:
                crop_img = crop_img.filter(ImageFilter.GaussianBlur(radius=4))

            # Blur sensitive data (AI Act compliance).
            if BLUR_SENSITIVE:
                blur_sensitive_regions(img)
                blur_sensitive_regions(crop_img)

            # Write policy: full = contextual view -> JPEG q85;
            # crop = qwen3-vl grounding target -> lossless PNG (server contract).
            self._ensure_shots_dir()
            full_path = save_capture(img, full_base, "full")
            crop_path = save_capture(crop_img, crop_base, "crop")

            # Only commit the hash once both files were written, so a failed
            # save does not silence the next heartbeat.
            self.last_img_hash = raw_frame_hash

            result = {"full": full_path, "crop": crop_path}

            # --- Active window capture ---
            # Non-blocking addition: enriches the result with the image of
            # the window alone + metadata (title, rect, relative click).
            window_info = self.capture_active_window(x, y, screenshot_id, full_img=img)
            if window_info:
                result["window_capture"] = window_info

            # QW1 — multi-monitor enrichment (additive, graceful fallback).
            _enrich_with_monitor_info(result)

            return result
        except Exception as e:
            logger.error("Erreur Dual Capture: %s", e)
            return {}

    def capture_active_window(
        self,
        x: int,
        y: int,
        screenshot_id: str,
        full_img: Optional[Image.Image] = None,
    ) -> Optional[Dict[str, Any]]:
        """Capture the image of the active window alone, plus metadata.

        Strategy:
          1. Get the window rectangle from ``get_active_window_rect``.
          2. Crop it from the full-screen screenshot (more reliable than
             PrintWindow); fall back to the native window capture when the
             screenshot is missing, black, or the window is off-screen.
          3. Compute the click coordinates relative to the window.

        Args:
            x, y: click coordinates in screen pixels.
            screenshot_id: identifier used in the file name.
            full_img: already captured full-screen screenshot (optional;
                avoids a second capture when called from capture_dual).

        Returns:
            Dict with window_image, window_title, app_name, window_rect,
            window_size, click_in_window, click_inside_window and the
            monitor information, or None when the window cannot be found
            or captured.
        """
        try:
            from ..window_info_crossplatform import get_active_window_rect

            rect_info = get_active_window_rect()
            if not rect_info:
                logger.debug("Fenêtre active introuvable — skip capture fenêtre")
                return None

            win_rect = rect_info["rect"]  # [left, top, right, bottom]
            win_left, win_top, win_right, win_bottom = win_rect
            win_w, win_h = rect_info["size"]  # [width, height]
            title = rect_info.get("title", "unknown_window")
            app_name = rect_info.get("app_name", "unknown_app")

            # Skip windows that are too small (task bars, system popups).
            if win_w < 50 or win_h < 50:
                logger.debug("Fenêtre trop petite (%sx%s) — skip", win_w, win_h)
                return None

            # Click coordinates relative to the window.
            click_rel_x = x - win_left
            click_rel_y = y - win_top

            # A click outside the window is flagged, but the capture goes on.
            # The window covers [0, win_w) x [0, win_h): the upper bounds are
            # exclusive, otherwise the first pixel past the right/bottom
            # edge would be reported as inside.
            click_inside = (0 <= click_rel_x < win_w and 0 <= click_rel_y < win_h)

            window_img = None

            # --- Crop the window from the full screen ---
            if full_img is None:
                # No screenshot supplied: take one (standalone case).
                # win_rect is in global coordinates; cropping from a
                # secondary monitor would give a shifted image -> fail-closed
                # on the secondary fallback.
                try:
                    _monitor, full_img, _meta = capture_screen_image(
                        allow_secondary_fallback=False
                    )
                except Exception as e:
                    logger.error("Erreur capture plein écran pour fenêtre : %s", e)
                    full_img = None

            if full_img is not None and not _is_effectively_black(full_img):
                img_w, img_h = full_img.size
                crop_left = max(0, win_left)
                crop_top = max(0, win_top)
                crop_right = min(img_w, win_right)
                crop_bottom = min(img_h, win_bottom)

                if crop_right > crop_left and crop_bottom > crop_top:
                    window_img = full_img.crop(
                        (crop_left, crop_top, crop_right, crop_bottom)
                    )
                else:
                    logger.debug("Fenêtre hors écran — fallback natif si possible")
            elif full_img is not None:
                logger.warning(
                    "capture_active_window: screenshot plein écran noir, fallback natif"
                )

            if window_img is None and rect_info.get("hwnd"):
                window_img, native_meta = _capture_window_image_windows(
                    rect_info["hwnd"], win_w, win_h
                )
                if window_img is not None:
                    logger.warning(
                        "capture_active_window via fallback natif (%s)",
                        native_meta.get("backend"),
                    )

            if window_img is None:
                logger.debug("Fenêtre hors écran ou capture native indisponible")
                return None

            # Blur for AI Act compliance.
            if BLUR_SENSITIVE:
                blur_sensitive_regions(window_img)

            # Save — window = contextual view -> JPEG q85 (policy).
            self._ensure_shots_dir()
            window_base = os.path.join(
                self.shots_dir, f"{screenshot_id}_window"
            )
            window_path = save_capture(window_img, window_base, "window")

            result = {
                "window_image": window_path,
                "window_title": title,
                "app_name": app_name,
                "window_rect": win_rect,
                "window_size": [win_w, win_h],
                "click_in_window": [click_rel_x, click_rel_y],
                "click_inside_window": click_inside,
            }

            # QW1 — multi-monitor enrichment (additive).
            _enrich_with_monitor_info(result)

            logger.debug(
                "Fenêtre capturée : %s (%sx%s) — clic relatif (%s, %s)",
                title, win_w, win_h, click_rel_x, click_rel_y,
            )
            return result

        except ImportError as e:
            logger.debug("Module fenêtre indisponible : %s", e)
            return None
        except Exception as e:
            logger.error("Erreur capture fenêtre active : %s", e)
            return None

    def _compute_quick_hash(self, img: Image.Image) -> str:
        """Return a quick change-detection hash of *img*.

        The image is shrunk to a 64x64 greyscale thumbnail (very fast) and
        the MD5 hex digest of its bytes is returned. The digest is only used
        to compare frames, not for security.
        """
        small_img = img.resize((64, 64), Image.NEAREST).convert("L")
        return hashlib.md5(small_img.tobytes()).hexdigest()
|