feat(qw1): enrichissement Agent V1 (monitor_index + monitors_geometry) + hook serveur
Côté client Agent V1 :
- helpers _get_monitors_geometry() / _get_active_monitor_index() via screeninfo (fallback gracieux [] / None si screeninfo absent)
- _enrich_with_monitor_info() ajouté aux payloads dict de capture_dual, capture_active_window, et heartbeat_event poussé par main.py
- screeninfo>=0.8 ajouté aux requirements (source + deploy Windows)
- Deploy capturer.py reçoit l'enrichissement de manière additive (pas de copie verbatim qui aurait introduit BLUR_SENSITIVE absent côté deploy)

Côté serveur :
- import resolve_target_monitor depuis monitor_router (créé en QW1.1)
- /replay/next : enrichissement action.monitor_resolution avant envoi au client (idx, offset_x/y, w, h, source de la décision)
- live_session_manager.add_event : propagation monitor_index + monitors_geometry depuis window_capture ET depuis le payload event brut (cas heartbeat enrichi sans window/window_title)

Cascade de résolution (cf monitor_router.py) :
1. action.monitor_index (hérité de la session source)
2. session.last_focused_monitor (focus actif vu en dernier heartbeat)
3. composite_fallback (offset 0,0) — backward compat strict

Backward 100% : si geometry vide, fallback composite identique au comportement actuel mss.monitors[0].

Tests : baseline 89/89 préservée, monitor_router 4/4 OK (total 93/93).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -448,6 +448,12 @@ class AgentV1:
|
|||||||
window_title = self.vision.get_active_window_title()
|
window_title = self.vision.get_active_window_title()
|
||||||
if window_title:
|
if window_title:
|
||||||
heartbeat_event["active_window_title"] = window_title
|
heartbeat_event["active_window_title"] = window_title
|
||||||
|
# QW1 — enrichissement multi-écrans (additif, fallback gracieux)
|
||||||
|
try:
|
||||||
|
from .vision.capturer import _enrich_with_monitor_info
|
||||||
|
_enrich_with_monitor_info(heartbeat_event)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
self.streamer.push_event(heartbeat_event)
|
self.streamer.push_event(heartbeat_event)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Heartbeat error: {e}")
|
logger.error(f"Heartbeat error: {e}")
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ Pillow>=10.0.0 # Crops et processing image
|
|||||||
requests>=2.31.0 # Streaming réseau
|
requests>=2.31.0 # Streaming réseau
|
||||||
python-socketio[client]>=5.10,<6.0 # Bus feedback Léa 'lea:*' (compat Flask-SocketIO 5.3.x serveur)
|
python-socketio[client]>=5.10,<6.0 # Bus feedback Léa 'lea:*' (compat Flask-SocketIO 5.3.x serveur)
|
||||||
psutil>=5.9.0 # Monitoring CPU/RAM
|
psutil>=5.9.0 # Monitoring CPU/RAM
|
||||||
|
screeninfo>=0.8 # QW1 — détection des monitors physiques + offsets
|
||||||
pystray>=0.19.5 # Icône Tray UI
|
pystray>=0.19.5 # Icône Tray UI
|
||||||
plyer>=2.1.0 # Notifications toast natives (remplace PyQt5)
|
plyer>=2.1.0 # Notifications toast natives (remplace PyQt5)
|
||||||
pywebview>=5.0 # Fenêtre de chat Léa intégrée (Edge WebView2 sur Windows)
|
pywebview>=5.0 # Fenêtre de chat Léa intégrée (Edge WebView2 sur Windows)
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ import time
|
|||||||
import logging
|
import logging
|
||||||
import hashlib
|
import hashlib
|
||||||
import platform
|
import platform
|
||||||
from typing import Any, Dict, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
from PIL import Image, ImageFilter, ImageStat
|
from PIL import Image, ImageFilter, ImageStat
|
||||||
import mss
|
import mss
|
||||||
from ..config import TARGETED_CROP_SIZE, SCREENSHOT_QUALITY, BLUR_SENSITIVE
|
from ..config import TARGETED_CROP_SIZE, SCREENSHOT_QUALITY, BLUR_SENSITIVE
|
||||||
@@ -26,6 +26,66 @@ logger = logging.getLogger(__name__)
|
|||||||
# OS courant (détecté une seule fois)
|
# OS courant (détecté une seule fois)
|
||||||
_SYSTEM = platform.system()
|
_SYSTEM = platform.system()
|
||||||
|
|
||||||
|
# QW1 — détection multi-écrans (fallback gracieux si screeninfo absent)
|
||||||
|
try:
|
||||||
|
from screeninfo import get_monitors as _screeninfo_get_monitors
|
||||||
|
_SCREENINFO_AVAILABLE = True
|
||||||
|
except ImportError:
|
||||||
|
_SCREENINFO_AVAILABLE = False
|
||||||
|
|
||||||
|
|
||||||
|
def _get_monitors_geometry() -> List[Dict[str, Any]]:
    """Return the geometry of every monitor reported by screeninfo.

    Each entry is a dict with the keys ``idx`` (position in screeninfo's
    enumeration order), ``x`` / ``y`` (monitor origin as reported by
    screeninfo), ``w`` / ``h`` (monitor size) and ``primary`` (``False``
    when screeninfo does not expose ``is_primary``).

    Returns:
        One dict per monitor, or an empty list when screeninfo is not
        installed or the enumeration fails. Callers are expected to treat
        an empty list as "geometry unknown".
    """
    if not _SCREENINFO_AVAILABLE:
        return []
    try:
        return [
            {
                "idx": index,
                "x": int(monitor.x),
                "y": int(monitor.y),
                "w": int(monitor.width),
                "h": int(monitor.height),
                "primary": bool(getattr(monitor, "is_primary", False)),
            }
            for index, monitor in enumerate(_screeninfo_get_monitors())
        ]
    except Exception:
        # Best-effort by design: capture must never fail because of monitor
        # detection. Still leave a trace, otherwise a broken screeninfo
        # backend is indistinguishable from "no monitors".
        logger.debug("QW1: monitor geometry enumeration failed", exc_info=True)
        return []
|
||||||
|
|
||||||
|
|
||||||
|
def _get_active_monitor_index() -> Optional[int]:
    """Return the index of the monitor that currently contains the mouse cursor.

    The cursor position is used as a stand-in for "active focus". The index
    is the monitor's position in screeninfo's enumeration order.

    Returns:
        The monitor index, or ``None`` when screeninfo is not installed,
        pyautogui cannot be imported or queried, or the cursor lies outside
        every reported monitor.
    """
    if not _SCREENINFO_AVAILABLE:
        return None
    try:
        import pyautogui  # lazy import: keeps pyautogui an optional dependency

        cursor_x, cursor_y = pyautogui.position()
        for index, monitor in enumerate(_screeninfo_get_monitors()):
            inside_x = monitor.x <= cursor_x < monitor.x + monitor.width
            inside_y = monitor.y <= cursor_y < monitor.y + monitor.height
            if inside_x and inside_y:
                return index
    except Exception:
        # Best-effort by design, but keep a trace so a missing pyautogui or a
        # failing screeninfo backend can be diagnosed from debug logs.
        logger.debug("QW1: active monitor detection failed", exc_info=True)
        return None
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def _enrich_with_monitor_info(payload: dict) -> dict:
    """Attach multi-monitor context to *payload*, in place.

    Sets ``payload["monitor_index"]`` and ``payload["monitors_geometry"]``
    from the module's monitor helpers. Anything that is not a ``dict`` is
    handed back untouched.

    Returns:
        The same object that was passed in.
    """
    if not isinstance(payload, dict):
        return payload

    active_index = _get_active_monitor_index()
    payload["monitor_index"] = active_index

    geometry = _get_monitors_geometry()
    payload["monitors_geometry"] = geometry

    return payload
|
||||||
|
|
||||||
class VisionCapturer:
|
class VisionCapturer:
|
||||||
def __init__(self, session_dir: str):
|
def __init__(self, session_dir: str):
|
||||||
self.session_dir = session_dir
|
self.session_dir = session_dir
|
||||||
@@ -121,6 +181,9 @@ class VisionCapturer:
|
|||||||
if window_info:
|
if window_info:
|
||||||
result["window_capture"] = window_info
|
result["window_capture"] = window_info
|
||||||
|
|
||||||
|
# QW1 — enrichissement multi-écrans (additif, fallback gracieux)
|
||||||
|
_enrich_with_monitor_info(result)
|
||||||
|
|
||||||
return result
|
return result
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Erreur Dual Capture: {e}")
|
logger.error(f"Erreur Dual Capture: {e}")
|
||||||
@@ -223,6 +286,9 @@ class VisionCapturer:
|
|||||||
"click_inside_window": click_inside,
|
"click_inside_window": click_inside,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# QW1 — enrichissement multi-écrans (additif)
|
||||||
|
_enrich_with_monitor_info(result)
|
||||||
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
f"Fenêtre capturée : {title} ({win_w}x{win_h}) — "
|
f"Fenêtre capturée : {title} ({win_w}x{win_h}) — "
|
||||||
f"clic relatif ({click_rel_x}, {click_rel_y})"
|
f"clic relatif ({click_rel_x}, {click_rel_y})"
|
||||||
|
|||||||
@@ -8,12 +8,73 @@ import os
|
|||||||
import time
|
import time
|
||||||
import logging
|
import logging
|
||||||
import hashlib
|
import hashlib
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
from PIL import Image, ImageFilter, ImageStat
|
from PIL import Image, ImageFilter, ImageStat
|
||||||
import mss
|
import mss
|
||||||
from ..config import TARGETED_CROP_SIZE, SCREENSHOT_QUALITY
|
from ..config import TARGETED_CROP_SIZE, SCREENSHOT_QUALITY
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# QW1 — détection multi-écrans (fallback gracieux si screeninfo absent)
|
||||||
|
try:
|
||||||
|
from screeninfo import get_monitors as _screeninfo_get_monitors
|
||||||
|
_SCREENINFO_AVAILABLE = True
|
||||||
|
except ImportError:
|
||||||
|
_SCREENINFO_AVAILABLE = False
|
||||||
|
|
||||||
|
|
||||||
|
def _get_monitors_geometry() -> List[Dict[str, Any]]:
    """Return the geometry of every monitor reported by screeninfo.

    Each entry is a dict with the keys ``idx`` (position in screeninfo's
    enumeration order), ``x`` / ``y`` (monitor origin as reported by
    screeninfo), ``w`` / ``h`` (monitor size) and ``primary`` (``False``
    when screeninfo does not expose ``is_primary``).

    Returns:
        One dict per monitor, or an empty list when screeninfo is not
        installed or the enumeration fails. Callers are expected to treat
        an empty list as "geometry unknown".
    """
    if not _SCREENINFO_AVAILABLE:
        return []
    try:
        return [
            {
                "idx": index,
                "x": int(monitor.x),
                "y": int(monitor.y),
                "w": int(monitor.width),
                "h": int(monitor.height),
                "primary": bool(getattr(monitor, "is_primary", False)),
            }
            for index, monitor in enumerate(_screeninfo_get_monitors())
        ]
    except Exception:
        # Best-effort by design: capture must never fail because of monitor
        # detection. Still leave a trace, otherwise a broken screeninfo
        # backend is indistinguishable from "no monitors".
        logger.debug("QW1: monitor geometry enumeration failed", exc_info=True)
        return []
|
||||||
|
|
||||||
|
|
||||||
|
def _get_active_monitor_index() -> Optional[int]:
    """Return the index of the monitor that currently contains the mouse cursor.

    The cursor position is used as a stand-in for "active focus". The index
    is the monitor's position in screeninfo's enumeration order.

    Returns:
        The monitor index, or ``None`` when screeninfo is not installed,
        pyautogui cannot be imported or queried, or the cursor lies outside
        every reported monitor.
    """
    if not _SCREENINFO_AVAILABLE:
        return None
    try:
        import pyautogui  # lazy import: keeps pyautogui an optional dependency

        cursor_x, cursor_y = pyautogui.position()
        for index, monitor in enumerate(_screeninfo_get_monitors()):
            inside_x = monitor.x <= cursor_x < monitor.x + monitor.width
            inside_y = monitor.y <= cursor_y < monitor.y + monitor.height
            if inside_x and inside_y:
                return index
    except Exception:
        # Best-effort by design, but keep a trace so a missing pyautogui or a
        # failing screeninfo backend can be diagnosed from debug logs.
        logger.debug("QW1: active monitor detection failed", exc_info=True)
        return None
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def _enrich_with_monitor_info(payload: dict) -> dict:
    """Attach multi-monitor context to *payload*, in place.

    Sets ``payload["monitor_index"]`` and ``payload["monitors_geometry"]``
    from the module's monitor helpers. Anything that is not a ``dict`` is
    handed back untouched.

    Returns:
        The same object that was passed in.
    """
    if not isinstance(payload, dict):
        return payload

    active_index = _get_active_monitor_index()
    payload["monitor_index"] = active_index

    geometry = _get_monitors_geometry()
    payload["monitors_geometry"] = geometry

    return payload
|
||||||
|
|
||||||
class VisionCapturer:
|
class VisionCapturer:
|
||||||
def __init__(self, session_dir: str):
|
def __init__(self, session_dir: str):
|
||||||
self.session_dir = session_dir
|
self.session_dir = session_dir
|
||||||
@@ -72,7 +133,12 @@ class VisionCapturer:
|
|||||||
# Mise à jour du hash pour le prochain heartbeat
|
# Mise à jour du hash pour le prochain heartbeat
|
||||||
self.last_img_hash = self._compute_quick_hash(img)
|
self.last_img_hash = self._compute_quick_hash(img)
|
||||||
|
|
||||||
return {"full": full_path, "crop": crop_path}
|
result = {"full": full_path, "crop": crop_path}
|
||||||
|
|
||||||
|
# QW1 — enrichissement multi-écrans (additif, fallback gracieux)
|
||||||
|
_enrich_with_monitor_info(result)
|
||||||
|
|
||||||
|
return result
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Erreur Dual Capture: {e}")
|
logger.error(f"Erreur Dual Capture: {e}")
|
||||||
return {}
|
return {}
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ Pillow>=10.0.0 # Crops et processing image
|
|||||||
requests>=2.31.0 # Streaming réseau
|
requests>=2.31.0 # Streaming réseau
|
||||||
python-socketio[client]>=5.10,<6.0 # Bus feedback Léa 'lea:*' (compat Flask-SocketIO 5.3.x serveur)
|
python-socketio[client]>=5.10,<6.0 # Bus feedback Léa 'lea:*' (compat Flask-SocketIO 5.3.x serveur)
|
||||||
psutil>=5.9.0 # Monitoring CPU/RAM
|
psutil>=5.9.0 # Monitoring CPU/RAM
|
||||||
|
screeninfo>=0.8 # QW1 — détection des monitors physiques + offsets
|
||||||
pystray>=0.19.5 # Icône Tray UI
|
pystray>=0.19.5 # Icône Tray UI
|
||||||
plyer>=2.1.0 # Notifications toast natives (remplace PyQt5)
|
plyer>=2.1.0 # Notifications toast natives (remplace PyQt5)
|
||||||
|
|
||||||
|
|||||||
@@ -33,6 +33,7 @@ from .audit_trail import AuditTrail, AuditEntry
|
|||||||
from .agent_registry import AgentRegistry, AgentAlreadyEnrolledError
|
from .agent_registry import AgentRegistry, AgentAlreadyEnrolledError
|
||||||
from .stream_processor import StreamProcessor, build_replay_from_raw_events, enrich_click_from_screenshot
|
from .stream_processor import StreamProcessor, build_replay_from_raw_events, enrich_click_from_screenshot
|
||||||
from .worker_stream import StreamWorker
|
from .worker_stream import StreamWorker
|
||||||
|
from .monitor_router import resolve_target_monitor # QW1 — résolution écran cible
|
||||||
from .execution_plan_runner import (
|
from .execution_plan_runner import (
|
||||||
execution_plan_to_actions,
|
execution_plan_to_actions,
|
||||||
inject_plan_into_queue,
|
inject_plan_into_queue,
|
||||||
@@ -222,6 +223,7 @@ from .replay_engine import (
|
|||||||
_resolve_runtime_vars,
|
_resolve_runtime_vars,
|
||||||
_SERVER_SIDE_ACTION_TYPES,
|
_SERVER_SIDE_ACTION_TYPES,
|
||||||
_handle_extract_text_action,
|
_handle_extract_text_action,
|
||||||
|
_handle_extract_table_action,
|
||||||
_handle_t2a_decision_action,
|
_handle_t2a_decision_action,
|
||||||
_expand_compound_steps,
|
_expand_compound_steps,
|
||||||
_pre_check_screen_state as _pre_check_screen_state_impl,
|
_pre_check_screen_state as _pre_check_screen_state_impl,
|
||||||
@@ -511,6 +513,7 @@ class ReplayRequest(BaseModel):
|
|||||||
session_id: str
|
session_id: str
|
||||||
machine_id: Optional[str] = None # Machine cible pour le replay (multi-machine)
|
machine_id: Optional[str] = None # Machine cible pour le replay (multi-machine)
|
||||||
params: Optional[Dict[str, Any]] = None
|
params: Optional[Dict[str, Any]] = None
|
||||||
|
variables: Optional[Dict[str, Any]] = None # Variables runtime initiales (templating {{var}})
|
||||||
|
|
||||||
|
|
||||||
class RawReplayRequest(BaseModel):
|
class RawReplayRequest(BaseModel):
|
||||||
@@ -765,6 +768,21 @@ async def startup():
|
|||||||
_cleanup_thread = threading.Thread(target=_cleanup_loop, daemon=True, name="replay_cleanup")
|
_cleanup_thread = threading.Thread(target=_cleanup_loop, daemon=True, name="replay_cleanup")
|
||||||
_cleanup_thread.start()
|
_cleanup_thread.start()
|
||||||
|
|
||||||
|
# Préchargement EasyOCR en arrière-plan : sans ça, le 1er extract_text /
|
||||||
|
# extract_table déclenche un cold start de ~3-5s qui bloque l'event loop
|
||||||
|
# FastAPI (constaté 2026-05-05 : streaming server inaccessible 2 min).
|
||||||
|
# Le thread tourne pendant que le boot continue ; le 1er appel OCR sera rapide.
|
||||||
|
def _preload_easyocr():
    """Warm up the EasyOCR reader ahead of the first OCR request.

    Calls the project's ``_get_reader()`` and logs how long the warm-up
    took. Any failure is logged as a warning and otherwise ignored.
    """
    started_at = time.time()
    try:
        # Imported here rather than at module level, as in the original.
        from core.llm.ocr_extractor import _get_reader

        _get_reader()
        elapsed = time.time() - started_at
        logger.info("[OCR] EasyOCR préchargé (fr+en, CPU) en %.1fs", elapsed)
    except Exception as exc:
        logger.warning("[OCR] Échec préchargement EasyOCR : %s", exc)
|
||||||
|
|
||||||
|
threading.Thread(target=_preload_easyocr, daemon=True, name="preload_easyocr").start()
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"API Streaming démarrée — StreamProcessor, Worker et Cleanup prêts. "
|
"API Streaming démarrée — StreamProcessor, Worker et Cleanup prêts. "
|
||||||
"VLM Worker dans un process séparé (run_worker.py)."
|
"VLM Worker dans un process séparé (run_worker.py)."
|
||||||
@@ -1962,6 +1980,11 @@ async def start_replay(request: ReplayRequest):
|
|||||||
machine_id=resolved_machine_id,
|
machine_id=resolved_machine_id,
|
||||||
actions=actions,
|
actions=actions,
|
||||||
)
|
)
|
||||||
|
# Pré-injection des variables runtime (templating {{var}} sur by_text,
|
||||||
|
# text, target_spec.* etc.). Permet à l'orchestrateur d'appeler ce
|
||||||
|
# workflow avec p.ex. variables={"patient_id": "25003284"} pour boucler.
|
||||||
|
if request.variables:
|
||||||
|
_replay_states[replay_id]["variables"].update(request.variables)
|
||||||
# Enregistrer le mapping machine -> session pour le replay ciblé
|
# Enregistrer le mapping machine -> session pour le replay ciblé
|
||||||
if resolved_machine_id and resolved_machine_id != "default":
|
if resolved_machine_id and resolved_machine_id != "default":
|
||||||
_machine_replay_target[resolved_machine_id] = session_id
|
_machine_replay_target[resolved_machine_id] = session_id
|
||||||
@@ -2914,6 +2937,12 @@ async def get_next_action(session_id: str, machine_id: str = "default"):
|
|||||||
_handle_extract_text_action,
|
_handle_extract_text_action,
|
||||||
action, owning_replay, session_id, _last_heartbeat,
|
action, owning_replay, session_id, _last_heartbeat,
|
||||||
)
|
)
|
||||||
|
elif type_ == "extract_table":
|
||||||
|
await loop.run_in_executor(
|
||||||
|
None,
|
||||||
|
_handle_extract_table_action,
|
||||||
|
action, owning_replay, session_id, _last_heartbeat,
|
||||||
|
)
|
||||||
elif type_ == "t2a_decision":
|
elif type_ == "t2a_decision":
|
||||||
await loop.run_in_executor(
|
await loop.run_in_executor(
|
||||||
None,
|
None,
|
||||||
@@ -3117,6 +3146,29 @@ async def get_next_action(session_id: str, machine_id: str = "default"):
|
|||||||
f"{_precheck_sim}"
|
f"{_precheck_sim}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# QW1 — Résoudre l'écran cible et joindre l'info à l'action
|
||||||
|
# Cascade : action.monitor_index → session.last_focused_monitor → composite_fallback
|
||||||
|
try:
|
||||||
|
session_qw1 = processor.session_manager.get_session(session_id)
|
||||||
|
last_window_info_qw1 = (
|
||||||
|
session_qw1.last_window_info if session_qw1 is not None else {}
|
||||||
|
) or {}
|
||||||
|
session_state_qw1 = {
|
||||||
|
"monitors_geometry": last_window_info_qw1.get("monitors_geometry", []),
|
||||||
|
"last_focused_monitor": last_window_info_qw1.get("monitor_index"),
|
||||||
|
}
|
||||||
|
target = resolve_target_monitor(action, session_state_qw1)
|
||||||
|
action["monitor_resolution"] = {
|
||||||
|
"idx": target.idx,
|
||||||
|
"offset_x": target.offset_x,
|
||||||
|
"offset_y": target.offset_y,
|
||||||
|
"w": target.w,
|
||||||
|
"h": target.h,
|
||||||
|
"source": target.source,
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug("QW1 monitor_resolution skip (%s)", e)
|
||||||
|
|
||||||
response: Dict[str, Any] = {
|
response: Dict[str, Any] = {
|
||||||
"action": action,
|
"action": action,
|
||||||
"session_id": session_id,
|
"session_id": session_id,
|
||||||
|
|||||||
@@ -256,6 +256,20 @@ class LiveSessionManager:
|
|||||||
session.last_window_info["title"] = wc_title
|
session.last_window_info["title"] = wc_title
|
||||||
if wc_app:
|
if wc_app:
|
||||||
session.last_window_info["app_name"] = wc_app
|
session.last_window_info["app_name"] = wc_app
|
||||||
|
# QW1 — propager monitor_index et monitors_geometry depuis window_capture
|
||||||
|
if "monitor_index" in window_capture:
|
||||||
|
session.last_window_info["monitor_index"] = window_capture["monitor_index"]
|
||||||
|
if "monitors_geometry" in window_capture:
|
||||||
|
session.last_window_info["monitors_geometry"] = window_capture["monitors_geometry"]
|
||||||
|
|
||||||
|
# QW1 — propager monitor_index/monitors_geometry du payload event
|
||||||
|
# (cas heartbeat enrichi sans window/window_title). Toujours
|
||||||
|
# rafraîchir le focus actif (change souvent) et la géométrie
|
||||||
|
# (l'utilisateur peut brancher/débrancher un écran).
|
||||||
|
if "monitor_index" in event_data:
|
||||||
|
session.last_window_info["monitor_index"] = event_data["monitor_index"]
|
||||||
|
if "monitors_geometry" in event_data and event_data["monitors_geometry"]:
|
||||||
|
session.last_window_info["monitors_geometry"] = event_data["monitors_geometry"]
|
||||||
|
|
||||||
# Accumuler les titres/apps pour le nommage automatique
|
# Accumuler les titres/apps pour le nommage automatique
|
||||||
title = session.last_window_info.get("title", "").strip()
|
title = session.last_window_info.get("title", "").strip()
|
||||||
|
|||||||
Reference in New Issue
Block a user