feat(qw1): enrichissement Agent V1 (monitor_index + monitors_geometry) + hook serveur

Côté client Agent V1 :
- helpers _get_monitors_geometry() / _get_active_monitor_index() via screeninfo (fallback gracieux [] / None si screeninfo absent)
- _enrich_with_monitor_info() ajouté aux payloads dict de capture_dual, capture_active_window, et heartbeat_event poussé par main.py
- screeninfo>=0.8 ajouté aux requirements (source + deploy Windows)
- Deploy capturer.py reçoit l'enrichissement de manière additive (pas de copie verbatim qui aurait introduit BLUR_SENSITIVE absent côté deploy)

Côté serveur :
- import resolve_target_monitor depuis monitor_router (créé en QW1.1)
- /replay/next : enrichissement action.monitor_resolution avant envoi au client (idx, offset_x/y, w, h, source de la décision)
- live_session_manager.add_event : propagation monitor_index + monitors_geometry depuis window_capture ET depuis le payload event brut (cas heartbeat enrichi sans window/window_title)

Cascade de résolution (cf. monitor_router.py) :
1. action.monitor_index (hérité de la session source)
2. session.last_focused_monitor (focus actif vu au dernier heartbeat)
3. composite_fallback (offset 0,0) — backward compat strict

Rétrocompatibilité à 100 % : si geometry vide, fallback composite identique au comportement actuel mss.monitors[0].

Tests : baseline 89/89 préservée, monitor_router 4/4 OK (total 93/93).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-05 23:05:44 +02:00
parent fae95c5366
commit 2d71e2a249
7 changed files with 208 additions and 2 deletions
--- a/agent_v0/server_v1/api_stream.py
+++ b/agent_v0/server_v1/api_stream.py
@@ -33,6 +33,7 @@ from .audit_trail import AuditTrail, AuditEntry
 from .agent_registry import AgentRegistry, AgentAlreadyEnrolledError
 from .stream_processor import StreamProcessor, build_replay_from_raw_events, enrich_click_from_screenshot
 from .worker_stream import StreamWorker
+from .monitor_router import resolve_target_monitor  # QW1 — résolution écran cible
 from .execution_plan_runner import (
    execution_plan_to_actions,
    inject_plan_into_queue,
@@ -222,6 +223,7 @@ from .replay_engine import (
    _resolve_runtime_vars,
    _SERVER_SIDE_ACTION_TYPES,
    _handle_extract_text_action,
+    _handle_extract_table_action,
    _handle_t2a_decision_action,
    _expand_compound_steps,
    _pre_check_screen_state as _pre_check_screen_state_impl,
@@ -511,6 +513,7 @@ class ReplayRequest(BaseModel):
    session_id: str
    machine_id: Optional[str] = None  # Machine cible pour le replay (multi-machine)
    params: Optional[Dict[str, Any]] = None
+    variables: Optional[Dict[str, Any]] = None  # Variables runtime initiales (templating {{var}})


 class RawReplayRequest(BaseModel):
@@ -765,6 +768,21 @@ async def startup():
    _cleanup_thread = threading.Thread(target=_cleanup_loop, daemon=True, name="replay_cleanup")
    _cleanup_thread.start()

+    # Préchargement EasyOCR en arrière-plan : sans ça, le 1er extract_text /
+    # extract_table déclenche un cold start de ~3-5s qui bloque l'event loop
+    # FastAPI (constaté 2026-05-05 : streaming server inaccessible 2 min).
+    # Le thread tourne pendant que le boot continue ; le 1er appel OCR sera rapide.
+    def _preload_easyocr():
+        try:
+            t0 = time.time()
+            from core.llm.ocr_extractor import _get_reader
+            _get_reader()
+            logger.info("[OCR] EasyOCR préchargé (fr+en, CPU) en %.1fs", time.time() - t0)
+        except Exception as e:
+            logger.warning("[OCR] Échec préchargement EasyOCR : %s", e)
+
+    threading.Thread(target=_preload_easyocr, daemon=True, name="preload_easyocr").start()
+
    logger.info(
        "API Streaming démarrée — StreamProcessor, Worker et Cleanup prêts. "
        "VLM Worker dans un process séparé (run_worker.py)."
@@ -1962,6 +1980,11 @@ async def start_replay(request: ReplayRequest):
            machine_id=resolved_machine_id,
            actions=actions,
        )
+        # Pré-injection des variables runtime (templating {{var}} sur by_text,
+        # text, target_spec.* etc.). Permet à l'orchestrateur d'appeler ce
+        # workflow avec p.ex. variables={"patient_id": "25003284"} pour boucler.
+        if request.variables:
+            _replay_states[replay_id]["variables"].update(request.variables)
        # Enregistrer le mapping machine -> session pour le replay ciblé
        if resolved_machine_id and resolved_machine_id != "default":
            _machine_replay_target[resolved_machine_id] = session_id
@@ -2914,6 +2937,12 @@ async def get_next_action(session_id: str, machine_id: str = "default"):
                            _handle_extract_text_action,
                            action, owning_replay, session_id, _last_heartbeat,
                        )
+                    elif type_ == "extract_table":
+                        await loop.run_in_executor(
+                            None,
+                            _handle_extract_table_action,
+                            action, owning_replay, session_id, _last_heartbeat,
+                        )
                    elif type_ == "t2a_decision":
                        await loop.run_in_executor(
                            None,
@@ -3117,6 +3146,29 @@ async def get_next_action(session_id: str, machine_id: str = "default"):
        f"{_precheck_sim}"
    )

+    # QW1 — Résoudre l'écran cible et joindre l'info à l'action
+    # Cascade : action.monitor_index → session.last_focused_monitor → composite_fallback
+    try:
+        session_qw1 = processor.session_manager.get_session(session_id)
+        last_window_info_qw1 = (
+            session_qw1.last_window_info if session_qw1 is not None else {}
+        ) or {}
+        session_state_qw1 = {
+            "monitors_geometry": last_window_info_qw1.get("monitors_geometry", []),
+            "last_focused_monitor": last_window_info_qw1.get("monitor_index"),
+        }
+        target = resolve_target_monitor(action, session_state_qw1)
+        action["monitor_resolution"] = {
+            "idx": target.idx,
+            "offset_x": target.offset_x,
+            "offset_y": target.offset_y,
+            "w": target.w,
+            "h": target.h,
+            "source": target.source,
+        }
+    except Exception as e:
+        logger.debug("QW1 monitor_resolution skip (%s)", e)
+
    response: Dict[str, Any] = {
        "action": action,
        "session_id": session_id,