diff --git a/agent_v0/server_v1/api_stream.py b/agent_v0/server_v1/api_stream.py
index ade8acc97..45f649d5c 100644
--- a/agent_v0/server_v1/api_stream.py
+++ b/agent_v0/server_v1/api_stream.py
@@ -5189,8 +5189,8 @@ def _resolve_target_sync(
     # ---------------------------------------------------------------
     by_text_source = target_spec.get("by_text_source", "")
 
-    if by_text_strict and by_text_source == "ocr":
-        # Texte OCR fiable → grounding VLM direct
+    if by_text_strict and by_text_source in ("ocr", "vlm"):
+        # Visible text (OCR or read by gemma4) → direct VLM grounding
         grounding_result = _resolve_by_grounding(
             screenshot_path=screenshot_path,
             target_spec=target_spec,
@@ -5206,8 +5206,8 @@ def _resolve_target_sync(
             )
             return grounding_result
 
-    if not by_text_strict or by_text_source != "ocr":
-        # Template matching sur la fenêtre active si disponible (évite les faux positifs)
+    if not by_text_strict or by_text_source not in ("ocr", "vlm"):
+        # Template matching for elements without text (pure icons)
         window_capture = target_spec.get("window_capture", {})
         window_rect = window_capture.get("rect")
         from pathlib import Path as _Path
diff --git a/agent_v0/server_v1/stream_processor.py b/agent_v0/server_v1/stream_processor.py
index e124c57b0..86a182154 100644
--- a/agent_v0/server_v1/stream_processor.py
+++ b/agent_v0/server_v1/stream_processor.py
@@ -438,6 +438,31 @@ def _needs_post_wait(action: dict) -> int:
 _GEMMA4_PORT = os.environ.get("GEMMA4_PORT", "11435")
 
 
+def _unload_gemma4():
+    """Unload gemma4 from the Docker GPU to free VRAM for qwen2.5vl.
+
+    Best-effort: sends a generate request with ``keep_alive: 0`` for
+    ``gemma4:e4b`` to localhost on ``_GEMMA4_PORT``. Failures are logged
+    and never raised, so callers are unaffected.
+    """
+    try:
+        import requests as _req
+
+        response = _req.post(
+            f"http://localhost:{_GEMMA4_PORT}/api/generate",
+            json={"model": "gemma4:e4b", "keep_alive": 0},
+            timeout=5,
+        )
+        # An HTTP 4xx/5xx must not be reported as a successful unload.
+        response.raise_for_status()
+    except Exception as exc:
+        # Deliberately broad: this is an optimisation and must never break
+        # the caller, but the failure has to be visible in the logs.
+        logger.warning("Échec du déchargement de gemma4: %s", exc)
+    else:
+        logger.info("gemma4 déchargé du GPU (VRAM libérée)")
+
+
 def _gemma4_read_element(
     img_b64: str,
     window_title: str = "",
@@ -1512,6 +1537,10 @@ def build_replay_from_raw_events(
         "(%d/%d clics avec visual_mode, %d avec screenshot de référence)",
         session_id, len(result), visual_clicks, total_clicks, verified_count,
     )
+
+    # Free gemma4 from the GPU so that qwen2.5vl can load at replay time
+    _unload_gemma4()
+
     return result
 
 