diff --git a/agent_v0/server_v1/api_stream.py b/agent_v0/server_v1/api_stream.py
index ade8acc97..45f649d5c 100644
--- a/agent_v0/server_v1/api_stream.py
+++ b/agent_v0/server_v1/api_stream.py
@@ -5189,8 +5189,8 @@ def _resolve_target_sync(
         # ---------------------------------------------------------------
         by_text_source = target_spec.get("by_text_source", "")
 
-        if by_text_strict and by_text_source == "ocr":
-            # Texte OCR fiable → grounding VLM direct
+        if by_text_strict and by_text_source in ("ocr", "vlm"):
+            # Texte visible (OCR ou lu par gemma4) → grounding VLM direct
             grounding_result = _resolve_by_grounding(
                 screenshot_path=screenshot_path,
                 target_spec=target_spec,
@@ -5206,8 +5206,8 @@ def _resolve_target_sync(
                 )
                 return grounding_result
 
-        if not by_text_strict or by_text_source != "ocr":
-            # Template matching sur la fenêtre active si disponible (évite les faux positifs)
+        if not by_text_strict or by_text_source not in ("ocr", "vlm"):
+            # Template matching pour les éléments sans texte (icônes pures)
             window_capture = target_spec.get("window_capture", {})
             window_rect = window_capture.get("rect")
             from pathlib import Path as _Path
diff --git a/agent_v0/server_v1/stream_processor.py b/agent_v0/server_v1/stream_processor.py
index e124c57b0..86a182154 100644
--- a/agent_v0/server_v1/stream_processor.py
+++ b/agent_v0/server_v1/stream_processor.py
@@ -438,6 +438,20 @@ def _needs_post_wait(action: dict) -> int:
 _GEMMA4_PORT = os.environ.get("GEMMA4_PORT", "11435")
 
 
+def _unload_gemma4():
+    """Best-effort unload of gemma4 from the GPU to free VRAM for qwen2.5vl; never raises."""
+    try:
+        import requests as _req
+        _req.post(
+            f"http://localhost:{_GEMMA4_PORT}/api/generate",
+            json={"model": "gemma4:e4b", "keep_alive": 0},
+            timeout=5,
+        ).raise_for_status()  # an HTTP error must not be reported as a successful unload
+        logger.info("gemma4 déchargé du GPU (VRAM libérée)")
+    except Exception as exc:  # deliberate best-effort: report the failure, never break the caller
+        logger.warning("Déchargement de gemma4 impossible (VRAM non libérée): %s", exc)
+
+
 def _gemma4_read_element(
     img_b64: str,
     window_title: str = "",
@@ -1512,6 +1526,10 @@ def build_replay_from_raw_events(
         "(%d/%d clics avec visual_mode, %d avec screenshot de référence)",
         session_id, len(result), visual_clicks, total_clicks, verified_count,
     )
+
+    # Libérer gemma4 du GPU pour que qwen2.5vl puisse charger au replay
+    _unload_gemma4()
+
     return result