""" core/grounding/think_arbiter.py — Layer THINK : VLM arbitre (InfiGUI via subprocess) Appelé UNIQUEMENT quand le SmartMatcher n'a pas assez confiance. Utilise le subprocess worker InfiGUI (pas de serveur HTTP). Utilisation : from core.grounding.think_arbiter import ThinkArbiter arbiter = ThinkArbiter() result = arbiter.arbitrate(target, candidates, screenshot) """ from __future__ import annotations import time from typing import Any, Dict, List, Optional from core.grounding.fast_types import LocateResult, MatchCandidate from core.grounding.target import GroundingTarget class ThinkArbiter: """Arbitre VLM — appelle InfiGUI via subprocess worker.""" def __init__(self): self._grounder = None def _get_grounder(self): if self._grounder is None: from core.grounding.ui_tars_grounder import UITarsGrounder self._grounder = UITarsGrounder.get_instance() return self._grounder @property def available(self) -> bool: """Toujours disponible — le worker se lance à la demande.""" return True def arbitrate( self, target: GroundingTarget, candidates: List[MatchCandidate], screenshot_pil: Optional[Any] = None, ) -> Optional[LocateResult]: """Demande au VLM de trancher. Si target.template_b64 est fourni, on bascule en mode fusionné : le crop est passé comme image de référence à InfiGUI, ce qui évite une description Ollama qwen2.5vl coûteuse en VRAM. """ t0 = time.time() # Décodage du crop d'ancre si disponible (mode fusionné) anchor_pil = None if target.template_b64: try: import base64 import io from PIL import Image raw_b64 = target.template_b64 if ',' in raw_b64: raw_b64 = raw_b64.split(',', 1)[1] anchor_pil = Image.open(io.BytesIO(base64.b64decode(raw_b64))).convert("RGB") except Exception as ex: print(f"⚠️ [THINK] Décodage anchor échoué: {ex}") anchor_pil = None try: grounder = self._get_grounder() result = grounder.ground( target_text=target.text or "", target_description=target.description or "", screen_pil=screenshot_pil, anchor_pil=anchor_pil, ) dt = (time.time() - t0) * 1000 if result is None: label = target.text or "" print(f"🤔 [THINK] VLM n'a pas trouvé '{label}' ({dt:.0f}ms)") return None method = "think_vlm_fused" if anchor_pil is not None else "think_vlm" locate = LocateResult( x=result.x, y=result.y, confidence=result.confidence, method=method, time_ms=dt, tier="think", candidates_count=len(candidates), ) print(f"🤔 [THINK/{method}] ({result.x}, {result.y}) conf={result.confidence:.2f} ({dt:.0f}ms)") return locate except Exception as ex: dt = (time.time() - t0) * 1000 print(f"⚠️ [THINK] Erreur: {ex} ({dt:.0f}ms)") return None