""" core/grounding/fast_pipeline.py — Pipeline FAST → SMART → THINK Orchestrateur central : détecte les éléments (FAST), matche avec la cible (SMART), et demande au VLM de trancher si le score est trop bas (THINK). Seuils de confiance : ≥ 0.90 → action directe (FAST/SMART) 0.60-0.90 → VLM confirme (THINK) < 0.60 → VLM cherche seul (THINK) L'ancien GroundingPipeline est utilisé en fallback si tout échoue. Utilisation : from core.grounding.fast_pipeline import FastSmartThinkPipeline from core.grounding.target import GroundingTarget pipeline = FastSmartThinkPipeline() result = pipeline.locate(GroundingTarget(text="Valider")) if result: print(f"({result.x}, {result.y}) via {result.method} en {result.time_ms:.0f}ms") """ from __future__ import annotations import time import threading from typing import Optional from core.grounding.target import GroundingTarget, GroundingResult from core.grounding.fast_types import LocateResult from core.grounding.fast_detector import FastDetector from core.grounding.smart_matcher import SmartMatcher from core.grounding.think_arbiter import ThinkArbiter from core.grounding.element_signature import SignatureStore # Singleton _instance: Optional[FastSmartThinkPipeline] = None _instance_lock = threading.Lock() class FastSmartThinkPipeline: """Pipeline FAST → SMART → THINK pour la localisation d'éléments UI. Chaque appel à locate() suit la cascade : 1. FAST : détection RF-DETR + OCR enrichissement (~120ms+1s) 2. SMART : matching texte/type/position/voisins (< 1ms) 3. THINK : VLM arbitre si score insuffisant (~3-5s) 4. Fallback : ancien pipeline si tout échoue """ def __init__( self, confidence_direct: float = 0.90, confidence_think: float = 0.60, enable_think: bool = True, enable_learning: bool = True, ): self.confidence_direct = confidence_direct self.confidence_think = confidence_think self.enable_think = enable_think self.enable_learning = enable_learning self._detector = FastDetector() self._matcher = SmartMatcher() self._arbiter = ThinkArbiter() self._signatures = SignatureStore() self._fallback_pipeline = None @classmethod def get_instance(cls) -> FastSmartThinkPipeline: """Retourne l'instance singleton.""" global _instance if _instance is None: with _instance_lock: if _instance is None: _instance = cls() return _instance def set_fallback_pipeline(self, pipeline) -> None: """Configure l'ancien pipeline comme safety net.""" self._fallback_pipeline = pipeline # ------------------------------------------------------------------ # API principale # ------------------------------------------------------------------ def locate( self, target: GroundingTarget, screenshot_pil=None, phash: str = "", window_title: str = "", ) -> Optional[GroundingResult]: """Localise un élément UI via la cascade FAST → SMART → THINK. Args: target: Ce qu'on cherche (texte, description, bbox d'origine). screenshot_pil: Image PIL. Si None, capture via mss. phash: Hash perceptuel pour le cache. window_title: Titre de la fenêtre active. Returns: GroundingResult compatible avec le pipeline existant, ou None. """ t0 = time.time() # --- FAST : détecter tous les éléments --- snapshot = self._detector.detect( screenshot_pil=screenshot_pil, phash=phash, window_title=window_title, ) if not snapshot.elements: print(f"⚡ [Pipeline] FAST : aucun élément détecté") return self._try_fallback(target) # --- Lookup signature apprise --- target_key = SignatureStore.make_target_key( target.text or "", target.description or "" ) screen_ctx = SignatureStore.make_screen_context( window_title, snapshot.resolution ) signature = self._signatures.lookup(target_key, screen_ctx) # --- SMART : matcher avec la cible --- candidate = self._matcher.match(snapshot, target, signature) if candidate: dt = (time.time() - t0) * 1000 # Score suffisant → action directe if candidate.score >= self.confidence_direct: print(f"✅ [Pipeline] FAST→SMART direct : '{candidate.element.ocr_text}' " f"score={candidate.score:.3f} ({candidate.method}) " f"→ ({candidate.element.center[0]}, {candidate.element.center[1]}) " f"en {dt:.0f}ms") # Apprentissage if self.enable_learning: self._signatures.record_success( target_key, screen_ctx, candidate.element, candidate.score, ) return GroundingResult( x=candidate.element.center[0], y=candidate.element.center[1], method=f"fast_{candidate.method}", confidence=candidate.score, time_ms=dt, ) # Score moyen → demander au VLM de confirmer if candidate.score >= self.confidence_think and self.enable_think: print(f"🤔 [Pipeline] SMART score={candidate.score:.3f} — THINK pour confirmer") think_result = self._arbiter.arbitrate( target, candidates=[candidate], screenshot_pil=screenshot_pil or snapshot.elements[0] if False else screenshot_pil, ) dt = (time.time() - t0) * 1000 if think_result: # VLM a confirmé if self.enable_learning: self._signatures.record_success( target_key, screen_ctx, candidate.element, think_result.confidence, ) return GroundingResult( x=think_result.x, y=think_result.y, method="smart_think_confirmed", confidence=think_result.confidence, time_ms=dt, ) # --- THINK : score trop bas ou pas de candidat → VLM cherche seul --- if self.enable_think: score_info = f"score={candidate.score:.3f}" if candidate else "aucun candidat" print(f"🤔 [Pipeline] {score_info} — THINK recherche complète") think_result = self._arbiter.arbitrate( target, candidates=[], screenshot_pil=screenshot_pil, ) dt = (time.time() - t0) * 1000 if think_result: return GroundingResult( x=think_result.x, y=think_result.y, method="think_vlm", confidence=think_result.confidence, time_ms=dt, ) # --- Fallback : ancien pipeline --- return self._try_fallback(target) # ------------------------------------------------------------------ # Fallback # ------------------------------------------------------------------ def _try_fallback(self, target: GroundingTarget) -> Optional[GroundingResult]: """Tente l'ancien pipeline en dernier recours.""" if self._fallback_pipeline is None: print(f"❌ [Pipeline] Aucune méthode n'a trouvé '{target.text}'") return None print(f"⚠️ [Pipeline] Fallback ancien pipeline pour '{target.text}'") try: return self._fallback_pipeline.locate(target) except Exception as ex: print(f"⚠️ [Pipeline] Fallback échoué: {ex}") return None