"""
Visual search for elements on the screen.

Uses a hybrid approach: template matching (fast) + embeddings (robust).
"""

import numpy as np
import cv2
from typing import Dict, Any, Optional, Tuple, List

from .embeddings_manager import EmbeddingsManager
from .logger import Logger


class VisionSearch:
    """
    Visual element search combining OpenCV template matching with an
    embedding-based sliding-window search.
    """

    def __init__(
        self,
        embeddings_manager: EmbeddingsManager,
        logger: Optional[Logger] = None
    ):
        """
        Initialise the visual search engine.

        Args:
            embeddings_manager: Object whose ``encode_image(region)`` method
                is used to embed screenshot windows.
            logger: Optional logger; when given, failures are reported
                through ``logger.log_action(dict)``.
        """
        self.embeddings = embeddings_manager
        self.logger = logger

    def find_element(
        self,
        screenshot: np.ndarray,
        target_signature: Dict[str, Any],
        confidence_threshold: float = 0.8
    ) -> Optional[Tuple[int, int, float]]:
        """
        Find an element on the screen from its visual signature.

        Template matching is tried first; the embedding search is only run
        when template matching finds nothing.

        Args:
            screenshot: Image of the current screen.
            target_signature: Signature of the element to find. The keys
                ``"region_image"`` (template) and ``"embedding"`` are read;
                either may be missing.
            confidence_threshold: Minimum confidence for a match.

        Returns:
            (x, y, confidence) or None if nothing was found.
        """
        # 1. Template matching (fast). It keeps a strict floor of 0.9, but
        #    must never return a match below the caller's own minimum.
        match = self._template_matching(
            screenshot,
            target_signature.get("region_image"),
            confidence_threshold=max(0.9, confidence_threshold)
        )
        if match is not None:
            return match

        # 2. Fall back to the embedding search (slower but more robust).
        return self._embedding_search(
            screenshot,
            target_signature.get("embedding"),
            region_size=100,
            confidence_threshold=confidence_threshold
        )

    @staticmethod
    def _to_gray(image: np.ndarray) -> np.ndarray:
        """Return a single-channel version of ``image``.

        Colour images are assumed to be in BGR(A) channel order, as the
        conversion codes below imply -- TODO confirm against the capture code.
        """
        if image.ndim == 2:
            # Already single-channel; cvtColor would raise on this input.
            return image
        channels = image.shape[2]
        if channels == 1:
            return np.ascontiguousarray(image[:, :, 0])
        if channels == 4:
            return cv2.cvtColor(image, cv2.COLOR_BGRA2GRAY)
        return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    def _template_matching(
        self,
        screenshot: np.ndarray,
        template: Optional[np.ndarray],
        confidence_threshold: float = 0.9
    ) -> Optional[Tuple[int, int, float]]:
        """
        Search with OpenCV template matching (fast).

        Args:
            screenshot: Image to search in.
            template: Image of the element to find; None or empty disables
                the search.
            confidence_threshold: Minimum normalised correlation score.

        Returns:
            (x, y, confidence), where (x, y) is the centre of the best
            match, or None if there is no match or the search failed.
        """
        if template is None or template.size == 0:
            return None
        if screenshot is None or screenshot.size == 0:
            return None

        try:
            gray_screenshot = self._to_gray(screenshot)
            gray_template = self._to_gray(template)

            tpl_h, tpl_w = gray_template.shape[:2]
            scr_h, scr_w = gray_screenshot.shape[:2]
            if tpl_h > scr_h or tpl_w > scr_w:
                # matchTemplate requires the template to fit in the image.
                return None

            scores = cv2.matchTemplate(
                gray_screenshot,
                gray_template,
                cv2.TM_CCOEFF_NORMED
            )
            _, max_val, _, max_loc = cv2.minMaxLoc(scores)

            # Reject non-finite scores so they can never pass the threshold.
            if np.isfinite(max_val) and max_val >= confidence_threshold:
                # max_loc is the top-left corner; report the template centre.
                x = max_loc[0] + tpl_w // 2
                y = max_loc[1] + tpl_h // 2
                return (x, y, float(max_val))

        except Exception as e:
            # Best effort: a failure here must not prevent the caller from
            # falling back to the embedding search.
            if self.logger:
                self.logger.log_action({
                    "action": "template_matching_failed",
                    "error": str(e)
                })

        return None

    def _embedding_search(
        self,
        screenshot: np.ndarray,
        target_embedding: Optional[np.ndarray],
        region_size: int = 100,
        confidence_threshold: float = 0.8,
        step: int = 20
    ) -> Optional[Tuple[int, int, float]]:
        """
        Search by embedding similarity (robust but slow).

        A square window slides over the screenshot; each window is embedded
        and compared with ``target_embedding`` by cosine similarity.

        Args:
            screenshot: Image to search in.
            target_embedding: Embedding of the element to find; None
                disables the search.
            region_size: Side length of the square window, in pixels.
            confidence_threshold: Minimum cosine similarity for a match.
            step: Stride of the sliding window (20 = fast, 10 = precise).

        Returns:
            (x, y, confidence), where (x, y) is the centre of the best
            window, or None if no window reaches the threshold.

        Raises:
            ValueError: If ``region_size`` is smaller than 2 or ``step`` is
                smaller than 1.
        """
        if target_embedding is None:
            return None
        if region_size < 2:
            raise ValueError("region_size must be at least 2")
        if step < 1:
            raise ValueError("step must be at least 1")

        # The target norm is loop-invariant: compute it once.
        try:
            target_norm = float(np.linalg.norm(target_embedding))
        except Exception as e:
            if self.logger:
                self.logger.log_action({
                    "action": "embedding_search_failed",
                    "error": str(e)
                })
            return None
        if not np.isfinite(target_norm) or target_norm == 0.0:
            # Cosine similarity is undefined for such a target.
            return None

        h, w = screenshot.shape[:2]
        half_size = region_size // 2

        best_position = None
        best_similarity = 0.0
        failed_windows = 0
        first_error = None

        # Sliding window over every centre that keeps the window in bounds.
        for y in range(half_size, h - half_size, step):
            for x in range(half_size, w - half_size, step):
                region = screenshot[
                    y - half_size:y + half_size,
                    x - half_size:x + half_size
                ]

                # An odd region_size yields a window one pixel short.
                if region.shape[0] != region_size or region.shape[1] != region_size:
                    region = cv2.resize(region, (region_size, region_size))

                try:
                    embedding = self.embeddings.encode_image(region)

                    denominator = np.linalg.norm(embedding) * target_norm
                    if denominator == 0:
                        # Zero-norm embedding: similarity is undefined.
                        continue
                    similarity = np.dot(embedding, target_embedding) / denominator

                    if similarity > best_similarity:
                        best_similarity = similarity
                        best_position = (x, y)

                except Exception as e:
                    # Best effort: skip this window, report once afterwards.
                    failed_windows += 1
                    if first_error is None:
                        first_error = e
                    continue

        if first_error is not None and self.logger:
            # Logged once, not per window, to avoid flooding the log.
            self.logger.log_action({
                "action": "embedding_search_failed",
                "error": str(first_error),
                "failed_windows": failed_windows
            })

        # best_position stays None when no window scored above zero, which
        # would otherwise crash for thresholds <= 0.
        if best_position is not None and best_similarity >= confidence_threshold:
            return (best_position[0], best_position[1], float(best_similarity))

        return None

    def find_in_region(
        self,
        screenshot: np.ndarray,
        target_signature: Dict[str, Any],
        search_region: Tuple[int, int, int, int],
        confidence_threshold: float = 0.8
    ) -> Optional[Tuple[int, int, float]]:
        """
        Search inside a specific region of the screenshot (optimisation).

        Args:
            screenshot: Image of the current screen.
            target_signature: Signature of the element to find, as accepted
                by ``find_element``.
            search_region: (x1, y1, x2, y2) region to search. It is clamped
                to the screenshot bounds.
            confidence_threshold: Minimum confidence for a match.

        Returns:
            (x, y, confidence) in full-screenshot coordinates, or None if
            nothing was found or the region is empty.
        """
        x1, y1, x2, y2 = search_region
        height, width = screenshot.shape[:2]

        # Clamp to the image: negative indices would otherwise wrap around
        # in the slice and make the coordinate offsets below wrong.
        x1 = max(0, min(int(x1), width))
        x2 = max(0, min(int(x2), width))
        y1 = max(0, min(int(y1), height))
        y2 = max(0, min(int(y2), height))
        if x2 <= x1 or y2 <= y1:
            return None

        region_screenshot = screenshot[y1:y2, x1:x2]

        result = self.find_element(
            region_screenshot,
            target_signature,
            confidence_threshold
        )
        if result is None:
            return None

        # Translate region-local coordinates back to screenshot coordinates.
        x, y, conf = result
        return (x + x1, y + y1, conf)