Initial commit

2026-03-05 00:20:25 +01:00
commit dcd4de9945
1954 changed files with 669380 additions and 0 deletions
--- a/geniusia2/core/vision_search.py
+++ b/geniusia2/core/vision_search.py
@@ -0,0 +1,212 @@
+"""
+Recherche visuelle d'éléments dans l'écran.
+Utilise une approche hybride : template matching (rapide) + embeddings (robuste).
+"""
+
+import numpy as np
+import cv2
+from typing import Dict, Any, Optional, Tuple, List
+
+from .embeddings_manager import EmbeddingsManager
+from .logger import Logger
+
+
class VisionSearch:
    """
    Locate an element in a screenshot from its stored visual signature.

    Two strategies are chained: OpenCV template matching first (fast), then a
    sliding-window comparison of image embeddings as a slower fallback.
    """

    # Template matching never accepts a score below this floor, even when the
    # caller asks for a lower overall confidence.
    _TEMPLATE_MIN_CONFIDENCE = 0.9
    # Side length, in pixels, of the windows compared during embedding search.
    _EMBEDDING_REGION_SIZE = 100

    def __init__(
        self,
        embeddings_manager: "EmbeddingsManager",
        logger: Optional["Logger"] = None
    ):
        """
        Store the collaborators used by the search methods.

        Args:
            embeddings_manager: Object whose ``encode_image(region)`` returns
                the embedding of an image region.
            logger: Optional logger; when set, failures are reported through
                its ``log_action(dict)`` method.
        """
        self.embeddings = embeddings_manager
        self.logger = logger

    def find_element(
        self,
        screenshot: np.ndarray,
        target_signature: Dict[str, Any],
        confidence_threshold: float = 0.8
    ) -> Optional[Tuple[int, int, float]]:
        """
        Find an element in the screenshot from its visual signature.

        Args:
            screenshot: Image of the current screen.
            target_signature: Signature of the element; the optional keys
                ``"region_image"`` (template) and ``"embedding"`` are read.
            confidence_threshold: Minimum confidence for a match.

        Returns:
            ``(x, y, confidence)`` for the centre of the match, or ``None``.
        """
        # 1. Template matching. Its threshold is the stricter of the fixed
        #    floor and the caller's value, so a result is never returned with
        #    a confidence below what the caller requested.
        template_hit = self._template_matching(
            screenshot,
            target_signature.get("region_image"),
            confidence_threshold=max(
                self._TEMPLATE_MIN_CONFIDENCE, confidence_threshold
            ),
        )
        if template_hit is not None:
            return template_hit

        # 2. Fall back to the embedding comparison.
        return self._embedding_search(
            screenshot,
            target_signature.get("embedding"),
            region_size=self._EMBEDDING_REGION_SIZE,
            confidence_threshold=confidence_threshold,
        )

    @staticmethod
    def _to_gray(image: np.ndarray) -> np.ndarray:
        """Return a single-channel view/copy of ``image`` for matching."""
        if image.ndim == 2:
            # Already single-channel: cvtColor would reject it.
            return image
        if image.shape[2] == 1:
            return image[:, :, 0]
        return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    def _template_matching(
        self,
        screenshot: Optional[np.ndarray],
        template: Optional[np.ndarray],
        confidence_threshold: float = 0.9
    ) -> Optional[Tuple[int, int, float]]:
        """
        Search for ``template`` in ``screenshot`` with OpenCV template matching.

        Args:
            screenshot: Image to search in.
            template: Image to look for; ``None`` or empty disables the search.
            confidence_threshold: Minimum ``TM_CCOEFF_NORMED`` score to accept.

        Returns:
            ``(x, y, confidence)`` where ``(x, y)`` is the centre of the best
            match, or ``None`` when nothing reaches the threshold or the
            matching fails (failures are logged when a logger is set).
        """
        if template is None or template.size == 0:
            return None
        if screenshot is None or screenshot.size == 0:
            return None

        try:
            gray_screenshot = self._to_gray(screenshot)
            gray_template = self._to_gray(template)

            tpl_h, tpl_w = gray_template.shape[:2]
            scr_h, scr_w = gray_screenshot.shape[:2]
            # A template larger than the searched image cannot match; this is
            # an ordinary miss (e.g. a small search region), not an error.
            if tpl_h > scr_h or tpl_w > scr_w:
                return None

            scores = cv2.matchTemplate(
                gray_screenshot,
                gray_template,
                cv2.TM_CCOEFF_NORMED
            )
            _, max_val, _, max_loc = cv2.minMaxLoc(scores)

            if max_val >= confidence_threshold:
                # max_loc is the top-left corner; report the centre instead.
                return (
                    max_loc[0] + tpl_w // 2,
                    max_loc[1] + tpl_h // 2,
                    float(max_val),
                )

        except Exception as e:
            # Best-effort step: report and let the caller fall back.
            if self.logger:
                self.logger.log_action({
                    "action": "template_matching_failed",
                    "error": str(e)
                })

        return None

    def _embedding_search(
        self,
        screenshot: np.ndarray,
        target_embedding: Optional[np.ndarray],
        region_size: int = 100,
        confidence_threshold: float = 0.8,
        step: int = 20
    ) -> Optional[Tuple[int, int, float]]:
        """
        Search by cosine similarity of embeddings over a sliding window.

        Args:
            screenshot: Image to search in.
            target_embedding: Embedding to look for; ``None`` disables the
                search.
            region_size: Nominal side length of each window in pixels.
            confidence_threshold: Minimum cosine similarity to accept.
            step: Stride of the sliding window in pixels (smaller = more
                windows evaluated).

        Returns:
            ``(x, y, confidence)`` for the centre of the first window with the
            highest similarity, or ``None``. Only strictly positive
            similarities are considered.
        """
        if target_embedding is None:
            return None

        # Flatten once and hoist the norm out of the loop.
        target = np.asarray(target_embedding, dtype=np.float64).ravel()
        target_norm = float(np.linalg.norm(target))
        if target.size == 0 or target_norm == 0.0:
            # Cosine similarity is undefined for a zero vector.
            return None

        h, w = screenshot.shape[:2]
        half_size = region_size // 2
        if half_size < 1:
            return None

        best_position = None
        best_similarity = 0.0
        failures = 0
        last_error = None

        # "+ 1" keeps the last centre whose window still fits in the image,
        # so an image exactly one window tall/wide is searched too.
        for y in range(half_size, h - half_size + 1, step):
            for x in range(half_size, w - half_size + 1, step):
                region = screenshot[
                    y - half_size:y + half_size,
                    x - half_size:x + half_size
                ]

                # Odd region_size yields a window one pixel short.
                if region.shape[0] != region_size or region.shape[1] != region_size:
                    region = cv2.resize(region, (region_size, region_size))

                try:
                    embedding = np.asarray(
                        self.embeddings.encode_image(region),
                        dtype=np.float64
                    ).ravel()
                    norm = float(np.linalg.norm(embedding))
                    if norm == 0.0:
                        continue
                    similarity = float(
                        np.dot(embedding, target) / (norm * target_norm)
                    )
                except Exception as e:
                    # Skip this window but remember the failure so that a
                    # broken encoder is not mistaken for "not found".
                    failures += 1
                    last_error = e
                    continue

                if similarity > best_similarity:
                    best_similarity = similarity
                    best_position = (x, y)

        if failures and self.logger:
            self.logger.log_action({
                "action": "embedding_search_failed",
                "error": str(last_error),
                "failed_windows": failures
            })

        # best_position stays None when no window was evaluated or none had a
        # positive similarity; never unpack it in that case.
        if best_position is not None and best_similarity >= confidence_threshold:
            return (best_position[0], best_position[1], best_similarity)

        return None

    def find_in_region(
        self,
        screenshot: np.ndarray,
        target_signature: Dict[str, Any],
        search_region: Tuple[int, int, int, int],
        confidence_threshold: float = 0.8
    ) -> Optional[Tuple[int, int, float]]:
        """
        Run ``find_element`` on a sub-rectangle of the screenshot.

        Args:
            screenshot: Image of the current screen.
            target_signature: Signature of the element to find.
            search_region: ``(x1, y1, x2, y2)`` rectangle to search; it is
                clamped to the screenshot bounds.
            confidence_threshold: Minimum confidence for a match.

        Returns:
            ``(x, y, confidence)`` in full-screenshot coordinates, or ``None``
            (including when the clamped rectangle is empty).
        """
        x1, y1, x2, y2 = search_region
        height, width = screenshot.shape[:2]

        # Clamp to the image: negative indices would otherwise wrap around in
        # the slice below and make the coordinate offset wrong.
        x1 = min(max(int(x1), 0), width)
        x2 = min(max(int(x2), 0), width)
        y1 = min(max(int(y1), 0), height)
        y2 = min(max(int(y2), 0), height)
        if x2 <= x1 or y2 <= y1:
            return None

        result = self.find_element(
            screenshot[y1:y2, x1:x2],
            target_signature,
            confidence_threshold
        )
        if result is None:
            return None

        # Translate from region-local back to screenshot coordinates.
        x, y, conf = result
        return (x + x1, y + y1, conf)