Files
Geniusia_v2/geniusia2/core/vision_search.py
2026-03-05 00:20:25 +01:00

213 lines
6.4 KiB
Python

"""
Recherche visuelle d'éléments dans l'écran.
Utilise une approche hybride : template matching (rapide) + embeddings (robuste).
"""
import numpy as np
import cv2
from typing import Dict, Any, Optional, Tuple, List
from .embeddings_manager import EmbeddingsManager
from .logger import Logger
class VisionSearch:
    """
    Visual search for elements using two complementary strategies:

    1. OpenCV template matching — fast, works when the element's pixels
       are nearly unchanged on screen.
    2. Embedding similarity over a sliding window — slower, but robust to
       visual variation (theme, anti-aliasing, slight rendering changes).
    """

    # Minimum score for the fast template-matching pass; a stricter
    # caller-supplied threshold takes precedence (see find_element).
    TEMPLATE_THRESHOLD = 0.9

    def __init__(
        self,
        embeddings_manager: EmbeddingsManager,
        logger: Optional[Logger] = None
    ):
        """
        Initialize the visual search engine.

        Args:
            embeddings_manager: Provides image embeddings (encode_image).
            logger: Optional logger for diagnostics.
        """
        self.embeddings = embeddings_manager
        self.logger = logger

    def find_element(
        self,
        screenshot: np.ndarray,
        target_signature: Dict[str, Any],
        confidence_threshold: float = 0.8
    ) -> Optional[Tuple[int, int, float]]:
        """
        Locate an element on screen using its visual signature.

        Args:
            screenshot: Current screen image (BGR expected).
            target_signature: Element signature; may contain "region_image"
                (a template patch) and/or "embedding" (a feature vector).
            confidence_threshold: Minimum acceptable confidence.

        Returns:
            (x, y, confidence) of the element center, or None if not found.
        """
        # 1. Fast path: template matching. Honor the caller's threshold when
        #    it is stricter than the template default — previously a caller
        #    asking for 0.95 could still receive a 0.91 template match.
        result = self._template_matching(
            screenshot,
            target_signature.get("region_image"),
            confidence_threshold=max(self.TEMPLATE_THRESHOLD, confidence_threshold)
        )
        if result is not None:
            return result
        # 2. Fallback: embedding similarity (slower but robust).
        return self._embedding_search(
            screenshot,
            target_signature.get("embedding"),
            region_size=100,
            confidence_threshold=confidence_threshold
        )

    def _template_matching(
        self,
        screenshot: np.ndarray,
        template: Optional[np.ndarray],
        confidence_threshold: float = 0.9
    ) -> Optional[Tuple[int, int, float]]:
        """
        OpenCV template matching (fast path).

        Args:
            screenshot: Full screen image (BGR expected).
            template: Patch to look for; None or empty disables this path.
            confidence_threshold: Minimum normalized correlation score.

        Returns:
            (x, y, confidence) of the template center, or None.
        """
        if template is None or template.size == 0:
            return None
        try:
            # Match in grayscale: cheaper and less sensitive to color noise.
            gray_screenshot = cv2.cvtColor(screenshot, cv2.COLOR_BGR2GRAY)
            gray_template = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
            # A template larger than the screenshot cannot match and would
            # make cv2.matchTemplate raise.
            if (gray_template.shape[0] > gray_screenshot.shape[0]
                    or gray_template.shape[1] > gray_screenshot.shape[1]):
                return None
            result = cv2.matchTemplate(
                gray_screenshot,
                gray_template,
                cv2.TM_CCOEFF_NORMED
            )
            _, max_val, _, max_loc = cv2.minMaxLoc(result)
            if max_val >= confidence_threshold:
                # Return the center of the best match, not its top-left corner.
                h, w = gray_template.shape
                x = max_loc[0] + w // 2
                y = max_loc[1] + h // 2
                return (x, y, float(max_val))
        except Exception as e:
            # Best-effort: a failure here simply falls through to the
            # embedding search in find_element.
            if self.logger:
                self.logger.log_action({
                    "action": "template_matching_failed",
                    "error": str(e)
                })
        return None

    def _embedding_search(
        self,
        screenshot: np.ndarray,
        target_embedding: Optional[np.ndarray],
        region_size: int = 100,
        confidence_threshold: float = 0.8,
        step: int = 20
    ) -> Optional[Tuple[int, int, float]]:
        """
        Sliding-window embedding similarity search (robust but slow).

        Args:
            screenshot: Full screen image (BGR expected).
            target_embedding: Reference feature vector; None disables this path.
            region_size: Side length of each candidate square window.
            confidence_threshold: Minimum cosine similarity to accept.
            step: Sliding-window stride in pixels (20 = fast, 10 = precise).

        Returns:
            (x, y, confidence) of the best window center, or None.
        """
        if target_embedding is None:
            return None
        h, w = screenshot.shape[:2]
        half_size = region_size // 2
        # Hoist the loop-invariant reference norm; a zero vector would make
        # every cosine similarity undefined (division by zero -> NaN).
        target_norm = float(np.linalg.norm(target_embedding))
        if target_norm == 0.0:
            return None
        best_position: Optional[Tuple[int, int]] = None
        best_similarity = 0.0
        # Slide a region_size x region_size window across the screenshot.
        for y in range(half_size, h - half_size, step):
            for x in range(half_size, w - half_size, step):
                region = screenshot[y - half_size:y + half_size,
                                    x - half_size:x + half_size]
                # Resize edge windows to the size the encoder expects.
                if region.shape[0] != region_size or region.shape[1] != region_size:
                    region = cv2.resize(region, (region_size, region_size))
                try:
                    embedding = self.embeddings.encode_image(region)
                    norm = float(np.linalg.norm(embedding))
                    if norm == 0.0:
                        # Flat/degenerate window: similarity undefined, skip.
                        continue
                    similarity = float(
                        np.dot(embedding, target_embedding) / (norm * target_norm)
                    )
                    if similarity > best_similarity:
                        best_similarity = similarity
                        best_position = (x, y)
                except Exception:
                    # Best-effort: skip windows the encoder cannot process.
                    continue
        if best_position is not None and best_similarity >= confidence_threshold:
            return (best_position[0], best_position[1], float(best_similarity))
        return None

    def find_in_region(
        self,
        screenshot: np.ndarray,
        target_signature: Dict[str, Any],
        search_region: Tuple[int, int, int, int],
        confidence_threshold: float = 0.8
    ) -> Optional[Tuple[int, int, float]]:
        """
        Search within a sub-region of the screen (optimization).

        Args:
            screenshot: Full screen image (BGR expected).
            target_signature: Element signature (see find_element).
            search_region: (x1, y1, x2, y2) bounds of the search area.
            confidence_threshold: Minimum acceptable confidence.

        Returns:
            (x, y, confidence) in full-screenshot coordinates, or None.
        """
        x1, y1, x2, y2 = search_region
        region_screenshot = screenshot[y1:y2, x1:x2]
        # An empty crop (inverted or out-of-range bounds) cannot match.
        if region_screenshot.size == 0:
            return None
        result = self.find_element(
            region_screenshot,
            target_signature,
            confidence_threshold
        )
        if result is not None:
            # Translate region-local coordinates back to full-screen space.
            x, y, conf = result
            return (x + x1, y + y1, conf)
        return None