Files
Geniusia_v2/geniusia2/core/vision_search.py
2026-03-05 00:20:25 +01:00

213 lines
6.4 KiB
Python

"""
Recherche visuelle d'éléments dans l'écran.
Utilise une approche hybride : template matching (rapide) + embeddings (robuste).
"""
import numpy as np
import cv2
from typing import Dict, Any, Optional, Tuple, List
from .embeddings_manager import EmbeddingsManager
from .logger import Logger
class VisionSearch:
    """
    Visual search for elements using two complementary strategies:

    1. OpenCV template matching — fast, works when the element's pixels
       are nearly unchanged on screen.
    2. Embedding similarity over a sliding window — slower, but robust to
       visual variation (theme, anti-aliasing, slight rendering changes).
    """

    # Minimum score for the fast template-matching pass; a stricter
    # caller-supplied threshold takes precedence (see find_element).
    TEMPLATE_THRESHOLD = 0.9

    def __init__(
        self,
        embeddings_manager: EmbeddingsManager,
        logger: Optional[Logger] = None
    ):
        """
        Initialize the visual search engine.

        Args:
            embeddings_manager: Provides image embeddings (encode_image).
            logger: Optional logger for diagnostics.
        """
        self.embeddings = embeddings_manager
        self.logger = logger

    def find_element(
        self,
        screenshot: np.ndarray,
        target_signature: Dict[str, Any],
        confidence_threshold: float = 0.8
    ) -> Optional[Tuple[int, int, float]]:
        """
        Locate an element on screen using its visual signature.

        Args:
            screenshot: Current screen image (BGR expected).
            target_signature: Element signature; may contain "region_image"
                (a template patch) and/or "embedding" (a feature vector).
            confidence_threshold: Minimum acceptable confidence.

        Returns:
            (x, y, confidence) of the element center, or None if not found.
        """
        # 1. Fast path: template matching. Honor the caller's threshold when
        #    it is stricter than the template default — previously a caller
        #    asking for 0.95 could still receive a 0.91 template match.
        result = self._template_matching(
            screenshot,
            target_signature.get("region_image"),
            confidence_threshold=max(self.TEMPLATE_THRESHOLD, confidence_threshold)
        )
        if result is not None:
            return result
        # 2. Fallback: embedding similarity (slower but robust).
        return self._embedding_search(
            screenshot,
            target_signature.get("embedding"),
            region_size=100,
            confidence_threshold=confidence_threshold
        )

    def _template_matching(
        self,
        screenshot: np.ndarray,
        template: Optional[np.ndarray],
        confidence_threshold: float = 0.9
    ) -> Optional[Tuple[int, int, float]]:
        """
        OpenCV template matching (fast path).

        Args:
            screenshot: Full screen image (BGR expected).
            template: Patch to look for; None or empty disables this path.
            confidence_threshold: Minimum normalized correlation score.

        Returns:
            (x, y, confidence) of the template center, or None.
        """
        if template is None or template.size == 0:
            return None
        try:
            # Match in grayscale: cheaper and less sensitive to color noise.
            gray_screenshot = cv2.cvtColor(screenshot, cv2.COLOR_BGR2GRAY)
            gray_template = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
            # A template larger than the screenshot cannot match and would
            # make cv2.matchTemplate raise.
            if (gray_template.shape[0] > gray_screenshot.shape[0]
                    or gray_template.shape[1] > gray_screenshot.shape[1]):
                return None
            result = cv2.matchTemplate(
                gray_screenshot,
                gray_template,
                cv2.TM_CCOEFF_NORMED
            )
            _, max_val, _, max_loc = cv2.minMaxLoc(result)
            if max_val >= confidence_threshold:
                # Return the center of the best match, not its top-left corner.
                h, w = gray_template.shape
                x = max_loc[0] + w // 2
                y = max_loc[1] + h // 2
                return (x, y, float(max_val))
        except Exception as e:
            # Best-effort: a failure here simply falls through to the
            # embedding search in find_element.
            if self.logger:
                self.logger.log_action({
                    "action": "template_matching_failed",
                    "error": str(e)
                })
        return None

    def _embedding_search(
        self,
        screenshot: np.ndarray,
        target_embedding: Optional[np.ndarray],
        region_size: int = 100,
        confidence_threshold: float = 0.8,
        step: int = 20
    ) -> Optional[Tuple[int, int, float]]:
        """
        Sliding-window embedding similarity search (robust but slow).

        Args:
            screenshot: Full screen image (BGR expected).
            target_embedding: Reference feature vector; None disables this path.
            region_size: Side length of each candidate square window.
            confidence_threshold: Minimum cosine similarity to accept.
            step: Sliding-window stride in pixels (20 = fast, 10 = precise).

        Returns:
            (x, y, confidence) of the best window center, or None.
        """
        if target_embedding is None:
            return None
        h, w = screenshot.shape[:2]
        half_size = region_size // 2
        # Hoist the loop-invariant reference norm; a zero vector would make
        # every cosine similarity undefined (division by zero -> NaN).
        target_norm = float(np.linalg.norm(target_embedding))
        if target_norm == 0.0:
            return None
        best_position: Optional[Tuple[int, int]] = None
        best_similarity = 0.0
        # Slide a region_size x region_size window across the screenshot.
        for y in range(half_size, h - half_size, step):
            for x in range(half_size, w - half_size, step):
                region = screenshot[y - half_size:y + half_size,
                                    x - half_size:x + half_size]
                # Resize edge windows to the size the encoder expects.
                if region.shape[0] != region_size or region.shape[1] != region_size:
                    region = cv2.resize(region, (region_size, region_size))
                try:
                    embedding = self.embeddings.encode_image(region)
                    norm = float(np.linalg.norm(embedding))
                    if norm == 0.0:
                        # Flat/degenerate window: similarity undefined, skip.
                        continue
                    similarity = float(
                        np.dot(embedding, target_embedding) / (norm * target_norm)
                    )
                    if similarity > best_similarity:
                        best_similarity = similarity
                        best_position = (x, y)
                except Exception:
                    # Best-effort: skip windows the encoder cannot process.
                    continue
        if best_position is not None and best_similarity >= confidence_threshold:
            return (best_position[0], best_position[1], float(best_similarity))
        return None

    def find_in_region(
        self,
        screenshot: np.ndarray,
        target_signature: Dict[str, Any],
        search_region: Tuple[int, int, int, int],
        confidence_threshold: float = 0.8
    ) -> Optional[Tuple[int, int, float]]:
        """
        Search within a sub-region of the screen (optimization).

        Args:
            screenshot: Full screen image (BGR expected).
            target_signature: Element signature (see find_element).
            search_region: (x1, y1, x2, y2) bounds of the search area.
            confidence_threshold: Minimum acceptable confidence.

        Returns:
            (x, y, confidence) in full-screenshot coordinates, or None.
        """
        x1, y1, x2, y2 = search_region
        region_screenshot = screenshot[y1:y2, x1:x2]
        # An empty crop (inverted or out-of-range bounds) cannot match.
        if region_screenshot.size == 0:
            return None
        result = self.find_element(
            region_screenshot,
            target_signature,
            confidence_threshold
        )
        if result is not None:
            # Translate region-local coordinates back to full-screen space.
            x, y, conf = result
            return (x + x1, y + y1, conf)
        return None