"""
Visual search for elements on the screen.

Uses a hybrid approach: template matching (fast) + embeddings (robust).
"""

import numpy as np
import cv2
from typing import Dict, Any, Optional, Tuple, List

from .embeddings_manager import EmbeddingsManager
from .logger import Logger

class VisionSearch:
    """Visual search for UI elements using template matching and embeddings.

    Strategy: try fast OpenCV template matching first (near-exact pixel
    match); if that fails, fall back to a slower but more robust
    sliding-window embedding similarity search.
    """

    def __init__(
        self,
        embeddings_manager: EmbeddingsManager,
        logger: Optional[Logger] = None
    ):
        """Initialise the visual search engine.

        Args:
            embeddings_manager: Used to encode image regions into embeddings.
            logger: Optional logger for diagnostics.
        """
        self.embeddings = embeddings_manager
        self.logger = logger

    def find_element(
        self,
        screenshot: np.ndarray,
        target_signature: Dict[str, Any],
        confidence_threshold: float = 0.8
    ) -> Optional[Tuple[int, int, float]]:
        """Find an element on screen using its visual signature.

        Args:
            screenshot: Current screen image (BGR, as used by OpenCV below).
            target_signature: Element signature; may contain "region_image"
                (a template patch) and/or "embedding" — either may be absent.
            confidence_threshold: Minimum confidence for the embedding search.

        Returns:
            (x, y, confidence) of the element centre, or None if not found.
        """
        # 1. Fast path: template matching with a deliberately high threshold
        #    so only near-exact pixel matches are trusted.
        result = self._template_matching(
            screenshot,
            target_signature.get("region_image"),
            confidence_threshold=0.9  # high threshold for template matching
        )
        if result:
            return result

        # 2. Fallback: embedding similarity search (slower, but robust to
        #    rendering variations).
        return self._embedding_search(
            screenshot,
            target_signature.get("embedding"),
            region_size=100,
            confidence_threshold=confidence_threshold
        )

    def _template_matching(
        self,
        screenshot: np.ndarray,
        # fix: may legitimately be None — callers pass signature.get(...)
        template: Optional[np.ndarray],
        confidence_threshold: float = 0.9
    ) -> Optional[Tuple[int, int, float]]:
        """Fast OpenCV template matching.

        Returns:
            (x, y, confidence) of the template centre, or None.
        """
        if template is None or template.size == 0:
            return None

        try:
            # Grayscale conversion makes matching cheaper and less sensitive
            # to minor colour differences.
            gray_screenshot = cv2.cvtColor(screenshot, cv2.COLOR_BGR2GRAY)
            gray_template = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)

            result = cv2.matchTemplate(
                gray_screenshot,
                gray_template,
                cv2.TM_CCOEFF_NORMED
            )
            _min_val, max_val, _min_loc, max_loc = cv2.minMaxLoc(result)

            if max_val >= confidence_threshold:
                # Report the centre of the matched template, not its corner.
                h, w = gray_template.shape
                x = max_loc[0] + w // 2
                y = max_loc[1] + h // 2
                return (x, y, float(max_val))

        except Exception as e:
            # Best-effort: matchTemplate raises e.g. when the template is
            # larger than the screenshot. Log and let the caller fall back.
            if self.logger:
                self.logger.log_action({
                    "action": "template_matching_failed",
                    "error": str(e)
                })

        return None

    def _embedding_search(
        self,
        screenshot: np.ndarray,
        # fix: may legitimately be None — callers pass signature.get(...)
        target_embedding: Optional[np.ndarray],
        region_size: int = 100,
        confidence_threshold: float = 0.8,
        step: int = 20
    ) -> Optional[Tuple[int, int, float]]:
        """Sliding-window embedding similarity search (robust but slow).

        Args:
            step: Sliding-window stride (20 = fast, 10 = precise).

        Returns:
            (x, y, confidence) of the best-matching window centre, or None.
        """
        if target_embedding is None:
            return None

        # Hoisted out of the loop (loop-invariant). A zero-norm target makes
        # cosine similarity undefined and can never match anything.
        target_norm = float(np.linalg.norm(target_embedding))
        if target_norm == 0.0:
            return None

        h, w = screenshot.shape[:2]
        half_size = region_size // 2

        best_position: Optional[Tuple[int, int]] = None
        best_similarity = 0.0

        # Sliding window over the screenshot; (x, y) is the window centre.
        for y in range(half_size, h - half_size, step):
            for x in range(half_size, w - half_size, step):
                region = screenshot[
                    y - half_size:y + half_size,
                    x - half_size:x + half_size
                ]

                # Resize edge windows clipped by the screenshot border.
                if region.shape[0] != region_size or region.shape[1] != region_size:
                    region = cv2.resize(region, (region_size, region_size))

                try:
                    embedding = self.embeddings.encode_image(region)

                    # Cosine similarity; skip degenerate (zero-norm) regions
                    # instead of silently dividing by zero.
                    norm = float(np.linalg.norm(embedding))
                    if norm == 0.0:
                        continue
                    similarity = float(np.dot(embedding, target_embedding)) / (
                        norm * target_norm
                    )

                    if similarity > best_similarity:
                        best_similarity = similarity
                        best_position = (x, y)

                except Exception:
                    # Best-effort: skip regions the encoder cannot handle.
                    continue

        # Guard best_position explicitly: with confidence_threshold <= 0 the
        # similarity test alone could pass with no position recorded.
        if best_position is not None and best_similarity >= confidence_threshold:
            return (*best_position, float(best_similarity))

        return None

    def find_in_region(
        self,
        screenshot: np.ndarray,
        target_signature: Dict[str, Any],
        search_region: Tuple[int, int, int, int],
        confidence_threshold: float = 0.8
    ) -> Optional[Tuple[int, int, float]]:
        """Search within a specific sub-region (optimisation).

        Args:
            search_region: (x1, y1, x2, y2) in screenshot coordinates.

        Returns:
            (x, y, confidence) in full-screenshot coordinates, or None.
        """
        x1, y1, x2, y2 = search_region

        # Clamp to the screenshot bounds: negative indices would silently
        # wrap under numpy slicing and corrupt the coordinate translation
        # back to screen space below.
        h, w = screenshot.shape[:2]
        x1 = max(0, min(x1, w))
        y1 = max(0, min(y1, h))
        x2 = max(x1, min(x2, w))
        y2 = max(y1, min(y2, h))

        region_screenshot = screenshot[y1:y2, x1:x2]

        result = self.find_element(
            region_screenshot,
            target_signature,
            confidence_threshold
        )

        if result:
            # Translate region-local coordinates back to screenshot space.
            x, y, conf = result
            return (x + x1, y + y1, conf)

        return None