Initial commit
This commit is contained in:
212
geniusia2/core/vision_search.py
Normal file
212
geniusia2/core/vision_search.py
Normal file
@@ -0,0 +1,212 @@
|
||||
"""
|
||||
Recherche visuelle d'éléments dans l'écran.
|
||||
Utilise une approche hybride : template matching (rapide) + embeddings (robuste).
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import cv2
|
||||
from typing import Dict, Any, Optional, Tuple, List
|
||||
|
||||
from .embeddings_manager import EmbeddingsManager
|
||||
from .logger import Logger
|
||||
|
||||
|
||||
class VisionSearch:
    """Locate visual elements on a screenshot.

    Uses a hybrid strategy: OpenCV template matching first (fast), then a
    sliding-window embedding comparison as a fallback (slower, more robust).
    """

    # Minimum score accepted from template matching; a stricter caller
    # threshold takes precedence over this floor.
    TEMPLATE_MIN_CONFIDENCE = 0.9
    # Side length, in pixels, of the sliding window used by the embedding
    # fallback when called through ``find_element``.
    EMBEDDING_REGION_SIZE = 100

    def __init__(
        self,
        embeddings_manager: "EmbeddingsManager",
        logger: Optional["Logger"] = None
    ):
        """Initialise the visual search engine.

        Args:
            embeddings_manager: Object whose ``encode_image(region)`` method
                is used to embed screenshot windows.
            logger: Optional logger; when provided, failures are reported
                through ``logger.log_action({...})``.
        """
        self.embeddings = embeddings_manager
        self.logger = logger

    def find_element(
        self,
        screenshot: np.ndarray,
        target_signature: Dict[str, Any],
        confidence_threshold: float = 0.8
    ) -> Optional[Tuple[int, int, float]]:
        """Find an element on the screen from its visual signature.

        Args:
            screenshot: Image of the current screen.
            target_signature: Signature of the element to find. The keys
                read here are ``"region_image"`` (template image) and
                ``"embedding"`` (target embedding); both are optional.
            confidence_threshold: Minimum confidence for a match.

        Returns:
            ``(x, y, confidence)`` for the centre of the match, or ``None``
            when nothing reaches the threshold.
        """
        # 1. Template matching (fast). It keeps its own high floor, but must
        #    never return a match weaker than what the caller asked for.
        template_threshold = max(
            self.TEMPLATE_MIN_CONFIDENCE, confidence_threshold
        )
        result = self._template_matching(
            screenshot,
            target_signature.get("region_image"),
            confidence_threshold=template_threshold
        )
        if result is not None:
            return result

        # 2. Fall back to the embedding search (slower but more robust).
        return self._embedding_search(
            screenshot,
            target_signature.get("embedding"),
            region_size=self.EMBEDDING_REGION_SIZE,
            confidence_threshold=confidence_threshold
        )

    @staticmethod
    def _to_gray(image: np.ndarray) -> np.ndarray:
        """Return a single-channel version of a BGR, BGRA or gray image."""
        if image.ndim == 2:
            return image
        channels = image.shape[2]
        if channels == 1:
            # Drop the trailing axis; OpenCV wants a contiguous 2-D array.
            return np.ascontiguousarray(image[:, :, 0])
        if channels == 4:
            return cv2.cvtColor(image, cv2.COLOR_BGRA2GRAY)
        return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    def _template_matching(
        self,
        screenshot: np.ndarray,
        template: Optional[np.ndarray],
        confidence_threshold: float = 0.9
    ) -> Optional[Tuple[int, int, float]]:
        """Search for ``template`` with OpenCV template matching (fast).

        Args:
            screenshot: Image to search in.
            template: Image to look for; ``None`` or empty means no search.
            confidence_threshold: Minimum normalised correlation score.

        Returns:
            ``(x, y, confidence)`` for the centre of the best match, or
            ``None`` when there is no match or the search failed.
        """
        if template is None or template.size == 0:
            return None
        if screenshot is None or screenshot.size == 0:
            return None

        try:
            gray_screenshot = self._to_gray(screenshot)
            gray_template = self._to_gray(template)

            tpl_h, tpl_w = gray_template.shape[:2]
            scr_h, scr_w = gray_screenshot.shape[:2]
            # matchTemplate rejects a template larger than the image; that is
            # simply "not found", not an error worth logging.
            if tpl_h > scr_h or tpl_w > scr_w:
                return None

            scores = cv2.matchTemplate(
                gray_screenshot,
                gray_template,
                cv2.TM_CCOEFF_NORMED
            )
            _, max_val, _, max_loc = cv2.minMaxLoc(scores)

            # Guard against non-finite scores before comparing.
            if np.isfinite(max_val) and max_val >= confidence_threshold:
                # max_loc is the top-left corner; report the centre.
                x = max_loc[0] + tpl_w // 2
                y = max_loc[1] + tpl_h // 2
                return (x, y, float(max_val))

        except Exception as e:
            # Best effort: a failed template search falls through to None so
            # the caller can try the embedding search.
            if self.logger:
                self.logger.log_action({
                    "action": "template_matching_failed",
                    "error": str(e)
                })

        return None

    def _embedding_search(
        self,
        screenshot: np.ndarray,
        target_embedding: Optional[np.ndarray],
        region_size: int = 100,
        confidence_threshold: float = 0.8,
        step: int = 20
    ) -> Optional[Tuple[int, int, float]]:
        """Search by embedding similarity over a sliding window (slow).

        Args:
            screenshot: Image to search in.
            target_embedding: Embedding of the element to find; ``None``
                means no search.
            region_size: Side length of the square window, in pixels.
            confidence_threshold: Minimum cosine similarity for a match.
            step: Stride of the sliding window (20 = fast, 10 = precise).

        Returns:
            ``(x, y, confidence)`` for the centre of the best window, or
            ``None`` when no window reaches the threshold.
        """
        if target_embedding is None:
            return None

        target = np.asarray(target_embedding).ravel()
        target_norm = np.linalg.norm(target)  # loop-invariant
        if target_norm == 0:
            # Cosine similarity is undefined against a zero vector.
            return None

        h, w = screenshot.shape[:2]
        half_size = region_size // 2

        best_position = None
        best_similarity = 0.0
        failed_windows = 0
        last_error = None

        # Sliding window over window centres. The "+ 1" keeps the last
        # window that still fits, so a screenshot exactly ``region_size``
        # wide or tall is searched too.
        for y in range(half_size, h - half_size + 1, step):
            for x in range(half_size, w - half_size + 1, step):
                region = screenshot[
                    y - half_size:y + half_size,
                    x - half_size:x + half_size
                ]

                try:
                    # An odd region_size yields a window one pixel short.
                    if (region.shape[0] != region_size
                            or region.shape[1] != region_size):
                        region = cv2.resize(
                            region, (region_size, region_size)
                        )

                    embedding = np.asarray(
                        self.embeddings.encode_image(region)
                    ).ravel()
                    denom = np.linalg.norm(embedding) * target_norm
                    if denom == 0:
                        continue

                    # Cosine similarity.
                    similarity = float(np.dot(embedding, target) / denom)
                except Exception as e:
                    # Best effort: skip this window, but remember the failure
                    # so it can be reported once after the scan.
                    failed_windows += 1
                    last_error = e
                    continue

                if similarity > best_similarity:
                    best_similarity = similarity
                    best_position = (x, y)

        if failed_windows and self.logger:
            self.logger.log_action({
                "action": "embedding_search_window_failed",
                "failed_windows": failed_windows,
                "error": str(last_error)
            })

        # best_position stays None when no window was evaluated; checking it
        # avoids unpacking None when the threshold is zero or negative.
        if best_position is not None and best_similarity >= confidence_threshold:
            return (best_position[0], best_position[1], float(best_similarity))

        return None

    def find_in_region(
        self,
        screenshot: np.ndarray,
        target_signature: Dict[str, Any],
        search_region: Tuple[int, int, int, int],
        confidence_threshold: float = 0.8
    ) -> Optional[Tuple[int, int, float]]:
        """Search only inside a sub-region of the screenshot (optimisation).

        Args:
            screenshot: Image of the current screen.
            target_signature: Signature of the element to find, as accepted
                by ``find_element``.
            search_region: ``(x1, y1, x2, y2)`` region to search; it is
                clamped to the screenshot bounds.
            confidence_threshold: Minimum confidence for a match.

        Returns:
            ``(x, y, confidence)`` in full-screenshot coordinates, or
            ``None`` when nothing is found or the region is empty.
        """
        x1, y1, x2, y2 = search_region
        height, width = screenshot.shape[:2]

        # Clamp to the image: negative indices would otherwise wrap around
        # in NumPy slicing and produce a wrong crop and wrong offsets.
        x1 = max(0, min(int(x1), width))
        x2 = max(0, min(int(x2), width))
        y1 = max(0, min(int(y1), height))
        y2 = max(0, min(int(y2), height))
        if x2 <= x1 or y2 <= y1:
            return None

        result = self.find_element(
            screenshot[y1:y2, x1:x2],
            target_signature,
            confidence_threshold
        )
        if result is None:
            return None

        # Translate region-local coordinates back to screenshot coordinates.
        x, y, conf = result
        return (x + x1, y + y1, conf)
|
||||
Reference in New Issue
Block a user