521 lines
15 KiB
Python
521 lines
15 KiB
Python
"""
|
|
Utilitaires pour la capture d'écran et le traitement d'images
|
|
Fournit des fonctions pour capturer l'écran, extraire des ROI et dessiner des bounding boxes
|
|
"""
|
|
|
|
import numpy as np
|
|
import cv2
|
|
from typing import Tuple, Optional
|
|
import platform
|
|
import subprocess
|
|
|
|
|
|
def capture_screen() -> np.ndarray:
    """
    Capture the full screen and return it as a NumPy array.

    The value of ``platform.system()`` selects the backend:
    ``_capture_screen_linux``, ``_capture_screen_windows`` or
    ``_capture_screen_macos``. Each of those backends tries ``mss`` first
    and falls back to Pillow.

    Returns:
        The screenshot produced by the platform backend, in BGR channel
        order (the OpenCV convention).

    Raises:
        RuntimeError: If the operating system is not supported or the
            backend fails. The underlying exception is attached as
            ``__cause__``.
    """
    try:
        system = platform.system()

        if system == "Linux":
            return _capture_screen_linux()
        if system == "Windows":
            return _capture_screen_windows()
        if system == "Darwin":  # macOS
            return _capture_screen_macos()

        raise RuntimeError(f"Système d'exploitation non supporté: {system}")

    except Exception as e:
        # Every failure (including the unsupported-OS error raised just above)
        # is re-wrapped so callers only have to handle RuntimeError.
        # ``from e`` keeps the original exception reachable for debugging.
        raise RuntimeError(f"Échec de la capture d'écran: {str(e)}") from e
|
|
|
|
|
|
def _capture_screen_linux() -> np.ndarray:
    """
    Linux screen-capture backend.

    Grabs ``monitors[1]`` through ``mss`` and converts the BGRA pixels to
    BGR. If an ImportError occurs (typically ``mss`` is not installed), the
    capture is delegated to ``_capture_screen_pil``.
    """
    try:
        import mss
        import mss.tools

        with mss.mss() as grabber:
            # monitors[1] is what the original author treats as the primary
            # monitor; the grabbed pixels are exposed as a BGRA array.
            bgra = np.array(grabber.grab(grabber.monitors[1]))

            # Drop the alpha channel: BGRA -> BGR.
            return cv2.cvtColor(bgra, cv2.COLOR_BGRA2BGR)
    except ImportError:
        # Fallback: use PIL/Pillow.
        return _capture_screen_pil()
|
|
|
|
|
|
def _capture_screen_windows() -> np.ndarray:
    """
    Windows screen-capture backend.

    Grabs ``monitors[1]`` through ``mss`` and returns the pixels as a BGR
    array. If an ImportError occurs, the capture is delegated to
    ``_capture_screen_pil``.
    """
    try:
        import mss
        import mss.tools

        with mss.mss() as grabber:
            region = grabber.monitors[1]
            bgra = np.array(grabber.grab(region))
            # BGRA -> BGR (alpha channel removed).
            return cv2.cvtColor(bgra, cv2.COLOR_BGRA2BGR)
    except ImportError:
        return _capture_screen_pil()
|
|
|
|
|
|
def _capture_screen_macos() -> np.ndarray:
    """
    macOS screen-capture backend.

    Grabs ``monitors[1]`` through ``mss`` and returns the pixels as a BGR
    array. If an ImportError occurs, the capture is delegated to
    ``_capture_screen_pil``.
    """
    try:
        import mss
        import mss.tools

        with mss.mss() as grabber:
            shot = grabber.grab(grabber.monitors[1])
            pixels = np.array(shot)
            # BGRA -> BGR (alpha channel removed).
            pixels = cv2.cvtColor(pixels, cv2.COLOR_BGRA2BGR)
            return pixels
    except ImportError:
        return _capture_screen_pil()
|
|
|
|
|
|
def _capture_screen_pil() -> np.ndarray:
    """
    Capture the screen with PIL/Pillow (fallback backend).

    Returns:
        The screenshot converted from Pillow's RGB order to BGR
        (the OpenCV convention).

    Raises:
        RuntimeError: If Pillow's ``ImageGrab`` cannot be imported. The
            ImportError is attached as ``__cause__``.
    """
    # Only the import is guarded: an ImportError raised later (during the
    # grab or the colour conversion) must not be misreported as
    # "no capture library installed".
    try:
        from PIL import ImageGrab
    except ImportError as exc:
        raise RuntimeError("Aucune bibliothèque de capture d'écran disponible. "
                           "Installez 'mss' ou 'Pillow'.") from exc

    screenshot = ImageGrab.grab()
    img = np.array(screenshot)

    # Convert RGB to BGR (OpenCV format).
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    return img
|
|
|
|
|
|
def get_active_window() -> str:
    """
    Return the title of the currently active window.

    Dispatches on ``platform.system()`` to the Linux, Windows or macOS
    helper. Any exception raised by a helper is printed and swallowed.

    Returns:
        The string produced by the platform helper, or an empty string when
        the platform is not recognised or the helper raised.
    """
    title = ""
    try:
        os_name = platform.system()

        if os_name == "Linux":
            title = _get_active_window_linux()
        elif os_name == "Windows":
            title = _get_active_window_windows()
        elif os_name == "Darwin":  # macOS
            title = _get_active_window_macos()
    except Exception as e:
        print(f"Erreur lors de la récupération de la fenêtre active: {e}")
        title = ""

    return title
|
|
|
|
|
|
def _get_active_window_linux() -> str:
    """
    Return the active window title on Linux, trying several methods in turn.

    Order of attempts: ``xdotool``, ``xprop``, ``wmctrl``, then the
    python-xlib bindings. Each method is best-effort: a missing tool, a
    timeout or a non-zero exit status simply moves on to the next method.

    Returns:
        The first non-empty title found, or ``"Unknown Window"`` when every
        method fails. (The Windows and macOS helpers return an empty string
        in that situation; the sentinel is kept here for backward
        compatibility.)
    """
    # Method 1: xdotool (the most reliable).
    title = _run_window_command(["xdotool", "getactivewindow", "getwindowname"])
    if title:
        return title

    # Methods 2-4, in decreasing order of reliability.
    for probe in (_active_window_from_xprop,
                  _active_window_from_wmctrl,
                  _active_window_from_xlib):
        title = probe()
        if title:
            return title

    return "Unknown Window"


def _run_window_command(command) -> str:
    """Run *command* with a 1 s timeout; return stripped stdout, or "" on failure."""
    try:
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=1,
            check=False,
        )
    except (subprocess.TimeoutExpired, OSError):
        # OSError covers FileNotFoundError (tool not installed) as well as
        # other launch failures such as a non-executable binary.
        return ""
    if completed.returncode != 0:
        return ""
    return completed.stdout.strip()


def _active_window_from_xprop() -> str:
    """Read _NET_ACTIVE_WINDOW from the root window, then that window's WM_NAME."""
    # Expected format: "_NET_ACTIVE_WINDOW(WINDOW): window id # 0x..."
    output = _run_window_command(["xprop", "-root", "_NET_ACTIVE_WINDOW"])
    if not output:
        # Empty output would make split()[-1] raise IndexError.
        return ""
    window_id = output.split()[-1]

    # Expected format: WM_NAME(STRING) = "Window title"
    name = _run_window_command(["xprop", "-id", window_id, "WM_NAME"])
    if "=" not in name:
        return ""
    return name.split("=", 1)[1].strip().strip('"')


def _active_window_from_wmctrl() -> str:
    """Return the title on the first line of ``wmctrl -l -p`` (an approximation)."""
    output = _run_window_command(["wmctrl", "-l", "-p"])
    if not output:
        return ""
    # NOTE(review): the first listed window is not necessarily the active
    # one; this heuristic is kept unchanged from the original code.
    parts = output.split("\n")[0].split(None, 4)
    return parts[4] if len(parts) >= 5 else ""


def _active_window_from_xlib() -> str:
    """Query _NET_ACTIVE_WINDOW through python-xlib, if it is installed."""
    try:
        from Xlib import X, display
    except ImportError:
        return ""

    try:
        d = display.Display()
    except Exception:
        # No reachable X server: best-effort, report "not found".
        return ""

    try:
        root = d.screen().root
        window_id = root.get_full_property(
            d.intern_atom('_NET_ACTIVE_WINDOW'),
            X.AnyPropertyType
        )
        if window_id and window_id.value:
            active_window = d.create_resource_object('window', window_id.value[0])
            window_name = active_window.get_wm_name()
            if window_name:
                return window_name
    except Exception:
        # Deliberately broad: this is the last-resort method and any X error
        # must fall through to the caller's default value.
        pass
    finally:
        # Release the X connection opened above.
        d.close()

    return ""
|
|
|
|
|
|
def _get_active_window_windows() -> str:
    """
    Return the title of the foreground window on Windows.

    Uses ``win32gui`` when it can be imported; otherwise calls the user32
    functions through ``ctypes``.

    Returns:
        The window text, or an empty string if the ``ctypes`` fallback
        raises for any reason.
    """
    try:
        import win32gui

        return win32gui.GetWindowText(win32gui.GetForegroundWindow())
    except ImportError:
        pass

    # Fallback without pywin32.
    try:
        import ctypes

        user32 = ctypes.windll.user32
        handle = user32.GetForegroundWindow()
        size = user32.GetWindowTextLengthW(handle)
        # One extra slot for the terminating NUL character.
        text_buffer = ctypes.create_unicode_buffer(size + 1)
        user32.GetWindowTextW(handle, text_buffer, size + 1)
        return text_buffer.value
    except Exception:
        return ""
|
|
|
|
|
|
def _get_active_window_macos() -> str:
    """
    Return "<application> - <window>" for the frontmost window on macOS.

    Runs an AppleScript through ``osascript`` with a one-second timeout.

    Returns:
        The stripped script output, or an empty string when ``osascript``
        is missing, times out, or exits with a non-zero status.
    """
    script = '''
        tell application "System Events"
            set frontApp to name of first application process whose frontmost is true
            set frontWindow to name of front window of application process frontApp
            return frontApp & " - " & frontWindow
        end tell
        '''
    try:
        completed = subprocess.run(
            ["osascript", "-e", script],
            capture_output=True,
            text=True,
            timeout=1
        )
    except (subprocess.TimeoutExpired, FileNotFoundError):
        return ""

    if completed.returncode != 0:
        return ""
    return completed.stdout.strip()
|
|
|
|
|
|
def extract_roi(frame: np.ndarray, bbox: Tuple[int, int, int, int]) -> np.ndarray:
    """
    Extract a region of interest (ROI) from an image.

    The bounding box is intersected with the image: any part of the box
    lying outside the image is cut off, and the part inside is returned.

    Args:
        frame: Source image as a NumPy array (rows first, then columns).
        bbox: Bounding box ``(x, y, width, height)`` in pixels.

    Returns:
        The slice of ``frame`` covered by the clipped box. This is a NumPy
        basic slice, so it shares memory with ``frame``.

    Raises:
        ValueError: If the width or height is not positive, or if the box
            does not overlap the image at all.
    """
    x, y, w, h = bbox

    # Validate the dimensions.
    if w <= 0 or h <= 0:
        raise ValueError(f"Dimensions de bounding box invalides: width={w}, height={h}")

    img_height, img_width = frame.shape[:2]

    # Intersect the box with the image. The far edges are computed from the
    # *requested* origin, not from the clamped one; otherwise a box starting
    # at a negative coordinate would be shifted into the image instead of
    # being clipped.
    x1 = max(0, x)
    y1 = max(0, y)
    x2 = min(x + w, img_width)
    y2 = min(y + h, img_height)

    # No overlap between the box and the image.
    if x2 <= x1 or y2 <= y1:
        raise ValueError(f"ROI vide avec bbox={bbox}, image_size=({img_width}, {img_height})")

    return frame[y1:y2, x1:x2]
|
|
|
|
|
|
def draw_bbox(frame: np.ndarray, bbox: Tuple[int, int, int, int],
              label: str = "", color: Tuple[int, int, int] = (0, 255, 0),
              thickness: int = 2) -> np.ndarray:
    """
    Draw a bounding box, with an optional label, on a copy of an image.

    Args:
        frame: Image to draw on; it is copied, not modified.
        bbox: Bounding box ``(x, y, width, height)`` in pixels.
        label: Text to display next to the box (optional).
        color: BGR colour of the box and of the label background
            (default: green).
        thickness: Line thickness of the box in pixels.

    Returns:
        A copy of ``frame`` with the box (and label, if any) drawn on it.
    """
    canvas = frame.copy()
    left, top, box_w, box_h = bbox

    # Box outline.
    cv2.rectangle(canvas, (left, top), (left + box_w, top + box_h), color, thickness)

    if not label:
        return canvas

    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.6
    font_thickness = 2
    (text_w, text_h), baseline = cv2.getTextSize(
        label, font, font_scale, font_thickness
    )

    # Put the label above the box when there is room for it, otherwise
    # below the box.
    if top - 10 > text_h:
        text_y = top - 10
    else:
        text_y = top + box_h + text_h + 10

    # Filled background rectangle behind the text.
    background_top_left = (left, text_y - text_h - baseline)
    background_bottom_right = (left + text_w, text_y + baseline)
    cv2.rectangle(canvas, background_top_left, background_bottom_right, color, -1)

    # White text on top of the background.
    cv2.putText(
        canvas,
        label,
        (left, text_y),
        font,
        font_scale,
        (255, 255, 255),
        font_thickness
    )

    return canvas
|
|
|
|
|
|
def resize_image(image: np.ndarray, max_width: int = 1920,
                 max_height: int = 1080) -> np.ndarray:
    """
    Shrink an image to fit within a maximum size, keeping its aspect ratio.

    Images that already fit are returned unchanged (the same object, not a
    copy); images are never enlarged.

    Args:
        image: Image to resize.
        max_width: Maximum width in pixels (must be positive).
        max_height: Maximum height in pixels (must be positive).

    Returns:
        The original image if it already fits or is empty, otherwise a
        resized image produced with ``cv2.INTER_AREA`` interpolation.

    Raises:
        ValueError: If ``max_width`` or ``max_height`` is not positive.
    """
    if max_width <= 0 or max_height <= 0:
        raise ValueError(
            f"Dimensions maximales invalides: max_width={max_width}, max_height={max_height}"
        )

    height, width = image.shape[:2]

    # An empty image has nothing to scale and would divide by zero below.
    if width == 0 or height == 0:
        return image

    # Scale factor that makes both dimensions fit.
    ratio = min(max_width / width, max_height / height)

    # The image is already small enough: do not resize it.
    if ratio >= 1.0:
        return image

    # Truncate like the original code, but never down to 0 pixels: for very
    # elongated images the short side could otherwise vanish.
    new_width = max(1, int(width * ratio))
    new_height = max(1, int(height * ratio))

    return cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)
|
|
|
|
|
|
def save_image(image: np.ndarray, filepath: str) -> bool:
    """
    Save an image to disk.

    Args:
        image: Image to save.
        filepath: Destination file path.

    Returns:
        True if the image was written, False otherwise. A failure is
        reported on stdout and never raised.
    """
    try:
        # cv2.imwrite reports most failures (for example an unwritable
        # path) through its boolean return value rather than an exception,
        # so that value must be propagated.
        written = bool(cv2.imwrite(filepath, image))
    except Exception as e:
        print(f"Erreur lors de la sauvegarde de l'image: {e}")
        return False

    if not written:
        print(f"Impossible de sauvegarder l'image: {filepath}")
    return written
|
|
|
|
|
|
def load_image(filepath: str) -> Optional[np.ndarray]:
    """
    Load an image from disk.

    Args:
        filepath: Path of the image file.

    Returns:
        The value returned by ``cv2.imread``, or None when that value is
        None or when an exception is raised. Both failure cases print a
        message on stdout.
    """
    try:
        loaded = cv2.imread(filepath)
        if loaded is not None:
            return loaded

        print(f"Impossible de charger l'image: {filepath}")
        return None
    except Exception as e:
        print(f"Erreur lors du chargement de l'image: {e}")
        return None
|
|
|
|
|
|
if __name__ == "__main__":
    # Basic smoke tests for the image utilities; results are only printed.
    print("Test des utilitaires d'image RPA Vision V2")
    print("=" * 50)

    # Test 1: screen capture.
    print("\n1. Test capture_screen():")
    try:
        shot = capture_screen()
    except Exception as e:
        print(f" ✗ Échec: {e}")
    else:
        try:
            print(f" ✓ Capture réussie: {shot.shape} (H x W x C)")
            print(f" Type: {shot.dtype}")
        except Exception as e:
            print(f" ✗ Échec: {e}")

    # Test 2: active window.
    print("\n2. Test get_active_window():")
    active_title = get_active_window()
    if not active_title:
        print(" ⚠ Impossible de déterminer la fenêtre active")
    else:
        print(f" ✓ Fenêtre active: '{active_title}'")

    # Test 3: ROI extraction.
    print("\n3. Test extract_roi():")
    try:
        # Black test image containing a green rectangle.
        canvas = np.zeros((480, 640, 3), dtype=np.uint8)
        canvas[100:200, 150:300] = [0, 255, 0]

        # ROI exactly covering the rectangle.
        inner_roi = extract_roi(canvas, (150, 100, 150, 100))
        print(f" ✓ ROI extraite: {inner_roi.shape}")

        # Box overflowing the image (should be limited to the image).
        edge_roi = extract_roi(canvas, (600, 400, 100, 100))
        print(f" ✓ ROI avec bbox hors limites: {edge_roi.shape}")
    except Exception as e:
        print(f" ✗ Échec: {e}")

    # Test 4: bounding-box drawing.
    print("\n4. Test draw_bbox():")
    try:
        canvas = np.zeros((480, 640, 3), dtype=np.uint8)

        # Draw two labelled boxes, the second on top of the first result.
        annotated = draw_bbox(canvas, (100, 100, 200, 150), "Bouton 1", (0, 255, 0))
        annotated = draw_bbox(annotated, (350, 200, 150, 100), "Bouton 2", (255, 0, 0))

        print(f" ✓ Bounding boxes dessinées: {annotated.shape}")
    except Exception as e:
        print(f" ✗ Échec: {e}")

    # Test 5: resizing.
    print("\n5. Test resize_image():")
    try:
        big = np.zeros((2160, 3840, 3), dtype=np.uint8)
        shrunk = resize_image(big, max_width=1920, max_height=1080)
        print(f" ✓ Image redimensionnée: {big.shape} -> {shrunk.shape}")
    except Exception as e:
        print(f" ✗ Échec: {e}")

    print("\n✓ Tests terminés!")
|