# Geniusia_v2/geniusia2/core/utils/image_utils.py

"""
Utilitaires pour la capture d'écran et le traitement d'images
Fournit des fonctions pour capturer l'écran, extraire des ROI et dessiner des bounding boxes
"""

import numpy as np
import cv2
from typing import Tuple, Optional
import platform
import subprocess


def capture_screen() -> np.ndarray:
    """
    Capture the full screen and return it as a numpy array.

    Dispatches on ``platform.system()`` to the matching platform helper
    ("Linux", "Windows" or "Darwin").

    Returns:
        Screenshot in BGR channel order (OpenCV's standard layout)

    Raises:
        RuntimeError: If the operating system is not supported or the
            capture fails. The underlying exception is chained as
            ``__cause__`` so the original traceback stays available.
    """
    try:
        # Pick the capture helper that matches the running operating system
        system = platform.system()

        if system == "Linux":
            return _capture_screen_linux()
        elif system == "Windows":
            return _capture_screen_windows()
        elif system == "Darwin":  # macOS
            return _capture_screen_macos()
        else:
            raise RuntimeError(f"Système d'exploitation non supporté: {system}")

    except Exception as e:
        # Explicit chaining keeps the real failure visible to callers and logs
        raise RuntimeError(f"Échec de la capture d'écran: {str(e)}") from e


def _capture_screen_linux() -> np.ndarray:
    """
    Linux-specific screen capture.

    Grabs the monitor stored at index 1 of ``mss``'s monitor list and
    converts the BGRA pixels to BGR. When ``mss`` cannot be imported, the
    PIL/Pillow based capture is used instead.
    """
    try:
        import mss
        import mss.tools

        with mss.mss() as grabber:
            # Index 1 is the entry this module treats as the primary monitor
            bgra_pixels = np.array(grabber.grab(grabber.monitors[1]))

            # Drop the alpha channel: BGRA -> BGR
            return cv2.cvtColor(bgra_pixels, cv2.COLOR_BGRA2BGR)
    except ImportError:
        # mss is not installed: fall back to PIL/Pillow
        return _capture_screen_pil()


def _capture_screen_windows() -> np.ndarray:
    """
    Windows-specific screen capture.

    Uses ``mss`` to grab the monitor at index 1 and returns it as a BGR
    array; falls back to the PIL/Pillow capture when ``mss`` is missing.
    """
    try:
        import mss
        import mss.tools

        with mss.mss() as sct:
            raw_shot = sct.grab(sct.monitors[1])
            # mss delivers BGRA pixels; OpenCV code expects plain BGR
            return cv2.cvtColor(np.array(raw_shot), cv2.COLOR_BGRA2BGR)
    except ImportError:
        return _capture_screen_pil()


def _capture_screen_macos() -> np.ndarray:
    """
    macOS-specific screen capture.

    Uses ``mss`` to grab the monitor at index 1 and returns it as a BGR
    array; falls back to the PIL/Pillow capture when ``mss`` is missing.
    """
    try:
        import mss
        import mss.tools

        with mss.mss() as session:
            target = session.monitors[1]
            pixels = np.array(session.grab(target))
            # Convert BGRA to BGR before handing the frame back
            bgr = cv2.cvtColor(pixels, cv2.COLOR_BGRA2BGR)
            return bgr
    except ImportError:
        return _capture_screen_pil()


def _capture_screen_pil() -> np.ndarray:
    """
    Screen capture using PIL/Pillow (fallback).

    Returns:
        Screenshot in BGR channel order (OpenCV format)

    Raises:
        RuntimeError: If Pillow cannot be imported (the ImportError is
            chained as ``__cause__``)
    """
    try:
        from PIL import ImageGrab

        # grab() is not guaranteed to return an RGB image (some platforms
        # yield RGBA); force 3 channels so COLOR_RGB2BGR always applies.
        screenshot = ImageGrab.grab().convert("RGB")
        img = np.array(screenshot)

        # Convert RGB to BGR (OpenCV format)
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

        return img
    except ImportError as exc:
        raise RuntimeError("Aucune bibliothèque de capture d'écran disponible. "
                         "Installez 'mss' ou 'Pillow'.") from exc


def get_active_window() -> str:
    """
    Return the title of the currently active window.

    Returns:
        Title of the active window, or an empty string when the platform is
        not handled or the lookup raises
    """
    try:
        # One lookup helper per supported platform.system() value
        lookups = {
            "Linux": _get_active_window_linux,
            "Windows": _get_active_window_windows,
            "Darwin": _get_active_window_macos,  # macOS
        }
        lookup = lookups.get(platform.system())
        if lookup is None:
            return ""
        return lookup()

    except Exception as e:
        print(f"Erreur lors de la récupération de la fenêtre active: {e}")
        return ""


def _run_window_query(command) -> Optional[str]:
    """
    Run an external window-inspection command and return its trimmed stdout.

    Args:
        command: Argument list handed to ``subprocess.run`` (no shell)

    Returns:
        Stripped standard output when the command exits with status 0,
        otherwise None (non-zero exit status, 1 second timeout, the
        executable could not be launched, or its output could not be decoded)
    """
    try:
        result = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=1,
            check=False
        )
    except (subprocess.TimeoutExpired, OSError, UnicodeDecodeError):
        # OSError covers a missing binary as well as permission problems
        return None
    if result.returncode != 0:
        return None
    return result.stdout.strip()


def _active_window_via_xlib() -> Optional[str]:
    """
    Query the active window title through python-xlib, when it is installed.

    Returns:
        The window name, or None when python-xlib is missing, no active
        window is reported, or the X11 request fails
    """
    # Import separately: nothing from Xlib may be referenced in an except
    # clause, because those names stay unbound when the import itself fails.
    try:
        from Xlib import X, display
    except ImportError:
        return None

    conn = None
    try:
        conn = display.Display()
        root = conn.screen().root

        # Ask the root window which window is currently active
        active_prop = root.get_full_property(
            conn.intern_atom('_NET_ACTIVE_WINDOW'),
            X.AnyPropertyType
        )

        if active_prop and active_prop.value:
            active_window = conn.create_resource_object('window', active_prop.value[0])
            window_name = active_window.get_wm_name()
            if isinstance(window_name, bytes):
                # Keep the str return contract if the name arrives as bytes
                window_name = window_name.decode("utf-8", errors="replace")
            if window_name:
                return window_name
    except Exception:
        # Best-effort lookup: any X11 failure simply means "no title found"
        pass
    finally:
        # Release the X connection so repeated calls do not accumulate clients
        if conn is not None:
            try:
                conn.close()
            except Exception:
                pass

    return None


def _get_active_window_linux() -> str:
    """
    Get the active window title on Linux, trying several fallbacks in order.

    The methods tried are: ``xdotool``, ``xprop``, ``wmctrl`` and finally
    python-xlib. Each method is best-effort; a missing tool, a timeout or a
    failed query moves on to the next one instead of raising.

    Returns:
        Title of the active window, or "Unknown Window" when every method
        fails
    """
    # Method 1: xdotool (the most reliable)
    title = _run_window_query(["xdotool", "getactivewindow", "getwindowname"])
    if title:
        return title

    # Method 2: xprop with _NET_ACTIVE_WINDOW
    # Output format: "_NET_ACTIVE_WINDOW(WINDOW): window id # 0x..."
    root_property = _run_window_query(["xprop", "-root", "_NET_ACTIVE_WINDOW"])
    if root_property:
        # Non-empty stripped text always yields at least one token
        window_id = root_property.split()[-1]

        # Output format: WM_NAME(STRING) = "Window title"
        wm_name = _run_window_query(["xprop", "-id", window_id, "WM_NAME"])
        if wm_name and '=' in wm_name:
            title = wm_name.split('=', 1)[1].strip().strip('"')
            if title:
                return title

    # Method 3: wmctrl
    listing = _run_window_query(["wmctrl", "-l", "-p"])
    if listing:
        # The first listed window is used as an approximation of the active one
        parts = listing.split('\n')[0].split(None, 4)
        if len(parts) >= 5:
            return parts[4]

    # Method 4: python-xlib (if available)
    title = _active_window_via_xlib()
    if title:
        return title

    return "Unknown Window"


def _get_active_window_windows() -> str:
    """
    Get the active window title on Windows.

    Prefers ``win32gui`` (pywin32). When that module cannot be imported,
    the same query is made against ``user32`` through ``ctypes``; any
    failure of that fallback yields an empty string.
    """
    try:
        import win32gui

        return win32gui.GetWindowText(win32gui.GetForegroundWindow())
    except ImportError:
        pass

    # pywin32 is unavailable: call user32 directly
    try:
        import ctypes

        user32 = ctypes.windll.user32
        handle = user32.GetForegroundWindow()
        # One extra slot for the terminating null character
        capacity = user32.GetWindowTextLengthW(handle) + 1
        title_buffer = ctypes.create_unicode_buffer(capacity)
        user32.GetWindowTextW(handle, title_buffer, capacity)
        return title_buffer.value
    except Exception:
        return ""


def _get_active_window_macos() -> str:
    """
    Get the active window title on macOS.

    Runs an AppleScript through ``osascript`` that returns
    "<frontmost application> - <front window name>".

    Returns:
        The trimmed script output, or an empty string when ``osascript`` is
        missing, times out (1 second) or exits with a non-zero status
    """
    try:
        applescript = '''
        tell application "System Events"
            set frontApp to name of first application process whose frontmost is true
            set frontWindow to name of front window of application process frontApp
            return frontApp & " - " & frontWindow
        end tell
        '''
        completed = subprocess.run(
            ["osascript", "-e", applescript],
            capture_output=True,
            text=True,
            timeout=1
        )
    except (subprocess.TimeoutExpired, FileNotFoundError):
        return ""

    if completed.returncode != 0:
        return ""
    return completed.stdout.strip()


def extract_roi(frame: np.ndarray, bbox: Tuple[int, int, int, int]) -> np.ndarray:
    """
    Extract a region of interest (ROI) from an image.

    The bounding box is intersected with the image, so a box that sticks
    out of the frame is clipped to the visible part.

    Args:
        frame: Source image as a numpy array
        bbox: Bounding box (x, y, width, height) in pixels

    Returns:
        The region of interest (a slice of ``frame``)

    Raises:
        ValueError: If width or height is not positive, or if the bounding
            box does not overlap the image at all
    """
    x, y, w, h = bbox

    # Validate the dimensions
    if w <= 0 or h <= 0:
        raise ValueError(f"Dimensions de bounding box invalides: width={w}, height={h}")

    # Image dimensions
    img_height, img_width = frame.shape[:2]

    # Compute the far edges from the ORIGINAL origin before clamping it;
    # otherwise a negative x/y would shift and enlarge the extracted region.
    x1 = max(0, x)
    y1 = max(0, y)
    x2 = min(x + w, img_width)
    y2 = min(y + h, img_height)

    # No overlap between the bounding box and the image
    if x2 <= x1 or y2 <= y1:
        raise ValueError(f"ROI vide avec bbox={bbox}, image_size=({img_width}, {img_height})")

    return frame[y1:y2, x1:x2]


def draw_bbox(frame: np.ndarray, bbox: Tuple[int, int, int, int],
              label: str = "", color: Tuple[int, int, int] = (0, 255, 0),
              thickness: int = 2) -> np.ndarray:
    """
    Draw a bounding box on an image, with an optional label.

    Args:
        frame: Image to draw on (left untouched; drawing happens on a copy)
        bbox: Bounding box (x, y, width, height) in pixels
        label: Text shown next to the box (optional)
        color: BGR colour of the box and of the label background (default: green)
        thickness: Line thickness of the box in pixels

    Returns:
        Copy of the image with the bounding box (and label) drawn
    """
    # Work on a copy so the caller's image is not modified
    canvas = frame.copy()

    x, y, w, h = bbox
    cv2.rectangle(canvas, (x, y), (x + w, y + h), color, thickness)

    # Nothing more to draw without a label
    if not label:
        return canvas

    font_face = cv2.FONT_HERSHEY_SIMPLEX
    scale = 0.6
    stroke = 2
    text_size, baseline = cv2.getTextSize(label, font_face, scale, stroke)
    text_w, text_h = text_size

    # Put the label above the box when there is room, otherwise below it
    above_y = y - 10
    if above_y > text_h:
        anchor_y = above_y
    else:
        anchor_y = y + h + text_h + 10

    # Filled background rectangle behind the text
    top_left = (x, anchor_y - text_h - baseline)
    bottom_right = (x + text_w, anchor_y + baseline)
    cv2.rectangle(canvas, top_left, bottom_right, color, -1)

    # White label text
    cv2.putText(canvas, label, (x, anchor_y), font_face, scale,
                (255, 255, 255), stroke)

    return canvas


def resize_image(image: np.ndarray, max_width: int = 1920,
                max_height: int = 1080) -> np.ndarray:
    """
    Shrink an image so it fits within the given limits, keeping its aspect ratio.

    Images that already fit are returned as-is (the same object, not a
    copy); the function never enlarges an image.

    Args:
        image: Image to resize
        max_width: Maximum width in pixels (must be positive)
        max_height: Maximum height in pixels (must be positive)

    Returns:
        The resized image, or the original image when no resize is needed
        or when the image has no pixels

    Raises:
        ValueError: If max_width or max_height is not positive
    """
    if max_width <= 0 or max_height <= 0:
        raise ValueError(
            f"Dimensions maximales invalides: max_width={max_width}, max_height={max_height}"
        )

    height, width = image.shape[:2]

    # An empty image cannot be scaled (and would divide by zero below)
    if height == 0 or width == 0:
        return image

    # Scale factor that makes both dimensions fit
    ratio = min(max_width / width, max_height / height)

    # Already small enough: do not resize
    if ratio >= 1.0:
        return image

    # Truncation could yield 0 for very elongated images; keep at least 1 px
    new_width = max(1, int(width * ratio))
    new_height = max(1, int(height * ratio))

    return cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)


def save_image(image: np.ndarray, filepath: str) -> bool:
    """
    Save an image to disk.

    Args:
        image: Image to save
        filepath: Destination file path

    Returns:
        True if the image was written, False otherwise
    """
    try:
        # imwrite signals many failures (e.g. an unwritable path) through
        # its boolean return value rather than by raising.
        written = cv2.imwrite(filepath, image)
    except Exception as e:
        print(f"Erreur lors de la sauvegarde de l'image: {e}")
        return False

    if not written:
        print(f"Erreur lors de la sauvegarde de l'image: cv2.imwrite a échoué pour {filepath}")
        return False

    return True


def load_image(filepath: str) -> Optional[np.ndarray]:
    """
    Load an image from disk.

    Args:
        filepath: Path of the image file

    Returns:
        The image as a numpy array, or None when it could not be loaded
    """
    try:
        loaded = cv2.imread(filepath)
        if loaded is not None:
            return loaded

        # imread reports an unreadable file by returning None
        print(f"Impossible de charger l'image: {filepath}")
        return None
    except Exception as e:
        print(f"Erreur lors du chargement de l'image: {e}")
        return None


if __name__ == "__main__":
    # Basic smoke tests for the image utilities
    print("Test des utilitaires d'image RPA Vision V2")
    print("=" * 50)

    # Test 1: screen capture
    print("\n1. Test capture_screen():")
    try:
        shot = capture_screen()
        print(f"   ✓ Capture réussie: {shot.shape} (H x W x C)")
        print(f"   Type: {shot.dtype}")
    except Exception as e:
        print(f"   ✗ Échec: {e}")

    # Test 2: active window
    print("\n2. Test get_active_window():")
    active_title = get_active_window()
    if not active_title:
        print("   ⚠ Impossible de déterminer la fenêtre active")
    else:
        print(f"   ✓ Fenêtre active: '{active_title}'")

    # Test 3: ROI extraction
    print("\n3. Test extract_roi():")
    try:
        # Black test image with a green rectangle
        sample = np.zeros((480, 640, 3), dtype=np.uint8)
        sample[100:200, 150:300] = [0, 255, 0]

        # Box fully inside the image
        inner_roi = extract_roi(sample, (150, 100, 150, 100))
        print(f"   ✓ ROI extraite: {inner_roi.shape}")

        # Box running past the image border (expected to be clipped)
        clipped_roi = extract_roi(sample, (600, 400, 100, 100))
        print(f"   ✓ ROI avec bbox hors limites: {clipped_roi.shape}")
    except Exception as e:
        print(f"   ✗ Échec: {e}")

    # Test 4: bounding box drawing
    print("\n4. Test draw_bbox():")
    try:
        annotated = np.zeros((480, 640, 3), dtype=np.uint8)

        # Draw several bounding boxes, each call working on the previous result
        demo_boxes = [
            ((100, 100, 200, 150), "Bouton 1", (0, 255, 0)),
            ((350, 200, 150, 100), "Bouton 2", (255, 0, 0)),
        ]
        for box, caption, bgr in demo_boxes:
            annotated = draw_bbox(annotated, box, caption, bgr)

        print(f"   ✓ Bounding boxes dessinées: {annotated.shape}")
    except Exception as e:
        print(f"   ✗ Échec: {e}")

    # Test 5: resizing
    print("\n5. Test resize_image():")
    try:
        big = np.zeros((2160, 3840, 3), dtype=np.uint8)
        shrunk = resize_image(big, max_width=1920, max_height=1080)
        print(f"   ✓ Image redimensionnée: {big.shape} -> {shrunk.shape}")
    except Exception as e:
        print(f"   ✗ Échec: {e}")

    print("\n✓ Tests terminés!")