Initial commit

2026-03-05 00:20:25 +01:00
commit dcd4de9945
1954 changed files with 669380 additions and 0 deletions
--- a/rpa_vision_v3/core/capture/screen_capturer.py
+++ b/rpa_vision_v3/core/capture/screen_capturer.py
@@ -0,0 +1,182 @@
+"""Screen capture using mss and pyautogui with robust validation"""
+import numpy as np
+from typing import Optional, Dict
+import logging
+import time
+
+# Module-level logger named after this module, so capture diagnostics can be
+# filtered and configured through the standard logging hierarchy.
+logger = logging.getLogger(__name__)
+
+
+class ScreenCapturer:
+    """Captures screenshots efficiently with validation and retry logic"""
+    
+    def __init__(self):
+        """Initialize screen capturer with fallback support"""
+        try:
+            import mss
+            self.sct = mss.mss()
+            self.method = "mss"
+            logger.info("Using mss for screen capture")
+        except ImportError:
+            try:
+                import pyautogui
+                self.pyautogui = pyautogui
+                self.method = "pyautogui"
+                logger.info("Using pyautogui for screen capture")
+            except ImportError:
+                raise ImportError(
+                    "Neither mss nor pyautogui available for screen capture. "
+                    "Install with: pip install mss pyautogui"
+                )
+    
+    def capture(self, max_retries: int = 3) -> Optional[np.ndarray]:
+        """Capture current screen with retry logic and validation
+        
+        Args:
+            max_retries: Maximum number of retry attempts (default: 3)
+            
+        Returns:
+            Screenshot as numpy array (H, W, 3) RGB, or None if all attempts failed
+        """
+        for attempt in range(max_retries):
+            try:
+                # Capture using selected method
+                if self.method == "mss":
+                    img = self._capture_mss()
+                else:
+                    img = self._capture_pyautogui()
+                
+                # Validate captured image
+                if self._validate_image(img):
+                    logger.debug(f"Capture successful: {img.shape}")
+                    return img
+                else:
+                    logger.warning(
+                        f"Invalid image captured (attempt {attempt + 1}/{max_retries})"
+                    )
+                    
+            except Exception as e:
+                logger.error(
+                    f"Capture failed (attempt {attempt + 1}/{max_retries}): {e}"
+                )
+                
+            # Small delay before retry
+            if attempt < max_retries - 1:
+                time.sleep(0.1)
+        
+        logger.error(f"All {max_retries} capture attempts failed")
+        return None
+    
+    def _validate_image(self, img: np.ndarray) -> bool:
+        """Validate captured image with comprehensive checks
+        
+        Args:
+            img: Image to validate
+            
+        Returns:
+            True if image is valid, False otherwise
+        """
+        if img is None:
+            logger.error("Image is None")
+            return False
+        
+        if not isinstance(img, np.ndarray):
+            logger.error(f"Image is not numpy array: {type(img)}")
+            return False
+        
+        if img.size == 0:
+            logger.error("Image is empty (size=0)")
+            return False
+        
+        if len(img.shape) != 3:
+            logger.error(f"Image has wrong dimensions: {img.shape} (expected 3D)")
+            return False
+        
+        if img.shape[0] == 0 or img.shape[1] == 0:
+            logger.error(f"Image has zero width or height: {img.shape}")
+            return False
+        
+        if img.shape[2] != 3:
+            logger.error(f"Image has wrong number of channels: {img.shape[2]} (expected 3)")
+            return False
+        
+        # Check for reasonable dimensions (not too small, not too large)
+        if img.shape[0] < 100 or img.shape[1] < 100:
+            logger.warning(f"Image is very small: {img.shape}")
+            return False
+        
+        if img.shape[0] > 10000 or img.shape[1] > 10000:
+            logger.warning(f"Image is very large: {img.shape}")
+            return False
+        
+        # Check data type
+        if img.dtype != np.uint8:
+            logger.warning(f"Image has unexpected dtype: {img.dtype} (expected uint8)")
+            # Try to convert
+            try:
+                img = img.astype(np.uint8)
+            except:
+                return False
+        
+        return True
+    
+    def _capture_mss(self) -> np.ndarray:
+        """Capture using mss (fast method)"""
+        # Use primary monitor (index 0 is all monitors combined, 1 is primary)
+        # Fallback to all monitors if primary not available
+        monitor_idx = 1 if len(self.sct.monitors) > 1 else 0
+        monitor = self.sct.monitors[monitor_idx]
+        sct_img = self.sct.grab(monitor)
+        
+        # Convert to numpy array
+        img = np.array(sct_img)
+        
+        # Convert BGRA to RGB
+        img = img[:, :, :3][:, :, ::-1]
+        
+        # Basic validation
+        if img.size == 0 or img.shape[0] == 0 or img.shape[1] == 0:
+            raise ValueError("Captured image has invalid dimensions")
+        
+        return img
+    
+    def _capture_pyautogui(self) -> np.ndarray:
+        """Capture using pyautogui (fallback method)"""
+        screenshot = self.pyautogui.screenshot()
+        img = np.array(screenshot)
+        
+        # Basic validation
+        if img.size == 0 or img.shape[0] == 0 or img.shape[1] == 0:
+            raise ValueError("Captured image has invalid dimensions")
+        
+        return img
+    
+    def get_active_window(self) -> Optional[Dict]:
+        """Get active window information
+        
+        Returns:
+            Dict with window title, position, size, or None if unavailable
+        """
+        try:
+            import pygetwindow as gw
+            active = gw.getActiveWindow()
+            if active:
+                return {
+                    'title': active.title,
+                    'x': active.left,
+                    'y': active.top,
+                    'width': active.width,
+                    'height': active.height
+                }
+        except Exception as e:
+            logger.debug(f"Could not get active window: {e}")
+        
+        return None
+    
+    def __del__(self):
+        """Cleanup resources"""
+        if hasattr(self, 'sct'):
+            try:
+                self.sct.close()
+            except:
+                pass