Initial commit
This commit is contained in:
182
rpa_vision_v3/core/capture/screen_capturer.py
Normal file
182
rpa_vision_v3/core/capture/screen_capturer.py
Normal file
@@ -0,0 +1,182 @@
|
||||
"""Screen capture using mss and pyautogui with robust validation"""
|
||||
import numpy as np
|
||||
from typing import Optional, Dict
|
||||
import logging
|
||||
import time
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ScreenCapturer:
    """Capture screenshots as RGB numpy arrays, with validation and retries.

    ``mss`` is used when it can be imported; otherwise ``pyautogui`` is used
    as a fallback. The instance can be used as a context manager so the mss
    handle is released deterministically instead of relying on ``__del__``.
    """

    # Accepted range (in pixels) for both the height and the width of a capture.
    MIN_DIMENSION = 100
    MAX_DIMENSION = 10000
    # Pause between two failed capture attempts, in seconds.
    RETRY_DELAY_SECONDS = 0.1

    def __init__(self):
        """Select a capture backend, preferring mss over pyautogui.

        Raises:
            ImportError: If neither mss nor pyautogui can be imported.
        """
        # Define both handles up front so later attribute access is well-defined
        # whichever backend ends up being selected.
        self.sct = None
        self.pyautogui = None
        try:
            import mss

            self.sct = mss.mss()
            self.method = "mss"
            logger.info("Using mss for screen capture")
        except ImportError:
            try:
                import pyautogui
            except ImportError as exc:
                raise ImportError(
                    "Neither mss nor pyautogui available for screen capture. "
                    "Install with: pip install mss pyautogui"
                ) from exc
            self.pyautogui = pyautogui
            self.method = "pyautogui"
            logger.info("Using pyautogui for screen capture")

    def __enter__(self):
        """Return the capturer itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Release the capture backend when leaving a ``with`` block."""
        self.close()
        return False

    def capture(self, max_retries: int = 3) -> Optional[np.ndarray]:
        """Capture the current screen, retrying on failure.

        Args:
            max_retries: Maximum number of capture attempts (default: 3).

        Returns:
            Screenshot as a uint8 numpy array of shape (H, W, 3) in RGB order,
            or None if every attempt failed or produced an invalid image.
        """
        for attempt in range(max_retries):
            try:
                # Capture using the selected backend.
                if self.method == "mss":
                    img = self._capture_mss()
                else:
                    img = self._capture_pyautogui()

                if self._validate_image(img):
                    # _validate_image only checks that a uint8 conversion is
                    # possible; it cannot hand the converted array back, so the
                    # conversion is applied here.
                    if img.dtype != np.uint8:
                        img = img.astype(np.uint8)
                    logger.debug("Capture successful: %s", img.shape)
                    return img

                logger.warning(
                    "Invalid image captured (attempt %s/%s)",
                    attempt + 1,
                    max_retries,
                )
            except Exception as e:
                # Best-effort API: any backend failure counts as a failed attempt.
                logger.error(
                    "Capture failed (attempt %s/%s): %s",
                    attempt + 1,
                    max_retries,
                    e,
                )

            # Small delay before retrying, but not after the last attempt.
            if attempt < max_retries - 1:
                time.sleep(self.RETRY_DELAY_SECONDS)

        logger.error("All %s capture attempts failed", max_retries)
        return None

    def _validate_image(self, img: np.ndarray) -> bool:
        """Check that a captured image looks like a usable RGB screenshot.

        Args:
            img: Image to validate.

        Returns:
            True if ``img`` is a non-empty 3-channel array whose height and
            width are within [MIN_DIMENSION, MAX_DIMENSION] and whose dtype is
            uint8 or convertible to uint8; False otherwise.
        """
        if img is None:
            logger.error("Image is None")
            return False

        if not isinstance(img, np.ndarray):
            logger.error("Image is not numpy array: %s", type(img))
            return False

        # size == 0 also covers a zero height, width or channel count.
        if img.size == 0:
            logger.error("Image is empty (size=0)")
            return False

        if img.ndim != 3:
            logger.error("Image has wrong dimensions: %s (expected 3D)", img.shape)
            return False

        height, width, channels = img.shape

        if channels != 3:
            logger.error(
                "Image has wrong number of channels: %s (expected 3)", channels
            )
            return False

        # Check for reasonable dimensions (not too small, not too large).
        if height < self.MIN_DIMENSION or width < self.MIN_DIMENSION:
            logger.warning("Image is very small: %s", img.shape)
            return False

        if height > self.MAX_DIMENSION or width > self.MAX_DIMENSION:
            logger.warning("Image is very large: %s", img.shape)
            return False

        if img.dtype != np.uint8:
            logger.warning(
                "Image has unexpected dtype: %s (expected uint8)", img.dtype
            )
            # Only verify that conversion is possible; the caller performs it.
            try:
                img.astype(np.uint8)
            except (TypeError, ValueError, OverflowError):
                return False

        return True

    def _capture_mss(self) -> np.ndarray:
        """Capture using mss (fast method).

        Returns:
            C-contiguous array of shape (H, W, 3) in RGB order.

        Raises:
            ValueError: If the grabbed frame is empty or is not a 3D array with
                at least three channels.
        """
        # Index 0 is all monitors combined, 1 is the primary monitor; fall back
        # to the combined view when no individual monitor is listed.
        monitors = self.sct.monitors
        monitor = monitors[1] if len(monitors) > 1 else monitors[0]

        raw = np.array(self.sct.grab(monitor))

        # Validate before slicing so a malformed frame raises the documented
        # ValueError instead of an IndexError.
        if raw.ndim != 3 or raw.size == 0 or raw.shape[2] < 3:
            raise ValueError("Captured image has invalid dimensions")

        # BGRA -> RGB: take channels 2, 1, 0. The reversed slice is a negatively
        # strided view, so copy it into a contiguous buffer for consumers that
        # reject such views.
        return np.ascontiguousarray(raw[:, :, 2::-1])

    def _capture_pyautogui(self) -> np.ndarray:
        """Capture using pyautogui (fallback method).

        Returns:
            Screenshot converted to a numpy array; a 4-channel result has its
            last (alpha) channel removed.

        Raises:
            ValueError: If the screenshot is empty or has fewer than two
                dimensions.
        """
        img = np.array(self.pyautogui.screenshot())

        if img.ndim < 2 or img.size == 0:
            raise ValueError("Captured image has invalid dimensions")

        # The backend may hand back an RGBA image; validation requires exactly
        # three channels, so drop the alpha channel rather than failing.
        if img.ndim == 3 and img.shape[2] == 4:
            img = np.ascontiguousarray(img[:, :, :3])

        return img

    def get_active_window(self) -> Optional[Dict]:
        """Get active window information.

        Returns:
            Dict with keys 'title', 'x', 'y', 'width' and 'height', or None if
            the information is unavailable (including when pygetwindow cannot
            be imported or raises).
        """
        try:
            import pygetwindow as gw

            active = gw.getActiveWindow()
            if active:
                return {
                    'title': active.title,
                    'x': active.left,
                    'y': active.top,
                    'width': active.width,
                    'height': active.height,
                }
        except Exception as e:
            # Deliberately best-effort: window info is optional metadata.
            logger.debug("Could not get active window: %s", e)

        return None

    def close(self) -> None:
        """Release the mss handle, if any. Safe to call more than once.

        After closing, ``capture`` on an mss-backed instance returns None.
        """
        sct = getattr(self, "sct", None)
        if sct is None:
            return
        # Clear the reference first so a failing close() is not retried.
        self.sct = None
        try:
            sct.close()
        except Exception as exc:
            logger.debug("Error while closing screen capture handle: %s", exc)

    def __del__(self):
        """Cleanup resources."""
        try:
            self.close()
        except Exception:
            # Finalizers may run during interpreter shutdown, when module
            # globals such as the logger are already gone; never raise here.
            pass
|
||||
Reference in New Issue
Block a user