feat: SurfaceClassifier + UIAHelper — détection et wrapper Python

SurfaceClassifier — détecte le type d'application au runtime
- 4 surfaces : citrix / windows_native / web_local / unknown
- Paramètres adaptés par surface :
  * Citrix : OCR 0.65, timeouts 15s, retries 3x (compression JPEG tolérée)
  * Windows natif : OCR 0.75, timeouts 8s, UIA bonus si dispo
  * Web : OCR 0.80, timeouts 5s, paramètres rapides
  * Unknown : fallback sûr
- resolve_order() construit la chaîne selon les capacités disponibles
- Détection UIA via health check du helper Rust
- Détection CDP via localhost:9222

UIAHelper — wrapper Python pour lea_uia.exe
- Subprocess : arguments en ligne de commande, réponse JSON sur stdout
- 3 méthodes : query_at(x,y), find_by_name(name,...), capture_focused()
- Fallback silencieux (None) si helper absent, timeout, crash
- Singleton global get_shared_helper()
- Dataclass UiaElement avec center(), is_clickable(), path_signature()

29 nouveaux tests (détection 4 surfaces, dataclass, wrapper, mocks).
485 tests au total, 0 régression.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dom
2026-04-10 10:54:19 +02:00
parent f85d56ac05
commit ac9c207474
3 changed files with 968 additions and 0 deletions

View File

@@ -0,0 +1,337 @@
# core/workflow/surface_classifier.py
"""
SurfaceClassifier — détecte le type de surface applicative au moment de l'exécution.
4 types de surfaces reconnus :
- citrix : session Citrix/RDP/TSE (wfica32.exe, mstsc.exe, CDViewer.exe)
→ vision pure obligatoire, paramètres tolérants
- windows_native : application Windows native (notepad.exe, explorer.exe, DPI...)
→ vision + UIA bonus, paramètres standards
- web_local : navigateur local (chrome.exe, firefox.exe, msedge.exe)
→ vision + DOM/CDP bonus (si activé), paramètres rapides
- unknown : fallback → vision pure, paramètres par défaut
Le classifier s'exécute UNE SEULE FOIS au début d'une session ou d'un replay.
Son résultat détermine :
1. Quels helpers sont activés (UIA ? CDP ?)
2. Les paramètres de résolution (timeouts, seuils OCR)
3. La stratégie de recovery
Principe : la vision reste le fondement. Le classifier décide juste
des bonus à activer et des paramètres à tuner.
"""
import logging
import os
import platform
import subprocess
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, List, Optional
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
class SurfaceType(str, Enum):
    """Kinds of application surface the classifier can report.

    Members also subclass ``str``, so each one compares equal to its raw
    string value (``SurfaceType.CITRIX == "citrix"``) and can be looked up
    from that value (``SurfaceType("citrix")``).
    """

    # Remote session (Citrix / RDP style clients).
    CITRIX = "citrix"

    # Application running natively on the local Windows desktop.
    WINDOWS_NATIVE = "windows_native"

    # Local web browser.
    WEB_LOCAL = "web_local"

    # Anything that matched none of the above.
    UNKNOWN = "unknown"
# Known process names per surface type. All names are lower-case because the
# classifier lower-cases the process name before the membership test.

# Remote-session clients: treated as Citrix-like surfaces.
# NOTE(review): the original literal listed "cdviewer.exe" twice; the duplicate
# was dropped (a set ignores it anyway). Confirm whether a different process
# name was intended for the second entry.
_CITRIX_PROCESSES = {
    "wfica32.exe",      # Citrix Workspace (Windows 10+)
    "cdviewer.exe",     # Citrix Desktop Viewer
    "mstsc.exe",        # Microsoft Remote Desktop
    "vmware-vmx.exe",   # VMware (RDS case)
    "xen.exe",          # Citrix XenApp
    "receiver.exe",     # Citrix Receiver (legacy)
    "selfservice.exe",  # Citrix Self-Service Plug-in
}

# Local web browsers.
_BROWSER_PROCESSES = {
    "chrome.exe",
    "msedge.exe",
    "firefox.exe",
    "brave.exe",
    "opera.exe",
    "vivaldi.exe",
}

# Windows shell/system processes excluded from the generic "any *.exe is a
# native application" rule.
_SYSTEM_PROCESSES = {
    "explorer.exe",  # Windows shell — special case, still counted as native
    "searchhost.exe",  # Windows Search
    "startmenuexperiencehost.exe",
    "shellexperiencehost.exe",
    "applicationframehost.exe",
}
@dataclass
class SurfaceProfile:
    """Everything known about a detected surface.

    Groups the detected surface type, the window context it was derived
    from, the helper capabilities that may be used on it, and the tuning
    parameters (timeouts, thresholds, retries) chosen for it.
    """

    surface_type: SurfaceType
    process_name: str = ""  # Process owning the active window
    window_title: str = ""  # Title of the active window
    confidence: float = 1.0  # Detection confidence (0-1)

    # --- Available capabilities ---
    uia_available: bool = False  # The UIA helper can be used
    cdp_available: bool = False  # Chrome DevTools Protocol is reachable
    ocr_available: bool = True  # OCR is assumed always available (docTR)
    vlm_available: bool = True  # VLM is assumed always available (qwen2.5vl)

    # --- Surface-specific tuning ---
    timeout_click_ms: int = 10000
    timeout_resolve_ms: int = 5000
    ocr_threshold: float = 0.75
    template_threshold: float = 0.85
    max_retries: int = 2
    retry_delay_ms: int = 2000

    # --- Metadata ---
    detected_at: float = 0.0
    details: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return a nested plain-dict view of the profile.

        Capabilities and tuning parameters are grouped under the
        ``"capabilities"`` and ``"parameters"`` keys; the confidence is
        rounded to three decimals. ``detected_at`` is not included.
        """
        capabilities = {
            "uia": self.uia_available,
            "cdp": self.cdp_available,
            "ocr": self.ocr_available,
            "vlm": self.vlm_available,
        }
        parameters = {
            "timeout_click_ms": self.timeout_click_ms,
            "timeout_resolve_ms": self.timeout_resolve_ms,
            "ocr_threshold": self.ocr_threshold,
            "template_threshold": self.template_threshold,
            "max_retries": self.max_retries,
            "retry_delay_ms": self.retry_delay_ms,
        }
        return {
            "surface_type": self.surface_type.value,
            "process_name": self.process_name,
            "window_title": self.window_title,
            "confidence": round(self.confidence, 3),
            "capabilities": capabilities,
            "parameters": parameters,
            "details": self.details,
        }

    def resolve_order(self) -> List[str]:
        """Return the resolver names to try, in order.

        The vision chain ``ocr`` -> ``template`` -> ``vlm`` is always
        present. It is preceded by ``uia`` on a native Windows surface with
        UIA available, or by ``dom`` on a local web surface with CDP
        available.
        """
        vision_chain = ["ocr", "template", "vlm"]
        bonus: List[str] = []
        if self.uia_available and self.surface_type == SurfaceType.WINDOWS_NATIVE:
            bonus = bonus + ["uia"]
        if self.cdp_available and self.surface_type == SurfaceType.WEB_LOCAL:
            bonus = bonus + ["dom"]
        return bonus + vision_chain
class SurfaceClassifier:
    """Detect the application surface and pick matching parameters.

    Usage::

        classifier = SurfaceClassifier()
        profile = classifier.classify(
            process_name="notepad.exe",
            window_title="Sans titre Bloc-notes",
        )
        if profile.uia_available:
            ...  # lea_uia.exe can be used
    """

    # Lower-case substrings of a window title that mark a remote session.
    _REMOTE_TITLE_MARKERS = ("citrix", "ica session", "rdp session")

    def __init__(self, uia_helper_path: str = ""):
        """
        Args:
            uia_helper_path: Path to lea_uia.exe. When empty, the standard
                locations are searched (see ``_find_uia_helper``).
        """
        self._uia_helper_path = uia_helper_path or self._find_uia_helper()

    def _find_uia_helper(self) -> str:
        """Return the absolute path of the first lea_uia.exe found, or ""."""
        candidates = [
            r"C:\Lea\helpers\lea_uia.exe",
            r".\helpers\lea_uia.exe",
            os.path.join(os.path.dirname(__file__), "..", "..", "agent_rust", "lea_uia",
                         "target", "x86_64-pc-windows-gnu", "release", "lea_uia.exe"),
        ]
        for path in candidates:
            if os.path.isfile(path):
                return os.path.abspath(path)
        return ""

    def classify(
        self,
        process_name: str = "",
        window_title: str = "",
        screen_info: Optional[Dict] = None,
    ) -> SurfaceProfile:
        """Classify a surface from the active-window context.

        Rules are evaluated in this order; the first match wins:

        1. Known remote-session process, or a title containing a
           remote-session marker -> Citrix profile. This also covers a
           browser whose title mentions Citrix.
        2. Known browser process -> web profile.
        3. ``explorer.exe`` or any other ``*.exe`` that is not a listed
           system process -> native Windows profile.
        4. Anything else -> unknown profile.

        Args:
            process_name: Process name (e.g. "notepad.exe"). ``None`` is
                treated as an empty string.
            window_title: Title of the active window. ``None`` is treated
                as an empty string.
            screen_info: Screen information; accepted but currently unused.

        Returns:
            The SurfaceProfile built for the matched surface type.
        """
        import time  # function-scope import, as in the original implementation

        # Tolerate None so a missing window context falls back to "unknown"
        # instead of raising AttributeError.
        process_name = process_name or ""
        window_title = window_title or ""
        process_lower = process_name.lower().strip()
        title_lower = window_title.lower()
        now = time.time()

        # Remote session has absolute priority (process name or title marker).
        if process_lower in _CITRIX_PROCESSES or any(
            marker in title_lower for marker in self._REMOTE_TITLE_MARKERS
        ):
            return self._build_citrix_profile(process_name, window_title, now)

        if process_lower in _BROWSER_PROCESSES:
            return self._build_web_profile(process_name, window_title, now)

        # explorer.exe is listed as a system process but still counts as native.
        is_native = process_lower == "explorer.exe" or (
            process_lower.endswith(".exe") and process_lower not in _SYSTEM_PROCESSES
        )
        if is_native:
            return self._build_windows_profile(process_name, window_title, now)

        return self._build_unknown_profile(process_name, window_title, now)

    def _build_citrix_profile(self, process: str, title: str, ts: float) -> SurfaceProfile:
        """Citrix profile: no UIA/CDP, longer timeouts, lower thresholds."""
        return SurfaceProfile(
            surface_type=SurfaceType.CITRIX,
            process_name=process,
            window_title=title,
            confidence=0.95,
            uia_available=False,  # UIA is not usable inside a Citrix session
            cdp_available=False,
            ocr_available=True,
            vlm_available=True,
            # Citrix: JPEG compression and latency, hence more retries
            timeout_click_ms=15000,
            timeout_resolve_ms=10000,
            ocr_threshold=0.65,  # more tolerant (compression)
            template_threshold=0.75,  # more tolerant
            max_retries=3,
            retry_delay_ms=3000,
            detected_at=ts,
            details={"reason": "citrix_process_or_title"},
        )

    def _build_windows_profile(self, process: str, title: str, ts: float) -> SurfaceProfile:
        """Native Windows profile: standard parameters, UIA if the helper is healthy."""
        uia_ok = self._check_uia_available()
        return SurfaceProfile(
            surface_type=SurfaceType.WINDOWS_NATIVE,
            process_name=process,
            window_title=title,
            confidence=0.9,
            uia_available=uia_ok,
            cdp_available=False,
            ocr_available=True,
            vlm_available=True,
            timeout_click_ms=8000,
            timeout_resolve_ms=5000,
            ocr_threshold=0.75,
            template_threshold=0.85,
            max_retries=2,
            retry_delay_ms=2000,
            detected_at=ts,
            details={
                "reason": "native_windows_process",
                "uia_helper": self._uia_helper_path if uia_ok else "",
            },
        )

    def _build_web_profile(self, process: str, title: str, ts: float) -> SurfaceProfile:
        """Local web profile: fast parameters, CDP if the endpoint answers."""
        cdp_ok = self._check_cdp_available()
        return SurfaceProfile(
            surface_type=SurfaceType.WEB_LOCAL,
            process_name=process,
            window_title=title,
            confidence=0.9,
            uia_available=False,  # UIA is limited for browsers
            cdp_available=cdp_ok,
            ocr_available=True,
            vlm_available=True,
            # Local web: fast, text is rendered cleanly
            timeout_click_ms=5000,
            timeout_resolve_ms=3000,
            ocr_threshold=0.80,
            template_threshold=0.88,
            max_retries=1,
            retry_delay_ms=1000,
            detected_at=ts,
            details={"reason": "browser_process"},
        )

    def _build_unknown_profile(self, process: str, title: str, ts: float) -> SurfaceProfile:
        """Unknown profile: no helpers, default parameters, low confidence."""
        return SurfaceProfile(
            surface_type=SurfaceType.UNKNOWN,
            process_name=process,
            window_title=title,
            confidence=0.5,
            uia_available=False,
            cdp_available=False,
            ocr_available=True,
            vlm_available=True,
            timeout_click_ms=10000,
            timeout_resolve_ms=5000,
            ocr_threshold=0.70,
            template_threshold=0.80,
            max_retries=2,
            retry_delay_ms=2000,
            detected_at=ts,
            details={"reason": "fallback"},
        )

    def _check_uia_available(self) -> bool:
        """Return True when lea_uia.exe exists and its health check succeeds.

        Off Windows, or when the helper binary is missing, returns False
        without spawning anything. Otherwise runs ``lea_uia.exe health``
        (5 s timeout) and requires exit code 0 plus a JSON object whose
        ``status`` is ``"ok"``. Any failure is logged at debug level and
        reported as False.
        """
        if platform.system() != "Windows":
            return False
        if not self._uia_helper_path or not os.path.isfile(self._uia_helper_path):
            return False
        try:
            import json  # function-scope import, as in the original implementation

            result = subprocess.run(
                [self._uia_helper_path, "health"],
                capture_output=True,
                text=True,
                timeout=5,
            )
            if result.returncode != 0:
                return False
            data = json.loads(result.stdout.strip())
            # A valid JSON document is not necessarily an object.
            return isinstance(data, dict) and data.get("status") == "ok"
        except Exception as e:
            # Best-effort probe: any failure simply means "no UIA bonus".
            logger.debug("UIA health check failed: %s", e)
            return False

    def _check_cdp_available(self) -> bool:
        """Return True when a CDP endpoint answers on localhost:9222.

        Issues a GET on ``http://localhost:9222/json/version`` with a 1 s
        timeout and requires HTTP 200. Any failure is logged at debug level
        and reported as False.
        """
        try:
            import urllib.request

            with urllib.request.urlopen(
                "http://localhost:9222/json/version", timeout=1
            ) as resp:
                return resp.status == 200
        except Exception as e:
            # Best-effort probe: any failure simply means "no CDP bonus".
            logger.debug("CDP availability check failed: %s", e)
            return False

278
core/workflow/uia_helper.py Normal file
View File

@@ -0,0 +1,278 @@
# core/workflow/uia_helper.py
"""
UIAHelper — Wrapper Python pour lea_uia.exe (helper Rust UI Automation).
Expose une API Python simple pour interroger UIA via le binaire Rust.
Communique via subprocess : arguments passés en ligne de commande, réponse JSON lue sur stdout.
Pourquoi un helper Rust ?
- 5-10x plus rapide que pywinauto (10-20ms vs 50-200ms)
- Binaire standalone ~500 Ko, aucune dépendance runtime
- Pas de problèmes de threading COM en Python
- Crash-safe (le crash du helper n'affecte pas l'agent Python)
Architecture :
Python executor
↓ subprocess.run
lea_uia.exe query --x 812 --y 436
↓ UIA API Windows
JSON response
↓ stdout
Python executor parse JSON
Si lea_uia.exe n'est pas disponible (Linux, binaire absent, crash) :
toutes les méthodes retournent None → fallback vision automatique.
"""
import json
import logging
import os
import platform
import subprocess
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Tuple
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Default timeout for UIA helper calls (in seconds)
_DEFAULT_TIMEOUT = 5.0
@dataclass
class UiaElement:
    """Python representation of a UIA element.

    ``bounding_rect`` is read as ``(x1, y1, x2, y2)`` by ``center()``,
    ``width()`` and ``height()``. The defaults describe a disabled,
    off-screen, zero-sized element, so a default instance is never
    clickable.
    """

    name: str = ""
    control_type: str = ""
    class_name: str = ""
    automation_id: str = ""
    bounding_rect: Tuple[int, int, int, int] = (0, 0, 0, 0)
    is_enabled: bool = False
    is_offscreen: bool = True
    parent_path: List[Dict[str, str]] = field(default_factory=list)
    process_name: str = ""

    def center(self) -> Tuple[int, int]:
        """Return the centre of the bounding rectangle (integer division)."""
        x1, y1, x2, y2 = self.bounding_rect
        return ((x1 + x2) // 2, (y1 + y2) // 2)

    def width(self) -> int:
        """Return the rectangle width (``x2 - x1``)."""
        return self.bounding_rect[2] - self.bounding_rect[0]

    def height(self) -> int:
        """Return the rectangle height (``y2 - y1``)."""
        return self.bounding_rect[3] - self.bounding_rect[1]

    def is_clickable(self) -> bool:
        """Return True when the element is enabled, on screen and non-empty."""
        return (
            self.is_enabled
            and not self.is_offscreen
            and self.width() > 0
            and self.height() > 0
        )

    def path_signature(self) -> str:
        """Return a ``Type[name] > Type[name]`` signature of the parent chain.

        Parent entries without a (non-empty) name are skipped. A named
        entry that lacks ``control_type`` contributes an empty type instead
        of raising ``KeyError``. The element itself is always the last part.
        """
        parts = [
            f"{p.get('control_type', '')}[{p['name']}]"
            for p in self.parent_path
            if p.get("name")
        ]
        parts.append(f"{self.control_type}[{self.name}]")
        return " > ".join(parts)

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict view; ``bounding_rect`` becomes a list."""
        return {
            "name": self.name,
            "control_type": self.control_type,
            "class_name": self.class_name,
            "automation_id": self.automation_id,
            "bounding_rect": list(self.bounding_rect),
            "is_enabled": self.is_enabled,
            "is_offscreen": self.is_offscreen,
            "parent_path": self.parent_path,
            "process_name": self.process_name,
        }

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "UiaElement":
        """Build an element from a decoded JSON object.

        Missing keys and explicit ``null`` values both fall back to the
        field defaults (a ``null`` ``is_offscreen`` therefore stays
        off-screen). ``bounding_rect`` may be a list or a tuple; only its
        first four items are kept, and anything shorter or of another type
        yields ``(0, 0, 0, 0)``. ``parent_path`` is copied so the element
        does not share the list with ``d``.
        """
        rect = d.get("bounding_rect")
        if isinstance(rect, (list, tuple)) and len(rect) >= 4:
            rect = tuple(rect[:4])
        else:
            rect = (0, 0, 0, 0)

        enabled = d.get("is_enabled")
        offscreen = d.get("is_offscreen")

        return cls(
            name=d.get("name") or "",
            control_type=d.get("control_type") or "",
            class_name=d.get("class_name") or "",
            automation_id=d.get("automation_id") or "",
            bounding_rect=rect,
            is_enabled=False if enabled is None else enabled,
            # Unknown visibility is treated as off-screen (the safe default).
            is_offscreen=True if offscreen is None else offscreen,
            parent_path=list(d.get("parent_path") or []),
            process_name=d.get("process_name") or "",
        )
class UIAHelper:
    """Python wrapper around the lea_uia.exe command-line helper.

    Each call spawns the helper with command-line arguments and parses the
    JSON object it prints on stdout. When the helper is unavailable, or a
    call fails in any way, the public methods return ``None`` (``False``
    for ``health``) instead of raising.
    """

    def __init__(self, helper_path: str = "", timeout: float = _DEFAULT_TIMEOUT):
        """
        Args:
            helper_path: Path to lea_uia.exe. When empty, the standard
                locations are searched (see ``_find_helper``).
            timeout: Per-call timeout, in seconds, given to ``subprocess.run``.
        """
        self._helper_path = helper_path or self._find_helper()
        self._timeout = timeout
        self._available = self._check_available()

    def _find_helper(self) -> str:
        """Return the absolute path of the first lea_uia.exe found, or ""."""
        candidates = [
            r"C:\Lea\helpers\lea_uia.exe",
            os.path.join(os.path.dirname(__file__), "..", "..",
                         "agent_rust", "lea_uia", "target",
                         "x86_64-pc-windows-gnu", "release", "lea_uia.exe"),
            "./helpers/lea_uia.exe",
            "lea_uia.exe",
        ]
        for path in candidates:
            if os.path.isfile(path):
                return os.path.abspath(path)
        return ""

    def _check_available(self) -> bool:
        """Return True when running on Windows and the helper binary exists.

        This is a static check only: the helper is not executed here. Use
        ``health()`` to verify that it actually responds.
        """
        if platform.system() != "Windows":
            logger.debug("UIAHelper: Linux/Mac — helper désactivé")
            return False
        if not self._helper_path:
            logger.debug("UIAHelper: lea_uia.exe introuvable")
            return False
        if not os.path.isfile(self._helper_path):
            logger.debug("UIAHelper: chemin invalide %s", self._helper_path)
            return False
        return True

    @property
    def available(self) -> bool:
        """Result of the availability check performed at construction."""
        return self._available

    @property
    def helper_path(self) -> str:
        """Resolved helper path ("" when none was found)."""
        return self._helper_path

    def _run(self, args: List[str]) -> Optional[Dict[str, Any]]:
        """Run lea_uia.exe with ``args`` and return its decoded JSON object.

        Returns:
            The decoded JSON object, or None when the helper is unavailable,
            exits non-zero, prints nothing, times out, prints invalid JSON,
            prints JSON that is not an object, or fails to start.
        """
        if not self._available:
            return None
        try:
            result = subprocess.run(
                [self._helper_path] + args,
                capture_output=True,
                text=True,
                timeout=self._timeout,
                encoding="utf-8",
                errors="replace",
            )
            if result.returncode != 0:
                logger.debug(
                    "UIAHelper: exit code %s, stderr: %s",
                    result.returncode,
                    (result.stderr or "")[:200],
                )
                return None
            output = result.stdout.strip()
            if not output:
                return None
            data = json.loads(output)
        except subprocess.TimeoutExpired:
            logger.debug("UIAHelper: timeout (%ss) sur %s", self._timeout, args)
            return None
        except json.JSONDecodeError as e:
            logger.debug("UIAHelper: JSON invalide — %s", e)
            return None
        except Exception as e:
            # Boundary with an external process: never let a failure escape.
            logger.debug("UIAHelper: erreur %s", e)
            return None
        # json.loads may yield a list, string or number; callers use .get(),
        # so anything other than an object is treated as "no response".
        if not isinstance(data, dict):
            logger.debug("UIAHelper: JSON inattendu (%s)", type(data).__name__)
            return None
        return data

    @staticmethod
    def _element_from_response(data: Optional[Dict[str, Any]]) -> Optional[UiaElement]:
        """Extract the ``element`` object of an ``ok`` response, else None."""
        if not data or data.get("status") != "ok":
            return None
        elem_data = data.get("element")
        if not elem_data or not isinstance(elem_data, dict):
            return None
        return UiaElement.from_dict(elem_data)

    def health(self) -> bool:
        """Return True when the helper answers ``health`` with status "ok"."""
        data = self._run(["health"])
        return data is not None and data.get("status") == "ok"

    def query_at(
        self,
        x: int,
        y: int,
        with_parents: bool = True,
    ) -> Optional[UiaElement]:
        """Return the UIA element at a screen position.

        Args:
            x, y: Absolute pixel coordinates.
            with_parents: Include the parent hierarchy. When False,
                ``--with-parents=false`` is passed to the helper.

        Returns:
            The UiaElement, or None when nothing was found or UIA is
            unavailable.
        """
        args = ["query", "--x", str(x), "--y", str(y)]
        if not with_parents:
            args.append("--with-parents=false")
        return self._element_from_response(self._run(args))

    def find_by_name(
        self,
        name: str,
        control_type: Optional[str] = None,
        automation_id: Optional[str] = None,
        window: Optional[str] = None,
        timeout_ms: int = 2000,
    ) -> Optional[UiaElement]:
        """Search for an element by name, with optional filters.

        Args:
            name: Name of the element, passed as ``--name``.
            control_type: Control type (Button, Edit, MenuItem...).
            automation_id: Automation ID.
            window: Restrict the search to a specific window.
            timeout_ms: Search timeout in milliseconds, passed to the
                helper as ``--timeout-ms``.

        Returns:
            The UiaElement, or None when nothing was found or UIA is
            unavailable.
        """
        args = ["find", "--name", name, "--timeout-ms", str(timeout_ms)]
        if control_type:
            args.extend(["--control-type", control_type])
        if automation_id:
            args.extend(["--automation-id", automation_id])
        if window:
            args.extend(["--window", window])
        return self._element_from_response(self._run(args))

    def capture_focused(self, max_depth: int = 3) -> Optional[UiaElement]:
        """Return the element reported by the helper's ``capture`` command.

        Args:
            max_depth: Value passed to the helper as ``--max-depth``.

        Returns:
            The UiaElement, or None when nothing was returned or UIA is
            unavailable.
        """
        data = self._run(["capture", "--max-depth", str(max_depth)])
        return self._element_from_response(data)
# Process-wide shared instance, created on first use (lightweight singleton).
_SHARED_HELPER: Optional[UIAHelper] = None


def get_shared_helper() -> UIAHelper:
    """Return the shared UIAHelper, creating it on the first call."""
    global _SHARED_HELPER
    helper = _SHARED_HELPER
    if helper is None:
        helper = UIAHelper()
        _SHARED_HELPER = helper
    return helper