"""Visual login — résolution + vérification du formulaire de login par grounding. Architecture (alignée visual_verifier + grounding) : - verify_before : formulaire login visible (champs + bouton présents) - resolve_login_form : ground chaque champ (login, password, bouton) → coords - verify_after : dashboard/accueil visible (post-login) - Chaque étape encadrée par vision (DETTE-023 couvert) Coords = cache local validé par vue (Dom/Claude recadrage). Le runtime exécute les actions (type/click) — ce module résout + valide. """ from __future__ import annotations import logging from dataclasses import dataclass, field from typing import Any, Callable, Dict, List, Optional, Tuple from core.navigation.grounding import ( BBox, CoordsCache, GroundedElement, OcrDetailedClient, OcrTokenInfo, ground_element, ) from core.navigation.visual_verifier import ( OcrClient, ScreenMatchResult, VlmClient, verify_before, verify_after, ) logger = logging.getLogger(__name__) # ── Dataclasses ────────────────────────────────────────────────────── @dataclass class LoginFormConfig: """Configuration for a login form — what to look for.""" login_field: Dict[str, Any] # {"role": "champ", "text": "Login"} password_field: Dict[str, Any] # {"role": "champ", "text": "Mot de passe"} submit_button: Dict[str, Any] # {"role": "bouton", "text": "Connexion"} success_elements: List[Dict[str, Any]] = field(default_factory=list) context: str = "" # e.g. "DPI urgences" @dataclass class LoginResolution: """Result of login form resolution — grounded coords for each field.""" login_field: Optional[GroundedElement] = None password_field: Optional[GroundedElement] = None submit_button: Optional[GroundedElement] = None all_resolved: bool = False method: str = "" # "ocr_anchor", "vlm_grounder", "mixed", "cache" def describe(self) -> str: parts = [] if self.login_field: parts.append(f"login@{self.login_field.center} ({self.login_field.method})") else: parts.append("login: NOT FOUND") if self.password_field: parts.append(f"password@{self.password_field.center} ({self.password_field.method})") else: parts.append("password: NOT FOUND") if self.submit_button: parts.append(f"button@{self.submit_button.center} ({self.submit_button.method})") else: parts.append("button: NOT FOUND") status = "OK" if self.all_resolved else "INCOMPLETE" return f"Login resolution [{status}]: " + ", ".join(parts) # ── Default configs ────────────────────────────────────────────────── def dpi_urgences_login_config() -> LoginFormConfig: """Default config for DPI urgences login form.""" return LoginFormConfig( login_field={"role": "champ", "text": "Login", "extra": "champ identifiant"}, password_field={"role": "champ", "text": "Mot de passe", "extra": "champ password"}, submit_button={"role": "bouton", "text": "Connexion", "extra": "bouton submit"}, success_elements=[ {"role": "page", "text": "Accueil"}, {"role": "page", "text": "Dashboard"}, ], context="DPI urgences — page login", ) # ── Helper ─────────────────────────────────────────────────────────── def _ocr_detailed_to_simple(ocr_detailed: OcrDetailedClient) -> OcrClient: """Convert OcrDetailedClient (text+bbox) to OcrClient (text-only) for verification.""" def client(image_path: str) -> List[str]: return [t.text for t in ocr_detailed(image_path)] return client # ── Core functions ─────────────────────────────────────────────────── def verify_login_visible( screenshot_path: str, config: LoginFormConfig, ocr_client: OcrClient, vlm_client: VlmClient, ) -> ScreenMatchResult: """Verify login form is visible on screen (pre-condition). Checks that login field, password field, and submit button are present. Uses OCR-anchored verification (deterministic presence, VLM role). """ expected = [ config.login_field, config.password_field, config.submit_button, ] return verify_before( screenshot_path, expected, ocr_client, vlm_client, context=config.context, ) def verify_login_success( screenshot_path: str, config: LoginFormConfig, ocr_client: OcrClient, vlm_client: VlmClient, ) -> ScreenMatchResult: """Verify dashboard/accueil visible after login (post-condition). Higher threshold (verify_after = 0.8) — false positive = Léa proceeds wrong. """ if not config.success_elements: # No success criteria defined → can't verify return ScreenMatchResult( match=False, confidence=0.0, reason="no success_elements defined in config", ) return verify_after( screenshot_path, config.success_elements, ocr_client, vlm_client, context=f"POST-LOGIN: {config.context}", ) def resolve_login_form( screenshot_path: str, config: LoginFormConfig, ocr_client: OcrDetailedClient, vlm_client: VlmClient, screen_width: int = 1920, screen_height: int = 1080, coords_cache: Optional[CoordsCache] = None, ) -> LoginResolution: """Ground all login form elements → coords for runtime action. Resolution strategy per element: 1. Cache hit → return cached coords (validated separately) 2. OCR-anchor → deterministic bbox from OCR token 3. VLM grounder → fallback visual grounding Returns LoginResolution with grounded coords for each field. Runtime uses these coords to type/click. """ login_el = ground_element( screenshot_path, config.login_field, ocr_client=ocr_client, vlm_client=vlm_client, screen_width=screen_width, screen_height=screen_height, coords_cache=coords_cache, context=config.context, ) password_el = ground_element( screenshot_path, config.password_field, ocr_client=ocr_client, vlm_client=vlm_client, screen_width=screen_width, screen_height=screen_height, coords_cache=coords_cache, context=config.context, ) button_el = ground_element( screenshot_path, config.submit_button, ocr_client=ocr_client, vlm_client=vlm_client, screen_width=screen_width, screen_height=screen_height, coords_cache=coords_cache, context=config.context, ) all_resolved = login_el is not None and password_el is not None and button_el is not None # Determine overall method methods = [] if login_el: methods.append(login_el.method) if password_el: methods.append(password_el.method) if button_el: methods.append(button_el.method) unique_methods = set(methods) if len(unique_methods) == 1: method = unique_methods.pop() elif len(unique_methods) > 1: method = "mixed" else: method = "" resolution = LoginResolution( login_field=login_el, password_field=password_el, submit_button=button_el, all_resolved=all_resolved, method=method, ) if all_resolved: logger.info("resolve_login_form: %s", resolution.describe()) else: logger.warning("resolve_login_form: incomplete — %s", resolution.describe()) return resolution