- core/navigation/ : visual_verifier (presence=OCR, role=VLM ancre sur tokens), grounding (OCR-anchor first, VLM fallback, cache coords valide par la vue), visual_login (verify_before/after, DETTE-023), action_resolver (pont runtime) - api_stream/replay_engine : dispatch action navigate server-side, never-fail -> needs_review, import depuis core.navigation (boot 5005 garanti) - 131 tests verts (wiring boot, e2e handler, unit modules) Chantier Qwen 01-02/07/2026, revue croisee Claude (plan deploy v2). Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
228 lines
7.7 KiB
Python
228 lines
7.7 KiB
Python
"""Visual login — résolution + vérification du formulaire de login par grounding.
|
|
|
|
Architecture (aligned with visual_verifier + grounding):
- verify_before: the login form is visible (fields + button present)
- resolve_login_form: ground each element (login, password, button) -> coords
- verify_after: the dashboard/home page is visible (post-login)
- Every step is bracketed by a vision check (covers DETTE-023)

Coords = local cache validated against the view (per the Dom/Claude reframing).
The runtime executes the actions (type/click); this module resolves + validates.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
from dataclasses import dataclass, field
|
|
from typing import Any, Callable, Dict, List, Optional, Tuple
|
|
|
|
from core.navigation.grounding import (
|
|
BBox,
|
|
CoordsCache,
|
|
GroundedElement,
|
|
OcrDetailedClient,
|
|
OcrTokenInfo,
|
|
ground_element,
|
|
)
|
|
from core.navigation.visual_verifier import (
|
|
OcrClient,
|
|
ScreenMatchResult,
|
|
VlmClient,
|
|
verify_before,
|
|
verify_after,
|
|
)
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# ── Dataclasses ──────────────────────────────────────────────────────
|
|
|
|
|
|
@dataclass
class LoginFormConfig:
    """Describe which on-screen elements make up a login form.

    Each element is a free-form descriptor dict. The three form elements
    are required; ``success_elements`` and ``context`` are optional and
    default to an empty list and an empty string respectively.
    """

    # Identifier input, e.g. {"role": "champ", "text": "Login"}
    login_field: Dict[str, Any]
    # Password input, e.g. {"role": "champ", "text": "Mot de passe"}
    password_field: Dict[str, Any]
    # Submit control, e.g. {"role": "bouton", "text": "Connexion"}
    submit_button: Dict[str, Any]
    # Descriptors expected on screen once login has succeeded; a fresh
    # list is created per instance.
    success_elements: List[Dict[str, Any]] = field(default_factory=list)
    # Free-text hint describing the screen, e.g. "DPI urgences"
    context: str = ""
|
|
|
|
|
|
@dataclass
class LoginResolution:
    """Outcome of grounding the three login-form elements.

    Each element attribute holds the grounded element, or ``None`` when
    that element could not be located.
    """

    # Grounded identifier input, or None when not found.
    login_field: Optional[GroundedElement] = None
    # Grounded password input, or None when not found.
    password_field: Optional[GroundedElement] = None
    # Grounded submit control, or None when not found.
    submit_button: Optional[GroundedElement] = None
    # Set by the producer to indicate that all three elements were found.
    all_resolved: bool = False
    # Overall grounding method label,
    # e.g. "ocr_anchor", "vlm_grounder", "mixed", "cache".
    method: str = ""

    def describe(self) -> str:
        """Return a one-line, human-readable summary of the resolution.

        Every element is rendered either as ``<label>@<center> (<method>)``
        or as ``<label>: NOT FOUND``. The summary is prefixed with ``OK``
        or ``INCOMPLETE`` according to ``all_resolved``.
        """
        labelled = (
            ("login", self.login_field),
            ("password", self.password_field),
            ("button", self.submit_button),
        )
        # Absence is signalled by None only: an element object that happens
        # to evaluate as falsy must still be reported with its coordinates.
        parts = [
            f"{label}@{element.center} ({element.method})"
            if element is not None
            else f"{label}: NOT FOUND"
            for label, element in labelled
        ]
        status = "OK" if self.all_resolved else "INCOMPLETE"
        return f"Login resolution [{status}]: " + ", ".join(parts)
|
|
|
|
|
|
# ── Default configs ──────────────────────────────────────────────────
|
|
|
|
|
|
def dpi_urgences_login_config() -> LoginFormConfig:
    """Build the stock ``LoginFormConfig`` for the DPI urgences login page.

    Returns:
        A new config whose three form descriptors use the French labels
        "Login", "Mot de passe" and "Connexion", and whose success
        criteria are the "Accueil" and "Dashboard" pages.
    """
    login_spec = {"role": "champ", "text": "Login", "extra": "champ identifiant"}
    password_spec = {"role": "champ", "text": "Mot de passe", "extra": "champ password"}
    submit_spec = {"role": "bouton", "text": "Connexion", "extra": "bouton submit"}
    landing_pages = [
        {"role": "page", "text": "Accueil"},
        {"role": "page", "text": "Dashboard"},
    ]
    return LoginFormConfig(
        login_field=login_spec,
        password_field=password_spec,
        submit_button=submit_spec,
        success_elements=landing_pages,
        context="DPI urgences — page login",
    )
|
|
|
|
|
|
# ── Helper ───────────────────────────────────────────────────────────
|
|
|
|
|
|
def _ocr_detailed_to_simple(ocr_detailed: OcrDetailedClient) -> OcrClient:
    """Adapt a detailed OCR client into a text-only one.

    Args:
        ocr_detailed: Callable taking an image path and returning an
            iterable of tokens that each expose a ``text`` attribute.

    Returns:
        A callable taking an image path and returning the list of token
        texts, in the order the wrapped client produced them.
    """
    # The wrapped client is invoked lazily, once per call of the adapter.
    return lambda image_path: [token.text for token in ocr_detailed(image_path)]
|
|
|
|
|
|
# ── Core functions ───────────────────────────────────────────────────
|
|
|
|
|
|
def verify_login_visible(
    screenshot_path: str,
    config: LoginFormConfig,
    ocr_client: OcrClient,
    vlm_client: VlmClient,
) -> ScreenMatchResult:
    """Check, as a pre-condition, that the login form is on screen.

    The login field, password field and submit button descriptors from
    ``config`` are handed to ``verify_before`` together with
    ``config.context``.

    Args:
        screenshot_path: Path of the screenshot to inspect.
        config: Login form description supplying the expected elements.
        ocr_client: OCR client forwarded to ``verify_before``.
        vlm_client: VLM client forwarded to ``verify_before``.

    Returns:
        Whatever ``verify_before`` returns for these expectations.
    """
    # NOTE(review): the module notes describe this check as OCR-anchored
    # (presence via OCR, role via VLM); that logic lives in
    # core.navigation.visual_verifier and is not visible here.
    required_elements = [
        config.login_field,
        config.password_field,
        config.submit_button,
    ]
    return verify_before(
        screenshot_path,
        required_elements,
        ocr_client,
        vlm_client,
        context=config.context,
    )
|
|
|
|
|
|
def verify_login_success(
    screenshot_path: str,
    config: LoginFormConfig,
    ocr_client: OcrClient,
    vlm_client: VlmClient,
) -> ScreenMatchResult:
    """Check, as a post-condition, that the post-login screen is visible.

    Args:
        screenshot_path: Path of the screenshot taken after submitting.
        config: Login form description; its ``success_elements`` are the
            expectations and its ``context`` is prefixed with
            ``"POST-LOGIN: "``.
        ocr_client: OCR client forwarded to ``verify_after``.
        vlm_client: VLM client forwarded to ``verify_after``.

    Returns:
        The result of ``verify_after``, or a non-matching result with
        confidence 0.0 when ``config`` defines no success elements.
    """
    # NOTE(review): the original author states verify_after applies a
    # stricter threshold (0.8) because a false positive would let the
    # automation proceed on the wrong screen; confirm in visual_verifier.
    success_criteria = config.success_elements
    if success_criteria:
        return verify_after(
            screenshot_path,
            success_criteria,
            ocr_client,
            vlm_client,
            context=f"POST-LOGIN: {config.context}",
        )

    # Without success criteria nothing can be verified; report a
    # non-match rather than a success.
    return ScreenMatchResult(
        match=False,
        confidence=0.0,
        reason="no success_elements defined in config",
    )
|
|
|
|
|
|
def resolve_login_form(
    screenshot_path: str,
    config: LoginFormConfig,
    ocr_client: OcrDetailedClient,
    vlm_client: VlmClient,
    screen_width: int = 1920,
    screen_height: int = 1080,
    coords_cache: Optional[CoordsCache] = None,
) -> LoginResolution:
    """Ground every login form element so the runtime can type/click.

    ``ground_element`` is called once per element, in the order login
    field, password field, submit button, with identical client, screen
    size, cache and context arguments.

    NOTE(review): per the original notes, ``ground_element`` tries a
    cache hit first, then an OCR anchor, then a VLM grounder; that
    strategy lives in core.navigation.grounding and is not visible here.

    Args:
        screenshot_path: Path of the screenshot showing the login form.
        config: Login form description supplying the element descriptors
            and the context string.
        ocr_client: Detailed OCR client forwarded to ``ground_element``.
        vlm_client: VLM client forwarded to ``ground_element``.
        screen_width: Screen width forwarded to ``ground_element``.
        screen_height: Screen height forwarded to ``ground_element``.
        coords_cache: Optional coordinate cache forwarded to
            ``ground_element``.

    Returns:
        A ``LoginResolution`` holding the three grounded elements
        (``None`` for any that was not found), the ``all_resolved`` flag
        and an overall method label: the shared method when every found
        element used the same one, ``"mixed"`` when they differ, and
        ``""`` when nothing was found.
    """
    element_specs = (
        config.login_field,
        config.password_field,
        config.submit_button,
    )
    login_el, password_el, button_el = [
        ground_element(
            screenshot_path, spec,
            ocr_client=ocr_client, vlm_client=vlm_client,
            screen_width=screen_width, screen_height=screen_height,
            coords_cache=coords_cache, context=config.context,
        )
        for spec in element_specs
    ]

    # "Found" means "not None" everywhere, so the all_resolved flag and the
    # method summary can never disagree about the same element.
    found = [
        element
        for element in (login_el, password_el, button_el)
        if element is not None
    ]
    all_resolved = len(found) == len(element_specs)

    unique_methods = {element.method for element in found}
    if len(unique_methods) == 1:
        method = next(iter(unique_methods))
    elif unique_methods:
        method = "mixed"
    else:
        method = ""

    resolution = LoginResolution(
        login_field=login_el,
        password_field=password_el,
        submit_button=button_el,
        all_resolved=all_resolved,
        method=method,
    )

    if all_resolved:
        logger.info("resolve_login_form: %s", resolution.describe())
    else:
        logger.warning("resolve_login_form: incomplete — %s", resolution.describe())

    return resolution
|