feat(navigation): brique login visuel OCR-ancre + action navigate au replay
- core/navigation/ : visual_verifier (presence=OCR, role=VLM ancre sur tokens), grounding (OCR-anchor first, VLM fallback, cache coords valide par la vue), visual_login (verify_before/after, DETTE-023), action_resolver (pont runtime) - api_stream/replay_engine : dispatch action navigate server-side, never-fail -> needs_review, import depuis core.navigation (boot 5005 garanti) - 131 tests verts (wiring boot, e2e handler, unit modules) Chantier Qwen 01-02/07/2026, revue croisee Claude (plan deploy v2). Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
227
core/navigation/visual_login.py
Normal file
227
core/navigation/visual_login.py
Normal file
@@ -0,0 +1,227 @@
|
||||
"""Visual login — résolution + vérification du formulaire de login par grounding.
|
||||
|
||||
Architecture (alignée visual_verifier + grounding) :
|
||||
- verify_before : formulaire login visible (champs + bouton présents)
|
||||
- resolve_login_form : ground chaque champ (login, password, bouton) → coords
|
||||
- verify_after : dashboard/accueil visible (post-login)
|
||||
- Chaque étape encadrée par vision (DETTE-023 couvert)
|
||||
|
||||
Coords = cache local validé par vue (Dom/Claude recadrage).
|
||||
Le runtime exécute les actions (type/click) — ce module résout + valide.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Callable, Dict, List, Optional, Tuple
|
||||
|
||||
from core.navigation.grounding import (
|
||||
BBox,
|
||||
CoordsCache,
|
||||
GroundedElement,
|
||||
OcrDetailedClient,
|
||||
OcrTokenInfo,
|
||||
ground_element,
|
||||
)
|
||||
from core.navigation.visual_verifier import (
|
||||
OcrClient,
|
||||
ScreenMatchResult,
|
||||
VlmClient,
|
||||
verify_before,
|
||||
verify_after,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ── Dataclasses ──────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@dataclass
class LoginFormConfig:
    """Describe which on-screen elements make up a login form.

    Every element descriptor is a plain dict. The stock configuration in
    this module uses the keys ``"role"`` and ``"text"`` plus an optional
    ``"extra"`` hint; the exact schema is defined by the consumers of the
    descriptors, not by this class.
    """

    # Identifier input, e.g. {"role": "champ", "text": "Login"}
    login_field: Dict[str, Any]
    # Password input, e.g. {"role": "champ", "text": "Mot de passe"}
    password_field: Dict[str, Any]
    # Submit control, e.g. {"role": "bouton", "text": "Connexion"}
    submit_button: Dict[str, Any]
    # Descriptors checked after login; a fresh empty list per instance.
    success_elements: List[Dict[str, Any]] = field(default_factory=list)
    # Free-text label forwarded to the verification helpers, e.g. "DPI urgences"
    context: str = ""
|
||||
|
||||
|
||||
@dataclass
class LoginResolution:
    """Grounded elements for the three controls of a login form.

    Each ``*_field`` / ``submit_button`` attribute holds the grounded
    element for that control, or ``None`` when it could not be located.
    ``all_resolved`` and ``method`` are filled in by the caller that builds
    the resolution; this class does not recompute them.
    """

    login_field: Optional[GroundedElement] = None
    password_field: Optional[GroundedElement] = None
    submit_button: Optional[GroundedElement] = None
    all_resolved: bool = False
    method: str = ""  # "ocr_anchor", "vlm_grounder", "mixed", "cache"

    def describe(self) -> str:
        """Return a one-line, human-readable summary of the resolution.

        Each control is rendered as ``<label>@<center> (<method>)`` when an
        element is present and as ``<label>: NOT FOUND`` otherwise. The
        summary is prefixed with ``OK`` or ``INCOMPLETE`` according to
        ``all_resolved``.
        """
        labelled = (
            ("login", self.login_field),
            ("password", self.password_field),
            ("button", self.submit_button),
        )
        parts = []
        for label, element in labelled:
            # Presence is "is not None", matching how all_resolved is
            # computed; a truthiness test would hide a falsy element.
            if element is None:
                parts.append(f"{label}: NOT FOUND")
            else:
                parts.append(f"{label}@{element.center} ({element.method})")
        status = "OK" if self.all_resolved else "INCOMPLETE"
        return f"Login resolution [{status}]: " + ", ".join(parts)
|
||||
|
||||
|
||||
# ── Default configs ──────────────────────────────────────────────────
|
||||
|
||||
|
||||
def dpi_urgences_login_config() -> LoginFormConfig:
    """Build the stock login-form description for the DPI urgences application.

    Returns:
        A new ``LoginFormConfig`` with the identifier, password and submit
        descriptors, two post-login page descriptors ("Accueil" and
        "Dashboard") and the context label of the login page.
    """
    identifier = {"role": "champ", "text": "Login", "extra": "champ identifiant"}
    secret = {"role": "champ", "text": "Mot de passe", "extra": "champ password"}
    submit = {"role": "bouton", "text": "Connexion", "extra": "bouton submit"}
    # Page titles looked for once the login has been submitted.
    landing_pages = [
        {"role": "page", "text": title} for title in ("Accueil", "Dashboard")
    ]
    return LoginFormConfig(
        login_field=identifier,
        password_field=secret,
        submit_button=submit,
        success_elements=landing_pages,
        context="DPI urgences — page login",
    )
|
||||
|
||||
|
||||
# ── Helper ───────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _ocr_detailed_to_simple(ocr_detailed: OcrDetailedClient) -> OcrClient:
    """Adapt a detailed OCR client into a text-only one.

    Args:
        ocr_detailed: Callable taking an image path and returning an
            iterable of tokens that expose a ``text`` attribute.

    Returns:
        A callable taking an image path and returning the ``text`` of every
        token, in the order the detailed client produced them.
    """

    def client(image_path: str) -> List[str]:
        texts: List[str] = []
        for token in ocr_detailed(image_path):
            texts.append(token.text)
        return texts

    return client
|
||||
|
||||
|
||||
# ── Core functions ───────────────────────────────────────────────────
|
||||
|
||||
|
||||
def verify_login_visible(
    screenshot_path: str,
    config: LoginFormConfig,
    ocr_client: OcrClient,
    vlm_client: VlmClient,
) -> ScreenMatchResult:
    """Check that the login form is on screen (pre-condition).

    The login field, password field and submit button descriptors from
    ``config`` are handed, in that order, to ``verify_before`` together with
    ``config.context``. How presence and role are actually checked is
    decided by ``verify_before`` in ``core.navigation.visual_verifier``.

    Args:
        screenshot_path: Path of the screenshot to inspect.
        config: Login form description.
        ocr_client: OCR client forwarded to ``verify_before``.
        vlm_client: VLM client forwarded to ``verify_before``.

    Returns:
        The ``ScreenMatchResult`` produced by ``verify_before``.
    """
    required_elements = [
        config.login_field,
        config.password_field,
        config.submit_button,
    ]
    outcome = verify_before(
        screenshot_path,
        required_elements,
        ocr_client,
        vlm_client,
        context=config.context,
    )
    return outcome
|
||||
|
||||
|
||||
def verify_login_success(
    screenshot_path: str,
    config: LoginFormConfig,
    ocr_client: OcrClient,
    vlm_client: VlmClient,
) -> ScreenMatchResult:
    """Check that the post-login screen is visible (post-condition).

    When ``config.success_elements`` is non-empty the check is delegated to
    ``verify_after`` with the context prefixed by ``"POST-LOGIN: "``.
    Without success criteria nothing can be verified, so a non-matching
    result with zero confidence is returned instead.

    NOTE(review): the original author states that ``verify_after`` applies a
    stricter threshold (0.8) because a false positive lets the agent carry
    on from the wrong screen. That threshold is not visible in this module;
    confirm it in ``core.navigation.visual_verifier``.

    Args:
        screenshot_path: Path of the screenshot to inspect.
        config: Login form description carrying ``success_elements``.
        ocr_client: OCR client forwarded to ``verify_after``.
        vlm_client: VLM client forwarded to ``verify_after``.

    Returns:
        The ``ScreenMatchResult`` from ``verify_after``, or a failed result
        when no success elements are configured.
    """
    if config.success_elements:
        return verify_after(
            screenshot_path,
            config.success_elements,
            ocr_client,
            vlm_client,
            context=f"POST-LOGIN: {config.context}",
        )

    # No success criteria defined: report "cannot verify" as a non-match.
    return ScreenMatchResult(
        match=False,
        confidence=0.0,
        reason="no success_elements defined in config",
    )
|
||||
|
||||
|
||||
def resolve_login_form(
    screenshot_path: str,
    config: LoginFormConfig,
    ocr_client: OcrDetailedClient,
    vlm_client: VlmClient,
    screen_width: int = 1920,
    screen_height: int = 1080,
    coords_cache: Optional[CoordsCache] = None,
) -> LoginResolution:
    """Ground the three login-form controls so the runtime can act on them.

    ``ground_element`` is called once per control, in the order login field,
    password field, submit button, with identical clients, screen size,
    cache and context. This function only aggregates the outcome.

    NOTE(review): the original author describes the strategy inside
    ``ground_element`` as cache hit first, then OCR anchor, then VLM
    grounder. That logic lives in ``core.navigation.grounding`` and is not
    visible here.

    Args:
        screenshot_path: Path of the screenshot to ground against.
        config: Login form description.
        ocr_client: Detailed OCR client forwarded to ``ground_element``.
        vlm_client: VLM client forwarded to ``ground_element``.
        screen_width: Screen width forwarded to ``ground_element``.
        screen_height: Screen height forwarded to ``ground_element``.
        coords_cache: Optional coordinate cache forwarded to
            ``ground_element``.

    Returns:
        A ``LoginResolution`` holding the three grounded elements (``None``
        for any that was not found), ``all_resolved`` set when none is
        ``None``, and ``method`` set to the shared grounding method,
        ``"mixed"`` when the found elements disagree, or ``""`` when
        nothing was found.
    """
    descriptors = (config.login_field, config.password_field, config.submit_button)
    grounded = [
        ground_element(
            screenshot_path, descriptor,
            ocr_client=ocr_client, vlm_client=vlm_client,
            screen_width=screen_width, screen_height=screen_height,
            coords_cache=coords_cache, context=config.context,
        )
        for descriptor in descriptors
    ]
    login_el, password_el, button_el = grounded

    # One presence test ("is not None") drives both all_resolved and the
    # method summary, so a found element always contributes its method.
    found = [element for element in grounded if element is not None]
    all_resolved = len(found) == len(grounded)

    unique_methods = {element.method for element in found}
    if len(unique_methods) == 1:
        (method,) = unique_methods
    elif unique_methods:
        method = "mixed"
    else:
        method = ""

    resolution = LoginResolution(
        login_field=login_el,
        password_field=password_el,
        submit_button=button_el,
        all_resolved=all_resolved,
        method=method,
    )

    if all_resolved:
        logger.info("resolve_login_form: %s", resolution.describe())
    else:
        logger.warning("resolve_login_form: incomplete — %s", resolution.describe())

    return resolution
|
||||
Reference in New Issue
Block a user