# rpa_vision_v3/core/navigation/visual_login.py

"""Visual login — résolution + vérification du formulaire de login par grounding.

Architecture (alignée visual_verifier + grounding) :
- verify_before : formulaire login visible (champs + bouton présents)
- resolve_login_form : ground chaque champ (login, password, bouton) → coords
- verify_after : dashboard/accueil visible (post-login)
- Chaque étape encadrée par vision (DETTE-023 couvert)

Coords = cache local validé par vue (Dom/Claude recadrage).
Le runtime exécute les actions (type/click) — ce module résout + valide.
"""

from __future__ import annotations

import logging
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, List, Optional, Tuple

from core.navigation.grounding import (
    BBox,
    CoordsCache,
    GroundedElement,
    OcrDetailedClient,
    OcrTokenInfo,
    ground_element,
)
from core.navigation.visual_verifier import (
    OcrClient,
    ScreenMatchResult,
    VlmClient,
    verify_before,
    verify_after,
)

logger = logging.getLogger(__name__)


# ── Dataclasses ──────────────────────────────────────────────────────


@dataclass
class LoginFormConfig:
    """Configuration for a login form — what to look for.

    Each element is described by a free-form dict. The examples in this
    file use the keys ``"role"``, ``"text"`` and optionally ``"extra"``;
    the exact schema is defined by the helpers that consume these dicts.
    """

    # Descriptor of the identifier input, e.g. {"role": "champ", "text": "Login"}
    login_field: Dict[str, Any]  # {"role": "champ", "text": "Login"}
    # Descriptor of the password input, e.g. {"role": "champ", "text": "Mot de passe"}
    password_field: Dict[str, Any]  # {"role": "champ", "text": "Mot de passe"}
    # Descriptor of the submit control, e.g. {"role": "bouton", "text": "Connexion"}
    submit_button: Dict[str, Any]  # {"role": "bouton", "text": "Connexion"}
    # Descriptors expected on screen once login succeeded; empty by default.
    success_elements: List[Dict[str, Any]] = field(default_factory=list)
    # Free-text context label forwarded to the verification/grounding helpers.
    context: str = ""  # e.g. "DPI urgences"


@dataclass
class LoginResolution:
    """Result of login form resolution — grounded coords for each field.

    A field left as ``None`` means the corresponding element was not
    resolved.
    """

    login_field: Optional[GroundedElement] = None
    password_field: Optional[GroundedElement] = None
    submit_button: Optional[GroundedElement] = None
    # Set by the producer; True when all three elements were resolved.
    all_resolved: bool = False
    # Overall method label: "ocr_anchor", "vlm_grounder", "mixed", "cache".
    method: str = ""

    def describe(self) -> str:
        """Return a one-line, human-readable summary of the resolution.

        Each element is rendered as ``<label>@<center> (<method>)`` when
        present, or ``<label>: NOT FOUND`` when it is ``None``. The summary
        is prefixed with ``OK`` or ``INCOMPLETE`` according to
        ``all_resolved``.

        Returns:
            The formatted summary string.
        """
        labelled = (
            ("login", self.login_field),
            ("password", self.password_field),
            ("button", self.submit_button),
        )
        # Test against None explicitly: the fields are Optional, and an
        # element object must not be reported missing just because it
        # happens to evaluate as falsy.
        parts = [
            f"{label}: NOT FOUND"
            if element is None
            else f"{label}@{element.center} ({element.method})"
            for label, element in labelled
        ]
        status = "OK" if self.all_resolved else "INCOMPLETE"
        return f"Login resolution [{status}]: " + ", ".join(parts)


# ── Default configs ──────────────────────────────────────────────────


def dpi_urgences_login_config() -> LoginFormConfig:
    """Build the stock configuration for the DPI urgences login page.

    Returns:
        A LoginFormConfig describing the identifier field, the password
        field, the submit button, and the two page labels ("Accueil",
        "Dashboard") used as post-login success markers.
    """
    identifier = {"role": "champ", "text": "Login", "extra": "champ identifiant"}
    secret = {"role": "champ", "text": "Mot de passe", "extra": "champ password"}
    submit = {"role": "bouton", "text": "Connexion", "extra": "bouton submit"}
    # One "page" descriptor per landing screen accepted as a success marker.
    landing_pages = [
        {"role": "page", "text": title} for title in ("Accueil", "Dashboard")
    ]
    return LoginFormConfig(
        login_field=identifier,
        password_field=secret,
        submit_button=submit,
        success_elements=landing_pages,
        context="DPI urgences — page login",
    )


# ── Helper ───────────────────────────────────────────────────────────


def _ocr_detailed_to_simple(ocr_detailed: OcrDetailedClient) -> OcrClient:
    """Adapt a detailed OCR client into a text-only OCR client.

    Args:
        ocr_detailed: Callable taking an image path and returning an
            iterable of token objects exposing a ``text`` attribute.

    Returns:
        A callable taking an image path and returning the list of token
        texts, in the order the detailed client produced them.
    """
    def client(image_path: str) -> List[str]:
        texts: List[str] = []
        # Keep only the text of each token; other token data is dropped.
        for token in ocr_detailed(image_path):
            texts.append(token.text)
        return texts

    return client


# ── Core functions ───────────────────────────────────────────────────


def verify_login_visible(
    screenshot_path: str,
    config: LoginFormConfig,
    ocr_client: OcrClient,
    vlm_client: VlmClient,
) -> ScreenMatchResult:
    """Check the pre-condition: the login form is on screen.

    The three form descriptors from ``config`` (login field, password
    field, submit button) are passed, in that order, to ``verify_before``
    together with the config's context label.

    Args:
        screenshot_path: Path of the screenshot to inspect.
        config: Login form description.
        ocr_client: Text-only OCR client forwarded to ``verify_before``.
        vlm_client: VLM client forwarded to ``verify_before``.

    Returns:
        The ScreenMatchResult produced by ``verify_before``.
    """
    return verify_before(
        screenshot_path,
        [config.login_field, config.password_field, config.submit_button],
        ocr_client,
        vlm_client,
        context=config.context,
    )


def verify_login_success(
    screenshot_path: str,
    config: LoginFormConfig,
    ocr_client: OcrClient,
    vlm_client: VlmClient,
) -> ScreenMatchResult:
    """Check the post-condition: a post-login screen is visible.

    Delegates to ``verify_after`` with ``config.success_elements`` and a
    context label prefixed with ``"POST-LOGIN: "``.
    NOTE(review): the original docstring states verify_after uses a higher
    threshold (0.8) because a false positive lets the flow continue on the
    wrong screen — the threshold is not visible here; confirm in
    visual_verifier.

    Args:
        screenshot_path: Path of the screenshot taken after submitting.
        config: Login form description; its ``success_elements`` list the
            descriptors expected after login.
        ocr_client: Text-only OCR client forwarded to ``verify_after``.
        vlm_client: VLM client forwarded to ``verify_after``.

    Returns:
        The result of ``verify_after``, or a non-matching result with
        confidence 0.0 when no success elements are configured.
    """
    if config.success_elements:
        return verify_after(
            screenshot_path,
            config.success_elements,
            ocr_client,
            vlm_client,
            context=f"POST-LOGIN: {config.context}",
        )

    # Nothing to check against: report an explicit non-match rather than
    # pretending the login succeeded.
    return ScreenMatchResult(
        match=False,
        confidence=0.0,
        reason="no success_elements defined in config",
    )


def resolve_login_form(
    screenshot_path: str,
    config: LoginFormConfig,
    ocr_client: OcrDetailedClient,
    vlm_client: VlmClient,
    screen_width: int = 1920,
    screen_height: int = 1080,
    coords_cache: Optional[CoordsCache] = None,
) -> LoginResolution:
    """Ground all login form elements → coords for runtime action.

    Calls ``ground_element`` once per element, in the order login field,
    password field, submit button, with the same clients, screen size,
    cache and context for each call.
    NOTE(review): the per-element strategy (cache hit, then OCR anchor,
    then VLM grounder, per the original notes) lives in ``ground_element``
    and is not visible here.

    Args:
        screenshot_path: Path of the screenshot showing the login form.
        config: Login form description (element descriptors + context).
        ocr_client: Detailed OCR client forwarded to ``ground_element``.
        vlm_client: VLM client forwarded to ``ground_element``.
        screen_width: Screen width forwarded to ``ground_element``.
        screen_height: Screen height forwarded to ``ground_element``.
        coords_cache: Optional coordinates cache forwarded to
            ``ground_element``.

    Returns:
        A LoginResolution holding the three grounded elements (``None``
        for any that was not resolved), the ``all_resolved`` flag, and an
        overall method label: the shared method when every resolved
        element used the same one, ``"mixed"`` when they differ, and
        ``""`` when nothing was resolved.
    """
    descriptors = (config.login_field, config.password_field, config.submit_button)
    # A list comprehension keeps the original call order (login, password,
    # button), which matters if grounding touches the shared cache.
    login_el, password_el, button_el = [
        ground_element(
            screenshot_path, descriptor,
            ocr_client=ocr_client, vlm_client=vlm_client,
            screen_width=screen_width, screen_height=screen_height,
            coords_cache=coords_cache, context=config.context,
        )
        for descriptor in descriptors
    ]

    elements = (login_el, password_el, button_el)
    # "Resolved" means "is not None" everywhere in this function, so the
    # completeness flag and the method label are derived from the same set.
    resolved = [element for element in elements if element is not None]
    all_resolved = len(resolved) == len(elements)

    unique_methods = {element.method for element in resolved}
    if not unique_methods:
        method = ""
    elif len(unique_methods) == 1:
        method = next(iter(unique_methods))
    else:
        method = "mixed"

    resolution = LoginResolution(
        login_field=login_el,
        password_field=password_el,
        submit_button=button_el,
        all_resolved=all_resolved,
        method=method,
    )

    if all_resolved:
        logger.info("resolve_login_form: %s", resolution.describe())
    else:
        logger.warning("resolve_login_form: incomplete — %s", resolution.describe())

    return resolution