feat(agent): add standalone anchor-relative resolver
This commit is contained in:
82
agent_v0/agent_v1/core/anchor_catalog.py
Normal file
82
agent_v0/agent_v1/core/anchor_catalog.py
Normal file
@@ -0,0 +1,82 @@
|
||||
"""Catalog d'ancres visuelles — Phase 1 standalone.
|
||||
|
||||
Ce module fournit un catalog Python (pas YAML) listant les trios
|
||||
(window_title, anchor_label, target_label) connus pour lesquels la
|
||||
résolution par triangulation visuelle est applicable.
|
||||
|
||||
Phase 1 : non branché au runtime, prouvé sur fixtures par
|
||||
`tests/unit/test_anchor_relative.py`.
|
||||
|
||||
Edition simple : ajouter une entrée à `ANCHOR_ENTRIES`.
|
||||
Validation : `find_entry_for_title(title)` retourne la première entrée
|
||||
dont un `title_patterns` matche (case-insensitive, substring).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
# Catalog of known visual-anchor entries.
#
# Entry format:
#   id (str)               : stable identifier, used for auditing
#   title_patterns (tuple) : case-insensitive substrings of the window title
#   anchor_label (list)    : anchor labels to try, in order (FR then EN)
#   target_label (str)     : target label (e.g. "Enregistrer")
#   geometry_hint (dict)   :
#     region (str)         : informational only ("bottom-right", "bottom-center", ...)
#     min_x_norm/min_y_norm/max_x_norm/max_y_norm (float) : valid zone
#                            (normalised 0..1 over the window/screen)
#     offset_from_anchor (dict) : {"x_px": int, "y_px": int} anchor→target delta
#
# Order matters: find_entry_for_title() returns the FIRST entry with a matching
# pattern, so a title such as "... Bloc-notes — Enregistrer sous" resolves to
# the Save As entry because it is listed before the generic Notepad entry.
#
# NOTE(review): anchor_label carries an EN fallback but target_label is French
# only — presumably intentional for Phase 1; confirm before enabling EN UIs.
ANCHOR_ENTRIES: List[Dict[str, Any]] = [
    {
        "id": "notepad_save_as_enregistrer",
        "title_patterns": ("enregistrer sous", "save as"),
        "anchor_label": ["Annuler", "Cancel"],
        "target_label": "Enregistrer",
        "geometry_hint": {
            "region": "bottom-right",
            "min_x_norm": 0.55,
            "min_y_norm": 0.75,
            "max_x_norm": 1.0,
            "max_y_norm": 1.0,
            # Target sits 100 px to the left of the anchor, same row.
            "offset_from_anchor": {"x_px": -100, "y_px": 0},
        },
    },
    {
        "id": "notepad_unsaved_changes_enregistrer",
        "title_patterns": ("bloc-notes", "notepad"),
        "anchor_label": ["Ne pas enregistrer", "Don't Save"],
        "target_label": "Enregistrer",
        "geometry_hint": {
            "region": "bottom-center",
            "min_x_norm": 0.30,
            "min_y_norm": 0.50,
            "max_x_norm": 0.85,
            "max_y_norm": 1.0,
            # Target sits 120 px to the left of the anchor, same row.
            "offset_from_anchor": {"x_px": -120, "y_px": 0},
        },
    },
]
|
||||
|
||||
|
||||
def find_entry_for_title(title: str) -> Optional[Dict[str, Any]]:
    """Look up the catalog entry that applies to a window title.

    Matching is a case-insensitive substring test of each entry's
    ``title_patterns`` against ``title``; entries are scanned in catalog
    order and the first hit wins.

    Args:
        title: current window title (e.g. "Enregistrer sous"). Falsy values
            (empty string, ``None``) are tolerated.

    Returns:
        The matching catalog entry, or ``None`` when nothing matches.
        Never raises for a missing match — that is a normal outcome.
    """
    if not title:
        return None

    haystack = title.lower()

    def _applies(candidate: Dict[str, Any]) -> bool:
        # Empty/None patterns are skipped so they can never match everything.
        needles = candidate.get("title_patterns") or ()
        return any(needle and needle.lower() in haystack for needle in needles)

    return next(
        (candidate for candidate in ANCHOR_ENTRIES if _applies(candidate)),
        None,
    )
|
||||
292
agent_v0/agent_v1/core/anchor_relative.py
Normal file
292
agent_v0/agent_v1/core/anchor_relative.py
Normal file
@@ -0,0 +1,292 @@
|
||||
"""Localisation par triangulation depuis une ancre visuelle.
|
||||
|
||||
Module standalone Phase 1 — non branché au runtime.
|
||||
|
||||
Principe : étant donnée une ancre texte fiable (ex. "Annuler"),
|
||||
localiser une cible voisine ("Enregistrer") par offset géométrique.
|
||||
Validation optionnelle par cross-check du label cible.
|
||||
|
||||
Détecteur injectable (`detector=`) pour faciliter les tests offline ;
|
||||
au runtime (Phase 2), on injectera `ActionExecutorV1._find_text_on_screen`.
|
||||
|
||||
Pas de dépendance nouvelle. Pas de VLM, pas d'UIA, pas de persistance.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Callable, Dict, Optional, Tuple
|
||||
|
||||
# Type alias: a detector takes (screenshot_b64, label) and returns the
# (x_px, y_px) pixel position where the label was found, or None.
DetectorFn = Callable[[str, str], Optional[Tuple[int, int]]]
|
||||
|
||||
|
||||
@dataclass
class AnchorMatch:
    """Result of an anchor-relative lookup.

    Every field is populated even when ``found`` is False: coordinates that
    could not be computed are set to 0.0 by the producer, ``reason`` names
    the outcome, and ``evidence`` carries audit details.
    """

    # True when a usable target candidate was produced.
    found: bool
    # Target position as a fraction of the reference width/height
    # (pixel / screen size — a 0..1 ratio, not a 0..100 percentage).
    target_x_pct: float
    target_y_pct: float
    # Anchor position, same normalisation as the target.
    anchor_x_pct: float
    anchor_y_pct: float
    # Confidence score assigned by the resolver (0.0 when not found).
    confidence: float
    # Machine-readable outcome code (e.g. "anchor_not_found").
    reason: str
    # Free-form audit trail (labels tried, pixel positions, distances, ...).
    evidence: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
def _default_detector(screenshot_b64: str, label: str) -> Optional[Tuple[int, int]]:
    """Default text detector: render ``label`` with a font and template-match it.

    The label is rendered in black on white at several font sizes and each
    rendering is searched in the grayscale screenshot with
    ``cv2.matchTemplate`` (``TM_CCOEFF_NORMED``). According to the original
    author's note this mirrors ``ActionExecutorV1._find_text_on_screen``
    without depending on an executor instance.

    Args:
        screenshot_b64: base64-encoded image that ``cv2.imdecode`` can read.
        label: text to look for.

    Returns:
        ``(x_px, y_px)`` of the centre of the best match when its score
        reaches the acceptance threshold, otherwise ``None``. ``None`` is
        also returned when PIL/cv2/numpy are unavailable, when an argument
        is empty, or when the screenshot cannot be decoded.
    """
    # Optional heavy dependencies: absence simply disables the detector.
    try:
        from PIL import Image, ImageDraw, ImageFont
        import cv2
        import numpy as np
    except ImportError:
        return None

    if not label or not screenshot_b64:
        return None

    # Best-effort decode: any failure (bad base64, corrupt image, cv2 error)
    # means "nothing detected" rather than a crash in the caller.
    try:
        img_bytes = base64.b64decode(screenshot_b64)
        img_array = np.frombuffer(img_bytes, dtype=np.uint8)
        screenshot_bgr = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
        if screenshot_bgr is None:
            return None
        gray = cv2.cvtColor(screenshot_bgr, cv2.COLOR_BGR2GRAY)
    except Exception:
        return None

    font_paths = [
        "C:/Windows/Fonts/arial.ttf",
        "C:/Windows/Fonts/segoeui.ttf",
        "C:/Windows/Fonts/tahoma.ttf",
        "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
        "/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf",
    ]

    def _load_font(size: int):
        """Return ``(font, scalable)``; ``scalable`` is False for the fallback."""
        for fp in font_paths:
            try:
                return ImageFont.truetype(fp, size), True
            except OSError:  # IOError is an alias of OSError on Python 3
                continue
        return ImageFont.load_default(), False

    margin = 3  # white border (px) kept around the rendered text
    threshold = 0.75  # single source of truth: acceptance AND early exit
    best_match: Optional[Tuple[int, int]] = None
    best_val = 0.0

    # Most common UI sizes first so the early exit triggers quickly.
    for font_size in (14, 16, 18, 20, 22, 24, 12, 26, 28, 10):
        font, scalable = _load_font(font_size)

        # Measure the rendered text on a throw-away 1x1 canvas.
        probe = ImageDraw.Draw(Image.new("L", (1, 1), 255))
        bbox = probe.textbbox((0, 0), label, font=font)
        text_w = bbox[2] - bbox[0] + 2 * margin
        text_h = bbox[3] - bbox[1] + 2 * margin

        # The template must be strictly smaller than the screenshot.
        fits = 0 < text_w < gray.shape[1] and 0 < text_h < gray.shape[0]
        if fits:
            text_img = Image.new("L", (text_w, text_h), 255)
            # Compensate for the bbox origin: the ink box usually starts
            # below/right of the drawing origin, so drawing at a fixed
            # (margin, margin) would shift the text and clip its bottom
            # whenever the bbox top offset exceeds the margin.
            ImageDraw.Draw(text_img).text(
                (margin - bbox[0], margin - bbox[1]), label, fill=0, font=font
            )
            template = np.array(text_img)

            result = cv2.matchTemplate(gray, template, cv2.TM_CCOEFF_NORMED)
            _, max_val, _, max_loc = cv2.minMaxLoc(result)
            if max_val > best_val:
                best_val = max_val
                # Report the centre of the matched rectangle.
                best_match = (
                    max_loc[0] + template.shape[1] // 2,
                    max_loc[1] + template.shape[0] // 2,
                )
            if max_val >= threshold:
                # Good enough: an accepted match stops the search.
                break

        if not scalable:
            # The fallback font ignores ``font_size``: every further pass
            # would render and match the exact same template.
            break

    if best_match and best_val >= threshold:
        return best_match
    return None
|
||||
|
||||
|
||||
def _try_detect(
    detector: DetectorFn,
    screenshot_b64: str,
    labels: Any,
) -> Tuple[Optional[Tuple[int, int]], str]:
    """Run ``detector`` on each candidate label until one is found.

    ``labels`` may be a single string or an iterable of strings (``None``
    is treated as "no candidates"). Candidates are tried in order and the
    detector is not called again after the first hit.

    Returns:
        ``(position_px, matching_label)`` on success, ``(None, "")`` otherwise.
    """
    candidates = [labels] if isinstance(labels, str) else list(labels or [])
    # Lazy generator: the detector runs only as far as the first truthy result.
    attempts = ((detector(screenshot_b64, name), name) for name in candidates)
    return next(((hit, name) for hit, name in attempts if hit), (None, ""))
|
||||
|
||||
|
||||
def _is_in_zone(
    x_norm: float,
    y_norm: float,
    geometry_hint: Dict[str, Any],
) -> bool:
    """Tell whether ``(x_norm, y_norm)`` lies inside the hint's rectangle.

    Bounds are read from ``min_x_norm`` / ``max_x_norm`` / ``min_y_norm`` /
    ``max_y_norm``; a missing bound defaults to the full 0.0..1.0 range.
    Both edges are inclusive.
    """
    defaults = (
        ("min_x_norm", 0.0),
        ("max_x_norm", 1.0),
        ("min_y_norm", 0.0),
        ("max_y_norm", 1.0),
    )
    left, right, top, bottom = (
        float(geometry_hint.get(key, fallback)) for key, fallback in defaults
    )
    if not left <= x_norm <= right:
        return False
    return top <= y_norm <= bottom
|
||||
|
||||
|
||||
def find_target_via_anchor(
    anchor_label: Any,
    target_label: str,
    geometry_hint: Dict[str, Any],
    screenshot_b64: str,
    screen_width: int,
    screen_height: int,
    detector: Optional[DetectorFn] = None,
    cross_check_target: bool = True,
    cross_check_tolerance_px: float = 50.0,
) -> AnchorMatch:
    """Locate ``target_label`` by triangulating from ``anchor_label``.

    Steps: detect the anchor, check it lies in the allowed zone, derive the
    target position from the configured pixel offset, then optionally
    cross-check by detecting the target label itself.

    Args:
        anchor_label: a label (str) or an iterable of labels tried in order
            (e.g. ``["Annuler", "Cancel"]`` for an FR→EN fallback). ``None``
            or an empty iterable yields ``anchor_not_found``.
        target_label: target label (e.g. "Enregistrer"). Only used for the
            cross-check.
        geometry_hint: dict with the valid anchor zone (``min_*_norm`` /
            ``max_*_norm``) and ``offset_from_anchor`` (``x_px`` / ``y_px``).
            ``None`` is treated as an empty hint (full-screen zone, zero
            offset).
        screenshot_b64: base64-encoded capture, passed through to the detector.
        screen_width: reference width in pixels (screen or window).
        screen_height: reference height in pixels.
        detector: callable ``(b64, label) -> (x_px, y_px) | None``. Defaults
            to ``_default_detector``; inject a mock in tests.
        cross_check_target: when True (default), also try to detect
            ``target_label`` and adjust the confidence accordingly.
        cross_check_tolerance_px: maximum distance, in pixels, between the
            detected target and the offset-derived position for the
            cross-check to count as a confirmation (default 50).

    Returns:
        Always an ``AnchorMatch`` (never ``None``). Coordinates are
        fractions of ``screen_width`` / ``screen_height`` (0.0 when the
        corresponding dimension is 0). ``reason`` is one of:

        * ``anchor_not_found`` / ``anchor_out_of_zone`` /
          ``target_out_of_bounds`` — ``found=False``, confidence 0.0;
        * ``anchor_only`` — cross-check skipped, confidence 0.5;
        * ``anchor_only_target_not_visible`` — cross-check found nothing,
          confidence 0.5;
        * ``anchor_plus_target_cross_check`` — target detected within the
          tolerance, position refined to the detection, confidence 0.85;
        * ``anchor_ok_target_drift_high`` — target detected too far away,
          offset position kept, confidence 0.4.
    """
    det = detector or _default_detector
    hint: Dict[str, Any] = geometry_hint or {}

    # Materialise the candidates ONCE: the same list feeds both the evidence
    # and the detection, so a one-shot iterator is not exhausted by the
    # evidence before the search runs, and None is tolerated.
    if isinstance(anchor_label, str):
        anchor_candidates = [anchor_label]
    else:
        anchor_candidates = list(anchor_label or [])

    ev: Dict[str, Any] = {
        "anchor_candidates_tried": anchor_candidates,
        "target_label": target_label,
        "geometry_hint": geometry_hint,
    }

    def _normalise(x_px: float, y_px: float) -> Tuple[float, float]:
        # A zero dimension maps to 0.0 instead of raising ZeroDivisionError.
        x_pct = x_px / float(screen_width) if screen_width else 0.0
        y_pct = y_px / float(screen_height) if screen_height else 0.0
        return x_pct, y_pct

    def _not_found(
        reason: str,
        anchor_pct: Tuple[float, float] = (0.0, 0.0),
        target_pct: Tuple[float, float] = (0.0, 0.0),
    ) -> AnchorMatch:
        # Single place that builds every failure result.
        return AnchorMatch(
            found=False,
            target_x_pct=target_pct[0],
            target_y_pct=target_pct[1],
            anchor_x_pct=anchor_pct[0],
            anchor_y_pct=anchor_pct[1],
            confidence=0.0,
            reason=reason,
            evidence=ev,
        )

    # 1. Anchor detection (candidates in order, e.g. FR then EN).
    anchor_px, matched_anchor_label = _try_detect(det, screenshot_b64, anchor_candidates)
    if not anchor_px:
        return _not_found("anchor_not_found")

    ax, ay = anchor_px
    anchor_pct = _normalise(ax, ay)
    ev["anchor_matched_label"] = matched_anchor_label
    ev["anchor_px"] = [ax, ay]
    ev["anchor_norm"] = list(anchor_pct)

    # 2. Geometric guard: the anchor must sit in the allowed zone; its
    #    position is still reported to help debugging.
    if not _is_in_zone(anchor_pct[0], anchor_pct[1], hint):
        return _not_found("anchor_out_of_zone", anchor_pct=anchor_pct)

    # 3. Target position derived from the anchor plus the configured offset.
    offset = hint.get("offset_from_anchor") or {}
    target_x_px = ax + int(offset.get("x_px", 0))
    target_y_px = ay + int(offset.get("y_px", 0))
    target_pct = _normalise(target_x_px, target_y_px)
    ev["target_px_from_offset"] = [target_x_px, target_y_px]

    if not (0.0 <= target_pct[0] <= 1.0 and 0.0 <= target_pct[1] <= 1.0):
        # The offset leads off-screen: report where, but give no candidate.
        return _not_found(
            "target_out_of_bounds", anchor_pct=anchor_pct, target_pct=target_pct
        )

    # 4. Optional cross-check: try to detect the target label itself.
    confidence = 0.5  # anchor alone
    reason = "anchor_only"
    if cross_check_target and target_label:
        target_pos = det(screenshot_b64, target_label)
        if target_pos:
            tx, ty = target_pos
            dist_px = ((tx - target_x_px) ** 2 + (ty - target_y_px) ** 2) ** 0.5
            ev["target_detected_px"] = [tx, ty]
            ev["target_cross_check_dist_px"] = round(dist_px, 1)
            if dist_px <= cross_check_tolerance_px:
                # Confirmed: refine onto the detected position.
                target_pct = _normalise(tx, ty)
                confidence = 0.85
                reason = "anchor_plus_target_cross_check"
            else:
                # Detected far from the expected spot: suspicious. Keep the
                # offset-derived position but lower the confidence.
                confidence = 0.4
                reason = "anchor_ok_target_drift_high"
        else:
            # Target not visible: keep the offset position at confidence 0.5.
            ev["target_cross_check_dist_px"] = None
            reason = "anchor_only_target_not_visible"

    return AnchorMatch(
        found=True,
        target_x_pct=target_pct[0],
        target_y_pct=target_pct[1],
        anchor_x_pct=anchor_pct[0],
        anchor_y_pct=anchor_pct[1],
        confidence=confidence,
        reason=reason,
        evidence=ev,
    )
|
||||
317
tests/unit/test_anchor_relative.py
Normal file
317
tests/unit/test_anchor_relative.py
Normal file
@@ -0,0 +1,317 @@
|
||||
"""Tests unitaires anchor_relative — Phase 1 standalone.
|
||||
|
||||
Couvre :
|
||||
- catalog : match sur titre Save As, no-match sur titre random.
|
||||
- find_target_via_anchor : ancre absente, ancre hors zone, ancre OK
|
||||
bas-droite + offset, cross-check target OK, cross-check absent.
|
||||
|
||||
Tous les tests utilisent un détecteur mocké : pas d'OCR réel, pas
|
||||
d'image, pas de dépendance externe (cv2/PIL). Exécution ~10ms total.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict, Optional, Tuple
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# Put the directory two levels above this file's folder at the front of
# sys.path so the `agent_v0...` imports below resolve when the test is run
# directly (presumably the repository root — TODO confirm layout).
_ROOT = str(Path(__file__).resolve().parents[2])
if _ROOT not in sys.path:
    sys.path.insert(0, _ROOT)
|
||||
|
||||
|
||||
from agent_v0.agent_v1.core.anchor_catalog import ( # noqa: E402
|
||||
ANCHOR_ENTRIES,
|
||||
find_entry_for_title,
|
||||
)
|
||||
from agent_v0.agent_v1.core.anchor_relative import ( # noqa: E402
|
||||
AnchorMatch,
|
||||
find_target_via_anchor,
|
||||
)
|
||||
|
||||
|
||||
pytestmark = pytest.mark.unit
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers — fabrique de détecteurs mockés
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _make_detector(positions: Dict[str, Optional[Tuple[int, int]]]):
    """Build a fake detector that maps a label to a fixed position.

    The returned callable ignores the screenshot and answers with
    ``positions[label]``, or ``None`` for unknown labels.
    """
    return lambda _screenshot_b64, label: positions.get(label)
|
||||
|
||||
|
||||
# Typical geometry for the Notepad "Save As" dialog: the anchor is expected in
# the bottom-right area and the target sits 100 px to its left.
_SAVE_AS_GEOMETRY = {
    "region": "bottom-right",
    "min_x_norm": 0.55,
    "min_y_norm": 0.75,
    "max_x_norm": 1.0,
    "max_y_norm": 1.0,
    "offset_from_anchor": {"x_px": -100, "y_px": 0},
}

# Reference screen size (pixels) used by every test below.
_SCREEN_W, _SCREEN_H = 1920, 1080
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Catalog
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_catalog_match_save_as_title():
    """The 'Enregistrer sous' title resolves to the notepad_save_as entry."""
    exact = find_entry_for_title("Enregistrer sous")
    assert exact is not None
    assert exact["id"] == "notepad_save_as_enregistrer"
    assert "Annuler" in exact["anchor_label"]
    assert exact["target_label"] == "Enregistrer"

    # Case-insensitive substring match against a realistic Win11 title.
    embedded = find_entry_for_title("*test - Bloc-notes — Enregistrer sous")
    assert embedded is not None
    assert embedded["id"] == "notepad_save_as_enregistrer"
|
||||
|
||||
|
||||
def test_catalog_no_match_random_title():
    """Unknown, empty, or missing titles yield None without raising."""
    # None is included on purpose: the lookup tolerates it.
    for unknown_title in ("Firefox - Mozilla", "", None):
        assert find_entry_for_title(unknown_title) is None
|
||||
|
||||
|
||||
def test_catalog_entries_are_well_formed():
    """Every catalog entry carries the required fields (regression guard)."""
    required = {"id", "title_patterns", "anchor_label", "target_label", "geometry_hint"}
    zone_keys = ("min_x_norm", "max_x_norm", "min_y_norm", "max_y_norm")

    for entry in ANCHOR_ENTRIES:
        missing = required.difference(entry)
        assert not missing, f"entry {entry.get('id')} missing fields: {missing}"

        gh = entry["geometry_hint"]
        assert "offset_from_anchor" in gh
        for key in zone_keys:
            assert key in gh, f"{entry['id']}.geometry_hint missing {key}"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# find_target_via_anchor — cas d'erreur
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_find_target_anchor_absent_returns_not_found():
    """Anchor never detected → found=False with reason 'anchor_not_found'."""
    call_args = dict(
        anchor_label=["Annuler", "Cancel"],
        target_label="Enregistrer",
        geometry_hint=_SAVE_AS_GEOMETRY,
        screenshot_b64="fake_b64",
        screen_width=_SCREEN_W,
        screen_height=_SCREEN_H,
        detector=_make_detector({}),  # no label ever matches
    )
    outcome = find_target_via_anchor(**call_args)

    assert isinstance(outcome, AnchorMatch)
    assert outcome.found is False
    assert outcome.reason == "anchor_not_found"
    assert outcome.confidence == 0.0
    assert outcome.target_x_pct == 0.0
    assert outcome.target_y_pct == 0.0
    assert "anchor_candidates_tried" in outcome.evidence
|
||||
|
||||
|
||||
def test_find_target_anchor_out_of_geometry_zone():
    """Anchor found outside the bottom-right zone → found=False, explicit reason."""
    # "Annuler" reported in the top-left corner (likely a false positive
    # coming from the File menu).
    anchor_x, anchor_y = 100, 50
    call_args = dict(
        anchor_label=["Annuler", "Cancel"],
        target_label="Enregistrer",
        geometry_hint=_SAVE_AS_GEOMETRY,
        screenshot_b64="fake_b64",
        screen_width=_SCREEN_W,
        screen_height=_SCREEN_H,
        detector=_make_detector({"Annuler": (anchor_x, anchor_y)}),
    )
    outcome = find_target_via_anchor(**call_args)

    assert outcome.found is False
    assert outcome.reason == "anchor_out_of_zone"
    # The anchor WAS located, so its position is reported for debugging.
    assert outcome.anchor_x_pct == pytest.approx(anchor_x / _SCREEN_W)
    assert outcome.anchor_y_pct == pytest.approx(anchor_y / _SCREEN_H)
    assert outcome.evidence.get("anchor_matched_label") == "Annuler"
|
||||
|
||||
|
||||
def test_find_target_offset_out_of_bounds_returns_not_found():
    """An offset that leaves the screen yields no clickable candidate."""
    # Same zone as Save As, but the offset pushes the target past the right edge.
    off_screen_geometry = dict(_SAVE_AS_GEOMETRY)
    off_screen_geometry["offset_from_anchor"] = {"x_px": 400, "y_px": 0}

    outcome = find_target_via_anchor(
        anchor_label=["Annuler", "Cancel"],
        target_label="Enregistrer",
        geometry_hint=off_screen_geometry,
        screenshot_b64="fake_b64",
        screen_width=_SCREEN_W,
        screen_height=_SCREEN_H,
        detector=_make_detector({"Annuler": (1700, 900)}),
    )

    assert outcome.found is False
    assert outcome.reason == "target_out_of_bounds"
    assert outcome.target_x_pct > 1.0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# find_target_via_anchor — cas nominaux
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_find_target_bottom_right_anchor_with_offset_returns_candidate():
    """Anchor inside the zone + offset → correct candidate coordinates."""
    # "Annuler" at (1600, 900); the target label is not visible, so the
    # cross-check runs but finds nothing.
    anchor_x, anchor_y = 1600, 900
    call_args = dict(
        anchor_label=["Annuler", "Cancel"],
        target_label="Enregistrer",
        geometry_hint=_SAVE_AS_GEOMETRY,
        screenshot_b64="fake_b64",
        screen_width=_SCREEN_W,
        screen_height=_SCREEN_H,
        detector=_make_detector({"Annuler": (anchor_x, anchor_y)}),
        cross_check_target=True,
    )
    outcome = find_target_via_anchor(**call_args)

    assert outcome.found is True
    # Expected target: 1600 - 100 = 1500 px → 1500/1920 = 0.78125
    assert outcome.target_x_pct == pytest.approx(1500 / _SCREEN_W)
    assert outcome.target_y_pct == pytest.approx(anchor_y / _SCREEN_H)
    assert outcome.anchor_x_pct == pytest.approx(anchor_x / _SCREEN_W)
    # Cross-check attempted but target absent → confidence stays at 0.5.
    assert outcome.confidence == pytest.approx(0.5)
|
||||
|
||||
|
||||
def test_find_target_anchor_fallback_en_when_fr_absent():
    """Falls back to the EN label ('Cancel') when 'Annuler' is absent."""
    english_only = _make_detector({"Cancel": (1700, 950)})
    call_args = dict(
        anchor_label=["Annuler", "Cancel"],
        target_label="Save",
        geometry_hint=_SAVE_AS_GEOMETRY,
        screenshot_b64="fake_b64",
        screen_width=_SCREEN_W,
        screen_height=_SCREEN_H,
        detector=english_only,
        cross_check_target=False,
    )
    outcome = find_target_via_anchor(**call_args)

    assert outcome.found is True
    assert outcome.evidence["anchor_matched_label"] == "Cancel"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# find_target_via_anchor — cross-check
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_find_target_with_cross_check_target_visible_high_confidence():
    """Target detected close to the offset position → confidence > 0.8."""
    # "Annuler" at (1600, 900) and "Enregistrer" at (1505, 902): very close
    # to the computed position (1500, 900), about 5 px away.
    detected_target = (1505, 902)
    call_args = dict(
        anchor_label=["Annuler", "Cancel"],
        target_label="Enregistrer",
        geometry_hint=_SAVE_AS_GEOMETRY,
        screenshot_b64="fake_b64",
        screen_width=_SCREEN_W,
        screen_height=_SCREEN_H,
        detector=_make_detector(
            {"Annuler": (1600, 900), "Enregistrer": detected_target}
        ),
        cross_check_target=True,
    )
    outcome = find_target_via_anchor(**call_args)

    assert outcome.found is True
    assert outcome.confidence > 0.8
    assert outcome.reason == "anchor_plus_target_cross_check"
    # The position is refined onto the detected target.
    assert outcome.target_x_pct == pytest.approx(detected_target[0] / _SCREEN_W)
    assert outcome.target_y_pct == pytest.approx(detected_target[1] / _SCREEN_H)
    assert outcome.evidence["target_cross_check_dist_px"] < 10
|
||||
|
||||
|
||||
def test_find_target_cross_check_absent_documented_behaviour():
    """Target not visible → documented behaviour: found=True, confidence=0.5.

    Design decision (per the unified design, §3, confidence.anchor_only=0.5):
    a usable candidate is still returned, but with reduced confidence, so
    the Phase 2 orchestrator can decide to ask for supervision.
    """
    anchor_only = _make_detector({"Annuler": (1600, 900)})  # target absent
    call_args = dict(
        anchor_label=["Annuler", "Cancel"],
        target_label="Enregistrer",
        geometry_hint=_SAVE_AS_GEOMETRY,
        screenshot_b64="fake_b64",
        screen_width=_SCREEN_W,
        screen_height=_SCREEN_H,
        detector=anchor_only,
        cross_check_target=True,
    )
    outcome = find_target_via_anchor(**call_args)

    assert outcome.found is True
    assert outcome.confidence == pytest.approx(0.5)
    assert outcome.reason == "anchor_only_target_not_visible"
    assert outcome.evidence.get("target_cross_check_dist_px") is None
|
||||
|
||||
|
||||
def test_find_target_cross_check_target_far_degrades_confidence():
    """Target detected far from the expected offset → degraded confidence."""
    # "Annuler" at (1600, 900), so the expected target is (1500, 900).
    # "Enregistrer" is reported at (300, 200): clearly a false positive (menu).
    misleading = _make_detector(
        {"Annuler": (1600, 900), "Enregistrer": (300, 200)}
    )
    call_args = dict(
        anchor_label=["Annuler", "Cancel"],
        target_label="Enregistrer",
        geometry_hint=_SAVE_AS_GEOMETRY,
        screenshot_b64="fake_b64",
        screen_width=_SCREEN_W,
        screen_height=_SCREEN_H,
        detector=misleading,
        cross_check_target=True,
    )
    outcome = find_target_via_anchor(**call_args)

    assert outcome.found is True
    assert outcome.confidence < 0.5
    assert outcome.reason == "anchor_ok_target_drift_high"
    # The offset-derived position is kept, not the false positive's.
    assert outcome.target_x_pct == pytest.approx(1500 / _SCREEN_W)
|
||||
|
||||
|
||||
def test_find_target_cross_check_disabled_returns_anchor_only_confidence():
    """cross_check_target=False → confidence=0.5, reason='anchor_only'.

    The target label IS visible to the fake detector here, so the test fails
    if the cross-check runs anyway: the detector call log must contain the
    anchor only, and the position must stay the offset-derived one.
    """
    lookup = _make_detector({"Annuler": (1600, 900), "Enregistrer": (1505, 902)})
    requested_labels = []

    def recording_detector(screenshot_b64, label):
        # Log every label the resolver asks for, then answer like the fake.
        requested_labels.append(label)
        return lookup(screenshot_b64, label)

    result = find_target_via_anchor(
        anchor_label="Annuler",  # a plain string is accepted too
        target_label="Enregistrer",
        geometry_hint=_SAVE_AS_GEOMETRY,
        screenshot_b64="fake_b64",
        screen_width=_SCREEN_W,
        screen_height=_SCREEN_H,
        detector=recording_detector,
        cross_check_target=False,
    )

    assert result.found is True
    assert result.confidence == pytest.approx(0.5)
    assert result.reason == "anchor_only"
    # The detector was called ONLY for the anchor, never for the target.
    assert requested_labels == ["Annuler"]
    assert "target_detected_px" not in result.evidence
    # Position comes from the offset (1600 - 100), not from the visible target.
    assert result.target_x_pct == pytest.approx(1500 / _SCREEN_W)
|
||||
Reference in New Issue
Block a user