Files
rpa_vision_v3/tests/unit/test_text_mismatch_empty_observed.py
Dom 7df51d2c79 snapshot: WIP 5j replay reliability (B1 watchdog + dialog handlers + grounding drift)
Snapshot avant correction du blocage relance Léa (3 incidents 24h: SSH refusé,
polls morts ×2). Point de rollback stable.

Contenu:
- agent_v1/core/executor.py: 5 patchs dialog handling (saveas drift, close_tab
  hotkey fallback, confirm_save Unicode apostrophe, foreground dialog
  recontextualization, runtime_dialog in-loop) + helpers normalize_window_hint,
  requires_post_verify_window_transition
- agent_v1/core/grounding.py: garde drift template fix (fallback_x/y plumbed)
- server_v1/replay_watchdog.py (NEW): orphan watchdog B1, scan 10s timeout 30s
- server_v1/api_stream.py: dispatched_action plumbing, watchdog lifespan,
  metrics endpoint
- server_v1/replay_engine.py: _schedule_retry préserve original_action +
  dispatched_action
- stream_processor.py: gardes _infer_tab_switch_target (no false switch_tab
  on save_as dialog open) + _attach_expected_window_before
- tests/integration: test_replay_watchdog.py (8 cas), test_stream_processor.py
- tests/unit: test_executor_verify_window_guard.py (start_button, close_tab,
  runtime_dialog, post_verify, transition fallbacks)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-24 16:48:37 +02:00

84 lines
3.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Tests pour `_should_reject_on_text_mismatch` — patch 2026-05-23 :
distinguer `observed=''` (OCR n'a rien lu, ambigu) de `observed='X'`
(autre texte lu = mismatch confirmé) dans le pré-check OCR.
Brief Codex 2026-05-23 08:55 : le crop bbox SoM précis (50 × 48 px)
sur un onglet Notepad moderne donne `observed=''` car EasyOCR n'a pas
suffisamment de signal (texte peu contrasté, zone trop petite). Le
patch précédent rejetait ce cas comme mismatch — alors qu'aucune
preuve d'un mauvais clic n'existe. On ne rejette plus que quand l'OCR
a effectivement lu autre chose que la cible attendue.
Le faux succès OBS Studio reste bloqué : (1) son OCR retournait
`'ue audio disponible GUI OBS Studio…'` = non-vide → rejet conservé ;
(2) la garde drift agent posée sur ANCHOR-TM bloque déjà ce match.
"""
from __future__ import annotations
import sys
from pathlib import Path
# Resolve the directory three levels above this test file and put it first
# on sys.path before the project import that follows.
# NOTE(review): presumably this is the repository root and is what makes the
# `agent_v0` package importable when the test runs without an installed
# package — confirm against the actual repository layout.
ROOT = Path(__file__).parent.parent.parent
sys.path.insert(0, str(ROOT))
from agent_v0.server_v1.resolve_engine import ( # noqa: E402
_should_reject_on_text_mismatch,
)
class TestShouldRejectOnTextMismatch:
    """Truth table for `_should_reject_on_text_mismatch`.

    The tests below expect a rejection only when the match is invalid AND
    the OCR actually read some non-blank text; every other combination
    must not reject.
    """

    def test_valid_passes(self):
        """Nominal case: the OCR saw the target, so nothing is rejected."""
        rejected = _should_reject_on_text_mismatch(
            is_valid=True,
            observed="Enregistrer sous",
        )
        assert not rejected

    def test_invalid_with_text_rejects(self):
        """Historical case 0745: the OCR reads '9 ?', which does not match
        'Enregistrer sous', so the rejection is confirmed."""
        rejected = _should_reject_on_text_mismatch(
            is_valid=False,
            observed="9 ?",
        )
        assert rejected

    def test_invalid_with_obs_studio_rejects(self):
        """Case 0756: the OCR reads OBS Studio text, so the rejection is
        confirmed."""
        rejected = _should_reject_on_text_mismatch(
            is_valid=False,
            observed="ue audio disponible GUI OBS Studio",
        )
        assert rejected

    def test_invalid_with_empty_observed_does_not_reject(self):
        """Case 0855: the OCR read nothing (area too small / low contrast).

        That is ambiguous, not a confirmed mismatch, so the server
        resolution is preserved; the agent-side drift guard protects
        downstream.
        """
        rejected = _should_reject_on_text_mismatch(
            is_valid=False,
            observed="",
        )
        assert not rejected

    def test_invalid_with_whitespace_only_does_not_reject(self):
        """A lone space is treated the same as an empty observation."""
        rejected = _should_reject_on_text_mismatch(
            is_valid=False,
            observed=" ",
        )
        assert not rejected

    def test_invalid_with_newline_only_does_not_reject(self):
        """Newline/tab-only output is also treated as an empty observation."""
        rejected = _should_reject_on_text_mismatch(
            is_valid=False,
            observed="\n\t",
        )
        assert not rejected

    def test_invalid_with_none_observed_does_not_reject(self):
        """Robustness: `observed=None` (degenerate case, OCR library
        missing) must not crash and must not reject."""
        rejected = _should_reject_on_text_mismatch(
            is_valid=False,
            observed=None,
        )
        assert not rejected

    def test_valid_with_empty_passes(self):
        """`is_valid=True` with an empty observation.

        This normally cannot happen through `_text_match_fuzzy` (which
        returns False on empty input), but the logic is kept consistent:
        when `is_valid` is True nothing is rejected, whatever `observed` is.
        """
        rejected = _should_reject_on_text_mismatch(
            is_valid=True,
            observed="",
        )
        assert not rejected