feat(validator): R1 MVP P0 — OcrRoiChecker + orchestrator (flag OFF default)

Package core/validation/ minimal :
- result.py : Verdict, FailureCategory, ValidationResult
- pixel_diff_checker.py : wrapper de ReplayVerifier.verify_action
- ocr_roi_checker.py : ROI 80px autour du clic, détecte WRONG_APPLICATION
  via SUSPECT_TOKENS (edge/https/explorateur de fichiers/…)
- orchestrator.py : Validator dispatch action_type → checkers + agrégation

Wiring api_stream.py:3646 derrière RPA_VALIDATOR_V2_ENABLED (OFF par défaut).
Si verdict ≠ COMPLETE, override report.success=False et expose failure_category
dans result_entry. Zéro régression lorsque le flag est OFF.

Tests :
- tests/unit/test_validator_v2.py : 13 tests (Checkers + Validator + sérialisation)
- tests/integration/test_validator_step10.py : 2 tests reproduisant le bug
  replay_sess_4c38dbb8 / act_raw_6c1432b3 (clic Enregistrer fait basculer
  vers Explorateur de fichiers) — Validator retourne WRONG_APPLICATION

Activation pour test live : RPA_VALIDATOR_V2_ENABLED=true

Cf. docs/recherche/SPEC_VALIDATOR_MATRICE.md, AXE_B2_DEEP_VALIDATOR.md.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dom
2026-05-24 17:52:06 +02:00
parent bd100bc538
commit 1b4e64960b
8 changed files with 851 additions and 0 deletions

View File

@@ -0,0 +1,130 @@
"""Repro offline du bug fonctionnel : replay_sess_4c38dbb8 / act_raw_6c1432b3.
L'agent rapporte success=True après avoir cliqué sur le bouton "Enregistrer"
du dialog "Enregistrer sous", mais la fenêtre active après le clic est
"rpa_vision : Explorateur de fichiers" — l'app a basculé hors du Bloc-notes.
Le Validator MVP P0 doit attribuer failure_category=WRONG_APPLICATION via
OcrRoiChecker (token suspect 'explorateur de fichiers' dans la ROI) et donc
override success → False.
Stratégie de fixture :
- screenshot_after synthétique : 1920×1080 avec "rpa_vision : Explorateur de fichiers"
au centre (= bug observé : la fenêtre est passée à l'Explorateur).
- screenshot_before : dialog "Enregistrer sous" (texte centré).
- action : clic (type "click") sur "Enregistrer" en milieu-bas de l'écran (x_pct=0.5289, y_pct=0.7913).
- OCR injecté : fake qui retourne le texte du screenshot_after.
"""
from __future__ import annotations
import base64
import io
import pytest
pytestmark = [pytest.mark.integration]
def _png_b64(img) -> str:
    """Serialize *img* to PNG in memory and return the bytes as base64 text.

    ``img`` only needs a ``save(fileobj, format=...)`` method; the tests in
    this file pass PIL images.
    """
    stream = io.BytesIO()
    img.save(stream, format="PNG")
    png_bytes = stream.getvalue()
    return str(base64.b64encode(png_bytes), "ascii")
def _make_screenshot(text: str, color=(245, 245, 245), size=(1920, 1080)):
    """Build a plain PIL image with *text* drawn in black near its centre.

    The canvas defaults to 1920x1080, light grey. The text origin sits
    200 px to the left of and 8 px above the centre point.
    """
    from PIL import Image, ImageDraw

    canvas = Image.new("RGB", size, color=color)
    origin = (size[0] // 2 - 200, size[1] // 2 - 8)
    ImageDraw.Draw(canvas).text(origin, text, fill=(0, 0, 0))
    return canvas
@pytest.fixture
def bug_step10_fixtures():
    """Provide the offline inputs reproducing act_raw_6c1432b3.

    Returns a ``(before, after, action, result)`` tuple: two base64 PNG
    screenshots, the replayed click action, and the client-side result that
    reports ``success=True``. No real OCR runs here; each test injects a fake.
    """
    # Normalised click position, in the lower-middle part of the screen.
    # NOTE(review): presumably taken from replay_sess_4c38dbb8.failures.jsonl
    # (where the "Enregistrer" button was expected) — confirm.
    click_x, click_y = 0.5289, 0.7913

    target = {
        "by_text": "Enregistrer",
        "window_title": "Enregistrer sous",
    }
    action = {
        "type": "click",
        "action_id": "act_raw_6c1432b3",
        "by_text": "Enregistrer",
        "target_spec": target,
        "x_pct": click_x,
        "y_pct": click_y,
    }
    # The agent reports success=True: this is the bug under reproduction
    # (per the original note, the legacy pixel diff does not discriminate).
    result = {
        "success": True,
        "actual_position": {"x_pct": click_x, "y_pct": click_y},
    }

    before = _png_b64(_make_screenshot("Enregistrer sous"))
    after = _png_b64(_make_screenshot("rpa_vision : Explorateur de fichiers"))
    return before, after, action, result
def test_validator_detects_wrong_application_on_act_raw_6c1432b3(bug_step10_fixtures):
    """The Validator must report WRONG_APPLICATION despite a client success=True."""
    from core.validation import OcrRoiChecker, Validator, Verdict, FailureCategory

    before, after, action, result = bug_step10_fixtures

    def fake_ocr(crop):
        # Stand-in for EasyOCR: whatever crop is passed, return the title of
        # the window that took focus after the click (the file explorer).
        return "rpa_vision : Explorateur de fichiers"

    # Intended to mirror the Validator built by api_stream._get_validator_v2()
    # (not visible from this file).
    click_checkers = [OcrRoiChecker(ocr_fn=fake_ocr, radius_px=80)]
    validator = Validator(checkers={"click": click_checkers})

    vr = validator.validate(
        action=action,
        result=result,
        screenshot_before=before,
        screenshot_after=after,
        context={},
    )

    # Expected outcome: TERMINATE / WRONG_APPLICATION, triggered by the
    # suspect token 'explorateur de fichiers'.
    assert vr.verdict == Verdict.TERMINATE, (
        f"Verdict attendu TERMINATE, obtenu {vr.verdict} (reasoning={vr.reasoning})"
    )
    assert vr.failure_category == FailureCategory.WRONG_APPLICATION
    assert vr.confidence >= 0.85
    assert (
        "explorateur" in vr.reasoning.lower()
        or "explorateur" in vr.raw_evidence.get("roi_text", "").lower()
    )
def test_validator_complete_when_correct_window_active(bug_step10_fixtures):
    """Sanity check: when OCR reads 'Enregistrer' in the ROI, the verdict is COMPLETE."""
    from core.validation import OcrRoiChecker, Validator, Verdict

    # The fixture's "after" screenshot shows the wrong window; this test
    # replaces it with one showing the expected application.
    before, _after_bad, action, result = bug_step10_fixtures
    after_good = _png_b64(_make_screenshot("Document enregistre - Bloc-notes"))

    def fake_ocr(crop):
        # Contains the expected by_text ("Enregistrer").
        return "Bouton Enregistrer cliqué — Bloc-notes"

    validator = Validator(
        checkers={"click": [OcrRoiChecker(ocr_fn=fake_ocr, radius_px=80)]},
    )
    vr = validator.validate(
        action=action,
        result=result,
        screenshot_before=before,
        # Previously a third, throwaway screenshot was rendered here and
        # after_good was left unused.
        screenshot_after=after_good,
        context={},
    )

    assert vr.verdict == Verdict.COMPLETE
    assert vr.failure_category is None

View File

@@ -0,0 +1,249 @@
"""Tests unitaires du Validator V2 (P0 MVP).
Cf. SPEC_VALIDATOR_MATRICE.md, AXE_B2_DEEP_VALIDATOR.md.
"""
from __future__ import annotations
import base64
import io
from pathlib import Path
import pytest
pytestmark = pytest.mark.unit
def _png_b64(img) -> str:
    """Encode a PIL image as a base64 PNG string.

    PNG data always base64-encodes to text starting with ``iVBOR``.
    NOTE(review): the original note says ``_load_single_image`` relies on
    that prefix; that helper is not visible here — confirm.
    """
    with io.BytesIO() as sink:
        img.save(sink, format="PNG")
        encoded = base64.b64encode(sink.getvalue())
    return encoded.decode("ascii")
def _make_image(text: str = "", color=(255, 255, 255), size=(800, 600)):
    """Create a PIL image (800x600 white by default), optionally with text.

    When *text* is non-empty it is drawn in black, starting 100 px left of
    the horizontal centre, at the vertical centre. An empty *text* leaves the
    canvas blank.
    """
    from PIL import Image, ImageDraw

    canvas = Image.new("RGB", size, color=color)
    if not text:
        return canvas
    origin = (size[0] // 2 - 100, size[1] // 2)
    ImageDraw.Draw(canvas).text(origin, text, fill=(0, 0, 0))
    return canvas
# ----------------------------------------------------------------------------
# PixelDiffChecker
# ----------------------------------------------------------------------------
class _FakeVerifierResult:
    """Plain attribute bag standing in for a verifier result in these tests.

    Every constructor argument is stored unchanged under the same name.
    """

    def __init__(
        self,
        suggestion,
        changes_detected,
        confidence,
        detail="",
        change_area_pct=0.0,
        local_change_pct=0.0,
    ):
        # Required fields.
        self.suggestion, self.changes_detected = suggestion, changes_detected
        self.confidence = confidence
        # Optional fields with neutral defaults.
        self.detail = detail
        self.change_area_pct, self.local_change_pct = change_area_pct, local_change_pct
class _FakeReplayVerifier:
    """Test double whose ``verify_action`` always returns a canned result."""

    def __init__(self, result):
        self._canned = result

    def verify_action(self, action, result, screenshot_before, screenshot_after):
        """Ignore all arguments and return the object given at construction."""
        return self._canned
def test_pixel_diff_complete_when_changes_detected():
    """A 'continue' suggestion with detected changes yields COMPLETE."""
    from core.validation import PixelDiffChecker, Verdict

    canned = _FakeVerifierResult(
        suggestion="continue",
        changes_detected=True,
        confidence=0.85,
        detail="pixels changés",
    )
    checker = PixelDiffChecker(_FakeReplayVerifier(canned))

    outcome = checker.check({"type": "click"}, {"success": True}, "x", "y", {})

    assert outcome.verdict == Verdict.COMPLETE
    # The verifier's confidence is expected to come through as-is.
    assert outcome.confidence == pytest.approx(0.85)
    assert outcome.check_used == "pixel_diff"
def test_pixel_diff_continue_when_retry_suggested():
    """A 'retry' suggestion maps to CONTINUE with NO_VISUAL_CHANGE."""
    from core.validation import PixelDiffChecker, Verdict, FailureCategory

    canned = _FakeVerifierResult(
        suggestion="retry",
        changes_detected=False,
        confidence=0.7,
        detail="aucun changement",
    )
    checker = PixelDiffChecker(_FakeReplayVerifier(canned))

    outcome = checker.check({"type": "click"}, {"success": True}, "x", "y", {})

    assert outcome.verdict == Verdict.CONTINUE
    assert outcome.failure_category == FailureCategory.NO_VISUAL_CHANGE
def test_pixel_diff_handles_internal_exception():
    """An exception raised by the verifier is reported as CONTINUE, not propagated."""
    from core.validation import PixelDiffChecker, Verdict

    class _BadVerifier:
        # Accept positional and keyword arguments alike, so the stub always
        # fails with the intended RuntimeError rather than a TypeError caused
        # by how the checker happens to call it.
        def verify_action(self, *args, **kwargs):
            raise RuntimeError("boom")

    res = PixelDiffChecker(_BadVerifier()).check(
        {"type": "click"}, {"success": True}, "x", "y", {}
    )

    assert res.verdict == Verdict.CONTINUE
    # The original error message must surface in the reasoning.
    assert "boom" in res.reasoning
# ----------------------------------------------------------------------------
# OcrRoiChecker — avec ocr_fn injecté (pas d'EasyOCR ici)
# ----------------------------------------------------------------------------
def test_ocr_roi_detects_wrong_application_suspect_token():
    """Step-10 bug: a suspect token ('explorateur de fichiers') in the ROI means WRONG_APPLICATION."""
    from core.validation import OcrRoiChecker, Verdict, FailureCategory

    img_b64 = _png_b64(_make_image())
    centre = {"x_pct": 0.5, "y_pct": 0.5}
    action = {"type": "click", "by_text": "Enregistrer", "x_pct": 0.5, "y_pct": 0.5}
    client_result = {"success": True, "actual_position": centre}

    # The fake OCR reads a file-explorer title where "Enregistrer" was expected.
    checker = OcrRoiChecker(ocr_fn=lambda _crop: "rpa_vision : Explorateur de fichiers")
    res = checker.check(
        action=action,
        result=client_result,
        screenshot_before=None,
        screenshot_after=img_b64,
        context={},
    )

    assert res.verdict == Verdict.TERMINATE
    assert res.failure_category == FailureCategory.WRONG_APPLICATION
    assert res.confidence >= 0.85
def test_ocr_roi_complete_when_expected_text_in_roi():
    """When the OCR text contains the expected by_text, the verdict is COMPLETE."""
    from core.validation import OcrRoiChecker, Verdict

    action = {"type": "click", "by_text": "Enregistrer", "x_pct": 0.5, "y_pct": 0.5}
    blank_b64 = _png_b64(_make_image())

    checker = OcrRoiChecker(ocr_fn=lambda _: "Bouton Enregistrer actif")
    res = checker.check(
        action=action,
        result={"success": True},
        screenshot_before=None,
        screenshot_after=blank_b64,
        context={},
    )

    assert res.verdict == Verdict.COMPLETE
    assert res.confidence >= 0.85
def test_ocr_roi_ocr_text_missing_when_no_match():
    """Unrelated OCR text yields CONTINUE with OCR_TEXT_MISSING."""
    from core.validation import OcrRoiChecker, Verdict, FailureCategory

    action = {"type": "click", "by_text": "Enregistrer", "x_pct": 0.5, "y_pct": 0.5}
    blank_b64 = _png_b64(_make_image())

    checker = OcrRoiChecker(ocr_fn=lambda _: "texte sans rapport")
    res = checker.check(
        action=action,
        result={"success": True},
        screenshot_before=None,
        screenshot_after=blank_b64,
        context={},
    )

    assert res.verdict == Verdict.CONTINUE
    assert res.failure_category == FailureCategory.OCR_TEXT_MISSING
def test_ocr_roi_missing_coords_returns_continue():
    """Without click coordinates the checker returns CONTINUE and says the ROI is undefined."""
    from core.validation import OcrRoiChecker, Verdict

    # Neither x_pct/y_pct in the action nor actual_position in the result.
    action_without_coords = {"type": "click", "by_text": "Enregistrer"}
    blank_b64 = _png_b64(_make_image())

    checker = OcrRoiChecker(ocr_fn=lambda _: "")
    res = checker.check(
        action=action_without_coords,
        result={"success": True},
        screenshot_before=None,
        screenshot_after=blank_b64,
        context={},
    )

    assert res.verdict == Verdict.CONTINUE
    assert "ROI indéfinie" in res.reasoning
# ----------------------------------------------------------------------------
# Validator orchestrator
# ----------------------------------------------------------------------------
class _FakeChecker:
    """Checker stub returning a pre-built ``ValidationResult`` from ``check``."""

    def __init__(self, name, verdict, conf, failure_category=None):
        from core.validation.result import ValidationResult

        self.name = name
        # Built once up front; every call to check() hands back this object.
        fields = {
            "verdict": verdict,
            "confidence": conf,
            "check_used": name,
            "elapsed_ms": 1.0,
            "failure_category": failure_category,
            "reasoning": f"fake {name}",
        }
        self._canned = ValidationResult(**fields)

    def check(self, action, result, sb, sa, ctx):
        """Ignore all inputs and return the canned result."""
        return self._canned
def test_validator_terminate_high_conf_short_circuits():
    """A high-confidence TERMINATE from the first checker is the final result."""
    from core.validation import Validator, Verdict, FailureCategory

    terminating = _FakeChecker(
        name="ocr_roi",
        verdict=Verdict.TERMINATE,
        conf=0.9,
        failure_category=FailureCategory.WRONG_APPLICATION,
    )
    # Second in line; its COMPLETE verdict must not win.
    shadowed = _FakeChecker(name="pixel", verdict=Verdict.COMPLETE, conf=0.99)
    validator = Validator(checkers={"click": [terminating, shadowed]})

    res = validator.validate({"type": "click"}, {"success": True})

    assert res.verdict == Verdict.TERMINATE
    assert res.failure_category == FailureCategory.WRONG_APPLICATION
    assert res.check_used == "ocr_roi"
def test_validator_complete_high_conf_short_circuits():
    """A high-confidence COMPLETE from the first checker is the final result."""
    from core.validation import Validator, Verdict

    confident = _FakeChecker(name="ocr_roi", verdict=Verdict.COMPLETE, conf=0.95)
    doubtful = _FakeChecker(name="pixel", verdict=Verdict.CONTINUE, conf=0.3)
    validator = Validator(checkers={"click": [confident, doubtful]})

    res = validator.validate({"type": "click"}, {"success": True})

    assert res.verdict == Verdict.COMPLETE
    assert res.check_used == "ocr_roi"
def test_validator_falls_back_to_default_checkers():
    """An action type with no dedicated checkers is handled by default_checkers."""
    from core.validation import Validator, Verdict

    fallback = _FakeChecker(name="default", verdict=Verdict.COMPLETE, conf=0.8)
    validator = Validator(checkers={}, default_checkers=[fallback])

    res = validator.validate({"type": "unknown_action"}, {"success": True})

    assert res.check_used == "default"
    assert res.verdict == Verdict.COMPLETE
def test_validator_no_checker_returns_neutral_continue():
    """With no checker registered at all, the result is CONTINUE / 'no_checker'."""
    from core.validation import Validator, Verdict

    empty_validator = Validator(checkers={})

    res = empty_validator.validate({"type": "click"}, {"success": True})

    assert res.verdict == Verdict.CONTINUE
    assert res.check_used == "no_checker"
def test_validator_skips_checker_that_raises():
    """A checker that raises is skipped and the next checker's result is used."""
    # Verdict is needed only to build the fake checker; the assertion below
    # looks at check_used alone.
    from core.validation import Validator
    from core.validation.result import Verdict  # noqa: F401  (see below)

    class _Boom:
        name = "boom"

        def check(self, *a, **kw):
            raise RuntimeError("crash")

    ok = _FakeChecker("ok", Verdict.COMPLETE, 0.9)
    v = Validator(checkers={"click": [_Boom(), ok]})

    res = v.validate({"type": "click"}, {"success": True})

    # The result must come from the healthy checker placed after the crash.
    assert res.check_used == "ok"
# ----------------------------------------------------------------------------
# Sérialisation ValidationResult
# ----------------------------------------------------------------------------
def test_validation_result_to_dict_is_json_serializable():
    """``ValidationResult.to_dict()`` must produce a JSON-serializable dict."""
    import json
    from core.validation import FailureCategory, Verdict
    from core.validation.result import ValidationResult

    sample = ValidationResult(
        verdict=Verdict.TERMINATE,
        confidence=0.88,
        check_used="ocr_roi",
        elapsed_ms=42.7,
        reasoning="trop long" * 5,
        failure_category=FailureCategory.WRONG_APPLICATION,
        raw_evidence={"roi_text": "abc"},
    )

    as_dict = sample.to_dict()
    # json.dumps must not raise on the dict form.
    payload = json.dumps(as_dict)

    assert "wrong_application" in payload
    assert as_dict["verdict"] == "terminate"