Package core/validation/ minimal :
- result.py : Verdict, FailureCategory, ValidationResult
- pixel_diff_checker.py : wrapper de ReplayVerifier.verify_action
- ocr_roi_checker.py : ROI 80px autour du clic, détecte WRONG_APPLICATION via SUSPECT_TOKENS (edge/https/explorateur de fichiers/…)
- orchestrator.py : Validator dispatch action_type → checkers + agrégation

Wiring api_stream.py:3646 derrière RPA_VALIDATOR_V2_ENABLED (OFF par défaut). Si verdict ≠ COMPLETE, override report.success=False et expose failure_category dans result_entry. Zéro régression flag OFF.

Tests :
- tests/unit/test_validator_v2.py : 13 tests (Checkers + Validator + sérialisation)
- tests/integration/test_validator_step10.py : 2 tests reproduisant le bug replay_sess_4c38dbb8 / act_raw_6c1432b3 (clic Enregistrer fait basculer vers Explorateur de fichiers) — Validator retourne WRONG_APPLICATION

Activation pour test live : RPA_VALIDATOR_V2_ENABLED=true

Cf. docs/recherche/SPEC_VALIDATOR_MATRICE.md, AXE_B2_DEEP_VALIDATOR.md.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
250 lines
9.0 KiB
Python
250 lines
9.0 KiB
Python
"""Unit tests for Validator V2 (P0 MVP).

See SPEC_VALIDATOR_MATRICE.md, AXE_B2_DEEP_VALIDATOR.md.
"""
|
|
from __future__ import annotations
|
|
|
|
import base64
|
|
import io
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
# Apply the ``unit`` marker to every test collected from this module.
pytestmark = pytest.mark.unit
|
|
|
|
|
|
def _png_b64(img) -> str:
    """Serialize a PIL image as PNG and return the bytes as an ASCII base64 string.

    Base64-encoded PNG data starts with ``iVBOR``, the prefix the original
    note says ``_load_single_image`` relies on.
    """
    with io.BytesIO() as stream:
        img.save(stream, format="PNG")
        png_bytes = stream.getvalue()
    return str(base64.b64encode(png_bytes), "ascii")
|
|
|
|
|
|
def _make_image(text: str = "", color=(255, 255, 255), size=(800, 600)):
    """Build an RGB PIL image of ``size`` (800x600 by default) filled with ``color``.

    When ``text`` is non-empty it is drawn in black, starting 100 px left of
    the horizontal centre at mid-height (intended for the fake-OCR tests).
    """
    from PIL import Image, ImageDraw

    canvas = Image.new("RGB", size, color=color)
    if not text:
        return canvas

    anchor = (size[0] // 2 - 100, size[1] // 2)
    ImageDraw.Draw(canvas).text(anchor, text, fill=(0, 0, 0))
    return canvas
|
|
|
|
|
|
# ----------------------------------------------------------------------------
# PixelDiffChecker
# ----------------------------------------------------------------------------
|
|
|
|
class _FakeVerifierResult:
    """Plain attribute bag imitating the result object of a replay verifier."""

    def __init__(self, suggestion, changes_detected, confidence, detail="",
                 change_area_pct=0.0, local_change_pct=0.0):
        """Store every constructor argument on the instance under the same name."""
        vars(self).update(
            suggestion=suggestion,
            changes_detected=changes_detected,
            confidence=confidence,
            detail=detail,
            change_area_pct=change_area_pct,
            local_change_pct=local_change_pct,
        )
|
|
|
|
|
|
class _FakeReplayVerifier:
    """Replay-verifier double whose ``verify_action`` always yields one canned result."""

    def __init__(self, result):
        """Remember ``result`` so ``verify_action`` can hand it back."""
        self._canned = result

    def verify_action(self, action, result, screenshot_before, screenshot_after):
        """Ignore every argument and return the object given at construction."""
        canned = self._canned
        return canned
|
|
|
|
|
|
def test_pixel_diff_complete_when_changes_detected():
    """Expect COMPLETE when the verifier suggests "continue" and reports changes."""
    from core.validation import PixelDiffChecker, Verdict

    verifier_output = _FakeVerifierResult(
        suggestion="continue",
        changes_detected=True,
        confidence=0.85,
        detail="pixels changés",
    )
    checker = PixelDiffChecker(_FakeReplayVerifier(verifier_output))

    click = {"type": "click"}
    outcome = {"success": True}
    res = checker.check(click, outcome, "x", "y", {})

    assert res.verdict == Verdict.COMPLETE
    # The verifier's confidence is expected to be carried over as-is.
    assert res.confidence == pytest.approx(0.85)
    assert res.check_used == "pixel_diff"
|
|
|
|
|
|
def test_pixel_diff_continue_when_retry_suggested():
    """Expect CONTINUE / NO_VISUAL_CHANGE when the verifier suggests "retry"."""
    from core.validation import PixelDiffChecker, Verdict, FailureCategory

    verifier_output = _FakeVerifierResult(
        suggestion="retry",
        changes_detected=False,
        confidence=0.7,
        detail="aucun changement",
    )
    checker = PixelDiffChecker(_FakeReplayVerifier(verifier_output))
    res = checker.check({"type": "click"}, {"success": True}, "x", "y", {})

    assert res.verdict == Verdict.CONTINUE
    assert res.failure_category == FailureCategory.NO_VISUAL_CHANGE
|
|
|
|
|
|
def test_pixel_diff_handles_internal_exception():
    """Expect CONTINUE, with the error text in the reasoning, when the verifier raises."""
    from core.validation import PixelDiffChecker, Verdict

    class _BadVerifier:
        """Verifier double whose ``verify_action`` always raises."""

        def verify_action(self, **kw):
            raise RuntimeError("boom")

    checker = PixelDiffChecker(_BadVerifier())
    res = checker.check({"type": "click"}, {"success": True}, "x", "y", {})

    assert res.verdict == Verdict.CONTINUE
    assert "boom" in res.reasoning
|
|
|
|
|
|
# ----------------------------------------------------------------------------
# OcrRoiChecker — with an injected ocr_fn (no EasyOCR here)
# ----------------------------------------------------------------------------
|
|
|
|
def test_ocr_roi_detects_wrong_application_suspect_token():
    """Step-10 bug: an 'edge' / 'explorateur de fichiers' token in the ROI means WRONG_APPLICATION."""
    from core.validation import OcrRoiChecker, Verdict, FailureCategory

    def fake_ocr(_crop):
        # Fake OCR returns a File Explorer title although "Enregistrer" was expected.
        return "rpa_vision : Explorateur de fichiers"

    after_b64 = _png_b64(_make_image())
    click = {"type": "click", "by_text": "Enregistrer", "x_pct": 0.5, "y_pct": 0.5}
    outcome = {"success": True, "actual_position": {"x_pct": 0.5, "y_pct": 0.5}}

    res = OcrRoiChecker(ocr_fn=fake_ocr).check(
        action=click,
        result=outcome,
        screenshot_before=None,
        screenshot_after=after_b64,
        context={},
    )

    assert res.verdict == Verdict.TERMINATE
    assert res.failure_category == FailureCategory.WRONG_APPLICATION
    assert res.confidence >= 0.85
|
|
|
|
|
|
def test_ocr_roi_complete_when_expected_text_in_roi():
    """Expect COMPLETE when the OCR text of the ROI contains the clicked label."""
    from core.validation import OcrRoiChecker, Verdict

    def fake_ocr(_):
        return "Bouton Enregistrer actif"

    after_b64 = _png_b64(_make_image())
    click = {"type": "click", "by_text": "Enregistrer", "x_pct": 0.5, "y_pct": 0.5}

    res = OcrRoiChecker(ocr_fn=fake_ocr).check(
        action=click,
        result={"success": True},
        screenshot_before=None,
        screenshot_after=after_b64,
        context={},
    )

    assert res.verdict == Verdict.COMPLETE
    assert res.confidence >= 0.85
|
|
|
|
|
|
def test_ocr_roi_ocr_text_missing_when_no_match():
    """Expect CONTINUE / OCR_TEXT_MISSING when the OCR text is unrelated to the label."""
    from core.validation import OcrRoiChecker, Verdict, FailureCategory

    def fake_ocr(_):
        return "texte sans rapport"

    after_b64 = _png_b64(_make_image())
    click = {"type": "click", "by_text": "Enregistrer", "x_pct": 0.5, "y_pct": 0.5}

    res = OcrRoiChecker(ocr_fn=fake_ocr).check(
        action=click,
        result={"success": True},
        screenshot_before=None,
        screenshot_after=after_b64,
        context={},
    )

    assert res.verdict == Verdict.CONTINUE
    assert res.failure_category == FailureCategory.OCR_TEXT_MISSING
|
|
|
|
|
|
def test_ocr_roi_missing_coords_returns_continue():
    """Expect CONTINUE with an "ROI indéfinie" reasoning when the action has no coordinates."""
    from core.validation import OcrRoiChecker, Verdict

    def fake_ocr(_):
        return ""

    after_b64 = _png_b64(_make_image())
    # Deliberately no x_pct / y_pct on the action.
    click_without_coords = {"type": "click", "by_text": "Enregistrer"}

    res = OcrRoiChecker(ocr_fn=fake_ocr).check(
        action=click_without_coords,
        result={"success": True},
        screenshot_before=None,
        screenshot_after=after_b64,
        context={},
    )

    assert res.verdict == Verdict.CONTINUE
    assert "ROI indéfinie" in res.reasoning
|
|
|
|
|
|
# ----------------------------------------------------------------------------
# Validator orchestrator
# ----------------------------------------------------------------------------
|
|
|
|
class _FakeChecker:
    """Checker double that returns one prebuilt ``ValidationResult`` on every call."""

    def __init__(self, name, verdict, conf, failure_category=None):
        """Build the canned result once; ``name`` is also used as ``check_used``."""
        from core.validation.result import ValidationResult

        self.name = name
        canned_fields = dict(
            verdict=verdict,
            confidence=conf,
            check_used=name,
            elapsed_ms=1.0,
            failure_category=failure_category,
            reasoning=f"fake {name}",
        )
        self._canned = ValidationResult(**canned_fields)

    def check(self, action, result, sb, sa, ctx):
        """Ignore every argument and return the canned result."""
        return self._canned
|
|
|
|
|
|
def test_validator_terminate_high_conf_short_circuits():
    """Expect a high-confidence TERMINATE from the first checker to be the final result."""
    from core.validation import Validator, Verdict, FailureCategory

    blocking = _FakeChecker(
        "ocr_roi", Verdict.TERMINATE, 0.9, FailureCategory.WRONG_APPLICATION
    )
    # Listed second; its COMPLETE verdict is not expected to surface.
    shadowed = _FakeChecker("pixel", Verdict.COMPLETE, 0.99)

    validator = Validator(checkers={"click": [blocking, shadowed]})
    res = validator.validate({"type": "click"}, {"success": True})

    assert res.verdict == Verdict.TERMINATE
    assert res.failure_category == FailureCategory.WRONG_APPLICATION
    assert res.check_used == "ocr_roi"
|
|
|
|
|
|
def test_validator_complete_high_conf_short_circuits():
    """Expect a high-confidence COMPLETE from the first checker to be the final result."""
    from core.validation import Validator, Verdict

    confident = _FakeChecker("ocr_roi", Verdict.COMPLETE, 0.95)
    weak_followup = _FakeChecker("pixel", Verdict.CONTINUE, 0.3)

    validator = Validator(checkers={"click": [confident, weak_followup]})
    res = validator.validate({"type": "click"}, {"success": True})

    assert res.verdict == Verdict.COMPLETE
    assert res.check_used == "ocr_roi"
|
|
|
|
|
|
def test_validator_falls_back_to_default_checkers():
    """Expect the default checkers to run for an action type with no dedicated entry."""
    from core.validation import Validator, Verdict

    fallback = _FakeChecker("default", Verdict.COMPLETE, 0.8)
    validator = Validator(checkers={}, default_checkers=[fallback])

    res = validator.validate({"type": "unknown_action"}, {"success": True})

    assert res.check_used == "default"
    assert res.verdict == Verdict.COMPLETE
|
|
|
|
|
|
def test_validator_no_checker_returns_neutral_continue():
    """Expect a CONTINUE result tagged "no_checker" when no checker is registered."""
    from core.validation import Validator, Verdict

    validator = Validator(checkers={})
    click = {"type": "click"}
    res = validator.validate(click, {"success": True})

    assert res.verdict == Verdict.CONTINUE
    assert res.check_used == "no_checker"
|
|
|
|
|
|
def test_validator_skips_checker_that_raises():
    """Expect the result to come from the next checker when the first one raises."""
    from core.validation import Validator, Verdict

    class _Boom:
        """Checker double whose ``check`` always raises."""

        name = "boom"

        def check(self, *a, **kw):
            raise RuntimeError("crash")

    healthy = _FakeChecker("ok", Verdict.COMPLETE, 0.9)
    validator = Validator(checkers={"click": [_Boom(), healthy]})

    res = validator.validate({"type": "click"}, {"success": True})

    assert res.check_used == "ok"
|
|
|
|
|
|
# ----------------------------------------------------------------------------
# ValidationResult serialization
# ----------------------------------------------------------------------------
|
|
|
|
def test_validation_result_to_dict_is_json_serializable():
    """Expect ``to_dict()`` to produce a JSON-dumpable dict with string enum values."""
    import json
    from core.validation import FailureCategory, Verdict
    from core.validation.result import ValidationResult

    fields = dict(
        verdict=Verdict.TERMINATE,
        confidence=0.88,
        check_used="ocr_roi",
        elapsed_ms=42.7,
        reasoning="trop long" * 5,
        failure_category=FailureCategory.WRONG_APPLICATION,
        raw_evidence={"roi_text": "abc"},
    )
    as_dict = ValidationResult(**fields).to_dict()

    # json.dumps must not raise on the exported dict.
    dumped = json.dumps(as_dict)

    assert "wrong_application" in dumped
    assert as_dict["verdict"] == "terminate"
|