feat(validator): R1 MVP P0 — OcrRoiChecker + orchestrator (flag OFF default)

Package core/validation/ minimal :
- result.py : Verdict, FailureCategory, ValidationResult
- pixel_diff_checker.py : wrapper de ReplayVerifier.verify_action
- ocr_roi_checker.py : ROI 80px autour du clic, détecte WRONG_APPLICATION
  via SUSPECT_TOKENS (edge/https/explorateur de fichiers/…)
- orchestrator.py : Validator dispatch action_type → checkers + agrégation

Wiring api_stream.py:3646 derrière RPA_VALIDATOR_V2_ENABLED (OFF par défaut).
Si verdict ≠ COMPLETE, override report.success=False et expose failure_category
dans result_entry. Zéro régression lorsque le flag est OFF.

Tests :
- tests/unit/test_validator_v2.py : 13 tests (Checkers + Validator + sérialisation)
- tests/integration/test_validator_step10.py : 2 tests reproduisant le bug
  replay_sess_4c38dbb8 / act_raw_6c1432b3 (clic Enregistrer fait basculer
  vers Explorateur de fichiers) — Validator retourne WRONG_APPLICATION

Activation pour test live : RPA_VALIDATOR_V2_ENABLED=true

Cf. docs/recherche/SPEC_VALIDATOR_MATRICE.md, AXE_B2_DEEP_VALIDATOR.md.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dom
2026-05-24 17:52:06 +02:00
parent bd100bc538
commit 1b4e64960b
8 changed files with 851 additions and 0 deletions

View File

@@ -53,6 +53,42 @@ _replay_verifier = ReplayVerifier()
_replay_learner = ReplayLearner()
_audit_trail = AuditTrail()
# Validator V2 (MVP P0) — flag RPA_VALIDATOR_V2_ENABLED, OFF par défaut.
# Lazy init au premier appel : EasyOCR/docTR sont chargés à la demande.
_validator_v2 = None
def _get_validator_v2():
    """Return the shared Validator V2 instance, building it on first use.

    An already-built instance is returned as is. Otherwise the
    ``RPA_VALIDATOR_V2_ENABLED`` environment variable must be one of
    ``1`` / ``true`` / ``yes`` / ``on`` (case-insensitive); when it is not,
    ``None`` is returned and nothing is imported.

    Any exception raised while importing or wiring the checkers is logged
    as a warning and ``None`` is returned; the cache stays empty, so the
    next call goes through the same initialisation path again.
    """
    global _validator_v2

    # Fast path: a previous call already built the validator.
    if _validator_v2 is not None:
        return _validator_v2

    flag_value = os.environ.get("RPA_VALIDATOR_V2_ENABLED", "").lower()
    flag_is_on = flag_value in ("1", "true", "yes", "on")
    if not flag_is_on:
        return None

    try:
        # Imported here rather than at module load, so core.validation is
        # only pulled in once the flag has been found ON.
        from core.validation import OcrRoiChecker, PixelDiffChecker, Validator

        pixel_checker = PixelDiffChecker(_replay_verifier)
        click_ocr = OcrRoiChecker(radius_px=80)
        typing_ocr = OcrRoiChecker(radius_px=120)

        # Each pointer action gets its own [OCR, pixel] list; typing uses
        # the OCR checker configured with the larger radius.
        dispatch = {
            kind: [click_ocr, pixel_checker]
            for kind in ("click", "double_click", "right_click")
        }
        dispatch["type"] = [typing_ocr, pixel_checker]

        _validator_v2 = Validator(
            checkers=dispatch,
            default_checkers=[pixel_checker],
        )
        logger.info("[VALIDATOR_V2] activé (flag RPA_VALIDATOR_V2_ENABLED=ON)")
    except Exception as err:
        logger.warning(f"[VALIDATOR_V2] init impossible: {err}")
        _validator_v2 = None

    return _validator_v2
# Nombre maximum de retries par action avant de déclarer un échec
MAX_RETRIES_PER_ACTION = 3
@@ -3607,6 +3643,10 @@ async def report_action_result(report: ReplayResultReport):
# Skip aussi la vérification serveur si l'agent a déjà géré la popup
skip_verify = skip_verify or agent_handled_popup
verification = None
# [VALIDATOR_V2] conditional override — gated by the RPA_VALIDATOR_V2_ENABLED flag.
# If verdict ≠ COMPLETE, report.success is forced to False and failure_category is exposed.
validator_v2_result = None
validator_v2_failure_category = None
if report.success and screenshot_after and not skip_verify:
# Utiliser le screenshot_before envoyé par l'agent (Critic fiable)
# Fallback sur le dernier screenshot stocké côté serveur
@@ -3617,7 +3657,35 @@ async def report_action_result(report: ReplayResultReport):
result_dict = {
"success": report.success,
"error": report.error,
"actual_position": report.actual_position,
}
# === Validator V2 (P0 MVP) — derrière flag, sinon no-op ===
v2 = _get_validator_v2()
if v2 is not None:
try:
validator_v2_result = v2.validate(
action=action_dict,
result=result_dict,
screenshot_before=screenshot_before,
screenshot_after=screenshot_after,
context={},
)
from core.validation import Verdict as _V2Verdict
if validator_v2_result.verdict != _V2Verdict.COMPLETE:
validator_v2_failure_category = (
validator_v2_result.failure_category.value
if validator_v2_result.failure_category else None
)
report.success = False
logger.info(
f"[VALIDATOR_V2] override success→False action={action_id} "
f"verdict={validator_v2_result.verdict.value} "
f"conf={validator_v2_result.confidence:.2f} "
f"failure_category={validator_v2_failure_category} "
f"reason={validator_v2_result.reasoning[:120]}"
)
except Exception as exc:
logger.warning(f"[VALIDATOR_V2] échec: {exc}")
# Utiliser le Critic sémantique si l'action a un expected_result
expected_result = (original_action or {}).get("expected_result", "")
action_intention = (original_action or {}).get("intention", "")
@@ -3686,6 +3754,8 @@ async def report_action_result(report: ReplayResultReport):
"actual_position": report.actual_position,
"retry_count": retry_count,
"verification": verification.to_dict() if verification else None,
"validator_v2": validator_v2_result.to_dict() if validator_v2_result else None,
"failure_category": validator_v2_failure_category,
"resolution_method": report.resolution_method,
"resolution_score": report.resolution_score,
"resolution_elapsed_ms": report.resolution_elapsed_ms,