feat(validator): R1 MVP P0 — OcrRoiChecker + orchestrator (flag OFF default)
Package core/validation/ minimal :
- result.py : Verdict, FailureCategory, ValidationResult
- pixel_diff_checker.py : wrapper de ReplayVerifier.verify_action
- ocr_roi_checker.py : ROI 80px autour du clic, détecte WRONG_APPLICATION via SUSPECT_TOKENS (edge/https/explorateur de fichiers/…)
- orchestrator.py : Validator dispatch action_type → checkers + agrégation

Wiring api_stream.py:3646 derrière RPA_VALIDATOR_V2_ENABLED (OFF par défaut). Si verdict ≠ COMPLETE, override report.success=False et expose failure_category dans result_entry. Zero régression flag OFF.

Tests :
- tests/unit/test_validator_v2.py : 13 tests (Checkers + Validator + sérialisation)
- tests/integration/test_validator_step10.py : 2 tests reproduisant le bug replay_sess_4c38dbb8 / act_raw_6c1432b3 (clic Enregistrer fait basculer vers Explorateur de fichiers) — Validator retourne WRONG_APPLICATION

Activation pour test live : RPA_VALIDATOR_V2_ENABLED=true

Cf. docs/recherche/SPEC_VALIDATOR_MATRICE.md, AXE_B2_DEEP_VALIDATOR.md.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -53,6 +53,42 @@ _replay_verifier = ReplayVerifier()
|
||||
_replay_learner = ReplayLearner()
|
||||
_audit_trail = AuditTrail()
|
||||
|
||||
# Validator V2 (MVP P0) — gated by the RPA_VALIDATOR_V2_ENABLED flag, OFF by default.
# Built lazily on the first call so that core.validation is only imported when the
# flag is ON (original note: EasyOCR/docTR are loaded on demand).
_validator_v2 = None
# Set to True once a build attempt has raised. Without it, every call would re-run
# the failing import and log the same warning again.
_validator_v2_init_failed = False


def _get_validator_v2():
    """Return the process-wide Validator V2 instance, building it on first use.

    Returns:
        The cached ``Validator`` once it has been built successfully, otherwise
        ``None`` when either:
          * the ``RPA_VALIDATOR_V2_ENABLED`` environment variable is not one of
            ``1`` / ``true`` / ``yes`` / ``on`` (case-insensitive), or
          * a previous or current build attempt raised.

    A failed build is logged as a warning once and then remembered: later calls
    return ``None`` immediately instead of retrying the import on every call.
    The flag is only consulted until the validator has been built; after that
    the cached instance is returned regardless of the environment.
    """
    global _validator_v2, _validator_v2_init_failed

    if _validator_v2 is not None:
        return _validator_v2
    if _validator_v2_init_failed:
        # A previous attempt already failed and was logged; do not retry.
        return None
    if os.environ.get("RPA_VALIDATOR_V2_ENABLED", "").lower() not in ("1", "true", "yes", "on"):
        return None

    try:
        # Local import: keeps the validation package out of the import path
        # entirely while the flag is OFF.
        from core.validation import (
            OcrRoiChecker,
            PixelDiffChecker,
            Validator,
        )

        pixel = PixelDiffChecker(_replay_verifier)
        ocr_click = OcrRoiChecker(radius_px=80)
        ocr_type = OcrRoiChecker(radius_px=120)
        _validator_v2 = Validator(
            checkers={
                "click": [ocr_click, pixel],
                "double_click": [ocr_click, pixel],
                "right_click": [ocr_click, pixel],
                "type": [ocr_type, pixel],
            },
            # Action types not listed above are validated by the pixel checker only.
            default_checkers=[pixel],
        )
        logger.info("[VALIDATOR_V2] activé (flag RPA_VALIDATOR_V2_ENABLED=ON)")
    except Exception as exc:
        # Best-effort feature: a broken validator must never break the caller.
        _validator_v2_init_failed = True
        logger.warning(f"[VALIDATOR_V2] init impossible: {exc}")
        _validator_v2 = None
    return _validator_v2
|
||||
|
||||
# Nombre maximum de retries par action avant de déclarer un échec
|
||||
MAX_RETRIES_PER_ACTION = 3
|
||||
|
||||
@@ -3607,6 +3643,10 @@ async def report_action_result(report: ReplayResultReport):
|
||||
# Skip aussi la vérification serveur si l'agent a déjà géré la popup
|
||||
skip_verify = skip_verify or agent_handled_popup
|
||||
verification = None
|
||||
# [VALIDATOR_V2] override conditionnel — flag RPA_VALIDATOR_V2_ENABLED.
|
||||
# Si verdict ≠ COMPLETE, on force result.success=False et on expose failure_category.
|
||||
validator_v2_result = None
|
||||
validator_v2_failure_category = None
|
||||
if report.success and screenshot_after and not skip_verify:
|
||||
# Utiliser le screenshot_before envoyé par l'agent (Critic fiable)
|
||||
# Fallback sur le dernier screenshot stocké côté serveur
|
||||
@@ -3617,7 +3657,35 @@ async def report_action_result(report: ReplayResultReport):
|
||||
result_dict = {
|
||||
"success": report.success,
|
||||
"error": report.error,
|
||||
"actual_position": report.actual_position,
|
||||
}
|
||||
# === Validator V2 (P0 MVP) — derrière flag, sinon no-op ===
|
||||
v2 = _get_validator_v2()
|
||||
if v2 is not None:
|
||||
try:
|
||||
validator_v2_result = v2.validate(
|
||||
action=action_dict,
|
||||
result=result_dict,
|
||||
screenshot_before=screenshot_before,
|
||||
screenshot_after=screenshot_after,
|
||||
context={},
|
||||
)
|
||||
from core.validation import Verdict as _V2Verdict
|
||||
if validator_v2_result.verdict != _V2Verdict.COMPLETE:
|
||||
validator_v2_failure_category = (
|
||||
validator_v2_result.failure_category.value
|
||||
if validator_v2_result.failure_category else None
|
||||
)
|
||||
report.success = False
|
||||
logger.info(
|
||||
f"[VALIDATOR_V2] override success→False action={action_id} "
|
||||
f"verdict={validator_v2_result.verdict.value} "
|
||||
f"conf={validator_v2_result.confidence:.2f} "
|
||||
f"failure_category={validator_v2_failure_category} "
|
||||
f"reason={validator_v2_result.reasoning[:120]}"
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning(f"[VALIDATOR_V2] échec: {exc}")
|
||||
# Utiliser le Critic sémantique si l'action a un expected_result
|
||||
expected_result = (original_action or {}).get("expected_result", "")
|
||||
action_intention = (original_action or {}).get("intention", "")
|
||||
@@ -3686,6 +3754,8 @@ async def report_action_result(report: ReplayResultReport):
|
||||
"actual_position": report.actual_position,
|
||||
"retry_count": retry_count,
|
||||
"verification": verification.to_dict() if verification else None,
|
||||
"validator_v2": validator_v2_result.to_dict() if validator_v2_result else None,
|
||||
"failure_category": validator_v2_failure_category,
|
||||
"resolution_method": report.resolution_method,
|
||||
"resolution_score": report.resolution_score,
|
||||
"resolution_elapsed_ms": report.resolution_elapsed_ms,
|
||||
|
||||
Reference in New Issue
Block a user