merge(R1): Validator MVP P0 (worktree a0dcb652)
This commit is contained in:
@@ -53,6 +53,42 @@ _replay_verifier = ReplayVerifier()
|
|||||||
_replay_learner = ReplayLearner()
|
_replay_learner = ReplayLearner()
|
||||||
_audit_trail = AuditTrail()
|
_audit_trail = AuditTrail()
|
||||||
|
|
||||||
|
# Validator V2 (MVP P0) — flag RPA_VALIDATOR_V2_ENABLED, OFF par défaut.
|
||||||
|
# Lazy init au premier appel : EasyOCR/docTR sont chargés à la demande.
|
||||||
|
_validator_v2 = None
|
||||||
|
|
||||||
|
|
||||||
|
def _get_validator_v2():
    """Return the shared Validator V2 instance, building it on first use.

    The validator is only built when the ``RPA_VALIDATOR_V2_ENABLED``
    environment variable is one of ``1`` / ``true`` / ``yes`` / ``on``
    (case-insensitive); otherwise ``None`` is returned. A failure while
    importing or constructing the validator is logged as a warning and also
    results in ``None``.
    """
    global _validator_v2

    # A previous call already built the validator: reuse it.
    if _validator_v2 is not None:
        return _validator_v2

    flag = os.environ.get("RPA_VALIDATOR_V2_ENABLED", "").lower()
    if flag not in ("1", "true", "yes", "on"):
        return None

    try:
        # Imported here so the validation package is only loaded when the
        # flag is ON.
        from core.validation import (
            OcrRoiChecker,
            PixelDiffChecker,
            Validator,
        )

        pixel = PixelDiffChecker(_replay_verifier)
        ocr_click = OcrRoiChecker(radius_px=80)
        ocr_type = OcrRoiChecker(radius_px=120)

        # Every click flavour shares the same chain: OCR around the click
        # first, then the pixel diff. Typing uses a wider OCR region.
        checkers = {
            kind: [ocr_click, pixel]
            for kind in ("click", "double_click", "right_click")
        }
        checkers["type"] = [ocr_type, pixel]

        _validator_v2 = Validator(
            checkers=checkers,
            default_checkers=[pixel],
        )
        logger.info("[VALIDATOR_V2] activé (flag RPA_VALIDATOR_V2_ENABLED=ON)")
    except Exception as exc:
        logger.warning(f"[VALIDATOR_V2] init impossible: {exc}")
        _validator_v2 = None

    return _validator_v2
|
||||||
|
|
||||||
# Nombre maximum de retries par action avant de déclarer un échec
|
# Nombre maximum de retries par action avant de déclarer un échec
|
||||||
MAX_RETRIES_PER_ACTION = 3
|
MAX_RETRIES_PER_ACTION = 3
|
||||||
|
|
||||||
@@ -3607,6 +3643,10 @@ async def report_action_result(report: ReplayResultReport):
|
|||||||
# Skip aussi la vérification serveur si l'agent a déjà géré la popup
|
# Skip aussi la vérification serveur si l'agent a déjà géré la popup
|
||||||
skip_verify = skip_verify or agent_handled_popup
|
skip_verify = skip_verify or agent_handled_popup
|
||||||
verification = None
|
verification = None
|
||||||
|
# [VALIDATOR_V2] override conditionnel — flag RPA_VALIDATOR_V2_ENABLED.
|
||||||
|
# Si verdict ≠ COMPLETE, on force result.success=False et on expose failure_category.
|
||||||
|
validator_v2_result = None
|
||||||
|
validator_v2_failure_category = None
|
||||||
if report.success and screenshot_after and not skip_verify:
|
if report.success and screenshot_after and not skip_verify:
|
||||||
# Utiliser le screenshot_before envoyé par l'agent (Critic fiable)
|
# Utiliser le screenshot_before envoyé par l'agent (Critic fiable)
|
||||||
# Fallback sur le dernier screenshot stocké côté serveur
|
# Fallback sur le dernier screenshot stocké côté serveur
|
||||||
@@ -3617,7 +3657,35 @@ async def report_action_result(report: ReplayResultReport):
|
|||||||
result_dict = {
|
result_dict = {
|
||||||
"success": report.success,
|
"success": report.success,
|
||||||
"error": report.error,
|
"error": report.error,
|
||||||
|
"actual_position": report.actual_position,
|
||||||
}
|
}
|
||||||
|
# === Validator V2 (P0 MVP) — derrière flag, sinon no-op ===
|
||||||
|
v2 = _get_validator_v2()
|
||||||
|
if v2 is not None:
|
||||||
|
try:
|
||||||
|
validator_v2_result = v2.validate(
|
||||||
|
action=action_dict,
|
||||||
|
result=result_dict,
|
||||||
|
screenshot_before=screenshot_before,
|
||||||
|
screenshot_after=screenshot_after,
|
||||||
|
context={},
|
||||||
|
)
|
||||||
|
from core.validation import Verdict as _V2Verdict
|
||||||
|
if validator_v2_result.verdict != _V2Verdict.COMPLETE:
|
||||||
|
validator_v2_failure_category = (
|
||||||
|
validator_v2_result.failure_category.value
|
||||||
|
if validator_v2_result.failure_category else None
|
||||||
|
)
|
||||||
|
report.success = False
|
||||||
|
logger.info(
|
||||||
|
f"[VALIDATOR_V2] override success→False action={action_id} "
|
||||||
|
f"verdict={validator_v2_result.verdict.value} "
|
||||||
|
f"conf={validator_v2_result.confidence:.2f} "
|
||||||
|
f"failure_category={validator_v2_failure_category} "
|
||||||
|
f"reason={validator_v2_result.reasoning[:120]}"
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.warning(f"[VALIDATOR_V2] échec: {exc}")
|
||||||
# Utiliser le Critic sémantique si l'action a un expected_result
|
# Utiliser le Critic sémantique si l'action a un expected_result
|
||||||
expected_result = (original_action or {}).get("expected_result", "")
|
expected_result = (original_action or {}).get("expected_result", "")
|
||||||
action_intention = (original_action or {}).get("intention", "")
|
action_intention = (original_action or {}).get("intention", "")
|
||||||
@@ -3686,6 +3754,8 @@ async def report_action_result(report: ReplayResultReport):
|
|||||||
"actual_position": report.actual_position,
|
"actual_position": report.actual_position,
|
||||||
"retry_count": retry_count,
|
"retry_count": retry_count,
|
||||||
"verification": verification.to_dict() if verification else None,
|
"verification": verification.to_dict() if verification else None,
|
||||||
|
"validator_v2": validator_v2_result.to_dict() if validator_v2_result else None,
|
||||||
|
"failure_category": validator_v2_failure_category,
|
||||||
"resolution_method": report.resolution_method,
|
"resolution_method": report.resolution_method,
|
||||||
"resolution_score": report.resolution_score,
|
"resolution_score": report.resolution_score,
|
||||||
"resolution_elapsed_ms": report.resolution_elapsed_ms,
|
"resolution_elapsed_ms": report.resolution_elapsed_ms,
|
||||||
|
|||||||
31
core/validation/__init__.py
Normal file
31
core/validation/__init__.py
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
"""core.validation — Validator V2 (MVP P0).
|
||||||
|
|
||||||
|
Pattern Planner-Actor-Validator (cf. SPEC_VALIDATOR_MATRICE.md).
|
||||||
|
Donne un verdict structuré (Verdict / FailureCategory) sur l'effet d'une action
|
||||||
|
en agrégeant plusieurs Checkers spécialisés.
|
||||||
|
|
||||||
|
Périmètre P0 :
|
||||||
|
- PixelDiffChecker (wrapper ReplayVerifier existant)
|
||||||
|
- OcrRoiChecker (ROI 80px autour du clic, détecte WRONG_APPLICATION = bug step 10)
|
||||||
|
- Validator orchestrateur (dispatch action_type → checkers + agrégation conf)
|
||||||
|
|
||||||
|
Flag d'activation : variable d'env RPA_VALIDATOR_V2_ENABLED=true (OFF par défaut).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from core.validation.result import (
|
||||||
|
FailureCategory,
|
||||||
|
ValidationResult,
|
||||||
|
Verdict,
|
||||||
|
)
|
||||||
|
from core.validation.pixel_diff_checker import PixelDiffChecker
|
||||||
|
from core.validation.ocr_roi_checker import OcrRoiChecker
|
||||||
|
from core.validation.orchestrator import Validator
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"Validator",
|
||||||
|
"Verdict",
|
||||||
|
"FailureCategory",
|
||||||
|
"ValidationResult",
|
||||||
|
"PixelDiffChecker",
|
||||||
|
"OcrRoiChecker",
|
||||||
|
]
|
||||||
171
core/validation/ocr_roi_checker.py
Normal file
171
core/validation/ocr_roi_checker.py
Normal file
@@ -0,0 +1,171 @@
|
|||||||
|
"""OcrRoiChecker — ROI 80px (ou 120 px pour type) autour du clic.
|
||||||
|
|
||||||
|
Détecte WRONG_APPLICATION (bug step 10) si un token suspect navigateur/système
|
||||||
|
apparaît dans la ROI alors qu'on attendait un label métier.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import time
|
||||||
|
import unicodedata
|
||||||
|
from typing import Any, Callable, Dict, Optional
|
||||||
|
|
||||||
|
from core.validation.result import FailureCategory, ValidationResult, Verdict
|
||||||
|
|
||||||
|
|
||||||
|
def _strip_accents(s: str) -> str:
|
||||||
|
return "".join(
|
||||||
|
c for c in unicodedata.normalize("NFKD", s) if not unicodedata.combining(c)
|
||||||
|
).lower().strip()
|
||||||
|
|
||||||
|
|
||||||
|
class OcrRoiChecker:
    """Checker that OCRs a square region around the click point.

    The region is cropped from the "after" screenshot, centred on the click
    position and extending ``radius_px`` pixels in every direction (clamped
    to the image bounds). The recognised text is normalised (accents
    stripped, lower-cased) and classified in this order:

    1. a ``SUSPECT_TOKENS`` entry present in the text but not in the
       expected label -> ``TERMINATE`` / ``WRONG_APPLICATION``;
    2. the expected label found verbatim -> ``COMPLETE``;
    3. at least half of the expected words longer than two characters
       found -> ``COMPLETE`` with a lower confidence;
    4. otherwise -> ``CONTINUE`` / ``OCR_TEXT_MISSING``.

    Missing inputs, an unloadable image or an OCR failure produce a
    low-confidence ``CONTINUE`` result.
    """

    name = "ocr_roi"
    budget_ms = 200.0

    # Matched as plain substrings of the normalised OCR text.
    # NOTE(review): short entries such as "edge" or ".fr" can also match
    # inside unrelated words; word-boundary matching may be worth considering.
    SUSPECT_TOKENS = (
        "edge", "chrome", "firefox", "mozilla", "opera",
        "http", "https", "www.",
        ".com", ".fr", ".org", ".net", ".html",
        "favoris", "favorite", "bookmark",
        "barre d'adresse", "address bar",
        "nouvel onglet", "new tab",
        "securite windows", "windows security",
        "user account control", "controle de compte",
        "explorateur de fichiers", "file explorer",
    )

    def __init__(
        self,
        ocr_fn: Optional[Callable] = None,
        radius_px: int = 80,
        suspect_min_confidence: float = 0.85,
        expected_min_confidence: float = 0.90,
    ):
        """Configure the checker.

        Args:
            ocr_fn: Callable taking the cropped image and returning its text.
                When ``None`` it is resolved lazily through ``TitleVerifier``.
            radius_px: Half-size of the square region, in pixels.
            suspect_min_confidence: Confidence reported with a
                ``WRONG_APPLICATION`` verdict.
            expected_min_confidence: Confidence reported when the expected
                label is found verbatim.
        """
        self._ocr = ocr_fn
        self._radius = radius_px
        self._suspect_conf = suspect_min_confidence
        self._expected_conf = expected_min_confidence

    @staticmethod
    def _resolve_pct(*candidates: Any) -> Optional[float]:
        """Return the first candidate usable as a float, or ``None``.

        Only ``None`` and unparsable values are skipped, so ``0.0`` (a click
        on the left or top edge) is kept as a valid coordinate.
        """
        for value in candidates:
            if value is None:
                continue
            try:
                return float(value)
            except (TypeError, ValueError):
                continue
        return None

    def _ensure_ocr(self) -> Optional[Callable]:
        """Return the OCR callable, resolving it on first use.

        Returns ``None`` when no OCR backend could be obtained.
        """
        if self._ocr is not None:
            return self._ocr
        try:
            from core.grounding.title_verifier import TitleVerifier
            tv = TitleVerifier()
            self._ocr = tv._get_ocr()
        except Exception:
            # Best effort: the caller reports "OCR unavailable" when this
            # stays None.
            self._ocr = None
        return self._ocr

    def _inconclusive(
        self, started: float, confidence: float, reasoning: str
    ) -> ValidationResult:
        """Build the CONTINUE result used when the check cannot be performed."""
        return ValidationResult(
            verdict=Verdict.CONTINUE, confidence=confidence,
            check_used=self.name,
            elapsed_ms=(time.perf_counter() - started) * 1000,
            reasoning=reasoning,
        )

    def check(
        self,
        action: Dict[str, Any],
        result: Dict[str, Any],
        screenshot_before: Optional[str],
        screenshot_after: Optional[str],
        context: Dict[str, Any],
    ) -> ValidationResult:
        """Validate the action by reading the text around the click point.

        Args:
            action: Action description; ``by_text``, ``x_pct``, ``y_pct`` and
                ``target_spec`` are read from it.
            result: Agent report; ``actual_position`` takes precedence over
                the coordinates found in ``action``.
            screenshot_before: Unused by this checker.
            screenshot_after: Screenshot taken after the action, in whatever
                form ``ReplayVerifier._load_single_image`` accepts.
            context: Extra data; ``expected_text`` is used as a fallback label.

        Returns:
            The verdict described in the class docstring.
        """
        t0 = time.perf_counter()
        target_spec = action.get("target_spec") or {}
        expected_text = (
            action.get("by_text")
            or target_spec.get("by_text")
            or context.get("expected_text")
            or ""
        )
        actual_pos = result.get("actual_position") or {}
        # Explicit None-aware lookup: an `or` chain would discard 0.0.
        x_pct = self._resolve_pct(
            actual_pos.get("x_pct"), action.get("x_pct"), target_spec.get("x_pct")
        )
        y_pct = self._resolve_pct(
            actual_pos.get("y_pct"), action.get("y_pct"), target_spec.get("y_pct")
        )

        if not screenshot_after or x_pct is None or y_pct is None or not expected_text:
            return self._inconclusive(
                t0, 0.2, "ROI indéfinie (coords ou expected_text manquants)"
            )

        try:
            from agent_v0.server_v1.replay_verifier import ReplayVerifier
            img = ReplayVerifier()._load_single_image(screenshot_after)
        except Exception as exc:
            return self._inconclusive(t0, 0.1, f"Chargement image impossible: {exc}")

        # Square region around the click, clamped to the image bounds.
        w, h = img.size
        cx, cy = int(x_pct * w), int(y_pct * h)
        r = self._radius
        bbox = (max(0, cx - r), max(0, cy - r), min(w, cx + r), min(h, cy + r))
        roi = img.crop(bbox)

        ocr_fn = self._ensure_ocr()
        if ocr_fn is None:
            return self._inconclusive(
                t0, 0.1, "OCR indisponible (EasyOCR/docTR non chargés)"
            )

        try:
            raw_text = ocr_fn(roi) or ""
        except Exception as exc:
            return self._inconclusive(t0, 0.1, f"OCR erreur: {exc}")

        text_norm = _strip_accents(raw_text)
        expected_norm = _strip_accents(expected_text)
        elapsed_ms = (time.perf_counter() - t0) * 1000
        evidence = {
            "roi_text": raw_text[:200],
            "roi_bbox": list(bbox),
            "expected": expected_text,
        }

        # Highest priority: a suspect token that the expected label does not
        # itself contain means the click landed outside the application.
        for suspect in self.SUSPECT_TOKENS:
            if suspect in text_norm and suspect not in expected_norm:
                return ValidationResult(
                    verdict=Verdict.TERMINATE, confidence=self._suspect_conf,
                    check_used=self.name, elapsed_ms=elapsed_ms,
                    failure_category=FailureCategory.WRONG_APPLICATION,
                    reasoning=(
                        f"Token suspect '{suspect}' dans ROI clic "
                        f"(attendu '{expected_text[:40]}') — cible hors-app"
                    ),
                    raw_evidence=evidence,
                )

        # Exact match on the normalised label.
        if expected_norm and expected_norm in text_norm:
            return ValidationResult(
                verdict=Verdict.COMPLETE, confidence=self._expected_conf,
                check_used=self.name, elapsed_ms=elapsed_ms,
                reasoning=f"Texte '{expected_text[:40]}' trouvé dans ROI",
                raw_evidence=evidence,
            )

        # Word-by-word partial match, ignoring words of one or two characters.
        toks = [t for t in expected_norm.split() if len(t) > 2]
        if toks:
            hits = sum(1 for tok in toks if tok in text_norm)
            ratio = hits / len(toks)
            if ratio >= 0.5:
                return ValidationResult(
                    verdict=Verdict.COMPLETE, confidence=0.6 + 0.3 * ratio,
                    check_used=self.name, elapsed_ms=elapsed_ms,
                    reasoning=f"Match partiel {hits}/{len(toks)} tokens",
                    raw_evidence=evidence,
                )

        return ValidationResult(
            verdict=Verdict.CONTINUE, confidence=0.4,
            check_used=self.name, elapsed_ms=elapsed_ms,
            failure_category=FailureCategory.OCR_TEXT_MISSING,
            reasoning=f"Texte '{expected_text[:40]}' non trouvé dans ROI",
            raw_evidence=evidence,
        )
|
||||||
79
core/validation/orchestrator.py
Normal file
79
core/validation/orchestrator.py
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
"""Validator orchestrator — dispatches action_type to checkers and aggregates.

Aggregation rules (see SPEC_VALIDATOR_MATRICE.md §6.2):
- A checker returning TERMINATE with confidence >= terminate_confidence
  (0.85 by default) is returned immediately.
- A checker returning COMPLETE with confidence >= accept_confidence is
  returned immediately (the first one that qualifies, in checker order).
- Otherwise the most confident COMPLETE among the collected results is
  returned if there is one, else the most confident result overall; it is up
  to the caller to escalate or retry.
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
from core.validation.result import ValidationResult, Verdict
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class Validator:
    """Run the checkers registered for an action type and aggregate verdicts."""

    def __init__(
        self,
        checkers: Dict[str, List[Any]],
        default_checkers: Optional[List[Any]] = None,
        accept_confidence: float = 0.70,
        terminate_confidence: float = 0.85,
    ):
        """Store the dispatch table and the short-circuit thresholds.

        Args:
            checkers: Ordered checker list per action type.
            default_checkers: Checkers used when the action type has no
                (or an empty) entry in ``checkers``.
            accept_confidence: Minimum confidence for a COMPLETE verdict to
                end the validation immediately.
            terminate_confidence: Minimum confidence for a TERMINATE verdict
                to end the validation immediately.
        """
        self._checkers = checkers
        self._default = default_checkers or []
        self._accept = accept_confidence
        self._terminate_conf = terminate_confidence

    def validate(
        self,
        action: Dict[str, Any],
        result: Dict[str, Any],
        screenshot_before: Optional[str] = None,
        screenshot_after: Optional[str] = None,
        context: Optional[Dict[str, Any]] = None,
    ) -> ValidationResult:
        """Validate one action and return a single aggregated result.

        Checkers run in order. A checker that raises is logged and skipped.
        The first confident TERMINATE or confident COMPLETE is returned as
        is; otherwise the most confident COMPLETE (or, failing that, the most
        confident result of any kind) is returned. When no checker produced
        a result, a CONTINUE result tagged ``no_checker`` is returned.
        """
        ctx = context or {}
        action_type = action.get("type", "")
        candidates = self._checkers.get(action_type) or self._default

        collected: List[ValidationResult] = []
        for checker in candidates:
            try:
                outcome = checker.check(
                    action, result, screenshot_before, screenshot_after, ctx
                )
            except Exception as exc:
                logger.warning(
                    "[VALIDATOR] checker %s a planté: %s",
                    getattr(checker, "name", checker), exc,
                )
                continue

            collected.append(outcome)
            logger.info(
                "[VALIDATOR] check=%s verdict=%s conf=%.2f elapsed=%.0fms",
                outcome.check_used, outcome.verdict.value,
                outcome.confidence, outcome.elapsed_ms,
            )

            # Short-circuit on a confident TERMINATE or a confident COMPLETE.
            confident_terminate = (
                outcome.verdict == Verdict.TERMINATE
                and outcome.confidence >= self._terminate_conf
            )
            confident_complete = (
                outcome.verdict == Verdict.COMPLETE
                and outcome.confidence >= self._accept
            )
            if confident_terminate or confident_complete:
                return outcome

        if not collected:
            return ValidationResult(
                verdict=Verdict.CONTINUE, confidence=0.3,
                check_used="no_checker", elapsed_ms=0.0,
                reasoning=f"Aucun checker pour action_type='{action_type}'",
            )

        # Nothing conclusive: prefer a COMPLETE if any, else the most
        # confident result.
        completes = [r for r in collected if r.verdict == Verdict.COMPLETE]
        pool = completes if completes else collected
        return max(pool, key=lambda r: r.confidence)
|
||||||
68
core/validation/pixel_diff_checker.py
Normal file
68
core/validation/pixel_diff_checker.py
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
"""PixelDiffChecker — wrapper de ReplayVerifier.verify_action (~15 ms).
|
||||||
|
|
||||||
|
Pré-filtre rapide : si l'écran n'a pas du tout changé, l'action a probablement
|
||||||
|
échoué. Réutilise l'instance _replay_verifier globale d'api_stream.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import time
|
||||||
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
|
from core.validation.result import FailureCategory, ValidationResult, Verdict
|
||||||
|
|
||||||
|
|
||||||
|
class PixelDiffChecker:
    """Adapter exposing a replay verifier's ``verify_action`` as a checker."""

    name = "pixel_diff"
    budget_ms = 15.0

    def __init__(self, replay_verifier):
        """Keep a reference to the verifier whose ``verify_action`` is called."""
        self._rv = replay_verifier

    def check(
        self,
        action: Dict[str, Any],
        result: Dict[str, Any],
        screenshot_before: Optional[str],
        screenshot_after: Optional[str],
        context: Dict[str, Any],
    ) -> ValidationResult:
        """Translate the verifier's report into a ``ValidationResult``.

        Mapping:
            - suggestion ``"continue"`` with changes detected -> COMPLETE,
              keeping the verifier's confidence;
            - suggestion ``"retry"`` -> CONTINUE / NO_VISUAL_CHANGE with the
              verifier's confidence lowered by 0.2 (never below 0.4);
            - anything else -> CONTINUE at confidence 0.3.

        An exception raised by the verifier yields CONTINUE at confidence
        0.1. ``context`` is not used.
        """
        started = time.time()
        try:
            report = self._rv.verify_action(
                action=action,
                result=result,
                screenshot_before=screenshot_before,
                screenshot_after=screenshot_after,
            )
        except Exception as exc:
            return ValidationResult(
                verdict=Verdict.CONTINUE,
                confidence=0.1,
                check_used=self.name,
                elapsed_ms=(time.time() - started) * 1000,
                reasoning=f"PixelDiff erreur: {exc}",
            )
        duration_ms = (time.time() - started) * 1000

        # Start from the inconclusive outcome and refine it.
        verdict = Verdict.CONTINUE
        confidence = 0.3
        category = None
        if report.suggestion == "continue" and report.changes_detected:
            verdict = Verdict.COMPLETE
            confidence = report.confidence
        elif report.suggestion == "retry":
            confidence = max(0.4, report.confidence - 0.2)
            category = FailureCategory.NO_VISUAL_CHANGE

        evidence = {
            "change_area_pct": report.change_area_pct,
            "local_change_pct": report.local_change_pct,
        }
        return ValidationResult(
            verdict=verdict,
            confidence=confidence,
            check_used=self.name,
            elapsed_ms=duration_ms,
            reasoning=report.detail,
            failure_category=category,
            raw_evidence=evidence,
        )
|
||||||
53
core/validation/result.py
Normal file
53
core/validation/result.py
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
"""Dataclasses du Validator — Verdict, FailureCategory, ValidationResult.
|
||||||
|
|
||||||
|
Cf. SPEC_VALIDATOR_MATRICE.md §1 et AXE_B2_DEEP_VALIDATOR.md §3.1.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from enum import Enum
|
||||||
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
|
|
||||||
|
class Verdict(str, Enum):
    """The three possible outcomes of a validation.

    Modelled on Skyvern's complete / terminate / continue.
    """

    # The action had the intended effect.
    COMPLETE = "complete"
    # The effect is not visible yet: re-check or wait.
    CONTINUE = "continue"
    # Unrecoverable failure: hand over to a supervised pause.
    TERMINATE = "terminate"
|
||||||
|
|
||||||
|
|
||||||
|
class FailureCategory(str, Enum):
    """Failure classification, restricted to the rpa_vision_v3 context."""

    WRONG_TARGET = "wrong_target"
    # "Step 10" bug: the click landed outside the application.
    WRONG_APPLICATION = "wrong_application"
    NO_VISUAL_CHANGE = "no_visual_change"
    UNEXPECTED_DIALOG = "unexpected_dialog"
    OCR_TEXT_MISSING = "ocr_text_missing"
    SCHEMA_INVALID = "schema_invalid"
    UI_LOADING = "ui_loading"
    UNKNOWN = "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ValidationResult:
    """Outcome of one check.

    ``to_dict`` flattens the enums to their string values; the result is
    JSON-serialisable as long as ``raw_evidence`` itself is.
    """

    verdict: Verdict
    confidence: float
    check_used: str
    elapsed_ms: float
    reasoning: str = ""
    failure_category: Optional[FailureCategory] = None
    raw_evidence: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict view with rounded numbers and enum values."""
        category = None
        if self.failure_category:
            category = self.failure_category.value

        payload: Dict[str, Any] = {}
        payload["verdict"] = self.verdict.value
        payload["confidence"] = round(self.confidence, 3)
        payload["check_used"] = self.check_used
        payload["elapsed_ms"] = round(self.elapsed_ms, 1)
        payload["reasoning"] = self.reasoning
        payload["failure_category"] = category
        payload["raw_evidence"] = self.raw_evidence
        return payload
|
||||||
130
tests/integration/test_validator_step10.py
Normal file
130
tests/integration/test_validator_step10.py
Normal file
@@ -0,0 +1,130 @@
|
|||||||
|
"""Offline repro of the functional bug: replay_sess_4c38dbb8 / act_raw_6c1432b3.

The agent reports success=True after clicking the "Enregistrer" button of the
"Enregistrer sous" dialog, but the active window after the click is
"rpa_vision : Explorateur de fichiers" — the application switched away from
Notepad (Bloc-notes).

The Validator MVP P0 must assign failure_category=WRONG_APPLICATION through
OcrRoiChecker (suspect token 'explorateur de fichiers' in the ROI), which is
what leads to the success → False override.

Fixture strategy:
- screenshot_after: synthetic 1920×1080 image with
  "rpa_vision : Explorateur de fichiers" drawn near the centre (the observed
  bug: the window switched to the file explorer).
- screenshot_before: "Enregistrer sous" dialog (text drawn near the centre).
- action: click on "Enregistrer" at x_pct=0.5289, y_pct=0.7913.
- OCR is injected: a fake that returns the title of the window that is active
  after the click, regardless of the cropped image.
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import io
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
pytestmark = [pytest.mark.integration]
|
||||||
|
|
||||||
|
|
||||||
|
def _png_b64(img) -> str:
|
||||||
|
buf = io.BytesIO()
|
||||||
|
img.save(buf, format="PNG")
|
||||||
|
return base64.b64encode(buf.getvalue()).decode("ascii")
|
||||||
|
|
||||||
|
|
||||||
|
def _make_screenshot(text: str, color=(245, 245, 245), size=(1920, 1080)):
    """Build a plain image (1920x1080 by default) with *text* drawn near its centre.

    The text starts 200 px left of and 8 px above the centre point.
    """
    from PIL import Image, ImageDraw

    canvas = Image.new("RGB", size, color=color)
    centre_x = size[0] // 2
    centre_y = size[1] // 2
    ImageDraw.Draw(canvas).text(
        (centre_x - 200, centre_y - 8), text, fill=(0, 0, 0)
    )
    return canvas
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def bug_step10_fixtures():
    """Recreate the act_raw_6c1432b3 situation without a real OCR engine.

    Returns a ``(before, after, action, result)`` tuple: two base64 PNG
    screenshots, the replayed click action and the agent's report. The tests
    inject a fake OCR function, so the image contents only need to load.
    """
    # Click position, in normalised coordinates (right of centre, lower part
    # of the screen) — the location recorded for the "Enregistrer" button in
    # replay_sess_4c38dbb8.failures.jsonl.
    click_x, click_y = 0.5289, 0.7913

    before = _png_b64(_make_screenshot("Enregistrer sous"))
    after = _png_b64(_make_screenshot("rpa_vision : Explorateur de fichiers"))

    target_spec = {
        "by_text": "Enregistrer",
        "window_title": "Enregistrer sous",
    }
    action = {
        "type": "click",
        "action_id": "act_raw_6c1432b3",
        "by_text": "Enregistrer",
        "target_spec": target_spec,
        "x_pct": click_x,
        "y_pct": click_y,
    }
    # The agent reports success=True — that is the bug: the legacy pixel diff
    # cannot tell the two situations apart.
    result = {
        "success": True,
        "actual_position": {"x_pct": click_x, "y_pct": click_y},
    }
    return before, after, action, result
|
||||||
|
|
||||||
|
|
||||||
|
def test_validator_detects_wrong_application_on_act_raw_6c1432b3(bug_step10_fixtures):
    """The Validator must report WRONG_APPLICATION despite the client's success=True."""
    from core.validation import OcrRoiChecker, Validator, Verdict, FailureCategory

    before, after, action, result = bug_step10_fixtures

    def fake_ocr(crop):
        # Stand-in for the real OCR: whatever region is cropped around the
        # click (radius 80 px, i.e. up to 160x160), pretend it reads the
        # title of the window that took focus after the click.
        return "rpa_vision : Explorateur de fichiers"

    # Only the OCR checker is wired for "click" here; the pixel-diff checker
    # is left out on purpose.
    ocr_click = OcrRoiChecker(ocr_fn=fake_ocr, radius_px=80)
    validator = Validator(checkers={"click": [ocr_click]})

    vr = validator.validate(
        action=action,
        result=result,
        screenshot_before=before,
        screenshot_after=after,
        context={},
    )

    # Expected: TERMINATE / WRONG_APPLICATION, triggered by the suspect token
    # 'explorateur de fichiers'.
    assert vr.verdict == Verdict.TERMINATE, (
        f"Verdict attendu TERMINATE, obtenu {vr.verdict} (reasoning={vr.reasoning})"
    )
    assert vr.failure_category == FailureCategory.WRONG_APPLICATION
    assert vr.confidence >= 0.85
    roi_text = vr.raw_evidence.get("roi_text", "")
    assert "explorateur" in vr.reasoning.lower() or "explorateur" in roi_text.lower()
|
||||||
|
|
||||||
|
|
||||||
|
def test_validator_complete_when_correct_window_active(bug_step10_fixtures):
    """Sanity check: when the OCR reads 'Enregistrer' in the ROI, the verdict is COMPLETE."""
    from core.validation import OcrRoiChecker, Validator, Verdict

    # The fixture's "after" screenshot shows the file explorer; it is replaced
    # by a screenshot of the expected window.
    before, _after_bad, action, result = bug_step10_fixtures
    after_good = _png_b64(_make_screenshot("après save Bloc-notes"))

    def fake_ocr(crop):
        # Contains the expected label and no suspect token.
        return "Bouton Enregistrer cliqué — Bloc-notes"

    validator = Validator(
        checkers={"click": [OcrRoiChecker(ocr_fn=fake_ocr, radius_px=80)]},
    )
    vr = validator.validate(
        action=action,
        result=result,
        screenshot_before=before,
        screenshot_after=after_good,
        context={},
    )
    assert vr.verdict == Verdict.COMPLETE
    assert vr.failure_category is None
|
||||||
249
tests/unit/test_validator_v2.py
Normal file
249
tests/unit/test_validator_v2.py
Normal file
@@ -0,0 +1,249 @@
|
|||||||
|
"""Tests unitaires du Validator V2 (P0 MVP).
|
||||||
|
|
||||||
|
Cf. SPEC_VALIDATOR_MATRICE.md, AXE_B2_DEEP_VALIDATOR.md.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import io
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.unit
|
||||||
|
|
||||||
|
|
||||||
|
def _png_b64(img) -> str:
|
||||||
|
"""Encode une image PIL en base64 (préfixé iVBOR pour _load_single_image)."""
|
||||||
|
buf = io.BytesIO()
|
||||||
|
img.save(buf, format="PNG")
|
||||||
|
return base64.b64encode(buf.getvalue()).decode("ascii")
|
||||||
|
|
||||||
|
|
||||||
|
def _make_image(text: str = "", color=(255, 255, 255), size=(800, 600)):
    """Build a plain image (800x600 by default), optionally with *text* near the centre.

    Nothing is drawn when *text* is empty.
    """
    from PIL import Image, ImageDraw

    canvas = Image.new("RGB", size, color=color)
    if not text:
        return canvas
    origin = (size[0] // 2 - 100, size[1] // 2)
    ImageDraw.Draw(canvas).text(origin, text, fill=(0, 0, 0))
    return canvas
|
||||||
|
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------------
|
||||||
|
# PixelDiffChecker
|
||||||
|
# ----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class _FakeVerifierResult:
|
||||||
|
def __init__(self, suggestion, changes_detected, confidence, detail="",
|
||||||
|
change_area_pct=0.0, local_change_pct=0.0):
|
||||||
|
self.suggestion = suggestion
|
||||||
|
self.changes_detected = changes_detected
|
||||||
|
self.confidence = confidence
|
||||||
|
self.detail = detail
|
||||||
|
self.change_area_pct = change_area_pct
|
||||||
|
self.local_change_pct = local_change_pct
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeReplayVerifier:
|
||||||
|
def __init__(self, result):
|
||||||
|
self._r = result
|
||||||
|
|
||||||
|
def verify_action(self, action, result, screenshot_before, screenshot_after):
|
||||||
|
return self._r
|
||||||
|
|
||||||
|
|
||||||
|
def test_pixel_diff_complete_when_changes_detected():
    """A "continue" suggestion with detected changes maps to COMPLETE at the same confidence."""
    from core.validation import PixelDiffChecker, Verdict

    canned = _FakeVerifierResult(
        suggestion="continue", changes_detected=True, confidence=0.85,
        detail="pixels changés"
    )
    checker = PixelDiffChecker(_FakeReplayVerifier(canned))

    outcome = checker.check({"type": "click"}, {"success": True}, "x", "y", {})

    assert outcome.verdict == Verdict.COMPLETE
    assert outcome.confidence == pytest.approx(0.85)
    assert outcome.check_used == "pixel_diff"
|
||||||
|
|
||||||
|
|
||||||
|
def test_pixel_diff_continue_when_retry_suggested():
    """A "retry" suggestion maps to CONTINUE with the NO_VISUAL_CHANGE category."""
    from core.validation import PixelDiffChecker, Verdict, FailureCategory

    canned = _FakeVerifierResult(
        suggestion="retry", changes_detected=False, confidence=0.7,
        detail="aucun changement"
    )
    checker = PixelDiffChecker(_FakeReplayVerifier(canned))

    outcome = checker.check({"type": "click"}, {"success": True}, "x", "y", {})

    assert outcome.verdict == Verdict.CONTINUE
    assert outcome.failure_category == FailureCategory.NO_VISUAL_CHANGE
|
||||||
|
|
||||||
|
|
||||||
|
def test_pixel_diff_handles_internal_exception():
    """An exception raised by the verifier becomes a CONTINUE result carrying the message."""
    from core.validation import PixelDiffChecker, Verdict

    class _BadVerifier:
        def verify_action(self, **kw):
            raise RuntimeError("boom")

    checker = PixelDiffChecker(_BadVerifier())
    outcome = checker.check({"type": "click"}, {"success": True}, "x", "y", {})

    assert outcome.verdict == Verdict.CONTINUE
    assert "boom" in outcome.reasoning
|
||||||
|
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------------
|
||||||
|
# OcrRoiChecker — avec ocr_fn injecté (pas d'EasyOCR ici)
|
||||||
|
# ----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_ocr_roi_detects_wrong_application_suspect_token():
    """Step-10 bug: an 'edge' / 'explorateur de fichiers' token in the ROI means WRONG_APPLICATION."""
    from core.validation import OcrRoiChecker, Verdict, FailureCategory

    screenshot = _png_b64(_make_image())

    # The fake OCR returns a File Explorer window title although the action
    # was targeting the "Enregistrer" text.
    def fake_ocr(_crop):
        return "rpa_vision : Explorateur de fichiers"

    checker = OcrRoiChecker(ocr_fn=fake_ocr)
    click_action = {
        "type": "click",
        "by_text": "Enregistrer",
        "x_pct": 0.5,
        "y_pct": 0.5,
    }
    click_result = {
        "success": True,
        "actual_position": {"x_pct": 0.5, "y_pct": 0.5},
    }

    outcome = checker.check(
        action=click_action,
        result=click_result,
        screenshot_before=None,
        screenshot_after=screenshot,
        context={},
    )

    assert outcome.verdict == Verdict.TERMINATE
    assert outcome.failure_category == FailureCategory.WRONG_APPLICATION
    assert outcome.confidence >= 0.85
|
||||||
|
|
||||||
|
|
||||||
|
def test_ocr_roi_complete_when_expected_text_in_roi():
    """OCR text containing the expected label must give COMPLETE with confidence >= 0.85."""
    from core.validation import OcrRoiChecker, Verdict

    screenshot = _png_b64(_make_image())

    def fake_ocr(_):
        return "Bouton Enregistrer actif"

    checker = OcrRoiChecker(ocr_fn=fake_ocr)
    click_action = {
        "type": "click",
        "by_text": "Enregistrer",
        "x_pct": 0.5,
        "y_pct": 0.5,
    }

    outcome = checker.check(
        action=click_action,
        result={"success": True},
        screenshot_before=None,
        screenshot_after=screenshot,
        context={},
    )

    assert outcome.verdict == Verdict.COMPLETE
    assert outcome.confidence >= 0.85
|
||||||
|
|
||||||
|
|
||||||
|
def test_ocr_roi_ocr_text_missing_when_no_match():
    """Unrelated OCR text must give CONTINUE with the OCR_TEXT_MISSING category."""
    from core.validation import OcrRoiChecker, Verdict, FailureCategory

    screenshot = _png_b64(_make_image())

    def fake_ocr(_):
        return "texte sans rapport"

    checker = OcrRoiChecker(ocr_fn=fake_ocr)
    click_action = {
        "type": "click",
        "by_text": "Enregistrer",
        "x_pct": 0.5,
        "y_pct": 0.5,
    }

    outcome = checker.check(
        action=click_action,
        result={"success": True},
        screenshot_before=None,
        screenshot_after=screenshot,
        context={},
    )

    assert outcome.verdict == Verdict.CONTINUE
    assert outcome.failure_category == FailureCategory.OCR_TEXT_MISSING
|
||||||
|
|
||||||
|
|
||||||
|
def test_ocr_roi_missing_coords_returns_continue():
    """An action without coordinates must give CONTINUE and mention "ROI indéfinie"."""
    from core.validation import OcrRoiChecker, Verdict

    screenshot = _png_b64(_make_image())

    def fake_ocr(_):
        return ""

    checker = OcrRoiChecker(ocr_fn=fake_ocr)
    # Deliberately no x_pct / y_pct: the action carries no coordinates.
    action_without_coords = {"type": "click", "by_text": "Enregistrer"}

    outcome = checker.check(
        action=action_without_coords,
        result={"success": True},
        screenshot_before=None,
        screenshot_after=screenshot,
        context={},
    )

    assert outcome.verdict == Verdict.CONTINUE
    assert "ROI indéfinie" in outcome.reasoning
|
||||||
|
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------------
|
||||||
|
# Validator orchestrator
|
||||||
|
# ----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class _FakeChecker:
    """Checker test double that returns a ValidationResult built at construction."""

    def __init__(self, name, verdict, conf, failure_category=None):
        """Build the canned result once; ``name`` is also used as ``check_used``."""
        from core.validation.result import ValidationResult

        self.name = name
        canned_fields = dict(
            verdict=verdict,
            confidence=conf,
            check_used=name,
            elapsed_ms=1.0,
            failure_category=failure_category,
            reasoning=f"fake {name}",
        )
        self._res = ValidationResult(**canned_fields)

    def check(self, action, result, sb, sa, ctx):
        """Ignore every argument and return the canned result."""
        return self._res
|
||||||
|
|
||||||
|
|
||||||
|
def test_validator_terminate_high_conf_short_circuits():
    """A TERMINATE at 0.9 from the first checker must be the result that is returned.

    The second checker would answer COMPLETE at 0.99; the assertions verify
    that the first checker's verdict, category and name are the ones reported.
    """
    from core.validation import Validator, Verdict, FailureCategory

    terminating = _FakeChecker(
        "ocr_roi", Verdict.TERMINATE, 0.9, FailureCategory.WRONG_APPLICATION
    )
    trailing = _FakeChecker("pixel", Verdict.COMPLETE, 0.99)
    validator = Validator(checkers={"click": [terminating, trailing]})

    outcome = validator.validate({"type": "click"}, {"success": True})

    assert outcome.verdict == Verdict.TERMINATE
    assert outcome.failure_category == FailureCategory.WRONG_APPLICATION
    assert outcome.check_used == "ocr_roi"
|
||||||
|
|
||||||
|
|
||||||
|
def test_validator_complete_high_conf_short_circuits():
    """A COMPLETE at 0.95 from the first checker must be the result that is returned."""
    from core.validation import Validator, Verdict

    confident = _FakeChecker("ocr_roi", Verdict.COMPLETE, 0.95)
    trailing = _FakeChecker("pixel", Verdict.CONTINUE, 0.3)
    validator = Validator(checkers={"click": [confident, trailing]})

    outcome = validator.validate({"type": "click"}, {"success": True})

    assert outcome.verdict == Verdict.COMPLETE
    assert outcome.check_used == "ocr_roi"
|
||||||
|
|
||||||
|
|
||||||
|
def test_validator_falls_back_to_default_checkers():
    """An action type with no dedicated checkers must be handled by the defaults."""
    from core.validation import Validator, Verdict

    fallback = _FakeChecker("default", Verdict.COMPLETE, 0.8)
    validator = Validator(checkers={}, default_checkers=[fallback])

    outcome = validator.validate({"type": "unknown_action"}, {"success": True})

    assert outcome.check_used == "default"
    assert outcome.verdict == Verdict.COMPLETE
|
||||||
|
|
||||||
|
|
||||||
|
def test_validator_no_checker_returns_neutral_continue():
    """With no checker configured, the result must be CONTINUE tagged "no_checker"."""
    from core.validation import Validator, Verdict

    validator = Validator(checkers={})

    outcome = validator.validate({"type": "click"}, {"success": True})

    assert outcome.verdict == Verdict.CONTINUE
    assert outcome.check_used == "no_checker"
|
||||||
|
|
||||||
|
|
||||||
|
def test_validator_skips_checker_that_raises():
    """When the first checker raises, the result must come from the next one."""
    from core.validation import Validator, Verdict

    class _Boom:
        """Checker double whose check() always raises."""

        name = "boom"

        def check(self, *a, **kw):
            raise RuntimeError("crash")

    healthy = _FakeChecker("ok", Verdict.COMPLETE, 0.9)
    validator = Validator(checkers={"click": [_Boom(), healthy]})

    outcome = validator.validate({"type": "click"}, {"success": True})

    assert outcome.check_used == "ok"
|
||||||
|
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------------
|
||||||
|
# Sérialisation ValidationResult
|
||||||
|
# ----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_validation_result_to_dict_is_json_serializable():
    """ValidationResult.to_dict() must produce a dict that json.dumps accepts.

    The serialized form is expected to contain "wrong_application" and the
    dict must expose the verdict as the string "terminate".
    """
    import json

    from core.validation import FailureCategory, Verdict
    from core.validation.result import ValidationResult

    sample = ValidationResult(
        verdict=Verdict.TERMINATE,
        confidence=0.88,
        check_used="ocr_roi",
        elapsed_ms=42.7,
        reasoning="trop long" * 5,
        failure_category=FailureCategory.WRONG_APPLICATION,
        raw_evidence={"roi_text": "abc"},
    )

    as_dict = sample.to_dict()
    serialized = json.dumps(as_dict)  # must not raise

    assert "wrong_application" in serialized
    assert as_dict["verdict"] == "terminate"
|
||||||
Reference in New Issue
Block a user