Files
rpa_vision_v3/core/competences/verdicts.py

214 lines
7.2 KiB
Python

"""Persist supervised human verdicts for Lea competences."""
from __future__ import annotations
import json
import uuid
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, Iterable, Optional
from .catalog import DEFAULT_COMPETENCE_ROOT, REPO_ROOT
from .replay import find_competence
# Default JSONL file used by ``store_competence_verdict`` and
# ``iter_competence_verdicts`` when the caller passes no ``log_path``.
DEFAULT_VERDICT_LOG = REPO_ROOT / "data" / "competence_verdicts" / "verdicts.jsonl"
# Accepted values for the ``verdict_kind`` field of a verdict payload.
VALID_VERDICT_KINDS = {"valid", "invalid", "inconclusive"}
# Written to the ``schema_version`` field of every stored verdict record.
SCHEMA_VERSION = "lea_competence_verdict.v1"
class CompetenceVerdictError(ValueError):
    """Signal that a supervised verdict payload failed validation.

    Subclasses ``ValueError`` so callers that already handle ``ValueError``
    also catch it.
    """
def store_competence_verdict(
    competence_id: str,
    payload: Dict[str, Any],
    *,
    log_path: Path | str = DEFAULT_VERDICT_LOG,
    competence_root: Path | str = DEFAULT_COMPETENCE_ROOT,
    states: Optional[Iterable[str]] = None,
    now: Optional[datetime] = None,
) -> Dict[str, Any]:
    """Validate and append one supervised verdict.

    The function is idempotent on ``verdict_id``. If the same verdict was
    already logged for the same competence, the stored record is returned with
    ``duplicate=True`` and the log is left untouched.

    Args:
        competence_id: Identifier passed to ``find_competence``.
        payload: Verdict fields. ``verdict_id`` (canonical UUID v4 text),
            ``verdict_kind`` (one of ``VALID_VERDICT_KINDS``) and
            ``context_signature.machine_id`` are required. ``verdict_at``,
            ``verdict_by`` (default ``"human:dom"``), ``comments``,
            ``workflow_id``, ``evidence``, ``source`` and ``step_results``
            are optional.
        log_path: JSONL file the record is appended to.
        competence_root: Forwarded to ``find_competence`` as ``root``.
        states: Forwarded to ``find_competence`` as ``states``.
        now: Timestamp used when the payload carries no ``verdict_at``.

    Returns:
        The newly appended record (``duplicate`` is ``False``), or a copy of
        the previously logged record with ``duplicate`` set to ``True``.

    Raises:
        CompetenceVerdictError: If the payload is not a dict, a field fails
            validation, or ``verdict_id`` is already logged for a different
            competence. Anything raised by ``find_competence`` propagates
            unchanged.
    """
    if not isinstance(payload, dict):
        raise CompetenceVerdictError("Payload verdict invalide")

    competence = find_competence(competence_id, root=competence_root, states=states)
    log = Path(log_path)

    verdict_id = _required_text(payload, "verdict_id")
    _validate_uuid(verdict_id)

    # _validate_uuid accepts upper-case spellings, so previously logged
    # identifiers are matched case-insensitively; otherwise the same UUID
    # resubmitted in a different case would be appended a second time.
    verdict_key = verdict_id.lower()
    # Stored records carry ``competence.id``; accept it alongside the
    # caller-supplied identifier so a retry is still recognised as the same
    # competence if the two values differ.
    own_ids = (competence_id, competence.id)
    for existing in iter_competence_verdicts(log_path=log):
        logged_id = existing.get("verdict_id")
        if not isinstance(logged_id, str) or logged_id.lower() != verdict_key:
            continue
        if existing.get("competence_id") not in own_ids:
            raise CompetenceVerdictError(
                f"verdict_id deja utilise pour {existing.get('competence_id')}"
            )
        duplicate = dict(existing)
        duplicate["duplicate"] = True
        return duplicate

    verdict_kind = _required_text(payload, "verdict_kind")
    if verdict_kind not in VALID_VERDICT_KINDS:
        raise CompetenceVerdictError(
            "verdict_kind doit etre valid, invalid ou inconclusive"
        )

    verdict_at = _timestamp(payload.get("verdict_at"), now=now)
    context_signature = _context_signature(payload.get("context_signature"))
    evidence = _mapping(payload.get("evidence"), field="evidence")
    source = _mapping(payload.get("source"), field="source")
    # First non-blank workflow_id wins: payload, then source, then evidence.
    workflow_id = (
        _optional_text(payload, "workflow_id")
        or _optional_text(source, "workflow_id")
        or _optional_text(evidence, "workflow_id")
        or ""
    )
    step_results = _step_results(payload.get("step_results"))

    record = {
        "schema_version": SCHEMA_VERSION,
        "verdict_id": verdict_id,
        "competence_id": competence.id,
        "competence_source_path": competence.source_path,
        "learning_state": competence.learning_state,
        "workflow_id": workflow_id,
        "verdict_kind": verdict_kind,
        "verdict_at": verdict_at,
        "verdict_by": str(payload.get("verdict_by") or "human:dom"),
        "context_signature": context_signature,
        "step_results": step_results,
        "evidence": evidence,
        "comments": str(payload.get("comments") or ""),
        "source": source,
        "write_back_enabled": False,
        "yaml_write": False,
        "duplicate": False,
    }
    _append_jsonl(log, record)
    return record
def iter_competence_verdicts(
    *,
    log_path: Path | str = DEFAULT_VERDICT_LOG,
    competence_id: Optional[str] = None,
) -> list[Dict[str, Any]]:
    """Load logged verdict records, skipping malformed historical lines.

    Despite the ``iter_`` prefix the records are returned as a list.

    Args:
        log_path: JSONL file to read. A missing file yields an empty list.
        competence_id: When truthy, keep only records whose
            ``competence_id`` equals this value.

    Returns:
        The decoded JSON objects in file order. Blank lines, lines that are
        not valid UTF-8, lines that are not valid JSON, and JSON values that
        are not objects are skipped.
    """
    log = Path(log_path)
    if not log.exists():
        return []

    records: list[Dict[str, Any]] = []
    # The file is read as bytes and decoded line by line: in text mode a
    # single invalid UTF-8 byte raises UnicodeDecodeError and aborts the whole
    # load instead of skipping just the damaged line.
    for raw_line in log.read_bytes().splitlines():
        try:
            line = raw_line.decode("utf-8").strip()
        except UnicodeDecodeError:
            continue
        if not line:
            continue
        try:
            record = json.loads(line)
        except json.JSONDecodeError:
            continue
        if not isinstance(record, dict):
            continue
        if competence_id and record.get("competence_id") != competence_id:
            continue
        records.append(record)
    return records
def _required_text(payload: Dict[str, Any], key: str) -> str:
value = payload.get(key)
if not isinstance(value, str) or not value.strip():
raise CompetenceVerdictError(f"{key} requis")
return value.strip()
def _optional_text(payload: Dict[str, Any], key: str) -> Optional[str]:
value = payload.get(key)
if value is None:
return None
if not isinstance(value, str):
raise CompetenceVerdictError(f"{key} doit etre du texte")
text = value.strip()
return text or None
def _validate_uuid(value: str) -> None:
try:
parsed = uuid.UUID(value, version=4)
except ValueError as exc:
raise CompetenceVerdictError("verdict_id doit etre un UUID v4") from exc
if str(parsed) != value.lower():
raise CompetenceVerdictError("verdict_id UUID v4 invalide")
def _timestamp(value: Any, *, now: Optional[datetime]) -> str:
if value is None:
timestamp = now or datetime.now(timezone.utc)
elif isinstance(value, datetime):
timestamp = value
elif isinstance(value, str) and value.strip():
text = value.strip()
try:
parsed = datetime.fromisoformat(text.replace("Z", "+00:00"))
except ValueError as exc:
raise CompetenceVerdictError("verdict_at doit etre ISO 8601") from exc
timestamp = parsed
else:
raise CompetenceVerdictError("verdict_at doit etre ISO 8601")
if timestamp.tzinfo is None:
timestamp = timestamp.replace(tzinfo=timezone.utc)
return timestamp.astimezone(timezone.utc).isoformat()
def _context_signature(value: Any) -> Dict[str, Any]:
    """Validate and normalise the ``context_signature`` object of a payload.

    Returns:
        A copy of the mapping with ``machine_id`` stripped and with
        ``screen_state_initial`` / ``screen_state_after_action`` set to ``""``
        when they are absent.

    Raises:
        CompetenceVerdictError: If ``value`` is not a mapping (via
            ``_mapping``) or ``machine_id`` is missing, is not a string, or
            is blank.
    """
    signature = _mapping(value, field="context_signature")
    machine = signature.get("machine_id")
    if not (isinstance(machine, str) and machine.strip()):
        raise CompetenceVerdictError("context_signature.machine_id requis")

    # Re-assigning an existing key keeps its position, so key order is
    # unchanged apart from any defaults appended below.
    normalized = {**signature, "machine_id": machine.strip()}
    for optional_key in ("screen_state_initial", "screen_state_after_action"):
        normalized.setdefault(optional_key, "")
    return normalized
def _mapping(value: Any, *, field: str) -> Dict[str, Any]:
if value is None:
return {}
if not isinstance(value, dict):
raise CompetenceVerdictError(f"{field} doit etre un objet")
return dict(value)
def _step_results(value: Any) -> list[Dict[str, Any]]:
if value is None:
return []
if not isinstance(value, list):
raise CompetenceVerdictError("step_results doit etre une liste")
results: list[Dict[str, Any]] = []
for item in value:
if not isinstance(item, dict):
raise CompetenceVerdictError("step_results doit contenir des objets")
results.append(dict(item))
return results
def _append_jsonl(log_path: Path, record: Dict[str, Any]) -> None:
log_path.parent.mkdir(parents=True, exist_ok=True)
with log_path.open("a", encoding="utf-8") as handle:
handle.write(json.dumps(record, ensure_ascii=False, sort_keys=True))
handle.write("\n")