# rpa_vision_v3/core/competences/verdicts.py

"""Persist supervised human verdicts for Lea competences."""

from __future__ import annotations

import json
import uuid
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, Iterable, Optional

from .catalog import DEFAULT_COMPETENCE_ROOT, REPO_ROOT
from .replay import find_competence


# Default JSONL file used as ``log_path`` when verdicts are stored or loaded.
DEFAULT_VERDICT_LOG = REPO_ROOT / "data" / "competence_verdicts" / "verdicts.jsonl"
# Accepted values for the ``verdict_kind`` field of a verdict payload.
VALID_VERDICT_KINDS = {"valid", "invalid", "inconclusive"}
# Value written under the ``schema_version`` key of every stored record.
SCHEMA_VERSION = "lea_competence_verdict.v1"


class CompetenceVerdictError(ValueError):
    """Signal that a supervised verdict payload failed validation.

    Derives from ``ValueError`` so callers that already handle
    ``ValueError`` also catch verdict validation failures.
    """


def store_competence_verdict(
    competence_id: str,
    payload: Dict[str, Any],
    *,
    log_path: Path | str = DEFAULT_VERDICT_LOG,
    competence_root: Path | str = DEFAULT_COMPETENCE_ROOT,
    states: Optional[Iterable[str]] = None,
    now: Optional[datetime] = None,
) -> Dict[str, Any]:
    """Validate and append one supervised verdict.

    The function is idempotent on ``verdict_id``. If the same verdict was
    already logged for the same competence, the stored record is returned with
    ``duplicate=True`` and the log is left untouched.

    Args:
        competence_id: Identifier handed to ``find_competence``.
        payload: Raw verdict fields. ``verdict_id`` (UUID v4 text),
            ``verdict_kind`` and ``context_signature.machine_id`` are
            required; ``verdict_at``, ``verdict_by``, ``workflow_id``,
            ``step_results``, ``evidence``, ``comments`` and ``source`` are
            optional.
        log_path: JSONL file the record is appended to.
        competence_root: Forwarded to ``find_competence`` as ``root``.
        states: Forwarded to ``find_competence`` as ``states``.
        now: Timestamp used when the payload carries no ``verdict_at``;
            defaults to the current UTC time.

    Returns:
        The record that was appended (``duplicate`` is ``False``), or a copy
        of the previously stored record with ``duplicate`` set to ``True``.

    Raises:
        CompetenceVerdictError: If the payload is not a dict, a field fails
            validation, or ``verdict_id`` is already logged for a different
            competence. Whatever ``find_competence`` raises is propagated
            unchanged.
    """

    if not isinstance(payload, dict):
        raise CompetenceVerdictError("Payload verdict invalide")

    competence = find_competence(competence_id, root=competence_root, states=states)
    log = Path(log_path)
    verdict_id = _required_text(payload, "verdict_id")
    _validate_uuid(verdict_id)

    # The UUID check accepts any letter case, so the duplicate lookup must be
    # case-insensitive too; otherwise the same verdict resubmitted in another
    # case would be appended a second time.
    verdict_key = verdict_id.lower()
    # Stored records carry ``competence.id``; accept it alongside the caller's
    # identifier so a replay still matches if the two ever differ.
    own_ids = (competence_id, competence.id)

    for existing in iter_competence_verdicts(log_path=log):
        existing_id = existing.get("verdict_id")
        if not isinstance(existing_id, str) or existing_id.lower() != verdict_key:
            continue
        if existing.get("competence_id") not in own_ids:
            raise CompetenceVerdictError(
                f"verdict_id deja utilise pour {existing.get('competence_id')}"
            )
        duplicate = dict(existing)
        duplicate["duplicate"] = True
        return duplicate

    verdict_kind = _required_text(payload, "verdict_kind")
    if verdict_kind not in VALID_VERDICT_KINDS:
        raise CompetenceVerdictError(
            "verdict_kind doit etre valid, invalid ou inconclusive"
        )

    verdict_at = _timestamp(payload.get("verdict_at"), now=now)
    context_signature = _context_signature(payload.get("context_signature"))
    evidence = _mapping(payload.get("evidence"), field="evidence")
    source = _mapping(payload.get("source"), field="source")
    # First non-empty workflow id wins: payload, then source, then evidence.
    workflow_id = (
        _optional_text(payload, "workflow_id")
        or _optional_text(source, "workflow_id")
        or _optional_text(evidence, "workflow_id")
        or ""
    )
    step_results = _step_results(payload.get("step_results"))

    record = {
        "schema_version": SCHEMA_VERSION,
        "verdict_id": verdict_id,
        "competence_id": competence.id,
        "competence_source_path": competence.source_path,
        "learning_state": competence.learning_state,
        "workflow_id": workflow_id,
        "verdict_kind": verdict_kind,
        "verdict_at": verdict_at,
        "verdict_by": str(payload.get("verdict_by") or "human:dom"),
        "context_signature": context_signature,
        "step_results": step_results,
        "evidence": evidence,
        "comments": str(payload.get("comments") or ""),
        "source": source,
        "write_back_enabled": False,
        "yaml_write": False,
        "duplicate": False,
    }

    _append_jsonl(log, record)
    return record


def iter_competence_verdicts(
    *,
    log_path: Path | str = DEFAULT_VERDICT_LOG,
    competence_id: Optional[str] = None,
) -> list[Dict[str, Any]]:
    """Load logged verdict records, skipping malformed historical lines.

    A line is skipped when it is blank, is not valid UTF-8, is not valid
    JSON, or does not decode to a JSON object.

    Args:
        log_path: JSONL file to read. A missing file yields an empty list.
        competence_id: When truthy, only records whose ``competence_id``
            equals this value are returned.

    Returns:
        The matching records, in file order.
    """

    log = Path(log_path)
    records: list[Dict[str, Any]] = []

    # Open directly instead of checking ``exists()`` first, so a file removed
    # between the check and the open cannot raise.
    try:
        handle = log.open("rb")
    except FileNotFoundError:
        return records

    with handle:
        # Read bytes and decode line by line: in text mode a single corrupt
        # byte sequence (for example a truncated append) would abort the whole
        # read instead of skipping only the damaged line.
        for raw_line in handle:
            try:
                line = raw_line.decode("utf-8").strip()
            except UnicodeDecodeError:
                continue
            if not line:
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                continue
            if not isinstance(record, dict):
                continue
            if competence_id and record.get("competence_id") != competence_id:
                continue
            records.append(record)
    return records


def _required_text(payload: Dict[str, Any], key: str) -> str:
    value = payload.get(key)
    if not isinstance(value, str) or not value.strip():
        raise CompetenceVerdictError(f"{key} requis")
    return value.strip()


def _optional_text(payload: Dict[str, Any], key: str) -> Optional[str]:
    value = payload.get(key)
    if value is None:
        return None
    if not isinstance(value, str):
        raise CompetenceVerdictError(f"{key} doit etre du texte")
    text = value.strip()
    return text or None


def _validate_uuid(value: str) -> None:
    try:
        parsed = uuid.UUID(value, version=4)
    except ValueError as exc:
        raise CompetenceVerdictError("verdict_id doit etre un UUID v4") from exc
    if str(parsed) != value.lower():
        raise CompetenceVerdictError("verdict_id UUID v4 invalide")


def _timestamp(value: Any, *, now: Optional[datetime]) -> str:
    if value is None:
        timestamp = now or datetime.now(timezone.utc)
    elif isinstance(value, datetime):
        timestamp = value
    elif isinstance(value, str) and value.strip():
        text = value.strip()
        try:
            parsed = datetime.fromisoformat(text.replace("Z", "+00:00"))
        except ValueError as exc:
            raise CompetenceVerdictError("verdict_at doit etre ISO 8601") from exc
        timestamp = parsed
    else:
        raise CompetenceVerdictError("verdict_at doit etre ISO 8601")

    if timestamp.tzinfo is None:
        timestamp = timestamp.replace(tzinfo=timezone.utc)
    return timestamp.astimezone(timezone.utc).isoformat()


def _context_signature(value: Any) -> Dict[str, Any]:
    """Return a normalized copy of the ``context_signature`` mapping.

    ``machine_id`` must be a non-blank string and is stored stripped. The
    two screen-state keys are filled with ``""`` when absent; every other
    key is carried over unchanged. Raises ``CompetenceVerdictError`` when
    *value* is not a mapping or ``machine_id`` is missing or blank.
    """
    normalized = dict(_mapping(value, field="context_signature"))
    raw_machine = normalized.get("machine_id")
    if not (isinstance(raw_machine, str) and raw_machine.strip()):
        raise CompetenceVerdictError("context_signature.machine_id requis")
    normalized["machine_id"] = raw_machine.strip()
    for state_key in ("screen_state_initial", "screen_state_after_action"):
        normalized.setdefault(state_key, "")
    return normalized


def _mapping(value: Any, *, field: str) -> Dict[str, Any]:
    if value is None:
        return {}
    if not isinstance(value, dict):
        raise CompetenceVerdictError(f"{field} doit etre un objet")
    return dict(value)


def _step_results(value: Any) -> list[Dict[str, Any]]:
    if value is None:
        return []
    if not isinstance(value, list):
        raise CompetenceVerdictError("step_results doit etre une liste")
    results: list[Dict[str, Any]] = []
    for item in value:
        if not isinstance(item, dict):
            raise CompetenceVerdictError("step_results doit contenir des objets")
        results.append(dict(item))
    return results


def _append_jsonl(log_path: Path, record: Dict[str, Any]) -> None:
    log_path.parent.mkdir(parents=True, exist_ok=True)
    with log_path.open("a", encoding="utf-8") as handle:
        handle.write(json.dumps(record, ensure_ascii=False, sort_keys=True))
        handle.write("\n")