"""Persist supervised human verdicts for Lea competences.""" from __future__ import annotations import json import uuid from datetime import datetime, timezone from pathlib import Path from typing import Any, Dict, Iterable, Optional from .catalog import DEFAULT_COMPETENCE_ROOT, REPO_ROOT from .replay import find_competence DEFAULT_VERDICT_LOG = REPO_ROOT / "data" / "competence_verdicts" / "verdicts.jsonl" VALID_VERDICT_KINDS = {"valid", "invalid", "inconclusive"} SCHEMA_VERSION = "lea_competence_verdict.v1" class CompetenceVerdictError(ValueError): """Raised when a supervised verdict payload is invalid.""" def store_competence_verdict( competence_id: str, payload: Dict[str, Any], *, log_path: Path | str = DEFAULT_VERDICT_LOG, competence_root: Path | str = DEFAULT_COMPETENCE_ROOT, states: Optional[Iterable[str]] = None, now: Optional[datetime] = None, ) -> Dict[str, Any]: """Validate and append one supervised verdict. The function is idempotent on ``verdict_id``. If the same verdict was already logged for the same competence, the stored record is returned with ``duplicate=True`` and the log is left untouched. """ if not isinstance(payload, dict): raise CompetenceVerdictError("Payload verdict invalide") competence = find_competence(competence_id, root=competence_root, states=states) log = Path(log_path) verdict_id = _required_text(payload, "verdict_id") _validate_uuid(verdict_id) for existing in iter_competence_verdicts(log_path=log): if existing.get("verdict_id") != verdict_id: continue if existing.get("competence_id") != competence_id: raise CompetenceVerdictError( f"verdict_id deja utilise pour {existing.get('competence_id')}" ) duplicate = dict(existing) duplicate["duplicate"] = True return duplicate verdict_kind = _required_text(payload, "verdict_kind") if verdict_kind not in VALID_VERDICT_KINDS: raise CompetenceVerdictError( "verdict_kind doit etre valid, invalid ou inconclusive" ) verdict_at = _timestamp(payload.get("verdict_at"), now=now) context_signature = _context_signature(payload.get("context_signature")) evidence = _mapping(payload.get("evidence"), field="evidence") source = _mapping(payload.get("source"), field="source") workflow_id = ( _optional_text(payload, "workflow_id") or _optional_text(source, "workflow_id") or _optional_text(evidence, "workflow_id") or "" ) step_results = _step_results(payload.get("step_results")) record = { "schema_version": SCHEMA_VERSION, "verdict_id": verdict_id, "competence_id": competence.id, "competence_source_path": competence.source_path, "learning_state": competence.learning_state, "workflow_id": workflow_id, "verdict_kind": verdict_kind, "verdict_at": verdict_at, "verdict_by": str(payload.get("verdict_by") or "human:dom"), "context_signature": context_signature, "step_results": step_results, "evidence": evidence, "comments": str(payload.get("comments") or ""), "source": source, "write_back_enabled": False, "yaml_write": False, "duplicate": False, } _append_jsonl(log, record) return record def iter_competence_verdicts( *, log_path: Path | str = DEFAULT_VERDICT_LOG, competence_id: Optional[str] = None, ) -> list[Dict[str, Any]]: """Load logged verdict records, skipping malformed historical lines.""" log = Path(log_path) if not log.exists(): return [] records: list[Dict[str, Any]] = [] with log.open("r", encoding="utf-8") as handle: for line in handle: line = line.strip() if not line: continue try: record = json.loads(line) except json.JSONDecodeError: continue if not isinstance(record, dict): continue if competence_id and record.get("competence_id") != competence_id: continue records.append(record) return records def _required_text(payload: Dict[str, Any], key: str) -> str: value = payload.get(key) if not isinstance(value, str) or not value.strip(): raise CompetenceVerdictError(f"{key} requis") return value.strip() def _optional_text(payload: Dict[str, Any], key: str) -> Optional[str]: value = payload.get(key) if value is None: return None if not isinstance(value, str): raise CompetenceVerdictError(f"{key} doit etre du texte") text = value.strip() return text or None def _validate_uuid(value: str) -> None: try: parsed = uuid.UUID(value, version=4) except ValueError as exc: raise CompetenceVerdictError("verdict_id doit etre un UUID v4") from exc if str(parsed) != value.lower(): raise CompetenceVerdictError("verdict_id UUID v4 invalide") def _timestamp(value: Any, *, now: Optional[datetime]) -> str: if value is None: timestamp = now or datetime.now(timezone.utc) elif isinstance(value, datetime): timestamp = value elif isinstance(value, str) and value.strip(): text = value.strip() try: parsed = datetime.fromisoformat(text.replace("Z", "+00:00")) except ValueError as exc: raise CompetenceVerdictError("verdict_at doit etre ISO 8601") from exc timestamp = parsed else: raise CompetenceVerdictError("verdict_at doit etre ISO 8601") if timestamp.tzinfo is None: timestamp = timestamp.replace(tzinfo=timezone.utc) return timestamp.astimezone(timezone.utc).isoformat() def _context_signature(value: Any) -> Dict[str, Any]: context = _mapping(value, field="context_signature") machine_id = context.get("machine_id") if not isinstance(machine_id, str) or not machine_id.strip(): raise CompetenceVerdictError("context_signature.machine_id requis") normalized = dict(context) normalized["machine_id"] = machine_id.strip() normalized.setdefault("screen_state_initial", "") normalized.setdefault("screen_state_after_action", "") return normalized def _mapping(value: Any, *, field: str) -> Dict[str, Any]: if value is None: return {} if not isinstance(value, dict): raise CompetenceVerdictError(f"{field} doit etre un objet") return dict(value) def _step_results(value: Any) -> list[Dict[str, Any]]: if value is None: return [] if not isinstance(value, list): raise CompetenceVerdictError("step_results doit etre une liste") results: list[Dict[str, Any]] = [] for item in value: if not isinstance(item, dict): raise CompetenceVerdictError("step_results doit contenir des objets") results.append(dict(item)) return results def _append_jsonl(log_path: Path, record: Dict[str, Any]) -> None: log_path.parent.mkdir(parents=True, exist_ok=True) with log_path.open("a", encoding="utf-8") as handle: handle.write(json.dumps(record, ensure_ascii=False, sort_keys=True)) handle.write("\n")