214 lines
7.2 KiB
Python
214 lines
7.2 KiB
Python
"""Persist supervised human verdicts for Lea competences."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import uuid
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
from typing import Any, Dict, Iterable, Optional
|
|
|
|
from .catalog import DEFAULT_COMPETENCE_ROOT, REPO_ROOT
|
|
from .replay import find_competence
|
|
|
|
|
|
# Default JSONL file used as ``log_path`` when callers do not pass one.
DEFAULT_VERDICT_LOG = REPO_ROOT / "data" / "competence_verdicts" / "verdicts.jsonl"
|
|
# The only values accepted for the payload's "verdict_kind" field.
VALID_VERDICT_KINDS = {"valid", "invalid", "inconclusive"}
|
|
# Written under the "schema_version" key of every stored record.
SCHEMA_VERSION = "lea_competence_verdict.v1"
|
|
|
|
|
|
class CompetenceVerdictError(ValueError):
    """Signal that a supervised verdict payload failed validation.

    Derives from ``ValueError``, so handlers written for ``ValueError``
    also catch it.
    """
|
|
|
|
|
|
def store_competence_verdict(
    competence_id: str,
    payload: Dict[str, Any],
    *,
    log_path: Path | str = DEFAULT_VERDICT_LOG,
    competence_root: Path | str = DEFAULT_COMPETENCE_ROOT,
    states: Optional[Iterable[str]] = None,
    now: Optional[datetime] = None,
) -> Dict[str, Any]:
    """Validate and append one supervised verdict.

    The function is idempotent on ``verdict_id``. If the same verdict was
    already logged for the same competence, the stored record is returned with
    ``duplicate=True`` and the log is left untouched. The duplicate lookup
    runs before the remaining payload fields are validated.

    Args:
        competence_id: Identifier handed to ``find_competence``.
        payload: Verdict fields. ``verdict_id`` (UUID v4), ``verdict_kind``
            and ``context_signature.machine_id`` are required;
            ``verdict_at``, ``verdict_by``, ``workflow_id``, ``step_results``,
            ``evidence``, ``comments`` and ``source`` are optional.
        log_path: JSONL file that is scanned for duplicates and appended to.
        competence_root: Forwarded to ``find_competence`` as ``root``.
        states: Forwarded to ``find_competence`` as ``states``.
        now: Timestamp used when the payload carries no ``verdict_at``;
            the current UTC time is used when this is also missing.

    Returns:
        The record that was appended (``duplicate`` is ``False``), or a copy
        of the previously logged record with ``duplicate`` set to ``True``.

    Raises:
        CompetenceVerdictError: If the payload is not a dict, a field fails
            validation, or ``verdict_id`` is already logged for another
            competence. Whatever ``find_competence`` raises propagates as-is.
    """
    if not isinstance(payload, dict):
        raise CompetenceVerdictError("Payload verdict invalide")

    competence = find_competence(competence_id, root=competence_root, states=states)
    log = Path(log_path)
    verdict_id = _required_text(payload, "verdict_id")
    _validate_uuid(verdict_id)

    # _validate_uuid accepts upper-case UUIDs, so the idempotency key has to
    # be compared case-insensitively or a re-cased retry is logged twice.
    verdict_key = verdict_id.lower()
    # Records store competence.id, which may differ from the raw argument;
    # accept either so a legitimate retry is not reported as a conflict.
    accepted_competence_ids = (competence_id, competence.id)

    for existing in iter_competence_verdicts(log_path=log):
        logged_id = existing.get("verdict_id")
        if not isinstance(logged_id, str) or logged_id.lower() != verdict_key:
            continue
        if existing.get("competence_id") not in accepted_competence_ids:
            raise CompetenceVerdictError(
                f"verdict_id deja utilise pour {existing.get('competence_id')}"
            )
        duplicate = dict(existing)
        duplicate["duplicate"] = True
        return duplicate

    verdict_kind = _required_text(payload, "verdict_kind")
    if verdict_kind not in VALID_VERDICT_KINDS:
        raise CompetenceVerdictError(
            "verdict_kind doit etre valid, invalid ou inconclusive"
        )

    verdict_at = _timestamp(payload.get("verdict_at"), now=now)
    context_signature = _context_signature(payload.get("context_signature"))
    evidence = _mapping(payload.get("evidence"), field="evidence")
    source = _mapping(payload.get("source"), field="source")
    # First non-empty workflow id wins: payload, then source, then evidence.
    workflow_id = (
        _optional_text(payload, "workflow_id")
        or _optional_text(source, "workflow_id")
        or _optional_text(evidence, "workflow_id")
        or ""
    )
    step_results = _step_results(payload.get("step_results"))

    record = {
        "schema_version": SCHEMA_VERSION,
        "verdict_id": verdict_id,
        "competence_id": competence.id,
        "competence_source_path": competence.source_path,
        "learning_state": competence.learning_state,
        "workflow_id": workflow_id,
        "verdict_kind": verdict_kind,
        "verdict_at": verdict_at,
        "verdict_by": str(payload.get("verdict_by") or "human:dom"),
        "context_signature": context_signature,
        "step_results": step_results,
        "evidence": evidence,
        "comments": str(payload.get("comments") or ""),
        "source": source,
        "write_back_enabled": False,
        "yaml_write": False,
        "duplicate": False,
    }

    _append_jsonl(log, record)
    return record
|
|
|
|
|
|
def iter_competence_verdicts(
    *,
    log_path: Path | str = DEFAULT_VERDICT_LOG,
    competence_id: Optional[str] = None,
) -> list[Dict[str, Any]]:
    """Read the verdict log and return its usable records as a list.

    Blank lines, lines that are not valid JSON and JSON values that are not
    objects are ignored. A missing log file yields an empty list. When
    ``competence_id`` is truthy, only records whose ``competence_id`` equals
    it are kept.
    """

    def _decode(raw_line: str) -> Optional[Dict[str, Any]]:
        # Return the parsed object for one log line, or None if unusable.
        text = raw_line.strip()
        if not text:
            return None
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            return None
        return parsed if isinstance(parsed, dict) else None

    source = Path(log_path)
    if not source.exists():
        return []

    with source.open("r", encoding="utf-8") as handle:
        decoded = [_decode(raw_line) for raw_line in handle]

    return [
        entry
        for entry in decoded
        if entry is not None
        and (not competence_id or entry.get("competence_id") == competence_id)
    ]
|
|
|
|
|
|
def _required_text(payload: Dict[str, Any], key: str) -> str:
|
|
value = payload.get(key)
|
|
if not isinstance(value, str) or not value.strip():
|
|
raise CompetenceVerdictError(f"{key} requis")
|
|
return value.strip()
|
|
|
|
|
|
def _optional_text(payload: Dict[str, Any], key: str) -> Optional[str]:
|
|
value = payload.get(key)
|
|
if value is None:
|
|
return None
|
|
if not isinstance(value, str):
|
|
raise CompetenceVerdictError(f"{key} doit etre du texte")
|
|
text = value.strip()
|
|
return text or None
|
|
|
|
|
|
def _validate_uuid(value: str) -> None:
|
|
try:
|
|
parsed = uuid.UUID(value, version=4)
|
|
except ValueError as exc:
|
|
raise CompetenceVerdictError("verdict_id doit etre un UUID v4") from exc
|
|
if str(parsed) != value.lower():
|
|
raise CompetenceVerdictError("verdict_id UUID v4 invalide")
|
|
|
|
|
|
def _timestamp(value: Any, *, now: Optional[datetime]) -> str:
|
|
if value is None:
|
|
timestamp = now or datetime.now(timezone.utc)
|
|
elif isinstance(value, datetime):
|
|
timestamp = value
|
|
elif isinstance(value, str) and value.strip():
|
|
text = value.strip()
|
|
try:
|
|
parsed = datetime.fromisoformat(text.replace("Z", "+00:00"))
|
|
except ValueError as exc:
|
|
raise CompetenceVerdictError("verdict_at doit etre ISO 8601") from exc
|
|
timestamp = parsed
|
|
else:
|
|
raise CompetenceVerdictError("verdict_at doit etre ISO 8601")
|
|
|
|
if timestamp.tzinfo is None:
|
|
timestamp = timestamp.replace(tzinfo=timezone.utc)
|
|
return timestamp.astimezone(timezone.utc).isoformat()
|
|
|
|
|
|
def _context_signature(value: Any) -> Dict[str, Any]:
    """Validate the context signature and return a normalized copy.

    ``machine_id`` must be a non-blank string and is stored stripped.
    ``screen_state_initial`` and ``screen_state_after_action`` are added as
    empty strings when the caller did not supply them; all other keys are
    carried over unchanged.

    Raises:
        CompetenceVerdictError: If ``value`` is not a mapping or
            ``machine_id`` is missing, not a string, or blank.
    """
    signature = _mapping(value, field="context_signature")

    raw_machine = signature.get("machine_id")
    machine = raw_machine.strip() if isinstance(raw_machine, str) else ""
    if not machine:
        raise CompetenceVerdictError("context_signature.machine_id requis")

    # machine_id already exists in the mapping, so its position is kept.
    result = {**signature, "machine_id": machine}
    for key in ("screen_state_initial", "screen_state_after_action"):
        if key not in result:
            result[key] = ""
    return result
|
|
|
|
|
|
def _mapping(value: Any, *, field: str) -> Dict[str, Any]:
|
|
if value is None:
|
|
return {}
|
|
if not isinstance(value, dict):
|
|
raise CompetenceVerdictError(f"{field} doit etre un objet")
|
|
return dict(value)
|
|
|
|
|
|
def _step_results(value: Any) -> list[Dict[str, Any]]:
|
|
if value is None:
|
|
return []
|
|
if not isinstance(value, list):
|
|
raise CompetenceVerdictError("step_results doit etre une liste")
|
|
results: list[Dict[str, Any]] = []
|
|
for item in value:
|
|
if not isinstance(item, dict):
|
|
raise CompetenceVerdictError("step_results doit contenir des objets")
|
|
results.append(dict(item))
|
|
return results
|
|
|
|
|
|
def _append_jsonl(log_path: Path, record: Dict[str, Any]) -> None:
    """Append ``record`` to ``log_path`` as one JSON line.

    Missing parent directories are created first. The record is serialized
    with sorted keys and without ASCII escaping, followed by a newline.
    """
    log_path.parent.mkdir(parents=True, exist_ok=True)
    with log_path.open("a", encoding="utf-8") as sink:
        serialized = json.dumps(record, ensure_ascii=False, sort_keys=True)
        sink.write(f"{serialized}\n")
|