feat(vwb): log supervised competence verdicts
This commit is contained in:
@@ -10,12 +10,20 @@ from .replay import (
|
||||
build_competence_replay_payload,
|
||||
find_competence,
|
||||
)
|
||||
from .verdicts import (
|
||||
CompetenceVerdictError,
|
||||
iter_competence_verdicts,
|
||||
store_competence_verdict,
|
||||
)
|
||||
|
||||
# Names exported by this module; this list also governs what
# `from <package> import *` exposes.
__all__ = [
|
||||
"CompetenceSummary",
|
||||
"CompetenceVerdictError",
|
||||
"build_competence_replay_actions",
|
||||
"build_competence_replay_payload",
|
||||
"find_competence",
|
||||
"iter_competence_verdicts",
|
||||
"load_competence_catalog_actions",
|
||||
"load_competences",
|
||||
"store_competence_verdict",
|
||||
]
|
||||
|
||||
181
core/competences/verdicts.py
Normal file
181
core/competences/verdicts.py
Normal file
@@ -0,0 +1,181 @@
|
||||
"""Persist supervised human verdicts for Lea competences."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Iterable, Optional
|
||||
|
||||
from .catalog import DEFAULT_COMPETENCE_ROOT, REPO_ROOT
|
||||
from .replay import find_competence
|
||||
|
||||
|
||||
# Default JSONL file used by store_competence_verdict / iter_competence_verdicts
# when the caller does not pass ``log_path``.
DEFAULT_VERDICT_LOG = REPO_ROOT / "data" / "competence_verdicts" / "verdicts.jsonl"
|
||||
# The only values accepted for the payload's ``verdict_kind`` field.
VALID_VERDICT_KINDS = {"valid", "invalid", "inconclusive"}
|
||||
# Written into every stored record as ``schema_version``.
SCHEMA_VERSION = "lea_competence_verdict.v1"
|
||||
|
||||
|
||||
class CompetenceVerdictError(ValueError):
    """Signal that a supervised verdict payload failed validation.

    Derives from ``ValueError``, so callers that already catch
    ``ValueError`` also catch this exception.
    """
|
||||
|
||||
|
||||
def store_competence_verdict(
    competence_id: str,
    payload: Dict[str, Any],
    *,
    log_path: Path | str = DEFAULT_VERDICT_LOG,
    competence_root: Path | str = DEFAULT_COMPETENCE_ROOT,
    states: Optional[Iterable[str]] = None,
    now: Optional[datetime] = None,
) -> Dict[str, Any]:
    """Validate and append one supervised verdict.

    The function is idempotent on ``verdict_id``. If the same verdict was
    already logged for the same competence, the stored record is returned with
    ``duplicate=True`` and the log is left untouched. The lookup ignores letter
    case, because the UUID validation accepts both upper- and lower-case
    spellings of the same identifier.

    Args:
        competence_id: Identifier passed to ``find_competence``.
        payload: Verdict data. ``verdict_id`` (UUID v4) and ``verdict_kind``
            are required, and ``context_signature.machine_id`` must be a
            non-empty string. ``verdict_at``, ``verdict_by``, ``evidence``,
            ``comments`` and ``source`` are optional.
        log_path: JSONL file the record is appended to.
        competence_root: Forwarded to ``find_competence`` as ``root``.
        states: Forwarded to ``find_competence`` as ``states``.
        now: Timestamp used when the payload carries no ``verdict_at``;
            defaults to the current UTC time.

    Returns:
        The newly stored record (``duplicate`` is ``False``), or a copy of the
        previously logged record with ``duplicate`` set to ``True``.

    Raises:
        CompetenceVerdictError: If the payload is not a dict, a field fails
            validation, or ``verdict_id`` is already logged for a different
            competence.

    NOTE(review): whatever ``find_competence`` does for an unknown competence
    (its implementation is not visible here) propagates unchanged.
    """
    if not isinstance(payload, dict):
        raise CompetenceVerdictError("Payload verdict invalide")

    competence = find_competence(competence_id, root=competence_root, states=states)
    log = Path(log_path)
    verdict_id = _required_text(payload, "verdict_id")
    _validate_uuid(verdict_id)

    # _validate_uuid accepts any letter case, so match logged ids
    # case-insensitively; otherwise a retry that only differs in case would
    # be appended a second time.
    verdict_key = verdict_id.lower()
    for existing in iter_competence_verdicts(log_path=log):
        logged_id = existing.get("verdict_id")
        if not isinstance(logged_id, str) or logged_id.lower() != verdict_key:
            continue
        if existing.get("competence_id") != competence_id:
            raise CompetenceVerdictError(
                f"verdict_id deja utilise pour {existing.get('competence_id')}"
            )
        # Return a copy so the caller cannot mutate the loaded record.
        duplicate = dict(existing)
        duplicate["duplicate"] = True
        return duplicate

    verdict_kind = _required_text(payload, "verdict_kind")
    if verdict_kind not in VALID_VERDICT_KINDS:
        raise CompetenceVerdictError(
            "verdict_kind doit etre valid, invalid ou inconclusive"
        )

    verdict_at = _timestamp(payload.get("verdict_at"), now=now)
    context_signature = _context_signature(payload.get("context_signature"))
    evidence = _mapping(payload.get("evidence"), field="evidence")
    source = _mapping(payload.get("source"), field="source")

    record = {
        "schema_version": SCHEMA_VERSION,
        "verdict_id": verdict_id,
        "competence_id": competence.id,
        "competence_source_path": competence.source_path,
        "learning_state": competence.learning_state,
        "verdict_kind": verdict_kind,
        "verdict_at": verdict_at,
        "verdict_by": str(payload.get("verdict_by") or "human:dom"),
        "context_signature": context_signature,
        "evidence": evidence,
        "comments": str(payload.get("comments") or ""),
        "source": source,
        # This function only appends to the log, so both flags are
        # always recorded as False.
        "write_back_enabled": False,
        "yaml_write": False,
        "duplicate": False,
    }

    _append_jsonl(log, record)
    return record
|
||||
|
||||
|
||||
def iter_competence_verdicts(
    *,
    log_path: Path | str = DEFAULT_VERDICT_LOG,
    competence_id: Optional[str] = None,
) -> list[Dict[str, Any]]:
    """Return the verdict records stored in the JSONL log.

    Blank lines, lines that are not valid JSON, and JSON values that are not
    objects are skipped. When ``competence_id`` is truthy, only records whose
    ``competence_id`` equals it are returned. A missing log file yields an
    empty list.
    """
    source = Path(log_path)
    if not source.exists():
        return []

    def _decode(text: str) -> Any:
        # Undecodable lines become None, which the dict filter below drops.
        try:
            return json.loads(text)
        except json.JSONDecodeError:
            return None

    with source.open("r", encoding="utf-8") as handle:
        stripped = (raw.strip() for raw in handle)
        decoded = (_decode(text) for text in stripped if text)
        return [
            entry
            for entry in decoded
            if isinstance(entry, dict)
            and (not competence_id or entry.get("competence_id") == competence_id)
        ]
|
||||
|
||||
|
||||
def _required_text(payload: Dict[str, Any], key: str) -> str:
    """Return ``payload[key]`` stripped of surrounding whitespace.

    Raises ``CompetenceVerdictError`` when the key is absent, the value is
    not a string, or the value is empty after stripping.
    """
    raw = payload.get(key)
    text = raw.strip() if isinstance(raw, str) else ""
    if not text:
        raise CompetenceVerdictError(f"{key} requis")
    return text
|
||||
|
||||
|
||||
def _validate_uuid(value: str) -> None:
    """Ensure *value* is a canonically written UUID v4 (any letter case).

    ``uuid.UUID(..., version=4)`` overwrites the version and variant bits, so
    a round-trip that differs from the lower-cased input means the text was
    not a version-4 UUID or not in canonical hyphenated form.

    Raises ``CompetenceVerdictError`` in both failure cases.
    """
    try:
        canonical = str(uuid.UUID(value, version=4))
    except ValueError as exc:
        raise CompetenceVerdictError("verdict_id doit etre un UUID v4") from exc
    if canonical == value.lower():
        return
    raise CompetenceVerdictError("verdict_id UUID v4 invalide")
|
||||
|
||||
|
||||
def _timestamp(value: Any, *, now: Optional[datetime]) -> str:
    """Normalise a verdict timestamp to a UTC ISO 8601 string.

    Args:
        value: ``None`` (use ``now`` or the current UTC time), a ``datetime``,
            or a non-blank ISO 8601 string. ``Z`` is rewritten to ``+00:00``
            before parsing.
        now: Fallback timestamp used only when ``value`` is ``None``.

    Returns:
        ``isoformat()`` of the timestamp converted to UTC. Naive timestamps
        are treated as already being in UTC.

    Raises:
        CompetenceVerdictError: If ``value`` has an unsupported type, cannot
            be parsed, or cannot be represented once converted to UTC.
    """
    if value is None:
        timestamp = now or datetime.now(timezone.utc)
    elif isinstance(value, datetime):
        timestamp = value
    elif isinstance(value, str) and value.strip():
        text = value.strip()
        try:
            timestamp = datetime.fromisoformat(text.replace("Z", "+00:00"))
        except ValueError as exc:
            raise CompetenceVerdictError("verdict_at doit etre ISO 8601") from exc
    else:
        raise CompetenceVerdictError("verdict_at doit etre ISO 8601")

    if timestamp.tzinfo is None:
        timestamp = timestamp.replace(tzinfo=timezone.utc)
    try:
        return timestamp.astimezone(timezone.utc).isoformat()
    except (OverflowError, ValueError) as exc:
        # Boundary dates with an offset (e.g. year 1 at +05:00) parse fine but
        # fall outside the datetime range once shifted to UTC; report that as
        # a validation failure instead of leaking OverflowError.
        raise CompetenceVerdictError("verdict_at doit etre ISO 8601") from exc
|
||||
|
||||
|
||||
def _context_signature(value: Any) -> Dict[str, Any]:
    """Validate the ``context_signature`` object and fill in its defaults.

    ``machine_id`` must be a non-blank string and is stored stripped. The
    keys ``screen_state_initial`` and ``screen_state_after_action`` are added
    with an empty string when absent. All other keys are carried over
    unchanged.

    Raises ``CompetenceVerdictError`` when the value is not an object or
    ``machine_id`` is missing or blank.
    """
    signature = _mapping(value, field="context_signature")
    raw_machine = signature.get("machine_id")
    machine = raw_machine.strip() if isinstance(raw_machine, str) else ""
    if not machine:
        raise CompetenceVerdictError("context_signature.machine_id requis")

    # machine_id already exists in the mapping, so it keeps its position.
    result = {**signature, "machine_id": machine}
    for optional_key in ("screen_state_initial", "screen_state_after_action"):
        if optional_key not in result:
            result[optional_key] = ""
    return result
|
||||
|
||||
|
||||
def _mapping(value: Any, *, field: str) -> Dict[str, Any]:
    """Return a shallow copy of *value* as a dict, or ``{}`` for ``None``.

    Raises ``CompetenceVerdictError`` naming *field* for any other type.
    """
    if isinstance(value, dict):
        return dict(value)
    if value is None:
        return {}
    raise CompetenceVerdictError(f"{field} doit etre un objet")
|
||||
|
||||
|
||||
def _append_jsonl(log_path: Path, record: Dict[str, Any]) -> None:
    """Append *record* to *log_path* as a single JSON line.

    Missing parent directories are created first. Keys are written in sorted
    order and non-ASCII characters are kept as-is (UTF-8).
    """
    log_path.parent.mkdir(parents=True, exist_ok=True)
    with log_path.open("a", encoding="utf-8") as sink:
        serialized = json.dumps(record, sort_keys=True, ensure_ascii=False)
        sink.write(serialized + "\n")
|
||||
Reference in New Issue
Block a user