feat(vwb): harden supervised verdict evidence

This commit is contained in:
Dom
2026-05-29 18:54:54 +02:00
parent d515b22d1b
commit 47377226f2
6 changed files with 121 additions and 0 deletions

View File

@@ -66,6 +66,13 @@ def store_competence_verdict(
context_signature = _context_signature(payload.get("context_signature"))
evidence = _mapping(payload.get("evidence"), field="evidence")
source = _mapping(payload.get("source"), field="source")
workflow_id = (
_optional_text(payload, "workflow_id")
or _optional_text(source, "workflow_id")
or _optional_text(evidence, "workflow_id")
or ""
)
step_results = _step_results(payload.get("step_results"))
record = {
"schema_version": SCHEMA_VERSION,
@@ -73,10 +80,12 @@ def store_competence_verdict(
"competence_id": competence.id,
"competence_source_path": competence.source_path,
"learning_state": competence.learning_state,
"workflow_id": workflow_id,
"verdict_kind": verdict_kind,
"verdict_at": verdict_at,
"verdict_by": str(payload.get("verdict_by") or "human:dom"),
"context_signature": context_signature,
"step_results": step_results,
"evidence": evidence,
"comments": str(payload.get("comments") or ""),
"source": source,
@@ -125,6 +134,16 @@ def _required_text(payload: Dict[str, Any], key: str) -> str:
return value.strip()
def _optional_text(payload: Dict[str, Any], key: str) -> Optional[str]:
value = payload.get(key)
if value is None:
return None
if not isinstance(value, str):
raise CompetenceVerdictError(f"{key} doit etre du texte")
text = value.strip()
return text or None
def _validate_uuid(value: str) -> None:
try:
parsed = uuid.UUID(value, version=4)
@@ -174,6 +193,19 @@ def _mapping(value: Any, *, field: str) -> Dict[str, Any]:
return dict(value)
def _step_results(value: Any) -> list[Dict[str, Any]]:
if value is None:
return []
if not isinstance(value, list):
raise CompetenceVerdictError("step_results doit etre une liste")
results: list[Dict[str, Any]] = []
for item in value:
if not isinstance(item, dict):
raise CompetenceVerdictError("step_results doit contenir des objets")
results.append(dict(item))
return results
def _append_jsonl(log_path: Path, record: Dict[str, Any]) -> None:
log_path.parent.mkdir(parents=True, exist_ok=True)
with log_path.open("a", encoding="utf-8") as handle:

View File

@@ -111,3 +111,39 @@ def test_preview_endpoint_returns_read_only_workflow():
"wait_for_state",
"pause_for_human",
]
def test_unsupported_method_is_reported_as_warning(tmp_path):
    """A competence whose only method has an unknown kind previews as two
    ``pause_for_human`` steps and carries one warning naming that method."""
    root = tmp_path / "competences"
    candidate_dir = root / "candidate"
    candidate_dir.mkdir(parents=True)

    # NOTE(review): the nested YAML keys below carry a single leading space as
    # seen in this view; confirm the indentation against the repository file.
    yaml_lines = (
        "schema_version: 1",
        "id: unsupported_preview",
        "name: Unsupported preview",
        "learning_state: candidate",
        "intent:",
        " fr: tester une methode non supportee",
        "methods:",
        "- id: step_1_unknown",
        " kind: shell_magic",
        " primitive_ref: shell_magic",
        " parameters: {}",
        "failure_message_template:",
        " attendu: aucun replay automatique",
    )
    yaml_path = candidate_dir / "unsupported_preview.yaml"
    yaml_path.write_text("\n".join(yaml_lines), encoding="utf-8")

    preview = competence_yaml_to_vwb_preview("unsupported_preview", root=root)

    action_types = [step["action_type"] for step in preview["steps"]]
    assert action_types == ["pause_for_human", "pause_for_human"]
    assert preview["warnings"] == [
        "Methode non supportee en preview VWB: step_1_unknown"
    ]

View File

@@ -30,6 +30,14 @@ def _payload(**overrides):
"process_name": "explorer.exe",
},
},
"workflow_id": "preview_competence_key_win_r_wait_explorer_exe",
"step_results": [
{
"step_id": "step_1_key_combo",
"action_type": "keyboard_shortcut",
"status": "success",
}
],
"comments": "Supervised replay ok",
}
payload.update(overrides)
@@ -52,6 +60,14 @@ def test_store_competence_verdict_appends_jsonl(tmp_path):
assert record["write_back_enabled"] is False
assert record["yaml_write"] is False
assert record["duplicate"] is False
assert record["workflow_id"] == "preview_competence_key_win_r_wait_explorer_exe"
assert record["step_results"] == [
{
"step_id": "step_1_key_combo",
"action_type": "keyboard_shortcut",
"status": "success",
}
]
assert record["context_signature"]["machine_id"] == "DESKTOP-58D5CAC_windows"
records = iter_competence_verdicts(log_path=log_path)
@@ -79,6 +95,23 @@ def test_store_competence_verdict_is_idempotent(tmp_path):
assert second["comments"] == "Supervised replay ok"
def test_store_competence_verdict_rejects_same_id_for_other_competence(tmp_path):
    """Storing the default payload for one competence, then again for a
    different competence in the same log, must raise ``CompetenceVerdictError``
    with a message matching "deja utilise".

    Presumably ``_payload()`` yields the same verdict id on every call, which
    is what makes the second store a conflict; verify against the helper.
    """
    log_path = tmp_path / "verdicts.jsonl"
    first_competence = "key_win_r_wait_explorer_exe"
    other_competence = "key_ctrl_s_wait_notepad_exe"

    # The first write succeeds and records the verdict for first_competence.
    store_competence_verdict(first_competence, _payload(), log_path=log_path)

    with pytest.raises(CompetenceVerdictError, match="deja utilise"):
        store_competence_verdict(other_competence, _payload(), log_path=log_path)
@pytest.mark.parametrize("kind", ["valid", "invalid", "inconclusive"])
def test_store_competence_verdict_accepts_three_kinds(tmp_path, kind):
record = store_competence_verdict(
@@ -115,3 +148,12 @@ def test_store_competence_verdict_rejects_yaml_write_attempt(tmp_path):
assert record["write_back_enabled"] is False
assert record["yaml_write"] is False
def test_store_competence_verdict_requires_step_results_list(tmp_path):
    """A payload whose ``step_results`` is a mapping rather than a list is
    rejected with a ``CompetenceVerdictError`` mentioning ``step_results``."""
    log_path = tmp_path / "verdicts.jsonl"
    not_a_list = {"step_id": "not_a_list"}

    with pytest.raises(CompetenceVerdictError, match="step_results"):
        store_competence_verdict(
            "key_win_r_wait_explorer_exe",
            _payload(step_results=not_a_list),
            log_path=log_path,
        )

View File

@@ -671,6 +671,8 @@ function App() {
verdictEndpoint={appState.execution.verdict_endpoint}
competenceId={appState.execution.competence_id}
executionId={appState.execution.id}
workflowId={appState.execution.workflow_id}
stepResults={appState.execution.step_results}
onResume={async (ackIds) => {
const replayId = appState.execution?.replay_id || appState.execution?.id;
if (replayId) {

View File

@@ -18,6 +18,8 @@ interface Props {
verdictEndpoint?: string;
competenceId?: string;
executionId?: string;
workflowId?: string;
stepResults?: unknown[];
onResume: (acknowledgedIds: string[]) => Promise<void>;
onCancel: () => void;
}
@@ -30,6 +32,8 @@ export default function PauseDialog({
verdictEndpoint,
competenceId,
executionId,
workflowId,
stepResults,
onResume,
onCancel,
}: Props) {
@@ -85,6 +89,8 @@ export default function PauseDialog({
verdict_id: newVerdictId(),
verdict_kind: verdictKind,
verdict_by: 'human:dom',
workflow_id: workflowId || '',
step_results: Array.isArray(stepResults) ? stepResults : [],
context_signature: {
machine_id: `browser:${window.navigator.platform || 'unknown'}`,
screen_state_initial: '',
@@ -92,11 +98,13 @@ export default function PauseDialog({
},
evidence: {
execution_id: executionId || '',
workflow_id: workflowId || '',
pause_reason: pauseReason || '',
},
source: {
frontend: 'vwb_v4',
execution_id: executionId || '',
workflow_id: workflowId || '',
},
comments: `Verdict humain VWB: ${verdictKind}`,
}),

View File

@@ -346,6 +346,7 @@ export interface Execution {
failed_steps: number;
total_steps: number;
error_message?: string;
step_results?: unknown[];
// === QW4 — Pause supervisée (renvoyés par /replay/state quand status = paused_need_help) ===
pause_reason?: string;
pause_message?: string;