feat(vwb): harden supervised verdict evidence
This commit is contained in:
@@ -66,6 +66,13 @@ def store_competence_verdict(
|
||||
context_signature = _context_signature(payload.get("context_signature"))
|
||||
evidence = _mapping(payload.get("evidence"), field="evidence")
|
||||
source = _mapping(payload.get("source"), field="source")
|
||||
workflow_id = (
|
||||
_optional_text(payload, "workflow_id")
|
||||
or _optional_text(source, "workflow_id")
|
||||
or _optional_text(evidence, "workflow_id")
|
||||
or ""
|
||||
)
|
||||
step_results = _step_results(payload.get("step_results"))
|
||||
|
||||
record = {
|
||||
"schema_version": SCHEMA_VERSION,
|
||||
@@ -73,10 +80,12 @@ def store_competence_verdict(
|
||||
"competence_id": competence.id,
|
||||
"competence_source_path": competence.source_path,
|
||||
"learning_state": competence.learning_state,
|
||||
"workflow_id": workflow_id,
|
||||
"verdict_kind": verdict_kind,
|
||||
"verdict_at": verdict_at,
|
||||
"verdict_by": str(payload.get("verdict_by") or "human:dom"),
|
||||
"context_signature": context_signature,
|
||||
"step_results": step_results,
|
||||
"evidence": evidence,
|
||||
"comments": str(payload.get("comments") or ""),
|
||||
"source": source,
|
||||
@@ -125,6 +134,16 @@ def _required_text(payload: Dict[str, Any], key: str) -> str:
|
||||
return value.strip()
|
||||
|
||||
|
||||
def _optional_text(payload: Dict[str, Any], key: str) -> Optional[str]:
|
||||
value = payload.get(key)
|
||||
if value is None:
|
||||
return None
|
||||
if not isinstance(value, str):
|
||||
raise CompetenceVerdictError(f"{key} doit etre du texte")
|
||||
text = value.strip()
|
||||
return text or None
|
||||
|
||||
|
||||
def _validate_uuid(value: str) -> None:
|
||||
try:
|
||||
parsed = uuid.UUID(value, version=4)
|
||||
@@ -174,6 +193,19 @@ def _mapping(value: Any, *, field: str) -> Dict[str, Any]:
|
||||
return dict(value)
|
||||
|
||||
|
||||
def _step_results(value: Any) -> list[Dict[str, Any]]:
|
||||
if value is None:
|
||||
return []
|
||||
if not isinstance(value, list):
|
||||
raise CompetenceVerdictError("step_results doit etre une liste")
|
||||
results: list[Dict[str, Any]] = []
|
||||
for item in value:
|
||||
if not isinstance(item, dict):
|
||||
raise CompetenceVerdictError("step_results doit contenir des objets")
|
||||
results.append(dict(item))
|
||||
return results
|
||||
|
||||
|
||||
def _append_jsonl(log_path: Path, record: Dict[str, Any]) -> None:
|
||||
log_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with log_path.open("a", encoding="utf-8") as handle:
|
||||
|
||||
@@ -111,3 +111,39 @@ def test_preview_endpoint_returns_read_only_workflow():
|
||||
"wait_for_state",
|
||||
"pause_for_human",
|
||||
]
|
||||
|
||||
|
||||
def test_unsupported_method_is_reported_as_warning(tmp_path):
    """Check that an unsupported method kind is surfaced as a preview warning.

    Writes a candidate competence YAML whose only method has kind
    ``shell_magic``, builds the VWB preview for it, and asserts that the
    preview steps are two ``pause_for_human`` actions and that the warnings
    list contains exactly one message naming ``step_1_unknown``.
    """
    root = tmp_path / "competences"
    candidate = root / "candidate"
    candidate.mkdir(parents=True)
    # NOTE(review): the leading whitespace inside the YAML string literals
    # below looks collapsed in this view (nested keys show a single space) —
    # confirm the exact indentation against the real test file.
    (candidate / "unsupported_preview.yaml").write_text(
        "\n".join(
            [
                "schema_version: 1",
                "id: unsupported_preview",
                "name: Unsupported preview",
                "learning_state: candidate",
                "intent:",
                " fr: tester une methode non supportee",
                "methods:",
                "- id: step_1_unknown",
                " kind: shell_magic",
                " primitive_ref: shell_magic",
                " parameters: {}",
                "failure_message_template:",
                " attendu: aucun replay automatique",
            ]
        ),
        encoding="utf-8",
    )

    preview = competence_yaml_to_vwb_preview("unsupported_preview", root=root)

    # Both resulting steps are expected to be human pauses.
    assert [step["action_type"] for step in preview["steps"]] == [
        "pause_for_human",
        "pause_for_human",
    ]
    # Exactly one warning, identifying the unsupported method by its id.
    assert preview["warnings"] == [
        "Methode non supportee en preview VWB: step_1_unknown"
    ]
|
||||
|
||||
@@ -30,6 +30,14 @@ def _payload(**overrides):
|
||||
"process_name": "explorer.exe",
|
||||
},
|
||||
},
|
||||
"workflow_id": "preview_competence_key_win_r_wait_explorer_exe",
|
||||
"step_results": [
|
||||
{
|
||||
"step_id": "step_1_key_combo",
|
||||
"action_type": "keyboard_shortcut",
|
||||
"status": "success",
|
||||
}
|
||||
],
|
||||
"comments": "Supervised replay ok",
|
||||
}
|
||||
payload.update(overrides)
|
||||
@@ -52,6 +60,14 @@ def test_store_competence_verdict_appends_jsonl(tmp_path):
|
||||
assert record["write_back_enabled"] is False
|
||||
assert record["yaml_write"] is False
|
||||
assert record["duplicate"] is False
|
||||
assert record["workflow_id"] == "preview_competence_key_win_r_wait_explorer_exe"
|
||||
assert record["step_results"] == [
|
||||
{
|
||||
"step_id": "step_1_key_combo",
|
||||
"action_type": "keyboard_shortcut",
|
||||
"status": "success",
|
||||
}
|
||||
]
|
||||
assert record["context_signature"]["machine_id"] == "DESKTOP-58D5CAC_windows"
|
||||
|
||||
records = iter_competence_verdicts(log_path=log_path)
|
||||
@@ -79,6 +95,23 @@ def test_store_competence_verdict_is_idempotent(tmp_path):
|
||||
assert second["comments"] == "Supervised replay ok"
|
||||
|
||||
|
||||
def test_store_competence_verdict_rejects_same_id_for_other_competence(tmp_path):
    """Storing the default payload under a second competence must be refused.

    The default payload is first stored for one competence; storing a fresh
    default payload for a different competence in the same log must raise
    ``CompetenceVerdictError`` with a message matching ``deja utilise``.
    """
    verdict_log = tmp_path / "verdicts.jsonl"
    first_competence = "key_win_r_wait_explorer_exe"
    other_competence = "key_ctrl_s_wait_notepad_exe"

    store_competence_verdict(first_competence, _payload(), log_path=verdict_log)

    with pytest.raises(CompetenceVerdictError, match="deja utilise"):
        store_competence_verdict(other_competence, _payload(), log_path=verdict_log)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("kind", ["valid", "invalid", "inconclusive"])
|
||||
def test_store_competence_verdict_accepts_three_kinds(tmp_path, kind):
|
||||
record = store_competence_verdict(
|
||||
@@ -115,3 +148,12 @@ def test_store_competence_verdict_rejects_yaml_write_attempt(tmp_path):
|
||||
|
||||
assert record["write_back_enabled"] is False
|
||||
assert record["yaml_write"] is False
|
||||
|
||||
|
||||
def test_store_competence_verdict_requires_step_results_list(tmp_path):
    """A mapping passed as ``step_results`` must be rejected.

    The raised ``CompetenceVerdictError`` is expected to mention
    ``step_results`` in its message.
    """
    verdict_log = tmp_path / "verdicts.jsonl"
    not_a_list = {"step_id": "not_a_list"}

    with pytest.raises(CompetenceVerdictError, match="step_results"):
        store_competence_verdict(
            "key_win_r_wait_explorer_exe",
            _payload(step_results=not_a_list),
            log_path=verdict_log,
        )
|
||||
|
||||
@@ -671,6 +671,8 @@ function App() {
|
||||
verdictEndpoint={appState.execution.verdict_endpoint}
|
||||
competenceId={appState.execution.competence_id}
|
||||
executionId={appState.execution.id}
|
||||
workflowId={appState.execution.workflow_id}
|
||||
stepResults={appState.execution.step_results}
|
||||
onResume={async (ackIds) => {
|
||||
const replayId = appState.execution?.replay_id || appState.execution?.id;
|
||||
if (replayId) {
|
||||
|
||||
@@ -18,6 +18,8 @@ interface Props {
|
||||
verdictEndpoint?: string;
|
||||
competenceId?: string;
|
||||
executionId?: string;
|
||||
workflowId?: string;
|
||||
stepResults?: unknown[];
|
||||
onResume: (acknowledgedIds: string[]) => Promise<void>;
|
||||
onCancel: () => void;
|
||||
}
|
||||
@@ -30,6 +32,8 @@ export default function PauseDialog({
|
||||
verdictEndpoint,
|
||||
competenceId,
|
||||
executionId,
|
||||
workflowId,
|
||||
stepResults,
|
||||
onResume,
|
||||
onCancel,
|
||||
}: Props) {
|
||||
@@ -85,6 +89,8 @@ export default function PauseDialog({
|
||||
verdict_id: newVerdictId(),
|
||||
verdict_kind: verdictKind,
|
||||
verdict_by: 'human:dom',
|
||||
workflow_id: workflowId || '',
|
||||
step_results: Array.isArray(stepResults) ? stepResults : [],
|
||||
context_signature: {
|
||||
machine_id: `browser:${window.navigator.platform || 'unknown'}`,
|
||||
screen_state_initial: '',
|
||||
@@ -92,11 +98,13 @@ export default function PauseDialog({
|
||||
},
|
||||
evidence: {
|
||||
execution_id: executionId || '',
|
||||
workflow_id: workflowId || '',
|
||||
pause_reason: pauseReason || '',
|
||||
},
|
||||
source: {
|
||||
frontend: 'vwb_v4',
|
||||
execution_id: executionId || '',
|
||||
workflow_id: workflowId || '',
|
||||
},
|
||||
comments: `Verdict humain VWB: ${verdictKind}`,
|
||||
}),
|
||||
|
||||
@@ -346,6 +346,7 @@ export interface Execution {
|
||||
failed_steps: number;
|
||||
total_steps: number;
|
||||
error_message?: string;
|
||||
step_results?: unknown[];
|
||||
// === QW4 — Pause supervisée (renvoyés par /replay/state quand status = paused_need_help) ===
|
||||
pause_reason?: string;
|
||||
pause_message?: string;
|
||||
|
||||
Reference in New Issue
Block a user