From 47377226f256831b0c84a8e582de5882d56a34b5 Mon Sep 17 00:00:00 2001 From: Dom Date: Fri, 29 May 2026 18:54:54 +0200 Subject: [PATCH] feat(vwb): harden supervised verdict evidence --- core/competences/verdicts.py | 32 ++++++++++++++ tests/unit/test_competence_to_vwb_preview.py | 36 ++++++++++++++++ tests/unit/test_competence_verdicts.py | 42 +++++++++++++++++++ .../frontend_v4/src/App.tsx | 2 + .../src/components/PauseDialog.tsx | 8 ++++ .../frontend_v4/src/types.ts | 1 + 6 files changed, 121 insertions(+) diff --git a/core/competences/verdicts.py b/core/competences/verdicts.py index 620f55195..eeb913df4 100644 --- a/core/competences/verdicts.py +++ b/core/competences/verdicts.py @@ -66,6 +66,13 @@ def store_competence_verdict( context_signature = _context_signature(payload.get("context_signature")) evidence = _mapping(payload.get("evidence"), field="evidence") source = _mapping(payload.get("source"), field="source") + workflow_id = ( + _optional_text(payload, "workflow_id") + or _optional_text(source, "workflow_id") + or _optional_text(evidence, "workflow_id") + or "" + ) + step_results = _step_results(payload.get("step_results")) record = { "schema_version": SCHEMA_VERSION, @@ -73,10 +80,12 @@ def store_competence_verdict( "competence_id": competence.id, "competence_source_path": competence.source_path, "learning_state": competence.learning_state, + "workflow_id": workflow_id, "verdict_kind": verdict_kind, "verdict_at": verdict_at, "verdict_by": str(payload.get("verdict_by") or "human:dom"), "context_signature": context_signature, + "step_results": step_results, "evidence": evidence, "comments": str(payload.get("comments") or ""), "source": source, @@ -125,6 +134,16 @@ def _required_text(payload: Dict[str, Any], key: str) -> str: return value.strip() +def _optional_text(payload: Dict[str, Any], key: str) -> Optional[str]: + value = payload.get(key) + if value is None: + return None + if not isinstance(value, str): + raise CompetenceVerdictError(f"{key} doit etre du texte") + text = value.strip() + return text or None + + def _validate_uuid(value: str) -> None: try: parsed = uuid.UUID(value, version=4) @@ -174,6 +193,19 @@ def _mapping(value: Any, *, field: str) -> Dict[str, Any]: return dict(value) +def _step_results(value: Any) -> list[Dict[str, Any]]: + if value is None: + return [] + if not isinstance(value, list): + raise CompetenceVerdictError("step_results doit etre une liste") + results: list[Dict[str, Any]] = [] + for item in value: + if not isinstance(item, dict): + raise CompetenceVerdictError("step_results doit contenir des objets") + results.append(dict(item)) + return results + + def _append_jsonl(log_path: Path, record: Dict[str, Any]) -> None: log_path.parent.mkdir(parents=True, exist_ok=True) with log_path.open("a", encoding="utf-8") as handle: diff --git a/tests/unit/test_competence_to_vwb_preview.py b/tests/unit/test_competence_to_vwb_preview.py index ebf9552af..7f32bc364 100644 --- a/tests/unit/test_competence_to_vwb_preview.py +++ b/tests/unit/test_competence_to_vwb_preview.py @@ -111,3 +111,39 @@ def test_preview_endpoint_returns_read_only_workflow(): "wait_for_state", "pause_for_human", ] + + +def test_unsupported_method_is_reported_as_warning(tmp_path): + root = tmp_path / "competences" + candidate = root / "candidate" + candidate.mkdir(parents=True) + (candidate / "unsupported_preview.yaml").write_text( + "\n".join( + [ + "schema_version: 1", + "id: unsupported_preview", + "name: Unsupported preview", + "learning_state: candidate", + "intent:", + " fr: tester une methode non supportee", + "methods:", + "- id: step_1_unknown", + " kind: shell_magic", + " primitive_ref: shell_magic", + " parameters: {}", + "failure_message_template:", + " attendu: aucun replay automatique", + ] + ), + encoding="utf-8", + ) + + preview = competence_yaml_to_vwb_preview("unsupported_preview", root=root) + + assert [step["action_type"] for step in preview["steps"]] == [ + "pause_for_human", + "pause_for_human", + ] + assert preview["warnings"] == [ + "Methode non supportee en preview VWB: step_1_unknown" + ] diff --git a/tests/unit/test_competence_verdicts.py b/tests/unit/test_competence_verdicts.py index 98bcee40b..6c7e11cdb 100644 --- a/tests/unit/test_competence_verdicts.py +++ b/tests/unit/test_competence_verdicts.py @@ -30,6 +30,14 @@ def _payload(**overrides): "process_name": "explorer.exe", }, }, + "workflow_id": "preview_competence_key_win_r_wait_explorer_exe", + "step_results": [ + { + "step_id": "step_1_key_combo", + "action_type": "keyboard_shortcut", + "status": "success", + } + ], "comments": "Supervised replay ok", } payload.update(overrides) @@ -52,6 +60,14 @@ def test_store_competence_verdict_appends_jsonl(tmp_path): assert record["write_back_enabled"] is False assert record["yaml_write"] is False assert record["duplicate"] is False + assert record["workflow_id"] == "preview_competence_key_win_r_wait_explorer_exe" + assert record["step_results"] == [ + { + "step_id": "step_1_key_combo", + "action_type": "keyboard_shortcut", + "status": "success", + } + ] assert record["context_signature"]["machine_id"] == "DESKTOP-58D5CAC_windows" records = iter_competence_verdicts(log_path=log_path) @@ -79,6 +95,23 @@ def test_store_competence_verdict_is_idempotent(tmp_path): assert second["comments"] == "Supervised replay ok" +def test_store_competence_verdict_rejects_same_id_for_other_competence(tmp_path): + log_path = tmp_path / "verdicts.jsonl" + + store_competence_verdict( + "key_win_r_wait_explorer_exe", + _payload(), + log_path=log_path, + ) + + with pytest.raises(CompetenceVerdictError, match="deja utilise"): + store_competence_verdict( + "key_ctrl_s_wait_notepad_exe", + _payload(), + log_path=log_path, + ) + + @pytest.mark.parametrize("kind", ["valid", "invalid", "inconclusive"]) def test_store_competence_verdict_accepts_three_kinds(tmp_path, kind): record = store_competence_verdict( @@ -115,3 +148,12 @@ def test_store_competence_verdict_rejects_yaml_write_attempt(tmp_path): assert record["write_back_enabled"] is False assert record["yaml_write"] is False + + +def test_store_competence_verdict_requires_step_results_list(tmp_path): + with pytest.raises(CompetenceVerdictError, match="step_results"): + store_competence_verdict( + "key_win_r_wait_explorer_exe", + _payload(step_results={"step_id": "not_a_list"}), + log_path=tmp_path / "verdicts.jsonl", + ) diff --git a/visual_workflow_builder/frontend_v4/src/App.tsx b/visual_workflow_builder/frontend_v4/src/App.tsx index 13a012593..825432cc5 100644 --- a/visual_workflow_builder/frontend_v4/src/App.tsx +++ b/visual_workflow_builder/frontend_v4/src/App.tsx @@ -671,6 +671,8 @@ function App() { verdictEndpoint={appState.execution.verdict_endpoint} competenceId={appState.execution.competence_id} executionId={appState.execution.id} + workflowId={appState.execution.workflow_id} + stepResults={appState.execution.step_results} onResume={async (ackIds) => { const replayId = appState.execution?.replay_id || appState.execution?.id; if (replayId) { diff --git a/visual_workflow_builder/frontend_v4/src/components/PauseDialog.tsx b/visual_workflow_builder/frontend_v4/src/components/PauseDialog.tsx index 4622b2c0c..25f757f79 100644 --- a/visual_workflow_builder/frontend_v4/src/components/PauseDialog.tsx +++ b/visual_workflow_builder/frontend_v4/src/components/PauseDialog.tsx @@ -18,6 +18,8 @@ interface Props { verdictEndpoint?: string; competenceId?: string; executionId?: string; + workflowId?: string; + stepResults?: unknown[]; onResume: (acknowledgedIds: string[]) => Promise; onCancel: () => void; } @@ -30,6 +32,8 @@ export default function PauseDialog({ verdictEndpoint, competenceId, executionId, + workflowId, + stepResults, onResume, onCancel, }: Props) { @@ -85,6 +89,8 @@ export default function PauseDialog({ verdict_id: newVerdictId(), verdict_kind: verdictKind, verdict_by: 'human:dom', + workflow_id: workflowId || '', + step_results: Array.isArray(stepResults) ? stepResults : [], context_signature: { machine_id: `browser:${window.navigator.platform || 'unknown'}`, screen_state_initial: '', @@ -92,11 +98,13 @@ export default function PauseDialog({ }, evidence: { execution_id: executionId || '', + workflow_id: workflowId || '', pause_reason: pauseReason || '', }, source: { frontend: 'vwb_v4', execution_id: executionId || '', + workflow_id: workflowId || '', }, comments: `Verdict humain VWB: ${verdictKind}`, }), diff --git a/visual_workflow_builder/frontend_v4/src/types.ts b/visual_workflow_builder/frontend_v4/src/types.ts index 0c752cfad..aadf4ed87 100644 --- a/visual_workflow_builder/frontend_v4/src/types.ts +++ b/visual_workflow_builder/frontend_v4/src/types.ts @@ -346,6 +346,7 @@ export interface Execution { failed_steps: number; total_steps: number; error_message?: string; + step_results?: unknown[]; // === QW4 — Pause supervisée (renvoyés par /replay/state quand status = paused_need_help) === pause_reason?: string; pause_message?: string;