feat(vwb): log supervised competence verdicts

This commit is contained in:
Dom
2026-05-29 18:36:06 +02:00
parent 7ad260d02f
commit aba849324a
18 changed files with 1082 additions and 5 deletions

View File

@@ -16,6 +16,9 @@ def test_competence_to_vwb_preview_key_win_r_steps():
assert preview["readonly"] is True
assert preview["write_back_enabled"] is False
assert preview["workflow"]["competence_id"] == "key_win_r_wait_explorer_exe"
assert preview["workflow"]["verdict_endpoint"] == (
"/api/v1/lea/competences/key_win_r_wait_explorer_exe/verdict"
)
assert _by_type(preview) == [
"pause_for_human",
"keyboard_shortcut",
@@ -47,6 +50,8 @@ def test_wait_for_state_expected_state_is_preserved():
assert params["timeout_ms"] == 5000
assert params["poll_interval_ms"] == 250
assert params["evidence_required"] == "window_or_process"
assert params["supervised_popup_detection"] is True
assert params["popup_policy"] == "pause_only"
def test_pause_for_human_before_and_after_are_supervision_only():
@@ -59,6 +64,9 @@ def test_pause_for_human_before_and_after_are_supervision_only():
assert after["parameters"]["phase"] == "after"
assert after["parameters"]["verdict_required"] is True
assert after["parameters"]["write_back_enabled"] is False
assert after["parameters"]["verdict_endpoint"] == (
"/api/v1/lea/competences/key_win_r_wait_explorer_exe/verdict"
)
def test_adapter_is_generic_on_methods_not_hardcoded_to_win_r():

View File

@@ -0,0 +1,117 @@
from datetime import datetime, timezone
import pytest
from core.competences.verdicts import (
CompetenceVerdictError,
iter_competence_verdicts,
store_competence_verdict,
)
# Fixed verdict id used as the default `verdict_id` of every payload built below.
VERDICT_ID = "123e4567-e89b-42d3-a456-426614174000"


def _payload(**overrides):
    """Build a verdict payload dict for the tests.

    Keyword arguments replace (or add) top-level keys of the default
    payload. Nested dicts are swapped wholesale, not merged.
    """
    context_signature = {
        "machine_id": "DESKTOP-58D5CAC_windows",
        "screen_state_initial": "before_hash",
        "screen_state_after_action": "after_hash",
    }
    evidence = {
        "screenshot_before": "evidence/before.png",
        "screenshot_after": "evidence/after.png",
        "wait_state_matched_evidence": {
            "window_title": "Executer",
            "process_name": "explorer.exe",
        },
    }
    return {
        "verdict_id": VERDICT_ID,
        "verdict_kind": "valid",
        "verdict_by": "human:dom",
        "context_signature": context_signature,
        "evidence": evidence,
        "comments": "Supervised replay ok",
        **overrides,
    }
def test_store_competence_verdict_appends_jsonl(tmp_path):
    """A fresh verdict is returned as a v1 record and can be read back from the log."""
    competence_id = "key_win_r_wait_explorer_exe"
    jsonl_file = tmp_path / "verdicts.jsonl"
    frozen_now = datetime(2026, 5, 29, 16, 30, tzinfo=timezone.utc)

    stored = store_competence_verdict(
        competence_id,
        _payload(),
        log_path=jsonl_file,
        now=frozen_now,
    )

    assert stored["schema_version"] == "lea_competence_verdict.v1"
    assert stored["competence_id"] == competence_id
    assert stored["verdict_kind"] == "valid"
    # Write-back flags stay off and a first submission is not a duplicate.
    for flag in ("write_back_enabled", "yaml_write", "duplicate"):
        assert stored[flag] is False
    assert stored["context_signature"]["machine_id"] == "DESKTOP-58D5CAC_windows"

    logged = iter_competence_verdicts(log_path=jsonl_file)
    assert len(logged) == 1
    assert logged[0]["verdict_id"] == VERDICT_ID
def test_store_competence_verdict_is_idempotent(tmp_path):
    """Re-submitting the same verdict_id is reported as a duplicate.

    The log must still hold a single line, and the record returned for the
    second submission must carry the comments of the first one.
    """
    jsonl_file = tmp_path / "verdicts.jsonl"

    def submit(body):
        return store_competence_verdict(
            "key_win_r_wait_explorer_exe",
            body,
            log_path=jsonl_file,
        )

    original = submit(_payload())
    replay = submit(_payload(comments="second click"))

    assert original["duplicate"] is False
    assert replay["duplicate"] is True

    logged_lines = jsonl_file.read_text(encoding="utf-8").splitlines()
    assert len(logged_lines) == 1
    assert replay["comments"] == "Supervised replay ok"
@pytest.mark.parametrize("kind", ["valid", "invalid", "inconclusive"])
def test_store_competence_verdict_accepts_three_kinds(tmp_path, kind):
    """Each of the three verdict kinds is stored and echoed back in the record."""
    # One distinct verdict id per kind.
    id_for_kind = {
        "valid": "123e4567-e89b-42d3-a456-426614174000",
        "invalid": "123e4567-e89b-42d3-a456-426614174001",
        "inconclusive": "123e4567-e89b-42d3-a456-426614174002",
    }
    body = _payload(verdict_id=id_for_kind[kind], verdict_kind=kind)

    stored = store_competence_verdict(
        "key_win_r_wait_explorer_exe",
        body,
        log_path=tmp_path / "verdicts.jsonl",
    )

    assert stored["verdict_kind"] == kind
def test_store_competence_verdict_requires_context_machine(tmp_path):
    """An empty context_signature raises CompetenceVerdictError mentioning machine_id."""
    body_without_machine = _payload(context_signature={})
    jsonl_file = tmp_path / "verdicts.jsonl"

    with pytest.raises(CompetenceVerdictError, match="machine_id"):
        store_competence_verdict(
            "key_win_r_wait_explorer_exe",
            body_without_machine,
            log_path=jsonl_file,
        )
def test_store_competence_verdict_rejects_yaml_write_attempt(tmp_path):
    """Write-back flags sent as True in the payload come back as False in the record."""
    tampered_body = _payload(write_back_enabled=True, yaml_write=True)

    stored = store_competence_verdict(
        "key_win_r_wait_explorer_exe",
        tampered_body,
        log_path=tmp_path / "verdicts.jsonl",
    )

    for flag in ("write_back_enabled", "yaml_write"):
        assert stored[flag] is False

View File

@@ -0,0 +1,109 @@
from flask import Flask
import importlib.util
from pathlib import Path
# Location of the blueprint module under test, resolved relative to this file.
MODULE_PATH = (
    Path(__file__).resolve().parents[2]
    / "visual_workflow_builder"
    / "backend"
    / "api"
    / "lea_competences.py"
)

# The module is loaded directly from its file path under a private module name
# (NOTE(review): presumably to avoid importing the enclosing package — confirm).
SPEC = importlib.util.spec_from_file_location("lea_competences_test_module", MODULE_PATH)

# Validate the spec BEFORE its first use: spec_from_file_location may return
# None (and a spec may have no loader), in which case module_from_spec would
# otherwise fail first with an unrelated error and this guard would never run.
assert SPEC and SPEC.loader, f"cannot build an import spec for {MODULE_PATH}"

lea_competences_module = importlib.util.module_from_spec(SPEC)
SPEC.loader.exec_module(lea_competences_module)

# Blueprint registered on the throwaway Flask apps built by the tests.
lea_competences_bp = lea_competences_module.lea_competences_bp
def _app():
    """Return a new Flask app with only `lea_competences_bp` registered."""
    flask_app = Flask(__name__)
    flask_app.register_blueprint(lea_competences_bp)
    return flask_app
def test_submit_competence_verdict_endpoint(monkeypatch):
    """POSTing a verdict with a stubbed store answers 201 with a non-duplicate verdict."""

    def fake_store(competence_id, payload):
        # Stand-in for the store: checks what the endpoint forwards, then
        # echoes back a record shaped like a freshly stored verdict.
        assert competence_id == "key_win_r_wait_explorer_exe"
        assert payload["verdict_kind"] == "valid"
        return dict(
            verdict_id=payload["verdict_id"],
            competence_id=competence_id,
            verdict_kind="valid",
            duplicate=False,
            write_back_enabled=False,
            yaml_write=False,
        )

    monkeypatch.setattr(lea_competences_module, "store_competence_verdict", fake_store)

    request_body = {
        "verdict_id": "123e4567-e89b-42d3-a456-426614174000",
        "verdict_kind": "valid",
    }
    with _app().test_client() as client:
        response = client.post(
            "/api/v1/lea/competences/key_win_r_wait_explorer_exe/verdict",
            json=request_body,
        )

        assert response.status_code == 201
        body = response.get_json()
        assert body["success"] is True
        assert body["write_back_enabled"] is False
        assert body["yaml_write"] is False
        assert body["verdict"]["duplicate"] is False
def test_submit_competence_verdict_endpoint_returns_duplicate_200(monkeypatch):
    """When the stubbed store reports a duplicate, the endpoint answers 200 with duplicate=True."""

    def fake_store(_competence_id, payload):
        # Stand-in for the store that always reports the verdict as already known.
        return dict(
            verdict_id=payload["verdict_id"],
            competence_id="key_win_r_wait_explorer_exe",
            verdict_kind="valid",
            duplicate=True,
            write_back_enabled=False,
            yaml_write=False,
        )

    monkeypatch.setattr(lea_competences_module, "store_competence_verdict", fake_store)

    request_body = {
        "verdict_id": "123e4567-e89b-42d3-a456-426614174000",
        "verdict_kind": "valid",
    }
    with _app().test_client() as client:
        response = client.post(
            "/api/v1/lea/competences/key_win_r_wait_explorer_exe/verdict",
            json=request_body,
        )

        assert response.status_code == 200
        assert response.get_json()["duplicate"] is True
def test_list_competence_verdicts_endpoint(monkeypatch):
    """GET on the verdicts route returns whatever the stubbed iterator yields."""

    def fake_iter(competence_id):
        # Single canned verdict that echoes the competence id taken from the URL.
        return [{"competence_id": competence_id, "verdict_kind": "valid"}]

    monkeypatch.setattr(lea_competences_module, "iter_competence_verdicts", fake_iter)

    with _app().test_client() as client:
        response = client.get(
            "/api/v1/lea/competences/key_win_r_wait_explorer_exe/verdicts"
        )

        body = response.get_json()
        assert response.status_code == 200
        assert body["success"] is True
        assert body["verdicts"] == [
            {"competence_id": "key_win_r_wait_explorer_exe", "verdict_kind": "valid"}
        ]

View File

@@ -0,0 +1,38 @@
from visual_workflow_builder.backend.services.supervised_popup_guard import (
build_unexpected_popup_pause,
)
def test_unexpected_popup_builds_pause_without_auto_resolution():
    """A popup titled outside the expected state yields a human pause with no auto-resolution."""
    detected_popup = {"pattern": "confirm_save_overwrite", "title": "Enregistrer sous"}
    expected_state = {
        "window_title_in": ["Executer"],
        "process_active": "explorer.exe",
    }

    pause = build_unexpected_popup_pause(
        detected_popup,
        expected_state=expected_state,
        competence_id="key_win_r_wait_explorer_exe",
        source_method_id="step_2_wait_state",
    )

    assert pause is not None
    assert pause["needs_human"] is True
    assert pause["pause_reason"] == "unexpected_popup"
    # The guard only pauses: nothing is resolved or written back automatically.
    assert pause["auto_resolution"] is False
    assert pause["write_back_enabled"] is False
    assert pause["detected_popup"]["title"] == "Enregistrer sous"
def test_expected_popup_title_does_not_build_pause():
    """A popup whose title is listed in the expected state produces no pause."""
    detected_popup = {"pattern": "run_dialog", "title": "Executer"}
    expected_state = {"window_title_in": ["Executer"]}

    result = build_unexpected_popup_pause(detected_popup, expected_state=expected_state)

    assert result is None
def test_missing_popup_does_not_build_pause():
    """Passing None as the detected popup produces no pause."""
    expected_state = {"window_title_in": ["Executer"]}

    result = build_unexpected_popup_pause(None, expected_state=expected_state)

    assert result is None

View File

@@ -0,0 +1,81 @@
from flask import Flask
from visual_workflow_builder.backend.catalog_routes_v2_vlm import catalog_bp
class _WaitResult:
    """Stub wait result that never matches and serialises as a timeout."""

    matched = False

    def to_dict(self):
        """Return the fixed timed-out payload (expected "Executer", observed "Enregistrer sous")."""
        expected_state = {"window_title_in": ["Executer"]}
        observed_state = {"window_title": "Enregistrer sous"}
        return {
            "matched": False,
            "timed_out": True,
            "match": {
                "expected_state": expected_state,
                "observed_state": observed_state,
            },
        }
def test_catalog_pause_for_human_returns_paused_status():
    """Executing a pause_for_human step returns status "paused" and asks for a human."""
    flask_app = Flask(__name__)
    flask_app.register_blueprint(catalog_bp)

    step_request = {
        "type": "pause_for_human",
        "step_id": "pause_1",
        "parameters": {
            "message": "Valider le resultat",
            "phase": "after",
            "verdict_required": True,
            "competence_id": "key_win_r_wait_explorer_exe",
        },
    }

    with flask_app.test_client() as client:
        response = client.post("/api/vwb/catalog/execute", json=step_request)

        body = response.get_json()
        assert response.status_code == 200
        result = body["result"]
        assert result["status"] == "paused"
        assert result["output_data"]["needs_human"] is True
        assert result["output_data"]["write_back_enabled"] is False
def test_catalog_wait_for_state_popup_pauses_without_auto_resolution(monkeypatch):
    """A wait_for_state that fails to match while a popup is detected pauses for a human."""

    def fake_wait(**_kwargs):
        # The wait never matches: always hand back the timed-out stub.
        return _WaitResult()

    def fake_patterns():
        # Screen check reports a "save as" popup.
        return {"pattern": "save_as", "title": "Enregistrer sous"}

    monkeypatch.setattr(
        "visual_workflow_builder.backend.services.wait_for_state.wait_for_expected_state",
        fake_wait,
    )
    monkeypatch.setattr(
        "core.execution.input_handler.check_screen_for_patterns",
        fake_patterns,
    )

    flask_app = Flask(__name__)
    flask_app.register_blueprint(catalog_bp)

    step_request = {
        "type": "wait_for_state",
        "step_id": "wait_1",
        "parameters": {
            "expected_state": {"window_title_in": ["Executer"]},
            "timeout_ms": 1,
            "poll_interval_ms": 1,
            "supervised_popup_detection": True,
            "competence_id": "key_win_r_wait_explorer_exe",
            "source_method_id": "step_2_wait_state",
        },
    }

    with flask_app.test_client() as client:
        response = client.post("/api/vwb/catalog/execute", json=step_request)

        body = response.get_json()
        assert response.status_code == 200
        assert body["result"]["status"] == "paused"
        human_pause = body["result"]["output_data"]["human_pause"]
        assert human_pause["pause_reason"] == "unexpected_popup"
        assert human_pause["auto_resolution"] is False