feat(vwb): log supervised competence verdicts
This commit is contained in:
@@ -16,6 +16,9 @@ def test_competence_to_vwb_preview_key_win_r_steps():
|
||||
assert preview["readonly"] is True
|
||||
assert preview["write_back_enabled"] is False
|
||||
assert preview["workflow"]["competence_id"] == "key_win_r_wait_explorer_exe"
|
||||
assert preview["workflow"]["verdict_endpoint"] == (
|
||||
"/api/v1/lea/competences/key_win_r_wait_explorer_exe/verdict"
|
||||
)
|
||||
assert _by_type(preview) == [
|
||||
"pause_for_human",
|
||||
"keyboard_shortcut",
|
||||
@@ -47,6 +50,8 @@ def test_wait_for_state_expected_state_is_preserved():
|
||||
assert params["timeout_ms"] == 5000
|
||||
assert params["poll_interval_ms"] == 250
|
||||
assert params["evidence_required"] == "window_or_process"
|
||||
assert params["supervised_popup_detection"] is True
|
||||
assert params["popup_policy"] == "pause_only"
|
||||
|
||||
|
||||
def test_pause_for_human_before_and_after_are_supervision_only():
|
||||
@@ -59,6 +64,9 @@ def test_pause_for_human_before_and_after_are_supervision_only():
|
||||
assert after["parameters"]["phase"] == "after"
|
||||
assert after["parameters"]["verdict_required"] is True
|
||||
assert after["parameters"]["write_back_enabled"] is False
|
||||
assert after["parameters"]["verdict_endpoint"] == (
|
||||
"/api/v1/lea/competences/key_win_r_wait_explorer_exe/verdict"
|
||||
)
|
||||
|
||||
|
||||
def test_adapter_is_generic_on_methods_not_hardcoded_to_win_r():
|
||||
|
||||
117
tests/unit/test_competence_verdicts.py
Normal file
117
tests/unit/test_competence_verdicts.py
Normal file
@@ -0,0 +1,117 @@
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import pytest
|
||||
|
||||
from core.competences.verdicts import (
|
||||
CompetenceVerdictError,
|
||||
iter_competence_verdicts,
|
||||
store_competence_verdict,
|
||||
)
|
||||
|
||||
|
||||
# Fixed verdict identifier: the default "verdict_id" built by _payload() and
# the value asserted when the stored verdict is read back from the log.
VERDICT_ID = "123e4567-e89b-42d3-a456-426614174000"
|
||||
|
||||
|
||||
def _payload(**overrides):
    """Build a verdict payload for the tests.

    Each keyword argument replaces (or adds) the top-level key of the same
    name in the default payload.
    """
    context_signature = {
        "machine_id": "DESKTOP-58D5CAC_windows",
        "screen_state_initial": "before_hash",
        "screen_state_after_action": "after_hash",
    }
    evidence = {
        "screenshot_before": "evidence/before.png",
        "screenshot_after": "evidence/after.png",
        "wait_state_matched_evidence": {
            "window_title": "Executer",
            "process_name": "explorer.exe",
        },
    }
    defaults = {
        "verdict_id": VERDICT_ID,
        "verdict_kind": "valid",
        "verdict_by": "human:dom",
        "context_signature": context_signature,
        "evidence": evidence,
        "comments": "Supervised replay ok",
    }
    return {**defaults, **overrides}
|
||||
|
||||
|
||||
def test_store_competence_verdict_appends_jsonl(tmp_path):
    """Store one verdict, then check the returned record and the log read-back."""
    competence_id = "key_win_r_wait_explorer_exe"
    verdict_log = tmp_path / "verdicts.jsonl"
    fixed_now = datetime(2026, 5, 29, 16, 30, tzinfo=timezone.utc)

    stored = store_competence_verdict(
        competence_id,
        _payload(),
        log_path=verdict_log,
        now=fixed_now,
    )

    assert stored["schema_version"] == "lea_competence_verdict.v1"
    assert stored["competence_id"] == "key_win_r_wait_explorer_exe"
    assert stored["verdict_kind"] == "valid"
    # Write-back flags are expected to be off in the stored record.
    assert stored["write_back_enabled"] is False
    assert stored["yaml_write"] is False
    assert stored["duplicate"] is False
    assert stored["context_signature"]["machine_id"] == "DESKTOP-58D5CAC_windows"

    logged = iter_competence_verdicts(log_path=verdict_log)
    assert len(logged) == 1
    assert logged[0]["verdict_id"] == VERDICT_ID
|
||||
|
||||
|
||||
def test_store_competence_verdict_is_idempotent(tmp_path):
    """Submit the same verdict_id twice and check only one line is logged.

    The second call must be flagged as a duplicate and carry the comments of
    the first submission, not the ones sent with the second.
    """
    competence_id = "key_win_r_wait_explorer_exe"
    verdict_log = tmp_path / "verdicts.jsonl"

    initial = store_competence_verdict(competence_id, _payload(), log_path=verdict_log)
    repeated = store_competence_verdict(
        competence_id,
        _payload(comments="second click"),
        log_path=verdict_log,
    )

    assert initial["duplicate"] is False
    assert repeated["duplicate"] is True

    logged_lines = verdict_log.read_text(encoding="utf-8").splitlines()
    assert len(logged_lines) == 1
    assert repeated["comments"] == "Supervised replay ok"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("kind", ["valid", "invalid", "inconclusive"])
def test_store_competence_verdict_accepts_three_kinds(tmp_path, kind):
    """Each of the three verdict kinds is stored and echoed back unchanged."""
    # One distinct verdict id per kind.
    verdict_id_by_kind = {
        "valid": "123e4567-e89b-42d3-a456-426614174000",
        "invalid": "123e4567-e89b-42d3-a456-426614174001",
        "inconclusive": "123e4567-e89b-42d3-a456-426614174002",
    }
    payload = _payload(verdict_id=verdict_id_by_kind[kind], verdict_kind=kind)

    stored = store_competence_verdict(
        "key_win_r_wait_explorer_exe",
        payload,
        log_path=tmp_path / "verdicts.jsonl",
    )

    assert stored["verdict_kind"] == kind
|
||||
|
||||
|
||||
def test_store_competence_verdict_requires_context_machine(tmp_path):
    """An empty context_signature raises an error that mentions machine_id."""
    verdict_log = tmp_path / "verdicts.jsonl"
    incomplete = _payload(context_signature={})

    with pytest.raises(CompetenceVerdictError, match="machine_id"):
        store_competence_verdict(
            "key_win_r_wait_explorer_exe",
            incomplete,
            log_path=verdict_log,
        )
|
||||
|
||||
|
||||
def test_store_competence_verdict_rejects_yaml_write_attempt(tmp_path):
    """Write-back flags sent as True come back as False in the stored record."""
    attempted = _payload(write_back_enabled=True, yaml_write=True)

    stored = store_competence_verdict(
        "key_win_r_wait_explorer_exe",
        attempted,
        log_path=tmp_path / "verdicts.jsonl",
    )

    for flag in ("write_back_enabled", "yaml_write"):
        assert stored[flag] is False
|
||||
109
tests/unit/test_lea_competence_verdict_api.py
Normal file
109
tests/unit/test_lea_competence_verdict_api.py
Normal file
@@ -0,0 +1,109 @@
|
||||
from flask import Flask
|
||||
import importlib.util
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Load the blueprint module directly from its file path, under a private
# module name, instead of going through a package import.
MODULE_PATH = (
    Path(__file__).resolve().parents[2]
    / "visual_workflow_builder"
    / "backend"
    / "api"
    / "lea_competences.py"
)
SPEC = importlib.util.spec_from_file_location("lea_competences_test_module", MODULE_PATH)
# Validate the spec BEFORE using it: module_from_spec() would otherwise fail
# with an opaque AttributeError when no spec could be built, and the guard
# would never be reached.
if SPEC is None or SPEC.loader is None:
    raise ImportError(f"Cannot build an import spec for {MODULE_PATH}")
lea_competences_module = importlib.util.module_from_spec(SPEC)
SPEC.loader.exec_module(lea_competences_module)
lea_competences_bp = lea_competences_module.lea_competences_bp
|
||||
|
||||
|
||||
def _app():
    """Return a fresh Flask application with the competences blueprint registered."""
    flask_app = Flask(__name__)
    flask_app.register_blueprint(lea_competences_bp)
    return flask_app
|
||||
|
||||
|
||||
def test_submit_competence_verdict_endpoint(monkeypatch):
    """POST a verdict with the store stubbed out and expect a 201 response."""
    request_body = {
        "verdict_id": "123e4567-e89b-42d3-a456-426614174000",
        "verdict_kind": "valid",
    }

    def fake_store(competence_id, payload):
        # The endpoint must forward the URL competence id and the JSON body.
        assert competence_id == "key_win_r_wait_explorer_exe"
        assert payload["verdict_kind"] == "valid"
        stored = {"verdict_id": payload["verdict_id"]}
        stored["competence_id"] = competence_id
        stored["verdict_kind"] = "valid"
        stored["duplicate"] = False
        stored["write_back_enabled"] = False
        stored["yaml_write"] = False
        return stored

    monkeypatch.setattr(lea_competences_module, "store_competence_verdict", fake_store)

    with _app().test_client() as client:
        response = client.post(
            "/api/v1/lea/competences/key_win_r_wait_explorer_exe/verdict",
            json=request_body,
        )

    assert response.status_code == 201
    body = response.get_json()
    assert body["success"] is True
    assert body["write_back_enabled"] is False
    assert body["yaml_write"] is False
    assert body["verdict"]["duplicate"] is False
|
||||
|
||||
|
||||
def test_submit_competence_verdict_endpoint_returns_duplicate_200(monkeypatch):
    """A store result flagged as duplicate yields HTTP 200 with duplicate=True."""
    request_body = {
        "verdict_id": "123e4567-e89b-42d3-a456-426614174000",
        "verdict_kind": "valid",
    }

    def fake_store(_competence_id, payload):
        # Stub store that always reports the verdict as already recorded.
        stored = {"verdict_id": payload["verdict_id"]}
        stored["competence_id"] = "key_win_r_wait_explorer_exe"
        stored["verdict_kind"] = "valid"
        stored["duplicate"] = True
        stored["write_back_enabled"] = False
        stored["yaml_write"] = False
        return stored

    monkeypatch.setattr(lea_competences_module, "store_competence_verdict", fake_store)

    with _app().test_client() as client:
        response = client.post(
            "/api/v1/lea/competences/key_win_r_wait_explorer_exe/verdict",
            json=request_body,
        )

    assert response.status_code == 200
    assert response.get_json()["duplicate"] is True
|
||||
|
||||
|
||||
def test_list_competence_verdicts_endpoint(monkeypatch):
    """GET the verdict list with the reader stubbed and check the JSON body."""

    def fake_iter(competence_id):
        # Stub reader: one verdict echoing the requested competence id.
        return [{"competence_id": competence_id, "verdict_kind": "valid"}]

    monkeypatch.setattr(lea_competences_module, "iter_competence_verdicts", fake_iter)

    with _app().test_client() as client:
        response = client.get(
            "/api/v1/lea/competences/key_win_r_wait_explorer_exe/verdicts"
        )

    body = response.get_json()
    expected_verdicts = [
        {"competence_id": "key_win_r_wait_explorer_exe", "verdict_kind": "valid"}
    ]
    assert response.status_code == 200
    assert body["success"] is True
    assert body["verdicts"] == expected_verdicts
|
||||
38
tests/unit/test_supervised_popup_guard.py
Normal file
38
tests/unit/test_supervised_popup_guard.py
Normal file
@@ -0,0 +1,38 @@
|
||||
from visual_workflow_builder.backend.services.supervised_popup_guard import (
|
||||
build_unexpected_popup_pause,
|
||||
)
|
||||
|
||||
|
||||
def test_unexpected_popup_builds_pause_without_auto_resolution():
    """A popup whose title is not in the expected titles yields a human pause."""
    detected_popup = {"pattern": "confirm_save_overwrite", "title": "Enregistrer sous"}
    expected = {
        "window_title_in": ["Executer"],
        "process_active": "explorer.exe",
    }

    result = build_unexpected_popup_pause(
        detected_popup,
        expected_state=expected,
        competence_id="key_win_r_wait_explorer_exe",
        source_method_id="step_2_wait_state",
    )

    assert result is not None
    assert result["needs_human"] is True
    assert result["pause_reason"] == "unexpected_popup"
    # The guard must only pause: no automatic resolution, no write-back.
    assert result["auto_resolution"] is False
    assert result["write_back_enabled"] is False
    assert result["detected_popup"]["title"] == "Enregistrer sous"
|
||||
|
||||
|
||||
def test_expected_popup_title_does_not_build_pause():
    """A popup whose title is one of the expected titles builds no pause."""
    detected_popup = {"pattern": "run_dialog", "title": "Executer"}
    expected = {"window_title_in": ["Executer"]}

    result = build_unexpected_popup_pause(detected_popup, expected_state=expected)

    assert result is None
|
||||
|
||||
|
||||
def test_missing_popup_does_not_build_pause():
    """Passing None as the detected popup builds no pause."""
    result = build_unexpected_popup_pause(
        None,
        expected_state={"window_title_in": ["Executer"]},
    )

    assert result is None
|
||||
81
tests/unit/test_vwb_supervised_pause_runtime.py
Normal file
81
tests/unit/test_vwb_supervised_pause_runtime.py
Normal file
@@ -0,0 +1,81 @@
|
||||
from flask import Flask
|
||||
|
||||
from visual_workflow_builder.backend.catalog_routes_v2_vlm import catalog_bp
|
||||
|
||||
|
||||
class _WaitResult:
|
||||
matched = False
|
||||
|
||||
def to_dict(self):
|
||||
return {
|
||||
"matched": False,
|
||||
"timed_out": True,
|
||||
"match": {
|
||||
"expected_state": {"window_title_in": ["Executer"]},
|
||||
"observed_state": {"window_title": "Enregistrer sous"},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def test_catalog_pause_for_human_returns_paused_status():
    """Executing a pause_for_human step returns a paused result needing a human."""
    step = {
        "type": "pause_for_human",
        "step_id": "pause_1",
        "parameters": {
            "message": "Valider le resultat",
            "phase": "after",
            "verdict_required": True,
            "competence_id": "key_win_r_wait_explorer_exe",
        },
    }
    flask_app = Flask(__name__)
    flask_app.register_blueprint(catalog_bp)

    with flask_app.test_client() as client:
        response = client.post("/api/vwb/catalog/execute", json=step)

    body = response.get_json()
    assert response.status_code == 200
    result = body["result"]
    assert result["status"] == "paused"
    assert result["output_data"]["needs_human"] is True
    assert result["output_data"]["write_back_enabled"] is False
|
||||
|
||||
|
||||
def test_catalog_wait_for_state_popup_pauses_without_auto_resolution(monkeypatch):
    """A non-matching wait with a detected popup pauses for a human.

    The wait helper and the screen-pattern check are both stubbed out.
    """

    def fake_wait(**_kwargs):
        # Stub wait: always the non-matching _WaitResult.
        return _WaitResult()

    def fake_check_screen():
        # Stub screen check: always reports a "save as" popup.
        return {"pattern": "save_as", "title": "Enregistrer sous"}

    monkeypatch.setattr(
        "visual_workflow_builder.backend.services.wait_for_state.wait_for_expected_state",
        fake_wait,
    )
    monkeypatch.setattr(
        "core.execution.input_handler.check_screen_for_patterns",
        fake_check_screen,
    )

    step = {
        "type": "wait_for_state",
        "step_id": "wait_1",
        "parameters": {
            "expected_state": {"window_title_in": ["Executer"]},
            "timeout_ms": 1,
            "poll_interval_ms": 1,
            "supervised_popup_detection": True,
            "competence_id": "key_win_r_wait_explorer_exe",
            "source_method_id": "step_2_wait_state",
        },
    }
    flask_app = Flask(__name__)
    flask_app.register_blueprint(catalog_bp)

    with flask_app.test_client() as client:
        response = client.post("/api/vwb/catalog/execute", json=step)

    body = response.get_json()
    assert response.status_code == 200
    assert body["result"]["status"] == "paused"
    human_pause = body["result"]["output_data"]["human_pause"]
    assert human_pause["pause_reason"] == "unexpected_popup"
    assert human_pause["auto_resolution"] is False
|
||||
Reference in New Issue
Block a user