feat(competences): extract batch candidates
This commit is contained in:
580
tests/unit/test_extract_competences_from_session.py
Normal file
580
tests/unit/test_extract_competences_from_session.py
Normal file
@@ -0,0 +1,580 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
import pytest
|
||||
|
||||
import tools.extract_competences_from_session as extractor
|
||||
from tools.extract_competences_from_session import build_report, render_markdown_report
|
||||
|
||||
|
||||
def _write_raw_jsonl(path, events) -> None:
    """Write ``events`` to ``path`` as a JSONL session trace, one record per line.

    Each event is wrapped in an envelope carrying a fixed ``session_id`` and
    ``machine_id``; the event's position in ``events`` is stored as a float
    ``timestamp``.

    Args:
        path: Destination object exposing ``write_text`` (the tests pass a
            path built from pytest's ``tmp_path``).
        events: Iterable of JSON-serialisable event payloads.
    """
    lines = [
        json.dumps(
            {
                "session_id": "sess_extract_test",
                "timestamp": float(index),
                "event": event,
                "machine_id": "windows_vm",
            }
        )
        for index, event in enumerate(events)
    ]
    # A newline is always appended, so an empty ``events`` still produces a
    # file that contains exactly one newline character.
    path.write_text("\n".join(lines) + "\n", encoding="utf-8")
|
||||
|
||||
|
||||
def test_dry_run_extracts_click_wait_state_candidate(tmp_path):
    """A UIA-anchored click followed by a focus change yields one dry-run candidate.

    The report is built without an explicit ``mode``; it must come back as
    ``dry_run`` and no YAML file may be written for the candidate.
    """
    session_path = tmp_path / "live_events.jsonl"
    output_dir = tmp_path / "observed"
    _write_raw_jsonl(
        session_path,
        [
            {"type": "heartbeat", "active_window_title": "Bureau"},
            {
                "type": "mouse_click",
                "button": "left",
                "window": {"title": "Bureau", "app_name": "explorer.exe"},
                "uia_snapshot": {
                    "name": "Rechercher",
                    "control_type": "bouton",
                    "automation_id": "SearchButton",
                    "parent_path": [{"name": "Barre des taches", "control_type": "volet"}],
                },
            },
            {
                "type": "window_focus_change",
                "to": {"title": "Rechercher", "app_name": "SearchHost.exe"},
                "window": {"title": "Rechercher", "app_name": "SearchHost.exe"},
            },
        ],
    )

    report = build_report(
        session_path=session_path,
        machine_id="windows_vm",
        output_dir=output_dir,
    )

    assert report["mode"] == "dry_run"
    assert report["summary"]["would_write"] == 0
    assert report["summary"]["candidates_generated"] == 1
    candidate = report["candidates"][0]
    assert candidate["validator_status"] == "would_pass"
    assert candidate["apply_eligible"] is True
    assert candidate["primitive_refs"] == ["click_anchor", "wait_for_state"]
    assert candidate["segment"] == {"keep": [0, 1, 2], "method": [1, 2], "success": [2]}
    assert candidate["t2_gaps_detected"] == [
        "click_target_semantics_not_observed_offline",
        "no_ocr_offline",
    ]
    # Dry run: nothing may be materialised on disk for the candidate.
    assert not (output_dir / f"{candidate['competence_id']}.yaml").exists()
|
||||
|
||||
|
||||
def test_dry_run_rejects_click_without_uia_anchor(tmp_path):
    """A click event lacking ``uia_snapshot`` is rejected with ``anchor_ref_uia_missing``."""
    session_path = tmp_path / "live_events.jsonl"
    _write_raw_jsonl(
        session_path,
        [
            {
                "type": "mouse_click",
                "button": "left",
                "window": {"title": "Bureau", "app_name": "explorer.exe"},
            },
            {
                "type": "window_focus_change",
                "to": {"title": "Rechercher", "app_name": "SearchHost.exe"},
            },
        ],
    )

    report = build_report(session_path=session_path, machine_id="windows_vm")

    assert report["summary"]["candidates_generated"] == 0
    assert report["summary"]["candidates_rejected"] == 1
    assert report["rejected"][0]["reason"] == "click without uia_snapshot anchor"
    assert report["rejected"][0]["validator_codes"] == ["anchor_ref_uia_missing"]
|
||||
|
||||
|
||||
def test_dry_run_rejects_weak_uia_click_anchor(tmp_path):
    """A click on a ``Groupe`` element with automation id ``"0"`` is rejected as too generic."""
    session_path = tmp_path / "live_events.jsonl"
    _write_raw_jsonl(
        session_path,
        [
            {
                "type": "mouse_click",
                "button": "left",
                "window": {"title": "Rechercher", "app_name": "SearchHost.exe"},
                "uia_snapshot": {
                    "name": "Aujourd'hui",
                    "control_type": "Groupe",
                    "automation_id": "0",
                },
            },
            {
                "type": "window_focus_change",
                "to": {"title": "unknown_window", "app_name": "explorer.exe"},
            },
        ],
    )

    report = build_report(session_path=session_path, machine_id="windows_vm")

    assert report["summary"]["candidates_generated"] == 0
    assert report["rejected"][0]["reason"] == "click with too generic anchor"
    assert report["rejected"][0]["validator_codes"] == ["anchor_ref_too_generic"]
|
||||
|
||||
|
||||
def test_dry_run_rejects_systemtrayicon_anchor(tmp_path):
    """A click anchored on ``SystemTrayIcon`` is rejected with ``anchor_ref_systray_fragile``."""
    session_path = tmp_path / "live_events.jsonl"
    _write_raw_jsonl(
        session_path,
        [
            {
                "type": "mouse_click",
                "button": "left",
                "window": {"title": "Shell_TrayWnd", "app_name": "explorer.exe"},
                "uia_snapshot": {
                    "name": "SystemTrayIcon",
                    "control_type": "bouton",
                    "automation_id": "SystemTrayIcon",
                },
            },
            {
                "type": "window_focus_change",
                "to": {"title": "unknown_window", "app_name": "explorer.exe"},
            },
        ],
    )

    report = build_report(session_path=session_path, machine_id="windows_vm")

    assert report["summary"]["candidates_generated"] == 0
    assert report["rejected"][0]["reason"] == "click on fragile system tray anchor"
    assert report["rejected"][0]["validator_codes"] == ["anchor_ref_systray_fragile"]
|
||||
|
||||
|
||||
def test_dry_run_rejects_dom_autogenerated_anchor(tmp_path):
    """A click whose automation id is ``so_iazxhgsedkduppcyhoay_73`` is rejected as autogenerated."""
    session_path = tmp_path / "live_events.jsonl"
    _write_raw_jsonl(
        session_path,
        [
            {
                "type": "mouse_click",
                "button": "left",
                "window": {"title": "Chrome", "app_name": "chrome.exe"},
                "uia_snapshot": {
                    "name": "Continuer",
                    "control_type": "bouton",
                    "automation_id": "so_iazxhgsedkduppcyhoay_73",
                },
            },
            {
                "type": "window_focus_change",
                "to": {"title": "Chrome", "app_name": "chrome.exe"},
            },
        ],
    )

    report = build_report(session_path=session_path, machine_id="windows_vm")

    assert report["summary"]["candidates_generated"] == 0
    assert report["rejected"][0]["reason"] == "click on autogenerated DOM anchor"
    assert report["rejected"][0]["validator_codes"] == ["anchor_ref_dom_autogenerated"]
|
||||
|
||||
|
||||
def test_dry_run_rejects_unknown_window_title(tmp_path):
    """A click recorded in a window titled ``unknown_window`` is rejected.

    The UIA snapshot itself carries a name and automation id; only the
    window title of the click event is ``unknown_window``.
    """
    session_path = tmp_path / "live_events.jsonl"
    _write_raw_jsonl(
        session_path,
        [
            {
                "type": "mouse_click",
                "button": "left",
                "window": {"title": "unknown_window", "app_name": "explorer.exe"},
                "uia_snapshot": {
                    "name": "Ouvrir",
                    "control_type": "bouton",
                    "automation_id": "OpenButton",
                },
            },
            {
                "type": "window_focus_change",
                "to": {"title": "Explorateur", "app_name": "explorer.exe"},
            },
        ],
    )

    report = build_report(session_path=session_path, machine_id="windows_vm")

    assert report["summary"]["candidates_generated"] == 0
    assert report["rejected"][0]["reason"] == "click in unknown or overflow window"
    assert report["rejected"][0]["validator_codes"] == ["anchor_ref_unknown_window"]
|
||||
|
||||
|
||||
def test_dry_run_rejects_browser_contextual_anchor(tmp_path):
    """A click on Chrome's ``TabStripControlButton`` is rejected as browser-contextual."""
    session_path = tmp_path / "live_events.jsonl"
    _write_raw_jsonl(
        session_path,
        [
            {
                "type": "mouse_click",
                "button": "left",
                "window": {"title": "Dashboard - Google Chrome", "app_name": "chrome.exe"},
                "uia_snapshot": {
                    "name": "Nouvel onglet",
                    "control_type": "Bouton",
                    "class_name": "TabStripControlButton",
                    "automation_id": "",
                    "parent_path": [{"name": "", "control_type": "tabulation"}],
                },
            },
            {
                "type": "window_focus_change",
                "to": {"title": "Nouvel onglet - Google Chrome", "app_name": "chrome.exe"},
            },
        ],
    )

    report = build_report(session_path=session_path, machine_id="windows_vm")

    assert report["summary"]["candidates_generated"] == 0
    assert report["rejected"][0]["reason"] == "click on contextual browser chrome anchor"
    assert report["rejected"][0]["validator_codes"] == ["anchor_ref_browser_contextual"]
|
||||
|
||||
|
||||
def test_dry_run_rejects_contextual_add_tab_button_anchor(tmp_path):
    """A click on Notepad's ``AddButton`` tab control is rejected as a contextual button."""
    session_path = tmp_path / "live_events.jsonl"
    _write_raw_jsonl(
        session_path,
        [
            {
                "type": "mouse_click",
                "button": "left",
                "window": {"title": "agent_debug.log - Bloc-notes", "app_name": "Notepad.exe"},
                "uia_snapshot": {
                    "name": "Ajouter un nouvel onglet",
                    "control_type": "bouton",
                    "class_name": "Button",
                    "automation_id": "AddButton",
                    "parent_path": [
                        {"name": "Bureau 1", "control_type": "volet"},
                        {"name": "agent_debug.log - Bloc-notes", "control_type": "fenetre"},
                        {"name": "", "control_type": "volet"},
                        {"name": "", "control_type": "onglet"},
                    ],
                },
            },
            {
                "type": "window_focus_change",
                "to": {"title": "agent_debug.log - Bloc-notes", "app_name": "Notepad.exe"},
            },
        ],
    )

    report = build_report(session_path=session_path, machine_id="windows_vm")

    assert report["summary"]["candidates_generated"] == 0
    assert report["rejected"][0]["reason"] == "click on contextual UI chrome button"
    assert report["rejected"][0]["validator_codes"] == ["anchor_ref_contextual_button"]
|
||||
|
||||
|
||||
def test_dry_run_rejects_too_generic_anchor(tmp_path):
    """A click named ``button_12`` with an empty automation id is rejected as too generic."""
    session_path = tmp_path / "live_events.jsonl"
    _write_raw_jsonl(
        session_path,
        [
            {
                "type": "mouse_click",
                "button": "left",
                "window": {"title": "Application", "app_name": "app.exe"},
                "uia_snapshot": {
                    "name": "button_12",
                    "control_type": "bouton",
                    "automation_id": "",
                },
            },
            {
                "type": "window_focus_change",
                "to": {"title": "Application", "app_name": "app.exe"},
            },
        ],
    )

    report = build_report(session_path=session_path, machine_id="windows_vm")

    assert report["summary"]["candidates_generated"] == 0
    assert report["rejected"][0]["reason"] == "click with too generic anchor"
    assert report["rejected"][0]["validator_codes"] == ["anchor_ref_too_generic"]
|
||||
|
||||
|
||||
def test_dry_run_rejects_empty_region_anchor(tmp_path):
    """A click on a nameless ``région`` element with no automation id is rejected as too generic."""
    session_path = tmp_path / "live_events.jsonl"
    _write_raw_jsonl(
        session_path,
        [
            {
                "type": "mouse_click",
                "button": "left",
                "window": {"title": "Application", "app_name": "app.exe"},
                "uia_snapshot": {
                    "name": "",
                    "control_type": "région",
                    "automation_id": "",
                },
            },
            {
                "type": "window_focus_change",
                "to": {"title": "Application", "app_name": "app.exe"},
            },
        ],
    )

    report = build_report(session_path=session_path, machine_id="windows_vm")

    assert report["summary"]["candidates_generated"] == 0
    assert report["rejected"][0]["reason"] == "click with too generic anchor"
    assert report["rejected"][0]["validator_codes"] == ["anchor_ref_too_generic"]
|
||||
|
||||
|
||||
def test_dry_run_hard_caps_candidates(tmp_path):
    """Requesting ``max_candidates=11`` raises a ``ValueError`` mentioning ``hard-cap``."""
    session_path = tmp_path / "live_events.jsonl"
    _write_raw_jsonl(session_path, [])

    with pytest.raises(ValueError, match="hard-cap"):
        build_report(session_path=session_path, machine_id="windows_vm", max_candidates=11)
|
||||
|
||||
|
||||
def test_apply_requires_allow_list(tmp_path):
    """Apply mode without an allow-list raises ``ValueError`` naming ``--allow-list``."""
    session_path = tmp_path / "live_events.jsonl"
    _write_raw_jsonl(session_path, [])

    with pytest.raises(ValueError, match="--allow-list is required"):
        build_report(session_path=session_path, machine_id="windows_vm", mode="apply")
|
||||
|
||||
|
||||
def test_apply_rejects_unknown_id_in_allow_list(tmp_path):
    """Apply mode raises when the allow-list holds ``missing_id``, and writes no YAML."""
    session_path = tmp_path / "live_events.jsonl"
    output_dir = tmp_path / "observed"
    _write_raw_jsonl(
        session_path,
        [
            {
                "type": "key_combo",
                "keys": ["win", "e"],
                "window": {"title": "Bureau", "app_name": "explorer.exe"},
            },
            {
                "type": "window_focus_change",
                "to": {"title": "Executer", "app_name": "explorer.exe"},
            },
        ],
    )

    with pytest.raises(ValueError, match="--allow-list-id-not-found: missing_id"):
        build_report(
            session_path=session_path,
            machine_id="windows_vm",
            output_dir=output_dir,
            mode="apply",
            allow_list=["missing_id"],
        )

    # Checked after the ``with`` block so it runs once the error was raised.
    assert not list(output_dir.glob("*.yaml"))
|
||||
|
||||
|
||||
def test_apply_atomic_rollback_on_validation_failure(tmp_path, monkeypatch):
    """A validation failure during apply propagates and leaves no YAML in ``output_dir``.

    ``extractor._validate_apply_yaml_files`` is replaced by a stub that
    always raises, forcing the failure path.
    """
    session_path = tmp_path / "live_events.jsonl"
    output_dir = tmp_path / "observed"
    _write_raw_jsonl(
        session_path,
        [
            {
                "type": "key_combo",
                "keys": ["win", "e"],
                "window": {"title": "Bureau", "app_name": "explorer.exe"},
            },
            {
                "type": "window_focus_change",
                "to": {"title": "Executer", "app_name": "explorer.exe"},
            },
        ],
    )

    def fail_validation(paths, *, repo_root):
        # Stand-in validator: ignores its arguments and always fails.
        raise ValueError("apply-validation-failed: forced")

    monkeypatch.setattr(extractor, "_validate_apply_yaml_files", fail_validation)

    with pytest.raises(ValueError, match="apply-validation-failed: forced"):
        build_report(
            session_path=session_path,
            machine_id="windows_vm",
            output_dir=output_dir,
            mode="apply",
            allow_list=["key_win_e_wait_explorer_exe"],
        )

    # Checked after the ``with`` block so it runs once the error was raised.
    assert not list(output_dir.glob("*.yaml"))
|
||||
|
||||
|
||||
def test_apply_writes_only_allowed_ids(tmp_path):
    """Apply mode writes a YAML file only for the allow-listed competence id.

    The session holds two key-combo/focus-change pairs; only
    ``key_win_e_wait_explorer_exe`` is allow-listed, so the file for
    ``key_ctrl_p_wait_notepad_exe`` must not exist afterwards.
    """
    session_path = tmp_path / "live_events.jsonl"
    output_dir = tmp_path / "observed"
    _write_raw_jsonl(
        session_path,
        [
            {
                "type": "key_combo",
                "keys": ["win", "e"],
                "window": {"title": "Bureau", "app_name": "explorer.exe"},
            },
            {
                "type": "window_focus_change",
                "to": {"title": "Executer", "app_name": "explorer.exe"},
            },
            {
                "type": "key_combo",
                "keys": ["ctrl", "p"],
                "window": {"title": "Bloc-notes", "app_name": "Notepad.exe"},
            },
            {
                "type": "window_focus_change",
                "to": {"title": "Enregistrer sous", "app_name": "Notepad.exe"},
            },
        ],
    )

    report = build_report(
        session_path=session_path,
        machine_id="windows_vm",
        output_dir=output_dir,
        mode="apply",
        allow_list=["key_win_e_wait_explorer_exe"],
    )

    assert report["mode"] == "apply"
    assert report["allow_list"] == ["key_win_e_wait_explorer_exe"]
    assert report["summary"]["would_write"] == 1
    assert report["summary"]["written"] == 1
    assert report["applied"] == [
        {
            "competence_id": "key_win_e_wait_explorer_exe",
            "path": str(output_dir / "key_win_e_wait_explorer_exe.yaml"),
        }
    ]
    assert (output_dir / "key_win_e_wait_explorer_exe.yaml").is_file()
    assert not (output_dir / "key_ctrl_p_wait_notepad_exe.yaml").exists()
|
||||
|
||||
|
||||
def test_apply_respects_max_candidates_cap(tmp_path):
    """Apply mode with ``max_candidates=11`` raises a ``ValueError`` mentioning ``hard-cap``."""
    session_path = tmp_path / "live_events.jsonl"
    _write_raw_jsonl(session_path, [])

    with pytest.raises(ValueError, match="hard-cap"):
        build_report(
            session_path=session_path,
            machine_id="windows_vm",
            mode="apply",
            allow_list=["key_win_r_wait_explorer_exe"],
            max_candidates=11,
        )
|
||||
|
||||
|
||||
def test_markdown_report_includes_candidate_summary(tmp_path):
    """The rendered Markdown contains the report heading, the candidate id and its primitive."""
    session_path = tmp_path / "live_events.jsonl"
    _write_raw_jsonl(
        session_path,
        [
            {
                "type": "key_combo",
                "keys": ["ctrl", "s"],
                "window": {"title": "Bloc-notes", "app_name": "Notepad.exe"},
            },
            {
                "type": "window_focus_change",
                "to": {"title": "Enregistrer sous", "app_name": "Notepad.exe"},
            },
        ],
    )
    report = build_report(session_path=session_path, machine_id="windows_vm")

    markdown = render_markdown_report(report)

    assert "# Extraction report" in markdown
    assert "key_ctrl_s_wait_notepad_exe" in markdown
    assert "wait_for_state" in markdown
|
||||
|
||||
|
||||
def test_azerty_ctrl_s_trace_is_normalized_for_candidate(tmp_path):
    """A recorded ``shift`` + ``ctrl`` + ``@`` combo yields the ``key_ctrl_s_...`` candidate id."""
    session_path = tmp_path / "live_events.jsonl"
    _write_raw_jsonl(
        session_path,
        [
            {
                "type": "key_combo",
                "keys": ["shift", "ctrl", "@"],
                "window": {"title": "WordPad", "app_name": "WordPad.exe"},
            },
            {
                "type": "window_focus_change",
                "to": {"title": "Enregistrer sous", "app_name": "WordPad.exe"},
            },
        ],
    )

    report = build_report(session_path=session_path, machine_id="windows_vm")

    candidate = report["candidates"][0]
    assert candidate["competence_id"] == "key_ctrl_s_wait_wordpad_exe"
    assert candidate["validator_status"] == "would_pass"
    assert candidate["apply_eligible"] is True
|
||||
|
||||
|
||||
def test_ctrl_s_control_character_trace_is_normalized_for_candidate(tmp_path):
    """A combo recorded with the ``\\x13`` control character yields the ``key_ctrl_s_...`` id."""
    session_path = tmp_path / "live_events.jsonl"
    _write_raw_jsonl(
        session_path,
        [
            {
                "type": "key_combo",
                "keys": ["shift", "ctrl", "\x13"],
                "window": {"title": "Bloc-notes", "app_name": "Notepad.exe"},
            },
            {
                "type": "window_focus_change",
                "to": {"title": "Enregistrer sous", "app_name": "Notepad.exe"},
            },
        ],
    )

    report = build_report(session_path=session_path, machine_id="windows_vm")

    candidate = report["candidates"][0]
    assert candidate["competence_id"] == "key_ctrl_s_wait_notepad_exe"
    assert candidate["validator_status"] == "would_pass"
|
||||
|
||||
|
||||
def test_text_input_candidate_is_below_apply_threshold(tmp_path):
    """A text-input candidate scores under the apply threshold and is not apply-eligible."""
    session_path = tmp_path / "live_events.jsonl"
    _write_raw_jsonl(
        session_path,
        [
            {
                "type": "text_input",
                "text": "hello",
                "window": {"title": "Bloc-notes", "app_name": "Notepad.exe"},
            },
            {
                "type": "heartbeat",
                "window": {"title": "Bloc-notes", "app_name": "Notepad.exe"},
            },
        ],
    )

    report = build_report(session_path=session_path, machine_id="windows_vm")

    candidate = report["candidates"][0]
    assert candidate["primitive_refs"] == ["text_input_focused"]
    # The threshold is read from the report itself rather than hard-coded.
    assert candidate["confidence"] < report["summary"]["apply_min_confidence"]
    assert candidate["apply_eligible"] is False
    assert "below_apply_confidence_threshold" in candidate["quality_flags"]
|
||||
Reference in New Issue
Block a user