feat: câblage complet V4 — stratégie UIA + surface profile
Pipeline V4 câblé de bout en bout :

    RawTrace (avec uia_snapshot) → IRBuilder → Action._enrichment
    WorkflowIR → ExecutionCompiler (avec SurfaceProfile) → ExecutionPlan
    ExecutionPlan → runner → target_spec (avec uia_target + resolve_order)

ResolutionStrategy étendu :
- Champs UIA : uia_name, uia_control_type, uia_automation_id, uia_parent_path
- Champs DOM : dom_selector, dom_xpath, dom_url_pattern (préparation web)

ExecutionCompiler.compile(surface_profile=...) :
- Timeouts/retries tirés du profil (citrix=15s/3x, web=5s/1x, natif=8s/2x)
- UIA primaire seulement si surface=WINDOWS_NATIVE et uia_available
- Citrix ignore UIA même si snapshot présent (UIA ne marche pas dans Citrix)

IRBuilder lit evt['uia_snapshot'] et le stocke dans action._enrichment
(à remplir par l'agent Windows pendant l'enregistrement via lea_uia.exe).

execution_plan_runner propage uia_target et dom_target dans target_spec
pour que l'agent Windows puisse les consommer au runtime.

11 tests de câblage E2E :
- Profils (Citrix/web/natif) imposent bien les timeouts
- Stratégie UIA créée quand snapshot+surface OK
- Stratégie UIA bloquée sur Citrix
- IRBuilder propage uia_snapshot
- Runner produit target_spec avec uia_target + resolve_order=['uia', 'ocr', 'vlm']

496 tests au total, 0 régression.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -114,6 +114,8 @@ def _strategy_to_target_spec(
|
||||
by_text_candidate = ""
|
||||
anchor_candidate = ""
|
||||
vlm_candidate = ""
|
||||
uia_data: Dict[str, Any] = {}
|
||||
dom_data: Dict[str, Any] = {}
|
||||
resolve_order: List[str] = []
|
||||
seen_methods: set = set()
|
||||
|
||||
@@ -129,6 +131,19 @@ def _strategy_to_target_spec(
|
||||
by_text_candidate = strat.target_text
|
||||
elif strat.method == "vlm" and strat.vlm_description and not vlm_candidate:
|
||||
vlm_candidate = strat.vlm_description
|
||||
elif strat.method == "uia" and strat.uia_name and not uia_data:
|
||||
uia_data = {
|
||||
"name": strat.uia_name,
|
||||
"control_type": strat.uia_control_type,
|
||||
"automation_id": strat.uia_automation_id,
|
||||
"parent_path": strat.uia_parent_path,
|
||||
}
|
||||
elif strat.method == "dom" and strat.dom_selector and not dom_data:
|
||||
dom_data = {
|
||||
"selector": strat.dom_selector,
|
||||
"xpath": strat.dom_xpath,
|
||||
"url_pattern": strat.dom_url_pattern,
|
||||
}
|
||||
|
||||
# Construire l'ordre des méthodes (dans l'ordre primaire → fallbacks)
|
||||
if strat.method and strat.method not in seen_methods:
|
||||
@@ -145,6 +160,14 @@ def _strategy_to_target_spec(
|
||||
# L'intention métier devient le prompt VLM de dernier recours
|
||||
spec["vlm_description"] = intent
|
||||
|
||||
# Données UIA — consommées par l'agent Windows via lea_uia.exe
|
||||
if uia_data:
|
||||
spec["uia_target"] = uia_data
|
||||
|
||||
# Données DOM — consommées par l'agent Windows via CDP (futur)
|
||||
if dom_data:
|
||||
spec["dom_target"] = dom_data
|
||||
|
||||
# Ordre de résolution pré-compilé — c'est LA pièce centrale du V4
|
||||
if resolve_order:
|
||||
spec["resolve_order"] = resolve_order
|
||||
|
||||
@@ -61,6 +61,7 @@ class ExecutionCompiler:
|
||||
target_machine: str = "",
|
||||
target_resolution: str = "1280x800",
|
||||
params: Optional[Dict[str, str]] = None,
|
||||
surface_profile=None,
|
||||
) -> ExecutionPlan:
|
||||
"""Compiler un WorkflowIR en ExecutionPlan.
|
||||
|
||||
@@ -69,6 +70,8 @@ class ExecutionCompiler:
|
||||
target_machine: Machine cible (pour adapter les stratégies)
|
||||
target_resolution: Résolution de la machine cible
|
||||
params: Variables à substituer
|
||||
surface_profile: SurfaceProfile optionnel pour adapter les paramètres.
|
||||
Si fourni, timeouts/seuils/retries sont tirés du profil.
|
||||
"""
|
||||
t_start = time.time()
|
||||
|
||||
@@ -88,7 +91,7 @@ class ExecutionCompiler:
|
||||
|
||||
# Compiler chaque étape
|
||||
for step in ir.steps:
|
||||
nodes = self._compile_step(step, ir, learned_strategies)
|
||||
nodes = self._compile_step(step, ir, learned_strategies, surface_profile)
|
||||
plan.nodes.extend(nodes)
|
||||
|
||||
# Statistiques de compilation
|
||||
@@ -124,6 +127,7 @@ class ExecutionCompiler:
|
||||
step: Step,
|
||||
ir: WorkflowIR,
|
||||
learned: Dict[str, str],
|
||||
surface_profile=None,
|
||||
) -> List[ExecutionNode]:
|
||||
"""Compiler une étape en nœuds d'exécution."""
|
||||
nodes = []
|
||||
@@ -135,6 +139,7 @@ class ExecutionCompiler:
|
||||
action_index=i,
|
||||
ir=ir,
|
||||
learned=learned,
|
||||
surface_profile=surface_profile,
|
||||
)
|
||||
nodes.append(node)
|
||||
|
||||
@@ -147,6 +152,7 @@ class ExecutionCompiler:
|
||||
action_index: int,
|
||||
ir: WorkflowIR,
|
||||
learned: Dict[str, str],
|
||||
surface_profile=None,
|
||||
) -> ExecutionNode:
|
||||
"""Compiler une action en nœud d'exécution avec stratégie de résolution."""
|
||||
|
||||
@@ -158,13 +164,20 @@ class ExecutionCompiler:
|
||||
is_optional=step.is_optional,
|
||||
)
|
||||
|
||||
# Paramètres par défaut, surchargés par le surface_profile si fourni
|
||||
default_click_timeout = 10000
|
||||
default_click_retries = 2
|
||||
if surface_profile is not None:
|
||||
default_click_timeout = getattr(surface_profile, "timeout_click_ms", 10000)
|
||||
default_click_retries = getattr(surface_profile, "max_retries", 2)
|
||||
|
||||
if action.type == "click":
|
||||
# Compiler les stratégies de résolution pour ce clic
|
||||
node.strategy_primary, node.strategy_fallbacks = self._compile_click_resolution(
|
||||
action, step, learned,
|
||||
action, step, learned, surface_profile,
|
||||
)
|
||||
node.timeout_ms = 10000
|
||||
node.max_retries = 2
|
||||
node.timeout_ms = default_click_timeout
|
||||
node.max_retries = default_click_retries
|
||||
node.recovery_action = "escape"
|
||||
|
||||
# Condition de succès basée sur la postcondition
|
||||
@@ -205,16 +218,22 @@ class ExecutionCompiler:
|
||||
action: Action,
|
||||
step: Step,
|
||||
learned: Dict[str, str],
|
||||
surface_profile=None,
|
||||
) -> tuple:
|
||||
"""Compiler les stratégies de résolution pour un clic.
|
||||
|
||||
Utilise les données d'enrichissement visuel (action._enrichment) si
|
||||
disponibles (crop anchor, description VLM, window_capture).
|
||||
disponibles :
|
||||
- by_text (OCR)
|
||||
- anchor_image_base64 (template)
|
||||
- vlm_description (VLM)
|
||||
- uia_snapshot (UIA sur Windows natif)
|
||||
|
||||
Ordre de priorité :
|
||||
1. OCR exact (si by_text disponible) — 100ms, pixel-perfect
|
||||
2. Template matching (si anchor_image_base64) — 10ms
|
||||
3. VLM (vlm_description) — 2-5s, exception handler
|
||||
Ordre de priorité (variable selon la surface) :
|
||||
1. UIA (si snapshot dispo ET surface native ET helper dispo) — 10-20ms
|
||||
2. OCR exact (si texte visible) — 100-200ms
|
||||
3. Template matching (si crop) — 10ms
|
||||
4. VLM — exception handler
|
||||
|
||||
Le learning peut réordonner si une stratégie a mieux marché avant.
|
||||
"""
|
||||
@@ -227,6 +246,7 @@ class ExecutionCompiler:
|
||||
anchor_b64 = enrichment.get("anchor_image_base64", "")
|
||||
vlm_desc_from_enrich = enrichment.get("vlm_description", "")
|
||||
window_title = enrichment.get("window_title", "")
|
||||
uia_snapshot = enrichment.get("uia_snapshot") or {}
|
||||
|
||||
# Source de texte : enrichissement > anchor_hint > target
|
||||
target_text = by_text_from_enrich or action.anchor_hint or action.target
|
||||
@@ -236,6 +256,33 @@ class ExecutionCompiler:
|
||||
|
||||
learned_method = learned.get(target_text, "")
|
||||
|
||||
# Est-ce qu'on est sur une surface où UIA est activable ?
|
||||
uia_eligible = False
|
||||
if surface_profile is not None:
|
||||
from .surface_classifier import SurfaceType
|
||||
surface_type = getattr(surface_profile, "surface_type", None)
|
||||
uia_available = getattr(surface_profile, "uia_available", False)
|
||||
uia_eligible = (
|
||||
uia_available
|
||||
and surface_type == SurfaceType.WINDOWS_NATIVE
|
||||
)
|
||||
else:
|
||||
# Sans profil explicite, on active UIA si le snapshot est présent
|
||||
# (l'agent décidera au runtime s'il peut l'utiliser)
|
||||
uia_eligible = bool(uia_snapshot)
|
||||
|
||||
# Stratégie UIA — la plus rapide et la plus précise sur Windows natif
|
||||
if uia_snapshot and uia_snapshot.get("name") and uia_eligible:
|
||||
uia_strategy = ResolutionStrategy(
|
||||
method="uia",
|
||||
uia_name=uia_snapshot.get("name", ""),
|
||||
uia_control_type=uia_snapshot.get("control_type", ""),
|
||||
uia_automation_id=uia_snapshot.get("automation_id", ""),
|
||||
uia_parent_path=uia_snapshot.get("parent_path", []),
|
||||
threshold=0.95,
|
||||
)
|
||||
primary = uia_strategy
|
||||
|
||||
# Stratégie OCR — le texte visible est la meilleure ancre
|
||||
if target_text:
|
||||
ocr_strategy = ResolutionStrategy(
|
||||
@@ -243,7 +290,10 @@ class ExecutionCompiler:
|
||||
target_text=target_text,
|
||||
threshold=0.7,
|
||||
)
|
||||
if not learned_method or learned_method in ("ocr", "som_text_match", "hybrid_text_direct", "v4_ocr"):
|
||||
if primary is None and (
|
||||
not learned_method
|
||||
or learned_method in ("ocr", "som_text_match", "hybrid_text_direct", "v4_ocr")
|
||||
):
|
||||
primary = ocr_strategy
|
||||
else:
|
||||
fallbacks.append(ocr_strategy)
|
||||
@@ -256,9 +306,9 @@ class ExecutionCompiler:
|
||||
anchor_b64=anchor_b64,
|
||||
threshold=0.85,
|
||||
)
|
||||
if learned_method in ("anchor_template", "template_matching", "v4_template"):
|
||||
if primary:
|
||||
fallbacks.insert(0, primary)
|
||||
if primary is None and learned_method in (
|
||||
"anchor_template", "template_matching", "v4_template"
|
||||
):
|
||||
primary = template_strategy
|
||||
else:
|
||||
fallbacks.append(template_strategy)
|
||||
|
||||
@@ -29,7 +29,7 @@ class ResolutionStrategy:
|
||||
|
||||
Pré-compilée — le runtime n'a pas besoin du VLM pour résoudre.
|
||||
"""
|
||||
method: str # "ocr", "template", "position", "vlm"
|
||||
method: str # "uia", "ocr", "template", "position", "vlm", "dom"
|
||||
target_text: str = "" # Texte à chercher (pour OCR)
|
||||
anchor_b64: str = "" # Crop de référence (pour template matching)
|
||||
zone: Dict[str, float] = field(default_factory=dict) # Zone de recherche {x_min, y_min, x_max, y_max}
|
||||
@@ -37,6 +37,20 @@ class ResolutionStrategy:
|
||||
vlm_description: str = "" # Description VLM (dernier recours)
|
||||
threshold: float = 0.8 # Seuil de confiance
|
||||
|
||||
# Stratégie UIA (Windows UI Automation)
|
||||
# Utilisée quand l'enregistrement a capturé un snapshot UIA au moment du clic.
|
||||
# Au replay, l'agent Windows appelle lea_uia.exe find --name ... pour retrouver
|
||||
# l'élément par son chemin logique (100% fiable sur Windows natif).
|
||||
uia_name: str = "" # Name property de l'élément
|
||||
uia_control_type: str = "" # ControlType (Button, Edit, MenuItem, ...)
|
||||
uia_automation_id: str = "" # AutomationId (optionnel)
|
||||
uia_parent_path: List[Dict[str, str]] = field(default_factory=list)
|
||||
|
||||
# Stratégie DOM (web avec CDP activé) — préparation pour plus tard
|
||||
dom_selector: str = "" # CSS selector
|
||||
dom_xpath: str = "" # XPath
|
||||
dom_url_pattern: str = "" # Pattern URL à matcher
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
d = {"method": self.method}
|
||||
if self.target_text:
|
||||
@@ -49,6 +63,20 @@ class ResolutionStrategy:
|
||||
d["position_hint"] = self.position_hint
|
||||
if self.vlm_description:
|
||||
d["vlm_description"] = self.vlm_description
|
||||
if self.uia_name:
|
||||
d["uia_name"] = self.uia_name
|
||||
if self.uia_control_type:
|
||||
d["uia_control_type"] = self.uia_control_type
|
||||
if self.uia_automation_id:
|
||||
d["uia_automation_id"] = self.uia_automation_id
|
||||
if self.uia_parent_path:
|
||||
d["uia_parent_path"] = self.uia_parent_path
|
||||
if self.dom_selector:
|
||||
d["dom_selector"] = self.dom_selector
|
||||
if self.dom_xpath:
|
||||
d["dom_xpath"] = self.dom_xpath
|
||||
if self.dom_url_pattern:
|
||||
d["dom_url_pattern"] = self.dom_url_pattern
|
||||
d["threshold"] = self.threshold
|
||||
return d
|
||||
|
||||
|
||||
@@ -269,6 +269,22 @@ class IRBuilder:
|
||||
# (utilisé par l'ExecutionCompiler pour construire les stratégies)
|
||||
action._enrichment = enrichment
|
||||
|
||||
# Lire le snapshot UIA si l'agent Windows l'a capturé.
|
||||
# Format attendu dans l'événement :
|
||||
# evt["uia_snapshot"] = {
|
||||
# "name": "Enregistrer",
|
||||
# "control_type": "bouton",
|
||||
# "automation_id": "btnSave",
|
||||
# "parent_path": [{"name": "...", "control_type": "..."}],
|
||||
# }
|
||||
# Si présent, il est fusionné dans _enrichment pour que
|
||||
# l'ExecutionCompiler puisse créer une stratégie UIA prioritaire.
|
||||
uia_snapshot = evt.get("uia_snapshot")
|
||||
if uia_snapshot and isinstance(uia_snapshot, dict):
|
||||
if not hasattr(action, "_enrichment") or action._enrichment is None:
|
||||
action._enrichment = {}
|
||||
action._enrichment["uia_snapshot"] = uia_snapshot
|
||||
|
||||
return action
|
||||
|
||||
elif evt_type == "text_input":
|
||||
|
||||
349
tests/unit/test_v4_wiring.py
Normal file
349
tests/unit/test_v4_wiring.py
Normal file
@@ -0,0 +1,349 @@
|
||||
"""
|
||||
Tests de câblage complet V4 :
|
||||
- SurfaceClassifier + ExecutionCompiler : paramètres adaptés par surface
|
||||
- IRBuilder lit uia_snapshot depuis les événements
|
||||
- ExecutionCompiler crée une stratégie UIA quand dispo
|
||||
- execution_plan_runner propage uia_target dans target_spec
|
||||
- Pipeline E2E : RawTrace (avec UIA) → WorkflowIR → Plan → action runtime
|
||||
"""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
_ROOT = str(Path(__file__).resolve().parents[2])
|
||||
if _ROOT not in sys.path:
|
||||
sys.path.insert(0, _ROOT)
|
||||
|
||||
from core.workflow.workflow_ir import WorkflowIR, Step, Action
|
||||
from core.workflow.execution_plan import ExecutionPlan, ExecutionNode, ResolutionStrategy
|
||||
from core.workflow.execution_compiler import ExecutionCompiler
|
||||
from core.workflow.surface_classifier import SurfaceClassifier, SurfaceProfile, SurfaceType
|
||||
from core.workflow.ir_builder import IRBuilder
|
||||
from agent_v0.server_v1.execution_plan_runner import (
|
||||
execution_node_to_action,
|
||||
execution_plan_to_actions,
|
||||
_strategy_to_target_spec,
|
||||
)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# ExecutionCompiler avec SurfaceProfile
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestCompilerWithSurfaceProfile:
    """Click timeout and retry count must follow the SurfaceProfile, if any."""

    @staticmethod
    def _one_click_workflow(target, anchor_hint):
        """Build a WorkflowIR whose single step holds one click action."""
        workflow = WorkflowIR.new("Test")
        click_spec = {"type": "click", "target": target, "anchor_hint": anchor_hint}
        workflow.add_step("Clic", actions=[click_spec])
        return workflow

    @staticmethod
    def _first_click_node(plan):
        """Return the first click node of a compiled plan."""
        click_nodes = [node for node in plan.nodes if node.action_type == "click"]
        return click_nodes[0]

    def test_profil_citrix_impose_timeouts_longs(self):
        """Citrix profile -> long timeout, three retries."""
        workflow = self._one_click_workflow("Bouton", "OK")
        citrix_profile = SurfaceProfile(
            surface_type=SurfaceType.CITRIX,
            timeout_click_ms=15000,
            max_retries=3,
            ocr_threshold=0.65,
        )

        compiled = ExecutionCompiler().compile(workflow, surface_profile=citrix_profile)
        node = self._first_click_node(compiled)

        assert node.timeout_ms == 15000
        assert node.max_retries == 3

    def test_profil_web_impose_timeouts_courts(self):
        """Web profile -> short timeout, a single retry."""
        workflow = self._one_click_workflow("X", "Login")
        web_profile = SurfaceProfile(
            surface_type=SurfaceType.WEB_LOCAL,
            timeout_click_ms=5000,
            max_retries=1,
        )

        compiled = ExecutionCompiler().compile(workflow, surface_profile=web_profile)
        node = self._first_click_node(compiled)

        assert node.timeout_ms == 5000
        assert node.max_retries == 1

    def test_sans_profil_utilise_defauts(self):
        """Without a surface_profile the compiler keeps its default values."""
        workflow = self._one_click_workflow("X", "Y")

        compiled = ExecutionCompiler().compile(workflow)
        node = self._first_click_node(compiled)

        assert node.timeout_ms == 10000  # default
        assert node.max_retries == 2  # default
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Stratégie UIA dans la compilation
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestUiaStrategyCompilation:
    """Selection of the UIA strategy by ExecutionCompiler.

    The primary strategy of a click must be ``uia`` only when the action
    carries a UIA snapshot and the surface profile allows UIA; otherwise
    OCR is expected to be primary in these fixtures.
    """

    def _make_ir_with_uia(self):
        """Create a WorkflowIR whose single click action carries a uia_snapshot."""
        ir = WorkflowIR.new("Test UIA")
        action = Action(
            type="click",
            target="Bloc-notes",
            anchor_hint="Enregistrer",
        )
        # Simulate the enrichment normally attached during recording,
        # including the UIA snapshot.
        action._enrichment = {
            "by_text": "Enregistrer",
            "anchor_image_base64": "fake_crop_data",
            "vlm_description": "Le bouton Enregistrer du menu Fichier",
            "uia_snapshot": {
                "name": "Enregistrer",
                "control_type": "bouton",
                "automation_id": "btnSave",
                "parent_path": [
                    {"name": "Bloc-notes", "control_type": "fenêtre"},
                    {"name": "Fichier", "control_type": "menu"},
                ],
            },
        }
        step = Step(step_id="s1", intent="Sauvegarder", actions=[action])
        ir.steps.append(step)
        return ir

    def test_uia_strategie_creee_si_surface_windows(self):
        """On native Windows with UIA available, the UIA strategy is primary."""
        ir = self._make_ir_with_uia()
        profile = SurfaceProfile(
            surface_type=SurfaceType.WINDOWS_NATIVE,
            uia_available=True,
        )

        compiler = ExecutionCompiler()
        plan = compiler.compile(ir, surface_profile=profile)

        click = [n for n in plan.nodes if n.action_type == "click"][0]
        assert click.strategy_primary is not None
        assert click.strategy_primary.method == "uia"
        assert click.strategy_primary.uia_name == "Enregistrer"
        assert click.strategy_primary.uia_control_type == "bouton"

    def test_uia_desactive_sur_citrix(self):
        """On Citrix, UIA is ignored even when a snapshot is present.

        Both values of ``uia_available`` are exercised: with ``False`` alone
        the test would still pass if the compiler stopped checking the
        surface type, so the ``True`` case is what pins the Citrix gate.
        """
        for uia_available in (False, True):
            # Fresh IR per iteration so the two compilations are independent.
            ir = self._make_ir_with_uia()
            profile = SurfaceProfile(
                surface_type=SurfaceType.CITRIX,
                uia_available=uia_available,
            )

            compiler = ExecutionCompiler()
            plan = compiler.compile(ir, surface_profile=profile)

            click = [n for n in plan.nodes if n.action_type == "click"][0]
            assert click.strategy_primary.method != "uia"
            # OCR is primary (text is available in the enrichment)
            assert click.strategy_primary.method == "ocr"

    def test_uia_fallback_sur_ocr_si_uia_manquant(self):
        """Without a uia_snapshot, OCR is primary even on native Windows."""
        ir = WorkflowIR.new("Test")
        action = Action(
            type="click",
            target="Fichier",
            anchor_hint="Fichier",
        )
        action._enrichment = {
            "by_text": "Fichier",
            "vlm_description": "Menu Fichier",
        }
        step = Step(step_id="s1", intent="Ouvrir menu", actions=[action])
        ir.steps.append(step)

        profile = SurfaceProfile(
            surface_type=SurfaceType.WINDOWS_NATIVE,
            uia_available=True,
        )

        compiler = ExecutionCompiler()
        plan = compiler.compile(ir, surface_profile=profile)

        click = [n for n in plan.nodes if n.action_type == "click"][0]
        assert click.strategy_primary.method == "ocr"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# IRBuilder lit uia_snapshot depuis les événements
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestIRBuilderLitUiaSnapshot:
    """IRBuilder must carry an event's uia_snapshot into Action._enrichment."""

    def test_ir_builder_propage_uia_snapshot(self):
        """An event with uia_snapshot -> Action._enrichment contains uia_snapshot."""
        events = [
            {
                "event": {
                    "type": "mouse_click",
                    "pos": [500, 300],
                    "window": {"title": "Bloc-notes"},
                    "timestamp": 100.0,
                    "uia_snapshot": {
                        "name": "Enregistrer",
                        "control_type": "bouton",
                        "automation_id": "btnSave",
                        "parent_path": [{"name": "Fichier", "control_type": "menu"}],
                    },
                }
            }
        ]

        # NOTE(review): port "99999" is presumably chosen so that no local
        # model can be reached during the build — confirm against IRBuilder.
        builder = IRBuilder(gemma4_port="99999")
        ir = builder.build(events, name="Test")

        # Take the FIRST click across all steps. A nested loop with a bare
        # `break` only leaves the inner loop, so a click in a later step
        # would silently replace the one under test.
        found_action = next(
            (
                action
                for step in ir.steps
                for action in step.actions
                if action.type == "click"
            ),
            None,
        )

        assert found_action is not None
        enrichment = getattr(found_action, "_enrichment", None) or {}
        assert "uia_snapshot" in enrichment
        assert enrichment["uia_snapshot"]["name"] == "Enregistrer"
        assert enrichment["uia_snapshot"]["control_type"] == "bouton"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# execution_plan_runner propage uia_target dans target_spec
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestUiaTargetPropagation:
    """The plan runner must expose UIA data and the resolve order in target_spec."""

    def test_strategy_uia_produit_uia_target(self):
        """A primary UIA strategy -> target_spec contains uia_target."""
        uia_primary = ResolutionStrategy(
            method="uia",
            uia_name="Enregistrer",
            uia_control_type="bouton",
            uia_automation_id="btnSave",
            uia_parent_path=[{"name": "Fichier", "control_type": "menu"}],
        )
        ocr_fallback = ResolutionStrategy(method="ocr", target_text="Enregistrer")
        vlm_fallback = ResolutionStrategy(method="vlm", vlm_description="bouton Enregistrer")

        target_spec = _strategy_to_target_spec(uia_primary, [ocr_fallback, vlm_fallback])

        assert "uia_target" in target_spec
        uia_target = target_spec["uia_target"]
        assert uia_target["name"] == "Enregistrer"
        assert uia_target["control_type"] == "bouton"
        assert uia_target["automation_id"] == "btnSave"

        order = target_spec["resolve_order"]
        assert order[0] == "uia"
        assert "ocr" in order
        assert "vlm" in order

    def test_pas_de_uia_target_si_pas_de_stratégie(self):
        """No UIA strategy -> no uia_target and no 'uia' in resolve_order."""
        ocr_only = ResolutionStrategy(method="ocr", target_text="test")

        target_spec = _strategy_to_target_spec(ocr_only, [])

        assert "uia_target" not in target_spec
        assert "uia" not in target_spec.get("resolve_order", [])

    def test_execution_node_to_action_avec_uia(self):
        """An ExecutionNode with a UIA strategy yields a complete runtime action."""
        uia_primary = ResolutionStrategy(
            method="uia",
            uia_name="Enregistrer",
            uia_control_type="bouton",
        )
        ocr_fallback = ResolutionStrategy(method="ocr", target_text="Enregistrer")
        click_node = ExecutionNode(
            node_id="n1",
            action_type="click",
            intent="Cliquer Enregistrer",
            strategy_primary=uia_primary,
            strategy_fallbacks=[ocr_fallback],
        )

        runtime_action = execution_node_to_action(click_node)

        assert runtime_action is not None
        assert runtime_action["type"] == "click"
        assert "uia_target" in runtime_action["target_spec"]
        assert runtime_action["target_spec"]["uia_target"]["name"] == "Enregistrer"
        assert runtime_action["target_spec"]["resolve_order"] == ["uia", "ocr"]
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Pipeline E2E : événement avec UIA → action runtime avec uia_target
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestPipelineE2EUia:
    """End-to-end wiring: recorded event with UIA -> runtime action with uia_target."""

    def test_pipeline_complet_uia(self):
        """RawTrace (with uia_snapshot) -> WorkflowIR -> Plan -> runtime action."""
        # One simulated click recorded on a native Windows application.
        recorded_snapshot = {
            "name": "Enregistrer",
            "control_type": "bouton",
            "automation_id": "btnSave",
            "parent_path": [
                {"name": "Bloc-notes", "control_type": "fenêtre"},
            ],
        }
        raw_events = [
            {
                "event": {
                    "type": "mouse_click",
                    "pos": [500, 300],
                    "window": {"title": "Bloc-notes"},
                    "timestamp": 100.0,
                    "uia_snapshot": recorded_snapshot,
                }
            }
        ]

        # Stage 1: events -> WorkflowIR
        workflow = IRBuilder(gemma4_port="99999").build(raw_events, name="Test E2E UIA")

        # Stage 2: WorkflowIR -> ExecutionPlan, compiled for native Windows
        native_profile = SurfaceProfile(
            surface_type=SurfaceType.WINDOWS_NATIVE,
            uia_available=True,
            timeout_click_ms=8000,
            max_retries=2,
        )
        compiled_plan = ExecutionCompiler().compile(workflow, surface_profile=native_profile)

        # Stage 3: ExecutionPlan -> runtime actions
        runtime_actions = execution_plan_to_actions(compiled_plan)

        # The final action must carry all the UIA data.
        clicks = [entry for entry in runtime_actions if entry["type"] == "click"]
        assert len(clicks) == 1

        click = clicks[0]
        assert "target_spec" in click
        target_spec = click["target_spec"]

        assert "resolve_order" in target_spec
        assert target_spec["resolve_order"][0] == "uia"
        assert "uia_target" in target_spec
        assert target_spec["uia_target"]["name"] == "Enregistrer"
        assert target_spec["uia_target"]["control_type"] == "bouton"
        assert click.get("timeout_ms") == 8000
        assert click.get("max_retries") == 2
|
||||
Reference in New Issue
Block a user