feat: runtime V4 — endpoints /workflow/compile et /replay/plan
Pipeline V4 complet disponible en API :
RawTrace → /workflow/compile → WorkflowIR + ExecutionPlan → /replay/plan → Runtime
- execution_plan_runner.py : adaptateur ExecutionNode → action executor
- Substitution variables {var} dans target/text
- Fusion stratégies primary + fallbacks (OCR, template, VLM)
- Clicks: coordonnées neutralisées, resolve_engine trouve au runtime
- 35 nouveaux tests (conversion, substitution, injection queue, pipeline E2E)
- Ancien chemin build_replay_from_raw_events() préservé (coexistence)
208 tests passent, 0 régression.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
322
agent_v0/server_v1/execution_plan_runner.py
Normal file
322
agent_v0/server_v1/execution_plan_runner.py
Normal file
@@ -0,0 +1,322 @@
|
||||
# agent_v0/server_v1/execution_plan_runner.py
|
||||
"""
|
||||
ExecutionPlanRunner — Adaptateur ExecutionPlan → actions replay.
|
||||
|
||||
Pièce d'intégration du pipeline V4 :
|
||||
RawTrace → IRBuilder → WorkflowIR → ExecutionCompiler → ExecutionPlan → Runtime
|
||||
|
||||
Ce module convertit un `ExecutionPlan` (plan pré-compilé, déterministe) en
|
||||
liste d'actions au format attendu par l'executor replay actuel (clé x_pct,
|
||||
y_pct, target_spec, etc.), puis les injecte dans `_replay_queues`.
|
||||
|
||||
L'ancien chemin `build_replay_from_raw_events()` dans stream_processor.py
|
||||
reste inchangé — les deux chemins coexistent pendant la transition.
|
||||
|
||||
Format d'action produit (compatible executor existant) :
|
||||
{
|
||||
"action_id": "act_...",
|
||||
"type": "click",
|
||||
"x_pct": 0.5,
|
||||
"y_pct": 0.3,
|
||||
"visual_mode": True,
|
||||
"target_spec": {
|
||||
"by_text": "...",
|
||||
"window_title": "...",
|
||||
"vlm_description": "...",
|
||||
"anchor_image_base64": "...",
|
||||
},
|
||||
"expected_window_title": "...",
|
||||
}
|
||||
|
||||
Auteur: Dom, Alice - Avril 2026
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
import threading
|
||||
import uuid
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from core.workflow.execution_plan import (
|
||||
ExecutionNode,
|
||||
ExecutionPlan,
|
||||
ResolutionStrategy,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Substitution de variables
|
||||
# =========================================================================
|
||||
# Le WorkflowIR utilise la syntaxe `{var}` dans les champs texte.
|
||||
# Ici on supporte les deux : `{var}` (IR natif) et `${var}` (replay legacy).
|
||||
_VARIABLE_RE_CURLY = re.compile(r"\{(\w+)\}")
|
||||
_VARIABLE_RE_DOLLAR = re.compile(r"\$\{(\w+)\}")
|
||||
|
||||
|
||||
def substitute_variables(text: str, variables: Dict[str, Any]) -> str:
|
||||
"""Remplacer `{var}` et `${var}` par leurs valeurs.
|
||||
|
||||
Priorité : variables fournies > placeholder brut (inchangé si inconnu).
|
||||
"""
|
||||
if not text or not variables:
|
||||
return text
|
||||
|
||||
def replacer(match: "re.Match[str]") -> str:
|
||||
var_name = match.group(1)
|
||||
if var_name in variables:
|
||||
return str(variables[var_name])
|
||||
return match.group(0)
|
||||
|
||||
text = _VARIABLE_RE_DOLLAR.sub(replacer, text)
|
||||
text = _VARIABLE_RE_CURLY.sub(replacer, text)
|
||||
return text
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Conversion ExecutionNode → action replay
|
||||
# =========================================================================
|
||||
|
||||
|
||||
def _strategy_to_target_spec(
|
||||
strategy: Optional[ResolutionStrategy],
|
||||
fallbacks: Optional[List[ResolutionStrategy]] = None,
|
||||
intent: str = "",
|
||||
) -> Dict[str, Any]:
|
||||
"""Construire un `target_spec` depuis les stratégies de résolution.
|
||||
|
||||
Fusionne la primaire et les fallbacks pour donner un maximum d'indices
|
||||
au resolve_engine :
|
||||
- OCR → by_text
|
||||
- template → anchor_image_base64 (depuis anchor_b64)
|
||||
- VLM → vlm_description
|
||||
|
||||
Règle : la stratégie primaire dicte la méthode préférée, mais on expose
|
||||
toutes les ancres connues pour que le runtime puisse retomber dessus.
|
||||
"""
|
||||
spec: Dict[str, Any] = {}
|
||||
|
||||
all_strategies: List[ResolutionStrategy] = []
|
||||
if strategy is not None:
|
||||
all_strategies.append(strategy)
|
||||
if fallbacks:
|
||||
all_strategies.extend(fallbacks)
|
||||
|
||||
by_text_candidate = ""
|
||||
anchor_candidate = ""
|
||||
vlm_candidate = ""
|
||||
|
||||
for strat in all_strategies:
|
||||
if not strat:
|
||||
continue
|
||||
if strat.method == "ocr" and strat.target_text and not by_text_candidate:
|
||||
by_text_candidate = strat.target_text
|
||||
elif strat.method == "template":
|
||||
if strat.anchor_b64 and not anchor_candidate:
|
||||
anchor_candidate = strat.anchor_b64
|
||||
if strat.target_text and not by_text_candidate:
|
||||
by_text_candidate = strat.target_text
|
||||
elif strat.method == "vlm" and strat.vlm_description and not vlm_candidate:
|
||||
vlm_candidate = strat.vlm_description
|
||||
|
||||
if by_text_candidate:
|
||||
spec["by_text"] = by_text_candidate
|
||||
if anchor_candidate:
|
||||
spec["anchor_image_base64"] = anchor_candidate
|
||||
if vlm_candidate:
|
||||
spec["vlm_description"] = vlm_candidate
|
||||
elif intent and "vlm_description" not in spec:
|
||||
# L'intention métier devient le prompt VLM de dernier recours
|
||||
spec["vlm_description"] = intent
|
||||
|
||||
return spec
|
||||
|
||||
|
||||
def execution_node_to_action(
|
||||
node: ExecutionNode,
|
||||
variables: Optional[Dict[str, Any]] = None,
|
||||
id_prefix: str = "act_plan",
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""Convertir un `ExecutionNode` en action replay.
|
||||
|
||||
Retourne `None` si le nœud n'est pas exécutable (type inconnu).
|
||||
|
||||
Args:
|
||||
node: Le nœud à convertir.
|
||||
variables: Dictionnaire de variables pour substituer les {var}.
|
||||
id_prefix: Préfixe pour l'action_id générée.
|
||||
"""
|
||||
variables = variables or {}
|
||||
|
||||
action: Dict[str, Any] = {
|
||||
"action_id": f"{id_prefix}_{uuid.uuid4().hex[:8]}",
|
||||
"plan_node_id": node.node_id,
|
||||
}
|
||||
|
||||
if node.intent:
|
||||
action["intention"] = node.intent
|
||||
if node.step_id:
|
||||
action["plan_step_id"] = node.step_id
|
||||
if node.is_optional:
|
||||
action["is_optional"] = True
|
||||
|
||||
# Métadonnées d'exécution utiles au runtime
|
||||
if node.timeout_ms:
|
||||
action["timeout_ms"] = node.timeout_ms
|
||||
if node.max_retries:
|
||||
action["max_retries"] = node.max_retries
|
||||
if node.recovery_action:
|
||||
action["recovery_action"] = node.recovery_action
|
||||
if node.success_condition:
|
||||
action["success_condition"] = node.success_condition.to_dict()
|
||||
|
||||
action_type = node.action_type
|
||||
|
||||
if action_type == "click":
|
||||
action["type"] = "click"
|
||||
|
||||
strategy = node.strategy_primary
|
||||
fallbacks = node.strategy_fallbacks or []
|
||||
|
||||
# ── Déduction des coordonnées depuis la stratégie primaire ──
|
||||
# - OCR : pas de coordonnées (le runtime trouve via OCR)
|
||||
# - template : l'anchor sera utilisé au runtime
|
||||
# - VLM : la description sera utilisée au runtime
|
||||
# Dans tous les cas le resolve_engine retrouve les pixels au replay.
|
||||
# On expose néanmoins un centre (0.5, 0.5) neutre pour rester
|
||||
# compatible avec les validations de queue existantes.
|
||||
action["x_pct"] = 0.5
|
||||
action["y_pct"] = 0.5
|
||||
action["visual_mode"] = True
|
||||
|
||||
target_spec = _strategy_to_target_spec(
|
||||
strategy=strategy,
|
||||
fallbacks=fallbacks,
|
||||
intent=node.intent,
|
||||
)
|
||||
|
||||
# Titre fenêtre attendu (pour la vérification post-action / pre-check)
|
||||
if node.success_condition and node.success_condition.expected_title:
|
||||
action["expected_window_title"] = node.success_condition.expected_title
|
||||
target_spec.setdefault("window_title", node.success_condition.expected_title)
|
||||
|
||||
if target_spec:
|
||||
action["target_spec"] = target_spec
|
||||
|
||||
elif action_type == "type":
|
||||
action["type"] = "type"
|
||||
text = node.text or ""
|
||||
# Substituer les variables avant d'envoyer (ex: {patient} → "DUPONT")
|
||||
action["text"] = substitute_variables(text, variables)
|
||||
if node.variable_name:
|
||||
action["variable_name"] = node.variable_name
|
||||
|
||||
elif action_type in ("key_combo", "key_press"):
|
||||
action["type"] = "key_combo"
|
||||
keys = list(node.keys or [])
|
||||
if not keys:
|
||||
return None
|
||||
action["keys"] = keys
|
||||
|
||||
elif action_type == "wait":
|
||||
action["type"] = "wait"
|
||||
duration = node.duration_ms or 1000
|
||||
action["duration_ms"] = int(duration)
|
||||
|
||||
elif action_type == "scroll":
|
||||
action["type"] = "scroll"
|
||||
# Les stratégies peuvent contenir une zone — pas exploitée ici,
|
||||
# le scroll est implicitement sur la fenêtre active.
|
||||
action["delta"] = -3
|
||||
|
||||
else:
|
||||
logger.debug("execution_node_to_action: type inconnu '%s' ignoré", action_type)
|
||||
return None
|
||||
|
||||
return action
|
||||
|
||||
|
||||
def execution_plan_to_actions(
|
||||
plan: ExecutionPlan,
|
||||
variables: Optional[Dict[str, Any]] = None,
|
||||
id_prefix: str = "act_plan",
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Convertir un `ExecutionPlan` complet en liste d'actions replay.
|
||||
|
||||
Les variables passées en argument écrasent celles du plan.
|
||||
"""
|
||||
merged_vars: Dict[str, Any] = dict(plan.variables or {})
|
||||
if variables:
|
||||
merged_vars.update(variables)
|
||||
|
||||
actions: List[Dict[str, Any]] = []
|
||||
for node in plan.nodes:
|
||||
action = execution_node_to_action(
|
||||
node=node,
|
||||
variables=merged_vars,
|
||||
id_prefix=id_prefix,
|
||||
)
|
||||
if action is not None:
|
||||
actions.append(action)
|
||||
|
||||
logger.info(
|
||||
"execution_plan_to_actions(%s) : %d nœuds → %d actions replay "
|
||||
"(vars=%d)",
|
||||
plan.plan_id, plan.total_nodes, len(actions), len(merged_vars),
|
||||
)
|
||||
return actions
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Injection dans la queue de replay
|
||||
# =========================================================================
|
||||
|
||||
|
||||
def inject_plan_into_queue(
|
||||
plan: ExecutionPlan,
|
||||
session_id: str,
|
||||
replay_queues: Dict[str, List[Dict[str, Any]]],
|
||||
variables: Optional[Dict[str, Any]] = None,
|
||||
lock: Optional[threading.Lock] = None,
|
||||
replace: bool = True,
|
||||
id_prefix: str = "act_plan",
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Injecter un `ExecutionPlan` dans la queue de replay d'une session.
|
||||
|
||||
Args:
|
||||
plan: Le plan à exécuter.
|
||||
session_id: La session Agent V1 cible.
|
||||
replay_queues: Le dict global `_replay_queues` partagé par le serveur.
|
||||
variables: Variables à substituer dans les actions.
|
||||
lock: Verrou optionnel à acquérir avant d'écrire (threadsafe).
|
||||
replace: Si True (défaut), remplace la queue existante. Sinon, append.
|
||||
id_prefix: Préfixe pour les action_id générés.
|
||||
|
||||
Returns:
|
||||
La liste des actions injectées (après substitution).
|
||||
"""
|
||||
actions = execution_plan_to_actions(
|
||||
plan=plan, variables=variables, id_prefix=id_prefix,
|
||||
)
|
||||
|
||||
def _write() -> None:
|
||||
if replace:
|
||||
replay_queues[session_id] = list(actions)
|
||||
else:
|
||||
replay_queues[session_id].extend(actions)
|
||||
|
||||
if lock is not None:
|
||||
with lock:
|
||||
_write()
|
||||
else:
|
||||
_write()
|
||||
|
||||
logger.info(
|
||||
"inject_plan_into_queue(%s) : %d actions injectées dans la queue "
|
||||
"de la session '%s' (replace=%s)",
|
||||
plan.plan_id, len(actions), session_id, replace,
|
||||
)
|
||||
return actions
|
||||
Reference in New Issue
Block a user