feat: runtime V4 — endpoints /workflow/compile et /replay/plan

Pipeline V4 complet disponible en API :
  RawTrace → /workflow/compile → WorkflowIR + ExecutionPlan → /replay/plan → Runtime

- execution_plan_runner.py : adaptateur ExecutionNode → action executor
- Substitution variables {var} dans target/text
- Fusion stratégies primary + fallbacks (OCR, template, VLM)
- Clicks: coordonnées neutralisées, resolve_engine trouve au runtime
- 35 nouveaux tests (conversion, substitution, injection queue, pipeline E2E)
- Ancien chemin build_replay_from_raw_events() préservé (coexistence)

208 tests passent, 0 régression.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dom
2026-04-10 08:09:05 +02:00
parent bffcfb2db3
commit 2ac781343a
3 changed files with 1282 additions and 0 deletions

View File

@@ -32,6 +32,10 @@ from .replay_learner import ReplayLearner
from .audit_trail import AuditTrail, AuditEntry
from .stream_processor import StreamProcessor, build_replay_from_raw_events, enrich_click_from_screenshot
from .worker_stream import StreamWorker
from .execution_plan_runner import (
execution_plan_to_actions,
inject_plan_into_queue,
)
# Instance globale du vérificateur de replay (comparaison screenshots avant/après)
_replay_verifier = ReplayVerifier()
@@ -438,6 +442,34 @@ class SingleActionRequest(BaseModel):
machine_id: Optional[str] = None # Machine cible (multi-machine)
class PlanReplayRequest(BaseModel):
"""Requête de lancement de replay depuis un ExecutionPlan (pipeline V4).
Deux modes supportés :
1. Référence par ID : fournir `plan_id` → le serveur charge le plan
depuis `data/plans/{plan_id}.json`.
2. Plan inline : fournir `plan` (dict JSON) → utilisé directement.
Les `variables` écrasent celles du plan.
"""
plan_id: Optional[str] = None
plan: Optional[Dict[str, Any]] = None
variables: Optional[Dict[str, Any]] = None
session_id: str = ""
machine_id: Optional[str] = None
class CompileWorkflowRequest(BaseModel):
"""Requête de compilation d'une session en WorkflowIR + ExecutionPlan."""
session_id: str
machine_id: str = "default"
domain: str = "generic"
name: str = ""
target_machine: str = ""
target_resolution: str = "1280x800"
params: Optional[Dict[str, str]] = None
class ReplayResultReport(BaseModel):
"""Rapport de résultat d'exécution d'une action par l'Agent V1."""
session_id: str
@@ -1906,6 +1938,369 @@ async def enqueue_single_action(request: SingleActionRequest):
}
# =========================================================================
# Pipeline V4 — ExecutionPlan → Runtime (nouveau chemin)
# =========================================================================
# RawTrace → IRBuilder → WorkflowIR → ExecutionCompiler → ExecutionPlan → Runtime
#
# Ces deux endpoints sont optionnels et coexistent avec le chemin legacy
# (build_replay_from_raw_events() dans stream_processor.py). Ils permettent
# de lancer un replay depuis un plan pré-compilé, déterministe et borné.
# =========================================================================
# Répertoires par défaut pour la persistance du pipeline V4
WORKFLOWS_IR_DIR = ROOT_DIR / "data" / "workflows_ir"
EXECUTION_PLANS_DIR = ROOT_DIR / "data" / "plans"
def _load_execution_plan(plan_id: str):
"""Charger un ExecutionPlan depuis le disque (data/plans/{id}.json)."""
from core.workflow.execution_plan import ExecutionPlan
# Chemin direct
candidate = EXECUTION_PLANS_DIR / f"{plan_id}.json"
if candidate.exists():
return ExecutionPlan.load(str(candidate))
# Fallback : recherche par prefix (plan_id sans _vN)
if EXECUTION_PLANS_DIR.exists():
for p in EXECUTION_PLANS_DIR.glob(f"{plan_id}*.json"):
return ExecutionPlan.load(str(p))
return None
@app.post("/api/v1/traces/stream/replay/plan")
async def launch_replay_from_plan(request: PlanReplayRequest):
"""Lancer un replay depuis un ExecutionPlan (pipeline V4).
Pipeline :
1. Charger le plan (depuis plan_id sur disque ou depuis le body inline)
2. Convertir chaque ExecutionNode en action replay via
execution_plan_runner.execution_plan_to_actions()
3. Appliquer les variables (body > plan.variables)
4. Valider chaque action (sécurité HIGH)
5. Injecter dans la queue de replay de la session Agent V1 cible
Pas de dépendance au VLM au runtime pour les cas normaux — les stratégies
de résolution sont déjà pré-compilées dans le plan.
"""
from core.workflow.execution_plan import ExecutionPlan
# ── 1. Charger / parser le plan ──
plan = None
if request.plan_id:
plan = _load_execution_plan(request.plan_id)
if plan is None:
raise HTTPException(
status_code=404,
detail=f"ExecutionPlan '{request.plan_id}' introuvable dans "
f"{EXECUTION_PLANS_DIR}/",
)
elif request.plan:
try:
plan = ExecutionPlan.from_dict(request.plan)
except Exception as e:
raise HTTPException(
status_code=400,
detail=f"Impossible de parser le plan inline : {e}",
)
else:
raise HTTPException(
status_code=400,
detail="Fournir 'plan_id' (référence) ou 'plan' (inline).",
)
if not plan.nodes:
raise HTTPException(
status_code=400,
detail=f"ExecutionPlan '{plan.plan_id}' : aucun nœud à exécuter.",
)
# ── 2. Convertir les nœuds en actions replay ──
try:
actions = execution_plan_to_actions(
plan=plan,
variables=request.variables,
id_prefix="act_plan",
)
except Exception as e:
logger.exception("Erreur conversion ExecutionPlan → actions")
raise HTTPException(
status_code=500,
detail=f"Erreur de conversion du plan : {e}",
)
if not actions:
raise HTTPException(
status_code=400,
detail=f"ExecutionPlan '{plan.plan_id}' : aucune action exploitable "
f"après conversion ({plan.total_nodes} nœuds).",
)
# Limite de sécurité
if len(actions) > MAX_ACTIONS_PER_REPLAY:
raise HTTPException(
status_code=400,
detail=f"Trop d'actions ({len(actions)} > {MAX_ACTIONS_PER_REPLAY}).",
)
# ── 3. Validation de chaque action (sécurité HIGH) ──
validated: List[Dict[str, Any]] = []
for i, action in enumerate(actions):
error = _validate_replay_action(action)
if error:
logger.warning(
"replay/plan : action #%d invalide (%s), suppression", i, error,
)
continue
validated.append(action)
if not validated:
raise HTTPException(
status_code=400,
detail=f"ExecutionPlan '{plan.plan_id}' : toutes les actions "
f"ont été rejetées par la validation.",
)
# ── 4. Trouver la session Agent V1 cible ──
target_session_id = request.session_id
if not target_session_id or target_session_id.startswith("chat_"):
active_session = _find_active_agent_session(machine_id=request.machine_id)
if active_session:
target_session_id = active_session
else:
machine_hint = (
f" sur la machine '{request.machine_id}'" if request.machine_id else ""
)
raise HTTPException(
status_code=404,
detail=f"Aucune session Agent V1 active{machine_hint}. "
"Lancez l'Agent V1 sur le PC cible.",
)
# ── 5. Injecter dans la queue de replay ──
replay_id = f"replay_plan_{uuid.uuid4().hex[:8]}"
session_obj = processor.session_manager.get_session(target_session_id)
resolved_machine_id = (
request.machine_id
or (session_obj.machine_id if session_obj else "default")
)
with _replay_lock:
_replay_queues[target_session_id] = list(validated)
_replay_states[replay_id] = _create_replay_state(
replay_id=replay_id,
workflow_id=f"execution_plan:{plan.plan_id}",
session_id=target_session_id,
total_actions=len(validated),
params=dict(plan.variables or {}),
machine_id=resolved_machine_id,
)
if resolved_machine_id and resolved_machine_id != "default":
_machine_replay_target[resolved_machine_id] = target_session_id
# Signaler au worker VLM qu'un replay est actif → se suspendre
_set_replay_lock(replay_id)
logger.info(
"Replay plan V4 démarré : %s | plan=%s (v%d) | session=%s | "
"machine=%s | %d actions (total_nodes=%d, rejected=%d)",
replay_id, plan.plan_id, plan.version, target_session_id,
resolved_machine_id, len(validated), plan.total_nodes,
len(actions) - len(validated),
)
return {
"replay_id": replay_id,
"status": "running",
"plan_id": plan.plan_id,
"workflow_id": plan.workflow_id,
"plan_version": plan.version,
"session_id": target_session_id,
"machine_id": resolved_machine_id,
"total_actions": len(validated),
"total_nodes": plan.total_nodes,
"rejected_actions": len(actions) - len(validated),
"stats": {
"nodes_with_ocr": plan.nodes_with_ocr,
"nodes_with_template": plan.nodes_with_template,
"nodes_with_vlm": plan.nodes_with_vlm,
"estimated_duration_s": plan.estimated_duration_s,
},
}
@app.post("/api/v1/traces/stream/workflow/compile")
async def compile_workflow_endpoint(request: CompileWorkflowRequest):
"""Compiler une session en WorkflowIR + ExecutionPlan (pipeline V4).
Pipeline :
1. Charger les événements bruts de la session (live_events.jsonl)
2. IRBuilder.build() → WorkflowIR (connaissance métier)
3. WorkflowIR.save() → persistance dans data/workflows_ir/
4. ExecutionCompiler.compile() → ExecutionPlan (plan déterministe)
5. ExecutionPlan.save() → persistance dans data/plans/
6. Retourner les IDs pour lancer ensuite /replay/plan
Cette endpoint NE LANCE PAS le replay — elle prépare le plan.
L'appelant doit ensuite appeler /replay/plan avec plan_id.
"""
from core.workflow.execution_compiler import ExecutionCompiler
from core.workflow.ir_builder import IRBuilder
session_id = request.session_id
machine_id = request.machine_id or "default"
if not session_id:
raise HTTPException(status_code=400, detail="session_id requis")
# ── 1. Trouver le fichier live_events.jsonl de la session ──
events_file = None
if machine_id and machine_id != "default":
candidate = LIVE_SESSIONS_DIR / machine_id / session_id / "live_events.jsonl"
if candidate.exists():
events_file = candidate
if not events_file and LIVE_SESSIONS_DIR.exists():
for machine_dir in LIVE_SESSIONS_DIR.iterdir():
if not machine_dir.is_dir():
continue
candidate = machine_dir / session_id / "live_events.jsonl"
if candidate.exists():
events_file = candidate
if machine_id == "default":
machine_id = machine_dir.name
break
if not events_file:
candidate = LIVE_SESSIONS_DIR / session_id / "live_events.jsonl"
if candidate.exists():
events_file = candidate
if not events_file:
raise HTTPException(
status_code=404,
detail=f"Session '{session_id}' : live_events.jsonl introuvable.",
)
# ── 2. Charger les événements ──
raw_events: List[Dict[str, Any]] = []
try:
for line in events_file.read_text(encoding="utf-8").splitlines():
line = line.strip()
if not line:
continue
try:
raw_events.append(json.loads(line))
except json.JSONDecodeError:
continue
except Exception as e:
raise HTTPException(
status_code=500,
detail=f"Erreur lecture events : {e}",
)
if not raw_events:
raise HTTPException(
status_code=400,
detail=f"Session '{session_id}' : aucun événement.",
)
# ── 3. IRBuilder → WorkflowIR ──
try:
builder = IRBuilder()
ir = builder.build(
events=raw_events,
session_id=session_id,
session_dir=str(events_file.parent),
domain=request.domain,
name=request.name,
)
except Exception as e:
logger.exception("Erreur IRBuilder.build()")
raise HTTPException(
status_code=500,
detail=f"Erreur de construction WorkflowIR : {e}",
)
if not ir.steps:
raise HTTPException(
status_code=400,
detail=f"Session '{session_id}' : aucune étape détectée "
f"(pipeline IRBuilder a produit un workflow vide).",
)
# ── 4. Sauvegarder le WorkflowIR ──
try:
WORKFLOWS_IR_DIR.mkdir(parents=True, exist_ok=True)
ir_path = ir.save(str(WORKFLOWS_IR_DIR))
except Exception as e:
logger.exception("Erreur sauvegarde WorkflowIR")
raise HTTPException(
status_code=500,
detail=f"Erreur sauvegarde WorkflowIR : {e}",
)
# ── 5. ExecutionCompiler → ExecutionPlan ──
try:
compiler = ExecutionCompiler()
plan = compiler.compile(
ir=ir,
target_machine=request.target_machine,
target_resolution=request.target_resolution,
params=request.params,
)
except Exception as e:
logger.exception("Erreur ExecutionCompiler.compile()")
raise HTTPException(
status_code=500,
detail=f"Erreur de compilation du plan : {e}",
)
# ── 6. Sauvegarder l'ExecutionPlan ──
try:
EXECUTION_PLANS_DIR.mkdir(parents=True, exist_ok=True)
plan_path = plan.save(str(EXECUTION_PLANS_DIR))
except Exception as e:
logger.exception("Erreur sauvegarde ExecutionPlan")
raise HTTPException(
status_code=500,
detail=f"Erreur sauvegarde ExecutionPlan : {e}",
)
logger.info(
"Compilation V4 : session=%s → workflow_ir=%s (v%d) → plan=%s "
"(%d nœuds, OCR=%d, template=%d, VLM=%d)",
session_id, ir.workflow_id, ir.version, plan.plan_id,
plan.total_nodes, plan.nodes_with_ocr, plan.nodes_with_template,
plan.nodes_with_vlm,
)
return {
"session_id": session_id,
"machine_id": machine_id,
"workflow_id": ir.workflow_id,
"workflow_version": ir.version,
"workflow_ir_path": str(ir_path),
"workflow_name": ir.name,
"domain": ir.domain,
"steps": len(ir.steps),
"variables": len(ir.variables),
"applications": ir.applications,
"plan_id": plan.plan_id,
"plan_path": str(plan_path),
"total_nodes": plan.total_nodes,
"stats": {
"nodes_with_ocr": plan.nodes_with_ocr,
"nodes_with_template": plan.nodes_with_template,
"nodes_with_vlm": plan.nodes_with_vlm,
"estimated_duration_s": plan.estimated_duration_s,
},
}
# =========================================================================
# Pre-check écran — Vérification pré-action par embedding CLIP
# =========================================================================

View File

@@ -0,0 +1,322 @@
# agent_v0/server_v1/execution_plan_runner.py
"""
ExecutionPlanRunner — Adaptateur ExecutionPlan → actions replay.
Pièce d'intégration du pipeline V4 :
RawTrace → IRBuilder → WorkflowIR → ExecutionCompiler → ExecutionPlan → Runtime
Ce module convertit un `ExecutionPlan` (plan pré-compilé, déterministe) en
liste d'actions au format attendu par l'executor replay actuel (clé x_pct,
y_pct, target_spec, etc.), puis les injecte dans `_replay_queues`.
L'ancien chemin `build_replay_from_raw_events()` dans stream_processor.py
reste inchangé — les deux chemins coexistent pendant la transition.
Format d'action produit (compatible executor existant) :
{
"action_id": "act_...",
"type": "click",
"x_pct": 0.5,
"y_pct": 0.3,
"visual_mode": True,
"target_spec": {
"by_text": "...",
"window_title": "...",
"vlm_description": "...",
"anchor_image_base64": "...",
},
"expected_window_title": "...",
}
Auteur: Dom, Alice - Avril 2026
"""
from __future__ import annotations
import logging
import re
import threading
import uuid
from typing import Any, Dict, List, Optional
from core.workflow.execution_plan import (
ExecutionNode,
ExecutionPlan,
ResolutionStrategy,
)
logger = logging.getLogger(__name__)
# =========================================================================
# Substitution de variables
# =========================================================================
# Le WorkflowIR utilise la syntaxe `{var}` dans les champs texte.
# Ici on supporte les deux : `{var}` (IR natif) et `${var}` (replay legacy).
_VARIABLE_RE_CURLY = re.compile(r"\{(\w+)\}")
_VARIABLE_RE_DOLLAR = re.compile(r"\$\{(\w+)\}")
def substitute_variables(text: str, variables: Dict[str, Any]) -> str:
"""Remplacer `{var}` et `${var}` par leurs valeurs.
Priorité : variables fournies > placeholder brut (inchangé si inconnu).
"""
if not text or not variables:
return text
def replacer(match: "re.Match[str]") -> str:
var_name = match.group(1)
if var_name in variables:
return str(variables[var_name])
return match.group(0)
text = _VARIABLE_RE_DOLLAR.sub(replacer, text)
text = _VARIABLE_RE_CURLY.sub(replacer, text)
return text
# =========================================================================
# Conversion ExecutionNode → action replay
# =========================================================================
def _strategy_to_target_spec(
strategy: Optional[ResolutionStrategy],
fallbacks: Optional[List[ResolutionStrategy]] = None,
intent: str = "",
) -> Dict[str, Any]:
"""Construire un `target_spec` depuis les stratégies de résolution.
Fusionne la primaire et les fallbacks pour donner un maximum d'indices
au resolve_engine :
- OCR → by_text
- template → anchor_image_base64 (depuis anchor_b64)
- VLM → vlm_description
Règle : la stratégie primaire dicte la méthode préférée, mais on expose
toutes les ancres connues pour que le runtime puisse retomber dessus.
"""
spec: Dict[str, Any] = {}
all_strategies: List[ResolutionStrategy] = []
if strategy is not None:
all_strategies.append(strategy)
if fallbacks:
all_strategies.extend(fallbacks)
by_text_candidate = ""
anchor_candidate = ""
vlm_candidate = ""
for strat in all_strategies:
if not strat:
continue
if strat.method == "ocr" and strat.target_text and not by_text_candidate:
by_text_candidate = strat.target_text
elif strat.method == "template":
if strat.anchor_b64 and not anchor_candidate:
anchor_candidate = strat.anchor_b64
if strat.target_text and not by_text_candidate:
by_text_candidate = strat.target_text
elif strat.method == "vlm" and strat.vlm_description and not vlm_candidate:
vlm_candidate = strat.vlm_description
if by_text_candidate:
spec["by_text"] = by_text_candidate
if anchor_candidate:
spec["anchor_image_base64"] = anchor_candidate
if vlm_candidate:
spec["vlm_description"] = vlm_candidate
elif intent and "vlm_description" not in spec:
# L'intention métier devient le prompt VLM de dernier recours
spec["vlm_description"] = intent
return spec
def execution_node_to_action(
node: ExecutionNode,
variables: Optional[Dict[str, Any]] = None,
id_prefix: str = "act_plan",
) -> Optional[Dict[str, Any]]:
"""Convertir un `ExecutionNode` en action replay.
Retourne `None` si le nœud n'est pas exécutable (type inconnu).
Args:
node: Le nœud à convertir.
variables: Dictionnaire de variables pour substituer les {var}.
id_prefix: Préfixe pour l'action_id générée.
"""
variables = variables or {}
action: Dict[str, Any] = {
"action_id": f"{id_prefix}_{uuid.uuid4().hex[:8]}",
"plan_node_id": node.node_id,
}
if node.intent:
action["intention"] = node.intent
if node.step_id:
action["plan_step_id"] = node.step_id
if node.is_optional:
action["is_optional"] = True
# Métadonnées d'exécution utiles au runtime
if node.timeout_ms:
action["timeout_ms"] = node.timeout_ms
if node.max_retries:
action["max_retries"] = node.max_retries
if node.recovery_action:
action["recovery_action"] = node.recovery_action
if node.success_condition:
action["success_condition"] = node.success_condition.to_dict()
action_type = node.action_type
if action_type == "click":
action["type"] = "click"
strategy = node.strategy_primary
fallbacks = node.strategy_fallbacks or []
# ── Déduction des coordonnées depuis la stratégie primaire ──
# - OCR : pas de coordonnées (le runtime trouve via OCR)
# - template : l'anchor sera utilisé au runtime
# - VLM : la description sera utilisée au runtime
# Dans tous les cas le resolve_engine retrouve les pixels au replay.
# On expose néanmoins un centre (0.5, 0.5) neutre pour rester
# compatible avec les validations de queue existantes.
action["x_pct"] = 0.5
action["y_pct"] = 0.5
action["visual_mode"] = True
target_spec = _strategy_to_target_spec(
strategy=strategy,
fallbacks=fallbacks,
intent=node.intent,
)
# Titre fenêtre attendu (pour la vérification post-action / pre-check)
if node.success_condition and node.success_condition.expected_title:
action["expected_window_title"] = node.success_condition.expected_title
target_spec.setdefault("window_title", node.success_condition.expected_title)
if target_spec:
action["target_spec"] = target_spec
elif action_type == "type":
action["type"] = "type"
text = node.text or ""
# Substituer les variables avant d'envoyer (ex: {patient} → "DUPONT")
action["text"] = substitute_variables(text, variables)
if node.variable_name:
action["variable_name"] = node.variable_name
elif action_type in ("key_combo", "key_press"):
action["type"] = "key_combo"
keys = list(node.keys or [])
if not keys:
return None
action["keys"] = keys
elif action_type == "wait":
action["type"] = "wait"
duration = node.duration_ms or 1000
action["duration_ms"] = int(duration)
elif action_type == "scroll":
action["type"] = "scroll"
# Les stratégies peuvent contenir une zone — pas exploitée ici,
# le scroll est implicitement sur la fenêtre active.
action["delta"] = -3
else:
logger.debug("execution_node_to_action: type inconnu '%s' ignoré", action_type)
return None
return action
def execution_plan_to_actions(
plan: ExecutionPlan,
variables: Optional[Dict[str, Any]] = None,
id_prefix: str = "act_plan",
) -> List[Dict[str, Any]]:
"""Convertir un `ExecutionPlan` complet en liste d'actions replay.
Les variables passées en argument écrasent celles du plan.
"""
merged_vars: Dict[str, Any] = dict(plan.variables or {})
if variables:
merged_vars.update(variables)
actions: List[Dict[str, Any]] = []
for node in plan.nodes:
action = execution_node_to_action(
node=node,
variables=merged_vars,
id_prefix=id_prefix,
)
if action is not None:
actions.append(action)
logger.info(
"execution_plan_to_actions(%s) : %d nœuds → %d actions replay "
"(vars=%d)",
plan.plan_id, plan.total_nodes, len(actions), len(merged_vars),
)
return actions
# =========================================================================
# Injection dans la queue de replay
# =========================================================================
def inject_plan_into_queue(
plan: ExecutionPlan,
session_id: str,
replay_queues: Dict[str, List[Dict[str, Any]]],
variables: Optional[Dict[str, Any]] = None,
lock: Optional[threading.Lock] = None,
replace: bool = True,
id_prefix: str = "act_plan",
) -> List[Dict[str, Any]]:
"""Injecter un `ExecutionPlan` dans la queue de replay d'une session.
Args:
plan: Le plan à exécuter.
session_id: La session Agent V1 cible.
replay_queues: Le dict global `_replay_queues` partagé par le serveur.
variables: Variables à substituer dans les actions.
lock: Verrou optionnel à acquérir avant d'écrire (threadsafe).
replace: Si True (défaut), remplace la queue existante. Sinon, append.
id_prefix: Préfixe pour les action_id générés.
Returns:
La liste des actions injectées (après substitution).
"""
actions = execution_plan_to_actions(
plan=plan, variables=variables, id_prefix=id_prefix,
)
def _write() -> None:
if replace:
replay_queues[session_id] = list(actions)
else:
replay_queues[session_id].extend(actions)
if lock is not None:
with lock:
_write()
else:
_write()
logger.info(
"inject_plan_into_queue(%s) : %d actions injectées dans la queue "
"de la session '%s' (replace=%s)",
plan.plan_id, len(actions), session_id, replace,
)
return actions