feat: runtime V4 honore resolve_order pré-compilé (zéro VLM au runtime tant qu'une méthode plus rapide réussit)

Le resolve_engine suit désormais l'ordre de méthodes décidé par l'ExecutionCompiler
au lieu de sa cascade improvisée. C'est la pièce maîtresse du V4 :

- execution_plan_runner.py : ajout de 'resolve_order' dans target_spec
  ["ocr", "template", "vlm"] = stratégies dans l'ordre de préférence

- resolve_engine.py : _resolve_with_precompiled_order() honore l'ordre
  - Court-circuite la cascade legacy quand resolve_order est présent
  - Fallback sur la cascade si toutes les méthodes V4 échouent

- _resolve_by_ocr_text() : résolution OCR directe via docTR (~200ms)
  Chemin rapide V4 — pas de VLM pour les éléments avec texte visible

- 12 nouveaux tests : propagation resolve_order, cascade, fallback, pipeline E2E

220 tests passent (208 existants + 12 nouveaux), 0 régression.

"Le LLM compile. Le runtime exécute."

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dom
2026-04-10 08:28:55 +02:00
parent 2ac781343a
commit f6ad5ff2b2
3 changed files with 554 additions and 2 deletions

View File

@@ -0,0 +1,305 @@
"""
Tests du mécanisme V4 : résolution pilotée par l'ordre pré-compilé.
Vérifie que :
- Le resolve_order est bien propagé du plan vers le target_spec
- Le resolve_engine honore l'ordre au lieu de sa cascade par défaut
- Les méthodes sont essayées dans l'ordre spécifié
- Si toutes échouent, fallback sur la cascade legacy
"""
import sys
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
_ROOT = str(Path(__file__).resolve().parents[2])
if _ROOT not in sys.path:
sys.path.insert(0, _ROOT)
from core.workflow.workflow_ir import WorkflowIR
from core.workflow.execution_plan import ExecutionNode, ResolutionStrategy, ExecutionPlan
from core.workflow.execution_compiler import ExecutionCompiler
from agent_v0.server_v1.execution_plan_runner import (
execution_node_to_action,
execution_plan_to_actions,
_strategy_to_target_spec,
)
# =========================================================================
# Test 1 : le resolve_order est propagé du plan au target_spec
# =========================================================================
class TestResolveOrderPropagation:
    """Check that ``_strategy_to_target_spec`` emits a ``resolve_order`` entry."""

    def test_ocr_primary_produit_resolve_order(self):
        """OCR primary plus template and VLM fallbacks yields that exact order."""
        ocr_first = ResolutionStrategy(method="ocr", target_text="Enregistrer")
        template_then_vlm = [
            ResolutionStrategy(method="template", anchor_b64="abc123"),
            ResolutionStrategy(method="vlm", vlm_description="bouton Enregistrer"),
        ]

        target_spec = _strategy_to_target_spec(ocr_first, template_then_vlm)

        assert "resolve_order" in target_spec
        assert target_spec["resolve_order"] == ["ocr", "template", "vlm"]

    def test_template_primary_produit_resolve_order(self):
        """A template primary strategy is listed first in ``resolve_order``."""
        template_first = ResolutionStrategy(method="template", anchor_b64="abc")
        vlm_fallback = ResolutionStrategy(method="vlm", vlm_description="icône")

        target_spec = _strategy_to_target_spec(template_first, [vlm_fallback])

        order = target_spec["resolve_order"]
        assert order[0] == "template"

    def test_vlm_only(self):
        """A lone VLM strategy with no fallbacks yields ``['vlm']``."""
        vlm_alone = ResolutionStrategy(method="vlm", vlm_description="popup")

        target_spec = _strategy_to_target_spec(vlm_alone, [])

        assert target_spec["resolve_order"] == ["vlm"]

    def test_pas_de_doublons(self):
        """Each method appears only once even when a fallback repeats the primary."""
        ocr_first = ResolutionStrategy(method="ocr", target_text="test")
        fallbacks_with_repeat = [
            ResolutionStrategy(method="template", anchor_b64="abc"),
            # Same method as the primary strategy: must not be listed twice.
            ResolutionStrategy(method="ocr", target_text="autre"),
        ]

        target_spec = _strategy_to_target_spec(ocr_first, fallbacks_with_repeat)

        order = target_spec["resolve_order"]
        assert order.count("ocr") == 1
        assert order.count("template") == 1
# =========================================================================
# Test 2 : execution_node_to_action propage bien le resolve_order
# =========================================================================
class TestExecutionNodeConversion:
    """Check that actions built from an ``ExecutionNode`` carry ``resolve_order``."""

    def test_click_node_a_resolve_order(self):
        """A click node with OCR primary and VLM fallback maps to a full target_spec."""
        ocr_strategy = ResolutionStrategy(method="ocr", target_text="Fichier")
        vlm_strategy = ResolutionStrategy(method="vlm", vlm_description="menu Fichier")
        click_node = ExecutionNode(
            node_id="n1",
            action_type="click",
            intent="Cliquer sur Fichier",
            strategy_primary=ocr_strategy,
            strategy_fallbacks=[vlm_strategy],
        )

        action = execution_node_to_action(click_node)

        assert action is not None
        assert action["type"] == "click"
        assert "target_spec" in action

        target_spec = action["target_spec"]
        assert "resolve_order" in target_spec
        assert target_spec["resolve_order"] == ["ocr", "vlm"]
        # Each strategy's own payload is expected alongside the ordering.
        assert target_spec["by_text"] == "Fichier"
        assert target_spec["vlm_description"] == "menu Fichier"
# =========================================================================
# Test 3 : le compilateur produit des plans avec resolve_order correct
# =========================================================================
class TestCompilerProduitResolveOrder:
    """Compile small workflows and inspect ``resolve_order`` on the resulting actions."""

    def test_workflow_complet_avec_resolve_order(self):
        """A compiled single-click workflow yields one action whose order starts with OCR."""
        click_step = {
            "type": "click",
            "target": "bouton Enregistrer",
            "anchor_hint": "Enregistrer",
        }
        workflow = WorkflowIR.new("Test", domain="generic")
        workflow.add_step("Cliquer sur Enregistrer", actions=[click_step])

        compiled_plan = ExecutionCompiler().compile(workflow)
        produced = execution_plan_to_actions(compiled_plan)

        assert len(produced) == 1
        first_spec = produced[0]["target_spec"]
        assert "resolve_order" in first_spec
        # OCR is expected first: it is the primary strategy when text is available.
        assert first_spec["resolve_order"][0] == "ocr"

    def test_fallback_vlm_toujours_present(self):
        """The VLM method is present in the resolve order of a compiled click."""
        workflow = WorkflowIR.new("Test")
        workflow.add_step(
            "Clic",
            actions=[{"type": "click", "target": "X", "anchor_hint": "X"}],
        )

        compiled_plan = ExecutionCompiler().compile(workflow)
        produced = execution_plan_to_actions(compiled_plan)

        order = produced[0]["target_spec"]["resolve_order"]
        assert "vlm" in order
# =========================================================================
# Test 4 : _resolve_with_precompiled_order respecte l'ordre
# =========================================================================
class TestResolveWithPrecompiledOrder:
    """Verify that ``_resolve_with_precompiled_order`` follows ``resolve_order``.

    The OCR and VLM resolvers are patched on
    ``agent_v0.server_v1.resolve_engine`` so the tests observe which methods
    are attempted, and in which order, without doing real OCR/VLM work on the
    fake screenshot path.
    """

    @staticmethod
    def _run(target_spec, resolve_order):
        """Call the resolver under test with the fixture arguments shared by all tests.

        The function is imported lazily, as the original tests did, so that
        importing this test module does not import the resolve engine.
        """
        from agent_v0.server_v1.resolve_engine import _resolve_with_precompiled_order

        return _resolve_with_precompiled_order(
            screenshot_path="/fake.png",
            target_spec=target_spec,
            resolve_order=resolve_order,
            screen_width=1280,
            screen_height=800,
            fallback_x_pct=0.5,
            fallback_y_pct=0.5,
        )

    @patch("agent_v0.server_v1.resolve_engine._vlm_quick_find")
    @patch("agent_v0.server_v1.resolve_engine._resolve_by_ocr_text")
    def test_ocr_appele_en_premier(self, mock_ocr, mock_vlm):
        """With ``['ocr', 'vlm']`` and a successful OCR, the VLM is never invoked."""
        mock_ocr.return_value = {
            "resolved": True,
            "x_pct": 0.5,
            "y_pct": 0.3,
            "score": 0.9,
        }

        result = self._run(
            target_spec={
                "by_text": "Enregistrer",
                "resolve_order": ["ocr", "vlm"],
            },
            resolve_order=["ocr", "vlm"],
        )

        assert result is not None
        assert result.get("resolved") is True
        assert result.get("resolve_method") == "v4_ocr"
        mock_ocr.assert_called_once()
        # The first successful method must short-circuit the rest of the order.
        mock_vlm.assert_not_called()

    @patch("agent_v0.server_v1.resolve_engine._vlm_quick_find")
    @patch("agent_v0.server_v1.resolve_engine._resolve_by_ocr_text")
    def test_cascade_ocr_vers_vlm(self, mock_ocr, mock_vlm):
        """When OCR fails, the VLM is tried next, strictly after OCR."""
        # A shared parent mock records calls to both resolvers in one timeline,
        # which lets the test assert the relative order of the attempts.
        timeline = MagicMock()
        timeline.attach_mock(mock_ocr, "ocr")
        timeline.attach_mock(mock_vlm, "vlm")

        mock_ocr.return_value = None  # OCR fails
        mock_vlm.return_value = {
            "resolved": True,
            "x_pct": 0.5,
            "y_pct": 0.3,
        }

        result = self._run(
            target_spec={
                "by_text": "Enregistrer",
                "vlm_description": "bouton Enregistrer",
                "resolve_order": ["ocr", "vlm"],
            },
            resolve_order=["ocr", "vlm"],
        )

        assert result is not None
        assert result.get("resolve_method") == "v4_vlm"
        mock_ocr.assert_called_once()
        mock_vlm.assert_called_once()
        # Each recorded call is a (name, args, kwargs) triple; compare names only.
        assert [recorded[0] for recorded in timeline.mock_calls] == ["ocr", "vlm"]

    @patch("agent_v0.server_v1.resolve_engine._vlm_quick_find")
    @patch("agent_v0.server_v1.resolve_engine._resolve_by_ocr_text")
    def test_toutes_methodes_echouent(self, mock_ocr, mock_vlm):
        """When every listed method fails, the resolver returns ``None``."""
        mock_ocr.return_value = None
        mock_vlm.return_value = None

        result = self._run(
            target_spec={
                "by_text": "Inexistant",
                "vlm_description": "truc inexistant",
                "resolve_order": ["ocr", "vlm"],
            },
            resolve_order=["ocr", "vlm"],
        )

        assert result is None
        # ``None`` must come from exhausting the order, not from skipping it.
        mock_ocr.assert_called_once()
        mock_vlm.assert_called_once()

    def test_resolve_order_vide(self):
        """An empty ``resolve_order`` does not crash and returns ``None``."""
        result = self._run(target_spec={"by_text": "test"}, resolve_order=[])

        assert result is None
# =========================================================================
# Test 5 : pipeline complet — IR → Plan → action avec resolve_order
# =========================================================================
class TestPipelineCompletV4:
"""Test du pipeline V4 complet de bout en bout (sans runtime réel)."""
def test_ir_vers_action_avec_resolve_order(self):
"""Un WorkflowIR produit des actions avec resolve_order correctement."""
ir = WorkflowIR.new("Workflow complet", domain="tim_codage")
ir.add_step(
"Ouvrir le fichier",
actions=[{
"type": "click",
"target": "bouton Ouvrir",
"anchor_hint": "Ouvrir",
}],
)
ir.add_step(
"Saisir le nom",
actions=[
{"type": "type", "text": "rapport.pdf"},
{"type": "key_combo", "keys": ["enter"]},
],
)
compiler = ExecutionCompiler()
plan = compiler.compile(ir)
actions = execution_plan_to_actions(plan)
# On doit avoir 3 actions : click, type, key_combo
assert len(actions) == 3
click_action = actions[0]
assert click_action["type"] == "click"
assert "resolve_order" in click_action["target_spec"]
assert click_action["target_spec"]["resolve_order"][0] == "ocr"
assert click_action["target_spec"]["by_text"] == "Ouvrir"
# type et key_combo n'ont pas de target_spec
assert actions[1]["type"] == "type"
assert "target_spec" not in actions[1]
assert actions[2]["type"] == "key_combo"