"""
Tests for the V4 mechanism: resolution driven by the pre-compiled order.

Checks that:
- resolve_order is propagated from the plan to the target_spec
- the resolve engine honours that order instead of its default cascade
- methods are tried in the order specified
- when every method fails, the V4 resolver returns None (the original notes
  say the legacy cascade then takes over; that fallback is not exercised here)
"""

import sys
from pathlib import Path
from unittest.mock import MagicMock, patch

import pytest

# Make the repository root importable before pulling in project modules.
_ROOT = str(Path(__file__).resolve().parents[2])
if _ROOT not in sys.path:
    sys.path.insert(0, _ROOT)

from core.workflow.workflow_ir import WorkflowIR
from core.workflow.execution_plan import ExecutionNode, ResolutionStrategy, ExecutionPlan
from core.workflow.execution_compiler import ExecutionCompiler
from agent_v0.server_v1.execution_plan_runner import (
    execution_node_to_action,
    execution_plan_to_actions,
    _strategy_to_target_spec,
)


def _compile_to_actions(ir):
    """Compile *ir* with a fresh ExecutionCompiler and convert the plan to actions."""
    return execution_plan_to_actions(ExecutionCompiler().compile(ir))


def _run_precompiled(target_spec, resolve_order):
    """Call _resolve_with_precompiled_order with the fixed fake screen settings."""
    # Imported at call time, exactly as each test body did originally.
    from agent_v0.server_v1.resolve_engine import _resolve_with_precompiled_order

    return _resolve_with_precompiled_order(
        screenshot_path="/fake.png",
        target_spec=target_spec,
        resolve_order=resolve_order,
        screen_width=1280,
        screen_height=800,
        fallback_x_pct=0.5,
        fallback_y_pct=0.5,
    )


# =========================================================================
# Test 1: resolve_order is propagated from the plan to the target_spec
# =========================================================================


class TestResolveOrderPropagation:
    """resolve_order must be present in the target_spec."""

    def test_ocr_primary_produit_resolve_order(self):
        """OCR as primary -> resolve_order is ocr, then the fallbacks in order."""
        main_strategy = ResolutionStrategy(method="ocr", target_text="Enregistrer")
        backups = [
            ResolutionStrategy(method="template", anchor_b64="abc123"),
            ResolutionStrategy(method="vlm", vlm_description="bouton Enregistrer"),
        ]

        target_spec = _strategy_to_target_spec(main_strategy, backups)

        assert "resolve_order" in target_spec
        assert target_spec["resolve_order"] == ["ocr", "template", "vlm"]

    def test_template_primary_produit_resolve_order(self):
        """Template as primary -> resolve_order starts with 'template'."""
        main_strategy = ResolutionStrategy(method="template", anchor_b64="abc")
        backups = [ResolutionStrategy(method="vlm", vlm_description="icône")]

        target_spec = _strategy_to_target_spec(main_strategy, backups)

        assert target_spec["resolve_order"][0] == "template"

    def test_vlm_only(self):
        """VLM alone -> resolve_order == ['vlm']."""
        main_strategy = ResolutionStrategy(method="vlm", vlm_description="popup")

        target_spec = _strategy_to_target_spec(main_strategy, [])

        assert target_spec["resolve_order"] == ["vlm"]

    def test_pas_de_doublons(self):
        """Each method appears only once in the order."""
        main_strategy = ResolutionStrategy(method="ocr", target_text="test")
        backups = [
            ResolutionStrategy(method="template", anchor_b64="abc"),
            ResolutionStrategy(method="ocr", target_text="autre"),  # duplicate
        ]

        target_spec = _strategy_to_target_spec(main_strategy, backups)

        assert target_spec["resolve_order"].count("ocr") == 1
        assert target_spec["resolve_order"].count("template") == 1


# =========================================================================
# Test 2: execution_node_to_action carries the resolve_order through
# =========================================================================


class TestExecutionNodeConversion:
    """Generated actions contain the resolve_order."""

    def test_click_node_a_resolve_order(self):
        """A click ExecutionNode yields an action with resolve_order."""
        click_node = ExecutionNode(
            node_id="n1",
            action_type="click",
            intent="Cliquer sur Fichier",
            strategy_primary=ResolutionStrategy(method="ocr", target_text="Fichier"),
            strategy_fallbacks=[
                ResolutionStrategy(method="vlm", vlm_description="menu Fichier"),
            ],
        )

        action = execution_node_to_action(click_node)

        assert action is not None
        assert action["type"] == "click"
        assert "target_spec" in action
        target_spec = action["target_spec"]
        assert "resolve_order" in target_spec
        assert target_spec["resolve_order"] == ["ocr", "vlm"]
        assert target_spec["by_text"] == "Fichier"
        assert target_spec["vlm_description"] == "menu Fichier"


# =========================================================================
# Test 3: the compiler produces plans with a correct resolve_order
# =========================================================================


class TestCompilerProduitResolveOrder:
    """ExecutionCompiler produces plans with resolve_order."""

    def test_workflow_complet_avec_resolve_order(self):
        """A compiled workflow has actions with resolve_order."""
        workflow = WorkflowIR.new("Test", domain="generic")
        workflow.add_step(
            "Cliquer sur Enregistrer",
            actions=[{
                "type": "click",
                "target": "bouton Enregistrer",
                "anchor_hint": "Enregistrer",
            }],
        )

        actions = _compile_to_actions(workflow)

        assert len(actions) == 1
        assert "resolve_order" in actions[0]["target_spec"]
        # OCR must come first (primary strategy when text is available)
        assert actions[0]["target_spec"]["resolve_order"][0] == "ocr"

    def test_fallback_vlm_toujours_present(self):
        """VLM is present in the resolve_order as a fallback."""
        workflow = WorkflowIR.new("Test")
        workflow.add_step(
            "Clic",
            actions=[{"type": "click", "target": "X", "anchor_hint": "X"}],
        )

        actions = _compile_to_actions(workflow)

        assert "vlm" in actions[0]["target_spec"]["resolve_order"]


# =========================================================================
# Test 4: _resolve_with_precompiled_order respects the order
# =========================================================================


class TestResolveWithPrecompiledOrder:
    """The V4 resolution mechanism honours the order."""

    @patch("agent_v0.server_v1.resolve_engine._resolve_by_ocr_text")
    def test_ocr_appele_en_premier(self, mock_ocr):
        """With resolve_order=['ocr', 'vlm'], OCR is called and its hit is returned."""
        mock_ocr.return_value = {
            "resolved": True,
            "x_pct": 0.5,
            "y_pct": 0.3,
            "score": 0.9,
        }

        outcome = _run_precompiled(
            {
                "by_text": "Enregistrer",
                "resolve_order": ["ocr", "vlm"],
            },
            ["ocr", "vlm"],
        )

        assert outcome is not None
        assert outcome.get("resolved") is True
        assert outcome.get("resolve_method") == "v4_ocr"
        mock_ocr.assert_called_once()

    @patch("agent_v0.server_v1.resolve_engine._vlm_quick_find")
    @patch("agent_v0.server_v1.resolve_engine._resolve_by_ocr_text")
    def test_cascade_ocr_vers_vlm(self, mock_ocr, mock_vlm):
        """When OCR fails, VLM is tried next."""
        mock_ocr.return_value = None  # OCR fails
        mock_vlm.return_value = {
            "resolved": True,
            "x_pct": 0.5,
            "y_pct": 0.3,
        }

        outcome = _run_precompiled(
            {
                "by_text": "Enregistrer",
                "vlm_description": "bouton Enregistrer",
                "resolve_order": ["ocr", "vlm"],
            },
            ["ocr", "vlm"],
        )

        assert outcome is not None
        assert outcome.get("resolve_method") == "v4_vlm"
        mock_ocr.assert_called_once()
        mock_vlm.assert_called_once()

    @patch("agent_v0.server_v1.resolve_engine._vlm_quick_find")
    @patch("agent_v0.server_v1.resolve_engine._resolve_by_ocr_text")
    def test_toutes_methodes_echouent(self, mock_ocr, mock_vlm):
        """When every method fails, None is returned."""
        mock_ocr.return_value = None
        mock_vlm.return_value = None

        outcome = _run_precompiled(
            {
                "by_text": "Inexistant",
                "vlm_description": "truc inexistant",
                "resolve_order": ["ocr", "vlm"],
            },
            ["ocr", "vlm"],
        )

        assert outcome is None

    def test_resolve_order_vide(self):
        """An empty resolve_order does not crash and yields None."""
        outcome = _run_precompiled({"by_text": "test"}, [])

        assert outcome is None


# =========================================================================
# Test 5: full pipeline - IR -> Plan -> action with resolve_order
# =========================================================================


class TestPipelineCompletV4:
    """End-to-end test of the full V4 pipeline (without a real runtime)."""

    def test_ir_vers_action_avec_resolve_order(self):
        """A WorkflowIR yields actions carrying the expected resolve_order."""
        workflow = WorkflowIR.new("Workflow complet", domain="tim_codage")
        workflow.add_step(
            "Ouvrir le fichier",
            actions=[{
                "type": "click",
                "target": "bouton Ouvrir",
                "anchor_hint": "Ouvrir",
            }],
        )
        workflow.add_step(
            "Saisir le nom",
            actions=[
                {"type": "type", "text": "rapport.pdf"},
                {"type": "key_combo", "keys": ["enter"]},
            ],
        )

        actions = _compile_to_actions(workflow)

        # Three actions are expected: click, type, key_combo
        assert len(actions) == 3

        first = actions[0]
        assert first["type"] == "click"
        assert "resolve_order" in first["target_spec"]
        assert first["target_spec"]["resolve_order"][0] == "ocr"
        assert first["target_spec"]["by_text"] == "Ouvrir"

        # The type action must carry no target_spec; for key_combo only the
        # action type is checked.
        assert actions[1]["type"] == "type"
        assert "target_spec" not in actions[1]
        assert actions[2]["type"] == "key_combo"