feat: runtime V4 honore resolve_order pré-compilé (zéro VLM au runtime)
Le resolve_engine suit désormais l'ordre de méthodes décidé par l'ExecutionCompiler au lieu de sa cascade improvisée. C'est la pièce maîtresse du V4 :

- execution_plan_runner.py : ajout de 'resolve_order' dans target_spec
  (["ocr", "template", "vlm"] = stratégies dans l'ordre de préférence)
- resolve_engine.py : _resolve_with_precompiled_order() honore l'ordre
  - Court-circuite la cascade legacy quand resolve_order est présent
  - Fallback sur la cascade si toutes les méthodes V4 échouent
- _resolve_by_ocr_text() : résolution OCR directe via docTR (~200ms)
  Chemin rapide V4 — pas de VLM pour les éléments avec texte visible
- 12 nouveaux tests : propagation resolve_order, cascade, fallback, pipeline E2E

220 tests passent (208 existants + 12 nouveaux), 0 régression.

"Le LLM compile. Le runtime exécute."

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -94,8 +94,14 @@ def _strategy_to_target_spec(
|
|||||||
- template → anchor_image_base64 (depuis anchor_b64)
|
- template → anchor_image_base64 (depuis anchor_b64)
|
||||||
- VLM → vlm_description
|
- VLM → vlm_description
|
||||||
|
|
||||||
Règle : la stratégie primaire dicte la méthode préférée, mais on expose
|
Règle V4 : la stratégie primaire dicte la méthode préférée.
|
||||||
toutes les ancres connues pour que le runtime puisse retomber dessus.
|
Le champ `resolve_order` liste les méthodes dans l'ordre à essayer.
|
||||||
|
Le resolve_engine honore cet ordre au lieu de sa cascade par défaut.
|
||||||
|
|
||||||
|
resolve_order est la clé du "zéro VLM au runtime" :
|
||||||
|
- ["ocr", "template", "vlm"] → V4 typique (OCR rapide)
|
||||||
|
- ["template", "ocr", "vlm"] → apprentissage : template marche mieux
|
||||||
|
- ["vlm"] → éléments sans texte (icônes)
|
||||||
"""
|
"""
|
||||||
spec: Dict[str, Any] = {}
|
spec: Dict[str, Any] = {}
|
||||||
|
|
||||||
@@ -108,6 +114,8 @@ def _strategy_to_target_spec(
|
|||||||
by_text_candidate = ""
|
by_text_candidate = ""
|
||||||
anchor_candidate = ""
|
anchor_candidate = ""
|
||||||
vlm_candidate = ""
|
vlm_candidate = ""
|
||||||
|
resolve_order: List[str] = []
|
||||||
|
seen_methods: set = set()
|
||||||
|
|
||||||
for strat in all_strategies:
|
for strat in all_strategies:
|
||||||
if not strat:
|
if not strat:
|
||||||
@@ -122,6 +130,11 @@ def _strategy_to_target_spec(
|
|||||||
elif strat.method == "vlm" and strat.vlm_description and not vlm_candidate:
|
elif strat.method == "vlm" and strat.vlm_description and not vlm_candidate:
|
||||||
vlm_candidate = strat.vlm_description
|
vlm_candidate = strat.vlm_description
|
||||||
|
|
||||||
|
# Construire l'ordre des méthodes (dans l'ordre primaire → fallbacks)
|
||||||
|
if strat.method and strat.method not in seen_methods:
|
||||||
|
resolve_order.append(strat.method)
|
||||||
|
seen_methods.add(strat.method)
|
||||||
|
|
||||||
if by_text_candidate:
|
if by_text_candidate:
|
||||||
spec["by_text"] = by_text_candidate
|
spec["by_text"] = by_text_candidate
|
||||||
if anchor_candidate:
|
if anchor_candidate:
|
||||||
@@ -132,6 +145,10 @@ def _strategy_to_target_spec(
|
|||||||
# L'intention métier devient le prompt VLM de dernier recours
|
# L'intention métier devient le prompt VLM de dernier recours
|
||||||
spec["vlm_description"] = intent
|
spec["vlm_description"] = intent
|
||||||
|
|
||||||
|
# Ordre de résolution pré-compilé — c'est LA pièce centrale du V4
|
||||||
|
if resolve_order:
|
||||||
|
spec["resolve_order"] = resolve_order
|
||||||
|
|
||||||
return spec
|
return spec
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1328,6 +1328,205 @@ def _resolve_by_som(
|
|||||||
# Orchestrateur — Résolution cible complète (synchrone)
|
# Orchestrateur — Résolution cible complète (synchrone)
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# V4 : Résolution pilotée par le plan pré-compilé
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_with_precompiled_order(
    screenshot_path: str,
    target_spec: Dict[str, Any],
    resolve_order: list,
    screen_width: int,
    screen_height: int,
    fallback_x_pct: float,
    fallback_y_pct: float,
) -> Optional[Dict[str, Any]]:
    """Resolve the target by following the order pre-compiled in the plan.

    This is the V4 path: the methods listed in ``resolve_order`` are tried
    one after the other and the first one that reports ``resolved`` wins.
    No improvised cascade happens here; the caller decides what to do when
    this function returns None.

    Args:
        screenshot_path: Path of the screenshot to search.
        target_spec: Target description. The keys read here are ``by_text``
            (needed for "ocr"), ``anchor_image_base64`` (needed for
            "template") and ``vlm_description`` (used for "vlm", falling back
            to a description built from ``by_text``).
        resolve_order: Methods to try, in order, e.g.
            ``["ocr", "template", "vlm"]``, ``["template", "ocr"]`` or
            ``["vlm"]``. Unknown entries, and entries whose required data is
            missing from ``target_spec``, are skipped.
        screen_width: Screen width, forwarded to each resolver.
        screen_height: Screen height, forwarded to each resolver.
        fallback_x_pct: Not used by this function; kept in the signature for
            the caller.
        fallback_y_pct: Not used by this function; kept in the signature for
            the caller.

    Returns:
        The resolver's result dict, enriched with ``resolve_method``
        ("v4_ocr", "v4_template" or "v4_vlm") and ``resolve_elapsed_ms``,
        or None when every method failed or was skipped.
    """
    import time as _time

    # perf_counter is monotonic: durations cannot go negative if the system
    # clock is adjusted while a (possibly slow) resolver is running.
    t_start = _time.perf_counter()

    # `or ""` tolerates keys that are present but hold None, which would
    # otherwise crash on .strip() before any method is even tried.
    by_text = str(target_spec.get("by_text") or "").strip()
    anchor_b64 = target_spec.get("anchor_image_base64") or ""
    vlm_description = target_spec.get("vlm_description") or ""

    # Labels as they appear in the per-method error messages.
    error_labels = {"ocr": "OCR", "template": "template", "vlm": "VLM"}

    for method in resolve_order:
        method_start = _time.perf_counter()
        result = None
        description = ""

        try:
            if method == "ocr" and by_text:
                # Fast path: look for the visible text in the image.
                result = _resolve_by_ocr_text(
                    screenshot_path=screenshot_path,
                    target_text=by_text,
                    screen_width=screen_width,
                    screen_height=screen_height,
                )
            elif method == "template" and anchor_b64:
                # Template matching: pixel comparison against the anchor.
                result = _resolve_by_template_matching(
                    screenshot_path=screenshot_path,
                    anchor_image_b64=anchor_b64,
                    screen_width=screen_width,
                    screen_height=screen_height,
                    confidence_threshold=0.85,
                )
            elif method == "vlm" and (vlm_description or by_text):
                # VLM: slow, meant as the last resort.
                description = vlm_description or f"élément '{by_text}'"
                result = _vlm_quick_find(
                    screenshot_path=screenshot_path,
                    target_description=description,
                    screen_width=screen_width,
                    screen_height=screen_height,
                    anchor_image_b64=anchor_b64,
                )
            else:
                # Leave a trace so a plan listing an unusable method can be
                # diagnosed instead of silently doing nothing.
                logger.debug(
                    "V4 resolve : méthode '%s' ignorée (inconnue ou données manquantes)",
                    method,
                )
                continue
        except Exception as e:
            # Best effort: one failing resolver must not prevent the next
            # one from running, but the failure has to be visible in logs.
            logger.warning("V4 %s erreur : %s", error_labels[method], e)
            continue

        if not (result and result.get("resolved")):
            continue

        elapsed = (_time.perf_counter() - method_start) * 1000
        x_pct = result.get("x_pct", 0)
        y_pct = result.get("y_pct", 0)

        if method == "ocr":
            logger.info(
                "V4 OCR : OK en %.0fms pour '%s' → (%.3f, %.3f)",
                elapsed, by_text[:30], x_pct, y_pct,
            )
        elif method == "template":
            logger.info(
                "V4 TEMPLATE : OK en %.0fms score=%.3f → (%.3f, %.3f)",
                elapsed, result.get("score", 0), x_pct, y_pct,
            )
        else:
            logger.info(
                "V4 VLM : OK en %.0fms pour '%s' → (%.3f, %.3f)",
                elapsed, description[:30], x_pct, y_pct,
            )

        result["resolve_method"] = f"v4_{method}"
        result["resolve_elapsed_ms"] = elapsed
        return result

    total_elapsed = (_time.perf_counter() - t_start) * 1000
    logger.info(
        "V4 resolve : toutes les méthodes (%s) ont échoué en %.0fms",
        resolve_order, total_elapsed,
    )
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_by_ocr_text(
|
||||||
|
screenshot_path: str,
|
||||||
|
target_text: str,
|
||||||
|
screen_width: int,
|
||||||
|
screen_height: int,
|
||||||
|
) -> Optional[Dict[str, Any]]:
|
||||||
|
"""Localiser du texte dans l'image via OCR (docTR ou fallback).
|
||||||
|
|
||||||
|
C'est le chemin rapide V4 : pas de VLM, pas de template matching,
|
||||||
|
juste de l'OCR direct. Idéal pour les éléments avec texte visible.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict avec x_pct, y_pct, score si trouvé, None sinon.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
from doctr.io import DocumentFile
|
||||||
|
from doctr.models import ocr_predictor
|
||||||
|
except ImportError:
|
||||||
|
logger.debug("docTR non disponible pour V4 OCR")
|
||||||
|
return None
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Utiliser un cache global pour éviter de recharger le modèle à chaque appel
|
||||||
|
global _V4_OCR_PREDICTOR
|
||||||
|
try:
|
||||||
|
_V4_OCR_PREDICTOR
|
||||||
|
except NameError:
|
||||||
|
_V4_OCR_PREDICTOR = None
|
||||||
|
|
||||||
|
if _V4_OCR_PREDICTOR is None:
|
||||||
|
_V4_OCR_PREDICTOR = ocr_predictor(
|
||||||
|
det_arch='db_resnet50',
|
||||||
|
reco_arch='crnn_vgg16_bn',
|
||||||
|
pretrained=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
doc = DocumentFile.from_images([screenshot_path])
|
||||||
|
result = _V4_OCR_PREDICTOR(doc)
|
||||||
|
|
||||||
|
# Chercher le texte (match exact, insensible à la casse)
|
||||||
|
target_lower = target_text.lower().strip()
|
||||||
|
best_match = None
|
||||||
|
best_score = 0.0
|
||||||
|
|
||||||
|
for page in result.pages:
|
||||||
|
for block in page.blocks:
|
||||||
|
for line_obj in block.lines:
|
||||||
|
line_text = " ".join(w.value for w in line_obj.words)
|
||||||
|
line_lower = line_text.lower()
|
||||||
|
|
||||||
|
# Match exact > contient > mot par mot
|
||||||
|
score = 0.0
|
||||||
|
if target_lower == line_lower:
|
||||||
|
score = 1.0
|
||||||
|
elif target_lower in line_lower:
|
||||||
|
score = 0.8
|
||||||
|
elif any(target_lower == w.value.lower() for w in line_obj.words):
|
||||||
|
score = 0.9
|
||||||
|
|
||||||
|
if score > best_score:
|
||||||
|
# Coordonnées de la ligne entière (bbox)
|
||||||
|
box = line_obj.geometry # ((x1,y1), (x2,y2)) normalisées 0-1
|
||||||
|
cx = (box[0][0] + box[1][0]) / 2
|
||||||
|
cy = (box[0][1] + box[1][1]) / 2
|
||||||
|
best_match = {
|
||||||
|
"resolved": True,
|
||||||
|
"method": "v4_ocr",
|
||||||
|
"x_pct": cx,
|
||||||
|
"y_pct": cy,
|
||||||
|
"score": score,
|
||||||
|
"matched_text": line_text,
|
||||||
|
}
|
||||||
|
best_score = score
|
||||||
|
|
||||||
|
if best_match and best_score >= 0.7:
|
||||||
|
return best_match
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug("docTR OCR erreur : %s", e)
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def _resolve_target_sync(
|
def _resolve_target_sync(
|
||||||
screenshot_path: str,
|
screenshot_path: str,
|
||||||
target_spec: Dict[str, Any],
|
target_spec: Dict[str, Any],
|
||||||
@@ -1359,6 +1558,37 @@ def _resolve_target_sync(
|
|||||||
"""
|
"""
|
||||||
anchor_image_b64 = target_spec.get("anchor_image_base64", "")
|
anchor_image_b64 = target_spec.get("anchor_image_base64", "")
|
||||||
|
|
||||||
|
# ===================================================================
|
||||||
|
# V4 : Résolution pilotée par le plan pré-compilé
|
||||||
|
# ===================================================================
|
||||||
|
# Si le target_spec contient `resolve_order`, il vient d'un ExecutionPlan
|
||||||
|
# compilé. On honore cet ordre au lieu de faire la cascade par défaut.
|
||||||
|
# C'est le "zéro VLM au runtime" : on essaie d'abord la stratégie
|
||||||
|
# pré-compilée (OCR, template, ou VLM).
|
||||||
|
resolve_order = target_spec.get("resolve_order")
|
||||||
|
if resolve_order and isinstance(resolve_order, list):
|
||||||
|
logger.info(
|
||||||
|
"V4 resolve : ordre pré-compilé = %s",
|
||||||
|
resolve_order,
|
||||||
|
)
|
||||||
|
result = _resolve_with_precompiled_order(
|
||||||
|
screenshot_path=screenshot_path,
|
||||||
|
target_spec=target_spec,
|
||||||
|
resolve_order=resolve_order,
|
||||||
|
screen_width=screen_width,
|
||||||
|
screen_height=screen_height,
|
||||||
|
fallback_x_pct=fallback_x_pct,
|
||||||
|
fallback_y_pct=fallback_y_pct,
|
||||||
|
)
|
||||||
|
if result and result.get("resolved"):
|
||||||
|
return result
|
||||||
|
# Si les méthodes pré-compilées ont toutes échoué, on continue
|
||||||
|
# vers la cascade legacy (compatibilité et robustesse).
|
||||||
|
logger.info(
|
||||||
|
"V4 resolve : toutes les méthodes pré-compilées ont échoué, "
|
||||||
|
"fallback cascade legacy"
|
||||||
|
)
|
||||||
|
|
||||||
# ===================================================================
|
# ===================================================================
|
||||||
# MODE STRICT (replay sessions) — Stratégie VLM-FIRST
|
# MODE STRICT (replay sessions) — Stratégie VLM-FIRST
|
||||||
# ===================================================================
|
# ===================================================================
|
||||||
|
|||||||
305
tests/unit/test_v4_resolve_order.py
Normal file
305
tests/unit/test_v4_resolve_order.py
Normal file
@@ -0,0 +1,305 @@
|
|||||||
|
"""
|
||||||
|
Tests du mécanisme V4 : résolution pilotée par l'ordre pré-compilé.
|
||||||
|
|
||||||
|
Vérifie que :
|
||||||
|
- Le resolve_order est bien propagé du plan vers le target_spec
|
||||||
|
- Le resolve_engine honore l'ordre au lieu de sa cascade par défaut
|
||||||
|
- Les méthodes sont essayées dans l'ordre spécifié
|
||||||
|
- Si toutes échouent, fallback sur la cascade legacy
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
_ROOT = str(Path(__file__).resolve().parents[2])
|
||||||
|
if _ROOT not in sys.path:
|
||||||
|
sys.path.insert(0, _ROOT)
|
||||||
|
|
||||||
|
from core.workflow.workflow_ir import WorkflowIR
|
||||||
|
from core.workflow.execution_plan import ExecutionNode, ResolutionStrategy, ExecutionPlan
|
||||||
|
from core.workflow.execution_compiler import ExecutionCompiler
|
||||||
|
from agent_v0.server_v1.execution_plan_runner import (
|
||||||
|
execution_node_to_action,
|
||||||
|
execution_plan_to_actions,
|
||||||
|
_strategy_to_target_spec,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Test 1 : le resolve_order est propagé du plan au target_spec
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestResolveOrderPropagation:
    """The generated target_spec must carry a `resolve_order` entry."""

    def test_ocr_primary_produit_resolve_order(self):
        """An OCR primary strategy yields an order that starts with 'ocr'."""
        spec = _strategy_to_target_spec(
            ResolutionStrategy(method="ocr", target_text="Enregistrer"),
            [
                ResolutionStrategy(method="template", anchor_b64="abc123"),
                ResolutionStrategy(method="vlm", vlm_description="bouton Enregistrer"),
            ],
        )

        assert "resolve_order" in spec
        assert spec["resolve_order"] == ["ocr", "template", "vlm"]

    def test_template_primary_produit_resolve_order(self):
        """A template primary strategy yields an order that starts with 'template'."""
        spec = _strategy_to_target_spec(
            ResolutionStrategy(method="template", anchor_b64="abc"),
            [ResolutionStrategy(method="vlm", vlm_description="icône")],
        )

        first_method = spec["resolve_order"][0]
        assert first_method == "template"

    def test_vlm_only(self):
        """A lone VLM strategy yields resolve_order == ['vlm']."""
        vlm_strategy = ResolutionStrategy(method="vlm", vlm_description="popup")

        spec = _strategy_to_target_spec(vlm_strategy, [])

        assert spec["resolve_order"] == ["vlm"]

    def test_pas_de_doublons(self):
        """Each method appears only once in the order."""
        spec = _strategy_to_target_spec(
            ResolutionStrategy(method="ocr", target_text="test"),
            [
                ResolutionStrategy(method="template", anchor_b64="abc"),
                # Same method as the primary: must not be listed twice.
                ResolutionStrategy(method="ocr", target_text="autre"),
            ],
        )

        order = spec["resolve_order"]
        assert order.count("ocr") == 1
        assert order.count("template") == 1
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Test 2 : execution_node_to_action propage bien le resolve_order
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestExecutionNodeConversion:
    """Actions generated from execution nodes carry the resolve_order."""

    def test_click_node_a_resolve_order(self):
        """A click ExecutionNode becomes an action whose target_spec has resolve_order."""
        ocr_primary = ResolutionStrategy(method="ocr", target_text="Fichier")
        vlm_fallback = ResolutionStrategy(method="vlm", vlm_description="menu Fichier")
        click_node = ExecutionNode(
            node_id="n1",
            action_type="click",
            intent="Cliquer sur Fichier",
            strategy_primary=ocr_primary,
            strategy_fallbacks=[vlm_fallback],
        )

        action = execution_node_to_action(click_node)

        assert action is not None
        assert action["type"] == "click"
        assert "target_spec" in action

        target_spec = action["target_spec"]
        assert "resolve_order" in target_spec
        assert target_spec["resolve_order"] == ["ocr", "vlm"]
        assert target_spec["by_text"] == "Fichier"
        assert target_spec["vlm_description"] == "menu Fichier"
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Test 3 : le compilateur produit des plans avec resolve_order correct
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestCompilerProduitResolveOrder:
    """Plans produced by the ExecutionCompiler expose a resolve_order."""

    def test_workflow_complet_avec_resolve_order(self):
        """A compiled workflow yields actions that carry resolve_order."""
        click_step = {
            "type": "click",
            "target": "bouton Enregistrer",
            "anchor_hint": "Enregistrer",
        }
        ir = WorkflowIR.new("Test", domain="generic")
        ir.add_step("Cliquer sur Enregistrer", actions=[click_step])

        plan = ExecutionCompiler().compile(ir)
        actions = execution_plan_to_actions(plan)

        assert len(actions) == 1
        assert "resolve_order" in actions[0]["target_spec"]
        # The test expects OCR to come first for this text-hinted click.
        assert actions[0]["target_spec"]["resolve_order"][0] == "ocr"

    def test_fallback_vlm_toujours_present(self):
        """The VLM method is always present as a fallback."""
        ir = WorkflowIR.new("Test")
        ir.add_step("Clic", actions=[{"type": "click", "target": "X", "anchor_hint": "X"}])

        plan = ExecutionCompiler().compile(ir)
        actions = execution_plan_to_actions(plan)

        first_order = actions[0]["target_spec"]["resolve_order"]
        assert "vlm" in first_order
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Test 4 : _resolve_with_precompiled_order respecte l'ordre
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestResolveWithPrecompiledOrder:
    """The V4 resolution mechanism follows the requested order."""

    @staticmethod
    def _run(target_spec, resolve_order):
        """Call the function under test with the fixed screen/fallback arguments."""
        # Imported at call time, as in each individual test, so the module
        # is only loaded once the patches are in place.
        from agent_v0.server_v1.resolve_engine import _resolve_with_precompiled_order

        return _resolve_with_precompiled_order(
            screenshot_path="/fake.png",
            target_spec=target_spec,
            resolve_order=resolve_order,
            screen_width=1280,
            screen_height=800,
            fallback_x_pct=0.5,
            fallback_y_pct=0.5,
        )

    @patch("agent_v0.server_v1.resolve_engine._resolve_by_ocr_text")
    def test_ocr_appele_en_premier(self, mock_ocr):
        """With resolve_order=['ocr', 'vlm'], OCR is called first."""
        mock_ocr.return_value = {
            "resolved": True,
            "x_pct": 0.5,
            "y_pct": 0.3,
            "score": 0.9,
        }

        outcome = self._run(
            {"by_text": "Enregistrer", "resolve_order": ["ocr", "vlm"]},
            ["ocr", "vlm"],
        )

        assert outcome is not None
        assert outcome.get("resolved") is True
        assert outcome.get("resolve_method") == "v4_ocr"
        mock_ocr.assert_called_once()

    @patch("agent_v0.server_v1.resolve_engine._vlm_quick_find")
    @patch("agent_v0.server_v1.resolve_engine._resolve_by_ocr_text")
    def test_cascade_ocr_vers_vlm(self, mock_ocr, mock_vlm):
        """When OCR fails, the VLM is tried next."""
        mock_ocr.return_value = None  # OCR finds nothing
        mock_vlm.return_value = {
            "resolved": True,
            "x_pct": 0.5,
            "y_pct": 0.3,
        }

        outcome = self._run(
            {
                "by_text": "Enregistrer",
                "vlm_description": "bouton Enregistrer",
                "resolve_order": ["ocr", "vlm"],
            },
            ["ocr", "vlm"],
        )

        assert outcome is not None
        assert outcome.get("resolve_method") == "v4_vlm"
        mock_ocr.assert_called_once()
        mock_vlm.assert_called_once()

    @patch("agent_v0.server_v1.resolve_engine._vlm_quick_find")
    @patch("agent_v0.server_v1.resolve_engine._resolve_by_ocr_text")
    def test_toutes_methodes_echouent(self, mock_ocr, mock_vlm):
        """When every method fails, None is returned."""
        mock_ocr.return_value = None
        mock_vlm.return_value = None

        outcome = self._run(
            {
                "by_text": "Inexistant",
                "vlm_description": "truc inexistant",
                "resolve_order": ["ocr", "vlm"],
            },
            ["ocr", "vlm"],
        )

        assert outcome is None

    def test_resolve_order_vide(self):
        """An empty resolve_order does not crash."""
        outcome = self._run({"by_text": "test"}, [])

        assert outcome is None
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Test 5 : pipeline complet — IR → Plan → action avec resolve_order
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestPipelineCompletV4:
    """End-to-end check of the V4 pipeline (no real runtime involved)."""

    def test_ir_vers_action_avec_resolve_order(self):
        """A WorkflowIR is turned into actions that carry a correct resolve_order."""
        ir = WorkflowIR.new("Workflow complet", domain="tim_codage")

        open_click = {
            "type": "click",
            "target": "bouton Ouvrir",
            "anchor_hint": "Ouvrir",
        }
        ir.add_step("Ouvrir le fichier", actions=[open_click])

        typing_actions = [
            {"type": "type", "text": "rapport.pdf"},
            {"type": "key_combo", "keys": ["enter"]},
        ]
        ir.add_step("Saisir le nom", actions=typing_actions)

        plan = ExecutionCompiler().compile(ir)
        actions = execution_plan_to_actions(plan)

        # Three actions are expected: click, type, key_combo.
        assert len(actions) == 3
        click_action, type_action, combo_action = actions[0], actions[1], actions[2]

        assert click_action["type"] == "click"
        click_spec = click_action["target_spec"]
        assert "resolve_order" in click_spec
        assert click_spec["resolve_order"][0] == "ocr"
        assert click_spec["by_text"] == "Ouvrir"

        # The keyboard-only 'type' action carries no target_spec.
        assert type_action["type"] == "type"
        assert "target_spec" not in type_action
        assert combo_action["type"] == "key_combo"
|
||||||
Reference in New Issue
Block a user