From 931cf132178d6cf1ea4547f8644fcd8ad3487266 Mon Sep 17 00:00:00 2001 From: Dom Date: Thu, 2 Jul 2026 18:49:32 +0200 Subject: [PATCH] =?UTF-8?q?feat(navigate):=20jalon=20partiel=20D1=20?= =?UTF-8?q?=E2=80=94=20compile=20navigate=20+=20coercion=20coords=20s?= =?UTF-8?q?=C3=BBre?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ferme Gap C : _edge_to_normalized_actions produit désormais une action navigate (handler serveur atteignable). Ajoute _coerce_action_coords : cast x_pct/y_pct en float APRÈS résolution des templates, JAMAIS de fallback (0,0) — template non résolu / valeur invalide → pause_for_human (safety_level=high). Non-régression prouvée sur mouse_click classiques (idempotent sur floats). ⚠️ NE FERME PAS le write-only : Gap A (P1-B) non livré — aucun step click/type ne déclare encore consommer navigate_login_coords. TestCompilerGapLiteralFloats assert l'état ouvert. Boucle complète = chantier suivant (P1-B + test e2e edge→action). 21 tests verts, boot OK. Revue croisée Claude (GO jalon partiel). Co-Authored-By: Claude Fable 5 --- agent_v0/server_v1/api_stream.py | 4 + agent_v0/server_v1/replay_engine.py | 47 ++ ...VIGATE_COORDS_IMPLEMENTATION_2026-07-02.md | 474 ++++++++++++++++++ ...QG_REVIEW_D1_NAVIGATE_COORDS_2026-07-02.md | 135 +++++ tests/unit/test_coords_consumption_gap.py | 187 ++++++- 5 files changed, 841 insertions(+), 6 deletions(-) create mode 100644 docs/PLAN_D1_NAVIGATE_COORDS_IMPLEMENTATION_2026-07-02.md create mode 100644 docs/QG_REVIEW_D1_NAVIGATE_COORDS_2026-07-02.md diff --git a/agent_v0/server_v1/api_stream.py b/agent_v0/server_v1/api_stream.py index 26f64b460..41b2fd77c 100644 --- a/agent_v0/server_v1/api_stream.py +++ b/agent_v0/server_v1/api_stream.py @@ -420,6 +420,7 @@ from .replay_engine import ( _edge_to_normalized_actions, _substitute_variables, _resolve_runtime_vars, + _coerce_action_coords, _SERVER_SIDE_ACTION_TYPES, _handle_extract_text_action, _handle_extract_table_action, @@ -4334,6 +4335,9 @@ async def get_next_action(session_id: str, machine_id: str = "default"): if runtime_vars: action = _resolve_runtime_vars(action, runtime_vars) + # Coercion coords: cast x_pct/y_pct en float après resolver + action = _coerce_action_coords(action) + type_ = action.get("type") # pause_for_human : pause supervisée si safety_level/safety_checks ou mode supervised, diff --git a/agent_v0/server_v1/replay_engine.py b/agent_v0/server_v1/replay_engine.py index 37a996a36..371c4e499 100644 --- a/agent_v0/server_v1/replay_engine.py +++ b/agent_v0/server_v1/replay_engine.py @@ -1948,6 +1948,21 @@ def _edge_to_normalized_actions(edge, params: Dict[str, Any]) -> List[Dict[str, normalized["parameters"]["temperature"] = action_params.get("temperature") return [normalized] + elif action_type == "navigate": + normalized["type"] = "navigate" + normalized["parameters"] = { + "action": action_params.get("action", "login"), + "login_coords_var": action_params.get("login_coords_var", "navigate_login_coords"), + "password_coords_var": action_params.get("password_coords_var", "navigate_password_coords"), + "submit_coords_var": action_params.get("submit_coords_var", "navigate_submit_coords"), + } + login_config_keys = ("login_field", "password_field", "submit_button", + "success_elements", "context") + for key in login_config_keys: + if action_params.get(key) is not None: + normalized["parameters"][key] = action_params[key] + return [normalized] + else: logger.warning(f"Type d'action inconnu : {action_type}") return [] @@ -2045,6 +2060,38 @@ def _resolve_runtime_vars(value: Any, variables: Dict[str, Any]) -> Any: return value +def _coerce_action_coords(action: dict) -> dict: + """Cast x_pct/y_pct en float après template resolution par _resolve_runtime_vars. + + Politique : si string non convertible ou template encore present → skip + pause_for_human. + Idempotent sur les actions qui ont déjà des floats (mouse_click existant). + Jamais fallback 0.0/0.0 — un clic sur coords (0,0) = top-left = potentiellement dangereux. + + Appelé APRÈS _resolve_runtime_vars dans la boucle dispatch (api_stream.py). + """ + for key in ("x_pct", "y_pct"): + val = action.get(key) + if val is None: + continue + if isinstance(val, float): + continue # déjà float, idempotent + if isinstance(val, str): + # Template encore présent = non résolu par _resolve_runtime_vars + if val.startswith("{{") and val.endswith("}}"): + action["_skip_reason"] = f"coords_var non résolu: {key}={val}" + action["type"] = "pause_for_human" + action["safety_level"] = "high" + return action + try: + action[key] = float(val) + except (ValueError, TypeError): + action["_skip_reason"] = f"coords invalide: {key}={val}" + action["type"] = "pause_for_human" + action["safety_level"] = "high" + return action + return action + + # ========================================================================= # Handlers pour les actions exécutées côté serveur (extract_text, t2a_decision) # ========================================================================= diff --git a/docs/PLAN_D1_NAVIGATE_COORDS_IMPLEMENTATION_2026-07-02.md b/docs/PLAN_D1_NAVIGATE_COORDS_IMPLEMENTATION_2026-07-02.md new file mode 100644 index 000000000..c648bd51c --- /dev/null +++ b/docs/PLAN_D1_NAVIGATE_COORDS_IMPLEMENTATION_2026-07-02.md @@ -0,0 +1,474 @@ +# D1 — NavigateCoords Implementation Plan + +**Auteur**: Qwen +**Date**: 2026-07-02 +**Statut**: EN ATTENTE GO Dom/Claude — Option 1 vs Option 2 +**Référence**: `docs/DESIGN_NAVIGATE_COORDS_CONSUMPTION_2026-07-02.md` (3 gaps documentés) + +--- + +## Résumé des gaps à résoudre + +| Gap | Description | Fichier:Ligne | Preuve | +|-----|-------------|---------------|--------| +| A | Compiler bake floats littéraux — aucun template pour coords | `replay_engine.py:1821-1833` | `x_pct = px` (literal float) | +| B | Zéro consommateur de `navigate_*_coords` variables | `replay_engine.py` + `api_stream.py` | grep: 0 occurrences | +| C | `_edge_to_normalized_actions` pas de branche `navigate` → `[]` | `replay_engine.py:1951-1953` | `else: return []` | + +--- + +## Infrastructure existante (non-modifiée) + +### `_ALLOWED_ACTION_TYPES` (replay_engine.py:35-50) + +`"navigate"` est **déjà présent** (ligne 44). La validation de sécurité l'accepte déjà. + +### `_SERVER_SIDE_ACTION_TYPES` (replay_engine.py:55-64) + +`"navigate"` est **déjà présent** (ligne 59). Le dispatch loop le traite comme serveur-side. + +### `_handle_navigate_action` (core/navigation/__init__.py:24-113) + +Handler **déjà câblé** dans api_stream.py (ligne 4459-4467). Résout screenshot, OCR/VLM, stocke coords dans `replay_state["variables"]`. + +### `_resolve_runtime_vars` (replay_engine.py:2031-2045) + +Resolver **existant** pour `{{var.field}}` — récursif sur dict/list/str. Retourne `str(value)` au niveau leaf → float→string conversion nécessaire pour coords. + +--- + +## OPTION 1 — Compiler Injection (~2h) + +### Principe + +Ajouter une branche `navigate` dans `_edge_to_normalized_actions` + ajouter `coords_var` mechanism dans les branches `mouse_click`/`text_input` + runtime resolution + float conversion. + +### Patch P1-A : Branche navigate dans `_edge_to_normalized_actions` + +**Fichier**: `agent_v0/server_v1/replay_engine.py` +**Position**: Après `elif action_type == "llm_generate":` (ligne 1949), avant `else:` (ligne 1951) + +```python +elif action_type == "navigate": + normalized["type"] = "navigate" + normalized["parameters"] = { + "login_field": action_params.get("login_field", "login"), + "password_field": action_params.get("password_field", "password"), + "submit_button": action_params.get("submit_button", "submit"), + "login_coords_var": action_params.get("login_coords_var", "navigate_login_coords"), + "password_coords_var": action_params.get("password_coords_var", "navigate_password_coords"), + "submit_coords_var": action_params.get("submit_coords_var", "navigate_submit_coords"), + } + return [normalized] +``` + +**Justification**: Action serveur-side — pas besoin de `x_pct/y_pct` ni `target_spec`. Le handler `_handle_navigate_action` lit `parameters` pour config, résout coords au runtime. + +**Impact**: Gap C résolu. Navigate edge → 1 normalized action au lieu de `[]`. + +### Patch P1-B : coords_var dans branches mouse_click / text_input + +**Fichier**: `agent_v0/server_v1/replay_engine.py` +**Position**: Lignes 1844-1856 (branches click et type) + +**mouse_click** (ligne 1844-1848) — AVANT : + +```python +if action_type == "mouse_click": + normalized["type"] = "click" + normalized["x_pct"] = x_pct + normalized["y_pct"] = y_pct + normalized["button"] = action_params.get("button", "left") +``` + +**mouse_click** — APRES : + +```python +if action_type == "mouse_click": + normalized["type"] = "click" + coords_var = action_params.get("coords_var") + if coords_var: + normalized["x_pct"] = f"{{{{{coords_var}.x_pct}}}}" + normalized["y_pct"] = f"{{{{{coords_var}.y_pct}}}}" + normalized["coords_var"] = coords_var + else: + normalized["x_pct"] = x_pct + normalized["y_pct"] = y_pct + normalized["button"] = action_params.get("button", "left") +``` + +**text_input** (ligne 1850-1856) — AVANT : + +```python +elif action_type == "text_input": + normalized["type"] = "type" + text = action_params.get("text", "") + text = _substitute_variables(text, params, action_params.get("defaults", {})) + normalized["text"] = text + normalized["x_pct"] = x_pct + normalized["y_pct"] = y_pct +``` + +**text_input** — APRES : + +```python +elif action_type == "text_input": + normalized["type"] = "type" + text = action_params.get("text", "") + text = _substitute_variables(text, params, action_params.get("defaults", {})) + normalized["text"] = text + coords_var = action_params.get("coords_var") + if coords_var: + normalized["x_pct"] = f"{{{{{coords_var}.y_pct}}}}" + normalized["y_pct"] = f"{{{{{coords_var}.y_pct}}}}" + normalized["coords_var"] = coords_var + else: + normalized["x_pct"] = x_pct + normalized["y_pct"] = y_pct +``` + +**⚠️ BUG dans le draft ci-dessus**: `x_pct` template pour text_input doit être `{{coords_var.x_pct}}` (pas `.y_pct` deux fois). Version corrigée : + +```python +elif action_type == "text_input": + normalized["type"] = "type" + text = action_params.get("text", "") + text = _substitute_variables(text, params, action_params.get("defaults", {})) + normalized["text"] = text + coords_var = action_params.get("coords_var") + if coords_var: + normalized["x_pct"] = f"{{{{{coords_var}.x_pct}}}}" + normalized["y_pct"] = f"{{{{{coords_var}.y_pct}}}}" + normalized["coords_var"] = coords_var + else: + normalized["x_pct"] = x_pct + normalized["y_pct"] = y_pct +``` + +**Justification**: `coords_var` = mécanisme minimal pour déclarer "ces coords viennent de la variable navigate_login_coords". Template strings résolus au runtime par `_resolve_runtime_vars`. + +**Impact**: Gap A résolu. Gap B partiellement — les actions click/type deviennent consommatrices via `coords_var`. + +### Patch P1-C : Coercion helper après resolver existant + +**⚠️ CORRECTION IMPORTANT (2026-07-02 14:45)** : Le plan original sur-dimensionnait P1-C en proposant un second resolver runtime. **Codex a correctement identifié** que `_resolve_runtime_vars` est **déjà appelé** dans la boucle dispatch à `api_stream.py:4331-4335` : + +```python +# L4331-4335 (EXISTANT, ne pas modifier) +if owning_replay is not None: + runtime_vars = owning_replay.get("variables") or {} + if runtime_vars: + action = _resolve_runtime_vars(action, runtime_vars) +``` + +**Besoin réel = coercion helper uniquement** : `_resolve_runtime_vars` résout les templates `{{var.field}}` mais retourne `str(value)` au leaf → `{{navigate_login_coords.x_pct}}` devient `"0.15"` (string), pas `0.15` (float). Le client attend des floats pour x_pct/y_pct. + +**Fichier**: `agent_v0/server_v1/api_stream.py` +**Position**: Juste après la ligne 4335 (`action = _resolve_runtime_vars(action, runtime_vars)`) + +**Politique coords_var non résolu** : Skip + pause supervisée (AGREED Qwen/Codex). Jamais fallback 0.0/0.0 — un clic sur coords (0,0) = top-left = potentiellement dangereux. + +```python +def _coerce_action_coords(action: dict) -> dict: + """Cast x_pct/y_pct en float après template resolution par _resolve_runtime_vars. + + Politique : si string non convertible ou template encore present → skip + pause_for_human. + Idempotent sur les actions qui ont déjà des floats (mouse_click existant). + + Appelé APRÈS _resolve_runtime_vars dans la boucle dispatch (api_stream.py ~L4335). + """ + for key in ("x_pct", "y_pct"): + val = action.get(key) + if val is None: + continue + if isinstance(val, float): + continue # déjà float, idempotent + if isinstance(val, str): + # Template encore présent = non résolu par _resolve_runtime_vars + if val.startswith("{{") and val.endswith("}}"): + action["_skip_reason"] = f"coords_var non résolu: {key}={val}" + action["type"] = "pause_for_human" + action["safety_level"] = "high" + return action + try: + action[key] = float(val) + except (ValueError, TypeError): + action["_skip_reason"] = f"coords invalide: {key}={val}" + action["type"] = "pause_for_human" + action["safety_level"] = "high" + return action + return action +``` + +**Appel dans la boucle dispatch** (à insérer après L4335) : + +```python +# L4335 existant: action = _resolve_runtime_vars(action, runtime_vars) +# NOUVEAU — coercion coords après resolver existant +action = _coerce_action_coords(action) +``` + +**Justification**: `_resolve_runtime_vars` (existant à L4335) résout les templates → strings. `_coerce_action_coords` cast les strings en floats. Si template non résolu ou conversion impossible → pause_for_human (fail-safe), jamais fallback coords (0,0). Idempotent sur actions existantes (floats déjà présents). + +**Risques additionnels identifiés** : +1. **Résolution partielle** : si seul y_pct est résolu mais x_pct reste template → `_coerce_action_coords` convertit pause_for_human (safe stop, pas top-left click). +2. **Idempotence** : si action existante a déjà x_pct=0.35 (float) → helper passe sans modification (isinstance(float) → continue). +3. **Race condition** : variables dict partagé entre navigate handler et dispatch loop — mais BFS séquentiel garantit que navigate stocke AVANT click consomme. + +**Impact**: Gap B résolu — les coords navigate sont consommées au runtime par click/type, avec coercion + fail-safe. + +### Patch P1-D : VWB YAML schema — coords_var field + +**Fichier**: Schema VWB (workflow YAML format) — documentation +**Nature**: Ajout d'un champ `coords_var` dans `action.parameters` pour les steps `mouse_click` et `text_input` + +Exemple de workflow YAML avec navigate + click consommateur : + +```yaml +steps: + - id: s1 + action: + type: navigate + parameters: + login_coords_var: navigate_login_coords + password_coords_var: navigate_password_coords + to_node: n2 + + - id: s2 + action: + type: mouse_click + parameters: + coords_var: navigate_login_coords + button: left + to_node: n3 + + - id: s3 + action: + type: text_input + parameters: + coords_var: navigate_password_coords + text: "${password}" + to_node: n4 +``` + +**Justification**: Le VWB builder doit savoir qu'un click peut référencer une variable coords au lieu de fournir des pixels littéraux. C'est un changement de schema minimal (1 champ optionnel). + +--- + +## OPTION 2 — Declarative YAML Templates (~4h) + +### Principe + +Introduire un `coords_template` field dans les step definitions + un resolver typed qui extrait directement les floats du dict variables sans passage string→float. + +### Patch P2-A : Même branche navigate (identique à P1-A) + +Inchangé — Gap C résolu par la même branche. + +### Patch P2-B : coords_template field + typed resolver + +**Fichier**: `agent_v0/server_v1/replay_engine.py` + +Nouvelle fonction `_resolve_coords_template` : + +```python +def _resolve_coords_template( + coords_template: str, + variables: Dict[str, Any], +) -> Optional[Dict[str, float]]: + """Résoudre un coords_template en dict {x_pct, y_pct, bbox_pct} depuis variables. + + Retourne None si la variable n'existe pas ou si les champs ne sont pas floats. + Pas de conversion string→float : les valeurs doivent déjà être des floats. + """ + coords_dict = variables.get(coords_template) + if not coords_dict or not isinstance(coords_dict, dict): + return None + + x_pct = coords_dict.get("x_pct") + y_pct = coords_dict.get("y_pct") + + if not isinstance(x_pct, (int, float)) or not isinstance(y_pct, (int, float)): + logger.warning( + f"coords_template {coords_template}: x_pct/y_pct not numeric " + f"(x_pct={x_pct}, y_pct={y_pct})" + ) + return None + + result = {"x_pct": float(x_pct), "y_pct": float(y_pct)} + + bbox_pct = coords_dict.get("bbox_pct") + if bbox_pct: + result["bbox_pct"] = bbox_pct # tuple, pas de conversion + + return result +``` + +### Patch P2-C : Branches mouse_click / text_input avec coords_template + +```python +if action_type == "mouse_click": + normalized["type"] = "click" + coords_template = action_params.get("coords_template") + if coords_template: + normalized["coords_template"] = coords_template + # x_pct/y_pct résolus au runtime par _resolve_coords_template + normalized["x_pct"] = None # placeholder → resolved at runtime + normalized["y_pct"] = None + else: + normalized["x_pct"] = x_pct + normalized["y_pct"] = y_pct + normalized["button"] = action_params.get("button", "left") +``` + +### Patch P2-D : Runtime resolution typed dans dispatch loop + +```python +# --- Résolution coords_template (typed, no string→float) --- +if action.get("coords_template"): + variables = owning_replay.replay_state.get("variables", {}) + from agent_v0.server_v1.replay_engine import _resolve_coords_template + coords = _resolve_coords_template(action["coords_template"], variables) + if coords: + action["x_pct"] = coords["x_pct"] + action["y_pct"] = coords["y_pct"] + if coords.get("bbox_pct"): + action["bbox_pct"] = coords["bbox_pct"] + del action["coords_template"] # résolu, pas besoin de garder le ref + else: + logger.warning( + f"coords_template {action['coords_template']} unresolved — skipping action" + ) + # skip → next action +``` + +**Avantage Option 2**: Pas de string→float conversion. Les coords restent des floats du navigate handler au click handler. Plus clean, plus safe. + +**Inconvénient Option 2**: `_resolve_coords_template` est une nouvelle fonction + le `x_pct = None` placeholder nécessite que le client tolère les None temporairement (ou que la resolution se fasse AVANT transmission). Le schema VWB doit documenter `coords_template` comme champ alternatif à `by_position`. + +--- + +## Comparative Table — Patches + +| Aspect | Option 1 (Compiler Injection) | Option 2 (YAML Templates) | +|--------|-------------------------------|---------------------------| +| **Gap C fix** | Identique (branche navigate) | Identique (branche navigate) | +| **Gap A fix** | Template strings `{{var.field}}` dans x_pct/y_pct | `x_pct = None` placeholder + typed resolver | +| **Gap B fix** | `_resolve_runtime_vars` + float conversion | `_resolve_coords_template` typed (no conversion) | +| **String→float** | Nécessaire (design smell) | Aucun (floats passent directement) | +| **Nouvelles fonctions** | 0 (reuse `_resolve_runtime_vars`) | 1 (`_resolve_coords_template`) | +| **Schema VWB** | 1 champ `coords_var` | 1 champ `coords_template` | +| **Temps implémentation** | ~2h | ~4h | +| **Extensibilité** | Limitée (coupling navigate→click) | Extensible (any coords source) | +| **Risque POC** | Minimal | Moyen (placeholder None + typed resolver) | +| **Migration post-POC** | Option 2 refactor needed | Already Option 2 | + +--- + +## Test Rouge Proposal + +### Test TR-1 : Prouve Gap C (navigate → []) + +```python +def test_edge_to_normalized_actionsnavigate_returns_empty(): + """Gap C: _edge_to_normalized_actions retourne [] pour navigate type.""" + from agent_v0.server_v1.replay_engine import _edge_to_normalized_actions + + edge = WorkflowEdge( + edge_id="e1", + from_node="n1", + to_node="n2", + action=ActionSpec( + type="navigate", + parameters={"login_coords_var": "navigate_login_coords"}, + ), + ) + result = _edge_to_normalized_actions(edge, {}) + # BEFORE fix: result == [] (Gap C) + # AFTER fix: result == [{"type": "navigate", "parameters": {...}}] + assert len(result) >= 1, "navigate must produce at least 1 normalized action" + assert result[0]["type"] == "navigate" +``` + +### Test TR-2 : Prouve coords_var resolution (Option 1) + +```python +def test_coords_var_runtime_resolution(): + """Option 1: coords_var template resolved + float conversion.""" + from agent_v0.server_v1.replay_engine import _resolve_runtime_vars + + variables = { + "navigate_login_coords": { + "x_pct": 0.15, + "y_pct": 0.35, + "method": "ocr+vlm", + } + } + action = { + "type": "click", + "x_pct": "{{navigate_login_coords.x_pct}}", + "y_pct": "{{navigate_login_coords.y_pct}}", + "coords_var": "navigate_login_coords", + } + resolved = _resolve_runtime_vars(action, variables) + # resolved["x_pct"] == "0.15" (string) → needs float conversion + assert resolved["x_pct"] == "0.15" # string from resolver + assert float(resolved["x_pct"]) == 0.15 # conversion works +``` + +### Test TR-3 : Prouve coords_template typed resolution (Option 2) + +```python +def test_coords_template_typed_resolution(): + """Option 2: coords_template returns floats directly, no conversion.""" + from agent_v0.server_v1.replay_engine import _resolve_coords_template + + variables = { + "navigate_login_coords": { + "x_pct": 0.15, + "y_pct": 0.35, + "method": "ocr+vlm", + } + } + coords = _resolve_coords_template("navigate_login_coords", variables) + assert coords is not None + assert isinstance(coords["x_pct"], float) # float, not string + assert coords["x_pct"] == 0.15 + assert coords["y_pct"] == 0.35 +``` + +--- + +## BFS Ordonnancement — Risque scheduling + +Le dispatch loop (`api_stream.py:get_next_action`) traite les actions séquentiellement par path BFS. Navigate est serveur-side → traité en boucle interne avant transmission. Click/type consommant coords_var/template sont visuels → transmis au client. + +**Flows correct**: +1. BFS traverse edge navigate → normalized action `type=navigate` +2. Loop interne: `_handle_navigate_action` → stocke coords dans variables +3. BFS traverse edge click → normalized action avec `coords_var` +4. Loop: resolution runtime → float conversion → transmission client + +**Risque**: Si le BFS ordonnance le click AVANT le navigate (par ex. edges parallèles), coords_var sera unresolved → fallback 0.0/0.0. + +**Mitigation**: VWB builder doit garantir que navigate edge précède click consommateur dans le path topologique. C'est une contrainte de schema, pas un bug runtime. + +--- + +## Decision Matrix + +| Critère | Option 1 | Option 2 | Recommandation POC | +|---------|----------|----------|--------------------| +| Temps | 2h | 4h | **Option 1** | +| Risque runtime | string→float edge | None placeholder | Option 1 (conversion simple) | +| Extensibilité | Limitée | Extensible | Option 1 pour POC, migration Option 2 post-POC | +| Code mort risk | 0 nouvelles fonctions | 1 nouvelle fonction | Option 1 | +| Test coverage | TR-1 + TR-2 | TR-1 + TR-3 | Option 1 | + +**Recommandation Qwen**: Option 1 pour POC (2h, minimal risk, reuse infrastructure existante). Migration Option 2 post-POC si scaling multi-coords est confirmé (search, dossier). + +**GO requis**: Dom + Claude (décision D1). + +--- + +*Qwen — plan implémentation D1 déposé, awaiting GO.* diff --git a/docs/QG_REVIEW_D1_NAVIGATE_COORDS_2026-07-02.md b/docs/QG_REVIEW_D1_NAVIGATE_COORDS_2026-07-02.md new file mode 100644 index 000000000..9d75df9e6 --- /dev/null +++ b/docs/QG_REVIEW_D1_NAVIGATE_COORDS_2026-07-02.md @@ -0,0 +1,135 @@ +# QG Review Framework — D1 NavigateCoords Patch + +**Auteur**: Qwen +**Date**: 2026-07-02 +**Statut**: EN ATTENTE patch Codex +**Scope**: Review du patch D1 (Option 1 — Compiler Injection) produit par Codex + +--- + +## Baseline test coverage (pré-patch) + +| Fichier | Classes | Tests | Rôle | +|---------|---------|-------|------| +| `test_navigate_handler_e2e.py` | 4 | 8 | Handler mock — nominal, OCR miss, no screenshot, never-fail | +| `test_navigate_wiring.py` | 4 | 5 | Import/wiring non-regression | +| `test_action_resolver.py` | 6 | 10 | NavigateCoords, NavigateResult, grounded_to_coords, navigate_login | +| `test_coords_consumption_gap.py` | 3 | 10 | **GAP DOCUMENTATION** — résolution viable, compiler gap, navigate→[] | +| **Total** | **17** | **33** | | + +**Tests critiques à mettre à jour après D1 patch**: +- `test_coords_consumption_gap.py::test_navigate_action_type_unknown` — affirme actuellement `actions == []`; doit affirmer `len(actions) >= 1` et `actions[0]["type"] == "navigate"` après D1 +- `test_coords_consumption_gap.py::TestCompilerGapLiteralFloats` — 4 tests documentant le gap literal-floats; après D1, les tests coords_var doivent affirmer templates strings ARE produites quand coords_var présent + +**Point d'insertion exact D1**: +- Fichier: `replay_engine.py` +- Entre `elif action_type == "llm_generate"` (retourne `[normalized]` ~L1949) et `else:` clause (~L1953) +- Navigate branch: `elif action_type == "navigate"` → `normalized["type"] = "navigate"` + parameters dict → `return [normalized]` + +**P1-C root cause**: +- `_resolve_runtime_vars_in_str` (L2025): `return str(value)` — tout {{var.field}} résolu devient string "0.35" pas float 0.35 +- Coercion helper `_coerce_action_coords` doit agir APRÈS `_resolve_runtime_vars` (L4335), AVANT `type_ = action.get("type")` (L4337) + +--- + +## Critères de review — Checklist + +### C1 : Branche navigate dans `_edge_to_normalized_actions` (Gap C) + +| # | Critère | GO | NOGO | +|---|---------|----|------| +| C1-1 | Branche `elif action_type == "navigate"` ajoutée entre `llm_generate` (L1949) et `else` (L1951) | Present, position correcte | Absente ou mal positionnée | +| C1-2 | `normalized["type"] = "navigate"` | Oui | Type incorrect | +| C1-3 | Parameters dict avec `login_coords_var`, `password_coords_var`, `submit_coords_var` | Noms exacts, valeurs default | Noms divergent ou absents | +| C1-4 | Retourne `[normalized]` (1 action serveur-side) | `[normalized]` | `[]` ou autre | +| C1-5 | Test TR-1 : `test_navigate_action_type_unknown` mis à jour — affirme `len(result) >= 1` et `result[0]["type"] == "navigate"` | Test updated + passes | Test non mis à jour ou fails | + +### C2 : coords_var dans branches mouse_click / text_input (Gap A) + +| # | Critère | GO | NOGO | +|---|---------|----|------| +| C2-1 | `coords_var = action_params.get("coords_var")` check dans mouse_click | Present | Absent | +| C2-2 | Si coords_var → `x_pct = f"{{{{{coords_var}.x_pct}}}"` et `y_pct = f"{{{{{coords_var}.y_pct}}}"` | Template strings correctes | Syntaxe template incorrecte ou .y_pct pour x_pct | +| C2-3 | Si coords_var absent → literal floats comme avant (fallback existant) | Branch else intacte | Branch else modifiée ou supprimée | +| C2-4 | `normalized["coords_var"] = coords_var` ajouté pour traçabilité | Oui | Absent | +| C2-5 | Même mécanisme dans text_input branch | Identique à mouse_click | Absent ou divergent | +| C2-6 | BUG vérifié : text_input x_pct template = `{{coords_var.x_pct}}` (pas `.y_pct` deux fois) | Correct | y_pct en double | + +### C3 : `_coerce_action_coords()` helper (Gap B / P1-C) + +| # | Critère | GO | NOGO | +|---|---------|----|------| +| C3-1 | Helper défini dans api_stream.py (pas replay_engine.py) | api_stream.py | Autre fichier | +| C3-2 | Appel APRÈS `_resolve_runtime_vars` (L4335), AVANT `type_ = action.get("type")` (L4337) | Position correcte | Avant resolver ou après type_ check | +| C3-3 | float pass-through : `isinstance(val, float) → continue` | Idempotent sur actions existantes | Pas de float check → conversion inutile | +| C3-4 | string→float : `try: action[key] = float(val)` | Conversion correcte | Pas de try/except → crash possible | +| C3-5 | Template non résolu → pause_for_human (pas fallback 0.0/0.0) | `val.startswith("{{") and val.endswith("}}")` → pause_for_human | Fallback 0.0/0.0 ou autre coords dangereux | +| C3-6 | Conversion impossible → pause_for_human | ValueError/TypeError → pause_for_human | Exception non catchée | +| C3-7 | `_skip_reason` documenté pour debug | Oui | Absent | +| C3-8 | `safety_level = "high"` pour pause_for_human | Oui | Absent ou autre valeur | +| C3-9 | Retourne action mutée (pas de new dict) | Mutation in-place | Copie → risque race | +| C3-10 | Keys itérées = ("x_pct", "y_pct") uniquement | Pas de sur-itération | Autres keys modifiées | + +### C4 : Never-fail contract + +| # | Critère | GO | NOGO | +|---|---------|----|------| +| C4-1 | `_handle_navigate_action` ne lance jamais d'exception non catchée | Contract preserved | Nouvelle exception possible | +| C4-2 | `_coerce_action_coords` ne lance jamais — tout cas couvert par try/except ou pause_for_human | Contract preserved | Exception possible | + +### C5 : Limites de scope POC + +| # | Critère | GO | NOGO | +|---|---------|----|------| +| C5-1 | Maximum 4 fichiers modifiés | ≤ 4 | > 4 | +| C5-2 | Pas de changement schema VWB dans POC patch | Pas de modification VWB code | VWB code modifié | +| C5-3 | Pas de nouvelle dépendance pip | 0 nouvelles deps | Nouvelle dep | +| C5-4 | Pas de modification OmniParser wiring | `_omniparser_available = False` intact | Modifié | + +### C6 : Test coverage + +| # | Critère | GO | NOGO | +|---|---------|----|------| +| C6-1 | TR-1 : navigate compile à 1 action (pas []) | Passes | Fails | +| C6-2 | TR-2 : coords_var template resolution + float conversion | Passes | Fails | +| C6-3 | Test `_coerce_action_coords` : float pass-through | Passes | Absent | +| C6-4 | Test `_coerce_action_coords` : string→float conversion | Passes | Absent | +| C6-5 | Test `_coerce_action_coords` : template non résolu → pause_for_human | Passes | Absent | +| C6-6 | Test `_coerce_action_coords` : conversion impossible → pause_for_human | Passes | Absent | +| C6-7 | Test idempotence : action existante float non modifiée | Passes | Absent | +| C6-8 | `pytest tests/unit/` passe en intégralité | 0 failures | ≥1 failure | + +### C7 : Risques additionnels (3 identifiés dans PLAN_D1) + +| # | Risque | Mitigation attendue | GO | NOGO | +|---|--------|--------------------|----|------| +| C7-1 | Résolution partielle (x_pct résolu, y_pct template) | `_coerce_action_coords` → pause_for_human si ANY key unresolved | Mitigation presente | Pas de mitigation | +| C7-2 | Idempotence sur mouse_click existant | `isinstance(val, float) → continue` | Idempotent | Risque de double conversion | +| C7-3 | Race condition sur variables dict partagé | BFS séquentiel garantit navigate→click ordre | Note dans code/doc | Pas de mention | + +--- + +## Procédure de review + +1. **Lire le patch** : `git diff` sur les fichiers modifiés par Codex +2. **Vérifier chaque critère C1-C7** : GO/NOGO par ligne +3. **Exécuter les tests** : `cd /home/dom/ai/rpa_vision_v3 && .venv/bin/python -m pytest tests/unit/ -x -v` +4. **Produire le verdict** : Table GO/NOGO avec justification + verdict global + +## Format verdict + +``` +## QG Verdict — D1 NavigateCoords Patch + +| Critère | GO/NOGO | Note | +|---------|---------|------| +| C1 | GO | Branche navigate correcte | +| C2 | NOGO | BUG: y_pct en double dans text_input | +| ... | ... | ... | + +**Verdict global**: GO / NOGO (avec réserves listées) +``` + +--- + +*Qwen — framework QG prêt, awaiting Codex patch pour exécution.* diff --git a/tests/unit/test_coords_consumption_gap.py b/tests/unit/test_coords_consumption_gap.py index 4adaa6e75..1188db8c5 100644 --- a/tests/unit/test_coords_consumption_gap.py +++ b/tests/unit/test_coords_consumption_gap.py @@ -23,6 +23,7 @@ from agent_v0.server_v1.replay_engine import ( _edge_to_normalized_actions, _resolve_runtime_vars, _resolve_runtime_vars_in_str, + _coerce_action_coords, ) @@ -192,11 +193,185 @@ class TestCompilerGapLiteralFloats: assert action["x_pct"] == 0.30 assert action["y_pct"] == 0.50 - def test_navigate_action_type_unknown(self): - """navigate action type is NOT handled by _edge_to_normalized_actions — - falls into the else branch logging "Type d'action inconnu".""" - edge = _FakeEdge(_FakeAction("navigate", parameters={"target": "login"})) + def test_navigate_action_type_handled(self): + """navigate action type IS now handled by _edge_to_normalized_actions — + produces a normalized action dict with type='navigate' and parameters.""" + edge = _FakeEdge(_FakeAction("navigate", parameters={"action": "login"})) actions = _edge_to_normalized_actions(edge, params={}) - # navigate produces empty actions — not compiled at all - assert actions == [] + assert len(actions) == 1 + action = actions[0] + assert action["type"] == "navigate" + assert "parameters" in action + assert action["parameters"]["action"] == "login" + assert action["parameters"]["login_coords_var"] == "navigate_login_coords" + assert action["parameters"]["password_coords_var"] == "navigate_password_coords" + assert action["parameters"]["submit_coords_var"] == "navigate_submit_coords" + + +class TestNavigateBranchNonRegression: + """Non-regression tests for the navigate branch in _edge_to_normalized_actions. + + These verify the D1 fix: navigate action type now produces a proper + normalized dict that the server-side dispatch can route to + _handle_navigate_action. + """ + + def test_navigate_default_params(self): + """Navigate with minimal params fills defaults.""" + edge = _FakeEdge(_FakeAction("navigate", parameters={})) + actions = _edge_to_normalized_actions(edge, params={}) + + assert len(actions) == 1 + action = actions[0] + assert action["type"] == "navigate" + assert action["parameters"]["action"] == "login" + assert action["parameters"]["login_coords_var"] == "navigate_login_coords" + assert action["parameters"]["password_coords_var"] == "navigate_password_coords" + assert action["parameters"]["submit_coords_var"] == "navigate_submit_coords" + + def test_navigate_custom_vars(self): + """Navigate with custom coords_var names propagates them.""" + edge = _FakeEdge( + _FakeAction( + "navigate", + parameters={ + "login_coords_var": "login_pos", + "password_coords_var": "pwd_pos", + "submit_coords_var": "btn_pos", + }, + ) + ) + actions = _edge_to_normalized_actions(edge, params={}) + + assert len(actions) == 1 + params = actions[0]["parameters"] + assert params["login_coords_var"] == "login_pos" + assert params["password_coords_var"] == "pwd_pos" + assert params["submit_coords_var"] == "btn_pos" + + def test_navigate_login_config_overrides(self): + """Navigate forwards login_config keys to parameters.""" + edge = _FakeEdge( + _FakeAction( + "navigate", + parameters={ + "login_field": "username", + "password_field": "pass", + "submit_button": "connexion", + "context": "DPI urgences", + }, + ) + ) + actions = _edge_to_normalized_actions(edge, params={}) + + assert len(actions) == 1 + params = actions[0]["parameters"] + assert params["login_field"] == "username" + assert params["password_field"] == "pass" + assert params["submit_button"] == "connexion" + assert params["context"] == "DPI urgences" + + def test_navigate_base_fields_present(self): + """Navigate action retains edge_id, from_node, to_node, action_id.""" + edge = _FakeEdge(_FakeAction("navigate", parameters={"action": "login"})) + actions = _edge_to_normalized_actions(edge, params={}) + + action = actions[0] + assert "edge_id" in action + assert "from_node" in action + assert "to_node" in action + assert "action_id" in action + assert action["edge_id"] == "edge_coords_gap" + assert action["from_node"] == "node_src" + assert action["to_node"] == "node_dst" + + def test_navigate_no_x_y_pct(self): + """Navigate action does NOT include x_pct/y_pct — coords come from handler.""" + edge = _FakeEdge(_FakeAction("navigate", parameters={"action": "login"})) + actions = _edge_to_normalized_actions(edge, params={}) + + action = actions[0] + assert "x_pct" not in action + assert "y_pct" not in action + + +# ── Test P1-C: _coerce_action_coords ────────────────────────────────── + + +class TestCoerceActionCoords: + """P1-C coercion helper: cast x_pct/y_pct strings to floats after + _resolve_runtime_vars template resolution. + + Chain: navigate → variables → _resolve_runtime_vars → strings → + _coerce_action_coords → floats. Fail-safe on unresolved/invalid. + """ + + def test_float_idempotent(self): + """Float values pass through unchanged — existing mouse_click actions unaffected.""" + action = {"type": "click", "x_pct": 0.15, "y_pct": 0.07} + result = _coerce_action_coords(action) + assert result["x_pct"] == 0.15 + assert result["y_pct"] == 0.07 + assert result["type"] == "click" + + def test_string_to_float_conversion(self): + """Resolved template strings "0.35" → 0.35 (float) after _resolve_runtime_vars.""" + action = {"type": "click", "x_pct": "0.35", "y_pct": "0.07"} + result = _coerce_action_coords(action) + assert result["x_pct"] == 0.35 + assert isinstance(result["x_pct"], float) + assert result["y_pct"] == 0.07 + assert isinstance(result["y_pct"], float) + assert result["type"] == "click" + + def test_unresolved_template_pause_for_human(self): + """Unresolved {{var.field}} template → pause_for_human, never fallback 0.0.""" + action = {"type": "click", "x_pct": "{{navigate_login_coords.x_pct}}", "y_pct": 0.07} + result = _coerce_action_coords(action) + assert result["type"] == "pause_for_human" + assert result["safety_level"] == "high" + assert "coords_var non résolu" in result["_skip_reason"] + assert "{{navigate_login_coords.x_pct}}" in result["_skip_reason"] + + def test_invalid_string_pause_for_human(self): + """Non-convertible string "abc" → pause_for_human, no fallback coords.""" + action = {"type": "click", "x_pct": "abc", "y_pct": 0.07} + result = _coerce_action_coords(action) + assert result["type"] == "pause_for_human" + assert result["safety_level"] == "high" + assert "coords invalide" in result["_skip_reason"] + assert "abc" in result["_skip_reason"] + + def test_no_coords_keys_unchanged(self): + """Action without x_pct/y_pct passes through unchanged.""" + action = {"type": "navigate", "parameters": {"action": "login"}} + result = _coerce_action_coords(action) + assert result == action + + def test_full_chain_resolve_then_coerce(self): + """Full chain: _resolve_runtime_vars → _coerce_action_coords → floats.""" + variables = { + "navigate_login_coords": { + "x_pct": 0.15, + "y_pct": 0.35, + "method": "ocr_anchor", + } + } + action = { + "type": "click", + "x_pct": "{{navigate_login_coords.x_pct}}", + "y_pct": "{{navigate_login_coords.y_pct}}", + } + # Step 1: resolve templates (produces strings) + resolved = _resolve_runtime_vars(action, variables) + assert resolved["x_pct"] == "0.15" # string after resolver + assert resolved["y_pct"] == "0.35" # string after resolver + + # Step 2: coerce strings to floats + coerced = _coerce_action_coords(resolved) + assert coerced["x_pct"] == 0.15 # float after coercion + assert isinstance(coerced["x_pct"], float) + assert coerced["y_pct"] == 0.35 + assert isinstance(coerced["y_pct"], float) + assert coerced["type"] == "click"