Compare commits
5 Commits
bd1c9d2c8a
...
feat/push-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c371c9775f | ||
|
|
931cf13217 | ||
|
|
fd9efdbbf5 | ||
|
|
19187e633e | ||
|
|
9a34ecded6 |
@@ -420,6 +420,7 @@ from .replay_engine import (
|
||||
_edge_to_normalized_actions,
|
||||
_substitute_variables,
|
||||
_resolve_runtime_vars,
|
||||
_coerce_action_coords,
|
||||
_SERVER_SIDE_ACTION_TYPES,
|
||||
_handle_extract_text_action,
|
||||
_handle_extract_table_action,
|
||||
@@ -4334,6 +4335,9 @@ async def get_next_action(session_id: str, machine_id: str = "default"):
|
||||
if runtime_vars:
|
||||
action = _resolve_runtime_vars(action, runtime_vars)
|
||||
|
||||
# Coercion coords: cast x_pct/y_pct en float après resolver
|
||||
action = _coerce_action_coords(action)
|
||||
|
||||
type_ = action.get("type")
|
||||
|
||||
# pause_for_human : pause supervisée si safety_level/safety_checks ou mode supervised,
|
||||
|
||||
@@ -1948,6 +1948,21 @@ def _edge_to_normalized_actions(edge, params: Dict[str, Any]) -> List[Dict[str,
|
||||
normalized["parameters"]["temperature"] = action_params.get("temperature")
|
||||
return [normalized]
|
||||
|
||||
elif action_type == "navigate":
|
||||
normalized["type"] = "navigate"
|
||||
normalized["parameters"] = {
|
||||
"action": action_params.get("action", "login"),
|
||||
"login_coords_var": action_params.get("login_coords_var", "navigate_login_coords"),
|
||||
"password_coords_var": action_params.get("password_coords_var", "navigate_password_coords"),
|
||||
"submit_coords_var": action_params.get("submit_coords_var", "navigate_submit_coords"),
|
||||
}
|
||||
login_config_keys = ("login_field", "password_field", "submit_button",
|
||||
"success_elements", "context")
|
||||
for key in login_config_keys:
|
||||
if action_params.get(key) is not None:
|
||||
normalized["parameters"][key] = action_params[key]
|
||||
return [normalized]
|
||||
|
||||
else:
|
||||
logger.warning(f"Type d'action inconnu : {action_type}")
|
||||
return []
|
||||
@@ -2045,6 +2060,38 @@ def _resolve_runtime_vars(value: Any, variables: Dict[str, Any]) -> Any:
|
||||
return value
|
||||
|
||||
|
||||
def _coerce_action_coords(action: dict) -> dict:
|
||||
"""Cast x_pct/y_pct en float après template resolution par _resolve_runtime_vars.
|
||||
|
||||
Politique : si string non convertible ou template encore present → skip + pause_for_human.
|
||||
Idempotent sur les actions qui ont déjà des floats (mouse_click existant).
|
||||
Jamais fallback 0.0/0.0 — un clic sur coords (0,0) = top-left = potentiellement dangereux.
|
||||
|
||||
Appelé APRÈS _resolve_runtime_vars dans la boucle dispatch (api_stream.py).
|
||||
"""
|
||||
for key in ("x_pct", "y_pct"):
|
||||
val = action.get(key)
|
||||
if val is None:
|
||||
continue
|
||||
if isinstance(val, float):
|
||||
continue # déjà float, idempotent
|
||||
if isinstance(val, str):
|
||||
# Template encore présent = non résolu par _resolve_runtime_vars
|
||||
if val.startswith("{{") and val.endswith("}}"):
|
||||
action["_skip_reason"] = f"coords_var non résolu: {key}={val}"
|
||||
action["type"] = "pause_for_human"
|
||||
action["safety_level"] = "high"
|
||||
return action
|
||||
try:
|
||||
action[key] = float(val)
|
||||
except (ValueError, TypeError):
|
||||
action["_skip_reason"] = f"coords invalide: {key}={val}"
|
||||
action["type"] = "pause_for_human"
|
||||
action["safety_level"] = "high"
|
||||
return action
|
||||
return action
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Handlers pour les actions exécutées côté serveur (extract_text, t2a_decision)
|
||||
# =========================================================================
|
||||
|
||||
170
docs/BENCH_OCR_PPOCRV5_2026-07-02.md
Normal file
170
docs/BENCH_OCR_PPOCRV5_2026-07-02.md
Normal file
@@ -0,0 +1,170 @@
|
||||
# Benchmark OCR PP-OCRv5 CPU — 02/07/2026
|
||||
|
||||
> **Label**: baseline CPU, non verdict GPU
|
||||
> **Machine**: Ryzen 9 9950X 32 threads, 123GB RAM, RTX 5070 12GB VRAM, CUDA driver 580.159.03/13.0
|
||||
> **Image**: `shot_0172_full.png` (2560×1600, 721K, RGB) — capture écran Windows Léa
|
||||
> **PaddleOCR**: 3.4.0, paddlepaddle 3.3.1 CPU-only (non compilé CUDA)
|
||||
|
||||
---
|
||||
|
||||
## 1. Résultats synthèse
|
||||
|
||||
| Engine | Cold (s) | Warm (s) | Detections | Mem init (MB) | Mem peak (MB) | Statut |
|
||||
|--------|----------|----------|------------|---------------|---------------|--------|
|
||||
| **docTR CPU** | 0.776 | 0.717 | 139 | 263.2 | 263.2 | ✅ OK |
|
||||
| **EasyOCR CPU** | 4.878 | 4.856 | 54 | 0.6 | 156.9 | ✅ OK |
|
||||
| **PP-OCRv5 CPU** | — | — | — | — | — | ❌ BLOCKED |
|
||||
|
||||
---
|
||||
|
||||
## 2. PP-OCRv5 CPU — VERDICT: BLOCKED
|
||||
|
||||
### Crash récurrent
|
||||
|
||||
Toute inference PaddleOCR sur paddlepaddle 3.3.1 CPU-only crash systématiquement :
|
||||
|
||||
```
|
||||
(Unimplemented) ConvertPirAttribute2RuntimeAttribute not support
|
||||
[pir::ArrayAttribute<pir::DoubleAttribute>]
|
||||
(at /paddle/paddle/fluid/framework/new_executor/instruction/onednn/onednn_instruction.cc:116)
|
||||
```
|
||||
|
||||
### Root cause
|
||||
|
||||
Bug dans le **PIR new executor** de paddlepaddle 3.3.1 CPU-only : l'instruction OneDNN
|
||||
tente de convertir un `ArrayAttribute<DoubleAttribute>` en runtime attribute, opération
|
||||
non implémentée. Ce bug est :
|
||||
|
||||
- **NON model-spécifique** : v3_mobile_det, v4_mobile_det, v5_mobile_det crashent tous
|
||||
- **NON version-spécifique** : PP-OCRv3, v4 (fr absent), v5 crashent tous
|
||||
- **NON API-spécifique** : `ocr()` (deprecated) et `predict()` crashent identiquement
|
||||
- **NON contournable** par flags : `FLAGS_use_mkldnn=0`, `FLAGS_use_pir_api=0` n'ont aucun effet
|
||||
|
||||
### 7 approches testées — TOUTES FAILED
|
||||
|
||||
| # | Approche | Résultat |
|
||||
|---|----------|----------|
|
||||
| 1 | `FLAGS_use_mkldnn=0` via `os.environ` | Same crash |
|
||||
| 2 | `det='PP-OCRv5_mobile_det'` param | ValueError "Unknown argument: det" (PaddleOCR 3.4.0 rejette ce param) |
|
||||
| 3 | `FLAGS_use_mkldnn=0` shell-level avant Python | Same crash |
|
||||
| 4 | `text_detection_model_name='PP-OCRv5_mobile_det'` | mobile_det DL OK → inference crash (same OneDNN) |
|
||||
| 5 | `ocr_version='PP-OCRv4', lang='fr'` | ValueError "No models available for language 'fr' and PP-OCRv4" |
|
||||
| 6 | PP-OCRv3 + `ocr(img, cls=True)` legacy | DeprecationWarning → TypeError sur `cls` kwarg → predict() → same crash |
|
||||
| 7 | `FLAGS_use_pir_api=0` shell + os level | Same crash |
|
||||
|
||||
### PaddleOCR 3.4.0 __init__ params inspectés
|
||||
|
||||
28 paramètres au total. **Pas** de `enable_mkldnn`, `use_pir`, ou `det`. Param de détection
|
||||
remplacé par `text_detection_model_name`. API v3.4.0 : `use_angle_cls` deprecated
|
||||
→ `use_textline_orientation=True`, `show_log` supprimé (ValueError si utilisé).
|
||||
|
||||
### Incompatibilité downgrade
|
||||
|
||||
paddlepaddle 2.6.2 existe mais **incompatible** avec PaddleOCR 3.4.0 (requires ≥3.x).
|
||||
PaddleOCR 2.x serait compatible avec paddlepaddle 2.6.2 mais API/outils complètement
|
||||
différents — non évalué dans ce bench.
|
||||
|
||||
### Conclusion
|
||||
|
||||
**PP-OCRv5 CPU = BLOCKED**. Bug upstream dans paddlepaddle CPU-only binary, aucune
|
||||
workaround applicative possible. Seules alternatives :
|
||||
|
||||
1. **paddlepaddle GPU binary** (RTX 5070 + CUDA 13.0 compatible) → bench GPU séparé
|
||||
2. **Fix upstream** paddlepaddle (PR PIR executor OneDNN)
|
||||
3. **Downgrade PaddleOCR 2.x + paddlepaddle 2.6.2** (API legacy, non testé)
|
||||
|
||||
---
|
||||
|
||||
## 3. docTR CPU — Résultats détaillés
|
||||
|
||||
- **Cold latency**: 0.776s (incl. model loading)
|
||||
- **Warm latency**: 0.717s
|
||||
- **Detections**: 139 (mot-level, agressif — fragmente "Dites", "Sortie", "de", "veille")
|
||||
- **Mémoire**: 263.2MB stable (init = peak)
|
||||
- **Qualité**: haute sur mots courts, fragmente les phrases longues
|
||||
- **Confiance**: variable (0.26→0.99), nombreux tokens <0.7
|
||||
|
||||
### Observations docTR
|
||||
|
||||
- Word-level detection = 139 items → beaucoup de fragments 1-2 lettres
|
||||
- Bonne qualité sur labels UI ("Mode", "veille", "RPA", "VWB", "Python", "proxmox")
|
||||
- Fragmente les phrases ("Sortie de veille de l'accès vocal ou appuyez..." → 12 mots isolés)
|
||||
- IP correctement détecté : "192.168.1.40:3002" (conf 0.90)
|
||||
- Faux positifs : "0", "E03", "E", "€" isolés avec conf <0.4
|
||||
|
||||
---
|
||||
|
||||
## 4. EasyOCR CPU — Résultats détaillés
|
||||
|
||||
- **Cold latency**: 4.878s (heavy model loading)
|
||||
- **Warm latency**: 4.856s
|
||||
- **Detections**: 54 (line-level, plus conservatif)
|
||||
- **Mémoire**: 0.6MB init → 156.9MB peak
|
||||
- **Qualité**: bonne sur lignes complètes, plus robuste sur phrases
|
||||
|
||||
### Observations EasyOCR
|
||||
|
||||
- Line-level detection = 54 items → phrases plus cohérentes
|
||||
- Cold start très lent (~6x docTR : 4.878s vs 0.776s) et warm quasi identique au cold (4.856s)
|
||||
- Meilleur sur textes longs, moins de fragmentation
|
||||
- Peak mémoire plus faible que docTR (156.9 MB peak vs 263.2 MB pour docTR)
|
||||
|
||||
---
|
||||
|
||||
## 5. Comparaison avec baselines Mai 2026
|
||||
|
||||
> Bench Mai 2026 — image `landing_wide.png`, critère 11 items de référence
|
||||
|
||||
| Engine | Score Mai (11 ref) | Score Juillet (detections) | Latency warm | Commentaire |
|
||||
|--------|-------------------|---------------------------|--------------|-------------|
|
||||
| Tesseract | **11/11** | — (non re-benché) | — | Référence Mai, non retesté |
|
||||
| EasyOCR brut | 8/11 | 54 det (shot_0172) | 4.856s | Fragmente moins, score < Tesseract |
|
||||
| EasyOCR preproc | 9/11 | — | — | +1 vs brut Mai |
|
||||
| docTR CPU | 10/11 | 139 det (shot_0172) | 0.717s | **Meilleur rapport qualité/latence** |
|
||||
| PP-OCRv5 CPU | non testé en Mai | BLOCKED | — | Bug PIR/OneDNN, 0 inference possible |
|
||||
|
||||
### Hiérarchie CPU confirmée
|
||||
|
||||
```
|
||||
docTR CPU (0.7s, 10/11) > EasyOCR preproc (4.9s, 9/11) > EasyOCR brut (4.9s, 8/11) > PP-OCRv5 CPU (BLOCKED)
|
||||
```
|
||||
|
||||
docTR reste le **meilleur moteur OCR CPU** pour Léa en termes de latence + qualité.
|
||||
Tesseract reste le plus précis (11/11) mais sans bounding boxes exploitables.
|
||||
|
||||
---
|
||||
|
||||
## 6. Recommandations
|
||||
|
||||
1. **docTR = moteur OCR CPU de production** — latence <1s, qualité 10/11, word-level bboxes
|
||||
2. **PP-OCRv5 GPU bench = action séparée** — requiert paddlepaddle GPU binary sur RTX 5070
|
||||
3. **PaddleOCR 3.4.0 = ORPHAN** — 0 imports dans le projet, pas dans requirements.txt,
|
||||
CPU-only install sans CUDA → retirer du venv si cleanup D2 (C-MORT)
|
||||
4. **Ne pas dépendre de PaddleOCR** pour POC T1 — docTR suffisant
|
||||
5. **Bug report upstream** — paddlepaddle PIR executor OneDNN, repro: any model + CPU binary
|
||||
|
||||
---
|
||||
|
||||
## 7. Annexes
|
||||
|
||||
### A. Script bench
|
||||
|
||||
`scripts/bench_ppocrv5_cpu.py` — compare PP-OCRv5, docTR, EasyOCR sur shot_0172_full.png.
|
||||
PP-OCRv5 crash → résultats JSON avec error field.
|
||||
|
||||
### B. Résultats JSON
|
||||
|
||||
`scripts/bench_ppocrv5_results.json` — 4522 lignes, contient tous texts + bboxes pour
|
||||
docTR (139 items) et EasyOCR (54 items). PP-OCRv5 = error only.
|
||||
|
||||
### C. Machine specs
|
||||
|
||||
- CPU: Ryzen 9 9950X, 32 threads
|
||||
- RAM: 123 GB
|
||||
- GPU: RTX 5070 12GB VRAM (non utilisé — bench CPU)
|
||||
- CUDA driver: 580.159.03 / runtime 13.0
|
||||
- OS: Linux (Ubuntu)
|
||||
- paddlepaddle: 3.3.1 CPU-only (pip install)
|
||||
- PaddleOCR: 3.4.0
|
||||
- docTR: (version installée dans venv)
|
||||
- EasyOCR: (version installée dans venv)
|
||||
186
docs/CARTO_CODE_NON_BRANCHE_2026-07-02.md
Normal file
186
docs/CARTO_CODE_NON_BRANCHE_2026-07-02.md
Normal file
@@ -0,0 +1,186 @@
|
||||
# CARTO CODE NON BRANCHÉ — carte de référence wiring (2026-07-02)
|
||||
|
||||
> **But** : carte « existing-first » de référence. AVANT tout chantier/bench/proposition,
|
||||
> consulter ce doc pour savoir si une brique existe et si elle est **réellement branchée au
|
||||
> runtime**. Recadrage Dom 02/07 : « vérifier ce qui existe et non branché, c'est le BABA ».
|
||||
>
|
||||
> **Méthode** : verdict prouvé par chaîne d'imports depuis un point d'entrée actif
|
||||
> (fichier:ligne), imports lazy inclus, gates de config citées. Jamais de conclusion sur un
|
||||
> grep seul. Sources fusionnées : agent Claude « intelligence » + carto Qwen (volet 1
|
||||
> détection) + `AUDIT_CODE_MORT_2026-07-02.md` (Qwen) + vérifs ponctuelles Claude.
|
||||
>
|
||||
> **Légende** : **WIRED** (chaîne prouvée) · **GATED** (branché mais derrière flag, défaut
|
||||
> cité) · **ORPHELIN** (0 appelant runtime, recherche exhaustive) · **INCERTAIN** (non tranché,
|
||||
> raison donnée).
|
||||
>
|
||||
> **Points d'entrée actifs runtime** : `api_stream.py` (streaming 5005, `rpa-streaming`) ·
|
||||
> `run_worker.py` (worker VLM 5099) · VWB `app.py` (5002) · `web_dashboard/app.py` (5001) ·
|
||||
> `agent_chat/app.py` (5004) · `server/api_upload.py` (8000).
|
||||
|
||||
---
|
||||
|
||||
## 0. Résumé exécutif — les découvertes qui changent une décision
|
||||
|
||||
1. **Self-healing = façade morte, malgré doc « wired ».** Chaîne d'import réelle (VWB →
|
||||
`core/healing`), routes REST répondent, MAIS déclenchement **impossible** : le code teste
|
||||
`hasattr(healing_integration, 'enable_healing')` et cette méthode **n'existe nulle part**
|
||||
(`execution_integration.py:421`). `handle_execution_failure` = 0 appelant d'exécution.
|
||||
Preuve d'inertie : `logs/healing/recovery.log` = **0 octet, mtime déc. 2025**. Le pont
|
||||
manquant tient à **une méthode**, pas un module. → `PLAN_MENAGE_CODE_MORT` le classait
|
||||
« wired — NE PAS TOUCHER » : **doc fausse**.
|
||||
|
||||
2. **`core/navigation` (commit du matin `f9a053132`) = write-only.** Le handler résout le
|
||||
login et écrit `navigate_login_coords` dans `replay_state["variables"]`, mais **aucun
|
||||
consommateur** : le compilateur `_edge_to_normalized_actions` n'a pas de branche `navigate`
|
||||
et produit des coords littérales, jamais de templates `{{navigate_login_coords.x_pct}}`.
|
||||
Détail : `docs/DESIGN_NAVIGATE_COORDS_CONSUMPTION_2026-07-02.md`. Décision D1 en attente Dom.
|
||||
|
||||
3. **AutonomousPlanner : coût sans usage.** Instancié au boot d'`agent_chat` (charge LLM +
|
||||
OWL detector via `autonomous_planner.py:36`), mais **aucune route n'appelle une méthode de
|
||||
planification** — seuls des setters. Type même du « code écrit jamais invoqué ».
|
||||
|
||||
4. **PaddleOCR installé, jamais importé.** `paddleocr 3.4.0` + `paddlepaddle 3.3.1` (CPU)
|
||||
présents dans `.venv`, **0 `import paddle` dans le code**, 0 requirements, 0 deploy. Piste
|
||||
bench en cours (Qwen), pas un composant actif.
|
||||
|
||||
5. **YOLO cascade de résolution = mort.** `_resolve_by_yolo` défini
|
||||
(`resolve_engine.py:458`) + importé (`api_stream.py:6114`) mais **jamais appelé** ; aucune
|
||||
branche `yolo` dans la cascade compilée. ⚠ À NE PAS confondre avec le YOLO de `som_engine`
|
||||
(OmniParser SoM), lui **WIRED**.
|
||||
|
||||
6. **`server/api_core.py`** : blueprint Flask complet (capture/detect/embed/faiss) **jamais
|
||||
enregistré** — orphelin absent du plan ménage.
|
||||
|
||||
7. **Nos propres cartos avaient 4 erreurs** (cf. §4). Re-prouver était justifié.
|
||||
|
||||
---
|
||||
|
||||
## 1. Chaîne détection / grounding / résolution
|
||||
|
||||
| Module | Verdict | Preuve (fichier:ligne) | Remarque |
|
||||
|--------|---------|------------------------|----------|
|
||||
| `core/detection/som_engine.py` | **WIRED** | resolve_engine.py:1192 (replay) · stream_processor.py:643 (recording) · api_stream.py:1958 (temps réel) | 3 chemins indépendants, singleton thread-safe. Tire YOLO weights direct. |
|
||||
| `core/detection/omniparser_adapter.py` | **B-DORMANT** (branché lazy, fallback vide) | phase25_analyzer.py:388 · resolve_engine.py:437 · désactivé côté VWB (`_omniparser_available=False`) | Import lazy try/except, singleton. 7 zones cartographiées (§ audit Qwen). |
|
||||
| `core/detection/owl_detector.py` | **WIRED (via AutonomousPlanner) — mais planner inerte** | autonomous_planner.py:36 | Chargé au boot agent_chat pour rien (cf. §0.3). 4 méthodes internes C-MORT. |
|
||||
| `core/detection/ollama_client.py` | **WIRED partiel** | `classify_element_complete()` actif ; 5 vieilles méthodes + `check_ollama_available()` standalone = C-MORT | Duplicat VWB (D2). |
|
||||
| `_resolve_by_yolo` (resolve_engine.py:458) | **ORPHELIN** | importé api_stream.py:6114, **0 appel réel**, 0 branche cascade | ≠ YOLO de som_engine (wired). |
|
||||
| `core/grounding/bbox_parser.py` | **WIRED** | resolve_engine.py:29 | |
|
||||
| `core/grounding/smart_resize.py` | **ORPHELIN (C-MORT)** | 0 appelant prod, DETTE-007 triple impl (2 autres existent) | |
|
||||
| `core/grounding/server.py` | **WIRED** | service HTTP Flask port 8200 standalone | Upgrade C→A (Qwen). |
|
||||
| `visual_workflow_builder/.../api/ui_detection.py` | **WIRED** | VWB app.py:310 (blueprint) · fast_detector.py:117 | UI-DETR-1 du recording, modèle rfdetr RFDETRMedium, 5 endpoints `/api/ui-detection`. |
|
||||
| `core/semantic/phase25_analyzer.py` | **WIRED** | api_stream.py:7690 (route `lea_competence_persist:7435`) | |
|
||||
| `core/extraction/{field_extractor,vlm_client,role_mapper}` | **WIRED-transitif** | field_extractor ← input_handler.py:121/504/722 (lazy) · vlm_client+role_mapper ← core/navigation/__init__.py:69, action_resolver.py:109 | Le plan ménage 23/06 (« 4/5 morts ») précède navigation. |
|
||||
| `core/llm/` (ocr_extractor, extract_grid) | **WIRED** | api_stream.py:1766 · replay_engine.py:2115-2403 · resolve_engine.py:2597 | |
|
||||
| `core/navigation/` | **WIRED (boot) / write-only (fonctionnel)** | api_stream.py:440 top-level NON gardé · handler résout mais 0 consommateur coords | cf. §0.2. ⚠ import non gardé → si casse, 5005 ne boote pas (garde-fou test_navigate_wiring.py). |
|
||||
| PaddleOCR (venv) | **ORPHELIN** | 0 import, 0 requirements, 0 deploy | cf. §0.4. |
|
||||
|
||||
---
|
||||
|
||||
## 2. Modules « intelligence »
|
||||
|
||||
| Module | Verdict | Preuve (fichier:ligne) | Remarque |
|
||||
|--------|---------|------------------------|----------|
|
||||
| `core/healing/` | **ORPHELIN de fait** (importé, indéclenchable) | chaîne VWB app.py:217 → api/self_healing.py → services/self_healing_integration.py, MAIS `enable_healing` inexistant (execution_integration.py:421) ; `handle_execution_failure` 0 appelant | cf. §0.1. `logs/healing/recovery.log` vide depuis déc. 2025. |
|
||||
| `core/coaching/` | **WIRED** | VWB app.py:284-285 (blueprint) → api/coaching_sessions.py:17,22 · exec : execution_integration.py:869 · front WebSocket | REST blueprint peut-être non consommé par l'UI (front = socket.io). |
|
||||
| `core/cognition/working_memory` | **WIRED-transitif** | observe_reason_act.py:30,506 · ORALoop ← VWB execute.py:1542,2075 | Les 4 autres sous-modules cognition = MORTS (tests only). |
|
||||
| `core/learning/` (4/5) | **WIRED** | target_memory_store: resolve_engine.py:1865 + api_stream.py:5132 · continuous_learner: stream_processor.py:3147 · learning_manager: VWB learning_integration.py:36, api/workflows.py:696 · feedback_processor: execution_loop.py:317 | `versioned_store` ORPHELIN. `record_observation` = **0 appelant** (learning_manager.py:54). |
|
||||
| `core/execution/` | **WIRED massif** | observe_reason_act ← execute.py:1542 · input_handler ← execute.py:69 · dag_executor+llm_actions ← dag_execute.py:33,40 · action_executor/target_resolver/error_handler/execution_loop ← agent_chat app.py:328-340 · +transitifs | Morts : spatial_index, target_memory, workflow_runner (⚠ encore exporté par `__init__.py:10`). |
|
||||
| `core/auth/` | **GATED — défaut OFF** | api_stream.py:278-286 : import lazy SSI `RPA_AUTH_VAULT_PATH` **et** `RPA_AUTH_VAULT_PASSWORD` définis (absents par défaut). Seul lieu qui les définit : CI `.gitea/workflows/tests.yml:35` | Vault inactif en prod. TOTP dans la même chaîne gated. |
|
||||
| `core/federation/` | **WIRED manuel, write-only** | routes actives non gated : GET learning-pack/export api_stream.py:6431 · POST import :6476 | `GlobalFAISSIndex.search()` = **0 appelant**. Aucun auto-déclenchement. |
|
||||
| `core/gpu/` (2/6) | **WIRED** | device_policy ← resolve_engine.py:1750 (hot-path) · gpu_resource_manager ← agent_chat app.py:53,266 | clip_manager, ollama_manager, vram_monitor, preflight = morts. |
|
||||
| `core/embedding/` | **WIRED (lazy)** | construction CLIP/FAISS ← stream_processor.py:2560 `_ensure_initialized` (appelé process_screenshot:2804 + finalize_session:2969) · lecture web_dashboard app.py:309+ | Se déclenche au 1er screenshot / finalisation, pas au boot. |
|
||||
| `agent_chat/autonomous_planner` | **INSTANCIÉ mais INERTE** | import app.py:48, instancié :358, mais seuls appels = setters :362,367 ; 0 route de planification | cf. §0.3. Tire owl_detector pour rien. |
|
||||
| `agent_chat/urgences_orchestrator` | **WIRED** | import lazy app.py:2740, routes `/api/urgences/*` | |
|
||||
| `agent_chat/gesture_catalog` | **WIRED ×2** | agent_chat app.py:377,955 · **api_stream.py:269,3598** (hot-path replay `optimize_replay_actions`) | Pas seulement le chat. |
|
||||
| `core/validation/` | **GATED — défaut OFF** | flag `RPA_VALIDATOR_V2_ENABLED` défaut OFF (api_stream.py:91), consommé report_action_result:4924 | |
|
||||
|
||||
**WIRED confirmés (survol)** : capture, models, competences, corrections, data, graph,
|
||||
knowledge, monitoring, persistence, pipeline, system, workflow, visual, config.py,
|
||||
anonymisation (PII), matching/training (transitifs).
|
||||
**ORPHELINS confirmés** : variants, precision, supervision, interfaces (0 importeur non-test) ·
|
||||
`core/evaluation/` (consommé seulement par `tools/lea_bench*.py`, outillage CLI) ·
|
||||
`server/api_core.py` (blueprint jamais enregistré).
|
||||
|
||||
---
|
||||
|
||||
## 3. Zones GATED (flags + défaut) — activation supervisée
|
||||
|
||||
| Flag | Défaut | Effet si ON | Preuve |
|
||||
|------|--------|-------------|--------|
|
||||
| `RPA_AUTH_VAULT_PATH` + `RPA_AUTH_VAULT_PASSWORD` | absents | active `core/auth` (vault Fernet + TOTP) | api_stream.py:278-286 |
|
||||
| `RPA_VALIDATOR_V2_ENABLED` | OFF | active validation V2 (report_action_result) | api_stream.py:91 |
|
||||
| `RPA_R1_AUTO_IMPORT` | OFF | active import auto core→DB VWB (R1) | api_stream.py:~4480 (revue en cours) |
|
||||
| `RPA_AUTO_UPDATE_ENABLED` | OFF | MAJ silencieuse client (DETTE-022) | agent_v1/config.py:103 |
|
||||
| `RPA_GROUNDING_ENGINE=qwen3vl_vllm` | legacy Qwen2.5-VL | grounder Qwen3-VL (override DGX runtime) | resolve_engine.py:1001-1007 |
|
||||
|
||||
---
|
||||
|
||||
## 4. Divergences corrigées avec les docs existants
|
||||
|
||||
1. **`core/healing` : doc `PLAN_MENAGE` = « wired, NE PAS TOUCHER » → FAUX.** Indéclenchable
|
||||
(`enable_healing` fantôme, log vide déc. 2025). Le pont est à une méthode près.
|
||||
2. **`feedback_processor` : CARTO 16/06 = ORPHELIN → FAUX.** Instancié à chaque ExecutionLoop
|
||||
(execution_loop.py:317).
|
||||
3. **`core/cognition` : CARTO 16/06 = tout orphelin → FAUX pour working_memory** (vivant au
|
||||
runtime VWB via observe_reason_act).
|
||||
4. **`core/extraction` : plan ménage « 4/5 morts » → périmé.** vlm_client + role_mapper
|
||||
branchés via `core/navigation` (postérieur au doc).
|
||||
|
||||
Upgrades C→A/B confirmés par Qwen : autonomous_planner (C→A, mais inerte cf. §0.3),
|
||||
seeclick_adapter (C→B), grounding/server.py (C→A), get_grounding_profile (C→A).
|
||||
|
||||
---
|
||||
|
||||
## 5. Code mort candidat suppression → voir `AUDIT_CODE_MORT_2026-07-02.md`
|
||||
|
||||
Résumé : **8 C-MORT** (~843 lignes, ex. deploy_windows.py, smart_resize.py, 7 config classes
|
||||
dépréciées, agent_chat 410 endpoints) · **5 B-ORPHELIN** (à conserver, projections) · **4
|
||||
duplicats** (décision Dom). Suppression = GO Dom par lot, worktree isolé + tests après chaque
|
||||
lot. ⚠ Prudence renforcée vu les 4 erreurs de doc du §4 : re-prouver chaque item avant
|
||||
suppression.
|
||||
|
||||
---
|
||||
|
||||
## 6. Cascade de résolution UI (`resolve_engine.py`) — ordre RÉEL prouvé
|
||||
|
||||
Point d'entrée unique au replay : le client Léa (`executor.py:2847`, **`strict_mode=True` hardcodé**
|
||||
:2870) → route `resolve_target` (`api_stream.py:6131`) → `_resolve_target_sync`
|
||||
(`resolve_engine.py:1804`). `replay_engine.py` ne résout pas (il construit le target_spec).
|
||||
|
||||
**Ordre réel au replay (mode strict VLM-first, `resolve_engine.py:1957`)** :
|
||||
```
|
||||
0. Mémoire persistante (replay_memory.memory_lookup:1869) — hit → skip toute la cascade
|
||||
0c. dialog_button → OCR seul (1920-1952)
|
||||
── strict VLM-first (1957) ──
|
||||
S0a. Grounding VLM (_resolve_by_grounding:2019) si by_text_source ∈ {ocr, vlm}
|
||||
S0b. Template matching icônes (2057) sinon
|
||||
S0.5 OCR direct (_resolve_by_ocr_text:2105) si by_text
|
||||
S1. VLM Quick Find (_vlm_quick_find:2158)
|
||||
S1.5 SoM + VLM (_resolve_by_som:2207)
|
||||
S2. Template matching fallback (2238)
|
||||
S3. STOP replay resolved=False (2283)
|
||||
```
|
||||
Note : grounding VLM (S0a) et VLM Quick Find (S1) sont **deux appels VLM distincts**.
|
||||
|
||||
**Statut resolvers** : `_resolve_by_grounding`, `_resolve_by_template_matching`,
|
||||
`_resolve_by_ocr_text`, `_vlm_quick_find`, `_resolve_by_som`, `replay_memory` = **WIRED**
|
||||
(preuves lignes ci-dessus). Grounder Qwen3-VL : bascule dans `_resolve_by_grounding:1006`
|
||||
(flag `RPA_GROUNDING_ENGINE=qwen3vl_vllm`, sinon legacy Qwen2.5-VL) — change modèle/endpoint/
|
||||
prompt/parser, pas le flux.
|
||||
|
||||
**3 branches MORTES dans la cascade** :
|
||||
- `_resolve_by_yolo` (:458) — importé api_stream.py:6114, **0 appel réel**. ORPHELIN.
|
||||
- **Vérification CLIP** (:1972-2008) — **dead gate** : lit `target_spec["clip_embedding"]`
|
||||
qui n'est **jamais peuplé** dans tout `agent_v0/` → branche jamais exécutée.
|
||||
- **V4 pré-compilé** (`_resolve_with_precompiled_order:1601`, ordre figé `["ocr","template",
|
||||
"vlm"]`) — **WIRED mais dormant en replay normal** : alimenté uniquement par l'endpoint
|
||||
`/replay/plan` (`execution_plan_runner.py:173`), jamais par le flux VWB→Léa.
|
||||
|
||||
**Verdict README « OCR→template→YOLO→VLM » = FAUX** : (1) YOLO mort, (2) l'ordre est
|
||||
VLM-first, (3) la séquence `ocr,template,vlm` n'existe que dans le V4 dormant.
|
||||
|
||||
## 7. Zones restantes non re-vérifiées (honnêteté)
|
||||
|
||||
- `core/analytics/` : ~13 sous-modules orphelins non re-vérifiés un par un (conforme doc).
|
||||
- Reste couvert : chaîne détection/grounding/résolution + intelligence = prouvés. **Carto
|
||||
considérée complète sur le périmètre runtime actif.**
|
||||
474
docs/PLAN_D1_NAVIGATE_COORDS_IMPLEMENTATION_2026-07-02.md
Normal file
474
docs/PLAN_D1_NAVIGATE_COORDS_IMPLEMENTATION_2026-07-02.md
Normal file
@@ -0,0 +1,474 @@
|
||||
# D1 — NavigateCoords Implementation Plan
|
||||
|
||||
**Auteur**: Qwen
|
||||
**Date**: 2026-07-02
|
||||
**Statut**: EN ATTENTE GO Dom/Claude — Option 1 vs Option 2
|
||||
**Référence**: `docs/DESIGN_NAVIGATE_COORDS_CONSUMPTION_2026-07-02.md` (3 gaps documentés)
|
||||
|
||||
---
|
||||
|
||||
## Résumé des gaps à résoudre
|
||||
|
||||
| Gap | Description | Fichier:Ligne | Preuve |
|
||||
|-----|-------------|---------------|--------|
|
||||
| A | Compiler bake floats littéraux — aucun template pour coords | `replay_engine.py:1821-1833` | `x_pct = px` (literal float) |
|
||||
| B | Zéro consommateur de `navigate_*_coords` variables | `replay_engine.py` + `api_stream.py` | grep: 0 occurrences |
|
||||
| C | `_edge_to_normalized_actions` pas de branche `navigate` → `[]` | `replay_engine.py:1951-1953` | `else: return []` |
|
||||
|
||||
---
|
||||
|
||||
## Infrastructure existante (non-modifiée)
|
||||
|
||||
### `_ALLOWED_ACTION_TYPES` (replay_engine.py:35-50)
|
||||
|
||||
`"navigate"` est **déjà présent** (ligne 44). La validation de sécurité l'accepte déjà.
|
||||
|
||||
### `_SERVER_SIDE_ACTION_TYPES` (replay_engine.py:55-64)
|
||||
|
||||
`"navigate"` est **déjà présent** (ligne 59). Le dispatch loop le traite comme serveur-side.
|
||||
|
||||
### `_handle_navigate_action` (core/navigation/__init__.py:24-113)
|
||||
|
||||
Handler **déjà câblé** dans api_stream.py (ligne 4459-4467). Résout screenshot, OCR/VLM, stocke coords dans `replay_state["variables"]`.
|
||||
|
||||
### `_resolve_runtime_vars` (replay_engine.py:2031-2045)
|
||||
|
||||
Resolver **existant** pour `{{var.field}}` — récursif sur dict/list/str. Retourne `str(value)` au niveau leaf : un float devient une string → conversion string→float nécessaire pour les coords.
|
||||
|
||||
---
|
||||
|
||||
## OPTION 1 — Compiler Injection (~2h)
|
||||
|
||||
### Principe
|
||||
|
||||
Ajouter une branche `navigate` dans `_edge_to_normalized_actions` + ajouter `coords_var` mechanism dans les branches `mouse_click`/`text_input` + runtime resolution + float conversion.
|
||||
|
||||
### Patch P1-A : Branche navigate dans `_edge_to_normalized_actions`
|
||||
|
||||
**Fichier**: `agent_v0/server_v1/replay_engine.py`
|
||||
**Position**: Après `elif action_type == "llm_generate":` (ligne 1949), avant `else:` (ligne 1951)
|
||||
|
||||
```python
|
||||
elif action_type == "navigate":
|
||||
normalized["type"] = "navigate"
|
||||
normalized["parameters"] = {
|
||||
"login_field": action_params.get("login_field", "login"),
|
||||
"password_field": action_params.get("password_field", "password"),
|
||||
"submit_button": action_params.get("submit_button", "submit"),
|
||||
"login_coords_var": action_params.get("login_coords_var", "navigate_login_coords"),
|
||||
"password_coords_var": action_params.get("password_coords_var", "navigate_password_coords"),
|
||||
"submit_coords_var": action_params.get("submit_coords_var", "navigate_submit_coords"),
|
||||
}
|
||||
return [normalized]
|
||||
```
|
||||
|
||||
**Justification**: Action serveur-side — pas besoin de `x_pct/y_pct` ni `target_spec`. Le handler `_handle_navigate_action` lit `parameters` pour config, résout coords au runtime.
|
||||
|
||||
**Impact**: Gap C résolu. Navigate edge → 1 normalized action au lieu de `[]`.
|
||||
|
||||
### Patch P1-B : coords_var dans branches mouse_click / text_input
|
||||
|
||||
**Fichier**: `agent_v0/server_v1/replay_engine.py`
|
||||
**Position**: Lignes 1844-1856 (branches click et type)
|
||||
|
||||
**mouse_click** (ligne 1844-1848) — AVANT :
|
||||
|
||||
```python
|
||||
if action_type == "mouse_click":
|
||||
normalized["type"] = "click"
|
||||
normalized["x_pct"] = x_pct
|
||||
normalized["y_pct"] = y_pct
|
||||
normalized["button"] = action_params.get("button", "left")
|
||||
```
|
||||
|
||||
**mouse_click** — APRES :
|
||||
|
||||
```python
|
||||
if action_type == "mouse_click":
|
||||
normalized["type"] = "click"
|
||||
coords_var = action_params.get("coords_var")
|
||||
if coords_var:
|
||||
normalized["x_pct"] = f"{{{{{coords_var}.x_pct}}}}"
|
||||
normalized["y_pct"] = f"{{{{{coords_var}.y_pct}}}}"
|
||||
normalized["coords_var"] = coords_var
|
||||
else:
|
||||
normalized["x_pct"] = x_pct
|
||||
normalized["y_pct"] = y_pct
|
||||
normalized["button"] = action_params.get("button", "left")
|
||||
```
|
||||
|
||||
**text_input** (ligne 1850-1856) — AVANT :
|
||||
|
||||
```python
|
||||
elif action_type == "text_input":
|
||||
normalized["type"] = "type"
|
||||
text = action_params.get("text", "")
|
||||
text = _substitute_variables(text, params, action_params.get("defaults", {}))
|
||||
normalized["text"] = text
|
||||
normalized["x_pct"] = x_pct
|
||||
normalized["y_pct"] = y_pct
|
||||
```
|
||||
|
||||
**text_input** — APRES :
|
||||
|
||||
```python
|
||||
elif action_type == "text_input":
|
||||
normalized["type"] = "type"
|
||||
text = action_params.get("text", "")
|
||||
text = _substitute_variables(text, params, action_params.get("defaults", {}))
|
||||
normalized["text"] = text
|
||||
coords_var = action_params.get("coords_var")
|
||||
if coords_var:
|
||||
normalized["x_pct"] = f"{{{{{coords_var}.y_pct}}}}"
|
||||
normalized["y_pct"] = f"{{{{{coords_var}.y_pct}}}}"
|
||||
normalized["coords_var"] = coords_var
|
||||
else:
|
||||
normalized["x_pct"] = x_pct
|
||||
normalized["y_pct"] = y_pct
|
||||
```
|
||||
|
||||
**⚠️ BUG dans le draft ci-dessus**: `x_pct` template pour text_input doit être `{{coords_var.x_pct}}` (pas `.y_pct` deux fois). Version corrigée :
|
||||
|
||||
```python
|
||||
elif action_type == "text_input":
|
||||
normalized["type"] = "type"
|
||||
text = action_params.get("text", "")
|
||||
text = _substitute_variables(text, params, action_params.get("defaults", {}))
|
||||
normalized["text"] = text
|
||||
coords_var = action_params.get("coords_var")
|
||||
if coords_var:
|
||||
normalized["x_pct"] = f"{{{{{coords_var}.x_pct}}}}"
|
||||
normalized["y_pct"] = f"{{{{{coords_var}.y_pct}}}}"
|
||||
normalized["coords_var"] = coords_var
|
||||
else:
|
||||
normalized["x_pct"] = x_pct
|
||||
normalized["y_pct"] = y_pct
|
||||
```
|
||||
|
||||
**Justification**: `coords_var` = mécanisme minimal pour déclarer "ces coords viennent de la variable navigate_login_coords". Template strings résolus au runtime par `_resolve_runtime_vars`.
|
||||
|
||||
**Impact**: Gap A résolu. Gap B partiellement — les actions click/type deviennent consommatrices via `coords_var`.
|
||||
|
||||
### Patch P1-C : Coercion helper après resolver existant
|
||||
|
||||
**⚠️ CORRECTION IMPORTANTE (2026-07-02 14:45)** : Le plan original sur-dimensionnait P1-C en proposant un second resolver runtime. **Codex a correctement identifié** que `_resolve_runtime_vars` est **déjà appelé** dans la boucle dispatch à `api_stream.py:4331-4335` :
|
||||
|
||||
```python
|
||||
# L4331-4335 (EXISTANT, ne pas modifier)
|
||||
if owning_replay is not None:
|
||||
runtime_vars = owning_replay.get("variables") or {}
|
||||
if runtime_vars:
|
||||
action = _resolve_runtime_vars(action, runtime_vars)
|
||||
```
|
||||
|
||||
**Besoin réel = coercion helper uniquement** : `_resolve_runtime_vars` résout les templates `{{var.field}}` mais retourne `str(value)` au leaf → `{{navigate_login_coords.x_pct}}` devient `"0.15"` (string), pas `0.15` (float). Le client attend des floats pour x_pct/y_pct.
|
||||
|
||||
**Fichier**: `agent_v0/server_v1/api_stream.py`
|
||||
**Position**: Juste après la ligne 4335 (`action = _resolve_runtime_vars(action, runtime_vars)`)
|
||||
|
||||
**Politique coords_var non résolu** : Skip + pause supervisée (AGREED Qwen/Codex). Jamais fallback 0.0/0.0 — un clic sur coords (0,0) = top-left = potentiellement dangereux.
|
||||
|
||||
```python
|
||||
def _coerce_action_coords(action: dict) -> dict:
|
||||
"""Cast x_pct/y_pct en float après template resolution par _resolve_runtime_vars.
|
||||
|
||||
Politique : si string non convertible ou template encore present → skip + pause_for_human.
|
||||
Idempotent sur les actions qui ont déjà des floats (mouse_click existant).
|
||||
|
||||
Appelé APRÈS _resolve_runtime_vars dans la boucle dispatch (api_stream.py ~L4335).
|
||||
"""
|
||||
for key in ("x_pct", "y_pct"):
|
||||
val = action.get(key)
|
||||
if val is None:
|
||||
continue
|
||||
if isinstance(val, float):
|
||||
continue # déjà float, idempotent
|
||||
if isinstance(val, str):
|
||||
# Template encore présent = non résolu par _resolve_runtime_vars
|
||||
if val.startswith("{{") and val.endswith("}}"):
|
||||
action["_skip_reason"] = f"coords_var non résolu: {key}={val}"
|
||||
action["type"] = "pause_for_human"
|
||||
action["safety_level"] = "high"
|
||||
return action
|
||||
try:
|
||||
action[key] = float(val)
|
||||
except (ValueError, TypeError):
|
||||
action["_skip_reason"] = f"coords invalide: {key}={val}"
|
||||
action["type"] = "pause_for_human"
|
||||
action["safety_level"] = "high"
|
||||
return action
|
||||
return action
|
||||
```
|
||||
|
||||
**Appel dans la boucle dispatch** (à insérer après L4335) :
|
||||
|
||||
```python
|
||||
# L4335 existant: action = _resolve_runtime_vars(action, runtime_vars)
|
||||
# NOUVEAU — coercion coords après resolver existant
|
||||
action = _coerce_action_coords(action)
|
||||
```
|
||||
|
||||
**Justification**: `_resolve_runtime_vars` (existant à L4335) résout les templates → strings. `_coerce_action_coords` cast les strings en floats. Si template non résolu ou conversion impossible → pause_for_human (fail-safe), jamais fallback coords (0,0). Idempotent sur actions existantes (floats déjà présents).
|
||||
|
||||
**Risques additionnels identifiés** :
|
||||
1. **Résolution partielle** : si seul y_pct est résolu mais x_pct reste un template → `_coerce_action_coords` convertit l'action en pause_for_human (safe stop, pas de clic top-left).
|
||||
2. **Idempotence** : si action existante a déjà x_pct=0.35 (float) → helper passe sans modification (isinstance(float) → continue).
|
||||
3. **Race condition** : variables dict partagé entre navigate handler et dispatch loop — mais BFS séquentiel garantit que navigate stocke AVANT click consomme.
|
||||
|
||||
**Impact**: Gap B résolu — les coords navigate sont consommées au runtime par click/type, avec coercion + fail-safe.
|
||||
|
||||
### Patch P1-D : VWB YAML schema — coords_var field
|
||||
|
||||
**Fichier**: Schema VWB (workflow YAML format) — documentation
|
||||
**Nature**: Ajout d'un champ `coords_var` dans `action.parameters` pour les steps `mouse_click` et `text_input`
|
||||
|
||||
Exemple de workflow YAML avec navigate + click consommateur :
|
||||
|
||||
```yaml
|
||||
steps:
|
||||
- id: s1
|
||||
action:
|
||||
type: navigate
|
||||
parameters:
|
||||
login_coords_var: navigate_login_coords
|
||||
password_coords_var: navigate_password_coords
|
||||
to_node: n2
|
||||
|
||||
- id: s2
|
||||
action:
|
||||
type: mouse_click
|
||||
parameters:
|
||||
coords_var: navigate_login_coords
|
||||
button: left
|
||||
to_node: n3
|
||||
|
||||
- id: s3
|
||||
action:
|
||||
type: text_input
|
||||
parameters:
|
||||
coords_var: navigate_password_coords
|
||||
text: "${password}"
|
||||
to_node: n4
|
||||
```
|
||||
|
||||
**Justification**: Le VWB builder doit savoir qu'un click peut référencer une variable coords au lieu de fournir des pixels littéraux. C'est un changement de schema minimal (1 champ optionnel).
|
||||
|
||||
---
|
||||
|
||||
## OPTION 2 — Declarative YAML Templates (~4h)
|
||||
|
||||
### Principe
|
||||
|
||||
Introduire un `coords_template` field dans les step definitions + un resolver typed qui extrait directement les floats du dict variables sans passage string→float.
|
||||
|
||||
### Patch P2-A : Même branche navigate (identique à P1-A)
|
||||
|
||||
Inchangé — Gap C résolu par la même branche.
|
||||
|
||||
### Patch P2-B : coords_template field + typed resolver
|
||||
|
||||
**Fichier**: `agent_v0/server_v1/replay_engine.py`
|
||||
|
||||
Nouvelle fonction `_resolve_coords_template` :
|
||||
|
||||
```python
|
||||
def _resolve_coords_template(
|
||||
coords_template: str,
|
||||
variables: Dict[str, Any],
|
||||
) -> Optional[Dict[str, float]]:
|
||||
"""Résoudre un coords_template en dict {x_pct, y_pct, bbox_pct} depuis variables.
|
||||
|
||||
Retourne None si la variable n'existe pas ou si les champs ne sont pas floats.
|
||||
Pas de conversion string→float : les valeurs doivent déjà être des floats.
|
||||
"""
|
||||
coords_dict = variables.get(coords_template)
|
||||
if not coords_dict or not isinstance(coords_dict, dict):
|
||||
return None
|
||||
|
||||
x_pct = coords_dict.get("x_pct")
|
||||
y_pct = coords_dict.get("y_pct")
|
||||
|
||||
if not isinstance(x_pct, (int, float)) or not isinstance(y_pct, (int, float)):
|
||||
logger.warning(
|
||||
f"coords_template {coords_template}: x_pct/y_pct not numeric "
|
||||
f"(x_pct={x_pct}, y_pct={y_pct})"
|
||||
)
|
||||
return None
|
||||
|
||||
result = {"x_pct": float(x_pct), "y_pct": float(y_pct)}
|
||||
|
||||
bbox_pct = coords_dict.get("bbox_pct")
|
||||
if bbox_pct:
|
||||
result["bbox_pct"] = bbox_pct # tuple, pas de conversion
|
||||
|
||||
return result
|
||||
```
|
||||
|
||||
### Patch P2-C : Branches mouse_click / text_input avec coords_template
|
||||
|
||||
```python
|
||||
if action_type == "mouse_click":
|
||||
normalized["type"] = "click"
|
||||
coords_template = action_params.get("coords_template")
|
||||
if coords_template:
|
||||
normalized["coords_template"] = coords_template
|
||||
# x_pct/y_pct résolus au runtime par _resolve_coords_template
|
||||
normalized["x_pct"] = None # placeholder → resolved at runtime
|
||||
normalized["y_pct"] = None
|
||||
else:
|
||||
normalized["x_pct"] = x_pct
|
||||
normalized["y_pct"] = y_pct
|
||||
normalized["button"] = action_params.get("button", "left")
|
||||
```
|
||||
|
||||
### Patch P2-D : Runtime resolution typed dans dispatch loop
|
||||
|
||||
```python
|
||||
# --- coords_template resolution (typed, no string→float round-trip) ---
if action.get("coords_template"):
    # Read the variables the same way as the existing dispatch code
    # (`owning_replay.get("variables") or {}`): owning_replay is accessed
    # there with .get(), so attribute access via `.replay_state` would fail.
    variables = owning_replay.get("variables") or {}
    from agent_v0.server_v1.replay_engine import _resolve_coords_template
    coords = _resolve_coords_template(action["coords_template"], variables)
    if coords:
        action["x_pct"] = coords["x_pct"]
        action["y_pct"] = coords["y_pct"]
        if coords.get("bbox_pct"):
            action["bbox_pct"] = coords["bbox_pct"]
        del action["coords_template"]  # resolved, the reference is no longer needed
    else:
        logger.warning(
            f"coords_template {action['coords_template']} unresolved — skipping action"
        )
        # skip → next action
        # NOTE(review): nothing here actually skips; the enclosing loop must
        # not transmit this action while x_pct/y_pct are still None.
|
||||
```
|
||||
|
||||
**Avantage Option 2**: Pas de string→float conversion. Les coords restent des floats du navigate handler au click handler. Plus clean, plus safe.
|
||||
|
||||
**Inconvénient Option 2**: `_resolve_coords_template` est une nouvelle fonction + le `x_pct = None` placeholder nécessite que le client tolère les None temporairement (ou que la resolution se fasse AVANT transmission). Le schema VWB doit documenter `coords_template` comme champ alternatif à `by_position`.
|
||||
|
||||
---
|
||||
|
||||
## Comparative Table — Patches
|
||||
|
||||
| Aspect | Option 1 (Compiler Injection) | Option 2 (YAML Templates) |
|
||||
|--------|-------------------------------|---------------------------|
|
||||
| **Gap C fix** | Identique (branche navigate) | Identique (branche navigate) |
|
||||
| **Gap A fix** | Template strings `{{var.field}}` dans x_pct/y_pct | `x_pct = None` placeholder + typed resolver |
|
||||
| **Gap B fix** | `_resolve_runtime_vars` + float conversion | `_resolve_coords_template` typed (no conversion) |
|
||||
| **String→float** | Nécessaire (design smell) | Aucun (floats passent directement) |
|
||||
| **Nouvelles fonctions** | 1 helper (`_coerce_action_coords`) + reuse `_resolve_runtime_vars` | 1 (`_resolve_coords_template`) |
|
||||
| **Schema VWB** | 1 champ `coords_var` | 1 champ `coords_template` |
|
||||
| **Temps implémentation** | ~2h | ~4h |
|
||||
| **Extensibilité** | Limitée (coupling navigate→click) | Extensible (any coords source) |
|
||||
| **Risque POC** | Minimal | Moyen (placeholder None + typed resolver) |
|
||||
| **Migration post-POC** | Option 2 refactor needed | Already Option 2 |
|
||||
|
||||
---
|
||||
|
||||
## Test Rouge Proposal
|
||||
|
||||
### Test TR-1 : Prouve Gap C (navigate → [])
|
||||
|
||||
```python
|
||||
def test_edge_to_normalized_actionsnavigate_returns_empty():
    """Gap C: a ``navigate`` edge must compile to at least one normalized action.

    Before the fix ``_edge_to_normalized_actions`` returned ``[]`` for this
    action type; after the fix it returns one ``type == "navigate"`` action.
    """
    from agent_v0.server_v1.replay_engine import _edge_to_normalized_actions

    # NOTE(review): WorkflowEdge / ActionSpec are not imported in this
    # snippet — import them from the workflow model module before running.
    navigate_spec = ActionSpec(
        type="navigate",
        parameters={"login_coords_var": "navigate_login_coords"},
    )
    navigate_edge = WorkflowEdge(
        edge_id="e1",
        from_node="n1",
        to_node="n2",
        action=navigate_spec,
    )

    actions = _edge_to_normalized_actions(navigate_edge, {})

    # Red before the fix (actions == []), green after it.
    assert len(actions) >= 1, "navigate must produce at least 1 normalized action"
    assert actions[0]["type"] == "navigate"
|
||||
```
|
||||
|
||||
### Test TR-2 : Prouve coords_var resolution (Option 1)
|
||||
|
||||
```python
|
||||
def test_coords_var_runtime_resolution():
    """Option 1: ``coords_var`` templates resolve to strings that parse as floats."""
    from agent_v0.server_v1.replay_engine import _resolve_runtime_vars

    login_coords = {"x_pct": 0.15, "y_pct": 0.35, "method": "ocr+vlm"}
    runtime_vars = {"navigate_login_coords": login_coords}
    click_action = {
        "type": "click",
        "x_pct": "{{navigate_login_coords.x_pct}}",
        "y_pct": "{{navigate_login_coords.y_pct}}",
        "coords_var": "navigate_login_coords",
    }

    out = _resolve_runtime_vars(click_action, runtime_vars)

    # The resolver yields a string leaf, hence the need for a float coercion step.
    assert out["x_pct"] == "0.15"
    assert float(out["x_pct"]) == 0.15
|
||||
```
|
||||
|
||||
### Test TR-3 : Prouve coords_template typed resolution (Option 2)
|
||||
|
||||
```python
|
||||
def test_coords_template_typed_resolution():
    """Option 2: ``_resolve_coords_template`` hands back floats without any parsing."""
    from agent_v0.server_v1.replay_engine import _resolve_coords_template

    login_coords = {"x_pct": 0.15, "y_pct": 0.35, "method": "ocr+vlm"}
    runtime_vars = {"navigate_login_coords": login_coords}

    resolved = _resolve_coords_template("navigate_login_coords", runtime_vars)

    assert resolved is not None
    # Typed path: the value is a float, never a string.
    assert isinstance(resolved["x_pct"], float)
    assert resolved["x_pct"] == 0.15
    assert resolved["y_pct"] == 0.35
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## BFS Ordonnancement — Risque scheduling
|
||||
|
||||
Le dispatch loop (`api_stream.py:get_next_action`) traite les actions séquentiellement par path BFS. Navigate est serveur-side → traité en boucle interne avant transmission. Click/type consommant coords_var/template sont visuels → transmis au client.
|
||||
|
||||
**Flux correct**:
|
||||
1. BFS traverse edge navigate → normalized action `type=navigate`
|
||||
2. Loop interne: `_handle_navigate_action` → stocke coords dans variables
|
||||
3. BFS traverse edge click → normalized action avec `coords_var`
|
||||
4. Loop: resolution runtime → float conversion → transmission client
|
||||
|
||||
**Risque**: Si le BFS ordonnance le click AVANT le navigate (par ex. edges parallèles), coords_var sera unresolved → l'action est convertie en pause_for_human (Option 1) ou skippée (Option 2) ; jamais de fallback 0.0/0.0.
|
||||
|
||||
**Mitigation**: VWB builder doit garantir que navigate edge précède click consommateur dans le path topologique. C'est une contrainte de schema, pas un bug runtime.
|
||||
|
||||
---
|
||||
|
||||
## Decision Matrix
|
||||
|
||||
| Critère | Option 1 | Option 2 | Recommandation POC |
|
||||
|---------|----------|----------|--------------------|
|
||||
| Temps | 2h | 4h | **Option 1** |
|
||||
| Risque runtime | string→float edge | None placeholder | Option 1 (conversion simple) |
|
||||
| Extensibilité | Limitée | Extensible | Option 1 pour POC, migration Option 2 post-POC |
|
||||
| Code mort risk | 0 nouvelles fonctions | 1 nouvelle fonction | Option 1 |
|
||||
| Test coverage | TR-1 + TR-2 | TR-1 + TR-3 | Option 1 |
|
||||
|
||||
**Recommandation Qwen**: Option 1 pour POC (2h, minimal risk, reuse infrastructure existante). Migration Option 2 post-POC si scaling multi-coords est confirmé (search, dossier).
|
||||
|
||||
**GO requis**: Dom + Claude (décision D1).
|
||||
|
||||
---
|
||||
|
||||
*Qwen — plan implémentation D1 déposé, awaiting GO.*
|
||||
@@ -58,7 +58,7 @@ Causes racines (au-delà du « c'est débranché ») :
|
||||
|
||||
Tranchées le 23/06 (`DECISIONS_PRODUIT_EN_ATTENTE_2026-06-23.md`) → **on exécute, on ne re-décide pas** :
|
||||
- **F2-1/F14-1** : rejeu intelligent = **OUI, prérequis** (consulter le fonds appris, pas de coords figées).
|
||||
- **F1-1** : critère de fusion = **signature de trajectoire** (create-or-update).
|
||||
- **F1-1** : critère de fusion = **signature de trajectoire**. ~~create-or-update~~ → **create-or-skip** (révisé Dom 2026-07-02 : un ré-apprentissage ne doit PAS écraser un workflow validé par revue humaine ; la 1ʳᵉ version importée fait foi. Refresh explicite = chantier séparé si besoin. Implémenté ainsi dans `learned_workflow_bridge.py`, cf. `CARTO_CODE_NON_BRANCHE_2026-07-02.md`).
|
||||
- **F9-1** : **DB = vérité**, JSON = échange ; métrique = workflows rejouables validés.
|
||||
- **F6-1** : mutualisation **cross + intra-clinique** (fédération anonymisée dans le périmètre + lever silo `machine_id`).
|
||||
|
||||
|
||||
135
docs/QG_REVIEW_D1_NAVIGATE_COORDS_2026-07-02.md
Normal file
135
docs/QG_REVIEW_D1_NAVIGATE_COORDS_2026-07-02.md
Normal file
@@ -0,0 +1,135 @@
|
||||
# QG Review Framework — D1 NavigateCoords Patch
|
||||
|
||||
**Auteur**: Qwen
|
||||
**Date**: 2026-07-02
|
||||
**Statut**: EN ATTENTE patch Codex
|
||||
**Scope**: Review du patch D1 (Option 1 — Compiler Injection) produit par Codex
|
||||
|
||||
---
|
||||
|
||||
## Baseline test coverage (pré-patch)
|
||||
|
||||
| Fichier | Classes | Tests | Rôle |
|
||||
|---------|---------|-------|------|
|
||||
| `test_navigate_handler_e2e.py` | 4 | 8 | Handler mock — nominal, OCR miss, no screenshot, never-fail |
|
||||
| `test_navigate_wiring.py` | 4 | 5 | Import/wiring non-regression |
|
||||
| `test_action_resolver.py` | 6 | 10 | NavigateCoords, NavigateResult, grounded_to_coords, navigate_login |
|
||||
| `test_coords_consumption_gap.py` | 3 | 10 | **GAP DOCUMENTATION** — résolution viable, compiler gap, navigate→[] |
|
||||
| **Total** | **17** | **33** | |
|
||||
|
||||
**Tests critiques à mettre à jour après D1 patch**:
|
||||
- `test_coords_consumption_gap.py::test_navigate_action_type_unknown` — affirme actuellement `actions == []`; doit affirmer `len(actions) >= 1` et `actions[0]["type"] == "navigate"` après D1
|
||||
- `test_coords_consumption_gap.py::TestCompilerGapLiteralFloats` — 4 tests documentant le gap literal-floats ; après D1, les tests coords_var doivent affirmer que des template strings SONT produites quand coords_var est présent
|
||||
|
||||
**Point d'insertion exact D1**:
|
||||
- Fichier: `replay_engine.py`
|
||||
- Entre `elif action_type == "llm_generate"` (retourne `[normalized]` ~L1949) et `else:` clause (~L1951)
|
||||
- Navigate branch: `elif action_type == "navigate"` → `normalized["type"] = "navigate"` + parameters dict → `return [normalized]`
|
||||
|
||||
**P1-C root cause**:
|
||||
- `_resolve_runtime_vars_in_str` (L2025): `return str(value)` — tout {{var.field}} résolu devient string "0.35" pas float 0.35
|
||||
- Coercion helper `_coerce_action_coords` doit agir APRÈS `_resolve_runtime_vars` (L4335), AVANT `type_ = action.get("type")` (L4337)
|
||||
|
||||
---
|
||||
|
||||
## Critères de review — Checklist
|
||||
|
||||
### C1 : Branche navigate dans `_edge_to_normalized_actions` (Gap C)
|
||||
|
||||
| # | Critère | GO | NOGO |
|
||||
|---|---------|----|------|
|
||||
| C1-1 | Branche `elif action_type == "navigate"` ajoutée entre `llm_generate` (L1949) et `else` (L1951) | Présente, position correcte | Absente ou mal positionnée |
|
||||
| C1-2 | `normalized["type"] = "navigate"` | Oui | Type incorrect |
|
||||
| C1-3 | Parameters dict avec `login_coords_var`, `password_coords_var`, `submit_coords_var` | Noms exacts, valeurs default | Noms divergent ou absents |
|
||||
| C1-4 | Retourne `[normalized]` (1 action serveur-side) | `[normalized]` | `[]` ou autre |
|
||||
| C1-5 | Test TR-1 : `test_navigate_action_type_unknown` mis à jour — affirme `len(result) >= 1` et `result[0]["type"] == "navigate"` | Test updated + passes | Test non mis à jour ou fails |
|
||||
|
||||
### C2 : coords_var dans branches mouse_click / text_input (Gap A)
|
||||
|
||||
| # | Critère | GO | NOGO |
|
||||
|---|---------|----|------|
|
||||
| C2-1 | `coords_var = action_params.get("coords_var")` check dans mouse_click | Present | Absent |
|
||||
| C2-2 | Si coords_var → `x_pct = f"{{{{{coords_var}.x_pct}}}}"` et `y_pct = f"{{{{{coords_var}.y_pct}}}}"` | Template strings correctes | Syntaxe template incorrecte ou .y_pct pour x_pct |
|
||||
| C2-3 | Si coords_var absent → literal floats comme avant (fallback existant) | Branch else intacte | Branch else modifiée ou supprimée |
|
||||
| C2-4 | `normalized["coords_var"] = coords_var` ajouté pour traçabilité | Oui | Absent |
|
||||
| C2-5 | Même mécanisme dans text_input branch | Identique à mouse_click | Absent ou divergent |
|
||||
| C2-6 | BUG vérifié : text_input x_pct template = `{{coords_var.x_pct}}` (pas `.y_pct` deux fois) | Correct | y_pct en double |
|
||||
|
||||
### C3 : `_coerce_action_coords()` helper (Gap B / P1-C)
|
||||
|
||||
| # | Critère | GO | NOGO |
|
||||
|---|---------|----|------|
|
||||
| C3-1 | Helper défini dans api_stream.py (pas replay_engine.py) | api_stream.py | Autre fichier |
|
||||
| C3-2 | Appel APRÈS `_resolve_runtime_vars` (L4335), AVANT `type_ = action.get("type")` (L4337) | Position correcte | Avant resolver ou après type_ check |
|
||||
| C3-3 | float pass-through : `isinstance(val, float) → continue` | Idempotent sur actions existantes | Pas de float check → conversion inutile |
|
||||
| C3-4 | string→float : `try: action[key] = float(val)` | Conversion correcte | Pas de try/except → crash possible |
|
||||
| C3-5 | Template non résolu → pause_for_human (pas fallback 0.0/0.0) | `val.startswith("{{") and val.endswith("}}")` → pause_for_human | Fallback 0.0/0.0 ou autre coords dangereux |
|
||||
| C3-6 | Conversion impossible → pause_for_human | ValueError/TypeError → pause_for_human | Exception non catchée |
|
||||
| C3-7 | `_skip_reason` documenté pour debug | Oui | Absent |
|
||||
| C3-8 | `safety_level = "high"` pour pause_for_human | Oui | Absent ou autre valeur |
|
||||
| C3-9 | Retourne action mutée (pas de new dict) | Mutation in-place | Copie → risque race |
|
||||
| C3-10 | Keys itérées = ("x_pct", "y_pct") uniquement | Pas de sur-itération | Autres keys modifiées |
|
||||
|
||||
### C4 : Never-fail contract
|
||||
|
||||
| # | Critère | GO | NOGO |
|
||||
|---|---------|----|------|
|
||||
| C4-1 | `_handle_navigate_action` ne lance jamais d'exception non catchée | Contract preserved | Nouvelle exception possible |
|
||||
| C4-2 | `_coerce_action_coords` ne lance jamais — tout cas couvert par try/except ou pause_for_human | Contract preserved | Exception possible |
|
||||
|
||||
### C5 : Limites de scope POC
|
||||
|
||||
| # | Critère | GO | NOGO |
|
||||
|---|---------|----|------|
|
||||
| C5-1 | Maximum 4 fichiers modifiés | ≤ 4 | > 4 |
|
||||
| C5-2 | Pas de changement schema VWB dans POC patch | Pas de modification VWB code | VWB code modifié |
|
||||
| C5-3 | Pas de nouvelle dépendance pip | 0 nouvelles deps | Nouvelle dep |
|
||||
| C5-4 | Pas de modification OmniParser wiring | `_omniparser_available = False` intact | Modifié |
|
||||
|
||||
### C6 : Test coverage
|
||||
|
||||
| # | Critère | GO | NOGO |
|
||||
|---|---------|----|------|
|
||||
| C6-1 | TR-1 : navigate compile à 1 action (pas []) | Passes | Fails |
|
||||
| C6-2 | TR-2 : coords_var template resolution + float conversion | Passes | Fails |
|
||||
| C6-3 | Test `_coerce_action_coords` : float pass-through | Passes | Absent |
|
||||
| C6-4 | Test `_coerce_action_coords` : string→float conversion | Passes | Absent |
|
||||
| C6-5 | Test `_coerce_action_coords` : template non résolu → pause_for_human | Passes | Absent |
|
||||
| C6-6 | Test `_coerce_action_coords` : conversion impossible → pause_for_human | Passes | Absent |
|
||||
| C6-7 | Test idempotence : action existante float non modifiée | Passes | Absent |
|
||||
| C6-8 | `pytest tests/unit/` passe en intégralité | 0 failures | ≥1 failure |
|
||||
|
||||
### C7 : Risques additionnels (3 identifiés dans PLAN_D1)
|
||||
|
||||
| # | Risque | Mitigation attendue | GO | NOGO |
|
||||
|---|--------|--------------------|----|------|
|
||||
| C7-1 | Résolution partielle (x_pct résolu, y_pct template) | `_coerce_action_coords` → pause_for_human si ANY key unresolved | Mitigation présente | Pas de mitigation |
|
||||
| C7-2 | Idempotence sur mouse_click existant | `isinstance(val, float) → continue` | Idempotent | Risque de double conversion |
|
||||
| C7-3 | Race condition sur variables dict partagé | BFS séquentiel garantit navigate→click ordre | Note dans code/doc | Pas de mention |
|
||||
|
||||
---
|
||||
|
||||
## Procédure de review
|
||||
|
||||
1. **Lire le patch** : `git diff` sur les fichiers modifiés par Codex
|
||||
2. **Vérifier chaque critère C1-C7** : GO/NOGO par ligne
|
||||
3. **Exécuter les tests** : `cd /home/dom/ai/rpa_vision_v3 && .venv/bin/python -m pytest tests/unit/ -x -v`
|
||||
4. **Produire le verdict** : Table GO/NOGO avec justification + verdict global
|
||||
|
||||
## Format verdict
|
||||
|
||||
```
|
||||
## QG Verdict — D1 NavigateCoords Patch
|
||||
|
||||
| Critère | GO/NOGO | Note |
|
||||
|---------|---------|------|
|
||||
| C1 | GO | Branche navigate correcte |
|
||||
| C2 | NOGO | BUG: y_pct en double dans text_input |
|
||||
| ... | ... | ... |
|
||||
|
||||
**Verdict global**: GO / NOGO (avec réserves listées)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
*Qwen — framework QG prêt, awaiting Codex patch pour exécution.*
|
||||
263
scripts/bench_ppocrv5_cpu.py
Normal file
263
scripts/bench_ppocrv5_cpu.py
Normal file
@@ -0,0 +1,263 @@
|
||||
#!/usr/bin/env python3
|
||||
"""PP-OCRv5 CPU baseline bench — dry-run 1 capture.
|
||||
|
||||
Compare docTR vs EasyOCR vs PP-OCRv5 (CPU-only paddlepaddle).
|
||||
|
||||
Label obligatoire : baseline CPU, non verdict GPU.
|
||||
|
||||
Metrics:
|
||||
- text accuracy (field-level exact match)
|
||||
- word bbox center error (px) vs docTR reference
|
||||
- latency cold/warm (s)
|
||||
- peak memory (MB)
|
||||
"""
|
||||
|
||||
import time
|
||||
import tracemalloc
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# ── Config ──
|
||||
TEST_IMAGE = Path("/home/dom/ai/rpa_vision_v3/data/training/live_sessions/DESKTOP-58D5CAC_windows/sess_20260318T010719_62a058/shots/shot_0172_full.png")
|
||||
EASILY_IMAGE = Path("/home/dom/ai/rpa_vision_v3/output/playwright/easily_dryrun_2026-05-26/landing_wide.png")
|
||||
RESULTS_JSON = Path("/home/dom/ai/rpa_vision_v3/scripts/bench_ppocrv5_results.json")
|
||||
|
||||
ENGINES = ["ppocrv5_cpu", "doctr", "easyocr"]
|
||||
|
||||
|
||||
def _parse_paddle_page(page) -> tuple:
    """Extract ``(texts, bboxes)`` from one PaddleOCR page result.

    Two layouts are accepted:
    - PaddleOCR 3.x: a dict-like result exposing parallel ``rec_polys``,
      ``rec_texts`` and ``rec_scores`` sequences;
    - legacy 2.x: a list of ``[polygon, (text, confidence)]`` entries, where
      an entry may be ``None``.
    """
    texts = []
    bboxes = []
    if not page:
        return texts, bboxes

    if isinstance(page, dict):
        # 3.x layout: rebuild the per-line triples from the parallel arrays and
        # convert numeric values to plain Python types for JSON output.
        lines = [
            ([[float(pt[0]), float(pt[1])] for pt in poly], str(text), float(score))
            for poly, text, score in zip(
                page.get("rec_polys", []),
                page.get("rec_texts", []),
                page.get("rec_scores", []),
            )
        ]
    else:
        # Legacy layout: [[x1,y1],[x2,y2],[x3,y3],[x4,y4]], (text, confidence)
        lines = [
            (line[0], line[1][0], line[1][1])
            for line in page
            if line is not None
        ]

    for bbox_raw, text, confidence in lines:
        xs = [pt[0] for pt in bbox_raw]
        ys = [pt[1] for pt in bbox_raw]
        cx = sum(xs) / len(xs)
        cy = sum(ys) / len(ys)
        texts.append({"text": text, "confidence": confidence})
        bboxes.append({"bbox": bbox_raw, "center": (cx, cy), "text": text})
    return texts, bboxes


def bench_ppocrv5_cpu(img_path: Path) -> dict:
    """Run PP-OCRv5 (CPU) on one image and return a results dict.

    Measures cold and warm latency of ``ocr.ocr`` plus the tracemalloc peak
    after model construction (``mem_init_MB``) and after both runs
    (``mem_peak_MB``). Detections are parsed from the cold run.

    NOTE(review): tracemalloc only sees allocations made through Python's
    allocators, so native memory used by the inference backend is presumably
    not included in these figures.

    Args:
        img_path: path of the image to OCR.

    Returns:
        Dict with engine name, latencies, memory figures, detections and
        environment labels.
    """
    from paddleocr import PaddleOCR

    tracemalloc.start()
    try:
        ocr = PaddleOCR(
            use_textline_orientation=True,
            lang="fr",
            return_word_box=True,
        )
        mem_init = tracemalloc.get_traced_memory()[1] / 1024 / 1024

        # Cold run
        t0 = time.perf_counter()
        result_cold = ocr.ocr(str(img_path))
        t_cold = time.perf_counter() - t0

        # Warm run (only timed; its output is not used)
        t0 = time.perf_counter()
        ocr.ocr(str(img_path))
        t_warm = time.perf_counter() - t0

        mem_peak = tracemalloc.get_traced_memory()[1] / 1024 / 1024
    finally:
        # Always stop tracing, otherwise a failure here would leak this
        # engine's peak into the next engine's measurement.
        tracemalloc.stop()

    # PaddleOCR returns a list of pages; only the first page is benchmarked.
    first_page = result_cold[0] if result_cold else None
    texts, bboxes = _parse_paddle_page(first_page)

    return {
        "engine": "ppocrv5_cpu",
        "image": str(img_path),
        "cold_latency_s": round(t_cold, 3),
        "warm_latency_s": round(t_warm, 3),
        "mem_init_MB": round(mem_init, 1),
        "mem_peak_MB": round(mem_peak, 1),
        "num_detections": len(texts),
        "texts": texts,
        "bboxes": bboxes,
        "paddle_version": "3.4.0",
        "paddlepaddle_version": "3.3.1",
        "device": "cpu",
        "cuda_available_driver": True,
        "cuda_compiled_paddle": False,
        "label": "baseline CPU, non verdict GPU",
    }
|
||||
|
||||
|
||||
def bench_doctr(img_path: Path) -> dict:
    """Run docTR (CPU) on one image and return a results dict.

    Measures cold and warm latency of the predictor plus the tracemalloc peak
    after model construction (``mem_init_MB``) and after both runs
    (``mem_peak_MB``). Word centers are converted from docTR's relative
    geometry to pixels using the image size.

    Args:
        img_path: path of the image to OCR.

    Returns:
        Dict with engine name, latencies, memory figures and detections.
    """
    from doctr.models import ocr_predictor

    tracemalloc.start()
    try:
        predictor = ocr_predictor(pretrained=True)
        mem_init = tracemalloc.get_traced_memory()[1] / 1024 / 1024

        from doctr.io import DocumentFile
        doc = DocumentFile.from_images(str(img_path))

        t0 = time.perf_counter()
        result = predictor(doc)
        t_cold = time.perf_counter() - t0

        # Warm run (only timed; its output is not used)
        t0 = time.perf_counter()
        predictor(doc)
        t_warm = time.perf_counter() - t0

        mem_peak = tracemalloc.get_traced_memory()[1] / 1024 / 1024
    finally:
        # Always stop tracing so a failure cannot contaminate the next engine.
        tracemalloc.stop()

    # The image size is loop-invariant: read it once instead of re-opening
    # the file for every detected word.
    import PIL.Image
    with PIL.Image.open(img_path) as im:
        w, h = im.size

    texts = []
    bboxes = []
    for page in result.pages:
        for block in page.blocks:
            for line in block.lines:
                for word in line.words:
                    texts.append({"text": word.value, "confidence": word.confidence})
                    # docTR bbox is in relative coords (0-1); convert the
                    # center to pixels.
                    bbox = word.geometry
                    cx = (bbox[0][0] + bbox[1][0]) / 2 * w
                    cy = (bbox[0][1] + bbox[1][1]) / 2 * h
                    bboxes.append({
                        "bbox_relative": [(bbox[0][0], bbox[0][1]), (bbox[1][0], bbox[1][1])],
                        "center_px": (round(cx, 1), round(cy, 1)),
                        "text": word.value,
                    })

    return {
        "engine": "doctr",
        "image": str(img_path),
        "cold_latency_s": round(t_cold, 3),
        "warm_latency_s": round(t_warm, 3),
        "mem_init_MB": round(mem_init, 1),
        "mem_peak_MB": round(mem_peak, 1),
        "num_detections": len(texts),
        "texts": texts,
        "bboxes": bboxes,
        "version": "1.0.1",
        "device": "cpu",
        "label": "baseline CPU",
    }
|
||||
|
||||
|
||||
def bench_easyocr(img_path: Path) -> dict:
    """Run EasyOCR (CPU) on one image and return a results dict.

    Measures cold and warm latency of ``reader.readtext`` plus the tracemalloc
    peak after reader construction (``mem_init_MB``) and after both runs
    (``mem_peak_MB``). Detections are parsed from the cold run.

    Args:
        img_path: path of the image to OCR.

    Returns:
        Dict with engine name, latencies, memory figures and detections.
    """
    import easyocr

    tracemalloc.start()
    try:
        reader = easyocr.Reader(["fr"], gpu=False)
        mem_init = tracemalloc.get_traced_memory()[1] / 1024 / 1024

        t0 = time.perf_counter()
        result = reader.readtext(str(img_path))
        t_cold = time.perf_counter() - t0

        # Warm run (only timed; its output is not used)
        t0 = time.perf_counter()
        reader.readtext(str(img_path))
        t_warm = time.perf_counter() - t0

        mem_peak = tracemalloc.get_traced_memory()[1] / 1024 / 1024
    finally:
        # Always stop tracing so a failure cannot contaminate the next engine.
        tracemalloc.stop()

    texts = []
    bboxes = []
    for detection in result:
        bbox_raw = detection[0]  # list of [x, y] points
        text = detection[1]
        confidence = detection[2]
        xs = [pt[0] for pt in bbox_raw]
        ys = [pt[1] for pt in bbox_raw]
        cx = sum(xs) / len(xs)
        cy = sum(ys) / len(ys)
        texts.append({"text": text, "confidence": confidence})
        bboxes.append({"bbox": bbox_raw, "center_px": (round(cx, 1), round(cy, 1)), "text": text})

    return {
        "engine": "easyocr",
        "image": str(img_path),
        "cold_latency_s": round(t_cold, 3),
        "warm_latency_s": round(t_warm, 3),
        "mem_init_MB": round(mem_init, 1),
        "mem_peak_MB": round(mem_peak, 1),
        "num_detections": len(texts),
        "texts": texts,
        "bboxes": bboxes,
        "version": "1.7.2",
        "device": "cpu",
        "label": "baseline CPU",
    }
|
||||
|
||||
|
||||
def main():
    """Bench the three OCR engines on one image, save JSON, print a summary.

    Uses ``TEST_IMAGE`` when it exists, otherwise ``EASILY_IMAGE``; exits with
    status 1 when neither is present. A failing engine is recorded as
    ``{"error": ...}`` and does not stop the others.
    """
    img = TEST_IMAGE if TEST_IMAGE.exists() else EASILY_IMAGE
    if not img.exists():
        print(f"ERROR: No test image found. Tried {TEST_IMAGE} and {EASILY_IMAGE}")
        sys.exit(1)

    print(f"Bench image: {img}")
    print(f"Image size: ...")
    import PIL.Image
    with PIL.Image.open(img) as im:
        w, h = im.size
        print(f" {w}x{h}, mode={im.mode}")

    all_results = {}

    # (section title, result key, bench function), run in this order.
    runners = (
        ("PP-OCRv5 CPU", "ppocrv5_cpu", bench_ppocrv5_cpu),
        ("docTR CPU", "doctr", bench_doctr),
        ("EasyOCR CPU", "easyocr", bench_easyocr),
    )
    for title, key, runner in runners:
        print(f"\n=== {title} ===")
        try:
            outcome = runner(img)
            all_results[key] = outcome
            print(f" Cold: {outcome['cold_latency_s']}s | Warm: {outcome['warm_latency_s']}s | Detections: {outcome['num_detections']}")
            print(f" Memory: init {outcome['mem_init_MB']}MB | peak {outcome['mem_peak_MB']}MB")
        except Exception as e:
            # Best-effort bench: record the failure and move on to the next engine.
            print(f" FAILED: {e}")
            all_results[key] = {"error": str(e)}

    # Save JSON
    with open(RESULTS_JSON, "w") as f:
        json.dump(all_results, f, indent=2, default=str)
    print(f"\nResults saved to {RESULTS_JSON}")

    # Synthesis table
    print("\n=== Synthesis ===")
    print(f"{'Engine':<15} {'Cold(s)':<10} {'Warm(s)':<10} {'Det':<6} {'Mem(MB)':<10} {'Label'}")
    for eng, row in all_results.items():
        if "error" in row:
            print(f"{eng:<15} FAILED")
            continue
        print(f"{eng:<15} {row['cold_latency_s']:<10} {row['warm_latency_s']:<10} {row['num_detections']:<6} {row['mem_peak_MB']:<10} {row.get('label', '')}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
4522
scripts/bench_ppocrv5_results.json
Normal file
4522
scripts/bench_ppocrv5_results.json
Normal file
File diff suppressed because it is too large
Load Diff
@@ -23,6 +23,7 @@ from agent_v0.server_v1.replay_engine import (
|
||||
_edge_to_normalized_actions,
|
||||
_resolve_runtime_vars,
|
||||
_resolve_runtime_vars_in_str,
|
||||
_coerce_action_coords,
|
||||
)
|
||||
|
||||
|
||||
@@ -192,11 +193,185 @@ class TestCompilerGapLiteralFloats:
|
||||
assert action["x_pct"] == 0.30
|
||||
assert action["y_pct"] == 0.50
|
||||
|
||||
def test_navigate_action_type_handled(self):
    """A ``navigate`` edge is compiled by ``_edge_to_normalized_actions``.

    The compiler returns exactly one normalized action whose ``type`` is
    ``"navigate"`` and whose ``parameters`` carry the requested ``action``
    together with the three default ``*_coords_var`` names.

    This replaces the earlier expectation that ``navigate`` fell through to
    the unknown-action branch and compiled to an empty list.
    """
    edge = _FakeEdge(_FakeAction("navigate", parameters={"action": "login"}))
    actions = _edge_to_normalized_actions(edge, params={})

    assert len(actions) == 1
    action = actions[0]
    assert action["type"] == "navigate"
    assert "parameters" in action
    assert action["parameters"]["action"] == "login"
    assert action["parameters"]["login_coords_var"] == "navigate_login_coords"
    assert action["parameters"]["password_coords_var"] == "navigate_password_coords"
    assert action["parameters"]["submit_coords_var"] == "navigate_submit_coords"
|
||||
|
||||
|
||||
class TestNavigateBranchNonRegression:
    """Non-regression coverage for the ``navigate`` branch of the compiler.

    Locks in the D1 fix: ``_edge_to_normalized_actions`` must turn a
    ``navigate`` action into a proper normalized dict so that the
    server-side dispatch can route it to ``_handle_navigate_action``.
    """

    @staticmethod
    def _compile(parameters):
        """Compile a fake ``navigate`` edge and return the resulting list."""
        edge = _FakeEdge(_FakeAction("navigate", parameters=parameters))
        return _edge_to_normalized_actions(edge, params={})

    def test_navigate_default_params(self):
        """An empty parameter dict is completed with the default values."""
        compiled = self._compile({})

        assert len(compiled) == 1
        nav = compiled[0]
        assert nav["type"] == "navigate"
        expected_defaults = (
            ("action", "login"),
            ("login_coords_var", "navigate_login_coords"),
            ("password_coords_var", "navigate_password_coords"),
            ("submit_coords_var", "navigate_submit_coords"),
        )
        for key, value in expected_defaults:
            assert nav["parameters"][key] == value

    def test_navigate_custom_vars(self):
        """Caller-supplied ``*_coords_var`` names are propagated verbatim."""
        custom_names = {
            "login_coords_var": "login_pos",
            "password_coords_var": "pwd_pos",
            "submit_coords_var": "btn_pos",
        }
        compiled = self._compile(dict(custom_names))

        assert len(compiled) == 1
        forwarded = compiled[0]["parameters"]
        for key, value in custom_names.items():
            assert forwarded[key] == value

    def test_navigate_login_config_overrides(self):
        """Login-config keys are forwarded into the normalized parameters."""
        login_config = {
            "login_field": "username",
            "password_field": "pass",
            "submit_button": "connexion",
            "context": "DPI urgences",
        }
        compiled = self._compile(dict(login_config))

        assert len(compiled) == 1
        forwarded = compiled[0]["parameters"]
        for key, value in login_config.items():
            assert forwarded[key] == value

    def test_navigate_base_fields_present(self):
        """The normalized action keeps edge_id, from_node, to_node, action_id."""
        nav = self._compile({"action": "login"})[0]

        # Presence first, then the concrete identifiers of the fake edge.
        for field in ("edge_id", "from_node", "to_node", "action_id"):
            assert field in nav
        assert nav["edge_id"] == "edge_coords_gap"
        assert nav["from_node"] == "node_src"
        assert nav["to_node"] == "node_dst"

    def test_navigate_no_x_y_pct(self):
        """No x_pct/y_pct at compile time; coordinates come from the handler."""
        nav = self._compile({"action": "login"})[0]

        for coord_key in ("x_pct", "y_pct"):
            assert coord_key not in nav
|
||||
|
||||
|
||||
# ── Test P1-C: _coerce_action_coords ──────────────────────────────────
|
||||
|
||||
|
||||
class TestCoerceActionCoords:
    """P1-C coercion helper: turn x_pct/y_pct strings back into floats.

    Template resolution by ``_resolve_runtime_vars`` yields strings, so the
    chain under test is: navigate → variables → ``_resolve_runtime_vars`` →
    strings → ``_coerce_action_coords`` → floats. Unresolved or invalid
    values must fail safe instead of producing coordinates.
    """

    @staticmethod
    def _assert_paused(outcome, *fragments):
        """Check that ``outcome`` is a high-safety pause citing each fragment."""
        assert outcome["type"] == "pause_for_human"
        assert outcome["safety_level"] == "high"
        for fragment in fragments:
            assert fragment in outcome["_skip_reason"]

    def test_float_idempotent(self):
        """Existing float coordinates (plain mouse clicks) are left as they are."""
        click = {"type": "click", "x_pct": 0.15, "y_pct": 0.07}
        outcome = _coerce_action_coords(click)

        assert outcome["x_pct"] == 0.15
        assert outcome["y_pct"] == 0.07
        assert outcome["type"] == "click"

    def test_string_to_float_conversion(self):
        """Numeric strings left by template resolution become real floats."""
        click = {"type": "click", "x_pct": "0.35", "y_pct": "0.07"}
        outcome = _coerce_action_coords(click)

        for axis, expected in (("x_pct", 0.35), ("y_pct", 0.07)):
            assert outcome[axis] == expected
            assert isinstance(outcome[axis], float)
        assert outcome["type"] == "click"

    def test_unresolved_template_pause_for_human(self):
        """A leftover {{var.field}} placeholder pauses; it never falls back to 0.0."""
        placeholder = "{{navigate_login_coords.x_pct}}"
        click = {"type": "click", "x_pct": placeholder, "y_pct": 0.07}

        outcome = _coerce_action_coords(click)

        self._assert_paused(outcome, "coords_var non résolu", placeholder)

    def test_invalid_string_pause_for_human(self):
        """A non-numeric string such as "abc" pauses, with no fallback coords."""
        click = {"type": "click", "x_pct": "abc", "y_pct": 0.07}

        outcome = _coerce_action_coords(click)

        self._assert_paused(outcome, "coords invalide", "abc")

    def test_no_coords_keys_unchanged(self):
        """An action carrying neither x_pct nor y_pct comes back equal."""
        navigate = {"type": "navigate", "parameters": {"action": "login"}}

        assert _coerce_action_coords(navigate) == navigate

    def test_full_chain_resolve_then_coerce(self):
        """End to end: resolve templates to strings, then coerce to floats."""
        runtime_vars = {
            "navigate_login_coords": {
                "x_pct": 0.15,
                "y_pct": 0.35,
                "method": "ocr_anchor",
            },
        }
        templated = {
            "type": "click",
            "x_pct": "{{navigate_login_coords.x_pct}}",
            "y_pct": "{{navigate_login_coords.y_pct}}",
        }

        # Stage 1: the resolver substitutes the templates and yields strings.
        resolved = _resolve_runtime_vars(templated, runtime_vars)
        assert resolved["x_pct"] == "0.15"
        assert resolved["y_pct"] == "0.35"

        # Stage 2: coercion converts those strings into floats.
        coerced = _coerce_action_coords(resolved)
        for axis, expected in (("x_pct", 0.15), ("y_pct", 0.35)):
            assert coerced[axis] == expected
            assert isinstance(coerced[axis], float)
        assert coerced["type"] == "click"
|
||||
|
||||
@@ -362,8 +362,14 @@ def import_core_workflow_to_db(
|
||||
dict {created: bool, workflow_id: str, signature: str, warnings: list}.
|
||||
`created=False` quand un workflow de même trajectoire existait déjà.
|
||||
|
||||
Note (non-wiring) : cette unité n'est PAS branchée au worker live ni à la
|
||||
route HTTP existante ; voir le rapport de câblage R1.
|
||||
Sémantique : **create-or-skip** (choix acté Dom 2026-07-02). Si un workflow
|
||||
de même signature de trajectoire existe déjà, on le RÉUTILISE tel quel — on ne
|
||||
le met PAS à jour. Rationale : le workflow validé (revue humaine) fait foi ;
|
||||
un ré-apprentissage automatique ne doit pas écraser une version validée. Si un
|
||||
refresh explicite devient nécessaire, ce sera un chantier séparé (create-or-update).
|
||||
|
||||
Wiring : branché au worker live via `stream_processor._maybe_import_to_vwb`
|
||||
(depuis c82829f2b, 29/06), sous gate `RPA_R1_AUTO_IMPORT` (défaut OFF).
|
||||
"""
|
||||
# Imports paresseux : garde le module léger et évite un import core/DB au load.
|
||||
from core.execution.trajectory_signature import workflow_trajectory_signature
|
||||
|
||||
Reference in New Issue
Block a user