feat(vwb): add dashboard competence testing and health tools

2026-06-02 16:27:19 +02:00
parent d38f0b0f2f
commit 18ed6cb751
23 changed files with 2769 additions and 27 deletions
--- a/visual_workflow_builder/backend/api_v3/dag_execute.py
+++ b/visual_workflow_builder/backend/api_v3/dag_execute.py
@@ -1040,16 +1040,27 @@ def _load_anchor_image_b64(anchor_id: str) -> Optional[str]:
    return None


+def _first_non_empty(*values: Any) -> str:
+    """Return the first candidate whose stripped text is usable, else ''."""
+    # Falsy inputs become "" and the literal words none/null count as empty.
+    cleaned = (str(candidate or "").strip() for candidate in values)
+    usable = (s for s in cleaned if s and s.casefold() not in {"none", "null"})
+    return next(usable, "")
+
+
 def _load_anchor_metadata(anchor_id: str) -> Optional[Dict]:
    """Charger les métadonnées d'une ancre (bounding_box, taille, etc.)."""
    backend_dir = Path(__file__).resolve().parent.parent
+    metadata: Dict[str, Any] = {}

    # 1. Ancien format : metadata.json
    meta_path = backend_dir / 'data' / 'anchor_images' / anchor_id / 'metadata.json'
    if meta_path.exists():
        try:
            with open(meta_path, 'r', encoding='utf-8') as f:
-                return json.load(f)
+                loaded = json.load(f)
+                if isinstance(loaded, dict):
+                    metadata.update(loaded)
        except Exception:
            pass

@@ -1059,19 +1070,55 @@ def _load_anchor_metadata(anchor_id: str) -> Optional[Dict]:
        db_path = backend_dir / 'instance' / 'workflows.db'
        conn = sqlite3.connect(str(db_path))
        row = conn.execute(
-            "SELECT bbox_x, bbox_y, bbox_width, bbox_height, screen_width, screen_height "
+            "SELECT bbox_x, bbox_y, bbox_width, bbox_height, screen_width, screen_height, "
+            "target_text, ocr_description, description "
            "FROM visual_anchors WHERE id=?", (anchor_id,)
        ).fetchone()
        conn.close()
        if row:
-            return {
+            db_metadata = {
                'bounding_box': {'x': row[0], 'y': row[1], 'width': row[2], 'height': row[3]},
                'original_size': {'width': row[4] or 1920, 'height': row[5] or 1080},
+                'target_text': row[6] or '',
+                'ocr_description': row[7] or '',
+                'description': row[8] or '',
            }
+            metadata.setdefault('bounding_box', db_metadata['bounding_box'])
+            metadata.setdefault('original_size', db_metadata['original_size'])
+            for key in ('target_text', 'ocr_description', 'description'):
+                value = _first_non_empty(metadata.get(key), db_metadata.get(key))
+                if value:
+                    metadata[key] = value
    except Exception:
        pass

-    return None
+    return metadata or None
+
+
+def _anchor_semantic_target_spec(anchor_id: str, anchor_meta: Optional[Dict]) -> Dict[str, Any]:
+    """Build the human-readable targeting labels for a visual anchor.
+
+    Returns {} for a falsy anchor_id, else 'anchor_id' plus every label found.
+    """
+    if not anchor_id:
+        return {}
+
+    meta = anchor_meta or {}
+    text, ocr, desc = (
+        _first_non_empty(meta.get(key))
+        for key in ('target_text', 'ocr_description', 'description')
+    )
+    spec: Dict[str, Any] = {'anchor_id': anchor_id}
+    if text:
+        spec.update(target_text=text, by_text=text, by_text_source='visual_anchor')
+    if ocr:
+        spec['ocr_description'] = ocr
+    # vlm_description mirrors the description, else falls back to the OCR text.
+    if desc:
+        spec.update(description=desc, vlm_description=desc)
+    elif ocr:
+        spec['vlm_description'] = ocr
+    return spec


 def _inject_anchor_targeting(action: Dict, anchor_id: str) -> None:
@@ -1101,19 +1148,17 @@ def _inject_anchor_targeting(action: Dict, anchor_id: str) -> None:
            action['x_pct'] = round(cx, 4)
            action['y_pct'] = round(cy, 4)

+    target_spec = _anchor_semantic_target_spec(anchor_id, anchor_meta)
+
    # Image de l'ancre pour template matching côté agent
    anchor_b64 = _load_anchor_image_b64(anchor_id)
    if anchor_b64:
-        target_spec = {
-            'anchor_image_base64': anchor_b64,
-            'anchor_id': anchor_id,
-        }
+        target_spec['anchor_image_base64'] = anchor_b64
        if anchor_meta:
            target_spec['anchor_bbox'] = anchor_meta.get('bounding_box', {})
            target_spec['original_size'] = anchor_meta.get('original_size', {})

        action['visual_mode'] = True
-        action['target_spec'] = target_spec
        logger.info(
            "Action %s : ancre '%s' chargée (%d Ko), visual_mode activé",
            action.get('action_id', '?'),
@@ -1127,6 +1172,23 @@ def _inject_anchor_targeting(action: Dict, anchor_id: str) -> None:
            anchor_id,
        )

+    if target_spec:
+        action['target_spec'] = target_spec
+        if target_spec.get('by_text') or target_spec.get('vlm_description'):
+            action['visual_mode'] = True
+
+        label = _first_non_empty(
+            target_spec.get('by_text'),
+            target_spec.get('target_text'),
+            target_spec.get('description'),
+            target_spec.get('ocr_description'),
+            target_spec.get('vlm_description'),
+        )
+        if label:
+            action.setdefault('target_text', target_spec.get('target_text') or label)
+            action.setdefault('target_description', label)
+            action.setdefault('description', target_spec.get('description') or label)
+

@api_v3_bp.route('/execute-windows', methods=['POST'])
 def execute_windows():