chore(dgx): snapshot consolidation WIP pour transfert poc DGX

Regroupe le WIP non committé requis pour le clone/runtime DGX (Option A) : - api_stream.py : préflight replay + smoke santé modèles + handler 403 WP-B - de-hardcode VLM : vlm_config, gpu/*, vram_orchestrator, ollama_manager - stream_processor, semantic_matcher, agent_chat (app/planner/intent) - workflows.db (acquis ; le transfert artifacts le mettra à jour + rewrite chemins) - docs : plans DGX, benchmarks VLM/grounders, recherche SOTA, coordination 8 juin Snapshot destiné à la branche poc-dgx poussée sur Gitea pour cloner le DGX. Scan anti-secret : clean. graphify (repo embarqué) exclu. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-08 16:33:58 +02:00
parent f18de016d7
commit 6d34b3cb68
204 changed files with 15744 additions and 47 deletions
--- a/agent_v0/server_v1/api_stream.py
+++ b/agent_v0/server_v1/api_stream.py
@@ -31,7 +31,12 @@ from .replay_failure_logger import log_replay_failure
 from .replay_verifier import ReplayVerifier, VerificationResult
 from .replay_learner import ReplayLearner
 from .audit_trail import AuditTrail, AuditEntry
-from .agent_registry import AgentRegistry, AgentAlreadyEnrolledError, AgentRevokedError
+from .agent_registry import (
+    AgentRegistry,
+    AgentAlreadyEnrolledError,
+    AgentRevokedError,
+    FleetEnrollLockedError,
+)
 from .stream_processor import StreamProcessor, build_replay_from_raw_events, enrich_click_from_screenshot
 from .worker_stream import StreamWorker
 from .monitor_router import resolve_target_monitor  # QW1 — résolution écran cible
@@ -1595,6 +1600,20 @@ async def startup():
    logger.info("VLM model: %s", _vlm_model_name)
    print(f"\n  VLM model: {_vlm_model_name}")

+    # Smoke-test santé des modèles VLM/grounding (NON bloquant, thread daemon) :
+    # détecte les modèles « aveugles » (sans capacité vision) au démarrage plutôt qu'en
+    # échec silencieux runtime (incident 2026-06-08, UI-TARS réimporté sans mmproj → 500 masqué).
+    def _smoke_model_health():
+        try:
+            from core.detection.model_health import smoke_check_models
+            from core.detection import vlm_config
+            _models = [vlm_config.get_vlm_model()] + list(getattr(vlm_config, "FALLBACK_VLM_MODELS", []))
+            smoke_check_models(sorted({m for m in _models if m}))
+        except Exception as _e:  # ne jamais bloquer le démarrage
+            logger.debug("smoke santé modèles ignoré: %s", _e)
+
+    threading.Thread(target=_smoke_model_health, name="model-health-smoke", daemon=True).start()
+
    # Afficher le token API au démarrage pour que l'utilisateur puisse configurer l'agent
    _token_source = "env RPA_API_TOKEN" if os.environ.get("RPA_API_TOKEN") else "auto-généré"
    logger.info(f"API Token ({_token_source}): {API_TOKEN}")
@@ -1648,7 +1667,15 @@ async def startup():
    )


-def _load_existing_workflows():
+def _iter_workflow_json_files(wf_dir: Path):
+    """Iterate workflow JSON files root-first, including machine subdirectories."""
+    return sorted(
+        wf_dir.rglob("*.json"),
+        key=lambda p: (len(p.relative_to(wf_dir).parts), str(p.relative_to(wf_dir))),
+    )
+
+
+def _load_existing_workflows(clear: bool = False) -> int:
    """Charger les workflows JSON existants dans processor._workflows.

    Supporte deux formats :
@@ -1657,6 +1684,10 @@ def _load_existing_workflows():
    """
    from core.models.workflow_graph import Workflow

+    if clear:
+        with processor._data_lock:
+            processor._workflows.clear()
+
    workflow_dirs = [
        ROOT_DIR / "data" / "workflows",
        ROOT_DIR / "data" / "training" / "workflows",
@@ -1667,7 +1698,7 @@ def _load_existing_workflows():
    for wf_dir in workflow_dirs:
        if not wf_dir.exists():
            continue
-        for wf_file in wf_dir.glob("*.json"):
+        for wf_file in _iter_workflow_json_files(wf_dir):
            try:
                wf = Workflow.load_from_file(str(wf_file))
                if wf and hasattr(wf, 'workflow_id'):
@@ -1689,7 +1720,10 @@ def _load_existing_workflows():
            except Exception as e:
                logger.debug(f"Skip workflow {wf_file.name}: {e}")

-    logger.info(f"Workflows chargés depuis disque: {loaded}")
+    with processor._data_lock:
+        total = len(processor._workflows)
+    logger.info(f"Workflows chargés depuis disque: {loaded} fichier(s), {total} en mémoire")
+    return total


@app.on_event("shutdown")
@@ -2858,7 +2892,7 @@ async def reload_workflows():
    Appelé par le VWB après un export-for-lea pour que le streaming server
    voie immédiatement les nouveaux workflows sans redémarrage.
    """
-    count = processor.reload_workflows()
+    count = _load_existing_workflows(clear=True)
    return {"success": True, "workflows_count": count}


@@ -2901,6 +2935,129 @@ async def get_session(session_id: str):
 # =========================================================================


+# Marqueurs de dialogues/popups connus, détectables statiquement dans un workflow.
+_DIALOG_MARKERS = (
+    "enregistrer sous",
+    "confirmer l'enregistrement",
+    "overwrite",
+    "remplacer",
+    "unsaved",
+    "modifications non enregistrées",
+    "save as",
+)
+
+
+def _iter_workflow_nodes(workflow: Any):
+    """Itère les nodes d'un workflow (objet Workflow OU dict), de façon tolérante."""
+    if isinstance(workflow, dict):
+        yield from workflow.get("nodes", [])
+        return
+    nodes = getattr(workflow, "nodes", None)
+    if nodes is None:
+        return
+    # nodes peut être un dict {id: node} ou une liste
+    yield from (nodes.values() if isinstance(nodes, dict) else nodes)
+
+
+def _node_text_blob(node: Any) -> str:
+    """Concatène les champs texte pertinents d'un node pour la détection de dialogue."""
+    parts: List[str] = []
+    if isinstance(node, dict):
+        parts.append(str(node.get("label", "")))
+        tmpl = node.get("template", {}) or {}
+        window = tmpl.get("window", {}) if isinstance(tmpl, dict) else {}
+        if isinstance(window, dict):
+            parts.append(str(window.get("title_contains", "")))
+            parts.append(str(window.get("title_pattern", "")))
+        parts.append(str(node.get("expected_window_title", "")))
+    else:
+        parts.append(str(getattr(node, "label", "")))
+        tmpl = getattr(node, "template", None)
+        window = getattr(tmpl, "window", None) if tmpl is not None else None
+        if window is not None:
+            parts.append(str(getattr(window, "title_contains", "") or ""))
+    return " ".join(p for p in parts if p).lower()
+
+
+def _detect_dialogs_static(workflow: Any) -> List[str]:
+    """Détecte statiquement les dialogues/popups attendus d'un workflow.
+
+    Analyse les nodes (titres de fenêtre, labels) sans aucune exécution ni session.
+    Retourne la liste dédupliquée des marqueurs de dialogue trouvés.
+    """
+    found: List[str] = []
+    for node in _iter_workflow_nodes(workflow):
+        blob = _node_text_blob(node)
+        for marker in _DIALOG_MARKERS:
+            if marker in blob and marker not in found:
+                found.append(marker)
+    return found
+
+
+def _sanitize_action(action: Dict[str, Any]) -> Dict[str, Any]:
+    """Réduit une action à des champs non sensibles pour l'aperçu préflight."""
+    return {
+        "type": action.get("type") or action.get("action"),
+        "target": (str(action.get("by_text") or action.get("target_spec") or "")[:60]) or None,
+        "has_coords": action.get("x_pct") is not None,
+    }
+
+
+def _build_preflight_report(
+    workflow: Any, workflow_id: str, actions: List[Dict[str, Any]]
+) -> Dict[str, Any]:
+    """Construit le rapport de préflight (analyse pure, AUCUN effet de bord).
+
+    Ne touche NI `_replay_queues`, NI `_replay_states`, NI aucun lock.
+    """
+    from collections import Counter
+
+    action_types = dict(Counter(
+        (a.get("type") or a.get("action") or "unknown") for a in actions
+    ))
+    name = workflow.get("name") if isinstance(workflow, dict) else getattr(workflow, "name", "")
+    return {
+        "workflow_known": True,
+        "workflow_id": workflow_id,
+        "workflow_name": name or "",
+        "n_actions": len(actions),
+        "action_types": action_types,
+        "dialogs_detected": _detect_dialogs_static(workflow),
+        "sample_actions": [_sanitize_action(a) for a in actions[:3]],
+        "non_destructive": True,
+    }
+
+
+class PreflightRequest(BaseModel):
+    """Requête de préflight replay (inspection non destructive d'un workflow)."""
+    workflow_id: str
+    params: Optional[Dict[str, Any]] = None
+
+
+@app.post("/api/v1/traces/stream/replay/preflight")
+async def preflight_replay(request: PreflightRequest):
+    """Préflight non destructif d'un workflow de replay.
+
+    Prouve `commande → workflow connu → actions non vides → dialogues détectables`
+    SANS injecter d'action, sans modifier `_replay_queues`/`_replay_states`, sans lock.
+    """
+    workflow_id = request.workflow_id
+    params = request.params or {}
+
+    with processor._data_lock:
+        workflow = processor._workflows.get(workflow_id)
+
+    if not workflow:
+        raise HTTPException(
+            status_code=404,
+            detail=f"Workflow '{workflow_id}' non trouvé. "
+                   f"Workflows disponibles : {list(processor._workflows.keys())[:20]}"
+        )
+
+    # Conversion en actions (fonction pure, sans effet de bord sur les queues)
+    actions = _workflow_to_actions(workflow, params)
+
+    return _build_preflight_report(workflow, workflow_id, actions)


@app.post("/api/v1/traces/stream/replay")
@@ -6859,6 +7016,18 @@ async def agents_enroll(request: AgentEnrollRequest):
                "existing": existing,
            },
        )
+    except FleetEnrollLockedError:
+        logger.warning(
+            f"[FLEET] Enrolement refuse machine_id={machine_id} : parc verrouille "
+            f"(RPA_FLEET_ENROLL_LOCKED)"
+        )
+        raise HTTPException(
+            status_code=403,
+            detail={
+                "error": "fleet_enroll_locked",
+                "message": "enrolement de nouveaux postes desactive (parc verrouille)",
+            },
+        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))

--- a/agent_v0/server_v1/stream_processor.py
+++ b/agent_v0/server_v1/stream_processor.py
@@ -2486,30 +2486,25 @@ class StreamProcessor:
            from core.models.workflow_graph import Workflow

            count = 0
-            # Charger les workflows du dossier racine (rétrocompatibilité)
-            for wf_file in sorted(workflows_dir.glob("*.json")):
+            workflow_files = sorted(
+                workflows_dir.rglob("*.json"),
+                key=lambda p: (
+                    len(p.relative_to(workflows_dir).parts),
+                    str(p.relative_to(workflows_dir)),
+                ),
+            )
+
+            for wf_file in workflow_files:
                try:
                    wf = Workflow.load_from_file(wf_file)
+                    rel_parts = wf_file.relative_to(workflows_dir).parts
+                    if len(rel_parts) > 1 and not hasattr(wf, '_machine_id'):
+                        wf._machine_id = rel_parts[0]
                    self._workflows[wf.workflow_id] = wf
                    count += 1
                except Exception as e:
                    logger.warning(f"Impossible de charger {wf_file.name}: {e}")

-            # Charger les workflows des sous-dossiers par machine
-            for machine_dir in sorted(workflows_dir.iterdir()):
-                if not machine_dir.is_dir():
-                    continue
-                for wf_file in sorted(machine_dir.glob("*.json")):
-                    try:
-                        wf = Workflow.load_from_file(wf_file)
-                        # Stocker le machine_id dans les métadonnées du workflow
-                        if not hasattr(wf, '_machine_id'):
-                            wf._machine_id = machine_dir.name
-                        self._workflows[wf.workflow_id] = wf
-                        count += 1
-                    except Exception as e:
-                        logger.warning(f"Impossible de charger {wf_file.name}: {e}")
-
            if count:
                logger.info(f"{count} workflow(s) chargé(s) depuis {workflows_dir}")
        except ImportError: