docs: track design docs, plans, audits, coordination infrastructure, handoffs
- 21 docs/*.md: audits, design notes, deployment plans, checklists, memos - Coordination: ROLES, runbooks (DGX reboot, Lea live), patches, registre, syntheses, systemd, QG template - Handoffs: 6 Codex handoff documents + README + template
This commit is contained in:
208
docs/coordination/patches/2026-06-18_api_workflows_fix.patch
Normal file
208
docs/coordination/patches/2026-06-18_api_workflows_fix.patch
Normal file
@@ -0,0 +1,208 @@
|
||||
diff --git a/tests/unit/test_dashboard_routes.py b/tests/unit/test_dashboard_routes.py
|
||||
index 3f8f0528c..69cc1b2fb 100644
|
||||
--- a/tests/unit/test_dashboard_routes.py
|
||||
+++ b/tests/unit/test_dashboard_routes.py
|
||||
@@ -212,6 +212,58 @@ class TestDashboardRoutes:
|
||||
data = resp.get_json()
|
||||
assert 'workflows' in data
|
||||
|
||||
+ def test_workflows_list_reads_vwb_db(self, client, monkeypatch, tmp_path):
|
||||
+ """Régression red-gate : /api/workflows reflète la base VWB v3, pas 0.
|
||||
+
|
||||
+ Avant correctif l'endpoint globait un store JSON vide et renvoyait
|
||||
+ toujours total:0. On construit une DB VWB minimale (schéma canonique
|
||||
+ workflows + steps) et on vérifie que l'endpoint expose le compte réel.
|
||||
+ """
|
||||
+ import sqlite3
|
||||
+ from pathlib import Path
|
||||
+
|
||||
+ db_path = tmp_path / "instance" / "workflows.db"
|
||||
+ db_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
+ conn = sqlite3.connect(str(db_path))
|
||||
+ conn.execute(
|
||||
+ "CREATE TABLE workflows (id VARCHAR(64) PRIMARY KEY, name VARCHAR(255), "
|
||||
+ "description TEXT, created_at DATETIME, updated_at DATETIME, "
|
||||
+ "is_active BOOLEAN, source VARCHAR(64), review_status VARCHAR(32))"
|
||||
+ )
|
||||
+ conn.execute(
|
||||
+ "CREATE TABLE steps (id VARCHAR(64) PRIMARY KEY, workflow_id VARCHAR(64), "
|
||||
+ "action_type VARCHAR(64))"
|
||||
+ )
|
||||
+ conn.execute(
|
||||
+ "INSERT INTO workflows VALUES (?,?,?,?,?,?,?,?)",
|
||||
+ ("wf_aiva", "Urgence_aiva_demo", "demo", "2026-06-01", "2026-06-18",
|
||||
+ 1, "manual", ""),
|
||||
+ )
|
||||
+ conn.execute(
|
||||
+ "INSERT INTO workflows VALUES (?,?,?,?,?,?,?,?)",
|
||||
+ ("wf_learned", "Learned_flow", "", "2026-06-02", "2026-06-17",
|
||||
+ 1, "learned_import", "pending"),
|
||||
+ )
|
||||
+ # 3 steps pour wf_aiva → nodes_count attendu = 3
|
||||
+ for i in range(3):
|
||||
+ conn.execute(
|
||||
+ "INSERT INTO steps VALUES (?,?,?)", (f"s{i}", "wf_aiva", "click")
|
||||
+ )
|
||||
+ conn.commit()
|
||||
+ conn.close()
|
||||
+
|
||||
+ monkeypatch.setattr(dashboard_app, "VWB_DB_PATH", Path(db_path))
|
||||
+
|
||||
+ resp = client.get('/api/workflows')
|
||||
+ assert resp.status_code == 200
|
||||
+ data = resp.get_json()
|
||||
+ assert data['total'] == 2, f"attendu 2 workflows, obtenu {data['total']}"
|
||||
+ names = {w['name'] for w in data['workflows']}
|
||||
+ assert 'Urgence_aiva_demo' in names
|
||||
+ aiva = next(w for w in data['workflows'] if w['name'] == 'Urgence_aiva_demo')
|
||||
+ assert aiva['nodes_count'] == 3
|
||||
+ assert aiva['source'] == 'manual'
|
||||
+
|
||||
def test_sessions_list(self, client):
|
||||
"""L'API sessions retourne la liste."""
|
||||
resp = client.get('/api/agent/sessions')
|
||||
diff --git a/web_dashboard/app.py b/web_dashboard/app.py
|
||||
index 7ee00c811..aec1edaf9 100644
|
||||
--- a/web_dashboard/app.py
|
||||
+++ b/web_dashboard/app.py
|
||||
@@ -189,6 +189,20 @@ SESSIONS_PATH = DATA_PATH / "sessions"
|
||||
WORKFLOWS_PATH = DATA_PATH / "workflows"
|
||||
LOGS_PATH = BASE_PATH / "logs"
|
||||
|
||||
+# Source canonique des workflows (décision produit D3) : la base VWB v3
|
||||
+# (SQLAlchemy/SQLite) que Léa lit déjà au runtime. Chemin absolu robuste (PAS la
|
||||
+# DB fantôme vide à la racine du repo `instance/workflows.db`, schéma obsolète,
|
||||
+# ni l'ancien store JSON `data/training/workflows/` créé vide sur DGX).
|
||||
+# Surchargeable via RPA_VWB_DB_PATH pour les déploiements atypiques.
|
||||
+def _resolve_vwb_db_path() -> Path:
|
||||
+ override = os.getenv("RPA_VWB_DB_PATH", "").strip()
|
||||
+ if override:
|
||||
+ return Path(override).expanduser()
|
||||
+ return BASE_PATH / "visual_workflow_builder" / "backend" / "instance" / "workflows.db"
|
||||
+
|
||||
+
|
||||
+VWB_DB_PATH = _resolve_vwb_db_path()
|
||||
+
|
||||
# StorageManager
|
||||
storage = StorageManager(base_path=str(DATA_PATH))
|
||||
|
||||
@@ -261,7 +275,9 @@ def system_status():
|
||||
"""Statut du système."""
|
||||
try:
|
||||
sessions_count = len(list(SESSIONS_PATH.glob('*'))) if SESSIONS_PATH.exists() else 0
|
||||
- workflows_count = len(list(WORKFLOWS_PATH.glob('*.json'))) if WORKFLOWS_PATH.exists() else 0
|
||||
+ # Source canonique D3 : base VWB v3 (même comptage que /api/workflows),
|
||||
+ # pas l'ancien store JSON `data/training/workflows/` créé vide sur DGX.
|
||||
+ workflows_count = len(_load_workflows_from_vwb_db())
|
||||
|
||||
dependencies_ok = True
|
||||
try:
|
||||
@@ -785,36 +801,83 @@ def rename_session_workflow(session_id):
|
||||
# API Workflows
|
||||
# =============================================================================
|
||||
|
||||
+def _load_workflows_from_vwb_db() -> list:
|
||||
+ """Charge les workflows depuis la base VWB v3 (source canonique D3).
|
||||
+
|
||||
+ Lit directement le SQLite que Léa interroge au runtime (cf.
|
||||
+ `agent_chat/app.py` → `GET /api/v3/session/state`). On compte les `steps`
|
||||
+ par workflow pour `nodes_count` (pas de notion d'`edges` en DAG linéaire :
|
||||
+ `edges_count` = max(steps-1, 0)). Robuste à l'absence de la DB ou des
|
||||
+ colonnes `source`/`review_status` (DB ancienne) : retourne [] sans planter.
|
||||
+ """
|
||||
+ import sqlite3
|
||||
+
|
||||
+ if not VWB_DB_PATH.exists():
|
||||
+ return []
|
||||
+
|
||||
+ workflows = []
|
||||
+ conn = sqlite3.connect(str(VWB_DB_PATH))
|
||||
+ try:
|
||||
+ conn.row_factory = sqlite3.Row
|
||||
+ # Colonnes disponibles (la DB fantôme/ancienne n'a pas source/review_status)
|
||||
+ cols = {row[1] for row in conn.execute("PRAGMA table_info(workflows)")}
|
||||
+ has_source = 'source' in cols
|
||||
+ has_review = 'review_status' in cols
|
||||
+
|
||||
+ select_cols = ['id', 'name', 'description', 'created_at', 'updated_at']
|
||||
+ if has_source:
|
||||
+ select_cols.append('source')
|
||||
+ if has_review:
|
||||
+ select_cols.append('review_status')
|
||||
+
|
||||
+ # Nombre de steps par workflow (= nodes du DAG)
|
||||
+ step_counts = {
|
||||
+ row[0]: row[1]
|
||||
+ for row in conn.execute(
|
||||
+ "SELECT workflow_id, COUNT(*) FROM steps GROUP BY workflow_id"
|
||||
+ )
|
||||
+ }
|
||||
+
|
||||
+ rows = conn.execute(
|
||||
+ f"SELECT {', '.join(select_cols)} FROM workflows ORDER BY updated_at DESC"
|
||||
+ ).fetchall()
|
||||
+
|
||||
+ for row in rows:
|
||||
+ wf_id = row['id']
|
||||
+ nodes_count = step_counts.get(wf_id, 0)
|
||||
+ workflows.append({
|
||||
+ 'workflow_id': wf_id,
|
||||
+ 'name': row['name'] or wf_id,
|
||||
+ 'description': row['description'] or '',
|
||||
+ 'nodes_count': nodes_count,
|
||||
+ 'edges_count': max(nodes_count - 1, 0),
|
||||
+ 'learning_state': 'OBSERVATION',
|
||||
+ 'created_at': str(row['created_at'] or ''),
|
||||
+ 'updated_at': str(row['updated_at'] or ''),
|
||||
+ 'execution_count': 0,
|
||||
+ 'source': row['source'] if has_source else 'manual',
|
||||
+ 'review_status': row['review_status'] if has_review else '',
|
||||
+ 'file_path': f"vwb_db://{wf_id}",
|
||||
+ })
|
||||
+ finally:
|
||||
+ conn.close()
|
||||
+
|
||||
+ return workflows
|
||||
+
|
||||
+
|
||||
@app.route('/api/workflows')
|
||||
def list_workflows():
|
||||
- """Liste tous les workflows."""
|
||||
+ """Liste tous les workflows depuis la base VWB v3 (source canonique D3).
|
||||
+
|
||||
+ Avant ce correctif, l'endpoint globait `data/training/workflows/*.json`
|
||||
+ (ancien store JSON, créé vide sur DGX) et renvoyait toujours `total: 0`,
|
||||
+ rendant la surface « ce que Léa sait » faussement vide. On lit désormais la
|
||||
+ même base SQLite que Léa au runtime.
|
||||
+ """
|
||||
try:
|
||||
- workflows = []
|
||||
hide_unnamed = request.args.get('hide_unnamed', 'true').lower() == 'true'
|
||||
|
||||
- if not WORKFLOWS_PATH.exists():
|
||||
- WORKFLOWS_PATH.mkdir(parents=True, exist_ok=True)
|
||||
- return jsonify({'workflows': [], 'total': 0, 'hidden_unnamed': 0})
|
||||
-
|
||||
- for wf_file in WORKFLOWS_PATH.glob('*.json'):
|
||||
- try:
|
||||
- with open(wf_file, 'r') as f:
|
||||
- wf_data = json.load(f)
|
||||
-
|
||||
- workflows.append({
|
||||
- 'workflow_id': wf_data.get('workflow_id', wf_file.stem),
|
||||
- 'name': wf_data.get('name', wf_file.stem),
|
||||
- 'description': wf_data.get('description', ''),
|
||||
- 'nodes_count': len(wf_data.get('nodes', [])),
|
||||
- 'edges_count': len(wf_data.get('edges', [])),
|
||||
- 'learning_state': wf_data.get('learning_state', 'OBSERVATION'),
|
||||
- 'created_at': wf_data.get('created_at', ''),
|
||||
- 'updated_at': wf_data.get('updated_at', ''),
|
||||
- 'execution_count': wf_data.get('execution_count', 0),
|
||||
- 'file_path': str(wf_file)
|
||||
- })
|
||||
- except Exception as e:
|
||||
- print(f"Erreur lecture workflow {wf_file}: {e}")
|
||||
+ workflows = _load_workflows_from_vwb_db()
|
||||
|
||||
# Filtrer les workflows "Unnamed" si demandé
|
||||
if hide_unnamed:
|
||||
@@ -0,0 +1,310 @@
|
||||
diff --git a/agent_v0/server_v1/api_stream.py b/agent_v0/server_v1/api_stream.py
|
||||
index 547aeb299..aa620853b 100644
|
||||
--- a/agent_v0/server_v1/api_stream.py
|
||||
+++ b/agent_v0/server_v1/api_stream.py
|
||||
@@ -835,15 +835,56 @@ def _get_worker_queue_status() -> Dict[str, Any]:
|
||||
components_ready = bool(components) and all(bool(v) for v in components.values())
|
||||
health_status = (health or {}).get("status")
|
||||
running = bool(health) and not health_stale and health_status != "stopped"
|
||||
+
|
||||
+ # Distinction VEILLE (armé, lazy) vs DÉGRADÉ (vrai échec).
|
||||
+ #
|
||||
+ # Les composants lourds (ScreenAnalyzer/CLIP/FAISS/StateEmbedding) sont
|
||||
+ # chargés en lazy par run_worker : le processor n'est instancié qu'au
|
||||
+ # premier _process_session (cf. run_worker._get_processor / _process_session).
|
||||
+ # Un worker neuf qui n'a jamais reçu de session écrit donc status="healthy"
|
||||
+ # avec tous les composants à false — c'est l'état NORMAL « en veille », pas
|
||||
+ # une panne. L'étiqueter "degraded" fait lire une panne là où il n'y en a pas.
|
||||
+ #
|
||||
+ # Signal retenu pour « init jamais tentée » : TOUS les composants à false ET
|
||||
+ # sessions_processed == 0 ET sessions_failed == 0. Justification : run_worker
|
||||
+ # n'appelle _get_processor() (donc l'init lazy) que dans _process_session, qui
|
||||
+ # incrémente toujours exactement un compteur (processed / failed / skipped).
|
||||
+ # Tant que processed == 0 ET failed == 0, aucune session n'a déclenché une
|
||||
+ # init suivie d'un traitement — le worker est armé en attente. Un simple skip
|
||||
+ # (dossier/shots absents) passe quand même par _get_processor() : les
|
||||
+ # composants se chargent, donc tous-à-false devient faux et on n'entre pas ici.
|
||||
+ # run_worker._health_components() écrit toujours les 4 clés (jamais un dict
|
||||
+ # vide), d'où le test sur les VALEURS et non sur la présence des clés.
|
||||
+ # Si run_worker a lui-même forcé status="degraded" (VLM + ScreenAnalyzer
|
||||
+ # absent, cf. run_worker._write_health), c'est un VRAI échec : on le conserve.
|
||||
+ stats = (health or {}).get("stats") or {}
|
||||
+ init_attempted = bool(stats.get("sessions_processed", 0)) or bool(
|
||||
+ stats.get("sessions_failed", 0)
|
||||
+ )
|
||||
+ components_all_false = bool(components) and not any(
|
||||
+ bool(v) for v in components.values()
|
||||
+ )
|
||||
+ armed = (
|
||||
+ running
|
||||
+ and not components_ready
|
||||
+ and health_status == "healthy"
|
||||
+ and components_all_false # aucun composant lourd encore chargé
|
||||
+ and not init_attempted
|
||||
+ )
|
||||
+
|
||||
status = health_status or "unknown"
|
||||
if not running:
|
||||
status = "stale" if health else "unknown"
|
||||
+ elif armed:
|
||||
+ # En veille : worker sain, composants chargés à la 1re session.
|
||||
+ status = "idle"
|
||||
elif not components_ready:
|
||||
status = "degraded"
|
||||
|
||||
return {
|
||||
"running": running,
|
||||
"status": status,
|
||||
+ "armed": armed,
|
||||
"queue_length": len(queue),
|
||||
"queue": queue,
|
||||
"replay_lock_active": REPLAY_LOCK_FILE.exists(),
|
||||
@@ -858,11 +899,29 @@ def _get_worker_queue_status() -> Dict[str, Any]:
|
||||
"components": components,
|
||||
"components_ready": components_ready,
|
||||
"processing_ready": running and not REPLAY_LOCK_FILE.exists() and components_ready,
|
||||
- "stats": (health or {}).get("stats") or {},
|
||||
+ "status_hint": _worker_status_hint(status, armed),
|
||||
+ "stats": stats,
|
||||
"note": "Le worker VLM tourne dans un process séparé (agent_v0.server_v1.run_worker).",
|
||||
}
|
||||
|
||||
|
||||
+def _worker_status_hint(status: str, armed: bool) -> str:
|
||||
+ """Message humain pour le statut worker (consommé par le dashboard)."""
|
||||
+ if armed or status == "idle":
|
||||
+ return "En veille — composants chargés à la 1re session."
|
||||
+ if status == "degraded":
|
||||
+ return "Worker apprentissage dégradé — init des composants en échec."
|
||||
+ if status == "stale":
|
||||
+ return "Health file périmé (> 180s) — worker peut-être arrêté."
|
||||
+ if status == "stopped":
|
||||
+ return "Worker arrêté."
|
||||
+ if status == "busy":
|
||||
+ return "Traitement d'une session en cours."
|
||||
+ if status == "healthy":
|
||||
+ return "Worker prêt — composants chargés."
|
||||
+ return "État worker inconnu."
|
||||
+
|
||||
+
|
||||
# =========================================================================
|
||||
# Compteur d'analyses en cours par session (pour attendre avant finalize)
|
||||
# =========================================================================
|
||||
diff --git a/tests/integration/test_stream_processor.py b/tests/integration/test_stream_processor.py
|
||||
index 660187901..344e614cb 100644
|
||||
--- a/tests/integration/test_stream_processor.py
|
||||
+++ b/tests/integration/test_stream_processor.py
|
||||
@@ -1289,3 +1289,158 @@ class TestAPIEndpoints:
|
||||
assert len(workflows) == 1
|
||||
assert workflows[0]["workflow_id"] == "wf_api_001"
|
||||
assert workflows[0]["nodes"] == 2
|
||||
+
|
||||
+
|
||||
+class TestWorkerStatusTruthfulness:
|
||||
+ """Truthfulness du statut worker exposé par _get_worker_queue_status.
|
||||
+
|
||||
+ Distingue VEILLE (armé, lazy : worker neuf qui n'a jamais traité de
|
||||
+ session, composants chargés à la 1re session) de DÉGRADÉ (init tentée
|
||||
+ et en échec). Un worker en veille ne doit JAMAIS être étiqueté 'degraded'.
|
||||
+ """
|
||||
+
|
||||
+ # Même contrainte que TestAPIEndpoints : api_stream fail-closed à l'import
|
||||
+ # si RPA_API_TOKEN absent.
|
||||
+ _TEST_API_TOKEN = "test_token_for_worker_status_0123456789abcdef"
|
||||
+
|
||||
+ @pytest.fixture(autouse=True)
|
||||
+ def _ensure_api_token(self, monkeypatch):
|
||||
+ monkeypatch.setenv("RPA_API_TOKEN", self._TEST_API_TOKEN)
|
||||
+
|
||||
+ @pytest.fixture
|
||||
+ def status_env(self, tmp_path, monkeypatch):
|
||||
+ """Isole les fichiers worker (health/queue/lock) sur tmp_path."""
|
||||
+ from agent_v0.server_v1 import api_stream
|
||||
+
|
||||
+ health_file = tmp_path / "_worker_health.json"
|
||||
+ queue_file = tmp_path / "_worker_queue.txt"
|
||||
+ lock_file = tmp_path / "_replay_active.lock"
|
||||
+ monkeypatch.setattr(api_stream, "WORKER_HEALTH_FILE", health_file)
|
||||
+ monkeypatch.setattr(api_stream, "WORKER_QUEUE_FILE", queue_file)
|
||||
+ monkeypatch.setattr(api_stream, "REPLAY_LOCK_FILE", lock_file)
|
||||
+ return api_stream, health_file
|
||||
+
|
||||
+ @staticmethod
|
||||
+ def _write_health(health_file, **overrides):
|
||||
+ """Écrit un health file frais (mtime récent => non stale)."""
|
||||
+ payload = {
|
||||
+ "pid": 1234,
|
||||
+ "started_at": "2026-06-18T10:00:00",
|
||||
+ "last_cycle": "2026-06-18T10:00:30",
|
||||
+ "current_session": None,
|
||||
+ "queue_length": 0,
|
||||
+ "components": {
|
||||
+ "screen_analyzer": False,
|
||||
+ "clip_embedder": False,
|
||||
+ "faiss_manager": False,
|
||||
+ "state_embedding_builder": False,
|
||||
+ },
|
||||
+ "stats": {
|
||||
+ "sessions_processed": 0,
|
||||
+ "sessions_failed": 0,
|
||||
+ "sessions_skipped": 0,
|
||||
+ "total_screenshots_analyzed": 0,
|
||||
+ },
|
||||
+ "status": "healthy",
|
||||
+ }
|
||||
+ payload.update(overrides)
|
||||
+ health_file.write_text(json.dumps(payload), encoding="utf-8")
|
||||
+
|
||||
+ def test_fresh_worker_is_idle_not_degraded(self, status_env):
|
||||
+ """Worker neuf : healthy, 0 session, tous composants false
|
||||
+ => statut 'idle' (en veille / armé), PAS 'degraded'."""
|
||||
+ api_stream, health_file = status_env
|
||||
+ self._write_health(health_file) # défaut = état neuf
|
||||
+
|
||||
+ status = api_stream._get_worker_queue_status()
|
||||
+
|
||||
+ assert status["running"] is True
|
||||
+ assert status["status"] == "idle", status
|
||||
+ assert status["armed"] is True
|
||||
+ assert status["components_ready"] is False
|
||||
+ # processing_ready reste False tant que les composants ne sont pas chargés
|
||||
+ assert status["processing_ready"] is False
|
||||
+ assert "veille" in status["status_hint"].lower()
|
||||
+
|
||||
+ def test_worker_init_failed_is_degraded(self, status_env):
|
||||
+ """Init tentée et en échec : run_worker force status='degraded'
|
||||
+ (VLM + ScreenAnalyzer absent) => on conserve 'degraded'."""
|
||||
+ api_stream, health_file = status_env
|
||||
+ self._write_health(
|
||||
+ health_file,
|
||||
+ status="degraded", # forcé par run_worker._write_health
|
||||
+ components={
|
||||
+ "screen_analyzer": False,
|
||||
+ "clip_embedder": True,
|
||||
+ "faiss_manager": True,
|
||||
+ "state_embedding_builder": False,
|
||||
+ },
|
||||
+ stats={
|
||||
+ "sessions_processed": 0,
|
||||
+ "sessions_failed": 1, # une session a tenté l'init et échoué
|
||||
+ "sessions_skipped": 0,
|
||||
+ "total_screenshots_analyzed": 0,
|
||||
+ },
|
||||
+ )
|
||||
+
|
||||
+ status = api_stream._get_worker_queue_status()
|
||||
+
|
||||
+ assert status["running"] is True
|
||||
+ assert status["status"] == "degraded", status
|
||||
+ assert status["armed"] is False
|
||||
+ assert status["processing_ready"] is False
|
||||
+ assert "dégradé" in status["status_hint"].lower()
|
||||
+
|
||||
+ def test_worker_partial_components_after_attempt_is_degraded(self, status_env):
|
||||
+ """Composants partiels après tentative de traitement (sessions_failed>0),
|
||||
+ sans status forcé par le worker => 'degraded' (pas 'idle')."""
|
||||
+ api_stream, health_file = status_env
|
||||
+ self._write_health(
|
||||
+ health_file,
|
||||
+ status="healthy",
|
||||
+ components={
|
||||
+ "screen_analyzer": True,
|
||||
+ "clip_embedder": True,
|
||||
+ "faiss_manager": False, # un composant manquant
|
||||
+ "state_embedding_builder": True,
|
||||
+ },
|
||||
+ stats={
|
||||
+ "sessions_processed": 0,
|
||||
+ "sessions_failed": 2,
|
||||
+ "sessions_skipped": 0,
|
||||
+ "total_screenshots_analyzed": 0,
|
||||
+ },
|
||||
+ )
|
||||
+
|
||||
+ status = api_stream._get_worker_queue_status()
|
||||
+
|
||||
+ assert status["status"] == "degraded", status
|
||||
+ assert status["armed"] is False
|
||||
+
|
||||
+ def test_worker_ready_after_processing_is_healthy(self, status_env):
|
||||
+ """Worker ayant traité au moins une session, tous composants chargés
|
||||
+ => 'healthy' et processing_ready=True."""
|
||||
+ api_stream, health_file = status_env
|
||||
+ self._write_health(
|
||||
+ health_file,
|
||||
+ status="healthy",
|
||||
+ components={
|
||||
+ "screen_analyzer": True,
|
||||
+ "clip_embedder": True,
|
||||
+ "faiss_manager": True,
|
||||
+ "state_embedding_builder": True,
|
||||
+ },
|
||||
+ stats={
|
||||
+ "sessions_processed": 3,
|
||||
+ "sessions_failed": 0,
|
||||
+ "sessions_skipped": 0,
|
||||
+ "total_screenshots_analyzed": 42,
|
||||
+ },
|
||||
+ )
|
||||
+
|
||||
+ status = api_stream._get_worker_queue_status()
|
||||
+
|
||||
+ assert status["status"] == "healthy", status
|
||||
+ assert status["armed"] is False
|
||||
+ assert status["components_ready"] is True
|
||||
+ assert status["processing_ready"] is True
|
||||
diff --git a/web_dashboard/templates/index.html b/web_dashboard/templates/index.html
|
||||
index c96cc8bf4..aeb0e7fa8 100644
|
||||
--- a/web_dashboard/templates/index.html
|
||||
+++ b/web_dashboard/templates/index.html
|
||||
@@ -2838,13 +2838,23 @@
|
||||
]);
|
||||
|
||||
const processingReady = processing && processing.processing_ready === true;
|
||||
- const processingDegraded = processing && !processing.error && !processingReady;
|
||||
+ // « En veille » (armé/lazy) ≠ « dégradé » : un worker neuf sans
|
||||
+ // session a tous ses composants à false par design (chargement à la
|
||||
+ // 1re session), ce n'est PAS une panne. Seul status==='degraded'
|
||||
+ // (init tentée et en échec) est une vraie alerte.
|
||||
+ const processingArmed = processing && (processing.armed === true || processing.status === 'idle');
|
||||
+ const processingDegraded = processing && !processing.error && processing.status === 'degraded';
|
||||
+ const statusHint = (processing && processing.status_hint) || '';
|
||||
statusEl.innerHTML = processingDegraded
|
||||
? '<span style="color:#f59e0b;">⚠️</span>'
|
||||
- : '<span style="color:#22c55e;">✅</span>';
|
||||
+ : processingArmed
|
||||
+ ? '<span style="color:#3b82f6;">⏸️</span>'
|
||||
+ : '<span style="color:#22c55e;">✅</span>';
|
||||
statusEl.title = processingDegraded
|
||||
- ? 'Streaming en ligne, worker apprentissage dégradé'
|
||||
- : 'Serveur streaming en ligne';
|
||||
+ ? `Streaming en ligne, worker apprentissage dégradé${statusHint ? ' — ' + statusHint : ''}`
|
||||
+ : processingArmed
|
||||
+ ? `Streaming en ligne, worker en veille${statusHint ? ' — ' + statusHint : ''}`
|
||||
+ : 'Serveur streaming en ligne';
|
||||
|
||||
document.getElementById('streamActiveSessions').textContent = data.active_sessions || 0;
|
||||
document.getElementById('streamTotalEvents').textContent = data.total_events || 0;
|
||||
@@ -2862,14 +2872,20 @@
|
||||
if (data.server_version) rows.push({label: 'Version serveur', value: data.server_version});
|
||||
if (processing && !processing.error) {
|
||||
const status = processing.status || 'unknown';
|
||||
+ const workerIcon = processingReady ? '✅' : (processingArmed ? '⏸️' : '⚠️');
|
||||
rows.push({
|
||||
label: 'Worker apprentissage',
|
||||
- value: processingReady ? `✅ ${status}` : `⚠️ ${status}`
|
||||
+ value: `${workerIcon} ${status}`
|
||||
});
|
||||
rows.push({
|
||||
label: 'Composants intelligence',
|
||||
- value: processing.components_ready ? 'prêts' : 'non prêts'
|
||||
+ value: processing.components_ready
|
||||
+ ? 'prêts'
|
||||
+ : (processingArmed ? 'en veille (chargés à la 1re session)' : 'non prêts')
|
||||
});
|
||||
+ if (statusHint) {
|
||||
+ rows.push({label: 'Détail worker', value: statusHint});
|
||||
+ }
|
||||
if (processing.queue_length !== undefined) {
|
||||
rows.push({label: 'Queue apprentissage', value: processing.queue_length});
|
||||
}
|
||||
318
docs/coordination/patches/2026-06-19_vwb_basic_auth.patch
Normal file
318
docs/coordination/patches/2026-06-19_vwb_basic_auth.patch
Normal file
@@ -0,0 +1,318 @@
|
||||
diff --git a/visual_workflow_builder/backend/app.py b/visual_workflow_builder/backend/app.py
|
||||
index 7bdae57b0..c3a285cc0 100644
|
||||
--- a/visual_workflow_builder/backend/app.py
|
||||
+++ b/visual_workflow_builder/backend/app.py
|
||||
@@ -28,6 +28,109 @@ load_dotenv() # fallback .env dans cwd (n'écrase pas les vars déjà définies
|
||||
# Initialize Flask app
|
||||
app = Flask(__name__)
|
||||
|
||||
+# ============================================================
|
||||
+# HTTP Basic Auth LAN (cohérent avec le dashboard 5001)
|
||||
+# ============================================================
|
||||
+# Le VWB (backend 5002) était exposé au LAN SANS authentification. On ajoute
|
||||
+# un middleware before_request qui exige un header Authorization: Basic <b64>
|
||||
+# pour toute requête NON-loopback (LAN), avec les MÊMES credentials que le
|
||||
+# dashboard : DASHBOARD_USER / DASHBOARD_PASSWORD (dans .env.local).
|
||||
+#
|
||||
+# GARDE-FOU CRITIQUE — exemption loopback :
|
||||
+# Le dashboard (agent_chat/app.py `_fetch_vwb_workflows`) et les healthchecks
|
||||
+# appellent ce backend en boucle locale (http://localhost:5002 → 127.0.0.1).
|
||||
+# Exiger l'auth en loopback CASSERAIT l'intégration dashboard↔VWB. On exempte
|
||||
+# donc 127.0.0.1 / ::1 (et ::ffff:127.0.0.1) de toute auth.
|
||||
+#
|
||||
+# Différence assumée avec le dashboard (fail-closed) : ici on NE crashe PAS si
|
||||
+# DASHBOARD_PASSWORD est absent. On log un warning et on laisse passer le LAN
|
||||
+# (mode POC dev/dégradé). En clinique, DASHBOARD_PASSWORD est défini dans
|
||||
+# .env.local (chargé ci-dessus, lignes 24-26) → l'auth LAN est effective.
|
||||
+import base64 as _base64
|
||||
+import hmac as _hmac
|
||||
+
|
||||
+_VWB_AUTH_USER = os.getenv("DASHBOARD_USER", "lea").strip()
|
||||
+_VWB_AUTH_PASSWORD = os.getenv("DASHBOARD_PASSWORD", "").strip()
|
||||
+# Désactivation explicite (dev/tests, parité avec le dashboard).
|
||||
+_VWB_AUTH_DISABLED = os.getenv("DASHBOARD_AUTH_DISABLED", "").lower() in (
|
||||
+ "1", "true", "yes",
|
||||
+)
|
||||
+
|
||||
+# Adresses considérées comme loopback (server-to-server, jamais challengées).
|
||||
+_VWB_LOOPBACK_ADDRS = {"127.0.0.1", "::1", "::ffff:127.0.0.1"}
|
||||
+
|
||||
+# Paths publics (pas d'auth) — healthchecks systemd / NPM / smokes.
|
||||
+_VWB_PUBLIC_PATHS = {"/health", "/api/health"}
|
||||
+
|
||||
+if not _VWB_AUTH_PASSWORD and not _VWB_AUTH_DISABLED:
|
||||
+ logging.getLogger("vwb.auth").warning(
|
||||
+ "[SECURITE] DASHBOARD_PASSWORD non defini : l'auth Basic LAN du VWB "
|
||||
+ "(5002) est INACTIVE (le LAN passe sans credentials). Definir "
|
||||
+ "DASHBOARD_PASSWORD dans .env.local pour l'activer (cible clinique)."
|
||||
+ )
|
||||
+
|
||||
+
|
||||
+def _vwb_auth_ok(header_value: str) -> bool:
|
||||
+ """Valide le header Authorization Basic. Comparaison constant-time.
|
||||
+
|
||||
+ Logique identique au dashboard (`web_dashboard/app.py::_dashboard_auth_ok`).
|
||||
+ """
|
||||
+ if not header_value or not header_value.lower().startswith("basic "):
|
||||
+ return False
|
||||
+ try:
|
||||
+ decoded = _base64.b64decode(header_value[6:].strip()).decode("utf-8")
|
||||
+ except (ValueError, UnicodeDecodeError):
|
||||
+ return False
|
||||
+ if ":" not in decoded:
|
||||
+ return False
|
||||
+ user, _, password = decoded.partition(":")
|
||||
+ user_ok = _hmac.compare_digest(user, _VWB_AUTH_USER)
|
||||
+ pwd_ok = _hmac.compare_digest(password, _VWB_AUTH_PASSWORD)
|
||||
+ return user_ok and pwd_ok
|
||||
+
|
||||
+
|
||||
+@app.before_request
|
||||
+def _vwb_basic_auth_middleware():
|
||||
+ """Middleware d'auth HTTP Basic LAN sur le backend VWB (port 5002).
|
||||
+
|
||||
+ - Bypass total si DASHBOARD_AUTH_DISABLED=true (dev/tests).
|
||||
+ - Bypass total si DASHBOARD_PASSWORD absent (mode POC degrade, warning emis
|
||||
+ au demarrage) — on ne casse pas le service faute de secret.
|
||||
+ - Loopback (127.0.0.1 / ::1) : JAMAIS challenge (proxy dashboard, healthcheck).
|
||||
+ - Preflight CORS (OPTIONS) : laisse passer (le navigateur n'envoie pas
|
||||
+ l'en-tete Authorization au preflight).
|
||||
+ - Paths publics (_VWB_PUBLIC_PATHS) : healthchecks externes.
|
||||
+ - Sinon (requete LAN) : header Authorization: Basic <b64> obligatoire, sinon 401.
|
||||
+ """
|
||||
+ from flask import request, Response
|
||||
+
|
||||
+ # Dev / tests / mode degrade sans secret : bypass total
|
||||
+ if _VWB_AUTH_DISABLED or not _VWB_AUTH_PASSWORD:
|
||||
+ return None
|
||||
+
|
||||
+ # Preflight CORS : pas d'auth (le navigateur n'envoie pas les credentials)
|
||||
+ if request.method == "OPTIONS":
|
||||
+ return None
|
||||
+
|
||||
+ # Exemption loopback (server-to-server : dashboard, healthcheck)
|
||||
+ if (request.remote_addr or "") in _VWB_LOOPBACK_ADDRS:
|
||||
+ return None
|
||||
+
|
||||
+ # Paths publics (healthchecks externes)
|
||||
+ if (request.path or "/") in _VWB_PUBLIC_PATHS:
|
||||
+ return None
|
||||
+
|
||||
+ if _vwb_auth_ok(request.headers.get("Authorization", "")):
|
||||
+ return None
|
||||
+
|
||||
+ # Pas authentifie — challenge 401 avec WWW-Authenticate
|
||||
+ return Response(
|
||||
+ '{"error": "authentication required"}',
|
||||
+ status=401,
|
||||
+ mimetype="application/json",
|
||||
+ headers={"WWW-Authenticate": 'Basic realm="RPA Vision V3 VWB"'},
|
||||
+ )
|
||||
+
|
||||
# ============================================================
|
||||
# Logging — fichier rotatif + console (idempotent)
|
||||
# ============================================================
|
||||
diff --git a/visual_workflow_builder/backend/instance/workflows.db b/visual_workflow_builder/backend/instance/workflows.db
|
||||
index db6eabd62..b7e181cbe 100644
|
||||
Binary files a/visual_workflow_builder/backend/instance/workflows.db and b/visual_workflow_builder/backend/instance/workflows.db differ
|
||||
diff --git a/visual_workflow_builder/backend/tests/test_vwb_basic_auth.py b/visual_workflow_builder/backend/tests/test_vwb_basic_auth.py
|
||||
new file mode 100644
|
||||
index 000000000..f4bff4d9d
|
||||
--- /dev/null
|
||||
+++ b/visual_workflow_builder/backend/tests/test_vwb_basic_auth.py
|
||||
@@ -0,0 +1,195 @@
|
||||
+"""
|
||||
+Tests de l'auth HTTP Basic LAN du backend VWB (port 5002).
|
||||
+
|
||||
+Le VWB etait expose au LAN SANS authentification. Le middleware
|
||||
+`_vwb_basic_auth_middleware` ajoute un challenge 401 sur toute requete
|
||||
+NON-loopback, avec les MEMES credentials que le dashboard
|
||||
+(DASHBOARD_USER / DASHBOARD_PASSWORD).
|
||||
+
|
||||
+Controles cles :
|
||||
+- Loopback (127.0.0.1) sans credentials -> 200 (proxy dashboard / healthcheck).
|
||||
+- LAN (REMOTE_ADDR non loopback) sans credentials -> 401 + WWW-Authenticate.
|
||||
+- LAN avec mauvais mot de passe -> 401.
|
||||
+- LAN avec bons credentials -> passage (pas de 401).
|
||||
+- /health public meme en LAN.
|
||||
+- DASHBOARD_AUTH_DISABLED=true -> bypass total.
|
||||
+- DASHBOARD_PASSWORD absent -> auth inactive (mode POC degrade, pas de crash).
|
||||
+"""
|
||||
+from __future__ import annotations
|
||||
+
|
||||
+import base64
|
||||
+import importlib
|
||||
+import os
|
||||
+import sys
|
||||
+from pathlib import Path
|
||||
+
|
||||
+import pytest
|
||||
+
|
||||
+# Le backend VWB s'importe en tant que module top-level `app`
|
||||
+# (cf. tests/conftest.py : `from app import app, db`). On ajoute le repertoire
|
||||
+# backend au path pour pouvoir le recharger avec les variables d'env voulues.
|
||||
+_BACKEND_DIR = Path(__file__).resolve().parent.parent
|
||||
+if str(_BACKEND_DIR) not in sys.path:
|
||||
+ sys.path.insert(0, str(_BACKEND_DIR))
|
||||
+
|
||||
+# Adresse LAN simulee (non loopback)
|
||||
+_LAN_ADDR = "192.168.1.50"
|
||||
+_LAN_ENV = {"REMOTE_ADDR": _LAN_ADDR}
|
||||
+
|
||||
+
|
||||
+def _basic_auth_header(user: str, password: str) -> str:
|
||||
+ token = base64.b64encode(f"{user}:{password}".encode()).decode()
|
||||
+ return f"Basic {token}"
|
||||
+
|
||||
+
|
||||
+def _reload_app():
|
||||
+ """Recharge le module `app` pour relire les constantes d'auth depuis l'env."""
|
||||
+ if "app" in sys.modules:
|
||||
+ return importlib.reload(sys.modules["app"])
|
||||
+ return importlib.import_module("app")
|
||||
+
|
||||
+
|
||||
+@pytest.fixture
|
||||
+def auth_enabled_client(monkeypatch):
|
||||
+ """Client VWB avec auth LAN active (DASHBOARD_USER/PASSWORD definis)."""
|
||||
+ monkeypatch.setenv("DASHBOARD_USER", "lea")
|
||||
+ monkeypatch.setenv("DASHBOARD_PASSWORD", "secret-test-pwd")
|
||||
+ monkeypatch.delenv("DASHBOARD_AUTH_DISABLED", raising=False)
|
||||
+ mod = _reload_app()
|
||||
+ mod.app.config["TESTING"] = True
|
||||
+ mod.app.config["SQLALCHEMY_DATABASE_URI"] = "sqlite:///:memory:"
|
||||
+ with mod.app.test_client() as c:
|
||||
+ with mod.app.app_context():
|
||||
+ mod.db.create_all()
|
||||
+ yield c
|
||||
+ mod.db.drop_all()
|
||||
+
|
||||
+
|
||||
+@pytest.fixture
|
||||
+def auth_disabled_client(monkeypatch):
|
||||
+ """Client VWB avec auth desactivee (DASHBOARD_AUTH_DISABLED=true)."""
|
||||
+ monkeypatch.setenv("DASHBOARD_AUTH_DISABLED", "true")
|
||||
+ monkeypatch.setenv("DASHBOARD_PASSWORD", "secret-test-pwd")
|
||||
+ mod = _reload_app()
|
||||
+ mod.app.config["TESTING"] = True
|
||||
+ mod.app.config["SQLALCHEMY_DATABASE_URI"] = "sqlite:///:memory:"
|
||||
+ with mod.app.test_client() as c:
|
||||
+ with mod.app.app_context():
|
||||
+ mod.db.create_all()
|
||||
+ yield c
|
||||
+ mod.db.drop_all()
|
||||
+
|
||||
+
|
||||
+@pytest.fixture
|
||||
+def no_password_client(monkeypatch):
|
||||
+ """Client VWB sans DASHBOARD_PASSWORD (mode POC degrade : auth inactive)."""
|
||||
+ monkeypatch.delenv("DASHBOARD_PASSWORD", raising=False)
|
||||
+ monkeypatch.delenv("DASHBOARD_AUTH_DISABLED", raising=False)
|
||||
+ mod = _reload_app()
|
||||
+ mod.app.config["TESTING"] = True
|
||||
+ mod.app.config["SQLALCHEMY_DATABASE_URI"] = "sqlite:///:memory:"
|
||||
+ with mod.app.test_client() as c:
|
||||
+ with mod.app.app_context():
|
||||
+ mod.db.create_all()
|
||||
+ yield c
|
||||
+ mod.db.drop_all()
|
||||
+
|
||||
+
|
||||
+class TestVwbBasicAuth:
|
||||
+ """Auth HTTP Basic LAN sur le backend VWB (5002)."""
|
||||
+
|
||||
+ def test_loopback_no_creds_passes(self, auth_enabled_client):
|
||||
+ """Requete loopback (127.0.0.1) sans creds -> PAS de 401.
|
||||
+
|
||||
+ Garde-fou critique : le dashboard proxifie en loopback. La requete
|
||||
+ ne doit jamais etre challengee (200, ou autre code applicatif != 401).
|
||||
+ """
|
||||
+ resp = auth_enabled_client.get("/api/v3/session/state")
|
||||
+ assert resp.status_code != 401, (
|
||||
+ f"Loopback ne doit jamais etre challenge (got {resp.status_code})"
|
||||
+ )
|
||||
+
|
||||
+ def test_lan_no_creds_returns_401(self, auth_enabled_client):
|
||||
+ """Requete LAN (non loopback) sans creds -> 401 + WWW-Authenticate."""
|
||||
+ resp = auth_enabled_client.get(
|
||||
+ "/api/v3/session/state", environ_base=_LAN_ENV
|
||||
+ )
|
||||
+ assert resp.status_code == 401
|
||||
+ assert "WWW-Authenticate" in resp.headers
|
||||
+ assert "Basic" in resp.headers["WWW-Authenticate"]
|
||||
+
|
||||
+ def test_lan_wrong_password_returns_401(self, auth_enabled_client):
|
||||
+ """Requete LAN avec mauvais mot de passe -> 401."""
|
||||
+ resp = auth_enabled_client.get(
|
||||
+ "/api/v3/session/state",
|
||||
+ environ_base=_LAN_ENV,
|
||||
+ headers={"Authorization": _basic_auth_header("lea", "wrong")},
|
||||
+ )
|
||||
+ assert resp.status_code == 401
|
||||
+
|
||||
+ def test_lan_wrong_user_returns_401(self, auth_enabled_client):
|
||||
+ """Requete LAN avec mauvais utilisateur -> 401."""
|
||||
+ resp = auth_enabled_client.get(
|
||||
+ "/api/v3/session/state",
|
||||
+ environ_base=_LAN_ENV,
|
||||
+ headers={"Authorization": _basic_auth_header("intruder", "secret-test-pwd")},
|
||||
+ )
|
||||
+ assert resp.status_code == 401
|
||||
+
|
||||
+ def test_lan_valid_credentials_pass(self, auth_enabled_client):
|
||||
+ """Requete LAN avec bons creds -> PAS de 401 (auth franchie)."""
|
||||
+ resp = auth_enabled_client.get(
|
||||
+ "/api/v3/session/state",
|
||||
+ environ_base=_LAN_ENV,
|
||||
+ headers={"Authorization": _basic_auth_header("lea", "secret-test-pwd")},
|
||||
+ )
|
||||
+ assert resp.status_code != 401, (
|
||||
+ f"Bons creds doivent franchir l'auth (got {resp.status_code})"
|
||||
+ )
|
||||
+
|
||||
+ def test_lan_malformed_header_returns_401(self, auth_enabled_client):
|
||||
+ """Requete LAN avec header mal forme (Bearer) -> 401."""
|
||||
+ resp = auth_enabled_client.get(
|
||||
+ "/api/v3/session/state",
|
||||
+ environ_base=_LAN_ENV,
|
||||
+ headers={"Authorization": "Bearer tototoken"},
|
||||
+ )
|
||||
+ assert resp.status_code == 401
|
||||
+
|
||||
+ def test_lan_health_is_public(self, auth_enabled_client):
|
||||
+ """/health reste public meme en LAN (healthcheck externe)."""
|
||||
+ resp = auth_enabled_client.get("/health", environ_base=_LAN_ENV)
|
||||
+ assert resp.status_code == 200
|
||||
+
|
||||
+ def test_lan_options_preflight_not_blocked(self, auth_enabled_client):
|
||||
+ """Preflight CORS (OPTIONS) en LAN -> pas de 401 (CORS preserve)."""
|
||||
+ resp = auth_enabled_client.open(
|
||||
+ "/api/v3/session/state", method="OPTIONS", environ_base=_LAN_ENV
|
||||
+ )
|
||||
+ assert resp.status_code != 401
|
||||
+
|
||||
+ def test_auth_disabled_bypass_lan(self, auth_disabled_client):
|
||||
+ """DASHBOARD_AUTH_DISABLED=true -> LAN passe sans creds."""
|
||||
+ resp = auth_disabled_client.get(
|
||||
+ "/api/v3/session/state", environ_base=_LAN_ENV
|
||||
+ )
|
||||
+ assert resp.status_code != 401
|
||||
+
|
||||
+ def test_no_password_degraded_lan_passes(self, no_password_client):
|
||||
+ """DASHBOARD_PASSWORD absent -> mode POC degrade : LAN passe (pas de crash)."""
|
||||
+ resp = no_password_client.get(
|
||||
+ "/api/v3/session/state", environ_base=_LAN_ENV
|
||||
+ )
|
||||
+ assert resp.status_code != 401
|
||||
+
|
||||
+
|
||||
+@pytest.fixture(autouse=True)
|
||||
+def _restore_module(monkeypatch):
|
||||
+ """Restaure le module `app` en mode auth desactivee apres chaque test,
|
||||
+ pour ne pas contaminer les autres tests VWB (qui importent `app`)."""
|
||||
+ yield
|
||||
+ monkeypatch.setenv("DASHBOARD_AUTH_DISABLED", "true")
|
||||
+ monkeypatch.delenv("DASHBOARD_PASSWORD", raising=False)
|
||||
+ monkeypatch.delenv("DASHBOARD_USER", raising=False)
|
||||
+ if "app" in sys.modules:
|
||||
+ importlib.reload(sys.modules["app"])
|
||||
Reference in New Issue
Block a user