From 18ed6cb75176cd5f84b71d7353e71852cb66a51f Mon Sep 17 00:00:00 2001 From: Dom Date: Tue, 2 Jun 2026 16:27:19 +0200 Subject: [PATCH] feat(vwb): add dashboard competence testing and health tools --- pytest.ini | 5 +- tests/conftest.py | 35 ++ tests/security/conftest.py | 33 ++ tests/security/test_persist_auth.py | 212 +++++++++ tests/unit/test_dashboard_routes.py | 10 + tests/unit/test_env_setup.py | 34 +- tests/unit/test_ocr_extractor_tesseract.py | 62 +++ tests/unit/test_quick_record_rebranchement.py | 414 ++++++++++++++++++ .../unit/test_resolve_engine_bbox_num_ctx.py | 116 +++++ tests/unit/test_text_match_fuzzy_prefix.py | 152 +++++++ tests/unit/test_visual_anchor_semantics.py | 153 +++++++ tests/unit/test_vlm_grounding_profile.py | 310 +++++++++++++ ...workflow_pipeline_ui_detection_disabled.py | 197 +++++++++ tools/generate_ollama_inventory_v2.py | 101 +++++ tools/lea_healthcheck.py | 401 +++++++++++++++++ tools/lea_micro_preflight.py | 409 +++++++++++++++++ tools/session_cleaner.py | 4 +- .../backend/api_v3/dag_execute.py | 80 +++- .../Executor/VWBExecutorExtension.tsx | 2 +- .../frontend/src/hooks/useVWBExecution.ts | 2 +- .../src/services/vwbExecutionService.ts | 2 +- web_dashboard/app.py | 2 +- web_dashboard/templates/knowledge_base.html | 60 ++- 23 files changed, 2769 insertions(+), 27 deletions(-) create mode 100644 tests/security/conftest.py create mode 100644 tests/security/test_persist_auth.py create mode 100644 tests/unit/test_ocr_extractor_tesseract.py create mode 100644 tests/unit/test_quick_record_rebranchement.py create mode 100644 tests/unit/test_resolve_engine_bbox_num_ctx.py create mode 100644 tests/unit/test_text_match_fuzzy_prefix.py create mode 100644 tests/unit/test_visual_anchor_semantics.py create mode 100644 tests/unit/test_vlm_grounding_profile.py create mode 100644 tests/unit/test_workflow_pipeline_ui_detection_disabled.py create mode 100644 tools/generate_ollama_inventory_v2.py create mode 100644 tools/lea_healthcheck.py create mode 100644 tools/lea_micro_preflight.py diff --git a/pytest.ini b/pytest.ini index 8e073bd49..1338e20df 100644 --- a/pytest.ini +++ b/pytest.ini @@ -7,7 +7,7 @@ testpaths = tests # Options par défaut -addopts = -q --tb=short --strict-markers +addopts = -q --tb=short --strict-markers -m "not performance" # Markers personnalisés markers = @@ -16,6 +16,7 @@ markers = performance: Performance tests (benchmarks) slow: Slow tests (skip avec -m "not slow") smoke: Smoke tests E2E (barrière anti-régression) + security: Security tests (auth, isolation, abuse cases) fiche1: Tests Fiche #1 (aliases compatibilité) fiche2: Tests Fiche #2 (corrections BBOX) fiche3: Tests Fiche #3 (context hints composite) @@ -34,4 +35,4 @@ markers = # Filtres de warnings filterwarnings = ignore::DeprecationWarning - ignore::PendingDeprecationWarning \ No newline at end of file + ignore::PendingDeprecationWarning diff --git a/tests/conftest.py b/tests/conftest.py index d3ea8c375..c9934b906 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -67,6 +67,41 @@ except ModuleNotFoundError: flask_socketio.emit = _fake_emit sys.modules["flask_socketio"] = flask_socketio +try: + import prometheus_client # noqa: F401 +except ModuleNotFoundError: + prometheus_client = types.ModuleType("prometheus_client") + prometheus_client.CONTENT_TYPE_LATEST = "text/plain; version=0.0.4" + + def _fake_generate_latest(*_args, **_kwargs): + return b"" + + class _FakeMetric: + def __init__(self, *_args, **_kwargs): + pass + + def labels(self, **_kwargs): + return self + + def inc(self, *_args, **_kwargs): + return None + + def observe(self, *_args, **_kwargs): + return None + + def set(self, *_args, **_kwargs): + return None + + def info(self, *_args, **_kwargs): + return None + + prometheus_client.generate_latest = _fake_generate_latest + prometheus_client.Counter = _FakeMetric + prometheus_client.Histogram = _FakeMetric + prometheus_client.Gauge = _FakeMetric + prometheus_client.Info = _FakeMetric + sys.modules["prometheus_client"] = prometheus_client + # ============================================================================= # GPU Preflight — vérification avant les tests GPU diff --git a/tests/security/conftest.py b/tests/security/conftest.py new file mode 100644 index 000000000..5379077fd --- /dev/null +++ b/tests/security/conftest.py @@ -0,0 +1,33 @@ +"""Conftest pour les tests securite — assure que le agent_v0 local de +rpa_vision_v3 est trouve avant le standalone de ~/ai/agent_v0/. + +Le conftest racine ajoute /home/dom/ai/rpa_vision_v3 mais ne previent pas +le shadow par /home/dom/ai. On force l'ordre + on purge un eventuel +agent_v0 standalone deja charge depuis ~/ai/. +""" +import sys +from pathlib import Path + +ROOT = str(Path(__file__).resolve().parents[2]) + +# Forcer rpa_vision_v3 en tete (pattern reutilise depuis tests/integration/conftest.py) +if ROOT in sys.path: + sys.path.remove(ROOT) +sys.path.insert(0, ROOT) + +# Purger tout import precoce de agent_v0 / core qui pointe vers un autre dossier +# (cf. ~/ai/agent_v0/ standalone qui n'est pas le package serveur du repo). +_AGENT_V0_LOCAL = str(Path(ROOT) / "agent_v0") +for _mod_name in list(sys.modules): + if _mod_name == "agent_v0" or _mod_name.startswith("agent_v0."): + _mod = sys.modules[_mod_name] + _mod_file = getattr(_mod, "__file__", "") or "" + if not _mod_file.startswith(_AGENT_V0_LOCAL): + del sys.modules[_mod_name] + +# Pre-import du module local pour eviter qu'un autre conftest n'importe +# le standalone /home/dom/ai/agent_v0/ avant nous. +try: # pragma: no cover - garde de path + from agent_v0 import server_v1 as _sv1 # noqa: F401 +except Exception: + pass diff --git a/tests/security/test_persist_auth.py b/tests/security/test_persist_auth.py new file mode 100644 index 000000000..3a53f990f --- /dev/null +++ b/tests/security/test_persist_auth.py @@ -0,0 +1,212 @@ +"""Tests securite /api/v1/lea/competences/candidate/persist. + +Specs §6 : +- Token Bearer obligatoire +- Couplage machine_id (via guard fleet) +- Rate limit 10/min/machine_id +- Path traversal interdit (slug strict) +- PII detection (regle d'or HDS) +""" + +from __future__ import annotations + +import sys +from pathlib import Path + +import pytest + +_ROOT = str(Path(__file__).resolve().parents[2]) +if _ROOT not in sys.path: + sys.path.insert(0, _ROOT) + + +pytestmark = pytest.mark.security + +_TEST_API_TOKEN = "test_persist_security_token_xyz" + + +@pytest.fixture +def persist_client(monkeypatch, tmp_path): + monkeypatch.setenv("RPA_API_TOKEN", _TEST_API_TOKEN) + monkeypatch.delenv("RPA_AUTH_DISABLED", raising=False) + monkeypatch.setenv("RPA_AGENTS_DB_PATH", str(tmp_path / "agents.db")) + + from fastapi.testclient import TestClient + from agent_v0.server_v1 import api_stream + from agent_v0.server_v1.agent_registry import AgentRegistry + from core.competences import persist as P + + monkeypatch.setattr(api_stream, "API_TOKEN", _TEST_API_TOKEN) + test_registry = AgentRegistry(db_path=str(tmp_path / "agents.db")) + monkeypatch.setattr(api_stream, "agent_registry", test_registry) + + candidate_dir = tmp_path / "competences" / "candidate" + candidate_dir.mkdir(parents=True, exist_ok=True) + monkeypatch.setattr(P, "COMPETENCES_ROOT", tmp_path / "competences") + monkeypatch.setattr(P, "CANDIDATE_DIR", candidate_dir) + monkeypatch.setattr(P, "AUDIT_PATH", tmp_path / "competences" / "persist_audit.jsonl") + monkeypatch.setattr( + P, "INCOMPLETE_PATH", tmp_path / "competences" / "incomplete_learnings.jsonl" + ) + P.persist_rate_limiter.reset() + + client = TestClient(api_stream.app, raise_server_exceptions=False) + return client, tmp_path + + +def _good_payload(name="Securite Test", persist_id="uuid-sec-1"): + return { + "name": name, + "machine_id": "machine_sec_x", + "workflow_ir": { + "steps": [{"kind": "click", "parameters": {"target": "OK"}}], + "preconditions": [], + }, + "learning_metadata": {"persist_id": persist_id}, + } + + +# --------------------------------------------------------------------------- +# Token Bearer +# --------------------------------------------------------------------------- + + +class TestPersistAuthToken: + def test_no_token_returns_401(self, persist_client): + client, _ = persist_client + resp = client.post( + "/api/v1/lea/competences/candidate/persist", + json=_good_payload(), + ) + assert resp.status_code == 401 + + def test_wrong_token_returns_401(self, persist_client): + client, _ = persist_client + resp = client.post( + "/api/v1/lea/competences/candidate/persist", + json=_good_payload(), + headers={"Authorization": "Bearer wrong_token_xyz"}, + ) + assert resp.status_code == 401 + + def test_valid_token_returns_201(self, persist_client): + client, _ = persist_client + resp = client.post( + "/api/v1/lea/competences/candidate/persist", + json=_good_payload(), + headers={"Authorization": f"Bearer {_TEST_API_TOKEN}"}, + ) + assert resp.status_code == 201 + + +# --------------------------------------------------------------------------- +# Rate limit +# --------------------------------------------------------------------------- + + +class TestPersistRateLimit: + def test_rate_limit_11th_call_returns_429(self, persist_client, monkeypatch): + client, _ = persist_client + from core.competences import persist as P + + # Forcer max_per_minute=3 pour rendre le test rapide et deterministe + P.persist_rate_limiter.max_per_minute = 3 + P.persist_rate_limiter.reset() + + headers = {"Authorization": f"Bearer {_TEST_API_TOKEN}"} + # 3 appels OK + for i in range(3): + payload = _good_payload(name=f"Rate {i}", persist_id=f"uuid-rate-{i}") + r = client.post( + "/api/v1/lea/competences/candidate/persist", + json=payload, + headers=headers, + ) + assert r.status_code in (201, 409), f"call #{i}: {r.text}" + # 4eme appel -> 429 + r4 = client.post( + "/api/v1/lea/competences/candidate/persist", + json=_good_payload(name="Rate Trop", persist_id="uuid-rate-overflow"), + headers=headers, + ) + assert r4.status_code == 429 + assert "Retry-After" in {k.title() for k in r4.headers.keys()} + + # Cleanup pour ne pas polluer d'autres tests + P.persist_rate_limiter.max_per_minute = 10 + + +# --------------------------------------------------------------------------- +# Path traversal & slug strict +# --------------------------------------------------------------------------- + + +class TestPersistPathTraversal: + def test_path_traversal_in_name_blocked(self, persist_client): + client, tmp_path = persist_client + resp = client.post( + "/api/v1/lea/competences/candidate/persist", + json={ + **_good_payload(), + "name": "../../etc/passwd", + }, + headers={"Authorization": f"Bearer {_TEST_API_TOKEN}"}, + ) + # Le slug strict supprime les `/`, `.`, etc. -> resultat = 'etcpasswd' + # ou bien rejete si la longueur tombe sous le minimum. + # Dans tous les cas, AUCUN fichier ne doit etre ecrit hors CANDIDATE_DIR. + if resp.status_code == 201: + yaml_path = resp.json()["yaml_path"] + assert yaml_path.startswith("data/competences/candidate/") + # Verifier aucun fichier hors candidate + etc_target = Path("/etc/passwd.yaml") + assert not etc_target.exists() or etc_target.is_file() # existant ok + else: + assert resp.status_code == 400 + + def test_slug_with_null_byte_blocked(self, persist_client): + client, _ = persist_client + resp = client.post( + "/api/v1/lea/competences/candidate/persist", + json={ + **_good_payload(), + "name": "abc\x00xyz", + }, + headers={"Authorization": f"Bearer {_TEST_API_TOKEN}"}, + ) + # null byte est non-ASCII -> retire par slugify -> "abcxyz" valide + # ou rejet si l'encodage casse. Tolerer les deux mais pas de 500. + assert resp.status_code in (201, 400) + + +# --------------------------------------------------------------------------- +# PII detection +# --------------------------------------------------------------------------- + + +class TestPersistPiiDetection: + def test_email_in_workflow_rejected(self, persist_client): + client, _ = persist_client + payload = _good_payload(persist_id="uuid-pii-email") + payload["workflow_ir"]["steps"].append( + {"kind": "type", "parameters": {"value": "patient: john.doe@hopital.fr"}} + ) + resp = client.post( + "/api/v1/lea/competences/candidate/persist", + json=payload, + headers={"Authorization": f"Bearer {_TEST_API_TOKEN}"}, + ) + assert resp.status_code == 400 + assert resp.json()["detail"]["error"] == "pii_detected" + + def test_phone_in_annotations_rejected(self, persist_client): + client, _ = persist_client + payload = _good_payload(persist_id="uuid-pii-phone") + payload["annotations_semantiques"] = {"intent_fr": "appeler 01 23 45 67 89"} + resp = client.post( + "/api/v1/lea/competences/candidate/persist", + json=payload, + headers={"Authorization": f"Bearer {_TEST_API_TOKEN}"}, + ) + assert resp.status_code == 400 + assert resp.json()["detail"]["error"] == "pii_detected" diff --git a/tests/unit/test_dashboard_routes.py b/tests/unit/test_dashboard_routes.py index c24998f55..3f8f0528c 100644 --- a/tests/unit/test_dashboard_routes.py +++ b/tests/unit/test_dashboard_routes.py @@ -63,6 +63,16 @@ class TestDashboardRoutes: assert 'competences' in data assert 'items' in data['competences'] + def test_knowledge_base_page_includes_test_safety_guards(self, client): + """Le bouton Tester embarque les garde-fous Win+R et evidence vide.""" + resp = client.get('/knowledge-base') + assert resp.status_code == 200 + html = resp.get_data(as_text=True) + assert 'confirmRunDialogReplay' in html + assert 'peut ouvrir Win+R / Exécuter' in html + assert 'hasReplayEvidence' in html + assert 'Verdict valide refusé' in html + def test_dashboard_replay_competence_proxy(self, client, monkeypatch): """Le dashboard lance un replay competence supervise via streaming.""" calls = [] diff --git a/tests/unit/test_env_setup.py b/tests/unit/test_env_setup.py index d3660cca2..2c84c643f 100644 --- a/tests/unit/test_env_setup.py +++ b/tests/unit/test_env_setup.py @@ -6,6 +6,7 @@ Vérifie que les fonctions d'extraction d'apps et de génération d'actions de setup 100% visuelles fonctionnent correctement. """ import pytest +import os import sys from pathlib import Path @@ -13,6 +14,10 @@ from pathlib import Path ROOT = Path(__file__).parent.parent.parent sys.path.insert(0, str(ROOT)) +# api_stream est fail-closed si RPA_API_TOKEN est absent. Ces tests ciblent les +# helpers de setup, pas le bootstrap d'authentification. +os.environ.setdefault("RPA_API_TOKEN", "test_env_setup_token_0123456789abcdef") + from agent_v0.server_v1.api_stream import ( _extract_required_apps_from_events, _extract_required_apps_from_workflow, @@ -630,7 +635,7 @@ class TestGenerateSetupActions: } actions = _generate_setup_actions(app_info) - assert len(actions) == 7 + assert len(actions) == 10 assert actions[0]["type"] == "key_combo" assert actions[0]["keys"] == ["win", "r"] @@ -652,7 +657,17 @@ class TestGenerateSetupActions: assert actions[5]["duration_ms"] == 2000 assert actions[6]["type"] == "verify_screen" - assert actions[6]["expected_window_title_contains"] == ["Bloc-notes", "notepad"] + assert actions[6]["_setup_step"] == "verify_app_ready_before_fresh_document" + + assert actions[7]["type"] == "key_combo" + assert actions[7]["keys"] == ["ctrl", "n"] + assert actions[7]["_setup_step"] == "ensure_fresh_document" + + assert actions[8]["type"] == "wait" + assert actions[8]["duration_ms"] == 400 + + assert actions[9]["type"] == "verify_screen" + assert actions[9]["expected_window_title_contains"] == ["Bloc-notes", "notepad"] # Toutes les actions sont marquées comme phase setup for action in actions: @@ -1126,9 +1141,10 @@ class TestSetupPipeline: app_info = _extract_required_apps_from_events(events) assert app_info["primary_app"] == "Notepad.exe" + assert app_info["has_neutral_window_title"] is True actions = _generate_setup_actions(app_info) - assert len(actions) == 7 + assert len(actions) == 10 types = [a["type"] for a in actions] steps = [a.get("_setup_step") for a in actions] @@ -1139,11 +1155,14 @@ class TestSetupPipeline: "wait_launch_command", "submit_run_dialog", "wait_app_launch", + "verify_app_ready_before_fresh_document", + "ensure_fresh_document", + "wait_fresh_document", "verify_app_ready", ] assert steps == expected_step_order, steps - assert types.count("key_combo") == 2 + assert types.count("key_combo") == 3 idx_type = steps.index("type_launch_command") assert actions[idx_type]["text"] == "notepad" @@ -1165,14 +1184,15 @@ class TestSetupPipeline: app_info = _extract_required_apps_from_workflow(workflow) assert app_info["primary_app"] == "Notepad.exe" + assert app_info["has_neutral_window_title"] is True actions = _generate_setup_actions(app_info) - assert len(actions) == 7 + assert len(actions) == 10 # Le texte tapé doit être la commande shell pour le setup Win+R. type_action = [a for a in actions if a["type"] == "type"][0] assert type_action["text"] == "notepad" - # Le setup Notepad s'appuie maintenant sur deux key_combo. + # Win+R, Enter, puis Ctrl+N pour garantir un document vierge. key_combos = [a for a in actions if a["type"] == "key_combo"] - assert len(key_combos) == 2 + assert len(key_combos) == 3 diff --git a/tests/unit/test_ocr_extractor_tesseract.py b/tests/unit/test_ocr_extractor_tesseract.py new file mode 100644 index 000000000..f9b42e3a4 --- /dev/null +++ b/tests/unit/test_ocr_extractor_tesseract.py @@ -0,0 +1,62 @@ +from pathlib import Path +import sys +from types import SimpleNamespace + +from PIL import Image + +import core.llm.ocr_extractor as ocr_extractor + + +def _blank_png(path: Path) -> None: + Image.new("RGB", (120, 40), "white").save(path) + + +def test_extract_digits_tesseract_filters_numeric_pattern(tmp_path, monkeypatch): + image_path = tmp_path / "screen.png" + _blank_png(image_path) + + def fake_image_to_string(_img, lang, config): + assert lang == "eng" + assert "tessedit_char_whitelist=0123456789" in config + return "IPP 25003284 MOREL\n25003362 abc 1234\n25012257" + + monkeypatch.setitem( + sys.modules, + "pytesseract", + SimpleNamespace(image_to_string=fake_image_to_string), + ) + + values = ocr_extractor.extract_digits_tesseract_from_image( + str(image_path), + pattern=r"^25\d{6}$", + ) + + assert values == ["25003284", "25003362", "25012257"] + + +def test_extract_table_tesseract_engine_delegates_to_digits(tmp_path, monkeypatch): + image_path = tmp_path / "screen.png" + _blank_png(image_path) + + calls = {} + + def fake_extract_digits(image_path_arg, region=None, pattern=None, limit=None): + calls["args"] = (image_path_arg, region, pattern, limit) + return ["25003284", "25003362"] + + monkeypatch.setattr( + ocr_extractor, + "extract_digits_tesseract_from_image", + fake_extract_digits, + ) + + values = ocr_extractor.extract_table_from_image( + str(image_path), + region=(10, 20, 30, 40), + pattern=r"^25\d{6}$", + limit=2, + engine="tesseract", + ) + + assert values == ["25003284", "25003362"] + assert calls["args"] == (str(image_path), (10, 20, 30, 40), r"^25\d{6}$", 2) diff --git a/tests/unit/test_quick_record_rebranchement.py b/tests/unit/test_quick_record_rebranchement.py new file mode 100644 index 000000000..ca8c0cc28 --- /dev/null +++ b/tests/unit/test_quick_record_rebranchement.py @@ -0,0 +1,414 @@ +""" +Tests unitaires — rebranchement P1-LEA-SHADOW du bouton "Apprenez-moi". + +Vérifie que : +1. Le client HTTP `start_learning_session` POSTe bien le payload attendu + sur `/api/learn/start` avec le bon Authorization Bearer. +2. Le retry+backoff fonctionne sur erreur transitoire puis succès. +3. Une 3e tentative échouée lève LeaOrchestratorError. +4. Les méthodes `_start_lea_orchestrator_session` de ChatWindow et + SmartTrayV1 sont fail-safe : si le client lève, on ne propage pas. + +Les imports PyQt5/tkinter/pystray sont mockés pour permettre l'exécution +des tests sur Linux (CI / dev) sans dépendances Windows. +""" + +from __future__ import annotations + +import sys +import types +import unittest +from pathlib import Path +from unittest.mock import MagicMock, patch + +# --------------------------------------------------------------------------- +# sys.path fix : pytest, lancé depuis la racine projet, peut insérer +# /home/dom/ai en tête (présence d'un autre `agent_v0` legacy dans ce dossier +# parent). Le shadow casse `import agent_v0.agent_v1`. On purge l'entrée +# parasite et on insère la racine du projet en tête. +# --------------------------------------------------------------------------- +_PROJECT_ROOT = Path(__file__).resolve().parents[2] +_PARENT = _PROJECT_ROOT.parent +sys.path[:] = [p for p in sys.path if Path(p).resolve() != _PARENT.resolve()] +if str(_PROJECT_ROOT) not in sys.path: + sys.path.insert(0, str(_PROJECT_ROOT)) + +# Purger un import `agent_v0` venu du mauvais chemin (legacy) +_existing = sys.modules.get("agent_v0") +if _existing is not None and not getattr(_existing, "__file__", "").startswith( + str(_PROJECT_ROOT) +): + for _name in list(sys.modules): + if _name == "agent_v0" or _name.startswith("agent_v0."): + sys.modules.pop(_name, None) + +# --------------------------------------------------------------------------- +# Stubs des dépendances lourdes UI (pystray, PIL.Image avec ImageDraw, etc.) +# pour permettre l'import de smart_tray / chat_window depuis Linux. +# --------------------------------------------------------------------------- + +def _install_ui_stubs() -> None: + """Installe des stubs minimaux pour pystray, PIL, FeedbackBus, etc.""" + # pystray + if "pystray" not in sys.modules: + pystray_mod = types.ModuleType("pystray") + pystray_mod.Menu = MagicMock() + pystray_mod.Menu.SEPARATOR = object() + pystray_mod.MenuItem = MagicMock() + pystray_mod.Icon = MagicMock() + sys.modules["pystray"] = pystray_mod + + # PIL.Image + PIL.ImageDraw (les imports en haut de smart_tray) + if "PIL" not in sys.modules: + pil_mod = types.ModuleType("PIL") + sys.modules["PIL"] = pil_mod + if "PIL.Image" not in sys.modules: + image_mod = types.ModuleType("PIL.Image") + image_mod.Image = MagicMock + image_mod.new = MagicMock(return_value=MagicMock()) + sys.modules["PIL.Image"] = image_mod + sys.modules["PIL"].Image = image_mod # type: ignore[attr-defined] + if "PIL.ImageDraw" not in sys.modules: + draw_mod = types.ModuleType("PIL.ImageDraw") + draw_mod.Draw = MagicMock(return_value=MagicMock()) + sys.modules["PIL.ImageDraw"] = draw_mod + sys.modules["PIL"].ImageDraw = draw_mod # type: ignore[attr-defined] + + +_install_ui_stubs() + + +# --------------------------------------------------------------------------- +# Tests du client HTTP — partie facilement testable, sans dépendance UI. +# --------------------------------------------------------------------------- + +class StartLearningSessionTests(unittest.TestCase): + """Tests du client HTTP `start_learning_session`.""" + + def _make_response(self, status=200, json_data=None): + resp = MagicMock() + resp.status_code = status + resp.json.return_value = json_data or { + "session_id": "sess-abc-123", + "state": "LISTENING", + "message": "Ok, je regarde. Vas-y, je note les actions...", + } + if status >= 400: + import httpx # noqa + resp.raise_for_status.side_effect = Exception(f"HTTP {status}") + else: + resp.raise_for_status.return_value = None + return resp + + def test_post_payload_and_bearer(self): + """Le POST contient payload + Authorization Bearer attendu.""" + from agent_v0.agent_v1.network.lea_orchestrator_client import ( + start_learning_session, + ) + + captured = {} + + class _FakeClient: + def __init__(self, timeout): + captured["timeout"] = timeout + + def __enter__(self): + return self + + def __exit__(self, *a): + return False + + def post(self_inner, url, json, headers): + captured["url"] = url + captured["json"] = json + captured["headers"] = headers + resp = MagicMock() + resp.status_code = 200 + resp.raise_for_status.return_value = None + resp.json.return_value = { + "session_id": "sess-1", + "state": "LISTENING", + "message": "Ok, je regarde.", + } + return resp + + with patch("httpx.Client", _FakeClient): + resp = start_learning_session( + "http://localhost:5004", + machine_id="poste-tim-01", + session_name="Facturation urgences", + api_token="tok-secret", + trigger_source="windows_button", + ) + + self.assertEqual(resp.session_id, "sess-1") + self.assertEqual(resp.state, "LISTENING") + self.assertEqual(captured["url"], "http://localhost:5004/api/learn/start") + self.assertEqual(captured["json"], { + "machine_id": "poste-tim-01", + "session_name": "Facturation urgences", + "trigger_source": "windows_button", + }) + self.assertEqual(captured["headers"]["Authorization"], "Bearer tok-secret") + self.assertEqual(captured["headers"]["Content-Type"], "application/json") + self.assertEqual(captured["timeout"], 10.0) + + def test_no_token_omits_authorization_header(self): + """Si api_token est vide, pas de header Authorization.""" + from agent_v0.agent_v1.network.lea_orchestrator_client import ( + start_learning_session, + ) + + captured_headers = {} + + class _FakeClient: + def __init__(self, timeout): + pass + + def __enter__(self): + return self + + def __exit__(self, *a): + return False + + def post(self_inner, url, json, headers): + captured_headers.update(headers) + resp = MagicMock() + resp.raise_for_status.return_value = None + resp.json.return_value = { + "session_id": "x", + "state": "LISTENING", + "message": "", + } + return resp + + with patch("httpx.Client", _FakeClient): + start_learning_session( + "http://localhost:5004", + machine_id="m", + session_name="n", + api_token="", + ) + + self.assertNotIn("Authorization", captured_headers) + + def test_retry_then_success(self): + """1 échec puis 1 succès → retourne la réponse sans lever.""" + from agent_v0.agent_v1.network.lea_orchestrator_client import ( + start_learning_session, + ) + + calls = {"n": 0} + + class _FakeClient: + def __init__(self, timeout): + pass + + def __enter__(self): + return self + + def __exit__(self, *a): + return False + + def post(self_inner, url, json, headers): + calls["n"] += 1 + if calls["n"] == 1: + raise RuntimeError("connexion refusée") + resp = MagicMock() + resp.raise_for_status.return_value = None + resp.json.return_value = { + "session_id": "ok", + "state": "LISTENING", + "message": "Ok", + } + return resp + + with patch("httpx.Client", _FakeClient), \ + patch("time.sleep") as sleep_mock: + resp = start_learning_session( + "http://localhost:5004", + machine_id="m", + session_name="n", + api_token="t", + backoff_s=(0.01, 0.01), + ) + + self.assertEqual(resp.session_id, "ok") + self.assertEqual(calls["n"], 2) + sleep_mock.assert_called() + + def test_three_failures_raise(self): + """3 échecs consécutifs → LeaOrchestratorError.""" + from agent_v0.agent_v1.network.lea_orchestrator_client import ( + LeaOrchestratorError, + start_learning_session, + ) + + class _FakeClient: + def __init__(self, timeout): + pass + + def __enter__(self): + return self + + def __exit__(self, *a): + return False + + def post(self_inner, url, json, headers): + raise RuntimeError("network down") + + with patch("httpx.Client", _FakeClient), patch("time.sleep"): + with self.assertRaises(LeaOrchestratorError): + start_learning_session( + "http://localhost:5004", + machine_id="m", + session_name="n", + backoff_s=(0.01, 0.01), + ) + + def test_missing_session_id_raises(self): + """Réponse 200 sans session_id → LeaOrchestratorError.""" + from agent_v0.agent_v1.network.lea_orchestrator_client import ( + LeaOrchestratorError, + start_learning_session, + ) + + class _FakeClient: + def __init__(self, timeout): + pass + + def __enter__(self): + return self + + def __exit__(self, *a): + return False + + def post(self_inner, url, json, headers): + resp = MagicMock() + resp.raise_for_status.return_value = None + resp.json.return_value = {"state": "LISTENING"} + return resp + + with patch("httpx.Client", _FakeClient), patch("time.sleep"): + with self.assertRaises(LeaOrchestratorError): + start_learning_session( + "http://localhost:5004", + machine_id="m", + session_name="n", + backoff_s=(0.01, 0.01), + ) + + +# --------------------------------------------------------------------------- +# Tests du wiring `_start_lea_orchestrator_session` sur SmartTrayV1 + ChatWindow. +# On invoque la méthode sur une instance non-construite pour isoler le wiring +# du reste (UI tkinter / pystray non démarrés). +# --------------------------------------------------------------------------- + +class SmartTrayRebranchementTests(unittest.TestCase): + """Vérifie le wiring côté smart_tray._start_lea_orchestrator_session.""" + + def test_smart_tray_calls_start_learning_session(self): + from agent_v0.agent_v1.ui import smart_tray as smart_tray_mod + from agent_v0.agent_v1.network.lea_orchestrator_client import ( + LearnStartResponse, + ) + + instance = smart_tray_mod.SmartTrayV1.__new__(smart_tray_mod.SmartTrayV1) + instance._notifier = MagicMock() + + fake_resp = LearnStartResponse( + session_id="s-1", state="LISTENING", message="Ok" + ) + with patch( + "agent_v0.agent_v1.network.lea_orchestrator_client.start_learning_session", + return_value=fake_resp, + ) as start_mock: + instance._start_lea_orchestrator_session("ma tache") + + start_mock.assert_called_once() + _, kwargs = start_mock.call_args + self.assertEqual(kwargs["session_name"], "ma tache") + self.assertEqual(kwargs["trigger_source"], "tray_button") + # Pas d'appel notifier sur succès (silencieux) + instance._notifier.notify.assert_not_called() + + def test_smart_tray_failsafe_on_orchestrator_error(self): + from agent_v0.agent_v1.ui import smart_tray as smart_tray_mod + from agent_v0.agent_v1.network.lea_orchestrator_client import ( + LeaOrchestratorError, + ) + + instance = smart_tray_mod.SmartTrayV1.__new__(smart_tray_mod.SmartTrayV1) + instance._notifier = MagicMock() + + with patch( + "agent_v0.agent_v1.network.lea_orchestrator_client.start_learning_session", + side_effect=LeaOrchestratorError("down"), + ): + # Ne doit PAS lever + instance._start_lea_orchestrator_session("tache X") + + instance._notifier.notify.assert_called_once() + title, msg = instance._notifier.notify.call_args.args + self.assertEqual(title, "Léa") + self.assertIn("local", msg.lower()) + + +class ChatWindowRebranchementTests(unittest.TestCase): + """Vérifie le wiring côté chat_window._start_lea_orchestrator_session.""" + + def test_chat_window_calls_start_learning_session(self): + from agent_v0.agent_v1.ui import chat_window as chat_window_mod + from agent_v0.agent_v1.network.lea_orchestrator_client import ( + LearnStartResponse, + ) + + instance = chat_window_mod.ChatWindow.__new__(chat_window_mod.ChatWindow) + instance._add_lea_message = MagicMock() + + fake_resp = LearnStartResponse( + session_id="s-42", + state="LISTENING", + message="Ok, je regarde. Vas-y, je note...", + ) + with patch( + "agent_v0.agent_v1.network.lea_orchestrator_client.start_learning_session", + return_value=fake_resp, + ) as start_mock: + instance._start_lea_orchestrator_session("Facturation urgences") + + start_mock.assert_called_once() + _, kwargs = start_mock.call_args + self.assertEqual(kwargs["session_name"], "Facturation urgences") + self.assertEqual(kwargs["trigger_source"], "windows_button") + # Le message d'accueil de Léa doit être affiché + instance._add_lea_message.assert_called_with( + "Ok, je regarde. Vas-y, je note..." + ) + + def test_chat_window_failsafe_on_orchestrator_error(self): + from agent_v0.agent_v1.ui import chat_window as chat_window_mod + from agent_v0.agent_v1.network.lea_orchestrator_client import ( + LeaOrchestratorError, + ) + + instance = chat_window_mod.ChatWindow.__new__(chat_window_mod.ChatWindow) + instance._add_lea_message = MagicMock() + + with patch( + "agent_v0.agent_v1.network.lea_orchestrator_client.start_learning_session", + side_effect=LeaOrchestratorError("timeout"), + ): + # Ne doit PAS lever + instance._start_lea_orchestrator_session("tache Y") + + # Un message dégradé doit être affiché à l'utilisateur + instance._add_lea_message.assert_called_once() + msg = instance._add_lea_message.call_args.args[0] + self.assertIn("Impossible de joindre", msg) + self.assertIn("localement", msg) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/test_resolve_engine_bbox_num_ctx.py b/tests/unit/test_resolve_engine_bbox_num_ctx.py new file mode 100644 index 000000000..49a8cbb8a --- /dev/null +++ b/tests/unit/test_resolve_engine_bbox_num_ctx.py @@ -0,0 +1,116 @@ +"""Tests D5-v3a mini-fix : num_ctx=4096 explicite sur les 3 sites grounding +bbox legacy de resolve_engine.py. + +Avant fix : aucun des 3 sites ne précisait num_ctx → Ollama héritait du +Modelfile qwen2.5vl:7b-rpa (PARAMETER num_ctx 8192). Confirmé via +`ollama show qwen2.5vl:7b-rpa --modelfile` (Codex 2026-05-25 18:45). + +Après fix : payload contient `options.num_ctx = 4096` sur les 3 sites : +- resolve_engine.py:985 (Essai 2 Ollama grounding bbox) +- resolve_engine.py:1015 (Essai 2 fallback multi-image) +- resolve_engine.py:3016 (_locate_popup_button) + +Référence : inbox_claude/2026-05-25_1845_codex-to-claude_GO-D5v3a-mini-fix-numctx4096.md +""" +from __future__ import annotations + +import re +import sys +from pathlib import Path + +import pytest + + +ROOT = Path(__file__).resolve().parents[2] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + + +@pytest.mark.unit +def test_all_three_bbox_sites_have_num_ctx_4096(): + """Vérifie par lecture statique du fichier source que les 3 sites + grounding bbox legacy passent num_ctx=4096 dans options. + + Test résistant aux changements de numéro de ligne (matche la signature + payload plutôt que la ligne exacte).""" + src = (ROOT / "agent_v0" / "server_v1" / "resolve_engine.py").read_text() + + # Compter les occurrences de num_ctx=4096 dans des dicts options bbox + pattern = re.compile(r'"options":\s*\{[^}]*"num_ctx":\s*4096[^}]*\}') + matches = pattern.findall(src) + assert len(matches) >= 3, ( + f"Attendu : 3 sites bbox avec num_ctx=4096, trouvé : {len(matches)}. " + f"D5-v3a mini-fix non appliqué sur tous les sites." + ) + + +@pytest.mark.unit +def test_locate_popup_button_payload_num_ctx(monkeypatch): + """Test runtime : _locate_popup_button construit un payload avec + num_ctx=4096 et model=qwen2.5vl:7b.""" + captured = {} + + def fake_post(url, json=None, timeout=None): + captured["url"] = url + captured["payload"] = json + # Simuler réponse vide → la fonction retourne None mais on a capturé le payload + from unittest.mock import MagicMock + resp = MagicMock() + resp.ok = False + resp.json.return_value = {"message": {"content": ""}} + return resp + + # Le module fait `import requests as _requests` dans la fonction → patch + # via le module global requests (alias _requests). + import requests + monkeypatch.setattr(requests, "post", fake_post) + from agent_v0.server_v1 import resolve_engine as re_module + + result = re_module._locate_popup_button( + screenshot_b64="fake_b64_data", + button_text="OK", + screen_width=1920, + screen_height=1080, + ) + + # Le payload doit avoir num_ctx=4096 + assert captured["payload"]["options"]["num_ctx"] == 4096, ( + f"_locate_popup_button payload sans num_ctx=4096 : " + f"{captured['payload']['options']}" + ) + # Modèle non changé + assert captured["payload"]["model"] == "qwen2.5vl:7b" + + +@pytest.mark.unit +def test_num_ctx_did_not_break_other_options(): + """Vérifie qu'on n'a PAS perdu temperature ni num_predict en ajoutant + num_ctx. Lecture statique fichier source.""" + src = (ROOT / "agent_v0" / "server_v1" / "resolve_engine.py").read_text() + + # Toutes les options bbox doivent toujours avoir temperature ET num_predict + pattern = re.compile( + r'"options":\s*\{[^}]*"temperature":\s*0\.1[^}]*"num_predict":\s*\d+[^}]*"num_ctx":\s*4096[^}]*\}' + ) + matches = pattern.findall(src) + assert len(matches) >= 3, ( + f"Attendu : 3 sites bbox avec temperature + num_predict + num_ctx, " + f"trouvé : {len(matches)}. Une option a peut-être été perdue lors du fix." + ) + + +@pytest.mark.unit +def test_no_helper_migration_done(): + """Vérifie qu'on n'a PAS introduit d'appel à generate_bbox_grounding + ou autre helper dans resolve_engine.py (constraint D5-v3a mini-fix : + pas de migration helper, seulement num_ctx).""" + src = (ROOT / "agent_v0" / "server_v1" / "resolve_engine.py").read_text() + assert "generate_bbox_grounding" not in src, ( + "D5-v3a mini-fix : pas de helper bbox attendu. " + "generate_bbox_grounding sera D5-v3b." + ) + # generate_grounding (D5-v2) n'est pas non plus consommé ici + assert "generate_grounding(" not in src, ( + "D5-v3a mini-fix : pas de migration vers generate_grounding(). " + "D5-v2 reste API préparatoire." + ) diff --git a/tests/unit/test_text_match_fuzzy_prefix.py b/tests/unit/test_text_match_fuzzy_prefix.py new file mode 100644 index 000000000..0722214b0 --- /dev/null +++ b/tests/unit/test_text_match_fuzzy_prefix.py @@ -0,0 +1,152 @@ +"""Tests C-P1 : tolérance préfixe dans _text_match_fuzzy. + +Cas réel : OCR partiel `observed='Enregi'` sur cible `expected='Enregistrer'` +provoquait un rejet pre-check `expected='Enregistrer' observed='Enregi'`. +Patch : accepter si observed est préfixe d'expected avec len ≥ 4 et ≥ 50% +de la longueur expected. + +Référence : inbox_claude/2026-05-25_1938_codex-to-claude_TACHES-projet-ocr-d5v3c-lea.md +Fix : agent_v0/server_v1/resolve_engine.py:_text_match_fuzzy +""" +from __future__ import annotations + +import sys +from pathlib import Path + +import pytest + + +ROOT = Path(__file__).resolve().parents[2] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + + +# ──────────────────────────────────────────────────────────────────────────── +# Cas qui MOTIVENT le patch (rejet incorrect avant) +# ──────────────────────────────────────────────────────────────────────────── + + +@pytest.mark.unit +def test_enregi_matches_enregistrer(): + """Cas réel rapporté Codex : OCR partiel 'Enregi' sur 'Enregistrer' doit matcher.""" + from agent_v0.server_v1.resolve_engine import _text_match_fuzzy + assert _text_match_fuzzy("Enregistrer", "Enregi") is True + + +@pytest.mark.unit +def test_coller_matches_collier(): + """Préfixe 4 chars sur 6 (66%) doit matcher.""" + from agent_v0.server_v1.resolve_engine import _text_match_fuzzy + assert _text_match_fuzzy("Coller", "Coll") is True + + +@pytest.mark.unit +def test_cancel_matches_canc(): + """Préfixe 4 chars sur 6 (66%) doit matcher.""" + from agent_v0.server_v1.resolve_engine import _text_match_fuzzy + assert _text_match_fuzzy("Cancel", "Canc") is True + + +# ──────────────────────────────────────────────────────────────────────────── +# Garde-fous : préfixes trop courts/faibles DOIVENT être rejetés +# ──────────────────────────────────────────────────────────────────────────── + + +@pytest.mark.unit +def test_save_does_not_match_sa_too_short(): + """Préfixe < 4 chars rejeté (faux positif risque élevé).""" + from agent_v0.server_v1.resolve_engine import _text_match_fuzzy + assert _text_match_fuzzy("Save", "Sa") is False + + +@pytest.mark.unit +def test_bouton_does_not_match_bo_too_short(): + """Préfixe 2 chars rejeté (faux positif probable).""" + from agent_v0.server_v1.resolve_engine import _text_match_fuzzy + assert _text_match_fuzzy("Bouton", "Bo") is False + + +@pytest.mark.unit +def test_enregistrer_sous_does_not_match_enregi_below_50pct(): + """Préfixe 6 chars sur 16 (37% < 50%) rejeté (trop ambigu). + + Ce cas évite que 'Enregi' (partiel de 'Enregistrer') soit accepté pour + 'Enregistrer sous' alors qu'il devrait viser 'Enregistrer' tout court. + """ + from agent_v0.server_v1.resolve_engine import _text_match_fuzzy + # _normalize_for_match retire les espaces ? À vérifier. Si oui, observed + # doit être préfixe de "enregistrersous" (15 chars), 6/15 = 40% < 50%. + assert _text_match_fuzzy("Enregistrer sous", "Enregi") is False + + +@pytest.mark.unit +def test_save_matches_save_substring_unchanged(): + """Cas existant substring : 'Save' dans 'Saved' doit toujours matcher.""" + from agent_v0.server_v1.resolve_engine import _text_match_fuzzy + assert _text_match_fuzzy("Save", "Saved") is True + + +# ──────────────────────────────────────────────────────────────────────────── +# Comportements existants préservés (regression guards) +# ──────────────────────────────────────────────────────────────────────────── + + +@pytest.mark.unit +def test_token_matching_still_works(): + """Cas multi-tokens existant : 'coller saisir dossier patient' / 'u saisir le dossier patient' → 3/4 ≥ 60%.""" + from agent_v0.server_v1.resolve_engine import _text_match_fuzzy + assert _text_match_fuzzy( + "Coller ou saisir le dossier patient", + "u saisir le dossier patient", + ) is True + + +@pytest.mark.unit +def test_unrelated_text_still_rejected(): + """Texte totalement différent toujours rejeté.""" + from agent_v0.server_v1.resolve_engine import _text_match_fuzzy + assert _text_match_fuzzy("Enregistrer", "Annuler") is False + + +@pytest.mark.unit +def test_empty_expected_returns_true(): + """Expected vide = pas de contrainte = match.""" + from agent_v0.server_v1.resolve_engine import _text_match_fuzzy + assert _text_match_fuzzy("", "anything") is True + + +@pytest.mark.unit +def test_empty_observed_does_not_match_non_empty_expected(): + """Observed vide rejeté (sauf si expected vide aussi).""" + from agent_v0.server_v1.resolve_engine import _text_match_fuzzy + assert _text_match_fuzzy("Enregistrer", "") is False + + +# ──────────────────────────────────────────────────────────────────────────── +# Edge cases préfixe +# ──────────────────────────────────────────────────────────────────────────── + + +@pytest.mark.unit +def test_prefix_exactly_4_chars_at_50_pct(): + """4 chars / 8 chars = 50% exact + len ≥ 4 → ACCEPT.""" + from agent_v0.server_v1.resolve_engine import _text_match_fuzzy + # "Continue" = 8 chars, "Cont" = 4 chars = 50% + assert _text_match_fuzzy("Continue", "Cont") is True + + +@pytest.mark.unit +def test_prefix_3_chars_rejected_even_if_high_ratio(): + """3 chars rejeté même si ≥ 50% (garde-fou minimum 4 chars).""" + from agent_v0.server_v1.resolve_engine import _text_match_fuzzy + # "Sa" = 2 chars, "Save" = 4 chars (50%) → rejeté car < 4 chars + assert _text_match_fuzzy("Save", "Sav") is False + + +@pytest.mark.unit +def test_prefix_not_strict_prefix_rejected(): + """Si observed n'est PAS un préfixe strict, prefix rule ne s'applique pas.""" + from agent_v0.server_v1.resolve_engine import _text_match_fuzzy + # "Enregistrer" / "Sauver" : pas substring, pas préfixe, 1 token "sauver" + # absent de "enregistrer" → 0/1 < 0.60 → False + assert _text_match_fuzzy("Enregistrer", "Sauver") is False diff --git a/tests/unit/test_visual_anchor_semantics.py b/tests/unit/test_visual_anchor_semantics.py new file mode 100644 index 000000000..df2d0a028 --- /dev/null +++ b/tests/unit/test_visual_anchor_semantics.py @@ -0,0 +1,153 @@ +import os +from types import SimpleNamespace + +from agent_v0.server_v1.replay_engine import ( + _create_replay_state, + _edge_to_normalized_actions, +) + +os.environ.setdefault("RPA_AUTH_DISABLED", "true") + +from agent_v0.server_v1.api_stream import _normalize_action_target_semantics +from visual_workflow_builder.backend.services.learned_workflow_bridge import ( + _vwb_params_to_target_spec, +) + + +class _FakeAction: + def __init__(self, type_, target=None, parameters=None): + self.type = type_ + self.target = target + self.parameters = parameters or {} + + +class _FakeEdge: + def __init__(self, action): + self.edge_id = "edge_anchor" + self.from_node = "node_src" + self.to_node = "node_dst" + self.action = action + + +def test_vwb_target_spec_preserves_visual_anchor_semantics(): + target = _vwb_params_to_target_spec( + "double_click_anchor", + { + "visual_anchor": { + "anchor_id": "anchor_a518f6d5e727_1778849657", + "target_text": "- W - ICE rapport urgenc.", + "description": "Word document icon with text.", + "ocr_description": "Word document icon with text.", + }, + }, + ) + + assert target["by_text"] == "- W - ICE rapport urgenc." + hints = target["context_hints"] + assert hints["anchor_id"] == "anchor_a518f6d5e727_1778849657" + assert hints["target_text"] == "- W - ICE rapport urgenc." + assert hints["description"] == "Word document icon with text." + assert hints["vlm_description"] == "Word document icon with text." + + +def test_replay_normalization_lifts_anchor_semantics_from_context_hints(): + target = SimpleNamespace( + by_role="icon", + by_text=None, + by_position=(0.12, 0.18), + context_hints={ + "anchor_id": "anchor_a518f6d5e727_1778849657", + "target_text": "- W - ICE rapport urgenc.", + "description": "Word document icon with text.", + "ocr_description": "Word document icon with text.", + "anchor_image_base64": "abc123", + }, + ) + edge = _FakeEdge( + _FakeAction("mouse_click", target=target, parameters={"button": "double"}) + ) + + actions = _edge_to_normalized_actions(edge, params={}) + + assert len(actions) == 1 + action = actions[0] + target_spec = action["target_spec"] + assert action["visual_mode"] is True + assert action["target_description"] == "- W - ICE rapport urgenc." + assert target_spec["by_text"] == "- W - ICE rapport urgenc." + assert target_spec["anchor_id"] == "anchor_a518f6d5e727_1778849657" + assert target_spec["vlm_description"] == "Word document icon with text." + assert target_spec["anchor_image_base64"] == "abc123" + + +def test_replay_state_strips_anchor_image_but_keeps_semantic_label(): + action = { + "action_id": "act_anchor", + "type": "click", + "target_spec": { + "anchor_id": "anchor_a518f6d5e727_1778849657", + "anchor_image_base64": "abc123", + "by_text": "- W - ICE rapport urgenc.", + "target_text": "- W - ICE rapport urgenc.", + "description": "Word document icon with text.", + }, + } + + state = _create_replay_state("replay", "workflow", "session", 1, actions=[action]) + + target_spec = state["actions"][0]["target_spec"] + assert "anchor_image_base64" not in target_spec + assert target_spec["anchor_id"] == "anchor_a518f6d5e727_1778849657" + assert target_spec["by_text"] == "- W - ICE rapport urgenc." + assert target_spec["description"] == "Word document icon with text." + + +def test_compound_click_step_keeps_visual_anchor_semantics(): + edge = _FakeEdge( + _FakeAction( + "compound", + parameters={ + "steps": [ + { + "type": "mouse_click", + "x_pct": 0.12, + "y_pct": 0.18, + "target_text": "- W - ICE rapport urgenc.", + "description": "Word document icon with text.", + "anchor_id": "anchor_a518f6d5e727_1778849657", + } + ] + }, + ) + ) + + actions = _edge_to_normalized_actions(edge, params={}) + + assert len(actions) == 1 + target_spec = actions[0]["target_spec"] + assert actions[0]["visual_mode"] is True + assert actions[0]["target_description"] == "- W - ICE rapport urgenc." + assert target_spec["by_text"] == "- W - ICE rapport urgenc." + assert target_spec["anchor_id"] == "anchor_a518f6d5e727_1778849657" + + +def test_serialized_action_semantics_are_promoted_before_enqueue(): + action = { + "action_id": "step_from_tmp", + "type": "click", + "target_spec": { + "anchor_id": "anchor_tmp", + "target_text": "- W - ICE rapport urgenc.", + "description": "Word document icon with text.", + "ocr_description": "Word document icon with text.", + "anchor_image_base64": "abc123", + }, + } + + _normalize_action_target_semantics(action) + + target_spec = action["target_spec"] + assert target_spec["by_text"] == "- W - ICE rapport urgenc." + assert target_spec["by_text_source"] == "visual_anchor" + assert target_spec["vlm_description"] == "Word document icon with text." + assert action["target_description"] == "- W - ICE rapport urgenc." diff --git a/tests/unit/test_vlm_grounding_profile.py b/tests/unit/test_vlm_grounding_profile.py new file mode 100644 index 000000000..c42bfc4be --- /dev/null +++ b/tests/unit/test_vlm_grounding_profile.py @@ -0,0 +1,310 @@ +"""Tests pour D5-v2 : profil grounding VLM centralisé + generate_grounding(). + +Couvre : +- vlm_config.get_grounding_profile() avec valeurs par défaut et overrides env +- ollama_client.OllamaClient.generate_grounding() avec mocks requests.post +- Parsing JSON prefill-aware (reconstitution {"x_pct": ...} → dict) +- Pas d'appel Ollama live (tous les requests.post sont mockés) + +Référence : inbox_claude/2026-05-25_1620_codex-to-claude_GO-revue-strategique-D5v2-C2d.md +Fix : core/detection/vlm_config.py (get_grounding_profile) + + core/detection/ollama_client.py (generate_grounding, _extract_first_json_object) +""" +from __future__ import annotations + +import json +import sys +from pathlib import Path +from unittest.mock import patch, MagicMock + +import pytest + + +ROOT = Path(__file__).resolve().parents[2] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + + +# ──────────────────────────────────────────────────────────────────────────── +# vlm_config.get_grounding_profile +# ──────────────────────────────────────────────────────────────────────────── + + +@pytest.mark.unit +def test_grounding_profile_defaults(monkeypatch): + """Sans env vars, valeurs par défaut D5-v2.""" + for k in ("RPA_GROUNDING_MODEL", "RPA_GROUNDING_CTX", "RPA_GROUNDING_FALLBACK", "RPA_VLM_PREFILL"): + monkeypatch.delenv(k, raising=False) + from core.detection.vlm_config import get_grounding_profile + + p = get_grounding_profile() + assert p["model"] == "qwen3.5:9b" + assert p["num_ctx"] == 4096 + assert p["prefill"] == '{"x_pct":' + assert p["temperature"] == 0.0 + assert p["num_predict"] == 96 + assert p["fallback_model"] == "qwen2.5vl:7b-rpa" + assert p["keep_alive"] == "30m" + # qwen3.5 = thinking model → think doit être False côté payload + # Le profile expose think comme bool ; False signifie "envoyer think:false" + assert p["think"] is False + + +@pytest.mark.unit +def test_grounding_profile_env_override(monkeypatch): + """Env vars override modèle, ctx, fallback.""" + monkeypatch.setenv("RPA_GROUNDING_MODEL", "qwen2.5vl:7b-rpa") + monkeypatch.setenv("RPA_GROUNDING_CTX", "8192") + monkeypatch.setenv("RPA_GROUNDING_FALLBACK", "gemma4:latest") + from core.detection.vlm_config import get_grounding_profile + + p = get_grounding_profile() + assert p["model"] == "qwen2.5vl:7b-rpa" + assert p["num_ctx"] == 8192 + assert p["fallback_model"] == "gemma4:latest" + # qwen2.5vl n'est PAS thinking et n'est PAS gemma4 → think=True (rien à envoyer) + assert p["think"] is True + + +@pytest.mark.unit +def test_grounding_profile_ctx_invalid_falls_back_to_default(monkeypatch): + """RPA_GROUNDING_CTX non-numeric → fallback 4096.""" + monkeypatch.setenv("RPA_GROUNDING_CTX", "not_a_number") + from core.detection.vlm_config import get_grounding_profile + + p = get_grounding_profile() + assert p["num_ctx"] == 4096 + + +@pytest.mark.unit +def test_grounding_profile_prefill_disabled(monkeypatch): + """RPA_VLM_PREFILL=false → prefill None.""" + monkeypatch.setenv("RPA_VLM_PREFILL", "false") + from core.detection.vlm_config import get_grounding_profile + + p = get_grounding_profile() + assert p["prefill"] is None + + +@pytest.mark.unit +def test_grounding_profile_gemma4_triggers_think_false(monkeypatch): + """Si on remplace par gemma4, think_false doit être déclenché.""" + monkeypatch.setenv("RPA_GROUNDING_MODEL", "gemma4:latest") + from core.detection.vlm_config import get_grounding_profile + + p = get_grounding_profile() + assert p["think"] is False # gemma4 needs think=false + + +# ──────────────────────────────────────────────────────────────────────────── +# _extract_first_json_object +# ──────────────────────────────────────────────────────────────────────────── + + +@pytest.mark.unit +def test_extract_first_json_object_clean(): + """JSON propre directement.""" + from core.detection.ollama_client import _extract_first_json_object + obj = _extract_first_json_object('{"x_pct": 0.5, "y_pct": 0.3, "confidence": 0.95}') + assert obj == {"x_pct": 0.5, "y_pct": 0.3, "confidence": 0.95} + + +@pytest.mark.unit +def test_extract_first_json_object_with_trailing_text(): + """JSON suivi de texte parasite (typique VLM).""" + from core.detection.ollama_client import _extract_first_json_object + text = '{"x_pct": 0.4, "y_pct": 0.6, "confidence": 0.88}\n\nThe button is located in the bottom-right area.' + obj = _extract_first_json_object(text) + assert obj["x_pct"] == 0.4 + assert obj["confidence"] == 0.88 + + +@pytest.mark.unit +def test_extract_first_json_object_with_nested(): + """JSON avec objet imbriqué.""" + from core.detection.ollama_client import _extract_first_json_object + text = '{"x_pct": 0.5, "meta": {"source": "qwen", "score": 0.9}}' + obj = _extract_first_json_object(text) + assert obj["meta"]["source"] == "qwen" + + +@pytest.mark.unit +def test_extract_first_json_object_with_braces_in_strings(): + """Les accolades dans les strings ne doivent pas perturber le compteur.""" + from core.detection.ollama_client import _extract_first_json_object + text = '{"x_pct": 0.5, "label": "Click {here}"}' + obj = _extract_first_json_object(text) + assert obj["label"] == "Click {here}" + + +@pytest.mark.unit +def test_extract_first_json_object_invalid(): + """Texte sans JSON → None.""" + from core.detection.ollama_client import _extract_first_json_object + assert _extract_first_json_object("no json here at all") is None + + +@pytest.mark.unit +def test_extract_first_json_object_empty(): + """Texte vide → None.""" + from core.detection.ollama_client import _extract_first_json_object + assert _extract_first_json_object("") is None + assert _extract_first_json_object(None) is None + + +# ──────────────────────────────────────────────────────────────────────────── +# OllamaClient.generate_grounding (mocks requests.post) +# ──────────────────────────────────────────────────────────────────────────── + + +@pytest.fixture +def mock_client(monkeypatch): + """Construit un OllamaClient sans connexion réelle à Ollama.""" + from core.detection import ollama_client as oc_module + + # Bypass _check_connection + monkeypatch.setattr(oc_module.OllamaClient, "_check_connection", lambda self: True) + # Force le modèle pour éviter get_vlm_model() qui appelle Ollama + client = oc_module.OllamaClient(model="qwen2.5vl:7b-rpa") + return client + + +@pytest.mark.unit +def test_generate_grounding_payload_uses_profile(mock_client, monkeypatch): + """Le payload envoyé à Ollama utilise le profile (model, ctx, prefill, temp, etc.).""" + monkeypatch.delenv("RPA_VLM_PREFILL", raising=False) + + captured = {} + + def fake_post(url, json=None, timeout=None): + captured["url"] = url + captured["payload"] = json + # Réponse simulée : Ollama renvoie le contenu APRÈS le prefill + resp = MagicMock() + resp.status_code = 200 + resp.json.return_value = { + "message": { + "content": ' 0.5, "y_pct": 0.3, "confidence": 0.95}' + } + } + return resp + + monkeypatch.setattr("core.detection.ollama_client.requests.post", fake_post) + + result = mock_client.generate_grounding(prompt="Find the Save button") + + # Payload vérifié + assert captured["url"].endswith("/api/chat") + payload = captured["payload"] + assert payload["model"] == "qwen3.5:9b" # défaut grounding + assert payload["options"]["num_ctx"] == 4096 + assert payload["options"]["temperature"] == 0.0 + assert payload["options"]["num_predict"] == 96 + # qwen3.5 = thinking → think=false dans payload + assert payload.get("think") is False + # Le prefill doit être présent dans le dernier message (assistant) + last_msg = payload["messages"][-1] + assert last_msg["role"] == "assistant" + assert last_msg["content"] == '{"x_pct":' + + +@pytest.mark.unit +def test_generate_grounding_parses_prefilled_json(mock_client, monkeypatch): + """Le JSON est correctement reconstitué via prefill + parsé.""" + monkeypatch.delenv("RPA_VLM_PREFILL", raising=False) + + def fake_post(url, json=None, timeout=None): + resp = MagicMock() + resp.status_code = 200 + # Ollama renvoie SANS le prefill (le client le rajoute) + resp.json.return_value = { + "message": { + "content": ' 0.42, "y_pct": 0.68, "confidence": 0.91}' + } + } + return resp + + monkeypatch.setattr("core.detection.ollama_client.requests.post", fake_post) + + result = mock_client.generate_grounding(prompt="Find OK button") + + assert result["success"] is True + # response contient le JSON complet reconstitué + assert result["response"].startswith('{"x_pct":') + # parsed_json est le dict reconstruit + parsed = result["parsed_json"] + assert parsed is not None + assert parsed["x_pct"] == 0.42 + assert parsed["y_pct"] == 0.68 + assert parsed["confidence"] == 0.91 + # profile_used est exposé + assert result["profile_used"]["model"] == "qwen3.5:9b" + assert result["profile_used"]["num_ctx"] == 4096 + + +@pytest.mark.unit +def test_generate_grounding_restores_original_model(mock_client, monkeypatch): + """Après generate_grounding, self.model est restauré (pas de side-effect).""" + original = mock_client.model # qwen2.5vl:7b-rpa + + def fake_post(url, json=None, timeout=None): + resp = MagicMock() + resp.status_code = 200 + resp.json.return_value = {"message": {"content": ' 0.5, "y_pct": 0.5, "confidence": 0.5}'}} + return resp + + monkeypatch.setattr("core.detection.ollama_client.requests.post", fake_post) + mock_client.generate_grounding(prompt="test") + assert mock_client.model == original, ( + f"self.model doit être restauré ({original}), trouvé : {mock_client.model}" + ) + + +@pytest.mark.unit +def test_generate_grounding_handles_ollama_error(mock_client, monkeypatch): + """Si Ollama retourne 500, success=False, parsed_json=None, model restauré.""" + def fake_post(url, json=None, timeout=None): + resp = MagicMock() + resp.status_code = 500 + resp.text = "Internal error" + return resp + + monkeypatch.setattr("core.detection.ollama_client.requests.post", fake_post) + result = mock_client.generate_grounding(prompt="test") + assert result["success"] is False + assert result["parsed_json"] is None + + +@pytest.mark.unit +def test_generate_grounding_profile_override(mock_client, monkeypatch): + """Override profile explicite dans l'appel (pour tests / contextes spéciaux).""" + captured = {} + + def fake_post(url, json=None, timeout=None): + captured["payload"] = json + resp = MagicMock() + resp.status_code = 200 + resp.json.return_value = {"message": {"content": ' 0.1, "y_pct": 0.1, "confidence": 0.5}'}} + return resp + + monkeypatch.setattr("core.detection.ollama_client.requests.post", fake_post) + + custom_profile = { + "model": "gemma4:latest", + "num_ctx": 2048, + "prefill": None, + "temperature": 0.2, + "num_predict": 50, + "think": False, + "keep_alive": "5m", + "fallback_model": "qwen2.5vl:7b-rpa", + } + result = mock_client.generate_grounding(prompt="test", profile=custom_profile) + payload = captured["payload"] + assert payload["model"] == "gemma4:latest" + assert payload["options"]["num_ctx"] == 2048 + assert payload["options"]["temperature"] == 0.2 + # Pas de prefill → pas de message assistant + assert payload["messages"][-1]["role"] == "user" + # gemma4 needs think=false → injecté + assert payload.get("think") is False diff --git a/tests/unit/test_workflow_pipeline_ui_detection_disabled.py b/tests/unit/test_workflow_pipeline_ui_detection_disabled.py new file mode 100644 index 000000000..750720328 --- /dev/null +++ b/tests/unit/test_workflow_pipeline_ui_detection_disabled.py @@ -0,0 +1,197 @@ +"""Tests pour C1c : WorkflowPipeline(enable_ui_detection=False) ne doit pas +charger OWL-v2 sur GPU. + +Contexte : depuis 2026-05-25, agent_chat instancie WorkflowPipeline avec +enable_ui_detection=False par défaut (override AGENT_CHAT_ENABLE_UI_DETECTION=1), +pour économiser ~900 MiB VRAM au boot du service rpa-agent-chat. + +Référence : inbox_claude/2026-05-25_1341_codex-to-claude_C1c-C2b-plan-action.md +Fix : agent_chat/app.py:296 (WorkflowPipeline kwargs env-driven) +Contrat validé : core/pipeline/workflow_pipeline.py:117-118 : + self.ui_detector = None + if enable_ui_detection: + ... # UIDetector chargé → OWL/VLM init +""" +from __future__ import annotations + +import sys +from pathlib import Path + +import pytest + + +ROOT = Path(__file__).resolve().parents[2] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + + +def _patch_light_pipeline_deps(monkeypatch, wp_module): + class FakeCLIPEmbedder: + def __init__(self, device=None): + self.device = device + + class FakeFusionEngine: + pass + + class FakeStateEmbeddingBuilder: + def __init__(self, *args, **kwargs): + self.args = args + self.kwargs = kwargs + + class FakeFAISSManager: + def __init__(self, *args, **kwargs): + self.args = args + self.kwargs = kwargs + + monkeypatch.setattr(wp_module, "CLIPEmbedder", FakeCLIPEmbedder) + monkeypatch.setattr(wp_module, "FusionEngine", FakeFusionEngine) + monkeypatch.setattr(wp_module, "StateEmbeddingBuilder", FakeStateEmbeddingBuilder) + monkeypatch.setattr(wp_module, "FAISSManager", FakeFAISSManager) + + +@pytest.mark.unit +def test_workflow_pipeline_ui_detection_disabled_no_owl_load(monkeypatch): + """WorkflowPipeline(enable_ui_detection=False) → self.ui_detector is None, + pas d'instantiation UIDetector, donc pas de chargement OWL/VLM.""" + from core.pipeline import workflow_pipeline as wp_module + + ui_detector_calls = [] + _patch_light_pipeline_deps(monkeypatch, wp_module) + + class FakeUIDetector: + def __init__(self, config=None): + ui_detector_calls.append(config) + + monkeypatch.setattr(wp_module, "UIDetector", FakeUIDetector) + + pipeline = wp_module.WorkflowPipeline( + enable_ui_detection=False, + enable_vlm=False, + ) + assert pipeline.ui_detector is None, ( + "ui_detector doit être None quand enable_ui_detection=False" + ) + assert len(ui_detector_calls) == 0, ( + f"UIDetector instancié alors que ui_detection=False : " + f"{ui_detector_calls}" + ) + + +@pytest.mark.unit +def test_workflow_pipeline_ui_detection_enabled_calls_ui_detector(monkeypatch): + """Contrat inverse : enable_ui_detection=True → UIDetector instancié.""" + from core.pipeline import workflow_pipeline as wp_module + + ui_detector_calls = [] + _patch_light_pipeline_deps(monkeypatch, wp_module) + + class FakeUIDetector: + def __init__(self, config=None): + ui_detector_calls.append(config) + + monkeypatch.setattr(wp_module, "UIDetector", FakeUIDetector) + + pipeline = wp_module.WorkflowPipeline( + enable_ui_detection=True, + enable_vlm=True, + ) + assert pipeline.ui_detector is not None, ( + "ui_detector doit être instancié quand enable_ui_detection=True" + ) + assert len(ui_detector_calls) == 1 + # Le config passé doit refléter enable_vlm + config = ui_detector_calls[0] + assert config.use_vlm_classification is True + assert config.use_owl_detection is True # Par défaut DetectionConfig + + +@pytest.mark.unit +def test_workflow_pipeline_reuses_clip_embedder_for_state_builder(monkeypatch): + """WorkflowPipeline ne doit pas charger un second OpenCLIP en auto-GPU. + + Le premier CLIP respecte `use_gpu`; StateEmbeddingBuilder doit le réutiliser + au lieu de créer `CLIPEmbedder()` avec auto-détection CUDA. + """ + from core.pipeline import workflow_pipeline as wp_module + + clip_instances = [] + builder_calls = [] + + class FakeCLIPEmbedder: + def __init__(self, device=None): + self.device = device + clip_instances.append(self) + + class FakeStateEmbeddingBuilder: + def __init__( + self, + fusion_engine=None, + embedders=None, + output_dir=None, + use_clip=True, + ): + builder_calls.append( + { + "embedders": embedders, + "use_clip": use_clip, + "output_dir": output_dir, + } + ) + + monkeypatch.setattr(wp_module, "CLIPEmbedder", FakeCLIPEmbedder) + monkeypatch.setattr(wp_module, "FAISSManager", lambda *args, **kwargs: object()) + monkeypatch.setattr(wp_module, "StateEmbeddingBuilder", FakeStateEmbeddingBuilder) + + pipeline = wp_module.WorkflowPipeline( + use_gpu=False, + enable_ui_detection=False, + enable_vlm=False, + ) + + assert len(clip_instances) == 1 + assert clip_instances[0].device == "cpu" + assert len(builder_calls) == 1 + assert builder_calls[0]["use_clip"] is False + assert builder_calls[0]["embedders"]["image"] is pipeline.clip_embedder + assert builder_calls[0]["embedders"]["text"] is pipeline.clip_embedder + assert builder_calls[0]["embedders"]["title"] is pipeline.clip_embedder + assert builder_calls[0]["embedders"]["ui"] is pipeline.clip_embedder + + +@pytest.mark.unit +def test_agent_chat_app_env_flag_default_off(monkeypatch): + """Smoke test du parsing env-driven dans agent_chat/app.py. + + Vérifie le pattern utilisé pour décider si UI detection doit être activée. + """ + monkeypatch.delenv("AGENT_CHAT_ENABLE_UI_DETECTION", raising=False) + + import os + enabled = os.environ.get( + "AGENT_CHAT_ENABLE_UI_DETECTION", "0" + ).strip() in ("1", "true", "yes") + assert enabled is False, "Sans env var, doit être False" + + +@pytest.mark.unit +def test_agent_chat_app_env_flag_explicit_on(monkeypatch): + """AGENT_CHAT_ENABLE_UI_DETECTION=1 → True.""" + monkeypatch.setenv("AGENT_CHAT_ENABLE_UI_DETECTION", "1") + + import os + enabled = os.environ.get( + "AGENT_CHAT_ENABLE_UI_DETECTION", "0" + ).strip() in ("1", "true", "yes") + assert enabled is True + + +@pytest.mark.unit +def test_agent_chat_app_env_flag_explicit_off(monkeypatch): + """AGENT_CHAT_ENABLE_UI_DETECTION=0 → False.""" + monkeypatch.setenv("AGENT_CHAT_ENABLE_UI_DETECTION", "0") + + import os + enabled = os.environ.get( + "AGENT_CHAT_ENABLE_UI_DETECTION", "0" + ).strip() in ("1", "true", "yes") + assert enabled is False diff --git a/tools/generate_ollama_inventory_v2.py b/tools/generate_ollama_inventory_v2.py new file mode 100644 index 000000000..e541b2333 --- /dev/null +++ b/tools/generate_ollama_inventory_v2.py @@ -0,0 +1,101 @@ +import json +import subprocess +import os +import hashlib + +def get_hash(file_path): + if not os.path.exists(file_path): + return "MISSING" + sha256_hash = hashlib.sha256() + with open(file_path,"rb") as f: + for byte_block in iter(lambda: f.read(4096),b""): + sha256_hash.update(byte_block) + return sha256_hash.hexdigest() + +def main(): + # 1. Get ollama list + try: + raw_list = subprocess.check_output(["ollama", "list"]).decode("utf-8") + except: + raw_list = "" + + lines = raw_list.strip().split("\n")[1:] + inventory = [] + + for line in lines: + parts = line.split() + if len(parts) < 3: continue + tag = parts[0] + tag_id = parts[1] + size = parts[2] + + # 2. Find manifest path + # Pattern: /var/lib/ollama/.ollama/models/manifests/registry.ollama.ai/library/NAME/TAG + # Or: /var/lib/ollama/.ollama/models/manifests/registry.ollama.ai/USER/NAME/TAG + + manifest_root = "/var/lib/ollama/.ollama/models/manifests/registry.ollama.ai/" + tag_parts = tag.split("/") + if len(tag_parts) == 1: + # library + name_tag = tag_parts[0].split(":") + name = name_tag[0] + version = name_tag[1] if len(name_tag) > 1 else "latest" + manifest_path = os.path.join(manifest_root, "library", name, version) + else: + user = tag_parts[0] + name_tag = tag_parts[1].split(":") + name = name_tag[0] + version = name_tag[1] if len(name_tag) > 1 else "latest" + manifest_path = os.path.join(manifest_root, user, name, version) + + manifest_hash = get_hash(manifest_path) + + # 3. Read manifest content + layers = [] + config_digest = "" + if os.path.exists(manifest_path): + try: + with open(manifest_path, "r") as f: + data = json.load(f) + config_digest = data.get("config", {}).get("digest", "") + for layer in data.get("layers", []): + layers.append({ + "mediaType": layer.get("mediaType"), + "digest": layer.get("digest"), + "size": layer.get("size"), + "from": layer.get("from") + }) + except: + pass + + # 4. Get Modelfile for critical params + try: + modelfile = subprocess.check_output(["ollama", "show", tag, "--modelfile"]).decode("utf-8") + from_line = [l for l in modelfile.split("\n") if l.startswith("FROM ")][0] + except: + modelfile = "" + from_line = "" + + entry = { + "tag": tag, + "ollama_list_id": tag_id, + "ollama_list_size": size, + "manifest_path": manifest_path, + "manifest_hash": manifest_hash, + "config_digest": config_digest, + "blob_digest_from": from_line.replace("FROM ", "").strip(), + "layers": layers, + "reconstructible": "YES" if from_line and manifest_hash != "MISSING" else "UNKNOWN" + } + inventory.append(entry) + + output = { + "inventory_date": "2026-05-25T13:35:00", + "total_tags": len(inventory), + "models": inventory + } + + print(json.dumps(output, indent=2)) + +if __name__ == "__main__": + main() diff --git a/tools/lea_healthcheck.py b/tools/lea_healthcheck.py new file mode 100644 index 000000000..d933b7e15 --- /dev/null +++ b/tools/lea_healthcheck.py @@ -0,0 +1,401 @@ +#!/usr/bin/env python3 +"""Read-only healthcheck for the Lea demo stack. + +This script does not start, stop, restart, delete, or restore anything. +It is intended as a daily proof artifact before the 2026-06-01 demo. +""" + +from __future__ import annotations + +import argparse +import base64 +import json +import os +import shlex +import socket +import subprocess +import sys +import time +import urllib.error +import urllib.request +from pathlib import Path +from typing import Any + + +REPO_ROOT = Path(__file__).resolve().parents[1] +OLLAMA_MODELS_DIR = Path("/var/lib/ollama/.ollama/models") + +CRITICAL_MODELS = ( + "qwen2.5vl:7b-rpa", + "t2a-gemma3-27b:latest", + "t2a-gemma3-27b-q4:latest", + "thiagomoraes/medgemma-27b-it:Q4_K_S", +) + +CRITICAL_BLOBS = { + "t2a-gemma3-27b:latest": "sha256-2f2509e30b0d07db517b82e62404194ef355846f08ac287775ff363693086818", + "t2a-gemma3-27b-q4:latest": "sha256-0139f42273d53348fa0d24daae016b7231e1310258bbbaa7e38a1af703217c1a", + "thiagomoraes/medgemma-27b-it:Q4_K_S": "sha256-7cb6ff10942c8ccf370e274daafaf56da3fff318f40a355df331d8783c6c11f3", +} + + +def run_command(args: list[str], timeout: float = 5.0) -> tuple[int, str, str]: + try: + proc = subprocess.run( + args, + cwd=REPO_ROOT, + text=True, + encoding="utf-8", + errors="replace", + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + timeout=timeout, + check=False, + ) + return proc.returncode, proc.stdout.strip(), proc.stderr.strip() + except FileNotFoundError as exc: + return 127, "", str(exc) + except subprocess.TimeoutExpired as exc: + stdout = (exc.stdout or "").strip() if isinstance(exc.stdout, str) else "" + stderr = (exc.stderr or "").strip() if isinstance(exc.stderr, str) else "" + return 124, stdout, stderr or f"timeout after {timeout}s" + + +def http_json(url: str, timeout: float = 2.0) -> tuple[bool, Any, str]: + try: + req = urllib.request.Request(url, headers={"User-Agent": "lea-healthcheck/1.0"}) + with urllib.request.urlopen(req, timeout=timeout) as response: + body = response.read().decode("utf-8", errors="replace") + if response.status >= 400: + return False, None, f"http {response.status}: {body[:300]}" + return True, json.loads(body), "" + except json.JSONDecodeError as exc: + return False, None, f"invalid json: {exc}" + except (urllib.error.URLError, TimeoutError, OSError) as exc: + return False, None, str(exc) + + +def tcp_open(host: str, port: int, timeout: float = 1.0) -> tuple[bool, str]: + try: + with socket.create_connection((host, port), timeout=timeout): + return True, "" + except OSError as exc: + return False, str(exc) + + +def add_check( + checks: list[dict[str, Any]], + name: str, + status: str, + summary: str, + details: Any | None = None, +) -> None: + checks.append( + { + "name": name, + "status": status, + "summary": summary, + "details": details, + } + ) + + +def check_systemd(checks: list[dict[str, Any]]) -> None: + for unit, required in ( + ("rpa-streaming.service", True), + ("rpa-agent-chat.service", False), + ): + code, stdout, stderr = run_command(["systemctl", "--user", "is-active", unit]) + if code == 0 and stdout == "active": + add_check(checks, f"systemd:{unit}", "ok", "active") + elif required: + add_check( + checks, + f"systemd:{unit}", + "fail", + stdout or stderr or f"is-active returned {code}", + ) + else: + add_check( + checks, + f"systemd:{unit}", + "warn", + stdout or stderr or "inactive optional service", + {"note": "5004 narration bus is optional, but should be fixed before demo if enabled on Windows."}, + ) + + +def check_ports(checks: list[dict[str, Any]], host: str) -> None: + for port, name, required in ( + (5005, "streaming-http", True), + (11434, "ollama-api", True), + (5004, "feedbackbus-socketio", False), + ): + ok, error = tcp_open(host, port) + if ok: + add_check(checks, f"tcp:{name}:{port}", "ok", f"{host}:{port} accepts TCP") + elif required: + add_check(checks, f"tcp:{name}:{port}", "fail", f"{host}:{port} closed: {error}") + else: + add_check(checks, f"tcp:{name}:{port}", "warn", f"{host}:{port} closed: {error}") + + +def check_http_services(checks: list[dict[str, Any]]) -> None: + ok, data, error = http_json("http://127.0.0.1:5005/health") + if ok and isinstance(data, dict) and data.get("status") == "healthy": + add_check(checks, "http:rpa-streaming:/health", "ok", "healthy", data) + else: + add_check(checks, "http:rpa-streaming:/health", "fail", error or "unexpected health response", data) + + ok, data, error = http_json("http://127.0.0.1:5004/api/status") + if ok and isinstance(data, dict) and data.get("status") == "online": + add_check(checks, "http:feedbackbus:/api/status", "ok", "online", data) + elif ok: + add_check(checks, "http:feedbackbus:/api/status", "warn", "unexpected status response", data) + else: + add_check(checks, "http:feedbackbus:/api/status", "warn", error or "not responding") + + +def check_ollama(checks: list[dict[str, Any]]) -> None: + ok, tags_data, error = http_json("http://127.0.0.1:11434/api/tags", timeout=4.0) + if not ok or not isinstance(tags_data, dict): + add_check(checks, "ollama:tags", "fail", error or "cannot read /api/tags") + return + + models = tags_data.get("models") or [] + names = {entry.get("name") or entry.get("model") for entry in models if isinstance(entry, dict)} + missing = [name for name in CRITICAL_MODELS if name not in names] + if missing: + add_check(checks, "ollama:critical-tags", "fail", f"missing critical tags: {', '.join(missing)}") + else: + add_check(checks, "ollama:critical-tags", "ok", f"{len(CRITICAL_MODELS)} critical tags present") + + ok, ps_data, error = http_json("http://127.0.0.1:11434/api/ps", timeout=4.0) + if ok and isinstance(ps_data, dict): + loaded = ps_data.get("models") or [] + vlm = next( + (entry for entry in loaded if isinstance(entry, dict) and entry.get("name") == "qwen2.5vl:7b-rpa"), + None, + ) + if vlm: + add_check( + checks, + "ollama:resident-vlm", + "ok", + "qwen2.5vl:7b-rpa resident", + { + "context_length": vlm.get("context_length"), + "size": vlm.get("size"), + "size_vram": vlm.get("size_vram"), + "expires_at": vlm.get("expires_at"), + }, + ) + else: + add_check(checks, "ollama:resident-vlm", "warn", "qwen2.5vl:7b-rpa is not currently resident", loaded) + else: + add_check(checks, "ollama:ps", "warn", error or "cannot read /api/ps") + + +def check_model_store(checks: list[dict[str, Any]]) -> None: + manifests_dir = OLLAMA_MODELS_DIR / "manifests" + blobs_dir = OLLAMA_MODELS_DIR / "blobs" + if not OLLAMA_MODELS_DIR.exists(): + add_check(checks, "ollama:store", "fail", f"missing {OLLAMA_MODELS_DIR}") + return + + manifest_count = sum(1 for path in manifests_dir.rglob("*") if path.is_file()) if manifests_dir.exists() else 0 + blob_count = sum(1 for path in blobs_dir.iterdir() if path.is_file()) if blobs_dir.exists() else 0 + add_check( + checks, + "ollama:store-counts", + "ok" if manifest_count >= 38 and blob_count >= 100 else "warn", + f"{manifest_count} manifests, {blob_count} blobs", + {"path": str(OLLAMA_MODELS_DIR)}, + ) + + missing_blobs = [] + for model, blob_name in CRITICAL_BLOBS.items(): + blob_path = blobs_dir / blob_name + if not blob_path.exists(): + missing_blobs.append({"model": model, "blob": blob_name}) + if missing_blobs: + add_check(checks, "ollama:critical-blobs", "fail", "missing critical blobs", missing_blobs) + else: + add_check(checks, "ollama:critical-blobs", "ok", f"{len(CRITICAL_BLOBS)} critical blobs present") + + +def check_windows(checks: list[dict[str, Any]], host: str, user: str, ssh_command: str) -> None: + powershell = r""" +$ErrorActionPreference = "SilentlyContinue" +$task = schtasks /query /tn LeaInteractive /fo LIST /v | Out-String +$taskState = $null +try { + $taskState = (Get-ScheduledTask -TaskName 'LeaInteractive').State.ToString() +} catch { + $taskState = $null +} +$procs = Get-CimInstance Win32_Process -Filter "name = 'pythonw.exe' or name = 'python.exe'" | + Where-Object { $_.CommandLine -like '*run_agent_v1.py*' } | + Select-Object ProcessId,ParentProcessId,CommandLine +$lock = $null +if (Test-Path 'C:\rpa_vision\lea_agent.lock') { + $lock = (Get-Content 'C:\rpa_vision\lea_agent.lock' -Raw).Trim() +} +[pscustomobject]@{ + lea_feedback_bus_user = [Environment]::GetEnvironmentVariable('LEA_FEEDBACK_BUS', 'User') + lea_feedback_bus_machine = [Environment]::GetEnvironmentVariable('LEA_FEEDBACK_BUS', 'Machine') + rpa_capture_bind_user = [Environment]::GetEnvironmentVariable('RPA_CAPTURE_BIND', 'User') + task_state = $taskState + task_running = ($taskState -eq 'Running') + task_raw = $task + agent_processes = @($procs) + lock_pid = $lock +} | ConvertTo-Json -Compress -Depth 5 +""".strip() + + command_parts = shlex.split(ssh_command) + if not command_parts: + add_check(checks, "windows:ssh", "skip", "empty ssh command") + return + + target = f"{user}@{host}" + args = command_parts + ["-o", "StrictHostKeyChecking=no", "-o", "ConnectTimeout=5"] + if "sshpass" not in command_parts[0]: + args += ["-o", "BatchMode=yes"] + encoded = base64.b64encode(powershell.encode("utf-16le")).decode("ascii") + args += [target, "powershell", "-NoProfile", "-EncodedCommand", encoded] + + code, stdout, stderr = run_command(args, timeout=12.0) + if code != 0: + add_check(checks, "windows:ssh", "warn", stderr or stdout or f"ssh returned {code}") + return + + try: + data = json.loads(stdout) + except json.JSONDecodeError as exc: + add_check(checks, "windows:ssh", "warn", f"invalid powershell json: {exc}", stdout[:1000]) + return + + processes = data.get("agent_processes") or [] + if isinstance(processes, dict): + processes = [processes] + process_ids = { + int(proc.get("ProcessId")) + for proc in processes + if proc.get("ProcessId") is not None + } + process_roots = [ + proc for proc in processes + if proc.get("ParentProcessId") is None + or int(proc.get("ParentProcessId")) not in process_ids + ] + if processes and not process_roots: + process_roots = processes + instance_count = len(process_roots) + + task_running = bool(data.get("task_running")) + task_state = data.get("task_state") + if task_running: + task_status = "ok" + task_summary = f"task state={task_state!r}" + elif instance_count: + task_status = "warn" + task_summary = f"task state={task_state!r}, but {instance_count} agent instance tree(s) are alive" + else: + task_status = "fail" + task_summary = f"task state={task_state!r}" + + add_check(checks, "windows:ssh", "ok", f"reachable as {target}") + add_check(checks, "windows:LeaInteractive", task_status, task_summary) + add_check( + checks, + "windows:agent-process", + "ok" if instance_count == 1 else ("warn" if instance_count > 1 else "fail"), + f"{instance_count} Lea instance tree(s), {len(processes)} run_agent_v1.py process(es)", + { + "roots": process_roots, + "processes": processes, + "note": "pythonw.exe from a venv can spawn a child pythonw.exe; count root process trees, not raw processes.", + }, + ) + feedback_bus = data.get("lea_feedback_bus_user") or data.get("lea_feedback_bus_machine") + add_check( + checks, + "windows:LEA_FEEDBACK_BUS", + "ok", + f"LEA_FEEDBACK_BUS={feedback_bus!r}", + { + "note": "If set to '1', Windows will try port 5004; local TCP/HTTP checks report whether that service is available.", + "rpa_capture_bind_user": data.get("rpa_capture_bind_user"), + "lock_pid": data.get("lock_pid"), + }, + ) + + +def summarize(checks: list[dict[str, Any]]) -> str: + if any(check["status"] == "fail" for check in checks): + return "fail" + if any(check["status"] == "warn" for check in checks): + return "warn" + return "ok" + + +def print_text(report: dict[str, Any]) -> None: + print(f"Lea healthcheck: {report['overall'].upper()}") + print(f"Timestamp: {report['timestamp']}") + print() + for check in report["checks"]: + print(f"[{check['status'].upper():4}] {check['name']} - {check['summary']}") + + +def parse_args(argv: list[str]) -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--json", action="store_true", help="print machine-readable JSON") + parser.add_argument("--strict", action="store_true", help="exit non-zero on warnings") + parser.add_argument("--host", default="127.0.0.1", help="local service host for TCP checks") + parser.add_argument("--windows-host", default=os.environ.get("LEA_WINDOWS_HOST", "")) + parser.add_argument("--windows-user", default=os.environ.get("LEA_WINDOWS_USER", "dom")) + parser.add_argument( + "--ssh-command", + default=os.environ.get("LEA_SSH_COMMAND", "ssh"), + help="ssh command prefix; for password auth use LEA_SSH_COMMAND='sshpass -e ssh' and SSHPASS externally", + ) + return parser.parse_args(argv) + + +def main(argv: list[str]) -> int: + args = parse_args(argv) + checks: list[dict[str, Any]] = [] + + check_systemd(checks) + check_ports(checks, args.host) + check_http_services(checks) + check_ollama(checks) + check_model_store(checks) + + if args.windows_host: + check_windows(checks, args.windows_host, args.windows_user, args.ssh_command) + else: + add_check(checks, "windows", "skip", "not requested; pass --windows-host or LEA_WINDOWS_HOST") + + report = { + "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S%z"), + "overall": summarize(checks), + "repo": str(REPO_ROOT), + "checks": checks, + } + + if args.json: + print(json.dumps(report, indent=2, sort_keys=True)) + else: + print_text(report) + + if report["overall"] == "fail": + return 2 + if args.strict and report["overall"] == "warn": + return 1 + return 0 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv[1:])) diff --git a/tools/lea_micro_preflight.py b/tools/lea_micro_preflight.py new file mode 100644 index 000000000..d64a13952 --- /dev/null +++ b/tools/lea_micro_preflight.py @@ -0,0 +1,409 @@ +#!/usr/bin/env python3 +"""Read-only preflight for Lea micro-learning prerequisites. + +The script performs fast checks only. It does not warm up models, pull models, +start services, stop replays, restart processes, or modify files. +""" + +from __future__ import annotations + +import argparse +import csv +import json +import os +import re +import subprocess +import sys +import time +import urllib.error +import urllib.request +from pathlib import Path +from typing import Any + + +try: # Script execution from tools/ + from lea_healthcheck import REPO_ROOT, add_check, http_json, run_command, summarize +except ImportError: # Test/import execution from repository root + try: + from tools.lea_healthcheck import REPO_ROOT, add_check, http_json, run_command, summarize + except ImportError: + REPO_ROOT = Path(__file__).resolve().parents[1] + + def run_command(args: list[str], timeout: float = 5.0) -> tuple[int, str, str]: + try: + proc = subprocess.run( + args, + cwd=REPO_ROOT, + text=True, + encoding="utf-8", + errors="replace", + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + timeout=timeout, + check=False, + ) + return proc.returncode, proc.stdout.strip(), proc.stderr.strip() + except FileNotFoundError as exc: + return 127, "", str(exc) + except subprocess.TimeoutExpired as exc: + stdout = (exc.stdout or "").strip() if isinstance(exc.stdout, str) else "" + stderr = (exc.stderr or "").strip() if isinstance(exc.stderr, str) else "" + return 124, stdout, stderr or f"timeout after {timeout}s" + + def http_json(url: str, timeout: float = 2.0) -> tuple[bool, Any, str]: + try: + req = urllib.request.Request(url, headers={"User-Agent": "lea-micro-preflight/1.0"}) + with urllib.request.urlopen(req, timeout=timeout) as response: + body = response.read().decode("utf-8", errors="replace") + if response.status >= 400: + return False, None, f"http {response.status}: {body[:300]}" + return True, json.loads(body), "" + except json.JSONDecodeError as exc: + return False, None, f"invalid json: {exc}" + except (urllib.error.URLError, TimeoutError, OSError) as exc: + return False, None, str(exc) + + def add_check( + checks: list[dict[str, Any]], + name: str, + status: str, + summary: str, + details: Any | None = None, + ) -> None: + checks.append({"name": name, "status": status, "summary": summary, "details": details}) + + def summarize(checks: list[dict[str, Any]]) -> str: + if any(check["status"] == "fail" for check in checks): + return "fail" + if any(check["status"] == "warn" for check in checks): + return "warn" + return "ok" + + +DEFAULT_MIN_VRAM_FREE_MIB = 4000 +DEFAULT_MIN_RAM_AVAILABLE_MIB = 8192 +DEFAULT_MAX_SWAP_USED_MIB = 4096 +DEFAULT_MAX_SWAP_USED_PCT = 70.0 + +REQUIRED_MODELS = ("qwen2.5vl:7b-rpa", "qwen2.5:7b") +DEFAULT_RESIDENT_WARN_MODEL = "qwen2.5vl:7b-rpa" + + +def _parse_mib_int(value: str) -> int: + match = re.search(r"-?\d+", value.replace("\u00a0", " ")) + if not match: + raise ValueError(f"cannot parse integer from {value!r}") + return int(match.group(0)) + + +def parse_free_m(output: str) -> dict[str, dict[str, int]]: + """Parse `free -m` output into mem/swap dictionaries. + + Handles localized column names by normalizing to English keys. + """ + header: list[str] = [] + parsed: dict[str, dict[str, int]] = {} + fallback_columns = { + "mem": ["total", "used", "free", "shared", "buff/cache", "available"], + "swap": ["total", "used", "free"], + } + + # Localization map: known foreign column names → English + _LOCALIZATION_MAP = { + "disponible": "available", + "utilisé": "used", + "libre": "free", + "partagé": "shared", + "tamp/cache": "buff/cache", # French truncation of "tampon/cache" + } + + def _normalize(name: str) -> str: + return _LOCALIZATION_MAP.get(name.lower(), name.lower()) + + for raw_line in output.splitlines(): + parts = raw_line.strip().split() + if not parts: + continue + + if parts[0].lower() == "total": + header = [_normalize(p) for p in parts] + continue + + label = parts[0].rstrip(":").lower() + # Handle localized row labels: "échange" = "swap" (French) + if label == "échange": + label = "swap" + if label not in ("mem", "swap"): + continue + + values = parts[1:] + if label == "mem" and header: + columns = header[: len(values)] + else: + columns = fallback_columns[label][: len(values)] + parsed[label] = {key: int(value) for key, value in zip(columns, values)} + + if "mem" not in parsed: + raise ValueError("missing Mem line in free output") + if "available" not in parsed["mem"] and "free" in parsed["mem"]: + parsed["mem"]["available"] = parsed["mem"]["free"] + if "available" not in parsed["mem"]: + raise ValueError("missing available memory in free output") + if "swap" not in parsed: + raise ValueError("missing Swap line in free output") + return parsed + + +def parse_nvidia_smi_memory(output: str) -> list[dict[str, int]]: + """Parse `nvidia-smi --query-gpu=memory.free,memory.total` CSV output.""" + gpus: list[dict[str, int]] = [] + for row in csv.reader(output.splitlines()): + cells = [cell.strip() for cell in row if cell.strip()] + if not cells: + continue + if len(cells) < 2: + raise ValueError(f"expected two CSV columns, got {cells!r}") + gpus.append( + { + "free_mib": _parse_mib_int(cells[0]), + "total_mib": _parse_mib_int(cells[1]), + } + ) + if not gpus: + raise ValueError("nvidia-smi returned no GPU memory rows") + return gpus + + +def extract_ollama_tags(data: Any) -> set[str]: + """Extract model tags from Ollama `/api/tags` or `/api/ps` style JSON.""" + if isinstance(data, dict): + models = data.get("models") or [] + elif isinstance(data, list): + models = data + else: + return set() + + tags: set[str] = set() + for entry in models: + if not isinstance(entry, dict): + continue + for key in ("name", "model"): + value = entry.get(key) + if isinstance(value, str) and value: + tags.add(value) + return tags + + +def check_gpu(checks: list[dict[str, Any]], min_vram_free_mib: int, timeout: float) -> None: + code, stdout, stderr = run_command( + [ + "nvidia-smi", + "--query-gpu=memory.free,memory.total", + "--format=csv,noheader,nounits", + ], + timeout=timeout, + ) + if code != 0: + summary = "nvidia-smi not available" if code == 127 else stderr or stdout or f"exit {code}" + add_check(checks, "gpu:nvidia-smi", "fail", summary) + return + + try: + gpus = parse_nvidia_smi_memory(stdout) + except ValueError as exc: + add_check(checks, "gpu:nvidia-smi", "fail", f"cannot parse nvidia-smi output: {exc}", stdout) + return + + add_check(checks, "gpu:nvidia-smi", "ok", f"query ok, {len(gpus)} GPU(s)", {"gpus": gpus}) + best_free = max(gpu["free_mib"] for gpu in gpus) + status = "ok" if best_free >= min_vram_free_mib else "fail" + add_check( + checks, + "gpu:vram-free", + status, + f"best free VRAM {best_free} MiB / required {min_vram_free_mib} MiB", + {"threshold_mib": min_vram_free_mib, "gpus": gpus}, + ) + + +def check_memory( + checks: list[dict[str, Any]], + min_ram_available_mib: int, + max_swap_used_mib: int, + max_swap_used_pct: float, + timeout: float, +) -> None: + code, stdout, stderr = run_command(["free", "-m"], timeout=timeout) + if code != 0: + add_check(checks, "memory:free", "fail", stderr or stdout or f"exit {code}") + return + + try: + memory = parse_free_m(stdout) + except ValueError as exc: + add_check(checks, "memory:free", "fail", f"cannot parse free -m output: {exc}", stdout) + return + + mem = memory["mem"] + available = mem["available"] + add_check( + checks, + "ram:available", + "ok" if available >= min_ram_available_mib else "fail", + f"available RAM {available} MiB / required {min_ram_available_mib} MiB", + {"threshold_mib": min_ram_available_mib, "mem": mem}, + ) + + swap = memory["swap"] + swap_total = swap.get("total", 0) + swap_used = swap.get("used", 0) + if swap_total <= 0: + add_check(checks, "swap:usage", "fail", "swap total is 0 MiB", {"swap": swap}) + return + + swap_used_pct = (swap_used / swap_total) * 100.0 + swap_ok = swap_used <= max_swap_used_mib and swap_used_pct <= max_swap_used_pct + add_check( + checks, + "swap:usage", + "ok" if swap_ok else "fail", + f"swap used {swap_used} MiB ({swap_used_pct:.1f}%) / limits {max_swap_used_mib} MiB and {max_swap_used_pct:.1f}%", + { + "max_used_mib": max_swap_used_mib, + "max_used_pct": max_swap_used_pct, + "used_pct": round(swap_used_pct, 2), + "swap": swap, + }, + ) + + +def check_ollama( + checks: list[dict[str, Any]], + base_url: str, + required_models: tuple[str, ...], + resident_warn_model: str, + timeout: float, +) -> None: + base = base_url.rstrip("/") + ok, tags_data, error = http_json(f"{base}/api/tags", timeout=timeout) + if not ok or not isinstance(tags_data, dict): + add_check(checks, "ollama:tags", "fail", error or "cannot read /api/tags") + return + + tags = extract_ollama_tags(tags_data) + add_check(checks, "ollama:tags", "ok", f"/api/tags readable, {len(tags)} tag(s)") + missing = [model for model in required_models if model not in tags] + if missing: + add_check( + checks, + "ollama:required-models", + "fail", + f"missing required model(s): {', '.join(missing)}", + {"required": list(required_models), "present": sorted(tags)}, + ) + else: + add_check( + checks, + "ollama:required-models", + "ok", + f"{len(required_models)} required model(s) present", + {"required": list(required_models)}, + ) + + ok, ps_data, error = http_json(f"{base}/api/ps", timeout=timeout) + if not ok or not isinstance(ps_data, dict): + add_check(checks, "ollama:ps", "fail", error or "cannot read /api/ps") + return + + resident = extract_ollama_tags(ps_data) + add_check(checks, "ollama:ps", "ok", f"/api/ps readable, {len(resident)} resident model(s)") + if resident_warn_model in resident: + add_check(checks, "ollama:resident-vlm", "ok", f"{resident_warn_model} resident") + else: + add_check( + checks, + "ollama:resident-vlm", + "warn", + f"{resident_warn_model} is not resident; no warmup was attempted", + {"resident": sorted(resident)}, + ) + + +def print_text(report: dict[str, Any]) -> None: + print(f"Lea micro preflight: {report['overall'].upper()}") + print(f"Timestamp: {report['timestamp']}") + print("Warmup: disabled") + print() + for check in report["checks"]: + print(f"[{check['status'].upper():4}] {check['name']} - {check['summary']}") + + +def parse_args(argv: list[str]) -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--json", action="store_true", help="print machine-readable JSON") + parser.add_argument("--strict", action="store_true", help="exit 1 when only warnings are present") + parser.add_argument("--ollama-url", default=os.environ.get("OLLAMA_BASE_URL", "http://127.0.0.1:11434")) + parser.add_argument("--timeout", type=float, default=4.0, help="per-check timeout in seconds") + parser.add_argument("--min-vram-free-mib", type=int, default=DEFAULT_MIN_VRAM_FREE_MIB) + parser.add_argument("--min-ram-available-mib", type=int, default=DEFAULT_MIN_RAM_AVAILABLE_MIB) + parser.add_argument("--max-swap-used-mib", type=int, default=DEFAULT_MAX_SWAP_USED_MIB) + parser.add_argument("--max-swap-used-pct", type=float, default=DEFAULT_MAX_SWAP_USED_PCT) + parser.add_argument( + "--required-model", + action="append", + dest="required_models", + help="required Ollama model tag; may be repeated", + ) + parser.add_argument("--resident-warn-model", default=DEFAULT_RESIDENT_WARN_MODEL) + return parser.parse_args(argv) + + +def build_report(args: argparse.Namespace) -> dict[str, Any]: + required_models = tuple(args.required_models or REQUIRED_MODELS) + checks: list[dict[str, Any]] = [] + + check_gpu(checks, args.min_vram_free_mib, args.timeout) + check_memory( + checks, + args.min_ram_available_mib, + args.max_swap_used_mib, + args.max_swap_used_pct, + args.timeout, + ) + check_ollama(checks, args.ollama_url, required_models, args.resident_warn_model, args.timeout) + + return { + "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S%z"), + "overall": summarize(checks), + "repo": str(REPO_ROOT), + "warmup": "disabled", + "thresholds": { + "min_vram_free_mib": args.min_vram_free_mib, + "min_ram_available_mib": args.min_ram_available_mib, + "max_swap_used_mib": args.max_swap_used_mib, + "max_swap_used_pct": args.max_swap_used_pct, + }, + "ollama_url": args.ollama_url, + "required_models": list(required_models), + "checks": checks, + } + + +def main(argv: list[str]) -> int: + args = parse_args(argv) + report = build_report(args) + + if args.json: + print(json.dumps(report, indent=2, sort_keys=True)) + else: + print_text(report) + + if report["overall"] == "fail": + return 2 + if args.strict and report["overall"] == "warn": + return 1 + return 0 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv[1:])) diff --git a/tools/session_cleaner.py b/tools/session_cleaner.py index 515598aa0..63acc17cf 100644 --- a/tools/session_cleaner.py +++ b/tools/session_cleaner.py @@ -882,8 +882,8 @@ _SESSION_TEMPLATE = """ {{ a.position }} {{ a.window_title|truncate(40) }} - {% if a.text %}{{ a.text|truncate(60) }}{% endif %} - {% if a.keys %}{{ a.keys }}{% endif %} + {% if a["text"] %}{{ a["text"]|truncate(60) }}{% endif %} + {% if a["keys"] %}{{ a["keys"] }}{% endif %} {% if a.shot_file %} diff --git a/visual_workflow_builder/backend/api_v3/dag_execute.py b/visual_workflow_builder/backend/api_v3/dag_execute.py index dfd1bca24..5a3290838 100644 --- a/visual_workflow_builder/backend/api_v3/dag_execute.py +++ b/visual_workflow_builder/backend/api_v3/dag_execute.py @@ -1040,16 +1040,27 @@ def _load_anchor_image_b64(anchor_id: str) -> Optional[str]: return None +def _first_non_empty(*values: Any) -> str: + for value in values: + text = str(value or "").strip() + if text and text.casefold() not in {"none", "null"}: + return text + return "" + + def _load_anchor_metadata(anchor_id: str) -> Optional[Dict]: """Charger les métadonnées d'une ancre (bounding_box, taille, etc.).""" backend_dir = Path(__file__).resolve().parent.parent + metadata: Dict[str, Any] = {} # 1. Ancien format : metadata.json meta_path = backend_dir / 'data' / 'anchor_images' / anchor_id / 'metadata.json' if meta_path.exists(): try: with open(meta_path, 'r', encoding='utf-8') as f: - return json.load(f) + loaded = json.load(f) + if isinstance(loaded, dict): + metadata.update(loaded) except Exception: pass @@ -1059,19 +1070,55 @@ def _load_anchor_metadata(anchor_id: str) -> Optional[Dict]: db_path = backend_dir / 'instance' / 'workflows.db' conn = sqlite3.connect(str(db_path)) row = conn.execute( - "SELECT bbox_x, bbox_y, bbox_width, bbox_height, screen_width, screen_height " + "SELECT bbox_x, bbox_y, bbox_width, bbox_height, screen_width, screen_height, " + "target_text, ocr_description, description " "FROM visual_anchors WHERE id=?", (anchor_id,) ).fetchone() conn.close() if row: - return { + db_metadata = { 'bounding_box': {'x': row[0], 'y': row[1], 'width': row[2], 'height': row[3]}, 'original_size': {'width': row[4] or 1920, 'height': row[5] or 1080}, + 'target_text': row[6] or '', + 'ocr_description': row[7] or '', + 'description': row[8] or '', } + metadata.setdefault('bounding_box', db_metadata['bounding_box']) + metadata.setdefault('original_size', db_metadata['original_size']) + for key in ('target_text', 'ocr_description', 'description'): + value = _first_non_empty(metadata.get(key), db_metadata.get(key)) + if value: + metadata[key] = value except Exception: pass - return None + return metadata or None + + +def _anchor_semantic_target_spec(anchor_id: str, anchor_meta: Optional[Dict]) -> Dict[str, Any]: + """Construire les libellés humains stables d'une ancre visuelle.""" + if not anchor_id: + return {} + + anchor_meta = anchor_meta or {} + target_text = _first_non_empty(anchor_meta.get('target_text')) + ocr_description = _first_non_empty(anchor_meta.get('ocr_description')) + description = _first_non_empty(anchor_meta.get('description')) + + target_spec: Dict[str, Any] = {'anchor_id': anchor_id} + if target_text: + target_spec['target_text'] = target_text + target_spec['by_text'] = target_text + target_spec['by_text_source'] = 'visual_anchor' + if ocr_description: + target_spec['ocr_description'] = ocr_description + if description: + target_spec['description'] = description + target_spec['vlm_description'] = description + elif ocr_description: + target_spec['vlm_description'] = ocr_description + + return target_spec def _inject_anchor_targeting(action: Dict, anchor_id: str) -> None: @@ -1101,19 +1148,17 @@ def _inject_anchor_targeting(action: Dict, anchor_id: str) -> None: action['x_pct'] = round(cx, 4) action['y_pct'] = round(cy, 4) + target_spec = _anchor_semantic_target_spec(anchor_id, anchor_meta) + # Image de l'ancre pour template matching côté agent anchor_b64 = _load_anchor_image_b64(anchor_id) if anchor_b64: - target_spec = { - 'anchor_image_base64': anchor_b64, - 'anchor_id': anchor_id, - } + target_spec['anchor_image_base64'] = anchor_b64 if anchor_meta: target_spec['anchor_bbox'] = anchor_meta.get('bounding_box', {}) target_spec['original_size'] = anchor_meta.get('original_size', {}) action['visual_mode'] = True - action['target_spec'] = target_spec logger.info( "Action %s : ancre '%s' chargée (%d Ko), visual_mode activé", action.get('action_id', '?'), @@ -1127,6 +1172,23 @@ def _inject_anchor_targeting(action: Dict, anchor_id: str) -> None: anchor_id, ) + if target_spec: + action['target_spec'] = target_spec + if target_spec.get('by_text') or target_spec.get('vlm_description'): + action['visual_mode'] = True + + label = _first_non_empty( + target_spec.get('by_text'), + target_spec.get('target_text'), + target_spec.get('description'), + target_spec.get('ocr_description'), + target_spec.get('vlm_description'), + ) + if label: + action.setdefault('target_text', target_spec.get('target_text') or label) + action.setdefault('target_description', label) + action.setdefault('description', target_spec.get('description') or label) + @api_v3_bp.route('/execute-windows', methods=['POST']) def execute_windows(): diff --git a/visual_workflow_builder/frontend/src/components/Executor/VWBExecutorExtension.tsx b/visual_workflow_builder/frontend/src/components/Executor/VWBExecutorExtension.tsx index fa9605954..049146f19 100644 --- a/visual_workflow_builder/frontend/src/components/Executor/VWBExecutorExtension.tsx +++ b/visual_workflow_builder/frontend/src/components/Executor/VWBExecutorExtension.tsx @@ -685,4 +685,4 @@ const VWBExecutorExtension: React.FC = ({ ); }; -export default VWBExecutorExtension; \ No newline at end of file +export default VWBExecutorExtension; diff --git a/visual_workflow_builder/frontend/src/hooks/useVWBExecution.ts b/visual_workflow_builder/frontend/src/hooks/useVWBExecution.ts index df7fc46d3..3249b75e2 100644 --- a/visual_workflow_builder/frontend/src/hooks/useVWBExecution.ts +++ b/visual_workflow_builder/frontend/src/hooks/useVWBExecution.ts @@ -634,4 +634,4 @@ export const useVWBExecution = ( isVWBStep: (step: Step) => vwbExecutionService.isVWBStep(step), validateStep: (step: Step) => vwbExecutionService.validateStep(step) }; -}; \ No newline at end of file +}; diff --git a/visual_workflow_builder/frontend/src/services/vwbExecutionService.ts b/visual_workflow_builder/frontend/src/services/vwbExecutionService.ts index 2d23b955b..56412045c 100644 --- a/visual_workflow_builder/frontend/src/services/vwbExecutionService.ts +++ b/visual_workflow_builder/frontend/src/services/vwbExecutionService.ts @@ -554,4 +554,4 @@ export const useVWBExecutionService = () => { cancelExecution: () => vwbExecutionService.cancelExecution(), isExecutionRunning: () => vwbExecutionService.isExecutionRunning(), }; -}; \ No newline at end of file +}; diff --git a/web_dashboard/app.py b/web_dashboard/app.py index b7140e717..a80685f6a 100644 --- a/web_dashboard/app.py +++ b/web_dashboard/app.py @@ -76,7 +76,7 @@ _DASHBOARD_AUTH_DISABLED = os.getenv("DASHBOARD_AUTH_DISABLED", "").lower() in ( # avant un déploiement prod. On ne veut surtout pas générer un mot de passe # aléatoire à chaque boot (même problème que l'API token auto-généré). if not _DASHBOARD_PASSWORD and not _DASHBOARD_AUTH_DISABLED: - _DASHBOARD_PASSWORD = "changeme-dashboard-Medecin2026!" + _DASHBOARD_PASSWORD = "changeme-dashboard-RpaVision2026!" api_logger.warning( "[SÉCURITÉ] DASHBOARD_PASSWORD non défini en env — utilisation d'un " "mot de passe par défaut temporaire. DÉFINIR DASHBOARD_PASSWORD " diff --git a/web_dashboard/templates/knowledge_base.html b/web_dashboard/templates/knowledge_base.html index d65d3d8ee..f27db0e36 100644 --- a/web_dashboard/templates/knowledge_base.html +++ b/web_dashboard/templates/knowledge_base.html @@ -492,6 +492,9 @@ function renderCompetences(competences) { } async function openCompetenceTest(competenceId) { + if (!confirmRunDialogReplay(competenceId)) { + return; + } clearTestPolling(); currentTest = { competenceId, replayId: null, lastState: null }; document.getElementById('testModal').classList.add('visible'); @@ -528,6 +531,24 @@ async function openCompetenceTest(competenceId) { } } +function confirmRunDialogReplay(competenceId) { + if (!mayOpenRunDialog(competenceId)) return true; + return window.confirm( + "Ce test peut ouvrir Win+R / Exécuter. Si la fenêtre Exécuter est déjà ouverte, le replay peut produire un faux positif. Fermez-la ou vérifiez l'état du poste avant de continuer." + ); +} + +function mayOpenRunDialog(competenceId) { + const normalized = String(competenceId || '') + .normalize('NFD') + .replace(/[\u0300-\u036f]/g, '') + .toLowerCase(); + return normalized.includes('win_r') + || normalized.includes('windows_r') + || normalized.includes('executer') + || normalized.includes('run_dialog'); +} + function startTestPolling() { clearTestPolling(); pollCompetenceTest(); @@ -568,6 +589,8 @@ function renderTestState(state) { const remaining = state.actions_remaining ?? ''; const failed = state.failed_action || {}; const phase = state.pause_phase || failed.phase || inferPausePhase(failed.action_id || ''); + const stepResults = compactStepResults(state.step_results || state.results || []); + const evidenceAvailable = hasReplayEvidence(state, stepResults); const progress = [ `Replay: ${state.replay_id || currentTest?.replayId || '-'}`, `Machine: ${state.machine_id || '-'}`, @@ -585,15 +608,20 @@ function renderTestState(state) { pause_phase: phase, message: state.message || state.pause_message, failed_action: failed, - results_count: Array.isArray(state.step_results || state.results) ? (state.step_results || state.results).length : 0, + results_count: stepResults.length, + evidence_available: evidenceAvailable, }, null, 2); if (state.status === 'paused_need_help') { const msg = escapeHtml(state.message || state.pause_message || 'Validation humaine requise'); if (phase === 'after' || state.verdict_required || failed.verdict_required) { - document.getElementById('testStatus').innerHTML = `
${msg}
`; + const evidenceWarning = evidenceAvailable + ? '' + : '
Verdict valide bloqué : aucune step_results ni evidence replay disponible. Choisir Incertain/Invalide ou relancer avec evidence.
'; + const validDisabled = evidenceAvailable ? '' : 'disabled title="Aucune step_results ni evidence replay disponible"'; + document.getElementById('testStatus').innerHTML = `
${msg}
${evidenceWarning}`; document.getElementById('testActions').innerHTML = ` - + @@ -653,6 +681,10 @@ async function submitCompetenceVerdict(kind) { if (!currentTest?.competenceId || !currentTest?.replayId) return; const state = currentTest.lastState || {}; const stepResults = compactStepResults(state.step_results || state.results || []); + if (kind === 'valid' && !hasReplayEvidence(state, stepResults)) { + document.getElementById('testStatus').innerHTML = '
Verdict valide refusé : aucune step_results ni evidence replay disponible.
'; + return; + } const payload = { verdict_id: newPromotionId(), verdict_kind: kind, @@ -710,6 +742,28 @@ function compactStepResults(results) { })); } +function hasReplayEvidence(state, stepResults) { + if (Array.isArray(stepResults) && stepResults.length > 0) return true; + return [ + state.evidence, + state.evidence_summary, + state.artifacts, + state.screenshots, + state.last_screenshot, + state.screenshot, + state.trace_path, + state.events, + ].some(hasMeaningfulEvidenceValue); +} + +function hasMeaningfulEvidenceValue(value) { + if (value === null || value === undefined) return false; + if (Array.isArray(value)) return value.length > 0; + if (typeof value === 'object') return Object.keys(value).length > 0; + if (typeof value === 'string') return value.trim() !== ''; + return Boolean(value); +} + function closeTestModal() { clearTestPolling(); document.getElementById('testModal').classList.remove('visible');