feat(vwb): add dashboard competence testing and health tools

This commit is contained in:
Dom
2026-06-02 16:27:19 +02:00
parent d38f0b0f2f
commit 18ed6cb751
23 changed files with 2769 additions and 27 deletions

View File

@@ -67,6 +67,41 @@ except ModuleNotFoundError:
flask_socketio.emit = _fake_emit
sys.modules["flask_socketio"] = flask_socketio
try:
    import prometheus_client  # noqa: F401
except ModuleNotFoundError:
    # prometheus_client is not installed: register a no-op stand-in module so
    # that code importing it and declaring metrics can still be imported.
    prometheus_client = types.ModuleType("prometheus_client")
    prometheus_client.CONTENT_TYPE_LATEST = "text/plain; version=0.0.4"

    def _fake_generate_latest(*_args, **_kwargs):
        """Return an empty metrics payload."""
        return b""

    class _FakeMetric:
        """Metric stand-in whose every operation is a no-op."""

        def __init__(self, *_args, **_kwargs):
            pass

        def labels(self, *_args, **_kwargs):
            # Label values may be given positionally or by keyword; accept
            # both so call sites written either way keep working.
            return self

        def inc(self, *_args, **_kwargs):
            return None

        def observe(self, *_args, **_kwargs):
            return None

        def set(self, *_args, **_kwargs):
            return None

        def info(self, *_args, **_kwargs):
            return None

    prometheus_client.generate_latest = _fake_generate_latest
    # One fake class backs every metric type exposed by the stub.
    prometheus_client.Counter = _FakeMetric
    prometheus_client.Histogram = _FakeMetric
    prometheus_client.Gauge = _FakeMetric
    prometheus_client.Info = _FakeMetric
    sys.modules["prometheus_client"] = prometheus_client
# =============================================================================
# GPU Preflight — vérification avant les tests GPU

View File

@@ -0,0 +1,33 @@
"""Conftest pour les tests securite — assure que le agent_v0 local de
rpa_vision_v3 est trouve avant le standalone de ~/ai/agent_v0/.
Le conftest racine ajoute /home/dom/ai/rpa_vision_v3 mais ne previent pas
le shadow par /home/dom/ai. On force l'ordre + on purge un eventuel
agent_v0 standalone deja charge depuis ~/ai/.
"""
import sys
from pathlib import Path
ROOT = str(Path(__file__).resolve().parents[2])

# Put the repository root at the very front of sys.path (same pattern as
# tests/integration/conftest.py): drop an existing entry, then re-insert.
try:
    sys.path.remove(ROOT)
except ValueError:
    pass
sys.path.insert(0, ROOT)

# Evict any already-imported agent_v0 module whose file does not live under
# the repository's own agent_v0 directory (e.g. a standalone copy elsewhere).
_AGENT_V0_LOCAL = str(Path(ROOT) / "agent_v0")
_foreign_modules = [
    _name
    for _name, _module in list(sys.modules.items())
    if (_name == "agent_v0" or _name.startswith("agent_v0."))
    and not (getattr(_module, "__file__", "") or "").startswith(_AGENT_V0_LOCAL)
]
for _mod_name in _foreign_modules:
    del sys.modules[_mod_name]

# Import the local package eagerly so that it is the one cached in
# sys.modules before any other conftest gets a chance to import a different
# copy. Best effort: a failing import must not break collection.
try:  # pragma: no cover - garde de path
    from agent_v0 import server_v1 as _sv1  # noqa: F401
except Exception:
    pass

View File

@@ -0,0 +1,212 @@
"""Tests securite /api/v1/lea/competences/candidate/persist.
Specs §6 :
- Token Bearer obligatoire
- Couplage machine_id (via guard fleet)
- Rate limit 10/min/machine_id
- Path traversal interdit (slug strict)
- PII detection (regle d'or HDS)
"""
from __future__ import annotations
import sys
from pathlib import Path
import pytest
_ROOT = str(Path(__file__).resolve().parents[2])
if _ROOT not in sys.path:
sys.path.insert(0, _ROOT)
pytestmark = pytest.mark.security
_TEST_API_TOKEN = "test_persist_security_token_xyz"
@pytest.fixture
def persist_client(monkeypatch, tmp_path):
    """Provide a TestClient for the streaming API with isolated state.

    The API token, the agent registry database and every competence storage
    path are redirected to ``tmp_path`` and the persist rate limiter is reset.

    Returns:
        A ``(client, tmp_path)`` tuple.
    """
    agents_db = str(tmp_path / "agents.db")
    monkeypatch.setenv("RPA_API_TOKEN", _TEST_API_TOKEN)
    monkeypatch.delenv("RPA_AUTH_DISABLED", raising=False)
    monkeypatch.setenv("RPA_AGENTS_DB_PATH", agents_db)

    # Imported only after the environment is prepared.
    from fastapi.testclient import TestClient
    from agent_v0.server_v1 import api_stream
    from agent_v0.server_v1.agent_registry import AgentRegistry
    from core.competences import persist as P

    monkeypatch.setattr(api_stream, "API_TOKEN", _TEST_API_TOKEN)
    monkeypatch.setattr(
        api_stream, "agent_registry", AgentRegistry(db_path=agents_db)
    )

    competences_root = tmp_path / "competences"
    candidate_dir = competences_root / "candidate"
    candidate_dir.mkdir(parents=True, exist_ok=True)
    storage_overrides = (
        ("COMPETENCES_ROOT", competences_root),
        ("CANDIDATE_DIR", candidate_dir),
        ("AUDIT_PATH", competences_root / "persist_audit.jsonl"),
        ("INCOMPLETE_PATH", competences_root / "incomplete_learnings.jsonl"),
    )
    for attr_name, location in storage_overrides:
        monkeypatch.setattr(P, attr_name, location)

    P.persist_rate_limiter.reset()
    return TestClient(api_stream.app, raise_server_exceptions=False), tmp_path
def _good_payload(name="Securite Test", persist_id="uuid-sec-1"):
    """Return a fresh, well-formed persist request body.

    Args:
        name: Value of the ``name`` field.
        persist_id: Value stored under ``learning_metadata.persist_id``.

    Returns:
        A new dict on every call, with a single ``click`` step and no
        preconditions.
    """
    click_step = {"kind": "click", "parameters": {"target": "OK"}}
    workflow_ir = {"steps": [click_step], "preconditions": []}
    return {
        "name": name,
        "machine_id": "machine_sec_x",
        "workflow_ir": workflow_ir,
        "learning_metadata": {"persist_id": persist_id},
    }
# ---------------------------------------------------------------------------
# Token Bearer
# ---------------------------------------------------------------------------
class TestPersistAuthToken:
    """Bearer-token checks on the candidate persist endpoint."""

    def test_no_token_returns_401(self, persist_client):
        """A request without an Authorization header gets 401."""
        http, _tmp = persist_client
        response = http.post(
            "/api/v1/lea/competences/candidate/persist",
            json=_good_payload(),
        )
        assert response.status_code == 401

    def test_wrong_token_returns_401(self, persist_client):
        """A request carrying an unknown bearer token gets 401."""
        http, _tmp = persist_client
        bad_auth = {"Authorization": "Bearer wrong_token_xyz"}
        response = http.post(
            "/api/v1/lea/competences/candidate/persist",
            json=_good_payload(),
            headers=bad_auth,
        )
        assert response.status_code == 401

    def test_valid_token_returns_201(self, persist_client):
        """A request carrying the configured bearer token gets 201."""
        http, _tmp = persist_client
        good_auth = {"Authorization": f"Bearer {_TEST_API_TOKEN}"}
        response = http.post(
            "/api/v1/lea/competences/candidate/persist",
            json=_good_payload(),
            headers=good_auth,
        )
        assert response.status_code == 201
# ---------------------------------------------------------------------------
# Rate limit
# ---------------------------------------------------------------------------
class TestPersistRateLimit:
    """Rate-limit behaviour of the candidate persist endpoint."""

    def test_rate_limit_11th_call_returns_429(self, persist_client, monkeypatch):
        """Calls beyond the per-minute quota get 429 with a Retry-After header.

        The quota is lowered to 3 to keep the test fast and deterministic, so
        the rejected request is the 4th one here.
        """
        client, _ = persist_client
        from core.competences import persist as P

        # Lower the quota through monkeypatch so the original value is
        # restored even when an assertion below fails; a plain assignment
        # followed by a trailing reset would leak the lowered quota into
        # other tests on failure.
        monkeypatch.setattr(P.persist_rate_limiter, "max_per_minute", 3)
        P.persist_rate_limiter.reset()
        headers = {"Authorization": f"Bearer {_TEST_API_TOKEN}"}

        # The first 3 calls fit within the quota.
        for i in range(3):
            payload = _good_payload(name=f"Rate {i}", persist_id=f"uuid-rate-{i}")
            r = client.post(
                "/api/v1/lea/competences/candidate/persist",
                json=payload,
                headers=headers,
            )
            assert r.status_code in (201, 409), f"call #{i}: {r.text}"

        # The 4th call exceeds the quota.
        r4 = client.post(
            "/api/v1/lea/competences/candidate/persist",
            json=_good_payload(name="Rate Trop", persist_id="uuid-rate-overflow"),
            headers=headers,
        )
        assert r4.status_code == 429
        assert "Retry-After" in {k.title() for k in r4.headers.keys()}
# ---------------------------------------------------------------------------
# Path traversal & slug strict
# ---------------------------------------------------------------------------
class TestPersistPathTraversal:
    """Path-traversal and strict-slug checks on the persist endpoint."""

    def test_path_traversal_in_name_blocked(self, persist_client):
        """A ``../`` name is either sanitised (201) or rejected (400), and in
        both cases nothing is written outside the candidate directory."""
        client, tmp_path = persist_client
        etc_target = Path("/etc/passwd.yaml")
        # Snapshot first: the only meaningful check is that the request did
        # not change whether this file exists.
        existed_before = etc_target.exists()

        resp = client.post(
            "/api/v1/lea/competences/candidate/persist",
            json={
                **_good_payload(),
                "name": "../../etc/passwd",
            },
            headers={"Authorization": f"Bearer {_TEST_API_TOKEN}"},
        )

        # The strict slug either strips `/`, `.`, etc. and persists the
        # sanitised name, or the request is rejected.
        assert resp.status_code in (201, 400)
        if resp.status_code == 201:
            yaml_path = resp.json()["yaml_path"]
            assert yaml_path.startswith("data/competences/candidate/")

        # Whatever the status, no file may have escaped the candidate dir:
        # neither at the absolute location, nor where "../../etc/passwd"
        # resolves relative to <tmp_path>/competences/candidate.
        assert etc_target.exists() == existed_before
        assert not (tmp_path / "etc").exists()

    def test_slug_with_null_byte_blocked(self, persist_client):
        """A name containing a NUL byte never produces a server error."""
        client, _ = persist_client
        resp = client.post(
            "/api/v1/lea/competences/candidate/persist",
            json={
                **_good_payload(),
                "name": "abc\x00xyz",
            },
            headers={"Authorization": f"Bearer {_TEST_API_TOKEN}"},
        )
        # NUL is a control character: the endpoint may strip it (leaving a
        # valid slug) or reject the request. Both are tolerated; a 500 is not.
        assert resp.status_code in (201, 400)
# ---------------------------------------------------------------------------
# PII detection
# ---------------------------------------------------------------------------
class TestPersistPiiDetection:
    """PII screening on the candidate persist endpoint."""

    def test_email_in_workflow_rejected(self, persist_client):
        """An e-mail address inside a workflow step is refused with 400."""
        http, _tmp = persist_client
        body = _good_payload(persist_id="uuid-pii-email")
        pii_step = {
            "kind": "type",
            "parameters": {"value": "patient: john.doe@hopital.fr"},
        }
        body["workflow_ir"]["steps"].append(pii_step)
        auth = {"Authorization": f"Bearer {_TEST_API_TOKEN}"}
        response = http.post(
            "/api/v1/lea/competences/candidate/persist",
            json=body,
            headers=auth,
        )
        assert response.status_code == 400
        assert response.json()["detail"]["error"] == "pii_detected"

    def test_phone_in_annotations_rejected(self, persist_client):
        """A phone number inside the semantic annotations is refused with 400."""
        http, _tmp = persist_client
        body = _good_payload(persist_id="uuid-pii-phone")
        body["annotations_semantiques"] = {"intent_fr": "appeler 01 23 45 67 89"}
        auth = {"Authorization": f"Bearer {_TEST_API_TOKEN}"}
        response = http.post(
            "/api/v1/lea/competences/candidate/persist",
            json=body,
            headers=auth,
        )
        assert response.status_code == 400
        assert response.json()["detail"]["error"] == "pii_detected"

View File

@@ -63,6 +63,16 @@ class TestDashboardRoutes:
assert 'competences' in data
assert 'items' in data['competences']
def test_knowledge_base_page_includes_test_safety_guards(self, client):
    """The knowledge-base page embeds the Win+R and empty-evidence guards."""
    response = client.get('/knowledge-base')
    assert response.status_code == 200
    page = response.get_data(as_text=True)
    # Markers the served HTML must contain, checked in this order.
    expected_markers = (
        'confirmRunDialogReplay',
        'peut ouvrir Win+R / Exécuter',
        'hasReplayEvidence',
        'Verdict valide refusé',
    )
    for marker in expected_markers:
        assert marker in page
def test_dashboard_replay_competence_proxy(self, client, monkeypatch):
"""Le dashboard lance un replay competence supervise via streaming."""
calls = []

View File

@@ -6,6 +6,7 @@ Vérifie que les fonctions d'extraction d'apps et de génération
d'actions de setup 100% visuelles fonctionnent correctement.
"""
import pytest
import os
import sys
from pathlib import Path
@@ -13,6 +14,10 @@ from pathlib import Path
ROOT = Path(__file__).parent.parent.parent
sys.path.insert(0, str(ROOT))
# api_stream est fail-closed si RPA_API_TOKEN est absent. Ces tests ciblent les
# helpers de setup, pas le bootstrap d'authentification.
os.environ.setdefault("RPA_API_TOKEN", "test_env_setup_token_0123456789abcdef")
from agent_v0.server_v1.api_stream import (
_extract_required_apps_from_events,
_extract_required_apps_from_workflow,
@@ -630,7 +635,7 @@ class TestGenerateSetupActions:
}
actions = _generate_setup_actions(app_info)
assert len(actions) == 7
assert len(actions) == 10
assert actions[0]["type"] == "key_combo"
assert actions[0]["keys"] == ["win", "r"]
@@ -652,7 +657,17 @@ class TestGenerateSetupActions:
assert actions[5]["duration_ms"] == 2000
assert actions[6]["type"] == "verify_screen"
assert actions[6]["expected_window_title_contains"] == ["Bloc-notes", "notepad"]
assert actions[6]["_setup_step"] == "verify_app_ready_before_fresh_document"
assert actions[7]["type"] == "key_combo"
assert actions[7]["keys"] == ["ctrl", "n"]
assert actions[7]["_setup_step"] == "ensure_fresh_document"
assert actions[8]["type"] == "wait"
assert actions[8]["duration_ms"] == 400
assert actions[9]["type"] == "verify_screen"
assert actions[9]["expected_window_title_contains"] == ["Bloc-notes", "notepad"]
# Toutes les actions sont marquées comme phase setup
for action in actions:
@@ -1126,9 +1141,10 @@ class TestSetupPipeline:
app_info = _extract_required_apps_from_events(events)
assert app_info["primary_app"] == "Notepad.exe"
assert app_info["has_neutral_window_title"] is True
actions = _generate_setup_actions(app_info)
assert len(actions) == 7
assert len(actions) == 10
types = [a["type"] for a in actions]
steps = [a.get("_setup_step") for a in actions]
@@ -1139,11 +1155,14 @@ class TestSetupPipeline:
"wait_launch_command",
"submit_run_dialog",
"wait_app_launch",
"verify_app_ready_before_fresh_document",
"ensure_fresh_document",
"wait_fresh_document",
"verify_app_ready",
]
assert steps == expected_step_order, steps
assert types.count("key_combo") == 2
assert types.count("key_combo") == 3
idx_type = steps.index("type_launch_command")
assert actions[idx_type]["text"] == "notepad"
@@ -1165,14 +1184,15 @@ class TestSetupPipeline:
app_info = _extract_required_apps_from_workflow(workflow)
assert app_info["primary_app"] == "Notepad.exe"
assert app_info["has_neutral_window_title"] is True
actions = _generate_setup_actions(app_info)
assert len(actions) == 7
assert len(actions) == 10
# Le texte tapé doit être la commande shell pour le setup Win+R.
type_action = [a for a in actions if a["type"] == "type"][0]
assert type_action["text"] == "notepad"
# Le setup Notepad s'appuie maintenant sur deux key_combo.
# Win+R, Enter, puis Ctrl+N pour garantir un document vierge.
key_combos = [a for a in actions if a["type"] == "key_combo"]
assert len(key_combos) == 2
assert len(key_combos) == 3

View File

@@ -0,0 +1,62 @@
from pathlib import Path
import sys
from types import SimpleNamespace
from PIL import Image
import core.llm.ocr_extractor as ocr_extractor
def _blank_png(path: Path) -> None:
    """Save a 120x40 all-white RGB image at ``path``."""
    canvas = Image.new("RGB", (120, 40), "white")
    canvas.save(path)
def test_extract_digits_tesseract_filters_numeric_pattern(tmp_path, monkeypatch):
    """Only OCR tokens matching the numeric pattern are returned, in order."""
    screenshot = tmp_path / "screen.png"
    _blank_png(screenshot)

    def fake_image_to_string(_img, lang, config):
        # The extractor is expected to request English with a digit whitelist.
        assert lang == "eng"
        assert "tessedit_char_whitelist=0123456789" in config
        return "IPP 25003284 MOREL\n25003362 abc 1234\n25012257"

    # Swap in a fake pytesseract module exposing only image_to_string.
    fake_pytesseract = SimpleNamespace(image_to_string=fake_image_to_string)
    monkeypatch.setitem(sys.modules, "pytesseract", fake_pytesseract)

    extracted = ocr_extractor.extract_digits_tesseract_from_image(
        str(screenshot),
        pattern=r"^25\d{6}$",
    )

    assert extracted == ["25003284", "25003362", "25012257"]
def test_extract_table_tesseract_engine_delegates_to_digits(tmp_path, monkeypatch):
    """With engine="tesseract", table extraction forwards its arguments to
    the digit extractor and returns that function's result."""
    screenshot = tmp_path / "screen.png"
    _blank_png(screenshot)
    recorded = {}
    canned_values = ["25003284", "25003362"]

    def fake_extract_digits(image_path_arg, region=None, pattern=None, limit=None):
        recorded["args"] = (image_path_arg, region, pattern, limit)
        return canned_values

    monkeypatch.setattr(
        ocr_extractor,
        "extract_digits_tesseract_from_image",
        fake_extract_digits,
    )

    result = ocr_extractor.extract_table_from_image(
        str(screenshot),
        region=(10, 20, 30, 40),
        pattern=r"^25\d{6}$",
        limit=2,
        engine="tesseract",
    )

    assert result == ["25003284", "25003362"]
    expected_call = (str(screenshot), (10, 20, 30, 40), r"^25\d{6}$", 2)
    assert recorded["args"] == expected_call

View File

@@ -0,0 +1,414 @@
"""
Tests unitaires — rebranchement P1-LEA-SHADOW du bouton "Apprenez-moi".
Vérifie que :
1. Le client HTTP `start_learning_session` POSTe bien le payload attendu
sur `/api/learn/start` avec le bon Authorization Bearer.
2. Le retry+backoff fonctionne sur erreur transitoire puis succès.
3. Une 3e tentative échouée lève LeaOrchestratorError.
4. Les méthodes `_start_lea_orchestrator_session` de ChatWindow et
SmartTrayV1 sont fail-safe : si le client lève, on ne propage pas.
Les imports PyQt5/tkinter/pystray sont mockés pour permettre l'exécution
des tests sur Linux (CI / dev) sans dépendances Windows.
"""
from __future__ import annotations
import sys
import types
import unittest
from pathlib import Path
from unittest.mock import MagicMock, patch
# ---------------------------------------------------------------------------
# sys.path fix : pytest, lancé depuis la racine projet, peut insérer
# /home/dom/ai en tête (présence d'un autre `agent_v0` legacy dans ce dossier
# parent). Le shadow casse `import agent_v0.agent_v1`. On purge l'entrée
# parasite et on insère la racine du projet en tête.
# ---------------------------------------------------------------------------
_PROJECT_ROOT = Path(__file__).resolve().parents[2]
_PARENT = _PROJECT_ROOT.parent

# Drop the project's parent directory from sys.path (another `agent_v0`
# living there would shadow ours), then make sure the project root is
# importable. The parent is resolved once instead of once per entry.
_PARENT_RESOLVED = _PARENT.resolve()
sys.path[:] = [p for p in sys.path if Path(p).resolve() != _PARENT_RESOLVED]
if str(_PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(_PROJECT_ROOT))

# Evict an `agent_v0` that was imported from somewhere other than the project
# root. `__file__` can be present but None (namespace packages), so it is
# normalised to "" before the prefix test instead of calling None.startswith.
_existing = sys.modules.get("agent_v0")
if _existing is not None and not (
    getattr(_existing, "__file__", None) or ""
).startswith(str(_PROJECT_ROOT)):
    for _name in list(sys.modules):
        if _name == "agent_v0" or _name.startswith("agent_v0."):
            sys.modules.pop(_name, None)
# ---------------------------------------------------------------------------
# Stubs des dépendances lourdes UI (pystray, PIL.Image avec ImageDraw, etc.)
# pour permettre l'import de smart_tray / chat_window depuis Linux.
# ---------------------------------------------------------------------------
def _install_ui_stubs() -> None:
    """Install minimal stand-ins for pystray and PIL when they are needed.

    pystray is stubbed unless some module has already imported it. PIL is
    stubbed only when the real package cannot be imported: replacing an
    installed Pillow with MagicMock objects would stay in ``sys.modules`` for
    the rest of the session and break any later test that needs real images.
    """
    # pystray
    if "pystray" not in sys.modules:
        pystray_mod = types.ModuleType("pystray")
        pystray_mod.Menu = MagicMock()
        pystray_mod.Menu.SEPARATOR = object()
        pystray_mod.MenuItem = MagicMock()
        pystray_mod.Icon = MagicMock()
        sys.modules["pystray"] = pystray_mod

    # PIL.Image + PIL.ImageDraw: use the real package whenever it imports.
    try:
        import PIL.Image  # noqa: F401
        import PIL.ImageDraw  # noqa: F401
    except ImportError:
        pass
    else:
        return

    if "PIL" not in sys.modules:
        pil_mod = types.ModuleType("PIL")
        sys.modules["PIL"] = pil_mod
    if "PIL.Image" not in sys.modules:
        image_mod = types.ModuleType("PIL.Image")
        image_mod.Image = MagicMock
        image_mod.new = MagicMock(return_value=MagicMock())
        sys.modules["PIL.Image"] = image_mod
        sys.modules["PIL"].Image = image_mod  # type: ignore[attr-defined]
    if "PIL.ImageDraw" not in sys.modules:
        draw_mod = types.ModuleType("PIL.ImageDraw")
        draw_mod.Draw = MagicMock(return_value=MagicMock())
        sys.modules["PIL.ImageDraw"] = draw_mod
        sys.modules["PIL"].ImageDraw = draw_mod  # type: ignore[attr-defined]


_install_ui_stubs()
# ---------------------------------------------------------------------------
# Tests du client HTTP — partie facilement testable, sans dépendance UI.
# ---------------------------------------------------------------------------
class StartLearningSessionTests(unittest.TestCase):
    """Tests for the `start_learning_session` HTTP client."""

    def _make_response(self, status=200, json_data=None):
        """Build a MagicMock HTTP response.

        Args:
            status: Value exposed as ``status_code``; any value >= 400 makes
                ``raise_for_status()`` raise.
            json_data: Body returned by ``json()``. ``None`` selects a default
                LISTENING session payload.
        """
        if json_data is None:
            json_data = {
                "session_id": "sess-abc-123",
                "state": "LISTENING",
                "message": "Ok, je regarde. Vas-y, je note les actions...",
            }
        resp = MagicMock()
        resp.status_code = status
        resp.json.return_value = json_data
        if status >= 400:
            resp.raise_for_status.side_effect = Exception(f"HTTP {status}")
        else:
            resp.raise_for_status.return_value = None
        return resp

    @staticmethod
    def _fake_client_class(post_impl, on_init=None):
        """Return a class usable in place of ``httpx.Client``.

        Args:
            post_impl: Callable ``(url, json, headers)`` whose result (or
                exception) becomes the outcome of ``post``.
            on_init: Optional callable receiving the ``timeout`` the client
                was constructed with.
        """

        class _FakeClient:
            def __init__(self, timeout):
                if on_init is not None:
                    on_init(timeout)

            def __enter__(self):
                return self

            def __exit__(self, *a):
                return False

            def post(self, url, json, headers):
                return post_impl(url, json, headers)

        return _FakeClient

    def test_post_payload_and_bearer(self):
        """The POST carries the expected payload and Authorization Bearer."""
        from agent_v0.agent_v1.network.lea_orchestrator_client import (
            start_learning_session,
        )

        captured = {}

        def record_post(url, json, headers):
            captured["url"] = url
            captured["json"] = json
            captured["headers"] = headers
            return self._make_response(json_data={
                "session_id": "sess-1",
                "state": "LISTENING",
                "message": "Ok, je regarde.",
            })

        def record_timeout(timeout):
            captured["timeout"] = timeout

        fake_client = self._fake_client_class(record_post, on_init=record_timeout)
        with patch("httpx.Client", fake_client):
            resp = start_learning_session(
                "http://localhost:5004",
                machine_id="poste-tim-01",
                session_name="Facturation urgences",
                api_token="tok-secret",
                trigger_source="windows_button",
            )

        self.assertEqual(resp.session_id, "sess-1")
        self.assertEqual(resp.state, "LISTENING")
        self.assertEqual(captured["url"], "http://localhost:5004/api/learn/start")
        self.assertEqual(captured["json"], {
            "machine_id": "poste-tim-01",
            "session_name": "Facturation urgences",
            "trigger_source": "windows_button",
        })
        self.assertEqual(captured["headers"]["Authorization"], "Bearer tok-secret")
        self.assertEqual(captured["headers"]["Content-Type"], "application/json")
        self.assertEqual(captured["timeout"], 10.0)

    def test_no_token_omits_authorization_header(self):
        """With an empty api_token, no Authorization header is sent."""
        from agent_v0.agent_v1.network.lea_orchestrator_client import (
            start_learning_session,
        )

        captured_headers = {}

        def record_headers(url, json, headers):
            captured_headers.update(headers)
            return self._make_response(json_data={
                "session_id": "x",
                "state": "LISTENING",
                "message": "",
            })

        with patch("httpx.Client", self._fake_client_class(record_headers)):
            start_learning_session(
                "http://localhost:5004",
                machine_id="m",
                session_name="n",
                api_token="",
            )

        self.assertNotIn("Authorization", captured_headers)

    def test_retry_then_success(self):
        """One failure followed by one success returns the response."""
        from agent_v0.agent_v1.network.lea_orchestrator_client import (
            start_learning_session,
        )

        calls = {"n": 0}

        def fail_once_then_succeed(url, json, headers):
            calls["n"] += 1
            if calls["n"] == 1:
                raise RuntimeError("connexion refusée")
            return self._make_response(json_data={
                "session_id": "ok",
                "state": "LISTENING",
                "message": "Ok",
            })

        fake_client = self._fake_client_class(fail_once_then_succeed)
        # time.sleep is patched so the backoff does not slow the test down.
        with patch("httpx.Client", fake_client), \
                patch("time.sleep") as sleep_mock:
            resp = start_learning_session(
                "http://localhost:5004",
                machine_id="m",
                session_name="n",
                api_token="t",
                backoff_s=(0.01, 0.01),
            )

        self.assertEqual(resp.session_id, "ok")
        self.assertEqual(calls["n"], 2)
        sleep_mock.assert_called()

    def test_three_failures_raise(self):
        """When every attempt fails, LeaOrchestratorError is raised."""
        from agent_v0.agent_v1.network.lea_orchestrator_client import (
            LeaOrchestratorError,
            start_learning_session,
        )

        def always_down(url, json, headers):
            raise RuntimeError("network down")

        fake_client = self._fake_client_class(always_down)
        with patch("httpx.Client", fake_client), patch("time.sleep"):
            with self.assertRaises(LeaOrchestratorError):
                start_learning_session(
                    "http://localhost:5004",
                    machine_id="m",
                    session_name="n",
                    backoff_s=(0.01, 0.01),
                )

    def test_missing_session_id_raises(self):
        """A successful response without session_id raises LeaOrchestratorError."""
        from agent_v0.agent_v1.network.lea_orchestrator_client import (
            LeaOrchestratorError,
            start_learning_session,
        )

        def reply_without_session_id(url, json, headers):
            return self._make_response(json_data={"state": "LISTENING"})

        fake_client = self._fake_client_class(reply_without_session_id)
        with patch("httpx.Client", fake_client), patch("time.sleep"):
            with self.assertRaises(LeaOrchestratorError):
                start_learning_session(
                    "http://localhost:5004",
                    machine_id="m",
                    session_name="n",
                    backoff_s=(0.01, 0.01),
                )
# ---------------------------------------------------------------------------
# Tests du wiring `_start_lea_orchestrator_session` sur SmartTrayV1 + ChatWindow.
# On invoque la méthode sur une instance non-construite pour isoler le wiring
# du reste (UI tkinter / pystray non démarrés).
# ---------------------------------------------------------------------------
class SmartTrayRebranchementTests(unittest.TestCase):
    """Checks the wiring of smart_tray._start_lea_orchestrator_session."""

    def test_smart_tray_calls_start_learning_session(self):
        """On success the client is called once and the notifier stays silent."""
        from agent_v0.agent_v1.ui import smart_tray as smart_tray_mod
        from agent_v0.agent_v1.network.lea_orchestrator_client import (
            LearnStartResponse,
        )

        # Allocate without running __init__ so no UI is started.
        tray_cls = smart_tray_mod.SmartTrayV1
        tray = tray_cls.__new__(tray_cls)
        tray._notifier = MagicMock()

        canned = LearnStartResponse(
            session_id="s-1", state="LISTENING", message="Ok"
        )
        target = (
            "agent_v0.agent_v1.network.lea_orchestrator_client.start_learning_session"
        )
        with patch(target, return_value=canned) as start_mock:
            tray._start_lea_orchestrator_session("ma tache")

        start_mock.assert_called_once()
        call_kwargs = start_mock.call_args[1]
        self.assertEqual(call_kwargs["session_name"], "ma tache")
        self.assertEqual(call_kwargs["trigger_source"], "tray_button")
        # Success is silent: no notification is expected.
        tray._notifier.notify.assert_not_called()

    def test_smart_tray_failsafe_on_orchestrator_error(self):
        """A client error is swallowed and reported through one notification."""
        from agent_v0.agent_v1.ui import smart_tray as smart_tray_mod
        from agent_v0.agent_v1.network.lea_orchestrator_client import (
            LeaOrchestratorError,
        )

        tray_cls = smart_tray_mod.SmartTrayV1
        tray = tray_cls.__new__(tray_cls)
        tray._notifier = MagicMock()

        target = (
            "agent_v0.agent_v1.network.lea_orchestrator_client.start_learning_session"
        )
        with patch(target, side_effect=LeaOrchestratorError("down")):
            # Must NOT raise.
            tray._start_lea_orchestrator_session("tache X")

        tray._notifier.notify.assert_called_once()
        title, msg = tray._notifier.notify.call_args.args
        self.assertEqual(title, "Léa")
        self.assertIn("local", msg.lower())
class ChatWindowRebranchementTests(unittest.TestCase):
    """Checks the wiring of chat_window._start_lea_orchestrator_session."""

    def test_chat_window_calls_start_learning_session(self):
        """On success the client is called once and its message is displayed."""
        from agent_v0.agent_v1.ui import chat_window as chat_window_mod
        from agent_v0.agent_v1.network.lea_orchestrator_client import (
            LearnStartResponse,
        )

        # Allocate without running __init__ so no UI is started.
        window_cls = chat_window_mod.ChatWindow
        window = window_cls.__new__(window_cls)
        window._add_lea_message = MagicMock()

        canned = LearnStartResponse(
            session_id="s-42",
            state="LISTENING",
            message="Ok, je regarde. Vas-y, je note...",
        )
        target = (
            "agent_v0.agent_v1.network.lea_orchestrator_client.start_learning_session"
        )
        with patch(target, return_value=canned) as start_mock:
            window._start_lea_orchestrator_session("Facturation urgences")

        start_mock.assert_called_once()
        call_kwargs = start_mock.call_args[1]
        self.assertEqual(call_kwargs["session_name"], "Facturation urgences")
        self.assertEqual(call_kwargs["trigger_source"], "windows_button")
        # The greeting returned by the orchestrator must be shown.
        window._add_lea_message.assert_called_with(
            "Ok, je regarde. Vas-y, je note..."
        )

    def test_chat_window_failsafe_on_orchestrator_error(self):
        """A client error is swallowed and a degraded-mode message is shown."""
        from agent_v0.agent_v1.ui import chat_window as chat_window_mod
        from agent_v0.agent_v1.network.lea_orchestrator_client import (
            LeaOrchestratorError,
        )

        window_cls = chat_window_mod.ChatWindow
        window = window_cls.__new__(window_cls)
        window._add_lea_message = MagicMock()

        target = (
            "agent_v0.agent_v1.network.lea_orchestrator_client.start_learning_session"
        )
        with patch(target, side_effect=LeaOrchestratorError("timeout")):
            # Must NOT raise.
            window._start_lea_orchestrator_session("tache Y")

        # Exactly one degraded-mode message is shown to the user.
        window._add_lea_message.assert_called_once()
        shown = window._add_lea_message.call_args.args[0]
        self.assertIn("Impossible de joindre", shown)
        self.assertIn("localement", shown)
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,116 @@
"""Tests D5-v3a mini-fix : num_ctx=4096 explicite sur les 3 sites grounding
bbox legacy de resolve_engine.py.
Avant fix : aucun des 3 sites ne précisait num_ctx → Ollama héritait du
Modelfile qwen2.5vl:7b-rpa (PARAMETER num_ctx 8192). Confirmé via
`ollama show qwen2.5vl:7b-rpa --modelfile` (Codex 2026-05-25 18:45).
Après fix : payload contient `options.num_ctx = 4096` sur les 3 sites :
- resolve_engine.py:985 (Essai 2 Ollama grounding bbox)
- resolve_engine.py:1015 (Essai 2 fallback multi-image)
- resolve_engine.py:3016 (_locate_popup_button)
Référence : inbox_claude/2026-05-25_1845_codex-to-claude_GO-D5v3a-mini-fix-numctx4096.md
"""
from __future__ import annotations
import re
import sys
from pathlib import Path
import pytest
ROOT = Path(__file__).resolve().parents[2]
if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT))
@pytest.mark.unit
def test_all_three_bbox_sites_have_num_ctx_4096():
    """Statically check that resolve_engine.py contains at least three
    ``"options"`` dicts carrying ``"num_ctx": 4096``.

    The check matches the payload shape rather than line numbers, so it
    survives code moving around inside the file.
    """
    # Explicit encoding: the default depends on the platform locale.
    src = (ROOT / "agent_v0" / "server_v1" / "resolve_engine.py").read_text(
        encoding="utf-8"
    )
    # Count options dicts that include num_ctx=4096.
    pattern = re.compile(r'"options":\s*\{[^}]*"num_ctx":\s*4096[^}]*\}')
    matches = pattern.findall(src)
    assert len(matches) >= 3, (
        f"Attendu : 3 sites bbox avec num_ctx=4096, trouvé : {len(matches)}. "
        f"D5-v3a mini-fix non appliqué sur tous les sites."
    )
@pytest.mark.unit
def test_locate_popup_button_payload_num_ctx(monkeypatch):
    """Runtime check: _locate_popup_button posts a payload with
    ``options.num_ctx == 4096`` and ``model == "qwen2.5vl:7b"``."""
    from unittest.mock import MagicMock

    import requests

    captured = {}

    def fake_post(url, json=None, timeout=None):
        captured["url"] = url
        captured["payload"] = json
        # Return a failed, empty response: only the captured payload matters.
        resp = MagicMock()
        resp.ok = False
        resp.json.return_value = {"message": {"content": ""}}
        return resp

    # Patch the global requests module.
    # NOTE(review): the original comment says the function does
    # `import requests as _requests` locally — confirm in resolve_engine.py.
    monkeypatch.setattr(requests, "post", fake_post)

    from agent_v0.server_v1 import resolve_engine as re_module

    # The return value is irrelevant here; only the outgoing payload is checked.
    re_module._locate_popup_button(
        screenshot_b64="fake_b64_data",
        button_text="OK",
        screen_width=1920,
        screen_height=1080,
    )

    # Fail with a clear message instead of a KeyError if nothing was posted.
    assert "payload" in captured, (
        "_locate_popup_button n'a jamais appelé requests.post"
    )
    # The payload must carry num_ctx=4096.
    assert captured["payload"]["options"]["num_ctx"] == 4096, (
        f"_locate_popup_button payload sans num_ctx=4096 : "
        f"{captured['payload']['options']}"
    )
    # The model must be unchanged.
    assert captured["payload"]["model"] == "qwen2.5vl:7b"
@pytest.mark.unit
def test_num_ctx_did_not_break_other_options():
    """Statically check that at least three ``"options"`` dicts in
    resolve_engine.py still carry temperature and num_predict alongside
    num_ctx."""
    # Explicit encoding: the default depends on the platform locale.
    src = (ROOT / "agent_v0" / "server_v1" / "resolve_engine.py").read_text(
        encoding="utf-8"
    )
    # Matches temperature 0.1, then num_predict, then num_ctx 4096, in that
    # order, within a single options dict.
    pattern = re.compile(
        r'"options":\s*\{[^}]*"temperature":\s*0\.1[^}]*"num_predict":\s*\d+[^}]*"num_ctx":\s*4096[^}]*\}'
    )
    matches = pattern.findall(src)
    assert len(matches) >= 3, (
        f"Attendu : 3 sites bbox avec temperature + num_predict + num_ctx, "
        f"trouvé : {len(matches)}. Une option a peut-être été perdue lors du fix."
    )
@pytest.mark.unit
def test_no_helper_migration_done():
    """Statically check that resolve_engine.py references neither
    ``generate_bbox_grounding`` nor a ``generate_grounding(`` call (the
    D5-v3a mini-fix only adds num_ctx, with no helper migration)."""
    # Explicit encoding: the default depends on the platform locale.
    src = (ROOT / "agent_v0" / "server_v1" / "resolve_engine.py").read_text(
        encoding="utf-8"
    )
    assert "generate_bbox_grounding" not in src, (
        "D5-v3a mini-fix : pas de helper bbox attendu. "
        "generate_bbox_grounding sera D5-v3b."
    )
    # generate_grounding (D5-v2) must not be called from this file either.
    assert "generate_grounding(" not in src, (
        "D5-v3a mini-fix : pas de migration vers generate_grounding(). "
        "D5-v2 reste API préparatoire."
    )

View File

@@ -0,0 +1,152 @@
"""Tests C-P1 : tolérance préfixe dans _text_match_fuzzy.
Cas réel : OCR partiel `observed='Enregi'` sur cible `expected='Enregistrer'`
provoquait un rejet pre-check `expected='Enregistrer' observed='Enregi'`.
Patch : accepter si observed est préfixe d'expected avec len ≥ 4 et ≥ 50%
de la longueur expected.
Référence : inbox_claude/2026-05-25_1938_codex-to-claude_TACHES-projet-ocr-d5v3c-lea.md
Fix : agent_v0/server_v1/resolve_engine.py:_text_match_fuzzy
"""
from __future__ import annotations
import sys
from pathlib import Path
import pytest
ROOT = Path(__file__).resolve().parents[2]
if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT))
# ────────────────────────────────────────────────────────────────────────────
# Cas qui MOTIVENT le patch (rejet incorrect avant)
# ────────────────────────────────────────────────────────────────────────────
@pytest.mark.unit
def test_enregi_matches_enregistrer():
    """Reported case: the partial OCR read 'Enregi' must match 'Enregistrer'."""
    from agent_v0.server_v1 import resolve_engine

    matched = resolve_engine._text_match_fuzzy("Enregistrer", "Enregi")
    assert matched is True
@pytest.mark.unit
def test_coller_matches_collier():
    """'Coll' is a 4-of-6-character prefix (66%) of 'Coller' and must match."""
    from agent_v0.server_v1 import resolve_engine

    matched = resolve_engine._text_match_fuzzy("Coller", "Coll")
    assert matched is True
@pytest.mark.unit
def test_cancel_matches_canc():
    """A 4-character prefix of a 6-character label (66%) must match."""
    from agent_v0.server_v1.resolve_engine import _text_match_fuzzy

    expected, observed = "Cancel", "Canc"
    assert _text_match_fuzzy(expected, observed) is True
# ────────────────────────────────────────────────────────────────────────────
# Garde-fous : préfixes trop courts/faibles DOIVENT être rejetés
# ────────────────────────────────────────────────────────────────────────────
@pytest.mark.unit
def test_save_does_not_match_sa_too_short():
    """A prefix shorter than 4 characters is rejected (high false-positive risk)."""
    from agent_v0.server_v1.resolve_engine import _text_match_fuzzy

    expected, observed = "Save", "Sa"
    assert _text_match_fuzzy(expected, observed) is False
@pytest.mark.unit
def test_bouton_does_not_match_bo_too_short():
    """A 2-character prefix is rejected (likely false positive)."""
    from agent_v0.server_v1.resolve_engine import _text_match_fuzzy

    expected, observed = "Bouton", "Bo"
    assert _text_match_fuzzy(expected, observed) is False
@pytest.mark.unit
def test_enregistrer_sous_does_not_match_enregi_below_50pct():
    """A 6-character prefix of a 16-character label (37% < 50%) is rejected.

    This keeps 'Enregi' (a partial read of 'Enregistrer') from being accepted
    for 'Enregistrer sous' when it should target plain 'Enregistrer'.
    """
    from agent_v0.server_v1.resolve_engine import _text_match_fuzzy

    # Not verified here: whether _normalize_for_match strips spaces. If it
    # does, the observed text would be a prefix of "enregistrersous"
    # (15 chars), and 6/15 = 40% is still below 50%.
    expected, observed = "Enregistrer sous", "Enregi"
    assert _text_match_fuzzy(expected, observed) is False
@pytest.mark.unit
def test_save_matches_save_substring_unchanged():
    """Existing substring case: expected 'Save' inside observed 'Saved' still matches."""
    from agent_v0.server_v1.resolve_engine import _text_match_fuzzy

    expected, observed = "Save", "Saved"
    assert _text_match_fuzzy(expected, observed) is True
# ────────────────────────────────────────────────────────────────────────────
# Comportements existants préservés (regression guards)
# ────────────────────────────────────────────────────────────────────────────
@pytest.mark.unit
def test_token_matching_still_works():
    """Existing multi-token case must still match.

    Per the original note: 'coller saisir dossier patient' against
    'u saisir le dossier patient' gives 3 of 4 tokens, i.e. >= 60%.
    """
    from agent_v0.server_v1.resolve_engine import _text_match_fuzzy

    expected = "Coller ou saisir le dossier patient"
    observed = "u saisir le dossier patient"
    assert _text_match_fuzzy(expected, observed) is True
@pytest.mark.unit
def test_unrelated_text_still_rejected():
    """A completely different text is still rejected."""
    from agent_v0.server_v1.resolve_engine import _text_match_fuzzy

    expected, observed = "Enregistrer", "Annuler"
    assert _text_match_fuzzy(expected, observed) is False
@pytest.mark.unit
def test_empty_expected_returns_true():
    """An empty expected text means no constraint, hence a match."""
    from agent_v0.server_v1.resolve_engine import _text_match_fuzzy

    expected, observed = "", "anything"
    assert _text_match_fuzzy(expected, observed) is True
@pytest.mark.unit
def test_empty_observed_does_not_match_non_empty_expected():
    """An empty observed text is rejected when the expected text is not empty."""
    from agent_v0.server_v1.resolve_engine import _text_match_fuzzy

    expected, observed = "Enregistrer", ""
    assert _text_match_fuzzy(expected, observed) is False
# ────────────────────────────────────────────────────────────────────────────
# Edge cases préfixe
# ────────────────────────────────────────────────────────────────────────────
@pytest.mark.unit
def test_prefix_exactly_4_chars_at_50_pct():
    """4 chars out of 8 is exactly 50% with length >= 4, so it is accepted."""
    from agent_v0.server_v1.resolve_engine import _text_match_fuzzy

    # "Continue" has 8 characters; "Cont" has 4, i.e. exactly 50%.
    expected, observed = "Continue", "Cont"
    assert _text_match_fuzzy(expected, observed) is True
@pytest.mark.unit
def test_prefix_3_chars_rejected_even_if_high_ratio():
    """A 3-character prefix is rejected even at >= 50% (minimum-4-chars guard)."""
    from agent_v0.server_v1.resolve_engine import _text_match_fuzzy

    # "Sav" is 3 of the 4 characters of "Save" (75%), but it is shorter than
    # the 4-character minimum, so it must be rejected.
    expected, observed = "Save", "Sav"
    assert _text_match_fuzzy(expected, observed) is False
@pytest.mark.unit
def test_prefix_not_strict_prefix_rejected():
    """When observed is not a strict prefix, the prefix rule does not apply."""
    from agent_v0.server_v1.resolve_engine import _text_match_fuzzy

    # "Enregistrer" / "Sauver": not a substring, not a prefix, and the single
    # token "sauver" is absent from "enregistrer" (0/1 < 0.60), hence False.
    expected, observed = "Enregistrer", "Sauver"
    assert _text_match_fuzzy(expected, observed) is False

View File

@@ -0,0 +1,153 @@
import os
from types import SimpleNamespace
from agent_v0.server_v1.replay_engine import (
_create_replay_state,
_edge_to_normalized_actions,
)
os.environ.setdefault("RPA_AUTH_DISABLED", "true")
from agent_v0.server_v1.api_stream import _normalize_action_target_semantics
from visual_workflow_builder.backend.services.learned_workflow_bridge import (
_vwb_params_to_target_spec,
)
class _FakeAction:
def __init__(self, type_, target=None, parameters=None):
self.type = type_
self.target = target
self.parameters = parameters or {}
class _FakeEdge:
def __init__(self, action):
self.edge_id = "edge_anchor"
self.from_node = "node_src"
self.to_node = "node_dst"
self.action = action
def test_vwb_target_spec_preserves_visual_anchor_semantics():
    """The VWB visual anchor's text and descriptions survive into the target spec."""
    anchor_id = "anchor_a518f6d5e727_1778849657"
    label = "- W - ICE rapport urgenc."
    description = "Word document icon with text."
    visual_anchor = {
        "anchor_id": anchor_id,
        "target_text": label,
        "description": description,
        "ocr_description": description,
    }

    target = _vwb_params_to_target_spec(
        "double_click_anchor",
        {"visual_anchor": visual_anchor},
    )

    assert target["by_text"] == label
    hints = target["context_hints"]
    assert hints["anchor_id"] == anchor_id
    assert hints["target_text"] == label
    assert hints["description"] == description
    assert hints["vlm_description"] == description
def test_replay_normalization_lifts_anchor_semantics_from_context_hints():
    """Anchor semantics stored in context_hints are lifted into the target spec."""
    anchor_id = "anchor_a518f6d5e727_1778849657"
    label = "- W - ICE rapport urgenc."
    description = "Word document icon with text."
    context_hints = {
        "anchor_id": anchor_id,
        "target_text": label,
        "description": description,
        "ocr_description": description,
        "anchor_image_base64": "abc123",
    }
    target = SimpleNamespace(
        by_role="icon",
        by_text=None,
        by_position=(0.12, 0.18),
        context_hints=context_hints,
    )
    click = _FakeAction("mouse_click", target=target, parameters={"button": "double"})

    actions = _edge_to_normalized_actions(_FakeEdge(click), params={})

    assert len(actions) == 1
    action = actions[0]
    target_spec = action["target_spec"]
    assert action["visual_mode"] is True
    assert action["target_description"] == label
    assert target_spec["by_text"] == label
    assert target_spec["anchor_id"] == anchor_id
    assert target_spec["vlm_description"] == description
    assert target_spec["anchor_image_base64"] == "abc123"
def test_replay_state_strips_anchor_image_but_keeps_semantic_label():
    """The replay state drops the anchor image but keeps id, text and description."""
    anchor_id = "anchor_a518f6d5e727_1778849657"
    label = "- W - ICE rapport urgenc."
    description = "Word document icon with text."
    action = {
        "action_id": "act_anchor",
        "type": "click",
        "target_spec": {
            "anchor_id": anchor_id,
            "anchor_image_base64": "abc123",
            "by_text": label,
            "target_text": label,
            "description": description,
        },
    }

    state = _create_replay_state("replay", "workflow", "session", 1, actions=[action])

    target_spec = state["actions"][0]["target_spec"]
    assert "anchor_image_base64" not in target_spec
    assert target_spec["anchor_id"] == anchor_id
    assert target_spec["by_text"] == label
    assert target_spec["description"] == description
def test_compound_click_step_keeps_visual_anchor_semantics():
    """A click step inside a compound action keeps its visual-anchor semantics."""
    anchor_id = "anchor_a518f6d5e727_1778849657"
    label = "- W - ICE rapport urgenc."
    click_step = {
        "type": "mouse_click",
        "x_pct": 0.12,
        "y_pct": 0.18,
        "target_text": label,
        "description": "Word document icon with text.",
        "anchor_id": anchor_id,
    }
    compound = _FakeAction("compound", parameters={"steps": [click_step]})

    actions = _edge_to_normalized_actions(_FakeEdge(compound), params={})

    assert len(actions) == 1
    first = actions[0]
    target_spec = first["target_spec"]
    assert first["visual_mode"] is True
    assert first["target_description"] == label
    assert target_spec["by_text"] == label
    assert target_spec["anchor_id"] == anchor_id
def test_serialized_action_semantics_are_promoted_before_enqueue():
    """Normalizing a serialized action in place promotes its anchor semantics."""
    label = "- W - ICE rapport urgenc."
    description = "Word document icon with text."
    action = {
        "action_id": "step_from_tmp",
        "type": "click",
        "target_spec": {
            "anchor_id": "anchor_tmp",
            "target_text": label,
            "description": description,
            "ocr_description": description,
            "anchor_image_base64": "abc123",
        },
    }

    # The assertions below read the same dict, so the call mutates ``action``.
    _normalize_action_target_semantics(action)

    target_spec = action["target_spec"]
    assert target_spec["by_text"] == label
    assert target_spec["by_text_source"] == "visual_anchor"
    assert target_spec["vlm_description"] == description
    assert action["target_description"] == label

View File

@@ -0,0 +1,310 @@
"""Tests pour D5-v2 : profil grounding VLM centralisé + generate_grounding().
Couvre :
- vlm_config.get_grounding_profile() avec valeurs par défaut et overrides env
- ollama_client.OllamaClient.generate_grounding() avec mocks requests.post
- Parsing JSON prefill-aware (reconstitution {"x_pct": ...} → dict)
- Pas d'appel Ollama live (tous les requests.post sont mockés)
Référence : inbox_claude/2026-05-25_1620_codex-to-claude_GO-revue-strategique-D5v2-C2d.md
Fix : core/detection/vlm_config.py (get_grounding_profile) +
core/detection/ollama_client.py (generate_grounding, _extract_first_json_object)
"""
from __future__ import annotations
import json
import sys
from pathlib import Path
from unittest.mock import patch, MagicMock
import pytest
# Directory at parents[2] of this file (presumably the repository root);
# it is put first on sys.path unless it is already listed.
ROOT = Path(__file__).resolve().parents[2]
if sys.path.count(str(ROOT)) == 0:
    sys.path.insert(0, str(ROOT))
# ────────────────────────────────────────────────────────────────────────────
# vlm_config.get_grounding_profile
# ────────────────────────────────────────────────────────────────────────────
@pytest.mark.unit
def test_grounding_profile_defaults(monkeypatch):
    """With no env vars set, the profile exposes the D5-v2 default values."""
    env_names = (
        "RPA_GROUNDING_MODEL",
        "RPA_GROUNDING_CTX",
        "RPA_GROUNDING_FALLBACK",
        "RPA_VLM_PREFILL",
    )
    for name in env_names:
        monkeypatch.delenv(name, raising=False)
    from core.detection.vlm_config import get_grounding_profile

    profile = get_grounding_profile()

    expected_defaults = {
        "model": "qwen3.5:9b",
        "num_ctx": 4096,
        "prefill": '{"x_pct":',
        "temperature": 0.0,
        "num_predict": 96,
        "fallback_model": "qwen2.5vl:7b-rpa",
        "keep_alive": "30m",
    }
    for key, value in expected_defaults.items():
        assert profile[key] == value
    # qwen3.5 is a thinking model, so think must be False on the payload side.
    # The profile exposes think as a bool; False means "send think:false".
    assert profile["think"] is False
@pytest.mark.unit
def test_grounding_profile_env_override(monkeypatch):
    """Env vars override the model, the context size and the fallback model."""
    overrides = {
        "RPA_GROUNDING_MODEL": "qwen2.5vl:7b-rpa",
        "RPA_GROUNDING_CTX": "8192",
        "RPA_GROUNDING_FALLBACK": "gemma4:latest",
    }
    for name, value in overrides.items():
        monkeypatch.setenv(name, value)
    from core.detection.vlm_config import get_grounding_profile

    profile = get_grounding_profile()

    assert profile["model"] == "qwen2.5vl:7b-rpa"
    assert profile["num_ctx"] == 8192
    assert profile["fallback_model"] == "gemma4:latest"
    # qwen2.5vl is neither a thinking model nor gemma4, so think is True
    # (nothing to send).
    assert profile["think"] is True
@pytest.mark.unit
def test_grounding_profile_ctx_invalid_falls_back_to_default(monkeypatch):
    """A non-numeric RPA_GROUNDING_CTX falls back to 4096."""
    monkeypatch.setenv("RPA_GROUNDING_CTX", "not_a_number")
    from core.detection.vlm_config import get_grounding_profile

    profile = get_grounding_profile()

    assert profile["num_ctx"] == 4096
@pytest.mark.unit
def test_grounding_profile_prefill_disabled(monkeypatch):
    """RPA_VLM_PREFILL=false yields a profile whose prefill is None."""
    monkeypatch.setenv("RPA_VLM_PREFILL", "false")
    from core.detection.vlm_config import get_grounding_profile

    profile = get_grounding_profile()

    assert profile["prefill"] is None
@pytest.mark.unit
def test_grounding_profile_gemma4_triggers_think_false(monkeypatch):
    """Selecting gemma4 as the grounding model must set think to False."""
    monkeypatch.setenv("RPA_GROUNDING_MODEL", "gemma4:latest")
    from core.detection.vlm_config import get_grounding_profile

    profile = get_grounding_profile()

    # gemma4 needs think=false.
    assert profile["think"] is False
# ────────────────────────────────────────────────────────────────────────────
# _extract_first_json_object
# ────────────────────────────────────────────────────────────────────────────
@pytest.mark.unit
def test_extract_first_json_object_clean():
    """A clean JSON string is parsed directly."""
    from core.detection.ollama_client import _extract_first_json_object

    raw = '{"x_pct": 0.5, "y_pct": 0.3, "confidence": 0.95}'
    parsed = _extract_first_json_object(raw)

    assert parsed == {"x_pct": 0.5, "y_pct": 0.3, "confidence": 0.95}
@pytest.mark.unit
def test_extract_first_json_object_with_trailing_text():
    """JSON followed by stray text (typical VLM output) is still extracted."""
    from core.detection.ollama_client import _extract_first_json_object

    raw = (
        '{"x_pct": 0.4, "y_pct": 0.6, "confidence": 0.88}'
        "\n\nThe button is located in the bottom-right area."
    )
    parsed = _extract_first_json_object(raw)

    assert parsed["x_pct"] == 0.4
    assert parsed["confidence"] == 0.88
@pytest.mark.unit
def test_extract_first_json_object_with_nested():
    """JSON containing a nested object is parsed."""
    from core.detection.ollama_client import _extract_first_json_object

    raw = '{"x_pct": 0.5, "meta": {"source": "qwen", "score": 0.9}}'
    parsed = _extract_first_json_object(raw)

    assert parsed["meta"]["source"] == "qwen"
@pytest.mark.unit
def test_extract_first_json_object_with_braces_in_strings():
    """Braces inside string values must not disturb the brace counting."""
    from core.detection.ollama_client import _extract_first_json_object

    raw = '{"x_pct": 0.5, "label": "Click {here}"}'
    parsed = _extract_first_json_object(raw)

    assert parsed["label"] == "Click {here}"
@pytest.mark.unit
def test_extract_first_json_object_invalid():
    """Text without any JSON yields None."""
    from core.detection.ollama_client import _extract_first_json_object

    parsed = _extract_first_json_object("no json here at all")

    assert parsed is None
@pytest.mark.unit
def test_extract_first_json_object_empty():
    """An empty string and None both yield None."""
    from core.detection.ollama_client import _extract_first_json_object

    for blank in ("", None):
        assert _extract_first_json_object(blank) is None
# ────────────────────────────────────────────────────────────────────────────
# OllamaClient.generate_grounding (mocks requests.post)
# ────────────────────────────────────────────────────────────────────────────
@pytest.fixture
def mock_client(monkeypatch):
    """Build an OllamaClient without a real connection to Ollama."""
    from core.detection import ollama_client as oc_module

    def _always_connected(self):
        return True

    # Bypass _check_connection so no connection check is performed.
    monkeypatch.setattr(
        oc_module.OllamaClient, "_check_connection", _always_connected
    )
    # Pin the model explicitly; per the original note this avoids
    # get_vlm_model(), which calls Ollama.
    return oc_module.OllamaClient(model="qwen2.5vl:7b-rpa")
@pytest.mark.unit
def test_generate_grounding_payload_uses_profile(mock_client, monkeypatch):
    """The payload sent to Ollama is built from the default grounding profile.

    Checks the endpoint, model, options, think flag and assistant prefill.
    """
    # The assertions below pin the *default* profile, so every env var that
    # can override it is cleared, not only RPA_VLM_PREFILL.
    for name in (
        "RPA_GROUNDING_MODEL",
        "RPA_GROUNDING_CTX",
        "RPA_GROUNDING_FALLBACK",
        "RPA_VLM_PREFILL",
    ):
        monkeypatch.delenv(name, raising=False)
    captured = {}

    def fake_post(url, json=None, timeout=None):
        captured["url"] = url
        captured["payload"] = json
        # Simulated reply: Ollama returns the content AFTER the prefill.
        resp = MagicMock()
        resp.status_code = 200
        resp.json.return_value = {
            "message": {
                "content": ' 0.5, "y_pct": 0.3, "confidence": 0.95}'
            }
        }
        return resp

    monkeypatch.setattr("core.detection.ollama_client.requests.post", fake_post)

    mock_client.generate_grounding(prompt="Find the Save button")

    assert captured["url"].endswith("/api/chat")
    payload = captured["payload"]
    assert payload["model"] == "qwen3.5:9b"  # default grounding model
    assert payload["options"]["num_ctx"] == 4096
    assert payload["options"]["temperature"] == 0.0
    assert payload["options"]["num_predict"] == 96
    # qwen3.5 is a thinking model, so think=false goes in the payload.
    assert payload.get("think") is False
    # The prefill must be the content of the last (assistant) message.
    last_msg = payload["messages"][-1]
    assert last_msg["role"] == "assistant"
    assert last_msg["content"] == '{"x_pct":'
@pytest.mark.unit
def test_generate_grounding_parses_prefilled_json(mock_client, monkeypatch):
    """The JSON is rebuilt from prefill + model output and then parsed."""
    # profile_used is asserted against the defaults below, so every env var
    # that can override the profile is cleared, not only RPA_VLM_PREFILL.
    for name in (
        "RPA_GROUNDING_MODEL",
        "RPA_GROUNDING_CTX",
        "RPA_GROUNDING_FALLBACK",
        "RPA_VLM_PREFILL",
    ):
        monkeypatch.delenv(name, raising=False)

    def fake_post(url, json=None, timeout=None):
        resp = MagicMock()
        resp.status_code = 200
        # Ollama replies WITHOUT the prefill (the client adds it back).
        resp.json.return_value = {
            "message": {
                "content": ' 0.42, "y_pct": 0.68, "confidence": 0.91}'
            }
        }
        return resp

    monkeypatch.setattr("core.detection.ollama_client.requests.post", fake_post)

    result = mock_client.generate_grounding(prompt="Find OK button")

    assert result["success"] is True
    # ``response`` holds the complete, rebuilt JSON text.
    assert result["response"].startswith('{"x_pct":')
    # ``parsed_json`` is the dict parsed from it.
    parsed = result["parsed_json"]
    assert parsed is not None
    assert parsed["x_pct"] == 0.42
    assert parsed["y_pct"] == 0.68
    assert parsed["confidence"] == 0.91
    # The profile that was used is exposed to the caller.
    assert result["profile_used"]["model"] == "qwen3.5:9b"
    assert result["profile_used"]["num_ctx"] == 4096
@pytest.mark.unit
def test_generate_grounding_restores_original_model(mock_client, monkeypatch):
    """After generate_grounding, self.model is back to its previous value."""
    original = mock_client.model  # qwen2.5vl:7b-rpa

    def fake_post(url, json=None, timeout=None):
        body = {"message": {"content": ' 0.5, "y_pct": 0.5, "confidence": 0.5}'}}
        resp = MagicMock()
        resp.status_code = 200
        resp.json.return_value = body
        return resp

    monkeypatch.setattr("core.detection.ollama_client.requests.post", fake_post)

    mock_client.generate_grounding(prompt="test")

    assert mock_client.model == original, (
        f"self.model doit être restauré ({original}), trouvé : {mock_client.model}"
    )
@pytest.mark.unit
def test_generate_grounding_handles_ollama_error(mock_client, monkeypatch):
    """On an Ollama 500: success=False, parsed_json=None, model restored."""
    original = mock_client.model

    def fake_post(url, json=None, timeout=None):
        resp = MagicMock()
        resp.status_code = 500
        resp.text = "Internal error"
        return resp

    monkeypatch.setattr("core.detection.ollama_client.requests.post", fake_post)

    result = mock_client.generate_grounding(prompt="test")

    assert result["success"] is False
    assert result["parsed_json"] is None
    # The contract stated above includes restoring the model on the error
    # path; assert it so the test covers what it documents.
    assert mock_client.model == original
@pytest.mark.unit
def test_generate_grounding_profile_override(mock_client, monkeypatch):
    """An explicit profile passed to the call overrides the default one."""
    captured = {}

    def fake_post(url, json=None, timeout=None):
        captured["payload"] = json
        body = {"message": {"content": ' 0.1, "y_pct": 0.1, "confidence": 0.5}'}}
        resp = MagicMock()
        resp.status_code = 200
        resp.json.return_value = body
        return resp

    monkeypatch.setattr("core.detection.ollama_client.requests.post", fake_post)
    custom_profile = {
        "model": "gemma4:latest",
        "num_ctx": 2048,
        "prefill": None,
        "temperature": 0.2,
        "num_predict": 50,
        "think": False,
        "keep_alive": "5m",
        "fallback_model": "qwen2.5vl:7b-rpa",
    }

    mock_client.generate_grounding(prompt="test", profile=custom_profile)

    payload = captured["payload"]
    options = payload["options"]
    assert payload["model"] == "gemma4:latest"
    assert options["num_ctx"] == 2048
    assert options["temperature"] == 0.2
    # No prefill, so there is no trailing assistant message.
    assert payload["messages"][-1]["role"] == "user"
    # gemma4 needs think=false, so it is injected.
    assert payload.get("think") is False

View File

@@ -0,0 +1,197 @@
"""Tests pour C1c : WorkflowPipeline(enable_ui_detection=False) ne doit pas
charger OWL-v2 sur GPU.
Contexte : depuis 2026-05-25, agent_chat instancie WorkflowPipeline avec
enable_ui_detection=False par défaut (override AGENT_CHAT_ENABLE_UI_DETECTION=1),
pour économiser ~900 MiB VRAM au boot du service rpa-agent-chat.
Référence : inbox_claude/2026-05-25_1341_codex-to-claude_C1c-C2b-plan-action.md
Fix : agent_chat/app.py:296 (WorkflowPipeline kwargs env-driven)
Contrat validé : core/pipeline/workflow_pipeline.py:117-118 :
self.ui_detector = None
if enable_ui_detection:
... # UIDetector chargé → OWL/VLM init
"""
from __future__ import annotations
import sys
from pathlib import Path
import pytest
# Directory at parents[2] of this file (presumably the repository root);
# it is put first on sys.path unless it is already listed.
ROOT = Path(__file__).resolve().parents[2]
if sys.path.count(str(ROOT)) == 0:
    sys.path.insert(0, str(ROOT))
def _patch_light_pipeline_deps(monkeypatch, wp_module):
class FakeCLIPEmbedder:
def __init__(self, device=None):
self.device = device
class FakeFusionEngine:
pass
class FakeStateEmbeddingBuilder:
def __init__(self, *args, **kwargs):
self.args = args
self.kwargs = kwargs
class FakeFAISSManager:
def __init__(self, *args, **kwargs):
self.args = args
self.kwargs = kwargs
monkeypatch.setattr(wp_module, "CLIPEmbedder", FakeCLIPEmbedder)
monkeypatch.setattr(wp_module, "FusionEngine", FakeFusionEngine)
monkeypatch.setattr(wp_module, "StateEmbeddingBuilder", FakeStateEmbeddingBuilder)
monkeypatch.setattr(wp_module, "FAISSManager", FakeFAISSManager)
@pytest.mark.unit
def test_workflow_pipeline_ui_detection_disabled_no_owl_load(monkeypatch):
    """WorkflowPipeline(enable_ui_detection=False) never instantiates UIDetector.

    ``ui_detector`` must stay None and the UIDetector constructor must not be
    called, hence no OWL/VLM loading.
    """
    from core.pipeline import workflow_pipeline as wp_module

    ui_detector_calls = []
    _patch_light_pipeline_deps(monkeypatch, wp_module)

    class FakeUIDetector:
        def __init__(self, config=None):
            ui_detector_calls.append(config)

    monkeypatch.setattr(wp_module, "UIDetector", FakeUIDetector)

    pipeline = wp_module.WorkflowPipeline(enable_ui_detection=False, enable_vlm=False)

    assert pipeline.ui_detector is None, (
        "ui_detector doit être None quand enable_ui_detection=False"
    )
    assert ui_detector_calls == [], (
        f"UIDetector instancié alors que ui_detection=False : {ui_detector_calls}"
    )
@pytest.mark.unit
def test_workflow_pipeline_ui_detection_enabled_calls_ui_detector(monkeypatch):
    """Inverse contract: enable_ui_detection=True instantiates UIDetector once."""
    from core.pipeline import workflow_pipeline as wp_module

    ui_detector_calls = []
    _patch_light_pipeline_deps(monkeypatch, wp_module)

    class FakeUIDetector:
        def __init__(self, config=None):
            ui_detector_calls.append(config)

    monkeypatch.setattr(wp_module, "UIDetector", FakeUIDetector)

    pipeline = wp_module.WorkflowPipeline(enable_ui_detection=True, enable_vlm=True)

    assert pipeline.ui_detector is not None, (
        "ui_detector doit être instancié quand enable_ui_detection=True"
    )
    assert len(ui_detector_calls) == 1
    # The config handed to UIDetector must reflect enable_vlm.
    (config,) = ui_detector_calls
    assert config.use_vlm_classification is True
    assert config.use_owl_detection is True  # DetectionConfig default
@pytest.mark.unit
def test_workflow_pipeline_reuses_clip_embedder_for_state_builder(monkeypatch):
    """WorkflowPipeline must not load a second OpenCLIP with auto-GPU detection.

    The first CLIP honours ``use_gpu``; StateEmbeddingBuilder must reuse it
    rather than creating its own ``CLIPEmbedder()`` with CUDA auto-detection.
    """
    from core.pipeline import workflow_pipeline as wp_module

    clip_instances = []
    builder_calls = []

    class FakeCLIPEmbedder:
        def __init__(self, device=None):
            self.device = device
            clip_instances.append(self)

    class FakeStateEmbeddingBuilder:
        def __init__(
            self,
            fusion_engine=None,
            embedders=None,
            output_dir=None,
            use_clip=True,
        ):
            recorded = {
                "embedders": embedders,
                "use_clip": use_clip,
                "output_dir": output_dir,
            }
            builder_calls.append(recorded)

    monkeypatch.setattr(wp_module, "CLIPEmbedder", FakeCLIPEmbedder)
    monkeypatch.setattr(wp_module, "FAISSManager", lambda *args, **kwargs: object())
    monkeypatch.setattr(wp_module, "StateEmbeddingBuilder", FakeStateEmbeddingBuilder)

    pipeline = wp_module.WorkflowPipeline(
        use_gpu=False,
        enable_ui_detection=False,
        enable_vlm=False,
    )

    assert len(clip_instances) == 1
    assert clip_instances[0].device == "cpu"
    assert len(builder_calls) == 1
    call = builder_calls[0]
    assert call["use_clip"] is False
    # Every embedder slot must be the pipeline's single CLIP instance.
    for slot in ("image", "text", "title", "ui"):
        assert call["embedders"][slot] is pipeline.clip_embedder
@pytest.mark.unit
def test_agent_chat_app_env_flag_default_off(monkeypatch):
    """Smoke test of the env-driven flag parsing pattern of agent_chat/app.py.

    The parsing expression is reproduced inline (the app module is not
    imported); without the env var the flag must be off.
    """
    monkeypatch.delenv("AGENT_CHAT_ENABLE_UI_DETECTION", raising=False)
    import os

    raw = os.environ.get("AGENT_CHAT_ENABLE_UI_DETECTION", "0")
    enabled = raw.strip() in ("1", "true", "yes")
    assert enabled is False, "Sans env var, doit être False"
@pytest.mark.unit
def test_agent_chat_app_env_flag_explicit_on(monkeypatch):
    """AGENT_CHAT_ENABLE_UI_DETECTION=1 turns the flag on."""
    monkeypatch.setenv("AGENT_CHAT_ENABLE_UI_DETECTION", "1")
    import os

    raw = os.environ.get("AGENT_CHAT_ENABLE_UI_DETECTION", "0")
    enabled = raw.strip() in ("1", "true", "yes")
    assert enabled is True
@pytest.mark.unit
def test_agent_chat_app_env_flag_explicit_off(monkeypatch):
    """AGENT_CHAT_ENABLE_UI_DETECTION=0 keeps the flag off."""
    monkeypatch.setenv("AGENT_CHAT_ENABLE_UI_DETECTION", "0")
    import os

    raw = os.environ.get("AGENT_CHAT_ENABLE_UI_DETECTION", "0")
    enabled = raw.strip() in ("1", "true", "yes")
    assert enabled is False