Architecture 3 niveaux implémentée et testée (137 tests unitaires + 21 visuels) :

MÉSO (acteur intelligent) :
- P0 Critic : vérification sémantique post-action via gemma4 (replay_verifier.py)
- P1 Observer : pré-analyse écran avant chaque action (api_stream.py /pre_analyze)
- P2 Grounding/Policy : séparation localisation (grounding.py) et décision (policy.py)
- P3 Recovery : rollback automatique Ctrl+Z/Escape/Alt+F4 (recovery.py)
- P4 Learning : apprentissage runtime avec boucle de consolidation (replay_learner.py)

MACRO (planificateur) :
- TaskPlanner : comprend les ordres en langage naturel via gemma4 (task_planner.py)
- Contexte métier TIM/CIM-10 pour les hôpitaux (domain_context.py)
- Endpoint POST /api/v1/task pour l'exécution par instruction

Traçabilité :
- Audit trail complet avec 18 champs par action (audit_trail.py)
- Endpoints GET /audit/history, /audit/summary, /audit/export (CSV)

Grounding :
- Fix parsing bbox_2d qwen2.5vl (pixels relatifs, pas grille 1000x1000)
- Benchmarks visuels sur captures réelles (3 approches : baseline, zoom, Citrix)
- Reproductibilité validée : variance < 0.008 sur 10 itérations

Sécurité :
- Tokens de production retirés du code source → .env.local
- Secret key aléatoire si non configuré
- Suppression logs qui leakent les tokens

Résultats : 80% de replay (vs 12.5% avant), 100% détection visuelle Citrix JPEG Q20

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
763 lines
28 KiB
Python
763 lines
28 KiB
Python
# tests/unit/test_task_planner.py
|
||
"""
|
||
Tests unitaires du TaskPlanner (planificateur MACRO).
|
||
|
||
Vérifie :
|
||
1. La compréhension d'ordres simples (understand)
|
||
2. Le matching de workflows par description sémantique
|
||
3. La détection de boucles et l'extraction de paramètres
|
||
4. La conversion étapes → actions JSON (format correct)
|
||
5. L'extraction de descriptions de session
|
||
|
||
Toutes les réponses gemma4 sont mockées pour la reproductibilité.
|
||
"""
|
||
|
||
import json
|
||
import os
|
||
import sys
|
||
import tempfile
|
||
from pathlib import Path
|
||
from unittest.mock import MagicMock, patch, Mock
|
||
|
||
import pytest
|
||
|
||
# Put the directory two levels above this file's folder on sys.path so the
# ``agent_v0`` package imported below can be resolved when the tests are run
# directly. The entry is only added once.
_ROOT = str(Path(__file__).resolve().parents[2])
if sys.path.count(_ROOT) == 0:
    sys.path.insert(0, _ROOT)
|
||
|
||
from agent_v0.server_v1.task_planner import TaskPlanner, TaskPlan
|
||
|
||
|
||
# =========================================================================
|
||
# Fixtures
|
||
# =========================================================================
|
||
|
||
@pytest.fixture
def planner():
    """Provide a TaskPlanner configured with a dummy gemma4 port.

    The tests in this module patch ``requests.post``, so the port value is
    only a placeholder.
    """
    planner_settings = {"gemma4_port": "11435", "domain_id": "generic"}
    return TaskPlanner(**planner_settings)
|
||
|
||
|
||
@pytest.fixture
def sample_workflows():
    """Provide the workflows offered to the planner in the matching tests.

    Returns a list of three dicts, each with the keys ``session_id``,
    ``name``, ``description``, ``machine`` and ``event_count``.
    """
    columns = ("session_id", "name", "description", "machine", "event_count")
    rows = (
        (
            "sess_001",
            "Bloc-notes",
            "Ouvrir Bloc-notes via Exécuter (Win+R) et écrire du texte",
            "PC-01",
            25,
        ),
        (
            "sess_002",
            "Explorateur de fichiers",
            "Naviguer dans l'Explorateur de fichiers et ouvrir des images",
            "PC-01",
            40,
        ),
        (
            "sess_003",
            "DxCare, Codage CIM-10",
            "Ouvrir un dossier patient dans DxCare et coder les diagnostics CIM-10",
            "PC-TIM",
            80,
        ),
    )
    return [dict(zip(columns, row)) for row in rows]
|
||
|
||
|
||
def _mock_gemma4_response(content: str):
|
||
"""Créer un mock de réponse HTTP gemma4."""
|
||
mock_resp = MagicMock()
|
||
mock_resp.ok = True
|
||
mock_resp.status_code = 200
|
||
mock_resp.json.return_value = {
|
||
"message": {"content": content}
|
||
}
|
||
return mock_resp
|
||
|
||
|
||
# =========================================================================
|
||
# Tests : understand — ordre simple
|
||
# =========================================================================
|
||
|
||
class TestUnderstandOrdreSimple:
    """Check how understand() turns a gemma4 reply into a TaskPlan."""

    def test_understand_ordre_simple(self, planner, sample_workflows):
        """A plain order ('Ouvre le bloc-notes') is reported as understood."""
        instruction = "Ouvre le bloc-notes"
        fake_reply = _mock_gemma4_response(
            "COMPRIS: OUI\n"
            "WORKFLOW: 1\n"
            "CONFIANCE: 0.9\n"
            "PARAMETRES: AUCUN\n"
            "BOUCLE: NON\n"
            "SOURCE_BOUCLE: aucun\n"
            "PLAN:\n"
            "1. Ouvrir le Bloc-notes via Win+R\n"
            "2. Taper notepad et valider\n"
        )

        with patch("requests.post", return_value=fake_reply):
            plan = planner.understand(
                instruction,
                available_workflows=sample_workflows,
            )

        assert plan.understood is True
        assert plan.instruction == instruction

    def test_understand_instruction_non_comprise(self, planner):
        """A 'COMPRIS: NON' reply yields understood=False."""
        fake_reply = _mock_gemma4_response(
            "COMPRIS: NON\nWORKFLOW: AUCUN\nBOUCLE: NON\n"
        )

        with patch("requests.post", return_value=fake_reply):
            plan = planner.understand("xyzzy blah blah")

        assert plan.understood is False

    def test_understand_gemma4_erreur_http(self, planner):
        """An HTTP 500 from gemma4 is surfaced in plan.error."""
        failing_reply = MagicMock()
        failing_reply.status_code = 500
        failing_reply.ok = False

        with patch("requests.post", return_value=failing_reply):
            plan = planner.understand("Ouvre le bloc-notes")

        assert plan.understood is False
        assert "500" in plan.error

    def test_understand_gemma4_timeout(self, planner):
        """A request timeout is surfaced in plan.error."""
        import requests

        timeout_exc = requests.Timeout("timeout")
        with patch("requests.post", side_effect=timeout_exc):
            plan = planner.understand("Ouvre le bloc-notes")

        assert plan.understood is False
        error_text = plan.error.lower()
        assert any(word in error_text for word in ("erreur", "timeout"))
|
||
|
||
|
||
# =========================================================================
|
||
# Tests : matching workflow
|
||
# =========================================================================
|
||
|
||
class TestUnderstandIdentifieWorkflow:
    """Check that the workflow index in the reply is mapped to a workflow."""

    def test_understand_identifie_workflow(self, planner, sample_workflows):
        """'WORKFLOW: 1' fills workflow_match/workflow_name and selects replay."""
        fake_reply = _mock_gemma4_response(
            "COMPRIS: OUI\n"
            "WORKFLOW: 1\n"
            "CONFIANCE: 0.9\n"
            "PARAMETRES: AUCUN\n"
            "BOUCLE: NON\n"
            "SOURCE_BOUCLE: aucun\n"
            "PLAN:\n"
            "1. Lancer le Bloc-notes\n"
        )

        with patch("requests.post", return_value=fake_reply):
            plan = planner.understand(
                "Ouvre le bloc-notes",
                available_workflows=sample_workflows,
            )

        assert plan.workflow_match == "sess_001"
        assert plan.workflow_name == "Bloc-notes"
        assert plan.mode == "replay"
        assert plan.match_confidence >= 0.8

    def test_understand_workflow_aucun_match(self, planner, sample_workflows):
        """'WORKFLOW: AUCUN' leaves the match empty and selects free mode."""
        fake_reply = _mock_gemma4_response(
            "COMPRIS: OUI\n"
            "WORKFLOW: AUCUN\n"
            "PARAMETRES: AUCUN\n"
            "BOUCLE: NON\n"
            "SOURCE_BOUCLE: aucun\n"
            "PLAN:\n"
            "1. Ouvrir Chrome\n"
            "2. Aller sur Google\n"
        )

        with patch("requests.post", return_value=fake_reply):
            plan = planner.understand(
                "Recherche voiture sur Google",
                available_workflows=sample_workflows,
            )

        assert plan.understood is True
        assert plan.workflow_match == ""
        assert plan.mode == "free"

    def test_understand_workflow_second_match(self, planner, sample_workflows):
        """'WORKFLOW: 2' selects the second workflow of the list."""
        fake_reply = _mock_gemma4_response(
            "COMPRIS: OUI\n"
            "WORKFLOW: 2\n"
            "CONFIANCE: 0.85\n"
            "BOUCLE: NON\n"
            "PLAN:\n"
            "1. Ouvrir l'explorateur de fichiers\n"
        )

        with patch("requests.post", return_value=fake_reply):
            plan = planner.understand(
                "Ouvre mes images",
                available_workflows=sample_workflows,
            )

        assert plan.workflow_match == "sess_002"
        assert plan.workflow_name == "Explorateur de fichiers"

    def test_understand_workflow_avec_description_dans_prompt(self, planner, sample_workflows):
        """The prompt sent to gemma4 embeds the workflow descriptions."""
        sent_payload = {}

        def record_post(url, json=None, **kwargs):
            # Keep the JSON body so the prompt can be inspected afterwards.
            if json:
                sent_payload.update(json)
            return _mock_gemma4_response("COMPRIS: OUI\nWORKFLOW: AUCUN\nBOUCLE: NON\n")

        with patch("requests.post", side_effect=record_post):
            planner.understand(
                "Ouvre le bloc-notes",
                available_workflows=sample_workflows,
            )

        first_message = sent_payload["messages"][0]
        prompt_text = first_message["content"]
        # Descriptions of the available workflows must appear in the prompt.
        assert "Ouvrir Bloc-notes via Exécuter" in prompt_text
        assert "Naviguer dans l'Explorateur" in prompt_text
|
||
|
||
|
||
# =========================================================================
|
||
# Tests : détection de boucle
|
||
# =========================================================================
|
||
|
||
class TestUnderstandDetecteBoucle:
    """Check how the BOUCLE / SOURCE_BOUCLE fields are reflected in the plan."""

    def test_understand_detecte_boucle(self, planner, sample_workflows):
        """'BOUCLE: OUI' with a source sets is_loop and loop_source."""
        fake_reply = _mock_gemma4_response(
            "COMPRIS: OUI\n"
            "WORKFLOW: 3\n"
            "CONFIANCE: 0.8\n"
            "PARAMETRES: AUCUN\n"
            "BOUCLE: OUI\n"
            "SOURCE_BOUCLE: écran\n"
            "PLAN:\n"
            "1. Pour chaque dossier dans la liste\n"
            "2. Ouvrir le dossier\n"
            "3. Coder les diagnostics\n"
        )

        with patch("requests.post", return_value=fake_reply):
            plan = planner.understand(
                "Traite TOUS les dossiers de la liste",
                available_workflows=sample_workflows,
            )

        assert plan.is_loop is True
        assert plan.loop_source == "écran"

    def test_understand_pas_de_boucle(self, planner):
        """'BOUCLE: NON' leaves is_loop False."""
        fake_reply = _mock_gemma4_response(
            "COMPRIS: OUI\n"
            "WORKFLOW: AUCUN\n"
            "BOUCLE: NON\n"
            "SOURCE_BOUCLE: aucun\n"
            "PLAN:\n"
            "1. Ouvrir le navigateur\n"
        )

        with patch("requests.post", return_value=fake_reply):
            plan = planner.understand("Ouvre le navigateur")

        assert plan.is_loop is False
|
||
|
||
|
||
# =========================================================================
|
||
# Tests : extraction de paramètres
|
||
# =========================================================================
|
||
|
||
class TestUnderstandExtraitParametres:
    """Check that key=value pairs from PARAMETRES end up in plan.parameters."""

    def test_understand_extrait_parametres(self, planner, sample_workflows):
        """A single inline 'mois=janvier' pair is extracted."""
        fake_reply = _mock_gemma4_response(
            "COMPRIS: OUI\n"
            "WORKFLOW: 3\n"
            "CONFIANCE: 0.85\n"
            "PARAMETRES: mois=janvier\n"
            "BOUCLE: OUI\n"
            "SOURCE_BOUCLE: écran\n"
            "PLAN:\n"
            "1. Filtrer les dossiers de janvier\n"
        )

        with patch("requests.post", return_value=fake_reply):
            plan = planner.understand(
                "Traite les dossiers de janvier",
                available_workflows=sample_workflows,
            )

        extracted = plan.parameters
        assert "mois" in extracted
        assert extracted["mois"] == "janvier"

    def test_understand_parametres_multiples(self, planner):
        """Parameters listed one per line under PARAMETRES: are all extracted."""
        fake_reply = _mock_gemma4_response(
            "COMPRIS: OUI\n"
            "WORKFLOW: AUCUN\n"
            "PARAMETRES:\n"
            "- patient=DUPONT\n"
            "- date=2026-01-15\n"
            "BOUCLE: NON\n"
            "PLAN:\n"
            "1. Rechercher le patient DUPONT\n"
        )

        with patch("requests.post", return_value=fake_reply):
            plan = planner.understand("Cherche le dossier de DUPONT du 15 janvier")

        extracted = plan.parameters
        assert extracted.get("patient") == "DUPONT"
        assert extracted.get("date") == "2026-01-15"

    def test_understand_parametres_inline(self, planner):
        """Comma-separated parameters on the PARAMETRES: line are extracted."""
        fake_reply = _mock_gemma4_response(
            "COMPRIS: OUI\n"
            "WORKFLOW: AUCUN\n"
            "PARAMETRES: nom=Martin, ville=Paris\n"
            "BOUCLE: NON\n"
            "PLAN:\n"
            "1. Chercher Martin à Paris\n"
        )

        with patch("requests.post", return_value=fake_reply):
            plan = planner.understand("Cherche Martin à Paris")

        extracted = plan.parameters
        assert extracted.get("nom") == "Martin"
        assert extracted.get("ville") == "Paris"
|
||
|
||
|
||
# =========================================================================
|
||
# Tests : _parse_understanding (parsing tolérant)
|
||
# =========================================================================
|
||
|
||
class TestParseUnderstanding:
    """Exercise _parse_understanding() on differently formatted gemma4 replies."""

    def test_parse_markdown_gras(self, planner):
        """Markdown bold markers around the field names do not break parsing."""
        reply = (
            "**COMPRIS:** OUI\n"
            "**WORKFLOW:** AUCUN\n"
            "**BOUCLE:** NON\n"
            "**PLAN:**\n"
            "1. Première étape\n"
        )
        parsed = planner._parse_understanding(TaskPlan(instruction="test"), reply, [])
        assert parsed.understood is True
        assert parsed.mode == "free"

    def test_parse_confiance_pourcentage(self, planner, sample_workflows):
        """'CONFIANCE: 90%' is read as match_confidence 0.9."""
        reply = (
            "COMPRIS: OUI\n"
            "WORKFLOW: 1\n"
            "CONFIANCE: 90%\n"
            "BOUCLE: NON\n"
        )
        parsed = planner._parse_understanding(
            TaskPlan(instruction="test"), reply, sample_workflows
        )
        assert parsed.match_confidence == pytest.approx(0.9)

    def test_parse_confiance_virgule(self, planner, sample_workflows):
        """'CONFIANCE: 0,85' (decimal comma) is read as match_confidence 0.85."""
        reply = (
            "COMPRIS: OUI\n"
            "WORKFLOW: 1\n"
            "CONFIANCE: 0,85\n"
            "BOUCLE: NON\n"
        )
        parsed = planner._parse_understanding(
            TaskPlan(instruction="test"), reply, sample_workflows
        )
        assert parsed.match_confidence == pytest.approx(0.85)

    def test_parse_workflow_avec_parentheses(self, planner, sample_workflows):
        """'WORKFLOW: 2 (Explorateur de fichiers)' still resolves to workflow 2."""
        reply = (
            "COMPRIS: OUI\n"
            "WORKFLOW: 2 (Explorateur de fichiers)\n"
            "BOUCLE: NON\n"
        )
        parsed = planner._parse_understanding(
            TaskPlan(instruction="test"), reply, sample_workflows
        )
        assert parsed.workflow_match == "sess_002"

    def test_parse_workflow_aucun_variantes(self, planner, sample_workflows):
        """Every spelling of 'no workflow' leaves workflow_match empty."""
        no_match_spellings = ("AUCUN", "None", "N/A", "-", "NON")
        for val in no_match_spellings:
            reply = f"COMPRIS: OUI\nWORKFLOW: {val}\nBOUCLE: NON\n"
            parsed = planner._parse_understanding(
                TaskPlan(instruction="test"), reply, sample_workflows
            )
            assert parsed.workflow_match == "", f"Devrait être vide pour '{val}'"

    def test_parse_etapes_tirets(self, planner):
        """Dash-prefixed plan lines are collected as steps."""
        reply = (
            "COMPRIS: OUI\n"
            "WORKFLOW: AUCUN\n"
            "BOUCLE: NON\n"
            "PLAN:\n"
            "- Ouvrir l'application\n"
            "- Cliquer sur Fichier\n"
            "- Sauvegarder\n"
        )
        parsed = planner._parse_understanding(TaskPlan(instruction="test"), reply, [])
        assert len(parsed.steps) == 3
|
||
|
||
|
||
# =========================================================================
|
||
# Tests : _steps_to_actions
|
||
# =========================================================================
|
||
|
||
class TestStepsToActions:
    """Check the conversion of plan steps into JSON actions."""

    def test_steps_to_actions_format(self, planner):
        """Each generated action carries the expected type and fields."""
        fake_reply = _mock_gemma4_response(
            '{"type": "click", "target_spec": {"by_text": "Rechercher"}}\n'
            '{"type": "type", "text": "bloc-notes"}\n'
            '{"type": "key_combo", "keys": ["enter"]}\n'
            '{"type": "wait", "duration_ms": 2000}\n'
        )
        steps = [{"description": "1. Ouvrir le bloc-notes"}]

        with patch("requests.post", return_value=fake_reply):
            actions = planner._steps_to_actions(steps, {})

        assert len(actions) == 4
        click, typed, combo, pause = actions
        assert click["type"] == "click"
        assert click["visual_mode"] is True  # added automatically by the planner
        assert click["target_spec"]["by_text"] == "Rechercher"
        assert typed["type"] == "type"
        assert typed["text"] == "bloc-notes"
        assert combo["type"] == "key_combo"
        assert combo["keys"] == ["enter"]
        assert pause["type"] == "wait"
        assert pause["duration_ms"] == 2000

    def test_steps_to_actions_json_array(self, planner):
        """A JSON array inside a markdown code fence is parsed."""
        fake_reply = _mock_gemma4_response(
            'Voici les actions :\n'
            '```json\n'
            '[\n'
            ' {"type": "click", "target_spec": {"by_text": "Fichier"}},\n'
            ' {"type": "click", "target_spec": {"by_text": "Ouvrir"}}\n'
            ']\n'
            '```\n'
        )
        steps = [{"description": "1. Ouvrir un fichier"}]

        with patch("requests.post", return_value=fake_reply):
            actions = planner._steps_to_actions(steps, {})

        assert len(actions) == 2
        first, second = actions
        assert first["target_spec"]["by_text"] == "Fichier"
        assert second["target_spec"]["by_text"] == "Ouvrir"

    def test_steps_to_actions_nested_json(self, planner):
        """A nested target_spec object is kept intact."""
        fake_reply = _mock_gemma4_response(
            '{"type": "click", "target_spec": {"by_text": "OK", "window_title": "Confirmation"}}\n'
        )
        steps = [{"description": "1. Confirmer"}]

        with patch("requests.post", return_value=fake_reply):
            actions = planner._steps_to_actions(steps, {})

        assert len(actions) == 1
        assert actions[0]["target_spec"]["window_title"] == "Confirmation"

    def test_steps_to_actions_gemma4_erreur(self, planner):
        """A non-ok gemma4 response yields an empty action list."""
        failing_reply = MagicMock()
        failing_reply.ok = False
        steps = [{"description": "1. Faire quelque chose"}]

        with patch("requests.post", return_value=failing_reply):
            actions = planner._steps_to_actions(steps, {})

        assert actions == []

    def test_steps_to_actions_filtre_types_invalides(self, planner):
        """Only entries with a valid type (click, type, key_combo, wait) are kept."""
        fake_reply = _mock_gemma4_response(
            '{"type": "click", "target_spec": {"by_text": "OK"}}\n'
            '{"type": "invalid_action", "foo": "bar"}\n'
            '{"type": "wait", "duration_ms": 500}\n'
            '{"not_a_type": "test"}\n'
        )
        steps = [{"description": "1. Test"}]

        with patch("requests.post", return_value=fake_reply):
            actions = planner._steps_to_actions(steps, {})

        assert len(actions) == 2
        kept_first, kept_second = actions
        assert kept_first["type"] == "click"
        assert kept_second["type"] == "wait"
|
||
|
||
|
||
# =========================================================================
|
||
# Tests : _parse_actions_json (parsing robuste)
|
||
# =========================================================================
|
||
|
||
class TestParseActionsJson:
    """Exercise TaskPlanner._parse_actions_json() on several reply layouts."""

    def test_parse_json_une_par_ligne(self):
        """One JSON object per line."""
        raw = (
            '{"type": "click", "target_spec": {"by_text": "A"}}\n'
            '{"type": "type", "text": "hello"}\n'
        )
        parsed = TaskPlanner._parse_actions_json(raw)
        assert len(parsed) == 2

    def test_parse_json_array(self):
        """A single-line JSON array."""
        raw = '[{"type": "click", "target_spec": {"by_text": "A"}}, {"type": "wait", "duration_ms": 1000}]'
        parsed = TaskPlanner._parse_actions_json(raw)
        assert len(parsed) == 2

    def test_parse_json_avec_texte_autour(self):
        """A JSON object surrounded by free text."""
        raw = (
            "Voici les actions RPA :\n\n"
            '{"type": "click", "target_spec": {"by_text": "Envoyer"}}\n'
            "\n"
            "C'est tout.\n"
        )
        parsed = TaskPlanner._parse_actions_json(raw)
        assert len(parsed) == 1
        assert parsed[0]["target_spec"]["by_text"] == "Envoyer"

    def test_parse_json_vide(self):
        """Empty content or content without JSON gives an empty list."""
        for raw in ("", "Pas de JSON ici"):
            assert TaskPlanner._parse_actions_json(raw) == []

    def test_parse_json_markdown_code_block(self):
        """A JSON object inside a markdown code fence."""
        raw = (
            "```json\n"
            '{"type": "type", "text": "bonjour"}\n'
            "```\n"
        )
        parsed = TaskPlanner._parse_actions_json(raw)
        assert len(parsed) == 1
        assert parsed[0]["text"] == "bonjour"
|
||
|
||
|
||
# =========================================================================
|
||
# Tests : _extract_session_description
|
||
# =========================================================================
|
||
|
||
class TestExtractSessionDescription:
    """Check that session descriptions built from event logs are readable."""

    def _write_events(self, tmp_path, events):
        """Write *events* as JSON Lines into ``tmp_path / "live_events.jsonl"``.

        Args:
            tmp_path: Directory (a ``pathlib.Path``) that receives the file.
            events: Iterable of JSON-serialisable event dicts.

        Returns:
            The path of the written file.
        """
        events_file = tmp_path / "live_events.jsonl"
        # The events hold non-ASCII text and are dumped with ensure_ascii=False,
        # so the encoding is pinned instead of depending on the platform locale.
        # NOTE(review): assumes _extract_session_description decodes UTF-8 — confirm.
        with open(events_file, "w", encoding="utf-8") as f:
            f.writelines(
                json.dumps(evt, ensure_ascii=False) + "\n" for evt in events
            )
        return events_file

    def _describe(self, tmp_path, events):
        """Write *events* to disk and return _extract_session_description()'s result."""
        events_file = self._write_events(tmp_path, events)
        # The function lives at module level in api_stream; it is imported
        # inside the helper rather than at the top of the test module.
        from agent_v0.server_v1.api_stream import _extract_session_description

        return _extract_session_description(events_file)

    def test_extract_session_description_bloc_notes(self, tmp_path):
        """A Notepad session started through Win+R is described semantically."""
        run_dialog = {"title": "Exécuter"}
        notepad = {"title": "Sans titre – Bloc-notes"}
        events = [
            {"event": {"type": "key_combo", "keys": ["win", "r"],
                       "window": {"title": "Bureau"}}},
            {"event": {"type": "window_focus_change",
                       "from": {"title": "Bureau"},
                       "to": dict(run_dialog)}},
            {"event": {"type": "text_input", "text": "notepad",
                       "window": dict(run_dialog)}},
            {"event": {"type": "mouse_click", "button": "left",
                       "window": dict(run_dialog)}},
            {"event": {"type": "window_focus_change",
                       "from": dict(run_dialog),
                       "to": dict(notepad)}},
            {"event": {"type": "text_input", "text": "Bonjour le monde",
                       "window": dict(notepad)}},
        ]

        desc = self._describe(tmp_path, events)

        assert desc["event_count"] == 6
        # The description must mention the application, whatever its casing.
        assert "bloc-notes" in desc["description"].lower()
        # The name must contain the application.
        assert "Bloc-notes" in desc["name"]

    def test_extract_session_description_explorateur(self, tmp_path):
        """A File Explorer session mentions the Explorer in name or description."""
        explorer = {"title": "Images – Explorateur de fichiers"}
        events = [
            {"event": {"type": "window_focus_change",
                       "from": {"title": "Bureau"},
                       "to": dict(explorer)}},
            {"event": {"type": "mouse_click", "button": "left",
                       "window": dict(explorer)}},
            {"event": {"type": "mouse_click", "button": "left",
                       "window": dict(explorer)}},
            {"event": {"type": "mouse_click", "button": "left",
                       "window": dict(explorer)}},
        ]

        desc = self._describe(tmp_path, events)

        assert "Explorateur" in desc["name"] or "Explorateur" in desc["description"]

    def test_extract_session_description_vide(self, tmp_path):
        """An empty event file gives the default name and a zero event count."""
        desc = self._describe(tmp_path, [])

        assert desc["event_count"] == 0
        assert desc["name"] == "Session sans nom"

    def test_extract_session_description_cmd(self, tmp_path):
        """A cmd.exe session mentions cmd in its name or description."""
        console = {"title": "C:\\Windows\\system32\\cmd.exe"}
        events = [
            {"event": {"type": "window_focus_change",
                       "from": {"title": "Bureau"},
                       "to": dict(console)}},
            {"event": {"type": "text_input", "text": "dir",
                       "window": dict(console)}},
            {"event": {"type": "text_input", "text": "cd documents",
                       "window": dict(console)}},
        ]

        desc = self._describe(tmp_path, events)

        assert desc["event_count"] == 3
        # Either the name or the description must mention cmd.
        full = f"{desc['name']} {desc['description']}"
        assert "cmd" in full.lower()

    def test_extract_session_description_recherche_windows(self, tmp_path):
        """A Windows search session (Win+S) mentions the search in its description."""
        events = [
            {"event": {"type": "key_combo", "keys": ["win", "s"],
                       "window": {"title": "Bureau"}}},
            {"event": {"type": "window_focus_change",
                       "from": {"title": "Bureau"},
                       "to": {"title": "Rechercher"}}},
            {"event": {"type": "text_input", "text": "calculator",
                       "window": {"title": "Rechercher"}}},
        ]

        desc = self._describe(tmp_path, events)

        # The description must mention the Windows search.
        assert "recherche" in desc["description"].lower()
|
||
|
||
|
||
# =========================================================================
|
||
# Tests : list_capabilities
|
||
# =========================================================================
|
||
|
||
class TestListCapabilities:
    """Check the text produced by list_capabilities()."""

    def test_list_capabilities_avec_workflows(self, planner, sample_workflows):
        """With workflows, the text has the intro phrase and a workflow name."""
        listing = planner.list_capabilities(sample_workflows)
        for expected in ("Léa sait faire", "Bloc-notes"):
            assert expected in listing

    def test_list_capabilities_sans_workflows(self, planner):
        """Without workflows, the text says nothing has been learned yet."""
        listing = planner.list_capabilities([])
        assert "pas encore appris" in listing
|
||
|
||
|
||
# =========================================================================
|
||
# Tests : execute (mode replay et free)
|
||
# =========================================================================
|
||
|
||
class TestExecute:
    """Check how execute() handles the plans it is given."""

    def test_execute_replay(self, planner):
        """Replay mode calls the callback once with the matched session id."""
        plan = TaskPlan(
            instruction="Ouvre le bloc-notes",
            understood=True,
            workflow_match="sess_001",
            workflow_name="Bloc-notes",
            mode="replay",
        )
        callback = MagicMock(return_value="replay_123")

        result = planner.execute(plan, replay_callback=callback)

        assert result.success is True
        callback.assert_called_once_with(
            session_id="sess_001",
            machine_id="default",
            params={},
        )

    def test_execute_non_compris(self, planner):
        """A plan that was not understood fails with an explanatory summary."""
        plan = TaskPlan(instruction="blah", understood=False)

        result = planner.execute(plan)

        assert result.success is False
        # A case-insensitive check is enough: an exact-case match would also
        # be found in the lowercased summary.
        assert "non comprise" in result.summary.lower()

    def test_execute_sans_callback(self, planner):
        """Replay mode without a callback fails."""
        plan = TaskPlan(
            instruction="test",
            understood=True,
            workflow_match="sess_001",
            mode="replay",
        )

        result = planner.execute(plan, replay_callback=None)

        assert result.success is False
|