Files
rpa_vision_v3/tests/unit/test_task_planner.py
Dom 99041f0117 feat: pipeline complet MACRO/MÉSO/MICRO — Critic, Observer, Policy, Recovery, Learning, Audit Trail, TaskPlanner
Architecture 3 niveaux implémentée et testée (137 tests unitaires + 21 visuels) :

MÉSO (acteur intelligent) :
- P0 Critic : vérification sémantique post-action via gemma4 (replay_verifier.py)
- P1 Observer : pré-analyse écran avant chaque action (api_stream.py /pre_analyze)
- P2 Grounding/Policy : séparation localisation (grounding.py) et décision (policy.py)
- P3 Recovery : rollback automatique Ctrl+Z/Escape/Alt+F4 (recovery.py)
- P4 Learning : apprentissage runtime avec boucle de consolidation (replay_learner.py)

MACRO (planificateur) :
- TaskPlanner : comprend les ordres en langage naturel via gemma4 (task_planner.py)
- Contexte métier TIM/CIM-10 pour les hôpitaux (domain_context.py)
- Endpoint POST /api/v1/task pour l'exécution par instruction

Traçabilité :
- Audit trail complet avec 18 champs par action (audit_trail.py)
- Endpoints GET /audit/history, /audit/summary, /audit/export (CSV)

Grounding :
- Fix parsing bbox_2d qwen2.5vl (pixels relatifs, pas grille 1000x1000)
- Benchmarks visuels sur captures réelles (3 approches : baseline, zoom, Citrix)
- Reproductibilité validée : variance < 0.008 sur 10 itérations

Sécurité :
- Tokens de production retirés du code source → .env.local
- Secret key aléatoire si non configuré
- Suppression logs qui leakent les tokens

Résultats : 80% de replay (vs 12.5% avant), 100% détection visuelle Citrix JPEG Q20

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-09 21:03:25 +02:00

763 lines
28 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# tests/unit/test_task_planner.py
"""
Tests unitaires du TaskPlanner (planificateur MACRO).
Vérifie :
1. La compréhension d'ordres simples (understand)
2. Le matching de workflows par description sémantique
3. La détection de boucles et l'extraction de paramètres
4. La conversion étapes → actions JSON (format correct)
5. L'extraction de descriptions de session
Toutes les réponses gemma4 sont mockées pour la reproductibilité.
"""
import json
import os
import sys
import tempfile
from pathlib import Path
from unittest.mock import MagicMock, patch, Mock
import pytest
_ROOT = str(Path(__file__).resolve().parents[2])
if _ROOT not in sys.path:
sys.path.insert(0, _ROOT)
from agent_v0.server_v1.task_planner import TaskPlanner, TaskPlan
# =========================================================================
# Fixtures
# =========================================================================
@pytest.fixture
def planner():
    """Provide a TaskPlanner configured with a dummy gemma4 port."""
    dummy_port_planner = TaskPlanner(gemma4_port="11435", domain_id="generic")
    return dummy_port_planner
@pytest.fixture
def sample_workflows():
    """Return the workflows made available to the matching tests."""
    # Column order matches the key order of each workflow dict.
    columns = ("session_id", "name", "description", "machine", "event_count")
    rows = (
        (
            "sess_001",
            "Bloc-notes",
            "Ouvrir Bloc-notes via Exécuter (Win+R) et écrire du texte",
            "PC-01",
            25,
        ),
        (
            "sess_002",
            "Explorateur de fichiers",
            "Naviguer dans l'Explorateur de fichiers et ouvrir des images",
            "PC-01",
            40,
        ),
        (
            "sess_003",
            "DxCare, Codage CIM-10",
            "Ouvrir un dossier patient dans DxCare et coder les diagnostics CIM-10",
            "PC-TIM",
            80,
        ),
    )
    return [dict(zip(columns, row)) for row in rows]
def _mock_gemma4_response(content: str, status_code: int = 200):
    """Build a mock of the HTTP response returned by gemma4.

    Args:
        content: Text placed under ``message.content`` in the JSON payload.
        status_code: HTTP status to simulate. Defaults to 200, so existing
            callers keep receiving a successful response.

    Returns:
        A ``MagicMock`` exposing ``ok``, ``status_code`` and ``json()``.
    """
    mock_resp = MagicMock()
    # Same rule as requests.Response.ok: any status below 400 is a success.
    mock_resp.ok = status_code < 400
    mock_resp.status_code = status_code
    mock_resp.json.return_value = {"message": {"content": content}}
    return mock_resp
# =========================================================================
# Tests : understand — ordre simple
# =========================================================================
class TestUnderstandOrdreSimple:
    """Check that understand() correctly parses gemma4 replies."""

    def test_understand_ordre_simple(self, planner, sample_workflows):
        """'Ouvre le bloc-notes' -> understood=True."""
        canned = _mock_gemma4_response(
            "COMPRIS: OUI\n"
            "WORKFLOW: 1\n"
            "CONFIANCE: 0.9\n"
            "PARAMETRES: AUCUN\n"
            "BOUCLE: NON\n"
            "SOURCE_BOUCLE: aucun\n"
            "PLAN:\n"
            "1. Ouvrir le Bloc-notes via Win+R\n"
            "2. Taper notepad et valider\n"
        )
        with patch("requests.post", return_value=canned):
            result = planner.understand(
                "Ouvre le bloc-notes",
                available_workflows=sample_workflows,
            )
        assert result.understood is True
        assert result.instruction == "Ouvre le bloc-notes"

    def test_understand_instruction_non_comprise(self, planner):
        """Unintelligible instruction -> understood=False."""
        canned = _mock_gemma4_response(
            "COMPRIS: NON\nWORKFLOW: AUCUN\nBOUCLE: NON\n"
        )
        with patch("requests.post", return_value=canned):
            result = planner.understand("xyzzy blah blah")
        assert result.understood is False

    def test_understand_gemma4_erreur_http(self, planner):
        """gemma4 HTTP error -> plan.error is filled in."""
        failing = MagicMock(ok=False, status_code=500)
        with patch("requests.post", return_value=failing):
            result = planner.understand("Ouvre le bloc-notes")
        assert result.understood is False
        assert "500" in result.error

    def test_understand_gemma4_timeout(self, planner):
        """gemma4 timeout -> plan.error is filled in."""
        from requests import Timeout

        with patch("requests.post", side_effect=Timeout("timeout")):
            result = planner.understand("Ouvre le bloc-notes")
        assert result.understood is False
        lowered = result.error.lower()
        assert "erreur" in lowered or "timeout" in lowered
# =========================================================================
# Tests : matching workflow
# =========================================================================
class TestUnderstandIdentifieWorkflow:
    """Check that workflow matching works."""

    def test_understand_identifie_workflow(self, planner, sample_workflows):
        """When a workflow matches, workflow_match is populated."""
        canned = _mock_gemma4_response(
            "COMPRIS: OUI\n"
            "WORKFLOW: 1\n"
            "CONFIANCE: 0.9\n"
            "PARAMETRES: AUCUN\n"
            "BOUCLE: NON\n"
            "SOURCE_BOUCLE: aucun\n"
            "PLAN:\n"
            "1. Lancer le Bloc-notes\n"
        )
        with patch("requests.post", return_value=canned):
            result = planner.understand(
                "Ouvre le bloc-notes",
                available_workflows=sample_workflows,
            )
        assert result.workflow_match == "sess_001"
        assert result.workflow_name == "Bloc-notes"
        assert result.mode == "replay"
        assert result.match_confidence >= 0.8

    def test_understand_workflow_aucun_match(self, planner, sample_workflows):
        """No matching workflow -> free mode."""
        canned = _mock_gemma4_response(
            "COMPRIS: OUI\n"
            "WORKFLOW: AUCUN\n"
            "PARAMETRES: AUCUN\n"
            "BOUCLE: NON\n"
            "SOURCE_BOUCLE: aucun\n"
            "PLAN:\n"
            "1. Ouvrir Chrome\n"
            "2. Aller sur Google\n"
        )
        with patch("requests.post", return_value=canned):
            result = planner.understand(
                "Recherche voiture sur Google",
                available_workflows=sample_workflows,
            )
        assert result.understood is True
        assert result.workflow_match == ""
        assert result.mode == "free"

    def test_understand_workflow_second_match(self, planner, sample_workflows):
        """Workflow 2 is selected correctly."""
        canned = _mock_gemma4_response(
            "COMPRIS: OUI\n"
            "WORKFLOW: 2\n"
            "CONFIANCE: 0.85\n"
            "BOUCLE: NON\n"
            "PLAN:\n"
            "1. Ouvrir l'explorateur de fichiers\n"
        )
        with patch("requests.post", return_value=canned):
            result = planner.understand(
                "Ouvre mes images",
                available_workflows=sample_workflows,
            )
        assert result.workflow_match == "sess_002"
        assert result.workflow_name == "Explorateur de fichiers"

    def test_understand_workflow_avec_description_dans_prompt(self, planner, sample_workflows):
        """The prompt sent to gemma4 includes the workflow descriptions."""
        sent_payload = {}

        def record_post(url, json=None, **kwargs):
            # Stand-in for requests.post: keep the JSON body for inspection.
            if json:
                sent_payload.update(json)
            return _mock_gemma4_response("COMPRIS: OUI\nWORKFLOW: AUCUN\nBOUCLE: NON\n")

        with patch("requests.post", side_effect=record_post):
            planner.understand(
                "Ouvre le bloc-notes",
                available_workflows=sample_workflows,
            )
        first_message = sent_payload["messages"][0]
        prompt_text = first_message["content"]
        # The descriptions must show up in the prompt.
        assert "Ouvrir Bloc-notes via Exécuter" in prompt_text
        assert "Naviguer dans l'Explorateur" in prompt_text
# =========================================================================
# Tests : détection de boucle
# =========================================================================
class TestUnderstandDetecteBoucle:
    """Check loop detection."""

    def test_understand_detecte_boucle(self, planner, sample_workflows):
        """'traite TOUS les dossiers' -> is_loop=True."""
        canned = _mock_gemma4_response(
            "COMPRIS: OUI\n"
            "WORKFLOW: 3\n"
            "CONFIANCE: 0.8\n"
            "PARAMETRES: AUCUN\n"
            "BOUCLE: OUI\n"
            "SOURCE_BOUCLE: écran\n"
            "PLAN:\n"
            "1. Pour chaque dossier dans la liste\n"
            "2. Ouvrir le dossier\n"
            "3. Coder les diagnostics\n"
        )
        with patch("requests.post", return_value=canned):
            result = planner.understand(
                "Traite TOUS les dossiers de la liste",
                available_workflows=sample_workflows,
            )
        assert result.is_loop is True
        assert result.loop_source == "écran"

    def test_understand_pas_de_boucle(self, planner):
        """Simple order -> is_loop=False."""
        canned = _mock_gemma4_response(
            "COMPRIS: OUI\n"
            "WORKFLOW: AUCUN\n"
            "BOUCLE: NON\n"
            "SOURCE_BOUCLE: aucun\n"
            "PLAN:\n"
            "1. Ouvrir le navigateur\n"
        )
        with patch("requests.post", return_value=canned):
            result = planner.understand("Ouvre le navigateur")
        assert result.is_loop is False
# =========================================================================
# Tests : extraction de paramètres
# =========================================================================
class TestUnderstandExtraitParametres:
    """Check parameter extraction."""

    def test_understand_extrait_parametres(self, planner, sample_workflows):
        """'dossiers de janvier' -> parameters contains mois=janvier."""
        canned = _mock_gemma4_response(
            "COMPRIS: OUI\n"
            "WORKFLOW: 3\n"
            "CONFIANCE: 0.85\n"
            "PARAMETRES: mois=janvier\n"
            "BOUCLE: OUI\n"
            "SOURCE_BOUCLE: écran\n"
            "PLAN:\n"
            "1. Filtrer les dossiers de janvier\n"
        )
        with patch("requests.post", return_value=canned):
            result = planner.understand(
                "Traite les dossiers de janvier",
                available_workflows=sample_workflows,
            )
        extracted = result.parameters
        assert "mois" in extracted
        assert extracted["mois"] == "janvier"

    def test_understand_parametres_multiples(self, planner):
        """Several parameters, each on its own line."""
        canned = _mock_gemma4_response(
            "COMPRIS: OUI\n"
            "WORKFLOW: AUCUN\n"
            "PARAMETRES:\n"
            "- patient=DUPONT\n"
            "- date=2026-01-15\n"
            "BOUCLE: NON\n"
            "PLAN:\n"
            "1. Rechercher le patient DUPONT\n"
        )
        with patch("requests.post", return_value=canned):
            result = planner.understand("Cherche le dossier de DUPONT du 15 janvier")
        extracted = result.parameters
        assert extracted.get("patient") == "DUPONT"
        assert extracted.get("date") == "2026-01-15"

    def test_understand_parametres_inline(self, planner):
        """Parameters on the same line as PARAMETRES:."""
        canned = _mock_gemma4_response(
            "COMPRIS: OUI\n"
            "WORKFLOW: AUCUN\n"
            "PARAMETRES: nom=Martin, ville=Paris\n"
            "BOUCLE: NON\n"
            "PLAN:\n"
            "1. Chercher Martin à Paris\n"
        )
        with patch("requests.post", return_value=canned):
            result = planner.understand("Cherche Martin à Paris")
        extracted = result.parameters
        assert extracted.get("nom") == "Martin"
        assert extracted.get("ville") == "Paris"
# =========================================================================
# Tests : _parse_understanding (parsing tolérant)
# =========================================================================
class TestParseUnderstanding:
    """Exercise the tolerant parsing of assorted gemma4 reply formats."""

    def test_parse_markdown_gras(self, planner):
        """Reply using **bold** markdown -> parsed correctly."""
        plan = TaskPlan(instruction="test")
        content = (
            "**COMPRIS:** OUI\n"
            "**WORKFLOW:** AUCUN\n"
            "**BOUCLE:** NON\n"
            "**PLAN:**\n"
            "1. Première étape\n"
        )
        result = planner._parse_understanding(plan, content, [])
        assert result.understood is True
        assert result.mode == "free"

    def test_parse_confiance_pourcentage(self, planner, sample_workflows):
        """CONFIANCE: 90% -> match_confidence=0.9."""
        plan = TaskPlan(instruction="test")
        content = (
            "COMPRIS: OUI\n"
            "WORKFLOW: 1\n"
            "CONFIANCE: 90%\n"
            "BOUCLE: NON\n"
        )
        result = planner._parse_understanding(plan, content, sample_workflows)
        assert result.match_confidence == pytest.approx(0.9)

    def test_parse_confiance_virgule(self, planner, sample_workflows):
        """CONFIANCE: 0,85 -> match_confidence=0.85."""
        plan = TaskPlan(instruction="test")
        content = (
            "COMPRIS: OUI\n"
            "WORKFLOW: 1\n"
            "CONFIANCE: 0,85\n"
            "BOUCLE: NON\n"
        )
        result = planner._parse_understanding(plan, content, sample_workflows)
        assert result.match_confidence == pytest.approx(0.85)

    def test_parse_workflow_avec_parentheses(self, planner, sample_workflows):
        """WORKFLOW: 2 (Explorateur) -> index 2 is extracted correctly."""
        plan = TaskPlan(instruction="test")
        content = (
            "COMPRIS: OUI\n"
            "WORKFLOW: 2 (Explorateur de fichiers)\n"
            "BOUCLE: NON\n"
        )
        result = planner._parse_understanding(plan, content, sample_workflows)
        assert result.workflow_match == "sess_002"

    # One test case per spelling, so a failing variant is reported on its own
    # instead of aborting the loop and hiding the remaining ones.
    @pytest.mark.parametrize("val", ["AUCUN", "None", "N/A", "-", "NON"])
    def test_parse_workflow_aucun_variantes(self, planner, sample_workflows, val):
        """Every spelling of 'none' leaves workflow_match empty."""
        plan = TaskPlan(instruction="test")
        content = f"COMPRIS: OUI\nWORKFLOW: {val}\nBOUCLE: NON\n"
        result = planner._parse_understanding(plan, content, sample_workflows)
        assert result.workflow_match == "", f"Devrait être vide pour '{val}'"

    def test_parse_etapes_tirets(self, planner):
        """Steps written as dash bullets -> added to the plan."""
        plan = TaskPlan(instruction="test")
        content = (
            "COMPRIS: OUI\n"
            "WORKFLOW: AUCUN\n"
            "BOUCLE: NON\n"
            "PLAN:\n"
            "- Ouvrir l'application\n"
            "- Cliquer sur Fichier\n"
            "- Sauvegarder\n"
        )
        result = planner._parse_understanding(plan, content, [])
        assert len(result.steps) == 3
# =========================================================================
# Tests : _steps_to_actions
# =========================================================================
class TestStepsToActions:
    """Check the conversion of plan steps into JSON actions."""

    def test_steps_to_actions_format(self, planner):
        """Generated actions have the expected shape (type, target_spec, ...)."""
        canned = _mock_gemma4_response(
            '{"type": "click", "target_spec": {"by_text": "Rechercher"}}\n'
            '{"type": "type", "text": "bloc-notes"}\n'
            '{"type": "key_combo", "keys": ["enter"]}\n'
            '{"type": "wait", "duration_ms": 2000}\n'
        )
        steps = [{"description": "1. Ouvrir le bloc-notes"}]
        with patch("requests.post", return_value=canned):
            actions = planner._steps_to_actions(steps, {})
        assert len(actions) == 4
        click, typed, combo, pause = actions
        assert click["type"] == "click"
        assert click["visual_mode"] is True  # Added automatically
        assert click["target_spec"]["by_text"] == "Rechercher"
        assert typed["type"] == "type"
        assert typed["text"] == "bloc-notes"
        assert combo["type"] == "key_combo"
        assert combo["keys"] == ["enter"]
        assert pause["type"] == "wait"
        assert pause["duration_ms"] == 2000

    def test_steps_to_actions_json_array(self, planner):
        """gemma4 returns a JSON array -> parsed correctly."""
        canned = _mock_gemma4_response(
            'Voici les actions :\n'
            '```json\n'
            '[\n'
            ' {"type": "click", "target_spec": {"by_text": "Fichier"}},\n'
            ' {"type": "click", "target_spec": {"by_text": "Ouvrir"}}\n'
            ']\n'
            '```\n'
        )
        steps = [{"description": "1. Ouvrir un fichier"}]
        with patch("requests.post", return_value=canned):
            actions = planner._steps_to_actions(steps, {})
        assert len(actions) == 2
        first, second = actions
        assert first["target_spec"]["by_text"] == "Fichier"
        assert second["target_spec"]["by_text"] == "Ouvrir"

    def test_steps_to_actions_nested_json(self, planner):
        """Nested JSON (target_spec) -> parsed correctly."""
        canned = _mock_gemma4_response(
            '{"type": "click", "target_spec": {"by_text": "OK", "window_title": "Confirmation"}}\n'
        )
        steps = [{"description": "1. Confirmer"}]
        with patch("requests.post", return_value=canned):
            actions = planner._steps_to_actions(steps, {})
        assert len(actions) == 1
        assert actions[0]["target_spec"]["window_title"] == "Confirmation"

    def test_steps_to_actions_gemma4_erreur(self, planner):
        """gemma4 error -> empty list."""
        failing = MagicMock(ok=False)
        steps = [{"description": "1. Faire quelque chose"}]
        with patch("requests.post", return_value=failing):
            actions = planner._steps_to_actions(steps, {})
        assert actions == []

    def test_steps_to_actions_filtre_types_invalides(self, planner):
        """Only valid types (click, type, key_combo, wait) are accepted."""
        canned = _mock_gemma4_response(
            '{"type": "click", "target_spec": {"by_text": "OK"}}\n'
            '{"type": "invalid_action", "foo": "bar"}\n'
            '{"type": "wait", "duration_ms": 500}\n'
            '{"not_a_type": "test"}\n'
        )
        steps = [{"description": "1. Test"}]
        with patch("requests.post", return_value=canned):
            actions = planner._steps_to_actions(steps, {})
        assert len(actions) == 2
        kept_click, kept_wait = actions
        assert kept_click["type"] == "click"
        assert kept_wait["type"] == "wait"
# =========================================================================
# Tests : _parse_actions_json (parsing robuste)
# =========================================================================
class TestParseActionsJson:
    """Exercise the robust parsing of JSON actions."""

    def test_parse_json_une_par_ligne(self):
        """JSON actions, one per line."""
        raw = (
            '{"type": "click", "target_spec": {"by_text": "A"}}\n'
            '{"type": "type", "text": "hello"}\n'
        )
        parsed = TaskPlanner._parse_actions_json(raw)
        assert len(parsed) == 2

    def test_parse_json_array(self):
        """JSON array."""
        raw = '[{"type": "click", "target_spec": {"by_text": "A"}}, {"type": "wait", "duration_ms": 1000}]'
        parsed = TaskPlanner._parse_actions_json(raw)
        assert len(parsed) == 2

    def test_parse_json_avec_texte_autour(self):
        """JSON surrounded by free-text commentary."""
        raw = (
            "Voici les actions RPA :\n\n"
            '{"type": "click", "target_spec": {"by_text": "Envoyer"}}\n'
            "\n"
            "C'est tout.\n"
        )
        parsed = TaskPlanner._parse_actions_json(raw)
        assert len(parsed) == 1
        only_action = parsed[0]
        assert only_action["target_spec"]["by_text"] == "Envoyer"

    def test_parse_json_vide(self):
        """Empty or JSON-free content -> empty list."""
        for raw in ("", "Pas de JSON ici"):
            assert TaskPlanner._parse_actions_json(raw) == []

    def test_parse_json_markdown_code_block(self):
        """JSON inside a markdown code block."""
        raw = (
            "```json\n"
            '{"type": "type", "text": "bonjour"}\n'
            "```\n"
        )
        parsed = TaskPlanner._parse_actions_json(raw)
        assert len(parsed) == 1
        only_action = parsed[0]
        assert only_action["text"] == "bonjour"
# =========================================================================
# Tests : _extract_session_description
# =========================================================================
class TestExtractSessionDescription:
    """Check that session descriptions are readable and semantic."""

    def _write_events(self, tmp_path, events):
        """Write events to a temporary JSONL file and return its path.

        Args:
            tmp_path: Directory in which ``live_events.jsonl`` is created.
            events: Iterable of JSON-serialisable event dicts, one per line.

        Returns:
            Path of the written file.
        """
        events_file = tmp_path / "live_events.jsonl"
        # ensure_ascii=False emits raw non-ASCII characters (e.g. "Exécuter"),
        # so the encoding is pinned instead of depending on the platform locale.
        # NOTE(review): assumes _extract_session_description reads UTF-8 — confirm.
        with open(events_file, "w", encoding="utf-8") as f:
            for evt in events:
                f.write(json.dumps(evt, ensure_ascii=False) + "\n")
        return events_file

    def test_extract_session_description_bloc_notes(self, tmp_path):
        """Notepad session opened via Win+R -> semantic description."""
        events = [
            {"event": {"type": "key_combo", "keys": ["win", "r"],
                       "window": {"title": "Bureau"}}},
            {"event": {"type": "window_focus_change",
                       "from": {"title": "Bureau"},
                       "to": {"title": "Exécuter"}}},
            {"event": {"type": "text_input", "text": "notepad",
                       "window": {"title": "Exécuter"}}},
            {"event": {"type": "mouse_click", "button": "left",
                       "window": {"title": "Exécuter"}}},
            {"event": {"type": "window_focus_change",
                       "from": {"title": "Exécuter"},
                       "to": {"title": "Sans titre Bloc-notes"}}},
            {"event": {"type": "text_input", "text": "Bonjour le monde",
                       "window": {"title": "Sans titre Bloc-notes"}}},
        ]
        events_file = self._write_events(tmp_path, events)
        # Imported from api_stream (the function lives at module level).
        from agent_v0.server_v1.api_stream import _extract_session_description
        desc = _extract_session_description(events_file)
        assert desc["event_count"] == 6
        # The description must be readable, not merely "Bloc-notes, Exécuter".
        description = desc["description"]
        # A case-insensitive check also covers the exact-case spelling.
        assert "bloc-notes" in description.lower()
        # The name must contain the application.
        assert "Bloc-notes" in desc["name"]

    def test_extract_session_description_explorateur(self, tmp_path):
        """File Explorer session -> relevant description."""
        events = [
            {"event": {"type": "window_focus_change",
                       "from": {"title": "Bureau"},
                       "to": {"title": "Images Explorateur de fichiers"}}},
            {"event": {"type": "mouse_click", "button": "left",
                       "window": {"title": "Images Explorateur de fichiers"}}},
            {"event": {"type": "mouse_click", "button": "left",
                       "window": {"title": "Images Explorateur de fichiers"}}},
            {"event": {"type": "mouse_click", "button": "left",
                       "window": {"title": "Images Explorateur de fichiers"}}},
        ]
        events_file = self._write_events(tmp_path, events)
        from agent_v0.server_v1.api_stream import _extract_session_description
        desc = _extract_session_description(events_file)
        assert "Explorateur" in desc["name"] or "Explorateur" in desc["description"]

    def test_extract_session_description_vide(self, tmp_path):
        """Empty file -> default description."""
        events_file = self._write_events(tmp_path, [])
        from agent_v0.server_v1.api_stream import _extract_session_description
        desc = _extract_session_description(events_file)
        assert desc["event_count"] == 0
        assert desc["name"] == "Session sans nom"

    def test_extract_session_description_cmd(self, tmp_path):
        """Session using cmd.exe -> description mentions cmd."""
        events = [
            {"event": {"type": "window_focus_change",
                       "from": {"title": "Bureau"},
                       "to": {"title": "C:\\Windows\\system32\\cmd.exe"}}},
            {"event": {"type": "text_input", "text": "dir",
                       "window": {"title": "C:\\Windows\\system32\\cmd.exe"}}},
            {"event": {"type": "text_input", "text": "cd documents",
                       "window": {"title": "C:\\Windows\\system32\\cmd.exe"}}},
        ]
        events_file = self._write_events(tmp_path, events)
        from agent_v0.server_v1.api_stream import _extract_session_description
        desc = _extract_session_description(events_file)
        assert desc["event_count"] == 3
        # Either the name or the description must mention cmd.
        full = f"{desc['name']} {desc['description']}"
        assert "cmd" in full.lower()

    def test_extract_session_description_recherche_windows(self, tmp_path):
        """Session using Windows search (Win+S) -> description mentions the search."""
        events = [
            {"event": {"type": "key_combo", "keys": ["win", "s"],
                       "window": {"title": "Bureau"}}},
            {"event": {"type": "window_focus_change",
                       "from": {"title": "Bureau"},
                       "to": {"title": "Rechercher"}}},
            {"event": {"type": "text_input", "text": "calculator",
                       "window": {"title": "Rechercher"}}},
        ]
        events_file = self._write_events(tmp_path, events)
        from agent_v0.server_v1.api_stream import _extract_session_description
        desc = _extract_session_description(events_file)
        # The description must mention the Windows search.
        assert "recherche" in desc["description"].lower()
# =========================================================================
# Tests : list_capabilities
# =========================================================================
class TestListCapabilities:
    """Check the listing of capabilities."""

    def test_list_capabilities_avec_workflows(self, planner, sample_workflows):
        """With workflows -> readable text that includes them."""
        listing = planner.list_capabilities(sample_workflows)
        for expected in ("Léa sait faire", "Bloc-notes"):
            assert expected in listing

    def test_list_capabilities_sans_workflows(self, planner):
        """Without workflows -> help message."""
        listing = planner.list_capabilities([])
        assert "pas encore appris" in listing
# =========================================================================
# Tests : execute (mode replay et free)
# =========================================================================
class TestExecute:
    """Check plan execution."""

    def test_execute_replay(self, planner):
        """Replay mode -> callback is called with the right session_id."""
        plan = TaskPlan(
            instruction="Ouvre le bloc-notes",
            understood=True,
            workflow_match="sess_001",
            workflow_name="Bloc-notes",
            mode="replay",
        )
        callback = MagicMock(return_value="replay_123")
        result = planner.execute(plan, replay_callback=callback)
        assert result.success is True
        callback.assert_called_once_with(
            session_id="sess_001",
            machine_id="default",
            params={},
        )

    def test_execute_non_compris(self, planner):
        """Plan that was not understood -> failure."""
        plan = TaskPlan(instruction="blah", understood=False)
        result = planner.execute(plan)
        assert result.success is False
        # The case-insensitive check subsumes the exact-case one.
        assert "non comprise" in result.summary.lower()

    def test_execute_sans_callback(self, planner):
        """Replay mode without a callback -> failure."""
        plan = TaskPlan(
            instruction="test",
            understood=True,
            workflow_match="sess_001",
            mode="replay",
        )
        result = planner.execute(plan, replay_callback=None)
        assert result.success is False