# tests/unit/test_task_planner.py
"""
Unit tests for TaskPlanner (the MACRO planner).

Covers:
1. Understanding simple orders (understand)
2. Workflow matching by semantic description
3. Loop detection and parameter extraction
4. Conversion of steps → JSON actions (correct format)
5. Extraction of session descriptions

Every gemma4 response is mocked for reproducibility.
"""

import json
import os
import sys
import tempfile
from pathlib import Path
from unittest.mock import MagicMock, patch, Mock

import pytest

# Two levels above this file's directory (presumably the repository root) must
# be importable before the project import below can resolve.
_ROOT = str(Path(__file__).resolve().parents[2])
if _ROOT not in sys.path:
    sys.path.insert(0, _ROOT)

from agent_v0.server_v1.task_planner import TaskPlanner, TaskPlan


# =========================================================================
# Fixtures
# =========================================================================


@pytest.fixture
def planner():
    """Provide a TaskPlanner wired to a dummy gemma4 port."""
    instance = TaskPlanner(gemma4_port="11435", domain_id="generic")
    return instance


@pytest.fixture
def sample_workflows():
    """Provide the three workflows offered to the planner in matching tests."""
    notepad = {
        "session_id": "sess_001",
        "name": "Bloc-notes",
        "description": "Ouvrir Bloc-notes via Exécuter (Win+R) et écrire du texte",
        "machine": "PC-01",
        "event_count": 25,
    }
    file_explorer = {
        "session_id": "sess_002",
        "name": "Explorateur de fichiers",
        "description": "Naviguer dans l'Explorateur de fichiers et ouvrir des images",
        "machine": "PC-01",
        "event_count": 40,
    }
    dxcare = {
        "session_id": "sess_003",
        "name": "DxCare, Codage CIM-10",
        "description": "Ouvrir un dossier patient dans DxCare et coder les diagnostics CIM-10",
        "machine": "PC-TIM",
        "event_count": 80,
    }
    # Order matters: the tests refer to workflows by their 1-based position.
    return [notepad, file_explorer, dxcare]


def _mock_gemma4_response(content: str):
    """Build a mock of a successful gemma4 HTTP response.

    Args:
        content: Text returned under ``message.content`` by ``.json()``.

    Returns:
        A MagicMock with ``ok=True``, ``status_code=200`` and a ``json()``
        method returning ``{"message": {"content": content}}``.
    """
    reply = MagicMock(ok=True, status_code=200)
    reply.json.return_value = {"message": {"content": content}}
    return reply
# =========================================================================
# Tests: understand — simple order
# =========================================================================


class TestUnderstandOrdreSimple:
    """Check that understand() correctly parses gemma4 replies."""

    def test_understand_ordre_simple(self, planner, sample_workflows):
        """'Ouvre le bloc-notes' → understood=True and instruction preserved."""
        order = "Ouvre le bloc-notes"
        reply_text = (
            "COMPRIS: OUI\n"
            "WORKFLOW: 1\n"
            "CONFIANCE: 0.9\n"
            "PARAMETRES: AUCUN\n"
            "BOUCLE: NON\n"
            "SOURCE_BOUCLE: aucun\n"
            "PLAN:\n"
            "1. Ouvrir le Bloc-notes via Win+R\n"
            "2. Taper notepad et valider\n"
        )
        fake_reply = _mock_gemma4_response(reply_text)

        with patch("requests.post") as fake_post:
            fake_post.return_value = fake_reply
            plan = planner.understand(order, available_workflows=sample_workflows)

        assert plan.understood is True
        assert plan.instruction == order

    def test_understand_instruction_non_comprise(self, planner):
        """An unintelligible instruction → understood=False."""
        fake_reply = _mock_gemma4_response(
            "COMPRIS: NON\nWORKFLOW: AUCUN\nBOUCLE: NON\n"
        )

        with patch("requests.post") as fake_post:
            fake_post.return_value = fake_reply
            plan = planner.understand("xyzzy blah blah")

        assert plan.understood is False

    def test_understand_gemma4_erreur_http(self, planner):
        """A gemma4 HTTP error → plan.error mentions the status code."""
        failed_reply = MagicMock(ok=False, status_code=500)

        with patch("requests.post") as fake_post:
            fake_post.return_value = failed_reply
            plan = planner.understand("Ouvre le bloc-notes")

        assert plan.understood is False
        assert "500" in plan.error

    def test_understand_gemma4_timeout(self, planner):
        """A gemma4 timeout → plan.error is filled in."""
        import requests

        with patch("requests.post") as fake_post:
            fake_post.side_effect = requests.Timeout("timeout")
            plan = planner.understand("Ouvre le bloc-notes")

        assert plan.understood is False
        # Either wording is accepted in the error message.
        lowered_error = plan.error.lower()
        assert any(word in lowered_error for word in ("erreur", "timeout"))


# =========================================================================
# Tests: workflow matching
# =========================================================================


class TestUnderstandIdentifieWorkflow:
    """Check that workflow matching works."""

    def test_understand_identifie_workflow(self, planner, sample_workflows):
        """When a workflow matches, workflow_match is filled in."""
        gemma4_response = (
            "COMPRIS: OUI\n"
            "WORKFLOW: 1\n"
            "CONFIANCE: 0.9\n"
            "PARAMETRES: AUCUN\n"
            "BOUCLE: NON\n"
            "SOURCE_BOUCLE: aucun\n"
            "PLAN:\n"
            "1. Lancer le Bloc-notes\n"
        )
        with patch("requests.post", return_value=_mock_gemma4_response(gemma4_response)):
            plan = planner.understand(
                "Ouvre le bloc-notes",
                available_workflows=sample_workflows,
            )
        assert plan.workflow_match == "sess_001"
        assert plan.workflow_name == "Bloc-notes"
        assert plan.mode == "replay"
        assert plan.match_confidence >= 0.8

    def test_understand_workflow_aucun_match(self, planner, sample_workflows):
        """No matching workflow → free mode."""
        gemma4_response = (
            "COMPRIS: OUI\n"
            "WORKFLOW: AUCUN\n"
            "PARAMETRES: AUCUN\n"
            "BOUCLE: NON\n"
            "SOURCE_BOUCLE: aucun\n"
            "PLAN:\n"
            "1. Ouvrir Chrome\n"
            "2. Aller sur Google\n"
        )
        with patch("requests.post", return_value=_mock_gemma4_response(gemma4_response)):
            plan = planner.understand(
                "Recherche voiture sur Google",
                available_workflows=sample_workflows,
            )
        assert plan.understood is True
        assert plan.workflow_match == ""
        assert plan.mode == "free"

    def test_understand_workflow_second_match(self, planner, sample_workflows):
        """Workflow 2 is selected correctly."""
        gemma4_response = (
            "COMPRIS: OUI\n"
            "WORKFLOW: 2\n"
            "CONFIANCE: 0.85\n"
            "BOUCLE: NON\n"
            "PLAN:\n"
            "1. Ouvrir l'explorateur de fichiers\n"
        )
        with patch("requests.post", return_value=_mock_gemma4_response(gemma4_response)):
            plan = planner.understand(
                "Ouvre mes images",
                available_workflows=sample_workflows,
            )
        assert plan.workflow_match == "sess_002"
        assert plan.workflow_name == "Explorateur de fichiers"

    def test_understand_workflow_avec_description_dans_prompt(self, planner, sample_workflows):
        """The prompt sent to gemma4 includes the workflow descriptions."""
        captured_body = {}

        # The parameter must be named ``json`` so it receives the ``json=``
        # keyword passed to requests.post; it shadows the json module only
        # inside this function.
        def capture_post(url, json=None, **kwargs):
            captured_body.update(json or {})
            return _mock_gemma4_response("COMPRIS: OUI\nWORKFLOW: AUCUN\nBOUCLE: NON\n")

        with patch("requests.post", side_effect=capture_post):
            planner.understand(
                "Ouvre le bloc-notes",
                available_workflows=sample_workflows,
            )
        prompt_content = captured_body["messages"][0]["content"]
        # The descriptions must appear in the prompt.
        assert "Ouvrir Bloc-notes via Exécuter" in prompt_content
        assert "Naviguer dans l'Explorateur" in prompt_content


# =========================================================================
# Tests: loop detection
# =========================================================================


class TestUnderstandDetecteBoucle:
    """Check loop detection."""

    def test_understand_detecte_boucle(self, planner, sample_workflows):
        """'traite TOUS les dossiers' → is_loop=True."""
        gemma4_response = (
            "COMPRIS: OUI\n"
            "WORKFLOW: 3\n"
            "CONFIANCE: 0.8\n"
            "PARAMETRES: AUCUN\n"
            "BOUCLE: OUI\n"
            "SOURCE_BOUCLE: écran\n"
            "PLAN:\n"
            "1. Pour chaque dossier dans la liste\n"
            "2. Ouvrir le dossier\n"
            "3. Coder les diagnostics\n"
        )
        with patch("requests.post", return_value=_mock_gemma4_response(gemma4_response)):
            plan = planner.understand(
                "Traite TOUS les dossiers de la liste",
                available_workflows=sample_workflows,
            )
        assert plan.is_loop is True
        assert plan.loop_source == "écran"

    def test_understand_pas_de_boucle(self, planner):
        """Simple order → is_loop=False."""
        gemma4_response = (
            "COMPRIS: OUI\n"
            "WORKFLOW: AUCUN\n"
            "BOUCLE: NON\n"
            "SOURCE_BOUCLE: aucun\n"
            "PLAN:\n"
            "1. Ouvrir le navigateur\n"
        )
        with patch("requests.post", return_value=_mock_gemma4_response(gemma4_response)):
            plan = planner.understand("Ouvre le navigateur")
        assert plan.is_loop is False


# =========================================================================
# Tests: parameter extraction
# =========================================================================


class TestUnderstandExtraitParametres:
    """Check parameter extraction."""

    def test_understand_extrait_parametres(self, planner, sample_workflows):
        """'dossiers de janvier' → parameters contains mois=janvier."""
        gemma4_response = (
            "COMPRIS: OUI\n"
            "WORKFLOW: 3\n"
            "CONFIANCE: 0.85\n"
            "PARAMETRES: mois=janvier\n"
            "BOUCLE: OUI\n"
            "SOURCE_BOUCLE: écran\n"
            "PLAN:\n"
            "1. Filtrer les dossiers de janvier\n"
        )
        with patch("requests.post", return_value=_mock_gemma4_response(gemma4_response)):
            plan = planner.understand(
                "Traite les dossiers de janvier",
                available_workflows=sample_workflows,
            )
        assert "mois" in plan.parameters
        assert plan.parameters["mois"] == "janvier"

    def test_understand_parametres_multiples(self, planner):
        """Several parameters on separate lines."""
        gemma4_response = (
            "COMPRIS: OUI\n"
            "WORKFLOW: AUCUN\n"
            "PARAMETRES:\n"
            "- patient=DUPONT\n"
            "- date=2026-01-15\n"
            "BOUCLE: NON\n"
            "PLAN:\n"
            "1. Rechercher le patient DUPONT\n"
        )
        with patch("requests.post", return_value=_mock_gemma4_response(gemma4_response)):
            plan = planner.understand("Cherche le dossier de DUPONT du 15 janvier")
        assert plan.parameters.get("patient") == "DUPONT"
        assert plan.parameters.get("date") == "2026-01-15"

    def test_understand_parametres_inline(self, planner):
        """Parameters on the same line as PARAMETRES:."""
        gemma4_response = (
            "COMPRIS: OUI\n"
            "WORKFLOW: AUCUN\n"
            "PARAMETRES: nom=Martin, ville=Paris\n"
            "BOUCLE: NON\n"
            "PLAN:\n"
            "1. Chercher Martin à Paris\n"
        )
        with patch("requests.post", return_value=_mock_gemma4_response(gemma4_response)):
            plan = planner.understand("Cherche Martin à Paris")
        assert plan.parameters.get("nom") == "Martin"
        assert plan.parameters.get("ville") == "Paris"


# =========================================================================
# Tests: _parse_understanding (tolerant parsing)
# =========================================================================


class TestParseUnderstanding:
    """Exercise the tolerant parsing of assorted gemma4 replies."""

    def test_parse_markdown_gras(self, planner):
        """A reply using **bold** markers is parsed correctly."""
        plan = TaskPlan(instruction="test")
        content = (
            "**COMPRIS:** OUI\n"
            "**WORKFLOW:** AUCUN\n"
            "**BOUCLE:** NON\n"
            "**PLAN:**\n"
            "1. Première étape\n"
        )
        result = planner._parse_understanding(plan, content, [])
        assert result.understood is True
        assert result.mode == "free"

    def test_parse_confiance_pourcentage(self, planner, sample_workflows):
        """CONFIANCE: 90% → match_confidence=0.9."""
        plan = TaskPlan(instruction="test")
        content = (
            "COMPRIS: OUI\n"
            "WORKFLOW: 1\n"
            "CONFIANCE: 90%\n"
            "BOUCLE: NON\n"
        )
        result = planner._parse_understanding(plan, content, sample_workflows)
        assert result.match_confidence == pytest.approx(0.9)

    def test_parse_confiance_virgule(self, planner, sample_workflows):
        """CONFIANCE: 0,85 → match_confidence=0.85."""
        plan = TaskPlan(instruction="test")
        content = (
            "COMPRIS: OUI\n"
            "WORKFLOW: 1\n"
            "CONFIANCE: 0,85\n"
            "BOUCLE: NON\n"
        )
        result = planner._parse_understanding(plan, content, sample_workflows)
        assert result.match_confidence == pytest.approx(0.85)

    def test_parse_workflow_avec_parentheses(self, planner, sample_workflows):
        """WORKFLOW: 2 (Explorateur) → index 2 is extracted correctly."""
        plan = TaskPlan(instruction="test")
        content = (
            "COMPRIS: OUI\n"
            "WORKFLOW: 2 (Explorateur de fichiers)\n"
            "BOUCLE: NON\n"
        )
        result = planner._parse_understanding(plan, content, sample_workflows)
        assert result.workflow_match == "sess_002"

    def test_parse_workflow_aucun_variantes(self, planner, sample_workflows):
        """Every spelling of 'none' is recognised."""
        for val in ("AUCUN", "None", "N/A", "-", "NON"):
            # Fresh plan per variant so one iteration cannot leak into the next.
            plan = TaskPlan(instruction="test")
            content = f"COMPRIS: OUI\nWORKFLOW: {val}\nBOUCLE: NON\n"
            result = planner._parse_understanding(plan, content, sample_workflows)
            assert result.workflow_match == "", f"Devrait être vide pour '{val}'"

    def test_parse_etapes_tirets(self, planner):
        """Dash-prefixed steps are added to the plan."""
        plan = TaskPlan(instruction="test")
        content = (
            "COMPRIS: OUI\n"
            "WORKFLOW: AUCUN\n"
            "BOUCLE: NON\n"
            "PLAN:\n"
            "- Ouvrir l'application\n"
            "- Cliquer sur Fichier\n"
            "- Sauvegarder\n"
        )
        result = planner._parse_understanding(plan, content, [])
        assert len(result.steps) == 3


# =========================================================================
# Tests: _steps_to_actions
# =========================================================================


class TestStepsToActions:
    """Check the conversion of steps → JSON actions."""

    def test_steps_to_actions_format(self, planner):
        """Generated actions have the right format (type, target_spec, etc.)."""
        gemma4_response = (
            '{"type": "click", "target_spec": {"by_text": "Rechercher"}}\n'
            '{"type": "type", "text": "bloc-notes"}\n'
            '{"type": "key_combo", "keys": ["enter"]}\n'
            '{"type": "wait", "duration_ms": 2000}\n'
        )
        with patch("requests.post", return_value=_mock_gemma4_response(gemma4_response)):
            actions = planner._steps_to_actions(
                [{"description": "1. Ouvrir le bloc-notes"}],
                {},
            )
        assert len(actions) == 4
        assert actions[0]["type"] == "click"
        assert actions[0]["visual_mode"] is True  # Added automatically
        assert actions[0]["target_spec"]["by_text"] == "Rechercher"
        assert actions[1]["type"] == "type"
        assert actions[1]["text"] == "bloc-notes"
        assert actions[2]["type"] == "key_combo"
        assert actions[2]["keys"] == ["enter"]
        assert actions[3]["type"] == "wait"
        assert actions[3]["duration_ms"] == 2000

    def test_steps_to_actions_json_array(self, planner):
        """gemma4 returns a JSON array → parsed correctly."""
        gemma4_response = (
            'Voici les actions :\n'
            '```json\n'
            '[\n'
            ' {"type": "click", "target_spec": {"by_text": "Fichier"}},\n'
            ' {"type": "click", "target_spec": {"by_text": "Ouvrir"}}\n'
            ']\n'
            '```\n'
        )
        with patch("requests.post", return_value=_mock_gemma4_response(gemma4_response)):
            actions = planner._steps_to_actions(
                [{"description": "1. Ouvrir un fichier"}],
                {},
            )
        assert len(actions) == 2
        assert actions[0]["target_spec"]["by_text"] == "Fichier"
        assert actions[1]["target_spec"]["by_text"] == "Ouvrir"

    def test_steps_to_actions_nested_json(self, planner):
        """Nested JSON (target_spec) → parsed correctly."""
        gemma4_response = (
            '{"type": "click", "target_spec": {"by_text": "OK", "window_title": "Confirmation"}}\n'
        )
        with patch("requests.post", return_value=_mock_gemma4_response(gemma4_response)):
            actions = planner._steps_to_actions(
                [{"description": "1. Confirmer"}],
                {},
            )
        assert len(actions) == 1
        assert actions[0]["target_spec"]["window_title"] == "Confirmation"

    def test_steps_to_actions_gemma4_erreur(self, planner):
        """gemma4 error → empty list."""
        mock_resp = MagicMock()
        mock_resp.ok = False
        with patch("requests.post", return_value=mock_resp):
            actions = planner._steps_to_actions(
                [{"description": "1. Faire quelque chose"}],
                {},
            )
        assert actions == []

    def test_steps_to_actions_filtre_types_invalides(self, planner):
        """Only valid types (click, type, key_combo, wait) are accepted."""
        gemma4_response = (
            '{"type": "click", "target_spec": {"by_text": "OK"}}\n'
            '{"type": "invalid_action", "foo": "bar"}\n'
            '{"type": "wait", "duration_ms": 500}\n'
            '{"not_a_type": "test"}\n'
        )
        with patch("requests.post", return_value=_mock_gemma4_response(gemma4_response)):
            actions = planner._steps_to_actions(
                [{"description": "1. Test"}],
                {},
            )
        assert len(actions) == 2
        assert actions[0]["type"] == "click"
        assert actions[1]["type"] == "wait"


# =========================================================================
# Tests: _parse_actions_json (robust parsing)
# =========================================================================


class TestParseActionsJson:
    """Exercise the robust parsing of JSON actions."""

    def test_parse_json_une_par_ligne(self):
        """One JSON action per line."""
        content = (
            '{"type": "click", "target_spec": {"by_text": "A"}}\n'
            '{"type": "type", "text": "hello"}\n'
        )
        actions = TaskPlanner._parse_actions_json(content)
        assert len(actions) == 2

    def test_parse_json_array(self):
        """A JSON array."""
        content = '[{"type": "click", "target_spec": {"by_text": "A"}}, {"type": "wait", "duration_ms": 1000}]'
        actions = TaskPlanner._parse_actions_json(content)
        assert len(actions) == 2

    def test_parse_json_avec_texte_autour(self):
        """JSON surrounded by free-text commentary."""
        content = (
            "Voici les actions RPA :\n\n"
            '{"type": "click", "target_spec": {"by_text": "Envoyer"}}\n'
            "\n"
            "C'est tout.\n"
        )
        actions = TaskPlanner._parse_actions_json(content)
        assert len(actions) == 1
        assert actions[0]["target_spec"]["by_text"] == "Envoyer"

    def test_parse_json_vide(self):
        """Empty or JSON-free content → empty list."""
        assert TaskPlanner._parse_actions_json("") == []
        assert TaskPlanner._parse_actions_json("Pas de JSON ici") == []

    def test_parse_json_markdown_code_block(self):
        """JSON inside a markdown code block."""
        content = (
            "```json\n"
            '{"type": "type", "text": "bonjour"}\n'
            "```\n"
        )
        actions = TaskPlanner._parse_actions_json(content)
        assert len(actions) == 1
        assert actions[0]["text"] == "bonjour"


# =========================================================================
# Tests: _extract_session_description
# =========================================================================


class TestExtractSessionDescription:
    """Check that session descriptions are readable and semantic."""

    def _write_events(self, tmp_path, events):
        """Write events to a temporary JSONL file and return its path.

        Args:
            tmp_path: Directory (a ``pathlib.Path``) that receives the file.
            events: Iterable of JSON-serialisable events, one per output line.

        Returns:
            Path of the written ``live_events.jsonl`` file.
        """
        events_file = tmp_path / "live_events.jsonl"
        # ensure_ascii=False emits raw non-ASCII characters ("é", "–"), so the
        # encoding is pinned to UTF-8 instead of the platform locale default.
        with open(events_file, "w", encoding="utf-8") as f:
            for evt in events:
                f.write(json.dumps(evt, ensure_ascii=False) + "\n")
        return events_file

    def test_extract_session_description_bloc_notes(self, tmp_path):
        """Notepad session started via Win+R → semantic description."""
        events = [
            {"event": {"type": "key_combo", "keys": ["win", "r"], "window": {"title": "Bureau"}}},
            {"event": {"type": "window_focus_change", "from": {"title": "Bureau"}, "to": {"title": "Exécuter"}}},
            {"event": {"type": "text_input", "text": "notepad", "window": {"title": "Exécuter"}}},
            {"event": {"type": "mouse_click", "button": "left", "window": {"title": "Exécuter"}}},
            {"event": {"type": "window_focus_change", "from": {"title": "Exécuter"}, "to": {"title": "Sans titre – Bloc-notes"}}},
            {"event": {"type": "text_input", "text": "Bonjour le monde", "window": {"title": "Sans titre – Bloc-notes"}}},
        ]
        events_file = self._write_events(tmp_path, events)

        # Imported from api_stream (the function lives at module level).
        from agent_v0.server_v1.api_stream import _extract_session_description

        desc = _extract_session_description(events_file)
        assert desc["event_count"] == 6
        # The description must be readable, not just "Bloc-notes, Exécuter".
        description = desc["description"]
        # Case-insensitive check; it also covers the exact "Bloc-notes" spelling.
        assert "bloc-notes" in description.lower()
        # The name must contain the application.
        assert "Bloc-notes" in desc["name"]

    def test_extract_session_description_explorateur(self, tmp_path):
        """File Explorer session → relevant description."""
        events = [
            {"event": {"type": "window_focus_change", "from": {"title": "Bureau"}, "to": {"title": "Images – Explorateur de fichiers"}}},
            {"event": {"type": "mouse_click", "button": "left", "window": {"title": "Images – Explorateur de fichiers"}}},
            {"event": {"type": "mouse_click", "button": "left", "window": {"title": "Images – Explorateur de fichiers"}}},
            {"event": {"type": "mouse_click", "button": "left", "window": {"title": "Images – Explorateur de fichiers"}}},
        ]
        events_file = self._write_events(tmp_path, events)

        from agent_v0.server_v1.api_stream import _extract_session_description

        desc = _extract_session_description(events_file)
        assert "Explorateur" in desc["name"] or "Explorateur" in desc["description"]

    def test_extract_session_description_vide(self, tmp_path):
        """Empty file → default description."""
        events_file = self._write_events(tmp_path, [])

        from agent_v0.server_v1.api_stream import _extract_session_description

        desc = _extract_session_description(events_file)
        assert desc["event_count"] == 0
        assert desc["name"] == "Session sans nom"

    def test_extract_session_description_cmd(self, tmp_path):
        """Session using cmd.exe → description contains cmd."""
        events = [
            {"event": {"type": "window_focus_change", "from": {"title": "Bureau"}, "to": {"title": "C:\\Windows\\system32\\cmd.exe"}}},
            {"event": {"type": "text_input", "text": "dir", "window": {"title": "C:\\Windows\\system32\\cmd.exe"}}},
            {"event": {"type": "text_input", "text": "cd documents", "window": {"title": "C:\\Windows\\system32\\cmd.exe"}}},
        ]
        events_file = self._write_events(tmp_path, events)

        from agent_v0.server_v1.api_stream import _extract_session_description

        desc = _extract_session_description(events_file)
        assert desc["event_count"] == 3
        # Either the name or the description must mention cmd.
        full = f"{desc['name']} {desc['description']}"
        assert "cmd" in full.lower()

    def test_extract_session_description_recherche_windows(self, tmp_path):
        """Session using Windows search (Win+S) → description mentions search."""
        events = [
            {"event": {"type": "key_combo", "keys": ["win", "s"], "window": {"title": "Bureau"}}},
            {"event": {"type": "window_focus_change", "from": {"title": "Bureau"}, "to": {"title": "Rechercher"}}},
            {"event": {"type": "text_input", "text": "calculator", "window": {"title": "Rechercher"}}},
        ]
        events_file = self._write_events(tmp_path, events)

        from agent_v0.server_v1.api_stream import _extract_session_description

        desc = _extract_session_description(events_file)
        # The description must mention the Windows search.
        assert "recherche" in desc["description"].lower()


# =========================================================================
# Tests: list_capabilities
# =========================================================================


class TestListCapabilities:
    """Check the capability listing."""

    def test_list_capabilities_avec_workflows(self, planner, sample_workflows):
        """With workflows → readable text that includes descriptions."""
        text = planner.list_capabilities(sample_workflows)
        assert "Léa sait faire" in text
        assert "Bloc-notes" in text

    def test_list_capabilities_sans_workflows(self, planner):
        """Without workflows → help message."""
        text = planner.list_capabilities([])
        assert "pas encore appris" in text


# =========================================================================
# Tests: execute (replay and free modes)
# =========================================================================


class TestExecute:
    """Check plan execution."""

    def test_execute_replay(self, planner):
        """Replay mode → callback called with the right session_id."""
        plan = TaskPlan(
            instruction="Ouvre le bloc-notes",
            understood=True,
            workflow_match="sess_001",
            workflow_name="Bloc-notes",
            mode="replay",
        )
        callback = MagicMock(return_value="replay_123")
        result = planner.execute(plan, replay_callback=callback)
        assert result.success is True
        callback.assert_called_once_with(
            session_id="sess_001",
            machine_id="default",
            params={},
        )

    def test_execute_non_compris(self, planner):
        """Plan not understood → failure."""
        plan = TaskPlan(instruction="blah", understood=False)
        result = planner.execute(plan)
        assert result.success is False
        # Case-insensitive match; a second check on the raw summary would be
        # implied by this one.
        assert "non comprise" in result.summary.lower()

    def test_execute_sans_callback(self, planner):
        """Replay mode without a callback → failure."""
        plan = TaskPlan(
            instruction="test",
            understood=True,
            workflow_match="sess_001",
            mode="replay",
        )
        result = planner.execute(plan, replay_callback=None)
        assert result.success is False