""" Tests fonctionnels pour P2 (Policy/Grounding), P3 (Recovery), P4 (Learning). Vérifie que chaque module fait bien son travail : - Grounding : localise ou retourne NOT_FOUND (pas de décision) - Policy : décide RETRY/SKIP/ABORT/SUPERVISE (pas de localisation) - Recovery : exécute Ctrl+Z / Escape / Alt+F4 selon le contexte - Learning : enregistre et requête les résultats structurés """ import json import shutil import sys import tempfile from pathlib import Path from unittest.mock import MagicMock, patch, PropertyMock import pytest _ROOT = str(Path(__file__).resolve().parents[2]) if _ROOT not in sys.path: sys.path.insert(0, _ROOT) # ========================================================================= # P2 : Grounding — localisation pure # ========================================================================= class TestGroundingEngine: def _make_engine(self): from agent_v0.agent_v1.core.grounding import GroundingEngine executor = MagicMock() executor._capture_screenshot_b64.return_value = "fake_b64_data" return GroundingEngine(executor), executor def test_server_found_retourne_coordonnees(self): """Si le serveur trouve l'élément, retourne ses coordonnées.""" engine, executor = self._make_engine() executor._server_resolve_target.return_value = { "resolved": True, "x_pct": 0.5, "y_pct": 0.3, "method": "som_text", "score": 0.95, "matched_element": {"label": "Enregistrer"}, } result = engine.locate("http://server", {"by_text": "Enregistrer"}, 0.5, 0.3, 1920, 1080) assert result.found is True assert result.x_pct == 0.5 assert result.y_pct == 0.3 assert result.method == "som_text" def test_server_not_found_cascade_template(self): """Si serveur échoue, cascade vers template matching.""" engine, executor = self._make_engine() executor._server_resolve_target.return_value = None executor._template_match_anchor.return_value = { "resolved": True, "x_pct": 0.4, "y_pct": 0.6, "score": 0.85, } result = engine.locate( "http://server", {"by_text": "OK", 
"anchor_image_base64": "abc123"}, 0.5, 0.3, 1920, 1080, ) assert result.found is True assert result.method == "anchor_template" def test_toutes_strategies_echouent_retourne_not_found(self): """Si toutes les stratégies échouent, retourne NOT_FOUND.""" engine, executor = self._make_engine() executor._server_resolve_target.return_value = None executor._template_match_anchor.return_value = None executor._hybrid_vlm_resolve.return_value = None result = engine.locate( "http://server", {"by_text": "Inexistant", "anchor_image_base64": "abc", "vlm_description": "bouton"}, 0.5, 0.3, 1920, 1080, ) assert result.found is False assert "échoué" in result.detail def test_screenshot_echoue_retourne_not_found(self): """Si la capture screenshot échoue, NOT_FOUND immédiat.""" engine, executor = self._make_engine() executor._capture_screenshot_b64.return_value = None result = engine.locate("http://server", {"by_text": "OK"}, 0.5, 0.3, 1920, 1080) assert result.found is False assert "screenshot" in result.detail.lower() def test_strategies_custom(self): """On peut spécifier les stratégies à utiliser.""" engine, executor = self._make_engine() executor._template_match_anchor.return_value = { "resolved": True, "x_pct": 0.2, "y_pct": 0.8, "score": 0.9, } # Seulement template, pas de serveur result = engine.locate( "", {"anchor_image_base64": "abc"}, 0.5, 0.3, 1920, 1080, strategies=["template"], ) assert result.found is True # Le serveur n'a PAS été appelé executor._server_resolve_target.assert_not_called() def test_grounding_result_to_dict(self): """Le GroundingResult se sérialise correctement.""" from agent_v0.agent_v1.core.grounding import GroundingResult r = GroundingResult(found=True, x_pct=0.5, y_pct=0.3, method="som", score=0.9) d = r.to_dict() assert d["found"] is True assert d["x_pct"] == 0.5 assert d["method"] == "som" # ========================================================================= # P2 : Policy — décisions quand grounding échoue # 
# =========================================================================


class TestPolicyEngine:
    """Tests for PolicyEngine.decide() and PolicyDecision, using a mocked executor."""

    def _make_engine(self):
        """Return ``(PolicyEngine, mocked executor)`` with ``_system_dialog_pause`` set to None."""
        from agent_v0.agent_v1.core.policy import PolicyEngine

        executor = MagicMock()
        executor._system_dialog_pause = None
        return PolicyEngine(executor), executor

    def test_premier_essai_popup_fermee_retry(self):
        """First failure + popup closed -> RETRY."""
        from agent_v0.agent_v1.core.policy import Decision

        engine, executor = self._make_engine()
        executor._handle_popup_vlm.return_value = True  # popup closed

        decision = engine.decide(
            action={"type": "click"},
            target_spec={"by_text": "OK"},
            retry_count=0,
        )

        assert decision.decision == Decision.RETRY
        assert "popup" in decision.reason.lower()

    def test_premier_essai_pas_de_popup_retry(self):
        """First failure + no popup -> still RETRY (max_retries > 0)."""
        from agent_v0.agent_v1.core.policy import Decision

        engine, executor = self._make_engine()
        executor._handle_popup_vlm.return_value = False

        decision = engine.decide(
            action={"type": "click"},
            target_spec={"by_text": "OK"},
            retry_count=0,
            max_retries=2,
        )

        assert decision.decision == Decision.RETRY

    def test_max_retries_acteur_passer_skip(self):
        """Max retries reached + actor answers PASSER -> SKIP."""
        from agent_v0.agent_v1.core.policy import Decision

        engine, executor = self._make_engine()
        executor._actor_decide.return_value = "PASSER"

        decision = engine.decide(
            action={"type": "click"},
            target_spec={"by_text": "Onglet"},
            retry_count=1,
            max_retries=1,
        )

        assert decision.decision == Decision.SKIP

    def test_max_retries_acteur_stopper_abort(self):
        """Max retries reached + actor answers STOPPER -> ABORT."""
        from agent_v0.agent_v1.core.policy import Decision

        engine, executor = self._make_engine()
        executor._actor_decide.return_value = "STOPPER"

        decision = engine.decide(
            action={"type": "click"},
            target_spec={"by_text": "X"},
            retry_count=1,
            max_retries=1,
        )

        assert decision.decision == Decision.ABORT

    def test_max_retries_acteur_executer_supervise(self):
        """Max retries + actor answers EXECUTER -> SUPERVISE (hand control back)."""
        from agent_v0.agent_v1.core.policy import Decision

        engine, executor = self._make_engine()
        executor._actor_decide.return_value = "EXECUTER"

        decision = engine.decide(
            action={"type": "click"},
            target_spec={"by_text": "X"},
            retry_count=1,
            max_retries=1,
        )

        assert decision.decision == Decision.SUPERVISE

    def test_policy_decision_to_dict(self):
        """PolicyDecision serialises correctly through to_dict()."""
        from agent_v0.agent_v1.core.policy import PolicyDecision, Decision

        d = PolicyDecision(decision=Decision.SKIP, reason="État atteint").to_dict()

        assert d["decision"] == "skip"
        assert d["reason"] == "État atteint"


# =========================================================================
# P3: Recovery — rollback after a failure
# =========================================================================


class TestRecoveryEngine:
    """Tests for RecoveryEngine.attempt() and RecoveryResult, using a mocked executor."""

    def _make_engine(self):
        """Return ``(RecoveryEngine, mocked executor)`` with keyboard, screen and click stubs."""
        from agent_v0.agent_v1.core.recovery import RecoveryEngine

        executor = MagicMock()
        executor.keyboard = MagicMock()
        executor.sct = MagicMock()
        executor.sct.monitors = [{}, {"width": 1920, "height": 1080}]
        executor._click = MagicMock()
        return RecoveryEngine(executor), executor

    def test_popup_detectee_escape(self):
        """Critic reports a popup -> Recovery answers with ESCAPE."""
        from agent_v0.agent_v1.core.recovery import RecoveryAction

        engine, executor = self._make_engine()

        result = engine.attempt(
            failed_action={"type": "click"},
            critic_detail="Une popup d'erreur est apparue",
        )

        assert result.action_taken == RecoveryAction.ESCAPE
        assert result.success is True
        # A key press must have been sent through the executor's keyboard.
        executor.keyboard.press.assert_called()

    def test_frappe_incorrecte_undo(self):
        """Wrong keystrokes -> Recovery answers with UNDO (Ctrl+Z)."""
        from agent_v0.agent_v1.core.recovery import RecoveryAction

        engine, _ = self._make_engine()

        result = engine.attempt(
            failed_action={"type": "type"},
            critic_detail="Le texte a été tapé au mauvais endroit",
        )

        assert result.action_taken == RecoveryAction.UNDO
        assert result.success is True

    def test_mauvaise_fenetre_close(self):
        """Wrong window -> Recovery answers with CLOSE_WINDOW (Alt+F4)."""
        from agent_v0.agent_v1.core.recovery import RecoveryAction

        engine, _ = self._make_engine()

        result = engine.attempt(
            failed_action={"type": "click"},
            critic_detail="Mauvaise fenêtre ouverte au lieu du bloc-notes",
        )

        assert result.action_taken == RecoveryAction.CLOSE_WINDOW
        assert result.success is True

    def test_menu_ouvert_escape(self):
        """Drop-down menu left open -> Recovery answers with ESCAPE."""
        from agent_v0.agent_v1.core.recovery import RecoveryAction

        engine, _ = self._make_engine()

        result = engine.attempt(
            failed_action={"type": "click"},
            critic_detail="Un menu déroulant s'est ouvert",
        )

        assert result.action_taken == RecoveryAction.ESCAPE
        assert result.success is True

    def test_aucune_strategie_applicable(self):
        """No recognised pattern -> NONE, reported as unsuccessful."""
        from agent_v0.agent_v1.core.recovery import RecoveryAction

        engine, _ = self._make_engine()

        result = engine.attempt(
            failed_action={"type": "wait"},
            critic_detail="Quelque chose d'inattendu",
        )

        assert result.action_taken == RecoveryAction.NONE
        assert result.success is False

    def test_recovery_result_to_dict(self):
        """RecoveryResult serialises correctly through to_dict()."""
        from agent_v0.agent_v1.core.recovery import RecoveryResult, RecoveryAction

        d = RecoveryResult(
            action_taken=RecoveryAction.UNDO, success=True, detail="Ctrl+Z"
        ).to_dict()

        assert d["action_taken"] == "undo"
        assert d["success"] is True


# =========================================================================
# P4: Learning — runtime learning
# =========================================================================


class TestReplayLearner:
    """Tests for ReplayLearner recording, loading, querying and statistics."""

    @pytest.fixture
    def learner(self):
        """Yield a ReplayLearner backed by a throw-away directory.

        The import happens before the directory is created and the rest is
        wrapped in ``try/finally`` so the directory is removed even when the
        ReplayLearner constructor raises.
        """
        from agent_v0.server_v1.replay_learner import ReplayLearner

        tmpdir = tempfile.mkdtemp(prefix="test_learning_")
        try:
            yield ReplayLearner(learning_dir=tmpdir)
        finally:
            shutil.rmtree(tmpdir, ignore_errors=True)

    def test_record_et_load_session(self, learner):
        """Record one outcome and read it back with load_session()."""
        from agent_v0.server_v1.replay_learner import ActionOutcome

        outcome = ActionOutcome(
            session_id="test_session",
            action_id="act_001",
            action_type="click",
            target_description="Bouton Enregistrer",
            resolution_method="som_text",
            resolution_score=0.95,
            success=True,
        )
        learner.record(outcome)

        # Read it back.
        loaded = learner.load_session("test_session")
        assert len(loaded) == 1
        assert loaded[0].action_id == "act_001"
        assert loaded[0].success is True
        assert loaded[0].resolution_method == "som_text"

    def test_record_from_replay_result(self, learner):
        """Convert the replay format into an ActionOutcome."""
        learner.record_from_replay_result(
            session_id="s1",
            action={
                "action_id": "a1",
                "type": "click",
                "target_spec": {"by_text": "OK", "window_title": "App"},
            },
            result={
                "success": True,
                "resolution_method": "template",
                "resolution_score": 0.9,
            },
            verification={
                "verified": True,
                "semantic_verified": True,
                "semantic_detail": "OK",
            },
        )

        loaded = learner.load_session("s1")
        assert len(loaded) == 1
        assert loaded[0].target_description == "OK"
        assert loaded[0].semantic_verified is True

    def test_query_similar(self, learner):
        """Query similar outcomes by target description."""
        from agent_v0.server_v1.replay_learner import ActionOutcome

        # Record several outcomes.
        samples = [
            ("Bouton Enregistrer", "som_text", True),
            ("Bouton Annuler", "template", True),
            ("Bouton Enregistrer", "vlm_direct", False),
            ("Menu Fichier", "som_text", True),
        ]
        for i, (desc, method, success) in enumerate(samples):
            learner.record(ActionOutcome(
                session_id="s1",
                action_id=f"a{i}",
                action_type="click",
                target_description=desc,
                resolution_method=method,
                success=success,
            ))

        # Search for "Enregistrer".
        results = learner.query_similar(target_description="Enregistrer")
        assert len(results) == 2
        # Both hits are about "Enregistrer".
        for r in results:
            assert "enregistrer" in r["outcome"]["target_description"].lower()

    def test_get_stats(self, learner):
        """Global statistics are correct."""
        from agent_v0.server_v1.replay_learner import ActionOutcome

        samples = [(True, "som"), (True, "som"), (False, "template"), (True, "vlm")]
        for success, method in samples:
            learner.record(ActionOutcome(
                session_id="s1",
                action_id="a",
                action_type="click",
                success=success,
                resolution_method=method,
            ))

        stats = learner.get_stats()
        assert stats["total"] == 4
        assert stats["success_rate"] == 0.75
        assert stats["methods"]["som"]["success_rate"] == 1.0
        assert stats["methods"]["template"]["success_rate"] == 0.0

    def test_gemma4_indisponible_pas_de_crash(self, learner):
        """Learning keeps working without a VLM."""
        from agent_v0.server_v1.replay_learner import ActionOutcome

        # No crash expected, just a plain record.
        learner.record(ActionOutcome(
            session_id="s1",
            action_id="a1",
            action_type="click",
            success=False,
            error="target_not_found",
        ))

        stats = learner.get_stats()
        assert stats["total"] == 1
        assert stats["success_rate"] == 0.0

    def test_fichier_jsonl_format(self, learner):
        """The JSONL file holds one valid JSON document per line."""
        from agent_v0.server_v1.replay_learner import ActionOutcome

        learner.record(ActionOutcome(
            session_id="s1", action_id="a1", action_type="click", success=True,
        ))
        learner.record(ActionOutcome(
            session_id="s1", action_id="a2", action_type="type", success=False,
        ))

        jsonl_file = learner.learning_dir / "s1.jsonl"
        assert jsonl_file.is_file()

        # Explicit encoding: do not depend on the platform default when
        # reading JSON text.
        with open(jsonl_file, encoding="utf-8") as f:
            lines = f.readlines()

        assert len(lines) == 2
        for line in lines:
            data = json.loads(line)  # must be valid JSON
            assert "action_id" in data
            assert "success" in data


# =========================================================================
# Learning loop: cross-workflow consolidation
# =========================================================================


class TestLearningLoop:
    """Tests for the learning loop: past replays improve the following ones."""

    @pytest.fixture
    def learner(self):
        """Yield a ReplayLearner backed by a throw-away directory.

        The import happens before the directory is created and the rest is
        wrapped in ``try/finally`` so the directory is removed even when the
        ReplayLearner constructor raises.
        """
        from agent_v0.server_v1.replay_learner import ReplayLearner

        tmpdir = tempfile.mkdtemp(prefix="test_learning_loop_")
        try:
            yield ReplayLearner(learning_dir=tmpdir)
        finally:
            shutil.rmtree(tmpdir, ignore_errors=True)

    def test_best_strategy_apprend_du_succes(self, learner):
        """The best strategy is the one with the most successes."""
        from agent_v0.server_v1.replay_learner import ActionOutcome

        # The template strategy fails 3 times on "Enregistrer".
        for i in range(3):
            learner.record(ActionOutcome(
                session_id=f"s{i}",
                action_id=f"a{i}",
                action_type="click",
                target_description="Enregistrer",
                resolution_method="anchor_template",
                success=False,
            ))

        # som_text succeeds twice on "Enregistrer".
        for i in range(2):
            learner.record(ActionOutcome(
                session_id=f"s{10+i}",
                action_id=f"a{10+i}",
                action_type="click",
                target_description="Enregistrer",
                resolution_method="som_text_match",
                success=True,
            ))

        best = learner.best_strategy_for("Enregistrer")
        assert best == "som_text_match"

    def test_best_strategy_minimum_2_essais(self, learner):
        """At least 2 attempts are needed before a strategy is recommended."""
        from agent_v0.server_v1.replay_learner import ActionOutcome

        # A single success is not enough for a recommendation.
        learner.record(ActionOutcome(
            session_id="s1",
            action_id="a1",
            action_type="click",
            target_description="OK",
            resolution_method="vlm_direct",
            success=True,
        ))

        best = learner.best_strategy_for("OK")
        assert best is None

    def test_best_strategy_rien_si_historique_vide(self, learner):
        """No history -> no recommendation."""
        best = learner.best_strategy_for("Inexistant")
        assert best is None

    def test_consolidate_workflow_enrichit_les_actions(self, learner):
        """Consolidation injects _learned_strategy into the target_spec dicts."""
        from agent_v0.server_v1.replay_learner import ActionOutcome

        # History: som_text_match works for "Fichier".
        for i in range(3):
            learner.record(ActionOutcome(
                session_id=f"s{i}",
                action_id=f"a{i}",
                action_type="click",
                target_description="Fichier",
                resolution_method="som_text_match",
                success=True,
            ))

        # Workflow containing a "Fichier" action.
        actions = [
            {"type": "click", "target_spec": {"by_text": "Fichier", "window_title": "Bloc-notes"}},
            {"type": "type", "text": "bonjour"},
            {"type": "click", "target_spec": {"by_text": "Inconnu"}},
        ]

        enriched = learner.consolidate_workflow(actions)

        assert enriched == 1  # only "Fichier" has a history
        assert actions[0]["target_spec"]["_learned_strategy"] == "som_text_match"
        assert "_learned_strategy" not in actions[2].get("target_spec", {})

    def test_consolidation_cross_workflow(self, learner):
        """A success in workflow A improves workflow B."""
        from agent_v0.server_v1.replay_learner import ActionOutcome

        # Workflow A: "Enregistrer" succeeds with grounding_vlm.
        for i in range(3):
            learner.record(ActionOutcome(
                session_id="workflow_A",
                action_id=f"a{i}",
                action_type="click",
                target_description="Enregistrer",
                window_title="Bloc-notes",
                resolution_method="grounding_vlm",
                success=True,
            ))

        # Workflow B also contains "Enregistrer".
        workflow_b = [
            {"type": "click", "target_spec": {"by_text": "Enregistrer", "window_title": "Bloc-notes"}},
        ]

        enriched = learner.consolidate_workflow(workflow_b, "workflow_B")

        assert enriched == 1
        assert workflow_b[0]["target_spec"]["_learned_strategy"] == "grounding_vlm"

    def test_grounding_reordonne_strategies(self):
        """GroundingEngine reorders its strategies according to _learned_strategy."""
        from agent_v0.agent_v1.core.grounding import GroundingEngine

        executor = MagicMock()
        executor._capture_screenshot_b64.return_value = "fake"
        # Simulate that only the template strategy works.
        executor._server_resolve_target.return_value = None
        executor._template_match_anchor.return_value = {
            "resolved": True,
            "x_pct": 0.5,
            "y_pct": 0.5,
            "score": 0.9,
        }
        executor._hybrid_vlm_resolve.return_value = None

        engine = GroundingEngine(executor)

        # With _learned_strategy = anchor_template, template goes first.
        result = engine.locate(
            "http://server",
            {
                "by_text": "OK",
                "anchor_image_base64": "abc",
                "_learned_strategy": "anchor_template",
            },
            0.5,
            0.3,
            1920,
            1080,
        )

        assert result.found is True
        assert result.method == "anchor_template"
        # The server was NOT called (template ran first).
        executor._server_resolve_target.assert_not_called()