""" Tests unitaires pour le Critic (ReplayVerifier.verify_with_critic) et l'enrichissement des actions avec intentions. Vérifie les FONCTIONNALITÉS, pas juste la non-régression : 1. Le Critic fusionne correctement pixel + sémantique 2. La matrice de décision (4 cas) est correcte 3. L'enrichissement intentions parse bien les réponses gemma4 4. Les fallbacks fonctionnent quand le VLM est indisponible """ import base64 import io import json import sys from pathlib import Path from unittest.mock import MagicMock, patch, Mock import pytest _ROOT = str(Path(__file__).resolve().parents[2]) if _ROOT not in sys.path: sys.path.insert(0, _ROOT) from agent_v0.server_v1.replay_verifier import ReplayVerifier, VerificationResult # ========================================================================= # Fixtures # ========================================================================= def _make_screenshot_b64(width=100, height=100, color=(128, 128, 128)): """Créer un screenshot base64 factice (JPEG).""" from PIL import Image img = Image.new("RGB", (width, height), color) buf = io.BytesIO() img.save(buf, format="JPEG", quality=50) return base64.b64encode(buf.getvalue()).decode() @pytest.fixture def verifier(): return ReplayVerifier() @pytest.fixture def screenshot_gray(): return _make_screenshot_b64(100, 100, (128, 128, 128)) @pytest.fixture def screenshot_white(): return _make_screenshot_b64(100, 100, (255, 255, 255)) # ========================================================================= # Tests VerificationResult — nouveaux champs sémantiques # ========================================================================= class TestVerificationResult: def test_to_dict_sans_semantique(self): """Sans vérification sémantique, les champs semantic_ sont absents du dict.""" r = VerificationResult( verified=True, confidence=0.8, changes_detected=True, change_area_pct=5.0, suggestion="continue", detail="test", ) d = r.to_dict() assert "semantic_verified" not in d assert d["verified"] is True assert d["confidence"] == 0.8 def test_to_dict_avec_semantique(self): """Avec vérification sémantique, les champs semantic_ sont présents.""" r = VerificationResult( verified=True, confidence=0.9, changes_detected=True, change_area_pct=5.0, suggestion="continue", detail="test", semantic_verified=True, semantic_detail="Bouton visible", semantic_elapsed_ms=1500.0, ) d = r.to_dict() assert d["semantic_verified"] is True assert d["semantic_detail"] == "Bouton visible" assert d["semantic_elapsed_ms"] == 1500.0 def test_to_dict_semantique_false(self): """semantic_verified=False doit apparaître dans le dict.""" r = VerificationResult( verified=False, confidence=0.7, changes_detected=True, change_area_pct=5.0, suggestion="retry", semantic_verified=False, semantic_detail="Mauvais écran", semantic_elapsed_ms=2000.0, ) d = r.to_dict() assert d["semantic_verified"] is False # ========================================================================= # Tests verify_with_critic — matrice de décision # ========================================================================= class TestVerifyWithCritic: def test_sans_expected_result_retourne_pixel_seul(self, verifier, screenshot_gray): """Sans expected_result, verify_with_critic = verify_action (pixel seul).""" result = verifier.verify_with_critic( action={"type": "click", "action_id": "test"}, result={"success": True}, screenshot_before=screenshot_gray, screenshot_after=screenshot_gray, expected_result="", # Pas d'attendu ) # Pixel seul — pas de champ semantic assert result.semantic_verified is None def test_sans_screenshots_pas_de_semantique(self, verifier): """Sans screenshots, pas de vérification sémantique possible.""" result = verifier.verify_with_critic( action={"type": "click", "action_id": "test"}, result={"success": True}, screenshot_before=None, screenshot_after=None, expected_result="Le fichier est ouvert", ) # Pas de screenshots → pixel seul (confidence basse) assert result.verified is True assert result.confidence < 0.5 def test_pixel_pas_change_et_expected_result_skip_vlm( self, verifier, screenshot_gray, ): """Si pixel identiques + expected_result → skip VLM (pas de changement = retry).""" result = verifier.verify_with_critic( action={"type": "click", "action_id": "test", "x_pct": 0.5, "y_pct": 0.5}, result={"success": True}, screenshot_before=screenshot_gray, screenshot_after=screenshot_gray, # Même image → aucun changement expected_result="Le menu s'est ouvert", ) # Pas de changement pixel → retry, VLM non appelé assert result.verified is False assert result.suggestion == "retry" assert result.semantic_verified is None # VLM non appelé @patch("agent_v0.server_v1.replay_verifier.ReplayVerifier._verify_semantic") def test_pixel_ok_semantic_ok( self, mock_semantic, verifier, screenshot_gray, screenshot_white, ): """Pixel OK + Semantic OK → vérifié avec haute confiance.""" mock_semantic.return_value = { "verified": True, "detail": "Le menu est bien ouvert", "elapsed_ms": 2000.0, } result = verifier.verify_with_critic( action={"type": "click", "action_id": "test"}, result={"success": True}, screenshot_before=screenshot_gray, screenshot_after=screenshot_white, # Différent → changement détecté expected_result="Le menu s'est ouvert", ) assert result.verified is True assert result.semantic_verified is True assert result.confidence >= 0.7 assert "Critic OK" in result.detail @patch("agent_v0.server_v1.replay_verifier.ReplayVerifier._verify_semantic") def test_pixel_ok_semantic_non( self, mock_semantic, verifier, screenshot_gray, screenshot_white, ): """Pixel OK + Semantic NON → INATTENDU (changement mais pas le bon).""" mock_semantic.return_value = { "verified": False, "detail": "Une erreur est apparue au lieu du menu", "elapsed_ms": 2500.0, } result = verifier.verify_with_critic( action={"type": "click", "action_id": "test"}, result={"success": True}, screenshot_before=screenshot_gray, screenshot_after=screenshot_white, expected_result="Le menu s'est ouvert", ) assert result.verified is False assert result.semantic_verified is False assert result.suggestion == "retry" assert "Critic NON" in result.detail @patch("agent_v0.server_v1.replay_verifier.ReplayVerifier._verify_semantic") def test_vlm_indisponible_fallback_pixel( self, mock_semantic, verifier, screenshot_gray, screenshot_white, ): """VLM indisponible → fallback sur pixel seul.""" mock_semantic.return_value = None # VLM down result = verifier.verify_with_critic( action={"type": "click", "action_id": "test"}, result={"success": True}, screenshot_before=screenshot_gray, screenshot_after=screenshot_white, expected_result="Le menu s'est ouvert", ) # Fallback pixel seul — le changement est détecté assert result.verified is True assert result.semantic_verified is None # Pas de VLM # ========================================================================= # Tests _verify_semantic — parsing de la réponse VLM # ========================================================================= class TestVerifySemantic: @patch("requests.post") def test_parse_verdict_oui(self, mock_post, verifier, screenshot_white): """Parse correctement VERDICT: OUI.""" mock_resp = MagicMock() mock_resp.ok = True mock_resp.json.return_value = { "message": {"content": "VERDICT: OUI\nRAISON: Le fichier est bien ouvert"} } mock_post.return_value = mock_resp result = verifier._verify_semantic( screenshot_before=screenshot_white, screenshot_after=screenshot_white, expected_result="Le fichier est ouvert", ) assert result is not None assert result["verified"] is True assert "ouvert" in result["detail"] @patch("requests.post") def test_parse_verdict_non(self, mock_post, verifier, screenshot_white): """Parse correctement VERDICT: NON.""" mock_resp = MagicMock() mock_resp.ok = True mock_resp.json.return_value = { "message": {"content": "VERDICT: NON\nRAISON: L'écran n'a pas changé"} } mock_post.return_value = mock_resp result = verifier._verify_semantic( screenshot_before=screenshot_white, screenshot_after=screenshot_white, expected_result="Le menu s'est ouvert", ) assert result is not None assert result["verified"] is False @patch("requests.post") def test_vlm_timeout_retourne_none(self, mock_post, verifier, screenshot_white): """Timeout VLM → retourne None (fallback gracieux).""" import requests as _real_requests mock_post.side_effect = _real_requests.Timeout("timeout") result = verifier._verify_semantic( screenshot_before=screenshot_white, screenshot_after=screenshot_white, expected_result="Le fichier est ouvert", ) assert result is None def test_sans_screenshot_after_retourne_none(self, verifier): """Sans screenshot_after, pas de vérification possible.""" result = verifier._verify_semantic( screenshot_before=None, screenshot_after=None, expected_result="Le fichier est ouvert", ) assert result is None # ========================================================================= # Tests _merge_results — matrice pixel x sémantique # ========================================================================= class TestMergeResults: def test_pixel_ok_sem_ok(self, verifier): pixel = VerificationResult( verified=True, confidence=0.7, changes_detected=True, change_area_pct=5.0, suggestion="continue", ) semantic = {"verified": True, "detail": "OK", "elapsed_ms": 1000} result = verifier._merge_results(pixel, semantic) assert result.verified is True assert result.semantic_verified is True assert result.confidence >= 0.7 def test_pixel_ok_sem_non(self, verifier): """Pixel OK + Sémantique NON = inattendu → retry.""" pixel = VerificationResult( verified=True, confidence=0.7, changes_detected=True, change_area_pct=5.0, suggestion="continue", ) semantic = {"verified": False, "detail": "Erreur popup", "elapsed_ms": 2000} result = verifier._merge_results(pixel, semantic) assert result.verified is False assert result.semantic_verified is False assert result.suggestion == "retry" def test_pixel_non_sem_ok(self, verifier): """Pixel inchangé + Sémantique OK = état subtil → continue.""" pixel = VerificationResult( verified=False, confidence=0.5, changes_detected=False, change_area_pct=0.1, suggestion="retry", ) semantic = {"verified": True, "detail": "Onglet déjà actif", "elapsed_ms": 1500} result = verifier._merge_results(pixel, semantic) assert result.verified is True assert result.semantic_verified is True assert result.suggestion == "continue" def test_pixel_non_sem_non(self, verifier): """Pixel inchangé + Sémantique NON = échec complet → retry.""" pixel = VerificationResult( verified=False, confidence=0.5, changes_detected=False, change_area_pct=0.0, suggestion="retry", ) semantic = {"verified": False, "detail": "Rien ne s'est passé", "elapsed_ms": 3000} result = verifier._merge_results(pixel, semantic) assert result.verified is False assert result.semantic_verified is False assert result.confidence >= 0.7 # Haute confiance dans l'échec # ========================================================================= # Tests enrichissement intentions (stream_processor) # ========================================================================= class TestEnrichActionsWithIntentions: @patch("requests.post") @patch("requests.get") def test_enrichissement_parse_reponse_gemma4(self, mock_get, mock_post): """La réponse gemma4 est correctement parsée en intention/avant/après.""" from agent_v0.server_v1.stream_processor import _enrich_actions_with_intentions import tempfile, shutil # Mock gemma4 disponible mock_tags_resp = MagicMock() mock_tags_resp.ok = True mock_get.return_value = mock_tags_resp mock_chat_resp = MagicMock() mock_chat_resp.ok = True mock_chat_resp.json.return_value = { "message": { "content": ( "INTENTION: Ouvrir le fichier client dans le logiciel\n" "AVANT: Le logiciel est ouvert sur la page d'accueil\n" "APRÈS: Le fichier client est affiché dans la fenêtre" ) } } mock_post.return_value = mock_chat_resp actions = [ { "type": "click", "action_id": "act_001", "target_spec": {"by_text": "Ouvrir", "window_title": "Logiciel"}, }, { "type": "wait", "action_id": "act_002", "duration_ms": 1000, }, ] tmpdir = Path(tempfile.mkdtemp()) try: (tmpdir / "shots").mkdir() _enrich_actions_with_intentions(actions, tmpdir) # L'action click doit être enrichie assert actions[0].get("intention") == "Ouvrir le fichier client dans le logiciel" assert actions[0].get("expected_state") == "Le logiciel est ouvert sur la page d'accueil" assert actions[0].get("expected_result") == "Le fichier client est affiché dans la fenêtre" # expected_state doit aussi être dans target_spec (pour l'Observer) assert actions[0]["target_spec"]["expected_state"] == "Le logiciel est ouvert sur la page d'accueil" # L'action wait ne doit PAS être enrichie assert "intention" not in actions[1] finally: shutil.rmtree(tmpdir) @patch("requests.get") def test_gemma4_indisponible_pas_de_crash(self, mock_get): """Si gemma4 est down, l'enrichissement est silencieusement désactivé.""" from agent_v0.server_v1.stream_processor import _enrich_actions_with_intentions import tempfile, shutil mock_get.side_effect = ConnectionError("gemma4 down") actions = [ {"type": "click", "action_id": "act_001", "target_spec": {"by_text": "OK"}}, ] tmpdir = Path(tempfile.mkdtemp()) try: (tmpdir / "shots").mkdir() _enrich_actions_with_intentions(actions, tmpdir) # Aucun crash, aucune intention ajoutée assert "intention" not in actions[0] finally: shutil.rmtree(tmpdir) @patch("requests.post") @patch("requests.get") def test_reponse_gemma4_malformee(self, mock_get, mock_post): """Si gemma4 retourne du texte non structuré, pas de crash.""" from agent_v0.server_v1.stream_processor import _enrich_actions_with_intentions import tempfile, shutil mock_tags = MagicMock() mock_tags.ok = True mock_get.return_value = mock_tags mock_resp = MagicMock() mock_resp.ok = True mock_resp.json.return_value = { "message": {"content": "Je ne comprends pas cette demande."} } mock_post.return_value = mock_resp actions = [ {"type": "click", "action_id": "act_001", "target_spec": {"by_text": "OK"}}, ] tmpdir = Path(tempfile.mkdtemp()) try: (tmpdir / "shots").mkdir() _enrich_actions_with_intentions(actions, tmpdir) # Pas de crash, mais pas d'intention non plus assert "intention" not in actions[0] finally: shutil.rmtree(tmpdir)