Files
rpa_vision_v3/tests/unit/test_policy_grounding_recovery_learning.py
Dom aee64f54b1 feat(security): détection dialogues système Windows + fail-closed
Nouveau module system_dialog_guard.py :
- Détection UAC, CredUI, SmartScreen, Defender, Driver install
- Multi-signal (ClassName UIA, process, title FR/EN, parent_path)
- Faux positifs validés (OSIRIS, OBSIUS, MEDSPHERE, Chrome, Excel)

Intégration dans executor.py et policy.py :
- 6 points de décision (avant click/type/key_combo, VLM, policy)
- Pause supervisée au lieu de clic aveugle
- Fail-closed en cas d'exception (P0-D audit)
- Notification systray + remontée serveur

Fix mock test policy engine pour compat _system_dialog_pause=None.
39 + 5 tests unitaires.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-14 16:48:00 +02:00

532 lines
21 KiB
Python

"""
Tests fonctionnels pour P2 (Policy/Grounding), P3 (Recovery), P4 (Learning).
Vérifie que chaque module fait bien son travail :
- Grounding : localise ou retourne NOT_FOUND (pas de décision)
- Policy : décide RETRY/SKIP/ABORT/SUPERVISE (pas de localisation)
- Recovery : exécute Ctrl+Z / Escape / Alt+F4 selon le contexte
- Learning : enregistre et requête les résultats structurés
"""
import json
import shutil
import sys
import tempfile
from pathlib import Path
from unittest.mock import MagicMock, patch, PropertyMock
import pytest
# Push the directory three levels above this file to the front of sys.path
# (presumably the repository root holding the `agent_v0` package -- confirm),
# unless it is already listed, so the absolute imports in the tests resolve.
_ROOT = str(Path(__file__).resolve().parents[2])
if _ROOT not in sys.path:
    sys.path.insert(0, _ROOT)
# =========================================================================
# P2 : Grounding — localisation pure
# =========================================================================
class TestGroundingEngine:
    """Grounding tests: the engine locates a target or reports it as not found."""

    def _make_engine(self):
        """Build a ``GroundingEngine`` wired to a mocked executor.

        Returns:
            A ``(engine, executor)`` tuple. The executor is a ``MagicMock``
            whose ``_capture_screenshot_b64`` returns a placeholder string.
        """
        from agent_v0.agent_v1.core.grounding import GroundingEngine

        fake_executor = MagicMock()
        fake_executor._capture_screenshot_b64.return_value = "fake_b64_data"
        grounding = GroundingEngine(fake_executor)
        return grounding, fake_executor

    def test_server_found_retourne_coordonnees(self):
        """When the server resolves the element, its coordinates are returned."""
        grounding, fake_executor = self._make_engine()
        server_answer = {
            "resolved": True,
            "x_pct": 0.5,
            "y_pct": 0.3,
            "method": "som_text",
            "score": 0.95,
            "matched_element": {"label": "Enregistrer"},
        }
        fake_executor._server_resolve_target.return_value = server_answer

        located = grounding.locate(
            "http://server", {"by_text": "Enregistrer"}, 0.5, 0.3, 1920, 1080
        )

        assert located.found is True
        assert located.x_pct == 0.5
        assert located.y_pct == 0.3
        assert located.method == "som_text"

    def test_server_not_found_cascade_template(self):
        """When the server fails, the engine falls through to template matching."""
        grounding, fake_executor = self._make_engine()
        fake_executor._server_resolve_target.return_value = None
        template_hit = {"resolved": True, "x_pct": 0.4, "y_pct": 0.6, "score": 0.85}
        fake_executor._template_match_anchor.return_value = template_hit
        target_spec = {"by_text": "OK", "anchor_image_base64": "abc123"}

        located = grounding.locate(
            "http://server", target_spec, 0.5, 0.3, 1920, 1080
        )

        assert located.found is True
        assert located.method == "anchor_template"

    def test_toutes_strategies_echouent_retourne_not_found(self):
        """When every strategy fails, the result is NOT_FOUND."""
        grounding, fake_executor = self._make_engine()
        # Every resolver the engine may try answers "nothing found".
        for resolver_name in (
            "_server_resolve_target",
            "_template_match_anchor",
            "_hybrid_vlm_resolve",
        ):
            getattr(fake_executor, resolver_name).return_value = None
        target_spec = {
            "by_text": "Inexistant",
            "anchor_image_base64": "abc",
            "vlm_description": "bouton",
        }

        located = grounding.locate(
            "http://server", target_spec, 0.5, 0.3, 1920, 1080
        )

        assert located.found is False
        assert "échoué" in located.detail

    def test_screenshot_echoue_retourne_not_found(self):
        """A failed screenshot capture yields NOT_FOUND straight away."""
        grounding, fake_executor = self._make_engine()
        fake_executor._capture_screenshot_b64.return_value = None

        located = grounding.locate(
            "http://server", {"by_text": "OK"}, 0.5, 0.3, 1920, 1080
        )

        assert located.found is False
        assert "screenshot" in located.detail.lower()

    def test_strategies_custom(self):
        """The caller can restrict which strategies are used."""
        grounding, fake_executor = self._make_engine()
        template_hit = {"resolved": True, "x_pct": 0.2, "y_pct": 0.8, "score": 0.9}
        fake_executor._template_match_anchor.return_value = template_hit

        # Template strategy only, no server URL.
        located = grounding.locate(
            "",
            {"anchor_image_base64": "abc"},
            0.5,
            0.3,
            1920,
            1080,
            strategies=["template"],
        )

        assert located.found is True
        # The server resolver must NOT have been called.
        fake_executor._server_resolve_target.assert_not_called()

    def test_grounding_result_to_dict(self):
        """``GroundingResult.to_dict`` exposes the fields it was built with."""
        from agent_v0.agent_v1.core.grounding import GroundingResult

        grounded = GroundingResult(
            found=True, x_pct=0.5, y_pct=0.3, method="som", score=0.9
        )
        payload = grounded.to_dict()

        assert payload["found"] is True
        assert payload["x_pct"] == 0.5
        assert payload["method"] == "som"
# =========================================================================
# P2 : Policy — décisions quand grounding échoue
# =========================================================================
class TestPolicyEngine:
    """Policy tests: which decision is taken once grounding has failed."""

    def _make_engine(self):
        """Build a ``PolicyEngine`` wired to a mocked executor.

        Returns:
            A ``(engine, executor)`` tuple. The executor is a ``MagicMock``
            with ``_system_dialog_pause`` explicitly set to ``None``.
        """
        from agent_v0.agent_v1.core.policy import PolicyEngine

        fake_executor = MagicMock()
        # Explicit None: a bare MagicMock attribute would be a truthy mock.
        fake_executor._system_dialog_pause = None
        policy = PolicyEngine(fake_executor)
        return policy, fake_executor

    def _decide_at_retry_limit(self, actor_reply, by_text):
        """Return the decision for a click whose single allowed retry is used up.

        Args:
            actor_reply: Value the mocked ``_actor_decide`` returns.
            by_text: Text placed in the target spec's ``by_text`` key.
        """
        policy, fake_executor = self._make_engine()
        fake_executor._actor_decide.return_value = actor_reply
        return policy.decide(
            action={"type": "click"},
            target_spec={"by_text": by_text},
            retry_count=1,
            max_retries=1,
        )

    def test_premier_essai_popup_fermee_retry(self):
        """First failure with a popup closed leads to RETRY."""
        from agent_v0.agent_v1.core.policy import Decision

        policy, fake_executor = self._make_engine()
        fake_executor._handle_popup_vlm.return_value = True  # popup was closed

        verdict = policy.decide(
            action={"type": "click"},
            target_spec={"by_text": "OK"},
            retry_count=0,
        )

        assert verdict.decision == Decision.RETRY
        assert "popup" in verdict.reason.lower()

    def test_premier_essai_pas_de_popup_retry(self):
        """First failure without a popup still leads to RETRY (max_retries > 0)."""
        from agent_v0.agent_v1.core.policy import Decision

        policy, fake_executor = self._make_engine()
        fake_executor._handle_popup_vlm.return_value = False

        verdict = policy.decide(
            action={"type": "click"},
            target_spec={"by_text": "OK"},
            retry_count=0,
            max_retries=2,
        )

        assert verdict.decision == Decision.RETRY

    def test_max_retries_acteur_passer_skip(self):
        """Retry limit reached and the actor answers PASSER: SKIP."""
        from agent_v0.agent_v1.core.policy import Decision

        verdict = self._decide_at_retry_limit("PASSER", "Onglet")

        assert verdict.decision == Decision.SKIP

    def test_max_retries_acteur_stopper_abort(self):
        """Retry limit reached and the actor answers STOPPER: ABORT."""
        from agent_v0.agent_v1.core.policy import Decision

        verdict = self._decide_at_retry_limit("STOPPER", "X")

        assert verdict.decision == Decision.ABORT

    def test_max_retries_acteur_executer_supervise(self):
        """Retry limit reached and the actor answers EXECUTER: SUPERVISE (hand back control)."""
        from agent_v0.agent_v1.core.policy import Decision

        verdict = self._decide_at_retry_limit("EXECUTER", "X")

        assert verdict.decision == Decision.SUPERVISE

    def test_policy_decision_to_dict(self):
        """``PolicyDecision.to_dict`` serialises the decision and its reason."""
        from agent_v0.agent_v1.core.policy import Decision, PolicyDecision

        verdict = PolicyDecision(decision=Decision.SKIP, reason="État atteint")
        payload = verdict.to_dict()

        assert payload["decision"] == "skip"
        assert payload["reason"] == "État atteint"
# =========================================================================
# P3 : Recovery — rollback après échec
# =========================================================================
class TestRecoveryEngine:
    """Recovery tests: which rollback action is chosen after a failed step."""

    def _make_engine(self):
        """Build a ``RecoveryEngine`` wired to a mocked executor.

        Returns:
            A ``(engine, executor)`` tuple. The executor mock carries a
            mocked ``keyboard``, a mocked ``sct`` whose ``monitors`` list has
            a 1920x1080 entry at index 1, and a mocked ``_click``.
        """
        from agent_v0.agent_v1.core.recovery import RecoveryEngine

        fake_executor = MagicMock()
        fake_executor.keyboard = MagicMock()
        fake_executor.sct = MagicMock()
        fake_executor.sct.monitors = [{}, {"width": 1920, "height": 1080}]
        fake_executor._click = MagicMock()
        recovery = RecoveryEngine(fake_executor)
        return recovery, fake_executor

    def _attempt(self, action_type, critic_detail):
        """Run one recovery attempt and return ``(result, executor)``.

        Args:
            action_type: Value of the failed action's ``type`` key.
            critic_detail: Critic message handed to ``attempt``.
        """
        recovery, fake_executor = self._make_engine()
        outcome = recovery.attempt(
            failed_action={"type": action_type},
            critic_detail=critic_detail,
        )
        return outcome, fake_executor

    def test_popup_detectee_escape(self):
        """The critic mentions a popup: recovery presses Escape."""
        from agent_v0.agent_v1.core.recovery import RecoveryAction

        outcome, fake_executor = self._attempt(
            "click", "Une popup d'erreur est apparue"
        )

        assert outcome.action_taken == RecoveryAction.ESCAPE
        assert outcome.success is True
        # A key press must have been sent through the keyboard mock.
        fake_executor.keyboard.press.assert_called()

    def test_frappe_incorrecte_undo(self):
        """Text typed in the wrong place: recovery performs an undo (Ctrl+Z)."""
        from agent_v0.agent_v1.core.recovery import RecoveryAction

        outcome, _ = self._attempt(
            "type", "Le texte a été tapé au mauvais endroit"
        )

        assert outcome.action_taken == RecoveryAction.UNDO
        assert outcome.success is True

    def test_mauvaise_fenetre_close(self):
        """Wrong window opened: recovery closes it (Alt+F4)."""
        from agent_v0.agent_v1.core.recovery import RecoveryAction

        outcome, _ = self._attempt(
            "click", "Mauvaise fenêtre ouverte au lieu du bloc-notes"
        )

        assert outcome.action_taken == RecoveryAction.CLOSE_WINDOW
        assert outcome.success is True

    def test_menu_ouvert_escape(self):
        """A drop-down menu opened: recovery presses Escape."""
        from agent_v0.agent_v1.core.recovery import RecoveryAction

        outcome, _ = self._attempt("click", "Un menu déroulant s'est ouvert")

        assert outcome.action_taken == RecoveryAction.ESCAPE
        assert outcome.success is True

    def test_aucune_strategie_applicable(self):
        """No recognised pattern: the action is NONE and success is False."""
        from agent_v0.agent_v1.core.recovery import RecoveryAction

        outcome, _ = self._attempt("wait", "Quelque chose d'inattendu")

        assert outcome.action_taken == RecoveryAction.NONE
        assert outcome.success is False

    def test_recovery_result_to_dict(self):
        """``RecoveryResult.to_dict`` serialises the action and success flag."""
        from agent_v0.agent_v1.core.recovery import RecoveryAction, RecoveryResult

        recovered = RecoveryResult(
            action_taken=RecoveryAction.UNDO, success=True, detail="Ctrl+Z"
        )
        payload = recovered.to_dict()

        assert payload["action_taken"] == "undo"
        assert payload["success"] is True
# =========================================================================
# P4 : Learning — apprentissage runtime
# =========================================================================
class TestReplayLearner:
    """Runtime-learning tests: outcomes are recorded to disk and can be queried."""

    @pytest.fixture
    def learner(self):
        """Yield a ``ReplayLearner`` that writes into a throw-away directory.

        The project import runs before the directory is created and the
        removal sits in a ``finally`` clause, so the directory is cleaned up
        even when building the learner raises.
        """
        from agent_v0.server_v1.replay_learner import ReplayLearner

        tmpdir = tempfile.mkdtemp(prefix="test_learning_")
        try:
            yield ReplayLearner(learning_dir=tmpdir)
        finally:
            shutil.rmtree(tmpdir, ignore_errors=True)

    def test_record_et_load_session(self, learner):
        """A recorded outcome can be read back from its session."""
        from agent_v0.server_v1.replay_learner import ActionOutcome

        outcome = ActionOutcome(
            session_id="test_session",
            action_id="act_001",
            action_type="click",
            target_description="Bouton Enregistrer",
            resolution_method="som_text",
            resolution_score=0.95,
            success=True,
        )
        learner.record(outcome)

        # Read it back.
        loaded = learner.load_session("test_session")
        assert len(loaded) == 1
        assert loaded[0].action_id == "act_001"
        assert loaded[0].success is True
        assert loaded[0].resolution_method == "som_text"

    def test_record_from_replay_result(self, learner):
        """The replay-format dictionaries are converted into a stored outcome."""
        learner.record_from_replay_result(
            session_id="s1",
            action={
                "action_id": "a1",
                "type": "click",
                "target_spec": {"by_text": "OK", "window_title": "App"},
            },
            result={
                "success": True,
                "resolution_method": "template",
                "resolution_score": 0.9,
            },
            verification={
                "verified": True,
                "semantic_verified": True,
                "semantic_detail": "OK",
            },
        )

        loaded = learner.load_session("s1")
        assert len(loaded) == 1
        assert loaded[0].target_description == "OK"
        assert loaded[0].semantic_verified is True

    def test_query_similar(self, learner):
        """Querying by description returns only the matching outcomes."""
        from agent_v0.server_v1.replay_learner import ActionOutcome

        # Four outcomes, two of which target "Bouton Enregistrer".
        samples = [
            ("Bouton Enregistrer", "som_text", True),
            ("Bouton Annuler", "template", True),
            ("Bouton Enregistrer", "vlm_direct", False),
            ("Menu Fichier", "som_text", True),
        ]
        for index, (description, method, success) in enumerate(samples):
            learner.record(ActionOutcome(
                session_id="s1",
                action_id=f"a{index}",
                action_type="click",
                target_description=description,
                resolution_method=method,
                success=success,
            ))

        # Search for "Enregistrer".
        results = learner.query_similar(target_description="Enregistrer")
        assert len(results) == 2
        # Both hits are about "Enregistrer".
        for hit in results:
            assert "enregistrer" in hit["outcome"]["target_description"].lower()

    def test_get_stats(self, learner):
        """Global and per-method success rates match the recorded outcomes."""
        from agent_v0.server_v1.replay_learner import ActionOutcome

        recorded = [(True, "som"), (True, "som"), (False, "template"), (True, "vlm")]
        for success, method in recorded:
            learner.record(ActionOutcome(
                session_id="s1",
                action_id="a",
                action_type="click",
                success=success,
                resolution_method=method,
            ))

        stats = learner.get_stats()
        assert stats["total"] == 4
        assert stats["success_rate"] == 0.75
        assert stats["methods"]["som"]["success_rate"] == 1.0
        assert stats["methods"]["template"]["success_rate"] == 0.0

    def test_gemma4_indisponible_pas_de_crash(self, learner):
        """Recording a failed outcome does not crash and shows up in the stats.

        The original author's intent: learning keeps working without a VLM.
        """
        from agent_v0.server_v1.replay_learner import ActionOutcome

        # A plain record; nothing here needs a VLM.
        learner.record(ActionOutcome(
            session_id="s1",
            action_id="a1",
            action_type="click",
            success=False,
            error="target_not_found",
        ))

        stats = learner.get_stats()
        assert stats["total"] == 1
        assert stats["success_rate"] == 0.0

    def test_fichier_jsonl_format(self, learner):
        """The session file holds one valid JSON document per line."""
        from agent_v0.server_v1.replay_learner import ActionOutcome

        learner.record(ActionOutcome(
            session_id="s1", action_id="a1", action_type="click", success=True,
        ))
        learner.record(ActionOutcome(
            session_id="s1", action_id="a2", action_type="type", success=False,
        ))

        jsonl_file = learner.learning_dir / "s1.jsonl"
        assert jsonl_file.is_file()

        # Explicit encoding so the read does not depend on the platform locale.
        # NOTE(review): assumes the learner writes UTF-8 compatible text -- confirm.
        with open(jsonl_file, encoding="utf-8") as handle:
            lines = handle.readlines()

        assert len(lines) == 2
        for line in lines:
            data = json.loads(line)  # must be valid JSON
            assert "action_id" in data
            assert "success" in data
# =========================================================================
# Boucle d'apprentissage : consolidation cross-workflow
# =========================================================================
class TestLearningLoop:
    """Learning-loop tests: past replays improve the following ones."""

    @pytest.fixture
    def learner(self):
        """Yield a ``ReplayLearner`` that writes into a throw-away directory.

        The project import runs before the directory is created and the
        removal sits in a ``finally`` clause, so the directory is cleaned up
        even when building the learner raises.
        """
        from agent_v0.server_v1.replay_learner import ReplayLearner

        tmpdir = tempfile.mkdtemp(prefix="test_learning_loop_")
        try:
            yield ReplayLearner(learning_dir=tmpdir)
        finally:
            shutil.rmtree(tmpdir, ignore_errors=True)

    def test_best_strategy_apprend_du_succes(self, learner):
        """The recommended strategy is the one with recorded successes."""
        from agent_v0.server_v1.replay_learner import ActionOutcome

        # anchor_template fails three times on "Enregistrer".
        for i in range(3):
            learner.record(ActionOutcome(
                session_id=f"s{i}",
                action_id=f"a{i}",
                action_type="click",
                target_description="Enregistrer",
                resolution_method="anchor_template",
                success=False,
            ))
        # som_text_match succeeds twice on "Enregistrer" (ids s10/a10, s11/a11).
        for i in range(10, 12):
            learner.record(ActionOutcome(
                session_id=f"s{i}",
                action_id=f"a{i}",
                action_type="click",
                target_description="Enregistrer",
                resolution_method="som_text_match",
                success=True,
            ))

        best = learner.best_strategy_for("Enregistrer")
        assert best == "som_text_match"

    def test_best_strategy_minimum_2_essais(self, learner):
        """A single recorded attempt is not enough to recommend a strategy."""
        from agent_v0.server_v1.replay_learner import ActionOutcome

        # One success only: below the two-attempt minimum named by this test.
        learner.record(ActionOutcome(
            session_id="s1",
            action_id="a1",
            action_type="click",
            target_description="OK",
            resolution_method="vlm_direct",
            success=True,
        ))

        best = learner.best_strategy_for("OK")
        assert best is None

    def test_best_strategy_rien_si_historique_vide(self, learner):
        """With no history there is no recommendation."""
        best = learner.best_strategy_for("Inexistant")
        assert best is None

    def test_consolidate_workflow_enrichit_les_actions(self, learner):
        """Consolidation injects ``_learned_strategy`` into matching target specs."""
        from agent_v0.server_v1.replay_learner import ActionOutcome

        # History: som_text_match works for "Fichier".
        for i in range(3):
            learner.record(ActionOutcome(
                session_id=f"s{i}",
                action_id=f"a{i}",
                action_type="click",
                target_description="Fichier",
                resolution_method="som_text_match",
                success=True,
            ))

        # Workflow with one "Fichier" action, one type action, one unknown target.
        actions = [
            {
                "type": "click",
                "target_spec": {"by_text": "Fichier", "window_title": "Bloc-notes"},
            },
            {"type": "type", "text": "bonjour"},
            {"type": "click", "target_spec": {"by_text": "Inconnu"}},
        ]

        enriched = learner.consolidate_workflow(actions)

        assert enriched == 1  # only "Fichier" has history
        assert actions[0]["target_spec"]["_learned_strategy"] == "som_text_match"
        assert "_learned_strategy" not in actions[2].get("target_spec", {})

    def test_consolidation_cross_workflow(self, learner):
        """A success recorded for workflow A enriches workflow B."""
        from agent_v0.server_v1.replay_learner import ActionOutcome

        # Workflow A: "Enregistrer" succeeds with grounding_vlm.
        for i in range(3):
            learner.record(ActionOutcome(
                session_id="workflow_A",
                action_id=f"a{i}",
                action_type="click",
                target_description="Enregistrer",
                window_title="Bloc-notes",
                resolution_method="grounding_vlm",
                success=True,
            ))

        # Workflow B also contains "Enregistrer".
        workflow_b = [
            {
                "type": "click",
                "target_spec": {"by_text": "Enregistrer", "window_title": "Bloc-notes"},
            },
        ]

        enriched = learner.consolidate_workflow(workflow_b, "workflow_B")

        assert enriched == 1
        assert workflow_b[0]["target_spec"]["_learned_strategy"] == "grounding_vlm"

    def test_grounding_reordonne_strategies(self):
        """``GroundingEngine`` tries the ``_learned_strategy`` before the server."""
        from agent_v0.agent_v1.core.grounding import GroundingEngine

        executor = MagicMock()
        executor._capture_screenshot_b64.return_value = "fake"
        # Only template matching finds something.
        executor._server_resolve_target.return_value = None
        executor._template_match_anchor.return_value = {
            "resolved": True, "x_pct": 0.5, "y_pct": 0.5, "score": 0.9,
        }
        executor._hybrid_vlm_resolve.return_value = None
        engine = GroundingEngine(executor)

        # _learned_strategy = anchor_template, so template should run first.
        result = engine.locate(
            "http://server",
            {
                "by_text": "OK",
                "anchor_image_base64": "abc",
                "_learned_strategy": "anchor_template",
            },
            0.5, 0.3, 1920, 1080,
        )

        assert result.found is True
        assert result.method == "anchor_template"
        # The server must NOT have been called (template came first).
        executor._server_resolve_target.assert_not_called()