Files
rpa_vision_v3/tests/unit/test_policy_grounding_recovery_learning.py
2026-05-24 21:07:12 +02:00

952 lines
37 KiB
Python

"""
Tests fonctionnels pour P2 (Policy/Grounding), P3 (Recovery), P4 (Learning).
Vérifie que chaque module fait bien son travail :
- Grounding : localise ou retourne NOT_FOUND (pas de décision)
- Policy : décide RETRY/SKIP/ABORT/SUPERVISE (pas de localisation)
- Recovery : exécute Ctrl+Z / Escape / Alt+F4 selon le contexte
- Learning : enregistre et requête les résultats structurés
"""
import json
import shutil
import sys
import tempfile
from pathlib import Path
from unittest.mock import MagicMock, patch, PropertyMock
import pytest
# Put the directory two levels above this file's folder on ``sys.path`` (once)
# so the project packages imported by the tests below can be resolved
# regardless of the directory pytest is launched from.
_ROOT = str(Path(__file__).resolve().parents[2])
if _ROOT not in sys.path:
    sys.path.insert(0, _ROOT)
# =========================================================================
# P2 : Grounding — localisation pure
# =========================================================================
class TestGroundingEngine:
    """Exercise ``GroundingEngine.locate`` against a fully mocked executor.

    Each test stubs the executor hooks (server resolution, template matching,
    VLM resolution, screenshot capture) and checks the ``GroundingResult``
    returned by the engine and/or which hooks were invoked.
    """

    # Dotted path of the active-window probe patched by the window-scoping tests.
    _ACTIVE_WINDOW_PROBE = (
        "agent_v0.agent_v1.window_info_crossplatform.get_active_window_rect"
    )

    def _make_engine(self):
        """Return ``(engine, executor)``; the executor is a ``MagicMock`` whose
        screenshot capture yields a dummy base64 payload."""
        from agent_v0.agent_v1.core.grounding import GroundingEngine

        executor = MagicMock()
        executor._capture_screenshot_b64.return_value = "fake_b64_data"
        return GroundingEngine(executor), executor

    @staticmethod
    def _server_hit(label, x_pct=0.5, y_pct=0.25, score=0.9):
        """Build a successful ``som_text`` server answer for element ``label``."""
        return {
            "resolved": True,
            "x_pct": x_pct,
            "y_pct": y_pct,
            "method": "som_text",
            "score": score,
            "matched_element": {"label": label},
        }

    @staticmethod
    def _window_crop():
        """Capture region expected for the window rect ``[100, 200, 1100, 1000]``."""
        return {"left": 100, "top": 200, "width": 1000, "height": 800}

    @staticmethod
    def _lea_window():
        """Active-window info describing a foreground "Léa" assistant window."""
        return {
            "title": "Léa — Assistante",
            "app_name": "pythonw.exe",
            "rect": [1948, 750, 2570, 1606],
        }

    def _active_window(self, info):
        """Return a ``patch`` context making the active-window probe return ``info``."""
        return patch(self._ACTIVE_WINDOW_PROBE, return_value=info)

    def test_server_found_retourne_coordonnees(self):
        """When the server finds the element, its coordinates are returned."""
        engine, executor = self._make_engine()
        executor._server_resolve_target.return_value = self._server_hit(
            "Enregistrer", y_pct=0.3, score=0.95
        )

        located = engine.locate(
            "http://server", {"by_text": "Enregistrer"}, 0.5, 0.3, 1920, 1080
        )

        assert located.found is True
        assert located.x_pct == 0.5
        assert located.y_pct == 0.3
        assert located.method == "som_text"

    def test_server_not_found_cascade_template(self):
        """When the server fails, the engine cascades to template matching."""
        engine, executor = self._make_engine()
        executor._server_resolve_target.return_value = None
        executor._template_match_anchor.return_value = {
            "resolved": True,
            "x_pct": 0.4,
            "y_pct": 0.6,
            "score": 0.85,
        }
        target = {"by_text": "OK", "anchor_image_base64": "abc123"}

        located = engine.locate("http://server", target, 0.5, 0.3, 1920, 1080)

        assert located.found is True
        assert located.method == "anchor_template"

    def test_toutes_strategies_echouent_retourne_not_found(self):
        """When every strategy fails, the result is NOT_FOUND."""
        engine, executor = self._make_engine()
        for hook in (
            executor._server_resolve_target,
            executor._template_match_anchor,
            executor._hybrid_vlm_resolve,
        ):
            hook.return_value = None
        target = {
            "by_text": "Inexistant",
            "anchor_image_base64": "abc",
            "vlm_description": "bouton",
        }

        located = engine.locate("http://server", target, 0.5, 0.3, 1920, 1080)

        assert located.found is False
        assert "échoué" in located.detail

    def test_screenshot_echoue_retourne_not_found(self):
        """A failed screenshot capture yields NOT_FOUND immediately."""
        engine, executor = self._make_engine()
        executor._capture_screenshot_b64.return_value = None

        located = engine.locate(
            "http://server", {"by_text": "OK"}, 0.5, 0.3, 1920, 1080
        )

        assert located.found is False
        assert "screenshot" in located.detail.lower()

    def test_strategies_custom(self):
        """The caller may restrict which strategies are used."""
        engine, executor = self._make_engine()
        executor._template_match_anchor.return_value = {
            "resolved": True,
            "x_pct": 0.2,
            "y_pct": 0.8,
            "score": 0.9,
        }

        # Template only: no server URL and no server strategy.
        located = engine.locate(
            "",
            {"anchor_image_base64": "abc"},
            0.5,
            0.3,
            1920,
            1080,
            strategies=["template"],
        )

        assert located.found is True
        # The server hook must NOT have been invoked.
        executor._server_resolve_target.assert_not_called()

    def test_grounding_result_to_dict(self):
        """``GroundingResult.to_dict`` exposes its fields."""
        from agent_v0.agent_v1.core.grounding import GroundingResult

        payload = GroundingResult(
            found=True, x_pct=0.5, y_pct=0.3, method="som", score=0.9
        ).to_dict()

        assert payload["found"] is True
        assert payload["x_pct"] == 0.5
        assert payload["method"] == "som"

    def test_start_button_uses_full_screen_instead_of_active_window(self):
        """The Start button must be resolved on the whole screen."""
        engine, executor = self._make_engine()
        executor._server_resolve_target.return_value = self._server_hit(
            "Démarrer", x_pct=0.02, y_pct=0.98
        )
        engine._capture_window_or_screen = MagicMock(return_value="fake_b64_data")

        with self._active_window({"rect": [100, 100, 1100, 900]}):
            located = engine.locate(
                "http://server",
                {"by_text": "Démarrer", "by_role": "start_button"},
                0.02,
                0.98,
                1920,
                1080,
            )

        assert located.found is True
        engine._capture_window_or_screen.assert_called_once_with(None)

    def test_regular_targets_stay_scoped_to_active_window(self):
        """Ordinary application targets stay bounded to the active window."""
        engine, executor = self._make_engine()
        executor._server_resolve_target.return_value = self._server_hit("Enregistrer")
        engine._capture_window_or_screen = MagicMock(return_value="fake_b64_data")

        with self._active_window({"rect": [100, 200, 1100, 1000]}):
            located = engine.locate(
                "http://server",
                {"by_text": "Enregistrer", "by_role": "button"},
                0.5,
                0.3,
                1920,
                1080,
            )

        assert located.found is True
        engine._capture_window_or_screen.assert_called_once_with(self._window_crop())

    def test_unknown_window_rect_falls_back_to_full_screen_on_visual_mismatch(self):
        """An unknown window title is only accepted if the crop is visually validated."""
        engine, executor = self._make_engine()
        executor._server_resolve_target.return_value = self._server_hit("Enregistrer")
        executor._find_text_on_screen.return_value = None
        # First capture is the window crop, second one the full screen.
        engine._capture_window_or_screen = MagicMock(
            side_effect=["fake_window_b64", "fake_screen_b64"]
        )
        window = {"title": "unknown_window", "rect": [100, 200, 1100, 1000]}

        with self._active_window(window):
            located = engine.locate(
                "http://server",
                {"by_text": "Enregistrer", "by_role": "button"},
                0.5,
                0.3,
                1920,
                1080,
            )

        assert located.found is True
        capture_regions = [
            call.args[0] for call in engine._capture_window_or_screen.call_args_list
        ]
        assert capture_regions == [self._window_crop(), None]

    def test_taskbar_like_rect_falls_back_to_full_screen(self):
        """A taskbar/systray must never be used as the active window."""
        engine, executor = self._make_engine()
        executor._server_resolve_target.return_value = self._server_hit("Enregistrer")
        engine._capture_window_or_screen = MagicMock(return_value="fake_b64_data")
        window = {
            "title": "Fenêtre de dépassement de capacité de la barre d'état système",
            "rect": [0, 1492, 2560, 1600],
        }

        with self._active_window(window):
            located = engine.locate(
                "http://server",
                {"by_text": "Enregistrer", "by_role": "button"},
                0.5,
                0.3,
                2560,
                1600,
            )

        assert located.found is True
        engine._capture_window_or_screen.assert_called_once_with(None)

    def test_visually_mismatched_window_crop_falls_back_to_full_screen(self):
        """A plausible window crop that is visually wrong is rejected."""
        engine, executor = self._make_engine()
        executor._server_resolve_target.return_value = self._server_hit("Enregistrer")
        executor._find_text_on_screen.return_value = None
        engine._capture_window_or_screen = MagicMock(
            side_effect=["fake_window_b64", "fake_screen_b64"]
        )
        target = {
            "by_text": "Enregistrer",
            "by_role": "button",
            "window_title": "Enregistrer sous",
        }
        # Independent copy taken before the call, so the expectation stays a
        # pristine spec even if the engine were to touch the dict it receives.
        expected_spec = dict(target)
        window = {"title": "Enregistrer sous", "rect": [100, 200, 1100, 1000]}

        with self._active_window(window):
            located = engine.locate("http://server", target, 0.5, 0.3, 1920, 1080)

        assert located.found is True
        capture_regions = [
            call.args[0] for call in engine._capture_window_or_screen.call_args_list
        ]
        assert capture_regions == [self._window_crop(), None]
        executor._server_resolve_target.assert_called_once_with(
            "http://server",
            "fake_screen_b64",
            expected_spec,
            0.5,
            0.3,
            1920,
            1080,
        )

    def test_visually_validated_window_crop_stays_scoped(self):
        """A plausible window crop that is visually validated stays in use."""
        engine, executor = self._make_engine()
        executor._server_resolve_target.return_value = self._server_hit("Enregistrer")
        executor._find_text_on_screen.return_value = (321, 222)
        engine._capture_window_or_screen = MagicMock(return_value="fake_window_b64")
        target = {
            "by_text": "Enregistrer",
            "by_role": "button",
            "window_title": "Enregistrer sous",
        }
        window = {"title": "Enregistrer sous", "rect": [100, 200, 1100, 1000]}

        with self._active_window(window):
            located = engine.locate("http://server", target, 0.5, 0.3, 1920, 1080)

        assert located.found is True
        engine._capture_window_or_screen.assert_called_once_with(self._window_crop())

    def test_lea_active_window_does_not_scope_external_target(self):
        """A foreground Léa window must never constrain an external target."""
        engine, executor = self._make_engine()
        executor._server_resolve_target.return_value = self._server_hit("Bloc-notes")
        engine._capture_window_or_screen = MagicMock(return_value="fake_b64_data")
        target = {"by_text": "Bloc-notes", "by_role": "search_result"}
        # Independent copy of the spec used as the expected call argument.
        expected_spec = dict(target)

        with self._active_window(self._lea_window()):
            located = engine.locate("http://server", target, 0.2, 0.5, 2560, 1600)

        assert located.found is True
        engine._capture_window_or_screen.assert_called_once_with(None)
        executor._server_resolve_target.assert_called_once_with(
            "http://server",
            "fake_b64_data",
            expected_spec,
            0.2,
            0.5,
            2560,
            1600,
        )

    def test_lea_active_window_stays_scoped_for_explicit_lea_target(self):
        """If the target explicitly names Léa, scoping to its window stays allowed."""
        engine, executor = self._make_engine()
        executor._server_resolve_target.return_value = self._server_hit("Continuer")
        engine._capture_window_or_screen = MagicMock(return_value="fake_b64_data")
        target = {
            "by_text": "Continuer",
            "by_role": "button",
            "window_title": "Léa — Assistante",
        }

        with self._active_window(self._lea_window()):
            located = engine.locate("http://server", target, 0.5, 0.3, 3000, 2000)

        assert located.found is True
        engine._capture_window_or_screen.assert_called_once_with(
            {"left": 1948, "top": 750, "width": 622, "height": 856}
        )

    def test_allow_position_fallback_returns_recorded_coords(self):
        """When allowed, grounding may fall back to the recorded position."""
        engine, executor = self._make_engine()
        for hook in (
            executor._server_resolve_target,
            executor._template_match_anchor,
            executor._hybrid_vlm_resolve,
        ):
            hook.return_value = None
        target = {
            "by_role": "start_button",
            "vlm_description": "icône Windows",
            "screen_scope": "full_screen",
            "allow_position_fallback": True,
        }

        located = engine.locate(
            "http://server", target, 0.387891, 0.974375, 1920, 1080
        )

        assert located.found is True
        assert located.method == "position_fallback"
        assert located.x_pct == pytest.approx(0.387891)
        assert located.y_pct == pytest.approx(0.974375)
# =========================================================================
# P2 : Policy — décisions quand grounding échoue
# =========================================================================
class TestPolicyEngine:
    """Check the decision returned by ``PolicyEngine.decide`` for a mocked executor."""

    def _make_engine(self):
        """Return ``(engine, executor)`` with ``_system_dialog_pause`` disabled on the mock."""
        from agent_v0.agent_v1.core.policy import PolicyEngine

        executor = MagicMock()
        executor._system_dialog_pause = None
        return PolicyEngine(executor), executor

    def _decide_with_actor_answer(self, answer, by_text):
        """Call ``decide`` with ``retry_count == max_retries == 1`` while the
        mocked actor replies ``answer``; return the resulting decision object."""
        engine, executor = self._make_engine()
        executor._actor_decide.return_value = answer
        return engine.decide(
            action={"type": "click"},
            target_spec={"by_text": by_text},
            retry_count=1,
            max_retries=1,
        )

    def test_premier_essai_popup_fermee_retry(self):
        """First failure + popup closed -> RETRY."""
        from agent_v0.agent_v1.core.policy import Decision

        engine, executor = self._make_engine()
        executor._handle_popup_vlm.return_value = True  # popup was closed

        outcome = engine.decide(
            action={"type": "click"},
            target_spec={"by_text": "OK"},
            retry_count=0,
        )

        assert outcome.decision == Decision.RETRY
        assert "popup" in outcome.reason.lower()

    def test_premier_essai_pas_de_popup_retry(self):
        """First failure + no popup -> still RETRY (max_retries > 0)."""
        from agent_v0.agent_v1.core.policy import Decision

        engine, executor = self._make_engine()
        executor._handle_popup_vlm.return_value = False

        outcome = engine.decide(
            action={"type": "click"},
            target_spec={"by_text": "OK"},
            retry_count=0,
            max_retries=2,
        )

        assert outcome.decision == Decision.RETRY

    def test_max_retries_acteur_passer_skip(self):
        """Retries exhausted + actor answers PASSER -> SKIP."""
        from agent_v0.agent_v1.core.policy import Decision

        outcome = self._decide_with_actor_answer("PASSER", "Onglet")
        assert outcome.decision == Decision.SKIP

    def test_max_retries_acteur_stopper_abort(self):
        """Retries exhausted + actor answers STOPPER -> ABORT."""
        from agent_v0.agent_v1.core.policy import Decision

        outcome = self._decide_with_actor_answer("STOPPER", "X")
        assert outcome.decision == Decision.ABORT

    def test_max_retries_acteur_executer_supervise(self):
        """Retries exhausted + actor answers EXECUTER -> SUPERVISE (hand control back)."""
        from agent_v0.agent_v1.core.policy import Decision

        outcome = self._decide_with_actor_answer("EXECUTER", "X")
        assert outcome.decision == Decision.SUPERVISE

    def test_policy_decision_to_dict(self):
        """``PolicyDecision.to_dict`` exposes the decision value and the reason."""
        from agent_v0.agent_v1.core.policy import Decision, PolicyDecision

        payload = PolicyDecision(
            decision=Decision.SKIP, reason="État atteint"
        ).to_dict()

        assert payload["decision"] == "skip"
        assert payload["reason"] == "État atteint"
# =========================================================================
# P3 : Recovery — rollback après échec
# =========================================================================
class TestRecoveryEngine:
    """Check which recovery action ``RecoveryEngine.attempt`` picks from the critic text."""

    def _make_engine(self):
        """Return ``(engine, executor)`` with mocked keyboard, screen grabber and click."""
        from agent_v0.agent_v1.core.recovery import RecoveryEngine

        executor = MagicMock()
        executor.keyboard = MagicMock()
        executor.sct = MagicMock()
        executor.sct.monitors = [{}, {"width": 1920, "height": 1080}]
        executor._click = MagicMock()
        return RecoveryEngine(executor), executor

    def _attempt(self, action_type, critic_detail):
        """Run one recovery attempt on a fresh engine; return ``(result, executor)``."""
        engine, executor = self._make_engine()
        outcome = engine.attempt(
            failed_action={"type": action_type},
            critic_detail=critic_detail,
        )
        return outcome, executor

    def test_popup_detectee_escape(self):
        """Critic mentions a popup -> recovery presses Escape."""
        from agent_v0.agent_v1.core.recovery import RecoveryAction

        outcome, executor = self._attempt("click", "Une popup d'erreur est apparue")

        assert outcome.action_taken == RecoveryAction.ESCAPE
        assert outcome.success is True
        # A key press must have been sent through the mocked keyboard.
        executor.keyboard.press.assert_called()

    def test_frappe_incorrecte_undo(self):
        """Wrong keystrokes -> recovery performs an undo (Ctrl+Z)."""
        from agent_v0.agent_v1.core.recovery import RecoveryAction

        outcome, _ = self._attempt("type", "Le texte a été tapé au mauvais endroit")

        assert outcome.action_taken == RecoveryAction.UNDO
        assert outcome.success is True

    def test_mauvaise_fenetre_close(self):
        """Wrong window -> recovery closes it (Alt+F4)."""
        from agent_v0.agent_v1.core.recovery import RecoveryAction

        outcome, _ = self._attempt(
            "click", "Mauvaise fenêtre ouverte au lieu du bloc-notes"
        )

        assert outcome.action_taken == RecoveryAction.CLOSE_WINDOW
        assert outcome.success is True

    def test_menu_ouvert_escape(self):
        """Drop-down menu left open -> recovery presses Escape."""
        from agent_v0.agent_v1.core.recovery import RecoveryAction

        outcome, _ = self._attempt("click", "Un menu déroulant s'est ouvert")

        assert outcome.action_taken == RecoveryAction.ESCAPE
        assert outcome.success is True

    def test_aucune_strategie_applicable(self):
        """No recognised pattern -> NONE, reported as unsuccessful."""
        from agent_v0.agent_v1.core.recovery import RecoveryAction

        outcome, _ = self._attempt("wait", "Quelque chose d'inattendu")

        assert outcome.action_taken == RecoveryAction.NONE
        assert outcome.success is False

    def test_recovery_result_to_dict(self):
        """``RecoveryResult.to_dict`` exposes the action value and the success flag."""
        from agent_v0.agent_v1.core.recovery import RecoveryAction, RecoveryResult

        payload = RecoveryResult(
            action_taken=RecoveryAction.UNDO, success=True, detail="Ctrl+Z"
        ).to_dict()

        assert payload["action_taken"] == "undo"
        assert payload["success"] is True
# =========================================================================
# P4 : Learning — apprentissage runtime
# =========================================================================
class TestReplayLearner:
    """Tests for ``ReplayLearner``: recording, reloading, querying and human corrections."""

    # Dotted path of the persistence hook replaced in the human-correction tests.
    _MEMORY_HOOK = "agent_v0.server_v1.replay_memory.memory_record_success"

    @pytest.fixture
    def learner(self):
        """Yield a ``ReplayLearner`` backed by a throwaway directory.

        The directory is removed in a ``finally`` clause so that it is cleaned
        up even when the import or the constructor raises before the test runs.
        """
        tmpdir = tempfile.mkdtemp(prefix="test_learning_")
        try:
            from agent_v0.server_v1.replay_learner import ReplayLearner

            yield ReplayLearner(learning_dir=tmpdir)
        finally:
            shutil.rmtree(tmpdir, ignore_errors=True)

    def _capture_memory_success(self, monkeypatch):
        """Replace the memory hook with a stub and return the dict that
        collects the keyword arguments of every call made to it."""
        captured = {}

        def fake_memory_record_success(**kwargs):
            captured.update(kwargs)
            return True

        monkeypatch.setattr(self._MEMORY_HOOK, fake_memory_record_success)
        return captured

    def test_record_et_load_session(self, learner):
        """Record one outcome and read it back from the session file."""
        from agent_v0.server_v1.replay_learner import ActionOutcome

        outcome = ActionOutcome(
            session_id="test_session",
            action_id="act_001",
            action_type="click",
            target_description="Bouton Enregistrer",
            resolution_method="som_text",
            resolution_score=0.95,
            success=True,
        )
        learner.record(outcome)

        loaded = learner.load_session("test_session")

        assert len(loaded) == 1
        assert loaded[0].action_id == "act_001"
        assert loaded[0].success is True
        assert loaded[0].resolution_method == "som_text"

    def test_record_from_replay_result(self, learner):
        """The replay-result format is converted into an ``ActionOutcome``."""
        learner.record_from_replay_result(
            session_id="s1",
            action={
                "action_id": "a1",
                "type": "click",
                "target_spec": {"by_text": "OK", "window_title": "App"},
            },
            result={
                "success": True,
                "resolution_method": "template",
                "resolution_score": 0.9,
            },
            verification={
                "verified": True,
                "semantic_verified": True,
                "semantic_detail": "OK",
            },
        )

        loaded = learner.load_session("s1")

        assert len(loaded) == 1
        assert loaded[0].target_description == "OK"
        assert loaded[0].semantic_verified is True

    def test_query_similar(self, learner):
        """Similar outcomes can be queried by target description."""
        from agent_v0.server_v1.replay_learner import ActionOutcome

        samples = [
            ("Bouton Enregistrer", "som_text", True),
            ("Bouton Annuler", "template", True),
            ("Bouton Enregistrer", "vlm_direct", False),
            ("Menu Fichier", "som_text", True),
        ]
        for index, (description, method, success) in enumerate(samples):
            learner.record(
                ActionOutcome(
                    session_id="s1",
                    action_id=f"a{index}",
                    action_type="click",
                    target_description=description,
                    resolution_method=method,
                    success=success,
                )
            )

        results = learner.query_similar(target_description="Enregistrer")

        assert len(results) == 2
        # Both hits must concern "Enregistrer".
        for hit in results:
            assert "enregistrer" in hit["outcome"]["target_description"].lower()

    def test_get_stats(self, learner):
        """Global statistics aggregate totals and per-method success rates."""
        from agent_v0.server_v1.replay_learner import ActionOutcome

        samples = [(True, "som"), (True, "som"), (False, "template"), (True, "vlm")]
        for success, method in samples:
            learner.record(
                ActionOutcome(
                    session_id="s1",
                    action_id="a",
                    action_type="click",
                    success=success,
                    resolution_method=method,
                )
            )

        stats = learner.get_stats()

        assert stats["total"] == 4
        assert stats["success_rate"] == 0.75
        assert stats["methods"]["som"]["success_rate"] == 1.0
        assert stats["methods"]["template"]["success_rate"] == 0.0

    def test_gemma4_indisponible_pas_de_crash(self, learner):
        """Learning works without any VLM: a plain record must not crash."""
        from agent_v0.server_v1.replay_learner import ActionOutcome

        learner.record(
            ActionOutcome(
                session_id="s1",
                action_id="a1",
                action_type="click",
                success=False,
                error="target_not_found",
            )
        )

        stats = learner.get_stats()

        assert stats["total"] == 1
        assert stats["success_rate"] == 0.0

    def test_fichier_jsonl_format(self, learner):
        """The session file holds one valid JSON document per line."""
        from agent_v0.server_v1.replay_learner import ActionOutcome

        learner.record(
            ActionOutcome(
                session_id="s1", action_id="a1", action_type="click", success=True
            )
        )
        learner.record(
            ActionOutcome(
                session_id="s1", action_id="a2", action_type="type", success=False
            )
        )

        jsonl_file = learner.learning_dir / "s1.jsonl"
        assert jsonl_file.is_file()
        # Read with an explicit encoding so the test does not depend on the
        # platform's locale default.
        with open(jsonl_file, encoding="utf-8") as handle:
            lines = handle.readlines()

        assert len(lines) == 2
        for line in lines:
            data = json.loads(line)  # must parse as JSON
            assert "action_id" in data
            assert "success" in data

    def test_record_human_correction_persists_to_memory_helper(self, learner, monkeypatch):
        """A human correction must feed persistent memory through ``replay_memory``."""
        captured = self._capture_memory_success(monkeypatch)

        learner.record_human_correction(
            session_id="s_corr",
            action={
                "action_id": "a_corr",
                "target_spec": {"by_text": "Valider", "window_title": "Bloc-notes"},
            },
            correction={"x_pct": 0.42, "y_pct": 0.84},
        )

        loaded = learner.load_session("s_corr")
        assert len(loaded) == 1
        assert loaded[0].resolution_method == "human_supervised"
        assert loaded[0].window_title == "Bloc-notes"
        assert captured["window_title"] == "Bloc-notes"
        assert captured["target_spec"]["by_text"] == "Valider"
        assert captured["x_pct"] == 0.42
        assert captured["y_pct"] == 0.84
        assert captured["method"] == "human_supervised"
        assert captured["confidence"] == 1.0

    def test_record_human_correction_fallback_window_title_from_action(self, learner, monkeypatch):
        """Without ``target_spec.window_title``, ``action.window_title`` is used."""
        captured = self._capture_memory_success(monkeypatch)

        learner.record_human_correction(
            session_id="s_corr2",
            action={
                "action_id": "a_corr2",
                "window_title": "Fenêtre fallback",
                "target_spec": {"by_text": "Enregistrer"},
            },
            correction={"x_pct": 0.1, "y_pct": 0.2},
        )

        assert captured["window_title"] == "Fenêtre fallback"

    def test_record_human_correction_uses_last_click_contract(self, learner, monkeypatch):
        """The agent contract sends the coordinates in ``correction.last_click``."""
        captured = self._capture_memory_success(monkeypatch)

        learner.record_human_correction(
            session_id="s_corr_last_click",
            action={
                "action_id": "a_corr_last_click",
                "target_spec": {"by_text": "Enregistrer", "window_title": "Bloc-notes"},
            },
            correction={
                "actions": [{"type": "click", "x_pct": 0.33, "y_pct": 0.66}],
                "last_click": {"type": "click", "x_pct": 0.33, "y_pct": 0.66},
            },
        )

        assert captured["x_pct"] == 0.33
        assert captured["y_pct"] == 0.66
        assert len(learner.load_session("s_corr_last_click")) == 1

    def test_record_human_correction_rejects_out_of_bounds_coords(self, learner, monkeypatch):
        """An off-screen correction must not feed the memory."""
        captured = self._capture_memory_success(monkeypatch)

        learner.record_human_correction(
            session_id="s_corr_oob",
            action={
                "action_id": "a_corr_oob",
                "target_spec": {"by_text": "Enregistrer", "window_title": "Bloc-notes"},
            },
            correction={
                "actions": [{"type": "click", "x_pct": 1.748, "y_pct": 0.135}],
                "last_click": {"type": "click", "x_pct": 1.748, "y_pct": 0.135},
            },
        )

        assert captured == {}
        assert learner.load_session("s_corr_oob") == []
# =========================================================================
# Boucle d'apprentissage : consolidation cross-workflow
# =========================================================================
class TestLearningLoop:
    """Tests of the learning loop: past replays improve the following ones."""

    @pytest.fixture
    def learner(self):
        """Yield a ``ReplayLearner`` backed by a throwaway directory.

        The directory is removed in a ``finally`` clause so that it is cleaned
        up even when the import or the constructor raises before the test runs.
        """
        tmpdir = tempfile.mkdtemp(prefix="test_learning_loop_")
        try:
            from agent_v0.server_v1.replay_learner import ReplayLearner

            yield ReplayLearner(learning_dir=tmpdir)
        finally:
            shutil.rmtree(tmpdir, ignore_errors=True)

    def test_best_strategy_apprend_du_succes(self, learner):
        """The best strategy is the one with the most successes."""
        from agent_v0.server_v1.replay_learner import ActionOutcome

        # anchor_template fails three times on "Enregistrer".
        for i in range(3):
            learner.record(
                ActionOutcome(
                    session_id=f"s{i}",
                    action_id=f"a{i}",
                    action_type="click",
                    target_description="Enregistrer",
                    resolution_method="anchor_template",
                    success=False,
                )
            )
        # som_text_match succeeds twice on "Enregistrer".
        for i in range(2):
            learner.record(
                ActionOutcome(
                    session_id=f"s{10+i}",
                    action_id=f"a{10+i}",
                    action_type="click",
                    target_description="Enregistrer",
                    resolution_method="som_text_match",
                    success=True,
                )
            )

        assert learner.best_strategy_for("Enregistrer") == "som_text_match"

    def test_best_strategy_minimum_2_essais(self, learner):
        """At least two attempts are needed before a strategy is recommended."""
        from agent_v0.server_v1.replay_learner import ActionOutcome

        # A single success is not enough for a recommendation.
        learner.record(
            ActionOutcome(
                session_id="s1",
                action_id="a1",
                action_type="click",
                target_description="OK",
                resolution_method="vlm_direct",
                success=True,
            )
        )

        assert learner.best_strategy_for("OK") is None

    def test_best_strategy_rien_si_historique_vide(self, learner):
        """No history -> no recommendation."""
        assert learner.best_strategy_for("Inexistant") is None

    def test_consolidate_workflow_enrichit_les_actions(self, learner):
        """Consolidation injects ``_learned_strategy`` into the target specs."""
        from agent_v0.server_v1.replay_learner import ActionOutcome

        # History: som_text_match works for "Fichier".
        for i in range(3):
            learner.record(
                ActionOutcome(
                    session_id=f"s{i}",
                    action_id=f"a{i}",
                    action_type="click",
                    target_description="Fichier",
                    resolution_method="som_text_match",
                    success=True,
                )
            )
        # Workflow containing one "Fichier" action.
        actions = [
            {"type": "click", "target_spec": {"by_text": "Fichier", "window_title": "Bloc-notes"}},
            {"type": "type", "text": "bonjour"},
            {"type": "click", "target_spec": {"by_text": "Inconnu"}},
        ]

        enriched = learner.consolidate_workflow(actions)

        assert enriched == 1  # only "Fichier" has a history
        assert actions[0]["target_spec"]["_learned_strategy"] == "som_text_match"
        assert "_learned_strategy" not in actions[2].get("target_spec", {})

    def test_consolidation_cross_workflow(self, learner):
        """A success recorded in workflow A improves workflow B."""
        from agent_v0.server_v1.replay_learner import ActionOutcome

        # Workflow A: "Enregistrer" succeeds with grounding_vlm.
        for i in range(3):
            learner.record(
                ActionOutcome(
                    session_id="workflow_A",
                    action_id=f"a{i}",
                    action_type="click",
                    target_description="Enregistrer",
                    window_title="Bloc-notes",
                    resolution_method="grounding_vlm",
                    success=True,
                )
            )
        # Workflow B also contains "Enregistrer".
        workflow_b = [
            {"type": "click", "target_spec": {"by_text": "Enregistrer", "window_title": "Bloc-notes"}},
        ]

        enriched = learner.consolidate_workflow(workflow_b, "workflow_B")

        assert enriched == 1
        assert workflow_b[0]["target_spec"]["_learned_strategy"] == "grounding_vlm"

    def test_grounding_reordonne_strategies(self):
        """``GroundingEngine`` reorders its strategies according to ``_learned_strategy``."""
        from agent_v0.agent_v1.core.grounding import GroundingEngine

        executor = MagicMock()
        executor._capture_screenshot_b64.return_value = "fake"
        # Only template matching is stubbed to succeed.
        executor._server_resolve_target.return_value = None
        executor._template_match_anchor.return_value = {
            "resolved": True,
            "x_pct": 0.5,
            "y_pct": 0.5,
            "score": 0.9,
        }
        executor._hybrid_vlm_resolve.return_value = None
        engine = GroundingEngine(executor)

        # With _learned_strategy = anchor_template, template is tried first.
        result = engine.locate(
            "http://server",
            {"by_text": "OK", "anchor_image_base64": "abc", "_learned_strategy": "anchor_template"},
            0.5,
            0.3,
            1920,
            1080,
        )

        assert result.found is True
        assert result.method == "anchor_template"
        # The server hook was NOT invoked (template came first).
        executor._server_resolve_target.assert_not_called()