Refonte majeure du système Agent Chat et ajout de nombreux modules : - Chat unifié : suppression du dual Workflows/Agent Libre, tout passe par /api/chat avec résolution en 3 niveaux (workflow → geste → "montre-moi") - GestureCatalog : 38 raccourcis clavier universels Windows avec matching sémantique, substitution automatique dans les replays, et endpoint /api/gestures - Mode Copilot : exécution pas-à-pas des workflows avec validation humaine via WebSocket (approve/skip/abort) avant chaque action - Léa UI (agent_v0/lea_ui/) : interface PyQt5 pour Windows avec overlay transparent pour feedback visuel pendant le replay - Data Extraction (core/extraction/) : moteur d'extraction visuelle de données (OCR + VLM → SQLite), avec schémas YAML et export CSV/Excel - ReplayVerifier (agent_v0/server_v1/) : vérification post-action par comparaison de screenshots, avec logique de retry (max 3) - IntentParser durci : meilleur fallback regex, type GREETING, patterns améliorés - Dashboard : nouvelles pages gestures, streaming, extractions - Tests : 63 tests GestureCatalog, 47 tests extraction, corrections tests existants - Dépréciation : /api/agent/plan et /api/agent/execute retournent HTTP 410, suppression du code hardcodé _plan_to_replay_actions Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
548 lines
20 KiB
Python
548 lines
20 KiB
Python
"""
|
|
Tests unitaires pour ErrorHandler
|
|
|
|
Teste toutes les fonctionnalités de gestion d'erreurs :
|
|
- Gestion des échecs de matching
|
|
- Gestion des targets introuvables
|
|
- Gestion des violations de post-conditions
|
|
- Détection de changements UI
|
|
- Système de rollback
|
|
- Logging et statistiques
|
|
|
|
Note: Les legacy methods (handle_matching_failure, handle_target_not_found,
|
|
handle_postcondition_failure) délèguent maintenant à handle_error() qui utilise
|
|
RecoveryStrategyFactory. Les résultats dépendent des stratégies disponibles.
|
|
"""
|
|
|
|
# Standard library
import shutil
import tempfile
from datetime import datetime
from pathlib import Path
from unittest.mock import Mock, patch, MagicMock

# Third-party
import numpy as np
import pytest

# Project under test
from core.execution.error_handler import (
    ErrorHandler,
    ErrorType,
    RecoveryStrategy,
    ErrorContext,
    RecoveryResult,
)


@pytest.fixture
def temp_error_dir():
    """Yield a temporary directory for error logs, removed after the test."""
    temp_dir = tempfile.mkdtemp()
    yield temp_dir
    # Teardown: remove the directory and everything written into it.
    shutil.rmtree(temp_dir)


@pytest.fixture
def error_handler(temp_error_dir):
    """Create an ErrorHandler instance with known settings for the tests."""
    return ErrorHandler(
        error_log_dir=temp_error_dir,
        max_retry_attempts=3,
        ui_change_threshold=0.70,
        enable_auto_recovery=True
    )


@pytest.fixture
def mock_screen_state():
    """Create a mock ScreenState for the tests.

    A plain Mock is used instead of the real classes so the tests do not
    depend on screenshot files or the perception pipeline.
    """
    mock_state = Mock()
    mock_state.raw_level = Mock()
    mock_state.raw_level.screenshot_path = Path("/tmp/test_screenshot.png")
    mock_state.raw_level.window_title = "Test Window"

    # One fake UI element is enough for the handlers under test.
    mock_state.perception_level = Mock()
    mock_state.perception_level.ui_elements = [
        Mock(
            element_id="elem_1",
            role="button",
            text="Click Me",
            bbox=(100, 100, 200, 150)
        )
    ]

    return mock_state


@pytest.fixture
def mock_workflow_node():
    """Create a mock WorkflowNode for the tests."""
    mock_node = Mock()
    mock_node.node_id = "node_1"
    mock_node.label = "Test Node"
    return mock_node


@pytest.fixture
def mock_workflow_edge():
    """Create a mock WorkflowEdge (with an attached mouse-click action)."""
    # The action mimics an ActionType enum value via .type.value.
    mock_action = Mock()
    mock_action.type = Mock()
    mock_action.type.value = "mouse_click"
    mock_action.target = Mock(role="button", text_pattern="Click Me")

    mock_edge = Mock()
    mock_edge.from_node = "node_1"
    mock_edge.to_node = "node_2"
    mock_edge.action = mock_action

    return mock_edge


class TestErrorHandlerInitialization:
    """ErrorHandler initialization tests."""

    def test_initialization_default_params(self, temp_error_dir):
        """Initialization with default parameters."""
        handler = ErrorHandler(error_log_dir=temp_error_dir)

        assert handler.max_retry_attempts == 3
        assert handler.ui_change_threshold == 0.70
        assert handler.enable_auto_recovery is True
        assert len(handler.error_history) == 0
        assert len(handler.edge_failure_counts) == 0
        assert len(handler.problematic_edges) == 0
        assert len(handler.action_history) == 0

    def test_initialization_custom_params(self, temp_error_dir):
        """Initialization with custom parameters."""
        handler = ErrorHandler(
            error_log_dir=temp_error_dir,
            max_retry_attempts=5,
            ui_change_threshold=0.80,
            enable_auto_recovery=False
        )

        assert handler.max_retry_attempts == 5
        assert handler.ui_change_threshold == 0.80
        assert handler.enable_auto_recovery is False

    def test_error_log_directory_created(self, temp_error_dir):
        """The log directory exists after construction."""
        handler = ErrorHandler(error_log_dir=temp_error_dir)
        assert Path(temp_error_dir).exists()


class TestMatchingFailureHandling:
    """Tests for matching-failure handling.

    Note: handle_matching_failure now delegates to handle_error() via
    RecoveryStrategyFactory. The internal MatchingFailedException is not
    mapped by the strategies, so handle_error returns ABORT.
    """

    @patch('core.execution.error_handler.ErrorHandler._log_error_with_correlation', return_value='test_id')
    def test_handle_matching_failure_very_low_confidence(
        self, mock_log, error_handler, mock_screen_state
    ):
        """Failure handling with very low confidence (<0.70)."""
        candidate_nodes = [Mock(node_id="node_1", label="Node 1")]

        result = error_handler.handle_matching_failure(
            screen_state=mock_screen_state,
            candidate_nodes=candidate_nodes,
            best_confidence=0.50,
            threshold=0.85
        )

        assert result.success is False
        # The centralized handle_error returns ABORT when no strategy applies.
        assert result.strategy_used in (RecoveryStrategy.ABORT, RecoveryStrategy.PAUSE)
        assert len(error_handler.error_history) == 1

    @patch('core.execution.error_handler.ErrorHandler._log_error_with_correlation', return_value='test_id')
    def test_handle_matching_failure_close_to_threshold(
        self, mock_log, error_handler, mock_screen_state
    ):
        """Failure handling with confidence close to the threshold."""
        candidate_nodes = [Mock(node_id="node_1", label="Node 1")]

        result = error_handler.handle_matching_failure(
            screen_state=mock_screen_state,
            candidate_nodes=candidate_nodes,
            best_confidence=0.82,
            threshold=0.85
        )

        assert result.success is False
        # The centralized handle_error may return ABORT or RETRY depending on strategies.
        assert result.strategy_used in (RecoveryStrategy.ABORT, RecoveryStrategy.RETRY)

    @patch('core.execution.error_handler.ErrorHandler._log_error_with_correlation', return_value='test_id')
    def test_matching_failure_creates_error_log(
        self, mock_log, error_handler, mock_screen_state, temp_error_dir
    ):
        """A matching failure invokes the logging hook."""
        candidate_nodes = [Mock(node_id="node_1", label="Node 1")]

        error_handler.handle_matching_failure(
            screen_state=mock_screen_state,
            candidate_nodes=candidate_nodes,
            best_confidence=0.50,
            threshold=0.85
        )

        # The logging hook must have been called.
        assert mock_log.called


class TestTargetNotFoundHandling:
    """Tests for target-not-found handling.

    Note: handle_target_not_found delegates to handle_error() via
    RecoveryStrategyFactory. TargetNotFoundError is classified as
    TARGET_NOT_FOUND and a spatial-fallback strategy is attempted.
    """

    @patch('core.execution.error_handler.ErrorHandler._log_error_with_correlation', return_value='test_id')
    def test_handle_target_not_found_first_attempt(
        self, mock_log, error_handler, mock_screen_state, mock_workflow_edge
    ):
        """Target-not-found handling (first attempt)."""
        result = error_handler.handle_target_not_found(
            action=mock_workflow_edge.action,
            screen_state=mock_screen_state,
            edge=mock_workflow_edge
        )

        assert result.success is False
        # The error is recorded in the history.
        assert len(error_handler.error_history) == 1
        assert error_handler.error_history[0].error_type == ErrorType.TARGET_NOT_FOUND

    @patch('core.execution.error_handler.ErrorHandler._log_error_with_correlation', return_value='test_id')
    def test_handle_target_not_found_max_retries(
        self, mock_log, error_handler, mock_screen_state, mock_workflow_edge
    ):
        """Handling after several attempts."""
        # Simulate repeated attempts (one past the retry cap).
        for _ in range(error_handler.max_retry_attempts + 1):
            result = error_handler.handle_target_not_found(
                action=mock_workflow_edge.action,
                screen_state=mock_screen_state,
                edge=mock_workflow_edge
            )

        # Every attempt must be recorded.
        assert len(error_handler.error_history) == error_handler.max_retry_attempts + 1
        assert result.success is False

    @patch('core.execution.error_handler.ErrorHandler._log_error_with_correlation', return_value='test_id')
    def test_edge_failure_count_incremented(
        self, mock_log, error_handler, mock_screen_state, mock_workflow_edge
    ):
        """Errors are recorded in the history."""
        error_handler.handle_target_not_found(
            action=mock_workflow_edge.action,
            screen_state=mock_screen_state,
            edge=mock_workflow_edge
        )

        # The error must be in the history with the right type.
        assert len(error_handler.error_history) == 1
        assert error_handler.error_history[0].error_type == ErrorType.TARGET_NOT_FOUND

    @patch('core.execution.error_handler.ErrorHandler._log_error_with_correlation', return_value='test_id')
    def test_edge_marked_problematic_after_multiple_failures(
        self, mock_log, error_handler, mock_screen_state, mock_workflow_edge
    ):
        """An edge accumulates errors after more than 3 failures."""
        # Simulate 4 failures on the same edge.
        for _ in range(4):
            error_handler.handle_target_not_found(
                action=mock_workflow_edge.action,
                screen_state=mock_screen_state,
                edge=mock_workflow_edge
            )

        # All 4 errors must be recorded with the TARGET_NOT_FOUND type.
        assert len(error_handler.error_history) == 4
        for error in error_handler.error_history:
            assert error.error_type == ErrorType.TARGET_NOT_FOUND


class TestPostconditionFailureHandling:
    """Tests for post-condition violation handling.

    Note: handle_postcondition_failure delegates to handle_error() via
    RecoveryStrategyFactory.
    """

    @patch('core.execution.error_handler.ErrorHandler._log_error_with_correlation', return_value='test_id')
    def test_handle_postcondition_failure_first_attempt(
        self, mock_log, error_handler, mock_screen_state, mock_workflow_edge, mock_workflow_node
    ):
        """Post-condition violation handling (first attempt)."""
        result = error_handler.handle_postcondition_failure(
            edge=mock_workflow_edge,
            screen_state=mock_screen_state,
            expected_node=mock_workflow_node,
            timeout_ms=5000
        )

        assert result.success is False
        assert len(error_handler.error_history) == 1

    @patch('core.execution.error_handler.ErrorHandler._log_error_with_correlation', return_value='test_id')
    def test_handle_postcondition_failure_max_retries(
        self, mock_log, error_handler, mock_screen_state, mock_workflow_edge, mock_workflow_node
    ):
        """Handling once max retries is reached."""
        # Simulate repeated attempts (one past the retry cap).
        for _ in range(error_handler.max_retry_attempts + 1):
            result = error_handler.handle_postcondition_failure(
                edge=mock_workflow_edge,
                screen_state=mock_screen_state,
                expected_node=mock_workflow_node
            )

        assert result.success is False
        assert len(error_handler.error_history) == error_handler.max_retry_attempts + 1


class TestUIChangeDetection:
    """Tests for UI-change detection."""

    @patch('core.execution.error_handler.ErrorHandler._log_error_with_correlation', return_value='test_id')
    def test_detect_ui_change_below_threshold(
        self, mock_log, error_handler, mock_screen_state, mock_workflow_node
    ):
        """A UI change is detected when similarity < threshold."""
        ui_changed, recovery = error_handler.detect_ui_change(
            current_state=mock_screen_state,
            expected_node=mock_workflow_node,
            current_similarity=0.60
        )

        assert ui_changed is True
        assert recovery is not None
        # A detected UI change pauses execution for human review.
        assert recovery.strategy_used == RecoveryStrategy.PAUSE
        assert len(error_handler.error_history) == 1
        assert error_handler.error_history[0].error_type == ErrorType.UI_CHANGED

    def test_detect_ui_change_above_threshold(
        self, error_handler, mock_screen_state, mock_workflow_node
    ):
        """No UI change when similarity >= threshold."""
        ui_changed, recovery = error_handler.detect_ui_change(
            current_state=mock_screen_state,
            expected_node=mock_workflow_node,
            current_similarity=0.85
        )

        assert ui_changed is False
        assert recovery is None


class TestRollbackSystem:
    """Tests for the rollback system."""

    def test_record_action(self, error_handler, mock_screen_state, mock_workflow_edge):
        """Recording an action for a later rollback."""
        error_handler.record_action(
            action=mock_workflow_edge.action,
            state_before=mock_screen_state
        )

        # History stores (action, state_before) tuples.
        assert len(error_handler.action_history) == 1
        assert error_handler.action_history[0][0] == mock_workflow_edge.action
        assert error_handler.action_history[0][1] == mock_screen_state

    def test_action_history_limited_to_max(
        self, error_handler, mock_screen_state, mock_workflow_edge
    ):
        """History is capped at max_action_history entries."""
        # Record more actions than the cap allows.
        for i in range(error_handler.max_action_history + 5):
            action = Mock()
            action.type = Mock()
            action.type.value = "mouse_click"
            action.target = Mock(role="button", text_pattern=f"Button {i}")
            error_handler.record_action(action, mock_screen_state)

        assert len(error_handler.action_history) == error_handler.max_action_history

    def test_rollback_last_action_success(
        self, error_handler, mock_screen_state, mock_workflow_edge
    ):
        """Successful rollback of the last recorded action."""
        error_handler.record_action(
            action=mock_workflow_edge.action,
            state_before=mock_screen_state
        )

        result = error_handler.rollback_last_action()

        assert result.success is True
        assert result.strategy_used == RecoveryStrategy.ROLLBACK
        # The rolled-back action is popped from the history.
        assert len(error_handler.action_history) == 0

    def test_rollback_with_empty_history(self, error_handler):
        """Rollback with no recorded history fails gracefully."""
        result = error_handler.rollback_last_action()

        assert result.success is False
        assert "no action" in result.message.lower()


class TestStatisticsAndReporting:
    """Tests for statistics and reporting."""

    @patch('core.execution.error_handler.ErrorHandler._log_error_with_correlation', return_value='test_id')
    def test_get_problematic_edges(
        self, mock_log, error_handler, mock_screen_state, mock_workflow_edge
    ):
        """Errors accumulate for failing edges.

        Note: with the centralized handle_error, edge_failure_counts is only
        incremented in _escalate_error (when no strategy is found). So we
        check that the errors pile up in the history instead.
        """
        # Produce 4 failures on the same edge.
        for _ in range(4):
            error_handler.handle_target_not_found(
                action=mock_workflow_edge.action,
                screen_state=mock_screen_state,
                edge=mock_workflow_edge
            )

        # All 4 errors must be in the history.
        assert len(error_handler.error_history) == 4

        stats = error_handler.get_error_statistics()
        assert stats['total_errors'] == 4

    @patch('core.execution.error_handler.ErrorHandler._log_error_with_correlation', return_value='test_id')
    def test_get_error_statistics(
        self, mock_log, error_handler, mock_screen_state, mock_workflow_edge
    ):
        """Retrieving the error statistics."""
        # Produce two different error types.
        error_handler.handle_target_not_found(
            action=mock_workflow_edge.action,
            screen_state=mock_screen_state,
            edge=mock_workflow_edge
        )

        error_handler.handle_matching_failure(
            screen_state=mock_screen_state,
            candidate_nodes=[Mock()],
            best_confidence=0.50,
            threshold=0.85
        )

        stats = error_handler.get_error_statistics()

        assert stats['total_errors'] == 2
        assert 'error_counts' in stats
        assert 'problematic_edges_count' in stats
        assert 'problematic_edges' in stats

    @patch('core.execution.error_handler.ErrorHandler._log_error_with_correlation', return_value='test_id')
    def test_error_history_accumulation(
        self, mock_log, error_handler, mock_screen_state, mock_workflow_edge
    ):
        """The error history accumulates across calls."""
        # Produce several errors of the same type.
        for i in range(5):
            error_handler.handle_target_not_found(
                action=mock_workflow_edge.action,
                screen_state=mock_screen_state,
                edge=mock_workflow_edge
            )

        assert len(error_handler.error_history) == 5

        # Every entry must carry the right error type.
        for error in error_handler.error_history:
            assert error.error_type == ErrorType.TARGET_NOT_FOUND


class TestErrorLogging:
    """Tests for the error-logging system."""

    @patch('core.execution.error_handler.ErrorHandler._log_error_with_correlation', return_value='test_id')
    def test_error_log_creates_directory(
        self, mock_log, error_handler, mock_screen_state, temp_error_dir
    ):
        """Logging is invoked by handle_matching_failure."""
        error_handler.handle_matching_failure(
            screen_state=mock_screen_state,
            candidate_nodes=[Mock()],
            best_confidence=0.50,
            threshold=0.85
        )

        # _log_error_with_correlation must have been called.
        assert mock_log.called

    @patch('core.execution.error_handler.ErrorHandler._log_error_with_correlation', return_value='test_id')
    def test_error_log_contains_report(
        self, mock_log, error_handler, mock_screen_state, temp_error_dir
    ):
        """The log hook is called with an ErrorContext."""
        error_handler.handle_matching_failure(
            screen_state=mock_screen_state,
            candidate_nodes=[Mock()],
            best_confidence=0.50,
            threshold=0.85
        )

        # _log_error_with_correlation must have been called with arguments.
        assert mock_log.called
        call_args = mock_log.call_args
        assert call_args is not None

        # The first positional argument must be an ErrorContext with a message.
        error_ctx = call_args[0][0]
        assert isinstance(error_ctx, ErrorContext)
        assert error_ctx.message is not None


class TestSuggestionGeneration:
    """Tests for suggestion generation."""

    def test_suggestions_for_very_low_confidence(self, error_handler):
        """Suggestions for very low confidence."""
        suggestions = error_handler._generate_matching_suggestions(
            best_confidence=0.50,
            threshold=0.85,
            candidate_nodes=[Mock()]
        )

        assert len(suggestions) > 0
        assert any("CREATE_NEW_NODE" in s for s in suggestions)

    def test_suggestions_for_close_confidence(self, error_handler):
        """Suggestions for confidence close to the threshold."""
        suggestions = error_handler._generate_matching_suggestions(
            best_confidence=0.82,
            threshold=0.85,
            candidate_nodes=[Mock()]
        )

        assert len(suggestions) > 0
        assert any("UPDATE_NODE" in s or "ADJUST_THRESHOLD" in s for s in suggestions)

    def test_suggestions_for_no_candidates(self, error_handler):
        """Suggestions when there are no candidates."""
        suggestions = error_handler._generate_matching_suggestions(
            best_confidence=0.50,
            threshold=0.85,
            candidate_nodes=[]
        )

        assert any("NO_CANDIDATES" in s for s in suggestions)


if __name__ == '__main__':
    # Allow running this test module directly (outside a pytest invocation).
    pytest.main([__file__, '-v'])