- Add CoachingDecision enum (ACCEPT, REJECT, CORRECT, EXECUTE_MANUAL, SKIP)
- Add CoachingResponse dataclass for user decisions
- Add WAITING_COACHING state to ExecutionState
- Implement _request_coaching_decision() with callback or polling support
- Implement submit_coaching_decision() for external API/UI submission
- Implement _apply_coaching_correction() for applying user corrections
- Implement _record_coaching_feedback() integrating with:
  - TrainingDataCollector for session recording
  - FeedbackProcessor for statistics
  - CorrectionPackIntegration for automatic correction capture
- Add get_coaching_stats() for session statistics
- Add 17 unit tests for COACHING functionality

COACHING mode now:
1. Suggests actions to user
2. Waits for user decision (accept/reject/correct/manual/skip)
3. Applies corrections if provided
4. Records all feedback for learning
5. Propagates corrections to Correction Packs automatically

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
279 lines
9.2 KiB
Python
279 lines
9.2 KiB
Python
"""
|
|
Tests for COACHING mode in ExecutionLoop.
|
|
|
|
Tests the coaching decision flow, corrections, and integration
|
|
with Correction Packs.
|
|
"""
|
|
|
|
import pytest
|
|
from unittest.mock import MagicMock, patch
|
|
from datetime import datetime
|
|
|
|
from core.execution.execution_loop import (
|
|
ExecutionLoop,
|
|
ExecutionMode,
|
|
ExecutionState,
|
|
ExecutionContext,
|
|
CoachingDecision,
|
|
CoachingResponse,
|
|
StepResult
|
|
)
|
|
|
|
|
|
class TestCoachingDecision:
    """Tests for the CoachingDecision enum."""

    def test_all_decisions_exist(self):
        """Each expected enum member maps to its wire-format string value."""
        expected_values = {
            CoachingDecision.ACCEPT: "accept",
            CoachingDecision.REJECT: "reject",
            CoachingDecision.CORRECT: "correct",
            CoachingDecision.EXECUTE_MANUAL: "manual",
            CoachingDecision.SKIP: "skip",
        }
        for member, value in expected_values.items():
            assert member.value == value
|
|
|
|
|
|
class TestCoachingResponse:
    """Tests for the CoachingResponse dataclass."""

    def test_basic_response(self):
        """A response built from a decision alone has default optional fields."""
        resp = CoachingResponse(decision=CoachingDecision.ACCEPT)

        assert resp.decision == CoachingDecision.ACCEPT
        assert resp.correction is None
        assert resp.feedback is None
        assert resp.executed_manually is False

    def test_response_with_correction(self):
        """Correction payload and feedback text are stored verbatim."""
        payload = {
            'target': {'id': 'new_button'},
            'params': {'timeout': 5},
        }
        resp = CoachingResponse(
            decision=CoachingDecision.CORRECT,
            correction=payload,
            feedback="Button moved to new location",
        )

        assert resp.decision == CoachingDecision.CORRECT
        assert resp.correction == payload
        assert resp.feedback == "Button moved to new location"

    def test_manual_execution_response(self):
        """A manual-execution response carries the executed_manually flag."""
        resp = CoachingResponse(
            decision=CoachingDecision.EXECUTE_MANUAL,
            executed_manually=True,
        )

        assert resp.decision == CoachingDecision.EXECUTE_MANUAL
        assert resp.executed_manually is True
|
|
|
|
|
|
class TestExecutionLoopCoaching:
    """Tests for ExecutionLoop COACHING mode."""

    @pytest.fixture
    def mock_pipeline(self):
        """Mock pipeline whose load_workflow yields a COACHING workflow."""
        pipeline = MagicMock()
        pipeline.load_workflow.return_value = MagicMock(
            workflow_id="test_wf",
            learning_state="COACHING",
            entry_nodes=["node_1"],
        )
        return pipeline

    @pytest.fixture
    def coaching_callback(self):
        """Callback that always answers with an ACCEPT response."""
        accept = CoachingResponse(decision=CoachingDecision.ACCEPT)
        return MagicMock(return_value=accept)

    def test_init_with_coaching_callback(self, mock_pipeline, coaching_callback):
        """The callback is stored and suggestion counters start at zero."""
        loop = ExecutionLoop(
            pipeline=mock_pipeline,
            coaching_callback=coaching_callback,
        )

        assert loop.coaching_callback == coaching_callback
        assert loop._coaching_stats['suggestions_made'] == 0

    def test_coaching_stats_initial(self, mock_pipeline):
        """A fresh loop reports all-zero coaching statistics."""
        stats = ExecutionLoop(pipeline=mock_pipeline).get_coaching_stats()

        for counter in ('suggestions_made', 'accepted', 'rejected',
                        'corrected', 'manual_executions'):
            assert stats[counter] == 0
        assert stats['acceptance_rate'] == 0.0
        assert stats['correction_rate'] == 0.0

    def test_apply_coaching_correction_target(self, mock_pipeline):
        """A target correction replaces the target but keeps other fields."""
        loop = ExecutionLoop(pipeline=mock_pipeline)
        suggested = {
            'action': 'click',
            'target': {'id': 'old_button'},
            'params': {'timeout': 10},
        }

        result = loop._apply_coaching_correction(
            suggested, {'target': {'id': 'new_button'}}
        )

        assert result['target']['id'] == 'new_button'
        assert result['action'] == 'click'
        assert result['params']['timeout'] == 10

    def test_apply_coaching_correction_params(self, mock_pipeline):
        """Param corrections merge over the original params dict."""
        loop = ExecutionLoop(pipeline=mock_pipeline)
        suggested = {
            'action': 'type',
            'target': {'id': 'input'},
            'params': {'value': 'old', 'timeout': 10},
        }

        result = loop._apply_coaching_correction(
            suggested, {'params': {'value': 'new_value', 'delay': 0.1}}
        )

        assert result['params']['value'] == 'new_value'
        assert result['params']['delay'] == 0.1
        # Untouched params survive the merge.
        assert result['params']['timeout'] == 10

    def test_apply_coaching_correction_action(self, mock_pipeline):
        """The action type itself can be corrected."""
        loop = ExecutionLoop(pipeline=mock_pipeline)
        suggested = {'action': 'click', 'target': {'id': 'element'}}

        result = loop._apply_coaching_correction(
            suggested, {'action': 'double_click'}
        )

        assert result['action'] == 'double_click'

    def test_apply_coaching_correction_none(self, mock_pipeline):
        """With no correction, the suggestion comes back unchanged."""
        loop = ExecutionLoop(pipeline=mock_pipeline)
        suggested = {'action': 'click', 'target': {'id': 'btn'}}

        assert loop._apply_coaching_correction(suggested, None) == suggested

    def test_submit_coaching_decision_wrong_state(self, mock_pipeline):
        """Submitting while not waiting for coaching is rejected."""
        loop = ExecutionLoop(pipeline=mock_pipeline)
        loop.state = ExecutionState.RUNNING

        accepted = loop.submit_coaching_decision(
            CoachingResponse(decision=CoachingDecision.ACCEPT)
        )

        assert accepted is False

    def test_submit_coaching_decision_correct_state(self, mock_pipeline):
        """Submitting while waiting stores the response and succeeds."""
        loop = ExecutionLoop(pipeline=mock_pipeline)
        loop.state = ExecutionState.WAITING_COACHING
        response = CoachingResponse(decision=CoachingDecision.ACCEPT)

        assert loop.submit_coaching_decision(response) is True
        assert loop._coaching_response == response

    def test_request_coaching_decision_with_callback(self, mock_pipeline, coaching_callback):
        """When a callback is configured, it supplies the coaching decision."""
        loop = ExecutionLoop(
            pipeline=mock_pipeline,
            coaching_callback=coaching_callback,
        )
        loop.context = ExecutionContext(
            workflow_id="test_wf",
            execution_id="exec_001",
            mode=ExecutionMode.COACHING,
            started_at=datetime.now(),
        )

        response = loop._request_coaching_decision(
            {'action': 'click', 'target': {'id': 'btn'}}
        )

        assert response.decision == CoachingDecision.ACCEPT
        coaching_callback.assert_called_once()
|
|
|
|
|
|
class TestCoachingIntegration:
    """Integration tests for COACHING mode."""

    @pytest.fixture
    def mock_pipeline(self):
        """Mock pipeline returning a workflow in the COACHING learning state."""
        workflow = MagicMock()
        workflow.workflow_id = "test_wf"
        workflow.learning_state = "COACHING"
        workflow.entry_nodes = ["node_1"]

        pipeline = MagicMock()
        pipeline.load_workflow.return_value = workflow
        return pipeline

    def test_determine_execution_mode_coaching(self, mock_pipeline):
        """A COACHING learning state selects the COACHING execution mode."""
        loop = ExecutionLoop(pipeline=mock_pipeline)
        workflow = mock_pipeline.load_workflow()

        assert loop._determine_execution_mode(workflow) == ExecutionMode.COACHING

    def test_coaching_stats_update(self, mock_pipeline):
        """Acceptance/correction rates are derived from the raw counters."""
        loop = ExecutionLoop(pipeline=mock_pipeline)

        # Simulate a session: 10 suggestions, 7 accepted, 2 corrected, 1 rejected.
        loop._coaching_stats['suggestions_made'] = 10
        loop._coaching_stats['accepted'] = 7
        loop._coaching_stats['corrected'] = 2
        loop._coaching_stats['rejected'] = 1

        stats = loop.get_coaching_stats()

        assert stats['suggestions_made'] == 10
        assert stats['acceptance_rate'] == 0.7
        assert stats['correction_rate'] == 0.2
|
|
|
|
|
|
class TestExecutionStateCoaching:
    """Tests for the WAITING_COACHING execution state."""

    def test_waiting_coaching_state_exists(self):
        """WAITING_COACHING maps to the 'coaching' string value."""
        assert ExecutionState.WAITING_COACHING.value == "coaching"

    def test_state_transitions(self):
        """WAITING_COACHING is a valid member alongside the other states."""
        lifecycle = [
            ExecutionState.IDLE,
            ExecutionState.RUNNING,
            ExecutionState.WAITING_COACHING,
            ExecutionState.COMPLETED,
        ]
        assert ExecutionState.WAITING_COACHING in lifecycle
|
|
|
|
|
|
if __name__ == '__main__':
    # Allow running this test module directly without the pytest CLI.
    pytest.main(args=[__file__, '-v'])
|