feat(execution): Implement complete COACHING mode in ExecutionLoop
- Add CoachingDecision enum (ACCEPT, REJECT, CORRECT, EXECUTE_MANUAL, SKIP)
- Add CoachingResponse dataclass for user decisions
- Add WAITING_COACHING state to ExecutionState
- Implement _request_coaching_decision() with callback or polling support
- Implement submit_coaching_decision() for external API/UI submission
- Implement _apply_coaching_correction() for applying user corrections
- Implement _record_coaching_feedback() integrating with:
  - TrainingDataCollector for session recording
  - FeedbackProcessor for statistics
  - CorrectionPackIntegration for automatic correction capture
- Add get_coaching_stats() for session statistics
- Add 17 unit tests for COACHING functionality

COACHING mode now:
1. Suggests actions to user
2. Waits for user decision (accept/reject/correct/manual/skip)
3. Applies corrections if provided
4. Records all feedback for learning
5. Propagates corrections to Correction Packs automatically

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
278
tests/test_coaching_loop.py
Normal file
278
tests/test_coaching_loop.py
Normal file
@@ -0,0 +1,278 @@
|
||||
"""
|
||||
Tests for COACHING mode in ExecutionLoop.
|
||||
|
||||
Tests the coaching decision flow, corrections, and integration
|
||||
with Correction Packs.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from unittest.mock import MagicMock, patch
|
||||
from datetime import datetime
|
||||
|
||||
from core.execution.execution_loop import (
|
||||
ExecutionLoop,
|
||||
ExecutionMode,
|
||||
ExecutionState,
|
||||
ExecutionContext,
|
||||
CoachingDecision,
|
||||
CoachingResponse,
|
||||
StepResult
|
||||
)
|
||||
|
||||
|
||||
class TestCoachingDecision:
    """Checks the members exposed by the CoachingDecision enum."""

    def test_all_decisions_exist(self):
        """Every expected decision is defined and carries its string value."""
        expected_pairs = (
            (CoachingDecision.ACCEPT, "accept"),
            (CoachingDecision.REJECT, "reject"),
            (CoachingDecision.CORRECT, "correct"),
            (CoachingDecision.EXECUTE_MANUAL, "manual"),
            (CoachingDecision.SKIP, "skip"),
        )
        for member, raw_value in expected_pairs:
            assert member.value == raw_value
|
||||
|
||||
|
||||
class TestCoachingResponse:
    """Checks construction of the CoachingResponse dataclass."""

    def test_basic_response(self):
        """A response built from a decision alone has empty optional fields."""
        accepted = CoachingResponse(decision=CoachingDecision.ACCEPT)

        assert accepted.decision == CoachingDecision.ACCEPT
        assert accepted.correction is None
        assert accepted.feedback is None
        assert accepted.executed_manually is False

    def test_response_with_correction(self):
        """Correction payload and feedback text are stored as given."""
        note = "Button moved to new location"
        fix = {
            'target': {'id': 'new_button'},
            'params': {'timeout': 5},
        }

        corrected = CoachingResponse(
            decision=CoachingDecision.CORRECT,
            correction=fix,
            feedback=note,
        )

        assert corrected.decision == CoachingDecision.CORRECT
        assert corrected.correction == fix
        assert corrected.feedback == "Button moved to new location"

    def test_manual_execution_response(self):
        """The executed_manually flag is kept on an EXECUTE_MANUAL response."""
        manual = CoachingResponse(
            decision=CoachingDecision.EXECUTE_MANUAL,
            executed_manually=True,
        )

        assert manual.decision == CoachingDecision.EXECUTE_MANUAL
        assert manual.executed_manually is True
|
||||
|
||||
|
||||
class TestExecutionLoopCoaching:
    """Unit tests for the COACHING-mode helpers of ExecutionLoop."""

    @pytest.fixture
    def mock_pipeline(self):
        """Provide a mocked pipeline whose load_workflow() returns a COACHING workflow."""
        workflow = MagicMock()
        workflow.workflow_id = "test_wf"
        workflow.learning_state = "COACHING"
        workflow.entry_nodes = ["node_1"]

        pipeline = MagicMock()
        pipeline.load_workflow.return_value = workflow
        return pipeline

    @pytest.fixture
    def coaching_callback(self):
        """Provide a mock callback that always answers with an ACCEPT response."""
        accept_response = CoachingResponse(decision=CoachingDecision.ACCEPT)
        return MagicMock(return_value=accept_response)

    def test_init_with_coaching_callback(self, mock_pipeline, coaching_callback):
        """The constructor keeps the callback and starts with zero suggestions."""
        engine = ExecutionLoop(
            pipeline=mock_pipeline,
            coaching_callback=coaching_callback,
        )

        assert engine.coaching_callback == coaching_callback
        assert engine._coaching_stats['suggestions_made'] == 0

    def test_coaching_stats_initial(self, mock_pipeline):
        """A fresh loop reports zero for every counter and rate."""
        stats = ExecutionLoop(pipeline=mock_pipeline).get_coaching_stats()

        counter_keys = (
            'suggestions_made',
            'accepted',
            'rejected',
            'corrected',
            'manual_executions',
        )
        for key in counter_keys:
            assert stats[key] == 0

        for key in ('acceptance_rate', 'correction_rate'):
            assert stats[key] == 0.0

    def test_apply_coaching_correction_target(self, mock_pipeline):
        """A target correction replaces the target and leaves the rest alone."""
        engine = ExecutionLoop(pipeline=mock_pipeline)
        suggested = {
            'action': 'click',
            'target': {'id': 'old_button'},
            'params': {'timeout': 10},
        }
        user_fix = {'target': {'id': 'new_button'}}

        outcome = engine._apply_coaching_correction(suggested, user_fix)

        assert outcome['target']['id'] == 'new_button'
        assert outcome['action'] == 'click'
        assert outcome['params']['timeout'] == 10

    def test_apply_coaching_correction_params(self, mock_pipeline):
        """A params correction overrides and adds keys, keeping untouched ones."""
        engine = ExecutionLoop(pipeline=mock_pipeline)
        suggested = {
            'action': 'type',
            'target': {'id': 'input'},
            'params': {'value': 'old', 'timeout': 10},
        }
        user_fix = {'params': {'value': 'new_value', 'delay': 0.1}}

        outcome = engine._apply_coaching_correction(suggested, user_fix)
        merged_params = outcome['params']

        assert merged_params['value'] == 'new_value'
        assert merged_params['delay'] == 0.1
        assert merged_params['timeout'] == 10

    def test_apply_coaching_correction_action(self, mock_pipeline):
        """An action correction changes the action type."""
        engine = ExecutionLoop(pipeline=mock_pipeline)
        suggested = {'action': 'click', 'target': {'id': 'element'}}
        user_fix = {'action': 'double_click'}

        outcome = engine._apply_coaching_correction(suggested, user_fix)

        assert outcome['action'] == 'double_click'

    def test_apply_coaching_correction_none(self, mock_pipeline):
        """Passing None as the correction yields a result equal to the original."""
        engine = ExecutionLoop(pipeline=mock_pipeline)
        suggested = {'action': 'click', 'target': {'id': 'btn'}}

        outcome = engine._apply_coaching_correction(suggested, None)

        assert outcome == suggested

    def test_submit_coaching_decision_wrong_state(self, mock_pipeline):
        """Submitting while the loop is RUNNING is refused (returns False)."""
        engine = ExecutionLoop(pipeline=mock_pipeline)
        engine.state = ExecutionState.RUNNING
        decision = CoachingResponse(decision=CoachingDecision.ACCEPT)

        submitted = engine.submit_coaching_decision(decision)

        assert submitted is False

    def test_submit_coaching_decision_correct_state(self, mock_pipeline):
        """Submitting while WAITING_COACHING returns True and stores the response."""
        engine = ExecutionLoop(pipeline=mock_pipeline)
        engine.state = ExecutionState.WAITING_COACHING
        decision = CoachingResponse(decision=CoachingDecision.ACCEPT)

        submitted = engine.submit_coaching_decision(decision)

        assert submitted is True
        assert engine._coaching_response == decision

    def test_request_coaching_decision_with_callback(self, mock_pipeline, coaching_callback):
        """With a callback configured, the decision comes from exactly one callback call."""
        engine = ExecutionLoop(
            pipeline=mock_pipeline,
            coaching_callback=coaching_callback,
        )
        engine.context = ExecutionContext(
            workflow_id="test_wf",
            execution_id="exec_001",
            mode=ExecutionMode.COACHING,
            started_at=datetime.now(),
        )
        suggested = {'action': 'click', 'target': {'id': 'btn'}}

        answer = engine._request_coaching_decision(suggested)

        assert answer.decision == CoachingDecision.ACCEPT
        coaching_callback.assert_called_once()
|
||||
|
||||
|
||||
class TestCoachingIntegration:
    """Integration tests for COACHING mode."""

    @pytest.fixture
    def mock_pipeline(self):
        """Create a mock pipeline whose load_workflow() returns a COACHING workflow."""
        pipeline = MagicMock()
        workflow = MagicMock()
        workflow.workflow_id = "test_wf"
        workflow.learning_state = "COACHING"
        workflow.entry_nodes = ["node_1"]
        pipeline.load_workflow.return_value = workflow
        return pipeline

    def test_determine_execution_mode_coaching(self, mock_pipeline):
        """Test that COACHING learning state maps to COACHING mode."""
        loop = ExecutionLoop(pipeline=mock_pipeline)
        workflow = mock_pipeline.load_workflow()

        mode = loop._determine_execution_mode(workflow)

        assert mode == ExecutionMode.COACHING

    def test_coaching_stats_update(self, mock_pipeline):
        """Test that the reported rates follow the recorded counters."""
        loop = ExecutionLoop(pipeline=mock_pipeline)

        # Simulate stats updates by writing the counters directly.
        loop._coaching_stats['suggestions_made'] = 10
        loop._coaching_stats['accepted'] = 7
        loop._coaching_stats['corrected'] = 2
        loop._coaching_stats['rejected'] = 1

        stats = loop.get_coaching_stats()

        assert stats['suggestions_made'] == 10
        # The rates are computed floats: compare with a tolerance so the test
        # does not depend on the exact arithmetic used to derive them.
        assert stats['acceptance_rate'] == pytest.approx(0.7)
        assert stats['correction_rate'] == pytest.approx(0.2)
|
||||
|
||||
|
||||
class TestExecutionStateCoaching:
    """Tests for the WAITING_COACHING member of ExecutionState."""

    def test_waiting_coaching_state_exists(self):
        """Verify WAITING_COACHING exists and has the value "coaching"."""
        assert ExecutionState.WAITING_COACHING.value == "coaching"

    def test_state_transitions(self):
        """Verify WAITING_COACHING is a state of its own.

        No transition is actually executed here; the check only covers the
        state definitions. Referencing IDLE, RUNNING and COMPLETED still
        fails the test if any of them is missing.
        """
        waiting = ExecutionState.WAITING_COACHING
        other_states = [
            ExecutionState.IDLE,
            ExecutionState.RUNNING,
            ExecutionState.COMPLETED,
        ]

        # Asserting membership in a list that was just built with the member
        # in it can never fail; check instead that WAITING_COACHING does not
        # collapse into one of the other states.
        assert waiting not in other_states
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Propagate pytest's exit status so that running this file directly
    # reports test failures to the calling shell instead of always exiting 0.
    raise SystemExit(pytest.main([__file__, '-v']))
|
||||
Reference in New Issue
Block a user