feat(execution): Implement complete COACHING mode in ExecutionLoop

- Add CoachingDecision enum (ACCEPT, REJECT, CORRECT, EXECUTE_MANUAL, SKIP)
- Add CoachingResponse dataclass for user decisions
- Add WAITING_COACHING state to ExecutionState
- Implement _request_coaching_decision() with callback or polling support
- Implement submit_coaching_decision() for external API/UI submission
- Implement _apply_coaching_correction() for applying user corrections
- Implement _record_coaching_feedback() integrating with:
  - TrainingDataCollector for session recording
  - FeedbackProcessor for statistics
  - CorrectionPackIntegration for automatic correction capture
- Add get_coaching_stats() for session statistics
- Add 17 unit tests for COACHING functionality

COACHING mode now:
1. Suggests actions to user
2. Waits for user decision (accept/reject/correct/manual/skip)
3. Applies corrections if provided
4. Records all feedback for learning
5. Propagates corrections to Correction Packs automatically

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-18 19:14:47 +01:00
parent efb184fdb9
commit d6e2530f2a
2 changed files with 1639 additions and 0 deletions
--- a/tests/test_coaching_loop.py
+++ b/tests/test_coaching_loop.py
@@ -0,0 +1,278 @@
+"""
+Tests for COACHING mode in ExecutionLoop.
+
+Tests the coaching decision flow, corrections, and integration
+with Correction Packs.
+"""
+
+import pytest
+from unittest.mock import MagicMock, patch
+from datetime import datetime
+
+from core.execution.execution_loop import (
+    ExecutionLoop,
+    ExecutionMode,
+    ExecutionState,
+    ExecutionContext,
+    CoachingDecision,
+    CoachingResponse,
+    StepResult
+)
+
+
+class TestCoachingDecision:
+    """Checks the string value behind each CoachingDecision member."""
+
+    def test_all_decisions_exist(self):
+        """Every expected member is defined and has the expected value."""
+        expected = {"ACCEPT": "accept", "REJECT": "reject", "CORRECT": "correct",
+                    "EXECUTE_MANUAL": "manual", "SKIP": "skip"}
+        # Lookup by name raises KeyError when a member is missing.
+        for member_name, value in expected.items():
+            assert CoachingDecision[member_name].value == value
+
+
+class TestCoachingResponse:
+    """Construction and default-field checks for CoachingResponse."""
+
+    def test_basic_response(self):
+        """With only `decision` given, the other fields are None / False."""
+        accepted = CoachingResponse(decision=CoachingDecision.ACCEPT)
+        assert accepted.decision == CoachingDecision.ACCEPT
+        assert accepted.correction is None
+        assert accepted.feedback is None
+        assert accepted.executed_manually is False
+
+    def test_response_with_correction(self):
+        """A CORRECT response keeps the correction payload and feedback."""
+        note = "Button moved to new location"
+        payload = {'target': {'id': 'new_button'}, 'params': {'timeout': 5}}
+        corrected = CoachingResponse(
+            decision=CoachingDecision.CORRECT,
+            correction=payload,
+            feedback=note,
+        )
+        # Compare each stored field against what was passed in.
+        assert corrected.decision == CoachingDecision.CORRECT
+        assert corrected.correction == payload
+        assert corrected.feedback == note
+
+    def test_manual_execution_response(self):
+        """An EXECUTE_MANUAL response keeps executed_manually=True."""
+        kwargs = {
+            'decision': CoachingDecision.EXECUTE_MANUAL,
+            'executed_manually': True,
+        }
+        manual = CoachingResponse(**kwargs)
+        assert manual.decision == CoachingDecision.EXECUTE_MANUAL
+        assert manual.executed_manually is True
+
+
+class TestExecutionLoopCoaching:
+    """Unit checks for the COACHING-related API of ExecutionLoop."""
+
+    @pytest.fixture
+    def mock_pipeline(self):
+        """Pipeline double whose load_workflow() returns a COACHING workflow mock."""
+        workflow = MagicMock(
+            workflow_id="test_wf", learning_state="COACHING",
+            entry_nodes=["node_1"],
+        )
+        fake_pipeline = MagicMock()
+        fake_pipeline.load_workflow.return_value = workflow
+        return fake_pipeline
+
+    @pytest.fixture
+    def coaching_callback(self):
+        """Callback double that always answers with an ACCEPT response."""
+        always_accept = CoachingResponse(decision=CoachingDecision.ACCEPT)
+        callback = MagicMock(return_value=always_accept)
+        return callback
+
+    def test_init_with_coaching_callback(self, mock_pipeline, coaching_callback):
+        """The constructor keeps the callback and starts with zero suggestions."""
+        engine = ExecutionLoop(
+            pipeline=mock_pipeline, coaching_callback=coaching_callback
+        )
+        stored_callback = engine.coaching_callback
+        assert stored_callback == coaching_callback
+        assert engine._coaching_stats['suggestions_made'] == 0
+
+    def test_coaching_stats_initial(self, mock_pipeline):
+        """A freshly built loop reports zero for the checked counters and rates."""
+        snapshot = ExecutionLoop(pipeline=mock_pipeline).get_coaching_stats()
+
+        counter_keys = (
+            'suggestions_made', 'accepted', 'rejected',
+            'corrected', 'manual_executions',
+        )
+        for key in counter_keys:
+            assert snapshot[key] == 0
+        for key in ('acceptance_rate', 'correction_rate'):
+            assert snapshot[key] == 0.0
+
+    def test_apply_coaching_correction_target(self, mock_pipeline):
+        """A target-only correction swaps the target; action and params stay."""
+        engine = ExecutionLoop(pipeline=mock_pipeline)
+
+        suggested = {
+            'action': 'click',
+            'target': {'id': 'old_button'},
+            'params': {'timeout': 10},
+        }
+        fix = {'target': {'id': 'new_button'}}
+
+        merged = engine._apply_coaching_correction(suggested, fix)
+
+        # The corrected target is expected in the result ...
+        assert merged['target']['id'] == 'new_button'
+        # ... while keys absent from the correction keep their suggested values.
+        assert merged['action'] == 'click'
+        assert merged['params']['timeout'] == 10
+
+    def test_apply_coaching_correction_params(self, mock_pipeline):
+        """Corrected params are expected to be merged over the suggested ones."""
+        engine = ExecutionLoop(pipeline=mock_pipeline)
+
+        suggested = {
+            'action': 'type',
+            'target': {'id': 'input'},
+            'params': {'value': 'old', 'timeout': 10},
+        }
+        fix = {'params': {'value': 'new_value', 'delay': 0.1}}
+
+        merged = engine._apply_coaching_correction(suggested, fix)
+        merged_params = merged['params']
+
+        # Overridden, newly added, and untouched keys respectively.
+        assert merged_params['value'] == 'new_value'
+        assert merged_params['delay'] == 0.1
+        assert merged_params['timeout'] == 10
+
+    def test_apply_coaching_correction_action(self, mock_pipeline):
+        """A correction may replace the action type itself."""
+        engine = ExecutionLoop(pipeline=mock_pipeline)
+
+        suggested = {'action': 'click', 'target': {'id': 'element'}}
+        fix = {'action': 'double_click'}
+
+        merged = engine._apply_coaching_correction(
+            suggested,
+            fix,
+        )
+        new_action = merged['action']
+
+        # Only the action key is asserted in this test.
+        assert new_action == 'double_click'
+
+    def test_apply_coaching_correction_none(self, mock_pipeline):
+        """Passing None as the correction yields a result equal to the input."""
+        engine = ExecutionLoop(pipeline=mock_pipeline)
+        suggested = {'action': 'click', 'target': {'id': 'btn'}}
+
+        outcome = engine._apply_coaching_correction(suggested, None)
+
+        assert outcome == suggested
+
+    def test_submit_coaching_decision_wrong_state(self, mock_pipeline):
+        """submit_coaching_decision() must return False while state is RUNNING."""
+        engine = ExecutionLoop(pipeline=mock_pipeline)
+        engine.state = ExecutionState.RUNNING
+        decision = CoachingResponse(decision=CoachingDecision.ACCEPT)
+
+        was_taken = engine.submit_coaching_decision(decision)
+
+        assert was_taken is False
+
+    def test_submit_coaching_decision_correct_state(self, mock_pipeline):
+        """In WAITING_COACHING the decision is taken and stored on the loop."""
+        engine = ExecutionLoop(pipeline=mock_pipeline)
+        engine.state = ExecutionState.WAITING_COACHING
+        decision = CoachingResponse(decision=CoachingDecision.ACCEPT)
+
+        was_taken = engine.submit_coaching_decision(decision)
+
+        assert was_taken is True
+        assert engine._coaching_response == decision
+
+    def test_request_coaching_decision_with_callback(self, mock_pipeline, coaching_callback):
+        """With a callback configured, the decision comes from one callback call."""
+        engine = ExecutionLoop(
+            pipeline=mock_pipeline, coaching_callback=coaching_callback
+        )
+        run_context = ExecutionContext(
+            workflow_id="test_wf",
+            execution_id="exec_001",
+            mode=ExecutionMode.COACHING,
+            started_at=datetime.now(),
+        )
+        engine.context = run_context
+        suggestion = {'action': 'click', 'target': {'id': 'btn'}}
+
+        answer = engine._request_coaching_decision(suggestion)
+
+        assert answer.decision == CoachingDecision.ACCEPT
+        coaching_callback.assert_called_once()
+
+
+class TestCoachingIntegration:
+    """COACHING-mode checks that run ExecutionLoop against a mocked pipeline."""
+
+    @pytest.fixture
+    def mock_pipeline(self):
+        """MagicMock pipeline whose load_workflow() returns a COACHING workflow."""
+        pipeline = MagicMock()
+        workflow = MagicMock()
+        workflow.workflow_id = "test_wf"
+        workflow.learning_state = "COACHING"
+        workflow.entry_nodes = ["node_1"]
+        pipeline.load_workflow.return_value = workflow
+        return pipeline
+
+    def test_determine_execution_mode_coaching(self, mock_pipeline):
+        """A workflow in the COACHING learning state maps to ExecutionMode.COACHING."""
+        loop = ExecutionLoop(pipeline=mock_pipeline)
+        workflow = mock_pipeline.load_workflow()
+
+        mode = loop._determine_execution_mode(workflow)
+
+        assert mode == ExecutionMode.COACHING
+
+    def test_coaching_stats_update(self, mock_pipeline):
+        """Rates reported by get_coaching_stats() follow the raw counters."""
+        loop = ExecutionLoop(pipeline=mock_pipeline)
+
+        # Write the counters directly instead of driving a real coaching session.
+        loop._coaching_stats['suggestions_made'] = 10
+        loop._coaching_stats['accepted'] = 7
+        loop._coaching_stats['corrected'] = 2
+        loop._coaching_stats['rejected'] = 1
+
+        stats = loop.get_coaching_stats()
+
+        assert stats['suggestions_made'] == 10
+        assert stats['acceptance_rate'] == pytest.approx(0.7)  # tolerate float rounding
+        assert stats['correction_rate'] == pytest.approx(0.2)
+
+
+class TestExecutionStateCoaching:
+    """Tests for the WAITING_COACHING member of ExecutionState."""
+
+    def test_waiting_coaching_state_exists(self):
+        """WAITING_COACHING exists and carries the value 'coaching'."""
+        assert ExecutionState.WAITING_COACHING.value == "coaching"
+
+    def test_state_transitions(self):
+        """WAITING_COACHING is an enum member distinct from IDLE/RUNNING/COMPLETED."""
+        # Compare against the enum itself rather than a list built in the test.
+        other_states = [
+            ExecutionState.IDLE,
+            ExecutionState.RUNNING,
+            ExecutionState.COMPLETED,
+        ]
+        assert ExecutionState.WAITING_COACHING in list(ExecutionState)
+        assert ExecutionState.WAITING_COACHING not in other_states
+
+
+if __name__ == '__main__':
+    raise SystemExit(pytest.main([__file__, '-v']))  # exit with pytest's status, not always 0