feat(execution): Implement complete COACHING mode in ExecutionLoop

- Add CoachingDecision enum (ACCEPT, REJECT, CORRECT, EXECUTE_MANUAL, SKIP)
- Add CoachingResponse dataclass for user decisions
- Add WAITING_COACHING state to ExecutionState
- Implement _request_coaching_decision() with callback or polling support
- Implement submit_coaching_decision() for external API/UI submission
- Implement _apply_coaching_correction() for applying user corrections
- Implement _record_coaching_feedback() integrating with:
  - TrainingDataCollector for session recording
  - FeedbackProcessor for statistics
  - CorrectionPackIntegration for automatic correction capture
- Add get_coaching_stats() for session statistics
- Add 17 unit tests for COACHING functionality
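For reference, the decision surface these additions describe, reconstructed from the tests below (a sketch with inferred defaults, not verbatim source):

    from enum import Enum
    from dataclasses import dataclass
    from typing import Optional

    class CoachingDecision(Enum):
        ACCEPT = "accept"
        REJECT = "reject"
        CORRECT = "correct"
        EXECUTE_MANUAL = "manual"
        SKIP = "skip"

    @dataclass
    class CoachingResponse:
        decision: CoachingDecision
        correction: Optional[dict] = None   # partial override: 'action', 'target', and/or 'params'
        feedback: Optional[str] = None      # free-text note from the user
        executed_manually: bool = False     # user performed the step themselves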

COACHING mode now:
1. Suggests actions to user
2. Waits for user decision (accept/reject/correct/manual/skip)
3. Applies corrections if provided
4. Records all feedback for learning
5. Propagates corrections to Correction Packs automatically
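A minimal host-side sketch of that flow, assuming only the constructor and types exercised by the tests below, and assuming the callback receives the suggested action dict (the pipeline object and the UI logic are placeholders):

    def on_suggestion(action_info):
        # In a real UI this would render action_info and block on user input;
        # here one correction is hard-coded as an illustration.
        if action_info.get('target') == {'id': 'old_button'}:
            return CoachingResponse(
                decision=CoachingDecision.CORRECT,
                correction={'target': {'id': 'new_button'}},
                feedback="Button moved to new location"
            )
        return CoachingResponse(decision=CoachingDecision.ACCEPT)

    loop = ExecutionLoop(pipeline=pipeline, coaching_callback=on_suggestion)

Without a callback, the loop instead parks in ExecutionState.WAITING_COACHING and an external API/UI layer supplies the decision via loop.submit_coaching_decision(response).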

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Author: Dom
Date: 2026-01-18 19:14:47 +01:00
parent efb184fdb9
commit d6e2530f2a
2 changed files with 1639 additions and 0 deletions

File diff suppressed because it is too large.

tests/test_coaching_loop.py (new file, 278 lines)

@@ -0,0 +1,278 @@
"""
Tests for COACHING mode in ExecutionLoop.
Tests the coaching decision flow, corrections, and integration
with Correction Packs.
"""
import pytest
from unittest.mock import MagicMock, patch
from datetime import datetime
from core.execution.execution_loop import (
ExecutionLoop,
ExecutionMode,
ExecutionState,
ExecutionContext,
CoachingDecision,
CoachingResponse,
StepResult
)


class TestCoachingDecision:
    """Tests for CoachingDecision enum."""

    def test_all_decisions_exist(self):
        """Verify all expected decisions exist."""
        assert CoachingDecision.ACCEPT.value == "accept"
        assert CoachingDecision.REJECT.value == "reject"
        assert CoachingDecision.CORRECT.value == "correct"
        assert CoachingDecision.EXECUTE_MANUAL.value == "manual"
        assert CoachingDecision.SKIP.value == "skip"


class TestCoachingResponse:
    """Tests for CoachingResponse dataclass."""

    def test_basic_response(self):
        """Test creating basic response."""
        response = CoachingResponse(decision=CoachingDecision.ACCEPT)
        assert response.decision == CoachingDecision.ACCEPT
        assert response.correction is None
        assert response.feedback is None
        assert response.executed_manually is False

    def test_response_with_correction(self):
        """Test response with correction data."""
        correction = {
            'target': {'id': 'new_button'},
            'params': {'timeout': 5}
        }
        response = CoachingResponse(
            decision=CoachingDecision.CORRECT,
            correction=correction,
            feedback="Button moved to new location"
        )
        assert response.decision == CoachingDecision.CORRECT
        assert response.correction == correction
        assert response.feedback == "Button moved to new location"

    def test_manual_execution_response(self):
        """Test response for manual execution."""
        response = CoachingResponse(
            decision=CoachingDecision.EXECUTE_MANUAL,
            executed_manually=True
        )
        assert response.decision == CoachingDecision.EXECUTE_MANUAL
        assert response.executed_manually is True


class TestExecutionLoopCoaching:
    """Tests for ExecutionLoop COACHING mode."""

    @pytest.fixture
    def mock_pipeline(self):
        """Create mock pipeline."""
        pipeline = MagicMock()
        pipeline.load_workflow.return_value = MagicMock(
            workflow_id="test_wf",
            learning_state="COACHING",
            entry_nodes=["node_1"]
        )
        return pipeline

    @pytest.fixture
    def coaching_callback(self):
        """Create mock coaching callback."""
        return MagicMock(return_value=CoachingResponse(
            decision=CoachingDecision.ACCEPT
        ))

    def test_init_with_coaching_callback(self, mock_pipeline, coaching_callback):
        """Test ExecutionLoop init with coaching callback."""
        loop = ExecutionLoop(
            pipeline=mock_pipeline,
            coaching_callback=coaching_callback
        )
        assert loop.coaching_callback == coaching_callback
        assert loop._coaching_stats['suggestions_made'] == 0

    def test_coaching_stats_initial(self, mock_pipeline):
        """Test initial coaching stats."""
        loop = ExecutionLoop(pipeline=mock_pipeline)
        stats = loop.get_coaching_stats()
        assert stats['suggestions_made'] == 0
        assert stats['accepted'] == 0
        assert stats['rejected'] == 0
        assert stats['corrected'] == 0
        assert stats['manual_executions'] == 0
        assert stats['acceptance_rate'] == 0.0
        assert stats['correction_rate'] == 0.0

    def test_apply_coaching_correction_target(self, mock_pipeline):
        """Test applying target correction."""
        loop = ExecutionLoop(pipeline=mock_pipeline)
        original = {
            'action': 'click',
            'target': {'id': 'old_button'},
            'params': {'timeout': 10}
        }
        correction = {
            'target': {'id': 'new_button'}
        }
        corrected = loop._apply_coaching_correction(original, correction)
        assert corrected['target']['id'] == 'new_button'
        assert corrected['action'] == 'click'
        assert corrected['params']['timeout'] == 10

    def test_apply_coaching_correction_params(self, mock_pipeline):
        """Test applying params correction."""
        loop = ExecutionLoop(pipeline=mock_pipeline)
        original = {
            'action': 'type',
            'target': {'id': 'input'},
            'params': {'value': 'old', 'timeout': 10}
        }
        correction = {
            'params': {'value': 'new_value', 'delay': 0.1}
        }
        corrected = loop._apply_coaching_correction(original, correction)
        assert corrected['params']['value'] == 'new_value'
        assert corrected['params']['delay'] == 0.1
        assert corrected['params']['timeout'] == 10

    def test_apply_coaching_correction_action(self, mock_pipeline):
        """Test applying action type correction."""
        loop = ExecutionLoop(pipeline=mock_pipeline)
        original = {
            'action': 'click',
            'target': {'id': 'element'}
        }
        correction = {
            'action': 'double_click'
        }
        corrected = loop._apply_coaching_correction(original, correction)
        assert corrected['action'] == 'double_click'

    def test_apply_coaching_correction_none(self, mock_pipeline):
        """Test that None correction returns original."""
        loop = ExecutionLoop(pipeline=mock_pipeline)
        original = {'action': 'click', 'target': {'id': 'btn'}}
        corrected = loop._apply_coaching_correction(original, None)
        assert corrected == original

    def test_submit_coaching_decision_wrong_state(self, mock_pipeline):
        """Test submitting decision in wrong state fails."""
        loop = ExecutionLoop(pipeline=mock_pipeline)
        loop.state = ExecutionState.RUNNING
        response = CoachingResponse(decision=CoachingDecision.ACCEPT)
        result = loop.submit_coaching_decision(response)
        assert result is False

    def test_submit_coaching_decision_correct_state(self, mock_pipeline):
        """Test submitting decision in correct state succeeds."""
        loop = ExecutionLoop(pipeline=mock_pipeline)
        loop.state = ExecutionState.WAITING_COACHING
        response = CoachingResponse(decision=CoachingDecision.ACCEPT)
        result = loop.submit_coaching_decision(response)
        assert result is True
        assert loop._coaching_response == response

    def test_request_coaching_decision_with_callback(self, mock_pipeline, coaching_callback):
        """Test requesting coaching decision uses callback."""
        loop = ExecutionLoop(
            pipeline=mock_pipeline,
            coaching_callback=coaching_callback
        )
        loop.context = ExecutionContext(
            workflow_id="test_wf",
            execution_id="exec_001",
            mode=ExecutionMode.COACHING,
            started_at=datetime.now()
        )
        action_info = {'action': 'click', 'target': {'id': 'btn'}}
        response = loop._request_coaching_decision(action_info)
        assert response.decision == CoachingDecision.ACCEPT
        coaching_callback.assert_called_once()


class TestCoachingIntegration:
    """Integration tests for COACHING mode."""

    @pytest.fixture
    def mock_pipeline(self):
        """Create mock pipeline."""
        pipeline = MagicMock()
        workflow = MagicMock()
        workflow.workflow_id = "test_wf"
        workflow.learning_state = "COACHING"
        workflow.entry_nodes = ["node_1"]
        pipeline.load_workflow.return_value = workflow
        return pipeline

    def test_determine_execution_mode_coaching(self, mock_pipeline):
        """Test that COACHING learning state maps to COACHING mode."""
        loop = ExecutionLoop(pipeline=mock_pipeline)
        workflow = mock_pipeline.load_workflow()
        mode = loop._determine_execution_mode(workflow)
        assert mode == ExecutionMode.COACHING

    def test_coaching_stats_update(self, mock_pipeline):
        """Test that coaching stats update correctly."""
        loop = ExecutionLoop(pipeline=mock_pipeline)
        # Simulate stats updates
        loop._coaching_stats['suggestions_made'] = 10
        loop._coaching_stats['accepted'] = 7
        loop._coaching_stats['corrected'] = 2
        loop._coaching_stats['rejected'] = 1
        stats = loop.get_coaching_stats()
        assert stats['suggestions_made'] == 10
        assert stats['acceptance_rate'] == 0.7
        assert stats['correction_rate'] == 0.2


class TestExecutionStateCoaching:
    """Tests for WAITING_COACHING state."""

    def test_waiting_coaching_state_exists(self):
        """Verify WAITING_COACHING state exists."""
        assert ExecutionState.WAITING_COACHING.value == "coaching"

    def test_state_transitions(self):
        """Test that WAITING_COACHING is a valid member alongside the other states."""
        states = [
            ExecutionState.IDLE,
            ExecutionState.RUNNING,
            ExecutionState.WAITING_COACHING,
            ExecutionState.COMPLETED
        ]
        assert ExecutionState.WAITING_COACHING in states


if __name__ == '__main__':
    pytest.main([__file__, '-v'])