- Smart systray (pystray+plyer) remplace PyQt5 : notifications toast, menu dynamique avec workflows, chat "Que dois-je faire ?", icône colorée - Preflight GPU : check_machine_ready() + @pytest.mark.gpu dans conftest - Correction 63 tests cassés → 0 failed (1200 passed) - Tests VWB obsolètes déplacés vers _a_trier/ - Support qwen3-vl:8b sur GPU (remplace qwen2.5vl:3b) - fix images < 32x32 (Ollama panic) - fix force_json=False (qwen3-vl incompatible) - fix temperature 0.1 (0.0 bloque avec images) - Fix captor Windows : Key.esc, _get_key_name() - Fix LeaServerClient : check_connection, list_workflows format - deploy_windows.py : packaging propre client Windows - VWB : edges visibles (#607d8b) + fitView automatique Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
494 lines
18 KiB
Python
494 lines
18 KiB
Python
"""
|
|
End-to-End Tests for COACHING Mode
|
|
|
|
Tests the complete OBSERVATION -> COACHING -> AUTO workflow:
|
|
1. Start in OBSERVATION mode (record user actions)
|
|
2. Transition to COACHING mode (suggest actions, get user feedback)
|
|
3. Accumulate corrections in Correction Packs
|
|
4. Track metrics and determine readiness for AUTO mode
|
|
5. Transition to AUTO mode when confidence threshold is met
|
|
|
|
This test simulates the complete learning journey of a workflow.
|
|
"""
|
|
|
|
import pytest
|
|
import tempfile
|
|
import shutil
|
|
import time
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
|
|
@pytest.fixture
def temp_storage():
    """Yield a temporary directory as a ``Path``; remove it after the test.

    The removal is wrapped in ``try/finally`` so the directory is cleaned up
    even if the fixture generator is closed early (e.g. teardown interrupted)
    instead of being resumed normally by pytest.
    """
    temp_dir = Path(tempfile.mkdtemp())
    try:
        yield temp_dir
    finally:
        # Best-effort cleanup: ignore_errors avoids masking a test failure
        # with a teardown error (e.g. locked files on Windows).
        shutil.rmtree(temp_dir, ignore_errors=True)
|
|
|
|
|
|
@pytest.fixture
def coaching_persistence(temp_storage):
    """Coaching session persistence backed by a temp directory."""
    from core.coaching import CoachingSessionPersistence

    storage_dir = temp_storage / 'coaching_sessions'
    return CoachingSessionPersistence(storage_dir)
|
|
|
|
|
|
@pytest.fixture
def correction_service(temp_storage):
    """Correction pack service backed by a temp directory."""
    from core.corrections import CorrectionPackService

    packs_dir = temp_storage / 'correction_packs'
    return CorrectionPackService(storage_path=packs_dir)
|
|
|
|
|
|
@pytest.fixture
def metrics_collector(coaching_persistence):
    """Metrics collector wired to the coaching persistence fixture."""
    from core.coaching import CoachingMetricsCollector

    collector = CoachingMetricsCollector(coaching_persistence)
    return collector
|
|
|
|
|
|
class TestCoachingE2E:
    """End-to-end tests for the complete COACHING workflow."""

    def test_complete_learning_journey(
        self,
        coaching_persistence,
        correction_service,
        metrics_collector
    ):
        """
        Test the complete learning journey from OBSERVATION to AUTO.

        Scenario:
        1. Create workflow and start first COACHING session
        2. Make decisions (mix of accept, correct, reject)
        3. Corrections are captured in Correction Packs
        4. Run multiple sessions to build confidence
        5. Check metrics and readiness for AUTO
        """
        workflow_id = "wf_e2e_test_001"

        # =====================================================================
        # Phase 1: First COACHING session - Learning phase
        # =====================================================================
        print("\n=== Phase 1: First COACHING Session ===")

        session1 = coaching_persistence.create_session(
            workflow_id=workflow_id,
            execution_id="exec_001",
            total_steps=5,
            metadata={'phase': 'learning'}
        )

        # Simulate decisions with some corrections
        from core.coaching.session_persistence import CoachingDecisionRecord

        # (decision, correction) pairs: 3 accepts, 1 correct, 1 reject —
        # the stats assertions below depend on exactly this mix.
        decisions_p1 = [
            ('accept', None),
            ('correct', {'target': {'id': 'new_btn'}}),
            ('accept', None),
            ('reject', None),
            ('accept', None),
        ]

        for i, (decision, correction) in enumerate(decisions_p1):
            record = CoachingDecisionRecord(
                step_index=i,
                node_id=f"node_{i+1}",
                action_type='click',
                decision=decision,
                correction=correction,
                feedback=f"Decision {i+1}"
            )
            session1.add_decision(record)

        coaching_persistence.save_session(session1)
        coaching_persistence.complete_session(session1.session_id, success=True)

        # Verify session stats survive a save/load round-trip
        session1_reloaded = coaching_persistence.load_session(session1.session_id)
        assert session1_reloaded.stats['accepted'] == 3
        assert session1_reloaded.stats['corrected'] == 1
        assert session1_reloaded.stats['rejected'] == 1

        print(f"Session 1 completed: {session1_reloaded.stats}")

        # =====================================================================
        # Phase 2: Multiple sessions to improve acceptance rate
        # =====================================================================
        print("\n=== Phase 2: Multiple Training Sessions ===")

        # Session 2: Better acceptance after learning
        session2 = coaching_persistence.create_session(
            workflow_id=workflow_id,
            execution_id="exec_002",
            total_steps=5
        )

        # Most actions accepted now (corrections are working)
        decisions_p2 = [
            ('accept', None),
            ('accept', None),
            ('accept', None),
            ('accept', None),
            ('correct', {'target': {'text': 'Submit'}}),
        ]

        for i, (decision, correction) in enumerate(decisions_p2):
            record = CoachingDecisionRecord(
                step_index=i,
                node_id=f"node_{i+1}",
                action_type='click',
                decision=decision,
                correction=correction
            )
            session2.add_decision(record)

        coaching_persistence.save_session(session2)
        coaching_persistence.complete_session(session2.session_id, success=True)
        print(f"Session 2 completed: {session2.stats}")

        # Sessions 3-5: High acceptance rate
        for sess_num in range(3, 6):
            session = coaching_persistence.create_session(
                workflow_id=workflow_id,
                execution_id=f"exec_{sess_num:03d}",
                total_steps=5
            )

            # All accepted
            for i in range(5):
                record = CoachingDecisionRecord(
                    step_index=i,
                    node_id=f"node_{i+1}",
                    action_type='click',
                    decision='accept'
                )
                session.add_decision(record)

            coaching_persistence.save_session(session)
            coaching_persistence.complete_session(session.session_id, success=True)
            print(f"Session {sess_num} completed: all accepted")

        # =====================================================================
        # Phase 3: Check Metrics and Learning Progress
        # =====================================================================
        print("\n=== Phase 3: Checking Metrics ===")

        metrics = metrics_collector.get_workflow_metrics(workflow_id)

        print(f"Total sessions: {metrics.total_sessions}")
        print(f"Total decisions: {metrics.total_decisions}")
        print(f"Acceptance rate: {metrics.acceptance_rate:.2%}")
        print(f"Correction rate: {metrics.correction_rate:.2%}")
        print(f"Confidence score: {metrics.confidence_score:.2f}")
        print(f"Learning progress: {metrics.learning_progress.value}")
        print(f"Ready for AUTO: {metrics.ready_for_auto}")
        print(f"Recommendations: {metrics.recommendations}")

        # Assertions
        # 5 sessions x 5 decisions each; 21 accepts / 2 corrections / 1 reject
        # overall, so acceptance is 84% and correction rate 8%.
        assert metrics.total_sessions == 5
        assert metrics.total_decisions == 25
        assert metrics.acceptance_rate > 0.8 # Should be high after training
        assert metrics.correction_rate < 0.15 # Should be low

        # =====================================================================
        # Phase 4: Verify Readiness for AUTO
        # =====================================================================
        print("\n=== Phase 4: AUTO Mode Readiness ===")

        # The workflow should be ready for AUTO after successful training
        assert metrics.ready_for_auto, "Workflow should be ready for AUTO mode"
        assert metrics.learning_progress.value in ['ready', 'autonomous']

        print("SUCCESS: Workflow is ready for autonomous execution!")

    def test_session_persistence_and_recovery(self, coaching_persistence):
        """
        Test that COACHING sessions can be paused and resumed.
        """
        print("\n=== Testing Session Persistence ===")

        workflow_id = "wf_persistence_test"

        # Create and partially complete a session
        session = coaching_persistence.create_session(
            workflow_id=workflow_id,
            execution_id="exec_persist",
            total_steps=10
        )

        from core.coaching.session_persistence import CoachingDecisionRecord

        # Add 3 decisions
        for i in range(3):
            record = CoachingDecisionRecord(
                step_index=i,
                node_id=f"node_{i+1}",
                action_type='click',
                decision='accept'
            )
            session.add_decision(record)

        coaching_persistence.save_session(session)

        # Pause the session
        coaching_persistence.pause_session(session.session_id)

        # Verify paused state round-trips through storage
        # NOTE(review): current_step_index == 3 implies add_decision advances
        # the step pointer — confirmed only by these assertions, not visible here.
        loaded = coaching_persistence.load_session(session.session_id)
        assert loaded.status.value == 'paused'
        assert len(loaded.decisions) == 3
        assert loaded.current_step_index == 3

        # Resume the session
        resumed = coaching_persistence.resume_session(session.session_id)
        assert resumed.status.value == 'active'
        assert resumed.can_resume() is True

        # Continue adding decisions
        for i in range(3, 6):
            record = CoachingDecisionRecord(
                step_index=i,
                node_id=f"node_{i+1}",
                action_type='click',
                decision='accept'
            )
            resumed.add_decision(record)

        coaching_persistence.save_session(resumed)

        # Verify continuation
        final = coaching_persistence.load_session(session.session_id)
        assert len(final.decisions) == 6
        assert final.current_step_index == 6

        print("SUCCESS: Session persistence and recovery works correctly!")

    def test_correction_integration_with_coaching(
        self,
        coaching_persistence,
        correction_service
    ):
        """
        Test that COACHING corrections integrate with Correction Packs.
        """
        print("\n=== Testing Correction Integration ===")

        from core.corrections import CorrectionPackIntegration

        # Create integration; auto_create_pack lets capture_correction create
        # the default pack on first use.
        integration = CorrectionPackIntegration(
            service=correction_service,
            auto_create_pack=True
        )

        workflow_id = "wf_correction_test"

        # Create COACHING session
        session = coaching_persistence.create_session(
            workflow_id=workflow_id,
            execution_id="exec_correction",
            total_steps=5
        )

        from core.coaching.session_persistence import CoachingDecisionRecord

        # Simulate corrections
        corrections_made = [
            {
                'action_type': 'click',
                'element_type': 'button',
                'failure_reason': 'element_not_found',
                'correction_type': 'target_change',
                'original_target': {'text': 'OK'},
                'corrected_target': {'text': 'Valider'}
            },
            {
                'action_type': 'type',
                'element_type': 'input',
                'failure_reason': 'wrong_field',
                'correction_type': 'target_change',
                'original_target': {'id': 'email'},
                'corrected_target': {'name': 'user_email'}
            }
        ]

        # Add decisions with corrections
        for i, correction_data in enumerate(corrections_made):
            record = CoachingDecisionRecord(
                step_index=i,
                node_id=f"node_{i+1}",
                action_type=correction_data['action_type'],
                decision='correct',
                correction=correction_data
            )
            session.add_decision(record)

            # Capture correction in Correction Pack
            integration.capture_correction(
                correction_data=correction_data,
                session_id=session.session_id,
                workflow_id=workflow_id
            )

        coaching_persistence.save_session(session)
        coaching_persistence.complete_session(session.session_id, success=True)

        # Verify corrections captured in pack.
        # NOTE(review): reaches into the private `_default_pack_id` attribute;
        # a public accessor would be preferable if the integration API grows one.
        pack = correction_service.get_pack(integration._default_pack_id)
        # get_pack may return either a dict or an object — handle both shapes.
        corrections_list = pack.get('corrections') if isinstance(pack, dict) else pack.corrections
        assert len(corrections_list) == 2

        print(f"Captured {len(corrections_list)} corrections in Correction Pack")
        print("SUCCESS: Corrections integrated correctly!")

    def test_metrics_threshold_for_auto_mode(self, coaching_persistence, metrics_collector):
        """
        Test that metrics correctly determine AUTO mode readiness.
        """
        print("\n=== Testing AUTO Mode Threshold ===")

        from core.coaching.session_persistence import CoachingDecisionRecord

        workflow_id = "wf_threshold_test"

        # Test case 1: Below threshold (too few sessions)
        session = coaching_persistence.create_session(
            workflow_id=workflow_id,
            execution_id="exec_001",
            total_steps=5
        )

        for i in range(5):
            record = CoachingDecisionRecord(
                step_index=i,
                node_id=f"node_{i+1}",
                action_type='click',
                decision='accept'
            )
            session.add_decision(record)

        coaching_persistence.save_session(session)
        coaching_persistence.complete_session(session.session_id, success=True)

        metrics = metrics_collector.get_workflow_metrics(workflow_id)
        assert not metrics.ready_for_auto, "Should not be ready with only 1 session"

        # Test case 2: Meet minimum sessions
        # (implies the readiness threshold is <= 5 sessions — TODO confirm
        # against the collector's configuration)
        for sess_num in range(2, 6):
            session = coaching_persistence.create_session(
                workflow_id=workflow_id,
                execution_id=f"exec_{sess_num:03d}",
                total_steps=5
            )

            for i in range(5):
                record = CoachingDecisionRecord(
                    step_index=i,
                    node_id=f"node_{i+1}",
                    action_type='click',
                    decision='accept'
                )
                session.add_decision(record)

            coaching_persistence.save_session(session)
            coaching_persistence.complete_session(session.session_id, success=True)

        metrics = metrics_collector.get_workflow_metrics(workflow_id)
        print(f"After 5 sessions - Acceptance: {metrics.acceptance_rate:.2%}, Ready: {metrics.ready_for_auto}")
        assert metrics.ready_for_auto, "Should be ready after 5 sessions with high acceptance"

        print("SUCCESS: Threshold calculation works correctly!")

    def test_global_metrics_aggregation(self, coaching_persistence, metrics_collector):
        """
        Test global metrics aggregation across multiple workflows.
        """
        print("\n=== Testing Global Metrics ===")

        from core.coaching.session_persistence import CoachingDecisionRecord

        # Create sessions for multiple workflows
        workflows = ["wf_global_1", "wf_global_2", "wf_global_3"]

        for wf_id in workflows:
            for sess_num in range(3):
                session = coaching_persistence.create_session(
                    workflow_id=wf_id,
                    execution_id=f"exec_{wf_id}_{sess_num}",
                    total_steps=3
                )

                for i in range(3):
                    # Middle step of each session is a correction; the rest accept.
                    decision = 'accept' if i != 1 else 'correct'
                    record = CoachingDecisionRecord(
                        step_index=i,
                        node_id=f"node_{i+1}",
                        action_type='click',
                        decision=decision
                    )
                    session.add_decision(record)

                coaching_persistence.save_session(session)
                coaching_persistence.complete_session(session.session_id, success=True)

        # Get global metrics
        global_metrics = metrics_collector.get_global_metrics()

        print(f"Total workflows: {global_metrics.total_workflows}")
        print(f"Total sessions: {global_metrics.total_sessions}")
        print(f"Total decisions: {global_metrics.total_decisions}")
        print(f"Acceptance rate: {global_metrics.overall_acceptance_rate:.2%}")

        assert global_metrics.total_workflows == 3
        assert global_metrics.total_sessions == 9 # 3 workflows x 3 sessions
        assert global_metrics.total_decisions == 27 # 9 sessions x 3 decisions

        print("SUCCESS: Global metrics aggregation works correctly!")
|
|
|
|
|
|
class TestCoachingAPIIntegration:
    """Tests for COACHING API integration."""

    def test_api_session_lifecycle(self, coaching_persistence):
        """Test session lifecycle through persistence layer (API simulation)."""
        print("\n=== Testing API Session Lifecycle ===")

        from core.coaching.session_persistence import CoachingDecisionRecord

        # Simulating POST /api/coaching-sessions
        new_session = coaching_persistence.create_session(
            workflow_id="wf_api_test",
            execution_id="exec_api",
            total_steps=3,
        )
        assert new_session.session_id is not None
        sid = new_session.session_id

        # Simulating POST /api/coaching-sessions/{id}/decisions
        decision_record = CoachingDecisionRecord(
            step_index=0,
            node_id="node_1",
            action_type="click",
            decision="accept",
        )
        new_session.add_decision(decision_record)
        coaching_persistence.save_session(new_session)

        # Simulating GET /api/coaching-sessions/{id}
        fetched = coaching_persistence.load_session(sid)
        assert fetched is not None
        assert len(fetched.decisions) == 1

        # Simulating POST /api/coaching-sessions/{id}/complete
        finished = coaching_persistence.complete_session(sid, success=True)
        assert finished.status.value == 'completed'

        print("SUCCESS: API session lifecycle works correctly!")
|
|
|
|
|
|
if __name__ == '__main__':
    # Propagate pytest's exit status so shell/CI callers see test failures;
    # without this the script always exits 0.
    import sys
    sys.exit(pytest.main([__file__, '-v', '-s']))
|