Files
rpa_vision_v3/tests/test_coaching_e2e.py
Dom ad15237fe0 feat: smart systray Léa (plyer), preflight GPU, fix tests, support qwen3-vl
- Smart systray (pystray+plyer) remplace PyQt5 : notifications toast,
  menu dynamique avec workflows, chat "Que dois-je faire ?", icône colorée
- Preflight GPU : check_machine_ready() + @pytest.mark.gpu dans conftest
- Correction 63 tests cassés → 0 failed (1200 passed)
- Tests VWB obsolètes déplacés vers _a_trier/
- Support qwen3-vl:8b sur GPU (remplace qwen2.5vl:3b)
  - fix images < 32x32 (Ollama panic)
  - fix force_json=False (qwen3-vl incompatible)
  - fix temperature 0.1 (0.0 bloque avec images)
- Fix captor Windows : Key.esc, _get_key_name()
- Fix LeaServerClient : check_connection, list_workflows format
- deploy_windows.py : packaging propre client Windows
- VWB : edges visibles (#607d8b) + fitView automatique

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-16 22:25:12 +01:00

494 lines
18 KiB
Python

"""
End-to-End Tests for COACHING Mode
Tests the complete OBSERVATION -> COACHING -> AUTO workflow:
1. Start in OBSERVATION mode (record user actions)
2. Transition to COACHING mode (suggest actions, get user feedback)
3. Accumulate corrections in Correction Packs
4. Track metrics and determine readiness for AUTO mode
5. Transition to AUTO mode when confidence threshold is met
This test simulates the complete learning journey of a workflow.
"""
import pytest
import tempfile
import shutil
import time
from pathlib import Path
from datetime import datetime
from unittest.mock import MagicMock, patch
@pytest.fixture
def temp_storage():
    """Yield a fresh temporary directory as a Path; best-effort removal afterwards."""
    root = tempfile.mkdtemp()
    try:
        yield Path(root)
    finally:
        # ignore_errors: leftover open handles must not fail the test teardown
        shutil.rmtree(root, ignore_errors=True)
@pytest.fixture
def coaching_persistence(temp_storage):
    """Coaching-session persistence layer rooted in the per-test temp directory."""
    from core.coaching import CoachingSessionPersistence

    storage_dir = temp_storage / 'coaching_sessions'
    return CoachingSessionPersistence(storage_dir)
@pytest.fixture
def correction_service(temp_storage):
    """Correction Pack service writing into the per-test temp directory."""
    from core.corrections import CorrectionPackService

    pack_dir = temp_storage / 'correction_packs'
    return CorrectionPackService(storage_path=pack_dir)
@pytest.fixture
def metrics_collector(coaching_persistence):
    """Metrics collector reading from the coaching persistence fixture."""
    from core.coaching import CoachingMetricsCollector

    collector = CoachingMetricsCollector(coaching_persistence)
    return collector
class TestCoachingE2E:
    """End-to-end tests for the complete COACHING workflow."""

    def test_complete_learning_journey(
        self,
        coaching_persistence,
        correction_service,
        metrics_collector
    ):
        """
        Test the complete learning journey from OBSERVATION to AUTO.

        Scenario:
        1. Create workflow and start first COACHING session
        2. Make decisions (mix of accept, correct, reject)
        3. Corrections are captured in Correction Packs
        4. Run multiple sessions to build confidence
        5. Check metrics and readiness for AUTO
        """
        workflow_id = "wf_e2e_test_001"

        # =====================================================================
        # Phase 1: First COACHING session - Learning phase
        # =====================================================================
        print("\n=== Phase 1: First COACHING Session ===")
        session1 = coaching_persistence.create_session(
            workflow_id=workflow_id,
            execution_id="exec_001",
            total_steps=5,
            metadata={'phase': 'learning'}
        )

        # Simulate decisions with some corrections
        from core.coaching.session_persistence import CoachingDecisionRecord
        # 3 accepts + 1 correction + 1 reject: a realistic first session
        decisions_p1 = [
            ('accept', None),
            ('correct', {'target': {'id': 'new_btn'}}),
            ('accept', None),
            ('reject', None),
            ('accept', None),
        ]
        for i, (decision, correction) in enumerate(decisions_p1):
            record = CoachingDecisionRecord(
                step_index=i,
                node_id=f"node_{i+1}",
                action_type='click',
                decision=decision,
                correction=correction,
                feedback=f"Decision {i+1}"
            )
            session1.add_decision(record)
        coaching_persistence.save_session(session1)
        coaching_persistence.complete_session(session1.session_id, success=True)

        # Verify session stats survive a save/load round-trip
        session1_reloaded = coaching_persistence.load_session(session1.session_id)
        assert session1_reloaded.stats['accepted'] == 3
        assert session1_reloaded.stats['corrected'] == 1
        assert session1_reloaded.stats['rejected'] == 1
        print(f"Session 1 completed: {session1_reloaded.stats}")

        # =====================================================================
        # Phase 2: Multiple sessions to improve acceptance rate
        # =====================================================================
        print("\n=== Phase 2: Multiple Training Sessions ===")
        # Session 2: Better acceptance after learning
        session2 = coaching_persistence.create_session(
            workflow_id=workflow_id,
            execution_id="exec_002",
            total_steps=5
        )
        # Most actions accepted now (corrections are working)
        decisions_p2 = [
            ('accept', None),
            ('accept', None),
            ('accept', None),
            ('accept', None),
            ('correct', {'target': {'text': 'Submit'}}),
        ]
        for i, (decision, correction) in enumerate(decisions_p2):
            record = CoachingDecisionRecord(
                step_index=i,
                node_id=f"node_{i+1}",
                action_type='click',
                decision=decision,
                correction=correction
            )
            session2.add_decision(record)
        coaching_persistence.save_session(session2)
        coaching_persistence.complete_session(session2.session_id, success=True)
        print(f"Session 2 completed: {session2.stats}")

        # Sessions 3-5: High acceptance rate
        for sess_num in range(3, 6):
            session = coaching_persistence.create_session(
                workflow_id=workflow_id,
                execution_id=f"exec_{sess_num:03d}",
                total_steps=5
            )
            # All accepted
            for i in range(5):
                record = CoachingDecisionRecord(
                    step_index=i,
                    node_id=f"node_{i+1}",
                    action_type='click',
                    decision='accept'
                )
                session.add_decision(record)
            coaching_persistence.save_session(session)
            coaching_persistence.complete_session(session.session_id, success=True)
            print(f"Session {sess_num} completed: all accepted")

        # =====================================================================
        # Phase 3: Check Metrics and Learning Progress
        # =====================================================================
        print("\n=== Phase 3: Checking Metrics ===")
        metrics = metrics_collector.get_workflow_metrics(workflow_id)
        print(f"Total sessions: {metrics.total_sessions}")
        print(f"Total decisions: {metrics.total_decisions}")
        print(f"Acceptance rate: {metrics.acceptance_rate:.2%}")
        print(f"Correction rate: {metrics.correction_rate:.2%}")
        print(f"Confidence score: {metrics.confidence_score:.2f}")
        print(f"Learning progress: {metrics.learning_progress.value}")
        print(f"Ready for AUTO: {metrics.ready_for_auto}")
        print(f"Recommendations: {metrics.recommendations}")

        # Assertions: 5 sessions x 5 decisions; 21 accepts / 25 = 84%,
        # 2 corrections / 25 = 8%
        assert metrics.total_sessions == 5
        assert metrics.total_decisions == 25
        assert metrics.acceptance_rate > 0.8  # Should be high after training
        assert metrics.correction_rate < 0.15  # Should be low

        # =====================================================================
        # Phase 4: Verify Readiness for AUTO
        # =====================================================================
        print("\n=== Phase 4: AUTO Mode Readiness ===")
        # The workflow should be ready for AUTO after successful training
        assert metrics.ready_for_auto, "Workflow should be ready for AUTO mode"
        assert metrics.learning_progress.value in ['ready', 'autonomous']
        print("SUCCESS: Workflow is ready for autonomous execution!")

    def test_session_persistence_and_recovery(self, coaching_persistence):
        """
        Test that COACHING sessions can be paused and resumed.
        """
        print("\n=== Testing Session Persistence ===")
        workflow_id = "wf_persistence_test"

        # Create and partially complete a session
        session = coaching_persistence.create_session(
            workflow_id=workflow_id,
            execution_id="exec_persist",
            total_steps=10
        )
        from core.coaching.session_persistence import CoachingDecisionRecord

        # Add 3 decisions
        for i in range(3):
            record = CoachingDecisionRecord(
                step_index=i,
                node_id=f"node_{i+1}",
                action_type='click',
                decision='accept'
            )
            session.add_decision(record)
        coaching_persistence.save_session(session)

        # Pause the session
        coaching_persistence.pause_session(session.session_id)

        # Verify paused state persisted, including the resume cursor
        loaded = coaching_persistence.load_session(session.session_id)
        assert loaded.status.value == 'paused'
        assert len(loaded.decisions) == 3
        assert loaded.current_step_index == 3

        # Resume the session
        resumed = coaching_persistence.resume_session(session.session_id)
        assert resumed.status.value == 'active'
        assert resumed.can_resume() is True

        # Continue adding decisions from where we left off
        for i in range(3, 6):
            record = CoachingDecisionRecord(
                step_index=i,
                node_id=f"node_{i+1}",
                action_type='click',
                decision='accept'
            )
            resumed.add_decision(record)
        coaching_persistence.save_session(resumed)

        # Verify continuation
        final = coaching_persistence.load_session(session.session_id)
        assert len(final.decisions) == 6
        assert final.current_step_index == 6
        print("SUCCESS: Session persistence and recovery works correctly!")

    def test_correction_integration_with_coaching(
        self,
        coaching_persistence,
        correction_service
    ):
        """
        Test that COACHING corrections integrate with Correction Packs.
        """
        print("\n=== Testing Correction Integration ===")
        from core.corrections import CorrectionPackIntegration

        # Create integration; auto_create_pack makes a default pack on demand
        integration = CorrectionPackIntegration(
            service=correction_service,
            auto_create_pack=True
        )
        workflow_id = "wf_correction_test"

        # Create COACHING session
        session = coaching_persistence.create_session(
            workflow_id=workflow_id,
            execution_id="exec_correction",
            total_steps=5
        )
        from core.coaching.session_persistence import CoachingDecisionRecord

        # Simulate corrections
        corrections_made = [
            {
                'action_type': 'click',
                'element_type': 'button',
                'failure_reason': 'element_not_found',
                'correction_type': 'target_change',
                'original_target': {'text': 'OK'},
                'corrected_target': {'text': 'Valider'}
            },
            {
                'action_type': 'type',
                'element_type': 'input',
                'failure_reason': 'wrong_field',
                'correction_type': 'target_change',
                'original_target': {'id': 'email'},
                'corrected_target': {'name': 'user_email'}
            }
        ]
        # Add decisions with corrections
        for i, correction_data in enumerate(corrections_made):
            record = CoachingDecisionRecord(
                step_index=i,
                node_id=f"node_{i+1}",
                action_type=correction_data['action_type'],
                decision='correct',
                correction=correction_data
            )
            session.add_decision(record)
            # Capture correction in Correction Pack
            integration.capture_correction(
                correction_data=correction_data,
                session_id=session.session_id,
                workflow_id=workflow_id
            )
        coaching_persistence.save_session(session)
        coaching_persistence.complete_session(session.session_id, success=True)

        # Verify corrections captured in pack
        pack = correction_service.get_pack(integration._default_pack_id)
        # get_pack may return a dict or an object; handle both shapes
        corrections_list = pack.get('corrections') if isinstance(pack, dict) else pack.corrections
        assert len(corrections_list) == 2
        print(f"Captured {len(corrections_list)} corrections in Correction Pack")
        print("SUCCESS: Corrections integrated correctly!")

    def test_metrics_threshold_for_auto_mode(self, coaching_persistence, metrics_collector):
        """
        Test that metrics correctly determine AUTO mode readiness.
        """
        print("\n=== Testing AUTO Mode Threshold ===")
        from core.coaching.session_persistence import CoachingDecisionRecord
        workflow_id = "wf_threshold_test"

        # Test case 1: Below threshold (too few sessions)
        session = coaching_persistence.create_session(
            workflow_id=workflow_id,
            execution_id="exec_001",
            total_steps=5
        )
        for i in range(5):
            record = CoachingDecisionRecord(
                step_index=i,
                node_id=f"node_{i+1}",
                action_type='click',
                decision='accept'
            )
            session.add_decision(record)
        coaching_persistence.save_session(session)
        coaching_persistence.complete_session(session.session_id, success=True)

        metrics = metrics_collector.get_workflow_metrics(workflow_id)
        assert not metrics.ready_for_auto, "Should not be ready with only 1 session"

        # Test case 2: Meet minimum sessions (4 more, all accepted)
        for sess_num in range(2, 6):
            session = coaching_persistence.create_session(
                workflow_id=workflow_id,
                execution_id=f"exec_{sess_num:03d}",
                total_steps=5
            )
            for i in range(5):
                record = CoachingDecisionRecord(
                    step_index=i,
                    node_id=f"node_{i+1}",
                    action_type='click',
                    decision='accept'
                )
                session.add_decision(record)
            coaching_persistence.save_session(session)
            coaching_persistence.complete_session(session.session_id, success=True)

        metrics = metrics_collector.get_workflow_metrics(workflow_id)
        print(f"After 5 sessions - Acceptance: {metrics.acceptance_rate:.2%}, Ready: {metrics.ready_for_auto}")
        assert metrics.ready_for_auto, "Should be ready after 5 sessions with high acceptance"
        print("SUCCESS: Threshold calculation works correctly!")

    def test_global_metrics_aggregation(self, coaching_persistence, metrics_collector):
        """
        Test global metrics aggregation across multiple workflows.
        """
        print("\n=== Testing Global Metrics ===")
        from core.coaching.session_persistence import CoachingDecisionRecord

        # Create sessions for multiple workflows (3 sessions x 3 decisions each)
        workflows = ["wf_global_1", "wf_global_2", "wf_global_3"]
        for wf_id in workflows:
            for sess_num in range(3):
                session = coaching_persistence.create_session(
                    workflow_id=wf_id,
                    execution_id=f"exec_{wf_id}_{sess_num}",
                    total_steps=3
                )
                for i in range(3):
                    # Middle step is always a correction: 2 accepts + 1 correct
                    decision = 'accept' if i != 1 else 'correct'
                    record = CoachingDecisionRecord(
                        step_index=i,
                        node_id=f"node_{i+1}",
                        action_type='click',
                        decision=decision
                    )
                    session.add_decision(record)
                coaching_persistence.save_session(session)
                coaching_persistence.complete_session(session.session_id, success=True)

        # Get global metrics
        global_metrics = metrics_collector.get_global_metrics()
        print(f"Total workflows: {global_metrics.total_workflows}")
        print(f"Total sessions: {global_metrics.total_sessions}")
        print(f"Total decisions: {global_metrics.total_decisions}")
        print(f"Acceptance rate: {global_metrics.overall_acceptance_rate:.2%}")

        assert global_metrics.total_workflows == 3
        assert global_metrics.total_sessions == 9  # 3 workflows x 3 sessions
        assert global_metrics.total_decisions == 27  # 9 sessions x 3 decisions
        print("SUCCESS: Global metrics aggregation works correctly!")
class TestCoachingAPIIntegration:
    """Tests for COACHING API integration."""

    def test_api_session_lifecycle(self, coaching_persistence):
        """Test session lifecycle through persistence layer (API simulation)."""
        print("\n=== Testing API Session Lifecycle ===")
        from core.coaching.session_persistence import CoachingDecisionRecord

        # Create session (simulating POST /api/coaching-sessions)
        session = coaching_persistence.create_session(
            workflow_id="wf_api_test",
            execution_id="exec_api",
            total_steps=3
        )
        assert session.session_id is not None

        # Add decision (simulating POST /api/coaching-sessions/{id}/decisions)
        record = CoachingDecisionRecord(
            step_index=0,
            node_id="node_1",
            action_type="click",
            decision="accept"
        )
        session.add_decision(record)
        coaching_persistence.save_session(session)

        # Get session (simulating GET /api/coaching-sessions/{id})
        loaded = coaching_persistence.load_session(session.session_id)
        assert loaded is not None
        assert len(loaded.decisions) == 1

        # Complete session (simulating POST /api/coaching-sessions/{id}/complete)
        completed = coaching_persistence.complete_session(session.session_id, success=True)
        assert completed.status.value == 'completed'
        print("SUCCESS: API session lifecycle works correctly!")
if __name__ == '__main__':
    # Allow running this module directly: -v verbose, -s to show the prints.
    pytest.main([__file__, '-v', '-s'])