Files
rpa_vision_v3/tests/unit/test_chat_interface.py
Dom f541bb8ce4 feat: Léa chat + IRBuilder enrichi (stratégies V4 complètes)
Aspect 2/4 Léa : interface conversationnelle
- chat_interface.py : ChatSession thread-safe, états idle/planning/awaiting_confirmation/executing/done/error
- 5 endpoints REST : /api/v1/chat/* (session, message, history, confirm, sessions)
- web_dashboard/chat.html + chat.js : UI minimaliste, polling 2s, pas de framework
- Proxy Flask /api/chat/* → serveur streaming
- 34 tests (happy path, abandon, refus, erreurs, gemma4 down)

IRBuilder enrichi pour plans V4 complets
- _event_to_action() appelle enrich_click_from_screenshot() quand session_dir dispo
- Chaque clic porte _enrichment (by_text OCR, anchor_image_base64, vlm_description)
- ExecutionCompiler consomme l'enrichissement pour produire 3 stratégies par clic
  Avant : [ocr] uniquement, target="unknown_window"
  Après : [ocr, template, vlm] avec vrai texte OCR ("Rechercher", "Ouvrir")

Validé sur session réelle : 10/10 clics enrichis (by_text + anchor + vlm_description)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-10 09:01:13 +02:00

442 lines
16 KiB
Python

# tests/unit/test_chat_interface.py
"""
Unit tests for the chat_interface module (conversational Léa).
Covers:
1. Session creation (initial state, welcome message)
2. Sending a message → call to the mocked TaskPlanner
3. History (get_history)
4. State transitions idle → planning → awaiting_confirmation → executing → done
5. Cancellation (the user answers "non")
6. Graceful fallback when gemma4/TaskPlanner is unavailable
7. Snapshot (get_snapshot structure and pending plan)
8. ChatManager (creation, listing, cleanup)
"""
from __future__ import annotations
import sys
import time
from pathlib import Path
from unittest.mock import MagicMock
import pytest
# Directory three levels above this file (parents[2]); it is put at the front
# of sys.path so that the ``agent_v0`` package imported below can be resolved.
_ROOT = str(Path(__file__).resolve().parents[2])
if _ROOT not in sys.path:
    # Insert only once so repeated imports of this module do not grow sys.path.
    sys.path.insert(0, _ROOT)
from agent_v0.server_v1.chat_interface import (
ChatSession,
ChatManager,
STATE_IDLE,
STATE_PLANNING,
STATE_AWAITING_CONFIRMATION,
STATE_EXECUTING,
STATE_DONE,
STATE_ERROR,
ROLE_USER,
ROLE_LEA,
)
from agent_v0.server_v1.task_planner import TaskPlan
# =============================================================================
# Fixtures
# =============================================================================
@pytest.fixture
def sample_workflows():
    """Provide a one-entry workflow catalogue (the "Bloc-notes" workflow)."""
    notepad_workflow = dict(
        session_id="sess_bloc_notes",
        name="Bloc-notes",
        description="Ouvrir Bloc-notes via Exécuter (Win+R) et écrire du texte",
    )
    return [notepad_workflow]
@pytest.fixture
def understood_plan():
    """Build a TaskPlan flagged as understood and matched to ``sess_bloc_notes``."""
    plan_fields = {
        "instruction": "ouvre le bloc-notes et écris bonjour",
        "understood": True,
        "workflow_match": "sess_bloc_notes",
        "workflow_name": "Bloc-notes",
        "match_confidence": 0.9,
        "parameters": {"texte": "bonjour"},
        "is_loop": False,
        "mode": "replay",
    }
    return TaskPlan(**plan_fields)
@pytest.fixture
def unknown_plan():
    """Build a TaskPlan flagged as not understood, carrying an error message."""
    plan_fields = {
        "instruction": "fais le café",
        "understood": False,
        "error": "aucun workflow ne correspond",
    }
    return TaskPlan(**plan_fields)
@pytest.fixture
def mock_task_planner(understood_plan):
    """Provide a mock planner whose ``understand()`` returns ``understood_plan``."""
    # Dotted keyword configures the child mock's return value at construction.
    return MagicMock(**{"understand.return_value": understood_plan})
@pytest.fixture
def mock_replay_callback():
    """Provide a mock replay callback that returns the id ``"replay_abc123"``."""
    callback = MagicMock()
    callback.return_value = "replay_abc123"
    return callback
@pytest.fixture
def mock_status_provider():
    """Provide a mock status provider reporting a 'running' replay by default.

    Tests may overwrite ``return_value`` to simulate other replay outcomes.
    """
    default_status = {
        "status": "running",
        "completed_actions": 1,
        "total_actions": 5,
    }
    provider = MagicMock()
    provider.return_value = default_status
    return provider
@pytest.fixture
def session(mock_task_planner, sample_workflows, mock_replay_callback, mock_status_provider):
    """Build a ChatSession wired to the mocked planner, replay callback and status provider."""

    def provide_workflows():
        # Zero-argument provider handing back the fixture catalogue.
        return sample_workflows

    return ChatSession(
        task_planner=mock_task_planner,
        workflows_provider=provide_workflows,
        replay_callback=mock_replay_callback,
        status_provider=mock_status_provider,
    )
# =============================================================================
# Tests création session
# =============================================================================
class TestSessionCreation:
    """Checks on a ChatSession right after construction."""

    def test_session_id_generated(self):
        """A session built without an id gets one starting with ``chat_``."""
        fresh = ChatSession()
        assert fresh.session_id.startswith("chat_")

    def test_initial_state_is_idle(self):
        """A new session starts in the idle state."""
        fresh = ChatSession()
        assert fresh.state == STATE_IDLE

    def test_welcome_message_present(self):
        """The history initially holds exactly one greeting authored by Léa."""
        fresh = ChatSession()
        messages = fresh.get_history()
        assert len(messages) == 1
        welcome = messages[0]
        assert welcome["role"] == ROLE_LEA
        assert any(word in welcome["content"] for word in ("Bonjour", "Léa"))

    def test_session_id_custom(self):
        """An explicit ``session_id`` argument is kept verbatim."""
        custom = ChatSession(session_id="custom_42")
        assert custom.session_id == "custom_42"
# =============================================================================
# Tests send_message
# =============================================================================
class TestSendMessage:
    """Behaviour of ``send_message`` on a session backed by a mocked planner."""

    def test_empty_message_rejected(self, session):
        """An empty string is refused (``ok`` is False)."""
        outcome = session.send_message("")
        assert outcome["ok"] is False

    def test_send_message_calls_planner(self, session, mock_task_planner):
        """The planner is called once with the instruction and the workflow list."""
        session.send_message("ouvre le bloc-notes")
        understand = mock_task_planner.understand
        understand.assert_called_once()
        passed = understand.call_args.kwargs
        assert passed["instruction"] == "ouvre le bloc-notes"
        # The workflows provider was invoked and its result forwarded.
        assert "available_workflows" in passed
        assert len(passed["available_workflows"]) == 1

    def test_send_message_transitions_to_awaiting_confirmation(self, session):
        """After an understood instruction the session waits for confirmation."""
        outcome = session.send_message("ouvre le bloc-notes")
        assert outcome["ok"] is True
        assert session.state == STATE_AWAITING_CONFIRMATION
        assert outcome["state"] == STATE_AWAITING_CONFIRMATION

    def test_user_message_added_to_history(self, session):
        """The user's text is recorded once in the history."""
        session.send_message("ouvre le bloc-notes")
        from_user = [
            entry for entry in session.get_history() if entry["role"] == ROLE_USER
        ]
        assert len(from_user) == 1
        assert from_user[0]["content"] == "ouvre le bloc-notes"

    def test_lea_proposal_added_to_history(self, session):
        """Léa's proposal follows the welcome message and asks for a go-ahead."""
        session.send_message("ouvre le bloc-notes")
        from_lea = [
            entry for entry in session.get_history() if entry["role"] == ROLE_LEA
        ]
        # Welcome message + proposal.
        assert len(from_lea) == 2
        proposal_text = from_lea[-1]["content"]
        assert "Bloc-notes" in proposal_text
        lowered = proposal_text.lower()
        assert "oui" in lowered or "y aller" in lowered

    def test_proposal_contains_confidence(self, session):
        """The last message mentions the match confidence as a percentage."""
        session.send_message("ouvre le bloc-notes")
        latest = session.get_history()[-1]
        # 0.9 → 90%
        assert "90" in latest["content"]

    def test_proposal_contains_parameters(self, session):
        """The last message shows the extracted parameter name and value."""
        session.send_message("ouvre le bloc-notes")
        proposal_text = session.get_history()[-1]["content"]
        for expected in ("texte", "bonjour"):
            assert expected in proposal_text
# =============================================================================
# Tests get_history
# =============================================================================
class TestGetHistory:
    """Shape and growth of the list returned by ``get_history``."""

    def test_history_returns_list_of_dicts(self, session):
        """The history is a list whose items are all dicts."""
        entries = session.get_history()
        assert isinstance(entries, list)
        assert all(isinstance(entry, dict) for entry in entries)

    def test_history_message_structure(self, session):
        """The first message exposes role, content, timestamp and meta keys."""
        first = session.get_history()[0]
        for field in ("role", "content", "timestamp", "meta"):
            assert field in first

    def test_history_grows_with_messages(self, session):
        """Sending a message makes the history longer."""
        count_before = len(session.get_history())
        session.send_message("ouvre le bloc-notes")
        count_after = len(session.get_history())
        assert count_after > count_before
# =============================================================================
# Tests transitions d'états
# =============================================================================
class TestStateTransitions:
    """State changes driven by messages, confirmations and replay status."""

    def test_full_happy_path(self, session, mock_task_planner, mock_replay_callback):
        """Nominal flow: idle → awaiting_confirmation → executing → done."""
        # Starting point: idle.
        assert session.state == STATE_IDLE

        # Sending an instruction leaves the session awaiting confirmation.
        session.send_message("ouvre le bloc-notes")
        assert session.state == STATE_AWAITING_CONFIRMATION

        # Confirming starts the execution through the replay callback.
        outcome = session.confirm(confirmed=True)
        assert outcome["ok"] is True
        assert session.state == STATE_EXECUTING
        mock_replay_callback.assert_called_once()
        replay_kwargs = mock_replay_callback.call_args.kwargs
        assert replay_kwargs["session_id"] == "sess_bloc_notes"

        # Simulate a finished replay, then refresh to reach the done state.
        finished = {
            "status": "done",
            "completed_actions": 5,
            "total_actions": 5,
        }
        session._status_provider.return_value = finished
        session.refresh_progress()
        assert session.state == STATE_DONE

    def test_confirm_via_message_oui(self, session, mock_replay_callback):
        """The user (TIM) may confirm by typing 'oui' instead of using a button."""
        session.send_message("ouvre le bloc-notes")
        assert session.state == STATE_AWAITING_CONFIRMATION
        session.send_message("oui")
        assert session.state == STATE_EXECUTING
        mock_replay_callback.assert_called_once()

    def test_refusal_via_confirm_false(self, session, mock_replay_callback):
        """``confirm(False)`` returns to idle without running anything."""
        session.send_message("ouvre le bloc-notes")
        outcome = session.confirm(confirmed=False)
        assert outcome["ok"] is True
        assert outcome["confirmed"] is False
        assert session.state == STATE_IDLE
        mock_replay_callback.assert_not_called()

    def test_refusal_via_message_non(self, session, mock_replay_callback):
        """The user (TIM) answers 'non' → the plan is cancelled."""
        session.send_message("ouvre le bloc-notes")
        session.send_message("non")
        assert session.state == STATE_IDLE
        mock_replay_callback.assert_not_called()
        # A cancellation message must appear in the history.
        messages = session.get_history()
        assert any("annule" in entry["content"].lower() for entry in messages)

    def test_ambiguous_confirmation_reply(self, session):
        """An ambiguous reply while awaiting confirmation asks for clarification."""
        session.send_message("ouvre le bloc-notes")
        outcome = session.send_message("peut-être")
        assert session.state == STATE_AWAITING_CONFIRMATION
        assert outcome.get("needs_clarification") is True

    def test_failed_replay_transitions_to_error(self, session):
        """A replay callback that raises puts the session in the error state."""
        exploding = MagicMock()
        exploding.side_effect = RuntimeError("boom")
        session._replay_callback = exploding
        session.send_message("ouvre le bloc-notes")
        outcome = session.confirm(confirmed=True)
        assert outcome["ok"] is False
        assert session.state == STATE_ERROR

    def test_replay_failure_from_status(self, session):
        """A 'failed' status reported for the replay leads to the error state."""
        session.send_message("ouvre le bloc-notes")
        session.confirm(confirmed=True)
        assert session.state == STATE_EXECUTING
        failure = {
            "status": "failed",
            "error": "element introuvable",
        }
        session._status_provider.return_value = failure
        session.refresh_progress()
        assert session.state == STATE_ERROR
# =============================================================================
# Tests fallback / résilience
# =============================================================================
class TestResilience:
    """Fallback behaviour when a collaborator is missing or failing."""

    def test_no_task_planner_graceful(self):
        """Without a TaskPlanner the session fails cleanly and says so."""
        bare = ChatSession(task_planner=None)
        outcome = bare.send_message("test")
        assert outcome["ok"] is False
        assert bare.state == STATE_ERROR
        # An apology / unavailability message must appear in the history.
        messages = bare.get_history()
        assert any(
            any(marker in entry["content"].lower() for marker in ("désolée", "indisponible"))
            for entry in messages
        )

    def test_task_planner_exception_graceful(self, mock_replay_callback):
        """A planner that raises (gemma4 down) yields a clean error state."""
        failing_planner = MagicMock(
            **{"understand.side_effect": RuntimeError("gemma4 offline")}
        )
        chat = ChatSession(
            task_planner=failing_planner,
            workflows_provider=lambda: [],
            replay_callback=mock_replay_callback,
        )
        outcome = chat.send_message("test")
        assert outcome["ok"] is False
        assert chat.state == STATE_ERROR

    def test_instruction_not_understood(self, unknown_plan, mock_replay_callback):
        """A plan with ``understood=False`` produces an explicit error message."""
        puzzled_planner = MagicMock(**{"understand.return_value": unknown_plan})
        chat = ChatSession(
            task_planner=puzzled_planner,
            workflows_provider=lambda: [],
            replay_callback=mock_replay_callback,
        )
        outcome = chat.send_message("fais le café")
        assert outcome["ok"] is False
        assert chat.state == STATE_ERROR
        messages = chat.get_history()
        assert any("reformuler" in entry["content"].lower() for entry in messages)

    def test_no_replay_callback(self, mock_task_planner, sample_workflows):
        """Without a replay callback, confirming is refused cleanly."""
        chat = ChatSession(
            task_planner=mock_task_planner,
            workflows_provider=lambda: sample_workflows,
            replay_callback=None,
        )
        chat.send_message("ouvre le bloc-notes")
        outcome = chat.confirm(confirmed=True)
        assert outcome["ok"] is False
        assert chat.state == STATE_ERROR
# =============================================================================
# Tests snapshot
# =============================================================================
class TestSnapshot:
    """Content of the dict returned by ``get_snapshot``."""

    def test_snapshot_structure(self, session):
        """The snapshot exposes all expected top-level keys."""
        snapshot = session.get_snapshot()
        expected_keys = (
            "session_id",
            "state",
            "messages",
            "pending_plan",
            "active_replay_id",
            "progress",
        )
        for key in expected_keys:
            assert key in snapshot

    def test_snapshot_includes_pending_plan_when_awaiting(self, session):
        """While awaiting confirmation the snapshot carries the proposed plan."""
        session.send_message("ouvre le bloc-notes")
        snapshot = session.get_snapshot()
        assert snapshot["state"] == STATE_AWAITING_CONFIRMATION
        plan = snapshot["pending_plan"]
        assert plan is not None
        assert plan["workflow_name"] == "Bloc-notes"

    def test_snapshot_no_pending_plan_in_idle(self, session):
        """An idle session has no pending plan in its snapshot."""
        snapshot = session.get_snapshot()
        assert snapshot["pending_plan"] is None
# =============================================================================
# Tests ChatManager
# =============================================================================
class TestChatManager:
    """Creation, lookup, deletion, cleanup and listing through ChatManager."""

    def test_create_session(self, mock_task_planner, sample_workflows):
        """A created session shows up in the manager's listing."""
        manager = ChatManager(
            task_planner=mock_task_planner,
            workflows_provider=lambda: sample_workflows,
        )
        created = manager.create_session()
        assert created is not None
        created_id = created.session_id
        listed_ids = [item["session_id"] for item in manager.list_sessions()]
        assert created_id in listed_ids

    def test_get_session(self, mock_task_planner):
        """Looking a session up by id returns the very same object."""
        manager = ChatManager(task_planner=mock_task_planner)
        created = manager.create_session()
        fetched = manager.get_session(created.session_id)
        assert fetched is created

    def test_get_session_not_found(self):
        """An unknown id yields ``None``."""
        manager = ChatManager()
        assert manager.get_session("unknown") is None

    def test_delete_session(self, mock_task_planner):
        """Deleting returns True and the session can no longer be fetched."""
        manager = ChatManager(task_planner=mock_task_planner)
        created = manager.create_session()
        assert manager.delete_session(created.session_id) is True
        assert manager.get_session(created.session_id) is None

    def test_cleanup_old_sessions(self, mock_task_planner):
        """A session older than ``max_age_s`` is removed by ``cleanup_old``."""
        manager = ChatManager(task_planner=mock_task_planner)
        stale = manager.create_session()
        # Backdate the session far beyond the one-hour limit used below.
        stale.updated_at = time.time() - 100000
        removed_count = manager.cleanup_old(max_age_s=3600)
        assert removed_count == 1
        assert manager.get_session(stale.session_id) is None

    def test_list_sessions_structure(self, mock_task_planner):
        """Each listing entry exposes session_id, state and machine_id."""
        manager = ChatManager(task_planner=mock_task_planner)
        manager.create_session(machine_id="pc-01")
        listing = manager.list_sessions()
        assert len(listing) == 1
        entry = listing[0]
        for key in ("session_id", "state", "machine_id"):
            assert key in entry
        assert entry["machine_id"] == "pc-01"