feat(agent): add learn action flow and grounding guards

This commit is contained in:
Dom
2026-06-02 16:24:10 +02:00
parent 86b3c8f7e7
commit d38f0b0f2f
39 changed files with 5901 additions and 212 deletions

View File

@@ -83,9 +83,24 @@ app.config['MAX_CONTENT_LENGTH'] = 50 * 1024 * 1024 # 50 MB max upload (sécuri
_ALLOWED_ORIGINS = [
"http://localhost:3002",
"http://localhost:5002",
"http://localhost:5004",
"https://vwb.labs.laurinebazin.design",
"https://lea.labs.laurinebazin.design",
# LAN local : serveur Linux (192.168.1.40) + Léa Windows (192.168.1.11).
# Sans ces origines, engineio rejette la ChatWindow tkinter Windows et
# même les requêtes self-loopback (cf. journal 2026-05-24 11:00:47).
"http://192.168.1.40:5004",
"http://192.168.1.40:5005",
"http://192.168.1.11:5004",
"http://192.168.1.11:5005",
]
# Extra origins can be appended via LEA_CORS_ALLOWED_ORIGINS=comma,separated,list
# (e.g. for non-LAN environments). The variable extends the default list above,
# it does not replace it. Empty or unset → only the default list is used.
_extra_origins = os.environ.get("LEA_CORS_ALLOWED_ORIGINS", "").strip()
if _extra_origins:
_ALLOWED_ORIGINS.extend(
o.strip() for o in _extra_origins.split(",") if o.strip()
)
socketio = SocketIO(app, cors_allowed_origins=_ALLOWED_ORIGINS)
@@ -199,6 +214,9 @@ _pending_imports: Dict[str, Dict[str, Any]] = {}
# Copilot state — suivi du mode pas-à-pas
_copilot_sessions: Dict[str, Dict[str, Any]] = {}
# LearnActionOrchestrator — P1-LEA SHADOW (apprentissage Léa-first)
learn_action_orchestrator = None # injecté par init_system()
_COPILOT_KEYWORDS = [
"copilot", "co-pilot",
"pas à pas", "pas-à-pas", "pas a pas",
@@ -278,8 +296,24 @@ def init_system():
if EXECUTION_AVAILABLE:
try:
# Pipeline de workflow (matching + actions)
workflow_pipeline = WorkflowPipeline()
logger.info("✓ WorkflowPipeline initialisé")
# Depuis C1c 2026-05-25 : désactiver UI detection (OWL/VLM côté
# UIDetector via DetectionConfig) par défaut pour économiser
# ~900 MiB VRAM au boot du chat service. Le chemin SocketIO 5004
# / narration ChatWindow / ExecutionLoop n'utilise pas
# workflow_pipeline.ui_detector (grep confirmé). Activation
# explicite : AGENT_CHAT_ENABLE_UI_DETECTION=1.
_ui_detection_enabled = os.environ.get(
"AGENT_CHAT_ENABLE_UI_DETECTION", "0"
).strip() in ("1", "true", "yes")
workflow_pipeline = WorkflowPipeline(
enable_ui_detection=_ui_detection_enabled,
enable_vlm=_ui_detection_enabled,
)
logger.info(
f"✓ WorkflowPipeline initialisé "
f"(ui_detection={_ui_detection_enabled}, "
f"économie ~900 MiB VRAM si False)"
)
# Capture d'écran
screen_capturer = ScreenCapturer()
@@ -356,6 +390,26 @@ def init_system():
else:
logger.info(" Import Excel non disponible (openpyxl manquant ?)")
# 8. LearnActionOrchestrator (P1-LEA SHADOW) — apprentissage Léa-first
global learn_action_orchestrator
try:
from .handlers.learn_action import get_learn_action_orchestrator
def _learn_emit(event: str, payload: Dict[str, Any]) -> None:
try:
socketio.emit(event, payload)
except Exception:
logger.debug("learn emit silenced", exc_info=True)
learn_action_orchestrator = get_learn_action_orchestrator(emit=_learn_emit)
resumed = learn_action_orchestrator.resume_sessions()
logger.info(
f"✓ LearnActionOrchestrator initialisé (sessions reprises: {len(resumed)})"
)
except Exception as e:
logger.warning(f"⚠ LearnActionOrchestrator: {e}")
learn_action_orchestrator = None
# =============================================================================
# Routes Web
@@ -768,6 +822,24 @@ def api_chat():
if not message:
return jsonify({"error": "Message vide"}), 400
# 0. Routage P1-LEA : si une session d'apprentissage est active pour ce
# session_id, l'orchestrateur traite le message ; sinon on tombe sur le
# flux normal (intent_parser / matcher / confirmation).
if learn_action_orchestrator is not None and session_id:
try:
learn_reply = learn_action_orchestrator.handle_chat_message(
session_id, message
)
except Exception:
logger.exception("learn_action_orchestrator error")
learn_reply = None
if learn_reply is not None:
return jsonify({
"session_id": session_id,
"response": learn_reply,
"handler": "learn_action",
})
# 1. Obtenir ou créer la session
session = conversation_manager.get_or_create_session(session_id=session_id)
@@ -1834,7 +1906,13 @@ def _poll_replay_progress(replay_id: str, workflow_name: str, total_actions: int
"completed": completed,
"total": total_actions,
"failed_action": data.get("failed_action"),
"reason": data.get("error") or "Action incertaine",
"reason": (
data.get("pause_message")
or data.get("message")
or data.get("error")
or "Action incertaine"
),
"safety_checks": data.get("safety_checks") or [],
})
was_paused = True
elapsed = 0
@@ -2713,6 +2791,72 @@ def urgences_list():
return jsonify({"orchestrations": list_orchestrations()})
# =============================================================================
# P1-LEA SHADOW — déclenchement d'apprentissage depuis l'extérieur
# =============================================================================
@app.route('/api/learn/start', methods=['POST'])
def api_learn_start():
    """Start a Léa-first learning session on behalf of an external client.

    Used by the Windows button (tkinter ChatWindow) or any other external
    client to kick off the Shadow → Persist cycle on the agent-chat side.

    JSON payload:
        - machine_id (str, required): identifier of the machine on which the
          learning takes place.
        - session_name (str | None, optional): display name of the session.
          Accepted but currently ignored (reserved for future use).
        - user_id (str | None, optional): defaults to "default".
        - trigger_source (str, optional): defaults to "windows_button".

    Responses:
        - 200: {"session_id": str, "state": str, "message": str}
        - 400: machine_id missing, empty, or not a string (this also covers
          a body that is not a JSON object)
        - 503: orchestrator not initialised
        - 500: the orchestrator raised while starting the session

    NOTE(review): this route performs no authentication of its own; confirm
    that whatever sits in front of it (reverse proxy / LAN isolation) is
    sufficient for the deployment.
    """
    if learn_action_orchestrator is None:
        return jsonify({
            "error": "LearnActionOrchestrator non initialisé",
        }), 503

    # get_json(silent=True) returns None on malformed JSON and may return a
    # list/scalar for valid non-object JSON; normalise both to an empty dict
    # so the field validation below answers 400 instead of crashing.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}

    def _clean(key: str, default: str = "") -> str:
        """Return the stripped string stored at *key*, or *default*.

        Non-string values (numbers, lists, null, ...) are treated as absent
        rather than triggering an AttributeError on ``.strip()``.
        """
        value = payload.get(key)
        if isinstance(value, str):
            return value.strip() or default
        return default

    machine_id = _clean("machine_id")
    if not machine_id:
        return jsonify({
            "error": "machine_id requis (str non vide)",
        }), 400

    user_id = _clean("user_id", "default")
    trigger_source = _clean("trigger_source", "windows_button")
    # "session_name" is accepted in the payload but deliberately not read yet.

    try:
        st, reply = learn_action_orchestrator.start_session(
            user_id=user_id,
            trigger_source=trigger_source,
            machine_id=machine_id,
        )
    except Exception as exc:
        logger.exception("api_learn_start failed")
        return jsonify({
            "error": f"démarrage apprentissage impossible: {exc}",
        }), 500

    return jsonify({
        "session_id": st.session_id,
        # The state may be an Enum (use its value) or already a plain value.
        "state": st.state.value if hasattr(st.state, "value") else str(st.state),
        "message": reply,
    })
# =============================================================================
# Main
# =============================================================================

View File

@@ -137,11 +137,31 @@ class AutonomousPlanner:
logger.info(f"AutonomousPlanner initialized (LLM: {self.llm_model}, available: {self.llm_available}, visual: {self._owl_detector is not None}, vlm: {self._vlm_client is not None})")
def _init_visual_detection(self):
"""Initialise le détecteur visuel OWL-v2."""
"""Initialise le détecteur visuel OWL-v2.
Désactivé par défaut depuis 2026-05-25 (C1b) : OWL-v2 chargeait sur
CUDA au boot et retenait ~600 MiB VRAM même en cas d'OOM silencieux,
faussant les benchs perf et contribuant à l'offload Ollama VLM.
Comme `autonomous_planner` est largement non-wired au runtime actif
(cf. mémoire projet : HTTP 410 dépréciés), le défaut est skip.
Activation : `AGENT_CHAT_ENABLE_OWL=1` (env var).
Device : `AGENT_CHAT_OWL_DEVICE=cuda|cpu` (override l'auto-détect).
"""
if os.environ.get("AGENT_CHAT_ENABLE_OWL", "0").strip() not in ("1", "true", "yes"):
logger.info(
"OWL-v2 visual detector skipped at boot "
"(AGENT_CHAT_ENABLE_OWL!=1, économie ~600 MiB VRAM)"
)
return
if VISUAL_DETECTION_AVAILABLE and OwlDetector:
try:
self._owl_detector = OwlDetector(confidence_threshold=0.1)
logger.info("OWL-v2 visual detector initialized")
device = os.environ.get("AGENT_CHAT_OWL_DEVICE", "").strip() or None
self._owl_detector = OwlDetector(
confidence_threshold=0.1,
device=device,
)
logger.info(f"OWL-v2 visual detector initialized (device={device or 'auto'})")
except Exception as e:
logger.warning(f"Could not initialize OWL detector: {e}")
self._owl_detector = None

View File

@@ -0,0 +1,29 @@
"""Agent-chat handlers package.
Contient les orchestrateurs spécialisés (apprentissage Léa, etc.) appelés
par `agent_chat.app` quand le routage normal d'intent ne suffit pas.
"""
from .learn_action import (
LearnActionOrchestrator,
LearnState,
LearnIntent,
LearnIntentParser,
OptionCFormatter,
StreamingClient,
StateStore,
PersistPayloadBuilder,
get_learn_action_orchestrator,
)
__all__ = [
"LearnActionOrchestrator",
"LearnState",
"LearnIntent",
"LearnIntentParser",
"OptionCFormatter",
"StreamingClient",
"StateStore",
"PersistPayloadBuilder",
"get_learn_action_orchestrator",
]

File diff suppressed because it is too large Load Diff

View File

@@ -56,6 +56,13 @@ OLLAMA_HOST = os.getenv("RPA_OLLAMA_HOST", "localhost")
# Configurable via variable d'environnement RPA_API_TOKEN
API_TOKEN = os.environ.get("RPA_API_TOKEN", "")
# --- Orchestrateur Léa-first (agent-chat Linux) ---
# Endpoint racine du service agent-chat qui héberge POST /api/learn/start
# (P1-LEA-SHADOW). Configurable via RPA_AGENT_CHAT_URL.
# Défaut : localhost:5004 (même machine en dev). En POC clinique, doit
# pointer vers le DGX Spark (ex. http://agent-chat.dgx-local:5004).
AGENT_CHAT_URL = os.environ.get("RPA_AGENT_CHAT_URL", "http://localhost:5004")
# Paramètres de session
MAX_SESSION_DURATION_S = 60 * 60 # 1 heure
SESSIONS_ROOT = BASE_DIR / "sessions"

View File

@@ -56,6 +56,8 @@ class EventCaptorV1:
# État des touches modificatrices
self.modifiers = set()
self._pending_standalone_win = False
self._suppress_release_only_win_combo = False
# Tracking du focus fenêtre
self.last_window = None
@@ -327,6 +329,56 @@ class EventCaptorV1:
return {"kind": "key", "name": key.name}
return {"kind": "unknown", "str": str(key)}
@staticmethod
def _raw_key_name(raw_key: Dict[str, Any]) -> Optional[str]:
"""Nom lisible depuis un raw_key sérialisé."""
if raw_key.get("kind") == "vk":
char = raw_key.get("char")
if char and len(str(char)) == 1:
return str(char).lower()
if raw_key.get("kind") == "key":
name = raw_key.get("name")
return str(name).lower() if name else None
return None
def _emit_release_only_windows_combo(self) -> bool:
"""Infère Win+<touche> si Windows/NoMachine n'a livré que les releases.
Certaines sessions ne remontent pas les press de Win+S via pynput,
mais livrent ensuite release('s') puis release('cmd'). Sans cette
inférence ciblée, le geste système est perdu et les releases polluent
le prochain text_input.
"""
with self._text_lock:
raw_keys = list(self._raw_key_buffer)
if len(raw_keys) < 2:
return False
cmd_names = {"cmd", "cmd_l", "cmd_r"}
last = raw_keys[-1]
if last.get("action") != "release" or self._raw_key_name(last) not in cmd_names:
return False
combo_key = None
for raw in reversed(raw_keys[:-1]):
if raw.get("action") != "release":
continue
name = self._raw_key_name(raw)
if name and name not in self._MODIFIER_KEY_NAMES:
combo_key = name
break
if not combo_key:
return False
self._raw_key_buffer.clear()
event = {
"type": "key_combo",
"keys": ["win", combo_key],
"raw_keys": raw_keys,
"timestamp": time.time(),
}
self._inject_screen_metadata(event)
self.on_event(event)
return True
def _on_press(self, key):
# TOUJOURS enregistrer le press brut dans le buffer raw_keys
with self._text_lock:
@@ -344,6 +396,7 @@ class EventCaptorV1:
self.modifiers.add("shift")
elif key in (Key.cmd, Key.cmd_l, Key.cmd_r):
self.modifiers.add("win")
self._pending_standalone_win = True
# --- Combos avec modificateur (sauf Shift seul) ---
# Shift seul n'est pas un « vrai » modificateur pour les combos :
@@ -369,6 +422,9 @@ class EventCaptorV1:
# Ne PAS émettre de combo si c'est un modificateur seul
# (ex: appui sur Ctrl sans autre touche = pas de combo)
if key_name and key_name not in self._MODIFIER_KEY_NAMES:
self._pending_standalone_win = False
if "win" in self.modifiers:
self._suppress_release_only_win_combo = True
# Un combo interrompt la saisie texte en cours
self._flush_text_buffer()
# Attacher les raw_keys accumulés (press des modificateurs + press de la touche)
@@ -400,6 +456,7 @@ class EventCaptorV1:
- Enter / Tab : flush immédiat + émission de l'événement
- Escape : vide le buffer sans émettre
"""
escape_raw_keys = None
with self._text_lock:
# --- Touches spéciales ---
if key == Key.backspace:
@@ -411,12 +468,14 @@ class EventCaptorV1:
if key == Key.esc:
# Annuler la saisie en cours
self._text_buffer.clear()
self._raw_key_buffer.clear()
self._text_start_pos = None
self._cancel_flush_timer()
return
escape_raw_keys = list(self._raw_key_buffer)
self._raw_key_buffer.clear()
# Émettre hors lock après le bloc critique.
pass
if key in (Key.enter, Key.tab):
elif key in (Key.enter, Key.tab):
# Flush immédiat — on relâche le lock avant d'appeler
# _flush_text_buffer (qui prend aussi le lock)
pass # on sort du with et on flush après
@@ -454,6 +513,18 @@ class EventCaptorV1:
# Touche spéciale non gérée (F1, Insert, etc.) — on ignore
return
if escape_raw_keys is not None:
event = {
"type": "key_combo",
"keys": ["escape"],
"timestamp": time.time(),
}
if escape_raw_keys:
event["raw_keys"] = escape_raw_keys
self._inject_screen_metadata(event)
self.on_event(event)
return
# Si on arrive ici, c'est Enter ou Tab → flush le buffer en cours
# puis émettre le caractère spécial comme text_input séparé
self._flush_text_buffer()
@@ -551,6 +622,35 @@ class EventCaptorV1:
**self._encode_key(key),
})
if key in (Key.cmd, Key.cmd_l, Key.cmd_r) and self._suppress_release_only_win_combo:
with self._text_lock:
self._raw_key_buffer.clear()
self._pending_standalone_win = False
self._suppress_release_only_win_combo = False
self.modifiers.discard("win")
return
if key in (Key.cmd, Key.cmd_l, Key.cmd_r) and self._emit_release_only_windows_combo():
self._pending_standalone_win = False
self._suppress_release_only_win_combo = False
self.modifiers.discard("win")
return
if key in (Key.cmd, Key.cmd_l, Key.cmd_r) and self._pending_standalone_win:
with self._text_lock:
raw_keys = list(self._raw_key_buffer)
self._raw_key_buffer.clear()
event = {
"type": "key_combo",
"keys": ["win"],
"raw_keys": raw_keys,
"timestamp": time.time(),
}
self._inject_screen_metadata(event)
self.on_event(event)
self._pending_standalone_win = False
self._suppress_release_only_win_combo = False
if key in (Key.ctrl, Key.ctrl_l, Key.ctrl_r):
self.modifiers.discard("ctrl")
elif key in (Key.alt, Key.alt_l, Key.alt_r):
@@ -559,6 +659,8 @@ class EventCaptorV1:
self.modifiers.discard("shift")
elif key in (Key.cmd, Key.cmd_l, Key.cmd_r):
self.modifiers.discard("win")
self._pending_standalone_win = False
self._suppress_release_only_win_combo = False
# ----------------------------------------------------------------
# Métadonnées système

File diff suppressed because it is too large Load Diff

View File

@@ -84,6 +84,15 @@ class GroundingEngine:
if by_role in {"start_button"}:
return False
has_anchor = bool(target_spec.get("anchor_image_base64"))
context_hints = target_spec.get("context_hints") or {}
has_window_or_text_hint = any(
str(target_spec.get(key, "") or "").strip()
for key in ("window_title", "by_text", "vlm_description")
) or bool(str(context_hints.get("window_title", "") or "").strip())
if has_anchor and not has_window_or_text_hint and not by_role:
return False
return True
@staticmethod
@@ -174,6 +183,26 @@ class GroundingEngine:
hints.append(variant)
return hints
@staticmethod
def _server_rejects_text_fallback(raw: Optional[Dict[str, Any]]) -> bool:
"""Dire si un rejet serveur doit bloquer le fallback texte local.
Un rejet explicite n'est pas un simple "non trouvé": le serveur a vu
un candidat et l'a refusé pour une raison de qualité/zone. Refaire une
recherche OCR large côté client contournerait ce garde-fou.
"""
if not raw or raw.get("resolved"):
return False
reason = str(raw.get("reason") or "")
method = str(raw.get("method") or "")
return (
method.startswith("rejected_")
or reason.startswith("close_tab_")
or reason.startswith("drift_")
or "below_threshold" in reason
)
def _window_crop_matches_target_visually(
self,
screenshot_b64: str,
@@ -331,11 +360,31 @@ class GroundingEngine:
cap_w = window_rect["width"] if window_rect else screen_width
cap_h = window_rect["height"] if window_rect else screen_height
skip_text_fallback_after_server_reject = False
for strategy in strategies:
if (
strategy == "vlm_local"
and skip_text_fallback_after_server_reject
and target_spec.get("by_text")
):
by_text = target_spec.get("by_text", "")
logger.info(
"[GROUNDING] Rejet serveur explicite pour '%s'"
"skip fallback local hybrid_text_direct",
by_text,
)
print(
f" [GROUNDING] Rejet serveur explicite pour '{by_text}' "
"→ pas de fallback texte local"
)
continue
result = self._try_strategy(
strategy, server_url, screenshot_b64, target_spec,
fallback_x, fallback_y, cap_w, cap_h,
)
if strategy == "server" and self._server_rejects_text_fallback(result.raw):
skip_text_fallback_after_server_reject = True
if result.found:
# ── Conversion coords fenêtre → coords écran ──
if window_rect:
@@ -429,6 +478,14 @@ class GroundingEngine:
detail=raw.get("matched_element", {}).get("label", ""),
raw=raw,
)
if raw:
return GroundingResult(
found=False,
method=raw.get("method", "server"),
score=raw.get("score", 0.0),
detail=raw.get("reason", "server: pas trouvé"),
raw=raw,
)
elif strategy == "template":
anchor_b64 = target_spec.get("anchor_image_base64", "")

View File

@@ -121,10 +121,7 @@ class AgentV1:
# Wiring ChatWindow → Executor pour Plan B (pause_message → bulle interactive)
# Permet à l'executor d'afficher une bulle paused dans la fenêtre Léa V1
# quand le serveur signale replay_paused=True via /replay/next.
try:
self._executor._chat_window_ref = self._chat_window
except Exception:
logger.debug("Wiring chat_window→executor échoué (non bloquant)", exc_info=True)
self._wire_chat_window_to_executor()
# Boucles permanentes (pas besoin de session active)
self.running = True
@@ -154,6 +151,15 @@ class AgentV1:
shared_state=self._state,
)
def _wire_chat_window_to_executor(self) -> None:
"""Relie l'executor courant à la ChatWindow pour les pauses supervisees."""
if self._executor is None or self._chat_window is None:
return
try:
self._executor._chat_window_ref = self._chat_window
except Exception:
logger.debug("Wiring chat_window->executor echoue (non bloquant)", exc_info=True)
def _delayed_cleanup(self):
"""Nettoyage en arrière-plan après 30s pour ne pas bloquer le démarrage."""
time.sleep(30)
@@ -224,6 +230,7 @@ class AgentV1:
# Initialiser l'executeur partage
self._executor = ActionExecutorV1()
self._wire_chat_window_to_executor()
self.shot_counter = 0
self.running = True

View File

@@ -0,0 +1,147 @@
"""
Client HTTP minimal pour l'orchestrateur Léa-first (agent-chat Linux).
Rebranchement P1-LEA-SHADOW : le bouton "Apprenez-moi" côté Windows déclenche
la création d'une session d'apprentissage côté agent-chat (REST) AVANT de
lancer la capture locale. Le pipeline streaming (capture frames/événements
via start_recording) n'est PAS modifié — seule la prise de contact initiale
avec Léa change.
Contrat :
POST {AGENT_CHAT_URL}/api/learn/start
Headers : Authorization: Bearer <RPA_API_TOKEN>, Content-Type: application/json
Body : { machine_id, session_name, user_id?, trigger_source }
Réponse : { session_id, state, message }
Politique :
- Timeout 10s (connect + read)
- Retry x2 avec backoff 0.5s puis 1.0s
- En cas d'échec définitif : lève LeaOrchestratorError (le caller doit
basculer en mode dégradé : start_recording local sans assistance).
"""
from __future__ import annotations
import logging
import time
from dataclasses import dataclass
from typing import Optional
logger = logging.getLogger(__name__)

# HTTP timeout (connect + read) applied to each attempt, in seconds.
_HTTP_TIMEOUT_S = 10.0

# Total number of attempts (1 initial try + 2 retries).
_MAX_ATTEMPTS = 3

# Progressive delays, in seconds, slept between consecutive attempts.
_BACKOFF_S = (0.5, 1.0)


@dataclass(frozen=True)
class LearnStartResponse:
    """Normalised response of POST /api/learn/start."""

    session_id: str
    state: str
    message: str


class LeaOrchestratorError(RuntimeError):
    """Definitive failure while communicating with the Léa orchestrator."""


def start_learning_session(
    base_url: str,
    *,
    machine_id: str,
    session_name: str,
    api_token: str = "",
    user_id: Optional[str] = None,
    trigger_source: str = "windows_button",
    timeout_s: float = _HTTP_TIMEOUT_S,
    max_attempts: int = _MAX_ATTEMPTS,
    backoff_s: tuple = _BACKOFF_S,
) -> LearnStartResponse:
    """Start a learning session through the agent-chat orchestrator.

    Args:
        base_url: Root URL of agent-chat (e.g. http://localhost:5004).
        machine_id: Unique identifier of the Windows workstation.
        session_name: Human-readable task name typed by the user.
        api_token: Bearer token. Empty => the Authorization header is omitted.
        user_id: Optional user identifier; omitted from the body when falsy.
        trigger_source: What triggered the session (windows_button, tray, ...).
        timeout_s: Timeout passed to the HTTP client for each attempt.
        max_attempts: Total number of attempts (initial try + retries).
        backoff_s: Delays in seconds between attempts. When shorter than
            ``max_attempts - 1`` the last delay is reused; when empty, the
            retries happen without waiting.

    Returns:
        The normalised LearnStartResponse.

    Raises:
        LeaOrchestratorError: httpx is not installed, every attempt failed at
            the transport/HTTP level, or the server answered successfully
            with a body that carries no ``session_id``.
    """
    # Local import: httpx may be missing on some workstations. A clear error
    # is preferable to an ImportError chain at button-click time.
    try:
        import httpx
    except ImportError as exc:  # pragma: no cover (depends on the venv)
        raise LeaOrchestratorError(
            "httpx non disponible — installer httpx>=0.27 sur le poste Windows."
        ) from exc

    url = base_url.rstrip("/") + "/api/learn/start"
    payload = {
        "machine_id": machine_id,
        "session_name": session_name,
        "trigger_source": trigger_source,
    }
    if user_id:
        payload["user_id"] = user_id

    headers = {"Content-Type": "application/json"}
    if api_token:
        headers["Authorization"] = f"Bearer {api_token}"

    last_exc: Optional[Exception] = None
    for attempt in range(max_attempts):
        try:
            logger.info(
                "POST %s (tentative %d/%d) machine_id=%s session=%s",
                url, attempt + 1, max_attempts, machine_id, session_name,
            )
            with httpx.Client(timeout=timeout_s) as client:
                resp = client.post(url, json=payload, headers=headers)
                resp.raise_for_status()
                data = resp.json()
        except Exception as exc:  # noqa: BLE001 — retry on any network/HTTP error
            last_exc = exc
            logger.warning(
                "Echec tentative %d/%d POST %s : %s",
                attempt + 1, max_attempts, url, exc,
            )
            if attempt < max_attempts - 1:
                if attempt < len(backoff_s):
                    delay = backoff_s[attempt]
                else:
                    # Reuse the last configured delay; an empty tuple means
                    # "no wait" instead of an IndexError.
                    delay = backoff_s[-1] if backoff_s else 0.0
                if delay > 0:
                    time.sleep(delay)
            continue

        # The HTTP exchange succeeded. A body without session_id is a
        # contract violation, not a transient failure: re-POSTing could
        # create duplicate sessions on the server, so fail immediately.
        session_id = data.get("session_id", "") if isinstance(data, dict) else ""
        if not session_id:
            raise LeaOrchestratorError(
                f"Réponse invalide (pas de session_id) : {data!r}"
            )

        state = data.get("state", "")
        message = data.get("message", "")
        logger.info(
            "Session Léa démarrée : session_id=%s state=%s",
            session_id, state,
        )
        return LearnStartResponse(
            session_id=str(session_id),
            state=str(state),
            message=str(message),
        )

    # Chain the last transport error so the root cause stays in the traceback.
    raise LeaOrchestratorError(
        f"Echec définitif POST {url} après {max_attempts} tentatives : {last_exc}"
    ) from last_exc

View File

@@ -63,8 +63,14 @@ JPEG_QUALITY = 85
# Taille max de la queue (backpressure)
QUEUE_MAX_SIZE = 100
# Types d'événements à ne jamais dropper
PRIORITY_EVENT_TYPES = {"click", "key", "scroll", "action", "screenshot"}
# Types d'événements à ne jamais dropper.
# Les noms historiques sont conservés, mais les événements réels du captor
# Agent V1 sont mouse_click/key_combo/text_input/mouse_scroll.
PRIORITY_EVENT_TYPES = {
"click", "key", "scroll", "action", "screenshot",
"mouse_click", "double_click", "key_combo", "key_press",
"text_input", "mouse_scroll",
}
# Purge locale après ACK serveur (Partie A de l'audit)
# Activé par défaut : le serveur conserve déjà les screenshots 180 jours

View File

@@ -9,6 +9,7 @@ Tourne dans son propre thread daemon pour ne pas bloquer pystray.
import logging
import os
import math
import threading
import time
from datetime import datetime
@@ -121,7 +122,7 @@ def _tpl_done(payload: Dict[str, Any]) -> tuple:
def _tpl_need_confirm(payload: Dict[str, Any]) -> tuple:
action = payload.get("action") or {}
desc = action.get("description") if isinstance(action, dict) else None
title = desc or "Validation requise"
title = desc or "J'attends ton accord avant de continuer"
return ("?", ACTION_ICON_RUN, str(title))
@@ -867,11 +868,19 @@ class ChatWindow:
pass
except Exception:
logger.debug("force-show chat_window silenced", exc_info=True)
# UX fix mai 2026 : repartir d'un chat vide pour focaliser
# l'attention sur la question (clear visuel uniquement,
# self._messages reste intact pour la traçabilité debug).
self._clear_chat_history()
self._render_paused_bubble(payload)
try:
# UX fix mai 2026 : repartir d'un chat vide pour focaliser
# l'attention sur la question (clear visuel uniquement,
# self._messages reste intact pour la traçabilité debug).
self._clear_chat_history()
self._render_paused_bubble(payload)
except Exception:
logger.exception("render paused bubble failed; using fallback")
try:
self._clear_chat_history()
self._render_paused_fallback_bubble(payload)
except Exception:
logger.debug("render paused fallback silenced", exc_info=True)
self._root.after(0, _show_and_render)
@@ -895,7 +904,11 @@ class ChatWindow:
logger.debug("clear chat history silenced", exc_info=True)
@staticmethod
def _compute_paused_bubble_height(reason_str: str) -> tuple:
def _compute_paused_bubble_height(
reason_str: str,
chars_per_line: int = 52,
max_rows: int = 14,
) -> tuple:
"""Calcule la hauteur du Text (en lignes) + si une scrollbar est
nécessaire pour le message d'une bulle paused.
@@ -910,11 +923,11 @@ class ChatWindow:
if not reason_str:
return 2, False
text = str(reason_str)
# Estimation : ~60 chars/ligne effectifs avec wraplength.
wrapped_lines = (len(text) // 60) + 1
explicit_lines = text.count("\n") + 1
estimated = max(wrapped_lines, explicit_lines)
cap = 12
chars_per_line = max(24, int(chars_per_line or 52))
estimated = 0
for raw_line in text.splitlines() or [""]:
estimated += max(1, math.ceil(len(raw_line) / chars_per_line))
cap = max(2, int(max_rows or 14))
height = max(2, min(cap, estimated))
# Scrollbar dès que le cap est atteint OU contenu long (filet
# textuel : ≥ 200 chars implique souvent un débordement visuel
@@ -922,6 +935,46 @@ class ChatWindow:
needs_scroll = (estimated >= cap) or (len(text) > 200)
return height, needs_scroll
def _paused_text_layout(self) -> tuple:
"""Retourne ``(wrap_px, chars_per_line, max_rows)`` pour la bulle pause.
La fenêtre Léa est souvent redimensionnée à ~380px de large sur le
poste Windows. Les anciennes estimations fixes calculaient trop peu
de lignes et tronquaient le message. On part donc des dimensions
réelles du canvas et de la métrique de la police Tk.
"""
canvas_w = 0
canvas_h = 0
try:
canvas_w = int(self._canvas.winfo_width()) if self._canvas is not None else 0
canvas_h = int(self._canvas.winfo_height()) if self._canvas is not None else 0
except Exception:
canvas_w = canvas_h = 0
# Marges: container + padding inner + petite marge droite. La bulle
# de pause est une alerte critique, elle utilise donc presque toute
# la largeur disponible sur les fenêtres étroites.
wrap_px = max(220, canvas_w - (2 * MARGIN) - 52) if canvas_w else 360
avg_char = 8
line_px = 22
try:
from tkinter import font as tkfont
font = tkfont.Font(font=FONT_MSG)
avg_char = max(6, font.measure("n"))
line_px = max(18, font.metrics("linespace"))
except Exception:
pass
chars_per_line = max(24, int(wrap_px / avg_char))
# Réserver titre, metadata, boutons, feedback et padding. Même sur
# une petite fenêtre, on garde assez de lignes pour ne pas couper un
# message d'erreur standard.
max_rows = 14
if canvas_h:
max_rows = max(5, min(18, int((canvas_h - 145) / line_px)))
return wrap_px, chars_per_line, max_rows
def _render_paused_bubble(self, payload: Dict[str, Any]) -> None:
tk = self._tk
if getattr(self, "_msg_frame", None) is None:
@@ -941,7 +994,7 @@ class ChatWindow:
container, bg=PAUSED_BG, padx=14, pady=12,
highlightbackground=PAUSED_BORDER, highlightthickness=2,
)
inner.pack(anchor=tk.W, padx=(0, 50), fill=tk.X)
inner.pack(anchor=tk.W, padx=(0, 12), fill=tk.X)
tk.Label(
inner, text=f"⏸ Pause supervisée • {now}",
@@ -949,31 +1002,44 @@ class ChatWindow:
font=("Segoe UI", 12, "bold"), anchor="w",
).pack(fill=tk.X, anchor=tk.W)
# Message scrollable pour les longs reasons (ex: 200+ chars depuis le serveur).
# On utilise un Text en mode read-only avec hauteur calculée selon la longueur.
# Patch 22 mai 2026 : prendre en compte les \n explicites (titres
# fenêtre / patterns) et activer la scrollbar dès que le cap de
# hauteur est atteint — sinon les bulles de pause étaient
# tronquées visuellement sans aucun ascenseur visible.
# Message borné et scrollable : sur une fenêtre Léa étroite, une
# bulle trop haute fait disparaître le début du diagnostic hors du
# viewport. On garde donc la bulle compacte et on scrolle le texte.
reason_str = str(reason)
height_lines, needs_scroll = self._compute_paused_bubble_height(reason_str)
msg_frame = tk.Frame(inner, bg=PAUSED_BG)
msg_frame.pack(fill=tk.X, anchor=tk.W, pady=(6, 0))
reason_text = tk.Text(
msg_frame, bg=PAUSED_BG, fg=PAUSED_FG,
font=FONT_MSG, wrap=tk.WORD, bd=0, height=height_lines,
highlightthickness=0, relief=tk.FLAT, cursor="arrow",
_wrap_px, chars_per_line, max_rows = self._paused_text_layout()
text_rows, needs_text_scroll = self._compute_paused_bubble_height(
reason_str,
chars_per_line=chars_per_line,
max_rows=max_rows,
)
reason_text.insert("1.0", reason_str)
reason_text.configure(state="disabled")
reason_text.pack(side=tk.LEFT, fill=tk.X, expand=True)
if needs_scroll:
reason_scroll = tk.Scrollbar(
msg_frame, orient=tk.VERTICAL,
command=reason_text.yview, width=8,
text_frame = tk.Frame(inner, bg=PAUSED_BG)
text_frame.pack(fill=tk.X, anchor=tk.W, pady=(6, 0))
reason_msg = tk.Text(
text_frame,
height=text_rows,
wrap=tk.WORD,
bg=PAUSED_BG,
fg=PAUSED_FG,
font=FONT_MSG,
bd=0,
highlightthickness=0,
relief=tk.FLAT,
padx=0,
pady=0,
cursor="arrow",
)
reason_msg.insert("1.0", reason_str)
reason_msg.configure(state="disabled")
reason_msg.pack(side=tk.LEFT, fill=tk.X, expand=True)
if needs_text_scroll:
scrollbar = tk.Scrollbar(
text_frame,
orient=tk.VERTICAL,
command=reason_msg.yview,
width=12,
)
reason_text.configure(yscrollcommand=reason_scroll.set)
reason_scroll.pack(side=tk.RIGHT, fill=tk.Y)
reason_msg.configure(yscrollcommand=scrollbar.set)
scrollbar.pack(side=tk.RIGHT, fill=tk.Y, padx=(6, 0))
tk.Label(
inner, text=f"{workflow} — étape {completed}/{total}",
@@ -1018,6 +1084,89 @@ class ChatWindow:
# Scroll automatique vers la nouvelle bulle (visible immédiatement)
self._scroll_to_bottom()
def _render_paused_fallback_bubble(self, payload: Dict[str, Any]) -> None:
    """Draw a minimal supervised-pause bubble when the rich bubble failed.

    The bubble shows a title with the current time, the pause reason, the
    workflow progress line and two buttons (resume / abort). The created
    widgets are stored in ``self._active_paused_bubble`` and the view is
    scrolled to the bottom.

    Args:
        payload: Pause description. Keys read here: ``replay_id``,
            ``workflow``, ``reason``, ``completed`` and ``total``; each one
            has a display-safe default.
    """
    tk = self._tk
    if getattr(self, "_msg_frame", None) is None:
        # No message area to draw into: nothing to render.
        return

    # --- Payload, with defaults that remain readable in the UI ---
    replay_id = str(payload.get("replay_id", "") or "")
    workflow = payload.get("workflow", "?")
    reason = str(
        payload.get("reason")
        or "Action incertaine - j'ai besoin de votre validation."
    )
    completed = payload.get("completed", 0)
    total = payload.get("total", "?")
    now = datetime.now().strftime("%H:%M")

    # --- Outer container and bordered inner frame ---
    container = tk.Frame(self._msg_frame, bg=BG_COLOR)
    container.pack(fill=tk.X, padx=MARGIN, pady=6)
    inner = tk.Frame(
        container,
        bg=PAUSED_BG,
        padx=14,
        pady=12,
        highlightbackground=PAUSED_BORDER,
        highlightthickness=2,
    )
    inner.pack(anchor=tk.W, padx=(0, 12), fill=tk.X)

    title_label = tk.Label(
        inner,
        text=f"Pause supervisee - {now}",
        bg=PAUSED_BG,
        fg=PAUSED_FG,
        font=("Segoe UI", 12, "bold"),
        anchor="w",
    )
    title_label.pack(fill=tk.X, anchor=tk.W)

    # Wrap width follows the canvas width when it can be measured; any
    # failure keeps the 360 px default (best-effort, never blocks rendering).
    wrap_px = 360
    try:
        canvas = self._canvas
        if canvas is not None:
            wrap_px = max(220, int(canvas.winfo_width()) - 80)
    except Exception:
        pass

    reason_label = tk.Label(
        inner,
        text=reason,
        bg=PAUSED_BG,
        fg=PAUSED_FG,
        font=FONT_MSG,
        wraplength=wrap_px,
        justify=tk.LEFT,
        anchor=tk.W,
    )
    reason_label.pack(fill=tk.X, anchor=tk.W, pady=(6, 0))

    progress_label = tk.Label(
        inner,
        text=f"{workflow} - etape {completed}/{total}",
        bg=PAUSED_BG,
        fg=TIMESTAMP_FG,
        font=FONT_TIMESTAMP,
        anchor="w",
    )
    progress_label.pack(fill=tk.X, anchor=tk.W, pady=(4, 8))

    # --- Action buttons ---
    btn_frame = tk.Frame(inner, bg=PAUSED_BG)
    btn_frame.pack(fill=tk.X, anchor=tk.W)

    def resume_clicked():
        return self._on_paused_resume(replay_id)

    def abort_clicked():
        return self._on_paused_abort(replay_id)

    # Options shared by both buttons.
    shared_button_options = {
        "fg": "white",
        "font": FONT_QUICK_BTN,
        "padx": 14,
        "pady": 4,
        "bd": 0,
        "cursor": "hand2",
        "activeforeground": "white",
    }
    btn_resume = tk.Button(
        btn_frame,
        text="Continuer",
        bg=PAUSED_BTN_RESUME_BG,
        activebackground=PAUSED_BTN_RESUME_HOVER,
        command=resume_clicked,
        **shared_button_options,
    )
    btn_resume.pack(side=tk.LEFT, padx=(0, 8))
    btn_abort = tk.Button(
        btn_frame,
        text="Annuler",
        bg=PAUSED_BTN_ABORT_BG,
        activebackground=PAUSED_BTN_ABORT_HOVER,
        command=abort_clicked,
        **shared_button_options,
    )
    btn_abort.pack(side=tk.LEFT)

    # Empty label kept in the bubble registry so feedback can be shown later.
    feedback_label = tk.Label(
        inner,
        text="",
        bg=PAUSED_BG,
        fg=PAUSED_FG,
        font=FONT_TIMESTAMP,
        anchor="w",
    )
    feedback_label.pack(fill=tk.X, anchor=tk.W, pady=(6, 0))

    self._active_paused_bubble = {
        "container": container,
        "inner": inner,
        "btn_resume": btn_resume,
        "btn_abort": btn_abort,
        "feedback_label": feedback_label,
        "replay_id": replay_id,
    }
    self._scroll_to_bottom()
def _close_active_paused_bubble(self, reason: str) -> None:
if self._active_paused_bubble is None or self._root is None:
return
@@ -1524,8 +1673,19 @@ class ChatWindow:
self._add_lea_message(
f"C'est parti ! Montrez-moi comment faire \u00ab {name} \u00bb."
)
# --- P1-LEA-SHADOW : d\u00e9clencher d'abord l'orchestrateur L\u00e9a Linux ---
# On contacte agent-chat AVANT la capture locale : si la session
# serveur d\u00e9marre, on r\u00e9cup\u00e8re un session_id + un message d'accueil
# de L\u00e9a qu'on affiche dans le chat. Si \u00e9chec : mode d\u00e9grad\u00e9
# (capture locale uniquement, sans assistance conversationnelle).
self._start_lea_orchestrator_session(name)
# --- Comportement historique pr\u00e9serv\u00e9 : capture locale ---
# Le pipeline streaming (frames/\u00e9v\u00e9nements) reste pilot\u00e9 par
# agent_v1 local. L'orchestrateur Linux ne touche PAS \u00e0 la
# capture, il pilote uniquement le dialogue de fin de session.
try:
# Utiliser l'etat partage si disponible (synchronise le systray)
if self._shared_state is not None:
self._shared_state.start_recording(name)
elif self._on_start_callback is not None:
@@ -1533,6 +1693,60 @@ class ChatWindow:
except Exception as e:
self._add_lea_message(f"Oups, un probl\u00e8me : {e}")
def _start_lea_orchestrator_session(self, session_name: str) -> None:
    """Ask the agent-chat service to open a learning session (P1-LEA-SHADOW).

    Fail-safe by design: an import failure, a ``LeaOrchestratorError`` or
    any other exception is logged, reported in the chat, and the method
    returns without raising, so the caller can go on with local capture.

    Args:
        session_name: Name of the learning session, forwarded to the
            orchestrator client.
    """
    # NOTE(review): the client call below is made inline; confirm that
    # start_learning_session enforces a timeout.
    degraded_notice = (
        "\u26a0 Impossible de joindre L\u00e9a serveur. "
        "L'apprentissage continue localement, mais sans assistance "
        "conversationnelle."
    )

    # Lazy imports: a missing config or client module must not break the UI.
    try:
        from ..config import AGENT_CHAT_URL, API_TOKEN, MACHINE_ID
        from ..network.lea_orchestrator_client import (
            LeaOrchestratorError,
            start_learning_session,
        )
    except Exception as import_error:  # pragma: no cover (import-time)
        logger.error(
            "Impossible de charger le client orchestrateur L\u00e9a : %s",
            import_error,
        )
        self._add_lea_message(degraded_notice)
        return

    try:
        response = start_learning_session(
            AGENT_CHAT_URL,
            machine_id=MACHINE_ID,
            session_name=session_name,
            api_token=API_TOKEN,
            trigger_source="windows_button",
        )
    except LeaOrchestratorError as unreachable:
        logger.error("Orchestrateur L\u00e9a injoignable : %s", unreachable)
        self._add_lea_message(degraded_notice)
        return
    except Exception as unexpected:  # noqa: BLE001 - defensive boundary
        logger.exception("Erreur inattendue orchestrateur L\u00e9a")
        self._add_lea_message(
            f"\u26a0 Erreur orchestrateur L\u00e9a : {unexpected}. "
            "L'apprentissage continue localement."
        )
        return

    # Show the greeting returned by the server, when there is one.
    greeting = response.message
    if greeting:
        self._add_lea_message(greeting)
    logger.info(
        "Session orchestrateur L\u00e9a OK : id=%s state=%s",
        response.session_id,
        response.state,
    )
def _on_quick_tasks(self) -> None:
"""Bouton Lancer — demande ce que L\u00e9a sait faire."""
self._add_user_message("Qu'est-ce que vous savez faire ?")

View File

@@ -0,0 +1,484 @@
"""Contrat de lisibilite des messages visibles par l'humain.
Ce module ne branche encore aucun point runtime. Il fournit une brique pure et
testable pour que les sorties UI de Lea puissent refuser les messages trop
generiques ou trop techniques avant affichage.
"""
from __future__ import annotations
import logging
import re
import unicodedata
from dataclasses import dataclass
from typing import Iterable, Mapping
logger = logging.getLogger(__name__)
# Labels that open the four lines of a supervised-pause message, in order.
SUPERVISED_PAUSE_LABELS = (
"J'essaie de",
"J'attendais",
"Je vois",
"Peux-tu",
)
# Upper bound on the length of any visible message (after stripping).
MAX_VISIBLE_MESSAGE_CHARS = 720
# Allowed length range for each parsed field of a supervised pause.
MAX_FIELD_CHARS = 180
MIN_FIELD_CHARS = 4
# Wording considered too vague to show to a human. Matched as substrings of
# the accent-folded, case-folded message; accented and unaccented spellings
# fold to the same key and are only checked once.
_GENERIC_PHRASES = (
"un element",
"un élément",
"l'element",
"l'élément",
"element inconnu",
"élément inconnu",
"cette action",
"cette cible",
"cible inconnue",
"validation requise",
"action requise",
)
# A supervised-pause request must contain at least one of these French hints
# (substring match on the folded text) to count as actionable.
_ACTIONABLE_FRENCH_HINTS = (
"peux-tu",
"cliquer",
"ouvrir",
"selectionner",
"sélectionner",
"choisir",
"saisir",
"corriger",
"montrer",
"indiquer",
"valider",
"fermer",
"placer",
"mettre",
"reprendre",
)
# English/technical vocabulary that must not leak into visible messages.
# Matched as substrings of the folded text, not as whole words.
_TECHNICAL_ENGLISH_TERMS = (
"target_not_found",
"target not found",
"no_screen_change",
"no screen change",
"wrong_window",
"wrong window",
"validation required",
"retry",
"fallback",
"timeout",
"screenshot",
"validator",
"failure",
"failed",
"resolve target",
"postcondition",
"please",
"click",
"button",
"target",
"expected",
"actual",
"observed",
)
# Raw payload field names, matched as whole words, case-insensitively.
_TECHNICAL_FIELD_RE = re.compile(
r"\b(?:"
r"action_id|replay_id|session_id|workflow_id|machine_id|target_spec|"
r"vlm_description|resolution_method|resolution_score|retry_count|"
r"x_pct|y_pct|screenshot_b64|expected_window_title|current_action_index"
r")\b",
re.IGNORECASE,
)
# Identifiers of the form "<prefix>_<at least four more characters>", where the
# prefix is one of the listed lowercase words (this pattern is case-sensitive).
_TECHNICAL_IDENTIFIER_RE = re.compile(
r"\b(?:action|replay|session|sess|workflow|node|edge|target|retry|"
r"precheck|wait|trace|event|machine|run)_[A-Za-z0-9][A-Za-z0-9_.:-]{3,}\b"
)
# Canonical 8-4-4-4-12 hexadecimal UUIDs.
_UUID_RE = re.compile(
r"\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b",
re.IGNORECASE,
)
# Runs of 16 or more hexadecimal digits (reported as a raw hash).
_LONG_HEX_RE = re.compile(r"\b[0-9a-f]{16,}\b", re.IGNORECASE)
# Coordinate pairs such as "(120, 456)": two numbers of 2 to 5 digits.
_PIXEL_TUPLE_RE = re.compile(r"\(\s*\d{2,5}\s*,\s*\d{2,5}\s*\)")
# Geometry assignments such as "x=12", "width: 300" or "y_pct = 0.5".
_PIXEL_FIELD_RE = re.compile(
r"\b(?:x|y|left|top|width|height|w|h|x_pct|y_pct)\s*[=:]\s*-?\d+(?:[.,]\d+)?",
re.IGNORECASE,
)
# Pixel sizes such as "120px" or "120 px".
_PX_RE = re.compile(r"\b\d{2,5}\s*px\b", re.IGNORECASE)
# Raw scores such as "score: 0.87" or "confiance = 92%".
_SCORE_RE = re.compile(
r"\b(?:score|confidence|confiance|similarit[eé]|threshold|seuil|"
r"probabilit[eé])\s*[:=]\s*\d+(?:[.,]\d+)?%?\b",
re.IGNORECASE,
)
@dataclass(frozen=True)
class MessageValidationIssue:
    """One problem found in a message meant to be shown to a human."""

    # Short machine-readable category, e.g. "generic_phrase".
    code: str
    # Human-readable explanation of the problem.
    detail: str
@dataclass(frozen=True)
class MessageValidationResult:
    """Outcome of validating a user-visible message."""

    # Issues found, in detection order; empty when the message is valid.
    issues: tuple[MessageValidationIssue, ...] = ()

    @property
    def valid(self) -> bool:
        """Return True when no issue was recorded."""
        return not self.issues

    def raise_for_errors(self) -> None:
        """Raise ``MessageContractError`` if at least one issue was recorded."""
        if self.valid:
            return
        raise MessageContractError(self)
class MessageContractError(ValueError):
    """Raised when a message breaks the human-readability contract."""

    def __init__(self, result: MessageValidationResult):
        """Keep the validation result and build a "code: detail" summary."""
        self.result = result
        summary_parts = [f"{issue.code}: {issue.detail}" for issue in result.issues]
        details = "; ".join(summary_parts)
        super().__init__(f"Message humain invalide: {details}")
@dataclass(frozen=True)
class SupervisedPauseFields:
    """The four mandatory fields that explain a supervised pause."""

    # Rendered after "J'essaie de :".
    intention: str
    # Rendered after "J'attendais :".
    attendu: str
    # Rendered after "Je vois :".
    vu: str
    # Rendered after "Peux-tu :".
    demande: str
# Fallback values used by coerce_supervised_pause_message when a caller-supplied
# field is missing, outside the allowed length range, or fails the
# visible-message validation.
DEFAULT_SUPERVISED_PAUSE_FIELDS = SupervisedPauseFields(
intention="continuer une etape supervisee",
attendu="un accord humain clair avant de continuer",
vu="je suis sur une etape qui demande une verification humaine",
demande="indiquer si je peux continuer ou corriger l'action attendue",
)
def format_supervised_pause_message(
    *,
    intention: str,
    attendu: str,
    vu: str,
    demande: str,
) -> str:
    """Build a clear, actionable four-line supervised-pause message.

    Each field is first collapsed onto a single line. The assembled message
    is then checked with ``validate_supervised_pause_message``.

    Returns:
        The four labelled lines joined with newlines.

    Raises:
        MessageContractError: If the assembled message fails validation.
    """
    fields = SupervisedPauseFields(
        intention=_one_line(intention),
        attendu=_one_line(attendu),
        vu=_one_line(vu),
        demande=_one_line(demande),
    )
    lines = [
        f"J'essaie de : {fields.intention}",
        f"J'attendais : {fields.attendu}",
        f"Je vois : {fields.vu}",
        f"Peux-tu : {fields.demande}",
    ]
    message = "\n".join(lines)
    outcome = validate_supervised_pause_message(message)
    outcome.raise_for_errors()
    return message
def format_supervised_pause_from_mapping(payload: Mapping[str, object]) -> str:
    """Build a supervised-pause message from a runtime mapping.

    For each field the first listed key that holds a non-None value wins:
    ``intention|trying_to``, ``attendu|expected``, ``vu|observed``,
    ``demande|request``. A field with no usable key is passed as an empty
    string to ``format_supervised_pause_message``, whose errors propagate.
    """
    accepted_keys = {
        "intention": ("intention", "trying_to"),
        "attendu": ("attendu", "expected"),
        "vu": ("vu", "observed"),
        "demande": ("demande", "request"),
    }
    resolved = {
        field: _mapping_text(payload, *keys)
        for field, keys in accepted_keys.items()
    }
    return format_supervised_pause_message(**resolved)
def coerce_supervised_pause_message(
    message: object = "",
    *,
    intention: object = "",
    attendu: object = "",
    vu: object = "",
    demande: object = "",
) -> str:
    """Return a valid supervised-pause message, even from a legacy message.

    If ``message`` already satisfies the strict four-line contract it is
    returned (stripped of surrounding whitespace, line breaks preserved).
    Otherwise the four fields are composed from the explicit values; any
    value that is too short, too long, or fails the visible-message checks is
    replaced by its default. When ``demande`` is empty, the legacy message,
    collapsed onto one line, is tried as the request.

    Returns:
        A message that passes ``validate_supervised_pause_message``.
    """
    # The contract is line-based, so the message must be validated BEFORE
    # its whitespace is collapsed: collapsing first would turn a valid
    # four-line message into one line that can never pass.
    original = str(message or "").strip()
    if original and validate_supervised_pause_message(original).valid:
        return original

    raw_message = _one_line(message)
    defaults = DEFAULT_SUPERVISED_PAUSE_FIELDS
    candidates = SupervisedPauseFields(
        intention=_safe_field_text(intention, defaults.intention),
        attendu=_safe_field_text(attendu, defaults.attendu),
        vu=_safe_field_text(vu, defaults.vu),
        demande=_safe_field_text(demande or raw_message, defaults.demande),
    )
    try:
        return format_supervised_pause_message(
            intention=candidates.intention,
            attendu=candidates.attendu,
            vu=candidates.vu,
            demande=candidates.demande,
        )
    except MessageContractError:
        # A field can be acceptable alone yet still fail the combined
        # checks: fall back to the all-default message.
        return format_supervised_pause_message(
            intention=defaults.intention,
            attendu=defaults.attendu,
            vu=defaults.vu,
            demande=defaults.demande,
        )
def warn_visible_message(
    message: object,
    *,
    source: str,
    supervised_pause: bool = False,
) -> str:
    """Log contract violations and return the message text unchanged.

    Args:
        message: Value about to be displayed; converted with ``str`` (falsy
            values become the empty string).
        source: Caller label written to the log line.
        supervised_pause: When true, apply the strict supervised-pause
            validation instead of the generic visible-message validation.

    Returns:
        The message as text, never modified by validation.
    """
    text = str(message or "")
    if supervised_pause:
        result = validate_supervised_pause_message(text)
    else:
        result = validate_visible_message(text)
    if result.valid:
        return text
    issue_codes = [issue.code for issue in result.issues]
    logger.warning(
        "[message_contract] invalid_message source=%s codes=%s",
        source,
        issue_codes,
    )
    return text
def validate_supervised_pause_message(message: str) -> MessageValidationResult:
"""Validate the strict contract of a supervised-pause message.

Combines the generic visible-message checks with the structural parsing of
the four labelled lines, then checks the length of every parsed field and
whether the request reads as a concrete action in French. Duplicate issues
are collapsed before the result is returned.
"""
# Start from the generic checks (empty, too long, generic or technical text).
issues = list(validate_visible_message(message).issues)
# Structural parsing returns an empty mapping when the layout is invalid.
fields, structure_issues = _parse_supervised_pause(message)
issues.extend(structure_issues)
if fields:
for name, value in fields.items():
# Each field must stay within [MIN_FIELD_CHARS, MAX_FIELD_CHARS].
if len(value) < MIN_FIELD_CHARS:
issues.append(
MessageValidationIssue(
"field_too_short",
f"{name} doit etre explicite",
)
)
if len(value) > MAX_FIELD_CHARS:
issues.append(
MessageValidationIssue(
"field_too_long",
f"{name} depasse {MAX_FIELD_CHARS} caracteres",
)
)
# The request needs a known French action hint and at least four words.
demande = fields.get("demande", "")
if not _contains_actionable_french(demande) or len(demande.split()) < 4:
issues.append(
MessageValidationIssue(
"not_actionable",
"la demande doit contenir une action concrete en francais",
)
)
return _dedupe_issues(issues)
def validate_visible_message(message: str) -> MessageValidationResult:
    """Check that a visible message is neither generic nor technical.

    The message is stripped, then checked for: emptiness, excessive length,
    generic wording, technical English vocabulary, and raw technical data
    (field names, identifiers, UUIDs, hashes, coordinates, scores).

    Returns:
        A result holding every distinct issue found, in detection order.
    """
    text = str(message or "").strip()
    if not text:
        # Nothing else can be checked on an empty message.
        empty_issue = MessageValidationIssue("empty_message", "message vide")
        return MessageValidationResult((empty_issue,))

    issues: list[MessageValidationIssue] = []
    if len(text) > MAX_VISIBLE_MESSAGE_CHARS:
        issues.append(
            MessageValidationIssue(
                "message_too_long",
                f"message au-dela de {MAX_VISIBLE_MESSAGE_CHARS} caracteres",
            )
        )

    folded = _fold(text)

    # Accented and unaccented variants fold to the same key: check each key
    # once and report the first spelling listed.
    checked_keys: set[str] = set()
    for phrase in _GENERIC_PHRASES:
        key = _fold(phrase)
        if key in checked_keys:
            continue
        checked_keys.add(key)
        if key in folded:
            issues.append(
                MessageValidationIssue(
                    "generic_phrase",
                    f"formulation trop generique: {phrase}",
                )
            )

    # NOTE(review): terms are matched as substrings, so a French word that
    # contains one (e.g. "actualiser" contains "actual") is flagged too.
    issues.extend(
        MessageValidationIssue(
            "technical_english",
            f"anglais technique visible: {term}",
        )
        for term in _TECHNICAL_ENGLISH_TERMS
        if _fold(term) in folded
    )

    # Regex checks run on the original (unfolded) text.
    pattern_checks = (
        ("technical_field", _TECHNICAL_FIELD_RE, "champ technique brut"),
        ("technical_identifier", _TECHNICAL_IDENTIFIER_RE, "identifiant technique brut"),
        ("technical_identifier", _UUID_RE, "UUID brut"),
        ("technical_identifier", _LONG_HEX_RE, "hash technique brut"),
        ("raw_coordinates", _PIXEL_TUPLE_RE, "coordonnees pixel brutes"),
        ("raw_coordinates", _PIXEL_FIELD_RE, "coordonnees techniques brutes"),
        ("raw_coordinates", _PX_RE, "coordonnees pixel brutes"),
        ("raw_score", _SCORE_RE, "score ou confiance brut"),
    )
    issues.extend(
        MessageValidationIssue(code, detail)
        for code, pattern, detail in pattern_checks
        if pattern.search(text)
    )
    return _dedupe_issues(issues)
def is_valid_visible_message(message: str) -> bool:
    """Return True when ``validate_visible_message`` reports no issue."""
    outcome = validate_visible_message(message)
    return outcome.valid
def is_valid_supervised_pause_message(message: str) -> bool:
    """Return True when ``validate_supervised_pause_message`` reports no issue."""
    outcome = validate_supervised_pause_message(message)
    return outcome.valid
def _parse_supervised_pause(
message: str,
) -> tuple[dict[str, str], list[MessageValidationIssue]]:
lines = [line.rstrip() for line in str(message or "").splitlines() if line.strip()]
issues: list[MessageValidationIssue] = []
if len(lines) != 4:
issues.append(
MessageValidationIssue(
"invalid_structure",
"une pause supervisee doit contenir exactement 4 lignes",
)
)
return {}, issues
specs = (
("intention", r"^J'essaie de\s*:\s*(.+)$"),
("attendu", r"^J'attendais\s*:\s*(.+)$"),
("vu", r"^Je vois\s*:\s*(.+)$"),
("demande", r"^Peux-tu\s*:\s*(.+)$"),
)
fields: dict[str, str] = {}
for line, (name, pattern) in zip(lines, specs):
match = re.match(pattern, line)
if not match:
issues.append(
MessageValidationIssue(
"invalid_structure",
f"ligne {len(fields) + 1} doit commencer par {SUPERVISED_PAUSE_LABELS[len(fields)]}",
)
)
continue
fields[name] = match.group(1).strip()
if len(fields) != 4:
return {}, issues
return fields, issues
def _contains_actionable_french(text: str) -> bool:
    """Return True if the folded text contains any actionable French hint."""
    haystack = _fold(text)
    for hint in _ACTIONABLE_FRENCH_HINTS:
        if _fold(hint) in haystack:
            return True
    return False
def _one_line(value: object) -> str:
return re.sub(r"\s+", " ", str(value or "")).strip()
def _mapping_text(payload: Mapping[str, object], *keys: str) -> str:
for key in keys:
value = payload.get(key)
if value is not None:
return str(value)
return ""
def _safe_field_text(value: object, fallback: str) -> str:
    """Return ``value`` on one line if it is usable as a field, else ``fallback``.

    A value is usable when its collapsed text is within the allowed field
    length range and passes ``validate_visible_message``.
    """
    candidate = _one_line(value)
    within_bounds = MIN_FIELD_CHARS <= len(candidate) <= MAX_FIELD_CHARS
    if within_bounds and validate_visible_message(candidate).valid:
        return candidate
    return fallback
def _fold(text: str) -> str:
normalized = unicodedata.normalize("NFKD", str(text or ""))
ascii_text = "".join(ch for ch in normalized if not unicodedata.combining(ch))
return ascii_text.casefold()
def _dedupe_issues(issues: Iterable[MessageValidationIssue]) -> MessageValidationResult:
    """Wrap ``issues`` in a result, keeping the first of each (code, detail) pair.

    The order of first occurrence is preserved.
    """
    first_by_key: dict[tuple[str, str], MessageValidationIssue] = {}
    for issue in issues:
        # setdefault keeps the earliest issue and its insertion position.
        first_by_key.setdefault((issue.code, issue.detail), issue)
    return MessageValidationResult(tuple(first_by_key.values()))
# Public API of the module, in alphabetical order.
__all__ = [
"MAX_FIELD_CHARS",
"MAX_VISIBLE_MESSAGE_CHARS",
"MessageContractError",
"MessageValidationIssue",
"MessageValidationResult",
"SUPERVISED_PAUSE_LABELS",
"SupervisedPauseFields",
"coerce_supervised_pause_message",
"format_supervised_pause_from_mapping",
"format_supervised_pause_message",
"is_valid_supervised_pause_message",
"is_valid_visible_message",
"validate_supervised_pause_message",
"validate_visible_message",
"warn_visible_message",
]

View File

@@ -82,6 +82,12 @@ ICONE_PAR_NIVEAU: dict[NiveauMessage, str] = {
NiveauMessage.BLOCAGE: "?",
}
# Les pauses supervisees peuvent contenir une raison precise, parfois longue
# (fenetre observee, fenetre attendue, action en cours). On garde l'information
# utile et on laisse les widgets UI gerer le wrap/scroll.
MAX_TARGET_DESCRIPTION_CHARS = 1024
MAX_GENERIC_TECHNICAL_MESSAGE_CHARS = 1024
@dataclass
class MessageUtilisateur:
@@ -147,9 +153,9 @@ def _nettoyer_description_cible(description: str) -> str:
desc = description.strip()
# Retirer les guillemets encapsulants
desc = desc.strip("'\"`")
# Limiter la longueur
if len(desc) > 80:
desc = desc[:77] + "..."
# Limiter la longueur sans perdre les details utiles a la supervision.
if len(desc) > MAX_TARGET_DESCRIPTION_CHARS:
desc = desc[: MAX_TARGET_DESCRIPTION_CHARS - 3] + "..."
return desc
@@ -566,8 +572,8 @@ def formatter_erreur_generique(
# Fallback : message technique tronqué
msg_tronque = message_technique.strip()
if len(msg_tronque) > 120:
msg_tronque = msg_tronque[:117] + "..."
if len(msg_tronque) > MAX_GENERIC_TECHNICAL_MESSAGE_CHARS:
msg_tronque = msg_tronque[: MAX_GENERIC_TECHNICAL_MESSAGE_CHARS - 3] + "..."
return MessageUtilisateur(
niveau=NiveauMessage.ATTENTION,

View File

@@ -371,7 +371,13 @@ class SmartTrayV1:
)
if name and name.strip():
name = name.strip()
# Utiliser l'etat partage si disponible
# --- P1-LEA-SHADOW : d\u00e9clencher d'abord l'orchestrateur L\u00e9a Linux ---
# On contacte agent-chat AVANT la capture locale. Si \u00e9chec,
# bascule en mode d\u00e9grad\u00e9 (capture locale sans assistance).
self._start_lea_orchestrator_session(name)
# --- Comportement historique pr\u00e9serv\u00e9 : capture locale ---
if self._shared_state is not None:
try:
self._shared_state.start_recording(name)
@@ -393,6 +399,55 @@ class SmartTrayV1:
threading.Thread(target=_dialog, daemon=True).start()
def _start_lea_orchestrator_session(self, session_name: str) -> None:
    """Ask the agent-chat service to open a learning session (P1-LEA-SHADOW).

    Fail-safe by design: an import failure, a ``LeaOrchestratorError`` or
    any other exception is logged, reported through the notifier, and the
    method returns without raising, so local capture is never blocked.

    Args:
        session_name: Name of the learning session, forwarded to the
            orchestrator client.
    """

    def notify_degraded(text: str) -> None:
        # Every degraded-mode notice uses the same "Léa" title.
        self._notifier.notify("Léa", text)

    # Lazy imports: a missing config or client module must not break the tray.
    try:
        from ..config import AGENT_CHAT_URL, API_TOKEN, MACHINE_ID
        from ..network.lea_orchestrator_client import (
            LeaOrchestratorError,
            start_learning_session,
        )
    except Exception as import_error:  # pragma: no cover (import-time)
        logger.error(
            "Impossible de charger le client orchestrateur Léa : %s",
            import_error,
        )
        notify_degraded("Serveur injoignable — apprentissage local uniquement.")
        return

    try:
        response = start_learning_session(
            AGENT_CHAT_URL,
            machine_id=MACHINE_ID,
            session_name=session_name,
            api_token=API_TOKEN,
            trigger_source="tray_button",
        )
    except LeaOrchestratorError as unreachable:
        logger.error("Orchestrateur Léa injoignable : %s", unreachable)
        notify_degraded("Serveur injoignable — apprentissage local uniquement.")
        return
    except Exception:  # noqa: BLE001 - defensive boundary
        logger.exception("Erreur inattendue orchestrateur Léa")
        notify_degraded("Erreur orchestrateur — apprentissage local uniquement.")
        return

    logger.info(
        "Session orchestrateur Léa OK : id=%s state=%s",
        response.session_id,
        response.state,
    )
def _on_stop_session(self, _icon=None, _item=None) -> None:
"""Termine la session en cours et envoie les donnees."""
count = self.actions_count

View File

@@ -43,6 +43,9 @@ class EventCaptorV1:
# État des touches modificatrices
self.modifiers = set()
self._pending_standalone_win = False
self._suppress_release_only_win_combo = False
self._raw_key_buffer: List[Dict[str, Any]] = []
# Tracking du focus fenêtre
self.last_window = None
@@ -91,6 +94,7 @@ class EventCaptorV1:
# Flush du buffer texte restant avant arrêt
self._flush_text_buffer()
# Annuler le timer s'il est en cours
emit_escape = False
with self._text_lock:
if self._text_flush_timer is not None:
self._text_flush_timer.cancel()
@@ -159,7 +163,80 @@ class EventCaptorV1:
# Clavier
# ----------------------------------------------------------------
@staticmethod
def _get_key_name(key) -> Optional[str]:
    """Turn a pynput ``Key``/``KeyCode`` object into a readable name.

    Returns the character of a ``KeyCode`` (``None`` when it has none), the
    ``name`` of a ``Key``, and ``str(key)`` for anything else.
    """
    if isinstance(key, KeyCode):
        return key.char or None
    return key.name if isinstance(key, Key) else str(key)
@staticmethod
def _encode_key(key) -> Dict[str, Any]:
    """Describe a key object as a plain dict tagged by ``kind``.

    ``KeyCode`` gives ``{"kind": "vk", "vk", "char"}``, ``Key`` gives
    ``{"kind": "key", "name"}``, anything else ``{"kind": "unknown", "str"}``.
    """
    if isinstance(key, KeyCode):
        encoded = {"kind": "vk", "vk": key.vk, "char": key.char}
    elif isinstance(key, Key):
        encoded = {"kind": "key", "name": key.name}
    else:
        encoded = {"kind": "unknown", "str": str(key)}
    return encoded
@staticmethod
def _raw_key_name(raw_key: Dict[str, Any]) -> Optional[str]:
if raw_key.get("kind") == "vk":
char = raw_key.get("char")
if char and len(str(char)) == 1:
return str(char).lower()
if raw_key.get("kind") == "key":
name = raw_key.get("name")
return str(name).lower() if name else None
return None
def _emit_release_only_windows_combo(self) -> bool:
"""Infère Win+<touche> quand seuls les releases sont capturés."""
with self._text_lock:
raw_keys = list(getattr(self, "_raw_key_buffer", []))
if len(raw_keys) < 2:
return False
cmd_names = {"cmd", "cmd_l", "cmd_r"}
last = raw_keys[-1]
if last.get("action") != "release" or self._raw_key_name(last) not in cmd_names:
return False
combo_key = None
modifier_names = {
"ctrl", "ctrl_l", "ctrl_r",
"alt", "alt_l", "alt_r",
"shift", "shift_l", "shift_r",
"cmd", "cmd_l", "cmd_r",
}
for raw in reversed(raw_keys[:-1]):
if raw.get("action") != "release":
continue
name = self._raw_key_name(raw)
if name and name not in modifier_names:
combo_key = name
break
if not combo_key:
return False
self._raw_key_buffer.clear()
event = {
"type": "key_combo",
"keys": ["win", combo_key],
"raw_keys": raw_keys,
"timestamp": time.time(),
}
self.on_event(event)
return True
def _on_press(self, key):
with self._text_lock:
if not hasattr(self, "_raw_key_buffer"):
self._raw_key_buffer = []
self._raw_key_buffer.append({
"action": "press",
**self._encode_key(key),
})
# Gestion des touches modificatrices
if key in (Key.ctrl, Key.ctrl_l, Key.ctrl_r):
self.modifiers.add("ctrl")
@@ -167,15 +244,26 @@ class EventCaptorV1:
self.modifiers.add("alt")
elif key in (Key.shift, Key.shift_l, Key.shift_r):
self.modifiers.add("shift")
elif key in (Key.cmd, Key.cmd_l, Key.cmd_r):
self.modifiers.add("win")
self._pending_standalone_win = True
# --- Combos avec modificateur (sauf Shift seul) ---
# Shift seul n'est pas un « vrai » modificateur pour les combos :
# Shift+a = 'A' = saisie texte, pas un raccourci.
# On considère un combo seulement si Ctrl ou Alt est enfoncé.
has_real_modifier = self.modifiers & {"ctrl", "alt"}
# On considère un combo seulement si Ctrl, Alt ou Win est enfoncé.
has_real_modifier = self.modifiers & {"ctrl", "alt", "win"}
if has_real_modifier:
key_name = self._get_key_name(key)
if key_name and key_name not in ("ctrl", "alt", "shift"):
if key_name and key_name not in (
"ctrl", "ctrl_l", "ctrl_r",
"alt", "alt_l", "alt_r",
"shift", "shift_l", "shift_r",
"cmd", "cmd_l", "cmd_r",
):
self._pending_standalone_win = False
if "win" in self.modifiers:
self._suppress_release_only_win_combo = True
# Un combo interrompt la saisie texte en cours
self._flush_text_buffer()
event = {
@@ -205,14 +293,18 @@ class EventCaptorV1:
self._reset_flush_timer()
return
if key == Key.escape:
escape_keys = [Key.esc]
key_escape = getattr(Key, "escape", None)
if key_escape is not None:
escape_keys.append(key_escape)
if key in escape_keys:
# Annuler la saisie en cours
self._text_buffer.clear()
self._text_start_pos = None
self._cancel_flush_timer()
return
emit_escape = True
if key in (Key.enter, Key.tab):
elif key in (Key.enter, Key.tab):
# Flush immédiat — on relâche le lock avant d'appeler
# _flush_text_buffer (qui prend aussi le lock)
pass # on sort du with et on flush après
@@ -238,6 +330,15 @@ class EventCaptorV1:
# Touche spéciale non gérée (F1, Insert, etc.) — on ignore
return
if emit_escape:
event = {
"type": "key_combo",
"keys": ["escape"],
"timestamp": time.time(),
}
self.on_event(event)
return
# Si on arrive ici, c'est Enter ou Tab → flush immédiat
self._flush_text_buffer()
@@ -290,12 +391,46 @@ class EventCaptorV1:
self.on_event(event)
def _on_release(self, key):
    """Handle a key release: record it, resolve Win gestures, update modifiers.

    For a Win key release, three cases are tried in order:
    1. a Win combo was already emitted on press: only reset the state;
    2. a Win+<key> combo can be inferred from the recorded releases;
    3. Win was pressed alone: emit a standalone ``["win"]`` combo.
    """
    # Only recent entries are useful for inferring a Win combo. Without a
    # cap the buffer would grow for the whole session, since it is cleared
    # only on Win gestures.
    max_raw_keys = 64
    with self._text_lock:
        self._raw_key_buffer.append({
            "action": "release",
            **self._encode_key(key),
        })
        overflow = len(self._raw_key_buffer) - max_raw_keys
        if overflow > 0:
            del self._raw_key_buffer[:overflow]

    is_win_key = key in (Key.cmd, Key.cmd_l, Key.cmd_r)

    if is_win_key and self._suppress_release_only_win_combo:
        # The combo was already reported by _on_press.
        with self._text_lock:
            self._raw_key_buffer.clear()
        self._pending_standalone_win = False
        self._suppress_release_only_win_combo = False
        self.modifiers.discard("win")
        return

    if is_win_key and self._emit_release_only_windows_combo():
        self._pending_standalone_win = False
        self._suppress_release_only_win_combo = False
        self.modifiers.discard("win")
        return

    if is_win_key and self._pending_standalone_win:
        event = {
            "type": "key_combo",
            "keys": ["win"],
            "timestamp": time.time(),
        }
        self.on_event(event)
        self._pending_standalone_win = False
        self._suppress_release_only_win_combo = False

    # Modifier bookkeeping.
    if key in (Key.ctrl, Key.ctrl_l, Key.ctrl_r):
        self.modifiers.discard("ctrl")
    elif key in (Key.alt, Key.alt_l, Key.alt_r):
        self.modifiers.discard("alt")
    elif key in (Key.shift, Key.shift_l, Key.shift_r):
        self.modifiers.discard("shift")
    elif is_win_key:
        self.modifiers.discard("win")
        self._pending_standalone_win = False
        self._suppress_release_only_win_combo = False
def _watch_window_focus(self):
"""Surveille proactivement le changement de fenêtre pour le stagiaire."""

View File

@@ -171,13 +171,17 @@ def handle_detected_pattern(pattern: Dict[str, Any]) -> bool:
screenshot = sct.grab(monitor)
screen = Image.frombytes('RGB', screenshot.size, screenshot.bgra, 'raw', 'BGRX')
# EasyOCR (rapide, bonne qualité GUI) avec fallback docTR.
# gpu=True : harmonisé avec dialog_handler.py et title_verifier.py.
# Coût VRAM ~0.5 GB, sous le budget RTX 5070 (cf. deploy/VRAM_BUDGET.md).
# EasyOCR (bonne qualité GUI) avec fallback docTR. Par défaut CPU :
# le replay server réserve la VRAM à Ollama.
words = []
try:
import easyocr
_reader = easyocr.Reader(['fr', 'en'], gpu=True, verbose=False)
from core.llm.ocr_extractor import easyocr_gpu_enabled
_reader = easyocr.Reader(
['fr', 'en'],
gpu=easyocr_gpu_enabled(default=False),
verbose=False,
)
results = _reader.readtext(np.array(screen))
for (bbox_pts, text, conf) in results:
if not text or len(text.strip()) < 1:

View File

@@ -248,8 +248,10 @@ class DialogHandler:
try:
import easyocr
from core.llm.ocr_extractor import easyocr_gpu_enabled
gpu = easyocr_gpu_enabled(default=False)
self._easyocr_reader = easyocr.Reader(
['fr', 'en'], gpu=True, verbose=False
['fr', 'en'], gpu=gpu, verbose=False
)
return self._easyocr_reader
except ImportError:

View File

@@ -144,19 +144,21 @@ class FastDetector:
_easyocr_reader = None # Singleton EasyOCR (chargé une fois)
def _ocr_extract(self, image) -> List[Dict[str, Any]]:
"""Extrait les mots visibles via EasyOCR (GPU, ~500ms).
"""Extrait les mots visibles via EasyOCR.
Fallback sur docTR si EasyOCR non disponible.
"""
try:
import numpy as np
import easyocr
from core.llm.ocr_extractor import easyocr_gpu_enabled
# Singleton : charger le reader une seule fois
if FastDetector._easyocr_reader is None:
print(f"🔍 [FAST/ocr] Chargement EasyOCR (GPU)...")
gpu = easyocr_gpu_enabled(default=False)
print(f"🔍 [FAST/ocr] Chargement EasyOCR ({'GPU' if gpu else 'CPU'})...")
FastDetector._easyocr_reader = easyocr.Reader(
['fr', 'en'], gpu=True, verbose=False
['fr', 'en'], gpu=gpu, verbose=False
)
results = FastDetector._easyocr_reader.readtext(np.array(image))

View File

@@ -148,10 +148,16 @@ class TitleVerifier:
try:
import easyocr
import numpy as np
from core.llm.ocr_extractor import easyocr_gpu_enabled
if TitleVerifier._easyocr_reader is None:
gpu = easyocr_gpu_enabled(default=False)
TitleVerifier._easyocr_reader = easyocr.Reader(
['fr', 'en'], gpu=True, verbose=False
['fr', 'en'], gpu=gpu, verbose=False
)
logger.info(
"TitleVerifier EasyOCR initialisé (fr+en, %s)",
"GPU" if gpu else "CPU",
)
def _easyocr_extract_text(img):

View File

@@ -12,6 +12,7 @@ Ce fichier garantit que:
- Le GPU est vérifié avant les tests qui en ont besoin
"""
import sys
import types
from pathlib import Path
import pytest
@@ -31,6 +32,42 @@ except ImportError as e:
print(f" sys.path: {sys.path[:3]}...")
# Certains tests HTTP d'agent_chat n'ont pas besoin du transport SocketIO reel.
# Le service de production garde Flask-SocketIO comme dependance, mais l'env de
# test local peut etre minimal. On fournit alors un shim strictement pytest.
try:
    import flask_socketio  # noqa: F401
except ModuleNotFoundError:
    # Flask-SocketIO is not installed: register a minimal stand-in module so
    # that code importing it can still be loaded under pytest.
    flask_socketio = types.ModuleType("flask_socketio")

    class _FakeSocketIO:
        """In-memory stand-in for ``flask_socketio.SocketIO`` (pytest only).

        Handlers registered through ``on`` are kept in ``handlers`` and every
        ``emit`` call is recorded in ``emitted``; nothing touches the network.
        """

        def __init__(self, app=None, *args, **kwargs):
            self.app = app
            self.args = args
            self.kwargs = kwargs
            self.handlers = {}
            self.emitted = []

        def on(self, event, namespace=None):
            """Return a decorator registering ``func`` as handler for ``event``.

            ``namespace`` is accepted, as on the real ``SocketIO.on``, and
            ignored: handlers are keyed by event name only.
            """
            def decorator(func):
                self.handlers[event] = func
                return func

            return decorator

        def emit(self, event, payload=None, **kwargs):
            """Record the emission instead of sending it."""
            self.emitted.append((event, payload, kwargs))

        def run(self, *args, **kwargs):
            """No-op: the shim never starts a server."""
            return None

    def _fake_emit(*_args, **_kwargs):
        """Module-level ``emit`` replacement that does nothing."""
        return None

    flask_socketio.SocketIO = _FakeSocketIO
    flask_socketio.emit = _fake_emit
    sys.modules["flask_socketio"] = flask_socketio
# =============================================================================
# GPU Preflight — vérification avant les tests GPU
# =============================================================================

View File

@@ -0,0 +1,254 @@
"""Tests integration pour agent_chat.handlers.learn_action.
Mocks HTTP uniquement — pas de lancement du streaming server réel.
"""
from __future__ import annotations
import json
from unittest.mock import MagicMock, patch
import pytest
from agent_chat.handlers.learn_action import (
LearnActionOrchestrator,
LearnIntentParser,
LearnState,
StateStore,
StreamingClient,
)
@pytest.fixture
def fake_http_client():
    """Provide a MagicMock standing in for httpx.Client (``request`` method)."""
    return MagicMock()
def _mk_response(status: int = 200, body: dict | None = None):
resp = MagicMock()
resp.status_code = status
resp.json.return_value = body or {}
resp.text = json.dumps(body or {})
return resp
class TestStreamingClient:
    """StreamingClient behaviour against a mocked HTTP transport."""

    def test_shadow_start_calls_correct_endpoint(self, fake_http_client):
        """shadow_start POSTs to the shadow/start endpoint with a bearer token."""
        fake_http_client.request.return_value = _mk_response(200, {"ok": True})
        streaming = StreamingClient(
            base_url="http://stream:5005",
            token="abc",
            http_client=fake_http_client,
            retries=0,
        )

        result = streaming.shadow_start("sid_xyz", user_id="dom")

        assert result == {"ok": True}
        positional, keywords = fake_http_client.request.call_args
        assert positional[0] == "POST"
        assert positional[1] == "http://stream:5005/api/v1/shadow/start"
        assert keywords["json"]["session_id"] == "sid_xyz"
        headers = keywords["headers"]
        assert "Authorization" in headers
        assert headers["Authorization"] == "Bearer abc"

    def test_retry_on_failure(self, fake_http_client):
        """A failed first attempt is retried and the second response is returned."""
        # First attempt raises, second attempt succeeds.
        fake_http_client.request.side_effect = [
            Exception("conn refused"),
            _mk_response(200, {"ok": True}),
        ]
        streaming = StreamingClient(
            base_url="http://stream:5005",
            token="",
            http_client=fake_http_client,
            retries=1,
        )

        result = streaming.shadow_stop("sid")

        assert result == {"ok": True}
        assert fake_http_client.request.call_count == 2

    def test_retry_exhausted_raises(self, fake_http_client):
        """When every attempt fails, a RuntimeError mentioning 'unreachable' is raised."""
        fake_http_client.request.side_effect = Exception("boom")
        streaming = StreamingClient(
            base_url="http://stream:5005",
            token="",
            http_client=fake_http_client,
            retries=2,
        )

        with pytest.raises(RuntimeError, match="unreachable"):
            streaming.shadow_stop("sid")
class TestFullFlowIntegration:
    """Drives the orchestrator through whole conversations with only HTTP mocked."""

    def test_end_to_end_with_http_mock(self, tmp_path, fake_http_client):
        """Start, stop, validate, name and mark a parameter until the session is DONE."""
        click_step = {
            "action_type": "click",
            "target_label": "Patient",
            "widget_type": "Fenêtre",
        }
        type_step = {
            "action_type": "type",
            "target_label": "IPP",
            "widget_type": "Champ",
            "value": "25003284",
        }
        understanding_body = {"understanding": [click_step, type_step]}
        # Mocked HTTP sequence, in call order.
        fake_http_client.request.side_effect = [
            _mk_response(200, {"ok": True}),  # shadow_start
            _mk_response(200, {"ok": True}),  # shadow_stop
            _mk_response(200, understanding_body),  # shadow_understanding
            _mk_response(200, {"ok": True}),  # shadow_build
            _mk_response(200, {"slug": "facture_urg"}),  # persist
        ]
        streaming = StreamingClient(
            base_url="http://stream:5005",
            token="t",
            http_client=fake_http_client,
            retries=0,
        )
        orch = LearnActionOrchestrator(
            streaming_client=streaming,
            intent_parser=LearnIntentParser(use_llm_fallback=False),
            state_store=StateStore(tmp_path),
            emit=MagicMock(),
        )

        session, _ = orch.start_session(user_id="dom", machine_id="m1")
        sid = session.session_id
        assert session.state == LearnState.WAITING_USER_STOP

        # The user signals the end of the demonstration.
        orch.handle_chat_message(sid, "c'est bon")
        assert orch._sessions[sid].state == LearnState.ITERATING_FEEDBACK

        # Global validation of the understanding.
        orch.handle_chat_message(sid, "parfait")
        assert orch._sessions[sid].state == LearnState.NAMING

        # Competence name.
        orch.handle_chat_message(sid, "facturation urgences")

        # Mark IPP as a parameter.
        reply = orch.handle_chat_message(sid, "ça change à chaque fois")
        assert orch._sessions[sid].state == LearnState.DONE
        assert "facture_urg" in (reply or "")

    def test_streaming_down_during_stop(self, tmp_path, fake_http_client):
        """If ``shadow_stop`` keeps failing, the reply tells the user to retry."""
        # shadow_start succeeds, then every shadow_stop attempt fails.
        fake_http_client.request.side_effect = [
            _mk_response(200, {"ok": True}),  # shadow_start
            Exception("boom 1"),  # shadow_stop attempt 1
            Exception("boom 2"),  # shadow_stop attempt 2 (retry)
            Exception("boom 3"),  # shadow_stop attempt 3 (retry)
        ]
        streaming = StreamingClient(
            base_url="http://stream:5005",
            token="",
            http_client=fake_http_client,
            retries=2,
        )
        orch = LearnActionOrchestrator(
            streaming_client=streaming,
            intent_parser=LearnIntentParser(use_llm_fallback=False),
            state_store=StateStore(tmp_path),
            emit=MagicMock(),
        )

        session, _ = orch.start_session(user_id="dom")
        reply = orch.handle_chat_message(session.session_id, "stop")

        text = reply or ""
        assert "n'arrive pas à clôturer" in text or "réessaie" in text.lower()
# ============================================================
# POST /api/learn/start (Correction #4)
# ============================================================
class TestApiLearnStart:
    """Integration tests for the ``POST /api/learn/start`` HTTP route.

    Each test installs its orchestrator on ``agent_chat.app`` through
    ``monkeypatch.setattr`` so that the value present before the test is
    restored afterwards, instead of being forced to ``None``.
    """

    def _make_orchestrator(self, tmp_path):
        """Build an orchestrator whose HTTP transport always answers 200 ``{"ok": True}``."""
        client_http = MagicMock()
        client_http.request.return_value = _mk_response(200, {"ok": True})
        stream = StreamingClient(
            base_url="http://stream:5005",
            token="",
            http_client=client_http,
            retries=0,
        )
        return LearnActionOrchestrator(
            streaming_client=stream,
            intent_parser=LearnIntentParser(use_llm_fallback=False),
            state_store=StateStore(tmp_path),
            emit=MagicMock(),
        )

    def test_api_learn_start_creates_session(self, tmp_path, monkeypatch):
        """A valid payload returns 200 and registers the session on the orchestrator."""
        from agent_chat import app as app_module

        orch = self._make_orchestrator(tmp_path)
        monkeypatch.setattr(app_module, "learn_action_orchestrator", orch)

        client = app_module.app.test_client()
        resp = client.post(
            "/api/learn/start",
            json={
                "machine_id": "DESKTOP-58D5CAC_windows",
                "user_id": "dom",
                "trigger_source": "windows_button",
            },
        )

        assert resp.status_code == 200
        data = resp.get_json()
        assert "session_id" in data
        assert data["state"] == LearnState.WAITING_USER_STOP.value
        assert data["message"]
        # The session must also exist on the orchestrator side.
        sid = data["session_id"]
        assert orch._sessions[sid].machine_id == "DESKTOP-58D5CAC_windows"
        assert orch._sessions[sid].trigger_source == "windows_button"

    def test_api_learn_start_400_without_machine_id(self, tmp_path, monkeypatch):
        """A payload without ``machine_id`` is rejected with 400 and an error naming the field."""
        from agent_chat import app as app_module

        orch = self._make_orchestrator(tmp_path)
        monkeypatch.setattr(app_module, "learn_action_orchestrator", orch)

        client = app_module.app.test_client()
        resp = client.post("/api/learn/start", json={"user_id": "dom"})

        assert resp.status_code == 400
        data = resp.get_json()
        assert "machine_id" in (data.get("error") or "").lower()

    def test_api_learn_start_400_with_empty_machine_id(self, tmp_path, monkeypatch):
        """A whitespace-only ``machine_id`` is rejected with 400."""
        from agent_chat import app as app_module

        orch = self._make_orchestrator(tmp_path)
        monkeypatch.setattr(app_module, "learn_action_orchestrator", orch)

        client = app_module.app.test_client()
        resp = client.post(
            "/api/learn/start",
            json={"machine_id": "   "},
        )

        assert resp.status_code == 400

    def test_api_learn_start_503_if_orchestrator_not_initialized(self, monkeypatch):
        """With no orchestrator installed, the route answers 503."""
        from agent_chat import app as app_module

        monkeypatch.setattr(app_module, "learn_action_orchestrator", None)

        client = app_module.app.test_client()
        resp = client.post(
            "/api/learn/start",
            json={"machine_id": "m1"},
        )

        assert resp.status_code == 503

View File

@@ -15,8 +15,10 @@ garantit que l'env est defini AVANT tout import.
from __future__ import annotations
import os
import sqlite3
import sys
import tempfile
import time
from pathlib import Path
import pytest
@@ -273,6 +275,107 @@ def test_reenroll_after_uninstall_reactivates(agents_client):
assert agent["version"] == "1.1.0"
def test_reenroll_after_admin_revoke_is_forbidden(agents_client):
    """Re-enrolling a machine uninstalled with reason ``admin_revoke`` yields 403 ``agent_revoked``."""
    client, token, _ = agents_client
    machine_id = "revoked-001"

    # Initial enrollment.
    client.post(
        "/api/v1/agents/enroll",
        json={"machine_id": machine_id, "user_name": "Revoked"},
        headers=_auth_headers(token),
    )

    # Uninstall flagged as an admin revocation.
    revoke = client.post(
        "/api/v1/agents/uninstall",
        json={"machine_id": machine_id, "reason": "admin_revoke"},
        headers=_auth_headers(token),
    )
    assert revoke.status_code == 200

    # A second enrollment of the same machine must be refused.
    resp = client.post(
        "/api/v1/agents/enroll",
        json={"machine_id": machine_id, "user_name": "Revoked Again"},
        headers=_auth_headers(token),
    )
    assert resp.status_code == 403, resp.text

    detail = resp.json()["detail"]
    existing = detail["existing"]
    assert detail["error"] == "agent_revoked"
    assert existing["machine_id"] == "revoked-001"
    assert existing["uninstall_reason"] == "admin_revoke"
def test_revoked_agent_cannot_stream_or_poll(agents_client):
    """A revoked agent gets 403 ``agent_not_active`` on both event streaming and replay polling."""
    client, token, _ = agents_client
    machine_id = "revoked-runtime-001"
    session_id = "sess_revoked_runtime"

    # Enroll then revoke the machine.
    client.post(
        "/api/v1/agents/enroll",
        json={"machine_id": machine_id, "user_name": "Runtime"},
        headers=_auth_headers(token),
    )
    client.post(
        "/api/v1/agents/uninstall",
        json={"machine_id": machine_id, "reason": "admin_revoke"},
        headers=_auth_headers(token),
    )

    # Pushing an event is refused.
    event_resp = client.post(
        "/api/v1/traces/stream/event",
        json={
            "session_id": session_id,
            "timestamp": time.time(),
            "event": {"type": "heartbeat"},
            "machine_id": machine_id,
        },
        headers=_auth_headers(token),
    )
    assert event_resp.status_code == 403, event_resp.text
    assert event_resp.json()["detail"]["error"] == "agent_not_active"

    # Polling the next replay action is refused as well.
    next_resp = client.get(
        "/api/v1/traces/stream/replay/next",
        params={"session_id": session_id, "machine_id": machine_id},
        headers=_auth_headers(token),
    )
    assert next_resp.status_code == 403, next_resp.text
    assert next_resp.json()["detail"]["error"] == "agent_not_active"
def test_active_agent_stream_updates_last_seen(agents_client):
    """A streamed event from an active agent refreshes its ``last_seen_at``.

    The row is first back-dated directly in the registry database, then a
    heartbeat event is posted and the stored timestamp must differ from the
    back-dated value.
    """
    # Local import: the block must not rely on a file-level contextlib import.
    from contextlib import closing

    client, token, registry = agents_client
    machine_id = "last-seen-001"
    client.post(
        "/api/v1/agents/enroll",
        json={"machine_id": machine_id, "user_name": "Seen"},
        headers=_auth_headers(token),
    )

    stale = "2000-01-01T00:00:00+00:00"
    # ``with sqlite3.connect(...)`` alone only scopes the transaction and
    # leaves the connection open; ``closing`` guarantees the handle on the
    # test database is released.
    with closing(sqlite3.connect(str(registry.db_path))) as conn:
        conn.execute(
            "UPDATE enrolled_agents SET last_seen_at = ? WHERE machine_id = ?",
            (stale, machine_id),
        )
        conn.commit()

    resp = client.post(
        "/api/v1/traces/stream/event",
        json={
            "session_id": "sess_last_seen",
            "timestamp": time.time(),
            "event": {"type": "heartbeat"},
            "machine_id": machine_id,
        },
        headers=_auth_headers(token),
    )
    assert resp.status_code == 200, resp.text

    row = registry.get(machine_id)
    assert row is not None
    assert row["last_seen_at"] != stale
# ---------------------------------------------------------------------------
# GET /api/v1/agents/fleet
# ---------------------------------------------------------------------------

View File

@@ -0,0 +1,198 @@
"""Mesure du gain perf RPA_SKIP_INTENTION_ENRICHMENT sur build_replay.
Harnais lecture seule : charge une fixture raw events réelle (smoke Bloc-notes
2026-05-20 - même session que replay_sess_e96e5822 18/18 du 2026-05-25) et
appelle directement build_replay_from_raw_events() sans déclencher dispatch
ni replay live.
Ne pas lancer en CI standard : test perf, run manuel uniquement.
Run :
.venv/bin/python -m pytest tests/integration/test_build_replay_perf.py \
-m performance -s -v
Référence : inbox_claude/2026-05-25_1244_codex-to-claude_recadrage-demo-1juin.md
(mission C2) et plan docs/plans/PLAN_STABILISATION_DEMO_2026-06-01.md
(P0 performance mesurable).
"""
from __future__ import annotations
import json
import sys
import time
from pathlib import Path
import pytest
ROOT = Path(__file__).resolve().parents[2]
if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT))
SESSION_DIR = (
ROOT
/ "data"
/ "training"
/ "live_sessions"
/ "DESKTOP-58D5CAC_windows"
/ "sess_20260520T102916_066851"
)
FIXTURE = SESSION_DIR / "live_events.jsonl"
def _load_raw_events() -> list:
    """Load the raw-events fixture, one JSON document per non-blank line.

    Skips the calling test when the fixture file is absent.

    Returns:
        The decoded events in file order.
    """
    if not FIXTURE.exists():
        pytest.skip(f"Fixture absente : {FIXTURE}")
    # Explicit UTF-8: without it the platform default encoding is used,
    # which can mis-decode non-ASCII text in the JSONL file.
    with FIXTURE.open(encoding="utf-8") as f:
        return [json.loads(line) for line in f if line.strip()]
@pytest.fixture
def raw_events():
    """Expose the raw-events fixture content (skips the test when the file is missing)."""
    events = _load_raw_events()
    return events
@pytest.fixture
def session_dir() -> str:
    """Return the recorded session directory as a string, skipping the test when it is absent."""
    if SESSION_DIR.exists():
        return str(SESSION_DIR)
    pytest.skip(f"Session dir absent : {SESSION_DIR}")
def _extract_perf_breakdown(caplog) -> list[tuple[str, float]]:
"""Extrait les spans [PERF] build.step* des logs capturés.
Format attendu : "[PERF] build.<step_name> session=<sid> elapsed_ms=<X>"
Retourne [(step_name, elapsed_ms)] dans l'ordre d'apparition.
"""
import re
pattern = re.compile(r"\[PERF\] build\.(\S+) session=\S+ elapsed_ms=([\d.]+)")
out = []
for record in caplog.records:
m = pattern.search(record.getMessage())
if m:
out.append((m.group(1), float(m.group(2))))
return out
@pytest.mark.performance
def test_build_replay_perf_skip_enrichment(monkeypatch, raw_events, session_dir, caplog):
    """Measure ``build_replay_from_raw_events`` with and without RPA_SKIP_INTENTION_ENRICHMENT.

    Asserts:
    - skipping enrichment is at least 3x faster;
    - both modes produce the same number of actions;
    - skip mode yields 0 actions with a non-empty ``intention``;
    - full mode yields at least 1 action with an ``intention``.

    Prints explicit ``[PERF]`` lines for both runs (visible with ``-s``).
    """
    import logging

    from agent_v0.server_v1.stream_processor import build_replay_from_raw_events

    # Capture INFO logs of the stream processor to recover the [PERF] spans.
    caplog.set_level(logging.INFO, logger="agent_v0.server_v1.stream_processor")

    # First run: enrichment active (both skip variables removed).
    monkeypatch.delenv("RPA_SKIP_INTENTION_ENRICHMENT", raising=False)
    monkeypatch.delenv("RPA_SKIP_ENRICHMENT", raising=False)
    t0 = time.perf_counter()
    actions_full = build_replay_from_raw_events(
        raw_events, session_id="perf_full", session_dir=session_dir
    )
    elapsed_full_ms = (time.perf_counter() - t0) * 1000
    breakdown_full = _extract_perf_breakdown(caplog)
    caplog.clear()

    # Second run: enrichment skipped.
    monkeypatch.setenv("RPA_SKIP_INTENTION_ENRICHMENT", "1")
    t0 = time.perf_counter()
    actions_skip = build_replay_from_raw_events(
        raw_events, session_id="perf_skip", session_dir=session_dir
    )
    elapsed_skip_ms = (time.perf_counter() - t0) * 1000
    breakdown_skip = _extract_perf_breakdown(caplog)

    # Denominator is floored at 1 ms so a near-instant skip run cannot blow up the ratio.
    speedup = elapsed_full_ms / max(1.0, elapsed_skip_ms)
    intentions_full = sum(1 for a in actions_full if a.get("intention"))
    intentions_skip = sum(1 for a in actions_skip if a.get("intention"))
    print(
        f"\n[PERF] build_replay events={len(raw_events)} "
        f"actions_full={len(actions_full)} actions_skip={len(actions_skip)} "
        f"full_ms={elapsed_full_ms:.0f} skip_ms={elapsed_skip_ms:.0f} "
        f"speedup={speedup:.1f}x "
        f"intentions_full={intentions_full} intentions_skip={intentions_skip}"
    )

    # Per-step breakdown: helps spot the remaining optimization targets once
    # enrichment is skipped.
    def _format_breakdown(label: str, b: list[tuple[str, float]]) -> str:
        """Render the spans as one line per step with a proportional bar."""
        if not b:
            return f" {label}: (aucun span [PERF] capturé)"
        lines = [f" {label}:"]
        for step, ms in b:
            # One bar character per 500 ms, at least one. The previous code
            # multiplied an empty string, so the bar never showed.
            bar = "█" * max(1, int(ms / 500))
            lines.append(f" {step:40s} {ms:>7.0f} ms {bar}")
        return "\n".join(lines)

    print(_format_breakdown("Décomposition FULL", breakdown_full))
    print(_format_breakdown("Décomposition SKIP", breakdown_skip))

    # Invariant: same number of actions, only the intention fields differ.
    assert len(actions_skip) == len(actions_full), (
        f"Le skip ne doit pas changer le nombre d'actions "
        f"(full={len(actions_full)}, skip={len(actions_skip)})"
    )
    # Skip mode: no action carries an enriched intention.
    assert intentions_skip == 0, (
        f"Skip enrichment doit produire 0 intention non-vide "
        f"(observé : {intentions_skip})"
    )
    # Full mode: at least one action carries an intention; zero is abnormal
    # and must fail loudly.
    assert intentions_full > 0, (
        f"Full enrichment doit produire au moins 1 intention non-vide "
        f"sur fixture {FIXTURE.name}. Si 0 → gemma4 indisponible ou fixture "
        f"non éligible (toutes les actions filtrées avant enrichissement)."
    )
    # Minimum speedup: 3x.
    # Reference measurement recorded by the original author (2026-05-25,
    # 16-action fixture, 9 enriched): full=93.8s, skip=24.1s, speedup=3.9x.
    # Skip mode is not instantaneous (~24s) because other steps still cost
    # time; only the gemma4 enrichment is skipped. The initial 215x estimate
    # assumed gemma4 was the only large cost and was invalidated by that
    # measurement.
    assert speedup >= 3.0, (
        f"Gain insuffisant : {speedup:.1f}x (attendu ≥ 3x). "
        f"Soit gemma4 cache-hit, soit la fixture n'a pas d'action éligible, "
        f"soit Ollama indisponible (fallback rapide). full_ms={elapsed_full_ms:.0f}, "
        f"skip_ms={elapsed_skip_ms:.0f}."
    )
@pytest.mark.performance
def test_build_replay_skip_alias_works(monkeypatch, raw_events, session_dir):
    """The ``RPA_SKIP_ENRICHMENT`` alias alone must also yield zero intentions."""
    from agent_v0.server_v1.stream_processor import build_replay_from_raw_events

    # Only the alias is set; the primary variable is explicitly removed.
    monkeypatch.delenv("RPA_SKIP_INTENTION_ENRICHMENT", raising=False)
    monkeypatch.setenv("RPA_SKIP_ENRICHMENT", "1")

    actions = build_replay_from_raw_events(
        raw_events, session_id="perf_alias", session_dir=session_dir
    )
    enriched = [a for a in actions if a.get("intention")]
    intentions = len(enriched)

    print(f"\n[PERF] alias RPA_SKIP_ENRICHMENT actions={len(actions)} intentions={intentions}")
    assert intentions == 0, (
        f"L'alias RPA_SKIP_ENRICHMENT doit aussi désactiver l'enrichissement "
        f"(observé : {intentions} intentions)"
    )

View File

@@ -65,7 +65,7 @@ def test_tpl_need_confirm_extracts_action_description():
def test_tpl_need_confirm_fallback():
_, _, title = cw._tpl_need_confirm({})
assert "Validation" in title
assert "accord" in title
def test_tpl_step_result_ok():

View File

@@ -24,15 +24,19 @@ class TestReplayResumePreservesOriginalAction:
monkeypatch.setattr(api_stream_mod, "API_TOKEN", self._TEST_API_TOKEN)
@pytest.fixture
def client(self, monkeypatch):
def client(self, monkeypatch, tmp_path):
from fastapi.testclient import TestClient
from agent_v0.server_v1 import api_stream
from agent_v0.server_v1.agent_registry import AgentRegistry
monkeypatch.setattr(api_stream, "API_TOKEN", self._TEST_API_TOKEN)
saved_states = dict(api_stream._replay_states)
saved_queues = dict(api_stream._replay_queues)
saved_retry = dict(api_stream._retry_pending)
original_registry = api_stream.agent_registry
empty_registry = AgentRegistry(db_path=str(tmp_path / "empty_agents.db"))
monkeypatch.setattr(api_stream, "agent_registry", empty_registry)
api_stream._replay_states.clear()
api_stream._replay_queues.clear()
@@ -47,6 +51,7 @@ class TestReplayResumePreservesOriginalAction:
api_stream._replay_queues.update(saved_queues)
api_stream._retry_pending.clear()
api_stream._retry_pending.update(saved_retry)
monkeypatch.setattr(api_stream, "agent_registry", original_registry)
def test_resume_reinjects_full_original_action_from_failed_action(self, client):
http_client, api_stream, token = client
@@ -144,6 +149,7 @@ class TestReplayResumePreservesOriginalAction:
next_resp = http_client.get(
"/api/v1/traces/stream/replay/next",
params={"session_id": "sess_resume_watchdog", "machine_id": "pc-watchdog"},
headers={"Authorization": f"Bearer {token}"},
)
assert next_resp.status_code == 200

View File

@@ -104,11 +104,12 @@ def test_replay_session_pipeline_skips_redundant_tab_switch(tmp_path):
# 1) Setup auto reconnaît Notepad et génère ses actions
assert app_info.get("primary_app") == "Notepad.exe"
assert app_info.get("has_neutral_window_title") is True
setup_actions = _generate_setup_actions(app_info, setup_id_prefix="setup_sess")
assert setup_actions, "le setup auto doit injecter des actions Notepad"
action_ids = {a.get("action_id", "") for a in setup_actions}
assert any("click_start" in aid for aid in action_ids)
assert any("click_result" in aid for aid in action_ids)
setup_steps = [a.get("_setup_step", "") for a in setup_actions]
assert "open_run_dialog" in setup_steps
assert "ensure_fresh_document" in setup_steps
# 2) Trim : le clic intra-Notepad redondant doit disparaître
trimmed = _trim_redundant_setup_events(raw_events, app_info)

View File

@@ -213,6 +213,24 @@ def test_edge_to_action_extract_text():
assert a["parameters"]["paragraph"] is True
def test_edge_to_action_extract_table_accepts_tesseract_engine_and_variable_name():
    """``extract_table`` maps ``variable_name`` to ``output_var`` and keeps pattern and engine."""
    source_parameters = {
        "variable_name": "t_extraction_liste",
        "pattern": r"^25\d{6}$",
        "engine": "tesseract",
    }
    edge = _FakeEdge(_FakeAction("extract_table", parameters=source_parameters))

    actions = _edge_to_normalized_actions(edge, params={})

    assert len(actions) == 1
    normalized = actions[0]
    assert normalized["type"] == "extract_table"
    assert normalized["parameters"]["output_var"] == "t_extraction_liste"
    assert normalized["parameters"]["pattern"] == r"^25\d{6}$"
    assert normalized["parameters"]["engine"] == "tesseract"
def test_edge_to_action_t2a_decision():
edge = _FakeEdge(_FakeAction(
"t2a_decision",

View File

@@ -0,0 +1,86 @@
"""Tests de non-régression pour le fix CORS engineio sur le service
rpa-agent-chat (port 5004).
Avant fix : les origines `http://192.168.1.40:5004` (self loopback) et
`http://192.168.1.11:5004` (Léa Windows) étaient rejetées par engineio,
provoquant `is not an accepted origin` dans le journal (24 mai 2026).
Fix : élargissement de `_ALLOWED_ORIGINS` dans agent_chat/app.py l. 83-99,
plus override possible via `LEA_CORS_ALLOWED_ORIGINS=comma,separated`.
Référence : inbox_codex/2026-05-25_1235_..._enquete-feedbackbus-5004.md
"""
from __future__ import annotations
import importlib
import sys
from pathlib import Path
import pytest
ROOT = Path(__file__).resolve().parents[2]
if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT))
@pytest.mark.unit
def test_lan_self_loopback_origin_allowed():
    """The server must accept its own origin ``http://192.168.1.40:5004``."""
    from agent_chat import app

    allowed = app._ALLOWED_ORIGINS
    assert "http://192.168.1.40:5004" in allowed, (
        "Origine self loopback 5004 absente — engineio va rejeter les "
        "connexions SocketIO depuis le serveur lui-même (cf. journal "
        "2026-05-24 11:00:47)."
    )
@pytest.mark.unit
def test_lan_lea_windows_origin_allowed():
    """The server must accept the Léa Windows origin ``http://192.168.1.11:5004``."""
    from agent_chat import app

    allowed = app._ALLOWED_ORIGINS
    assert "http://192.168.1.11:5004" in allowed, (
        "Origine Léa Windows 5004 absente — la ChatWindow tkinter ne peut "
        "pas établir une session SocketIO."
    )
@pytest.mark.unit
def test_legacy_origins_preserved():
    """Historical origins must remain accepted (no regression)."""
    from agent_chat import app

    legacy_origins = (
        "http://localhost:3002",
        "http://localhost:5002",
        "https://vwb.labs.laurinebazin.design",
        "https://lea.labs.laurinebazin.design",
    )
    for origin in legacy_origins:
        assert origin in app._ALLOWED_ORIGINS, f"Origine historique perdue : {origin}"
@pytest.mark.unit
def test_env_override_extends_allowed_origins(monkeypatch):
    """``LEA_CORS_ALLOWED_ORIGINS=...`` extends the default origin list.

    The module is reloaded under a scoped environment override, then
    reloaded again once the override is undone. Without that second reload
    the extra origins would stay in ``agent_chat.app`` for later tests.
    """
    import agent_chat.app as app_module

    try:
        with monkeypatch.context() as scoped:
            scoped.setenv(
                "LEA_CORS_ALLOWED_ORIGINS",
                "https://demo.client.example,http://10.0.0.5:5004",
            )
            # Reload so the module re-reads the environment.
            importlib.reload(app_module)
            origins = list(app_module._ALLOWED_ORIGINS)
    finally:
        # The override is already undone here; rebuild the module state
        # from the original environment.
        importlib.reload(app_module)

    assert "https://demo.client.example" in origins
    assert "http://10.0.0.5:5004" in origins
    # Default origins are still present.
    assert "http://192.168.1.40:5004" in origins
@pytest.mark.unit
def test_env_override_empty_keeps_defaults(monkeypatch):
    """An empty ``LEA_CORS_ALLOWED_ORIGINS`` leaves the default origins in place."""
    monkeypatch.setenv("LEA_CORS_ALLOWED_ORIGINS", "")
    import agent_chat.app as app_module

    importlib.reload(app_module)
    allowed = app_module._ALLOWED_ORIGINS

    assert "http://192.168.1.40:5004" in allowed
    assert len(allowed) >= 9, (
        "Liste tronquée : attendu au moins 9 origines par défaut"
    )

View File

@@ -0,0 +1,526 @@
"""Tests unit pour agent_chat.handlers.learn_action.
Couvre :
- LearnIntentParser (regex)
- OptionCFormatter
- StateStore (write atomique + reprise)
- LearnActionOrchestrator (transitions, garde-fous, persistance)
- PersistPayloadBuilder
"""
from __future__ import annotations
import json
import os
from pathlib import Path
from typing import Any, Dict, List
from unittest.mock import MagicMock
import pytest
from agent_chat.handlers.learn_action import (
LearnActionOrchestrator,
LearnIntent,
LearnIntentParser,
LearnState,
OptionCFormatter,
PersistPayloadBuilder,
SessionState,
StateStore,
)
# ============================================================
# LearnIntentParser
# ============================================================
class TestLearnIntentParser:
    """Regex-path tests for ``LearnIntentParser`` (LLM fallback off unless stated)."""

    def setup_method(self):
        """Create a parser with the LLM fallback disabled to isolate the regex rules."""
        self.parser = LearnIntentParser(use_llm_fallback=False)

    @pytest.mark.parametrize(
        "msg",
        [
            "apprends-moi",
            "Apprends moi",
            "regarde-moi faire",
            "observe",
            "enregistre",
            "on apprend",
            "tu vas apprendre",
            "Léa apprends",
        ],
    )
    def test_start_observe(self, msg):
        """Learning triggers said while IDLE map to START_OBSERVE with confidence >= 0.9."""
        result = self.parser.parse(msg, current_state=LearnState.IDLE)
        assert result.intent == LearnIntent.START_OBSERVE
        assert result.confidence >= 0.9

    @pytest.mark.parametrize(
        "msg",
        [
            "stop",
            "c'est bon",
            "j'ai fini",
            "voilà c'est tout",
            "fini",
            "arrête",
            "termine",
        ],
    )
    def test_user_stop_observe(self, msg):
        """End-of-demonstration phrases map to USER_STOP_OBSERVE while waiting for the stop."""
        result = self.parser.parse(msg, current_state=LearnState.WAITING_USER_STOP)
        assert result.intent == LearnIntent.USER_STOP_OBSERVE

    def test_correct_step_with_index(self):
        """A correction request carries the step index and the new intent text."""
        result = self.parser.parse(
            "Corrige l'étape 3 : il faut cliquer sur Valider",
            current_state=LearnState.ITERATING_FEEDBACK,
        )
        new_intent = result.extra.get("new_intent") or ""
        assert result.intent == LearnIntent.CORRECT_STEP
        assert result.step_index == 3
        assert "valider" in new_intent.lower()

    def test_undo_step(self):
        """A removal request maps to UNDO_STEP with the step index."""
        result = self.parser.parse(
            "Retire l'étape 2", current_state=LearnState.ITERATING_FEEDBACK
        )
        assert result.intent == LearnIntent.UNDO_STEP
        assert result.step_index == 2

    def test_merge_next(self):
        """A merge request maps to MERGE_NEXT."""
        result = self.parser.parse(
            "Fusionne avec la suivante", current_state=LearnState.ITERATING_FEEDBACK
        )
        assert result.intent == LearnIntent.MERGE_NEXT

    def test_split_step(self):
        """A split request maps to SPLIT_STEP with the step index."""
        result = self.parser.parse(
            "Coupe l'étape 4", current_state=LearnState.ITERATING_FEEDBACK
        )
        assert result.intent == LearnIntent.SPLIT_STEP
        assert result.step_index == 4

    def test_cancel(self):
        """A cancel phrase said while LISTENING maps to CANCEL."""
        result = self.parser.parse("annule tout", current_state=LearnState.LISTENING)
        assert result.intent == LearnIntent.CANCEL

    def test_validate_in_iterating(self):
        """An approval said during feedback iteration maps to VALIDATE_STEP."""
        result = self.parser.parse(
            "c'est parfait", current_state=LearnState.ITERATING_FEEDBACK
        )
        assert result.intent == LearnIntent.VALIDATE_STEP

    def test_mark_parameter_variable(self):
        """"Changes every time" marks the value as a parameter."""
        result = self.parser.parse(
            "ça change à chaque fois", current_state=LearnState.NAMING
        )
        assert result.intent == LearnIntent.MARK_PARAMETER
        assert result.extra.get("is_parameter") is True

    def test_mark_parameter_constant(self):
        """"Always the same" marks the value as a constant."""
        result = self.parser.parse(
            "toujours pareil", current_state=LearnState.NAMING
        )
        assert result.intent == LearnIntent.MARK_PARAMETER
        assert result.extra.get("is_parameter") is False

    def test_name_competence_when_naming(self):
        """Free text said while NAMING is taken as the competence name."""
        result = self.parser.parse(
            "facturation urgences", current_state=LearnState.NAMING
        )
        assert result.intent == LearnIntent.NAME_COMPETENCE
        assert "facturation" in (result.extra.get("name") or "")

    def test_unknown_in_idle(self):
        """Unrelated text said while IDLE maps to UNKNOWN."""
        result = self.parser.parse(
            "blabla random", current_state=LearnState.IDLE
        )
        assert result.intent == LearnIntent.UNKNOWN

    def test_llm_fallback_disabled_after_failure(self, monkeypatch):
        """With the LLM fallback on but yielding nothing, parsing still ends in UNKNOWN."""
        parser = LearnIntentParser(use_llm_fallback=True)
        # Stub the LLM step so it returns None instead of calling out.
        parser._parse_llm = lambda *args, **kwargs: None  # type: ignore[method-assign]

        result = parser.parse("zorglub blabla truc", current_state=LearnState.IDLE)

        # Must fall back gracefully to UNKNOWN without crashing.
        assert result.intent == LearnIntent.UNKNOWN
# ============================================================
# OptionCFormatter
# ============================================================
class TestOptionCFormatter:
    """Rendering tests for ``OptionCFormatter``."""

    def setup_method(self):
        """Create a fresh formatter for each test."""
        self.fmt = OptionCFormatter()

    def test_empty(self):
        """An empty understanding is reported as having no step."""
        rendered = self.fmt.format([])
        assert "aucune étape" in rendered

    def test_simple_click(self):
        """A click step is numbered, quotes its target label and uses the click verb."""
        step = {"action_type": "click", "target_label": "Valider", "widget_type": "Bouton"}
        rendered = self.fmt.format([step])
        assert "1." in rendered
        assert "« Valider »" in rendered
        assert "cliqué" in rendered

    def test_type_with_value(self):
        """A typing step quotes both the field label and the typed value."""
        step = {
            "action_type": "type",
            "target_label": "IPP",
            "widget_type": "Champ",
            "value": "25003284",
        }
        rendered = self.fmt.format([step])
        assert "« IPP »" in rendered
        assert "« 25003284 »" in rendered
        assert "saisi" in rendered

    def test_low_confidence_suffix(self):
        """A step with ``confidence_ocr`` 0.4 gets the "to be confirmed" suffix."""
        step = {
            "action_type": "click",
            "target_label": "Patient",
            "widget_type": "Fenêtre",
            "confidence_ocr": 0.4,
        }
        rendered = self.fmt.format([step])
        assert "(à confirmer)" in rendered

    def test_unknown_action_fallback(self):
        """An unrecognized action type falls back to the generic verb."""
        step = {"action_type": "wibble", "target_label": "X"}
        rendered = self.fmt.format([step])
        assert "effectuée" in rendered

    def test_closing_question(self):
        """The closing question asks whether a mistake was made (accented or not)."""
        question = self.fmt.closing_question()
        unaccented = question.lower().replace("é", "e")
        assert "trompée" in question or "trompee" in unaccented
# ============================================================
# StateStore
# ============================================================
class TestStateStore:
    """Persistence tests for ``StateStore`` backed by a temporary directory."""

    def test_save_and_load(self, tmp_path):
        """A saved session is loaded back with the same id, user and state."""
        store = StateStore(tmp_path)
        original = SessionState(
            session_id="abc123",
            user_id="dom",
            state=LearnState.ITERATING_FEEDBACK,
        )
        store.save(original)

        loaded = store.load("abc123")

        assert loaded is not None
        assert loaded.session_id == "abc123"
        assert loaded.user_id == "dom"
        assert loaded.state == LearnState.ITERATING_FEEDBACK

    def test_atomic_write_no_partial(self, tmp_path):
        """Saving leaves no ``*.tmp`` file behind in the store directory."""
        store = StateStore(tmp_path)
        store.save(SessionState(session_id="atomic1"))
        leftovers = list(tmp_path.glob("*.tmp"))
        assert leftovers == []

    def test_list_active_filters_done(self, tmp_path):
        """``list_active`` leaves out sessions in DONE or ABORTED state."""
        store = StateStore(tmp_path)
        samples = (
            ("s1", LearnState.ITERATING_FEEDBACK),
            ("s2", LearnState.DONE),
            ("s3", LearnState.ABORTED),
        )
        for session_id, state in samples:
            store.save(SessionState(session_id=session_id, state=state))

        active_ids = {session.session_id for session in store.list_active()}

        assert active_ids == {"s1"}

    def test_session_id_sanitized(self, tmp_path):
        """A path-traversal session id produces exactly one JSON file inside the store directory."""
        store = StateStore(tmp_path)
        store.save(SessionState(session_id="../../etc/passwd"))

        json_files = list(tmp_path.glob("*.json"))

        assert len(json_files) == 1
        assert json_files[0].parent == tmp_path

    def test_delete(self, tmp_path):
        """A deleted session can no longer be loaded."""
        store = StateStore(tmp_path)
        store.save(SessionState(session_id="del_me"))
        store.delete("del_me")
        assert store.load("del_me") is None
# ============================================================
# PersistPayloadBuilder
# ============================================================
class TestPersistPayloadBuilder:
    """Payload-shape tests for ``PersistPayloadBuilder.build``."""

    def test_build_with_parameters(self):
        """Only entries flagged ``is_parameter=True`` end up in the payload parameters."""
        variable_field = {
            "step_index": 3,
            "is_parameter": True,
            "name": "ipp",
            "example_value": "25003284",
            "field_label": "IPP",
        }
        constant_field = {
            "step_index": 4,
            "is_parameter": False,
            "name": "type",
            "example_value": "C2",
            "field_label": "Type",
        }
        session = SessionState(
            session_id="sX",
            competence_name="Test compétence",
            user_id="dom",
            parameters_marked=[variable_field, constant_field],
        )

        payload = PersistPayloadBuilder().build(session)

        assert payload["name"] == "Test compétence"
        assert payload["session_id"] == "sX"
        assert payload["user_id"] == "dom"
        # Only the entry flagged is_parameter=True must appear.
        assert len(payload["parameters"]) == 1
        assert payload["parameters"][0]["name"] == "ipp"

    def test_persist_payload_includes_machine_id(self):
        """Correction #1: the payload must carry the session's ``machine_id``."""
        session = SessionState(
            session_id="sM",
            competence_name="X",
            machine_id="DESKTOP-58D5CAC_windows",
        )

        payload = PersistPayloadBuilder().build(session)

        assert "machine_id" in payload
        assert payload["machine_id"] == "DESKTOP-58D5CAC_windows"

    def test_persist_payload_machine_id_none_when_absent(self):
        """When not provided, ``machine_id`` is still present in the payload, set to ``None``."""
        session = SessionState(session_id="sM2", competence_name="X")

        payload = PersistPayloadBuilder().build(session)

        assert "machine_id" in payload
        assert payload["machine_id"] is None
# ============================================================
# LearnActionOrchestrator (avec StreamingClient mocké)
# ============================================================
@pytest.fixture
def mock_streaming():
    """Provide a ``MagicMock`` streaming client with canned answers for every call used by the tests."""
    understood_steps = [
        {"action_type": "click", "target_label": "Patient", "widget_type": "Fenêtre"},
        {
            "action_type": "type",
            "target_label": "IPP",
            "widget_type": "Champ",
            "value": "25003284",
        },
    ]
    streaming = MagicMock()
    streaming.shadow_start.return_value = {"ok": True}
    streaming.shadow_stop.return_value = {"ok": True}
    streaming.shadow_understanding.return_value = {"understanding": understood_steps}
    streaming.shadow_feedback.return_value = {"ok": True}
    streaming.shadow_build.return_value = {"ok": True}
    streaming.competence_persist.return_value = {"slug": "facturation_urgences"}
    return streaming
@pytest.fixture
def orchestrator(tmp_path, mock_streaming):
    """Build an orchestrator on the mocked streaming client, a regex-only parser and a temp store."""
    return LearnActionOrchestrator(
        streaming_client=mock_streaming,
        intent_parser=LearnIntentParser(use_llm_fallback=False),
        state_store=StateStore(tmp_path),
        emit=MagicMock(),
    )
class TestLearnActionOrchestrator:
    """Conversation-level tests for LearnActionOrchestrator.

    Each test drives the orchestrator with chat messages and checks the
    resulting ``LearnState`` and the calls made on the mocked streaming
    client (``mock_streaming`` fixture).
    """

    def test_start_session_transitions(self, orchestrator, mock_streaming):
        """start_session calls shadow_start once and waits for the user's stop."""
        st, reply = orchestrator.start_session(user_id="dom", trigger_source="button")
        assert st.state == LearnState.WAITING_USER_STOP
        mock_streaming.shadow_start.assert_called_once()
        # "regarde" is a substring of "je te regarde": one check covers both wordings.
        assert "regarde" in reply.lower()

    def test_full_happy_path(self, orchestrator, mock_streaming):
        """Stop, validate, name, mark the parameter variable, then persist (DONE)."""
        st, _ = orchestrator.start_session(user_id="dom", machine_id="m1")
        sid = st.session_id

        # The user signals the demonstration is over.
        reply = orchestrator.handle_chat_message(sid, "c'est bon")
        assert reply is not None
        assert "j'ai compris" in reply.lower()
        assert orchestrator._sessions[sid].state == LearnState.ITERATING_FEEDBACK

        # Global validation moves the session to NAMING.
        orchestrator.handle_chat_message(sid, "c'est parfait")
        assert orchestrator._sessions[sid].state == LearnState.NAMING

        # Naming the competence: the reply must now ask about the IPP value.
        reply = orchestrator.handle_chat_message(sid, "facturation urgences")
        assert "25003284" in (reply or "")
        assert orchestrator._sessions[sid].competence_name == "facturation urgences"

        # Marking the parameter as variable leaves nothing pending, so build + persist run.
        orchestrator.handle_chat_message(sid, "ça change à chaque fois")
        mock_streaming.shadow_build.assert_called_once()
        mock_streaming.competence_persist.assert_called_once()
        assert orchestrator._sessions[sid].state == LearnState.DONE

    def test_emergency_exit_after_3_corrections(self, orchestrator, mock_streaming):
        """Three corrections are tolerated; the fourth aborts the session."""
        st, _ = orchestrator.start_session(user_id="dom")
        sid = st.session_id
        correction = "corrige l'étape 3 : clique sur Valider"
        orchestrator.handle_chat_message(sid, "c'est bon")  # stop

        for _ in range(3):
            orchestrator.handle_chat_message(sid, correction)
            assert orchestrator._sessions[sid].state == LearnState.ITERATING_FEEDBACK

        # Fourth correction -> ABORTED, and the reply mentions step "n°3".
        r = orchestrator.handle_chat_message(sid, correction)
        assert orchestrator._sessions[sid].state == LearnState.ABORTED
        assert "n°3" in (r or "")

    def test_cancel_anywhere(self, orchestrator, mock_streaming):
        """A cancel message aborts the session straight from WAITING_USER_STOP."""
        st, _ = orchestrator.start_session(user_id="dom")
        sid = st.session_id
        reply = orchestrator.handle_chat_message(sid, "annule tout")
        assert orchestrator._sessions[sid].state == LearnState.ABORTED
        assert "annule" in (reply or "").lower()

    def test_idle_message_returns_none(self, orchestrator):
        """With no open session, None is returned so the normal chat flow takes over."""
        r = orchestrator.handle_chat_message("nonexistent", "Bonjour")
        assert r is None

    def test_state_persistence_across_reload(self, tmp_path, mock_streaming):
        """A second orchestrator on the same StateStore resumes the session state."""
        store = StateStore(tmp_path)
        parser = LearnIntentParser(use_llm_fallback=False)
        orch1 = LearnActionOrchestrator(
            streaming_client=mock_streaming,
            intent_parser=parser,
            state_store=store,
            emit=MagicMock(),
        )
        st, _ = orch1.start_session(user_id="dom")
        sid = st.session_id
        orch1.handle_chat_message(sid, "c'est bon")  # moves to ITERATING_FEEDBACK

        # Simulate a crash + restart: a fresh orchestrator over the same store.
        orch2 = LearnActionOrchestrator(
            streaming_client=mock_streaming,
            intent_parser=parser,
            state_store=store,
            emit=MagicMock(),
        )
        resumed = orch2.resume_sessions()
        assert sid in resumed
        assert orch2._sessions[sid].state == LearnState.ITERATING_FEEDBACK

    def test_proactive_signal_cooldown(self, orchestrator):
        """A second identical proactive signal sent immediately is ignored."""
        r1 = orchestrator.handle_proactive_signal("action_repeat", {})
        assert r1 is not None
        r2 = orchestrator.handle_proactive_signal("action_repeat", {})
        assert r2 is None

    def test_illegal_transition_ignored(self, orchestrator, mock_streaming):
        """Jumping from WAITING_USER_STOP to DONE leaves the state unchanged."""
        st, _ = orchestrator.start_session(user_id="dom")
        session = orchestrator._sessions[st.session_id]
        prev = session.state
        orchestrator._transition(session, LearnState.DONE)
        assert orchestrator._sessions[st.session_id].state == prev

    # ============================================================
    # P1-LEA-SHADOW corrections, 2026-06-01 (NO-GO Qwen)
    # ============================================================

    def test_start_session_stores_machine_id(self, orchestrator):
        """Correction #1: the machine_id given to start_session is stored."""
        st, _ = orchestrator.start_session(
            user_id="dom",
            trigger_source="windows_button",
            machine_id="DESKTOP-58D5CAC_windows",
        )
        assert st.machine_id == "DESKTOP-58D5CAC_windows"
        # The in-memory session must carry it as well.
        assert (
            orchestrator._sessions[st.session_id].machine_id
            == "DESKTOP-58D5CAC_windows"
        )

    def test_persist_blocked_without_machine_id(self, orchestrator, mock_streaming):
        """Correction #1: without machine_id, persist is refused in conversation."""
        st, _ = orchestrator.start_session(user_id="dom")  # no machine_id
        sid = st.session_id
        orchestrator.handle_chat_message(sid, "c'est bon")  # -> ITERATING
        orchestrator.handle_chat_message(sid, "c'est parfait")  # -> NAMING
        orchestrator.handle_chat_message(sid, "ma competence")  # name
        # Marking the parameter triggers the persist attempt.
        reply = orchestrator.handle_chat_message(sid, "ça change à chaque fois")
        # competence_persist must NOT have been called...
        mock_streaming.competence_persist.assert_not_called()
        # ...and the reply must explain the problem to the user.
        assert reply is not None
        assert "machine" in reply.lower()

    def test_datetime_uses_timezone_aware(self):
        """Correction #2: created_at / last_transition_at are timezone-aware."""
        from datetime import datetime as _dt
        from datetime import timedelta as _td

        st = SessionState(session_id="tz1")
        # Parsing a trailing "Z" with fromisoformat requires Python 3.11+.
        parsed_created = _dt.fromisoformat(st.created_at)
        parsed_transition = _dt.fromisoformat(st.last_transition_at)
        assert parsed_created.tzinfo is not None
        assert parsed_transition.tzinfo is not None
        # Sanity check that it really is UTC. Compare the parsed offset rather
        # than matching "+00:00"/"Z" as a substring of the raw string.
        assert parsed_created.utcoffset() == _td(0)

    def test_confirm_blocked_when_name_missing(self, orchestrator, mock_streaming):
        """Correction #3: CONFIRM in NAMING with competence_name=None stays in NAMING."""
        st, _ = orchestrator.start_session(
            user_id="dom", machine_id="machine_x"
        )
        sid = st.session_id
        orchestrator.handle_chat_message(sid, "c'est bon")
        orchestrator.handle_chat_message(sid, "c'est parfait")  # -> NAMING
        # Force the name to None, then send a CONFIRM.
        orchestrator._sessions[sid].competence_name = None
        reply = orchestrator.handle_chat_message(sid, "ok")  # CONFIRM
        assert orchestrator._sessions[sid].state == LearnState.NAMING
        assert reply is not None
        assert "nom" in reply.lower() or "appeler" in reply.lower()
        mock_streaming.competence_persist.assert_not_called()

    def test_confirm_blocked_when_name_empty(self, orchestrator, mock_streaming):
        """Correction #3: CONFIRM in NAMING with a blank competence_name stays in NAMING."""
        st, _ = orchestrator.start_session(
            user_id="dom", machine_id="machine_x"
        )
        sid = st.session_id
        orchestrator.handle_chat_message(sid, "c'est bon")
        orchestrator.handle_chat_message(sid, "c'est parfait")  # -> NAMING
        orchestrator._sessions[sid].competence_name = "   "  # empty once stripped
        reply = orchestrator.handle_chat_message(sid, "ok")
        assert orchestrator._sessions[sid].state == LearnState.NAMING
        assert reply is not None
        assert "nom" in reply.lower() or "appeler" in reply.lower()
        mock_streaming.competence_persist.assert_not_called()

View File

@@ -0,0 +1,121 @@
"""Tests pour le feature flag AGENT_CHAT_ENABLE_OWL (C1b).
Contexte : depuis 2026-05-25, OWL-v2 ne se charge plus au boot du service
rpa-agent-chat par défaut (économie ~600 MiB VRAM constatée par Codex après
restart C1). Activation via AGENT_CHAT_ENABLE_OWL=1.
Référence : inbox_claude/2026-05-25_1327_codex-to-claude_C1-post-restart-ok-c1b-vram.md
Fix : agent_chat/autonomous_planner.py _init_visual_detection() l. 139-...
"""
from __future__ import annotations
import sys
from pathlib import Path
import pytest
ROOT = Path(__file__).resolve().parents[2]
if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT))
@pytest.mark.unit
def test_owl_skipped_by_default(monkeypatch):
    """With AGENT_CHAT_ENABLE_OWL unset, the planner must boot without OWL."""
    monkeypatch.delenv("AGENT_CHAT_ENABLE_OWL", raising=False)

    from agent_chat.autonomous_planner import AutonomousPlanner

    detector = AutonomousPlanner(llm_model="qwen2.5:7b")._owl_detector
    assert detector is None, (
        f"OWL chargé alors que flag OFF (économie VRAM perdue) : "
        f"{detector}"
    )
@pytest.mark.unit
def test_owl_skipped_when_flag_zero(monkeypatch):
    """AGENT_CHAT_ENABLE_OWL=0 leaves the OWL detector unset."""
    monkeypatch.setenv("AGENT_CHAT_ENABLE_OWL", "0")

    from agent_chat.autonomous_planner import AutonomousPlanner

    assert AutonomousPlanner(llm_model="qwen2.5:7b")._owl_detector is None
@pytest.mark.unit
def test_owl_skipped_when_flag_false(monkeypatch):
    """AGENT_CHAT_ENABLE_OWL=false is treated as "off" too (accepted alias)."""
    monkeypatch.setenv("AGENT_CHAT_ENABLE_OWL", "false")

    from agent_chat.autonomous_planner import AutonomousPlanner

    assert AutonomousPlanner(llm_model="qwen2.5:7b")._owl_detector is None
@pytest.mark.unit
def test_owl_init_attempted_when_flag_one(monkeypatch):
    """AGENT_CHAT_ENABLE_OWL=1 makes the planner instantiate the detector.

    This does not check that the real OWL model loads (that depends on the
    GPU and on the HF model being available). OwlDetector is replaced by a
    recording fake, and the test checks it was built exactly once with
    ``confidence_threshold=0.1``.
    """
    monkeypatch.setenv("AGENT_CHAT_ENABLE_OWL", "1")

    from agent_chat import autonomous_planner as ap_module

    recorded_kwargs = []

    class FakeOwl:
        def __init__(self, **kwargs):
            recorded_kwargs.append(kwargs)

    monkeypatch.setattr(ap_module, "OwlDetector", FakeOwl)
    monkeypatch.setattr(ap_module, "VISUAL_DETECTION_AVAILABLE", True)

    planner = ap_module.AutonomousPlanner(llm_model="qwen2.5:7b")

    assert planner._owl_detector is not None, (
        "OWL doit être instancié quand AGENT_CHAT_ENABLE_OWL=1"
    )
    assert len(recorded_kwargs) == 1
    (only_call,) = recorded_kwargs
    assert only_call.get("confidence_threshold") == 0.1
@pytest.mark.unit
def test_owl_device_override(monkeypatch):
    """AGENT_CHAT_OWL_DEVICE=cpu is forwarded to the detector as ``device="cpu"``.

    OwlDetector is replaced by a fake that records its constructor kwargs.
    """
    monkeypatch.setenv("AGENT_CHAT_ENABLE_OWL", "1")
    monkeypatch.setenv("AGENT_CHAT_OWL_DEVICE", "cpu")

    from agent_chat import autonomous_planner as ap_module

    calls = []

    class FakeOwl:
        def __init__(self, **kwargs):
            calls.append(kwargs)

    monkeypatch.setattr(ap_module, "OwlDetector", FakeOwl)
    monkeypatch.setattr(ap_module, "VISUAL_DETECTION_AVAILABLE", True)

    ap_module.AutonomousPlanner(llm_model="qwen2.5:7b")

    # Guard the index below: if the detector was never built, fail with a
    # readable assertion instead of an IndexError on calls[0].
    assert len(calls) == 1, (
        "OwlDetector should be instantiated exactly once when the flag is on"
    )
    assert calls[0].get("device") == "cpu"
@pytest.mark.unit
def test_owl_init_exception_caught(monkeypatch):
    """A detector that raises at init must not stop AutonomousPlanner booting.

    The fake detector raises RuntimeError (simulated CUDA OOM); the planner
    is expected to come up with ``_owl_detector`` left at None.
    """
    monkeypatch.setenv("AGENT_CHAT_ENABLE_OWL", "1")

    from agent_chat import autonomous_planner as ap_module

    class CrashOwl:
        def __init__(self, **kwargs):
            raise RuntimeError("CUDA out of memory (simulation)")

    monkeypatch.setattr(ap_module, "OwlDetector", CrashOwl)
    monkeypatch.setattr(ap_module, "VISUAL_DETECTION_AVAILABLE", True)

    detector = ap_module.AutonomousPlanner(llm_model="qwen2.5:7b")._owl_detector
    assert detector is None, (
        "L'exception doit être catchée — AutonomousPlanner ne doit pas crash"
    )

View File

@@ -120,7 +120,7 @@ class TestDispatchPausedAction:
class TestPausedBubbleHeight:
"""Couvre _compute_paused_bubble_height — patch troncature 22 mai 2026."""
"""Couvre _compute_paused_bubble_height — anti-troncature pause UI."""
def test_empty_message_uses_minimum_height(self):
h, scroll = ChatWindow._compute_paused_bubble_height("")
@@ -133,10 +133,27 @@ class TestPausedBubbleHeight:
assert scroll is False
def test_long_single_line_triggers_scrollbar(self):
# ~600 chars sans \n → wrapped_lines = 600 // 60 + 1 = 11
msg = "x" * 600
h, scroll = ChatWindow._compute_paused_bubble_height(msg)
assert h == 11
assert h == 12
assert scroll is True
def test_narrow_window_estimate_keeps_wrong_window_message_visible(self):
    """A wrong_window message in a narrow window gets enough rows.

    Case seen on Windows: with the Léa window around 380px wide, the message
    was cut off after "attendu". At about 34 characters per line, the
    estimate must give at least 7 rows and turn the scrollbar on.
    """
    parts = [
        "Je m'attendais à voir la bonne fenêtre mais je vois autre chose. ",
        "Peux-tu vérifier que l'application est au premier plan ? ",
        "(Fenêtre incorrecte : attendu ",
        "'http192.168.1.408765dossier.htmlid=.txt - Bloc-notes', ",
        "actuel 'Program Manager')",
    ]
    rows, needs_scroll = ChatWindow._compute_paused_bubble_height(
        "".join(parts),
        chars_per_line=34,
    )
    assert rows >= 7
    assert needs_scroll is True
def test_message_with_many_newlines_uses_explicit_count(self):
@@ -150,11 +167,11 @@ class TestPausedBubbleHeight:
assert scroll is False
def test_cap_reached_triggers_scrollbar_even_if_short(self):
"""Quand on dépasse le cap (12 lignes), la scrollbar DOIT
"""Quand on dépasse le cap, la scrollbar DOIT
s'afficher quel que soit la longueur en caractères."""
msg = "\n".join([f"l{i}" for i in range(20)])
h, scroll = ChatWindow._compute_paused_bubble_height(msg)
assert h == 12 # plafond
assert h == 14 # plafond
assert scroll is True
def test_long_content_triggers_scrollbar_at_200_chars(self):
@@ -163,3 +180,18 @@ class TestPausedBubbleHeight:
msg = "x" * 220
h, scroll = ChatWindow._compute_paused_bubble_height(msg)
assert scroll is True
def test_dynamic_small_viewport_caps_rows_and_scrolls(self):
    """With ``max_rows=5``, a long message is capped at 5 rows and scrolls."""
    parts = [
        "Je m'attendais à voir la bonne fenêtre mais je vois autre chose. ",
        "Peux-tu vérifier que l'application est au premier plan ? ",
        "(Post-vérif échouée : fenêtre '*test Bloc-notes' au lieu de ",
        "'Enregistrer sous')",
    ]
    rows, needs_scroll = ChatWindow._compute_paused_bubble_height(
        "".join(parts),
        chars_per_line=32,
        max_rows=5,
    )
    assert rows == 5
    assert needs_scroll is True

View File

@@ -0,0 +1,269 @@
"""Tests C2d-bis : short-circuit SomEngine + _gemma4_read_element au build.
Niveau A : si vision_info.text non vide → SomEngine pas appelé (faible risque,
comportement par défaut depuis 2026-05-25).
Niveau B : flag RPA_SKIP_BUILD_VISION (ou alias RPA_SKIP_BUILD_VLM) actif →
SomEngine + _gemma4_read_element jamais appelés, même si
vision_info.text vide.
Référence : inbox_claude/2026-05-25_1700_codex-to-claude_AMEND-C2d-bis-gemini-short-circuit.md
Découverte C2c : inbox_codex/2026-05-25_1500_claude-to-codex_C2c-analyse-step4-crops.md
"""
from __future__ import annotations
import sys
import tempfile
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
ROOT = Path(__file__).resolve().parents[2]
if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT))
@pytest.fixture
def fake_screenshot(tmp_path):
    """Write a plain grey 1920x1080 PNG under ``tmp_path/shots`` and return its path."""
    from PIL import Image

    shots_dir = tmp_path / "shots"
    shots_dir.mkdir(parents=True, exist_ok=True)
    target = shots_dir / "shot_0001_full.png"
    Image.new("RGB", (1920, 1080), color=(128, 128, 128)).save(target, "PNG")
    return target
def _make_session_dir(tmp_path):
    """Create ``<tmp_path>/session/shots`` (empty) and return the session directory.

    NOTE(review): the empty ``shots/`` folder is presumably there so that a
    directory check in the code under test passes. Confirm against
    ``enrich_click_from_screenshot``.
    """
    session_dir = tmp_path / "session"
    session_dir.joinpath("shots").mkdir(parents=True, exist_ok=True)
    return session_dir
# ────────────────────────────────────────────────────────────────────────────
# Niveau A — short-circuit vision_info.text
# ────────────────────────────────────────────────────────────────────────────
@pytest.mark.unit
def test_niveau_a_vision_info_text_skips_som_and_gemma4(
    monkeypatch, fake_screenshot, tmp_path
):
    """Level A: non-empty vision_info.text means SomEngine and gemma4 are never called."""
    for flag in ("RPA_SKIP_BUILD_VISION", "RPA_SKIP_BUILD_VLM"):
        monkeypatch.delenv(flag, raising=False)

    from agent_v0.server_v1 import stream_processor as sp

    # Recording stand-ins: any call to them is a test failure.
    som_calls = []
    gemma_calls = []
    monkeypatch.setattr(
        sp,
        "_som_identify_clicked_element",
        lambda *args, **kwargs: som_calls.append(args)
        or {"label": "should_not_be_used", "source": "som"},
    )
    monkeypatch.setattr(
        sp,
        "_gemma4_read_element",
        lambda *args, **kwargs: gemma_calls.append(args) or "should_not_be_used",
    )

    result = sp.enrich_click_from_screenshot(
        screenshot_path=fake_screenshot,
        click_x=500,
        click_y=300,
        screen_w=1920,
        screen_h=1080,
        window_title="Bloc-notes",
        vision_info={"text": "Enregistrer", "type": "button"},
        session_dir=_make_session_dir(tmp_path),
        screenshot_id="shot_0001",
    )

    assert len(som_calls) == 0, f"SomEngine appelé alors que vision_info.text présent : {som_calls}"
    assert len(gemma_calls) == 0, f"_gemma4_read_element appelé : {gemma_calls}"

    # The enriched action still carries every critical field.
    assert result["by_text"] == "Enregistrer"
    assert result["by_text_source"] == "ocr"
    assert result["by_role"] == "button"
    assert result["window_title"] == "Bloc-notes"
    assert result["anchor_image_base64"]  # the crop was computed
    expected_position = [round(500 / 1920, 6), round(300 / 1080, 6)]
    assert result["by_position"] == expected_position
@pytest.mark.unit
def test_niveau_a_vision_info_text_empty_calls_som(
    monkeypatch, fake_screenshot, tmp_path
):
    """Empty vision_info.text with no skip flag: SomEngine is called (legacy path)."""
    for flag in ("RPA_SKIP_BUILD_VISION", "RPA_SKIP_BUILD_VLM"):
        monkeypatch.delenv(flag, raising=False)

    from agent_v0.server_v1 import stream_processor as sp

    som_calls = []
    gemma_calls = []
    monkeypatch.setattr(
        sp,
        "_som_identify_clicked_element",
        lambda *args, **kwargs: som_calls.append(args)
        or {"label": "label_from_som", "source": "som"},
    )
    # The gemma stand-in finds nothing (returns an empty string).
    monkeypatch.setattr(
        sp,
        "_gemma4_read_element",
        lambda *args, **kwargs: gemma_calls.append(args) or "",
    )

    result = sp.enrich_click_from_screenshot(
        screenshot_path=fake_screenshot,
        click_x=500,
        click_y=300,
        screen_w=1920,
        screen_h=1080,
        window_title="App",
        vision_info={"text": "", "type": ""},  # empty
        session_dir=_make_session_dir(tmp_path),
        screenshot_id="shot_0001",
    )

    # SomEngine must be called exactly once (legacy behaviour preserved).
    assert len(som_calls) == 1
    # gemma4 must NOT be called: the SomEngine label is expected to be used
    # as the element text, so the gemma4 branch is not entered.
    assert len(gemma_calls) == 0
    # by_text comes from SomEngine.
    assert result["by_text"] == "label_from_som"
    assert result["by_text_source"] == "ocr"
# ────────────────────────────────────────────────────────────────────────────
# Niveau B — flag RPA_SKIP_BUILD_VISION
# ────────────────────────────────────────────────────────────────────────────
@pytest.mark.unit
def test_niveau_b_flag_skip_build_vision_blocks_all(
    monkeypatch, fake_screenshot, tmp_path
):
    """Level B: RPA_SKIP_BUILD_VISION=true blocks SomEngine and gemma4.

    This holds even when vision_info.text is empty.
    """
    monkeypatch.setenv("RPA_SKIP_BUILD_VISION", "true")
    monkeypatch.delenv("RPA_SKIP_BUILD_VLM", raising=False)

    from agent_v0.server_v1 import stream_processor as sp

    som_calls = []
    gemma_calls = []

    def recording_som(*a, **kw):
        som_calls.append(a)
        return {"label": "X"}

    def recording_gemma(*a, **kw):
        gemma_calls.append(a)
        return "X"

    monkeypatch.setattr(sp, "_som_identify_clicked_element", recording_som)
    monkeypatch.setattr(sp, "_gemma4_read_element", recording_gemma)

    result = sp.enrich_click_from_screenshot(
        screenshot_path=fake_screenshot,
        click_x=100,
        click_y=100,
        screen_w=1920,
        screen_h=1080,
        window_title="App",
        vision_info={"text": "", "type": ""},
        session_dir=_make_session_dir(tmp_path),
        screenshot_id="shot_0001",
    )

    assert len(som_calls) == 0, f"SomEngine appelé malgré flag actif : {som_calls}"
    assert len(gemma_calls) == 0, f"gemma4 appelé malgré flag actif : {gemma_calls}"

    # The action is kept, with its fallback channels intact.
    assert result["anchor_image_base64"]  # crop kept
    assert result["window_title"] == "App"
    assert result["by_position"]  # position kept
    # Empty by_text is acceptable: replay falls back to anchor/position.
    assert result["by_text"] == ""
@pytest.mark.unit
def test_niveau_b_alias_skip_build_vlm_works(
    monkeypatch, fake_screenshot, tmp_path
):
    """The alias flag RPA_SKIP_BUILD_VLM=true also prevents the SomEngine call."""
    monkeypatch.delenv("RPA_SKIP_BUILD_VISION", raising=False)
    monkeypatch.setenv("RPA_SKIP_BUILD_VLM", "true")

    from agent_v0.server_v1 import stream_processor as sp

    som_calls = []

    def recording_som(*a, **kw):
        # Record the call and return None, like the lambda it replaces.
        som_calls.append(a)

    def unexpected_gemma(*a, **kw):
        return "should_not_be_called"

    monkeypatch.setattr(sp, "_som_identify_clicked_element", recording_som)
    monkeypatch.setattr(sp, "_gemma4_read_element", unexpected_gemma)

    sp.enrich_click_from_screenshot(
        screenshot_path=fake_screenshot,
        click_x=100,
        click_y=100,
        screen_w=1920,
        screen_h=1080,
        window_title="App",
        vision_info={"text": ""},
        session_dir=_make_session_dir(tmp_path),
        screenshot_id="shot_0001",
    )

    assert len(som_calls) == 0
@pytest.mark.unit
def test_flag_off_calls_som_when_no_vision_text(
    monkeypatch, fake_screenshot, tmp_path
):
    """Flag explicitly off ("0") with empty vision_info.text: legacy behaviour."""
    monkeypatch.setenv("RPA_SKIP_BUILD_VISION", "0")
    monkeypatch.delenv("RPA_SKIP_BUILD_VLM", raising=False)

    from agent_v0.server_v1 import stream_processor as sp

    som_calls = []

    def som_finds_nothing(*a, **kw):
        som_calls.append(a)
        return None

    def gemma_reads_label(*a, **kw):
        return "from_gemma"

    monkeypatch.setattr(sp, "_som_identify_clicked_element", som_finds_nothing)
    monkeypatch.setattr(sp, "_gemma4_read_element", gemma_reads_label)

    result = sp.enrich_click_from_screenshot(
        screenshot_path=fake_screenshot,
        click_x=100,
        click_y=100,
        screen_w=1920,
        screen_h=1080,
        window_title="App",
        vision_info={"text": ""},
        session_dir=_make_session_dir(tmp_path),
        screenshot_id="shot_0001",
    )

    assert len(som_calls) == 1, "Flag OFF doit conserver SomEngine"
    # SomEngine returned None, so the text is expected to come from gemma4.
    assert result["by_text"] == "from_gemma"
    assert result["by_text_source"] == "vlm"
@pytest.mark.unit
def test_niveau_b_priority_over_niveau_a(
    monkeypatch, fake_screenshot, tmp_path
):
    """Skip flag on and vision_info.text present: SomEngine skipped, OCR text kept."""
    monkeypatch.setenv("RPA_SKIP_BUILD_VISION", "true")

    from agent_v0.server_v1 import stream_processor as sp

    som_calls = []

    def recording_som(*a, **kw):
        som_calls.append(a)

    def unexpected_gemma(*a, **kw):
        return "should_not"

    monkeypatch.setattr(sp, "_som_identify_clicked_element", recording_som)
    monkeypatch.setattr(sp, "_gemma4_read_element", unexpected_gemma)

    result = sp.enrich_click_from_screenshot(
        screenshot_path=fake_screenshot,
        click_x=100,
        click_y=100,
        screen_w=1920,
        screen_h=1080,
        window_title="App",
        vision_info={"text": "Save", "type": "button"},
        session_dir=_make_session_dir(tmp_path),
        screenshot_id="shot_0001",
    )

    assert len(som_calls) == 0
    # vision_info.text is still what ends up in by_text, tagged as OCR.
    assert result["by_text"] == "Save"
    assert result["by_text_source"] == "ocr"

View File

@@ -15,6 +15,7 @@ On teste deux choses :
from __future__ import annotations
import sys
import types
from pathlib import Path
from types import SimpleNamespace
from unittest.mock import patch, MagicMock
@@ -22,6 +23,95 @@ from unittest.mock import patch, MagicMock
ROOT = Path(__file__).parent.parent.parent
sys.path.insert(0, str(ROOT))
def _install_fake_pynput_if_missing():
    """Register stub ``pynput`` modules in ``sys.modules`` when pynput is absent.

    Does nothing if ``import pynput`` succeeds. Otherwise it builds stub
    ``pynput``, ``pynput.mouse`` and ``pynput.keyboard`` modules exposing
    ``Button``, ``Controller``, ``Key`` and ``KeyCode`` stand-ins, and
    installs them in ``sys.modules``.
    """
    try:
        import pynput  # noqa: F401
    except ModuleNotFoundError:
        pass
    else:
        return

    class FakeKeyValue:
        """Named key constant, hashable and comparable by name."""

        def __init__(self, name):
            self.name = name

        def __repr__(self):
            return f"Key.{self.name}"

        def __hash__(self):
            return hash(("key", self.name))

        def __eq__(self, other):
            return isinstance(other, FakeKeyValue) and self.name == other.name

    key_names = (
        "enter", "tab", "esc", "backspace", "delete", "space",
        "up", "down", "left", "right", "home", "end",
        "page_up", "page_down", "f1", "f2", "f3", "f4", "f5", "f6",
        "f7", "f8", "f9", "f10", "f11", "f12", "ctrl", "ctrl_l",
        "ctrl_r", "alt", "alt_l", "alt_r", "shift", "shift_l",
        "shift_r", "cmd", "insert", "print_screen", "caps_lock",
        "num_lock",
    )
    # Namespace class exposing one FakeKeyValue per special key name.
    FakeKey = type(
        "FakeKey", (), {key_name: FakeKeyValue(key_name) for key_name in key_names}
    )

    class FakeKeyCode:
        """Key identified by character and/or virtual-key code."""

        def __init__(self, char=None, vk=None):
            self.char = char
            self.vk = vk

        @classmethod
        def from_char(cls, char):
            return cls(char=char)

        @classmethod
        def from_vk(cls, vk):
            return cls(vk=vk)

        def __hash__(self):
            return hash(("keycode", self.char, self.vk))

        def __eq__(self, other):
            if not isinstance(other, FakeKeyCode):
                return False
            return self.char == other.char and self.vk == other.vk

    class FakeController:
        """Controller whose press/release/click do nothing and return None."""

        def press(self, *_args, **_kwargs):
            return None

        def release(self, *_args, **_kwargs):
            return None

        def click(self, *_args, **_kwargs):
            return None

    class FakeButton:
        left = "left"
        right = "right"

    root_mod = types.ModuleType("pynput")
    mouse_mod = types.ModuleType("pynput.mouse")
    keyboard_mod = types.ModuleType("pynput.keyboard")

    mouse_mod.Button = FakeButton
    mouse_mod.Controller = FakeController
    keyboard_mod.Controller = FakeController
    keyboard_mod.Key = FakeKey
    keyboard_mod.KeyCode = FakeKeyCode
    root_mod.mouse = mouse_mod
    root_mod.keyboard = keyboard_mod

    for module in (root_mod, mouse_mod, keyboard_mod):
        sys.modules[module.__name__] = module
_install_fake_pynput_if_missing()
from agent_v0.agent_v1.core.executor import ActionExecutorV1 # noqa: E402
@@ -184,6 +274,44 @@ class TestPostVerifyWindowTransition:
expected_after="test Bloc-notes",
)
def test_enrich_target_context_marks_transition_and_generic_button(self):
    """A button click that changes the window title is flagged as a transition.

    The hints must carry the before/after titles, the button text, and the
    window expected after the click.
    """
    action = {
        "expected_window_before": "*test Bloc-notes",
        "expected_window_title": "Enregistrer sous",
    }
    target = {
        "by_text": "Enregistrer",
        "by_role": "button",
        "window_title": "*test Bloc-notes",
    }

    enriched = ActionExecutorV1._enrich_target_context_from_action(action, target)
    hints = enriched["context_hints"]

    assert hints["requires_window_transition"] is True
    assert hints["expected_window_before"] == "*test Bloc-notes"
    assert hints["expected_window_after"] == "Enregistrer sous"
    assert hints["generic_button_text"] == "Enregistrer"
    assert hints["button_expected_after_window"] == "Enregistrer sous"
def test_enrich_target_context_keeps_same_window_non_transition(self):
    """Titles differing only by the leading "*" are not flagged as a transition."""
    action = {
        "expected_window_before": "*test Bloc-notes",
        "expected_window_title": "test Bloc-notes",
    }
    target = {
        "by_text": "test",
        "by_role": "tab",
        "window_title": "*test Bloc-notes",
    }

    enriched = ActionExecutorV1._enrich_target_context_from_action(action, target)
    hints = enriched["context_hints"]

    assert hints["expected_window_before"] == "*test Bloc-notes"
    assert hints["expected_window_after"] == "test Bloc-notes"
    assert "requires_window_transition" not in hints
# =========================================================================
# Routage de la garde dans verify_screen

View File

@@ -44,3 +44,80 @@ def test_template_strategy_passes_fallback_coords_to_anchor_drift_guard():
fallback_x_pct=0.708594,
fallback_y_pct=0.35,
)
def test_server_explicit_reject_skips_local_text_fallback():
    """An explicit server rejection must not be overridden by the local resolver.

    The server answers ``resolved=False`` with a ``rejected_...`` method. The
    local hybrid resolver would succeed, but it must never be consulted.
    """
    server_rejection = {
        "resolved": False,
        "method": "rejected_close_tab_zone_hybrid_text_direct",
        "reason": "close_tab_out_of_recorded_zone",
        "score": 0.8,
    }
    local_hit = {
        "resolved": True,
        "x_pct": 0.1,
        "y_pct": 0.13,
        "method": "hybrid_text_direct",
        "score": 0.9,
    }
    target_spec = {
        "by_text": "test",
        "context_hints": {"interaction": "close_tab"},
        "screen_scope": "full_screen",
    }

    executor = MagicMock()
    executor._server_resolve_target.return_value = server_rejection
    executor._hybrid_vlm_resolve.return_value = local_hit

    engine = GroundingEngine(executor)
    engine._capture_window_or_screen = MagicMock(return_value="shot")

    result = engine.locate(
        "http://server",
        target_spec,
        fallback_x=0.7,
        fallback_y=0.04,
        screen_width=2560,
        screen_height=1600,
    )

    assert result.found is False
    executor._hybrid_vlm_resolve.assert_not_called()
def test_server_plain_not_found_allows_local_text_fallback():
    """A plain server "not_found" lets the local hybrid resolver find the target."""
    server_miss = {
        "resolved": False,
        "method": "server_no_match",
        "reason": "not_found",
        "score": 0.0,
    }
    local_hit = {
        "resolved": True,
        "x_pct": 0.45,
        "y_pct": 0.5,
        "method": "hybrid_text_direct",
        "score": 0.9,
    }

    executor = MagicMock()
    executor._server_resolve_target.return_value = server_miss
    executor._hybrid_vlm_resolve.return_value = local_hit

    engine = GroundingEngine(executor)
    engine._capture_window_or_screen = MagicMock(return_value="shot")

    result = engine.locate(
        "http://server",
        {"by_text": "Enregistrer", "screen_scope": "full_screen"},
        fallback_x=0.5,
        fallback_y=0.5,
        screen_width=1920,
        screen_height=1080,
    )

    assert result.found is True
    assert result.method == "hybrid_text_direct"
    executor._hybrid_vlm_resolve.assert_called_once()

View File

@@ -0,0 +1,162 @@
import importlib
import sys
import types
def _install_fake_pynput(monkeypatch):
    """Install stub ``pynput`` modules for one test and return the key stand-ins.

    The three stub modules are registered through ``monkeypatch.setitem``.
    Any cached ``agent_v0.agent_v1.core.captor`` module is dropped so the
    next import binds to the stubs.

    Returns:
        ``(FakeKey, FakeKeyCode)``, the classes exposed as
        ``pynput.keyboard.Key`` and ``pynput.keyboard.KeyCode``.
    """

    class FakeKey:
        """Named key; the special keys are attached below as class attributes."""

        def __init__(self, name):
            self.name = name

        def __repr__(self):
            return f"Key.{self.name}"

    special_keys = (
        "ctrl",
        "ctrl_l",
        "ctrl_r",
        "alt",
        "alt_l",
        "alt_r",
        "shift",
        "shift_l",
        "shift_r",
        "cmd",
        "cmd_l",
        "cmd_r",
        "esc",
        "enter",
        "tab",
        "space",
        "backspace",
    )
    for key_name in special_keys:
        setattr(FakeKey, key_name, FakeKey(key_name))

    class FakeKeyCode:
        def __init__(self, char=None, vk=None):
            self.char = char
            self.vk = vk

    class FakeButton:
        pass

    root_mod = types.ModuleType("pynput")
    mouse_mod = types.ModuleType("pynput.mouse")
    keyboard_mod = types.ModuleType("pynput.keyboard")

    mouse_mod.Button = FakeButton
    mouse_mod.Listener = object
    keyboard_mod.Key = FakeKey
    keyboard_mod.KeyCode = FakeKeyCode
    keyboard_mod.Listener = object
    root_mod.mouse = mouse_mod
    root_mod.keyboard = keyboard_mod

    for module in (root_mod, mouse_mod, keyboard_mod):
        monkeypatch.setitem(sys.modules, module.__name__, module)

    # NOTE(review): this removal bypasses monkeypatch, so it is not undone at
    # teardown. A captor module imported against the stubs may stay cached for
    # later tests. Confirm whether that is acceptable.
    sys.modules.pop("agent_v0.agent_v1.core.captor", None)
    return FakeKey, FakeKeyCode
def _load_captor(monkeypatch):
    """Import the captor module against the stub pynput.

    Returns:
        ``(captor_module, FakeKey, FakeKeyCode)``.
    """
    key_cls, key_code_cls = _install_fake_pynput(monkeypatch)
    captor_module = importlib.import_module("agent_v0.agent_v1.core.captor")
    return captor_module, key_cls, key_code_cls
def test_standalone_windows_key_is_emitted_on_release(monkeypatch):
    """A lone Win key emits nothing on press and a single ["win"] combo on release."""
    captor_module, key, _key_code = _load_captor(monkeypatch)
    emitted = []
    captor = captor_module.EventCaptorV1(emitted.append)
    captor._inject_screen_metadata = lambda _event: None

    captor._on_press(key.cmd)
    assert emitted == []

    captor._on_release(key.cmd)
    combos = [event["keys"] for event in emitted]
    assert combos == [["win"]]
    raw_actions = [raw["action"] for raw in emitted[0]["raw_keys"]]
    assert raw_actions == ["press", "release"]
    assert "win" not in captor.modifiers
def test_windows_shortcut_cancels_standalone_windows_key(monkeypatch):
    """Win+S yields one ["win", "s"] combo and no extra standalone ["win"] event."""
    captor_module, key, key_code = _load_captor(monkeypatch)
    emitted = []
    captor = captor_module.EventCaptorV1(emitted.append)
    captor._inject_screen_metadata = lambda _event: None

    captor._on_press(key.cmd)
    captor._on_press(key_code(char="s", vk=83))
    captor._on_release(key_code(char="s", vk=83))
    captor._on_release(key.cmd)

    combos = [event["keys"] for event in emitted]
    assert combos == [["win", "s"]]
def test_release_only_windows_shortcut_is_inferred(monkeypatch):
    """Win+S is inferred when only the two release events are delivered."""
    captor_module, key, key_code = _load_captor(monkeypatch)
    emitted = []
    captor = captor_module.EventCaptorV1(emitted.append)
    captor._inject_screen_metadata = lambda _event: None

    # Windows/NoMachine can swallow the press events for Win+S and deliver
    # only release('s') followed by release('cmd').
    captor._on_release(key_code(char="s", vk=83))
    captor._on_release(key.cmd)

    combos = [event["keys"] for event in emitted]
    assert combos == [["win", "s"]]
    raw_actions = [raw["action"] for raw in emitted[0]["raw_keys"]]
    assert raw_actions == ["release", "release"]
def test_escape_key_is_emitted_as_key_combo(monkeypatch):
    """Pressing Esc immediately emits a single ["escape"] combo."""
    captor_module, key, _key_code = _load_captor(monkeypatch)
    emitted = []
    captor = captor_module.EventCaptorV1(emitted.append)
    captor._inject_screen_metadata = lambda _event: None

    captor._on_press(key.esc)

    combos = [event["keys"] for event in emitted]
    assert combos == [["escape"]]
def test_stream_processor_keeps_win_but_filters_other_modifiers():
    """Lone ctrl/shift combos are dropped as parasitic; a lone "win" is kept.

    Also checks the minimum post-wait for win-based combos (>= 1500) and
    for escape (>= 500).
    """
    from agent_v0.server_v1.stream_processor import (
        _is_parasitic_event,
        _needs_post_wait,
        clean_compound_steps,
        clean_enriched_actions,
    )

    def combo(*keys):
        return {"type": "key_combo", "keys": list(keys)}

    assert _is_parasitic_event(combo("ctrl")) is True
    assert _is_parasitic_event(combo("win")) is False

    cleaned_actions = clean_enriched_actions([combo("ctrl"), combo("win")])
    assert cleaned_actions == [{"type": "key_combo", "keys": ["win"]}]

    cleaned_steps = clean_compound_steps([combo("shift"), combo("win")])
    assert cleaned_steps == [{"type": "key_combo", "keys": ["win"]}]

    assert _needs_post_wait(combo("win")) >= 1500
    assert _needs_post_wait(combo("win", "s")) >= 1500
    assert _needs_post_wait(combo("escape")) >= 500
def test_streamer_prioritizes_real_captor_event_types():
    """key_combo, text_input and mouse_click events are priority items; heartbeat is not."""
    from agent_v0.agent_v1.network.streamer import TraceStreamer

    streamer = TraceStreamer("sess_keyboard_priority")
    expectations = (
        ("key_combo", True),
        ("text_input", True),
        ("mouse_click", True),
        ("heartbeat", False),
    )
    for event_type, expected in expectations:
        assert streamer._is_priority_item("event", {"type": event_type}) is expected

View File

@@ -0,0 +1,280 @@
"""Tests du contrat de messages humains pour Lea."""
from __future__ import annotations
import pytest
from agent_v0.agent_v1.ui.message_contract import (
MAX_FIELD_CHARS,
MessageContractError,
coerce_supervised_pause_message,
format_supervised_pause_from_mapping,
format_supervised_pause_message,
validate_supervised_pause_message,
validate_visible_message,
warn_visible_message,
)
def _valid_pause(**overrides: str) -> str:
    """Return a pause message built by ``format_supervised_pause_message``.

    The four default field values are used unless replaced (or extended)
    by keyword ``overrides``.
    """
    defaults = dict(
        intention="ouvrir le dossier patient dans Aiva Urgence",
        attendu="voir la fiche du patient ouverte avec la liste des passages",
        vu="la page d'accueil Aiva Urgence sans le dossier patient",
        demande="ouvrir le dossier patient puis me rendre la main",
    )
    merged = {**defaults, **overrides}
    return format_supervised_pause_message(**merged)
def _raw_pause(**overrides: str) -> str:
fields = {
"intention": "ouvrir le dossier patient dans Aiva Urgence",
"attendu": "voir la fiche du patient ouverte avec la liste des passages",
"vu": "la page d'accueil Aiva Urgence sans le dossier patient",
"demande": "ouvrir le dossier patient puis me rendre la main",
}
fields.update(overrides)
return "\n".join(
[
f"J'essaie de : {fields['intention']}",
f"J'attendais : {fields['attendu']}",
f"Je vois : {fields['vu']}",
f"Peux-tu : {fields['demande']}",
]
)
def _issue_codes(message: str) -> set[str]:
    """Return the set of issue codes reported when validating ``message`` as a supervised pause."""
    report = validate_supervised_pause_message(message)
    codes: set[str] = set()
    for issue in report.issues:
        codes.add(issue.code)
    return codes
def test_format_supervised_pause_has_exact_four_field_structure():
    """The formatter emits exactly the four labelled lines, and the result validates."""
    expected_lines = [
        "J'essaie de : ouvrir le dossier patient dans Aiva Urgence",
        "J'attendais : voir la fiche du patient ouverte avec la liste des passages",
        "Je vois : la page d'accueil Aiva Urgence sans le dossier patient",
        "Peux-tu : ouvrir le dossier patient puis me rendre la main",
    ]

    message = _valid_pause()

    assert message.splitlines() == expected_lines
    verdict = validate_supervised_pause_message(message)
    assert verdict.valid
def test_format_from_mapping_accepts_runtime_aliases():
    """A mapping keyed by trying_to/expected/observed/request formats into a valid pause."""
    aliased_fields = {
        "trying_to": "selectionner le passage aux urgences",
        "expected": "voir le formulaire de codage du passage",
        "observed": "la liste des passages reste affichee",
        "request": "selectionner le bon passage puis me rendre la main",
    }

    message = format_supervised_pause_from_mapping(aliased_fields)

    assert "J'essaie de : selectionner le passage aux urgences" in message
    verdict = validate_supervised_pause_message(message)
    assert verdict.valid
@pytest.mark.parametrize(
    "bad_phrase",
    [
        "un element",
        "un élément",
        "cette action",
        "Validation requise",
        "cible inconnue",
    ],
)
def test_blacklist_refuses_generic_formulations(bad_phrase):
    """Each listed generic phrase makes the pause invalid with a generic_phrase issue."""
    result = validate_supervised_pause_message(_raw_pause(vu=f"je vois {bad_phrase}"))

    assert not result.valid
    reported = set()
    for issue in result.issues:
        reported.add(issue.code)
    assert "generic_phrase" in reported
@pytest.mark.parametrize(
    "technical_text",
    [
        "action_click_12ab34",
        "replay_9f8e7d6c",
        "session_id",
        "target_spec.by_text",
        "550e8400-e29b-41d4-a716-446655440000",
        "a3f6c9d8e1b24567",
    ],
)
def test_refuses_raw_technical_identifiers(technical_text):
    """Raw identifiers in a field must be flagged as technical_identifier or technical_field."""
    message = _raw_pause(attendu=f"voir le dossier patient apres {technical_text}")
    codes = _issue_codes(message)
    # Either code is acceptable; at least one of them must be reported.
    assert not codes.isdisjoint({"technical_identifier", "technical_field"})
@pytest.mark.parametrize(
    "technical_text",
    [
        "(123, 456)",
        "x=120 y=340",
        "340px",
        "score=0.87",
        "confidence=0.91",
        "similarité=0.42",
    ],
)
def test_refuses_pixels_and_raw_scores(technical_text):
    """Coordinates, pixel sizes and numeric scores must raise raw_coordinates or raw_score."""
    reported = _issue_codes(_raw_pause(vu=f"la page Aiva avec {technical_text}"))
    assert any(code in reported for code in ("raw_coordinates", "raw_score"))
@pytest.mark.parametrize(
    "technical_english",
    [
        "target_not_found",
        "no_screen_change",
        "wrong_window",
        "validation required",
        "retry",
        "screenshot",
    ],
)
def test_refuses_technical_english(technical_english):
    """Each listed English technical term must produce a technical_english issue."""
    observed_text = f"le message {technical_english} est affiche"
    reported = _issue_codes(_raw_pause(vu=observed_text))
    assert "technical_english" in reported
def test_refuses_raw_english_instruction():
    """An English request line is reported as both technical_english and not_actionable."""
    reported = _issue_codes(_raw_pause(demande="please click the target button"))
    for required_code in ("technical_english", "not_actionable"):
        assert required_code in reported
def test_refuses_messages_without_four_required_lines():
    """A single free-form sentence is invalid and reported as invalid_structure."""
    free_form = "Je ne trouve pas le dossier patient."
    result = validate_supervised_pause_message(free_form)

    assert not result.valid
    assert any(issue.code == "invalid_structure" for issue in result.issues)
def test_refuses_wrong_label_order():
    """Swapping the first two labelled lines must be reported as invalid_structure."""
    swapped_lines = (
        "J'attendais : voir la fiche patient",
        "J'essaie de : ouvrir le dossier patient",
        "Je vois : la page d'accueil",
        "Peux-tu : ouvrir le dossier puis me rendre la main",
    )
    reported = _issue_codes("\n".join(swapped_lines))
    assert "invalid_structure" in reported
def test_demande_must_be_actionable_in_french():
    """A request line that only says thanks is reported as not_actionable."""
    pause_lines = (
        "J'essaie de : ouvrir le dossier patient",
        "J'attendais : voir la fiche patient ouverte",
        "Je vois : la page d'accueil Aiva Urgence",
        "Peux-tu : merci beaucoup",
    )
    reported = _issue_codes("\n".join(pause_lines))
    assert "not_actionable" in reported
def test_visible_message_validator_accepts_clear_french_actionable_text():
    """A plain French two-sentence message with a request passes validate_visible_message."""
    message = (
        "Je ne trouve pas le dossier patient dans Aiva Urgence. "
        "Peux-tu ouvrir le dossier puis me rendre la main ?"
    )
    verdict = validate_visible_message(message)
    assert verdict.valid
def test_formatter_raises_instead_of_emitting_generic_message():
    """The formatter raises MessageContractError when given these generic field values."""
    generic_fields = {
        "intention": "faire cette action",
        "attendu": "validation requise",
        "vu": "un element",
        "demande": "corriger",
    }
    with pytest.raises(MessageContractError):
        format_supervised_pause_message(**generic_fields)
def test_formatter_raises_on_too_short_request():
    """With three descriptive fields and the one-word request "corriger", the formatter raises."""
    fields = {
        "intention": "ouvrir le dossier patient dans Aiva Urgence",
        "attendu": "voir la fiche du patient ouverte",
        "vu": "la page d'accueil Aiva Urgence",
        "demande": "corriger",
    }
    with pytest.raises(MessageContractError):
        format_supervised_pause_message(**fields)
def test_coerce_turns_legacy_validation_required_into_structured_pause():
    """Coercing "Validation requise" yields a valid pause that no longer contains that text."""
    legacy_text = "Validation requise"
    message = coerce_supervised_pause_message("Validation requise")

    verdict = validate_supervised_pause_message(message)
    assert verdict.valid
    assert legacy_text not in message
    first_line = message.splitlines()[0]
    assert first_line.startswith("J'essaie de :")
def test_coerce_keeps_clear_legacy_request_as_demande():
    """A clear legacy request survives verbatim in the coerced, valid pause message."""
    legacy_request = "Valider le dossier patient avant enregistrement"
    context = {
        "intention": "enregistrer le dossier patient",
        "attendu": "avoir ton accord avant l'enregistrement",
        "vu": "le formulaire patient est pret a etre enregistre",
    }

    message = coerce_supervised_pause_message(legacy_request, **context)

    verdict = validate_supervised_pause_message(message)
    assert verdict.valid
    assert "Valider le dossier patient avant enregistrement" in message
def test_warn_visible_message_logs_without_modifying_message(caplog):
    """warn_visible_message returns its input unchanged and logs the contract violation."""
    raw = "Validation requise"

    echoed = warn_visible_message(raw, source="unit.raw")

    assert echoed == raw
    log_text = caplog.text
    assert "invalid_message source=unit.raw" in log_text
    assert "generic_phrase" in log_text
def test_warn_visible_message_accepts_supervised_pause_without_log(caplog):
    """A well-formed supervised pause is returned as-is and logs no invalid_message entry."""
    message = _valid_pause()
    call_options = {"source": "unit.final", "supervised_pause": True}

    echoed = warn_visible_message(message, **call_options)

    assert echoed == message
    assert "invalid_message" not in caplog.text
def test_refuses_overlong_fields_and_messages():
    """A field longer than MAX_FIELD_CHARS triggers field_too_long and message_too_long."""
    long_field = "ouvrir " + ("le dossier patient " * 45)
    # Guard: the fixture itself must exceed the limit, or the test proves nothing.
    assert len(long_field) > MAX_FIELD_CHARS

    pause_lines = (
        f"J'essaie de : {long_field}",
        "J'attendais : voir la fiche patient ouverte",
        "Je vois : la page d'accueil Aiva Urgence",
        "Peux-tu : ouvrir le dossier patient puis me rendre la main",
    )
    reported = _issue_codes("\n".join(pause_lines))

    assert "field_too_long" in reported
    assert "message_too_long" in reported

View File

@@ -0,0 +1,109 @@
import json
import sys
from pathlib import Path
ROOT = Path(__file__).parent.parent.parent
sys.path.insert(0, str(ROOT))
from tools import lea_micro_preflight as preflight
# Canned memory report with English "Mem:" / "Swap:" row labels. It is parsed
# directly by the parse_free_m test and served by the fake "free" command
# installed in _install_fakes (which substitutes the swap "used" value 1024).
FREE_OUTPUT = """\
total used free shared buff/cache available
Mem: 64202 15500 32000 123 16702 47000
Swap: 8192 1024 7168
"""
def test_parse_free_m_extracts_ram_and_swap():
    """parse_free_m exposes memory totals/available and the full swap triple."""
    parsed = preflight.parse_free_m(FREE_OUTPUT)
    memory = parsed["mem"]

    assert memory["total"] == 64202
    assert memory["available"] == 47000
    expected_swap = {"total": 8192, "used": 1024, "free": 7168}
    assert parsed["swap"] == expected_swap
def test_parse_free_m_accepts_french_locale_labels():
    """parse_free_m also handles French column headers and the "Échange:" swap label."""
    french_report = """\
total utilisé libre partagé tamp/cache disponible
Mem: 126365 60425 2919 12847 77071 65939
Échange: 8191 3397 4794
"""
    parsed = preflight.parse_free_m(french_report)
    memory = parsed["mem"]

    assert memory["used"] == 60425
    assert memory["available"] == 65939
    expected_swap = {"total": 8191, "used": 3397, "free": 4794}
    assert parsed["swap"] == expected_swap
def test_parse_nvidia_smi_memory_multiple_gpus():
    """Two GPU rows (with and without a "MiB" suffix) parse into two free/total dicts."""
    raw_rows = "8123, 24576\n3999 MiB, 12288 MiB\n"
    expected = [
        {"free_mib": 8123, "total_mib": 24576},
        {"free_mib": 3999, "total_mib": 12288},
    ]

    parsed = preflight.parse_nvidia_smi_memory(raw_rows)

    assert parsed == expected
def test_extract_ollama_tags_accepts_name_and_model_keys():
    """Tags are read from "name" or "model" keys; empty names and non-dict entries are dropped."""
    payload = {
        "models": [
            {"name": "qwen2.5vl:7b-rpa"},
            {"model": "qwen2.5:7b"},
            {"name": ""},
            "ignored",
        ]
    }

    tags = preflight.extract_ollama_tags(payload)

    assert tags == {"qwen2.5vl:7b-rpa", "qwen2.5:7b"}
def _install_fakes(monkeypatch, *, resident=True, tags_ok=True, swap_used=1024):
    """Patch ``preflight.run_command`` and ``preflight.http_json`` with canned fakes.

    Args:
        monkeypatch: pytest fixture used to install the replacements.
        resident: when true, the fake ``/api/ps`` reply lists
            ``qwen2.5vl:7b-rpa``; otherwise it lists no models.
        tags_ok: when true, the fake ``/api/tags`` reply lists ``qwen2.5:7b``
            in addition to ``qwen2.5vl:7b-rpa``.
        swap_used: value substituted for the first ``1024`` in ``FREE_OUTPUT``
            (the swap "used" column) in the fake ``free`` output.

    Any other command or URL raises ``AssertionError`` so that unexpected
    calls fail the test.
    """
    free_output = FREE_OUTPUT.replace("1024", str(swap_used), 1)
    canned_commands = {
        "nvidia-smi": (0, "8123, 24576", ""),
        "free": (0, free_output, ""),
    }

    def fake_run_command(args, timeout=5.0):
        program = args[0]
        if program == "nvidia-smi":
            return canned_commands["nvidia-smi"]
        if program == "free":
            return canned_commands["free"]
        raise AssertionError(f"unexpected command: {args!r}")

    def fake_http_json(url, timeout=2.0):
        if url.endswith("/api/tags"):
            available = [{"name": "qwen2.5vl:7b-rpa"}]
            if tags_ok:
                available.append({"name": "qwen2.5:7b"})
            return True, {"models": available}, ""
        if url.endswith("/api/ps"):
            loaded = [{"name": "qwen2.5vl:7b-rpa"}] if resident else []
            return True, {"models": loaded}, ""
        raise AssertionError(f"unexpected url: {url!r}")

    replacements = (
        ("run_command", fake_run_command),
        ("http_json", fake_http_json),
    )
    for attribute, fake in replacements:
        monkeypatch.setattr(preflight, attribute, fake)
def test_main_returns_zero_when_all_checks_ok(monkeypatch, capsys):
    """With default fakes, ``main(["--json"])`` exits 0 and prints an "ok" JSON report."""
    _install_fakes(monkeypatch)

    exit_code = preflight.main(["--json"])
    assert exit_code == 0

    printed = capsys.readouterr().out
    report = json.loads(printed)
    assert report["overall"] == "ok"
    assert report["warmup"] == "disabled"
def test_main_warns_when_vlm_not_resident_and_strict_exits_one(monkeypatch):
    """With no resident model, main exits 0 by default and 1 under ``--strict``."""
    _install_fakes(monkeypatch, resident=False)

    default_exit = preflight.main([])
    assert default_exit == 0

    strict_exit = preflight.main(["--strict"])
    assert strict_exit == 1
def test_main_fails_when_required_model_missing(monkeypatch):
    """When ``/api/tags`` lacks ``qwen2.5:7b``, main exits with code 2."""
    _install_fakes(monkeypatch, tags_ok=False)
    exit_code = preflight.main([])
    assert exit_code == 2

View File

@@ -88,9 +88,9 @@ class TestExtraction:
assert _nettoyer_description_cible(None) == ""
def test_nettoyer_description_tronque(self):
longue = "x" * 200
longue = "x" * 1100
resultat = _nettoyer_description_cible(longue)
assert len(resultat) <= 80
assert len(resultat) <= 1024
assert resultat.endswith("...")
@@ -345,9 +345,10 @@ class TestFormatterErreurGenerique:
assert msg.niveau == NiveauMessage.ATTENTION
def test_message_inconnu_tronque(self):
long_msg = "erreur très longue " * 20
long_msg = "erreur très longue " * 80
msg = formatter_erreur_generique(long_msg)
assert len(msg.corps) <= 200 # tronqué avec "..."
assert len(msg.corps) <= len("J'ai rencontré un souci : ") + 1024
assert msg.corps.endswith("...")
def test_pas_de_code_technique_dans_message_utilisateur(self):
"""Les messages présentés à l'utilisateur ne doivent pas contenir de