feat(agent): add learn action flow and grounding guards

This commit is contained in:
Dom
2026-06-02 16:24:10 +02:00
parent 86b3c8f7e7
commit d38f0b0f2f
39 changed files with 5901 additions and 212 deletions

View File

@@ -56,6 +56,13 @@ OLLAMA_HOST = os.getenv("RPA_OLLAMA_HOST", "localhost")
# Configurable through the RPA_API_TOKEN environment variable
API_TOKEN = os.environ.get("RPA_API_TOKEN", "")
# --- Léa-first orchestrator (agent-chat on Linux) ---
# Root endpoint of the agent-chat service that hosts POST /api/learn/start
# (P1-LEA-SHADOW). Configurable through RPA_AGENT_CHAT_URL.
# Default: localhost:5004 (same machine in development). For the clinical POC
# it must point to the DGX Spark (e.g. http://agent-chat.dgx-local:5004).
AGENT_CHAT_URL = os.environ.get("RPA_AGENT_CHAT_URL", "http://localhost:5004")
# Session settings
MAX_SESSION_DURATION_S = 60 * 60 # 1 hour
SESSIONS_ROOT = BASE_DIR / "sessions"

View File

@@ -56,6 +56,8 @@ class EventCaptorV1:
# État des touches modificatrices
self.modifiers = set()
self._pending_standalone_win = False
self._suppress_release_only_win_combo = False
# Tracking du focus fenêtre
self.last_window = None
@@ -327,6 +329,56 @@ class EventCaptorV1:
return {"kind": "key", "name": key.name}
return {"kind": "unknown", "str": str(key)}
@staticmethod
def _raw_key_name(raw_key: Dict[str, Any]) -> Optional[str]:
"""Nom lisible depuis un raw_key sérialisé."""
if raw_key.get("kind") == "vk":
char = raw_key.get("char")
if char and len(str(char)) == 1:
return str(char).lower()
if raw_key.get("kind") == "key":
name = raw_key.get("name")
return str(name).lower() if name else None
return None
def _emit_release_only_windows_combo(self) -> bool:
"""Infer a Win+<key> combo when only the key releases were delivered.
Some Windows/NoMachine sessions do not report the presses of Win+S through
pynput, but later deliver release('s') followed by release('cmd'). Without
this targeted inference the system gesture is lost and the stray releases
pollute the next text_input.
Returns True when a ``key_combo`` event was emitted, False otherwise.
"""
# Work on a snapshot of the raw key buffer.
with self._text_lock:
raw_keys = list(self._raw_key_buffer)
# At least two entries are needed: the combo key and the Win key.
if len(raw_keys) < 2:
return False
cmd_names = {"cmd", "cmd_l", "cmd_r"}
# The most recent entry must be the release of a Win (cmd) key.
last = raw_keys[-1]
if last.get("action") != "release" or self._raw_key_name(last) not in cmd_names:
return False
# Walk backwards to find the latest released key that is not a modifier.
combo_key = None
for raw in reversed(raw_keys[:-1]):
if raw.get("action") != "release":
continue
name = self._raw_key_name(raw)
if name and name not in self._MODIFIER_KEY_NAMES:
combo_key = name
break
if not combo_key:
return False
# The buffered raw keys are consumed by the inferred combo event.
self._raw_key_buffer.clear()
event = {
"type": "key_combo",
"keys": ["win", combo_key],
"raw_keys": raw_keys,
"timestamp": time.time(),
}
self._inject_screen_metadata(event)
self.on_event(event)
return True
def _on_press(self, key):
# TOUJOURS enregistrer le press brut dans le buffer raw_keys
with self._text_lock:
@@ -344,6 +396,7 @@ class EventCaptorV1:
self.modifiers.add("shift")
elif key in (Key.cmd, Key.cmd_l, Key.cmd_r):
self.modifiers.add("win")
self._pending_standalone_win = True
# --- Combos avec modificateur (sauf Shift seul) ---
# Shift seul n'est pas un « vrai » modificateur pour les combos :
@@ -369,6 +422,9 @@ class EventCaptorV1:
# Ne PAS émettre de combo si c'est un modificateur seul
# (ex: appui sur Ctrl sans autre touche = pas de combo)
if key_name and key_name not in self._MODIFIER_KEY_NAMES:
self._pending_standalone_win = False
if "win" in self.modifiers:
self._suppress_release_only_win_combo = True
# Un combo interrompt la saisie texte en cours
self._flush_text_buffer()
# Attacher les raw_keys accumulés (press des modificateurs + press de la touche)
@@ -400,6 +456,7 @@ class EventCaptorV1:
- Enter / Tab : flush immédiat + émission de l'événement
- Escape : vide le buffer sans émettre
"""
escape_raw_keys = None
with self._text_lock:
# --- Touches spéciales ---
if key == Key.backspace:
@@ -411,12 +468,14 @@ class EventCaptorV1:
if key == Key.esc:
# Annuler la saisie en cours
self._text_buffer.clear()
self._raw_key_buffer.clear()
self._text_start_pos = None
self._cancel_flush_timer()
return
escape_raw_keys = list(self._raw_key_buffer)
self._raw_key_buffer.clear()
# Émettre hors lock après le bloc critique.
pass
if key in (Key.enter, Key.tab):
elif key in (Key.enter, Key.tab):
# Flush immédiat — on relâche le lock avant d'appeler
# _flush_text_buffer (qui prend aussi le lock)
pass # on sort du with et on flush après
@@ -454,6 +513,18 @@ class EventCaptorV1:
# Touche spéciale non gérée (F1, Insert, etc.) — on ignore
return
if escape_raw_keys is not None:
event = {
"type": "key_combo",
"keys": ["escape"],
"timestamp": time.time(),
}
if escape_raw_keys:
event["raw_keys"] = escape_raw_keys
self._inject_screen_metadata(event)
self.on_event(event)
return
# Si on arrive ici, c'est Enter ou Tab → flush le buffer en cours
# puis émettre le caractère spécial comme text_input séparé
self._flush_text_buffer()
@@ -551,6 +622,35 @@ class EventCaptorV1:
**self._encode_key(key),
})
if key in (Key.cmd, Key.cmd_l, Key.cmd_r) and self._suppress_release_only_win_combo:
with self._text_lock:
self._raw_key_buffer.clear()
self._pending_standalone_win = False
self._suppress_release_only_win_combo = False
self.modifiers.discard("win")
return
if key in (Key.cmd, Key.cmd_l, Key.cmd_r) and self._emit_release_only_windows_combo():
self._pending_standalone_win = False
self._suppress_release_only_win_combo = False
self.modifiers.discard("win")
return
if key in (Key.cmd, Key.cmd_l, Key.cmd_r) and self._pending_standalone_win:
with self._text_lock:
raw_keys = list(self._raw_key_buffer)
self._raw_key_buffer.clear()
event = {
"type": "key_combo",
"keys": ["win"],
"raw_keys": raw_keys,
"timestamp": time.time(),
}
self._inject_screen_metadata(event)
self.on_event(event)
self._pending_standalone_win = False
self._suppress_release_only_win_combo = False
if key in (Key.ctrl, Key.ctrl_l, Key.ctrl_r):
self.modifiers.discard("ctrl")
elif key in (Key.alt, Key.alt_l, Key.alt_r):
@@ -559,6 +659,8 @@ class EventCaptorV1:
self.modifiers.discard("shift")
elif key in (Key.cmd, Key.cmd_l, Key.cmd_r):
self.modifiers.discard("win")
self._pending_standalone_win = False
self._suppress_release_only_win_combo = False
# ----------------------------------------------------------------
# Métadonnées système

File diff suppressed because it is too large Load Diff

View File

@@ -84,6 +84,15 @@ class GroundingEngine:
if by_role in {"start_button"}:
return False
has_anchor = bool(target_spec.get("anchor_image_base64"))
context_hints = target_spec.get("context_hints") or {}
has_window_or_text_hint = any(
str(target_spec.get(key, "") or "").strip()
for key in ("window_title", "by_text", "vlm_description")
) or bool(str(context_hints.get("window_title", "") or "").strip())
if has_anchor and not has_window_or_text_hint and not by_role:
return False
return True
@staticmethod
@@ -174,6 +183,26 @@ class GroundingEngine:
hints.append(variant)
return hints
@staticmethod
def _server_rejects_text_fallback(raw: Optional[Dict[str, Any]]) -> bool:
"""Dire si un rejet serveur doit bloquer le fallback texte local.
Un rejet explicite n'est pas un simple "non trouvé": le serveur a vu
un candidat et l'a refusé pour une raison de qualité/zone. Refaire une
recherche OCR large côté client contournerait ce garde-fou.
"""
if not raw or raw.get("resolved"):
return False
reason = str(raw.get("reason") or "")
method = str(raw.get("method") or "")
return (
method.startswith("rejected_")
or reason.startswith("close_tab_")
or reason.startswith("drift_")
or "below_threshold" in reason
)
def _window_crop_matches_target_visually(
self,
screenshot_b64: str,
@@ -331,11 +360,31 @@ class GroundingEngine:
cap_w = window_rect["width"] if window_rect else screen_width
cap_h = window_rect["height"] if window_rect else screen_height
skip_text_fallback_after_server_reject = False
for strategy in strategies:
if (
strategy == "vlm_local"
and skip_text_fallback_after_server_reject
and target_spec.get("by_text")
):
by_text = target_spec.get("by_text", "")
logger.info(
"[GROUNDING] Rejet serveur explicite pour '%s'"
"skip fallback local hybrid_text_direct",
by_text,
)
print(
f" [GROUNDING] Rejet serveur explicite pour '{by_text}' "
"→ pas de fallback texte local"
)
continue
result = self._try_strategy(
strategy, server_url, screenshot_b64, target_spec,
fallback_x, fallback_y, cap_w, cap_h,
)
if strategy == "server" and self._server_rejects_text_fallback(result.raw):
skip_text_fallback_after_server_reject = True
if result.found:
# ── Conversion coords fenêtre → coords écran ──
if window_rect:
@@ -429,6 +478,14 @@ class GroundingEngine:
detail=raw.get("matched_element", {}).get("label", ""),
raw=raw,
)
if raw:
return GroundingResult(
found=False,
method=raw.get("method", "server"),
score=raw.get("score", 0.0),
detail=raw.get("reason", "server: pas trouvé"),
raw=raw,
)
elif strategy == "template":
anchor_b64 = target_spec.get("anchor_image_base64", "")

View File

@@ -121,10 +121,7 @@ class AgentV1:
# Wiring ChatWindow → Executor pour Plan B (pause_message → bulle interactive)
# Permet à l'executor d'afficher une bulle paused dans la fenêtre Léa V1
# quand le serveur signale replay_paused=True via /replay/next.
try:
self._executor._chat_window_ref = self._chat_window
except Exception:
logger.debug("Wiring chat_window→executor échoué (non bloquant)", exc_info=True)
self._wire_chat_window_to_executor()
# Boucles permanentes (pas besoin de session active)
self.running = True
@@ -154,6 +151,15 @@ class AgentV1:
shared_state=self._state,
)
def _wire_chat_window_to_executor(self) -> None:
"""Relie l'executor courant à la ChatWindow pour les pauses supervisees."""
if self._executor is None or self._chat_window is None:
return
try:
self._executor._chat_window_ref = self._chat_window
except Exception:
logger.debug("Wiring chat_window->executor echoue (non bloquant)", exc_info=True)
def _delayed_cleanup(self):
"""Nettoyage en arrière-plan après 30s pour ne pas bloquer le démarrage."""
time.sleep(30)
@@ -224,6 +230,7 @@ class AgentV1:
# Initialiser l'executeur partage
self._executor = ActionExecutorV1()
self._wire_chat_window_to_executor()
self.shot_counter = 0
self.running = True

View File

@@ -0,0 +1,147 @@
"""
Client HTTP minimal pour l'orchestrateur Léa-first (agent-chat Linux).
Rebranchement P1-LEA-SHADOW : le bouton "Apprenez-moi" côté Windows déclenche
la création d'une session d'apprentissage côté agent-chat (REST) AVANT de
lancer la capture locale. Le pipeline streaming (capture frames/événements
via start_recording) n'est PAS modifié — seule la prise de contact initiale
avec Léa change.
Contrat :
POST {AGENT_CHAT_URL}/api/learn/start
Headers : Authorization: Bearer <RPA_API_TOKEN>, Content-Type: application/json
Body : { machine_id, session_name, user_id?, trigger_source }
Réponse : { session_id, state, message }
Politique :
- Timeout 10s (connect + read)
- Retry x2 avec backoff 0.5s puis 1.0s
- En cas d'échec définitif : lève LeaOrchestratorError (le caller doit
basculer en mode dégradé : start_recording local sans assistance).
"""
from __future__ import annotations
import logging
import time
from dataclasses import dataclass
from typing import Optional
logger = logging.getLogger(__name__)
# HTTP timeout in seconds handed to httpx for each attempt (10s per the spec)
_HTTP_TIMEOUT_S = 10.0
# Total number of attempts (1 initial try + 2 retries)
_MAX_ATTEMPTS = 3
# Increasing delays, in seconds, slept between consecutive attempts
_BACKOFF_S = (0.5, 1.0)
@dataclass(frozen=True)
class LearnStartResponse:
"""Normalized response of POST /api/learn/start."""
# Identifier of the learning session returned by the orchestrator.
session_id: str
# Session state string taken from the response body (may be empty).
state: str
# Message taken from the response body (may be empty).
message: str
class LeaOrchestratorError(RuntimeError):
"""Definitive failure while communicating with the Léa orchestrator."""
def start_learning_session(
    base_url: str,
    *,
    machine_id: str,
    session_name: str,
    api_token: str = "",
    user_id: Optional[str] = None,
    trigger_source: str = "windows_button",
    timeout_s: float = _HTTP_TIMEOUT_S,
    max_attempts: int = _MAX_ATTEMPTS,
    backoff_s: tuple = _BACKOFF_S,
) -> LearnStartResponse:
    """Start a learning session through the agent-chat orchestrator.

    Sends ``POST {base_url}/api/learn/start`` and retries on any failure
    (network error, HTTP error status, malformed or incomplete response).

    Args:
        base_url: Root URL of agent-chat (e.g. http://localhost:5004).
        machine_id: Unique identifier of the Windows workstation.
        session_name: Human-readable task name (typed by the user).
        api_token: Bearer token (RPA_API_TOKEN). Empty => header omitted.
        user_id: Optional user identifier; only sent when truthy.
        trigger_source: What triggered the call (windows_button, tray, ...).
        timeout_s: Timeout handed to ``httpx.Client`` for each attempt.
            httpx applies it to each phase (connect, read, write, pool)
            separately, so it is not a bound on the total request time.
        max_attempts: Total number of attempts (1 + retries).
        backoff_s: Delays in seconds between attempts. When it is shorter
            than ``max_attempts - 1`` the last delay is reused; when it is
            empty the retries happen without any pause.

    Returns:
        The normalized LearnStartResponse.

    Raises:
        LeaOrchestratorError: if httpx is not installed or if every attempt
            fails. The last underlying exception is attached as the cause.
    """
    # Local import: httpx may be missing on legacy Windows workstations. We
    # want one clear error rather than a chained ImportError on button click.
    try:
        import httpx
    except ImportError as exc:  # pragma: no cover (depends on the venv)
        raise LeaOrchestratorError(
            "httpx non disponible — installer httpx>=0.27 sur le poste Windows."
        ) from exc

    url = base_url.rstrip("/") + "/api/learn/start"
    payload = {
        "machine_id": machine_id,
        "session_name": session_name,
        "trigger_source": trigger_source,
    }
    if user_id:
        payload["user_id"] = user_id
    headers = {"Content-Type": "application/json"}
    if api_token:
        headers["Authorization"] = f"Bearer {api_token}"

    # Normalized once so that an empty or None backoff cannot raise an
    # IndexError from inside the retry handler below.
    delays = tuple(backoff_s or ())

    last_exc: Optional[Exception] = None
    for attempt in range(max_attempts):
        try:
            logger.info(
                "POST %s (tentative %d/%d) machine_id=%s session=%s",
                url, attempt + 1, max_attempts, machine_id, session_name,
            )
            with httpx.Client(timeout=timeout_s) as client:
                resp = client.post(url, json=payload, headers=headers)
            resp.raise_for_status()
            data = resp.json()
            session_id = data.get("session_id", "")
            state = data.get("state", "")
            message = data.get("message", "")
            if not session_id:
                raise LeaOrchestratorError(
                    f"Réponse invalide (pas de session_id) : {data!r}"
                )
            logger.info(
                "Session Léa démarrée : session_id=%s state=%s",
                session_id, state,
            )
            return LearnStartResponse(
                session_id=str(session_id),
                state=str(state),
                message=str(message),
            )
        except Exception as exc:  # noqa: BLE001 — retry on any network/HTTP error
            last_exc = exc
            logger.warning(
                "Echec tentative %d/%d POST %s : %s",
                attempt + 1, max_attempts, url, exc,
            )
            # Sleep only between attempts, never after the last one.
            if attempt < max_attempts - 1 and delays:
                time.sleep(delays[min(attempt, len(delays) - 1)])

    raise LeaOrchestratorError(
        f"Echec définitif POST {url} après {max_attempts} tentatives : {last_exc}"
    ) from last_exc

View File

@@ -63,8 +63,14 @@ JPEG_QUALITY = 85
# Taille max de la queue (backpressure)
QUEUE_MAX_SIZE = 100
# Types d'événements à ne jamais dropper
PRIORITY_EVENT_TYPES = {"click", "key", "scroll", "action", "screenshot"}
# Types d'événements à ne jamais dropper.
# Les noms historiques sont conservés, mais les événements réels du captor
# Agent V1 sont mouse_click/key_combo/text_input/mouse_scroll.
PRIORITY_EVENT_TYPES = {
"click", "key", "scroll", "action", "screenshot",
"mouse_click", "double_click", "key_combo", "key_press",
"text_input", "mouse_scroll",
}
# Purge locale après ACK serveur (Partie A de l'audit)
# Activé par défaut : le serveur conserve déjà les screenshots 180 jours

View File

@@ -9,6 +9,7 @@ Tourne dans son propre thread daemon pour ne pas bloquer pystray.
import logging
import os
import math
import threading
import time
from datetime import datetime
@@ -121,7 +122,7 @@ def _tpl_done(payload: Dict[str, Any]) -> tuple:
def _tpl_need_confirm(payload: Dict[str, Any]) -> tuple:
    """Build the display tuple for an action waiting for the user's approval.

    The title is the ``description`` of ``payload["action"]`` when that
    action is a dict with a non-empty description, otherwise a default
    sentence asking for approval.

    Returns:
        The 3-tuple ``("?", ACTION_ICON_RUN, title)``.
    """
    action = payload.get("action") or {}
    desc = action.get("description") if isinstance(action, dict) else None
    # Single assignment: the earlier "Validation requise" default was a dead
    # store, immediately overwritten by this one.
    title = desc or "J'attends ton accord avant de continuer"
    return ("?", ACTION_ICON_RUN, str(title))
@@ -867,11 +868,19 @@ class ChatWindow:
pass
except Exception:
logger.debug("force-show chat_window silenced", exc_info=True)
# UX fix mai 2026 : repartir d'un chat vide pour focaliser
# l'attention sur la question (clear visuel uniquement,
# self._messages reste intact pour la traçabilité debug).
self._clear_chat_history()
self._render_paused_bubble(payload)
try:
# UX fix mai 2026 : repartir d'un chat vide pour focaliser
# l'attention sur la question (clear visuel uniquement,
# self._messages reste intact pour la traçabilité debug).
self._clear_chat_history()
self._render_paused_bubble(payload)
except Exception:
logger.exception("render paused bubble failed; using fallback")
try:
self._clear_chat_history()
self._render_paused_fallback_bubble(payload)
except Exception:
logger.debug("render paused fallback silenced", exc_info=True)
self._root.after(0, _show_and_render)
@@ -895,7 +904,11 @@ class ChatWindow:
logger.debug("clear chat history silenced", exc_info=True)
@staticmethod
def _compute_paused_bubble_height(reason_str: str) -> tuple:
def _compute_paused_bubble_height(
reason_str: str,
chars_per_line: int = 52,
max_rows: int = 14,
) -> tuple:
"""Calcule la hauteur du Text (en lignes) + si une scrollbar est
nécessaire pour le message d'une bulle paused.
@@ -910,11 +923,11 @@ class ChatWindow:
if not reason_str:
return 2, False
text = str(reason_str)
# Estimation : ~60 chars/ligne effectifs avec wraplength.
wrapped_lines = (len(text) // 60) + 1
explicit_lines = text.count("\n") + 1
estimated = max(wrapped_lines, explicit_lines)
cap = 12
chars_per_line = max(24, int(chars_per_line or 52))
estimated = 0
for raw_line in text.splitlines() or [""]:
estimated += max(1, math.ceil(len(raw_line) / chars_per_line))
cap = max(2, int(max_rows or 14))
height = max(2, min(cap, estimated))
# Scrollbar dès que le cap est atteint OU contenu long (filet
# textuel : ≥ 200 chars implique souvent un débordement visuel
@@ -922,6 +935,46 @@ class ChatWindow:
needs_scroll = (estimated >= cap) or (len(text) > 200)
return height, needs_scroll
def _paused_text_layout(self) -> tuple:
"""Retourne ``(wrap_px, chars_per_line, max_rows)`` pour la bulle pause.
La fenêtre Léa est souvent redimensionnée à ~380px de large sur le
poste Windows. Les anciennes estimations fixes calculaient trop peu
de lignes et tronquaient le message. On part donc des dimensions
réelles du canvas et de la métrique de la police Tk.
"""
canvas_w = 0
canvas_h = 0
try:
canvas_w = int(self._canvas.winfo_width()) if self._canvas is not None else 0
canvas_h = int(self._canvas.winfo_height()) if self._canvas is not None else 0
except Exception:
canvas_w = canvas_h = 0
# Marges: container + padding inner + petite marge droite. La bulle
# de pause est une alerte critique, elle utilise donc presque toute
# la largeur disponible sur les fenêtres étroites.
wrap_px = max(220, canvas_w - (2 * MARGIN) - 52) if canvas_w else 360
avg_char = 8
line_px = 22
try:
from tkinter import font as tkfont
font = tkfont.Font(font=FONT_MSG)
avg_char = max(6, font.measure("n"))
line_px = max(18, font.metrics("linespace"))
except Exception:
pass
chars_per_line = max(24, int(wrap_px / avg_char))
# Réserver titre, metadata, boutons, feedback et padding. Même sur
# une petite fenêtre, on garde assez de lignes pour ne pas couper un
# message d'erreur standard.
max_rows = 14
if canvas_h:
max_rows = max(5, min(18, int((canvas_h - 145) / line_px)))
return wrap_px, chars_per_line, max_rows
def _render_paused_bubble(self, payload: Dict[str, Any]) -> None:
tk = self._tk
if getattr(self, "_msg_frame", None) is None:
@@ -941,7 +994,7 @@ class ChatWindow:
container, bg=PAUSED_BG, padx=14, pady=12,
highlightbackground=PAUSED_BORDER, highlightthickness=2,
)
inner.pack(anchor=tk.W, padx=(0, 50), fill=tk.X)
inner.pack(anchor=tk.W, padx=(0, 12), fill=tk.X)
tk.Label(
inner, text=f"⏸ Pause supervisée • {now}",
@@ -949,31 +1002,44 @@ class ChatWindow:
font=("Segoe UI", 12, "bold"), anchor="w",
).pack(fill=tk.X, anchor=tk.W)
# Message scrollable pour les longs reasons (ex: 200+ chars depuis le serveur).
# On utilise un Text en mode read-only avec hauteur calculée selon la longueur.
# Patch 22 mai 2026 : prendre en compte les \n explicites (titres
# fenêtre / patterns) et activer la scrollbar dès que le cap de
# hauteur est atteint — sinon les bulles de pause étaient
# tronquées visuellement sans aucun ascenseur visible.
# Message borné et scrollable : sur une fenêtre Léa étroite, une
# bulle trop haute fait disparaître le début du diagnostic hors du
# viewport. On garde donc la bulle compacte et on scrolle le texte.
reason_str = str(reason)
height_lines, needs_scroll = self._compute_paused_bubble_height(reason_str)
msg_frame = tk.Frame(inner, bg=PAUSED_BG)
msg_frame.pack(fill=tk.X, anchor=tk.W, pady=(6, 0))
reason_text = tk.Text(
msg_frame, bg=PAUSED_BG, fg=PAUSED_FG,
font=FONT_MSG, wrap=tk.WORD, bd=0, height=height_lines,
highlightthickness=0, relief=tk.FLAT, cursor="arrow",
_wrap_px, chars_per_line, max_rows = self._paused_text_layout()
text_rows, needs_text_scroll = self._compute_paused_bubble_height(
reason_str,
chars_per_line=chars_per_line,
max_rows=max_rows,
)
reason_text.insert("1.0", reason_str)
reason_text.configure(state="disabled")
reason_text.pack(side=tk.LEFT, fill=tk.X, expand=True)
if needs_scroll:
reason_scroll = tk.Scrollbar(
msg_frame, orient=tk.VERTICAL,
command=reason_text.yview, width=8,
text_frame = tk.Frame(inner, bg=PAUSED_BG)
text_frame.pack(fill=tk.X, anchor=tk.W, pady=(6, 0))
reason_msg = tk.Text(
text_frame,
height=text_rows,
wrap=tk.WORD,
bg=PAUSED_BG,
fg=PAUSED_FG,
font=FONT_MSG,
bd=0,
highlightthickness=0,
relief=tk.FLAT,
padx=0,
pady=0,
cursor="arrow",
)
reason_msg.insert("1.0", reason_str)
reason_msg.configure(state="disabled")
reason_msg.pack(side=tk.LEFT, fill=tk.X, expand=True)
if needs_text_scroll:
scrollbar = tk.Scrollbar(
text_frame,
orient=tk.VERTICAL,
command=reason_msg.yview,
width=12,
)
reason_text.configure(yscrollcommand=reason_scroll.set)
reason_scroll.pack(side=tk.RIGHT, fill=tk.Y)
reason_msg.configure(yscrollcommand=scrollbar.set)
scrollbar.pack(side=tk.RIGHT, fill=tk.Y, padx=(6, 0))
tk.Label(
inner, text=f"{workflow} — étape {completed}/{total}",
@@ -1018,6 +1084,89 @@ class ChatWindow:
# Scroll automatique vers la nouvelle bulle (visible immédiatement)
self._scroll_to_bottom()
def _render_paused_fallback_bubble(self, payload: Dict[str, Any]) -> None:
    """Draw a minimal paused bubble, for use when the rich one fails.

    Builds a label-based bubble with "Continuer" / "Annuler" buttons, stores
    its widgets in ``self._active_paused_bubble`` and scrolls to the bottom.
    Does nothing when the message frame does not exist.
    """
    tk = self._tk
    if getattr(self, "_msg_frame", None) is None:
        return

    # Payload fields with their display defaults.
    replay_id = str(payload.get("replay_id", "") or "")
    workflow = payload.get("workflow", "?")
    reason = str(
        payload.get("reason")
        or "Action incertaine - j'ai besoin de votre validation."
    )
    completed = payload.get("completed", 0)
    total = payload.get("total", "?")
    now = datetime.now().strftime("%H:%M")

    row = tk.Frame(self._msg_frame, bg=BG_COLOR)
    row.pack(fill=tk.X, padx=MARGIN, pady=6)

    card = tk.Frame(
        row,
        bg=PAUSED_BG,
        padx=14,
        pady=12,
        highlightbackground=PAUSED_BORDER,
        highlightthickness=2,
    )
    card.pack(anchor=tk.W, padx=(0, 12), fill=tk.X)

    header = tk.Label(
        card,
        text=f"Pause supervisee - {now}",
        bg=PAUSED_BG,
        fg=PAUSED_FG,
        font=("Segoe UI", 12, "bold"),
        anchor="w",
    )
    header.pack(fill=tk.X, anchor=tk.W)

    # Wrap width follows the canvas width when it can be read.
    wrap_px = 360
    try:
        canvas = self._canvas
        if canvas is not None:
            wrap_px = max(220, int(canvas.winfo_width()) - 80)
    except Exception:
        pass

    body = tk.Label(
        card,
        text=reason,
        bg=PAUSED_BG,
        fg=PAUSED_FG,
        font=FONT_MSG,
        wraplength=wrap_px,
        justify=tk.LEFT,
        anchor=tk.W,
    )
    body.pack(fill=tk.X, anchor=tk.W, pady=(6, 0))

    progress = tk.Label(
        card,
        text=f"{workflow} - etape {completed}/{total}",
        bg=PAUSED_BG,
        fg=TIMESTAMP_FG,
        font=FONT_TIMESTAMP,
        anchor="w",
    )
    progress.pack(fill=tk.X, anchor=tk.W, pady=(4, 8))

    buttons = tk.Frame(card, bg=PAUSED_BG)
    buttons.pack(fill=tk.X, anchor=tk.W)

    def _resume():
        return self._on_paused_resume(replay_id)

    def _abort():
        return self._on_paused_abort(replay_id)

    # Options shared by both buttons.
    shared_btn_opts = {
        "fg": "white",
        "font": FONT_QUICK_BTN,
        "padx": 14,
        "pady": 4,
        "bd": 0,
        "cursor": "hand2",
        "activeforeground": "white",
    }

    btn_resume = tk.Button(
        buttons,
        text="Continuer",
        bg=PAUSED_BTN_RESUME_BG,
        activebackground=PAUSED_BTN_RESUME_HOVER,
        command=_resume,
        **shared_btn_opts,
    )
    btn_resume.pack(side=tk.LEFT, padx=(0, 8))

    btn_abort = tk.Button(
        buttons,
        text="Annuler",
        bg=PAUSED_BTN_ABORT_BG,
        activebackground=PAUSED_BTN_ABORT_HOVER,
        command=_abort,
        **shared_btn_opts,
    )
    btn_abort.pack(side=tk.LEFT)

    feedback_label = tk.Label(
        card,
        text="",
        bg=PAUSED_BG,
        fg=PAUSED_FG,
        font=FONT_TIMESTAMP,
        anchor="w",
    )
    feedback_label.pack(fill=tk.X, anchor=tk.W, pady=(6, 0))

    self._active_paused_bubble = {
        "container": row,
        "inner": card,
        "btn_resume": btn_resume,
        "btn_abort": btn_abort,
        "feedback_label": feedback_label,
        "replay_id": replay_id,
    }
    self._scroll_to_bottom()
def _close_active_paused_bubble(self, reason: str) -> None:
if self._active_paused_bubble is None or self._root is None:
return
@@ -1524,8 +1673,19 @@ class ChatWindow:
self._add_lea_message(
f"C'est parti ! Montrez-moi comment faire \u00ab {name} \u00bb."
)
# --- P1-LEA-SHADOW : d\u00e9clencher d'abord l'orchestrateur L\u00e9a Linux ---
# On contacte agent-chat AVANT la capture locale : si la session
# serveur d\u00e9marre, on r\u00e9cup\u00e8re un session_id + un message d'accueil
# de L\u00e9a qu'on affiche dans le chat. Si \u00e9chec : mode d\u00e9grad\u00e9
# (capture locale uniquement, sans assistance conversationnelle).
self._start_lea_orchestrator_session(name)
# --- Comportement historique pr\u00e9serv\u00e9 : capture locale ---
# Le pipeline streaming (frames/\u00e9v\u00e9nements) reste pilot\u00e9 par
# agent_v1 local. L'orchestrateur Linux ne touche PAS \u00e0 la
# capture, il pilote uniquement le dialogue de fin de session.
try:
# Utiliser l'etat partage si disponible (synchronise le systray)
if self._shared_state is not None:
self._shared_state.start_recording(name)
elif self._on_start_callback is not None:
@@ -1533,6 +1693,60 @@ class ChatWindow:
except Exception as e:
self._add_lea_message(f"Oups, un probl\u00e8me : {e}")
def _start_lea_orchestrator_session(self, session_name: str) -> None:
    """Call POST /api/learn/start on the Linux agent-chat (P1-LEA-SHADOW).

    Fail-safe: any error (missing config, missing httpx, timeout, server
    500...) switches to degraded mode without blocking the local capture,
    and a clear message is shown in the chat.
    """
    degraded_notice = (
        "\u26a0 Impossible de joindre L\u00e9a serveur. "
        "L'apprentissage continue localement, mais sans assistance "
        "conversationnelle."
    )

    try:
        from ..config import AGENT_CHAT_URL, API_TOKEN, MACHINE_ID
        from ..network.lea_orchestrator_client import (
            LeaOrchestratorError,
            start_learning_session,
        )
    except Exception as exc:  # pragma: no cover (import-time)
        logger.error("Impossible de charger le client orchestrateur L\u00e9a : %s", exc)
        self._add_lea_message(degraded_notice)
        return

    try:
        resp = start_learning_session(
            AGENT_CHAT_URL,
            machine_id=MACHINE_ID,
            session_name=session_name,
            api_token=API_TOKEN,
            trigger_source="windows_button",
        )
    except LeaOrchestratorError as exc:
        logger.error("Orchestrateur L\u00e9a injoignable : %s", exc)
        self._add_lea_message(degraded_notice)
    except Exception as exc:  # noqa: BLE001 - defensive
        logger.exception("Erreur inattendue orchestrateur L\u00e9a")
        self._add_lea_message(
            f"\u26a0 Erreur orchestrateur L\u00e9a : {exc}. "
            "L'apprentissage continue localement."
        )
    else:
        # Show the welcome message returned by the server, when there is one.
        if resp.message:
            self._add_lea_message(resp.message)
        logger.info(
            "Session orchestrateur L\u00e9a OK : id=%s state=%s",
            resp.session_id, resp.state,
        )
def _on_quick_tasks(self) -> None:
"""Bouton Lancer — demande ce que L\u00e9a sait faire."""
self._add_user_message("Qu'est-ce que vous savez faire ?")

View File

@@ -0,0 +1,484 @@
"""Contrat de lisibilite des messages visibles par l'humain.
Ce module ne branche encore aucun point runtime. Il fournit une brique pure et
testable pour que les sorties UI de Lea puissent refuser les messages trop
generiques ou trop techniques avant affichage.
"""
from __future__ import annotations
import logging
import re
import unicodedata
from dataclasses import dataclass
from typing import Iterable, Mapping
logger = logging.getLogger(__name__)
# Labels of the four lines of a supervised pause message, in display order
# (same prefixes as the lines built by format_supervised_pause_message).
SUPERVISED_PAUSE_LABELS = (
"J'essaie de",
"J'attendais",
"Je vois",
"Peux-tu",
)
# Length limits, in characters.
# NOTE(review): presumably enforced by validators that are not visible in
# this part of the module - confirm how each limit is applied.
MAX_VISIBLE_MESSAGE_CHARS = 720
MAX_FIELD_CHARS = 180
MIN_FIELD_CHARS = 4
# Vague French phrases (with and without accents).
# NOTE(review): presumably used to reject messages that are too generic -
# the code that consumes this tuple is not visible here.
_GENERIC_PHRASES = (
"un element",
"un élément",
"l'element",
"l'élément",
"element inconnu",
"élément inconnu",
"cette action",
"cette cible",
"cible inconnue",
"validation requise",
"action requise",
)
# French verbs and phrases that signal an actionable request (with and
# without accents).
# NOTE(review): presumably checked against the request line of a message -
# confirm in the validators.
_ACTIONABLE_FRENCH_HINTS = (
"peux-tu",
"cliquer",
"ouvrir",
"selectionner",
"sélectionner",
"choisir",
"saisir",
"corriger",
"montrer",
"indiquer",
"valider",
"fermer",
"placer",
"mettre",
"reprendre",
)
# English technical terms and internal status codes.
# NOTE(review): presumably forbidden in human-visible messages - confirm in
# the validators.
_TECHNICAL_ENGLISH_TERMS = (
"target_not_found",
"target not found",
"no_screen_change",
"no screen change",
"wrong_window",
"wrong window",
"validation required",
"retry",
"fallback",
"timeout",
"screenshot",
"validator",
"failure",
"failed",
"resolve target",
"postcondition",
"please",
"click",
"button",
"target",
"expected",
"actual",
"observed",
)
# Matches known internal field names as whole words (case-insensitive).
_TECHNICAL_FIELD_RE = re.compile(
r"\b(?:"
r"action_id|replay_id|session_id|workflow_id|machine_id|target_spec|"
r"vlm_description|resolution_method|resolution_score|retry_count|"
r"x_pct|y_pct|screenshot_b64|expected_window_title|current_action_index"
r")\b",
re.IGNORECASE,
)
# Matches identifiers made of a technical prefix, an underscore and at least
# four further characters (e.g. a prefixed id such as "action_ab12").
_TECHNICAL_IDENTIFIER_RE = re.compile(
r"\b(?:action|replay|session|sess|workflow|node|edge|target|retry|"
r"precheck|wait|trace|event|machine|run)_[A-Za-z0-9][A-Za-z0-9_.:-]{3,}\b"
)
# Matches a UUID in its canonical 8-4-4-4-12 hexadecimal form.
_UUID_RE = re.compile(
r"\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b",
re.IGNORECASE,
)
# Matches a run of 16 or more hexadecimal digits.
_LONG_HEX_RE = re.compile(r"\b[0-9a-f]{16,}\b", re.IGNORECASE)
# Matches a parenthesized pair of 2- to 5-digit integers, e.g. "(120, 340)".
_PIXEL_TUPLE_RE = re.compile(r"\(\s*\d{2,5}\s*,\s*\d{2,5}\s*\)")
# Matches a geometry field followed by "=" or ":" and a number, e.g. "x=120".
_PIXEL_FIELD_RE = re.compile(
r"\b(?:x|y|left|top|width|height|w|h|x_pct|y_pct)\s*[=:]\s*-?\d+(?:[.,]\d+)?",
re.IGNORECASE,
)
# Matches a 2- to 5-digit number followed by "px".
_PX_RE = re.compile(r"\b\d{2,5}\s*px\b", re.IGNORECASE)
# Matches a score/confidence/threshold keyword (English or French) followed
# by ":" or "=" and a number.
_SCORE_RE = re.compile(
r"\b(?:score|confidence|confiance|similarit[eé]|threshold|seuil|"
r"probabilit[eé])\s*[:=]\s*\d+(?:[.,]\d+)?%?\b",
re.IGNORECASE,
)
@dataclass(frozen=True)
class MessageValidationIssue:
"""One problem detected in a message visible to the human."""
# Short issue code; rendered as "code: detail" by MessageContractError.
code: str
# Explanation attached to the code.
detail: str
@dataclass(frozen=True)
class MessageValidationResult:
    """Outcome of validating one user-facing message."""

    # Issues found during validation; empty when the message passed.
    issues: tuple[MessageValidationIssue, ...] = ()

    @property
    def valid(self) -> bool:
        """Tell whether no issue was recorded."""
        return not self.issues

    def raise_for_errors(self) -> None:
        """Raise MessageContractError carrying this result when it is invalid."""
        if self.valid:
            return
        raise MessageContractError(self)
class MessageContractError(ValueError):
    """Error raised when a message breaks the human-readability contract."""

    def __init__(self, result: MessageValidationResult):
        """Keep the failing result and build a "code: detail; ..." summary."""
        self.result = result
        parts = [f"{issue.code}: {issue.detail}" for issue in result.issues]
        super().__init__("Message humain invalide: " + "; ".join(parts))
@dataclass(frozen=True)
class SupervisedPauseFields:
"""Mandatory fields used to explain a supervised pause."""
# Text rendered after "J'essaie de :".
intention: str
# Text rendered after "J'attendais :".
attendu: str
# Text rendered after "Je vois :".
vu: str
# Text rendered after "Peux-tu :".
demande: str
# Fallback field values used by coerce_supervised_pause_message when the
# provided values cannot produce a valid message.
DEFAULT_SUPERVISED_PAUSE_FIELDS = SupervisedPauseFields(
intention="continuer une etape supervisee",
attendu="un accord humain clair avant de continuer",
vu="je suis sur une etape qui demande une verification humaine",
demande="indiquer si je peux continuer ou corriger l'action attendue",
)
def format_supervised_pause_message(
    *,
    intention: str,
    attendu: str,
    vu: str,
    demande: str,
) -> str:
    """Format a clear, actionable supervised pause message.

    Each field is first passed through ``_one_line``; the result is four
    lines joined by newlines, one per field. The message is then validated,
    and ``raise_for_errors`` is called on the validation result, so an
    invalid message raises instead of being returned.
    """
    goal = _one_line(intention)
    expected = _one_line(attendu)
    seen = _one_line(vu)
    ask = _one_line(demande)

    lines = [
        f"J'essaie de : {goal}",
        f"J'attendais : {expected}",
        f"Je vois : {seen}",
        f"Peux-tu : {ask}",
    ]
    message = "\n".join(lines)

    validate_supervised_pause_message(message).raise_for_errors()
    return message
def format_supervised_pause_from_mapping(payload: Mapping[str, object]) -> str:
    """Format a supervised pause from a runtime mapping.

    Each field accepts an alias to ease progressive integration:
    ``intention|trying_to``, ``attendu|expected``, ``vu|observed``,
    ``demande|request``. For each field the first key holding a non-None
    value is used. Validation errors from
    ``format_supervised_pause_message`` propagate unchanged.
    """
    accepted_keys = {
        "intention": ("intention", "trying_to"),
        "attendu": ("attendu", "expected"),
        "vu": ("vu", "observed"),
        "demande": ("demande", "request"),
    }
    resolved = {
        field: _mapping_text(payload, *keys)
        for field, keys in accepted_keys.items()
    }
    return format_supervised_pause_message(**resolved)
def coerce_supervised_pause_message(
    message: object = "",
    *,
    intention: object = "",
    attendu: object = "",
    vu: object = "",
    demande: object = "",
) -> str:
    """Return a valid supervised-pause message, even from a legacy message.

    If ``message`` already satisfies the strict four-line contract it is
    returned (stripped of surrounding whitespace). Otherwise the four fields
    are composed from the explicit values provided; when ``demande`` is empty
    the legacy message, collapsed to one line, is tried as the request. Any
    value that is too short, too long or fails ``validate_visible_message``
    is replaced by the matching default from
    ``DEFAULT_SUPERVISED_PAUSE_FIELDS``. If the composed message is still
    rejected, the all-defaults message is returned.
    """
    # Validate the text with its line breaks intact: the strict contract
    # requires exactly four lines, so a whitespace-collapsed copy (which is a
    # single line) could never pass and a valid message would be discarded.
    original = str(message or "").strip()
    if original and validate_supervised_pause_message(original).valid:
        return original

    # Single-line form, only used as a candidate for the ``demande`` field.
    raw_message = _one_line(message)
    defaults = DEFAULT_SUPERVISED_PAUSE_FIELDS
    candidates = SupervisedPauseFields(
        intention=_safe_field_text(intention, defaults.intention),
        attendu=_safe_field_text(attendu, defaults.attendu),
        vu=_safe_field_text(vu, defaults.vu),
        demande=_safe_field_text(demande or raw_message, defaults.demande),
    )
    try:
        return format_supervised_pause_message(
            intention=candidates.intention,
            attendu=candidates.attendu,
            vu=candidates.vu,
            demande=candidates.demande,
        )
    except MessageContractError:
        # Individually acceptable fields can still fail the combined contract
        # (e.g. a request that is not actionable); fall back to the defaults.
        return format_supervised_pause_message(
            intention=defaults.intention,
            attendu=defaults.attendu,
            vu=defaults.vu,
            demande=defaults.demande,
        )
def warn_visible_message(
    message: object,
    *,
    source: str,
    supervised_pause: bool = False,
) -> str:
    """Log contract violations without modifying the visible message.

    The message is converted to text (falsy values become ""), validated with
    the supervised-pause validator when ``supervised_pause`` is true and with
    the generic visible-message validator otherwise. On failure a warning
    listing the issue codes is logged. The text is returned unchanged either
    way.
    """
    text = str(message or "")
    if supervised_pause:
        outcome = validate_supervised_pause_message(text)
    else:
        outcome = validate_visible_message(text)

    if outcome.valid:
        return text

    codes = [issue.code for issue in outcome.issues]
    logger.warning(
        "[message_contract] invalid_message source=%s codes=%s",
        source,
        codes,
    )
    return text
def validate_supervised_pause_message(message: str) -> MessageValidationResult:
    """Validate the strict contract of a supervised-pause message.

    Combines the generic visible-message checks with the four-line structure
    check. When the structure is valid, each field is also checked against
    ``MIN_FIELD_CHARS`` / ``MAX_FIELD_CHARS`` and the request field must
    contain an actionable French hint and at least four words. Duplicate
    issues are removed from the result.
    """
    collected = list(validate_visible_message(message).issues)
    parsed, structure_issues = _parse_supervised_pause(message)
    collected.extend(structure_issues)

    # Field-level checks only make sense once the four lines were recognised.
    if not parsed:
        return _dedupe_issues(collected)

    for name, value in parsed.items():
        size = len(value)
        if size < MIN_FIELD_CHARS:
            collected.append(
                MessageValidationIssue(
                    "field_too_short",
                    f"{name} doit etre explicite",
                )
            )
        if size > MAX_FIELD_CHARS:
            collected.append(
                MessageValidationIssue(
                    "field_too_long",
                    f"{name} depasse {MAX_FIELD_CHARS} caracteres",
                )
            )

    request = parsed.get("demande", "")
    if not _contains_actionable_french(request) or len(request.split()) < 4:
        collected.append(
            MessageValidationIssue(
                "not_actionable",
                "la demande doit contenir une action concrete en francais",
            )
        )
    return _dedupe_issues(collected)
def validate_visible_message(message: str) -> MessageValidationResult:
    """Check that a visible message is neither generic nor technical.

    An empty (or whitespace-only) message yields a single ``empty_message``
    issue. Otherwise the message is checked for excessive length, generic
    phrases, technical English terms (both compared after accent/case
    folding) and raw technical patterns such as identifiers, coordinates or
    scores (regexes applied to the unfolded text). Duplicate issues are
    removed from the result.
    """
    text = str(message or "").strip()
    if not text:
        empty = MessageValidationIssue("empty_message", "message vide")
        return MessageValidationResult((empty,))

    found: list[MessageValidationIssue] = []
    if len(text) > MAX_VISIBLE_MESSAGE_CHARS:
        found.append(
            MessageValidationIssue(
                "message_too_long",
                f"message au-dela de {MAX_VISIBLE_MESSAGE_CHARS} caracteres",
            )
        )

    haystack = _fold(text)

    # Phrases that fold to the same text are only checked (and reported) once.
    checked_phrases: set[str] = set()
    for phrase in _GENERIC_PHRASES:
        needle = _fold(phrase)
        if needle in checked_phrases:
            continue
        checked_phrases.add(needle)
        if needle in haystack:
            found.append(
                MessageValidationIssue(
                    "generic_phrase",
                    f"formulation trop generique: {phrase}",
                )
            )

    found.extend(
        MessageValidationIssue(
            "technical_english",
            f"anglais technique visible: {term}",
        )
        for term in _TECHNICAL_ENGLISH_TERMS
        if _fold(term) in haystack
    )

    raw_pattern_checks = (
        ("technical_field", _TECHNICAL_FIELD_RE, "champ technique brut"),
        ("technical_identifier", _TECHNICAL_IDENTIFIER_RE, "identifiant technique brut"),
        ("technical_identifier", _UUID_RE, "UUID brut"),
        ("technical_identifier", _LONG_HEX_RE, "hash technique brut"),
        ("raw_coordinates", _PIXEL_TUPLE_RE, "coordonnees pixel brutes"),
        ("raw_coordinates", _PIXEL_FIELD_RE, "coordonnees techniques brutes"),
        ("raw_coordinates", _PX_RE, "coordonnees pixel brutes"),
        ("raw_score", _SCORE_RE, "score ou confiance brut"),
    )
    found.extend(
        MessageValidationIssue(code, detail)
        for code, pattern, detail in raw_pattern_checks
        if pattern.search(text)
    )
    return _dedupe_issues(found)
def is_valid_visible_message(message: str) -> bool:
    """Boolean shortcut over ``validate_visible_message`` for UI call sites."""
    outcome = validate_visible_message(message)
    return outcome.valid
def is_valid_supervised_pause_message(message: str) -> bool:
    """Boolean shortcut over ``validate_supervised_pause_message``."""
    outcome = validate_supervised_pause_message(message)
    return outcome.valid
def _parse_supervised_pause(
    message: str,
) -> tuple[dict[str, str], list[MessageValidationIssue]]:
    """Split a supervised-pause message into its four labelled fields.

    Blank lines are ignored and trailing whitespace is removed from each
    line. The remaining lines must be exactly four and must carry, in order,
    the labels "J'essaie de", "J'attendais", "Je vois" and "Peux-tu", each
    followed by a colon and some text.

    Returns:
        ``(fields, issues)``. ``fields`` maps ``intention``, ``attendu``,
        ``vu`` and ``demande`` to their stripped text when all four lines
        match, and is empty otherwise. ``issues`` holds one
        ``invalid_structure`` entry for a wrong line count, or one per line
        that does not carry the expected label.
    """
    lines = [line.rstrip() for line in str(message or "").splitlines() if line.strip()]
    issues: list[MessageValidationIssue] = []
    if len(lines) != 4:
        issues.append(
            MessageValidationIssue(
                "invalid_structure",
                "une pause supervisee doit contenir exactement 4 lignes",
            )
        )
        return {}, issues

    specs = (
        ("intention", r"^J'essaie de\s*:\s*(.+)$"),
        ("attendu", r"^J'attendais\s*:\s*(.+)$"),
        ("vu", r"^Je vois\s*:\s*(.+)$"),
        ("demande", r"^Peux-tu\s*:\s*(.+)$"),
    )
    fields: dict[str, str] = {}
    for position, (line, (name, pattern)) in enumerate(zip(lines, specs)):
        match = re.match(pattern, line)
        if match is None:
            # Report the position of the failing line itself. Counting the
            # fields matched so far would mislabel every failure that follows
            # an earlier failed line.
            issues.append(
                MessageValidationIssue(
                    "invalid_structure",
                    f"ligne {position + 1} doit commencer par {SUPERVISED_PAUSE_LABELS[position]}",
                )
            )
            continue
        fields[name] = match.group(1).strip()

    # A partial parse is not usable by callers: expose all fields or none.
    if len(fields) != 4:
        return {}, issues
    return fields, issues
def _contains_actionable_french(text: str) -> bool:
    """Return True if ``text`` contains any of ``_ACTIONABLE_FRENCH_HINTS``.

    Both sides are compared after accent and case folding.
    """
    haystack = _fold(text)
    for hint in _ACTIONABLE_FRENCH_HINTS:
        if _fold(hint) in haystack:
            return True
    return False
def _one_line(value: object) -> str:
return re.sub(r"\s+", " ", str(value or "")).strip()
def _mapping_text(payload: Mapping[str, object], *keys: str) -> str:
for key in keys:
value = payload.get(key)
if value is not None:
return str(value)
return ""
def _safe_field_text(value: object, fallback: str) -> str:
    """Return ``value`` as single-line text if it is usable, else ``fallback``.

    The text is usable when its length lies within ``MIN_FIELD_CHARS`` and
    ``MAX_FIELD_CHARS`` (inclusive) and it passes
    ``validate_visible_message``.
    """
    candidate = _one_line(value)
    within_bounds = MIN_FIELD_CHARS <= len(candidate) <= MAX_FIELD_CHARS
    # The visible-message check is only run on length-compliant text.
    if within_bounds and validate_visible_message(candidate).valid:
        return candidate
    return fallback
def _fold(text: str) -> str:
normalized = unicodedata.normalize("NFKD", str(text or ""))
ascii_text = "".join(ch for ch in normalized if not unicodedata.combining(ch))
return ascii_text.casefold()
def _dedupe_issues(issues: Iterable[MessageValidationIssue]) -> MessageValidationResult:
    """Wrap ``issues`` in a result, dropping repeated (code, detail) pairs.

    The first occurrence of each pair is kept and the original order is
    preserved.
    """
    first_by_key: dict[tuple[str, str], MessageValidationIssue] = {}
    for issue in issues:
        # setdefault keeps the earliest issue for a given key.
        first_by_key.setdefault((issue.code, issue.detail), issue)
    return MessageValidationResult(tuple(first_by_key.values()))
# Names exported by ``from <module> import *``, kept in alphabetical order.
__all__ = [
    "MAX_FIELD_CHARS",
    "MAX_VISIBLE_MESSAGE_CHARS",
    "MessageContractError",
    "MessageValidationIssue",
    "MessageValidationResult",
    "SUPERVISED_PAUSE_LABELS",
    "SupervisedPauseFields",
    "coerce_supervised_pause_message",
    "format_supervised_pause_from_mapping",
    "format_supervised_pause_message",
    "is_valid_supervised_pause_message",
    "is_valid_visible_message",
    "validate_supervised_pause_message",
    "validate_visible_message",
    "warn_visible_message",
]

View File

@@ -82,6 +82,12 @@ ICONE_PAR_NIVEAU: dict[NiveauMessage, str] = {
NiveauMessage.BLOCAGE: "?",
}
# Supervised pauses may carry a precise, sometimes long, reason (observed
# window, expected window, action in progress). Keep the useful information
# and let the UI widgets deal with wrapping/scrolling.
MAX_TARGET_DESCRIPTION_CHARS = 1024
MAX_GENERIC_TECHNICAL_MESSAGE_CHARS = 1024
@dataclass
class MessageUtilisateur:
@@ -147,9 +153,9 @@ def _nettoyer_description_cible(description: str) -> str:
desc = description.strip()
# Retirer les guillemets encapsulants
desc = desc.strip("'\"`")
# Limiter la longueur
if len(desc) > 80:
desc = desc[:77] + "..."
# Limiter la longueur sans perdre les details utiles a la supervision.
if len(desc) > MAX_TARGET_DESCRIPTION_CHARS:
desc = desc[: MAX_TARGET_DESCRIPTION_CHARS - 3] + "..."
return desc
@@ -566,8 +572,8 @@ def formatter_erreur_generique(
# Fallback : message technique tronqué
msg_tronque = message_technique.strip()
if len(msg_tronque) > 120:
msg_tronque = msg_tronque[:117] + "..."
if len(msg_tronque) > MAX_GENERIC_TECHNICAL_MESSAGE_CHARS:
msg_tronque = msg_tronque[: MAX_GENERIC_TECHNICAL_MESSAGE_CHARS - 3] + "..."
return MessageUtilisateur(
niveau=NiveauMessage.ATTENTION,

View File

@@ -371,7 +371,13 @@ class SmartTrayV1:
)
if name and name.strip():
name = name.strip()
# Utiliser l'etat partage si disponible
# --- P1-LEA-SHADOW : déclencher d'abord l'orchestrateur Léa Linux ---
# On contacte agent-chat AVANT la capture locale. Si échec,
# bascule en mode dégradé (capture locale sans assistance).
self._start_lea_orchestrator_session(name)
# --- Comportement historique préservé : capture locale ---
if self._shared_state is not None:
try:
self._shared_state.start_recording(name)
@@ -393,6 +399,55 @@ class SmartTrayV1:
threading.Thread(target=_dialog, daemon=True).start()
def _start_lea_orchestrator_session(self, session_name: str) -> None:
    """Ask the Léa orchestrator (agent-chat) to start a learning session.

    Fail-safe: a failure to import the client or configuration, a
    ``LeaOrchestratorError`` or any other exception raised by
    ``start_learning_session`` is logged and reported to the user through
    ``self._notifier``; the method then returns without raising so the
    caller can carry on. On success only an info log is written.

    Args:
        session_name: Name of the learning session to start.
    """

    def tell_user(text: str) -> None:
        # All user-facing notifications of this method share the same title.
        self._notifier.notify("Léa", text)

    # Imports are done lazily so a missing client/config cannot break the tray.
    try:
        from ..config import AGENT_CHAT_URL, API_TOKEN, MACHINE_ID
        from ..network.lea_orchestrator_client import (
            LeaOrchestratorError,
            start_learning_session,
        )
    except Exception as exc:  # pragma: no cover (import-time)
        logger.error("Impossible de charger le client orchestrateur Léa : %s", exc)
        tell_user("Serveur injoignable — apprentissage local uniquement.")
        return

    try:
        resp = start_learning_session(
            AGENT_CHAT_URL,
            machine_id=MACHINE_ID,
            session_name=session_name,
            api_token=API_TOKEN,
            trigger_source="tray_button",
        )
    except LeaOrchestratorError as exc:
        logger.error("Orchestrateur Léa injoignable : %s", exc)
        tell_user("Serveur injoignable — apprentissage local uniquement.")
        return
    except Exception:  # noqa: BLE001 — defensive catch-all
        logger.exception("Erreur inattendue orchestrateur Léa")
        tell_user("Erreur orchestrateur — apprentissage local uniquement.")
        return

    logger.info(
        "Session orchestrateur Léa OK : id=%s state=%s",
        resp.session_id, resp.state,
    )
def _on_stop_session(self, _icon=None, _item=None) -> None:
"""Termine la session en cours et envoie les donnees."""
count = self.actions_count

View File

@@ -43,6 +43,9 @@ class EventCaptorV1:
# État des touches modificatrices
self.modifiers = set()
self._pending_standalone_win = False
self._suppress_release_only_win_combo = False
self._raw_key_buffer: List[Dict[str, Any]] = []
# Tracking du focus fenêtre
self.last_window = None
@@ -91,6 +94,7 @@ class EventCaptorV1:
# Flush du buffer texte restant avant arrêt
self._flush_text_buffer()
# Annuler le timer s'il est en cours
emit_escape = False
with self._text_lock:
if self._text_flush_timer is not None:
self._text_flush_timer.cancel()
@@ -159,7 +163,80 @@ class EventCaptorV1:
# Clavier
# ----------------------------------------------------------------
@staticmethod
def _get_key_name(key) -> Optional[str]:
    """Turn a pynput ``Key``/``KeyCode`` object into a readable name.

    A ``KeyCode`` yields its ``char`` (or None when it has none), a ``Key``
    yields its ``name``, and anything else is rendered with ``str``.
    """
    if isinstance(key, KeyCode):
        return key.char or None
    if isinstance(key, Key):
        return key.name
    return str(key)
@staticmethod
def _encode_key(key) -> Dict[str, Any]:
    """Describe a key object as a plain dict.

    The ``kind`` entry is "vk" for a ``KeyCode`` (with its ``vk`` and
    ``char``), "key" for a ``Key`` (with its ``name``) and "unknown" for
    anything else (with its ``str`` rendering).
    """
    if isinstance(key, KeyCode):
        encoded = {"kind": "vk", "vk": key.vk, "char": key.char}
    elif isinstance(key, Key):
        encoded = {"kind": "key", "name": key.name}
    else:
        encoded = {"kind": "unknown", "str": str(key)}
    return encoded
@staticmethod
def _raw_key_name(raw_key: Dict[str, Any]) -> Optional[str]:
if raw_key.get("kind") == "vk":
char = raw_key.get("char")
if char and len(str(char)) == 1:
return str(char).lower()
if raw_key.get("kind") == "key":
name = raw_key.get("name")
return str(name).lower() if name else None
return None
def _emit_release_only_windows_combo(self) -> bool:
    """Infer a Win+<key> combo when only key releases were captured.

    Looks at the raw key buffer: if its last entry is the release of a
    Windows key (cmd / cmd_l / cmd_r) and an earlier entry is the release of
    a non-modifier key, the buffer is cleared and a ``key_combo`` event with
    keys ``["win", <key>]`` is emitted through ``self.on_event``.

    Returns:
        True when an event was emitted, False otherwise (the buffer is then
        left untouched).
    """
    win_names = {"cmd", "cmd_l", "cmd_r"}
    modifier_names = win_names | {
        "ctrl", "ctrl_l", "ctrl_r",
        "alt", "alt_l", "alt_r",
        "shift", "shift_l", "shift_r",
    }

    with self._text_lock:
        raw_keys = list(getattr(self, "_raw_key_buffer", []))
        if len(raw_keys) < 2:
            return False

        *earlier, final = raw_keys
        if final.get("action") != "release" or self._raw_key_name(final) not in win_names:
            return False

        # Most recent released key that is not itself a modifier.
        # NOTE(review): this scans the whole buffer, so it relies on the
        # buffer being cleared elsewhere to avoid matching stale releases —
        # confirm against the rest of the class.
        released_names = (
            self._raw_key_name(raw)
            for raw in reversed(earlier)
            if raw.get("action") == "release"
        )
        combo_key = next(
            (name for name in released_names if name and name not in modifier_names),
            None,
        )
        if not combo_key:
            return False

        self._raw_key_buffer.clear()

    self.on_event(
        {
            "type": "key_combo",
            "keys": ["win", combo_key],
            "raw_keys": raw_keys,
            "timestamp": time.time(),
        }
    )
    return True
def _on_press(self, key):
with self._text_lock:
if not hasattr(self, "_raw_key_buffer"):
self._raw_key_buffer = []
self._raw_key_buffer.append({
"action": "press",
**self._encode_key(key),
})
# Gestion des touches modificatrices
if key in (Key.ctrl, Key.ctrl_l, Key.ctrl_r):
self.modifiers.add("ctrl")
@@ -167,15 +244,26 @@ class EventCaptorV1:
self.modifiers.add("alt")
elif key in (Key.shift, Key.shift_l, Key.shift_r):
self.modifiers.add("shift")
elif key in (Key.cmd, Key.cmd_l, Key.cmd_r):
self.modifiers.add("win")
self._pending_standalone_win = True
# --- Combos avec modificateur (sauf Shift seul) ---
# Shift seul n'est pas un « vrai » modificateur pour les combos :
# Shift+a = 'A' = saisie texte, pas un raccourci.
# On considère un combo seulement si Ctrl ou Alt est enfoncé.
has_real_modifier = self.modifiers & {"ctrl", "alt"}
# On considère un combo seulement si Ctrl, Alt ou Win est enfoncé.
has_real_modifier = self.modifiers & {"ctrl", "alt", "win"}
if has_real_modifier:
key_name = self._get_key_name(key)
if key_name and key_name not in ("ctrl", "alt", "shift"):
if key_name and key_name not in (
"ctrl", "ctrl_l", "ctrl_r",
"alt", "alt_l", "alt_r",
"shift", "shift_l", "shift_r",
"cmd", "cmd_l", "cmd_r",
):
self._pending_standalone_win = False
if "win" in self.modifiers:
self._suppress_release_only_win_combo = True
# Un combo interrompt la saisie texte en cours
self._flush_text_buffer()
event = {
@@ -205,14 +293,18 @@ class EventCaptorV1:
self._reset_flush_timer()
return
if key == Key.escape:
escape_keys = [Key.esc]
key_escape = getattr(Key, "escape", None)
if key_escape is not None:
escape_keys.append(key_escape)
if key in escape_keys:
# Annuler la saisie en cours
self._text_buffer.clear()
self._text_start_pos = None
self._cancel_flush_timer()
return
emit_escape = True
if key in (Key.enter, Key.tab):
elif key in (Key.enter, Key.tab):
# Flush immédiat — on relâche le lock avant d'appeler
# _flush_text_buffer (qui prend aussi le lock)
pass # on sort du with et on flush après
@@ -238,6 +330,15 @@ class EventCaptorV1:
# Touche spéciale non gérée (F1, Insert, etc.) — on ignore
return
if emit_escape:
event = {
"type": "key_combo",
"keys": ["escape"],
"timestamp": time.time(),
}
self.on_event(event)
return
# Si on arrive ici, c'est Enter ou Tab → flush immédiat
self._flush_text_buffer()
@@ -290,12 +391,46 @@ class EventCaptorV1:
self.on_event(event)
def _on_release(self, key):
    """Handle a key release: record it and update modifier/Win-key state.

    The release is appended to the raw key buffer. For a Windows key
    (cmd / cmd_l / cmd_r) exactly one of the following happens, in this
    priority order, before the Win state is reset:

    1. a Win combo was already emitted on press: the raw buffer is cleared;
    2. a release-only Win+<key> combo can be inferred: it is emitted by
       ``_emit_release_only_windows_combo``;
    3. the Win key was pressed on its own: a ``["win"]`` combo is emitted.

    For Ctrl, Alt and Shift the matching modifier is simply removed from
    ``self.modifiers``.
    """
    with self._text_lock:
        self._raw_key_buffer.append({
            "action": "release",
            **self._encode_key(key),
        })

    if key in (Key.cmd, Key.cmd_l, Key.cmd_r):
        if self._suppress_release_only_win_combo:
            with self._text_lock:
                self._raw_key_buffer.clear()
        elif (
            not self._emit_release_only_windows_combo()
            and self._pending_standalone_win
        ):
            self.on_event(
                {
                    "type": "key_combo",
                    "keys": ["win"],
                    "timestamp": time.time(),
                }
            )
        # Every Win-release path ends with the same state reset.
        self._pending_standalone_win = False
        self._suppress_release_only_win_combo = False
        self.modifiers.discard("win")
        return

    if key in (Key.ctrl, Key.ctrl_l, Key.ctrl_r):
        self.modifiers.discard("ctrl")
    elif key in (Key.alt, Key.alt_l, Key.alt_r):
        self.modifiers.discard("alt")
    elif key in (Key.shift, Key.shift_l, Key.shift_r):
        self.modifiers.discard("shift")
def _watch_window_focus(self):
"""Surveille proactivement le changement de fenêtre pour le stagiaire."""