From d38f0b0f2fab8eb061508087aa08826bf8f212a5 Mon Sep 17 00:00:00 2001 From: Dom Date: Tue, 2 Jun 2026 16:24:10 +0200 Subject: [PATCH] feat(agent): add learn action flow and grounding guards --- agent_chat/app.py | 150 ++- agent_chat/autonomous_planner.py | 26 +- agent_chat/handlers/__init__.py | 29 + agent_chat/handlers/learn_action.py | 1192 +++++++++++++++++ agent_v0/agent_v1/config.py | 7 + agent_v0/agent_v1/core/captor.py | 108 +- agent_v0/agent_v1/core/executor.py | 912 +++++++++++-- agent_v0/agent_v1/core/grounding.py | 57 + agent_v0/agent_v1/main.py | 15 +- .../network/lea_orchestrator_client.py | 147 ++ agent_v0/agent_v1/network/streamer.py | 10 +- agent_v0/agent_v1/ui/chat_window.py | 286 +++- agent_v0/agent_v1/ui/message_contract.py | 484 +++++++ agent_v0/agent_v1/ui/messages.py | 16 +- agent_v0/agent_v1/ui/smart_tray.py | 57 +- .../windows_client/agent_v1/core/captor.py | 147 +- core/execution/input_handler.py | 12 +- core/grounding/dialog_handler.py | 4 +- core/grounding/fast_detector.py | 8 +- core/grounding/title_verifier.py | 8 +- tests/conftest.py | 39 +- ...est_agent_chat_learn_action_integration.py | 254 ++++ tests/integration/test_agents_enroll_api.py | 103 ++ tests/integration/test_build_replay_perf.py | 198 +++ .../integration/test_chat_window_templates.py | 2 +- ...replay_resume_preserves_original_action.py | 8 +- .../test_replay_session_trim_neutral.py | 7 +- tests/integration/test_t2a_extract.py | 18 + tests/unit/test_agent_chat_cors_lan.py | 86 ++ tests/unit/test_agent_chat_learn_action.py | 526 ++++++++ .../unit/test_autonomous_planner_owl_flag.py | 121 ++ .../unit/test_chat_window_paused_dispatch.py | 42 +- .../test_enrich_click_skip_build_vision.py | 269 ++++ .../unit/test_executor_verify_window_guard.py | 128 ++ tests/unit/test_grounding_engine.py | 77 ++ tests/unit/test_keyboard_system_keys.py | 162 +++ tests/unit/test_lea_message_contract.py | 280 ++++ tests/unit/test_lea_micro_preflight.py | 109 ++ 
tests/unit/test_lea_notifications.py | 9 +- 39 files changed, 5901 insertions(+), 212 deletions(-) create mode 100644 agent_chat/handlers/__init__.py create mode 100644 agent_chat/handlers/learn_action.py create mode 100644 agent_v0/agent_v1/network/lea_orchestrator_client.py create mode 100644 agent_v0/agent_v1/ui/message_contract.py create mode 100644 tests/integration/test_agent_chat_learn_action_integration.py create mode 100644 tests/integration/test_build_replay_perf.py create mode 100644 tests/unit/test_agent_chat_cors_lan.py create mode 100644 tests/unit/test_agent_chat_learn_action.py create mode 100644 tests/unit/test_autonomous_planner_owl_flag.py create mode 100644 tests/unit/test_enrich_click_skip_build_vision.py create mode 100644 tests/unit/test_keyboard_system_keys.py create mode 100644 tests/unit/test_lea_message_contract.py create mode 100644 tests/unit/test_lea_micro_preflight.py diff --git a/agent_chat/app.py b/agent_chat/app.py index 5644c780e..c2379eac0 100644 --- a/agent_chat/app.py +++ b/agent_chat/app.py @@ -83,9 +83,24 @@ app.config['MAX_CONTENT_LENGTH'] = 50 * 1024 * 1024 # 50 MB max upload (sécuri _ALLOWED_ORIGINS = [ "http://localhost:3002", "http://localhost:5002", + "http://localhost:5004", "https://vwb.labs.laurinebazin.design", "https://lea.labs.laurinebazin.design", + # LAN local : serveur Linux (192.168.1.40) + Léa Windows (192.168.1.11). + # Sans ces origines, engineio rejette la ChatWindow tkinter Windows et + # même les requêtes self-loopback (cf. journal 2026-05-24 11:00:47). + "http://192.168.1.40:5004", + "http://192.168.1.40:5005", + "http://192.168.1.11:5004", + "http://192.168.1.11:5005", ] +# Override possible via LEA_CORS_ALLOWED_ORIGINS=comma,separated,list pour +# environnements non-LAN. Vide ou absent → garde la liste par défaut ci-dessus. 
+_extra_origins = os.environ.get("LEA_CORS_ALLOWED_ORIGINS", "").strip() +if _extra_origins: + _ALLOWED_ORIGINS.extend( + o.strip() for o in _extra_origins.split(",") if o.strip() + ) socketio = SocketIO(app, cors_allowed_origins=_ALLOWED_ORIGINS) @@ -199,6 +214,9 @@ _pending_imports: Dict[str, Dict[str, Any]] = {} # Copilot state — suivi du mode pas-à-pas _copilot_sessions: Dict[str, Dict[str, Any]] = {} +# LearnActionOrchestrator — P1-LEA SHADOW (apprentissage Léa-first) +learn_action_orchestrator = None # injecté par init_system() + _COPILOT_KEYWORDS = [ "copilot", "co-pilot", "pas à pas", "pas-à-pas", "pas a pas", @@ -278,8 +296,24 @@ def init_system(): if EXECUTION_AVAILABLE: try: # Pipeline de workflow (matching + actions) - workflow_pipeline = WorkflowPipeline() - logger.info("✓ WorkflowPipeline initialisé") + # Depuis C1c 2026-05-25 : désactiver UI detection (OWL/VLM côté + # UIDetector via DetectionConfig) par défaut pour économiser + # ~900 MiB VRAM au boot du chat service. Le chemin SocketIO 5004 + # / narration ChatWindow / ExecutionLoop n'utilise pas + # workflow_pipeline.ui_detector (grep confirmé). Activation + # explicite : AGENT_CHAT_ENABLE_UI_DETECTION=1. + _ui_detection_enabled = os.environ.get( + "AGENT_CHAT_ENABLE_UI_DETECTION", "0" + ).strip() in ("1", "true", "yes") + workflow_pipeline = WorkflowPipeline( + enable_ui_detection=_ui_detection_enabled, + enable_vlm=_ui_detection_enabled, + ) + logger.info( + f"✓ WorkflowPipeline initialisé " + f"(ui_detection={_ui_detection_enabled}, " + f"économie ~900 MiB VRAM si False)" + ) # Capture d'écran screen_capturer = ScreenCapturer() @@ -356,6 +390,26 @@ def init_system(): else: logger.info("ℹ Import Excel non disponible (openpyxl manquant ?)") + # 8. 
LearnActionOrchestrator (P1-LEA SHADOW) — apprentissage Léa-first + global learn_action_orchestrator + try: + from .handlers.learn_action import get_learn_action_orchestrator + + def _learn_emit(event: str, payload: Dict[str, Any]) -> None: + try: + socketio.emit(event, payload) + except Exception: + logger.debug("learn emit silenced", exc_info=True) + + learn_action_orchestrator = get_learn_action_orchestrator(emit=_learn_emit) + resumed = learn_action_orchestrator.resume_sessions() + logger.info( + f"✓ LearnActionOrchestrator initialisé (sessions reprises: {len(resumed)})" + ) + except Exception as e: + logger.warning(f"⚠ LearnActionOrchestrator: {e}") + learn_action_orchestrator = None + # ============================================================================= # Routes Web @@ -768,6 +822,24 @@ def api_chat(): if not message: return jsonify({"error": "Message vide"}), 400 + # 0. Routage P1-LEA : si une session d'apprentissage est active pour ce + # session_id, l'orchestrateur traite le message ; sinon on tombe sur le + # flux normal (intent_parser / matcher / confirmation). + if learn_action_orchestrator is not None and session_id: + try: + learn_reply = learn_action_orchestrator.handle_chat_message( + session_id, message + ) + except Exception: + logger.exception("learn_action_orchestrator error") + learn_reply = None + if learn_reply is not None: + return jsonify({ + "session_id": session_id, + "response": learn_reply, + "handler": "learn_action", + }) + # 1. 
Obtenir ou créer la session session = conversation_manager.get_or_create_session(session_id=session_id) @@ -1834,7 +1906,13 @@ def _poll_replay_progress(replay_id: str, workflow_name: str, total_actions: int "completed": completed, "total": total_actions, "failed_action": data.get("failed_action"), - "reason": data.get("error") or "Action incertaine", + "reason": ( + data.get("pause_message") + or data.get("message") + or data.get("error") + or "Action incertaine" + ), + "safety_checks": data.get("safety_checks") or [], }) was_paused = True elapsed = 0 @@ -2713,6 +2791,72 @@ def urgences_list(): return jsonify({"orchestrations": list_orchestrations()}) +# ============================================================================= +# P1-LEA SHADOW — déclenchement d'apprentissage depuis l'extérieur +# ============================================================================= + +@app.route('/api/learn/start', methods=['POST']) +def api_learn_start(): + """Déclenche une session d'apprentissage Léa-first. + + Endpoint utilisé par le bouton Windows (ChatWindow tkinter) ou tout autre + client externe pour démarrer le cycle Shadow → Persist côté agent-chat. + + Payload JSON : + - machine_id (str, obligatoire) : identifiant de la machine où + l'apprentissage est en cours (sera repris pour le persist). + - session_name (str | None, optionnel) : nom d'affichage de la + session (ignoré pour l'instant — réservé futur). + - user_id (str | None, optionnel) : défaut "default". + - trigger_source (str, optionnel) : défaut "windows_button". + Utilisé pour distinguer du "magic_phrase" ou "proactive". + + Retours : + - 200 : {"session_id": str, "state": str, "message": str} + - 400 : machine_id absent ou vide + - 503 : orchestrateur non initialisé (init_system pas appelé) + - 500 : exception interne (shadow_start, état illégal, etc.) 
+ + Auth/CORS : suit le pattern des autres routes API du module (pas d'auth + Flask explicite — l'API est en LAN derrière le reverse proxy / + SocketIO cors_allowed_origins). + """ + if learn_action_orchestrator is None: + return jsonify({ + "error": "LearnActionOrchestrator non initialisé", + }), 503 + + data = request.get_json(silent=True) or {} + machine_id = (data.get("machine_id") or "").strip() + if not machine_id: + return jsonify({ + "error": "machine_id requis (str non vide)", + }), 400 + + user_id = (data.get("user_id") or "default").strip() or "default" + trigger_source = (data.get("trigger_source") or "windows_button").strip() or "windows_button" + # session_name reçu mais non utilisé pour l'instant (réservé futur) + _session_name = data.get("session_name") + + try: + st, reply = learn_action_orchestrator.start_session( + user_id=user_id, + trigger_source=trigger_source, + machine_id=machine_id, + ) + except Exception as exc: + logger.exception("api_learn_start failed") + return jsonify({ + "error": f"démarrage apprentissage impossible: {exc}", + }), 500 + + return jsonify({ + "session_id": st.session_id, + "state": st.state.value if hasattr(st.state, "value") else str(st.state), + "message": reply, + }) + + # ============================================================================= # Main # ============================================================================= diff --git a/agent_chat/autonomous_planner.py b/agent_chat/autonomous_planner.py index 7edbf4629..1921ebe1a 100644 --- a/agent_chat/autonomous_planner.py +++ b/agent_chat/autonomous_planner.py @@ -137,11 +137,31 @@ class AutonomousPlanner: logger.info(f"AutonomousPlanner initialized (LLM: {self.llm_model}, available: {self.llm_available}, visual: {self._owl_detector is not None}, vlm: {self._vlm_client is not None})") def _init_visual_detection(self): - """Initialise le détecteur visuel OWL-v2.""" + """Initialise le détecteur visuel OWL-v2. 
+ + Désactivé par défaut depuis 2026-05-25 (C1b) : OWL-v2 chargeait sur + CUDA au boot et retenait ~600 MiB VRAM même en cas d'OOM silencieux, + fausssant les benchs perf et contribuant à l'offload Ollama VLM. + Comme `autonomous_planner` est largement non-wired au runtime actif + (cf. mémoire projet : HTTP 410 dépréciés), le défaut est skip. + + Activation : `AGENT_CHAT_ENABLE_OWL=1` (env var). + Device : `AGENT_CHAT_OWL_DEVICE=cuda|cpu` (override l'auto-détect). + """ + if os.environ.get("AGENT_CHAT_ENABLE_OWL", "0").strip() not in ("1", "true", "yes"): + logger.info( + "OWL-v2 visual detector skipped at boot " + "(AGENT_CHAT_ENABLE_OWL!=1, économie ~600 MiB VRAM)" + ) + return if VISUAL_DETECTION_AVAILABLE and OwlDetector: try: - self._owl_detector = OwlDetector(confidence_threshold=0.1) - logger.info("OWL-v2 visual detector initialized") + device = os.environ.get("AGENT_CHAT_OWL_DEVICE", "").strip() or None + self._owl_detector = OwlDetector( + confidence_threshold=0.1, + device=device, + ) + logger.info(f"OWL-v2 visual detector initialized (device={device or 'auto'})") except Exception as e: logger.warning(f"Could not initialize OWL detector: {e}") self._owl_detector = None diff --git a/agent_chat/handlers/__init__.py b/agent_chat/handlers/__init__.py new file mode 100644 index 000000000..33f2bbb19 --- /dev/null +++ b/agent_chat/handlers/__init__.py @@ -0,0 +1,29 @@ +"""Agent-chat handlers package. + +Contient les orchestrateurs spécialisés (apprentissage Léa, etc.) appelés +par `agent_chat.app` quand le routage normal d'intent ne suffit pas. 
+""" + +from .learn_action import ( + LearnActionOrchestrator, + LearnState, + LearnIntent, + LearnIntentParser, + OptionCFormatter, + StreamingClient, + StateStore, + PersistPayloadBuilder, + get_learn_action_orchestrator, +) + +__all__ = [ + "LearnActionOrchestrator", + "LearnState", + "LearnIntent", + "LearnIntentParser", + "OptionCFormatter", + "StreamingClient", + "StateStore", + "PersistPayloadBuilder", + "get_learn_action_orchestrator", +] diff --git a/agent_chat/handlers/learn_action.py b/agent_chat/handlers/learn_action.py new file mode 100644 index 000000000..356ad3ad3 --- /dev/null +++ b/agent_chat/handlers/learn_action.py @@ -0,0 +1,1192 @@ +""" +LearnActionOrchestrator — pilote du dialogue d'apprentissage Léa-first. + +Orchestre la machine d'état 8 phases (IDLE → LISTENING → WAITING_USER_STOP → +ANALYZING → PRESENTING → ITERATING_FEEDBACK → NAMING → PERSISTING → DONE / +ABORTED) en s'adossant au cycle Shadow exposé par le streaming server +(`agent_v0/server_v1/api_stream.py`) sur le port 5005, et au endpoint +`/api/v1/lea/competences/candidate/persist` livré en parallèle. + +Spec : `docs/POC/SPECS_AGENT_CHAT_LEARN_ACTION_2026-06-01.md`. + +Périmètre : +- Aucune dépendance importée à `agent_chat.app` (évite la dépendance circulaire). +- Appels HTTP via httpx sync (déjà disponible dans .venv). +- Persistance par session dans `agent_chat/state/.json`. +- Émission d'événements socket.io via callback `emit` injecté. +- Intent recognition hybride : regex → fallback Ollama `qwen2.5:0.5b`. 
+ +Auteur : Claude — 2026-06-01 (P1-LEA SHADOW) +""" + +from __future__ import annotations + +import json +import logging +import os +import re +import threading +import time +import unicodedata +import uuid +from dataclasses import asdict, dataclass, field +from datetime import datetime, timezone +from enum import Enum +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Tuple + +logger = logging.getLogger(__name__) + + +# ============================================================ +# Enums + dataclasses +# ============================================================ +class LearnState(str, Enum): + """Machine d'état de l'orchestrateur d'apprentissage.""" + + IDLE = "idle" + LISTENING = "listening" + WAITING_USER_STOP = "waiting_user_stop" + ANALYZING = "analyzing" + PRESENTING = "presenting" + ITERATING_FEEDBACK = "iterating_feedback" + NAMING = "naming" + PERSISTING = "persisting" + DONE = "done" + ABORTED = "aborted" + + +# Transitions autorisées (état_source -> {états_cibles}). 
+_ALLOWED_TRANSITIONS: Dict[LearnState, set] = { + LearnState.IDLE: {LearnState.LISTENING, LearnState.ABORTED}, + LearnState.LISTENING: {LearnState.WAITING_USER_STOP, LearnState.ABORTED}, + LearnState.WAITING_USER_STOP: {LearnState.ANALYZING, LearnState.ABORTED}, + LearnState.ANALYZING: {LearnState.PRESENTING, LearnState.ABORTED}, + LearnState.PRESENTING: {LearnState.ITERATING_FEEDBACK, LearnState.NAMING, LearnState.ABORTED}, + LearnState.ITERATING_FEEDBACK: {LearnState.ITERATING_FEEDBACK, LearnState.NAMING, LearnState.ABORTED}, + LearnState.NAMING: {LearnState.NAMING, LearnState.PERSISTING, LearnState.ABORTED}, + LearnState.PERSISTING: {LearnState.DONE, LearnState.ABORTED}, + LearnState.DONE: set(), + LearnState.ABORTED: set(), +} + + +class LearnIntent(str, Enum): + """Intents reconnus pendant une session d'apprentissage.""" + + START_OBSERVE = "start_observe" + USER_STOP_OBSERVE = "user_stop_observe" + VALIDATE_STEP = "validate_step" + CORRECT_STEP = "correct_step" + UNDO_STEP = "undo_step" + MERGE_NEXT = "merge_next" + SPLIT_STEP = "split_step" + NAME_COMPETENCE = "name_competence" + MARK_PARAMETER = "mark_parameter" + PERSIST = "persist" + CANCEL = "cancel" + CONFIRM = "confirm" + DENY = "deny" + UNKNOWN = "unknown" + + +@dataclass +class ParsedLearnIntent: + """Sortie du parser d'intents d'apprentissage.""" + + intent: LearnIntent + confidence: float + step_index: Optional[int] = None + raw_text: str = "" + extra: Dict[str, Any] = field(default_factory=dict) + + +@dataclass +class SessionState: + """État sérialisable d'une session d'apprentissage.""" + + session_id: str + user_id: Optional[str] = None + machine_id: Optional[str] = None # requis par /api/v1/lea/competences/candidate/persist + trigger_source: str = "button" # button | magic_phrase | proactive | windows_button + state: LearnState = LearnState.IDLE + created_at: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat()) + last_transition_at: str = field(default_factory=lambda: 
datetime.now(timezone.utc).isoformat()) + shadow_understanding: List[Dict[str, Any]] = field(default_factory=list) + pending_feedbacks: List[Dict[str, Any]] = field(default_factory=list) + correction_counters: Dict[str, int] = field(default_factory=dict) + competence_name: Optional[str] = None + parameters_marked: List[Dict[str, Any]] = field(default_factory=list) + abort_reason: Optional[str] = None + last_message_at: Optional[str] = None + last_recent_feedbacks: List[Dict[str, Any]] = field(default_factory=list) # pour détection boucle doute + + def to_dict(self) -> Dict[str, Any]: + d = asdict(self) + d["state"] = self.state.value if isinstance(self.state, LearnState) else self.state + return d + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "SessionState": + # Reconstruire LearnState + state_val = data.get("state", "idle") + try: + state = LearnState(state_val) + except ValueError: + state = LearnState.IDLE + kwargs = {k: v for k, v in data.items() if k != "state"} + return cls(state=state, **kwargs) + + +# ============================================================ +# StateStore — persistance JSON atomique +# ============================================================ +class StateStore: + """Persistance JSON par session — écriture atomique tmp + os.replace.""" + + def __init__(self, state_dir: Path): + self.state_dir = Path(state_dir) + self.state_dir.mkdir(parents=True, exist_ok=True) + self._lock = threading.RLock() + + def _path(self, session_id: str) -> Path: + # Sanitize session_id pour éviter directory traversal. 
+ safe = re.sub(r"[^A-Za-z0-9_\-]", "_", session_id)[:64] + return self.state_dir / f"{safe}.json" + + def save(self, state: SessionState) -> None: + with self._lock: + path = self._path(state.session_id) + tmp = path.with_suffix(".json.tmp") + data = state.to_dict() + tmp.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8") + os.replace(tmp, path) + + def load(self, session_id: str) -> Optional[SessionState]: + with self._lock: + path = self._path(session_id) + if not path.exists(): + return None + try: + data = json.loads(path.read_text(encoding="utf-8")) + return SessionState.from_dict(data) + except Exception: + logger.exception("StateStore.load failed for %s", session_id) + return None + + def delete(self, session_id: str) -> None: + with self._lock: + path = self._path(session_id) + try: + path.unlink() + except FileNotFoundError: + pass + + def list_active(self) -> List[SessionState]: + out: List[SessionState] = [] + for p in self.state_dir.glob("*.json"): + try: + data = json.loads(p.read_text(encoding="utf-8")) + st = SessionState.from_dict(data) + if st.state not in (LearnState.DONE, LearnState.ABORTED): + out.append(st) + except Exception: + continue + return out + + +# ============================================================ +# StreamingClient — wrapper httpx vers streaming server (5005) +# ============================================================ +class StreamingClient: + """Client HTTP sync vers le streaming server. + + Timeout 5s par appel, retry x2 sur erreur connexion. Auth Bearer via + `RPA_API_TOKEN` env var. 
+ """ + + DEFAULT_TIMEOUT = 5.0 + DEFAULT_RETRIES = 2 + + def __init__( + self, + base_url: Optional[str] = None, + token: Optional[str] = None, + timeout: float = DEFAULT_TIMEOUT, + retries: int = DEFAULT_RETRIES, + http_client: Any = None, # pour tests, injection httpx.Client mock + ): + self.base_url = ( + base_url + or os.environ.get("RPA_STREAMING_URL", "http://localhost:5005") + ).rstrip("/") + self.token = token if token is not None else os.environ.get("RPA_API_TOKEN", "") + self.timeout = timeout + self.retries = retries + self._http = http_client # si None on import httpx à la volée + + def _headers(self) -> Dict[str, str]: + h = {"Content-Type": "application/json"} + if self.token: + h["Authorization"] = f"Bearer {self.token}" + return h + + def _request(self, method: str, path: str, **kwargs) -> Dict[str, Any]: + """Wrapper avec retry x2 sur ConnectError / TimeoutException.""" + url = f"{self.base_url}{path}" + kwargs.setdefault("timeout", self.timeout) + kwargs.setdefault("headers", {}).update(self._headers()) + + last_exc: Optional[Exception] = None + for attempt in range(self.retries + 1): + try: + if self._http is not None: + resp = self._http.request(method, url, **kwargs) + else: + import httpx # import paresseux + + with httpx.Client() as client: + resp = client.request(method, url, **kwargs) + # Levée sur 5xx, mais on lit la réponse aussi sur 4xx pour le caller. 
+ if resp.status_code >= 500: + raise RuntimeError( + f"streaming {method} {path} -> HTTP {resp.status_code}: " + f"{resp.text[:200]}" + ) + try: + return resp.json() + except Exception: + return {"status_code": resp.status_code, "text": resp.text} + except Exception as exc: + last_exc = exc + logger.warning( + "StreamingClient %s %s attempt %d/%d failed: %s", + method, path, attempt + 1, self.retries + 1, exc, + ) + if attempt < self.retries: + time.sleep(0.25 * (attempt + 1)) + else: + break + raise RuntimeError(f"streaming {method} {path} unreachable: {last_exc}") + + # ---- API Shadow ---- + def shadow_start(self, session_id: str, **extra) -> Dict[str, Any]: + payload = {"session_id": session_id, **extra} + return self._request("POST", "/api/v1/shadow/start", json=payload) + + def shadow_stop(self, session_id: str) -> Dict[str, Any]: + return self._request( + "POST", "/api/v1/shadow/stop", json={"session_id": session_id} + ) + + def shadow_understanding(self, session_id: str) -> Dict[str, Any]: + return self._request("GET", f"/api/v1/shadow/{session_id}/understanding") + + def shadow_feedback(self, payload: Dict[str, Any]) -> Dict[str, Any]: + return self._request("POST", "/api/v1/shadow/feedback", json=payload) + + def shadow_build(self, session_id: str) -> Dict[str, Any]: + return self._request( + "POST", "/api/v1/shadow/build", json={"session_id": session_id} + ) + + def competence_persist(self, payload: Dict[str, Any]) -> Dict[str, Any]: + return self._request( + "POST", + "/api/v1/lea/competences/candidate/persist", + json=payload, + ) + + +# ============================================================ +# Intent parser (regex hybride + fallback Ollama) +# ============================================================ +def _strip_accents(text: str) -> str: + nf = unicodedata.normalize("NFKD", text) + return "".join(c for c in nf if not unicodedata.combining(c)) + + +class LearnIntentParser: + """Parser hybride d'intents d'apprentissage. 
+ + Approche : regex déterministe sur ~80 % des cas, fallback Ollama + `qwen2.5:0.5b` au-delà. Sans Ollama, retombe gracieusement sur UNKNOWN. + """ + + # Regex magiques (texte sans accents, lowercase) + _RE_START = re.compile( + r"\b(apprends?[ -]?moi|montre[ -]?moi|regarde[ -]?moi faire|" + r"observe|enregistre|on apprend|tu vas apprendre|" + r"lea apprends?|lea regarde)\b" + ) + _RE_STOP = re.compile( + r"\b(stop|arrete|c[' ]?est bon|c[' ]?est fini|j[' ]?ai fini|" + r"voila c[' ]?est tout|fin|termine|fini)\b" + ) + _RE_VALIDATE = re.compile( + r"\b(ok|oui|c[' ]?est ca|exact|parfait|valide?e?|bon|tout est bon|" + r"c[' ]?est correct|c[' ]?est juste|impeccable)\b" + ) + _RE_DENY = re.compile(r"\b(non|pas du tout|negatif|nan)\b") + _RE_CANCEL = re.compile( + r"\b(annule|annuler|abandonne|laisse tomber|jette|oublie|cancel)\b" + ) + _RE_STEP_NUM = re.compile( + r"\b(?:etape|numero|step|ligne|le|la|l[' ])?\s*([1-9]\d?)\b" + ) + _RE_CORRECT = re.compile( + r"\b(corrige|change|modifie|en fait|plutot|au lieu)\b" + ) + _RE_UNDO = re.compile(r"\b(retire|enleve|supprime|undo|annule l[' ]?etape)\b") + _RE_MERGE = re.compile(r"\b(fusionne|merge|regroupe|colle)\b") + _RE_SPLIT = re.compile(r"\b(coupe|split|separe|divise)\b") + _RE_PARAM_VAR = re.compile( + r"\b(ca change|c[' ]?est l[' ]?exemple|exemple|variable|parametre|" + r"a chaque fois different)\b" + ) + _RE_PARAM_CONST = re.compile( + r"\b(toujours|constante|fixe|ne change pas|toujours pareil)\b" + ) + + def __init__( + self, + use_llm_fallback: bool = True, + llm_model: str = "qwen2.5:0.5b", + ollama_url: Optional[str] = None, + confidence_threshold: float = 0.7, + ): + self.use_llm_fallback = use_llm_fallback + self.llm_model = llm_model + self.ollama_url = ( + ollama_url + or os.environ.get("OLLAMA_URL", "http://localhost:11434") + ).rstrip("/") + self.confidence_threshold = confidence_threshold + self._llm_disabled = False # bascule définitive si Ollama down + + # --- API publique --- + def parse( + self, + message: 
str, + current_state: LearnState = LearnState.IDLE, + ) -> ParsedLearnIntent: + """Reconnaît l'intent à partir du message utilisateur + état courant.""" + if not message or not message.strip(): + return ParsedLearnIntent(LearnIntent.UNKNOWN, 0.0, raw_text=message) + + raw = message.strip() + norm = _strip_accents(raw.lower()) + + regex_result = self._parse_regex(norm, current_state) + if regex_result.confidence >= 0.9: + regex_result.raw_text = raw + return regex_result + + # Fallback LLM si activé + dispo + if self.use_llm_fallback and not self._llm_disabled: + llm_result = self._parse_llm(raw, current_state) + if llm_result is not None: + llm_result.raw_text = raw + if llm_result.confidence >= self.confidence_threshold: + return llm_result + + # Sinon on retourne le meilleur effort regex (même faible) ou UNKNOWN + regex_result.raw_text = raw + return regex_result + + # --- Regex --- + def _extract_step_index(self, norm: str) -> Optional[int]: + m = self._RE_STEP_NUM.search(norm) + if m: + try: + return int(m.group(1)) + except (TypeError, ValueError): + return None + return None + + def _parse_regex(self, norm: str, state: LearnState) -> ParsedLearnIntent: + step_idx = self._extract_step_index(norm) + + # Annulation + if self._RE_CANCEL.search(norm): + return ParsedLearnIntent(LearnIntent.CANCEL, 0.95) + + # Démarrage observation (uniquement quand IDLE) + if state == LearnState.IDLE and self._RE_START.search(norm): + return ParsedLearnIntent(LearnIntent.START_OBSERVE, 0.95) + + # Stop observation (uniquement quand on observe) + if state in (LearnState.LISTENING, LearnState.WAITING_USER_STOP): + if self._RE_STOP.search(norm): + return ParsedLearnIntent(LearnIntent.USER_STOP_OBSERVE, 0.95) + + # Pendant itération feedback + if state in (LearnState.PRESENTING, LearnState.ITERATING_FEEDBACK): + if self._RE_UNDO.search(norm): + return ParsedLearnIntent( + LearnIntent.UNDO_STEP, 0.92, step_index=step_idx + ) + if self._RE_MERGE.search(norm): + return ParsedLearnIntent( 
+ LearnIntent.MERGE_NEXT, 0.92, step_index=step_idx + ) + if self._RE_SPLIT.search(norm): + return ParsedLearnIntent( + LearnIntent.SPLIT_STEP, 0.92, step_index=step_idx + ) + if self._RE_CORRECT.search(norm): + # Extraire la nouvelle intent : reste de phrase après le verbe correctif + new_intent_text = self._extract_correction_payload(norm) + return ParsedLearnIntent( + LearnIntent.CORRECT_STEP, + 0.9, + step_index=step_idx, + extra={"new_intent": new_intent_text}, + ) + if self._RE_VALIDATE.search(norm) and not self._RE_DENY.search(norm): + return ParsedLearnIntent( + LearnIntent.VALIDATE_STEP, + 0.92, + step_index=step_idx, + ) + if self._RE_DENY.search(norm): + return ParsedLearnIntent(LearnIntent.DENY, 0.9) + + # Pendant nomination + if state == LearnState.NAMING: + if self._RE_PARAM_VAR.search(norm): + return ParsedLearnIntent( + LearnIntent.MARK_PARAMETER, + 0.9, + extra={"is_parameter": True}, + ) + if self._RE_PARAM_CONST.search(norm): + return ParsedLearnIntent( + LearnIntent.MARK_PARAMETER, + 0.9, + extra={"is_parameter": False}, + ) + if self._RE_VALIDATE.search(norm): + return ParsedLearnIntent(LearnIntent.CONFIRM, 0.9) + # Sinon le message est probablement un nom de compétence. 
+ if 1 <= len(norm) <= 80: + return ParsedLearnIntent( + LearnIntent.NAME_COMPETENCE, + 0.85, + extra={"name": norm.strip()}, + ) + + # Acceptation / refus génériques + if self._RE_VALIDATE.search(norm): + return ParsedLearnIntent(LearnIntent.CONFIRM, 0.85) + if self._RE_DENY.search(norm): + return ParsedLearnIntent(LearnIntent.DENY, 0.85) + + return ParsedLearnIntent(LearnIntent.UNKNOWN, 0.3) + + def _extract_correction_payload(self, norm: str) -> str: + """Extrait le contenu après un verbe correctif ("corrige", "plutot", ...).""" + m = re.search( + r"\b(?:corrige|change|modifie|en fait|plutot|au lieu)\b[: ]+(.+)$", + norm, + ) + if m: + return m.group(1).strip() + return "" + + # --- LLM fallback --- + def _parse_llm( + self, raw: str, state: LearnState + ) -> Optional[ParsedLearnIntent]: + """Fallback Ollama `qwen2.5:0.5b`. Retourne None si Ollama down.""" + try: + import httpx + + allowed = [i.value for i in LearnIntent if i != LearnIntent.UNKNOWN] + prompt = ( + "Tu es un classifieur d'intents pour un assistant RPA. " + f"Etat courant : {state.value}. " + f"Message utilisateur : {raw!r}. " + "Choisis UN intent parmi cette liste : " + f"{allowed}. Réponds en JSON strict " + '{"intent": "...", "confidence": 0.0-1.0, "step_index": null|int, ' + '"new_intent": null|str}.' 
+ ) + with httpx.Client(timeout=4.0) as client: + resp = client.post( + f"{self.ollama_url}/api/generate", + json={ + "model": self.llm_model, + "prompt": prompt, + "format": "json", + "stream": False, + }, + ) + if resp.status_code != 200: + logger.warning("Ollama %s -> HTTP %s", self.llm_model, resp.status_code) + return None + data = resp.json().get("response", "") + try: + parsed = json.loads(data) + except Exception: + return None + intent_str = parsed.get("intent", "unknown") + try: + intent_e = LearnIntent(intent_str) + except ValueError: + intent_e = LearnIntent.UNKNOWN + return ParsedLearnIntent( + intent=intent_e, + confidence=float(parsed.get("confidence") or 0.0), + step_index=parsed.get("step_index"), + extra={"new_intent": parsed.get("new_intent")}, + ) + except Exception as exc: + logger.warning("Ollama fallback unavailable, going regex-only: %s", exc) + self._llm_disabled = True + return None + + +# ============================================================ +# OptionCFormatter — restitution texte naturel + libellés OCR +# ============================================================ +class OptionCFormatter: + """Transforme `understanding[]` en texte naturel français.""" + + # Mapping action_type → verbe passé composé + _VERB_MAP: Dict[str, str] = { + "click": "cliqué", + "double_click": "double-cliqué", + "right_click": "clic droit", + "type": "saisi", + "type_text": "saisi", + "input": "saisi", + "open": "ouverte", + "open_window": "ouverte", + "close": "fermée", + "validate": "validé", + "submit": "validé", + "focus": "ouvert", + "select": "sélectionné", + "scroll": "scrollé", + "key_press": "appuyé", + "shortcut": "raccourci utilisé", + } + + LOW_CONF_SUFFIX = " (à confirmer)" + + def format(self, understanding: List[Dict[str, Any]]) -> str: + """Retourne le texte multi-ligne 'Option C'.""" + if not understanding: + return "(aucune étape comprise)" + + lines: List[str] = [] + for idx, step in enumerate(understanding, start=1): + 
lines.append(self._format_step(idx, step)) + return "\n".join(lines) + + def _format_step(self, idx: int, step: Dict[str, Any]) -> str: + action_type = ( + step.get("action_type") + or step.get("intent") + or step.get("type") + or "" + ) + action_type_norm = str(action_type).lower() + verbe = self._VERB_MAP.get(action_type_norm, "effectuée") + + # Récupérer label OCR (priorité target_label > target > label) + label = ( + step.get("target_label") + or step.get("target") + or step.get("label") + or step.get("element_text") + or "" + ) + widget = step.get("widget_type") or step.get("element_type") or "" + + # Valeur saisie le cas échéant + value = step.get("value") or step.get("typed_text") or step.get("text") or "" + + # Confidence OCR + confidence_ocr = step.get("confidence_ocr") + if confidence_ocr is None: + confidence_ocr = step.get("ocr_confidence") + try: + low_conf = ( + confidence_ocr is not None and float(confidence_ocr) < 0.6 + ) + except (TypeError, ValueError): + low_conf = False + + # Construire la phrase + widget_label = widget.capitalize() if widget else "Élément" + if label: + base = f"{idx}. {widget_label} « {label} »" + else: + base = f"{idx}. {widget_label}" + + if value: + line = f"{base} → {verbe} : « {value} »" + else: + line = f"{base} → {verbe}" + + if low_conf: + line += self.LOW_CONF_SUFFIX + return line + + def closing_question(self) -> str: + return "C'est bien ça ou je me suis trompée quelque part ?" 
# ============================================================
# PersistPayloadBuilder
# ============================================================
class PersistPayloadBuilder:
    """Build the payload sent to the competence persistence call."""

    def build(
        self,
        session_state: SessionState,
    ) -> Dict[str, Any]:
        """Return the persist payload for ``session_state``.

        Only entries of ``parameters_marked`` flagged ``is_parameter`` are
        exported; a missing name falls back to ``param_<step_index>``.
        """
        parameters = [
            {
                "step_index": p.get("step_index"),
                "name": p.get("name") or f"param_{p.get('step_index')}",
                "example_value": p.get("example_value"),
                "field_label": p.get("field_label"),
            }
            for p in session_state.parameters_marked
            if p.get("is_parameter")
        ]
        return {
            "session_id": session_state.session_id,
            "machine_id": session_state.machine_id,
            "name": session_state.competence_name or "",
            "parameters": parameters,
            "trigger_source": session_state.trigger_source,
            "user_id": session_state.user_id,
        }


# ============================================================
# Orchestrator
# ============================================================
EmitFn = Callable[[str, Dict[str, Any]], None]


class LearnActionOrchestrator:
    """Drive the Léa-first conversation that teaches a competence.

    The module never touches `app.py` directly. It exposes a Python API
    (`start_session`, `handle_chat_message`, `handle_proactive_signal`,
    `resume_sessions`) that `app.py` calls when the state is not IDLE.
    """

    MAX_CORRECTIONS_PER_STEP = 3
    LOOP_DOUBT_WINDOW = 4  # keep the last 4 feedbacks for oscillation detection
    PROACTIVE_COOLDOWN_S = 300  # global cooldown between proactive prompts (5 min)

    def __init__(
        self,
        streaming_client: Optional[StreamingClient] = None,
        intent_parser: Optional[LearnIntentParser] = None,
        formatter: Optional[OptionCFormatter] = None,
        state_store: Optional[StateStore] = None,
        emit: Optional[EmitFn] = None,
        state_dir: Optional[Path] = None,
    ):
        """Wire collaborators, defaulting each one that is not injected.

        When no ``state_store`` is given, a ``StateStore`` is created on
        ``state_dir`` or, failing that, on ``<package parent>/state``.
        ``emit`` defaults to a no-op callback.
        """
        self.streaming = streaming_client or StreamingClient()
        self.parser = intent_parser or LearnIntentParser()
        self.formatter = formatter or OptionCFormatter()
        if state_store is not None:
            self.store = state_store
        else:
            default_dir = (
                state_dir
                if state_dir is not None
                else Path(__file__).resolve().parent.parent / "state"
            )
            self.store = StateStore(default_dir)
        self.emit: EmitFn = emit or (lambda evt, payload: None)
        self.payload_builder = PersistPayloadBuilder()

        # In-memory sessions (key: session_id).
        self._sessions: Dict[str, SessionState] = {}
        self._lock = threading.RLock()
        # Timestamp (time.time()) of the last proactive prompt actually sent.
        self._last_proactive_ts: float = 0.0

    # --- Public API ---
    def start_session(
        self,
        user_id: Optional[str] = None,
        trigger_source: str = "button",
        session_id: Optional[str] = None,
        machine_id: Optional[str] = None,
    ) -> Tuple[SessionState, str]:
        """Start a learning session. Returns ``(state, reply_text)``.

        `machine_id` is needed to persist the competence at the end of the
        cycle (see /api/v1/lea/competences/candidate/persist). It is stored
        in `state.machine_id` and propagated to the persist payload by
        `PersistPayloadBuilder`.
        """
        sid = session_id or f"learn_{uuid.uuid4().hex[:12]}"
        st = SessionState(
            session_id=sid,
            user_id=user_id,
            machine_id=machine_id,
            trigger_source=trigger_source,
            state=LearnState.IDLE,
        )
        # NOTE(review): lock extent reconstructed from flattened source —
        # only the registry write is assumed to be under the lock; confirm.
        with self._lock:
            self._sessions[sid] = st
        # Call shadow_start, then transition to LISTENING.
        try:
            self.streaming.shadow_start(sid, user_id=user_id)
        except Exception as exc:
            logger.error("shadow_start failed: %s", exc)
            self._transition(st, LearnState.ABORTED, abort_reason=f"shadow_start_failed: {exc}")
            return st, (
                "Je n'arrive pas à démarrer l'observation côté Windows. "
                "On réessaie dans un instant ?"
            )

        self._transition(st, LearnState.LISTENING)
        # Chat side moves to WAITING_USER_STOP as soon as shadow_start succeeded.
        self._transition(st, LearnState.WAITING_USER_STOP)
        return st, (
            "Je te regarde. Fais ce que tu veux m'apprendre, et dis-moi "
            "« stop » ou « j'ai fini » quand c'est terminé."
        )

    def handle_chat_message(
        self, session_id: str, message: str
    ) -> Optional[str]:
        """Handle a user message for an active session.

        Returns Léa's reply text, or None when the orchestrator does not
        own the message (unknown session, or state IDLE/DONE/ABORTED); the
        caller is then expected to route it to the normal flow.
        """
        # NOTE(review): lock extent reconstructed from flattened source —
        # assumed to cover lookup/registration only, not parsing; confirm.
        with self._lock:
            st = self._sessions.get(session_id) or self.store.load(session_id)
            if st is None or st.state in (LearnState.IDLE, LearnState.DONE, LearnState.ABORTED):
                return None
            self._sessions[session_id] = st
            st.last_message_at = datetime.now(timezone.utc).isoformat()

        parsed = self.parser.parse(message, current_state=st.state)
        logger.info(
            "LearnOrchestrator [%s] state=%s intent=%s conf=%.2f",
            session_id, st.state.value, parsed.intent.value, parsed.confidence,
        )

        # Cancel is honoured from any active state.
        if parsed.intent == LearnIntent.CANCEL:
            return self._handle_cancel(st, reason="user_cancel")

        try:
            if st.state == LearnState.WAITING_USER_STOP:
                return self._handle_waiting_stop(st, parsed)
            if st.state in (LearnState.PRESENTING, LearnState.ITERATING_FEEDBACK):
                return self._handle_iterating(st, parsed)
            if st.state == LearnState.NAMING:
                return self._handle_naming(st, parsed)
        except Exception as exc:
            logger.exception("Orchestrator exception in state %s", st.state)
            return (
                f"Désolée, j'ai eu un souci ({exc}). On garde tout pour reprendre."
            )

        # Any other active state (e.g. LISTENING, ANALYZING, PERSISTING)
        # has no dedicated handler and ends up here.
        return "Je n'ai pas bien compris. Tu peux reformuler ?"

    def handle_proactive_signal(
        self,
        signal_type: str,
        payload: Dict[str, Any],
    ) -> Optional[str]:
        """Proactive hook (`screen_static`, `action_repeat`, `retry_threshold`).

        Returns a suggestion for `action_repeat` and `retry_threshold`, at
        most once per ``PROACTIVE_COOLDOWN_S`` seconds; any other signal
        type (including `screen_static`) returns None.
        """
        # Filter unhandled signals BEFORE touching the cooldown: a signal
        # that produces no message must not silence the next real one.
        if signal_type not in ("action_repeat", "retry_threshold"):
            return None

        # Global cooldown guard. getattr keeps instances built without
        # __init__ working, as the original lazy attribute did.
        now = time.time()
        last = getattr(self, "_last_proactive_ts", 0.0)
        if now - last < self.PROACTIVE_COOLDOWN_S:
            return None
        self._last_proactive_ts = now

        if signal_type == "action_repeat":
            return (
                "J'ai remarqué que tu fais souvent la même séquence. "
                "Tu veux m'apprendre à la faire pour toi ?"
            )
        step = payload.get("step_index", "?")
        return (
            f"Je n'arrive pas à reproduire l'étape n°{step}. "
            "Tu peux me re-montrer ?"
        )

    def resume_sessions(self) -> List[str]:
        """At startup: re-register the sessions the store lists as active.

        Returns the ids of the resumed sessions.
        """
        resumed: List[str] = []
        for st in self.store.list_active():
            # Same lock as the other writers of the session registry.
            with self._lock:
                self._sessions[st.session_id] = st
            resumed.append(st.session_id)
            logger.info(
                "Resumed session %s in state %s", st.session_id, st.state.value
            )
        return resumed

    # --- Per-state handlers ---
    def _handle_waiting_stop(
        self, st: SessionState, parsed: ParsedLearnIntent
    ) -> str:
        """Stop the observation on USER_STOP_OBSERVE and present the recap."""
        if parsed.intent != LearnIntent.USER_STOP_OBSERVE:
            return (
                "Je continue à observer. Dis-moi « stop » quand tu auras fini."
            )

        # Transition to ANALYZING.
        self._transition(st, LearnState.ANALYZING)
        try:
            self.streaming.shadow_stop(st.session_id)
            understanding_resp = self.streaming.shadow_understanding(st.session_id)
        except Exception as exc:
            logger.error("shadow_stop/understanding failed: %s", exc)
            # NOTE(review): the session stays in ANALYZING here and
            # handle_chat_message has no branch for that state, so the retry
            # offered below only gets the generic reply — confirm whether a
            # transition back to WAITING_USER_STOP is allowed.
            return (
                "Je n'arrive pas à clôturer l'observation côté Windows. "
                "On réessaie ?"
            )

        understanding = understanding_resp.get("understanding") or understanding_resp.get("steps") or []
        st.shadow_understanding = understanding
        self.store.save(st)

        # Option C recap.
        self._transition(st, LearnState.PRESENTING)
        text = self.formatter.format(understanding)
        question = self.formatter.closing_question()
        self._transition(st, LearnState.ITERATING_FEEDBACK)
        return f"Voilà ce que j'ai compris :\n\n{text}\n\n{question}"

    def _handle_iterating(
        self, st: SessionState, parsed: ParsedLearnIntent
    ) -> str:
        """Apply one user feedback on the recap, or move on to naming."""
        # Global validation (no step index) or confirmation -> NAMING.
        if parsed.intent == LearnIntent.VALIDATE_STEP and parsed.step_index is None:
            return self._enter_naming(st)
        if parsed.intent == LearnIntent.CONFIRM:
            return self._enter_naming(st)

        step_idx = parsed.step_index
        # Per-step correction counter.
        if parsed.intent in (
            LearnIntent.CORRECT_STEP,
            LearnIntent.UNDO_STEP,
            LearnIntent.MERGE_NEXT,
            LearnIntent.SPLIT_STEP,
        ):
            if step_idx is None:
                return (
                    "Quelle étape je dois corriger ? Dis-moi le numéro "
                    "(ex : « étape 3 »)."
                )
            key = str(step_idx)
            st.correction_counters[key] = st.correction_counters.get(key, 0) + 1
            # Emergency exit once the per-step limit is exceeded.
            if st.correction_counters[key] > self.MAX_CORRECTIONS_PER_STEP:
                return self._handle_emergency_exit(st, step_idx)

        # Correct/undo loop detection.
        # NOTE(review): nesting reconstructed from flattened source — this
        # section is assumed to run for every intent, not only corrections.
        st.last_recent_feedbacks.append(
            {"step": step_idx, "intent": parsed.intent.value}
        )
        st.last_recent_feedbacks = st.last_recent_feedbacks[-self.LOOP_DOUBT_WINDOW:]
        if self._detect_doubt_loop(st.last_recent_feedbacks, step_idx):
            self.store.save(st)
            return (
                f"On tourne en rond sur l'étape n°{step_idx}. Tu veux "
                "qu'on relance l'enregistrement de cette étape seulement ?"
            )

        # Build the feedback payload.
        action_map = {
            LearnIntent.CORRECT_STEP: "correct",
            LearnIntent.UNDO_STEP: "undo",
            LearnIntent.MERGE_NEXT: "merge_next",
            LearnIntent.SPLIT_STEP: "split",
            LearnIntent.VALIDATE_STEP: "validate",
        }
        if parsed.intent not in action_map:
            return "Je n'ai pas bien compris. Tu peux préciser l'étape ?"

        payload: Dict[str, Any] = {
            "session_id": st.session_id,
            "action": action_map[parsed.intent],
            "step_index": step_idx,
        }
        if parsed.intent == LearnIntent.CORRECT_STEP:
            payload["new_intent"] = parsed.extra.get("new_intent") or ""

        try:
            self.streaming.shadow_feedback(payload)
            new_understanding = self.streaming.shadow_understanding(st.session_id)
        except Exception as exc:
            logger.error("shadow_feedback failed: %s", exc)
            return (
                "Je n'arrive pas à appliquer ta correction côté Windows. "
                "On réessaie ?"
            )

        st.shadow_understanding = (
            new_understanding.get("understanding")
            or new_understanding.get("steps")
            or []
        )
        st.pending_feedbacks.append(payload)
        self.store.save(st)

        # Full recap (no diff).
        text = self.formatter.format(st.shadow_understanding)
        return (
            f"OK, j'ai mis à jour :\n\n{text}\n\n"
            "C'est bon ou il reste à corriger ?"
        )

    def _handle_naming(
        self, st: SessionState, parsed: ParsedLearnIntent
    ) -> str:
        """Collect the competence name, then the parameter marks, then persist."""
        # 5.1 Naming.
        if st.competence_name is None and parsed.intent == LearnIntent.NAME_COMPETENCE:
            name = (parsed.extra.get("name") or parsed.raw_text or "").strip()
            if not name or len(name) > 80:
                return (
                    "Le nom doit faire entre 1 et 80 caractères. "
                    "Tu peux me redonner un nom plus court ?"
                )
            st.competence_name = name
            self.store.save(st)
            # 5.2 Parameters.
            return self._next_parameter_question(st) or self._persist_competence(st)

        # 5.2 Parameter marking.
        if parsed.intent == LearnIntent.MARK_PARAMETER:
            # The answer applies to the next pending parameter question.
            pending = self._pending_param(st)
            if pending is not None:
                step_idx, value, field_label = pending
                st.parameters_marked.append(
                    {
                        "step_index": step_idx,
                        "is_parameter": bool(parsed.extra.get("is_parameter")),
                        "example_value": value,
                        "field_label": field_label,
                        "name": self._slugify(field_label or f"param_{step_idx}"),
                    }
                )
                self.store.save(st)
                nxt = self._next_parameter_question(st)
                if nxt is not None:
                    return nxt
                return self._persist_competence(st)
            return self._persist_competence(st)

        # Final confirmation of the summary -> persist.
        # Guard against a premature CONFIRM: refuse to persist without a
        # valid name, which would otherwise send name="" / None to
        # /api/v1/lea/competences/candidate/persist.
        if parsed.intent == LearnIntent.CONFIRM:
            if not st.competence_name or not str(st.competence_name).strip():
                return (
                    "Tu n'as pas encore donné de nom à cette compétence. "
                    "Comment veux-tu l'appeler ?"
                )
            return self._persist_competence(st)

        # No name yet -> ask for one.
        if st.competence_name is None:
            return (
                "Comment on appelle cette tâche ? Tu peux la nommer simplement, "
                "en français."
            )
        return self._next_parameter_question(st) or self._persist_competence(st)

    # --- Transitions ---
    def _transition(
        self,
        st: SessionState,
        target: LearnState,
        abort_reason: Optional[str] = None,
    ) -> None:
        """Move ``st`` to ``target`` if allowed, persist it and emit an event.

        A transition absent from ``_ALLOWED_TRANSITIONS`` is logged and
        ignored (state unchanged, nothing saved or emitted). Emit failures
        are logged at debug level and never propagate.
        """
        if target not in _ALLOWED_TRANSITIONS.get(st.state, set()):
            logger.warning(
                "Illegal transition %s -> %s on session %s",
                st.state.value, target.value, st.session_id,
            )
            return
        st.state = target
        st.last_transition_at = datetime.now(timezone.utc).isoformat()
        if abort_reason:
            st.abort_reason = abort_reason
        self.store.save(st)
        try:
            self.emit(
                "lea:learn_state_changed",
                {
                    "session_id": st.session_id,
                    "state": target.value,
                    "abort_reason": st.abort_reason,
                },
            )
        except Exception:
            logger.debug("emit failed", exc_info=True)

    # --- Helpers ---
    def _handle_cancel(self, st: SessionState, reason: str) -> str:
        """Stop the observation (best effort) and abort the session."""
        try:
            self.streaming.shadow_stop(st.session_id)
        except Exception:
            # Best effort: cancelling must succeed even if the stop call fails.
            logger.debug("shadow_stop failed during cancel", exc_info=True)
        self._transition(st, LearnState.ABORTED, abort_reason=reason)
        return "OK, j'annule. Je garde tout au cas où tu reviennes plus tard."

    def _handle_emergency_exit(self, st: SessionState, step_idx: int) -> str:
        """Abort the session after too many corrections on one step."""
        self._transition(st, LearnState.ABORTED, abort_reason="too_many_corrections")
        try:
            self.streaming.shadow_stop(st.session_id)
        except Exception:
            # Best effort: the session is already aborted at this point.
            logger.debug("shadow_stop failed during emergency exit", exc_info=True)
        return (
            f"Je n'arrive pas à comprendre l'étape n°{step_idx}. "
            "Je préfère qu'on reprenne plus tard. Je garde tout."
        )

    def _detect_doubt_loop(
        self, recent: List[Dict[str, Any]], step_idx: int
    ) -> bool:
        """Return True when the user seems to oscillate on ``step_idx``.

        True when at least 4 of the recent feedbacks target ``step_idx``
        and the last 4 of those contain both a ``correct_step`` and an
        ``undo_step``. Strict alternation is not checked.
        """
        relevant = [f for f in recent if f.get("step") == step_idx]
        # NOTE(review): literal 4 equals the default LOOP_DOUBT_WINDOW;
        # lowering that constant would make this check unreachable.
        if len(relevant) < 4:
            return False
        seen = {f["intent"] for f in relevant[-4:]}
        return "correct_step" in seen and "undo_step" in seen

    def _enter_naming(self, st: SessionState) -> str:
        """Transition to NAMING and ask for the competence name."""
        self._transition(st, LearnState.NAMING)
        return (
            "Super. Comment on appelle cette tâche ? Tu peux la nommer "
            "simplement, en français."
        )

    def _pending_param(
        self, st: SessionState
    ) -> Optional[Tuple[int, str, str]]:
        """Return ``(step_index, typed value, field_label)`` for the first
        step (1-based) that carries a value and is not yet marked, else None."""
        marked = {p["step_index"] for p in st.parameters_marked}
        for idx, step in enumerate(st.shadow_understanding, start=1):
            value = step.get("value") or step.get("typed_text") or step.get("text")
            if value and idx not in marked:
                field_label = (
                    step.get("target_label")
                    or step.get("target")
                    or step.get("label")
                    or ""
                )
                return idx, value, field_label
        return None

    def _next_parameter_question(self, st: SessionState) -> Optional[str]:
        """Return the question for the next pending parameter, or None."""
        pending = self._pending_param(st)
        if pending is None:
            return None
        _, value, label = pending
        return (
            f"La valeur « {value} » pour le champ « {label} » — c'est "
            "l'exemple du jour ou ça doit toujours être ça ?"
        )

    def _slugify(self, text: str) -> str:
        """Return an accent-free ``[a-z0-9_]`` slug of ``text`` ("param" if empty)."""
        norm = _strip_accents(text or "").lower()
        slug = re.sub(r"[^a-z0-9]+", "_", norm).strip("_")
        return slug or "param"

    def _persist_competence(self, st: SessionState) -> str:
        """Build the workflow and persist the competence; return Léa's reply.

        Refuses (with a conversational reply) while a parameter question is
        pending, or when ``machine_id`` or the competence name is missing.
        """
        # Precondition: no parameter question left unanswered.
        if self._pending_param(st) is not None:
            return self._next_parameter_question(st) or "(...)"

        # Guard: machine_id is required by
        # /api/v1/lea/competences/candidate/persist. A conversational error
        # is preferred over an unhandled exception.
        if not st.machine_id:
            logger.error(
                "persist refusé : machine_id manquant pour session %s",
                st.session_id,
            )
            return (
                "Je ne peux pas enregistrer la compétence : "
                "je ne sais pas sur quelle machine elle a été apprise. "
                "On reprend en redémarrant l'apprentissage depuis Windows ?"
            )

        # Guard: no persist without a name (consistent with the CONFIRM guard).
        if not st.competence_name or not str(st.competence_name).strip():
            logger.error(
                "persist refusé : competence_name manquant pour session %s",
                st.session_id,
            )
            return (
                "Tu n'as pas encore donné de nom à cette compétence. "
                "Comment veux-tu l'appeler ?"
            )

        # shadow_build before /persist.
        try:
            self.streaming.shadow_build(st.session_id)
        except Exception as exc:
            logger.error("shadow_build failed: %s", exc)
            return (
                "Je n'arrive pas à figer le workflow avant de l'enregistrer. "
                "On réessaie ?"
            )

        self._transition(st, LearnState.PERSISTING)
        payload = self.payload_builder.build(st)
        try:
            resp = self.streaming.competence_persist(payload)
        except Exception as exc:
            logger.error("competence_persist failed: %s", exc)
            # NOTE(review): the session stays in PERSISTING here and
            # handle_chat_message has no branch for that state, so a later
            # retry message only gets the generic reply — confirm intended.
            return (
                "Je n'ai pas pu enregistrer la compétence pour l'instant. "
                "Je garde tout, on pourra réessayer."
            )

        self._transition(st, LearnState.DONE)
        # The state is already DONE: tolerate an empty response so a
        # persisted competence is never reported as a failure.
        slug = (resp or {}).get("slug") or self._slugify(st.competence_name or "")
        names = ", ".join(p["name"] for p in payload["parameters"]) or "aucun"
        return (
            f"C'est enregistré sous « {st.competence_name} » "
            f"(slug `{slug}`). Paramètres : {names}."
        )


# ============================================================
# Singleton accessor
# ============================================================
_orchestrator_singleton: Optional[LearnActionOrchestrator] = None
_orchestrator_lock = threading.Lock()


def get_learn_action_orchestrator(
    emit: Optional[EmitFn] = None,
    force_new: bool = False,
) -> LearnActionOrchestrator:
    """Return the orchestrator singleton (to be called from `app.py`).

    The instance is created on first use or when ``force_new`` is true.
    On an existing instance, a non-None ``emit`` replaces its callback.
    """
    global _orchestrator_singleton
    with _orchestrator_lock:
        if _orchestrator_singleton is None or force_new:
            _orchestrator_singleton = LearnActionOrchestrator(emit=emit)
        elif emit is not None:
            _orchestrator_singleton.emit = emit
        return _orchestrator_singleton
+AGENT_CHAT_URL = os.environ.get("RPA_AGENT_CHAT_URL", "http://localhost:5004") + # Paramètres de session MAX_SESSION_DURATION_S = 60 * 60 # 1 heure SESSIONS_ROOT = BASE_DIR / "sessions" diff --git a/agent_v0/agent_v1/core/captor.py b/agent_v0/agent_v1/core/captor.py index 77d3b99dc..2e685ae0a 100644 --- a/agent_v0/agent_v1/core/captor.py +++ b/agent_v0/agent_v1/core/captor.py @@ -56,6 +56,8 @@ class EventCaptorV1: # État des touches modificatrices self.modifiers = set() + self._pending_standalone_win = False + self._suppress_release_only_win_combo = False # Tracking du focus fenêtre self.last_window = None @@ -327,6 +329,56 @@ class EventCaptorV1: return {"kind": "key", "name": key.name} return {"kind": "unknown", "str": str(key)} + @staticmethod + def _raw_key_name(raw_key: Dict[str, Any]) -> Optional[str]: + """Nom lisible depuis un raw_key sérialisé.""" + if raw_key.get("kind") == "vk": + char = raw_key.get("char") + if char and len(str(char)) == 1: + return str(char).lower() + if raw_key.get("kind") == "key": + name = raw_key.get("name") + return str(name).lower() if name else None + return None + + def _emit_release_only_windows_combo(self) -> bool: + """Infère Win+ si Windows/NoMachine n'a livré que les releases. + + Certaines sessions ne remontent pas les press de Win+S via pynput, + mais livrent ensuite release('s') puis release('cmd'). Sans cette + inférence ciblée, le geste système est perdu et les releases polluent + le prochain text_input. 
+ """ + with self._text_lock: + raw_keys = list(self._raw_key_buffer) + if len(raw_keys) < 2: + return False + cmd_names = {"cmd", "cmd_l", "cmd_r"} + last = raw_keys[-1] + if last.get("action") != "release" or self._raw_key_name(last) not in cmd_names: + return False + combo_key = None + for raw in reversed(raw_keys[:-1]): + if raw.get("action") != "release": + continue + name = self._raw_key_name(raw) + if name and name not in self._MODIFIER_KEY_NAMES: + combo_key = name + break + if not combo_key: + return False + self._raw_key_buffer.clear() + + event = { + "type": "key_combo", + "keys": ["win", combo_key], + "raw_keys": raw_keys, + "timestamp": time.time(), + } + self._inject_screen_metadata(event) + self.on_event(event) + return True + def _on_press(self, key): # TOUJOURS enregistrer le press brut dans le buffer raw_keys with self._text_lock: @@ -344,6 +396,7 @@ class EventCaptorV1: self.modifiers.add("shift") elif key in (Key.cmd, Key.cmd_l, Key.cmd_r): self.modifiers.add("win") + self._pending_standalone_win = True # --- Combos avec modificateur (sauf Shift seul) --- # Shift seul n'est pas un « vrai » modificateur pour les combos : @@ -369,6 +422,9 @@ class EventCaptorV1: # Ne PAS émettre de combo si c'est un modificateur seul # (ex: appui sur Ctrl sans autre touche = pas de combo) if key_name and key_name not in self._MODIFIER_KEY_NAMES: + self._pending_standalone_win = False + if "win" in self.modifiers: + self._suppress_release_only_win_combo = True # Un combo interrompt la saisie texte en cours self._flush_text_buffer() # Attacher les raw_keys accumulés (press des modificateurs + press de la touche) @@ -400,6 +456,7 @@ class EventCaptorV1: - Enter / Tab : flush immédiat + émission de l'événement - Escape : vide le buffer sans émettre """ + escape_raw_keys = None with self._text_lock: # --- Touches spéciales --- if key == Key.backspace: @@ -411,12 +468,14 @@ class EventCaptorV1: if key == Key.esc: # Annuler la saisie en cours self._text_buffer.clear() - 
self._raw_key_buffer.clear() self._text_start_pos = None self._cancel_flush_timer() - return + escape_raw_keys = list(self._raw_key_buffer) + self._raw_key_buffer.clear() + # Émettre hors lock après le bloc critique. + pass - if key in (Key.enter, Key.tab): + elif key in (Key.enter, Key.tab): # Flush immédiat — on relâche le lock avant d'appeler # _flush_text_buffer (qui prend aussi le lock) pass # on sort du with et on flush après @@ -454,6 +513,18 @@ class EventCaptorV1: # Touche spéciale non gérée (F1, Insert, etc.) — on ignore return + if escape_raw_keys is not None: + event = { + "type": "key_combo", + "keys": ["escape"], + "timestamp": time.time(), + } + if escape_raw_keys: + event["raw_keys"] = escape_raw_keys + self._inject_screen_metadata(event) + self.on_event(event) + return + # Si on arrive ici, c'est Enter ou Tab → flush le buffer en cours # puis émettre le caractère spécial comme text_input séparé self._flush_text_buffer() @@ -551,6 +622,35 @@ class EventCaptorV1: **self._encode_key(key), }) + if key in (Key.cmd, Key.cmd_l, Key.cmd_r) and self._suppress_release_only_win_combo: + with self._text_lock: + self._raw_key_buffer.clear() + self._pending_standalone_win = False + self._suppress_release_only_win_combo = False + self.modifiers.discard("win") + return + + if key in (Key.cmd, Key.cmd_l, Key.cmd_r) and self._emit_release_only_windows_combo(): + self._pending_standalone_win = False + self._suppress_release_only_win_combo = False + self.modifiers.discard("win") + return + + if key in (Key.cmd, Key.cmd_l, Key.cmd_r) and self._pending_standalone_win: + with self._text_lock: + raw_keys = list(self._raw_key_buffer) + self._raw_key_buffer.clear() + event = { + "type": "key_combo", + "keys": ["win"], + "raw_keys": raw_keys, + "timestamp": time.time(), + } + self._inject_screen_metadata(event) + self.on_event(event) + self._pending_standalone_win = False + self._suppress_release_only_win_combo = False + if key in (Key.ctrl, Key.ctrl_l, Key.ctrl_r): 
self.modifiers.discard("ctrl") elif key in (Key.alt, Key.alt_l, Key.alt_r): @@ -559,6 +659,8 @@ class EventCaptorV1: self.modifiers.discard("shift") elif key in (Key.cmd, Key.cmd_l, Key.cmd_r): self.modifiers.discard("win") + self._pending_standalone_win = False + self._suppress_release_only_win_combo = False # ---------------------------------------------------------------- # Métadonnées système diff --git a/agent_v0/agent_v1/core/executor.py b/agent_v0/agent_v1/core/executor.py index d62b20a1a..0bfa85c96 100644 --- a/agent_v0/agent_v1/core/executor.py +++ b/agent_v0/agent_v1/core/executor.py @@ -490,6 +490,197 @@ class ActionExecutorV1: or after_normalized in before_normalized ) + @staticmethod + def _is_generic_button_text(text: str) -> bool: + """Repérer les libellés trop génériques pour une mémoire sans contexte.""" + normalized = ActionExecutorV1._normalize_loose_text(text) + return normalized in { + "annuler", + "cancel", + "enregistrer", + "non", + "no", + "ok", + "oui", + "ouvrir", + "open", + "remplacer", + "replace", + "save", + "yes", + } + + @staticmethod + def _is_idempotent_navigation_combo(keys: Any) -> bool: + """True for keyboard navigation that may legitimately not move.""" + if not isinstance(keys, (list, tuple)): + return False + norm = { + str(k or "").strip().lower() + for k in keys + if str(k or "").strip() + } + if not norm: + return False + ctrl_names = {"ctrl", "control"} + navigation_names = {"end", "home", "page_down", "pagedown", "page_up", "pageup"} + return bool(norm & ctrl_names) and bool(norm & navigation_names) + + @staticmethod + def _is_idempotent_save_combo(keys: Any) -> bool: + """True for save shortcuts that may complete without visible change.""" + if not isinstance(keys, (list, tuple)): + return False + norm = { + str(k or "").strip().lower() + for k in keys + if str(k or "").strip() + } + return bool(norm & {"ctrl", "control"}) and "s" in norm + + @staticmethod + def _screenshot_has_vertical_scrollbar(screenshot_b64: str) 
-> Optional[bool]: + """Detect a visible vertical scrollbar in the current full-screen image. + + This is intentionally conservative and used only to avoid false pauses + on Ctrl+End/Home when a page has no scrollable area. + """ + if not screenshot_b64: + return None + try: + from PIL import Image + + raw = base64.b64decode(screenshot_b64) + img = Image.open(io.BytesIO(raw)).convert("RGB") + w, h = img.size + if w < 160 or h < 300: + return None + + top = max(140, int(h * 0.10)) + bottom = min(h - 80, int(h * 0.92)) + if bottom <= top: + return None + + # If the Léa chat window is visible on the right, its own scrollbar + # must not count as the target app scrollbar. + search_w = w + overlay_left = None + blue_threshold_rows = [] + scan_left = int(w * 0.45) + scan_top = int(h * 0.35) + scan_bottom = int(h * 0.90) + for y in range(scan_top, scan_bottom, 4): + run_start = None + run_len = 0 + best_start = None + best_len = 0 + for x in range(scan_left, w - 8, 4): + r, g, b = img.getpixel((x, y)) + is_lea_header_blue = r <= 80 and 70 <= g <= 150 and b >= 150 + if is_lea_header_blue: + if run_start is None: + run_start = x + run_len = 1 + else: + run_len += 1 + if run_len > best_len: + best_len = run_len + best_start = run_start + else: + run_start = None + run_len = 0 + if best_len >= 30 and best_start is not None: + blue_threshold_rows.append(best_start) + if blue_threshold_rows: + overlay_left = min(blue_threshold_rows) + if overlay_left > int(w * 0.55): + search_w = max(160, overlay_left - 8) + + # Browser/app scrollbars live on the right edge of the target area. + # Avoid the exact outer border and look for a grey thumb rather than + # a white track. 
+ x1 = max(0, search_w - 34) + x2 = max(0, search_w - 4) + y_marks = [] + grey_pixels = 0 + for y in range(top, bottom): + row_has_thumb = False + for x in range(x1, x2): + r, g, b = img.getpixel((x, y)) + avg = (r + g + b) / 3 + neutral = max(r, g, b) - min(r, g, b) <= 24 + if neutral and 95 <= avg <= 225: + row_has_thumb = True + grey_pixels += 1 + y_marks.append(row_has_thumb) + + longest = 0 + run = 0 + for marked in y_marks: + if marked: + run += 1 + longest = max(longest, run) + else: + run = 0 + + visible = longest >= 24 and grey_pixels >= 120 + logger.debug( + "Scrollbar detect: visible=%s longest=%s grey_pixels=%s " + "search_w=%s overlay_left=%s", + visible, longest, grey_pixels, search_w, overlay_left, + ) + return visible + except Exception: + logger.debug("Scrollbar detect skipped", exc_info=True) + return None + + @staticmethod + def _enrich_target_context_from_action( + action: Optional[Dict[str, Any]], + target_spec: Optional[Dict[str, Any]], + ) -> Optional[Dict[str, Any]]: + """Ajouter le contrat de fenêtre à la cible avant résolution. + + Ce contexte entre dans la clé mémoire serveur et permet surtout de + refuser un hit mémoire trop générique quand un clic doit ouvrir une + autre fenêtre. 
+ """ + if not isinstance(action, dict) or not isinstance(target_spec, dict): + return target_spec + + enriched = dict(target_spec) + hints = dict(enriched.get("context_hints") or {}) + before = str(action.get("expected_window_before") or "").strip() + after = str(action.get("expected_window_title") or "").strip() + if not before: + before = str(enriched.get("window_title") or hints.get("window_title") or "").strip() + + if before: + hints.setdefault("expected_window_before", before) + hints.setdefault("window_title", before) + enriched.setdefault("window_title", before) + if after: + hints.setdefault("expected_window_after", after) + if ActionExecutorV1._requires_post_verify_window_transition( + action, + enriched, + after, + ): + hints["requires_window_transition"] = True + + by_text = str(enriched.get("by_text") or "").strip() + if by_text and ActionExecutorV1._is_generic_button_text(by_text): + hints["generic_button_text"] = by_text + if before: + hints.setdefault("button_expected_before_window", before) + if after: + hints.setdefault("button_expected_after_window", after) + hints.setdefault("button_role", str(enriched.get("by_role") or "")) + + if hints: + enriched["context_hints"] = hints + return enriched + @staticmethod def _is_start_button_target(target_spec: Optional[Dict[str, Any]]) -> bool: """Détecter une action replay sémantisée comme bouton Démarrer.""" @@ -598,18 +789,143 @@ class ActionExecutorV1: """Cliquer le bouton attendu d'un dialogue runtime connu. Strategie : - 1. Resolution serveur par texte du bouton (vision stricte) - 2. Fallback local par template matching sur le texte - 3. Pas de fallback Enter ici : sur "Confirmer l'enregistrement", + 1. UIA local si disponible (bouton natif) + 2. Resolution serveur/local sur le crop de la fenetre active + 3. Fallback geometrique pour les confirmations Windows connues + 4. Dernier recours serveur/local plein ecran + 5. 
Pas de fallback Enter ici : sur "Confirmer l'enregistrement", le focus peut etre sur "Non", donc Enter serait ambigu. """ from ..config import SERVER_URL - screenshot_b64 = self._capture_screenshot_b64(max_width=0, quality=75) - if not screenshot_b64: - return None + window_rect = self._active_window_rect_for_dialog(current_title) + window_screenshot_b64 = "" + if window_rect: + window_screenshot_b64 = self._capture_region_b64( + window_rect["left"], + window_rect["top"], + window_rect["width"], + window_rect["height"], + max_width=0, + quality=75, + ) for button_text in dialog_spec.get("button_texts", ()): + uia_handled = self._try_click_runtime_dialog_button_uia( + button_text, + current_title, + screen_width, + screen_height, + ) + if uia_handled: + return uia_handled + + if SERVER_URL and window_rect and window_screenshot_b64: + target_spec = { + "by_text": button_text, + "by_role": "dialog_button", + "window_title": current_title, + "vlm_description": ( + f"Dans la fenêtre '{current_title}', " + f"le bouton '{button_text}'" + ), + } + resolved = self._server_resolve_target( + SERVER_URL, + window_screenshot_b64, + target_spec, + 0.5, + 0.5, + window_rect["width"], + window_rect["height"], + ) + if resolved and resolved.get("resolved"): + x_pct = float(resolved.get("x_pct", 0.5)) + y_pct = float(resolved.get("y_pct", 0.5)) + real_x = window_rect["left"] + int(x_pct * window_rect["width"]) + real_y = window_rect["top"] + int(y_pct * window_rect["height"]) + handled = self._click_runtime_dialog_candidate( + (real_x, real_y), + "left", + current_title, + { + "handled": True, + "button_text": button_text, + "x_pct": real_x / max(screen_width, 1), + "y_pct": real_y / max(screen_height, 1), + "resolution_method": resolved.get( + "method", "runtime_dialog_window_server" + ), + "resolution_score": resolved.get("score", 0.0), + "attention_scope": "active_window", + }, + ) + if handled: + logger.info( + f"[RUNTIME-DIALOG] '{current_title}' gere via serveur " + f"fenetre 
-> bouton '{button_text}' " + f"[{resolved.get('method', 'server')}]" + ) + return handled + + if window_rect and window_screenshot_b64: + local_pos = self._find_text_on_screen(window_screenshot_b64, button_text) + if local_pos: + rel_x, rel_y = local_pos + real_x = window_rect["left"] + rel_x + real_y = window_rect["top"] + rel_y + handled = self._click_runtime_dialog_candidate( + (real_x, real_y), + "left", + current_title, + { + "handled": True, + "button_text": button_text, + "x_pct": real_x / max(screen_width, 1), + "y_pct": real_y / max(screen_height, 1), + "resolution_method": "dialog_window_text_template", + "resolution_score": 0.8, + "attention_scope": "active_window", + }, + ) + if handled: + logger.info( + f"[RUNTIME-DIALOG] '{current_title}' gere localement " + f"fenetre -> bouton '{button_text}' [dialog_window_text_template]" + ) + return handled + + layout_pos = self._runtime_dialog_button_geometry_fallback( + dialog_spec, + button_text, + window_rect, + ) + if layout_pos: + handled = self._click_runtime_dialog_candidate( + layout_pos, + "left", + current_title, + { + "handled": True, + "button_text": button_text, + "x_pct": layout_pos[0] / max(screen_width, 1), + "y_pct": layout_pos[1] / max(screen_height, 1), + "resolution_method": "runtime_dialog_window_geometry", + "resolution_score": 0.7, + "attention_scope": "active_window", + }, + ) + if handled: + logger.info( + f"[RUNTIME-DIALOG] '{current_title}' gere par geometrie " + f"fenetre -> bouton '{button_text}'" + ) + return handled + + screenshot_b64 = self._capture_screenshot_b64(max_width=0, quality=75) + if not screenshot_b64: + continue + if SERVER_URL: target_spec = { "by_text": button_text, @@ -632,49 +948,207 @@ class ActionExecutorV1: if resolved and resolved.get("resolved"): x_pct = float(resolved.get("x_pct", 0.5)) y_pct = float(resolved.get("y_pct", 0.5)) - self._click( + handled = self._click_runtime_dialog_candidate( (int(x_pct * screen_width), int(y_pct * screen_height)), "left", + 
current_title, + { + "handled": True, + "button_text": button_text, + "x_pct": x_pct, + "y_pct": y_pct, + "resolution_method": resolved.get( + "method", "runtime_dialog_server" + ), + "resolution_score": resolved.get("score", 0.0), + "attention_scope": "full_screen", + }, ) - time.sleep(0.8) + if not handled: + continue logger.info( f"[RUNTIME-DIALOG] '{current_title}' gere via serveur " f"-> bouton '{button_text}' [{resolved.get('method', 'server')}]" ) - return { - "handled": True, - "button_text": button_text, - "x_pct": x_pct, - "y_pct": y_pct, - "resolution_method": resolved.get( - "method", "runtime_dialog_server" - ), - "resolution_score": resolved.get("score", 0.0), - } + return handled local_pos = self._find_text_on_screen(screenshot_b64, button_text) if local_pos: real_x, real_y = local_pos - self._click((real_x, real_y), "left") - time.sleep(0.8) + handled = self._click_runtime_dialog_candidate( + (real_x, real_y), + "left", + current_title, + { + "handled": True, + "button_text": button_text, + "x_pct": real_x / max(screen_width, 1), + "y_pct": real_y / max(screen_height, 1), + "resolution_method": "dialog_text_template", + "resolution_score": 0.8, + "attention_scope": "full_screen", + }, + ) + if not handled: + continue logger.info( f"[RUNTIME-DIALOG] '{current_title}' gere localement " f"-> bouton '{button_text}' [dialog_text_template]" ) - return { - "handled": True, - "button_text": button_text, - "x_pct": real_x / max(screen_width, 1), - "y_pct": real_y / max(screen_height, 1), - "resolution_method": "dialog_text_template", - "resolution_score": 0.8, - } + return handled logger.info( f"[RUNTIME-DIALOG] Aucun bouton resolu pour '{current_title}'" ) return None + def _active_window_rect_for_dialog( + self, + current_title: str, + ) -> Optional[Dict[str, int]]: + """Retourner le rect de la fenetre active si elle correspond au dialogue.""" + try: + from ..window_info_crossplatform import get_active_window_rect + + rect_info = get_active_window_rect() + 
if not rect_info or not rect_info.get("rect"): + return None + active_title = str(rect_info.get("title", "") or "") + if current_title and active_title: + current_norm = self._normalize_loose_text(current_title) + active_norm = self._normalize_loose_text(active_title) + if current_norm not in active_norm and active_norm not in current_norm: + return None + rect = rect_info["rect"] + if not isinstance(rect, (list, tuple)) or len(rect) != 4: + return None + left, top, right, bottom = [int(v) for v in rect] + width = right - left + height = bottom - top + if width <= 80 or height <= 50: + return None + return { + "left": left, + "top": top, + "right": right, + "bottom": bottom, + "width": width, + "height": height, + } + except Exception as exc: + logger.debug(f"[RUNTIME-DIALOG] Rect fenetre active indisponible: {exc}") + return None + + def _try_click_runtime_dialog_button_uia( + self, + button_text: str, + current_title: str, + screen_width: int, + screen_height: int, + ) -> Optional[Dict[str, Any]]: + """Cliquer un bouton de dialogue via UIA local quand le helper existe.""" + uia_target = { + "name": button_text, + "parent_path": [ + { + "name": current_title, + "control_type": "fenêtre", + } + ], + } + coords = self._resolve_via_uia_local(uia_target, screen_width, screen_height) + if not coords: + return None + x_pct, y_pct = coords + real_x = int(x_pct * screen_width) + real_y = int(y_pct * screen_height) + return self._click_runtime_dialog_candidate( + (real_x, real_y), + "left", + current_title, + { + "handled": True, + "button_text": button_text, + "x_pct": x_pct, + "y_pct": y_pct, + "resolution_method": "runtime_dialog_uia", + "resolution_score": 0.95, + "attention_scope": "active_window", + }, + ) + + def _runtime_dialog_button_geometry_fallback( + self, + dialog_spec: Dict[str, Any], + button_text: str, + window_rect: Optional[Dict[str, int]], + ) -> Optional[tuple[int, int]]: + """Fallback geometrique borne a certains dialogues Windows connus.""" + if not 
window_rect: + return None + if dialog_spec.get("id") != "confirm_save_overwrite": + return None + + normalized = self._normalize_loose_text(button_text) + affirmative = {"oui", "yes", "remplacer", "replace"} + negative = {"non", "no"} + if normalized in affirmative: + rel_x = 0.65 + elif normalized in negative: + rel_x = 0.84 + else: + return None + + rel_y = 0.82 + x = window_rect["left"] + int(window_rect["width"] * rel_x) + y = window_rect["top"] + int(window_rect["height"] * rel_y) + return (x, y) + + def _click_runtime_dialog_candidate( + self, + pos: tuple[int, int], + button_name: str, + current_title: str, + handled: Dict[str, Any], + ) -> Optional[Dict[str, Any]]: + """Cliquer puis verifier que le dialogue a effectivement quitte le focus.""" + self._click(pos, button_name) + next_title = self._wait_until_title_changes(current_title, timeout_s=2.0) + if next_title is None: + logger.warning( + "[RUNTIME-DIALOG] Clic sur '%s' a (%s,%s) sans fermeture du dialogue '%s'", + handled.get("button_text", ""), + pos[0], + pos[1], + current_title, + ) + return None + handled["post_title"] = next_title + return handled + + def _wait_until_title_changes( + self, + previous_title: str, + timeout_s: float = 2.0, + poll_interval_s: float = 0.15, + ) -> Optional[str]: + """Attendre que la fenetre active ne soit plus le titre donne.""" + previous_norm = self._normalize_loose_text(previous_title) + deadline = time.time() + timeout_s + while time.time() < deadline: + time.sleep(poll_interval_s) + try: + from ..window_info_crossplatform import get_active_window_info + + info = get_active_window_info() + current_title = str(info.get("title", "") or "") + except Exception: + continue + current_norm = self._normalize_loose_text(current_title) + if current_norm and current_norm != previous_norm: + return current_title + return None + def _try_dialog_resolver_server( self, current_title: str, @@ -837,13 +1311,25 @@ class ActionExecutorV1: parts.append(f"'{by_text}'") if window: 
parts.append(f"dans {window}") + if not parts: + # Fallback sur les métadonnées d'ancre visuelle quand l'action + # n'a pas de texte UI classique (ex: icône de bureau). + for key in ("target_text", "ocr_description", "description"): + value = str(target_spec.get(key, "") or "").strip() + if value: + parts.append(value[:80]) + break if not parts: # Fallback sur la vlm_description vlm = target_spec.get("vlm_description", "") if vlm: parts.append(vlm[:60]) else: - parts.append("un élément") + anchor_id = str(target_spec.get("anchor_id", "") or "").strip() + if anchor_id: + parts.append(f"la cible visuelle {anchor_id[:24]}") + else: + parts.append("une cible visuelle non décrite") if parts: return " ".join(parts) return "élément inconnu" @@ -1313,6 +1799,11 @@ class ActionExecutorV1: if adapted is not None: action = adapted["action"] target_spec = adapted["target_spec"] + target_spec = self._enrich_target_context_from_action( + action, + target_spec, + ) + action["target_spec"] = target_spec # Extraire le nom de l'application depuis un titre de fenêtre def _app_name(title): @@ -2116,81 +2607,134 @@ class ActionExecutorV1: # Stabilisation minimale avant la garde suivante. 
time.sleep(0.5) elif needs_screen_check and hash_before: - screen_changed = self._wait_for_screen_change( - hash_before, timeout_ms=3000 + is_idempotent_navigation = ( + action_type == "key_combo" + and self._is_idempotent_navigation_combo(action.get("keys", [])) ) - if not screen_changed: - logger.warning( - f"[LEA] Écran inchangé après {action_type} " - f"(action_id={action_id}) — pas d'effet visible" + is_idempotent_save = ( + action_type == "key_combo" + and self._is_idempotent_save_combo(action.get("keys", [])) + ) + navigation_has_scrollbar = None + if is_idempotent_navigation: + navigation_has_scrollbar = self._screenshot_has_vertical_scrollbar( + screenshot_before_b64 ) - # ── Mode apprentissage : clic sans effet = mauvais clic ── - # Si l'action était un clic visuel, l'écran inchangé prouve - # que le grounding a cliqué au mauvais endroit. Au lieu de - # passer silencieusement à la suite, Léa demande à l'humain. - if action_type == "click" and visual_mode: - print( - f" [ECHEC] Clic sans effet — " - f"je demande de l'aide" + if is_idempotent_save: + logger.info( + "[LEA] %s est une sauvegarde idempotente : " + "skip verification pixel-change", + action.get("keys", []), + ) + result["success"] = True + result["warning"] = "idempotent_save_no_visible_change" + elif is_idempotent_navigation and navigation_has_scrollbar is False: + logger.info( + "[LEA] %s sans ascenseur visible avant action : " + "skip verification pixel-change", + action.get("keys", []), + ) + result["success"] = True + result["warning"] = "no_scrollbar_idempotent_navigation" + else: + screen_changed = self._wait_for_screen_change( + hash_before, timeout_ms=3000 + ) + if not screen_changed: + logger.warning( + f"[LEA] Écran inchangé après {action_type} " + f"(action_id={action_id}) — pas d'effet visible" ) - try: - self.notifier.replay_learning_mode( - raison="no_screen_change", - ) - except Exception: - pass - human_actions = self._capture_human_correction( - timeout_s=30, - ) - if 
human_actions: - result["success"] = True - result["resolution_method"] = "human_supervised" - result["warning"] = "human_supervised_after_no_change" - last_click = None - for ha in reversed(human_actions): - if ha.get("type") == "click": - last_click = ha - break - if last_click: - result["actual_position"] = { - "x_pct": last_click["x_pct"], - "y_pct": last_click["y_pct"], + # ── Mode apprentissage : clic sans effet = mauvais clic ── + # Si l'action était un clic visuel, l'écran inchangé prouve + # que le grounding a cliqué au mauvais endroit. Au lieu de + # passer silencieusement à la suite, Léa demande à l'humain. + if action_type == "click" and visual_mode: + print( + f" [ECHEC] Clic sans effet — " + f"je demande de l'aide" + ) + try: + self.notifier.replay_learning_mode( + raison="no_screen_change", + ) + except Exception: + pass + + human_actions = self._capture_human_correction( + timeout_s=30, + ) + if human_actions: + result["success"] = True + result["resolution_method"] = "human_supervised" + result["warning"] = "human_supervised_after_no_change" + last_click = None + for ha in reversed(human_actions): + if ha.get("type") == "click": + last_click = ha + break + if last_click: + result["actual_position"] = { + "x_pct": last_click["x_pct"], + "y_pct": last_click["y_pct"], + } + result["correction"] = { + "actions": human_actions, + "action_count": len(human_actions), + "last_click": last_click, + "trigger": "no_screen_change", } - result["correction"] = { - "actions": human_actions, - "action_count": len(human_actions), - "last_click": last_click, - "trigger": "no_screen_change", - } + else: + # Timeout — l'humain n'a pas répondu + result["success"] = False + result["warning"] = "no_screen_change" + result["error"] = "Ecran inchange apres l'action" + elif is_idempotent_navigation: + has_scrollbar = navigation_has_scrollbar + if has_scrollbar is None: + has_scrollbar = self._screenshot_has_vertical_scrollbar( + screenshot_before_b64 + ) + if has_scrollbar 
is True: + result["success"] = False + result["warning"] = "no_screen_change" + result["error"] = "Ecran inchange apres l'action" + logger.warning( + "[LEA] %s sans changement visible avec ascenseur détecté", + action.get("keys", []), + ) + else: + logger.info( + "[LEA] %s sans changement visible et sans ascenseur " + "détecté : action considérée OK", + action.get("keys", []), + ) + result["success"] = True + result["warning"] = "no_scrollbar_idempotent_navigation" else: - # Timeout — l'humain n'a pas répondu + # Actions non-visuelles : comportement existant result["success"] = False result["warning"] = "no_screen_change" result["error"] = "Ecran inchange apres l'action" + print( + f" [ECHEC] Ecran inchange apres {action_type} — " + f"l'action n'a pas eu d'effet visible" + ) + try: + self.notifier.replay_no_screen_change(action_type) + except Exception: + pass else: - # Actions non-visuelles : comportement existant - result["success"] = False - result["warning"] = "no_screen_change" - result["error"] = "Ecran inchange apres l'action" - print( - f" [ECHEC] Ecran inchange apres {action_type} — " - f"l'action n'a pas eu d'effet visible" - ) - try: - self.notifier.replay_no_screen_change(action_type) - except Exception: - pass - else: - print(f" [OK] Changement d'ecran detecte apres {action_type}") - # Stabilisation page avant capture screenshot_after : - # _wait_for_screen_change retourne dès le 1er tick de diff - # (200ms), or sur une transition de page (fade, render API) - # le DOM n'est pas encore stable → screenshot ambigu → - # verify_action serveur déclenche retry fantôme. Cf. démo - # GHT 2026-05-11. 
- time.sleep(0.5) + print(f" [OK] Changement d'ecran detecte apres {action_type}") + # Stabilisation page avant capture screenshot_after : + # _wait_for_screen_change retourne dès le 1er tick de diff + # (200ms), or sur une transition de page (fade, render API) + # le DOM n'est pas encore stable → screenshot ambigu → + # verify_action serveur déclenche retry fantôme. Cf. démo + # GHT 2026-05-11. + time.sleep(0.5) else: # Pour type/wait/scroll, petit delai pour laisser l'ecran se stabiliser time.sleep(0.5) @@ -2403,56 +2947,108 @@ class ActionExecutorV1: if anchor.shape[0] >= screenshot.shape[0] or anchor.shape[1] >= screenshot.shape[1]: return None - result = cv2.matchTemplate(screenshot, anchor, cv2.TM_CCOEFF_NORMED) - _, max_val, _, max_loc = cv2.minMaxLoc(result) + def _match_region(name, region, offset_x, offset_y, threshold, drift): + if region.shape[0] < 10 or region.shape[1] < 10: + return None + if region.shape[0] >= screenshot.shape[0] or region.shape[1] >= screenshot.shape[1]: + return None - print(f" [ANCHOR-TM] Score={max_val:.3f}") + tm = cv2.matchTemplate(screenshot, region, cv2.TM_CCOEFF_NORMED) + _, score, _, loc = cv2.minMaxLoc(tm) + logger.debug("[ANCHOR-TM] region=%s score=%.3f loc=%s", name, score, loc) + if score < threshold: + return None + + # Rebuild the full-anchor top-left from the matching sub-region, + # then click the full anchor center. This stays visual: the + # sub-region localizes the moved target, the recorded coordinate + # is only used as a drift guard. 
+ full_left = loc[0] - offset_x + full_top = loc[1] - offset_y + cx = full_left + anchor.shape[1] / 2.0 + cy = full_top + anchor.shape[0] / 2.0 + if cx < 0 or cy < 0 or cx >= screenshot.shape[1] or cy >= screenshot.shape[0]: + return None - # Seuil élevé : le crop anchor doit matcher très bien - if max_val >= 0.80: - # Centre du match en pixels - cx = max_loc[0] + anchor.shape[1] // 2 - cy = max_loc[1] + anchor.shape[0] // 2 - # Convertir en pourcentages par rapport au screenshot décodé x_pct = cx / screenshot.shape[1] y_pct = cy / screenshot.shape[0] - - # Garde drift : refuser un match trop loin de la - # position fallback enregistrée (anti faux positif sur - # crop similaire ailleurs à l'écran). if not self._anchor_match_within_drift( x_pct, y_pct, fallback_x_pct, fallback_y_pct, - max_drift=max_drift, + max_drift=drift, ): print( - f" [ANCHOR-TM] REJET drift " + f" [ANCHOR-TM] REJET drift {name} " f"({x_pct:.3f}, {y_pct:.3f}) loin de " f"({fallback_x_pct:.3f}, {fallback_y_pct:.3f})" ) logger.warning( - f"[ANCHOR-TM] Rejet drift : match ({x_pct:.3f}, " - f"{y_pct:.3f}) score={max_val:.3f} hors zone " - f"fallback ({fallback_x_pct:.3f}, " - f"{fallback_y_pct:.3f}) max_drift={max_drift:.2f}" + "[ANCHOR-TM] Rejet drift region=%s match=(%.3f, %.3f) " + "score=%.3f fallback=(%.3f, %.3f) max_drift=%.2f", + name, x_pct, y_pct, score, + fallback_x_pct, fallback_y_pct, drift, ) return None - print( - f" [ANCHOR-TM] TROUVÉ ({x_pct:.3f}, {y_pct:.3f}) " - f"score={max_val:.3f}" - ) - logger.info( - f"[ANCHOR-TM] Match anchor à ({x_pct:.3f}, {y_pct:.3f}) " - f"score={max_val:.3f}" - ) return { "resolved": True, - "method": "anchor_template", + "method": "anchor_template" if name == "full" else f"anchor_template_{name}", "x_pct": x_pct, "y_pct": y_pct, - "score": max_val, + "score": float(score), } + full_result = _match_region( + "full", anchor, 0, 0, threshold=0.80, drift=max_drift + ) + full_score = 0.0 + if full_result: + full_score = float(full_result.get("score", 0.0) 
or 0.0) + print(f" [ANCHOR-TM] Score={full_score:.3f}") + print( + f" [ANCHOR-TM] TROUVÉ ({full_result['x_pct']:.3f}, " + f"{full_result['y_pct']:.3f}) score={full_score:.3f}" + ) + logger.info( + "[ANCHOR-TM] Match anchor à (%.3f, %.3f) score=%.3f", + full_result["x_pct"], full_result["y_pct"], full_score, + ) + return full_result + + # Desktop icons are often recorded as icon + label + wallpaper. If + # the wallpaper changed, the full crop can fail even though the + # label still uniquely identifies the icon. Use high thresholds so + # this path only accepts near-identical sub-regions. + ah, aw = anchor.shape[:2] + subregions = [ + ("label", anchor[int(ah * 0.62):ah, :], 0, int(ah * 0.62), 0.92, max(max_drift, 0.35)), + ("icon_core", anchor[:int(ah * 0.72), int(aw * 0.20):int(aw * 0.82)], int(aw * 0.20), 0, 0.88, max_drift), + ] + best_sub = None + best_score = -1.0 + for name, region, ox, oy, threshold, drift in subregions: + sub_result = _match_region(name, region, ox, oy, threshold, drift) + if sub_result and sub_result.get("score", 0.0) > best_score: + best_sub = sub_result + best_score = float(sub_result.get("score", 0.0) or 0.0) + + print( + f" [ANCHOR-TM] Score={full_score:.3f}" + + (f", sous-region={best_score:.3f}" if best_sub else "") + ) + if best_sub: + print( + f" [ANCHOR-TM] TROUVÉ sous-region " + f"({best_sub['x_pct']:.3f}, {best_sub['y_pct']:.3f}) " + f"score={best_score:.3f}" + ) + logger.info( + "[ANCHOR-TM] Match sous-region anchor à (%.3f, %.3f) " + "method=%s score=%.3f", + best_sub["x_pct"], best_sub["y_pct"], + best_sub["method"], best_score, + ) + return best_sub + except Exception as e: print(f" [ANCHOR-TM] Erreur: {e}") logger.warning(f"[ANCHOR-TM] Erreur: {e}") @@ -2783,7 +3379,14 @@ Example: x_pct=0.50, y_pct=0.30""" pause_msg = data.get("pause_message") or "Léa a besoin de votre aide" replay_id = data.get("replay_id") or "" pause_key = (replay_id, pause_msg) - if getattr(self, "_last_pause_msg_shown", None) != pause_key: + chat_window 
= getattr(self, "_chat_window_ref", None) + has_active_bubble = bool( + getattr(chat_window, "_active_paused_bubble", None) + ) if chat_window is not None else False + if ( + getattr(self, "_last_pause_msg_shown", None) != pause_key + or not has_active_bubble + ): self._last_pause_msg_shown = pause_key completed = data.get("current_action_index", 0) total = data.get("total_actions", "?") @@ -2800,11 +3403,14 @@ Example: x_pct=0.50, y_pct=0.30""" # double — Dom ne voulait qu'un seul popup. Si la ChatWindow # n'est pas câblée (mode headless / tests), on retombe sur # le toast Tkinter custom comme dernier recours visuel. - chat_window = getattr(self, "_chat_window_ref", None) if chat_window is not None: try: # _add_paused_bubble est thread-safe (utilise root.after) # et force l'affichage + topmost + bell sonore. + logger.info( + "Affichage bulle pause replay=%s etape=%s/%s", + replay_id, completed, total, + ) chat_window._add_paused_bubble(payload) except Exception: logger.debug( @@ -2825,6 +3431,17 @@ Example: x_pct=0.50, y_pct=0.30""" logger.debug("paused_toast fallback silenced", exc_info=True) return False + if getattr(self, "_replay_paused", False): + chat_window = getattr(self, "_chat_window_ref", None) + if chat_window is not None: + try: + chat_window._close_active_paused_bubble(reason="server_cleared") + except Exception: + logger.debug( + "chat_window._close_active_paused_bubble silenced", + exc_info=True, + ) + action = data.get("action") if action is None: self._replay_paused = False @@ -3778,7 +4395,11 @@ Example: x_pct=0.50, y_pct=0.30""" time.sleep(0.05) if button_name == "double": - self.mouse.click(Button.left, 2) + # More reliable than click(..., 2) on Windows remote desktops: + # send two explicit clicks inside the OS double-click interval. 
+ self.mouse.click(Button.left) + time.sleep(0.12) + self.mouse.click(Button.left) elif button_name == "right": self.mouse.click(Button.right) else: @@ -3938,3 +4559,44 @@ Example: x_pct=0.50, y_pct=0.30""" import traceback traceback.print_exc() return "" + + def _capture_region_b64( + self, + left: int, + top: int, + width: int, + height: int, + max_width: int = 800, + quality: int = 60, + ) -> str: + """Capturer une region ecran et retourner l'image en base64.""" + try: + from PIL import Image + + if width <= 0 or height <= 0: + return "" + + region = { + "left": int(left), + "top": int(top), + "width": int(width), + "height": int(height), + } + with mss.mss() as local_sct: + raw = local_sct.grab(region) + img = Image.frombytes("RGB", raw.size, raw.bgra, "raw", "BGRX") + + if max_width > 0 and img.width > max_width: + ratio = max_width / img.width + new_h = int(img.height * ratio) + img = img.resize((max_width, new_h), Image.LANCZOS) + + buffer = io.BytesIO() + img.save(buffer, format="JPEG", quality=quality) + return base64.b64encode(buffer.getvalue()).decode("utf-8") + except ImportError: + logger.debug("PIL non disponible, pas de capture region base64") + return "" + except Exception as e: + logger.warning(f"Capture region base64 echouee : {e}") + return "" diff --git a/agent_v0/agent_v1/core/grounding.py b/agent_v0/agent_v1/core/grounding.py index f778358fa..aa7621f3c 100644 --- a/agent_v0/agent_v1/core/grounding.py +++ b/agent_v0/agent_v1/core/grounding.py @@ -84,6 +84,15 @@ class GroundingEngine: if by_role in {"start_button"}: return False + has_anchor = bool(target_spec.get("anchor_image_base64")) + context_hints = target_spec.get("context_hints") or {} + has_window_or_text_hint = any( + str(target_spec.get(key, "") or "").strip() + for key in ("window_title", "by_text", "vlm_description") + ) or bool(str(context_hints.get("window_title", "") or "").strip()) + if has_anchor and not has_window_or_text_hint and not by_role: + return False + return True 
@staticmethod @@ -174,6 +183,26 @@ class GroundingEngine: hints.append(variant) return hints + @staticmethod + def _server_rejects_text_fallback(raw: Optional[Dict[str, Any]]) -> bool: + """Dire si un rejet serveur doit bloquer le fallback texte local. + + Un rejet explicite n'est pas un simple "non trouvé": le serveur a vu + un candidat et l'a refusé pour une raison de qualité/zone. Refaire une + recherche OCR large côté client contournerait ce garde-fou. + """ + if not raw or raw.get("resolved"): + return False + + reason = str(raw.get("reason") or "") + method = str(raw.get("method") or "") + return ( + method.startswith("rejected_") + or reason.startswith("close_tab_") + or reason.startswith("drift_") + or "below_threshold" in reason + ) + def _window_crop_matches_target_visually( self, screenshot_b64: str, @@ -331,11 +360,31 @@ class GroundingEngine: cap_w = window_rect["width"] if window_rect else screen_width cap_h = window_rect["height"] if window_rect else screen_height + skip_text_fallback_after_server_reject = False for strategy in strategies: + if ( + strategy == "vlm_local" + and skip_text_fallback_after_server_reject + and target_spec.get("by_text") + ): + by_text = target_spec.get("by_text", "") + logger.info( + "[GROUNDING] Rejet serveur explicite pour '%s' — " + "skip fallback local hybrid_text_direct", + by_text, + ) + print( + f" [GROUNDING] Rejet serveur explicite pour '{by_text}' " + "→ pas de fallback texte local" + ) + continue + result = self._try_strategy( strategy, server_url, screenshot_b64, target_spec, fallback_x, fallback_y, cap_w, cap_h, ) + if strategy == "server" and self._server_rejects_text_fallback(result.raw): + skip_text_fallback_after_server_reject = True if result.found: # ── Conversion coords fenêtre → coords écran ── if window_rect: @@ -429,6 +478,14 @@ class GroundingEngine: detail=raw.get("matched_element", {}).get("label", ""), raw=raw, ) + if raw: + return GroundingResult( + found=False, + method=raw.get("method", 
"server"), + score=raw.get("score", 0.0), + detail=raw.get("reason", "server: pas trouvé"), + raw=raw, + ) elif strategy == "template": anchor_b64 = target_spec.get("anchor_image_base64", "") diff --git a/agent_v0/agent_v1/main.py b/agent_v0/agent_v1/main.py index 516eaa55d..2d13fb67c 100644 --- a/agent_v0/agent_v1/main.py +++ b/agent_v0/agent_v1/main.py @@ -121,10 +121,7 @@ class AgentV1: # Wiring ChatWindow → Executor pour Plan B (pause_message → bulle interactive) # Permet à l'executor d'afficher une bulle paused dans la fenêtre Léa V1 # quand le serveur signale replay_paused=True via /replay/next. - try: - self._executor._chat_window_ref = self._chat_window - except Exception: - logger.debug("Wiring chat_window→executor échoué (non bloquant)", exc_info=True) + self._wire_chat_window_to_executor() # Boucles permanentes (pas besoin de session active) self.running = True @@ -154,6 +151,15 @@ class AgentV1: shared_state=self._state, ) + def _wire_chat_window_to_executor(self) -> None: + """Relie l'executor courant à la ChatWindow pour les pauses supervisees.""" + if self._executor is None or self._chat_window is None: + return + try: + self._executor._chat_window_ref = self._chat_window + except Exception: + logger.debug("Wiring chat_window->executor echoue (non bloquant)", exc_info=True) + def _delayed_cleanup(self): """Nettoyage en arrière-plan après 30s pour ne pas bloquer le démarrage.""" time.sleep(30) @@ -224,6 +230,7 @@ class AgentV1: # Initialiser l'executeur partage self._executor = ActionExecutorV1() + self._wire_chat_window_to_executor() self.shot_counter = 0 self.running = True diff --git a/agent_v0/agent_v1/network/lea_orchestrator_client.py b/agent_v0/agent_v1/network/lea_orchestrator_client.py new file mode 100644 index 000000000..6fe79eba9 --- /dev/null +++ b/agent_v0/agent_v1/network/lea_orchestrator_client.py @@ -0,0 +1,147 @@ +""" +Client HTTP minimal pour l'orchestrateur Léa-first (agent-chat Linux). 
+ +Rebranchement P1-LEA-SHADOW : le bouton "Apprenez-moi" côté Windows déclenche +la création d'une session d'apprentissage côté agent-chat (REST) AVANT de +lancer la capture locale. Le pipeline streaming (capture frames/événements +via start_recording) n'est PAS modifié — seule la prise de contact initiale +avec Léa change. + +Contrat : + POST {AGENT_CHAT_URL}/api/learn/start + Headers : Authorization: Bearer , Content-Type: application/json + Body : { machine_id, session_name, user_id?, trigger_source } + Réponse : { session_id, state, message } + +Politique : + - Timeout 10s (connect + read) + - Retry x2 avec backoff 0.5s puis 1.0s + - En cas d'échec définitif : lève LeaOrchestratorError (le caller doit + basculer en mode dégradé : start_recording local sans assistance). +""" + +from __future__ import annotations + +import logging +import time +from dataclasses import dataclass +from typing import Optional + +logger = logging.getLogger(__name__) + + +# Timeout HTTP (connect + read) — 10s comme spec +_HTTP_TIMEOUT_S = 10.0 +# Nombre de tentatives totales (1 + 2 retry) +_MAX_ATTEMPTS = 3 +# Backoff progressif entre les tentatives +_BACKOFF_S = (0.5, 1.0) + + +@dataclass(frozen=True) +class LearnStartResponse: + """Réponse normalisée de POST /api/learn/start.""" + + session_id: str + state: str + message: str + + +class LeaOrchestratorError(RuntimeError): + """Erreur définitive de communication avec l'orchestrateur Léa.""" + + +def start_learning_session( + base_url: str, + *, + machine_id: str, + session_name: str, + api_token: str = "", + user_id: Optional[str] = None, + trigger_source: str = "windows_button", + timeout_s: float = _HTTP_TIMEOUT_S, + max_attempts: int = _MAX_ATTEMPTS, + backoff_s: tuple = _BACKOFF_S, +) -> LearnStartResponse: + """Démarre une session d'apprentissage via l'orchestrateur agent-chat. + + Args: + base_url: URL racine de l'agent-chat (ex. http://localhost:5004). + machine_id: Identifiant unique du poste Windows. 
+ session_name: Nom humain de la tâche (saisi par l'utilisateur). + api_token: Bearer token (RPA_API_TOKEN). Vide => header omis. + user_id: Identifiant utilisateur optionnel. + trigger_source: Source du déclenchement (windows_button, tray, ...). + timeout_s: Timeout total connect+read par tentative. + max_attempts: Nombre total de tentatives (1 + retry). + backoff_s: Tuple des délais en secondes entre tentatives (len = max_attempts-1). + + Returns: + LearnStartResponse normalisée. + + Raises: + LeaOrchestratorError: si toutes les tentatives échouent. + """ + # Import local : httpx peut ne pas être installé sur tous les postes + # Windows historiques. On veut un message d'erreur clair plutôt qu'un + # ImportError en chaîne au moment du clic bouton. + try: + import httpx + except ImportError as exc: # pragma: no cover (dépend du venv) + raise LeaOrchestratorError( + "httpx non disponible — installer httpx>=0.27 sur le poste Windows." + ) from exc + + url = base_url.rstrip("/") + "/api/learn/start" + payload = { + "machine_id": machine_id, + "session_name": session_name, + "trigger_source": trigger_source, + } + if user_id: + payload["user_id"] = user_id + + headers = {"Content-Type": "application/json"} + if api_token: + headers["Authorization"] = f"Bearer {api_token}" + + last_exc: Optional[Exception] = None + for attempt in range(max_attempts): + try: + logger.info( + "POST %s (tentative %d/%d) machine_id=%s session=%s", + url, attempt + 1, max_attempts, machine_id, session_name, + ) + with httpx.Client(timeout=timeout_s) as client: + resp = client.post(url, json=payload, headers=headers) + resp.raise_for_status() + data = resp.json() + session_id = data.get("session_id", "") + state = data.get("state", "") + message = data.get("message", "") + if not session_id: + raise LeaOrchestratorError( + f"Réponse invalide (pas de session_id) : {data!r}" + ) + logger.info( + "Session Léa démarrée : session_id=%s state=%s", + session_id, state, + ) + return 
LearnStartResponse( + session_id=str(session_id), + state=str(state), + message=str(message), + ) + except Exception as exc: # noqa: BLE001 — on retry sur toute erreur réseau/HTTP + last_exc = exc + logger.warning( + "Echec tentative %d/%d POST %s : %s", + attempt + 1, max_attempts, url, exc, + ) + if attempt < max_attempts - 1: + delay = backoff_s[attempt] if attempt < len(backoff_s) else backoff_s[-1] + time.sleep(delay) + + raise LeaOrchestratorError( + f"Echec définitif POST {url} après {max_attempts} tentatives : {last_exc}" + ) diff --git a/agent_v0/agent_v1/network/streamer.py b/agent_v0/agent_v1/network/streamer.py index 382a6419e..a05c3dd39 100644 --- a/agent_v0/agent_v1/network/streamer.py +++ b/agent_v0/agent_v1/network/streamer.py @@ -63,8 +63,14 @@ JPEG_QUALITY = 85 # Taille max de la queue (backpressure) QUEUE_MAX_SIZE = 100 -# Types d'événements à ne jamais dropper -PRIORITY_EVENT_TYPES = {"click", "key", "scroll", "action", "screenshot"} +# Types d'événements à ne jamais dropper. +# Les noms historiques sont conservés, mais les événements réels du captor +# Agent V1 sont mouse_click/key_combo/text_input/mouse_scroll. +PRIORITY_EVENT_TYPES = { + "click", "key", "scroll", "action", "screenshot", + "mouse_click", "double_click", "key_combo", "key_press", + "text_input", "mouse_scroll", +} # Purge locale après ACK serveur (Partie A de l'audit) # Activé par défaut : le serveur conserve déjà les screenshots 180 jours diff --git a/agent_v0/agent_v1/ui/chat_window.py b/agent_v0/agent_v1/ui/chat_window.py index 8dff238bf..bb9805970 100644 --- a/agent_v0/agent_v1/ui/chat_window.py +++ b/agent_v0/agent_v1/ui/chat_window.py @@ -9,6 +9,7 @@ Tourne dans son propre thread daemon pour ne pas bloquer pystray. 
import logging import os +import math import threading import time from datetime import datetime @@ -121,7 +122,7 @@ def _tpl_done(payload: Dict[str, Any]) -> tuple: def _tpl_need_confirm(payload: Dict[str, Any]) -> tuple: action = payload.get("action") or {} desc = action.get("description") if isinstance(action, dict) else None - title = desc or "Validation requise" + title = desc or "J'attends ton accord avant de continuer" return ("?", ACTION_ICON_RUN, str(title)) @@ -867,11 +868,19 @@ class ChatWindow: pass except Exception: logger.debug("force-show chat_window silenced", exc_info=True) - # UX fix mai 2026 : repartir d'un chat vide pour focaliser - # l'attention sur la question (clear visuel uniquement, - # self._messages reste intact pour la traçabilité debug). - self._clear_chat_history() - self._render_paused_bubble(payload) + try: + # UX fix mai 2026 : repartir d'un chat vide pour focaliser + # l'attention sur la question (clear visuel uniquement, + # self._messages reste intact pour la traçabilité debug). + self._clear_chat_history() + self._render_paused_bubble(payload) + except Exception: + logger.exception("render paused bubble failed; using fallback") + try: + self._clear_chat_history() + self._render_paused_fallback_bubble(payload) + except Exception: + logger.debug("render paused fallback silenced", exc_info=True) self._root.after(0, _show_and_render) @@ -895,7 +904,11 @@ class ChatWindow: logger.debug("clear chat history silenced", exc_info=True) @staticmethod - def _compute_paused_bubble_height(reason_str: str) -> tuple: + def _compute_paused_bubble_height( + reason_str: str, + chars_per_line: int = 52, + max_rows: int = 14, + ) -> tuple: """Calcule la hauteur du Text (en lignes) + si une scrollbar est nécessaire pour le message d'une bulle paused. @@ -910,11 +923,11 @@ class ChatWindow: if not reason_str: return 2, False text = str(reason_str) - # Estimation : ~60 chars/ligne effectifs avec wraplength. 
- wrapped_lines = (len(text) // 60) + 1 - explicit_lines = text.count("\n") + 1 - estimated = max(wrapped_lines, explicit_lines) - cap = 12 + chars_per_line = max(24, int(chars_per_line or 52)) + estimated = 0 + for raw_line in text.splitlines() or [""]: + estimated += max(1, math.ceil(len(raw_line) / chars_per_line)) + cap = max(2, int(max_rows or 14)) height = max(2, min(cap, estimated)) # Scrollbar dès que le cap est atteint OU contenu long (filet # textuel : ≥ 200 chars implique souvent un débordement visuel @@ -922,6 +935,46 @@ class ChatWindow: needs_scroll = (estimated >= cap) or (len(text) > 200) return height, needs_scroll + def _paused_text_layout(self) -> tuple: + """Retourne ``(wrap_px, chars_per_line, max_rows)`` pour la bulle pause. + + La fenêtre Léa est souvent redimensionnée à ~380px de large sur le + poste Windows. Les anciennes estimations fixes calculaient trop peu + de lignes et tronquaient le message. On part donc des dimensions + réelles du canvas et de la métrique de la police Tk. + """ + canvas_w = 0 + canvas_h = 0 + try: + canvas_w = int(self._canvas.winfo_width()) if self._canvas is not None else 0 + canvas_h = int(self._canvas.winfo_height()) if self._canvas is not None else 0 + except Exception: + canvas_w = canvas_h = 0 + + # Marges: container + padding inner + petite marge droite. La bulle + # de pause est une alerte critique, elle utilise donc presque toute + # la largeur disponible sur les fenêtres étroites. + wrap_px = max(220, canvas_w - (2 * MARGIN) - 52) if canvas_w else 360 + + avg_char = 8 + line_px = 22 + try: + from tkinter import font as tkfont + font = tkfont.Font(font=FONT_MSG) + avg_char = max(6, font.measure("n")) + line_px = max(18, font.metrics("linespace")) + except Exception: + pass + + chars_per_line = max(24, int(wrap_px / avg_char)) + # Réserver titre, metadata, boutons, feedback et padding. Même sur + # une petite fenêtre, on garde assez de lignes pour ne pas couper un + # message d'erreur standard. 
+ max_rows = 14 + if canvas_h: + max_rows = max(5, min(18, int((canvas_h - 145) / line_px))) + return wrap_px, chars_per_line, max_rows + def _render_paused_bubble(self, payload: Dict[str, Any]) -> None: tk = self._tk if getattr(self, "_msg_frame", None) is None: @@ -941,7 +994,7 @@ class ChatWindow: container, bg=PAUSED_BG, padx=14, pady=12, highlightbackground=PAUSED_BORDER, highlightthickness=2, ) - inner.pack(anchor=tk.W, padx=(0, 50), fill=tk.X) + inner.pack(anchor=tk.W, padx=(0, 12), fill=tk.X) tk.Label( inner, text=f"⏸ Pause supervisée • {now}", @@ -949,31 +1002,44 @@ class ChatWindow: font=("Segoe UI", 12, "bold"), anchor="w", ).pack(fill=tk.X, anchor=tk.W) - # Message scrollable pour les longs reasons (ex: 200+ chars depuis le serveur). - # On utilise un Text en mode read-only avec hauteur calculée selon la longueur. - # Patch 22 mai 2026 : prendre en compte les \n explicites (titres - # fenêtre / patterns) et activer la scrollbar dès que le cap de - # hauteur est atteint — sinon les bulles de pause étaient - # tronquées visuellement sans aucun ascenseur visible. + # Message borné et scrollable : sur une fenêtre Léa étroite, une + # bulle trop haute fait disparaître le début du diagnostic hors du + # viewport. On garde donc la bulle compacte et on scrolle le texte. 
reason_str = str(reason) - height_lines, needs_scroll = self._compute_paused_bubble_height(reason_str) - msg_frame = tk.Frame(inner, bg=PAUSED_BG) - msg_frame.pack(fill=tk.X, anchor=tk.W, pady=(6, 0)) - reason_text = tk.Text( - msg_frame, bg=PAUSED_BG, fg=PAUSED_FG, - font=FONT_MSG, wrap=tk.WORD, bd=0, height=height_lines, - highlightthickness=0, relief=tk.FLAT, cursor="arrow", + _wrap_px, chars_per_line, max_rows = self._paused_text_layout() + text_rows, needs_text_scroll = self._compute_paused_bubble_height( + reason_str, + chars_per_line=chars_per_line, + max_rows=max_rows, ) - reason_text.insert("1.0", reason_str) - reason_text.configure(state="disabled") - reason_text.pack(side=tk.LEFT, fill=tk.X, expand=True) - if needs_scroll: - reason_scroll = tk.Scrollbar( - msg_frame, orient=tk.VERTICAL, - command=reason_text.yview, width=8, + text_frame = tk.Frame(inner, bg=PAUSED_BG) + text_frame.pack(fill=tk.X, anchor=tk.W, pady=(6, 0)) + reason_msg = tk.Text( + text_frame, + height=text_rows, + wrap=tk.WORD, + bg=PAUSED_BG, + fg=PAUSED_FG, + font=FONT_MSG, + bd=0, + highlightthickness=0, + relief=tk.FLAT, + padx=0, + pady=0, + cursor="arrow", + ) + reason_msg.insert("1.0", reason_str) + reason_msg.configure(state="disabled") + reason_msg.pack(side=tk.LEFT, fill=tk.X, expand=True) + if needs_text_scroll: + scrollbar = tk.Scrollbar( + text_frame, + orient=tk.VERTICAL, + command=reason_msg.yview, + width=12, ) - reason_text.configure(yscrollcommand=reason_scroll.set) - reason_scroll.pack(side=tk.RIGHT, fill=tk.Y) + reason_msg.configure(yscrollcommand=scrollbar.set) + scrollbar.pack(side=tk.RIGHT, fill=tk.Y, padx=(6, 0)) tk.Label( inner, text=f"{workflow} — étape {completed}/{total}", @@ -1018,6 +1084,89 @@ class ChatWindow: # Scroll automatique vers la nouvelle bulle (visible immédiatement) self._scroll_to_bottom() + def _render_paused_fallback_bubble(self, payload: Dict[str, Any]) -> None: + """Rendu minimal de secours si la bulle riche echoue.""" + tk = self._tk + if 
getattr(self, "_msg_frame", None) is None: + return + + replay_id = str(payload.get("replay_id", "") or "") + workflow = payload.get("workflow", "?") + reason = str( + payload.get("reason") + or "Action incertaine - j'ai besoin de votre validation." + ) + completed = payload.get("completed", 0) + total = payload.get("total", "?") + now = datetime.now().strftime("%H:%M") + + container = tk.Frame(self._msg_frame, bg=BG_COLOR) + container.pack(fill=tk.X, padx=MARGIN, pady=6) + + inner = tk.Frame( + container, bg=PAUSED_BG, padx=14, pady=12, + highlightbackground=PAUSED_BORDER, highlightthickness=2, + ) + inner.pack(anchor=tk.W, padx=(0, 12), fill=tk.X) + + tk.Label( + inner, text=f"Pause supervisee - {now}", + bg=PAUSED_BG, fg=PAUSED_FG, + font=("Segoe UI", 12, "bold"), anchor="w", + ).pack(fill=tk.X, anchor=tk.W) + + wrap_px = 360 + try: + if self._canvas is not None: + wrap_px = max(220, int(self._canvas.winfo_width()) - 80) + except Exception: + pass + tk.Label( + inner, text=reason, bg=PAUSED_BG, fg=PAUSED_FG, + font=FONT_MSG, wraplength=wrap_px, justify=tk.LEFT, + anchor=tk.W, + ).pack(fill=tk.X, anchor=tk.W, pady=(6, 0)) + + tk.Label( + inner, text=f"{workflow} - etape {completed}/{total}", + bg=PAUSED_BG, fg=TIMESTAMP_FG, font=FONT_TIMESTAMP, anchor="w", + ).pack(fill=tk.X, anchor=tk.W, pady=(4, 8)) + + btn_frame = tk.Frame(inner, bg=PAUSED_BG) + btn_frame.pack(fill=tk.X, anchor=tk.W) + + btn_resume = tk.Button( + btn_frame, text="Continuer", + bg=PAUSED_BTN_RESUME_BG, fg="white", font=FONT_QUICK_BTN, + padx=14, pady=4, bd=0, cursor="hand2", + activebackground=PAUSED_BTN_RESUME_HOVER, activeforeground="white", + command=lambda: self._on_paused_resume(replay_id), + ) + btn_resume.pack(side=tk.LEFT, padx=(0, 8)) + + btn_abort = tk.Button( + btn_frame, text="Annuler", + bg=PAUSED_BTN_ABORT_BG, fg="white", font=FONT_QUICK_BTN, + padx=14, pady=4, bd=0, cursor="hand2", + activebackground=PAUSED_BTN_ABORT_HOVER, activeforeground="white", + command=lambda: 
self._on_paused_abort(replay_id), + ) + btn_abort.pack(side=tk.LEFT) + + feedback_label = tk.Label( + inner, text="", bg=PAUSED_BG, fg=PAUSED_FG, + font=FONT_TIMESTAMP, anchor="w", + ) + feedback_label.pack(fill=tk.X, anchor=tk.W, pady=(6, 0)) + + self._active_paused_bubble = { + "container": container, "inner": inner, + "btn_resume": btn_resume, "btn_abort": btn_abort, + "feedback_label": feedback_label, + "replay_id": replay_id, + } + self._scroll_to_bottom() + def _close_active_paused_bubble(self, reason: str) -> None: if self._active_paused_bubble is None or self._root is None: return @@ -1524,8 +1673,19 @@ class ChatWindow: self._add_lea_message( f"C'est parti ! Montrez-moi comment faire \u00ab {name} \u00bb." ) + + # --- P1-LEA-SHADOW : d\u00e9clencher d'abord l'orchestrateur L\u00e9a Linux --- + # On contacte agent-chat AVANT la capture locale : si la session + # serveur d\u00e9marre, on r\u00e9cup\u00e8re un session_id + un message d'accueil + # de L\u00e9a qu'on affiche dans le chat. Si \u00e9chec : mode d\u00e9grad\u00e9 + # (capture locale uniquement, sans assistance conversationnelle). + self._start_lea_orchestrator_session(name) + + # --- Comportement historique pr\u00e9serv\u00e9 : capture locale --- + # Le pipeline streaming (frames/\u00e9v\u00e9nements) reste pilot\u00e9 par + # agent_v1 local. L'orchestrateur Linux ne touche PAS \u00e0 la + # capture, il pilote uniquement le dialogue de fin de session. try: - # Utiliser l'etat partage si disponible (synchronise le systray) if self._shared_state is not None: self._shared_state.start_recording(name) elif self._on_start_callback is not None: @@ -1533,6 +1693,60 @@ class ChatWindow: except Exception as e: self._add_lea_message(f"Oups, un probl\u00e8me : {e}") + def _start_lea_orchestrator_session(self, session_name: str) -> None: + """Appelle POST /api/learn/start c\u00f4t\u00e9 agent-chat Linux (P1-LEA-SHADOW). + + Fail-safe : toute erreur (config absente, httpx manquant, timeout, + 500 serveur...) 
bascule en mode d\u00e9grad\u00e9 sans bloquer la capture + locale. Un message clair est affich\u00e9 dans le chat. + """ + try: + from ..config import AGENT_CHAT_URL, API_TOKEN, MACHINE_ID + from ..network.lea_orchestrator_client import ( + LeaOrchestratorError, + start_learning_session, + ) + except Exception as exc: # pragma: no cover (import-time) + logger.error("Impossible de charger le client orchestrateur L\u00e9a : %s", exc) + self._add_lea_message( + "\u26a0 Impossible de joindre L\u00e9a serveur. " + "L'apprentissage continue localement, mais sans assistance " + "conversationnelle." + ) + return + + try: + resp = start_learning_session( + AGENT_CHAT_URL, + machine_id=MACHINE_ID, + session_name=session_name, + api_token=API_TOKEN, + trigger_source="windows_button", + ) + except LeaOrchestratorError as exc: + logger.error("Orchestrateur L\u00e9a injoignable : %s", exc) + self._add_lea_message( + "\u26a0 Impossible de joindre L\u00e9a serveur. " + "L'apprentissage continue localement, mais sans assistance " + "conversationnelle." + ) + return + except Exception as exc: # noqa: BLE001 \u2014 d\u00e9fensif + logger.exception("Erreur inattendue orchestrateur L\u00e9a") + self._add_lea_message( + f"\u26a0 Erreur orchestrateur L\u00e9a : {exc}. " + "L'apprentissage continue localement." 
+ ) + return + + # Affichage du message d'accueil renvoy\u00e9 par L\u00e9a (si pr\u00e9sent) + if resp.message: + self._add_lea_message(resp.message) + logger.info( + "Session orchestrateur L\u00e9a OK : id=%s state=%s", + resp.session_id, resp.state, + ) + def _on_quick_tasks(self) -> None: """Bouton Lancer — demande ce que L\u00e9a sait faire.""" self._add_user_message("Qu'est-ce que vous savez faire ?") diff --git a/agent_v0/agent_v1/ui/message_contract.py b/agent_v0/agent_v1/ui/message_contract.py new file mode 100644 index 000000000..af3dea57a --- /dev/null +++ b/agent_v0/agent_v1/ui/message_contract.py @@ -0,0 +1,484 @@ +"""Contrat de lisibilite des messages visibles par l'humain. + +Ce module ne branche encore aucun point runtime. Il fournit une brique pure et +testable pour que les sorties UI de Lea puissent refuser les messages trop +generiques ou trop techniques avant affichage. +""" + +from __future__ import annotations + +import logging +import re +import unicodedata +from dataclasses import dataclass +from typing import Iterable, Mapping + + +logger = logging.getLogger(__name__) + +SUPERVISED_PAUSE_LABELS = ( + "J'essaie de", + "J'attendais", + "Je vois", + "Peux-tu", +) + +MAX_VISIBLE_MESSAGE_CHARS = 720 +MAX_FIELD_CHARS = 180 +MIN_FIELD_CHARS = 4 + +_GENERIC_PHRASES = ( + "un element", + "un élément", + "l'element", + "l'élément", + "element inconnu", + "élément inconnu", + "cette action", + "cette cible", + "cible inconnue", + "validation requise", + "action requise", +) + +_ACTIONABLE_FRENCH_HINTS = ( + "peux-tu", + "cliquer", + "ouvrir", + "selectionner", + "sélectionner", + "choisir", + "saisir", + "corriger", + "montrer", + "indiquer", + "valider", + "fermer", + "placer", + "mettre", + "reprendre", +) + +_TECHNICAL_ENGLISH_TERMS = ( + "target_not_found", + "target not found", + "no_screen_change", + "no screen change", + "wrong_window", + "wrong window", + "validation required", + "retry", + "fallback", + "timeout", + "screenshot", + 
"validator", + "failure", + "failed", + "resolve target", + "postcondition", + "please", + "click", + "button", + "target", + "expected", + "actual", + "observed", +) + +_TECHNICAL_FIELD_RE = re.compile( + r"\b(?:" + r"action_id|replay_id|session_id|workflow_id|machine_id|target_spec|" + r"vlm_description|resolution_method|resolution_score|retry_count|" + r"x_pct|y_pct|screenshot_b64|expected_window_title|current_action_index" + r")\b", + re.IGNORECASE, +) +_TECHNICAL_IDENTIFIER_RE = re.compile( + r"\b(?:action|replay|session|sess|workflow|node|edge|target|retry|" + r"precheck|wait|trace|event|machine|run)_[A-Za-z0-9][A-Za-z0-9_.:-]{3,}\b" +) +_UUID_RE = re.compile( + r"\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b", + re.IGNORECASE, +) +_LONG_HEX_RE = re.compile(r"\b[0-9a-f]{16,}\b", re.IGNORECASE) +_PIXEL_TUPLE_RE = re.compile(r"\(\s*\d{2,5}\s*,\s*\d{2,5}\s*\)") +_PIXEL_FIELD_RE = re.compile( + r"\b(?:x|y|left|top|width|height|w|h|x_pct|y_pct)\s*[=:]\s*-?\d+(?:[.,]\d+)?", + re.IGNORECASE, +) +_PX_RE = re.compile(r"\b\d{2,5}\s*px\b", re.IGNORECASE) +_SCORE_RE = re.compile( + r"\b(?:score|confidence|confiance|similarit[eé]|threshold|seuil|" + r"probabilit[eé])\s*[:=]\s*\d+(?:[.,]\d+)?%?\b", + re.IGNORECASE, +) + +@dataclass(frozen=True) +class MessageValidationIssue: + """Un probleme detecte dans un message visible par l'humain.""" + + code: str + detail: str + + +@dataclass(frozen=True) +class MessageValidationResult: + """Resultat de validation d'un message utilisateur.""" + + issues: tuple[MessageValidationIssue, ...] 
= () + + @property + def valid(self) -> bool: + return not self.issues + + def raise_for_errors(self) -> None: + if not self.valid: + raise MessageContractError(self) + + +class MessageContractError(ValueError): + """Erreur levee quand un message ne respecte pas le contrat humain.""" + + def __init__(self, result: MessageValidationResult): + self.result = result + details = "; ".join(f"{issue.code}: {issue.detail}" for issue in result.issues) + super().__init__(f"Message humain invalide: {details}") + + +@dataclass(frozen=True) +class SupervisedPauseFields: + """Champs obligatoires pour expliquer une pause supervisee.""" + + intention: str + attendu: str + vu: str + demande: str + + +DEFAULT_SUPERVISED_PAUSE_FIELDS = SupervisedPauseFields( + intention="continuer une etape supervisee", + attendu="un accord humain clair avant de continuer", + vu="je suis sur une etape qui demande une verification humaine", + demande="indiquer si je peux continuer ou corriger l'action attendue", +) + + +def format_supervised_pause_message( + *, + intention: str, + attendu: str, + vu: str, + demande: str, +) -> str: + """Formatter une pause supervisee claire et actionnable. + + Le message retourne exactement quatre lignes. Si un champ reste vague ou + technique, la fonction leve ``MessageContractError`` au lieu de produire un + message degradant pour l'utilisateur. + """ + + fields = SupervisedPauseFields( + intention=_one_line(intention), + attendu=_one_line(attendu), + vu=_one_line(vu), + demande=_one_line(demande), + ) + message = "\n".join( + ( + f"J'essaie de : {fields.intention}", + f"J'attendais : {fields.attendu}", + f"Je vois : {fields.vu}", + f"Peux-tu : {fields.demande}", + ) + ) + validate_supervised_pause_message(message).raise_for_errors() + return message + + +def format_supervised_pause_from_mapping(payload: Mapping[str, object]) -> str: + """Formatter depuis un mapping runtime avec noms de champs explicites. 
+ + Alias acceptes pour faciliter l'integration progressive: + ``intention|trying_to``, ``attendu|expected``, ``vu|observed``, + ``demande|request``. + """ + + return format_supervised_pause_message( + intention=_mapping_text(payload, "intention", "trying_to"), + attendu=_mapping_text(payload, "attendu", "expected"), + vu=_mapping_text(payload, "vu", "observed"), + demande=_mapping_text(payload, "demande", "request"), + ) + + +def coerce_supervised_pause_message( + message: object = "", + *, + intention: object = "", + attendu: object = "", + vu: object = "", + demande: object = "", +) -> str: + """Retourner une pause supervisee valide, meme depuis un ancien message. + + Si ``message`` respecte deja le contrat strict, il est conserve. Sinon on + compose les quatre champs avec les valeurs explicites disponibles. Les + valeurs trop vagues ou techniques sont remplacees par des fallbacks clairs. + """ + + raw_message = str(message or "").strip() # keep line breaks: the strict validator needs the four-line layout, flattening made this branch unreachable + if raw_message and validate_supervised_pause_message(raw_message).valid: + return raw_message + + defaults = DEFAULT_SUPERVISED_PAUSE_FIELDS + candidates = SupervisedPauseFields( + intention=_safe_field_text(intention, defaults.intention), + attendu=_safe_field_text(attendu, defaults.attendu), + vu=_safe_field_text(vu, defaults.vu), + demande=_safe_field_text(demande or raw_message, defaults.demande), + ) + + try: + return format_supervised_pause_message( + intention=candidates.intention, + attendu=candidates.attendu, + vu=candidates.vu, + demande=candidates.demande, + ) + except MessageContractError: + return format_supervised_pause_message( + intention=defaults.intention, + attendu=defaults.attendu, + vu=defaults.vu, + demande=defaults.demande, + ) + + +def warn_visible_message( + message: object, + *, + source: str, + supervised_pause: bool = False, +) -> str: + """Log contract violations without modifying the visible message.""" + + text = str(message or "") + validator = validate_supervised_pause_message if supervised_pause else
validate_visible_message + result = validator(text) + if not result.valid: + logger.warning( + "[message_contract] invalid_message source=%s codes=%s", + source, + [issue.code for issue in result.issues], + ) + return text + + +def validate_supervised_pause_message(message: str) -> MessageValidationResult: + """Valider le contrat strict d'une pause supervisee.""" + + issues = list(validate_visible_message(message).issues) + fields, structure_issues = _parse_supervised_pause(message) + issues.extend(structure_issues) + + if fields: + for name, value in fields.items(): + if len(value) < MIN_FIELD_CHARS: + issues.append( + MessageValidationIssue( + "field_too_short", + f"{name} doit etre explicite", + ) + ) + if len(value) > MAX_FIELD_CHARS: + issues.append( + MessageValidationIssue( + "field_too_long", + f"{name} depasse {MAX_FIELD_CHARS} caracteres", + ) + ) + demande = fields.get("demande", "") + if not _contains_actionable_french(demande) or len(demande.split()) < 4: + issues.append( + MessageValidationIssue( + "not_actionable", + "la demande doit contenir une action concrete en francais", + ) + ) + + return _dedupe_issues(issues) + + +def validate_visible_message(message: str) -> MessageValidationResult: + """Valider qu'un message visible n'est ni generique ni technique.""" + + text = str(message or "").strip() + issues: list[MessageValidationIssue] = [] + + if not text: + return MessageValidationResult( + (MessageValidationIssue("empty_message", "message vide"),) + ) + + if len(text) > MAX_VISIBLE_MESSAGE_CHARS: + issues.append( + MessageValidationIssue( + "message_too_long", + f"message au-dela de {MAX_VISIBLE_MESSAGE_CHARS} caracteres", + ) + ) + + folded = _fold(text) + seen_generic_phrases: set[str] = set() + for phrase in _GENERIC_PHRASES: + folded_phrase = _fold(phrase) + if folded_phrase in seen_generic_phrases: + continue + seen_generic_phrases.add(folded_phrase) + if folded_phrase in folded: + issues.append( + MessageValidationIssue( + "generic_phrase", 
+ f"formulation trop generique: {phrase}", + ) + ) + + for term in _TECHNICAL_ENGLISH_TERMS: + if _fold(term) in folded: + issues.append( + MessageValidationIssue( + "technical_english", + f"anglais technique visible: {term}", + ) + ) + + for code, pattern, detail in ( + ("technical_field", _TECHNICAL_FIELD_RE, "champ technique brut"), + ("technical_identifier", _TECHNICAL_IDENTIFIER_RE, "identifiant technique brut"), + ("technical_identifier", _UUID_RE, "UUID brut"), + ("technical_identifier", _LONG_HEX_RE, "hash technique brut"), + ("raw_coordinates", _PIXEL_TUPLE_RE, "coordonnees pixel brutes"), + ("raw_coordinates", _PIXEL_FIELD_RE, "coordonnees techniques brutes"), + ("raw_coordinates", _PX_RE, "coordonnees pixel brutes"), + ("raw_score", _SCORE_RE, "score ou confiance brut"), + ): + if pattern.search(text): + issues.append(MessageValidationIssue(code, detail)) + + return _dedupe_issues(issues) + + +def is_valid_visible_message(message: str) -> bool: + """Raccourci booleen pour les points d'integration UI.""" + + return validate_visible_message(message).valid + + +def is_valid_supervised_pause_message(message: str) -> bool: + """Raccourci booleen pour les pauses supervisees.""" + + return validate_supervised_pause_message(message).valid + + +def _parse_supervised_pause( + message: str, +) -> tuple[dict[str, str], list[MessageValidationIssue]]: + lines = [line.rstrip() for line in str(message or "").splitlines() if line.strip()] + issues: list[MessageValidationIssue] = [] + + if len(lines) != 4: + issues.append( + MessageValidationIssue( + "invalid_structure", + "une pause supervisee doit contenir exactement 4 lignes", + ) + ) + return {}, issues + + specs = ( + ("intention", r"^J'essaie de\s*:\s*(.+)$"), + ("attendu", r"^J'attendais\s*:\s*(.+)$"), + ("vu", r"^Je vois\s*:\s*(.+)$"), + ("demande", r"^Peux-tu\s*:\s*(.+)$"), + ) + fields: dict[str, str] = {} + for index, (line, (name, pattern)) in enumerate(zip(lines, specs)): # index is the real line position; len(fields) lags behind once an earlier line failed + match = re.match(pattern, line) + if not match: +
issues.append( + MessageValidationIssue( + "invalid_structure", + f"ligne {index + 1} doit commencer par {SUPERVISED_PAUSE_LABELS[index]}", + ) + ) + continue + fields[name] = match.group(1).strip() + + if len(fields) != 4: + return {}, issues + + return fields, issues + + +def _contains_actionable_french(text: str) -> bool: + folded = _fold(text) + return any(_fold(hint) in folded for hint in _ACTIONABLE_FRENCH_HINTS) + + +def _one_line(value: object) -> str: + return re.sub(r"\s+", " ", str(value or "")).strip() + + +def _mapping_text(payload: Mapping[str, object], *keys: str) -> str: + for key in keys: + value = payload.get(key) + if value is not None: + return str(value) + return "" + + +def _safe_field_text(value: object, fallback: str) -> str: + text = _one_line(value) + if len(text) < MIN_FIELD_CHARS or len(text) > MAX_FIELD_CHARS: + return fallback + if not validate_visible_message(text).valid: + return fallback + return text + + +def _fold(text: str) -> str: + normalized = unicodedata.normalize("NFKD", str(text or "")) + ascii_text = "".join(ch for ch in normalized if not unicodedata.combining(ch)) + return ascii_text.casefold() + + +def _dedupe_issues(issues: Iterable[MessageValidationIssue]) -> MessageValidationResult: + seen: set[tuple[str, str]] = set() + deduped: list[MessageValidationIssue] = [] + for issue in issues: + key = (issue.code, issue.detail) + if key in seen: + continue + seen.add(key) + deduped.append(issue) + return MessageValidationResult(tuple(deduped)) + + +__all__ = [ + "MAX_FIELD_CHARS", + "MAX_VISIBLE_MESSAGE_CHARS", + "MessageContractError", + "MessageValidationIssue", + "MessageValidationResult", + "SUPERVISED_PAUSE_LABELS", + "SupervisedPauseFields", + "coerce_supervised_pause_message", + "format_supervised_pause_from_mapping", + "format_supervised_pause_message", + "is_valid_supervised_pause_message", + "is_valid_visible_message", + "validate_supervised_pause_message", + "validate_visible_message", +
"warn_visible_message", +] diff --git a/agent_v0/agent_v1/ui/messages.py b/agent_v0/agent_v1/ui/messages.py index bf38e2be5..a7f375039 100644 --- a/agent_v0/agent_v1/ui/messages.py +++ b/agent_v0/agent_v1/ui/messages.py @@ -82,6 +82,12 @@ ICONE_PAR_NIVEAU: dict[NiveauMessage, str] = { NiveauMessage.BLOCAGE: "?", } +# Les pauses supervisees peuvent contenir une raison precise, parfois longue +# (fenetre observee, fenetre attendue, action en cours). On garde l'information +# utile et on laisse les widgets UI gerer le wrap/scroll. +MAX_TARGET_DESCRIPTION_CHARS = 1024 +MAX_GENERIC_TECHNICAL_MESSAGE_CHARS = 1024 + @dataclass class MessageUtilisateur: @@ -147,9 +153,9 @@ def _nettoyer_description_cible(description: str) -> str: desc = description.strip() # Retirer les guillemets encapsulants desc = desc.strip("'\"`") - # Limiter la longueur - if len(desc) > 80: - desc = desc[:77] + "..." + # Limiter la longueur sans perdre les details utiles a la supervision. + if len(desc) > MAX_TARGET_DESCRIPTION_CHARS: + desc = desc[: MAX_TARGET_DESCRIPTION_CHARS - 3] + "..." return desc @@ -566,8 +572,8 @@ def formatter_erreur_generique( # Fallback : message technique tronqué msg_tronque = message_technique.strip() - if len(msg_tronque) > 120: - msg_tronque = msg_tronque[:117] + "..." + if len(msg_tronque) > MAX_GENERIC_TECHNICAL_MESSAGE_CHARS: + msg_tronque = msg_tronque[: MAX_GENERIC_TECHNICAL_MESSAGE_CHARS - 3] + "..." return MessageUtilisateur( niveau=NiveauMessage.ATTENTION, diff --git a/agent_v0/agent_v1/ui/smart_tray.py b/agent_v0/agent_v1/ui/smart_tray.py index e158e101c..4e356ceff 100644 --- a/agent_v0/agent_v1/ui/smart_tray.py +++ b/agent_v0/agent_v1/ui/smart_tray.py @@ -371,7 +371,13 @@ class SmartTrayV1: ) if name and name.strip(): name = name.strip() - # Utiliser l'etat partage si disponible + + # --- P1-LEA-SHADOW : d\u00e9clencher d'abord l'orchestrateur L\u00e9a Linux --- + # On contacte agent-chat AVANT la capture locale. 
Si \u00e9chec, + # bascule en mode d\u00e9grad\u00e9 (capture locale sans assistance). + self._start_lea_orchestrator_session(name) + + # --- Comportement historique pr\u00e9serv\u00e9 : capture locale --- if self._shared_state is not None: try: self._shared_state.start_recording(name) @@ -393,6 +399,55 @@ class SmartTrayV1: threading.Thread(target=_dialog, daemon=True).start() + def _start_lea_orchestrator_session(self, session_name: str) -> None: + """Appelle POST /api/learn/start côté agent-chat Linux (P1-LEA-SHADOW). + + Fail-safe : toute erreur (config absente, httpx manquant, timeout, + 5xx serveur...) bascule en mode dégradé sans bloquer la capture + locale. L'utilisateur est informé via le NotificationManager. + """ + try: + from ..config import AGENT_CHAT_URL, API_TOKEN, MACHINE_ID + from ..network.lea_orchestrator_client import ( + LeaOrchestratorError, + start_learning_session, + ) + except Exception as exc: # pragma: no cover (import-time) + logger.error("Impossible de charger le client orchestrateur Léa : %s", exc) + self._notifier.notify( + "Léa", + "Serveur injoignable — apprentissage local uniquement.", + ) + return + + try: + resp = start_learning_session( + AGENT_CHAT_URL, + machine_id=MACHINE_ID, + session_name=session_name, + api_token=API_TOKEN, + trigger_source="tray_button", + ) + except LeaOrchestratorError as exc: + logger.error("Orchestrateur Léa injoignable : %s", exc) + self._notifier.notify( + "Léa", + "Serveur injoignable — apprentissage local uniquement.", + ) + return + except Exception: # noqa: BLE001 — défensif + logger.exception("Erreur inattendue orchestrateur Léa") + self._notifier.notify( + "Léa", + "Erreur orchestrateur — apprentissage local uniquement.", + ) + return + + logger.info( + "Session orchestrateur Léa OK : id=%s state=%s", + resp.session_id, resp.state, + ) + def _on_stop_session(self, _icon=None, _item=None) -> None: """Termine la session en cours et envoie les donnees.""" count = self.actions_count diff --git 
a/agent_v0/deploy/windows_client/agent_v1/core/captor.py b/agent_v0/deploy/windows_client/agent_v1/core/captor.py index 9d3244727..be8f759f7 100644 --- a/agent_v0/deploy/windows_client/agent_v1/core/captor.py +++ b/agent_v0/deploy/windows_client/agent_v1/core/captor.py @@ -43,6 +46,9 @@ class EventCaptorV1: # État des touches modificatrices self.modifiers = set() + self._pending_standalone_win = False + self._suppress_release_only_win_combo = False + self._raw_key_buffer: List[Dict[str, Any]] = [] # Tracking du focus fenêtre self.last_window = None @@ -159,7 +162,81 @@ class EventCaptorV1: # Clavier # ---------------------------------------------------------------- + @staticmethod + def _get_key_name(key) -> Optional[str]: + """Convertit un objet pynput Key/KeyCode en nom lisible.""" + if isinstance(key, KeyCode): + return key.char if key.char else None + if isinstance(key, Key): + return key.name + return str(key) + + @staticmethod + def _encode_key(key) -> Dict[str, Any]: + if isinstance(key, KeyCode): + return {"kind": "vk", "vk": key.vk, "char": key.char} + if isinstance(key, Key): + return {"kind": "key", "name": key.name} + return {"kind": "unknown", "str": str(key)} + + @staticmethod + def _raw_key_name(raw_key: Dict[str, Any]) -> Optional[str]: + if raw_key.get("kind") == "vk": + char = raw_key.get("char") + if char and len(str(char)) == 1: + return str(char).lower() + if raw_key.get("kind") == "key": + name = raw_key.get("name") + return str(name).lower() if name else None + return None + + def _emit_release_only_windows_combo(self) -> bool: + """Infère Win+ quand seuls les releases sont capturés.""" + with self._text_lock: + raw_keys = list(getattr(self, "_raw_key_buffer", [])) + if len(raw_keys) <
2: + return False + cmd_names = {"cmd", "cmd_l", "cmd_r"} + last = raw_keys[-1] + if last.get("action") != "release" or self._raw_key_name(last) not in cmd_names: + return False + combo_key = None + modifier_names = { + "ctrl", "ctrl_l", "ctrl_r", + "alt", "alt_l", "alt_r", + "shift", "shift_l", "shift_r", + "cmd", "cmd_l", "cmd_r", + } + for raw in reversed(raw_keys[:-1]): + if raw.get("action") != "release": + break # a press event ends the release-only run; anything older is stale typing, not part of this Win combo + name = self._raw_key_name(raw) + if name and name not in modifier_names: + combo_key = name + break + if not combo_key: + return False + self._raw_key_buffer.clear() + + event = { + "type": "key_combo", + "keys": ["win", combo_key], + "raw_keys": raw_keys, + "timestamp": time.time(), + } + self.on_event(event) + return True + def _on_press(self, key): + emit_escape = False # must be bound on every path: only the Escape branch sets it, but the Enter/Tab path reads it too + with self._text_lock: + if not hasattr(self, "_raw_key_buffer"): + self._raw_key_buffer = [] + self._raw_key_buffer.append({ + "action": "press", + **self._encode_key(key), + }) + # Gestion des touches modificatrices if key in (Key.ctrl, Key.ctrl_l, Key.ctrl_r): self.modifiers.add("ctrl") @@ -167,15 +244,26 @@ class EventCaptorV1: self.modifiers.add("alt") elif key in (Key.shift, Key.shift_l, Key.shift_r): self.modifiers.add("shift") + elif key in (Key.cmd, Key.cmd_l, Key.cmd_r): + self.modifiers.add("win") + self._pending_standalone_win = True # --- Combos avec modificateur (sauf Shift seul) --- # Shift seul n'est pas un « vrai » modificateur pour les combos : # Shift+a = 'A' = saisie texte, pas un raccourci. - # On considère un combo seulement si Ctrl ou Alt est enfoncé. - has_real_modifier = self.modifiers & {"ctrl", "alt"} + # On considère un combo seulement si Ctrl, Alt ou Win est enfoncé.
+ has_real_modifier = self.modifiers & {"ctrl", "alt", "win"} if has_real_modifier: key_name = self._get_key_name(key) - if key_name and key_name not in ("ctrl", "alt", "shift"): + if key_name and key_name not in ( + "ctrl", "ctrl_l", "ctrl_r", + "alt", "alt_l", "alt_r", + "shift", "shift_l", "shift_r", + "cmd", "cmd_l", "cmd_r", + ): + self._pending_standalone_win = False + if "win" in self.modifiers: + self._suppress_release_only_win_combo = True # Un combo interrompt la saisie texte en cours self._flush_text_buffer() event = { @@ -205,14 +293,18 @@ class EventCaptorV1: self._reset_flush_timer() return - if key == Key.escape: + escape_keys = [Key.esc] + key_escape = getattr(Key, "escape", None) + if key_escape is not None: + escape_keys.append(key_escape) + if key in escape_keys: # Annuler la saisie en cours self._text_buffer.clear() self._text_start_pos = None self._cancel_flush_timer() - return + emit_escape = True - if key in (Key.enter, Key.tab): + elif key in (Key.enter, Key.tab): # Flush immédiat — on relâche le lock avant d'appeler # _flush_text_buffer (qui prend aussi le lock) pass # on sort du with et on flush après @@ -238,6 +330,15 @@ class EventCaptorV1: # Touche spéciale non gérée (F1, Insert, etc.) 
def _on_release(self, key):
    """Handle a key-release notification from the keyboard listener.

    Steps, in order:

    1. Record the release in ``self._raw_key_buffer`` (under
       ``self._text_lock``), keeping only the most recent entries.
    2. For a Windows key (``Key.cmd*``), settle the pending Win gesture:
       * a combo was already emitted on press -> just drop the raw buffer;
       * otherwise try the release-only inference helper;
       * otherwise, if the key was pressed alone, emit ``["win"]``.
       In every case the Win bookkeeping flags are reset and ``"win"`` is
       removed from ``self.modifiers``.
    3. For Ctrl / Alt / Shift, remove the modifier from ``self.modifiers``.
    """
    max_raw_entries = 64  # only recent history matters for Win inference

    with self._text_lock:
        # Same lazy initialisation as _on_press: tolerate instances whose
        # __init__ did not create the buffer.
        if not hasattr(self, "_raw_key_buffer"):
            self._raw_key_buffer = []
        self._raw_key_buffer.append({
            "action": "release",
            **self._encode_key(key),
        })
        # The buffer is otherwise cleared only on Win-key paths, so cap it
        # to avoid keeping every keystroke of the session in memory.
        overflow = len(self._raw_key_buffer) - max_raw_entries
        if overflow > 0:
            del self._raw_key_buffer[:overflow]

    if key in (Key.cmd, Key.cmd_l, Key.cmd_r):
        if self._suppress_release_only_win_combo:
            # The combo was emitted when the second key was pressed.
            with self._text_lock:
                self._raw_key_buffer.clear()
        elif (
            not self._emit_release_only_windows_combo()
            and self._pending_standalone_win
        ):
            # Win pressed and released with nothing in between.
            self.on_event({
                "type": "key_combo",
                "keys": ["win"],
                "timestamp": time.time(),
            })
        self._pending_standalone_win = False
        self._suppress_release_only_win_combo = False
        self.modifiers.discard("win")
        return

    if key in (Key.ctrl, Key.ctrl_l, Key.ctrl_r):
        self.modifiers.discard("ctrl")
    elif key in (Key.alt, Key.alt_l, Key.alt_r):
        self.modifiers.discard("alt")
    elif key in (Key.shift, Key.shift_l, Key.shift_r):
        self.modifiers.discard("shift")
screenshot = sct.grab(monitor) screen = Image.frombytes('RGB', screenshot.size, screenshot.bgra, 'raw', 'BGRX') - # EasyOCR (rapide, bonne qualité GUI) avec fallback docTR. - # gpu=True : harmonisé avec dialog_handler.py et title_verifier.py. - # Coût VRAM ~0.5 GB, sous le budget RTX 5070 (cf. deploy/VRAM_BUDGET.md). + # EasyOCR (bonne qualité GUI) avec fallback docTR. Par défaut CPU : + # le replay server réserve la VRAM à Ollama. words = [] try: import easyocr - _reader = easyocr.Reader(['fr', 'en'], gpu=True, verbose=False) + from core.llm.ocr_extractor import easyocr_gpu_enabled + _reader = easyocr.Reader( + ['fr', 'en'], + gpu=easyocr_gpu_enabled(default=False), + verbose=False, + ) results = _reader.readtext(np.array(screen)) for (bbox_pts, text, conf) in results: if not text or len(text.strip()) < 1: diff --git a/core/grounding/dialog_handler.py b/core/grounding/dialog_handler.py index 2f28d76e4..501970d0b 100644 --- a/core/grounding/dialog_handler.py +++ b/core/grounding/dialog_handler.py @@ -248,8 +248,10 @@ class DialogHandler: try: import easyocr + from core.llm.ocr_extractor import easyocr_gpu_enabled + gpu = easyocr_gpu_enabled(default=False) self._easyocr_reader = easyocr.Reader( - ['fr', 'en'], gpu=True, verbose=False + ['fr', 'en'], gpu=gpu, verbose=False ) return self._easyocr_reader except ImportError: diff --git a/core/grounding/fast_detector.py b/core/grounding/fast_detector.py index 63e39a2e5..a03542b57 100644 --- a/core/grounding/fast_detector.py +++ b/core/grounding/fast_detector.py @@ -144,19 +144,21 @@ class FastDetector: _easyocr_reader = None # Singleton EasyOCR (chargé une fois) def _ocr_extract(self, image) -> List[Dict[str, Any]]: - """Extrait les mots visibles via EasyOCR (GPU, ~500ms). + """Extrait les mots visibles via EasyOCR. Fallback sur docTR si EasyOCR non disponible. 
""" try: import numpy as np import easyocr + from core.llm.ocr_extractor import easyocr_gpu_enabled # Singleton : charger le reader une seule fois if FastDetector._easyocr_reader is None: - print(f"🔍 [FAST/ocr] Chargement EasyOCR (GPU)...") + gpu = easyocr_gpu_enabled(default=False) + print(f"🔍 [FAST/ocr] Chargement EasyOCR ({'GPU' if gpu else 'CPU'})...") FastDetector._easyocr_reader = easyocr.Reader( - ['fr', 'en'], gpu=True, verbose=False + ['fr', 'en'], gpu=gpu, verbose=False ) results = FastDetector._easyocr_reader.readtext(np.array(image)) diff --git a/core/grounding/title_verifier.py b/core/grounding/title_verifier.py index 3a87e7169..628a8796d 100644 --- a/core/grounding/title_verifier.py +++ b/core/grounding/title_verifier.py @@ -148,10 +148,16 @@ class TitleVerifier: try: import easyocr import numpy as np + from core.llm.ocr_extractor import easyocr_gpu_enabled if TitleVerifier._easyocr_reader is None: + gpu = easyocr_gpu_enabled(default=False) TitleVerifier._easyocr_reader = easyocr.Reader( - ['fr', 'en'], gpu=True, verbose=False + ['fr', 'en'], gpu=gpu, verbose=False + ) + logger.info( + "TitleVerifier EasyOCR initialisé (fr+en, %s)", + "GPU" if gpu else "CPU", ) def _easyocr_extract_text(img): diff --git a/tests/conftest.py b/tests/conftest.py index 7f407ce2f..d3ea8c375 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -12,6 +12,7 @@ Ce fichier garantit que: - Le GPU est vérifié avant les tests qui en ont besoin """ import sys +import types from pathlib import Path import pytest @@ -31,6 +32,42 @@ except ImportError as e: print(f" sys.path: {sys.path[:3]}...") +# Certains tests HTTP d'agent_chat n'ont pas besoin du transport SocketIO reel. +# Le service de production garde Flask-SocketIO comme dependance, mais l'env de +# test local peut etre minimal. On fournit alors un shim strictement pytest. 
try:
    import flask_socketio  # noqa: F401
except ModuleNotFoundError:
    # Pytest-only stand-in: registers a minimal fake module so that code
    # importing flask_socketio can be loaded without the real transport.
    flask_socketio = types.ModuleType("flask_socketio")

    class _FakeSocketIO:
        """Minimal in-memory replacement for ``flask_socketio.SocketIO``.

        Stores constructor arguments, registered handlers and emitted
        events so tests can inspect them; nothing is sent over a network.
        """

        def __init__(self, app=None, *args, **kwargs):
            self.app = app
            self.args = args
            self.kwargs = kwargs
            self.handlers = {}
            self.emitted = []

        def on(self, event, namespace=None):
            """Return a decorator registering *func* as handler of *event*.

            ``namespace`` is accepted for signature compatibility with the
            real ``SocketIO.on(message, namespace=None)`` and is ignored.
            """
            def decorator(func):
                self.handlers[event] = func
                return func
            return decorator

        def emit(self, event, payload=None, **kwargs):
            """Record the emission instead of sending it."""
            self.emitted.append((event, payload, kwargs))

        def run(self, *args, **kwargs):
            """No-op: the fake never starts a server."""
            return None

    def _fake_emit(*_args, **_kwargs):
        """Module-level ``emit`` stand-in; discards its arguments."""
        return None

    flask_socketio.SocketIO = _FakeSocketIO
    flask_socketio.emit = _fake_emit
    sys.modules["flask_socketio"] = flask_socketio
+""" + +from __future__ import annotations + +import json +from unittest.mock import MagicMock, patch + +import pytest + +from agent_chat.handlers.learn_action import ( + LearnActionOrchestrator, + LearnIntentParser, + LearnState, + StateStore, + StreamingClient, +) + + +@pytest.fixture +def fake_http_client(): + """Mock httpx.Client (méthode request).""" + client = MagicMock() + return client + + +def _mk_response(status: int = 200, body: dict | None = None): + resp = MagicMock() + resp.status_code = status + resp.json.return_value = body or {} + resp.text = json.dumps(body or {}) + return resp + + +class TestStreamingClient: + def test_shadow_start_calls_correct_endpoint(self, fake_http_client): + fake_http_client.request.return_value = _mk_response(200, {"ok": True}) + client = StreamingClient( + base_url="http://stream:5005", + token="abc", + http_client=fake_http_client, + retries=0, + ) + out = client.shadow_start("sid_xyz", user_id="dom") + assert out == {"ok": True} + call = fake_http_client.request.call_args + assert call[0][0] == "POST" + assert call[0][1] == "http://stream:5005/api/v1/shadow/start" + assert call[1]["json"]["session_id"] == "sid_xyz" + assert "Authorization" in call[1]["headers"] + assert call[1]["headers"]["Authorization"] == "Bearer abc" + + def test_retry_on_failure(self, fake_http_client): + # 1ere tentative : exception, 2eme : succès + fake_http_client.request.side_effect = [ + Exception("conn refused"), + _mk_response(200, {"ok": True}), + ] + client = StreamingClient( + base_url="http://stream:5005", + token="", + http_client=fake_http_client, + retries=1, + ) + out = client.shadow_stop("sid") + assert out == {"ok": True} + assert fake_http_client.request.call_count == 2 + + def test_retry_exhausted_raises(self, fake_http_client): + fake_http_client.request.side_effect = Exception("boom") + client = StreamingClient( + base_url="http://stream:5005", + token="", + http_client=fake_http_client, + retries=2, + ) + with 
pytest.raises(RuntimeError, match="unreachable"): + client.shadow_stop("sid") + + +class TestFullFlowIntegration: + def test_end_to_end_with_http_mock(self, tmp_path, fake_http_client): + # Mock séquence HTTP : start, stop, understanding, build, persist + understanding_body = { + "understanding": [ + {"action_type": "click", "target_label": "Patient", "widget_type": "Fenêtre"}, + { + "action_type": "type", + "target_label": "IPP", + "widget_type": "Champ", + "value": "25003284", + }, + ] + } + fake_http_client.request.side_effect = [ + _mk_response(200, {"ok": True}), # shadow_start + _mk_response(200, {"ok": True}), # shadow_stop + _mk_response(200, understanding_body), # shadow_understanding + _mk_response(200, {"ok": True}), # shadow_build + _mk_response(200, {"slug": "facture_urg"}), # persist + ] + client = StreamingClient( + base_url="http://stream:5005", + token="t", + http_client=fake_http_client, + retries=0, + ) + orch = LearnActionOrchestrator( + streaming_client=client, + intent_parser=LearnIntentParser(use_llm_fallback=False), + state_store=StateStore(tmp_path), + emit=MagicMock(), + ) + + st, _ = orch.start_session(user_id="dom", machine_id="m1") + sid = st.session_id + assert st.state == LearnState.WAITING_USER_STOP + + # Stop + orch.handle_chat_message(sid, "c'est bon") + assert orch._sessions[sid].state == LearnState.ITERATING_FEEDBACK + + # Validation globale + orch.handle_chat_message(sid, "parfait") + assert orch._sessions[sid].state == LearnState.NAMING + + # Nom + orch.handle_chat_message(sid, "facturation urgences") + + # Marquer IPP comme paramètre + reply = orch.handle_chat_message(sid, "ça change à chaque fois") + assert orch._sessions[sid].state == LearnState.DONE + assert "facture_urg" in (reply or "") + + def test_streaming_down_during_stop(self, tmp_path, fake_http_client): + # shadow_start OK, shadow_stop échoue + fake_http_client.request.side_effect = [ + _mk_response(200, {"ok": True}), # shadow_start + Exception("boom 1"), # 
shadow_stop attempt 1 + Exception("boom 2"), # shadow_stop attempt 2 (retry) + Exception("boom 3"), # shadow_stop attempt 3 (retry) + ] + client = StreamingClient( + base_url="http://stream:5005", + token="", + http_client=fake_http_client, + retries=2, + ) + orch = LearnActionOrchestrator( + streaming_client=client, + intent_parser=LearnIntentParser(use_llm_fallback=False), + state_store=StateStore(tmp_path), + emit=MagicMock(), + ) + st, _ = orch.start_session(user_id="dom") + sid = st.session_id + reply = orch.handle_chat_message(sid, "stop") + assert "n'arrive pas à clôturer" in (reply or "") or "réessaie" in (reply or "").lower() + + +# ============================================================ +# POST /api/learn/start (Correction #4) +# ============================================================ +class TestApiLearnStart: + """Tests integration de la route HTTP POST /api/learn/start.""" + + def _make_orchestrator(self, tmp_path): + client_http = MagicMock() + client_http.request.return_value = _mk_response(200, {"ok": True}) + stream = StreamingClient( + base_url="http://stream:5005", + token="", + http_client=client_http, + retries=0, + ) + return LearnActionOrchestrator( + streaming_client=stream, + intent_parser=LearnIntentParser(use_llm_fallback=False), + state_store=StateStore(tmp_path), + emit=MagicMock(), + ) + + def test_api_learn_start_creates_session(self, tmp_path): + from agent_chat import app as app_module + + orch = self._make_orchestrator(tmp_path) + app_module.learn_action_orchestrator = orch + try: + client = app_module.app.test_client() + resp = client.post( + "/api/learn/start", + json={ + "machine_id": "DESKTOP-58D5CAC_windows", + "user_id": "dom", + "trigger_source": "windows_button", + }, + ) + assert resp.status_code == 200 + data = resp.get_json() + assert "session_id" in data + assert data["state"] == LearnState.WAITING_USER_STOP.value + assert data["message"] + # Vérifie que la session existe bien côté orchestrateur + sid = 
data["session_id"] + assert orch._sessions[sid].machine_id == "DESKTOP-58D5CAC_windows" + assert orch._sessions[sid].trigger_source == "windows_button" + finally: + app_module.learn_action_orchestrator = None + + def test_api_learn_start_400_without_machine_id(self, tmp_path): + from agent_chat import app as app_module + + orch = self._make_orchestrator(tmp_path) + app_module.learn_action_orchestrator = orch + try: + client = app_module.app.test_client() + resp = client.post("/api/learn/start", json={"user_id": "dom"}) + assert resp.status_code == 400 + data = resp.get_json() + assert "machine_id" in (data.get("error") or "").lower() + finally: + app_module.learn_action_orchestrator = None + + def test_api_learn_start_400_with_empty_machine_id(self, tmp_path): + from agent_chat import app as app_module + + orch = self._make_orchestrator(tmp_path) + app_module.learn_action_orchestrator = orch + try: + client = app_module.app.test_client() + resp = client.post( + "/api/learn/start", + json={"machine_id": " "}, + ) + assert resp.status_code == 400 + finally: + app_module.learn_action_orchestrator = None + + def test_api_learn_start_503_if_orchestrator_not_initialized(self): + from agent_chat import app as app_module + + prev = app_module.learn_action_orchestrator + app_module.learn_action_orchestrator = None + try: + client = app_module.app.test_client() + resp = client.post( + "/api/learn/start", + json={"machine_id": "m1"}, + ) + assert resp.status_code == 503 + finally: + app_module.learn_action_orchestrator = prev diff --git a/tests/integration/test_agents_enroll_api.py b/tests/integration/test_agents_enroll_api.py index b06fd89fd..f7d84a4d7 100644 --- a/tests/integration/test_agents_enroll_api.py +++ b/tests/integration/test_agents_enroll_api.py @@ -15,8 +15,10 @@ garantit que l'env est defini AVANT tout import. 
from __future__ import annotations import os +import sqlite3 import sys import tempfile +import time from pathlib import Path import pytest @@ -273,6 +275,107 @@ def test_reenroll_after_uninstall_reactivates(agents_client): assert agent["version"] == "1.1.0" +def test_reenroll_after_admin_revoke_is_forbidden(agents_client): + client, token, _ = agents_client + + client.post( + "/api/v1/agents/enroll", + json={"machine_id": "revoked-001", "user_name": "Revoked"}, + headers=_auth_headers(token), + ) + revoke = client.post( + "/api/v1/agents/uninstall", + json={"machine_id": "revoked-001", "reason": "admin_revoke"}, + headers=_auth_headers(token), + ) + assert revoke.status_code == 200 + + resp = client.post( + "/api/v1/agents/enroll", + json={"machine_id": "revoked-001", "user_name": "Revoked Again"}, + headers=_auth_headers(token), + ) + + assert resp.status_code == 403, resp.text + detail = resp.json()["detail"] + assert detail["error"] == "agent_revoked" + assert detail["existing"]["machine_id"] == "revoked-001" + assert detail["existing"]["uninstall_reason"] == "admin_revoke" + + +def test_revoked_agent_cannot_stream_or_poll(agents_client): + client, token, _ = agents_client + + client.post( + "/api/v1/agents/enroll", + json={"machine_id": "revoked-runtime-001", "user_name": "Runtime"}, + headers=_auth_headers(token), + ) + client.post( + "/api/v1/agents/uninstall", + json={"machine_id": "revoked-runtime-001", "reason": "admin_revoke"}, + headers=_auth_headers(token), + ) + + event_resp = client.post( + "/api/v1/traces/stream/event", + json={ + "session_id": "sess_revoked_runtime", + "timestamp": time.time(), + "event": {"type": "heartbeat"}, + "machine_id": "revoked-runtime-001", + }, + headers=_auth_headers(token), + ) + assert event_resp.status_code == 403, event_resp.text + assert event_resp.json()["detail"]["error"] == "agent_not_active" + + next_resp = client.get( + "/api/v1/traces/stream/replay/next", + params={ + "session_id": "sess_revoked_runtime", + 
"machine_id": "revoked-runtime-001", + }, + headers=_auth_headers(token), + ) + assert next_resp.status_code == 403, next_resp.text + assert next_resp.json()["detail"]["error"] == "agent_not_active" + + +def test_active_agent_stream_updates_last_seen(agents_client): + client, token, registry = agents_client + machine_id = "last-seen-001" + + client.post( + "/api/v1/agents/enroll", + json={"machine_id": machine_id, "user_name": "Seen"}, + headers=_auth_headers(token), + ) + stale = "2000-01-01T00:00:00+00:00" + with sqlite3.connect(str(registry.db_path)) as conn: + conn.execute( + "UPDATE enrolled_agents SET last_seen_at = ? WHERE machine_id = ?", + (stale, machine_id), + ) + conn.commit() + + resp = client.post( + "/api/v1/traces/stream/event", + json={ + "session_id": "sess_last_seen", + "timestamp": time.time(), + "event": {"type": "heartbeat"}, + "machine_id": machine_id, + }, + headers=_auth_headers(token), + ) + + assert resp.status_code == 200, resp.text + row = registry.get(machine_id) + assert row is not None + assert row["last_seen_at"] != stale + + # --------------------------------------------------------------------------- # GET /api/v1/agents/fleet # --------------------------------------------------------------------------- diff --git a/tests/integration/test_build_replay_perf.py b/tests/integration/test_build_replay_perf.py new file mode 100644 index 000000000..fed694fa7 --- /dev/null +++ b/tests/integration/test_build_replay_perf.py @@ -0,0 +1,198 @@ +"""Mesure du gain perf RPA_SKIP_INTENTION_ENRICHMENT sur build_replay. + +Harnais lecture seule : charge une fixture raw events réelle (smoke Bloc-notes +2026-05-20 - même session que replay_sess_e96e5822 18/18 du 2026-05-25) et +appelle directement build_replay_from_raw_events() sans déclencher dispatch +ni replay live. + +Ne pas lancer en CI standard : test perf, run manuel uniquement. 
+ +Run : + .venv/bin/python -m pytest tests/integration/test_build_replay_perf.py \ + -m performance -s -v + +Référence : inbox_claude/2026-05-25_1244_codex-to-claude_recadrage-demo-1juin.md +(mission C2) et plan docs/plans/PLAN_STABILISATION_DEMO_2026-06-01.md +(P0 performance mesurable). +""" +from __future__ import annotations + +import json +import sys +import time +from pathlib import Path + +import pytest + + +ROOT = Path(__file__).resolve().parents[2] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + + +SESSION_DIR = ( + ROOT + / "data" + / "training" + / "live_sessions" + / "DESKTOP-58D5CAC_windows" + / "sess_20260520T102916_066851" +) +FIXTURE = SESSION_DIR / "live_events.jsonl" + + +def _load_raw_events() -> list: + """Charge la fixture raw events réelle (55 events bruts, 16 actions utiles).""" + if not FIXTURE.exists(): + pytest.skip(f"Fixture absente : {FIXTURE}") + with FIXTURE.open() as f: + return [json.loads(line) for line in f if line.strip()] + + +@pytest.fixture +def raw_events(): + return _load_raw_events() + + +@pytest.fixture +def session_dir() -> str: + """Chemin vers session_dir (déclenche l'enrichissement gemma4 si présent).""" + if not SESSION_DIR.exists(): + pytest.skip(f"Session dir absent : {SESSION_DIR}") + return str(SESSION_DIR) + + +def _extract_perf_breakdown(caplog) -> list[tuple[str, float]]: + """Extrait les spans [PERF] build.step* des logs capturés. + + Format attendu : "[PERF] build. session= elapsed_ms=" + Retourne [(step_name, elapsed_ms)] dans l'ordre d'apparition. 
+ """ + import re + pattern = re.compile(r"\[PERF\] build\.(\S+) session=\S+ elapsed_ms=([\d.]+)") + out = [] + for record in caplog.records: + m = pattern.search(record.getMessage()) + if m: + out.append((m.group(1), float(m.group(2)))) + return out + + +@pytest.mark.performance +def test_build_replay_perf_skip_enrichment(monkeypatch, raw_events, session_dir, caplog): + """Mesure build_replay_from_raw_events avec et sans RPA_SKIP_INTENTION_ENRICHMENT. + + Asserts : + - skip enrichissement est au moins 3x plus rapide + - même nombre d'actions produites dans les 2 modes + - skip → 0 actions avec intention non-vide + - full → au moins 1 action avec intention (preuve que gemma4 a tourné) + + Print [PERF] explicite des deux mesures (capturé via -s). + """ + import logging + from agent_v0.server_v1.stream_processor import build_replay_from_raw_events + + # Capter les logs INFO du stream_processor pour récupérer les spans [PERF] + caplog.set_level(logging.INFO, logger="agent_v0.server_v1.stream_processor") + + # Premier run : enrichissement actif (comportement legacy) + monkeypatch.delenv("RPA_SKIP_INTENTION_ENRICHMENT", raising=False) + monkeypatch.delenv("RPA_SKIP_ENRICHMENT", raising=False) + t0 = time.perf_counter() + actions_full = build_replay_from_raw_events( + raw_events, session_id="perf_full", session_dir=session_dir + ) + elapsed_full_ms = (time.perf_counter() - t0) * 1000 + breakdown_full = _extract_perf_breakdown(caplog) + caplog.clear() + + # Second run : skip enrichissement activé (Phase 1 Codex 2026-05-25) + monkeypatch.setenv("RPA_SKIP_INTENTION_ENRICHMENT", "1") + t0 = time.perf_counter() + actions_skip = build_replay_from_raw_events( + raw_events, session_id="perf_skip", session_dir=session_dir + ) + elapsed_skip_ms = (time.perf_counter() - t0) * 1000 + breakdown_skip = _extract_perf_breakdown(caplog) + + speedup = elapsed_full_ms / max(1.0, elapsed_skip_ms) + intentions_full = sum(1 for a in actions_full if a.get("intention")) + intentions_skip = 
sum(1 for a in actions_skip if a.get("intention")) + + print( + f"\n[PERF] build_replay events={len(raw_events)} " + f"actions_full={len(actions_full)} actions_skip={len(actions_skip)} " + f"full_ms={elapsed_full_ms:.0f} skip_ms={elapsed_skip_ms:.0f} " + f"speedup={speedup:.1f}x " + f"intentions_full={intentions_full} intentions_skip={intentions_skip}" + ) + + # Décomposition par étape (C2b) — utile pour identifier les vraies cibles + # d'optimisation post-skip enrichissement. + def _format_breakdown(label: str, b: list[tuple[str, float]]) -> str: + if not b: + return f" {label}: (aucun span [PERF] capturé)" + lines = [f" {label}:"] + for step, ms in b: + bar = "█" * max(1, int(ms / 500)) # 1 char par 500ms + lines.append(f" {step:40s} {ms:>7.0f} ms {bar}") + return "\n".join(lines) + + print(_format_breakdown("Décomposition FULL", breakdown_full)) + print(_format_breakdown("Décomposition SKIP", breakdown_skip)) + + # Invariants — même nombre d'actions, juste les champs intention en moins + assert len(actions_skip) == len(actions_full), ( + f"Le skip ne doit pas changer le nombre d'actions " + f"(full={len(actions_full)}, skip={len(actions_skip)})" + ) + + # Skip → 0 actions avec intention enrichie + assert intentions_skip == 0, ( + f"Skip enrichment doit produire 0 intention non-vide " + f"(observé : {intentions_skip})" + ) + + # Full → au moins 1 action avec intention (sinon gemma4 a planté ou la + # fixture n'a pas d'action eligible). Si 0, c'est anormal et on échoue + # bruyamment. + assert intentions_full > 0, ( + f"Full enrichment doit produire au moins 1 intention non-vide " + f"sur fixture {FIXTURE.name}. Si 0 → gemma4 indisponible ou fixture " + f"non éligible (toutes les actions filtrées avant enrichissement)." + ) + + # Gain perf minimum : 3x. + # Mesure réelle observée (2026-05-25 sur fixture 16 actions, 9 enrichies) : + # full=93.8s, skip=24.1s, speedup=3.9x. 
+ # Le mode skip n'est pas instantané (~24s) car d'autres étapes consomment + # du temps : extraction crops d'ancrage pour clics visual_mode, consolidation + # avec ReplayLearner, normalisation des waits, etc. Seul gemma4 est skippé. + # Estimation initiale 215x était basée sur l'hypothèse "gemma4 seul gros + # coût" — invalidée par la mesure. + assert speedup >= 3.0, ( + f"Gain insuffisant : {speedup:.1f}x (attendu ≥ 3x). " + f"Soit gemma4 cache-hit, soit la fixture n'a pas d'action éligible, " + f"soit Ollama indisponible (fallback rapide). full_ms={elapsed_full_ms:.0f}, " + f"skip_ms={elapsed_skip_ms:.0f}." + ) + + +@pytest.mark.performance +def test_build_replay_skip_alias_works(monkeypatch, raw_events, session_dir): + """Vérifie que l'alias RPA_SKIP_ENRICHMENT a le même effet.""" + from agent_v0.server_v1.stream_processor import build_replay_from_raw_events + + monkeypatch.delenv("RPA_SKIP_INTENTION_ENRICHMENT", raising=False) + monkeypatch.setenv("RPA_SKIP_ENRICHMENT", "1") + actions = build_replay_from_raw_events( + raw_events, session_id="perf_alias", session_dir=session_dir + ) + intentions = sum(1 for a in actions if a.get("intention")) + + print(f"\n[PERF] alias RPA_SKIP_ENRICHMENT actions={len(actions)} intentions={intentions}") + assert intentions == 0, ( + f"L'alias RPA_SKIP_ENRICHMENT doit aussi désactiver l'enrichissement " + f"(observé : {intentions} intentions)" + ) diff --git a/tests/integration/test_chat_window_templates.py b/tests/integration/test_chat_window_templates.py index 1504a0090..6fbc28cda 100644 --- a/tests/integration/test_chat_window_templates.py +++ b/tests/integration/test_chat_window_templates.py @@ -65,7 +65,7 @@ def test_tpl_need_confirm_extracts_action_description(): def test_tpl_need_confirm_fallback(): _, _, title = cw._tpl_need_confirm({}) - assert "Validation" in title + assert "accord" in title def test_tpl_step_result_ok(): diff --git a/tests/integration/test_replay_resume_preserves_original_action.py 
b/tests/integration/test_replay_resume_preserves_original_action.py index f7f8218bb..ad04c1467 100644 --- a/tests/integration/test_replay_resume_preserves_original_action.py +++ b/tests/integration/test_replay_resume_preserves_original_action.py @@ -24,15 +24,19 @@ class TestReplayResumePreservesOriginalAction: monkeypatch.setattr(api_stream_mod, "API_TOKEN", self._TEST_API_TOKEN) @pytest.fixture - def client(self, monkeypatch): + def client(self, monkeypatch, tmp_path): from fastapi.testclient import TestClient from agent_v0.server_v1 import api_stream + from agent_v0.server_v1.agent_registry import AgentRegistry monkeypatch.setattr(api_stream, "API_TOKEN", self._TEST_API_TOKEN) saved_states = dict(api_stream._replay_states) saved_queues = dict(api_stream._replay_queues) saved_retry = dict(api_stream._retry_pending) + original_registry = api_stream.agent_registry + empty_registry = AgentRegistry(db_path=str(tmp_path / "empty_agents.db")) + monkeypatch.setattr(api_stream, "agent_registry", empty_registry) api_stream._replay_states.clear() api_stream._replay_queues.clear() @@ -47,6 +51,7 @@ class TestReplayResumePreservesOriginalAction: api_stream._replay_queues.update(saved_queues) api_stream._retry_pending.clear() api_stream._retry_pending.update(saved_retry) + monkeypatch.setattr(api_stream, "agent_registry", original_registry) def test_resume_reinjects_full_original_action_from_failed_action(self, client): http_client, api_stream, token = client @@ -144,6 +149,7 @@ class TestReplayResumePreservesOriginalAction: next_resp = http_client.get( "/api/v1/traces/stream/replay/next", params={"session_id": "sess_resume_watchdog", "machine_id": "pc-watchdog"}, + headers={"Authorization": f"Bearer {token}"}, ) assert next_resp.status_code == 200 diff --git a/tests/integration/test_replay_session_trim_neutral.py b/tests/integration/test_replay_session_trim_neutral.py index d87721b8d..70f2305b7 100644 --- a/tests/integration/test_replay_session_trim_neutral.py +++ 
b/tests/integration/test_replay_session_trim_neutral.py @@ -104,11 +104,12 @@ def test_replay_session_pipeline_skips_redundant_tab_switch(tmp_path): # 1) Setup auto reconnaît Notepad et génère ses actions assert app_info.get("primary_app") == "Notepad.exe" + assert app_info.get("has_neutral_window_title") is True setup_actions = _generate_setup_actions(app_info, setup_id_prefix="setup_sess") assert setup_actions, "le setup auto doit injecter des actions Notepad" - action_ids = {a.get("action_id", "") for a in setup_actions} - assert any("click_start" in aid for aid in action_ids) - assert any("click_result" in aid for aid in action_ids) + setup_steps = [a.get("_setup_step", "") for a in setup_actions] + assert "open_run_dialog" in setup_steps + assert "ensure_fresh_document" in setup_steps # 2) Trim : le clic intra-Notepad redondant doit disparaître trimmed = _trim_redundant_setup_events(raw_events, app_info) diff --git a/tests/integration/test_t2a_extract.py b/tests/integration/test_t2a_extract.py index dc8401b45..0ffe44148 100644 --- a/tests/integration/test_t2a_extract.py +++ b/tests/integration/test_t2a_extract.py @@ -213,6 +213,24 @@ def test_edge_to_action_extract_text(): assert a["parameters"]["paragraph"] is True +def test_edge_to_action_extract_table_accepts_tesseract_engine_and_variable_name(): + edge = _FakeEdge(_FakeAction( + "extract_table", + parameters={ + "variable_name": "t_extraction_liste", + "pattern": r"^25\d{6}$", + "engine": "tesseract", + }, + )) + actions = _edge_to_normalized_actions(edge, params={}) + assert len(actions) == 1 + a = actions[0] + assert a["type"] == "extract_table" + assert a["parameters"]["output_var"] == "t_extraction_liste" + assert a["parameters"]["pattern"] == r"^25\d{6}$" + assert a["parameters"]["engine"] == "tesseract" + + def test_edge_to_action_t2a_decision(): edge = _FakeEdge(_FakeAction( "t2a_decision", diff --git a/tests/unit/test_agent_chat_cors_lan.py b/tests/unit/test_agent_chat_cors_lan.py new file mode 
100644 index 000000000..9cbf9e305 --- /dev/null +++ b/tests/unit/test_agent_chat_cors_lan.py @@ -0,0 +1,86 @@ +"""Tests de non-régression pour le fix CORS engineio sur le service +rpa-agent-chat (port 5004). + +Avant fix : les origines `http://192.168.1.40:5004` (self loopback) et +`http://192.168.1.11:5004` (Léa Windows) étaient rejetées par engineio, +provoquant `is not an accepted origin` dans le journal (24 mai 2026). + +Fix : élargissement de `_ALLOWED_ORIGINS` dans agent_chat/app.py l. 83-99, +plus override possible via `LEA_CORS_ALLOWED_ORIGINS=comma,separated`. + +Référence : inbox_codex/2026-05-25_1235_..._enquete-feedbackbus-5004.md +""" +from __future__ import annotations + +import importlib +import sys +from pathlib import Path + +import pytest + + +ROOT = Path(__file__).resolve().parents[2] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + + +@pytest.mark.unit +def test_lan_self_loopback_origin_allowed(): + """Le serveur doit accepter sa propre origine `http://192.168.1.40:5004`.""" + from agent_chat import app + assert "http://192.168.1.40:5004" in app._ALLOWED_ORIGINS, ( + "Origine self loopback 5004 absente — engineio va rejeter les " + "connexions SocketIO depuis le serveur lui-même (cf. journal " + "2026-05-24 11:00:47)." + ) + + +@pytest.mark.unit +def test_lan_lea_windows_origin_allowed(): + """Le serveur doit accepter l'origine Léa Windows `http://192.168.1.11:5004`.""" + from agent_chat import app + assert "http://192.168.1.11:5004" in app._ALLOWED_ORIGINS, ( + "Origine Léa Windows 5004 absente — la ChatWindow tkinter ne peut " + "pas établir une session SocketIO." 
+ ) + + +@pytest.mark.unit +def test_legacy_origins_preserved(): + """Les origines historiques doivent rester acceptées (pas de régression).""" + from agent_chat import app + for origin in [ + "http://localhost:3002", + "http://localhost:5002", + "https://vwb.labs.laurinebazin.design", + "https://lea.labs.laurinebazin.design", + ]: + assert origin in app._ALLOWED_ORIGINS, f"Origine historique perdue : {origin}" + + +@pytest.mark.unit +def test_env_override_extends_allowed_origins(monkeypatch): + """`LEA_CORS_ALLOWED_ORIGINS=...` étend la liste par défaut.""" + monkeypatch.setenv( + "LEA_CORS_ALLOWED_ORIGINS", + "https://demo.client.example,http://10.0.0.5:5004", + ) + # Re-import du module pour relire l'env + import agent_chat.app as app_module + importlib.reload(app_module) + assert "https://demo.client.example" in app_module._ALLOWED_ORIGINS + assert "http://10.0.0.5:5004" in app_module._ALLOWED_ORIGINS + # Origines par défaut toujours présentes + assert "http://192.168.1.40:5004" in app_module._ALLOWED_ORIGINS + + +@pytest.mark.unit +def test_env_override_empty_keeps_defaults(monkeypatch): + """`LEA_CORS_ALLOWED_ORIGINS=''` ne casse rien — défauts conservés.""" + monkeypatch.setenv("LEA_CORS_ALLOWED_ORIGINS", "") + import agent_chat.app as app_module + importlib.reload(app_module) + assert "http://192.168.1.40:5004" in app_module._ALLOWED_ORIGINS + assert len(app_module._ALLOWED_ORIGINS) >= 9, ( + "Liste tronquée : attendu au moins 9 origines par défaut" + ) diff --git a/tests/unit/test_agent_chat_learn_action.py b/tests/unit/test_agent_chat_learn_action.py new file mode 100644 index 000000000..c1ae95d94 --- /dev/null +++ b/tests/unit/test_agent_chat_learn_action.py @@ -0,0 +1,526 @@ +"""Tests unit pour agent_chat.handlers.learn_action. 
+ +Couvre : +- LearnIntentParser (regex) +- OptionCFormatter +- StateStore (write atomique + reprise) +- LearnActionOrchestrator (transitions, garde-fous, persistance) +- PersistPayloadBuilder +""" + +from __future__ import annotations + +import json +import os +from pathlib import Path +from typing import Any, Dict, List +from unittest.mock import MagicMock + +import pytest + +from agent_chat.handlers.learn_action import ( + LearnActionOrchestrator, + LearnIntent, + LearnIntentParser, + LearnState, + OptionCFormatter, + PersistPayloadBuilder, + SessionState, + StateStore, +) + + +# ============================================================ +# LearnIntentParser +# ============================================================ +class TestLearnIntentParser: + def setup_method(self): + # Désactive le LLM fallback pour isoler les tests regex + self.parser = LearnIntentParser(use_llm_fallback=False) + + @pytest.mark.parametrize( + "msg", + [ + "apprends-moi", + "Apprends moi", + "regarde-moi faire", + "observe", + "enregistre", + "on apprend", + "tu vas apprendre", + "Léa apprends", + ], + ) + def test_start_observe(self, msg): + r = self.parser.parse(msg, current_state=LearnState.IDLE) + assert r.intent == LearnIntent.START_OBSERVE + assert r.confidence >= 0.9 + + @pytest.mark.parametrize( + "msg", + [ + "stop", + "c'est bon", + "j'ai fini", + "voilà c'est tout", + "fini", + "arrête", + "termine", + ], + ) + def test_user_stop_observe(self, msg): + r = self.parser.parse(msg, current_state=LearnState.WAITING_USER_STOP) + assert r.intent == LearnIntent.USER_STOP_OBSERVE + + def test_correct_step_with_index(self): + r = self.parser.parse( + "Corrige l'étape 3 : il faut cliquer sur Valider", + current_state=LearnState.ITERATING_FEEDBACK, + ) + assert r.intent == LearnIntent.CORRECT_STEP + assert r.step_index == 3 + assert "valider" in (r.extra.get("new_intent") or "").lower() + + def test_undo_step(self): + r = self.parser.parse( + "Retire l'étape 2", 
current_state=LearnState.ITERATING_FEEDBACK + ) + assert r.intent == LearnIntent.UNDO_STEP + assert r.step_index == 2 + + def test_merge_next(self): + r = self.parser.parse( + "Fusionne avec la suivante", current_state=LearnState.ITERATING_FEEDBACK + ) + assert r.intent == LearnIntent.MERGE_NEXT + + def test_split_step(self): + r = self.parser.parse( + "Coupe l'étape 4", current_state=LearnState.ITERATING_FEEDBACK + ) + assert r.intent == LearnIntent.SPLIT_STEP + assert r.step_index == 4 + + def test_cancel(self): + r = self.parser.parse("annule tout", current_state=LearnState.LISTENING) + assert r.intent == LearnIntent.CANCEL + + def test_validate_in_iterating(self): + r = self.parser.parse( + "c'est parfait", current_state=LearnState.ITERATING_FEEDBACK + ) + assert r.intent == LearnIntent.VALIDATE_STEP + + def test_mark_parameter_variable(self): + r = self.parser.parse( + "ça change à chaque fois", current_state=LearnState.NAMING + ) + assert r.intent == LearnIntent.MARK_PARAMETER + assert r.extra.get("is_parameter") is True + + def test_mark_parameter_constant(self): + r = self.parser.parse( + "toujours pareil", current_state=LearnState.NAMING + ) + assert r.intent == LearnIntent.MARK_PARAMETER + assert r.extra.get("is_parameter") is False + + def test_name_competence_when_naming(self): + r = self.parser.parse( + "facturation urgences", current_state=LearnState.NAMING + ) + assert r.intent == LearnIntent.NAME_COMPETENCE + assert "facturation" in (r.extra.get("name") or "") + + def test_unknown_in_idle(self): + r = self.parser.parse( + "blabla random", current_state=LearnState.IDLE + ) + assert r.intent == LearnIntent.UNKNOWN + + def test_llm_fallback_disabled_after_failure(self, monkeypatch): + # Active le LLM mais simule une erreur réseau + parser = LearnIntentParser(use_llm_fallback=True) + # Force exception sur httpx + parser._parse_llm = lambda *args, **kwargs: None # type: ignore[method-assign] + r = parser.parse("zorglub blabla truc", 
current_state=LearnState.IDLE) + # Doit retomber gracieusement sur UNKNOWN sans crasher + assert r.intent == LearnIntent.UNKNOWN + + +# ============================================================ +# OptionCFormatter +# ============================================================ +class TestOptionCFormatter: + def setup_method(self): + self.fmt = OptionCFormatter() + + def test_empty(self): + assert "aucune étape" in self.fmt.format([]) + + def test_simple_click(self): + understanding = [ + {"action_type": "click", "target_label": "Valider", "widget_type": "Bouton"} + ] + out = self.fmt.format(understanding) + assert "1." in out + assert "« Valider »" in out + assert "cliqué" in out + + def test_type_with_value(self): + understanding = [ + { + "action_type": "type", + "target_label": "IPP", + "widget_type": "Champ", + "value": "25003284", + } + ] + out = self.fmt.format(understanding) + assert "« IPP »" in out + assert "« 25003284 »" in out + assert "saisi" in out + + def test_low_confidence_suffix(self): + understanding = [ + { + "action_type": "click", + "target_label": "Patient", + "widget_type": "Fenêtre", + "confidence_ocr": 0.4, + } + ] + out = self.fmt.format(understanding) + assert "(à confirmer)" in out + + def test_unknown_action_fallback(self): + understanding = [{"action_type": "wibble", "target_label": "X"}] + out = self.fmt.format(understanding) + assert "effectuée" in out + + def test_closing_question(self): + q = self.fmt.closing_question() + assert "trompée" in q or "trompee" in q.lower().replace("é", "e") + + +# ============================================================ +# StateStore +# ============================================================ +class TestStateStore: + def test_save_and_load(self, tmp_path): + store = StateStore(tmp_path) + st = SessionState( + session_id="abc123", + user_id="dom", + state=LearnState.ITERATING_FEEDBACK, + ) + store.save(st) + loaded = store.load("abc123") + assert loaded is not None + assert loaded.session_id 
== "abc123" + assert loaded.user_id == "dom" + assert loaded.state == LearnState.ITERATING_FEEDBACK + + def test_atomic_write_no_partial(self, tmp_path): + store = StateStore(tmp_path) + st = SessionState(session_id="atomic1") + store.save(st) + # Pas de fichier .tmp restant + tmp_files = list(tmp_path.glob("*.tmp")) + assert tmp_files == [] + + def test_list_active_filters_done(self, tmp_path): + store = StateStore(tmp_path) + store.save(SessionState(session_id="s1", state=LearnState.ITERATING_FEEDBACK)) + store.save(SessionState(session_id="s2", state=LearnState.DONE)) + store.save(SessionState(session_id="s3", state=LearnState.ABORTED)) + active = store.list_active() + ids = {s.session_id for s in active} + assert ids == {"s1"} + + def test_session_id_sanitized(self, tmp_path): + store = StateStore(tmp_path) + st = SessionState(session_id="../../etc/passwd") + store.save(st) + # Aucun fichier hors tmp_path + files = list(tmp_path.glob("*.json")) + assert len(files) == 1 + assert files[0].parent == tmp_path + + def test_delete(self, tmp_path): + store = StateStore(tmp_path) + store.save(SessionState(session_id="del_me")) + store.delete("del_me") + assert store.load("del_me") is None + + +# ============================================================ +# PersistPayloadBuilder +# ============================================================ +class TestPersistPayloadBuilder: + def test_build_with_parameters(self): + st = SessionState( + session_id="sX", + competence_name="Test compétence", + user_id="dom", + parameters_marked=[ + { + "step_index": 3, + "is_parameter": True, + "name": "ipp", + "example_value": "25003284", + "field_label": "IPP", + }, + { + "step_index": 4, + "is_parameter": False, + "name": "type", + "example_value": "C2", + "field_label": "Type", + }, + ], + ) + payload = PersistPayloadBuilder().build(st) + assert payload["name"] == "Test compétence" + assert payload["session_id"] == "sX" + assert payload["user_id"] == "dom" + # Seul le param flagué 
is_parameter=True doit apparaître + assert len(payload["parameters"]) == 1 + assert payload["parameters"][0]["name"] == "ipp" + + def test_persist_payload_includes_machine_id(self): + """Correction #1 — payload doit inclure machine_id.""" + st = SessionState( + session_id="sM", + competence_name="X", + machine_id="DESKTOP-58D5CAC_windows", + ) + payload = PersistPayloadBuilder().build(st) + assert "machine_id" in payload + assert payload["machine_id"] == "DESKTOP-58D5CAC_windows" + + def test_persist_payload_machine_id_none_when_absent(self): + """Quand non fourni, machine_id reste présent à None dans le payload.""" + st = SessionState(session_id="sM2", competence_name="X") + payload = PersistPayloadBuilder().build(st) + assert "machine_id" in payload + assert payload["machine_id"] is None + + +# ============================================================ +# LearnActionOrchestrator (avec StreamingClient mocké) +# ============================================================ +@pytest.fixture +def mock_streaming(): + """StreamingClient simulé.""" + m = MagicMock() + m.shadow_start.return_value = {"ok": True} + m.shadow_stop.return_value = {"ok": True} + m.shadow_understanding.return_value = { + "understanding": [ + {"action_type": "click", "target_label": "Patient", "widget_type": "Fenêtre"}, + { + "action_type": "type", + "target_label": "IPP", + "widget_type": "Champ", + "value": "25003284", + }, + ] + } + m.shadow_feedback.return_value = {"ok": True} + m.shadow_build.return_value = {"ok": True} + m.competence_persist.return_value = {"slug": "facturation_urgences"} + return m + + +@pytest.fixture +def orchestrator(tmp_path, mock_streaming): + parser = LearnIntentParser(use_llm_fallback=False) + store = StateStore(tmp_path) + return LearnActionOrchestrator( + streaming_client=mock_streaming, + intent_parser=parser, + state_store=store, + emit=MagicMock(), + ) + + +class TestLearnActionOrchestrator: + def test_start_session_transitions(self, orchestrator, 
mock_streaming): + st, reply = orchestrator.start_session(user_id="dom", trigger_source="button") + assert st.state == LearnState.WAITING_USER_STOP + mock_streaming.shadow_start.assert_called_once() + assert "je te regarde" in reply.lower() or "regarde" in reply.lower() + + def test_full_happy_path(self, orchestrator, mock_streaming): + st, _ = orchestrator.start_session(user_id="dom", machine_id="m1") + sid = st.session_id + + # Utilisateur dit stop + reply = orchestrator.handle_chat_message(sid, "c'est bon") + assert reply is not None + assert "j'ai compris" in reply.lower() + assert orchestrator._sessions[sid].state == LearnState.ITERATING_FEEDBACK + + # Utilisateur valide globalement → NAMING + reply = orchestrator.handle_chat_message(sid, "c'est parfait") + assert orchestrator._sessions[sid].state == LearnState.NAMING + + # Nomination + reply = orchestrator.handle_chat_message(sid, "facturation urgences") + # Maintenant Léa doit poser une question sur le paramètre IPP + assert "25003284" in (reply or "") + assert orchestrator._sessions[sid].competence_name == "facturation urgences" + + # Marquer le paramètre comme variable + reply = orchestrator.handle_chat_message(sid, "ça change à chaque fois") + # Plus de pending → persist + mock_streaming.shadow_build.assert_called_once() + mock_streaming.competence_persist.assert_called_once() + assert orchestrator._sessions[sid].state == LearnState.DONE + + def test_emergency_exit_after_3_corrections(self, orchestrator, mock_streaming): + st, _ = orchestrator.start_session(user_id="dom") + sid = st.session_id + orchestrator.handle_chat_message(sid, "c'est bon") # stop + + for i in range(3): + r = orchestrator.handle_chat_message( + sid, "corrige l'étape 3 : clique sur Valider" + ) + assert orchestrator._sessions[sid].state == LearnState.ITERATING_FEEDBACK + + # 4e correction → ABORTED + r = orchestrator.handle_chat_message( + sid, "corrige l'étape 3 : clique sur Valider" + ) + assert orchestrator._sessions[sid].state == 
LearnState.ABORTED + assert "n°3" in (r or "") + + def test_cancel_anywhere(self, orchestrator, mock_streaming): + st, _ = orchestrator.start_session(user_id="dom") + sid = st.session_id + reply = orchestrator.handle_chat_message(sid, "annule tout") + assert orchestrator._sessions[sid].state == LearnState.ABORTED + assert "annule" in (reply or "").lower() + + def test_idle_message_returns_none(self, orchestrator): + # Aucune session ouverte → None (laisser le flux normal gérer) + r = orchestrator.handle_chat_message("nonexistent", "Bonjour") + assert r is None + + def test_state_persistence_across_reload(self, tmp_path, mock_streaming): + store = StateStore(tmp_path) + parser = LearnIntentParser(use_llm_fallback=False) + orch1 = LearnActionOrchestrator( + streaming_client=mock_streaming, + intent_parser=parser, + state_store=store, + emit=MagicMock(), + ) + st, _ = orch1.start_session(user_id="dom") + sid = st.session_id + orch1.handle_chat_message(sid, "c'est bon") # passe en ITERATING_FEEDBACK + + # Simule un crash + redémarrage + orch2 = LearnActionOrchestrator( + streaming_client=mock_streaming, + intent_parser=parser, + state_store=store, + emit=MagicMock(), + ) + resumed = orch2.resume_sessions() + assert sid in resumed + assert orch2._sessions[sid].state == LearnState.ITERATING_FEEDBACK + + def test_proactive_signal_cooldown(self, orchestrator): + r1 = orchestrator.handle_proactive_signal("action_repeat", {}) + assert r1 is not None + # Deuxième signal immédiat → ignoré + r2 = orchestrator.handle_proactive_signal("action_repeat", {}) + assert r2 is None + + def test_illegal_transition_ignored(self, orchestrator, mock_streaming): + st, _ = orchestrator.start_session(user_id="dom") + # Tentative de passer directement de WAITING_USER_STOP à DONE + prev = orchestrator._sessions[st.session_id].state + orchestrator._transition( + orchestrator._sessions[st.session_id], LearnState.DONE + ) + assert orchestrator._sessions[st.session_id].state == prev + + # 
============================================================ + # Corrections P1-LEA-SHADOW 2026-06-01 (NO-GO Qwen) + # ============================================================ + def test_start_session_stores_machine_id(self, orchestrator): + """Correction #1 — machine_id transmis à start_session est stocké.""" + st, _ = orchestrator.start_session( + user_id="dom", + trigger_source="windows_button", + machine_id="DESKTOP-58D5CAC_windows", + ) + assert st.machine_id == "DESKTOP-58D5CAC_windows" + # Et la session en mémoire aussi + assert ( + orchestrator._sessions[st.session_id].machine_id + == "DESKTOP-58D5CAC_windows" + ) + + def test_persist_blocked_without_machine_id(self, orchestrator, mock_streaming): + """Correction #1 — persist refusé conversationnellement sans machine_id.""" + st, _ = orchestrator.start_session(user_id="dom") # pas de machine_id + sid = st.session_id + orchestrator.handle_chat_message(sid, "c'est bon") # → ITERATING + orchestrator.handle_chat_message(sid, "c'est parfait") # → NAMING + orchestrator.handle_chat_message(sid, "ma competence") # nom + # Marquer paramètre → tentative persist + reply = orchestrator.handle_chat_message(sid, "ça change à chaque fois") + # competence_persist NE doit PAS avoir été appelée + mock_streaming.competence_persist.assert_not_called() + # Message métier explicite côté Léa + assert reply is not None + assert "machine" in reply.lower() + + def test_datetime_uses_timezone_aware(self): + """Correction #2 — created_at / last_transition_at sont timezone-aware.""" + st = SessionState(session_id="tz1") + # Le format ISO doit contenir un offset (+00:00 ou Z) — tzinfo présent + # après reparse via fromisoformat (Python 3.11+). + from datetime import datetime as _dt + parsed_created = _dt.fromisoformat(st.created_at) + parsed_transition = _dt.fromisoformat(st.last_transition_at) + assert parsed_created.tzinfo is not None + assert parsed_transition.tzinfo is not None + # Sanity check : c'est bien UTC. 
+ assert "+00:00" in st.created_at or st.created_at.endswith("Z") + + def test_confirm_blocked_when_name_missing(self, orchestrator, mock_streaming): + """Correction #3 — CONFIRM en NAMING avec competence_name=None reste NAMING.""" + st, _ = orchestrator.start_session( + user_id="dom", machine_id="machine_x" + ) + sid = st.session_id + orchestrator.handle_chat_message(sid, "c'est bon") + orchestrator.handle_chat_message(sid, "c'est parfait") # → NAMING + # Forcer competence_name à None et envoyer un CONFIRM + orchestrator._sessions[sid].competence_name = None + reply = orchestrator.handle_chat_message(sid, "ok") # CONFIRM + assert orchestrator._sessions[sid].state == LearnState.NAMING + assert reply is not None + assert "nom" in reply.lower() or "appeler" in reply.lower() + mock_streaming.competence_persist.assert_not_called() + + def test_confirm_blocked_when_name_empty(self, orchestrator, mock_streaming): + """Correction #3 — CONFIRM en NAMING avec competence_name='' reste NAMING.""" + st, _ = orchestrator.start_session( + user_id="dom", machine_id="machine_x" + ) + sid = st.session_id + orchestrator.handle_chat_message(sid, "c'est bon") + orchestrator.handle_chat_message(sid, "c'est parfait") # → NAMING + orchestrator._sessions[sid].competence_name = " " # vide après strip + reply = orchestrator.handle_chat_message(sid, "ok") + assert orchestrator._sessions[sid].state == LearnState.NAMING + assert reply is not None + assert "nom" in reply.lower() or "appeler" in reply.lower() + mock_streaming.competence_persist.assert_not_called() diff --git a/tests/unit/test_autonomous_planner_owl_flag.py b/tests/unit/test_autonomous_planner_owl_flag.py new file mode 100644 index 000000000..017954dda --- /dev/null +++ b/tests/unit/test_autonomous_planner_owl_flag.py @@ -0,0 +1,121 @@ +"""Tests pour le feature flag AGENT_CHAT_ENABLE_OWL (C1b). 
+ +Contexte : depuis 2026-05-25, OWL-v2 ne se charge plus au boot du service +rpa-agent-chat par défaut (économie ~600 MiB VRAM constatée par Codex après +restart C1). Activation via AGENT_CHAT_ENABLE_OWL=1. + +Référence : inbox_claude/2026-05-25_1327_codex-to-claude_C1-post-restart-ok-c1b-vram.md +Fix : agent_chat/autonomous_planner.py _init_visual_detection() l. 139-... +""" +from __future__ import annotations + +import sys +from pathlib import Path + +import pytest + + +ROOT = Path(__file__).resolve().parents[2] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + + +@pytest.mark.unit +def test_owl_skipped_by_default(monkeypatch): + """Sans AGENT_CHAT_ENABLE_OWL, OWL ne doit PAS se charger au boot.""" + monkeypatch.delenv("AGENT_CHAT_ENABLE_OWL", raising=False) + from agent_chat.autonomous_planner import AutonomousPlanner + + planner = AutonomousPlanner(llm_model="qwen2.5:7b") + assert planner._owl_detector is None, ( + f"OWL chargé alors que flag OFF (économie VRAM perdue) : " + f"{planner._owl_detector}" + ) + + +@pytest.mark.unit +def test_owl_skipped_when_flag_zero(monkeypatch): + """AGENT_CHAT_ENABLE_OWL=0 → OWL skip.""" + monkeypatch.setenv("AGENT_CHAT_ENABLE_OWL", "0") + from agent_chat.autonomous_planner import AutonomousPlanner + + planner = AutonomousPlanner(llm_model="qwen2.5:7b") + assert planner._owl_detector is None + + +@pytest.mark.unit +def test_owl_skipped_when_flag_false(monkeypatch): + """AGENT_CHAT_ENABLE_OWL=false → OWL skip (alias accepté).""" + monkeypatch.setenv("AGENT_CHAT_ENABLE_OWL", "false") + from agent_chat.autonomous_planner import AutonomousPlanner + + planner = AutonomousPlanner(llm_model="qwen2.5:7b") + assert planner._owl_detector is None + + +@pytest.mark.unit +def test_owl_init_attempted_when_flag_one(monkeypatch): + """AGENT_CHAT_ENABLE_OWL=1 → tentative d'init (succès ou échec rattrapé). 
+ + Le test ne valide PAS que OWL charge effectivement (dépend GPU + modèle + HF disponible), juste que le code passe la garde du flag et tente l'init. + On mocke OwlDetector pour vérifier qu'il est instancié. + """ + monkeypatch.setenv("AGENT_CHAT_ENABLE_OWL", "1") + from agent_chat import autonomous_planner as ap_module + + calls = [] + + class FakeOwl: + def __init__(self, **kwargs): + calls.append(kwargs) + + monkeypatch.setattr(ap_module, "OwlDetector", FakeOwl) + monkeypatch.setattr(ap_module, "VISUAL_DETECTION_AVAILABLE", True) + + planner = ap_module.AutonomousPlanner(llm_model="qwen2.5:7b") + assert planner._owl_detector is not None, ( + "OWL doit être instancié quand AGENT_CHAT_ENABLE_OWL=1" + ) + assert len(calls) == 1 + assert calls[0].get("confidence_threshold") == 0.1 + + +@pytest.mark.unit +def test_owl_device_override(monkeypatch): + """AGENT_CHAT_OWL_DEVICE=cpu force le device CPU même si CUDA dispo.""" + monkeypatch.setenv("AGENT_CHAT_ENABLE_OWL", "1") + monkeypatch.setenv("AGENT_CHAT_OWL_DEVICE", "cpu") + from agent_chat import autonomous_planner as ap_module + + calls = [] + + class FakeOwl: + def __init__(self, **kwargs): + calls.append(kwargs) + + monkeypatch.setattr(ap_module, "OwlDetector", FakeOwl) + monkeypatch.setattr(ap_module, "VISUAL_DETECTION_AVAILABLE", True) + + ap_module.AutonomousPlanner(llm_model="qwen2.5:7b") + assert calls[0].get("device") == "cpu" + + +@pytest.mark.unit +def test_owl_init_exception_caught(monkeypatch): + """Si OWL crash à l'init (OOM CUDA, modèle absent, etc.), AutonomousPlanner + doit continuer à booter avec _owl_detector=None.""" + monkeypatch.setenv("AGENT_CHAT_ENABLE_OWL", "1") + from agent_chat import autonomous_planner as ap_module + + class CrashOwl: + def __init__(self, **kwargs): + raise RuntimeError("CUDA out of memory (simulation)") + + monkeypatch.setattr(ap_module, "OwlDetector", CrashOwl) + monkeypatch.setattr(ap_module, "VISUAL_DETECTION_AVAILABLE", True) + + planner = 
ap_module.AutonomousPlanner(llm_model="qwen2.5:7b") + assert planner._owl_detector is None, ( + "L'exception doit être catchée — AutonomousPlanner ne doit pas crash" + ) diff --git a/tests/unit/test_chat_window_paused_dispatch.py b/tests/unit/test_chat_window_paused_dispatch.py index 7ce1683cc..904e0be8d 100644 --- a/tests/unit/test_chat_window_paused_dispatch.py +++ b/tests/unit/test_chat_window_paused_dispatch.py @@ -120,7 +120,7 @@ class TestDispatchPausedAction: class TestPausedBubbleHeight: - """Couvre _compute_paused_bubble_height — patch troncature 22 mai 2026.""" + """Couvre _compute_paused_bubble_height — anti-troncature pause UI.""" def test_empty_message_uses_minimum_height(self): h, scroll = ChatWindow._compute_paused_bubble_height("") @@ -133,10 +133,27 @@ class TestPausedBubbleHeight: assert scroll is False def test_long_single_line_triggers_scrollbar(self): - # ~600 chars sans \n → wrapped_lines = 600 // 60 + 1 = 11 msg = "x" * 600 h, scroll = ChatWindow._compute_paused_bubble_height(msg) - assert h == 11 + assert h == 12 + assert scroll is True + + def test_narrow_window_estimate_keeps_wrong_window_message_visible(self): + """Cas observé sur Windows : fenêtre Léa ~380px, message wrong_window + coupé après "attendu". Avec ~34 caractères par ligne, il faut + prévoir assez de lignes pour afficher le détail.""" + msg = ( + "Je m'attendais à voir la bonne fenêtre mais je vois autre chose. " + "Peux-tu vérifier que l'application est au premier plan ? 
" + "(Fenêtre incorrecte : attendu " + "'http192.168.1.408765dossier.htmlid=.txt - Bloc-notes', " + "actuel 'Program Manager')" + ) + h, scroll = ChatWindow._compute_paused_bubble_height( + msg, + chars_per_line=34, + ) + assert h >= 7 assert scroll is True def test_message_with_many_newlines_uses_explicit_count(self): @@ -150,11 +167,11 @@ class TestPausedBubbleHeight: assert scroll is False def test_cap_reached_triggers_scrollbar_even_if_short(self): - """Quand on dépasse le cap (12 lignes), la scrollbar DOIT + """Quand on dépasse le cap, la scrollbar DOIT s'afficher quel que soit la longueur en caractères.""" msg = "\n".join([f"l{i}" for i in range(20)]) h, scroll = ChatWindow._compute_paused_bubble_height(msg) - assert h == 12 # plafond + assert h == 14 # plafond assert scroll is True def test_long_content_triggers_scrollbar_at_200_chars(self): @@ -163,3 +180,18 @@ class TestPausedBubbleHeight: msg = "x" * 220 h, scroll = ChatWindow._compute_paused_bubble_height(msg) assert scroll is True + + def test_dynamic_small_viewport_caps_rows_and_scrolls(self): + msg = ( + "Je m'attendais à voir la bonne fenêtre mais je vois autre chose. " + "Peux-tu vérifier que l'application est au premier plan ? " + "(Post-vérif échouée : fenêtre '*test – Bloc-notes' au lieu de " + "'Enregistrer sous')" + ) + h, scroll = ChatWindow._compute_paused_bubble_height( + msg, + chars_per_line=32, + max_rows=5, + ) + assert h == 5 + assert scroll is True diff --git a/tests/unit/test_enrich_click_skip_build_vision.py b/tests/unit/test_enrich_click_skip_build_vision.py new file mode 100644 index 000000000..eaa410ca5 --- /dev/null +++ b/tests/unit/test_enrich_click_skip_build_vision.py @@ -0,0 +1,269 @@ +"""Tests C2d-bis : short-circuit SomEngine + _gemma4_read_element au build. + +Niveau A : si vision_info.text non vide → SomEngine pas appelé (faible risque, + comportement par défaut depuis 2026-05-25). 
+Niveau B : flag RPA_SKIP_BUILD_VISION (ou alias RPA_SKIP_BUILD_VLM) actif → + SomEngine + _gemma4_read_element jamais appelés, même si + vision_info.text vide. + +Référence : inbox_claude/2026-05-25_1700_codex-to-claude_AMEND-C2d-bis-gemini-short-circuit.md +Découverte C2c : inbox_codex/2026-05-25_1500_claude-to-codex_C2c-analyse-step4-crops.md +""" +from __future__ import annotations + +import sys +import tempfile +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + + +ROOT = Path(__file__).resolve().parents[2] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + + +@pytest.fixture +def fake_screenshot(tmp_path): + """Crée un screenshot PNG bidon (1920x1080 gris) pour les tests.""" + from PIL import Image + img = Image.new("RGB", (1920, 1080), color=(128, 128, 128)) + path = tmp_path / "shots" / "shot_0001_full.png" + path.parent.mkdir(parents=True, exist_ok=True) + img.save(path, "PNG") + return path + + +def _make_session_dir(tmp_path): + """Session dir contenant shots/ vide (pour passer le check Path.is_dir).""" + session = tmp_path / "session" + (session / "shots").mkdir(parents=True, exist_ok=True) + return session + + +# ──────────────────────────────────────────────────────────────────────────── +# Niveau A — short-circuit vision_info.text +# ──────────────────────────────────────────────────────────────────────────── + + +@pytest.mark.unit +def test_niveau_a_vision_info_text_skips_som_and_gemma4( + monkeypatch, fake_screenshot, tmp_path +): + """vision_info.text non vide → SomEngine et _gemma4_read_element JAMAIS appelés.""" + monkeypatch.delenv("RPA_SKIP_BUILD_VISION", raising=False) + monkeypatch.delenv("RPA_SKIP_BUILD_VLM", raising=False) + from agent_v0.server_v1 import stream_processor as sp + + # Mocks : SomEngine et gemma4 ne doivent PAS être appelés + som_calls = [] + gemma_calls = [] + + def fake_som(*args, **kwargs): + som_calls.append(args) + return {"label": "should_not_be_used", "source": "som"} 
+ + def fake_gemma(*args, **kwargs): + gemma_calls.append(args) + return "should_not_be_used" + + monkeypatch.setattr(sp, "_som_identify_clicked_element", fake_som) + monkeypatch.setattr(sp, "_gemma4_read_element", fake_gemma) + + result = sp.enrich_click_from_screenshot( + screenshot_path=fake_screenshot, + click_x=500, + click_y=300, + screen_w=1920, + screen_h=1080, + window_title="Bloc-notes", + vision_info={"text": "Enregistrer", "type": "button"}, + session_dir=_make_session_dir(tmp_path), + screenshot_id="shot_0001", + ) + + # Assertions + assert len(som_calls) == 0, f"SomEngine appelé alors que vision_info.text présent : {som_calls}" + assert len(gemma_calls) == 0, f"_gemma4_read_element appelé : {gemma_calls}" + # L'action garde tous les champs critiques + assert result["by_text"] == "Enregistrer" + assert result["by_text_source"] == "ocr" + assert result["by_role"] == "button" + assert result["window_title"] == "Bloc-notes" + assert result["anchor_image_base64"] # crop calculé + assert result["by_position"] == [round(500 / 1920, 6), round(300 / 1080, 6)] + + +@pytest.mark.unit +def test_niveau_a_vision_info_text_empty_calls_som( + monkeypatch, fake_screenshot, tmp_path +): + """vision_info.text vide ET flag absent → SomEngine appelé (comportement legacy).""" + monkeypatch.delenv("RPA_SKIP_BUILD_VISION", raising=False) + monkeypatch.delenv("RPA_SKIP_BUILD_VLM", raising=False) + from agent_v0.server_v1 import stream_processor as sp + + som_calls = [] + gemma_calls = [] + + def fake_som(*args, **kwargs): + som_calls.append(args) + return {"label": "label_from_som", "source": "som"} + + def fake_gemma(*args, **kwargs): + gemma_calls.append(args) + return "" # gemma trouve rien + + monkeypatch.setattr(sp, "_som_identify_clicked_element", fake_som) + monkeypatch.setattr(sp, "_gemma4_read_element", fake_gemma) + + result = sp.enrich_click_from_screenshot( + screenshot_path=fake_screenshot, + click_x=500, + click_y=300, + screen_w=1920, + screen_h=1080, + 
window_title="App", + vision_info={"text": "", "type": ""}, # vide + session_dir=_make_session_dir(tmp_path), + screenshot_id="shot_0001", + ) + + # SomEngine doit être appelé (comportement legacy préservé) + assert len(som_calls) == 1 + # Gemma appelé aussi car SomEngine label utilisé comme element_text → on + # n'entre PAS dans le bloc gemma4 + # (cf. ligne 974-981 : si som_elem.label → element_text = som_elem.label) + assert len(gemma_calls) == 0 + # by_text vient de SomEngine + assert result["by_text"] == "label_from_som" + assert result["by_text_source"] == "ocr" + + +# ──────────────────────────────────────────────────────────────────────────── +# Niveau B — flag RPA_SKIP_BUILD_VISION +# ──────────────────────────────────────────────────────────────────────────── + + +@pytest.mark.unit +def test_niveau_b_flag_skip_build_vision_blocks_all( + monkeypatch, fake_screenshot, tmp_path +): + """RPA_SKIP_BUILD_VISION=true → SomEngine et gemma4 jamais appelés, même + si vision_info.text est vide.""" + monkeypatch.setenv("RPA_SKIP_BUILD_VISION", "true") + monkeypatch.delenv("RPA_SKIP_BUILD_VLM", raising=False) + from agent_v0.server_v1 import stream_processor as sp + + som_calls = [] + gemma_calls = [] + monkeypatch.setattr(sp, "_som_identify_clicked_element", + lambda *a, **kw: som_calls.append(a) or {"label": "X"}) + monkeypatch.setattr(sp, "_gemma4_read_element", + lambda *a, **kw: gemma_calls.append(a) or "X") + + result = sp.enrich_click_from_screenshot( + screenshot_path=fake_screenshot, + click_x=100, click_y=100, + screen_w=1920, screen_h=1080, + window_title="App", + vision_info={"text": "", "type": ""}, + session_dir=_make_session_dir(tmp_path), + screenshot_id="shot_0001", + ) + + assert len(som_calls) == 0, f"SomEngine appelé malgré flag actif : {som_calls}" + assert len(gemma_calls) == 0, f"gemma4 appelé malgré flag actif : {gemma_calls}" + # Action conservée avec canaux fallback + assert result["anchor_image_base64"] # crop préservé + assert 
result["window_title"] == "App" + assert result["by_position"] # position préservée + # by_text vide acceptable (le replay tombera sur anchor/position) + assert result["by_text"] == "" + + +@pytest.mark.unit +def test_niveau_b_alias_skip_build_vlm_works( + monkeypatch, fake_screenshot, tmp_path +): + """Alias RPA_SKIP_BUILD_VLM=true accepté (compat message Codex 1650).""" + monkeypatch.delenv("RPA_SKIP_BUILD_VISION", raising=False) + monkeypatch.setenv("RPA_SKIP_BUILD_VLM", "true") + from agent_v0.server_v1 import stream_processor as sp + + som_calls = [] + monkeypatch.setattr(sp, "_som_identify_clicked_element", + lambda *a, **kw: som_calls.append(a)) + monkeypatch.setattr(sp, "_gemma4_read_element", + lambda *a, **kw: "should_not_be_called") + + sp.enrich_click_from_screenshot( + screenshot_path=fake_screenshot, + click_x=100, click_y=100, + screen_w=1920, screen_h=1080, + window_title="App", + vision_info={"text": ""}, + session_dir=_make_session_dir(tmp_path), + screenshot_id="shot_0001", + ) + assert len(som_calls) == 0 + + +@pytest.mark.unit +def test_flag_off_calls_som_when_no_vision_text( + monkeypatch, fake_screenshot, tmp_path +): + """Flag explicitement OFF + vision_info.text vide → comportement legacy.""" + monkeypatch.setenv("RPA_SKIP_BUILD_VISION", "0") + monkeypatch.delenv("RPA_SKIP_BUILD_VLM", raising=False) + from agent_v0.server_v1 import stream_processor as sp + + som_calls = [] + monkeypatch.setattr(sp, "_som_identify_clicked_element", + lambda *a, **kw: som_calls.append(a) or None) + monkeypatch.setattr(sp, "_gemma4_read_element", + lambda *a, **kw: "from_gemma") + + result = sp.enrich_click_from_screenshot( + screenshot_path=fake_screenshot, + click_x=100, click_y=100, + screen_w=1920, screen_h=1080, + window_title="App", + vision_info={"text": ""}, + session_dir=_make_session_dir(tmp_path), + screenshot_id="shot_0001", + ) + assert len(som_calls) == 1, "Flag OFF doit conserver SomEngine" + # gemma4 appelé car SomEngine retourne None + assert 
result["by_text"] == "from_gemma" + assert result["by_text_source"] == "vlm" + + +@pytest.mark.unit +def test_niveau_b_priority_over_niveau_a( + monkeypatch, fake_screenshot, tmp_path +): + """Flag actif + vision_info.text non vide → log skip_som flag, pas vision_info.""" + monkeypatch.setenv("RPA_SKIP_BUILD_VISION", "true") + from agent_v0.server_v1 import stream_processor as sp + + som_calls = [] + monkeypatch.setattr(sp, "_som_identify_clicked_element", + lambda *a, **kw: som_calls.append(a)) + monkeypatch.setattr(sp, "_gemma4_read_element", + lambda *a, **kw: "should_not") + + result = sp.enrich_click_from_screenshot( + screenshot_path=fake_screenshot, + click_x=100, click_y=100, + screen_w=1920, screen_h=1080, + window_title="App", + vision_info={"text": "Save", "type": "button"}, + session_dir=_make_session_dir(tmp_path), + screenshot_id="shot_0001", + ) + assert len(som_calls) == 0 + # vision_info.text reste utilisé (priorité ligne 974-981 préservée) + assert result["by_text"] == "Save" + assert result["by_text_source"] == "ocr" diff --git a/tests/unit/test_executor_verify_window_guard.py b/tests/unit/test_executor_verify_window_guard.py index 68a0f90a3..b92d70fa7 100644 --- a/tests/unit/test_executor_verify_window_guard.py +++ b/tests/unit/test_executor_verify_window_guard.py @@ -15,6 +15,7 @@ On teste deux choses : from __future__ import annotations import sys +import types from pathlib import Path from types import SimpleNamespace from unittest.mock import patch, MagicMock @@ -22,6 +23,95 @@ from unittest.mock import patch, MagicMock ROOT = Path(__file__).parent.parent.parent sys.path.insert(0, str(ROOT)) + +def _install_fake_pynput_if_missing(): + try: + import pynput # noqa: F401 + return + except ModuleNotFoundError: + pass + + class FakeKeyValue: + def __init__(self, name): + self.name = name + + def __repr__(self): + return f"Key.{self.name}" + + def __hash__(self): + return hash(("key", self.name)) + + def __eq__(self, other): + return 
isinstance(other, FakeKeyValue) and self.name == other.name + + class FakeKey: + pass + + for name in ( + "enter", "tab", "esc", "backspace", "delete", "space", + "up", "down", "left", "right", "home", "end", + "page_up", "page_down", "f1", "f2", "f3", "f4", "f5", "f6", + "f7", "f8", "f9", "f10", "f11", "f12", "ctrl", "ctrl_l", + "ctrl_r", "alt", "alt_l", "alt_r", "shift", "shift_l", + "shift_r", "cmd", "insert", "print_screen", "caps_lock", + "num_lock", + ): + setattr(FakeKey, name, FakeKeyValue(name)) + + class FakeKeyCode: + def __init__(self, char=None, vk=None): + self.char = char + self.vk = vk + + @classmethod + def from_char(cls, char): + return cls(char=char) + + @classmethod + def from_vk(cls, vk): + return cls(vk=vk) + + def __hash__(self): + return hash(("keycode", self.char, self.vk)) + + def __eq__(self, other): + return ( + isinstance(other, FakeKeyCode) + and self.char == other.char + and self.vk == other.vk + ) + + class FakeController: + def press(self, *_args, **_kwargs): + return None + + def release(self, *_args, **_kwargs): + return None + + def click(self, *_args, **_kwargs): + return None + + class FakeButton: + left = "left" + right = "right" + + pynput = types.ModuleType("pynput") + mouse = types.ModuleType("pynput.mouse") + keyboard = types.ModuleType("pynput.keyboard") + mouse.Button = FakeButton + mouse.Controller = FakeController + keyboard.Controller = FakeController + keyboard.Key = FakeKey + keyboard.KeyCode = FakeKeyCode + pynput.mouse = mouse + pynput.keyboard = keyboard + sys.modules["pynput"] = pynput + sys.modules["pynput.mouse"] = mouse + sys.modules["pynput.keyboard"] = keyboard + + +_install_fake_pynput_if_missing() + from agent_v0.agent_v1.core.executor import ActionExecutorV1 # noqa: E402 @@ -184,6 +274,44 @@ class TestPostVerifyWindowTransition: expected_after="test – Bloc-notes", ) + def test_enrich_target_context_marks_transition_and_generic_button(self): + spec = ActionExecutorV1._enrich_target_context_from_action( + 
{ + "expected_window_before": "*test – Bloc-notes", + "expected_window_title": "Enregistrer sous", + }, + { + "by_text": "Enregistrer", + "by_role": "button", + "window_title": "*test – Bloc-notes", + }, + ) + + hints = spec["context_hints"] + assert hints["requires_window_transition"] is True + assert hints["expected_window_before"] == "*test – Bloc-notes" + assert hints["expected_window_after"] == "Enregistrer sous" + assert hints["generic_button_text"] == "Enregistrer" + assert hints["button_expected_after_window"] == "Enregistrer sous" + + def test_enrich_target_context_keeps_same_window_non_transition(self): + spec = ActionExecutorV1._enrich_target_context_from_action( + { + "expected_window_before": "*test – Bloc-notes", + "expected_window_title": "test – Bloc-notes", + }, + { + "by_text": "test", + "by_role": "tab", + "window_title": "*test – Bloc-notes", + }, + ) + + hints = spec["context_hints"] + assert hints["expected_window_before"] == "*test – Bloc-notes" + assert hints["expected_window_after"] == "test – Bloc-notes" + assert "requires_window_transition" not in hints + # ========================================================================= # Routage de la garde dans verify_screen diff --git a/tests/unit/test_grounding_engine.py b/tests/unit/test_grounding_engine.py index 4c7d762d9..4cbd46549 100644 --- a/tests/unit/test_grounding_engine.py +++ b/tests/unit/test_grounding_engine.py @@ -44,3 +44,80 @@ def test_template_strategy_passes_fallback_coords_to_anchor_drift_guard(): fallback_x_pct=0.708594, fallback_y_pct=0.35, ) + + +def test_server_explicit_reject_skips_local_text_fallback(): + executor = MagicMock() + executor._server_resolve_target = MagicMock( + return_value={ + "resolved": False, + "method": "rejected_close_tab_zone_hybrid_text_direct", + "reason": "close_tab_out_of_recorded_zone", + "score": 0.8, + } + ) + executor._hybrid_vlm_resolve = MagicMock( + return_value={ + "resolved": True, + "x_pct": 0.1, + "y_pct": 0.13, + "method": 
"hybrid_text_direct", + "score": 0.9, + } + ) + + engine = GroundingEngine(executor) + engine._capture_window_or_screen = MagicMock(return_value="shot") + + result = engine.locate( + "http://server", + { + "by_text": "test", + "context_hints": {"interaction": "close_tab"}, + "screen_scope": "full_screen", + }, + fallback_x=0.7, + fallback_y=0.04, + screen_width=2560, + screen_height=1600, + ) + + assert result.found is False + executor._hybrid_vlm_resolve.assert_not_called() + + +def test_server_plain_not_found_allows_local_text_fallback(): + executor = MagicMock() + executor._server_resolve_target = MagicMock( + return_value={ + "resolved": False, + "method": "server_no_match", + "reason": "not_found", + "score": 0.0, + } + ) + executor._hybrid_vlm_resolve = MagicMock( + return_value={ + "resolved": True, + "x_pct": 0.45, + "y_pct": 0.5, + "method": "hybrid_text_direct", + "score": 0.9, + } + ) + + engine = GroundingEngine(executor) + engine._capture_window_or_screen = MagicMock(return_value="shot") + + result = engine.locate( + "http://server", + {"by_text": "Enregistrer", "screen_scope": "full_screen"}, + fallback_x=0.5, + fallback_y=0.5, + screen_width=1920, + screen_height=1080, + ) + + assert result.found is True + assert result.method == "hybrid_text_direct" + executor._hybrid_vlm_resolve.assert_called_once() diff --git a/tests/unit/test_keyboard_system_keys.py b/tests/unit/test_keyboard_system_keys.py new file mode 100644 index 000000000..28d715ae4 --- /dev/null +++ b/tests/unit/test_keyboard_system_keys.py @@ -0,0 +1,162 @@ +import importlib +import sys +import types + + +def _install_fake_pynput(monkeypatch): + class FakeKey: + def __init__(self, name): + self.name = name + + def __repr__(self): + return f"Key.{self.name}" + + for name in ( + "ctrl", + "ctrl_l", + "ctrl_r", + "alt", + "alt_l", + "alt_r", + "shift", + "shift_l", + "shift_r", + "cmd", + "cmd_l", + "cmd_r", + "esc", + "enter", + "tab", + "space", + "backspace", + ): + setattr(FakeKey, name, 
FakeKey(name)) + + class FakeKeyCode: + def __init__(self, char=None, vk=None): + self.char = char + self.vk = vk + + pynput = types.ModuleType("pynput") + mouse = types.ModuleType("pynput.mouse") + keyboard = types.ModuleType("pynput.keyboard") + + class FakeButton: + pass + + mouse.Button = FakeButton + mouse.Listener = object + keyboard.Key = FakeKey + keyboard.KeyCode = FakeKeyCode + keyboard.Listener = object + pynput.mouse = mouse + pynput.keyboard = keyboard + + monkeypatch.setitem(sys.modules, "pynput", pynput) + monkeypatch.setitem(sys.modules, "pynput.mouse", mouse) + monkeypatch.setitem(sys.modules, "pynput.keyboard", keyboard) + sys.modules.pop("agent_v0.agent_v1.core.captor", None) + return FakeKey, FakeKeyCode + + +def _load_captor(monkeypatch): + fake_key, fake_key_code = _install_fake_pynput(monkeypatch) + module = importlib.import_module("agent_v0.agent_v1.core.captor") + return module, fake_key, fake_key_code + + +def test_standalone_windows_key_is_emitted_on_release(monkeypatch): + captor_module, key, _key_code = _load_captor(monkeypatch) + events = [] + captor = captor_module.EventCaptorV1(events.append) + captor._inject_screen_metadata = lambda _event: None + + captor._on_press(key.cmd) + assert events == [] + + captor._on_release(key.cmd) + + assert [event["keys"] for event in events] == [["win"]] + assert [raw["action"] for raw in events[0]["raw_keys"]] == ["press", "release"] + assert "win" not in captor.modifiers + + +def test_windows_shortcut_cancels_standalone_windows_key(monkeypatch): + captor_module, key, key_code = _load_captor(monkeypatch) + events = [] + captor = captor_module.EventCaptorV1(events.append) + captor._inject_screen_metadata = lambda _event: None + + captor._on_press(key.cmd) + captor._on_press(key_code(char="s", vk=83)) + captor._on_release(key_code(char="s", vk=83)) + captor._on_release(key.cmd) + + assert [event["keys"] for event in events] == [["win", "s"]] + + +def 
test_release_only_windows_shortcut_is_inferred(monkeypatch): + captor_module, key, key_code = _load_captor(monkeypatch) + events = [] + captor = captor_module.EventCaptorV1(events.append) + captor._inject_screen_metadata = lambda _event: None + + # Windows/NoMachine can swallow press events for Win+S and only deliver + # release('s') then release('cmd'). + captor._on_release(key_code(char="s", vk=83)) + captor._on_release(key.cmd) + + assert [event["keys"] for event in events] == [["win", "s"]] + assert [raw["action"] for raw in events[0]["raw_keys"]] == ["release", "release"] + + +def test_escape_key_is_emitted_as_key_combo(monkeypatch): + captor_module, key, _key_code = _load_captor(monkeypatch) + events = [] + captor = captor_module.EventCaptorV1(events.append) + captor._inject_screen_metadata = lambda _event: None + + captor._on_press(key.esc) + + assert [event["keys"] for event in events] == [["escape"]] + + +def test_stream_processor_keeps_win_but_filters_other_modifiers(): + from agent_v0.server_v1.stream_processor import ( + _is_parasitic_event, + _needs_post_wait, + clean_compound_steps, + clean_enriched_actions, + ) + + assert _is_parasitic_event({"type": "key_combo", "keys": ["ctrl"]}) is True + assert _is_parasitic_event({"type": "key_combo", "keys": ["win"]}) is False + + assert clean_enriched_actions( + [ + {"type": "key_combo", "keys": ["ctrl"]}, + {"type": "key_combo", "keys": ["win"]}, + ] + ) == [{"type": "key_combo", "keys": ["win"]}] + + assert clean_compound_steps( + [ + {"type": "key_combo", "keys": ["shift"]}, + {"type": "key_combo", "keys": ["win"]}, + ] + ) == [{"type": "key_combo", "keys": ["win"]}] + + assert _needs_post_wait({"type": "key_combo", "keys": ["win"]}) >= 1500 + assert _needs_post_wait({"type": "key_combo", "keys": ["win", "s"]}) >= 1500 + assert _needs_post_wait({"type": "key_combo", "keys": ["escape"]}) >= 500 + + +def test_streamer_prioritizes_real_captor_event_types(): + from agent_v0.agent_v1.network.streamer import 
TraceStreamer + + streamer = TraceStreamer("sess_keyboard_priority") + + assert streamer._is_priority_item("event", {"type": "key_combo"}) is True + assert streamer._is_priority_item("event", {"type": "text_input"}) is True + assert streamer._is_priority_item("event", {"type": "mouse_click"}) is True + assert streamer._is_priority_item("event", {"type": "heartbeat"}) is False diff --git a/tests/unit/test_lea_message_contract.py b/tests/unit/test_lea_message_contract.py new file mode 100644 index 000000000..4ccab20be --- /dev/null +++ b/tests/unit/test_lea_message_contract.py @@ -0,0 +1,280 @@ +"""Tests du contrat de messages humains pour Lea.""" + +from __future__ import annotations + +import pytest + +from agent_v0.agent_v1.ui.message_contract import ( + MAX_FIELD_CHARS, + MessageContractError, + coerce_supervised_pause_message, + format_supervised_pause_from_mapping, + format_supervised_pause_message, + validate_supervised_pause_message, + validate_visible_message, + warn_visible_message, +) + + +def _valid_pause(**overrides: str) -> str: + fields = { + "intention": "ouvrir le dossier patient dans Aiva Urgence", + "attendu": "voir la fiche du patient ouverte avec la liste des passages", + "vu": "la page d'accueil Aiva Urgence sans le dossier patient", + "demande": "ouvrir le dossier patient puis me rendre la main", + } + fields.update(overrides) + return format_supervised_pause_message(**fields) + + +def _raw_pause(**overrides: str) -> str: + fields = { + "intention": "ouvrir le dossier patient dans Aiva Urgence", + "attendu": "voir la fiche du patient ouverte avec la liste des passages", + "vu": "la page d'accueil Aiva Urgence sans le dossier patient", + "demande": "ouvrir le dossier patient puis me rendre la main", + } + fields.update(overrides) + return "\n".join( + [ + f"J'essaie de : {fields['intention']}", + f"J'attendais : {fields['attendu']}", + f"Je vois : {fields['vu']}", + f"Peux-tu : {fields['demande']}", + ] + ) + + +def _issue_codes(message: str) -> 
set[str]: + return {issue.code for issue in validate_supervised_pause_message(message).issues} + + +def test_format_supervised_pause_has_exact_four_field_structure(): + message = _valid_pause() + + assert message.splitlines() == [ + "J'essaie de : ouvrir le dossier patient dans Aiva Urgence", + "J'attendais : voir la fiche du patient ouverte avec la liste des passages", + "Je vois : la page d'accueil Aiva Urgence sans le dossier patient", + "Peux-tu : ouvrir le dossier patient puis me rendre la main", + ] + assert validate_supervised_pause_message(message).valid + + +def test_format_from_mapping_accepts_runtime_aliases(): + message = format_supervised_pause_from_mapping( + { + "trying_to": "selectionner le passage aux urgences", + "expected": "voir le formulaire de codage du passage", + "observed": "la liste des passages reste affichee", + "request": "selectionner le bon passage puis me rendre la main", + } + ) + + assert "J'essaie de : selectionner le passage aux urgences" in message + assert validate_supervised_pause_message(message).valid + + +@pytest.mark.parametrize( + "bad_phrase", + [ + "un element", + "un élément", + "cette action", + "Validation requise", + "cible inconnue", + ], +) +def test_blacklist_refuses_generic_formulations(bad_phrase): + message = _raw_pause(vu=f"je vois {bad_phrase}") + + result = validate_supervised_pause_message(message) + + assert not result.valid + assert "generic_phrase" in {issue.code for issue in result.issues} + + +@pytest.mark.parametrize( + "technical_text", + [ + "action_click_12ab34", + "replay_9f8e7d6c", + "session_id", + "target_spec.by_text", + "550e8400-e29b-41d4-a716-446655440000", + "a3f6c9d8e1b24567", + ], +) +def test_refuses_raw_technical_identifiers(technical_text): + message = _raw_pause(attendu=f"voir le dossier patient apres {technical_text}") + + assert "technical_identifier" in _issue_codes(message) or "technical_field" in _issue_codes(message) + + +@pytest.mark.parametrize( + "technical_text", + [ + 
"(123, 456)", + "x=120 y=340", + "340px", + "score=0.87", + "confidence=0.91", + "similarité=0.42", + ], +) +def test_refuses_pixels_and_raw_scores(technical_text): + message = _raw_pause(vu=f"la page Aiva avec {technical_text}") + + codes = _issue_codes(message) + + assert "raw_coordinates" in codes or "raw_score" in codes + + +@pytest.mark.parametrize( + "technical_english", + [ + "target_not_found", + "no_screen_change", + "wrong_window", + "validation required", + "retry", + "screenshot", + ], +) +def test_refuses_technical_english(technical_english): + message = _raw_pause(vu=f"le message {technical_english} est affiche") + + assert "technical_english" in _issue_codes(message) + + +def test_refuses_raw_english_instruction(): + message = _raw_pause(demande="please click the target button") + + codes = _issue_codes(message) + + assert "technical_english" in codes + assert "not_actionable" in codes + + +def test_refuses_messages_without_four_required_lines(): + result = validate_supervised_pause_message("Je ne trouve pas le dossier patient.") + + assert not result.valid + assert "invalid_structure" in {issue.code for issue in result.issues} + + +def test_refuses_wrong_label_order(): + message = "\n".join( + [ + "J'attendais : voir la fiche patient", + "J'essaie de : ouvrir le dossier patient", + "Je vois : la page d'accueil", + "Peux-tu : ouvrir le dossier puis me rendre la main", + ] + ) + + assert "invalid_structure" in _issue_codes(message) + + +def test_demande_must_be_actionable_in_french(): + message = "\n".join( + [ + "J'essaie de : ouvrir le dossier patient", + "J'attendais : voir la fiche patient ouverte", + "Je vois : la page d'accueil Aiva Urgence", + "Peux-tu : merci beaucoup", + ] + ) + + assert "not_actionable" in _issue_codes(message) + + +def test_visible_message_validator_accepts_clear_french_actionable_text(): + message = ( + "Je ne trouve pas le dossier patient dans Aiva Urgence. " + "Peux-tu ouvrir le dossier puis me rendre la main ?" 
+ ) + + assert validate_visible_message(message).valid + + +def test_formatter_raises_instead_of_emitting_generic_message(): + with pytest.raises(MessageContractError): + format_supervised_pause_message( + intention="faire cette action", + attendu="validation requise", + vu="un element", + demande="corriger", + ) + + +def test_formatter_raises_on_too_short_request(): + with pytest.raises(MessageContractError): + format_supervised_pause_message( + intention="ouvrir le dossier patient dans Aiva Urgence", + attendu="voir la fiche du patient ouverte", + vu="la page d'accueil Aiva Urgence", + demande="corriger", + ) + + +def test_coerce_turns_legacy_validation_required_into_structured_pause(): + message = coerce_supervised_pause_message("Validation requise") + + assert validate_supervised_pause_message(message).valid + assert "Validation requise" not in message + assert message.splitlines()[0].startswith("J'essaie de :") + + +def test_coerce_keeps_clear_legacy_request_as_demande(): + message = coerce_supervised_pause_message( + "Valider le dossier patient avant enregistrement", + intention="enregistrer le dossier patient", + attendu="avoir ton accord avant l'enregistrement", + vu="le formulaire patient est pret a etre enregistre", + ) + + assert validate_supervised_pause_message(message).valid + assert "Valider le dossier patient avant enregistrement" in message + + +def test_warn_visible_message_logs_without_modifying_message(caplog): + raw = "Validation requise" + + returned = warn_visible_message(raw, source="unit.raw") + + assert returned == raw + assert "invalid_message source=unit.raw" in caplog.text + assert "generic_phrase" in caplog.text + + +def test_warn_visible_message_accepts_supervised_pause_without_log(caplog): + message = _valid_pause() + + returned = warn_visible_message( + message, + source="unit.final", + supervised_pause=True, + ) + + assert returned == message + assert "invalid_message" not in caplog.text + + +def 
test_refuses_overlong_fields_and_messages(): + long_field = "ouvrir " + ("le dossier patient " * 45) + assert len(long_field) > MAX_FIELD_CHARS + + message = "\n".join( + [ + f"J'essaie de : {long_field}", + "J'attendais : voir la fiche patient ouverte", + "Je vois : la page d'accueil Aiva Urgence", + "Peux-tu : ouvrir le dossier patient puis me rendre la main", + ] + ) + + codes = _issue_codes(message) + + assert "field_too_long" in codes + assert "message_too_long" in codes diff --git a/tests/unit/test_lea_micro_preflight.py b/tests/unit/test_lea_micro_preflight.py new file mode 100644 index 000000000..0e74ba706 --- /dev/null +++ b/tests/unit/test_lea_micro_preflight.py @@ -0,0 +1,109 @@ +import json +import sys +from pathlib import Path + + +ROOT = Path(__file__).parent.parent.parent +sys.path.insert(0, str(ROOT)) + +from tools import lea_micro_preflight as preflight + + +FREE_OUTPUT = """\ + total used free shared buff/cache available +Mem: 64202 15500 32000 123 16702 47000 +Swap: 8192 1024 7168 +""" + + +def test_parse_free_m_extracts_ram_and_swap(): + parsed = preflight.parse_free_m(FREE_OUTPUT) + + assert parsed["mem"]["total"] == 64202 + assert parsed["mem"]["available"] == 47000 + assert parsed["swap"] == {"total": 8192, "used": 1024, "free": 7168} + + +def test_parse_free_m_accepts_french_locale_labels(): + output = """\ + total utilisé libre partagé tamp/cache disponible +Mem: 126365 60425 2919 12847 77071 65939 +Échange: 8191 3397 4794 +""" + + parsed = preflight.parse_free_m(output) + + assert parsed["mem"]["used"] == 60425 + assert parsed["mem"]["available"] == 65939 + assert parsed["swap"] == {"total": 8191, "used": 3397, "free": 4794} + + +def test_parse_nvidia_smi_memory_multiple_gpus(): + parsed = preflight.parse_nvidia_smi_memory("8123, 24576\n3999 MiB, 12288 MiB\n") + + assert parsed == [ + {"free_mib": 8123, "total_mib": 24576}, + {"free_mib": 3999, "total_mib": 12288}, + ] + + +def test_extract_ollama_tags_accepts_name_and_model_keys(): + tags 
= preflight.extract_ollama_tags( + { + "models": [ + {"name": "qwen2.5vl:7b-rpa"}, + {"model": "qwen2.5:7b"}, + {"name": ""}, + "ignored", + ] + } + ) + + assert tags == {"qwen2.5vl:7b-rpa", "qwen2.5:7b"} + + +def _install_fakes(monkeypatch, *, resident=True, tags_ok=True, swap_used=1024): + free_output = FREE_OUTPUT.replace("1024", str(swap_used), 1) + + def fake_run_command(args, timeout=5.0): + if args[0] == "nvidia-smi": + return 0, "8123, 24576", "" + if args[0] == "free": + return 0, free_output, "" + raise AssertionError(f"unexpected command: {args!r}") + + def fake_http_json(url, timeout=2.0): + if url.endswith("/api/tags"): + models = [{"name": "qwen2.5vl:7b-rpa"}] + if tags_ok: + models.append({"name": "qwen2.5:7b"}) + return True, {"models": models}, "" + if url.endswith("/api/ps"): + models = [{"name": "qwen2.5vl:7b-rpa"}] if resident else [] + return True, {"models": models}, "" + raise AssertionError(f"unexpected url: {url!r}") + + monkeypatch.setattr(preflight, "run_command", fake_run_command) + monkeypatch.setattr(preflight, "http_json", fake_http_json) + + +def test_main_returns_zero_when_all_checks_ok(monkeypatch, capsys): + _install_fakes(monkeypatch) + + assert preflight.main(["--json"]) == 0 + report = json.loads(capsys.readouterr().out) + assert report["overall"] == "ok" + assert report["warmup"] == "disabled" + + +def test_main_warns_when_vlm_not_resident_and_strict_exits_one(monkeypatch): + _install_fakes(monkeypatch, resident=False) + + assert preflight.main([]) == 0 + assert preflight.main(["--strict"]) == 1 + + +def test_main_fails_when_required_model_missing(monkeypatch): + _install_fakes(monkeypatch, tags_ok=False) + + assert preflight.main([]) == 2 diff --git a/tests/unit/test_lea_notifications.py b/tests/unit/test_lea_notifications.py index 696100699..9b0c4b9f1 100644 --- a/tests/unit/test_lea_notifications.py +++ b/tests/unit/test_lea_notifications.py @@ -88,9 +88,9 @@ class TestExtraction: assert _nettoyer_description_cible(None) == 
"" def test_nettoyer_description_tronque(self): - longue = "x" * 200 + longue = "x" * 1100 resultat = _nettoyer_description_cible(longue) - assert len(resultat) <= 80 + assert len(resultat) <= 1024 assert resultat.endswith("...") @@ -345,9 +345,10 @@ class TestFormatterErreurGenerique: assert msg.niveau == NiveauMessage.ATTENTION def test_message_inconnu_tronque(self): - long_msg = "erreur très longue " * 20 + long_msg = "erreur très longue " * 80 msg = formatter_erreur_generique(long_msg) - assert len(msg.corps) <= 200 # tronqué avec "..." + assert len(msg.corps) <= len("J'ai rencontré un souci : ") + 1024 + assert msg.corps.endswith("...") def test_pas_de_code_technique_dans_message_utilisateur(self): """Les messages présentés à l'utilisateur ne doivent pas contenir de