diff --git a/agent_v0/agent_v1/main.py b/agent_v0/agent_v1/main.py index fa1798c17..4dd136602 100644 --- a/agent_v0/agent_v1/main.py +++ b/agent_v0/agent_v1/main.py @@ -21,6 +21,7 @@ from .network.streamer import TraceStreamer from .ui.shared_state import AgentState from .ui.smart_tray import SmartTrayV1 from .ui.chat_window import ChatWindow +from .ui.capture_server import CaptureServer from .session.storage import SessionStorage from .vision.capturer import VisionCapturer @@ -97,6 +98,10 @@ class AgentV1: threading.Thread(target=self._replay_poll_loop, daemon=True).start() threading.Thread(target=self._background_heartbeat_loop, daemon=True).start() + # Mini-serveur HTTP pour captures a la demande (port 5006) + self._capture_server = CaptureServer() + self._capture_server.start() + # UI Tray intelligent (remplace TrayAppV1, plus de PyQt5) self.ui = SmartTrayV1( self.start_session, diff --git a/agent_v0/agent_v1/ui/capture_server.py b/agent_v0/agent_v1/ui/capture_server.py new file mode 100644 index 000000000..8220597e9 --- /dev/null +++ b/agent_v0/agent_v1/ui/capture_server.py @@ -0,0 +1,123 @@ +""" +Mini serveur HTTP sur l'agent Windows pour les captures d'ecran a la demande. + +Ecoute sur le port 5006 (configurable via RPA_CAPTURE_PORT). +Endpoints : + GET /capture -> screenshot frais en base64 (JPEG) + GET /health -> {"status": "ok"} +""" +import threading +import logging +import json +import base64 +import io +import os +import time +from http.server import HTTPServer, BaseHTTPRequestHandler + +logger = logging.getLogger(__name__) + +CAPTURE_PORT = int(os.environ.get("RPA_CAPTURE_PORT", "5006")) + + +class CaptureHandler(BaseHTTPRequestHandler): + """Retourne un screenshot frais a chaque requete GET /capture.""" + + def do_GET(self): + if self.path == "/capture": + self._handle_capture() + elif self.path == "/health": + self._send_json(200, {"status": "ok"}) + else: + self._send_json(404, {"error": "not found"}) + + def do_OPTIONS(self): + """Gestion CORS preflight.""" + self.send_response(200) + self._cors_headers() + self.send_header("Content-Length", "0") + self.end_headers() + + # ------------------------------------------------------------------ + + def _handle_capture(self): + """Capture l'ecran principal et le renvoie en base64 JPEG.""" + t0 = time.perf_counter() + try: + import mss + from PIL import Image + + with mss.mss() as sct: + monitor = sct.monitors[1] # ecran principal + raw = sct.grab(monitor) + + img = Image.frombytes("RGB", raw.size, raw.bgra, "raw", "BGRX") + + buf = io.BytesIO() + img.save(buf, format="JPEG", quality=80) + img_b64 = base64.b64encode(buf.getvalue()).decode() + + elapsed_ms = (time.perf_counter() - t0) * 1000 + logger.info(f"Capture {img.width}x{img.height} en {elapsed_ms:.0f}ms") + + self._send_json(200, { + "image": img_b64, + "width": img.width, + "height": img.height, + "format": "jpeg", + "source": "windows_live", + "capture_ms": round(elapsed_ms), + }) + + except Exception as e: + logger.error(f"Erreur capture : {e}") + self._send_json(500, {"error": str(e)}) + + # ------------------------------------------------------------------ + + def _send_json(self, code: int, data: dict): + body = json.dumps(data).encode() + self.send_response(code) + self.send_header("Content-Type", "application/json") + self._cors_headers() + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + + def _cors_headers(self): + self.send_header("Access-Control-Allow-Origin", "*") + self.send_header("Access-Control-Allow-Methods", "GET, OPTIONS") + self.send_header("Access-Control-Allow-Headers", "Content-Type") + + def log_message(self, format, *args): + """Supprime les logs HTTP par defaut (trop verbeux).""" + pass + + +class CaptureServer: + """Serveur de capture d'ecran en temps reel (thread daemon).""" + + def __init__(self, port: int = CAPTURE_PORT): + self._port = port + self._server: HTTPServer | None = None + self._thread: threading.Thread | None = None + + def start(self): + """Demarre le serveur dans un thread daemon.""" + try: + self._server = HTTPServer(("0.0.0.0", self._port), CaptureHandler) + self._thread = threading.Thread( + target=self._server.serve_forever, daemon=True + ) + self._thread.start() + logger.info(f"Capture server demarre sur le port {self._port}") + print(f"[CAPTURE] Serveur de capture demarre sur le port {self._port}") + except Exception as e: + logger.error(f"Impossible de demarrer le capture server : {e}") + print(f"[CAPTURE] ERREUR demarrage : {e}") + + def stop(self): + """Arrete le serveur proprement.""" + if self._server: + self._server.shutdown() + logger.info("Capture server arrete") diff --git a/visual_workflow_builder/backend/api/screen_capture.py b/visual_workflow_builder/backend/api/screen_capture.py index 780be3b16..72b50d661 100644 --- a/visual_workflow_builder/backend/api/screen_capture.py +++ b/visual_workflow_builder/backend/api/screen_capture.py @@ -124,57 +124,36 @@ def capture_screen(): @cross_origin() def capture_windows(): """ - Récupère le dernier screenshot du PC Windows (via streaming server). + Capture l'ecran Windows EN TEMPS REEL via le mini-serveur de l'agent. - Le client Agent V1 envoie des heartbeats toutes les 5s. - On récupère le plus récent comme capture. + L'agent V1 expose un serveur HTTP sur le port 5006 qui retourne + un screenshot frais a chaque requete (< 100ms). + Plus besoin de lire de vieux fichiers heartbeat sur disque. """ - import glob - from pathlib import Path + import requests as http_client - # Remonter jusqu'à la racine du projet (rpa_vision_v3/) - project_root = Path(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))) - live_dir = project_root / "data" / "training" / "live_sessions" - - # Chercher aussi dans les sous-dossiers machine (multi-machine) - import time as _time - - # Chercher le screenshot le plus récent dans TOUS les dossiers - all_shots = [] - for pattern in ["bg_*/shots/heartbeat_*.png", "sess_*/shots/heartbeat_*.png", - "*/bg_*/shots/heartbeat_*.png", "bg_*/shots/shot_*_full.png", - "sess_*/shots/shot_*_full.png"]: - all_shots.extend(live_dir.glob(pattern)) - - if not all_shots: - return jsonify({'error': 'Aucun screenshot Windows. Lancez l\'agent V1 sur le PC cible.'}), 404 - - # Trier par date de modification (le plus récent en premier) - all_shots.sort(key=lambda p: p.stat().st_mtime, reverse=True) - latest_shot = all_shots[0] - age_seconds = _time.time() - latest_shot.stat().st_mtime - - if not latest_shot: - return jsonify({'error': 'Aucun screenshot Windows disponible'}), 404 + agent_host = os.environ.get('RPA_WINDOWS_AGENT_HOST', '192.168.1.11') + agent_port = int(os.environ.get('RPA_WINDOWS_AGENT_PORT', '5006')) + agent_url = f'http://{agent_host}:{agent_port}/capture' try: - from PIL import Image - img = Image.open(latest_shot) - buf = io.BytesIO() - img.save(buf, format='PNG') - img_base64 = base64.b64encode(buf.getvalue()).decode('utf-8') - + resp = http_client.get(agent_url, timeout=10) + if resp.ok: + return jsonify(resp.json()) return jsonify({ - 'image': img_base64, - 'width': img.width, - 'height': img.height, - 'format': 'png', - 'source': 'windows', - 'file': str(latest_shot.name), - 'session': latest_shot.parent.parent.name, - 'age_seconds': round(age_seconds, 1), - 'fresh': age_seconds < 30, - }) + 'error': f'Agent a repondu {resp.status_code}', + 'detail': resp.text[:500], + }), 503 + except http_client.ConnectionError: + return jsonify({ + 'error': f'Agent Windows non connecte ({agent_host}:{agent_port})', + 'hint': 'Verifiez que l\'agent V1 est lance sur le PC Windows ' + 'et que le port 5006 est accessible.', + }), 503 + except http_client.Timeout: + return jsonify({ + 'error': 'Timeout — l\'agent n\'a pas repondu dans les 10s', + }), 504 except Exception as e: return jsonify({'error': str(e)}), 500 diff --git a/visual_workflow_builder/frontend_v4/src/components/CapturePanel.tsx b/visual_workflow_builder/frontend_v4/src/components/CapturePanel.tsx index a5296a5ba..6a9252ef9 100644 --- a/visual_workflow_builder/frontend_v4/src/components/CapturePanel.tsx +++ b/visual_workflow_builder/frontend_v4/src/components/CapturePanel.tsx @@ -111,25 +111,28 @@ export default function CapturePanel({ } }; - // Capture intelligente — auto-détection OS + // Capture intelligente — tente d'abord l'agent Windows distant, + // puis fallback sur la capture locale si l'agent est injoignable. const doSmartCapture = async () => { - const isWindows = navigator.platform?.includes('Win') || navigator.userAgent?.includes('Windows'); - if (isWindows) { - try { - const resp = await fetch('/api/screen-capture/capture-windows', { method: 'POST' }); - const data = await resp.json(); - if (data.image) { - setCurrentCapture({ - screenshot_base64: data.image, - width: data.width, - height: data.height, - source: 'windows', - } as any); - } - } catch {} - } else { - onCapture(); + try { + const resp = await fetch('/api/screen-capture/capture-windows', { method: 'POST' }); + const data = await resp.json(); + if (resp.ok && data.image) { + setCurrentCapture({ + screenshot_base64: data.image, + width: data.width, + height: data.height, + source: data.source || 'windows', + } as any); + return; + } + // Agent indisponible — fallback capture locale + console.warn('Agent Windows indisponible, fallback local:', data.error); + } catch (err) { + console.warn('Erreur capture Windows, fallback local:', err); } + // Fallback : capture locale (ecran du serveur Linux) + onCapture(); }; const handleTimerCapture = () => {