feat: capture Windows temps réel via mini serveur HTTP (port 5006)

- CaptureServer : serveur HTTP daemon sur l'agent Windows
- Capture fraîche mss en ~94ms à chaque requête
- Plus de lecture de vieux heartbeats sur disque
- Fallback capture locale si agent indisponible
- Firewall Windows port 5006 configuré

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dom
2026-03-18 11:20:57 +01:00
parent ae65be2555
commit d4871249ea
4 changed files with 172 additions and 62 deletions

View File

@@ -21,6 +21,7 @@ from .network.streamer import TraceStreamer
from .ui.shared_state import AgentState from .ui.shared_state import AgentState
from .ui.smart_tray import SmartTrayV1 from .ui.smart_tray import SmartTrayV1
from .ui.chat_window import ChatWindow from .ui.chat_window import ChatWindow
from .ui.capture_server import CaptureServer
from .session.storage import SessionStorage from .session.storage import SessionStorage
from .vision.capturer import VisionCapturer from .vision.capturer import VisionCapturer
@@ -97,6 +98,10 @@ class AgentV1:
threading.Thread(target=self._replay_poll_loop, daemon=True).start() threading.Thread(target=self._replay_poll_loop, daemon=True).start()
threading.Thread(target=self._background_heartbeat_loop, daemon=True).start() threading.Thread(target=self._background_heartbeat_loop, daemon=True).start()
# Mini-serveur HTTP pour captures a la demande (port 5006)
self._capture_server = CaptureServer()
self._capture_server.start()
# UI Tray intelligent (remplace TrayAppV1, plus de PyQt5) # UI Tray intelligent (remplace TrayAppV1, plus de PyQt5)
self.ui = SmartTrayV1( self.ui = SmartTrayV1(
self.start_session, self.start_session,

View File

@@ -0,0 +1,123 @@
"""
Mini serveur HTTP sur l'agent Windows pour les captures d'ecran a la demande.
Ecoute sur le port 5006 (configurable via RPA_CAPTURE_PORT).
Endpoints :
GET /capture -> screenshot frais en base64 (JPEG)
GET /health -> {"status": "ok"}
"""
import threading
import logging
import json
import base64
import io
import os
import time
from http.server import HTTPServer, BaseHTTPRequestHandler
logger = logging.getLogger(__name__)
CAPTURE_PORT = int(os.environ.get("RPA_CAPTURE_PORT", "5006"))
class CaptureHandler(BaseHTTPRequestHandler):
"""Retourne un screenshot frais a chaque requete GET /capture."""
def do_GET(self):
if self.path == "/capture":
self._handle_capture()
elif self.path == "/health":
self._send_json(200, {"status": "ok"})
else:
self._send_json(404, {"error": "not found"})
def do_OPTIONS(self):
"""Gestion CORS preflight."""
self.send_response(200)
self._cors_headers()
self.send_header("Content-Length", "0")
self.end_headers()
# ------------------------------------------------------------------
def _handle_capture(self):
"""Capture l'ecran principal et le renvoie en base64 JPEG."""
t0 = time.perf_counter()
try:
import mss
from PIL import Image
with mss.mss() as sct:
monitor = sct.monitors[1] # ecran principal
raw = sct.grab(monitor)
img = Image.frombytes("RGB", raw.size, raw.bgra, "raw", "BGRX")
buf = io.BytesIO()
img.save(buf, format="JPEG", quality=80)
img_b64 = base64.b64encode(buf.getvalue()).decode()
elapsed_ms = (time.perf_counter() - t0) * 1000
logger.info(f"Capture {img.width}x{img.height} en {elapsed_ms:.0f}ms")
self._send_json(200, {
"image": img_b64,
"width": img.width,
"height": img.height,
"format": "jpeg",
"source": "windows_live",
"capture_ms": round(elapsed_ms),
})
except Exception as e:
logger.error(f"Erreur capture : {e}")
self._send_json(500, {"error": str(e)})
# ------------------------------------------------------------------
def _send_json(self, code: int, data: dict):
body = json.dumps(data).encode()
self.send_response(code)
self.send_header("Content-Type", "application/json")
self._cors_headers()
self.send_header("Content-Length", str(len(body)))
self.end_headers()
self.wfile.write(body)
def _cors_headers(self):
self.send_header("Access-Control-Allow-Origin", "*")
self.send_header("Access-Control-Allow-Methods", "GET, OPTIONS")
self.send_header("Access-Control-Allow-Headers", "Content-Type")
def log_message(self, format, *args):
"""Supprime les logs HTTP par defaut (trop verbeux)."""
pass
class CaptureServer:
"""Serveur de capture d'ecran en temps reel (thread daemon)."""
def __init__(self, port: int = CAPTURE_PORT):
self._port = port
self._server: HTTPServer | None = None
self._thread: threading.Thread | None = None
def start(self):
"""Demarre le serveur dans un thread daemon."""
try:
self._server = HTTPServer(("0.0.0.0", self._port), CaptureHandler)
self._thread = threading.Thread(
target=self._server.serve_forever, daemon=True
)
self._thread.start()
logger.info(f"Capture server demarre sur le port {self._port}")
print(f"[CAPTURE] Serveur de capture demarre sur le port {self._port}")
except Exception as e:
logger.error(f"Impossible de demarrer le capture server : {e}")
print(f"[CAPTURE] ERREUR demarrage : {e}")
def stop(self):
"""Arrete le serveur proprement."""
if self._server:
self._server.shutdown()
logger.info("Capture server arrete")

View File

@@ -124,57 +124,36 @@ def capture_screen():
@cross_origin() @cross_origin()
def capture_windows(): def capture_windows():
""" """
Récupère le dernier screenshot du PC Windows (via streaming server). Capture l'ecran Windows EN TEMPS REEL via le mini-serveur de l'agent.
Le client Agent V1 envoie des heartbeats toutes les 5s. L'agent V1 expose un serveur HTTP sur le port 5006 qui retourne
On récupère le plus récent comme capture. un screenshot frais a chaque requete (< 100ms).
Plus besoin de lire de vieux fichiers heartbeat sur disque.
""" """
import glob import requests as http_client
from pathlib import Path
# Remonter jusqu'à la racine du projet (rpa_vision_v3/) agent_host = os.environ.get('RPA_WINDOWS_AGENT_HOST', '192.168.1.11')
project_root = Path(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))) agent_port = int(os.environ.get('RPA_WINDOWS_AGENT_PORT', '5006'))
live_dir = project_root / "data" / "training" / "live_sessions" agent_url = f'http://{agent_host}:{agent_port}/capture'
# Chercher aussi dans les sous-dossiers machine (multi-machine)
import time as _time
# Chercher le screenshot le plus récent dans TOUS les dossiers
all_shots = []
for pattern in ["bg_*/shots/heartbeat_*.png", "sess_*/shots/heartbeat_*.png",
"*/bg_*/shots/heartbeat_*.png", "bg_*/shots/shot_*_full.png",
"sess_*/shots/shot_*_full.png"]:
all_shots.extend(live_dir.glob(pattern))
if not all_shots:
return jsonify({'error': 'Aucun screenshot Windows. Lancez l\'agent V1 sur le PC cible.'}), 404
# Trier par date de modification (le plus récent en premier)
all_shots.sort(key=lambda p: p.stat().st_mtime, reverse=True)
latest_shot = all_shots[0]
age_seconds = _time.time() - latest_shot.stat().st_mtime
if not latest_shot:
return jsonify({'error': 'Aucun screenshot Windows disponible'}), 404
try: try:
from PIL import Image resp = http_client.get(agent_url, timeout=10)
img = Image.open(latest_shot) if resp.ok:
buf = io.BytesIO() return jsonify(resp.json())
img.save(buf, format='PNG')
img_base64 = base64.b64encode(buf.getvalue()).decode('utf-8')
return jsonify({ return jsonify({
'image': img_base64, 'error': f'Agent a repondu {resp.status_code}',
'width': img.width, 'detail': resp.text[:500],
'height': img.height, }), 503
'format': 'png', except http_client.ConnectionError:
'source': 'windows', return jsonify({
'file': str(latest_shot.name), 'error': f'Agent Windows non connecte ({agent_host}:{agent_port})',
'session': latest_shot.parent.parent.name, 'hint': 'Verifiez que l\'agent V1 est lance sur le PC Windows '
'age_seconds': round(age_seconds, 1), 'et que le port 5006 est accessible.',
'fresh': age_seconds < 30, }), 503
}) except http_client.Timeout:
return jsonify({
'error': 'Timeout — l\'agent n\'a pas repondu dans les 10s',
}), 504
except Exception as e: except Exception as e:
return jsonify({'error': str(e)}), 500 return jsonify({'error': str(e)}), 500

View File

@@ -111,25 +111,28 @@ export default function CapturePanel({
} }
}; };
// Capture intelligente — auto-détection OS // Capture intelligente — tente d'abord l'agent Windows distant,
// puis fallback sur la capture locale si l'agent est injoignable.
const doSmartCapture = async () => { const doSmartCapture = async () => {
const isWindows = navigator.platform?.includes('Win') || navigator.userAgent?.includes('Windows'); try {
if (isWindows) { const resp = await fetch('/api/screen-capture/capture-windows', { method: 'POST' });
try { const data = await resp.json();
const resp = await fetch('/api/screen-capture/capture-windows', { method: 'POST' }); if (resp.ok && data.image) {
const data = await resp.json(); setCurrentCapture({
if (data.image) { screenshot_base64: data.image,
setCurrentCapture({ width: data.width,
screenshot_base64: data.image, height: data.height,
width: data.width, source: data.source || 'windows',
height: data.height, } as any);
source: 'windows', return;
} as any); }
} // Agent indisponible — fallback capture locale
} catch {} console.warn('Agent Windows indisponible, fallback local:', data.error);
} else { } catch (err) {
onCapture(); console.warn('Erreur capture Windows, fallback local:', err);
} }
// Fallback : capture locale (ecran du serveur Linux)
onCapture();
}; };
const handleTimerCapture = () => { const handleTimerCapture = () => {