feat: replay visuel Windows opérationnel — template matching + VWB complet

- Bouton "Windows" dans VWB pour exécuter sur le PC distant
- Template matching OpenCV multi-scale pour localiser les ancres visuelles
- Proxy VWB→streaming server avec chargement ancre (thumb, pas full)
- Fix executor Windows : mss lazy, result reporting, debug prints
- Fix poll replay permanent (sans session active)
- Mapping types VWB→executor (click_anchor→click, type_text→type)
- CORS streaming server, capture Windows dans VWB
- Dédup heartbeats côté client (hash perceptuel)
- Mode cloud VLM configurable via RPA_VLM_MODEL
- Fix resolve_target : pas de ScreenAnalyzer fallback (trop lent)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dom
2026-03-17 18:56:44 +01:00
parent dd149c1cbb
commit 371db69543
7 changed files with 361 additions and 15 deletions

View File

@@ -120,6 +120,60 @@ def capture_screen():
}), 500
@screen_capture_bp.route('/capture-windows', methods=['POST'])
@cross_origin()
def capture_windows():
    """
    Return the most recent full-screen screenshot stored for the Windows PC.

    Screenshots are read from ``data/training/live_sessions/sess_*/shots``
    under the project root. The three sessions whose names sort last are
    scanned in that order, and the newest PNG (by modification time) whose
    file name contains "full", "heartbeat" or "focus" is returned.

    NOTE(review): the original note says the Agent V1 client uploads a
    heartbeat about every 5 s through the streaming server; that cannot be
    confirmed from this function alone.

    Returns:
        A JSON response with the base64-encoded PNG (``image``), its
        ``width`` and ``height``, ``format``, ``source``, the ``file`` name
        and the ``session`` directory name. Responds with 404 when no
        session or no matching screenshot exists, and with 500 when the
        image cannot be read or encoded.
    """
    from pathlib import Path

    # Walk up four directory levels from this file to reach the project
    # root (rpa_vision_v3/). Path.parent stays on the root once reached,
    # like repeated os.path.dirname calls.
    project_root = Path(os.path.abspath(__file__)).parent.parent.parent.parent
    live_dir = project_root / "data" / "training" / "live_sessions"

    # Most recent session first, ordered by session directory name.
    sessions = sorted(
        live_dir.glob("sess_*/shots"),
        key=lambda p: p.parent.name,
        reverse=True,
    )
    if not sessions:
        return jsonify({'error': 'Aucune session Windows trouvée'}), 404

    # Only file names tagged as full-screen captures are considered;
    # other PNGs in the folder (e.g. crops) are ignored.
    wanted_tags = ("full", "heartbeat", "focus")
    latest_shot = None
    for session_shots in sessions[:3]:
        dated_shots = []
        for shot in session_shots.glob("*.png"):
            if not any(tag in shot.name for tag in wanted_tags):
                continue
            try:
                dated_shots.append((shot.stat().st_mtime, shot))
            except OSError:
                # The file disappeared (or became unreadable) between the
                # directory listing and stat(); skip it instead of failing.
                continue
        if dated_shots:
            latest_shot = max(dated_shots, key=lambda item: item[0])[1]
            break

    if not latest_shot:
        return jsonify({'error': 'Aucun screenshot Windows disponible'}), 404

    try:
        from PIL import Image

        # The context manager releases the underlying file handle even
        # when decoding or re-encoding fails.
        with Image.open(latest_shot) as img:
            width, height = img.size
            buf = io.BytesIO()
            img.save(buf, format='PNG')

        img_base64 = base64.b64encode(buf.getvalue()).decode('utf-8')
        return jsonify({
            'image': img_base64,
            'width': width,
            'height': height,
            'format': 'png',
            'source': 'windows',
            'file': str(latest_shot.name),
            'session': latest_shot.parent.parent.name,
        })
    except Exception as e:
        # Endpoint boundary: report any read/encode failure as a 500.
        return jsonify({'error': str(e)}), 500
@screen_capture_bp.route('/detect-elements', methods=['POST'])
@cross_origin()
def detect_elements():