Suppression du .git embarqué dans agent_v0/ — le code est maintenant tracké normalement dans le repo principal. Inclut : agent_v1 (client), server_v1 (streaming), lea_ui (chat client) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
173 lines
6.6 KiB
Python
# agent_v0/server_v1/worker_stream.py
|
|
"""
|
|
Worker de Streaming Temps Réel — délègue au StreamProcessor (core pipeline).
|
|
|
|
Surveille les sessions live, analyse screenshots et crops via ScreenAnalyzer + CLIP,
|
|
et met à jour le graphe d'intention en temps réel.
|
|
|
|
Tous les calculs GPU tournent sur le serveur (RTX 5070).
|
|
"""
|
|
|
|
import json
import logging
import threading
import time
from pathlib import Path
from typing import Dict, Optional, Set

from .stream_processor import StreamProcessor
|
|
|
|
logger = logging.getLogger("worker_stream")
|
|
|
|
|
|
class StreamWorker:
    """
    Worker that watches live sessions on disk and delegates all processing
    to a shared StreamProcessor.

    Two operating modes:
    - Polling (start): a loop that watches the live_sessions directory
    - Direct (process_*_direct): called directly by the API for immediate processing
    """

    def __init__(self, live_dir: str = "data/training/live_sessions", processor: StreamProcessor = None):
        """
        Args:
            live_dir: directory containing one sub-directory per live session.
            processor: shared StreamProcessor; a new one is created when omitted.
        """
        self.live_dir = Path(live_dir)
        self.live_dir.mkdir(parents=True, exist_ok=True)
        self.running = False
        # Absolute paths of screenshots already handled.
        # NOTE(review): grows unboundedly for the worker's lifetime — acceptable
        # for session-scoped runs, revisit if the worker is long-lived.
        self.processed_files: Set[str] = set()
        # Byte offset already ingested per live_events.jsonl file, so each
        # appended event is forwarded to the processor exactly once.
        self._event_offsets: Dict[str, int] = {}

        # Shared StreamProcessor (created here if not supplied by the caller).
        self.processor = processor or StreamProcessor(data_dir=str(self.live_dir))

        self._thread: Optional[threading.Thread] = None

    def start(self, blocking: bool = True):
        """Start the worker in polling mode.

        Args:
            blocking: run the poll loop on the calling thread when True,
                otherwise on a daemon background thread.
        """
        self.running = True
        logger.info("StreamWorker démarré — surveillance des sessions live.")

        if blocking:
            self._poll_loop()
        else:
            self._thread = threading.Thread(target=self._poll_loop, daemon=True)
            self._thread.start()

    def stop(self):
        """Stop the worker cleanly, joining the polling thread when possible."""
        self.running = False
        # Joining from inside the polling thread itself would raise RuntimeError,
        # so only join when called from another thread.
        if (
            self._thread
            and self._thread.is_alive()
            and self._thread is not threading.current_thread()
        ):
            self._thread.join(timeout=5)
        logger.info("StreamWorker arrêté.")

    def _poll_loop(self):
        """Polling loop over live sessions on disk (every 0.5 s)."""
        while self.running:
            try:
                self._check_live_sessions()
                time.sleep(0.5)
            except KeyboardInterrupt:
                self.stop()
            except Exception as e:
                # Keep the worker alive on transient errors (I/O, processor failures).
                logger.error(f"Erreur worker loop: {e}")

    def _check_live_sessions(self):
        """Scan the live directory and process every session sub-directory."""
        if not self.live_dir.exists():
            return
        for session_path in self.live_dir.iterdir():
            if session_path.is_dir():
                self._process_session_incremental(session_path)

    def _process_session_incremental(self, session_path: Path):
        """Analyse the new items of an active session: screenshots, then events."""
        session_id = session_path.name
        shots_dir = session_path / "shots"

        # Register the session if not done yet.
        self.processor.session_manager.get_or_create(session_id)

        # Process screenshots that have not been seen before.
        if shots_dir.exists():
            for shot_file in sorted(shots_dir.glob("*.png")):
                file_key = str(shot_file)
                if file_key in self.processed_files:
                    continue
                self._dispatch_shot(session_id, shot_file)
                self.processed_files.add(file_key)

        # Process newly appended events.
        event_file = session_path / "live_events.jsonl"
        if event_file.exists():
            self._ingest_events(session_id, event_file)

    def _dispatch_shot(self, session_id: str, shot_file: Path):
        """Route one screenshot file to the right processing path by its name."""
        shot_id = shot_file.stem
        if "_crop" in shot_id:
            self.processor.process_crop(session_id, shot_id, str(shot_file))
            logger.debug(f"Crop traité: {shot_id}")
        elif shot_id.startswith(("heartbeat_", "focus_", "res_shot_")):
            # No GPU analysis for heartbeat, focus and res_shot captures.
            self.processor.session_manager.add_screenshot(session_id, shot_id, str(shot_file))
        elif shot_id.startswith("shot_") and "_full" in shot_id:
            result = self.processor.process_screenshot(session_id, shot_id, str(shot_file))
            logger.info(
                f"Screenshot analysé: {shot_id} | "
                f"{result.get('ui_elements_count', 0)} UI, "
                f"{result.get('text_detected', 0)} textes"
            )
        else:
            # Unrecognized screenshots: store without analysing.
            self.processor.session_manager.add_screenshot(session_id, shot_id, str(shot_file))

    def _ingest_events(self, session_id: str, event_file: Path):
        """Read and ingest only the events appended since the last poll.

        A per-file byte offset is kept so each event reaches the processor
        exactly once. (The previous size-keyed dedup re-ingested the whole
        file every time it grew, duplicating all earlier events.)
        """
        file_key = str(event_file)
        offset = self._event_offsets.get(file_key, 0)
        try:
            with open(event_file, "rb") as f:
                f.seek(offset)
                for raw in f:
                    if not raw.endswith(b"\n"):
                        # A writer may be mid-append: leave the partial last
                        # line for the next poll.
                        break
                    offset += len(raw)
                    # Commit progress per line so a failing event is not
                    # replayed forever.
                    self._event_offsets[file_key] = offset
                    line = raw.strip()
                    if not line:
                        continue
                    try:
                        # json.loads accepts UTF-8 bytes directly.
                        event_data = json.loads(line)
                        self.processor.process_event(session_id, event_data)
                    except (json.JSONDecodeError, UnicodeDecodeError):
                        # Skip malformed lines permanently.
                        continue
        except Exception as e:
            logger.error(f"Erreur lecture événements {event_file}: {e}")

    # =========================================================================
    # Direct API (called by api_stream.py)
    # =========================================================================

    def process_screenshot_direct(self, session_id: str, shot_id: str, file_path: str):
        """Immediately process one full screenshot (called by the API)."""
        return self.processor.process_screenshot(session_id, shot_id, file_path)

    def process_crop_direct(self, session_id: str, shot_id: str, file_path: str):
        """Immediately process one crop (called by the API)."""
        return self.processor.process_crop(session_id, shot_id, file_path)

    def process_event_direct(self, session_id: str, event_data: dict):
        """Immediately process one event (called by the API)."""
        return self.processor.process_event(session_id, event_data)

    def finalize_session(self, session_id: str):
        """Finalize a session and build its workflow."""
        return self.processor.finalize_session(session_id)

    @property
    def stats(self):
        # Expose the processor's statistics as the worker's own.
        return self.processor.stats
|
|
|
|
|
|
if __name__ == "__main__":
    # Standalone entry point: configure logging and run the worker in
    # blocking polling mode.
    logging.basicConfig(
        format="%(asctime)s [STREAM-WORKER] %(message)s",
        level=logging.INFO,
    )
    StreamWorker().start()
|