fix: replay routing — lookup machine_id dans replay_states + auto-inject machine_id

- /replay/next cherche d'abord dans machine_replay_target (mapping explicite), puis, à défaut, dans les replay_states actifs par machine_id
- execute-windows auto-détecte la machine Windows connectée
- resolve_target utilise le ThreadPool par défaut de la boucle asyncio (et non le GPU executor, qui peut être saturé ; le template matching est CPU-only)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dom
2026-03-18 12:05:42 +01:00
parent 81d2d016ff
commit 58e8bbafff
2 changed files with 50 additions and 23 deletions

View File

@@ -1166,26 +1166,37 @@ async def get_next_action(session_id: str, machine_id: str = "default"):
with _replay_lock:
queue = _replay_queues.get(session_id, [])
if not queue:
# Seul le lookup machine_replay_target est conservé (sûr : mapping explicite
# créé lors du POST /replay). Le cross-session stealing a été supprimé
# car il causait des race conditions entre agents.
if machine_id != "default":
target_sid = _machine_replay_target.get(machine_id)
if target_sid and target_sid != session_id:
target_queue = _replay_queues.get(target_sid, [])
if target_queue:
logger.info(
f"Replay machine-target: {machine_id} -> "
f"transfert queue {target_sid} -> {session_id}"
)
queue = target_queue
_replay_queues[session_id] = target_queue
del _replay_queues[target_sid]
for state in _replay_states.values():
if state["session_id"] == target_sid and state["status"] == "running":
state["session_id"] = session_id
_machine_replay_target[machine_id] = session_id
if not queue and machine_id != "default":
# Lookup 1 : machine_replay_target (mapping explicite POST /replay)
target_sid = _machine_replay_target.get(machine_id)
if target_sid and target_sid != session_id:
target_queue = _replay_queues.get(target_sid, [])
if target_queue:
queue = target_queue
_replay_queues[session_id] = target_queue
del _replay_queues[target_sid]
for state in _replay_states.values():
if state["session_id"] == target_sid and state["status"] == "running":
state["session_id"] = session_id
_machine_replay_target[machine_id] = session_id
logger.info(f"Replay machine-target: {machine_id} -> {target_sid} -> {session_id}")
# Lookup 2 : chercher dans les replay_states actifs pour cette machine
if not queue:
for state in _replay_states.values():
if (state.get("machine_id") == machine_id
and state["status"] == "running"
and state["session_id"] != session_id):
other_sid = state["session_id"]
other_queue = _replay_queues.get(other_sid, [])
if other_queue:
queue = other_queue
_replay_queues[session_id] = other_queue
del _replay_queues[other_sid]
state["session_id"] = session_id
_machine_replay_target[machine_id] = session_id
logger.info(f"Replay machine-state: {machine_id} -> {other_sid} -> {session_id}")
break
if not queue:
return {"action": None, "session_id": session_id, "machine_id": machine_id}
@@ -1603,11 +1614,12 @@ async def resolve_target(request: ResolveTargetRequest):
tmp_path = tmp.name
try:
# Lancer la résolution visuelle dans le thread GPU
# Lancer la résolution visuelle dans un thread SÉPARÉ (pas le GPU executor
# qui peut être saturé par le SessionWorker). Le template matching est CPU-only.
import asyncio
loop = asyncio.get_event_loop()
result = await loop.run_in_executor(
_gpu_executor,
None, # ThreadPool par défaut (pas _gpu_executor)
_resolve_target_sync,
tmp_path,
request.target_spec,