feat(qw4): hook safety_checks_provider + extension /replay/resume avec acquittements
Some checks failed
tests / Lint (ruff + black) (push) Successful in 16s
tests / Tests unitaires (sans GPU) (push) Failing after 13s
tests / Tests sécurité (critique) (push) Has been skipped

replay_state enrichi de safety_checks, checks_acknowledged, pause_reason,
pause_payload (audit trail).

Branche supervisée pause_for_human :
- appel build_pause_payload() avant bascule paused_need_help
- log [BUS] lea:safety_checks_generated (count, sources)
- fallback safe sur exception (pause sans checks plutôt que crash)
- déclenchement si safety_level/safety_checks déclarés OU execution_mode != autonomous
- sinon comportement legacy (skip silencieux)

POST /replay/resume :
- accepte body { acknowledged_check_ids: [...] }
- vérifie tous les checks required acquittés, sinon 400 required_checks_missing
- stocke checks_acknowledged comme audit trail
- nettoie safety_checks/pause_payload après reprise

Proxy VWB /api/v3/replay/resume → streaming /replay/{id}/resume (forward bearer
token + acknowledged_check_ids).

Backward 100% : workflows sans safety_checks → resume sans acquittement requis.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dom
2026-05-05 23:45:22 +02:00
parent af13cd80ff
commit 65da557310
4 changed files with 182 additions and 2 deletions

View File

@@ -2928,8 +2928,54 @@ async def get_next_action(session_id: str, machine_id: str = "default"):
type_ = action.get("type")
# pause_for_human : no-op en mode autonome — on saute et on continue
# pause_for_human : pause supervisée si safety_level/safety_checks ou mode supervised,
# sinon no-op en mode autonome (skip).
if type_ == "pause_for_human":
_params = action.get("parameters") or {}
_exec_mode = (
(owning_replay or {}).get("params", {}).get("execution_mode", "autonomous")
if owning_replay else "autonomous"
)
_has_safety_decl = bool(_params.get("safety_level") or _params.get("safety_checks"))
_is_supervised = _exec_mode != "autonomous"
if owning_replay is not None and (_has_safety_decl or _is_supervised):
# QW4 — Construire le payload de pause enrichi (déclaratif + LLM contextuel)
try:
from agent_v0.server_v1.safety_checks_provider import build_pause_payload
last_screenshot_path = owning_replay.get("last_screenshot")
payload = build_pause_payload(action, owning_replay, last_screenshot_path)
owning_replay["safety_checks"] = payload.checks
owning_replay["pause_payload"] = {
"checks": payload.checks,
"pause_reason": payload.pause_reason,
"message": payload.message,
}
if payload.message:
owning_replay["pause_message"] = payload.message
# Bus event d'observabilité (pattern QW1/QW2 = logger.info)
logger.info(
"[BUS] lea:safety_checks_generated replay=%s count=%d sources=%s",
owning_replay.get("replay_id", "?"),
len(payload.checks),
[c["source"] for c in payload.checks],
)
except Exception as e:
logger.warning("QW4 build_pause_payload échec (%s) — pause sans checks", e)
owning_replay["safety_checks"] = []
# Conserver le contexte de l'action (audit + reprise)
owning_replay["failed_action"] = {
"action_id": action.get("action_id"),
"type": "pause_for_human",
"reason": "user_request",
}
owning_replay["status"] = "paused_need_help"
queue.pop(0)
_replay_queues[session_id] = queue
return {"action": None, "session_id": session_id, "machine_id": machine_id}
# Mode autonome sans safety_checks → skip (comportement legacy)
logger.info(
"pause_for_human ignorée (mode autonome) — replay %s continue",
owning_replay["replay_id"] if owning_replay else "?"
@@ -4104,8 +4150,16 @@ async def list_replays():
}
class ReplayResumeRequest(BaseModel):
"""Body optionnel pour /replay/resume — QW4 acquittement de safety_checks."""
acknowledged_check_ids: List[str] = []
@app.post("/api/v1/traces/stream/replay/{replay_id}/resume")
async def resume_replay(replay_id: str):
async def resume_replay(
replay_id: str,
payload: Optional[ReplayResumeRequest] = None,
):
"""Reprendre un replay en pause supervisee (paused_need_help).
L'utilisateur a intervenu manuellement (naviguer vers le bon ecran,
@@ -4113,6 +4167,10 @@ async def resume_replay(replay_id: str):
est reinjectee en tete de queue pour etre re-tentee.
Si le replay n'est pas en pause, retourne une erreur 409 (conflit).
QW4 — Si des safety_checks sont attachés à la pause, tous ceux marqués
`required` doivent figurer dans `acknowledged_check_ids`. Sinon → 400
avec `{"error": "required_checks_missing", "missing": [...]}`.
"""
with _replay_lock:
state = _replay_states.get(replay_id)
@@ -4131,6 +4189,25 @@ async def resume_replay(replay_id: str):
),
)
# QW4 — Vérification des safety_checks required avant reprise
safety_checks = state.get("safety_checks") or []
ack_ids = (payload.acknowledged_check_ids if payload else []) or []
if safety_checks:
required_ids = {c["id"] for c in safety_checks if c.get("required")}
ack_set = set(ack_ids)
missing = sorted(required_ids - ack_set)
if missing:
raise HTTPException(
status_code=400,
detail={"error": "required_checks_missing", "missing": missing},
)
# Audit trail
state["checks_acknowledged"] = sorted(ack_set)
logger.info(
"QW4 resume replay=%s acquittements=%d (%s)",
state.get("replay_id"), len(ack_set), sorted(ack_set),
)
# Recuperer l'action echouee pour la reinjecter
failed_action = state.get("failed_action")
session_id = state["session_id"]
@@ -4139,6 +4216,10 @@ async def resume_replay(replay_id: str):
state["status"] = "running"
state["failed_action"] = None
state["pause_message"] = None
# QW4 — vider safety_checks après acquittement (la pause est résolue)
state["safety_checks"] = []
state["pause_payload"] = None
state["pause_reason"] = ""
# Reinjecter l'action echouee en tete de queue (sera re-tentee)
# pause_for_human est une pause intentionnelle, pas une erreur — ne pas réinjecter

View File

@@ -1384,6 +1384,11 @@ def _create_replay_state(
# QW2 — Anneaux d'historique pour LoopDetector (5 derniers max)
"_screenshot_history": [], # images PIL des N derniers heartbeats (LoopDetector embed à chaque tick)
"_action_history": [], # N dernières actions exécutées (signature)
# QW4 — Safety checks (hybride déclaratif + LLM contextuel) et audit acquittements
"safety_checks": [], # liste produite par SafetyChecksProvider
"checks_acknowledged": [], # ids acquittés via /replay/resume (audit trail)
"pause_reason": "", # "loop_detected" | "" pour V1
"pause_payload": None, # payload complet pour debug/audit
}