Compare commits
10 Commits
41c1250c99
...
5543e25f9d
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5543e25f9d | ||
|
|
2a07d8084b | ||
|
|
35b27ae492 | ||
|
|
b584bbabc3 | ||
|
|
8817f527e7 | ||
|
|
964856ab30 | ||
|
|
a67d896104 | ||
|
|
90c1d8036f | ||
|
|
6261002039 | ||
|
|
0e6e61f2b1 |
@@ -219,6 +219,10 @@ from .replay_engine import (
|
|||||||
_is_learned_workflow,
|
_is_learned_workflow,
|
||||||
_edge_to_normalized_actions,
|
_edge_to_normalized_actions,
|
||||||
_substitute_variables,
|
_substitute_variables,
|
||||||
|
_resolve_runtime_vars,
|
||||||
|
_SERVER_SIDE_ACTION_TYPES,
|
||||||
|
_handle_extract_text_action,
|
||||||
|
_handle_t2a_decision_action,
|
||||||
_expand_compound_steps,
|
_expand_compound_steps,
|
||||||
_pre_check_screen_state as _pre_check_screen_state_impl,
|
_pre_check_screen_state as _pre_check_screen_state_impl,
|
||||||
_detect_popup_hint as _detect_popup_hint_impl,
|
_detect_popup_hint as _detect_popup_hint_impl,
|
||||||
@@ -2758,8 +2762,29 @@ async def get_next_action(session_id: str, machine_id: str = "default"):
|
|||||||
|
|
||||||
Si la session de l'agent n'a pas d'actions en attente, cherche dans les
|
Si la session de l'agent n'a pas d'actions en attente, cherche dans les
|
||||||
autres queues de la MÊME machine (pas cross-machine).
|
autres queues de la MÊME machine (pas cross-machine).
|
||||||
|
|
||||||
|
Acquire timeout : si une action serveur lente (extract_text OCR,
|
||||||
|
t2a_decision LLM) tient le lock, on retourne immédiatement
|
||||||
|
{action: None, server_busy: True} avant que le client ne timeout à 5s.
|
||||||
|
Sans cela, des actions seraient popped serveur puis envoyées sur des
|
||||||
|
sockets clients déjà fermées par timeout — perdues silencieusement.
|
||||||
|
|
||||||
|
L'acquire et les actions serveur lentes sont exécutés via
|
||||||
|
run_in_executor : sinon l'appel synchrone bloque l'event loop FastAPI
|
||||||
|
(single-threaded) et même les polls qui devraient recevoir server_busy
|
||||||
|
sont bloqués jusqu'à libération — ce qui annule l'effet du timeout.
|
||||||
"""
|
"""
|
||||||
with _replay_lock:
|
import asyncio
|
||||||
|
loop = asyncio.get_event_loop()
|
||||||
|
acquired = await loop.run_in_executor(None, _replay_lock.acquire, True, 4.5)
|
||||||
|
if not acquired:
|
||||||
|
return {
|
||||||
|
"action": None,
|
||||||
|
"session_id": session_id,
|
||||||
|
"machine_id": machine_id,
|
||||||
|
"server_busy": True,
|
||||||
|
}
|
||||||
|
try:
|
||||||
# Verifier si le replay est en pause supervisee (target_not_found).
|
# Verifier si le replay est en pause supervisee (target_not_found).
|
||||||
# Dans ce cas, NE PAS envoyer d'action — attendre l'intervention utilisateur.
|
# Dans ce cas, NE PAS envoyer d'action — attendre l'intervention utilisateur.
|
||||||
for state in _replay_states.values():
|
for state in _replay_states.values():
|
||||||
@@ -2824,6 +2849,7 @@ async def get_next_action(session_id: str, machine_id: str = "default"):
|
|||||||
break
|
break
|
||||||
if target_state:
|
if target_state:
|
||||||
queue = target_queue
|
queue = target_queue
|
||||||
|
owning_replay = target_state
|
||||||
_replay_queues[session_id] = target_queue
|
_replay_queues[session_id] = target_queue
|
||||||
del _replay_queues[target_sid]
|
del _replay_queues[target_sid]
|
||||||
target_state["session_id"] = session_id
|
target_state["session_id"] = session_id
|
||||||
@@ -2840,6 +2866,7 @@ async def get_next_action(session_id: str, machine_id: str = "default"):
|
|||||||
other_queue = _replay_queues.get(other_sid, [])
|
other_queue = _replay_queues.get(other_sid, [])
|
||||||
if other_queue:
|
if other_queue:
|
||||||
queue = other_queue
|
queue = other_queue
|
||||||
|
owning_replay = state
|
||||||
_replay_queues[session_id] = other_queue
|
_replay_queues[session_id] = other_queue
|
||||||
del _replay_queues[other_sid]
|
del _replay_queues[other_sid]
|
||||||
state["session_id"] = session_id
|
state["session_id"] = session_id
|
||||||
@@ -2850,8 +2877,80 @@ async def get_next_action(session_id: str, machine_id: str = "default"):
|
|||||||
if not queue:
|
if not queue:
|
||||||
return {"action": None, "session_id": session_id, "machine_id": machine_id}
|
return {"action": None, "session_id": session_id, "machine_id": machine_id}
|
||||||
|
|
||||||
# Peek à la prochaine action SANS la retirer (pour le pre-check)
|
# ── Boucle de traitement : actions serveur (extract_text, t2a_decision)
|
||||||
action = queue[0]
|
# exécutées entièrement côté serveur jusqu'à trouver une action visuelle
|
||||||
|
# à transmettre à l'Agent V1 ou un pause_for_human qui bloque le replay.
|
||||||
|
action = None
|
||||||
|
while queue:
|
||||||
|
action = queue[0]
|
||||||
|
|
||||||
|
# Résoudre les variables runtime ({{var}} et {{var.field}})
|
||||||
|
if owning_replay is not None:
|
||||||
|
runtime_vars = owning_replay.get("variables") or {}
|
||||||
|
if runtime_vars:
|
||||||
|
action = _resolve_runtime_vars(action, runtime_vars)
|
||||||
|
|
||||||
|
type_ = action.get("type")
|
||||||
|
|
||||||
|
# pause_for_human : no-op en mode autonome — on saute et on continue
|
||||||
|
if type_ == "pause_for_human":
|
||||||
|
logger.info(
|
||||||
|
"pause_for_human ignorée (mode autonome) — replay %s continue",
|
||||||
|
owning_replay["replay_id"] if owning_replay else "?"
|
||||||
|
)
|
||||||
|
queue.pop(0)
|
||||||
|
_replay_queues[session_id] = queue
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Actions serveur : exécuter HORS event loop pour ne pas bloquer
|
||||||
|
# les autres polls (extract_text OCR ~5s, t2a_decision LLM ~8-13s).
|
||||||
|
# Le lock reste tenu (queue cohérente) mais l'event loop est libre,
|
||||||
|
# donc les polls concurrents peuvent recevoir {server_busy: True}.
|
||||||
|
if type_ in _SERVER_SIDE_ACTION_TYPES and owning_replay is not None:
|
||||||
|
try:
|
||||||
|
if type_ == "extract_text":
|
||||||
|
await loop.run_in_executor(
|
||||||
|
None,
|
||||||
|
_handle_extract_text_action,
|
||||||
|
action, owning_replay, session_id, _last_heartbeat,
|
||||||
|
)
|
||||||
|
elif type_ == "t2a_decision":
|
||||||
|
await loop.run_in_executor(
|
||||||
|
None,
|
||||||
|
_handle_t2a_decision_action,
|
||||||
|
action, owning_replay,
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Action serveur {type_} a levé : {e}")
|
||||||
|
queue.pop(0)
|
||||||
|
_replay_queues[session_id] = queue
|
||||||
|
continue # action suivante
|
||||||
|
|
||||||
|
# Clic conditionnel : si l'action a un paramètre "condition", évaluer la variable
|
||||||
|
# Format : "dec.critere1_valide" → runtime_vars["dec"]["critere1_valide"]
|
||||||
|
condition_key = (action.get("parameters") or {}).get("condition")
|
||||||
|
if condition_key and owning_replay is not None:
|
||||||
|
runtime_vars = owning_replay.get("variables") or {}
|
||||||
|
parts = condition_key.split(".", 1)
|
||||||
|
if len(parts) == 2:
|
||||||
|
val = (runtime_vars.get(parts[0]) or {}).get(parts[1])
|
||||||
|
else:
|
||||||
|
val = runtime_vars.get(parts[0])
|
||||||
|
if not val:
|
||||||
|
logger.info("Clic conditionnel ignoré (%s=%s) — action %s",
|
||||||
|
condition_key, val, action.get("action_id", "?"))
|
||||||
|
queue.pop(0)
|
||||||
|
_replay_queues[session_id] = queue
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Action visuelle : sortir de la boucle pour la transmettre à l'Agent V1
|
||||||
|
break
|
||||||
|
|
||||||
|
# Si la queue s'est vidée après les exécutions serveur, rien à transmettre
|
||||||
|
if not queue or action is None:
|
||||||
|
return {"action": None, "session_id": session_id, "machine_id": machine_id}
|
||||||
|
finally:
|
||||||
|
_replay_lock.release()
|
||||||
|
|
||||||
# ---- Pre-check écran (optionnel, non bloquant) ----
|
# ---- Pre-check écran (optionnel, non bloquant) ----
|
||||||
# Ne s'applique qu'aux actions qui ont un from_node (actions de workflow,
|
# Ne s'applique qu'aux actions qui ont un from_node (actions de workflow,
|
||||||
@@ -3879,7 +3978,9 @@ async def resume_replay(replay_id: str):
|
|||||||
state["pause_message"] = None
|
state["pause_message"] = None
|
||||||
|
|
||||||
# Reinjecter l'action echouee en tete de queue (sera re-tentee)
|
# Reinjecter l'action echouee en tete de queue (sera re-tentee)
|
||||||
if failed_action and failed_action.get("action_id"):
|
# pause_for_human est une pause intentionnelle, pas une erreur — ne pas réinjecter
|
||||||
|
if (failed_action and failed_action.get("action_id")
|
||||||
|
and failed_action.get("reason") != "user_request"):
|
||||||
# Reconstruire l'action a partir du retry_pending ou de l'original
|
# Reconstruire l'action a partir du retry_pending ou de l'original
|
||||||
original_action_id = failed_action["action_id"]
|
original_action_id = failed_action["action_id"]
|
||||||
# Chercher l'action originale dans les retry_pending
|
# Chercher l'action originale dans les retry_pending
|
||||||
@@ -3920,6 +4021,26 @@ async def resume_replay(replay_id: str):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/api/v1/traces/stream/replay/{replay_id}/cancel")
|
||||||
|
async def cancel_replay(replay_id: str):
|
||||||
|
"""Annuler un replay (quel que soit son statut) et vider sa queue."""
|
||||||
|
with _replay_lock:
|
||||||
|
state = _replay_states.get(replay_id)
|
||||||
|
if not state:
|
||||||
|
raise HTTPException(status_code=404, detail=f"Replay '{replay_id}' non trouvé")
|
||||||
|
session_id = state["session_id"]
|
||||||
|
state["status"] = "cancelled"
|
||||||
|
state["failed_action"] = None
|
||||||
|
state["pause_message"] = None
|
||||||
|
_replay_queues[session_id] = []
|
||||||
|
keys_to_del = [k for k, v in _retry_pending.items() if v.get("replay_id") == replay_id]
|
||||||
|
for k in keys_to_del:
|
||||||
|
_retry_pending.pop(k, None)
|
||||||
|
|
||||||
|
logger.info("Replay %s annulé manuellement", replay_id)
|
||||||
|
return {"status": "cancelled", "replay_id": replay_id, "session_id": session_id}
|
||||||
|
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# Visual Replay — Résolution visuelle des cibles (module resolve_engine)
|
# Visual Replay — Résolution visuelle des cibles (module resolve_engine)
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
|
|||||||
@@ -32,8 +32,16 @@ _ALLOWED_ACTION_TYPES = {
|
|||||||
"click", "type", "key_combo", "scroll", "wait",
|
"click", "type", "key_combo", "scroll", "wait",
|
||||||
"file_open", "file_save", "file_close", "file_new", "file_dialog",
|
"file_open", "file_save", "file_close", "file_new", "file_dialog",
|
||||||
"double_click", "right_click", "drag",
|
"double_click", "right_click", "drag",
|
||||||
"verify_screen", # Replay hybride : vérification visuelle entre groupes
|
"verify_screen", # Replay hybride : vérification visuelle entre groupes
|
||||||
|
"pause_for_human", # Pause supervisée explicite (interceptée par /replay/next)
|
||||||
|
"extract_text", # OCR serveur sur dernier heartbeat → variable workflow
|
||||||
|
"t2a_decision", # Analyse LLM facturation T2A → variable workflow
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Types d'actions exécutées CÔTÉ SERVEUR (jamais transmises à l'Agent V1).
|
||||||
|
# Le pipeline /replay/next les traite en boucle interne et passe à l'action
|
||||||
|
# suivante jusqu'à trouver une action visuelle (à transmettre au client).
|
||||||
|
_SERVER_SIDE_ACTION_TYPES = {"extract_text", "t2a_decision"}
|
||||||
_MAX_ACTION_TEXT_LENGTH = 10000
|
_MAX_ACTION_TEXT_LENGTH = 10000
|
||||||
_MAX_KEYS_PER_COMBO = 10
|
_MAX_KEYS_PER_COMBO = 10
|
||||||
# Touches autorisées dans les key_combo (modificateurs + touches spéciales + caractères simples)
|
# Touches autorisées dans les key_combo (modificateurs + touches spéciales + caractères simples)
|
||||||
@@ -852,6 +860,30 @@ def _edge_to_normalized_actions(edge, params: Dict[str, Any]) -> List[Dict[str,
|
|||||||
keys = [action_params["key"]]
|
keys = [action_params["key"]]
|
||||||
normalized["keys"] = keys
|
normalized["keys"] = keys
|
||||||
|
|
||||||
|
elif action_type == "pause_for_human":
|
||||||
|
normalized["type"] = "pause_for_human"
|
||||||
|
normalized["parameters"] = {
|
||||||
|
"message": action_params.get("message", "Validation requise"),
|
||||||
|
}
|
||||||
|
return [normalized] # pas de target/coords pour cette action logique
|
||||||
|
|
||||||
|
elif action_type == "extract_text":
|
||||||
|
normalized["type"] = "extract_text"
|
||||||
|
normalized["parameters"] = {
|
||||||
|
"output_var": action_params.get("output_var", "extracted_text"),
|
||||||
|
"paragraph": bool(action_params.get("paragraph", True)),
|
||||||
|
}
|
||||||
|
return [normalized]
|
||||||
|
|
||||||
|
elif action_type == "t2a_decision":
|
||||||
|
normalized["type"] = "t2a_decision"
|
||||||
|
normalized["parameters"] = {
|
||||||
|
"input_template": action_params.get("input_template", ""),
|
||||||
|
"output_var": action_params.get("output_var", "t2a_result"),
|
||||||
|
"model": action_params.get("model"),
|
||||||
|
}
|
||||||
|
return [normalized]
|
||||||
|
|
||||||
else:
|
else:
|
||||||
logger.warning(f"Type d'action inconnu : {action_type}")
|
logger.warning(f"Type d'action inconnu : {action_type}")
|
||||||
return []
|
return []
|
||||||
@@ -886,6 +918,143 @@ def _substitute_variables(text: str, params: Dict[str, Any], defaults: Dict[str,
|
|||||||
return re.sub(r'\$\{(\w+)\}', replacer, text)
|
return re.sub(r'\$\{(\w+)\}', replacer, text)
|
||||||
|
|
||||||
|
|
||||||
|
# Regex pour le templating runtime : {{var}} ou {{var.champ}} ou {{var.champ.sous}}
|
||||||
|
_RUNTIME_VAR_PATTERN = re.compile(r'\{\{\s*(\w+)(?:\.([\w.]+))?\s*\}\}')
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_runtime_vars_in_str(text: str, variables: Dict[str, Any]) -> str:
|
||||||
|
"""Remplace {{var}} et {{var.field}} par leur valeur depuis le dict variables.
|
||||||
|
|
||||||
|
Variables/champs absents : laissés tels quels (ne casse pas le pipeline).
|
||||||
|
Pour les valeurs non-str (dict, list), str() est appelé.
|
||||||
|
"""
|
||||||
|
def replacer(match):
|
||||||
|
var_name = match.group(1)
|
||||||
|
path = match.group(2)
|
||||||
|
if var_name not in variables:
|
||||||
|
return match.group(0)
|
||||||
|
value = variables[var_name]
|
||||||
|
if path:
|
||||||
|
for field in path.split('.'):
|
||||||
|
if isinstance(value, dict) and field in value:
|
||||||
|
value = value[field]
|
||||||
|
else:
|
||||||
|
return match.group(0)
|
||||||
|
return str(value)
|
||||||
|
|
||||||
|
return _RUNTIME_VAR_PATTERN.sub(replacer, text)
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_runtime_vars(value: Any, variables: Dict[str, Any]) -> Any:
|
||||||
|
"""Résout récursivement les {{var}} et {{var.field}} dans une valeur.
|
||||||
|
|
||||||
|
Supporte str, dict, list. Les autres types sont retournés tels quels.
|
||||||
|
Si variables est vide ou None, value est retournée inchangée.
|
||||||
|
"""
|
||||||
|
if not variables:
|
||||||
|
return value
|
||||||
|
if isinstance(value, str):
|
||||||
|
return _resolve_runtime_vars_in_str(value, variables)
|
||||||
|
if isinstance(value, dict):
|
||||||
|
return {k: _resolve_runtime_vars(v, variables) for k, v in value.items()}
|
||||||
|
if isinstance(value, list):
|
||||||
|
return [_resolve_runtime_vars(item, variables) for item in value]
|
||||||
|
return value
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Handlers pour les actions exécutées côté serveur (extract_text, t2a_decision)
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
def _handle_extract_text_action(
|
||||||
|
action: Dict[str, Any],
|
||||||
|
replay_state: Dict[str, Any],
|
||||||
|
session_id: str,
|
||||||
|
last_heartbeat: Dict[str, Dict[str, Any]],
|
||||||
|
) -> bool:
|
||||||
|
"""Traite une action extract_text côté serveur. Stocke le texte OCRisé dans
|
||||||
|
replay_state["variables"][output_var]. Retourne True si succès.
|
||||||
|
|
||||||
|
Robuste aux échecs : si pas de heartbeat ou OCR raté, stocke "" et retourne
|
||||||
|
False (le pipeline continue, pas de blocage).
|
||||||
|
"""
|
||||||
|
params = action.get("parameters") or {}
|
||||||
|
output_var = (params.get("output_var") or "extracted_text").strip()
|
||||||
|
paragraph = bool(params.get("paragraph", True))
|
||||||
|
|
||||||
|
heartbeat = last_heartbeat.get(session_id) or {}
|
||||||
|
path = heartbeat.get("path")
|
||||||
|
text = ""
|
||||||
|
|
||||||
|
if path:
|
||||||
|
try:
|
||||||
|
from core.llm import extract_text_from_image
|
||||||
|
text = extract_text_from_image(path, paragraph=paragraph)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("extract_text OCR échoué (%s) — variable '%s' = ''", e, output_var)
|
||||||
|
else:
|
||||||
|
logger.warning(
|
||||||
|
"extract_text : pas de heartbeat pour session %s — variable '%s' = ''",
|
||||||
|
session_id, output_var,
|
||||||
|
)
|
||||||
|
|
||||||
|
replay_state.setdefault("variables", {})[output_var] = text
|
||||||
|
logger.info(
|
||||||
|
"extract_text → variable '%s' (%d chars) replay %s",
|
||||||
|
output_var, len(text), replay_state.get("replay_id", "?"),
|
||||||
|
)
|
||||||
|
return bool(text)
|
||||||
|
|
||||||
|
|
||||||
|
def _handle_t2a_decision_action(
|
||||||
|
action: Dict[str, Any],
|
||||||
|
replay_state: Dict[str, Any],
|
||||||
|
) -> bool:
|
||||||
|
"""Traite une action t2a_decision côté serveur. Stocke le résultat JSON
|
||||||
|
dans replay_state["variables"][output_var]. Retourne True si succès.
|
||||||
|
|
||||||
|
Le DPI à analyser vient de action.parameters.input_template (déjà résolu
|
||||||
|
par _resolve_runtime_vars donc les {{var}} sont remplis).
|
||||||
|
"""
|
||||||
|
params = action.get("parameters") or {}
|
||||||
|
output_var = (params.get("output_var") or "t2a_result").strip()
|
||||||
|
dpi_text = (params.get("input_template") or params.get("dpi") or "").strip()
|
||||||
|
model = params.get("model") or None # None → DEFAULT_MODEL
|
||||||
|
|
||||||
|
if not dpi_text:
|
||||||
|
logger.warning(
|
||||||
|
"t2a_decision : input vide — variable '%s' = {decision: 'INDETERMINE'}", output_var,
|
||||||
|
)
|
||||||
|
replay_state.setdefault("variables", {})[output_var] = {
|
||||||
|
"decision": "INDETERMINE",
|
||||||
|
"justification": "DPI vide ou non extrait",
|
||||||
|
"confiance": "faible",
|
||||||
|
"_error": "empty_input",
|
||||||
|
}
|
||||||
|
return False
|
||||||
|
|
||||||
|
try:
|
||||||
|
from core.llm import analyze_dpi, DEFAULT_MODEL
|
||||||
|
result = analyze_dpi(dpi_text, model=model or DEFAULT_MODEL)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("t2a_decision : analyze_dpi exception %s", e)
|
||||||
|
result = {
|
||||||
|
"decision": "INDETERMINE",
|
||||||
|
"justification": f"Erreur analyse : {e}",
|
||||||
|
"confiance": "faible",
|
||||||
|
"_error": str(e),
|
||||||
|
}
|
||||||
|
|
||||||
|
replay_state.setdefault("variables", {})[output_var] = result
|
||||||
|
decision = result.get("decision", "?")
|
||||||
|
elapsed = result.get("_elapsed_s", "?")
|
||||||
|
logger.info(
|
||||||
|
"t2a_decision → variable '%s' decision=%s (%ss) replay %s",
|
||||||
|
output_var, decision, elapsed, replay_state.get("replay_id", "?"),
|
||||||
|
)
|
||||||
|
return "_error" not in result
|
||||||
|
|
||||||
|
|
||||||
def _expand_compound_steps(
|
def _expand_compound_steps(
|
||||||
steps: List[Dict[str, Any]], base: Dict[str, Any], params: Dict[str, Any]
|
steps: List[Dict[str, Any]], base: Dict[str, Any], params: Dict[str, Any]
|
||||||
) -> List[Dict[str, Any]]:
|
) -> List[Dict[str, Any]]:
|
||||||
@@ -1208,6 +1377,10 @@ def _create_replay_state(
|
|||||||
# Champs pour pause supervisée (target_not_found)
|
# Champs pour pause supervisée (target_not_found)
|
||||||
"failed_action": None, # Contexte de l'action en echec (quand paused_need_help)
|
"failed_action": None, # Contexte de l'action en echec (quand paused_need_help)
|
||||||
"pause_message": None, # Message a afficher a l'utilisateur
|
"pause_message": None, # Message a afficher a l'utilisateur
|
||||||
|
# Variables d'exécution produites en cours de workflow (extract_text,
|
||||||
|
# t2a_decision, etc.). Résolues via templating {{var}} ou {{var.field}}
|
||||||
|
# dans les paramètres des actions suivantes.
|
||||||
|
"variables": {},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -2193,22 +2193,33 @@ def _validate_resolution_quality(
|
|||||||
dx = abs(resolved_x - fallback_x_pct)
|
dx = abs(resolved_x - fallback_x_pct)
|
||||||
dy = abs(resolved_y - fallback_y_pct)
|
dy = abs(resolved_y - fallback_y_pct)
|
||||||
if dx > _RESOLUTION_MAX_DRIFT or dy > _RESOLUTION_MAX_DRIFT:
|
if dx > _RESOLUTION_MAX_DRIFT or dy > _RESOLUTION_MAX_DRIFT:
|
||||||
|
# Exception : si le template matching trouve l'image avec une
|
||||||
|
# similarité quasi parfaite, on fait confiance à la position
|
||||||
|
# visuelle peu importe le drift. Une image retrouvée à >= 0.95
|
||||||
|
# de score est SUR l'écran à l'endroit indiqué — le drift par
|
||||||
|
# rapport à l'enregistrement ne reflète qu'un changement de
|
||||||
|
# layout (scroll, redimensionnement, F11, devtools), pas une
|
||||||
|
# erreur de résolution.
|
||||||
|
_HIGH_CONFIDENCE = 0.95
|
||||||
|
if score >= _HIGH_CONFIDENCE and method.startswith("template_matching"):
|
||||||
|
logger.info(
|
||||||
|
"[REPLAY] Drift (%.3f, %.3f) > %.2f IGNORÉ : score=%.3f >= %.2f "
|
||||||
|
"sur %s — résultat visuel fiable, on l'utilise",
|
||||||
|
dx, dy, _RESOLUTION_MAX_DRIFT, score, _HIGH_CONFIDENCE, method,
|
||||||
|
)
|
||||||
|
return result
|
||||||
|
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"[REPLAY] Resolution REJETÉE (drift trop grand) : "
|
"[REPLAY] Drift trop grand (%.3f, %.3f) > %.2f — fallback coords enregistrées (%.3f, %.3f)",
|
||||||
"method=%s resolved=(%.3f, %.3f) expected=(%.3f, %.3f) "
|
dx, dy, _RESOLUTION_MAX_DRIFT, fallback_x_pct, fallback_y_pct,
|
||||||
"drift=(%.3f, %.3f) max=%.2f",
|
|
||||||
method, resolved_x, resolved_y,
|
|
||||||
fallback_x_pct, fallback_y_pct,
|
|
||||||
dx, dy, _RESOLUTION_MAX_DRIFT,
|
|
||||||
)
|
)
|
||||||
|
# Fallback : coordonnées enregistrées lors de la capture (écran identique = safe)
|
||||||
return {
|
return {
|
||||||
"resolved": False,
|
"resolved": True,
|
||||||
"method": f"rejected_drift_{method}",
|
"method": "fallback_recorded_coords",
|
||||||
"reason": f"drift_dx{dx:.3f}_dy{dy:.3f}_max{_RESOLUTION_MAX_DRIFT:.2f}",
|
"reason": f"drift_dx{dx:.3f}_dy{dy:.3f}_using_recorded",
|
||||||
"original_method": method,
|
"original_method": method,
|
||||||
"original_score": score,
|
"original_score": score,
|
||||||
"drift_dx": round(dx, 3),
|
|
||||||
"drift_dy": round(dy, 3),
|
|
||||||
"x_pct": fallback_x_pct,
|
"x_pct": fallback_x_pct,
|
||||||
"y_pct": fallback_y_pct,
|
"y_pct": fallback_y_pct,
|
||||||
}
|
}
|
||||||
|
|||||||
15
core/llm/__init__.py
Normal file
15
core/llm/__init__.py
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
"""Modules LLM (clients Ollama et décisionnels métier) + extracteur OCR."""
|
||||||
|
|
||||||
|
from .t2a_decision import (
|
||||||
|
PROMPT_TEMPLATE,
|
||||||
|
DEFAULT_MODEL,
|
||||||
|
analyze_dpi,
|
||||||
|
)
|
||||||
|
from .ocr_extractor import extract_text_from_image
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"PROMPT_TEMPLATE",
|
||||||
|
"DEFAULT_MODEL",
|
||||||
|
"analyze_dpi",
|
||||||
|
"extract_text_from_image",
|
||||||
|
]
|
||||||
71
core/llm/ocr_extractor.py
Normal file
71
core/llm/ocr_extractor.py
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
"""Extracteur OCR — texte depuis une image (screenshot d'écran).
|
||||||
|
|
||||||
|
Utilise EasyOCR fr+en. Singleton (chargement modèle ~3s au premier appel).
|
||||||
|
|
||||||
|
Conçu pour le pipeline streaming serveur (action `extract_text`) : récupère
|
||||||
|
un screenshot fresh (dernier heartbeat ou capture forcée), applique l'OCR,
|
||||||
|
retourne le texte concaténé pour analyse downstream (ex: t2a_decision).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional, Tuple
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_easyocr_reader = None
|
||||||
|
|
||||||
|
|
||||||
|
def _get_reader():
|
||||||
|
"""Initialise EasyOCR fr+en au premier appel (singleton)."""
|
||||||
|
global _easyocr_reader
|
||||||
|
if _easyocr_reader is None:
|
||||||
|
import easyocr
|
||||||
|
try:
|
||||||
|
_easyocr_reader = easyocr.Reader(['fr', 'en'], gpu=True, verbose=False)
|
||||||
|
logger.info("EasyOCR initialisé (fr+en, GPU)")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("EasyOCR GPU indisponible (%s), fallback CPU", e)
|
||||||
|
_easyocr_reader = easyocr.Reader(['fr', 'en'], gpu=False, verbose=False)
|
||||||
|
return _easyocr_reader
|
||||||
|
|
||||||
|
|
||||||
|
def extract_text_from_image(
|
||||||
|
image_path: str,
|
||||||
|
region: Optional[Tuple[int, int, int, int]] = None,
|
||||||
|
paragraph: bool = True,
|
||||||
|
) -> str:
|
||||||
|
"""Extrait le texte d'une image via EasyOCR.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
image_path: chemin du PNG sur disque.
|
||||||
|
region: (x, y, w, h) pour cropper avant OCR. None = image entière.
|
||||||
|
paragraph: True pour regrouper les lignes en paragraphes (lisible),
|
||||||
|
False pour blocs séparés (granulaire).
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Texte concaténé. Chaque ligne / paragraphe est séparé par un saut de ligne.
|
||||||
|
En cas d'erreur, retourne une chaîne vide et log un warning.
|
||||||
|
"""
|
||||||
|
path = Path(image_path)
|
||||||
|
if not path.exists():
|
||||||
|
logger.warning("extract_text: fichier introuvable %s", image_path)
|
||||||
|
return ""
|
||||||
|
|
||||||
|
try:
|
||||||
|
from PIL import Image
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
img = Image.open(path)
|
||||||
|
if region:
|
||||||
|
x, y, w, h = region
|
||||||
|
img = img.crop((x, y, x + w, y + h))
|
||||||
|
|
||||||
|
reader = _get_reader()
|
||||||
|
results = reader.readtext(np.array(img), detail=0, paragraph=paragraph)
|
||||||
|
return "\n".join(str(r).strip() for r in results if r)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("extract_text échoué sur %s : %s", image_path, e)
|
||||||
|
return ""
|
||||||
168
core/llm/t2a_decision.py
Normal file
168
core/llm/t2a_decision.py
Normal file
@@ -0,0 +1,168 @@
|
|||||||
|
"""Aide à la décision de facturation urgences T2A/PMSI via LLM local.
|
||||||
|
|
||||||
|
Décide si un passage aux urgences relève :
|
||||||
|
- du FORFAIT_URGENCE (passage simple, retour à domicile)
|
||||||
|
- de la REQUALIFICATION_HOSPITALISATION (séjour MCO, valorisation 1k-5k€+)
|
||||||
|
|
||||||
|
Le prompt impose une extraction littérale des faits du DPI (pas d'invention)
|
||||||
|
et une modulation honnête de la confiance. Validé sur 15 DPI synthétiques :
|
||||||
|
qwen2.5:7b atteint 100 % d'accuracy en ~5 s/cas avec 4,7 Go VRAM.
|
||||||
|
|
||||||
|
Voir docs/clients/ght_sud_95/ et demo/facturation_urgences/RESULTATS.md pour le
|
||||||
|
bench comparatif des 11 LLMs évalués.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
import urllib.error
|
||||||
|
import urllib.request
|
||||||
|
from typing import Any, Dict
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434/api/generate")
|
||||||
|
DEFAULT_MODEL = os.environ.get("T2A_MODEL", "qwen2.5:7b")
|
||||||
|
DEFAULT_TIMEOUT = 60 # secondes
|
||||||
|
|
||||||
|
PROMPT_TEMPLATE = """Tu es médecin DIM (Département d'Information Médicale), expert en facturation T2A/PMSI aux urgences hospitalières en France.
|
||||||
|
|
||||||
|
Analyse le dossier patient ci-dessous pour déterminer si le passage relève :
|
||||||
|
- FORFAIT_URGENCE : passage simple, retour à domicile, sans surveillance prolongée ni soins continus
|
||||||
|
- REQUALIFICATION_HOSPITALISATION : séjour MCO requis selon les 3 critères PMSI/ATIH
|
||||||
|
|
||||||
|
LES 3 CRITÈRES UHCD (au moins 2 sur 3 validés ⇒ REQUALIFICATION) :
|
||||||
|
1. Pathologie potentiellement évolutive (instabilité hémodynamique, terrain à risque, traitement nécessitant adaptation)
|
||||||
|
2. Surveillance médicale et paramédicale prolongée (constantes itératives, observations IDE/médecin, durée > 6 h)
|
||||||
|
3. Examens complémentaires ou actes thérapeutiques (biologie, imagerie, sutures, gestes techniques)
|
||||||
|
|
||||||
|
INSTRUCTIONS STRICTES :
|
||||||
|
1. N'utilise QUE des éléments littéralement présents dans le dossier patient. N'invente AUCUN critère.
|
||||||
|
2. Pour CHAQUE critère (1, 2, 3), tu DOIS produire un texte de preuve qui contient AU MOINS UNE CITATION LITTÉRALE du dossier entre guillemets français « ... ». Exemple : « FC à 110 bpm, TA 92/60 ».
|
||||||
|
3. Si le critère est NON validé, ne renvoie JAMAIS un fallback creux : explique factuellement ce qui manque, en citant le dossier (ex: « Sortie à H+2 », « Aucun acte technique au compte-rendu »).
|
||||||
|
4. Le texte de chaque preuve fait 2-3 phrases : (i) la citation littérale, (ii) l'analyse PMSI, (iii) la conclusion validé/non validé.
|
||||||
|
5. Calcule la durée totale du passage en heures (admission → sortie/transfert) à partir des horaires du dossier.
|
||||||
|
6. Module ta confiance honnêtement :
|
||||||
|
- "elevee" uniquement si tous les indices convergent
|
||||||
|
- "moyenne" si éléments ambivalents
|
||||||
|
- "faible" si information manquante ou très atypique
|
||||||
|
|
||||||
|
Réponds STRICTEMENT en JSON valide, sans texte avant ni après :
|
||||||
|
{{
|
||||||
|
"duree_passage_heures": <nombre>,
|
||||||
|
"elements_pour_hospitalisation": [<phrases littéralement extraites du dossier>],
|
||||||
|
"elements_pour_forfait": [<phrases littéralement extraites du dossier>],
|
||||||
|
"decision": "FORFAIT_URGENCE" | "REQUALIFICATION_HOSPITALISATION",
|
||||||
|
"decision_court": "UHCD" | "Forfait Urgences",
|
||||||
|
"preuve_critere1": "<2-3 phrases incluant AU MOINS UNE citation littérale entre « » (motif, symptôme, terrain à risque, traitement). Si non validé : factualise ce qui manque en citant le dossier.>",
|
||||||
|
"critere1_valide": true | false,
|
||||||
|
"preuve_critere2": "<2-3 phrases incluant AU MOINS UNE citation littérale entre « » (constantes, observations IDE, durée surveillance). Si non validé : factualise.>",
|
||||||
|
"critere2_valide": true | false,
|
||||||
|
"preuve_critere3": "<2-3 phrases incluant AU MOINS UNE citation littérale entre « » (actes/examens : biologie, imagerie, suture, etc.). Si non validé : factualise.>",
|
||||||
|
"critere3_valide": true | false,
|
||||||
|
"justification": "<2-3 phrases synthétiques s'appuyant explicitement sur les preuves ci-dessus, avec au moins une citation>",
|
||||||
|
"confiance": "elevee" | "moyenne" | "faible"
|
||||||
|
}}
|
||||||
|
|
||||||
|
DOSSIER PATIENT :
|
||||||
|
{dpi}
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def analyze_dpi(
    dpi_text: str,
    model: str = DEFAULT_MODEL,
    timeout: int = DEFAULT_TIMEOUT,
    ollama_url: str = OLLAMA_URL,
) -> Dict[str, Any]:
    """Submit an emergency-department DPI to a local Ollama LLM, return the JSON decision.

    Args:
        dpi_text: Patient record text (concatenated tabs or raw DPI).
        model: Ollama model to use (default qwen2.5:7b — 100% accuracy on the bench).
        timeout: HTTP timeout in seconds.
        ollama_url: Ollama endpoint (default localhost:11434/api/generate).

    Returns:
        Dict with:
            decision: "FORFAIT_URGENCE" | "REQUALIFICATION_HOSPITALISATION"
            elements_pour_hospitalisation: List[str]
            elements_pour_forfait: List[str]
            duree_passage_heures: float
            justification: str
            confiance: "elevee" | "moyenne" | "faible"
            _elapsed_s: float (latency)
            _model: str
        On error:
            {"_error": str, "_elapsed_s": float} (network / Ollama unavailable)
            {"_parse_error": True, "_raw": str, "_elapsed_s": float} (invalid JSON)
    """
    payload = {
        "model": model,
        "prompt": PROMPT_TEMPLATE.format(dpi=dpi_text),
        "stream": False,
        # Ask Ollama to constrain output to syntactically valid JSON.
        "format": "json",
        "keep_alive": "5m",
        "options": {
            "temperature": 0.1,   # near-deterministic for a reproducible decision
            "num_predict": 1500,
            "num_ctx": 16384,     # long DPIs need an extended context window
        },
    }
    data = json.dumps(payload).encode("utf-8")
    req = urllib.request.Request(
        ollama_url,
        data=data,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    t0 = time.time()
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            body = json.loads(resp.read().decode("utf-8"))
    except (urllib.error.URLError, TimeoutError, ConnectionError) as e:
        elapsed = round(time.time() - t0, 1)
        logger.warning("analyze_dpi: Ollama indisponible (%s) après %.1fs", e, elapsed)
        return {"_error": str(e), "_elapsed_s": elapsed, "_model": model}

    elapsed = time.time() - t0

    # "or ''" guards: the API may return an explicit JSON null for these keys,
    # which would make .strip() raise AttributeError on None.
    raw_response = (body.get("response") or "").strip()
    raw_thinking = (body.get("thinking") or "").strip()

    # Reasoning models sometimes leave the final JSON in the "thinking" stream
    # with an empty "response" — salvage the last {...} span as a candidate.
    candidates = [raw_response]
    if not raw_response and raw_thinking:
        last_close = raw_thinking.rfind("}")
        last_open = raw_thinking.rfind("{", 0, last_close)
        if last_open != -1 and last_close != -1:
            candidates.append(raw_thinking[last_open:last_close + 1])

    parsed = None
    for cand in candidates:
        cleaned = cand
        # Strip markdown code fences some models wrap around JSON output.
        if cleaned.startswith("```"):
            cleaned = cleaned.split("\n", 1)[-1]
        if cleaned.endswith("```"):
            cleaned = cleaned.rsplit("```", 1)[0]
        cleaned = cleaned.strip()
        try:
            candidate_obj = json.loads(cleaned)
        except json.JSONDecodeError:
            continue
        # Guard: json.loads also succeeds on a bare list/number/string (e.g. a
        # fragment salvaged from "thinking"). Only a dict is a usable decision;
        # anything else would crash the metadata assignments below (TypeError).
        if isinstance(candidate_obj, dict):
            parsed = candidate_obj
            break

    if parsed is None:
        return {
            "_parse_error": True,
            "_raw": (raw_response or raw_thinking)[:500],
            "_elapsed_s": round(elapsed, 1),
            "_model": model,
        }

    parsed["_elapsed_s"] = round(elapsed, 1)
    parsed["_model"] = model
    parsed["_eval_count"] = body.get("eval_count")  # token count, kept for perf audit
    return parsed
|
||||||
28
deploy/systemd/rpa-mockup-easily.service
Normal file
28
deploy/systemd/rpa-mockup-easily.service
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
[Unit]
# Static HTTP server exposing the Easily Assure mockup (GHT Sud 95 demo).
Description=Maquette Easily Assure (démo GHT Sud 95) - serveur statique HTTP
# Wait for full network availability so --bind 0.0.0.0 exposes all interfaces.
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
# Runs unprivileged as the project owner's account.
User=dom
Group=dom
WorkingDirectory=/home/dom/ai/rpa_vision_v3/docs/clients/ght_sud_95/mockup_easily_assure
# Python's stdlib http.server is enough here: read-only static files, demo-grade traffic.
ExecStart=/usr/bin/python3 -m http.server 8765 --bind 0.0.0.0

# Auto-restart on crash, but not on clean stop.
Restart=on-failure
RestartSec=3
TimeoutStopSec=10

# Hardening: the service only needs read access to the mockup directory.
NoNewPrivileges=true
PrivateTmp=true
ProtectSystem=strict
ProtectHome=read-only
ReadOnlyPaths=/home/dom/ai/rpa_vision_v3/docs/clients/ght_sud_95/mockup_easily_assure

# Logs go to the journal under a dedicated identifier for easy filtering.
StandardOutput=journal
StandardError=journal
SyslogIdentifier=rpa-mockup-easily

[Install]
WantedBy=multi-user.target
|
||||||
2515
docs/superpowers/plans/2026-05-05-qw-suite-mai.md
Normal file
2515
docs/superpowers/plans/2026-05-05-qw-suite-mai.md
Normal file
File diff suppressed because it is too large
Load Diff
467
docs/superpowers/specs/2026-05-05-qw-suite-mai-design.md
Normal file
467
docs/superpowers/specs/2026-05-05-qw-suite-mai-design.md
Normal file
@@ -0,0 +1,467 @@
|
|||||||
|
# Spec — QW Suite Mai 2026
|
||||||
|
|
||||||
|
| Champ | Valeur |
|
||||||
|
|---|---|
|
||||||
|
| Date | 2026-05-05 |
|
||||||
|
| Auteur | Dom + Claude (brainstorming structuré) |
|
||||||
|
| Branche | `feature/qw-suite-mai` (depuis `feature/feedback-bus`) |
|
||||||
|
| Backup | `backup/pre-qw-suite-mai-2026-05-05` à pousser sur Gitea avant 1er commit |
|
||||||
|
| Statut | Design approuvé — spec à valider par Dom avant `writing-plans` |
|
||||||
|
| Cibles démo | GHT Sud 95 (1ère sem mai 2026, date à confirmer) |
|
||||||
|
| Contraintes inviolables | 100% vision · 100% local (Ollama) · backward compatible |
|
||||||
|
|
||||||
|
## 1. Contexte & motivation
|
||||||
|
|
||||||
|
Suite à l'exploration comparative de 5 frameworks computer-use (Simular Agent-S, browser-use, OpenAI CUA sample, Coasty open-cu, Showlab OOTB), trois quick wins ont été identifiés comme améliorations à fort ratio valeur/risque pour RPA Vision V3, alignés avec la philosophie du projet (vision pure, souveraineté, supervision médicale) :
|
||||||
|
|
||||||
|
- **QW1 — Multi-écrans propre** (inspiré OOTB) : capture et grounding sur l'écran cible plutôt que sur le composite tous écrans. Gain de perf grounding + correction des coordonnées.
|
||||||
|
- **QW2 — LoopDetector composite** (inspiré browser-use) : détecter quand Léa exécute des actions techniquement valides mais que l'écran ne progresse pas, et escalader vers l'humain plutôt que de tourner en rond silencieusement.
|
||||||
|
- **QW4 — Safety checks hybrides** (inspiré OpenAI CUA + browser-use Pydantic registry) : enrichir l'action `pause_for_human` avec une liste de vérifications à acquitter, mêlant déclaratif (workflow) et contextuel (LLM local).
|
||||||
|
|
||||||
|
Effet cumulé attendu : Léa devient observable, robuste et auditable sans rien céder sur le 100% local.
|
||||||
|
|
||||||
|
## 2. Décisions de design (récap)
|
||||||
|
|
||||||
|
| Sujet | Décision |
|
||||||
|
|---|---|
|
||||||
|
| Activation | Default-ON pour tous les workflows (Dom recréera ce qui en a besoin) |
|
||||||
|
| QW1 — Stratégie ciblage écran | `monitor_index` enregistré à la capture → fallback focus actif → fallback composite (backward) |
|
||||||
|
| QW1 — Niveau de stack | Client Agent V1 (capture) + serveur (routeur) + `core/execution/input_handler.py` (capture locale) |
|
||||||
|
| QW2 — Signal de boucle | Composite OR : screen_static (CLIP) + action_repeat + retry_threshold |
|
||||||
|
| QW2 — Sortie | `replay_state["status"] = "paused_need_help"` avec `pause_reason` structuré |
|
||||||
|
| QW4 — Source des checks | Hybride : déclaratif workflow + LLM contextuel sur `safety_level: "medical_critical"` |
|
||||||
|
| QW4 — Robustesse LLM | `medgemma:4b` + timeout 5s + `format=json` Ollama + JSON Schema strict + fallback safe (zéro check additionnel) + kill-switch env var |
|
||||||
|
| QW4 — UX VWB | Bulle existante préservée + `<ChecklistPanel>` au-dessus de Continuer (bouton désactivé tant que required non cochés) |
|
||||||
|
| Ordre de livraison | QW1 → QW2 → QW4 (du moins invasif au plus visible) |
|
||||||
|
| Plan timing | Option A : QW1+QW2 avant démo ; QW4 enchaîné dès validation des deux premiers |
|
||||||
|
| Kill-switches | Env vars sur QW2 et QW4, surchargeables par `systemctl edit` |
|
||||||
|
| Backward compatibility | 100% — aucun champ obligatoire ajouté au DSL ; workflows existants se comportent comme avant |
|
||||||
|
|
||||||
|
## 3. Architecture globale
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────┐ ┌─────────────────────────────────┐
|
||||||
|
│ Agent V1 (Windows) │ │ Serveur Streaming (5005) │
|
||||||
|
│ │ │ │
|
||||||
|
│ ┌──────────────────┐ │ │ ┌───────────────────────────┐ │
|
||||||
|
│ │ ScreenCapture │ │ │ │ LoopDetector [QW2] │ │
|
||||||
|
│ │ + monitor_index │───┼────────▶│ │ • screen_static (CLIP) │ │
|
||||||
|
│ │ [QW1] │ │ HTTP │ │ • action_repeat │ │
|
||||||
|
│ └──────────────────┘ │ │ │ • retry_threshold │ │
|
||||||
|
│ │ │ │ → paused_need_help │ │
|
||||||
|
│ ┌──────────────────┐ │ │ └───────────────────────────┘ │
|
||||||
|
│ │ FeedbackBus lea:*│◀──┼─────────┤ │
|
||||||
|
│ │ chat_window │ │ │ ┌───────────────────────────┐ │
|
||||||
|
│ └──────────────────┘ │ │ │ SafetyChecksProvider │ │
|
||||||
|
└─────────────────────────┘ │ │ [QW4] │ │
|
||||||
|
│ │ • declarative (workflow) │ │
|
||||||
|
│ │ • LLM contextual │ │
|
||||||
|
│ │ ‒ medgemma:4b 5s/JSON │ │
|
||||||
|
│ │ ‒ fallback safe │ │
|
||||||
|
│ │ • kill-switch env var │ │
|
||||||
|
│ └───────────────────────────┘ │
|
||||||
|
│ │
|
||||||
|
│ ┌───────────────────────────┐ │
|
||||||
|
│ │ MonitorRouter [QW1] │ │
|
||||||
|
│ │ • cible monitor_index │ │
|
||||||
|
│ │ • fallback focus actif │ │
|
||||||
|
│ └───────────────────────────┘ │
|
||||||
|
└─────────────────────────────────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌─────────────────────────────────┐
|
||||||
|
│ VWB Frontend (3002) │
|
||||||
|
│ │
|
||||||
|
│ PauseDialog (étendu) [QW4-UX] │
|
||||||
|
│ • bulle existante préservée │
|
||||||
|
│ • + ChecklistPanel │
|
||||||
|
│ (cases à cocher acquittables)│
|
||||||
|
│ • + pause_reason si loop │
|
||||||
|
│ Continuer désactivé tant que │
|
||||||
|
│ required-checks non cochés │
|
||||||
|
└─────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
### Principes invariants
|
||||||
|
1. Aucun nouveau service, aucune nouvelle DB. Tout dans la stack existante (Agent V1 + serveur 5005 + VWB 3002).
|
||||||
|
2. 3 modules serveur isolés (`monitor_router.py`, `loop_detector.py`, `safety_checks_provider.py`) — couplage faible, testables individuellement, désactivables par env var.
|
||||||
|
3. Backward compatible : workflows sans nouveaux champs se comportent comme avant.
|
||||||
|
4. Kill-switches env vars sur QW2 et QW4, override possible via `systemctl edit` pendant la démo.
|
||||||
|
5. 100% vision : QW1 pure capture + grounding ; QW2 réutilise le `_clip_embedder` déjà chargé ; QW4 LLM = Ollama local strict.
|
||||||
|
6. Bus `lea:*` étendu de 4 events d'observabilité : `lea:loop_detected`, `lea:safety_checks_generated`, `lea:safety_checks_llm_failed`, `lea:monitor_routed`.
|
||||||
|
|
||||||
|
### Surface de modification (ordre A)
|
||||||
|
|
||||||
|
| QW | Fichiers nouveaux | Fichiers modifiés |
|
||||||
|
|---|---|---|
|
||||||
|
| QW1 | `agent_v0/server_v1/monitor_router.py` | `agent_v0/agent_v1/capture/screen_capture.py`, `core/execution/input_handler.py`, `agent_v0/server_v1/api_stream.py` (~10 lignes) |
|
||||||
|
| QW2 | `agent_v0/server_v1/loop_detector.py` | `agent_v0/server_v1/replay_engine.py` (~30 lignes), `agent_v0/server_v1/api_stream.py` (~20 lignes) |
|
||||||
|
| QW4 | `agent_v0/server_v1/safety_checks_provider.py`, `visual_workflow_builder/frontend_v4/src/components/PauseDialog.tsx` | `agent_v0/server_v1/replay_engine.py`, `agent_v0/server_v1/api_stream.py` (`/replay/resume`), `visual_workflow_builder/frontend_v4/src/types.ts`, `visual_workflow_builder/frontend_v4/src/components/PropertiesPanel.tsx` |
|
||||||
|
|
||||||
|
## 4. QW1 — Multi-écrans
|
||||||
|
|
||||||
|
### 4.1 Composants
|
||||||
|
|
||||||
|
**Client Agent V1** — `agent_v0/agent_v1/capture/screen_capture.py` (existant à modifier)
|
||||||
|
- Enrichit chaque heartbeat / event avec :
|
||||||
|
- `monitor_index: int`
|
||||||
|
- `monitors_geometry: [{idx, x, y, w, h, primary}]`
|
||||||
|
- Détection via `screeninfo` (port direct depuis Showlab OOTB)
|
||||||
|
- Capture de l'écran *actif uniquement* (poids réseau identique à aujourd'hui)
|
||||||
|
- Si `screeninfo` indisponible côté Windows : envoie `monitors_geometry: []`, comportement composite préservé
|
||||||
|
|
||||||
|
**Serveur** — nouveau `agent_v0/server_v1/monitor_router.py` (~80 lignes)
|
||||||
|
- API : `resolve_target_monitor(action: dict, session_state: dict) → MonitorTarget`
|
||||||
|
- `MonitorTarget = {idx, offset_x, offset_y, w, h, source: "action" | "focus" | "composite_fallback"}`
|
||||||
|
- Stratégie :
|
||||||
|
1. Lit `action.get("monitor_index")` si présent → cible cet écran
|
||||||
|
2. Sinon `session_state.get("last_focused_monitor")` → cible focus actif
|
||||||
|
3. Sinon `monitors[0]` composite (comportement actuel — backward)
|
||||||
|
|
||||||
|
**Input local Linux** — `core/execution/input_handler.py` modifs ciblées
|
||||||
|
- Signature changée : `_capture_screen(monitor_idx=None) → (image, w, h, offset_x, offset_y)`
|
||||||
|
- Quand `monitor_idx` fourni : capture uniquement ce monitor
|
||||||
|
- Toutes les fonctions `_grounding_*` (`_grounding_ocr`, `_grounding_ui_tars`, `_grounding_vlm`) propagent l'offset pour traduire les coords retournées en coords absolues écran
|
||||||
|
|
||||||
|
### 4.2 Data flow replay
|
||||||
|
|
||||||
|
```
|
||||||
|
Action [monitor_index=1] reçue par serveur
|
||||||
|
→ MonitorRouter.resolve()
|
||||||
|
→ target_monitor = {idx:1, offset:(1920,0), w:1920, h:1080, source:"action"}
|
||||||
|
→ grounding capture monitor 1 uniquement (image 1920×1080, pas 3840×1080)
|
||||||
|
→ UI-TARS / OCR / VLM cherche cible → coords locales (640, 540)
|
||||||
|
→ coords absolues = (640+1920, 540+0) = (2560, 540)
|
||||||
|
→ pyautogui.click(2560, 540)
|
||||||
|
→ bus.emit("lea:monitor_routed", {idx:1, source:"action"})
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4.3 Error handling
|
||||||
|
|
||||||
|
| Cas | Comportement |
|
||||||
|
|---|---|
|
||||||
|
| `monitor_index` absent (vieille session) | Fallback focus actif, log info `lea:monitor_routed source=focus` |
|
||||||
|
| Monitor enregistré n'existe plus (2nd écran débranché) | Fallback focus actif, event `lea:monitor_unavailable` warning |
|
||||||
|
| `mss.monitors[i]` hors limites | Fallback `monitors[0]` composite, event `lea:monitor_invalid_index` error |
|
||||||
|
| `screeninfo` non installé côté Agent V1 | `monitors_geometry: []`, fallback composite (comportement actuel) — pas de blocage |
|
||||||
|
|
||||||
|
### 4.4 Tests QW1
|
||||||
|
|
||||||
|
- `tests/unit/test_monitor_router.py` : 4 cas (cible OK, fallback focus, fallback composite, monitor débranché)
|
||||||
|
- `tests/integration/test_grounding_offset.py` : capture 1 monitor + clic résolu avec offset (mock pyautogui)
|
||||||
|
- Smoke : 1 workflow Easily rejoué, vérification visuelle que le clic atterrit au bon endroit
|
||||||
|
|
||||||
|
### 4.5 Compat workflows existants
|
||||||
|
|
||||||
|
Aucune action n'a `monitor_index` aujourd'hui → 100% des workflows existants partent en fallback focus actif → comportement quasi-identique au composite actuel mais sur un seul écran (gain de perf grounding même sans recréation de workflow).
|
||||||
|
|
||||||
|
## 5. QW2 — LoopDetector composite
|
||||||
|
|
||||||
|
### 5.1 Composants
|
||||||
|
|
||||||
|
**Nouveau** `agent_v0/server_v1/loop_detector.py` (~150 lignes)
|
||||||
|
- Classe `LoopDetector` avec 3 sous-détecteurs
|
||||||
|
- API : `evaluate(replay_state, screenshot_history, action_history) → LoopVerdict`
|
||||||
|
- `LoopVerdict = {detected: bool, reason: str, signal: str, evidence: dict}`
|
||||||
|
|
||||||
|
**Hook** dans `agent_v0/server_v1/api_stream.py`
|
||||||
|
- Après chaque `report_action_result`, appel `loop_detector.evaluate(...)` si `RPA_LOOP_DETECTOR_ENABLED=1` (défaut)
|
||||||
|
- Si `verdict.detected` :
|
||||||
|
- `replay_state["status"] = "paused_need_help"`
|
||||||
|
- `replay_state["pause_reason"] = verdict.reason`
|
||||||
|
- `replay_state["pause_message"] = f"Léa semble bloquée — {verdict.signal}"`
|
||||||
|
- bus.emit `lea:loop_detected` avec `{signal, evidence, replay_id}`
|
||||||
|
|
||||||
|
**Étendu** dans `replay_engine.py` :
|
||||||
|
- `_create_replay_state()` ajoute :
|
||||||
|
- `"_screenshot_history": []` (anneau de 5 derniers embeddings CLIP)
|
||||||
|
- `"_action_history": []` (anneau des 5 dernières actions)
|
||||||
|
- `_pre_check_screen_state()` continue indépendamment (signal différent : check pré-action vs détection post-action de stagnation)
|
||||||
|
|
||||||
|
### 5.2 Signaux composites
|
||||||
|
|
||||||
|
| Signal | Détecteur | Seuil par défaut | Source |
|
||||||
|
|---|---|---|---|
|
||||||
|
| `screen_static` | A | 4 captures consécutives avec CLIP similarity > 0.99 | `_clip_embedder` déjà chargé serveur |
|
||||||
|
| `action_repeat` | B | 3 actions consécutives identiques (type + coords) | `_action_history` |
|
||||||
|
| `retry_threshold` | C | 3 retries sur même `action_id` | `replay_state["retried_actions"]` (déjà existant) |
|
||||||
|
|
||||||
|
Un seul signal positif suffit à déclencher l'escalade.
|
||||||
|
|
||||||
|
### 5.3 Data flow
|
||||||
|
|
||||||
|
```
|
||||||
|
Action exécutée → result reçu via /replay/result
|
||||||
|
↓
|
||||||
|
LoopDetector.evaluate(state, screenshots, actions) si RPA_LOOP_DETECTOR_ENABLED=1
|
||||||
|
├─ A.check_screen_static() → embed(latest), compare aux N-1 derniers
|
||||||
|
├─ B.check_action_repeat() → compare action_history[-3:]
|
||||||
|
└─ C.check_retry_threshold() → state["retried_actions"] >= 3
|
||||||
|
↓
|
||||||
|
Si verdict.detected:
|
||||||
|
state["status"] = "paused_need_help"
|
||||||
|
state["pause_reason"] = verdict.reason
|
||||||
|
state["pause_message"] = f"Léa semble bloquée — {verdict.signal} ({evidence})"
|
||||||
|
bus.emit("lea:loop_detected", {signal, evidence, replay_id})
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5.4 Error handling
|
||||||
|
|
||||||
|
| Cas | Comportement |
|
||||||
|
|---|---|
|
||||||
|
| CLIP embedder unavailable | Signal A désactivé (warning log 1×), B+C continuent. Pas de blocage. |
|
||||||
|
| `_screenshot_history` < N | Signal A skip silencieusement (pas assez d'historique) |
|
||||||
|
| `embed_image()` lève une exception | Catch + log warning, replay continue (verdict = `detected=False`) |
|
||||||
|
| `RPA_LOOP_DETECTOR_ENABLED=0` | Module entier bypassé, comportement antérieur |
|
||||||
|
| Faux positif détecté en pleine démo | `RPA_LOOP_DETECTOR_ENABLED=0` via `systemctl edit rpa-streaming` + restart → reprise immédiate |
|
||||||
|
|
||||||
|
### 5.5 Configuration env vars
|
||||||
|
|
||||||
|
- `RPA_LOOP_DETECTOR_ENABLED=1` (défaut)
|
||||||
|
- `RPA_LOOP_SCREEN_STATIC_THRESHOLD=0.99`
|
||||||
|
- `RPA_LOOP_SCREEN_STATIC_N=4`
|
||||||
|
- `RPA_LOOP_ACTION_REPEAT_N=3`
|
||||||
|
- `RPA_LOOP_RETRY_THRESHOLD=3`
|
||||||
|
|
||||||
|
### 5.6 Tests QW2
|
||||||
|
|
||||||
|
- `tests/unit/test_loop_detector.py` : 8 cas (chaque signal isolé, chaque combinaison, kill-switch, embedder absent)
|
||||||
|
- `tests/integration/test_loop_detector_replay.py` : 3 cas — replay simulé qui boucle → vérifier transition `running → paused_need_help` avec bonne raison
|
||||||
|
- Pas de smoke démo (impossible à reproduire fiable, on s'appuie sur les tests intégration)
|
||||||
|
|
||||||
|
### 5.7 Compat VWB
|
||||||
|
|
||||||
|
Aucune côté frontend pour QW2 : la pause `paused_need_help` existe déjà. Le `pause_reason` enrichi sera affiché par le composant `PauseDialog` étendu en QW4. Avant la livraison de QW4, la raison s'affichera en texte dans le `pause_message` (donc utile dès le commit QW2).
|
||||||
|
|
||||||
|
## 6. QW4 — Safety checks hybrides
|
||||||
|
|
||||||
|
### 6.1 Contrat de l'action étendue (rétro-compatible)
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"type": "pause_for_human",
|
||||||
|
"parameters": {
|
||||||
|
"message": "Validation T2A avant codage",
|
||||||
|
"safety_level": "medical_critical",
|
||||||
|
"safety_checks": [
|
||||||
|
{"id": "check_ipp", "label": "Vérifier IPP patient", "required": true},
|
||||||
|
{"id": "check_cim10", "label": "Confirmer code CIM-10", "required": true}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
`safety_level` et `safety_checks` sont **optionnels**. Action sans ces champs → comportement actuel (bulle simple, aucun appel LLM).
|
||||||
|
|
||||||
|
### 6.2 Composants serveur
|
||||||
|
|
||||||
|
**Nouveau** `agent_v0/server_v1/safety_checks_provider.py` (~180 lignes)
|
||||||
|
- API : `build_pause_payload(action, replay_state, last_screenshot) → PausePayload`
|
||||||
|
- Concatène : checks déclaratifs (workflow) + checks contextuels (LLM si `safety_level == "medical_critical"`)
|
||||||
|
- Chaque check porte sa source : `source: "declarative" | "llm_contextual"` et son `evidence` (vide pour déclaratif, justification courte pour LLM)
|
||||||
|
- Format check final :
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"id": "check_xxx",
|
||||||
|
"label": "...",
|
||||||
|
"required": true,
|
||||||
|
"source": "declarative" | "llm_contextual",
|
||||||
|
"evidence": null | "..."
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**LLM contextual call** — sous-fonction `_call_llm_for_contextual_checks()`
|
||||||
|
- Modèle : `medgemma:4b` (env `RPA_SAFETY_CHECKS_LLM_MODEL`)
|
||||||
|
- Timeout dur : 5s (env `RPA_SAFETY_CHECKS_LLM_TIMEOUT_S`)
|
||||||
|
- `format=json` natif Ollama + JSON Schema strict :
|
||||||
|
```json
|
||||||
|
{"additional_checks": [{"label": "string", "evidence": "string"}]}
|
||||||
|
```
|
||||||
|
- Max 3 checks ajoutés (env `RPA_SAFETY_CHECKS_LLM_MAX_CHECKS`)
|
||||||
|
- Prompt : screenshot heartbeat actuel + workflow message + liste des checks déclaratifs (évite doublons)
|
||||||
|
- Tout échec (timeout, exception, JSON invalide post-schema) → `additional_checks = []`, event `lea:safety_checks_llm_failed`, replay continue
|
||||||
|
|
||||||
|
**Hook** dans `replay_engine.py` — branche `action_type == "pause_for_human"`
|
||||||
|
- Avant de basculer en `paused_need_help`, appel `safety_checks_provider.build_pause_payload(...)`
|
||||||
|
- Stocke `replay_state["safety_checks"] = payload.checks`
|
||||||
|
- Stocke `replay_state["pause_payload"] = payload` (pour debug/audit)
|
||||||
|
|
||||||
|
**Modif** `api_stream.py` — endpoint `/replay/resume`
|
||||||
|
- Reçoit `{acknowledged_check_ids: [...]}` dans le body POST
|
||||||
|
- Vérifie : tous les checks `required=true` doivent être dans `acknowledged_check_ids`
|
||||||
|
- Sinon : `400 {error: "required_checks_missing", missing: [...]}`
|
||||||
|
- Stocke `replay_state["checks_acknowledged"] = acknowledged_check_ids` (audit trail)
|
||||||
|
- Reprise normale du replay
|
||||||
|
|
||||||
|
### 6.3 Composants frontend VWB
|
||||||
|
|
||||||
|
**Nouveau** `visual_workflow_builder/frontend_v4/src/components/PauseDialog.tsx` (~200 lignes)
|
||||||
|
- Props : `pauseMessage`, `pauseReason`, `safetyChecks`, `onResume(ackIds)`, `onCancel`
|
||||||
|
- Si `safetyChecks.length === 0` : rend la bulle existante (legacy, comportement actuel)
|
||||||
|
- Sinon : bulle + `<ChecklistPanel>` avec checkboxes
|
||||||
|
- Bouton Continuer disabled tant que `checks.filter(c => c.required && !checked).length > 0`
|
||||||
|
- POST `/replay/resume` avec body `{acknowledged_check_ids: [...]}`
|
||||||
|
- Visuel source :
|
||||||
|
- Badge `[Léa]` pour `source: "llm_contextual"` (avec tooltip `evidence`)
|
||||||
|
- Badge `[obligatoire]` pour `required: true`
|
||||||
|
|
||||||
|
**Étendu** `types.ts`
|
||||||
|
- `PauseAction['parameters']` : ajout `safety_level?`, `safety_checks?`
|
||||||
|
- `Execution` : ajout `pause_reason?`, `safety_checks?`
|
||||||
|
|
||||||
|
**Étendu** `PropertiesPanel.tsx:1356` — éditeur de l'action `pause_for_human`
|
||||||
|
- Section "Niveau de sécurité" : dropdown `standard | medical_critical`
|
||||||
|
- Section "Checks à valider" : liste éditable (id + label + required)
|
||||||
|
|
||||||
|
### 6.4 Data flow complet
|
||||||
|
|
||||||
|
```
|
||||||
|
Action pause_for_human (medical_critical, 2 checks déclaratifs) atteinte
|
||||||
|
↓
|
||||||
|
SafetyChecksProvider.build_pause_payload()
|
||||||
|
├─ checks = [...declarative] (2 entrées)
|
||||||
|
├─ if safety_level == "medical_critical" and RPA_SAFETY_CHECKS_LLM_ENABLED=1:
|
||||||
|
│ llm_checks = _call_llm_for_contextual_checks() (max 3, timeout 5s)
|
||||||
|
│ checks += llm_checks
|
||||||
|
└─ return PausePayload(checks, pause_reason, message)
|
||||||
|
↓
|
||||||
|
replay_state["status"] = "paused_need_help"
|
||||||
|
replay_state["safety_checks"] = checks
|
||||||
|
bus.emit("lea:safety_checks_generated", {count, sources})
|
||||||
|
↓
|
||||||
|
Frontend VWB poll /replay/state → reçoit pause_payload
|
||||||
|
↓
|
||||||
|
<PauseDialog> rend ChecklistPanel
|
||||||
|
↓
|
||||||
|
Médecin coche les 4 checks → clique Continuer
|
||||||
|
↓
|
||||||
|
POST /replay/resume {acknowledged_check_ids: [4 ids]}
|
||||||
|
↓
|
||||||
|
Serveur valide (tous required acquittés) → reprise du replay
|
||||||
|
replay_state["checks_acknowledged"] = [...] (audit trail conservé)
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.5 Error handling
|
||||||
|
|
||||||
|
| Cas | Comportement |
|
||||||
|
|---|---|
|
||||||
|
| `safety_level` absent | Pas d'appel LLM ; checks déclaratifs uniquement (peut être `[]`) → bulle simple si vide, checklist sinon |
|
||||||
|
| Ollama timeout 5s | Event `lea:safety_checks_llm_failed`, `additional_checks=[]`, fallback safe (déclaratifs seuls) |
|
||||||
|
| Ollama JSON malformé (post `format=json` — théoriquement impossible) | Idem timeout, fallback safe |
|
||||||
|
| LLM produit un check absurde | Accepté tel quel, le superviseur ignore (pas de filtrage en V1) |
|
||||||
|
| Frontend reçoit `safety_checks=[]` | Bulle simple, comportement legacy |
|
||||||
|
| `RPA_SAFETY_CHECKS_LLM_ENABLED=0` | Couche LLM bypassée, déclaratifs gardés |
|
||||||
|
| `/replay/resume` sans `acknowledged_check_ids` sur required | `400 required_checks_missing` |
|
||||||
|
| Frontend POST `/replay/resume` rejeté | Toast d'erreur côté UI, état pause conservé, possibilité de cocher manquants et réessayer |
|
||||||
|
|
||||||
|
### 6.6 Configuration env vars
|
||||||
|
|
||||||
|
- `RPA_SAFETY_CHECKS_LLM_ENABLED=1` (défaut)
|
||||||
|
- `RPA_SAFETY_CHECKS_LLM_MODEL=medgemma:4b`
|
||||||
|
- `RPA_SAFETY_CHECKS_LLM_TIMEOUT_S=5`
|
||||||
|
- `RPA_SAFETY_CHECKS_LLM_MAX_CHECKS=3`
|
||||||
|
|
||||||
|
### 6.7 Tests QW4
|
||||||
|
|
||||||
|
- `tests/unit/test_safety_checks_provider.py` : 7 cas (déclaratif seul, hybride réussi, LLM timeout, LLM JSON invalide, kill-switch, max_checks respecté, déclaratif vide)
|
||||||
|
- `tests/integration/test_replay_resume_acknowledgments.py` : 3 cas (resume OK, missing required → 400, audit trail enregistré dans `checks_acknowledged`)
|
||||||
|
- Frontend : `tests/components/PauseDialog.test.tsx` si suite Vitest existe (à confirmer pendant l'implémentation), sinon test manuel avec checklist écrite
|
||||||
|
- Smoke : 1 workflow Easily avec `pause_for_human medical_critical` enrichi → vérification full chain
|
||||||
|
|
||||||
|
### 6.8 Compat workflows existants
|
||||||
|
|
||||||
|
100% backward — `pause_for_human` actuels n'ont ni `safety_level` ni `safety_checks` → comportement strictement identique. Aucune recréation forcée. Dom enrichira uniquement les workflows qu'il veut promouvoir au niveau `medical_critical`.
|
||||||
|
|
||||||
|
## 7. Tests, sécurité de la branche, livraison
|
||||||
|
|
||||||
|
### 7.1 Filet de sécurité avant TOUT commit sur `feature/qw-suite-mai`
|
||||||
|
|
||||||
|
1. Branche backup poussée Gitea : `backup/pre-qw-suite-mai-2026-05-05`
|
||||||
|
2. Capture baseline E2E :
|
||||||
|
```
|
||||||
|
pytest tests/test_pipeline_e2e.py \
|
||||||
|
tests/test_phase0_integration.py \
|
||||||
|
tests/integration/test_stream_processor.py \
|
||||||
|
-q 2>&1 | tee .qw-baseline.log
|
||||||
|
```
|
||||||
|
3. Smoke démo : 1 déroulé complet d'un workflow Easily Assure, archivage screenshot/vidéo de référence
|
||||||
|
4. État VWB validé : démarrage Vite local, ouverture d'un workflow, lancement d'un replay simple, screenshot "tout va bien"
|
||||||
|
|
||||||
|
### 7.2 Discipline TDD légère par QW
|
||||||
|
|
||||||
|
- Test unitaire écrit AVANT le code de production (1 test rouge → 1 implémentation → vert)
|
||||||
|
- Pas de TDD complet sur le frontend (Vitest + React = trop d'outillage à valider en parallèle), test manuel cadré avec checklist écrite
|
||||||
|
- Re-run de la suite baseline après chaque commit QW, comparaison au log archivé
|
||||||
|
- Toute régression bloque le passage au QW suivant tant qu'elle n'est pas comprise et résolue
|
||||||
|
|
||||||
|
### 7.3 Compat VWB — checklist explicite avant commit QW4
|
||||||
|
|
||||||
|
- [ ] Workflow ancien (sans `safety_checks`) → bulle simple s'affiche normalement
|
||||||
|
- [ ] Workflow nouveau avec `safety_checks` déclaratifs uniquement → checklist visible, **pas** d'appel Ollama (vérification logs)
|
||||||
|
- [ ] Workflow `medical_critical` → checklist + checks LLM apparaissent (vérification logs Ollama call dans les 5s)
|
||||||
|
- [ ] Continuer désactivé tant que required non cochés
|
||||||
|
- [ ] POST `/replay/resume` avec mauvais payload → toast d'erreur côté UI, pas de crash
|
||||||
|
- [ ] PropertiesPanel : édition de `safety_checks` ne casse pas l'édition d'autres params de `pause_for_human`
|
||||||
|
- [ ] DB `workflows.db` : ouverture après commit, aucune migration cassante (schéma JSON est libre)
|
||||||
|
|
||||||
|
### 7.4 Plan de commits
|
||||||
|
|
||||||
|
```
|
||||||
|
1. test(qw1): tests monitor_router + grounding_offset (rouges)
|
||||||
|
2. feat(qw1): multi-écrans piloté par monitor_index (verts)
|
||||||
|
3. test(qw2): tests loop_detector composite (rouges)
|
||||||
|
4. feat(qw2): LoopDetector composite avec kill-switch env
|
||||||
|
5. test(qw4): tests safety_checks_provider + replay_resume (rouges)
|
||||||
|
6. feat(qw4): safety_checks hybride déclaratif + LLM contextuel
|
||||||
|
7. feat(vwb): PauseDialog + ChecklistPanel + extension PropertiesPanel
|
||||||
|
8. docs(qw): docs/QW_SUITE_MAI.md + mise à jour MEMORY.md
|
||||||
|
```
|
||||||
|
|
||||||
|
Chaque commit signé Co-Authored-By Claude. Branche poussée régulièrement sur Gitea pour backup distant.
|
||||||
|
|
||||||
|
### 7.5 Stratégie en cas de régression critique pendant la démo
|
||||||
|
|
||||||
|
Kill-switches env vars surchargeables sans redéploiement code :
|
||||||
|
|
||||||
|
```
|
||||||
|
systemctl edit rpa-streaming
|
||||||
|
# Ajouter sous [Service] :
|
||||||
|
Environment=RPA_LOOP_DETECTOR_ENABLED=0
|
||||||
|
Environment=RPA_SAFETY_CHECKS_LLM_ENABLED=0
|
||||||
|
systemctl restart rpa-streaming
|
||||||
|
```
|
||||||
|
|
||||||
|
Si problème grave au-delà des kill-switches : rollback à `backup/pre-qw-suite-mai-2026-05-05`.
|
||||||
|
|
||||||
|
```
|
||||||
|
git checkout backup/pre-qw-suite-mai-2026-05-05
|
||||||
|
./svc.sh restart
|
||||||
|
```
|
||||||
|
|
||||||
|
### 7.6 Plan de livraison (Option A validée)
|
||||||
|
|
||||||
|
**Avant démo GHT (cette semaine) — Sprint priorité 1**
|
||||||
|
- QW1 : tests + code + smoke (~1j)
|
||||||
|
- QW2 : tests + code + tests intégration (~2j)
|
||||||
|
- Capture baseline + replay smoke entre chaque
|
||||||
|
- Si QW1+QW2 validés et probants → on enchaîne sur QW4 dès que possible (Dom accepte le weekend si "effet waouh" auprès de spécialistes RPA)
|
||||||
|
|
||||||
|
**Après démo / dès validation QW1+QW2 — Sprint priorité 2**
|
||||||
|
- QW4 serveur (provider + LLM + endpoint resume) (~3j)
|
||||||
|
- QW4 frontend (PauseDialog + PropertiesPanel) (~2j)
|
||||||
|
- Doc + mise à jour MEMORY.md
|
||||||
|
|
||||||
|
**Total estimé** : ~8.5j-h ingénieur senior, étalable selon le retour démo.
|
||||||
|
|
||||||
|
## 8. Ce qui n'est PAS dans ce spec (out of scope)
|
||||||
|
|
||||||
|
- F1 (DSL d'actions Pydantic-first) : refactor de fond, sera son propre spec après la démo.
|
||||||
|
- F2 (Mixture-of-Grounding routeur adaptatif) : nécessite F1, son propre spec.
|
||||||
|
- F3 (Best-of-N + Reflection) : nécessite F1, son propre spec.
|
||||||
|
- QW3 (`output_model_schema` Pydantic pour `extract_text`) : opportuniste, sera intégré quand on touchera `extract_text` pour autre chose.
|
||||||
|
- Toute introduction de Pydantic-AI / instructor / Playwright / accessibility-tree : interdit (contraintes inviolables).
|
||||||
|
- Refonte du composant pause en `<PauseDialog>` à 3 modes (option C de Q6) : reportée après démo si retour utilisateurs justifie l'investissement.
|
||||||
|
|
||||||
|
## 9. Open questions
|
||||||
|
|
||||||
|
Aucune. Toutes les décisions de design ont été tranchées via les 7 questions clarifiantes du brainstorming du 5 mai 2026.
|
||||||
131
tests/integration/test_pause_for_human.py
Normal file
131
tests/integration/test_pause_for_human.py
Normal file
@@ -0,0 +1,131 @@
|
|||||||
|
"""Tests de l'action pause_for_human (C.5).
|
||||||
|
|
||||||
|
Vérifie la chaîne :
|
||||||
|
- Validation côté replay_engine accepte le nouveau type
|
||||||
|
- Conversion edge → action normalisée préserve le message
|
||||||
|
- Bridge VWB → core mappe correctement
|
||||||
|
- Le bridge VWB construit bien un edge avec action.type='pause_for_human'
|
||||||
|
"""
|
||||||
|
|
||||||
|
from agent_v0.server_v1.replay_engine import (
|
||||||
|
_ALLOWED_ACTION_TYPES,
|
||||||
|
_validate_replay_action,
|
||||||
|
_edge_to_normalized_actions,
|
||||||
|
)
|
||||||
|
from visual_workflow_builder.backend.services.learned_workflow_bridge import (
|
||||||
|
VWB_ACTION_TO_CORE,
|
||||||
|
convert_vwb_to_core_workflow,
|
||||||
|
_vwb_params_to_core,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------
|
||||||
|
# Validation pipeline (replay_engine)
|
||||||
|
# ----------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_pause_for_human_in_allowed_types():
|
||||||
|
assert "pause_for_human" in _ALLOWED_ACTION_TYPES
|
||||||
|
|
||||||
|
|
||||||
|
def test_validate_pause_for_human_action_valid():
|
||||||
|
action = {"type": "pause_for_human", "parameters": {"message": "Valider UHCD ?"}}
|
||||||
|
assert _validate_replay_action(action) is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_validate_pause_for_human_no_params_still_valid():
|
||||||
|
"""Le validateur ne doit pas exiger 'message' (fallback côté handler)."""
|
||||||
|
action = {"type": "pause_for_human"}
|
||||||
|
assert _validate_replay_action(action) is None
|
||||||
|
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------
|
||||||
|
# Conversion edge → action normalisée
|
||||||
|
# ----------------------------------------------------------------------
|
||||||
|
|
||||||
|
class _FakeAction:
|
||||||
|
def __init__(self, type_, parameters=None):
|
||||||
|
self.type = type_
|
||||||
|
self.target = None
|
||||||
|
self.parameters = parameters or {}
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeEdge:
|
||||||
|
def __init__(self, action, edge_id="e1", from_node="n1", to_node="n2"):
|
||||||
|
self.edge_id = edge_id
|
||||||
|
self.from_node = from_node
|
||||||
|
self.to_node = to_node
|
||||||
|
self.action = action
|
||||||
|
|
||||||
|
|
||||||
|
def test_edge_to_action_pause_for_human_preserves_message():
|
||||||
|
edge = _FakeEdge(_FakeAction(
|
||||||
|
"pause_for_human",
|
||||||
|
parameters={"message": "Tu valides UHCD ?"},
|
||||||
|
))
|
||||||
|
actions = _edge_to_normalized_actions(edge, params={})
|
||||||
|
assert len(actions) == 1
|
||||||
|
a = actions[0]
|
||||||
|
assert a["type"] == "pause_for_human"
|
||||||
|
assert a["parameters"]["message"] == "Tu valides UHCD ?"
|
||||||
|
assert "x_pct" not in a # action logique, pas de coords
|
||||||
|
assert "y_pct" not in a
|
||||||
|
|
||||||
|
|
||||||
|
def test_edge_to_action_pause_for_human_default_message():
|
||||||
|
edge = _FakeEdge(_FakeAction("pause_for_human", parameters={}))
|
||||||
|
actions = _edge_to_normalized_actions(edge, params={})
|
||||||
|
assert actions[0]["parameters"]["message"] == "Validation requise"
|
||||||
|
|
||||||
|
|
||||||
|
def test_edge_to_action_pause_for_human_carries_edge_metadata():
|
||||||
|
edge = _FakeEdge(
|
||||||
|
_FakeAction("pause_for_human", parameters={"message": "x"}),
|
||||||
|
edge_id="edge_42", from_node="n_src", to_node="n_dst",
|
||||||
|
)
|
||||||
|
actions = _edge_to_normalized_actions(edge, params={})
|
||||||
|
a = actions[0]
|
||||||
|
assert a["edge_id"] == "edge_42"
|
||||||
|
assert a["from_node"] == "n_src"
|
||||||
|
assert a["to_node"] == "n_dst"
|
||||||
|
assert "action_id" in a
|
||||||
|
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------
|
||||||
|
# Bridge VWB → core
|
||||||
|
# ----------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_vwb_action_to_core_passthrough():
|
||||||
|
assert VWB_ACTION_TO_CORE["pause_for_human"] == "pause_for_human"
|
||||||
|
|
||||||
|
|
||||||
|
def test_vwb_params_to_core_preserves_message():
|
||||||
|
core_params = _vwb_params_to_core("pause_for_human", {"message": "Coucou"})
|
||||||
|
assert core_params == {"message": "Coucou"}
|
||||||
|
|
||||||
|
|
||||||
|
def test_vwb_params_to_core_default_message():
|
||||||
|
core_params = _vwb_params_to_core("pause_for_human", {})
|
||||||
|
assert core_params["message"] == "Validation requise"
|
||||||
|
|
||||||
|
|
||||||
|
def test_export_vwb_workflow_with_pause_step():
|
||||||
|
"""Un workflow VWB contenant une step pause_for_human doit produire un edge
|
||||||
|
avec action.type='pause_for_human' et message dans parameters."""
|
||||||
|
workflow_data = {"id": "wf_demo", "name": "Demo Urgences", "description": ""}
|
||||||
|
steps_data = [
|
||||||
|
{"id": "s1", "action_type": "click_anchor", "parameters": {"target_text": "25003284"}, "label": "Clic IPP"},
|
||||||
|
{"id": "s2", "action_type": "pause_for_human", "parameters": {"message": "Valider UHCD ?"}, "label": "Pause"},
|
||||||
|
{"id": "s3", "action_type": "click_anchor", "parameters": {"target_text": "Enregistrer"}, "label": "Clic Enregistrer"},
|
||||||
|
]
|
||||||
|
core = convert_vwb_to_core_workflow(workflow_data, steps_data)
|
||||||
|
assert core["learning_state"] == "COACHING"
|
||||||
|
assert len(core["nodes"]) == 3
|
||||||
|
assert len(core["edges"]) == 2
|
||||||
|
|
||||||
|
# L'edge sortant du node de pause doit avoir le bon type + message
|
||||||
|
pause_edges = [
|
||||||
|
e for e in core["edges"]
|
||||||
|
if e["action"]["type"] == "pause_for_human"
|
||||||
|
]
|
||||||
|
assert len(pause_edges) == 1
|
||||||
|
assert pause_edges[0]["action"]["parameters"]["message"] == "Valider UHCD ?"
|
||||||
282
tests/integration/test_t2a_extract.py
Normal file
282
tests/integration/test_t2a_extract.py
Normal file
@@ -0,0 +1,282 @@
|
|||||||
|
"""Tests des actions extract_text et t2a_decision (C+.5/.6).
|
||||||
|
|
||||||
|
Couvre :
|
||||||
|
- _resolve_runtime_vars : templating {{var}} / {{var.field}}
|
||||||
|
- _handle_extract_text_action : OCR mocké, stockage variable
|
||||||
|
- _handle_t2a_decision_action : analyze_dpi mocké, stockage JSON
|
||||||
|
- _edge_to_normalized_actions pour les 2 types
|
||||||
|
- Bridge VWB → core (mapping + paramètres)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from agent_v0.server_v1.replay_engine import (
|
||||||
|
_ALLOWED_ACTION_TYPES,
|
||||||
|
_SERVER_SIDE_ACTION_TYPES,
|
||||||
|
_resolve_runtime_vars,
|
||||||
|
_handle_extract_text_action,
|
||||||
|
_handle_t2a_decision_action,
|
||||||
|
_edge_to_normalized_actions,
|
||||||
|
_create_replay_state,
|
||||||
|
)
|
||||||
|
from visual_workflow_builder.backend.services.learned_workflow_bridge import (
|
||||||
|
VWB_ACTION_TO_CORE,
|
||||||
|
convert_vwb_to_core_workflow,
|
||||||
|
_vwb_params_to_core,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------
|
||||||
|
# Templating runtime
|
||||||
|
# ----------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_resolve_simple_var():
|
||||||
|
r = _resolve_runtime_vars("Patient {{ipp}}", {"ipp": "25003284"})
|
||||||
|
assert r == "Patient 25003284"
|
||||||
|
|
||||||
|
|
||||||
|
def test_resolve_field_access():
|
||||||
|
r = _resolve_runtime_vars(
|
||||||
|
"{{result.decision}} car {{result.justification}}",
|
||||||
|
{"result": {"decision": "UHCD", "justification": "asthme + insuf coro"}},
|
||||||
|
)
|
||||||
|
assert "UHCD car asthme + insuf coro" == r
|
||||||
|
|
||||||
|
|
||||||
|
def test_resolve_missing_var_kept_intact():
|
||||||
|
r = _resolve_runtime_vars("Hello {{absent}} world", {"x": "y"})
|
||||||
|
assert r == "Hello {{absent}} world"
|
||||||
|
|
||||||
|
|
||||||
|
def test_resolve_missing_field_kept_intact():
|
||||||
|
r = _resolve_runtime_vars("{{var.absent}}", {"var": {"present": "x"}})
|
||||||
|
assert r == "{{var.absent}}"
|
||||||
|
|
||||||
|
|
||||||
|
def test_resolve_in_dict_recursive():
|
||||||
|
r = _resolve_runtime_vars(
|
||||||
|
{"msg": "IPP {{ipp}}", "nested": {"k": "{{ipp}}"}, "list": ["{{age}}"]},
|
||||||
|
{"ipp": "X", "age": 77},
|
||||||
|
)
|
||||||
|
assert r == {"msg": "IPP X", "nested": {"k": "X"}, "list": ["77"]}
|
||||||
|
|
||||||
|
|
||||||
|
def test_resolve_empty_vars_noop():
|
||||||
|
val = {"k": "{{var}}"}
|
||||||
|
assert _resolve_runtime_vars(val, {}) == val
|
||||||
|
assert _resolve_runtime_vars(val, None) == val
|
||||||
|
|
||||||
|
|
||||||
|
def test_resolve_non_string_passthrough():
|
||||||
|
assert _resolve_runtime_vars(42, {"x": "y"}) == 42
|
||||||
|
assert _resolve_runtime_vars(None, {"x": "y"}) is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_resolve_handles_whitespace_in_braces():
|
||||||
|
r = _resolve_runtime_vars("{{ ipp }}", {"ipp": "X"})
|
||||||
|
assert r == "X"
|
||||||
|
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------
|
||||||
|
# Action types & types serveur
|
||||||
|
# ----------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_extract_text_in_allowed():
|
||||||
|
assert "extract_text" in _ALLOWED_ACTION_TYPES
|
||||||
|
|
||||||
|
|
||||||
|
def test_t2a_decision_in_allowed():
|
||||||
|
assert "t2a_decision" in _ALLOWED_ACTION_TYPES
|
||||||
|
|
||||||
|
|
||||||
|
def test_server_side_types():
|
||||||
|
assert _SERVER_SIDE_ACTION_TYPES == {"extract_text", "t2a_decision"}
|
||||||
|
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------
|
||||||
|
# Handler extract_text
|
||||||
|
# ----------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_handle_extract_text_stores_variable():
|
||||||
|
state = _create_replay_state("rep1", "wf", "sess", 3)
|
||||||
|
last_hb = {"sess": {"path": "/fake/heartbeat.png", "timestamp": 0}}
|
||||||
|
action = {
|
||||||
|
"type": "extract_text",
|
||||||
|
"parameters": {"output_var": "texte_motif", "paragraph": True},
|
||||||
|
}
|
||||||
|
with patch(
|
||||||
|
"core.llm.extract_text_from_image",
|
||||||
|
return_value="Patient asthme peakflow 260",
|
||||||
|
):
|
||||||
|
ok = _handle_extract_text_action(action, state, "sess", last_hb)
|
||||||
|
assert ok is True
|
||||||
|
assert state["variables"]["texte_motif"] == "Patient asthme peakflow 260"
|
||||||
|
|
||||||
|
|
||||||
|
def test_handle_extract_text_no_heartbeat_stores_empty():
|
||||||
|
state = _create_replay_state("rep1", "wf", "sess", 3)
|
||||||
|
last_hb = {} # pas de heartbeat
|
||||||
|
action = {"type": "extract_text", "parameters": {"output_var": "v"}}
|
||||||
|
ok = _handle_extract_text_action(action, state, "sess", last_hb)
|
||||||
|
assert ok is False
|
||||||
|
assert state["variables"]["v"] == ""
|
||||||
|
|
||||||
|
|
||||||
|
def test_handle_extract_text_default_var_name():
|
||||||
|
state = _create_replay_state("rep1", "wf", "sess", 3)
|
||||||
|
last_hb = {"sess": {"path": "/x.png", "timestamp": 0}}
|
||||||
|
action = {"type": "extract_text", "parameters": {}}
|
||||||
|
with patch("core.llm.extract_text_from_image", return_value="abc"):
|
||||||
|
_handle_extract_text_action(action, state, "sess", last_hb)
|
||||||
|
assert "extracted_text" in state["variables"]
|
||||||
|
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------
|
||||||
|
# Handler t2a_decision
|
||||||
|
# ----------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_handle_t2a_decision_stores_json():
|
||||||
|
state = _create_replay_state("rep1", "wf", "sess", 3)
|
||||||
|
action = {
|
||||||
|
"type": "t2a_decision",
|
||||||
|
"parameters": {
|
||||||
|
"input_template": "Patient 78 ans, asthme, peakflow 260",
|
||||||
|
"output_var": "decision_t2a",
|
||||||
|
"model": "qwen2.5:7b",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
fake_result = {
|
||||||
|
"decision": "REQUALIFICATION_HOSPITALISATION",
|
||||||
|
"justification": "Surveillance continue requise",
|
||||||
|
"confiance": "elevee",
|
||||||
|
"_elapsed_s": 4.2,
|
||||||
|
}
|
||||||
|
with patch("core.llm.analyze_dpi", return_value=fake_result):
|
||||||
|
ok = _handle_t2a_decision_action(action, state)
|
||||||
|
assert ok is True
|
||||||
|
assert state["variables"]["decision_t2a"]["decision"] == "REQUALIFICATION_HOSPITALISATION"
|
||||||
|
|
||||||
|
|
||||||
|
def test_handle_t2a_decision_empty_input_returns_indetermine():
|
||||||
|
state = _create_replay_state("rep1", "wf", "sess", 3)
|
||||||
|
action = {"type": "t2a_decision", "parameters": {"input_template": "", "output_var": "r"}}
|
||||||
|
ok = _handle_t2a_decision_action(action, state)
|
||||||
|
assert ok is False
|
||||||
|
assert state["variables"]["r"]["decision"] == "INDETERMINE"
|
||||||
|
|
||||||
|
|
||||||
|
def test_handle_t2a_decision_analyze_exception():
|
||||||
|
state = _create_replay_state("rep1", "wf", "sess", 3)
|
||||||
|
action = {"type": "t2a_decision", "parameters": {"input_template": "x", "output_var": "r"}}
|
||||||
|
with patch("core.llm.analyze_dpi", side_effect=RuntimeError("ollama down")):
|
||||||
|
ok = _handle_t2a_decision_action(action, state)
|
||||||
|
assert ok is False
|
||||||
|
assert state["variables"]["r"]["decision"] == "INDETERMINE"
|
||||||
|
assert "ollama down" in state["variables"]["r"]["_error"]
|
||||||
|
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------
|
||||||
|
# Edge → action normalisée
|
||||||
|
# ----------------------------------------------------------------------
|
||||||
|
|
||||||
|
class _FakeAction:
|
||||||
|
def __init__(self, type_, parameters=None):
|
||||||
|
self.type = type_
|
||||||
|
self.target = None
|
||||||
|
self.parameters = parameters or {}
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeEdge:
|
||||||
|
def __init__(self, action, edge_id="e1", from_node="n1", to_node="n2"):
|
||||||
|
self.edge_id = edge_id
|
||||||
|
self.from_node = from_node
|
||||||
|
self.to_node = to_node
|
||||||
|
self.action = action
|
||||||
|
|
||||||
|
|
||||||
|
def test_edge_to_action_extract_text():
|
||||||
|
edge = _FakeEdge(_FakeAction(
|
||||||
|
"extract_text",
|
||||||
|
parameters={"output_var": "texte_examens", "paragraph": True},
|
||||||
|
))
|
||||||
|
actions = _edge_to_normalized_actions(edge, params={})
|
||||||
|
assert len(actions) == 1
|
||||||
|
a = actions[0]
|
||||||
|
assert a["type"] == "extract_text"
|
||||||
|
assert a["parameters"]["output_var"] == "texte_examens"
|
||||||
|
assert a["parameters"]["paragraph"] is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_edge_to_action_t2a_decision():
|
||||||
|
edge = _FakeEdge(_FakeAction(
|
||||||
|
"t2a_decision",
|
||||||
|
parameters={
|
||||||
|
"input_template": "{{texte_motif}}",
|
||||||
|
"output_var": "result",
|
||||||
|
"model": "qwen2.5:7b",
|
||||||
|
},
|
||||||
|
))
|
||||||
|
actions = _edge_to_normalized_actions(edge, params={})
|
||||||
|
a = actions[0]
|
||||||
|
assert a["type"] == "t2a_decision"
|
||||||
|
assert a["parameters"]["input_template"] == "{{texte_motif}}"
|
||||||
|
assert a["parameters"]["output_var"] == "result"
|
||||||
|
assert a["parameters"]["model"] == "qwen2.5:7b"
|
||||||
|
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------
|
||||||
|
# Bridge VWB → core
|
||||||
|
# ----------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_vwb_extract_text_passthrough():
|
||||||
|
assert VWB_ACTION_TO_CORE["extract_text"] == "extract_text"
|
||||||
|
|
||||||
|
|
||||||
|
def test_vwb_t2a_decision_passthrough():
|
||||||
|
assert VWB_ACTION_TO_CORE["t2a_decision"] == "t2a_decision"
|
||||||
|
|
||||||
|
|
||||||
|
def test_vwb_params_extract_text_preserves_output_var():
|
||||||
|
p = _vwb_params_to_core("extract_text", {"output_var": "v", "paragraph": False})
|
||||||
|
assert p == {"output_var": "v", "paragraph": False}
|
||||||
|
|
||||||
|
|
||||||
|
def test_vwb_params_extract_text_legacy_variable_name():
|
||||||
|
"""Compat avec l'ancien paramètre variable_name côté VWB."""
|
||||||
|
p = _vwb_params_to_core("extract_text", {"variable_name": "v_legacy"})
|
||||||
|
assert p["output_var"] == "v_legacy"
|
||||||
|
|
||||||
|
|
||||||
|
def test_vwb_params_t2a_decision_preserves_all():
|
||||||
|
p = _vwb_params_to_core("t2a_decision", {
|
||||||
|
"input_template": "DPI {{ipp}}",
|
||||||
|
"output_var": "dec",
|
||||||
|
"model": "qwen2.5:7b",
|
||||||
|
})
|
||||||
|
assert p == {"input_template": "DPI {{ipp}}", "output_var": "dec", "model": "qwen2.5:7b"}
|
||||||
|
|
||||||
|
|
||||||
|
def test_export_workflow_with_t2a_chain():
|
||||||
|
"""Workflow VWB extract_text → t2a_decision → pause_for_human export propre."""
|
||||||
|
workflow_data = {"id": "wf_t2a", "name": "Demo T2A"}
|
||||||
|
steps_data = [
|
||||||
|
{"id": "s1", "action_type": "click_anchor", "parameters": {"target_text": "25003284"}, "label": "Clic IPP"},
|
||||||
|
{"id": "s2", "action_type": "extract_text", "parameters": {"output_var": "dpi"}, "label": "OCR"},
|
||||||
|
{"id": "s3", "action_type": "t2a_decision", "parameters": {
|
||||||
|
"input_template": "{{dpi}}", "output_var": "dec", "model": "qwen2.5:7b",
|
||||||
|
}, "label": "Analyse"},
|
||||||
|
{"id": "s4", "action_type": "pause_for_human", "parameters": {
|
||||||
|
"message": "Décision : {{dec.decision}} — {{dec.justification}}",
|
||||||
|
}, "label": "Validation"},
|
||||||
|
{"id": "s5", "action_type": "click_anchor", "parameters": {"target_text": "Enregistrer"}, "label": "Clic Enregistrer"},
|
||||||
|
]
|
||||||
|
core = convert_vwb_to_core_workflow(workflow_data, steps_data)
|
||||||
|
edge_types = [e["action"]["type"] for e in core["edges"]]
|
||||||
|
assert "extract_text" in edge_types
|
||||||
|
assert "t2a_decision" in edge_types
|
||||||
|
assert "pause_for_human" in edge_types
|
||||||
|
# Vérifier que le templating est bien transporté
|
||||||
|
t2a_edge = next(e for e in core["edges"] if e["action"]["type"] == "t2a_decision")
|
||||||
|
assert t2a_edge["action"]["parameters"]["input_template"] == "{{dpi}}"
|
||||||
@@ -868,6 +868,60 @@ def _load_anchor_metadata(anchor_id: str) -> Optional[Dict]:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _inject_anchor_targeting(action: Dict, anchor_id: str) -> None:
|
||||||
|
"""Enrichit une action avec la cible visuelle (x_pct/y_pct + visual_mode/target_spec).
|
||||||
|
|
||||||
|
Mutation in-place de `action`. Utilisé pour click_anchor*, type_text et
|
||||||
|
type_secret — toute action qui doit cibler une zone visuelle précise avant
|
||||||
|
d'agir (clic ou frappe avec focus).
|
||||||
|
|
||||||
|
Sans cette injection, l'agent côté Windows ne peut pas faire le pre-click
|
||||||
|
de focus avant `_type_text`, et le texte tape dans le vide.
|
||||||
|
"""
|
||||||
|
if not anchor_id:
|
||||||
|
return
|
||||||
|
|
||||||
|
anchor_meta = _load_anchor_metadata(anchor_id)
|
||||||
|
|
||||||
|
# Coordonnées du centre du bbox (fallback si template matching échoue)
|
||||||
|
if anchor_meta:
|
||||||
|
bbox = anchor_meta.get('bounding_box', {})
|
||||||
|
orig = anchor_meta.get('original_size', {})
|
||||||
|
orig_w = orig.get('width', 1920)
|
||||||
|
orig_h = orig.get('height', 1080)
|
||||||
|
if bbox.get('x') is not None and orig_w > 0 and orig_h > 0:
|
||||||
|
cx = (bbox['x'] + bbox.get('width', 0) / 2) / orig_w
|
||||||
|
cy = (bbox['y'] + bbox.get('height', 0) / 2) / orig_h
|
||||||
|
action['x_pct'] = round(cx, 4)
|
||||||
|
action['y_pct'] = round(cy, 4)
|
||||||
|
|
||||||
|
# Image de l'ancre pour template matching côté agent
|
||||||
|
anchor_b64 = _load_anchor_image_b64(anchor_id)
|
||||||
|
if anchor_b64:
|
||||||
|
target_spec = {
|
||||||
|
'anchor_image_base64': anchor_b64,
|
||||||
|
'anchor_id': anchor_id,
|
||||||
|
}
|
||||||
|
if anchor_meta:
|
||||||
|
target_spec['anchor_bbox'] = anchor_meta.get('bounding_box', {})
|
||||||
|
target_spec['original_size'] = anchor_meta.get('original_size', {})
|
||||||
|
|
||||||
|
action['visual_mode'] = True
|
||||||
|
action['target_spec'] = target_spec
|
||||||
|
logger.info(
|
||||||
|
"Action %s : ancre '%s' chargée (%d Ko), visual_mode activé",
|
||||||
|
action.get('action_id', '?'),
|
||||||
|
anchor_id,
|
||||||
|
len(anchor_b64) // 1024,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
logger.warning(
|
||||||
|
"Action %s : ancre '%s' introuvable, fallback blind mode",
|
||||||
|
action.get('action_id', '?'),
|
||||||
|
anchor_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@api_v3_bp.route('/execute-windows', methods=['POST'])
|
@api_v3_bp.route('/execute-windows', methods=['POST'])
|
||||||
def execute_windows():
|
def execute_windows():
|
||||||
"""Proxy les actions du workflow vers le streaming server pour exécution sur Windows.
|
"""Proxy les actions du workflow vers le streaming server pour exécution sur Windows.
|
||||||
@@ -932,45 +986,14 @@ def execute_windows():
|
|||||||
if vwb_type in _ANCHOR_CLICK_TYPES:
|
if vwb_type in _ANCHOR_CLICK_TYPES:
|
||||||
anchor_id = action.get('anchor_id')
|
anchor_id = action.get('anchor_id')
|
||||||
if anchor_id:
|
if anchor_id:
|
||||||
anchor_meta = _load_anchor_metadata(anchor_id)
|
_inject_anchor_targeting(action, anchor_id)
|
||||||
|
|
||||||
# Calculer les coordonnées du centre du bbox (fallback si visual échoue)
|
# Propagation du by_text (ciblage textuel prioritaire sur template)
|
||||||
if anchor_meta:
|
_by_text = params.get('by_text', '')
|
||||||
bbox = anchor_meta.get('bounding_box', {})
|
if _by_text:
|
||||||
orig = anchor_meta.get('original_size', {})
|
action['by_text'] = _by_text
|
||||||
orig_w = orig.get('width', 1920)
|
if 'target_spec' in action:
|
||||||
orig_h = orig.get('height', 1080)
|
action['target_spec']['by_text'] = _by_text
|
||||||
if bbox.get('x') is not None and orig_w > 0 and orig_h > 0:
|
|
||||||
cx = (bbox['x'] + bbox.get('width', 0) / 2) / orig_w
|
|
||||||
cy = (bbox['y'] + bbox.get('height', 0) / 2) / orig_h
|
|
||||||
action['x_pct'] = round(cx, 4)
|
|
||||||
action['y_pct'] = round(cy, 4)
|
|
||||||
|
|
||||||
# Tenter aussi le visual_mode (template matching)
|
|
||||||
anchor_b64 = _load_anchor_image_b64(anchor_id)
|
|
||||||
if anchor_b64:
|
|
||||||
target_spec = {
|
|
||||||
'anchor_image_base64': anchor_b64,
|
|
||||||
'anchor_id': anchor_id,
|
|
||||||
}
|
|
||||||
if anchor_meta:
|
|
||||||
target_spec['anchor_bbox'] = anchor_meta.get('bounding_box', {})
|
|
||||||
target_spec['original_size'] = anchor_meta.get('original_size', {})
|
|
||||||
|
|
||||||
action['visual_mode'] = True
|
|
||||||
action['target_spec'] = target_spec
|
|
||||||
logger.info(
|
|
||||||
"Action %s : ancre '%s' chargée (%d Ko), visual_mode activé",
|
|
||||||
action.get('action_id', '?'),
|
|
||||||
anchor_id,
|
|
||||||
len(anchor_b64) // 1024,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
logger.warning(
|
|
||||||
"Action %s : ancre '%s' introuvable, fallback blind mode",
|
|
||||||
action.get('action_id', '?'),
|
|
||||||
anchor_id,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Mapper le bouton selon le type de clic VWB
|
# Mapper le bouton selon le type de clic VWB
|
||||||
if vwb_type == 'double_click_anchor':
|
if vwb_type == 'double_click_anchor':
|
||||||
@@ -979,13 +1002,18 @@ def execute_windows():
|
|||||||
action['button'] = 'right'
|
action['button'] = 'right'
|
||||||
|
|
||||||
# ---------------------------------------------------------------
|
# ---------------------------------------------------------------
|
||||||
# type_text / type_secret → extraire le texte
|
# type_text / type_secret → extraire le texte + cibler la zone
|
||||||
|
# de saisie si une ancre visuelle est associée au step.
|
||||||
|
# Sans ancre, l'agent tape là où le focus se trouve déjà
|
||||||
|
# (compatibilité avec les workflows historiques sans anchor).
|
||||||
# ---------------------------------------------------------------
|
# ---------------------------------------------------------------
|
||||||
if vwb_type in ('type_text', 'type_secret') and 'text' in params:
|
if vwb_type in ('type_text', 'type_secret') and 'text' in params:
|
||||||
action['text'] = params['text']
|
action['text'] = params['text']
|
||||||
# Ne pas forcer un clic préalable à (0,0) si pas de coordonnées
|
anchor_id = action.get('anchor_id') or (
|
||||||
# L'exécuteur ne cliquera que si x_pct > 0 et y_pct > 0
|
params.get('visual_anchor') or {}
|
||||||
# (le clic de positionnement est fait par l'action click_anchor précédente)
|
).get('anchor_id')
|
||||||
|
if anchor_id:
|
||||||
|
_inject_anchor_targeting(action, anchor_id)
|
||||||
|
|
||||||
# ---------------------------------------------------------------
|
# ---------------------------------------------------------------
|
||||||
# keyboard_shortcut / hotkey → extraire les touches
|
# keyboard_shortcut / hotkey → extraire les touches
|
||||||
@@ -1043,11 +1071,26 @@ def execute_windows():
|
|||||||
# Sinon, retirer les actions fichiers du flux principal
|
# Sinon, retirer les actions fichiers du flux principal
|
||||||
data['actions'] = non_file_actions
|
data['actions'] = non_file_actions
|
||||||
|
|
||||||
|
# Token Bearer pour le streaming server (auth obligatoire)
|
||||||
|
_stream_token = os.environ.get('RPA_API_TOKEN', '')
|
||||||
|
_stream_headers = {'Authorization': f'Bearer {_stream_token}'} if _stream_token else {}
|
||||||
|
|
||||||
|
# L'agent Windows poll sous session "agent_demo_user" (= agent_{user_id}, user_id="demo_user")
|
||||||
|
# On injecte directement dans cette session pour éviter le transfer cross-session
|
||||||
|
# et pour que /replay/raw ne tente pas l'auto-détection d'une session "sess_*"
|
||||||
|
# (qui échoue avec "Aucune session Agent V1 active" si l'agent n'a pas créé de session V1).
|
||||||
|
if not data.get('session_id'):
|
||||||
|
data['session_id'] = 'agent_demo_user'
|
||||||
|
|
||||||
# Injecter le machine_id pour le ciblage multi-machine
|
# Injecter le machine_id pour le ciblage multi-machine
|
||||||
# Chercher la première machine Windows connectée si pas spécifié
|
# Chercher la première machine Windows connectée si pas spécifié
|
||||||
if 'machine_id' not in data or not data.get('machine_id'):
|
if 'machine_id' not in data or not data.get('machine_id'):
|
||||||
try:
|
try:
|
||||||
machines_resp = req.get('http://localhost:5005/api/v1/traces/stream/machines', timeout=3)
|
machines_resp = req.get(
|
||||||
|
'http://localhost:5005/api/v1/traces/stream/machines',
|
||||||
|
headers=_stream_headers,
|
||||||
|
timeout=3,
|
||||||
|
)
|
||||||
if machines_resp.ok:
|
if machines_resp.ok:
|
||||||
machines = machines_resp.json().get('machines', [])
|
machines = machines_resp.json().get('machines', [])
|
||||||
for m in machines:
|
for m in machines:
|
||||||
@@ -1062,6 +1105,7 @@ def execute_windows():
|
|||||||
resp = req.post(
|
resp = req.post(
|
||||||
'http://localhost:5005/api/v1/traces/stream/replay/raw',
|
'http://localhost:5005/api/v1/traces/stream/replay/raw',
|
||||||
json=data,
|
json=data,
|
||||||
|
headers=_stream_headers,
|
||||||
timeout=30,
|
timeout=30,
|
||||||
)
|
)
|
||||||
return jsonify(resp.json()), resp.status_code
|
return jsonify(resp.json()), resp.status_code
|
||||||
|
|||||||
@@ -40,6 +40,17 @@ if _ROOT not in sys.path:
|
|||||||
STREAMING_SERVER_URL = "http://localhost:5005"
|
STREAMING_SERVER_URL = "http://localhost:5005"
|
||||||
|
|
||||||
|
|
||||||
|
def _stream_headers() -> Dict[str, str]:
|
||||||
|
"""Bearer token pour les appels proxy VWB → streaming server.
|
||||||
|
|
||||||
|
Retourne un dict vide si RPA_API_TOKEN n'est pas défini ; dans ce cas
|
||||||
|
les appels échoueront en 401 (auth obligatoire côté streaming).
|
||||||
|
"""
|
||||||
|
import os as _os
|
||||||
|
token = _os.environ.get("RPA_API_TOKEN", "")
|
||||||
|
return {"Authorization": f"Bearer {token}"} if token else {}
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Helpers — nom par défaut à l'import
|
# Helpers — nom par défaut à l'import
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -162,6 +173,7 @@ def list_learned_workflows():
|
|||||||
resp = http_requests.get(
|
resp = http_requests.get(
|
||||||
f"{STREAMING_SERVER_URL}/api/v1/traces/stream/workflows",
|
f"{STREAMING_SERVER_URL}/api/v1/traces/stream/workflows",
|
||||||
params=params,
|
params=params,
|
||||||
|
headers=_stream_headers(),
|
||||||
timeout=3,
|
timeout=3,
|
||||||
)
|
)
|
||||||
if resp.ok:
|
if resp.ok:
|
||||||
@@ -526,6 +538,7 @@ def _load_core_workflow(
|
|||||||
resp = http_requests.get(
|
resp = http_requests.get(
|
||||||
f"{STREAMING_SERVER_URL}/api/v1/traces/stream/workflows",
|
f"{STREAMING_SERVER_URL}/api/v1/traces/stream/workflows",
|
||||||
params=params,
|
params=params,
|
||||||
|
headers=_stream_headers(),
|
||||||
timeout=3,
|
timeout=3,
|
||||||
)
|
)
|
||||||
if resp.ok:
|
if resp.ok:
|
||||||
@@ -538,6 +551,7 @@ def _load_core_workflow(
|
|||||||
try:
|
try:
|
||||||
detail_resp = http_requests.get(
|
detail_resp = http_requests.get(
|
||||||
f"{STREAMING_SERVER_URL}/api/v1/traces/stream/workflow/{workflow_id}",
|
f"{STREAMING_SERVER_URL}/api/v1/traces/stream/workflow/{workflow_id}",
|
||||||
|
headers=_stream_headers(),
|
||||||
timeout=5,
|
timeout=5,
|
||||||
)
|
)
|
||||||
if detail_resp.ok:
|
if detail_resp.ok:
|
||||||
@@ -573,6 +587,7 @@ def _notify_streaming_reload():
|
|||||||
try:
|
try:
|
||||||
http_requests.post(
|
http_requests.post(
|
||||||
f"{STREAMING_SERVER_URL}/api/v1/traces/stream/reload-workflows",
|
f"{STREAMING_SERVER_URL}/api/v1/traces/stream/reload-workflows",
|
||||||
|
headers=_stream_headers(),
|
||||||
timeout=2,
|
timeout=2,
|
||||||
)
|
)
|
||||||
logger.debug("Streaming server notifié pour rechargement des workflows")
|
logger.debug("Streaming server notifié pour rechargement des workflows")
|
||||||
|
|||||||
@@ -13,11 +13,17 @@ from flask_caching import Cache
|
|||||||
from flask_migrate import Migrate
|
from flask_migrate import Migrate
|
||||||
import os
|
import os
|
||||||
import logging
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
from logging.handlers import RotatingFileHandler
|
from logging.handlers import RotatingFileHandler
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
# Load environment variables
|
# Charger .env.local depuis la racine du projet AVANT tout : il contient
|
||||||
load_dotenv()
|
# RPA_API_TOKEN utilisé pour le proxy VWB → streaming server. Sans cela,
|
||||||
|
# le token est absent après chaque restart manuel du backend et tous les
|
||||||
|
# appels proxy renvoient 401 « Token API invalide ».
|
||||||
|
_PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
|
||||||
|
load_dotenv(_PROJECT_ROOT / '.env.local')
|
||||||
|
load_dotenv() # fallback .env dans cwd (n'écrase pas les vars déjà définies)
|
||||||
|
|
||||||
# Initialize Flask app
|
# Initialize Flask app
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
|
|||||||
Binary file not shown.
@@ -57,7 +57,9 @@ VWB_ACTION_TO_CORE = {
|
|||||||
"scroll_to_anchor": "scroll",
|
"scroll_to_anchor": "scroll",
|
||||||
"visual_condition": "evaluate_condition",
|
"visual_condition": "evaluate_condition",
|
||||||
"screenshot_evidence": "screenshot",
|
"screenshot_evidence": "screenshot",
|
||||||
"extract_text": "extract_data",
|
"extract_text": "extract_text", # passthrough — handler serveur OCR + variable
|
||||||
|
"pause_for_human": "pause_for_human", # passthrough — intercepté par api_stream /replay/next
|
||||||
|
"t2a_decision": "t2a_decision", # passthrough — handler serveur LLM T2A + variable
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -660,6 +662,23 @@ def _vwb_params_to_core(action_type: str, params: Dict[str, Any]) -> Dict[str, A
|
|||||||
elif action_type == "wait_for_anchor":
|
elif action_type == "wait_for_anchor":
|
||||||
core_params["duration_ms"] = params.get("duration_ms", 2000)
|
core_params["duration_ms"] = params.get("duration_ms", 2000)
|
||||||
|
|
||||||
|
elif action_type == "pause_for_human":
|
||||||
|
core_params["message"] = params.get("message", "Validation requise")
|
||||||
|
|
||||||
|
elif action_type == "extract_text":
|
||||||
|
# variable_name côté VWB → output_var côté core (compat avec
|
||||||
|
# le catalogue VWB existant qui utilise variable_name)
|
||||||
|
var = params.get("output_var") or params.get("variable_name") or "extracted_text"
|
||||||
|
core_params["output_var"] = var
|
||||||
|
if "paragraph" in params:
|
||||||
|
core_params["paragraph"] = bool(params["paragraph"])
|
||||||
|
|
||||||
|
elif action_type == "t2a_decision":
|
||||||
|
core_params["input_template"] = params.get("input_template", "")
|
||||||
|
core_params["output_var"] = params.get("output_var", "t2a_result")
|
||||||
|
if params.get("model"):
|
||||||
|
core_params["model"] = params["model"]
|
||||||
|
|
||||||
return core_params
|
return core_params
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -4,7 +4,8 @@ import type { UIElement } from '../services/uiDetection';
|
|||||||
import {
|
import {
|
||||||
loadLibraryAsync,
|
loadLibraryAsync,
|
||||||
saveLibrary,
|
saveLibrary,
|
||||||
compressThumbnail,
|
addCaptureToLibrary,
|
||||||
|
removeCaptureFromLibrary,
|
||||||
} from '../services/captureLibraryStorage';
|
} from '../services/captureLibraryStorage';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -40,6 +41,8 @@ interface LibraryItem {
|
|||||||
timestamp: Date;
|
timestamp: Date;
|
||||||
sessionId?: string;
|
sessionId?: string;
|
||||||
favorite?: boolean;
|
favorite?: boolean;
|
||||||
|
format?: 'v2';
|
||||||
|
fullImageUrl?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
export default function CapturePanel({
|
export default function CapturePanel({
|
||||||
@@ -55,7 +58,7 @@ export default function CapturePanel({
|
|||||||
const [showLibraryGallery, setShowLibraryGallery] = useState(false);
|
const [showLibraryGallery, setShowLibraryGallery] = useState(false);
|
||||||
const [library, setLibrary] = useState<LibraryItem[]>([]);
|
const [library, setLibrary] = useState<LibraryItem[]>([]);
|
||||||
const [currentCapture, setCurrentCapture] = useState<Capture | null>(null);
|
const [currentCapture, setCurrentCapture] = useState<Capture | null>(null);
|
||||||
const [timerSeconds, setTimerSeconds] = useState(0);
|
const [timerSeconds, setTimerSeconds] = useState(5);
|
||||||
const [countdown, setCountdown] = useState<number | null>(null);
|
const [countdown, setCountdown] = useState<number | null>(null);
|
||||||
// Elements detectes sur l'apercu miniature
|
// Elements detectes sur l'apercu miniature
|
||||||
const [previewElements, setPreviewElements] = useState<UIElement[]>([]);
|
const [previewElements, setPreviewElements] = useState<UIElement[]>([]);
|
||||||
@@ -89,24 +92,35 @@ export default function CapturePanel({
|
|||||||
}
|
}
|
||||||
}, [library, libraryLoaded]);
|
}, [library, libraryLoaded]);
|
||||||
|
|
||||||
// Ajouter capture a la bibliotheque (thumbnail compresse JPEG 320x240)
|
// Helper : ajoute une capture à la bibliothèque (PNG HD upload backend +
|
||||||
|
// mise à jour de l'état local). Utilisé par le useEffect [capture] et par
|
||||||
|
// doSmartCapture (capture locale Windows qui ne passe pas par la prop parente).
|
||||||
|
const addToLibrary = useCallback(async (cap: Capture) => {
|
||||||
|
try {
|
||||||
|
const item = await addCaptureToLibrary(cap, { id: `cap_${Date.now()}` });
|
||||||
|
setLibrary(prev => [
|
||||||
|
{
|
||||||
|
id: item.id,
|
||||||
|
capture: item.capture,
|
||||||
|
timestamp: typeof item.timestamp === 'string' ? new Date(item.timestamp) : item.timestamp,
|
||||||
|
sessionId: item.sessionId,
|
||||||
|
favorite: item.favorite ?? false,
|
||||||
|
format: item.format,
|
||||||
|
fullImageUrl: item.fullImageUrl,
|
||||||
|
},
|
||||||
|
...prev.slice(0, 19),
|
||||||
|
]);
|
||||||
|
} catch (e) {
|
||||||
|
console.warn('[CapturePanel] Échec ajout bibliothèque', e);
|
||||||
|
}
|
||||||
|
}, []);
|
||||||
|
|
||||||
|
// Capture venant du parent (path "fallback local" via prop capture)
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (!capture) return;
|
if (!capture) return;
|
||||||
setCurrentCapture(capture);
|
setCurrentCapture(capture);
|
||||||
let cancelled = false;
|
void addToLibrary(capture);
|
||||||
(async () => {
|
}, [capture, addToLibrary]);
|
||||||
const compressed = await compressThumbnail(capture.screenshot_base64);
|
|
||||||
if (cancelled) return;
|
|
||||||
const newItem: LibraryItem = {
|
|
||||||
id: `cap_${Date.now()}`,
|
|
||||||
capture: { ...capture, screenshot_base64: compressed },
|
|
||||||
timestamp: new Date(),
|
|
||||||
favorite: false,
|
|
||||||
};
|
|
||||||
setLibrary(prev => [newItem, ...prev.slice(0, 19)]);
|
|
||||||
})();
|
|
||||||
return () => { cancelled = true; };
|
|
||||||
}, [capture]);
|
|
||||||
|
|
||||||
// Detecter les elements UI quand une capture arrive
|
// Detecter les elements UI quand une capture arrive
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
@@ -152,19 +166,24 @@ export default function CapturePanel({
|
|||||||
const resp = await fetch('/api/screen-capture/capture-windows', { method: 'POST' });
|
const resp = await fetch('/api/screen-capture/capture-windows', { method: 'POST' });
|
||||||
const data = await resp.json();
|
const data = await resp.json();
|
||||||
if (resp.ok && data.image) {
|
if (resp.ok && data.image) {
|
||||||
setCurrentCapture({
|
const cap: Capture = {
|
||||||
screenshot_base64: data.image,
|
screenshot_base64: data.image,
|
||||||
width: data.width,
|
width: data.width,
|
||||||
height: data.height,
|
height: data.height,
|
||||||
source: data.source || 'windows',
|
source: data.source || 'windows',
|
||||||
} as any);
|
} as any;
|
||||||
|
setCurrentCapture(cap);
|
||||||
|
// Ajouter à la bibliothèque (le useEffect [capture] ne tire pas
|
||||||
|
// ici car on ne passe pas par la prop parente)
|
||||||
|
void addToLibrary(cap);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
console.warn('Agent Windows indisponible, fallback local:', data.error);
|
console.warn('Agent Windows indisponible, fallback local:', data.error);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.warn('Erreur capture Windows, fallback local:', err);
|
console.warn('Erreur capture Windows, fallback local:', err);
|
||||||
}
|
}
|
||||||
// Fallback : capture locale (ecran du serveur Linux)
|
// Fallback : capture locale (ecran du serveur Linux) — passe par la prop
|
||||||
|
// parente, l'ajout se fera dans le useEffect [capture]
|
||||||
onCapture();
|
onCapture();
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -189,13 +208,44 @@ export default function CapturePanel({
|
|||||||
}, 1000);
|
}, 1000);
|
||||||
};
|
};
|
||||||
|
|
||||||
const handleLibrarySelect = (item: LibraryItem) => {
|
const handleLibrarySelect = async (item: LibraryItem) => {
|
||||||
setCurrentCapture(item.capture);
|
// Format v2 : remplacer le thumbnail par le PNG HD téléchargé du backend
|
||||||
|
// pour que la sélection d'ancre utilise une image non pixélisée.
|
||||||
|
if (item.format === 'v2' && item.fullImageUrl) {
|
||||||
|
try {
|
||||||
|
const resp = await fetch(item.fullImageUrl);
|
||||||
|
if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
|
||||||
|
const blob = await resp.blob();
|
||||||
|
const base64 = await new Promise<string>((resolve, reject) => {
|
||||||
|
const reader = new FileReader();
|
||||||
|
reader.onload = () => {
|
||||||
|
const result = reader.result as string;
|
||||||
|
// FileReader → "data:image/png;base64,..." → on retire le préfixe
|
||||||
|
const idx = result.indexOf(',');
|
||||||
|
resolve(idx >= 0 ? result.slice(idx + 1) : result);
|
||||||
|
};
|
||||||
|
reader.onerror = () => reject(reader.error);
|
||||||
|
reader.readAsDataURL(blob);
|
||||||
|
});
|
||||||
|
setCurrentCapture({ ...item.capture, screenshot_base64: base64 });
|
||||||
|
} catch (e) {
|
||||||
|
console.warn('[CaptureLibrary] Échec chargement HD, fallback thumbnail', e);
|
||||||
|
setCurrentCapture(item.capture);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
setCurrentCapture(item.capture);
|
||||||
|
}
|
||||||
setIsFullscreen(true);
|
setIsFullscreen(true);
|
||||||
};
|
};
|
||||||
|
|
||||||
const handleDeleteLibraryItem = (id: string) => {
|
const handleDeleteLibraryItem = (id: string) => {
|
||||||
|
const target = library.find(it => it.id === id);
|
||||||
setLibrary(prev => prev.filter(item => item.id !== id));
|
setLibrary(prev => prev.filter(item => item.id !== id));
|
||||||
|
// v2 : supprimer aussi le PNG côté backend (le saveLibrary auto-déclenché
|
||||||
|
// par le useEffect ne nettoie que le JSON, pas les fichiers PNG orphelins).
|
||||||
|
if (target?.format === 'v2') {
|
||||||
|
void removeCaptureFromLibrary(id, true);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
return (
|
return (
|
||||||
@@ -204,17 +254,35 @@ export default function CapturePanel({
|
|||||||
|
|
||||||
{/* Capture — auto-detection OS navigateur */}
|
{/* Capture — auto-detection OS navigateur */}
|
||||||
<div className="capture-controls">
|
<div className="capture-controls">
|
||||||
<button disabled={countdown !== null} onClick={doSmartCapture}>
|
<button disabled={countdown !== null} onClick={doSmartCapture} title="Capture immédiate (sans délai)">
|
||||||
Capturer
|
Capturer
|
||||||
</button>
|
</button>
|
||||||
<select value={timerSeconds} onChange={(e) => setTimerSeconds(Number(e.target.value))}>
|
<label style={{ display: 'flex', alignItems: 'center', gap: 4, fontSize: 12 }}>
|
||||||
<option value="0">Immediat</option>
|
Délai :
|
||||||
<option value="3">3 sec</option>
|
<select
|
||||||
<option value="5">5 sec</option>
|
value={String(timerSeconds)}
|
||||||
<option value="10">10 sec</option>
|
onChange={(e) => {
|
||||||
</select>
|
const v = Number(e.target.value);
|
||||||
<button onClick={handleTimerCapture} disabled={countdown !== null}>
|
console.log('[CapturePanel] timerSeconds →', v);
|
||||||
{countdown !== null ? countdown : 'Timer'}
|
setTimerSeconds(v);
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<option value="0">Immediat</option>
|
||||||
|
<option value="3">3 sec</option>
|
||||||
|
<option value="5">5 sec</option>
|
||||||
|
<option value="10">10 sec</option>
|
||||||
|
</select>
|
||||||
|
</label>
|
||||||
|
<button
|
||||||
|
onClick={handleTimerCapture}
|
||||||
|
disabled={countdown !== null}
|
||||||
|
title={`Capture après ${timerSeconds}s — utile pour préparer l'écran avant la prise`}
|
||||||
|
>
|
||||||
|
{countdown !== null
|
||||||
|
? `${countdown}…`
|
||||||
|
: timerSeconds === 0
|
||||||
|
? 'Timer'
|
||||||
|
: `Capturer dans ${timerSeconds}s`}
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|||||||
@@ -54,11 +54,11 @@ export default function ToolPalette() {
|
|||||||
className="tool-item"
|
className="tool-item"
|
||||||
draggable
|
draggable
|
||||||
onDragStart={(e) => onDragStart(e, action.type)}
|
onDragStart={(e) => onDragStart(e, action.type)}
|
||||||
title={action.label}
|
title={`${action.label}\n\n${action.description}${action.needsAnchor ? '\n\n🎯 Nécessite une ancre visuelle' : ''}${action.params.length > 0 ? '\n\nParamètres : ' + action.params.map(p => p.name).join(', ') : ''}`}
|
||||||
>
|
>
|
||||||
<span className="tool-icon">{action.icon}</span>
|
<span className="tool-icon">{action.icon}</span>
|
||||||
<span className="tool-label">{action.label}</span>
|
<span className="tool-label">{action.label}</span>
|
||||||
{action.needsAnchor && <span className="tool-anchor-badge">🎯</span>}
|
{action.needsAnchor && <span className="tool-anchor-badge" title="Nécessite de viser un élément à l'écran">🎯</span>}
|
||||||
</div>
|
</div>
|
||||||
))}
|
))}
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -43,6 +43,8 @@ export type ActionType =
|
|||||||
| 'screenshot_evidence'
|
| 'screenshot_evidence'
|
||||||
| 'visual_condition'
|
| 'visual_condition'
|
||||||
| 'loop_visual'
|
| 'loop_visual'
|
||||||
|
| 'pause_for_human'
|
||||||
|
| 't2a_decision'
|
||||||
| 'download_to_folder'
|
| 'download_to_folder'
|
||||||
| 'ai_analyze_text'
|
| 'ai_analyze_text'
|
||||||
| 'ai_ocr'
|
| 'ai_ocr'
|
||||||
@@ -108,8 +110,9 @@ export const ACTIONS: ActionDefinition[] = [
|
|||||||
] },
|
] },
|
||||||
|
|
||||||
// === EXTRACTION DE DONNÉES ===
|
// === EXTRACTION DE DONNÉES ===
|
||||||
{ type: 'extract_text', label: 'Extraire texte', icon: '📋', description: 'Extrait le texte visible dans la zone de l\'ancre via OCR.', category: 'data', needsAnchor: true, params: [
|
{ type: 'extract_text', label: 'Extraire texte (OCR écran)', icon: '📋', description: 'OCR EasyOCR fr+en sur le dernier screenshot. Stocke le texte dans une variable réutilisable plus loin via {{output_var}}. Pas d\'ancre nécessaire — extrait toute la page visible.', category: 'data', needsAnchor: false, params: [
|
||||||
{ name: 'variable_name', type: 'string', description: 'Nom de la variable pour stocker le résultat' }
|
{ name: 'output_var', type: 'string', description: 'Nom de la variable de sortie (ex: texte_motif). Réutilisable via {{nom}}.' },
|
||||||
|
{ name: 'paragraph', type: 'boolean', description: 'Regrouper en paragraphes (true) ou lignes brutes (false)' }
|
||||||
] },
|
] },
|
||||||
{ type: 'extract_table', label: 'Extraire tableau', icon: '📊', description: 'Extrait un tableau structuré depuis la zone de l\'ancre.', category: 'data', needsAnchor: true, params: [
|
{ type: 'extract_table', label: 'Extraire tableau', icon: '📊', description: 'Extrait un tableau structuré depuis la zone de l\'ancre.', category: 'data', needsAnchor: true, params: [
|
||||||
{ name: 'variable_name', type: 'string', description: 'Nom de la variable pour stocker le tableau' }
|
{ name: 'variable_name', type: 'string', description: 'Nom de la variable pour stocker le tableau' }
|
||||||
@@ -129,6 +132,14 @@ export const ACTIONS: ActionDefinition[] = [
|
|||||||
{ type: 'loop_visual', label: 'Boucle visuelle', icon: '🔁', description: 'Répète les étapes connectées tant que l\'ancre est visible.', category: 'logic', needsAnchor: true, hidden: true, params: [
|
{ type: 'loop_visual', label: 'Boucle visuelle', icon: '🔁', description: 'Répète les étapes connectées tant que l\'ancre est visible.', category: 'logic', needsAnchor: true, hidden: true, params: [
|
||||||
{ name: 'max_iterations', type: 'number', description: 'Nombre maximum d\'itérations' }
|
{ name: 'max_iterations', type: 'number', description: 'Nombre maximum d\'itérations' }
|
||||||
] },
|
] },
|
||||||
|
{ type: 'pause_for_human', label: 'Pause supervisée', icon: '⏸', description: 'Léa s\'arrête et demande validation humaine via une bulle interactive (boutons Continuer / Annuler).', category: 'logic', needsAnchor: false, params: [
|
||||||
|
{ name: 'message', type: 'string', description: 'Message affiché dans la bulle (ex: "Je ne suis pas sûre du critère 3, validez-vous UHCD ?")' }
|
||||||
|
] },
|
||||||
|
{ type: 't2a_decision', label: 'Décision T2A (LLM)', icon: '🧠', description: 'Analyse un DPI urgences via LLM local (qwen2.5:7b par défaut) et propose FORFAIT_URGENCE ou REQUALIFICATION_HOSPITALISATION. Retourne JSON {decision, justification, elements_pour/contre, confiance}. Bench validé 100% accuracy.', category: 'logic', needsAnchor: false, params: [
|
||||||
|
{ name: 'input_template', type: 'string', description: 'DPI à analyser. Supporte le templating {{var}} pour concaténer plusieurs extractions (ex: "{{texte_motif}}\\n{{texte_examens}}\\n{{texte_notes}}")' },
|
||||||
|
{ name: 'output_var', type: 'string', description: 'Variable de sortie (ex: decision_t2a). Accès aux champs : {{decision_t2a.decision}}, {{decision_t2a.justification}}' },
|
||||||
|
{ name: 'model', type: 'string', description: 'Modèle Ollama (default qwen2.5:7b). Autres : t2a-gemma3-27b-q4, gpt-oss:120b-cloud...' }
|
||||||
|
] },
|
||||||
|
|
||||||
// === INTELLIGENCE ARTIFICIELLE ===
|
// === INTELLIGENCE ARTIFICIELLE ===
|
||||||
{ type: 'ai_ocr', label: 'OCR Intelligent', icon: '📝', description: 'Reconnaissance de texte par IA sur la zone de l\'ancre.', category: 'ai', needsAnchor: true, params: [
|
{ type: 'ai_ocr', label: 'OCR Intelligent', icon: '📝', description: 'Reconnaissance de texte par IA sur la zone de l\'ancre.', category: 'ai', needsAnchor: true, params: [
|
||||||
|
|||||||
Reference in New Issue
Block a user