Compare commits
10 Commits
41c1250c99
...
backup-pre
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5543e25f9d | ||
|
|
2a07d8084b | ||
|
|
35b27ae492 | ||
|
|
b584bbabc3 | ||
|
|
8817f527e7 | ||
|
|
964856ab30 | ||
|
|
a67d896104 | ||
|
|
90c1d8036f | ||
|
|
6261002039 | ||
|
|
0e6e61f2b1 |
@@ -219,6 +219,10 @@ from .replay_engine import (
|
||||
_is_learned_workflow,
|
||||
_edge_to_normalized_actions,
|
||||
_substitute_variables,
|
||||
_resolve_runtime_vars,
|
||||
_SERVER_SIDE_ACTION_TYPES,
|
||||
_handle_extract_text_action,
|
||||
_handle_t2a_decision_action,
|
||||
_expand_compound_steps,
|
||||
_pre_check_screen_state as _pre_check_screen_state_impl,
|
||||
_detect_popup_hint as _detect_popup_hint_impl,
|
||||
@@ -2758,8 +2762,29 @@ async def get_next_action(session_id: str, machine_id: str = "default"):
|
||||
|
||||
Si la session de l'agent n'a pas d'actions en attente, cherche dans les
|
||||
autres queues de la MÊME machine (pas cross-machine).
|
||||
|
||||
Acquire timeout : si une action serveur lente (extract_text OCR,
|
||||
t2a_decision LLM) tient le lock, on retourne immédiatement
|
||||
{action: None, server_busy: True} avant que le client ne timeout à 5s.
|
||||
Sans cela, des actions seraient popped serveur puis envoyées sur des
|
||||
sockets clients déjà fermées par timeout — perdues silencieusement.
|
||||
|
||||
L'acquire et les actions serveur lentes sont exécutés via
|
||||
run_in_executor : sinon l'appel synchrone bloque l'event loop FastAPI
|
||||
(single-threaded) et même les polls qui devraient recevoir server_busy
|
||||
sont bloqués jusqu'à libération — ce qui annule l'effet du timeout.
|
||||
"""
|
||||
with _replay_lock:
|
||||
import asyncio
|
||||
loop = asyncio.get_event_loop()
|
||||
acquired = await loop.run_in_executor(None, _replay_lock.acquire, True, 4.5)
|
||||
if not acquired:
|
||||
return {
|
||||
"action": None,
|
||||
"session_id": session_id,
|
||||
"machine_id": machine_id,
|
||||
"server_busy": True,
|
||||
}
|
||||
try:
|
||||
# Verifier si le replay est en pause supervisee (target_not_found).
|
||||
# Dans ce cas, NE PAS envoyer d'action — attendre l'intervention utilisateur.
|
||||
for state in _replay_states.values():
|
||||
@@ -2824,6 +2849,7 @@ async def get_next_action(session_id: str, machine_id: str = "default"):
|
||||
break
|
||||
if target_state:
|
||||
queue = target_queue
|
||||
owning_replay = target_state
|
||||
_replay_queues[session_id] = target_queue
|
||||
del _replay_queues[target_sid]
|
||||
target_state["session_id"] = session_id
|
||||
@@ -2840,6 +2866,7 @@ async def get_next_action(session_id: str, machine_id: str = "default"):
|
||||
other_queue = _replay_queues.get(other_sid, [])
|
||||
if other_queue:
|
||||
queue = other_queue
|
||||
owning_replay = state
|
||||
_replay_queues[session_id] = other_queue
|
||||
del _replay_queues[other_sid]
|
||||
state["session_id"] = session_id
|
||||
@@ -2850,8 +2877,80 @@ async def get_next_action(session_id: str, machine_id: str = "default"):
|
||||
if not queue:
|
||||
return {"action": None, "session_id": session_id, "machine_id": machine_id}
|
||||
|
||||
# Peek à la prochaine action SANS la retirer (pour le pre-check)
|
||||
action = queue[0]
|
||||
# ── Boucle de traitement : actions serveur (extract_text, t2a_decision)
|
||||
# exécutées entièrement côté serveur jusqu'à trouver une action visuelle
|
||||
# à transmettre à l'Agent V1 ou un pause_for_human qui bloque le replay.
|
||||
action = None
|
||||
while queue:
|
||||
action = queue[0]
|
||||
|
||||
# Résoudre les variables runtime ({{var}} et {{var.field}})
|
||||
if owning_replay is not None:
|
||||
runtime_vars = owning_replay.get("variables") or {}
|
||||
if runtime_vars:
|
||||
action = _resolve_runtime_vars(action, runtime_vars)
|
||||
|
||||
type_ = action.get("type")
|
||||
|
||||
# pause_for_human : no-op en mode autonome — on saute et on continue
|
||||
if type_ == "pause_for_human":
|
||||
logger.info(
|
||||
"pause_for_human ignorée (mode autonome) — replay %s continue",
|
||||
owning_replay["replay_id"] if owning_replay else "?"
|
||||
)
|
||||
queue.pop(0)
|
||||
_replay_queues[session_id] = queue
|
||||
continue
|
||||
|
||||
# Actions serveur : exécuter HORS event loop pour ne pas bloquer
|
||||
# les autres polls (extract_text OCR ~5s, t2a_decision LLM ~8-13s).
|
||||
# Le lock reste tenu (queue cohérente) mais l'event loop est libre,
|
||||
# donc les polls concurrents peuvent recevoir {server_busy: True}.
|
||||
if type_ in _SERVER_SIDE_ACTION_TYPES and owning_replay is not None:
|
||||
try:
|
||||
if type_ == "extract_text":
|
||||
await loop.run_in_executor(
|
||||
None,
|
||||
_handle_extract_text_action,
|
||||
action, owning_replay, session_id, _last_heartbeat,
|
||||
)
|
||||
elif type_ == "t2a_decision":
|
||||
await loop.run_in_executor(
|
||||
None,
|
||||
_handle_t2a_decision_action,
|
||||
action, owning_replay,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Action serveur {type_} a levé : {e}")
|
||||
queue.pop(0)
|
||||
_replay_queues[session_id] = queue
|
||||
continue # action suivante
|
||||
|
||||
# Clic conditionnel : si l'action a un paramètre "condition", évaluer la variable
|
||||
# Format : "dec.critere1_valide" → runtime_vars["dec"]["critere1_valide"]
|
||||
condition_key = (action.get("parameters") or {}).get("condition")
|
||||
if condition_key and owning_replay is not None:
|
||||
runtime_vars = owning_replay.get("variables") or {}
|
||||
parts = condition_key.split(".", 1)
|
||||
if len(parts) == 2:
|
||||
val = (runtime_vars.get(parts[0]) or {}).get(parts[1])
|
||||
else:
|
||||
val = runtime_vars.get(parts[0])
|
||||
if not val:
|
||||
logger.info("Clic conditionnel ignoré (%s=%s) — action %s",
|
||||
condition_key, val, action.get("action_id", "?"))
|
||||
queue.pop(0)
|
||||
_replay_queues[session_id] = queue
|
||||
continue
|
||||
|
||||
# Action visuelle : sortir de la boucle pour la transmettre à l'Agent V1
|
||||
break
|
||||
|
||||
# Si la queue s'est vidée après les exécutions serveur, rien à transmettre
|
||||
if not queue or action is None:
|
||||
return {"action": None, "session_id": session_id, "machine_id": machine_id}
|
||||
finally:
|
||||
_replay_lock.release()
|
||||
|
||||
# ---- Pre-check écran (optionnel, non bloquant) ----
|
||||
# Ne s'applique qu'aux actions qui ont un from_node (actions de workflow,
|
||||
@@ -3879,7 +3978,9 @@ async def resume_replay(replay_id: str):
|
||||
state["pause_message"] = None
|
||||
|
||||
# Reinjecter l'action echouee en tete de queue (sera re-tentee)
|
||||
if failed_action and failed_action.get("action_id"):
|
||||
# pause_for_human est une pause intentionnelle, pas une erreur — ne pas réinjecter
|
||||
if (failed_action and failed_action.get("action_id")
|
||||
and failed_action.get("reason") != "user_request"):
|
||||
# Reconstruire l'action a partir du retry_pending ou de l'original
|
||||
original_action_id = failed_action["action_id"]
|
||||
# Chercher l'action originale dans les retry_pending
|
||||
@@ -3920,6 +4021,26 @@ async def resume_replay(replay_id: str):
|
||||
}
|
||||
|
||||
|
||||
@app.post("/api/v1/traces/stream/replay/{replay_id}/cancel")
async def cancel_replay(replay_id: str):
    """Cancel a replay (whatever its current status) and flush its queue."""
    with _replay_lock:
        state = _replay_states.get(replay_id)
        if not state:
            raise HTTPException(status_code=404, detail=f"Replay '{replay_id}' non trouvé")

        session_id = state["session_id"]
        # Mark cancelled and clear any supervised-pause context.
        state["status"] = "cancelled"
        state["failed_action"] = None
        state["pause_message"] = None
        _replay_queues[session_id] = []

        # Drop every pending retry that belongs to this replay.
        stale_keys = [key for key, entry in _retry_pending.items()
                      if entry.get("replay_id") == replay_id]
        for key in stale_keys:
            _retry_pending.pop(key, None)

    logger.info("Replay %s annulé manuellement", replay_id)
    return {"status": "cancelled", "replay_id": replay_id, "session_id": session_id}
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Visual Replay — Résolution visuelle des cibles (module resolve_engine)
|
||||
# =========================================================================
|
||||
|
||||
@@ -32,8 +32,16 @@ _ALLOWED_ACTION_TYPES = {
|
||||
"click", "type", "key_combo", "scroll", "wait",
|
||||
"file_open", "file_save", "file_close", "file_new", "file_dialog",
|
||||
"double_click", "right_click", "drag",
|
||||
"verify_screen", # Replay hybride : vérification visuelle entre groupes
|
||||
"verify_screen", # Replay hybride : vérification visuelle entre groupes
|
||||
"pause_for_human", # Pause supervisée explicite (interceptée par /replay/next)
|
||||
"extract_text", # OCR serveur sur dernier heartbeat → variable workflow
|
||||
"t2a_decision", # Analyse LLM facturation T2A → variable workflow
|
||||
}
|
||||
|
||||
# Types d'actions exécutées CÔTÉ SERVEUR (jamais transmises à l'Agent V1).
|
||||
# Le pipeline /replay/next les traite en boucle interne et passe à l'action
|
||||
# suivante jusqu'à trouver une action visuelle (à transmettre au client).
|
||||
_SERVER_SIDE_ACTION_TYPES = {"extract_text", "t2a_decision"}
|
||||
_MAX_ACTION_TEXT_LENGTH = 10000
|
||||
_MAX_KEYS_PER_COMBO = 10
|
||||
# Touches autorisées dans les key_combo (modificateurs + touches spéciales + caractères simples)
|
||||
@@ -852,6 +860,30 @@ def _edge_to_normalized_actions(edge, params: Dict[str, Any]) -> List[Dict[str,
|
||||
keys = [action_params["key"]]
|
||||
normalized["keys"] = keys
|
||||
|
||||
elif action_type == "pause_for_human":
|
||||
normalized["type"] = "pause_for_human"
|
||||
normalized["parameters"] = {
|
||||
"message": action_params.get("message", "Validation requise"),
|
||||
}
|
||||
return [normalized] # pas de target/coords pour cette action logique
|
||||
|
||||
elif action_type == "extract_text":
|
||||
normalized["type"] = "extract_text"
|
||||
normalized["parameters"] = {
|
||||
"output_var": action_params.get("output_var", "extracted_text"),
|
||||
"paragraph": bool(action_params.get("paragraph", True)),
|
||||
}
|
||||
return [normalized]
|
||||
|
||||
elif action_type == "t2a_decision":
|
||||
normalized["type"] = "t2a_decision"
|
||||
normalized["parameters"] = {
|
||||
"input_template": action_params.get("input_template", ""),
|
||||
"output_var": action_params.get("output_var", "t2a_result"),
|
||||
"model": action_params.get("model"),
|
||||
}
|
||||
return [normalized]
|
||||
|
||||
else:
|
||||
logger.warning(f"Type d'action inconnu : {action_type}")
|
||||
return []
|
||||
@@ -886,6 +918,143 @@ def _substitute_variables(text: str, params: Dict[str, Any], defaults: Dict[str,
|
||||
return re.sub(r'\$\{(\w+)\}', replacer, text)
|
||||
|
||||
|
||||
# Regex pour le templating runtime : {{var}} ou {{var.champ}} ou {{var.champ.sous}}
|
||||
_RUNTIME_VAR_PATTERN = re.compile(r'\{\{\s*(\w+)(?:\.([\w.]+))?\s*\}\}')
|
||||
|
||||
|
||||
def _resolve_runtime_vars_in_str(text: str, variables: Dict[str, Any]) -> str:
|
||||
"""Remplace {{var}} et {{var.field}} par leur valeur depuis le dict variables.
|
||||
|
||||
Variables/champs absents : laissés tels quels (ne casse pas le pipeline).
|
||||
Pour les valeurs non-str (dict, list), str() est appelé.
|
||||
"""
|
||||
def replacer(match):
|
||||
var_name = match.group(1)
|
||||
path = match.group(2)
|
||||
if var_name not in variables:
|
||||
return match.group(0)
|
||||
value = variables[var_name]
|
||||
if path:
|
||||
for field in path.split('.'):
|
||||
if isinstance(value, dict) and field in value:
|
||||
value = value[field]
|
||||
else:
|
||||
return match.group(0)
|
||||
return str(value)
|
||||
|
||||
return _RUNTIME_VAR_PATTERN.sub(replacer, text)
|
||||
|
||||
|
||||
def _resolve_runtime_vars(value: Any, variables: Dict[str, Any]) -> Any:
|
||||
"""Résout récursivement les {{var}} et {{var.field}} dans une valeur.
|
||||
|
||||
Supporte str, dict, list. Les autres types sont retournés tels quels.
|
||||
Si variables est vide ou None, value est retournée inchangée.
|
||||
"""
|
||||
if not variables:
|
||||
return value
|
||||
if isinstance(value, str):
|
||||
return _resolve_runtime_vars_in_str(value, variables)
|
||||
if isinstance(value, dict):
|
||||
return {k: _resolve_runtime_vars(v, variables) for k, v in value.items()}
|
||||
if isinstance(value, list):
|
||||
return [_resolve_runtime_vars(item, variables) for item in value]
|
||||
return value
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Handlers pour les actions exécutées côté serveur (extract_text, t2a_decision)
|
||||
# =========================================================================
|
||||
|
||||
def _handle_extract_text_action(
    action: Dict[str, Any],
    replay_state: Dict[str, Any],
    session_id: str,
    last_heartbeat: Dict[str, Dict[str, Any]],
) -> bool:
    """Run an extract_text action server-side.

    OCRs the screenshot referenced by the session's last heartbeat and stores
    the text in replay_state["variables"][output_var]. Failure-tolerant: a
    missing heartbeat or an OCR error stores "" and returns False so the
    pipeline keeps going instead of blocking.

    Returns True when some text was actually extracted.
    """
    params = action.get("parameters") or {}
    output_var = (params.get("output_var") or "extracted_text").strip()
    paragraph = bool(params.get("paragraph", True))

    screenshot_path = (last_heartbeat.get(session_id) or {}).get("path")

    text = ""
    if not screenshot_path:
        logger.warning(
            "extract_text : pas de heartbeat pour session %s — variable '%s' = ''",
            session_id, output_var,
        )
    else:
        try:
            # Local import: keeps the heavy OCR dependency out of module import time.
            from core.llm import extract_text_from_image
            text = extract_text_from_image(screenshot_path, paragraph=paragraph)
        except Exception as e:
            logger.warning("extract_text OCR échoué (%s) — variable '%s' = ''", e, output_var)

    replay_state.setdefault("variables", {})[output_var] = text
    logger.info(
        "extract_text → variable '%s' (%d chars) replay %s",
        output_var, len(text), replay_state.get("replay_id", "?"),
    )
    return bool(text)
|
||||
|
||||
|
||||
def _handle_t2a_decision_action(
    action: Dict[str, Any],
    replay_state: Dict[str, Any],
) -> bool:
    """Run a t2a_decision action server-side.

    Feeds the (already variable-resolved) DPI text from
    action.parameters.input_template to the local LLM and stores the JSON
    verdict in replay_state["variables"][output_var].

    Returns True on a clean analysis; False when the input was empty or the
    LLM call failed — an INDETERMINE placeholder is stored in both failure
    cases so downstream steps still find the variable.
    """
    params = action.get("parameters") or {}
    output_var = (params.get("output_var") or "t2a_result").strip()
    dpi_text = (params.get("input_template") or params.get("dpi") or "").strip()
    model = params.get("model") or None  # None → DEFAULT_MODEL

    variables = replay_state.setdefault("variables", {})

    if not dpi_text:
        logger.warning(
            "t2a_decision : input vide — variable '%s' = {decision: 'INDETERMINE'}", output_var,
        )
        variables[output_var] = {
            "decision": "INDETERMINE",
            "justification": "DPI vide ou non extrait",
            "confiance": "faible",
            "_error": "empty_input",
        }
        return False

    try:
        # Local import: the LLM client is only needed on this code path.
        from core.llm import analyze_dpi, DEFAULT_MODEL
        result = analyze_dpi(dpi_text, model=model or DEFAULT_MODEL)
    except Exception as e:
        logger.warning("t2a_decision : analyze_dpi exception %s", e)
        result = {
            "decision": "INDETERMINE",
            "justification": f"Erreur analyse : {e}",
            "confiance": "faible",
            "_error": str(e),
        }

    variables[output_var] = result
    logger.info(
        "t2a_decision → variable '%s' decision=%s (%ss) replay %s",
        output_var, result.get("decision", "?"), result.get("_elapsed_s", "?"),
        replay_state.get("replay_id", "?"),
    )
    return "_error" not in result
|
||||
|
||||
|
||||
def _expand_compound_steps(
|
||||
steps: List[Dict[str, Any]], base: Dict[str, Any], params: Dict[str, Any]
|
||||
) -> List[Dict[str, Any]]:
|
||||
@@ -1208,6 +1377,10 @@ def _create_replay_state(
|
||||
# Champs pour pause supervisée (target_not_found)
|
||||
"failed_action": None, # Contexte de l'action en echec (quand paused_need_help)
|
||||
"pause_message": None, # Message a afficher a l'utilisateur
|
||||
# Variables d'exécution produites en cours de workflow (extract_text,
|
||||
# t2a_decision, etc.). Résolues via templating {{var}} ou {{var.field}}
|
||||
# dans les paramètres des actions suivantes.
|
||||
"variables": {},
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -2193,22 +2193,33 @@ def _validate_resolution_quality(
|
||||
dx = abs(resolved_x - fallback_x_pct)
|
||||
dy = abs(resolved_y - fallback_y_pct)
|
||||
if dx > _RESOLUTION_MAX_DRIFT or dy > _RESOLUTION_MAX_DRIFT:
|
||||
# Exception : si le template matching trouve l'image avec une
|
||||
# similarité quasi parfaite, on fait confiance à la position
|
||||
# visuelle peu importe le drift. Une image retrouvée à >= 0.95
|
||||
# de score est SUR l'écran à l'endroit indiqué — le drift par
|
||||
# rapport à l'enregistrement ne reflète qu'un changement de
|
||||
# layout (scroll, redimensionnement, F11, devtools), pas une
|
||||
# erreur de résolution.
|
||||
_HIGH_CONFIDENCE = 0.95
|
||||
if score >= _HIGH_CONFIDENCE and method.startswith("template_matching"):
|
||||
logger.info(
|
||||
"[REPLAY] Drift (%.3f, %.3f) > %.2f IGNORÉ : score=%.3f >= %.2f "
|
||||
"sur %s — résultat visuel fiable, on l'utilise",
|
||||
dx, dy, _RESOLUTION_MAX_DRIFT, score, _HIGH_CONFIDENCE, method,
|
||||
)
|
||||
return result
|
||||
|
||||
logger.warning(
|
||||
"[REPLAY] Resolution REJETÉE (drift trop grand) : "
|
||||
"method=%s resolved=(%.3f, %.3f) expected=(%.3f, %.3f) "
|
||||
"drift=(%.3f, %.3f) max=%.2f",
|
||||
method, resolved_x, resolved_y,
|
||||
fallback_x_pct, fallback_y_pct,
|
||||
dx, dy, _RESOLUTION_MAX_DRIFT,
|
||||
"[REPLAY] Drift trop grand (%.3f, %.3f) > %.2f — fallback coords enregistrées (%.3f, %.3f)",
|
||||
dx, dy, _RESOLUTION_MAX_DRIFT, fallback_x_pct, fallback_y_pct,
|
||||
)
|
||||
# Fallback : coordonnées enregistrées lors de la capture (écran identique = safe)
|
||||
return {
|
||||
"resolved": False,
|
||||
"method": f"rejected_drift_{method}",
|
||||
"reason": f"drift_dx{dx:.3f}_dy{dy:.3f}_max{_RESOLUTION_MAX_DRIFT:.2f}",
|
||||
"resolved": True,
|
||||
"method": "fallback_recorded_coords",
|
||||
"reason": f"drift_dx{dx:.3f}_dy{dy:.3f}_using_recorded",
|
||||
"original_method": method,
|
||||
"original_score": score,
|
||||
"drift_dx": round(dx, 3),
|
||||
"drift_dy": round(dy, 3),
|
||||
"x_pct": fallback_x_pct,
|
||||
"y_pct": fallback_y_pct,
|
||||
}
|
||||
|
||||
15
core/llm/__init__.py
Normal file
15
core/llm/__init__.py
Normal file
@@ -0,0 +1,15 @@
|
||||
"""Modules LLM (clients Ollama et décisionnels métier) + extracteur OCR."""
|
||||
|
||||
from .t2a_decision import (
|
||||
PROMPT_TEMPLATE,
|
||||
DEFAULT_MODEL,
|
||||
analyze_dpi,
|
||||
)
|
||||
from .ocr_extractor import extract_text_from_image
|
||||
|
||||
__all__ = [
|
||||
"PROMPT_TEMPLATE",
|
||||
"DEFAULT_MODEL",
|
||||
"analyze_dpi",
|
||||
"extract_text_from_image",
|
||||
]
|
||||
71
core/llm/ocr_extractor.py
Normal file
71
core/llm/ocr_extractor.py
Normal file
@@ -0,0 +1,71 @@
|
||||
"""Extracteur OCR — texte depuis une image (screenshot d'écran).
|
||||
|
||||
Utilise EasyOCR fr+en. Singleton (chargement modèle ~3s au premier appel).
|
||||
|
||||
Conçu pour le pipeline streaming serveur (action `extract_text`) : récupère
|
||||
un screenshot fresh (dernier heartbeat ou capture forcée), applique l'OCR,
|
||||
retourne le texte concaténé pour analyse downstream (ex: t2a_decision).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_easyocr_reader = None  # lazily-created singleton (model load costs ~3 s)


def _get_reader():
    """Return the shared EasyOCR fr+en reader, creating it on first use.

    Tries the GPU first and falls back to CPU when GPU inference is not
    usable on this host.
    """
    global _easyocr_reader
    if _easyocr_reader is not None:
        return _easyocr_reader

    import easyocr
    try:
        _easyocr_reader = easyocr.Reader(['fr', 'en'], gpu=True, verbose=False)
        logger.info("EasyOCR initialisé (fr+en, GPU)")
    except Exception as e:
        logger.warning("EasyOCR GPU indisponible (%s), fallback CPU", e)
        _easyocr_reader = easyocr.Reader(['fr', 'en'], gpu=False, verbose=False)
    return _easyocr_reader
|
||||
|
||||
|
||||
def extract_text_from_image(
    image_path: str,
    region: Optional[Tuple[int, int, int, int]] = None,
    paragraph: bool = True,
) -> str:
    """OCR an on-disk image with EasyOCR and return its text.

    Args:
        image_path: path to the PNG on disk.
        region: optional (x, y, w, h) crop applied before OCR; None = full image.
        paragraph: True groups lines into readable paragraphs, False keeps
            separate blocks (more granular).

    Returns:
        The recognized text, one line/paragraph per newline. Any failure
        (missing file, OCR error) is logged as a warning and yields "".
    """
    source = Path(image_path)
    if not source.exists():
        logger.warning("extract_text: fichier introuvable %s", image_path)
        return ""

    try:
        from PIL import Image
        import numpy as np

        image = Image.open(source)
        if region:
            x, y, w, h = region
            image = image.crop((x, y, x + w, y + h))

        fragments = _get_reader().readtext(np.array(image), detail=0, paragraph=paragraph)
        return "\n".join(str(fragment).strip() for fragment in fragments if fragment)
    except Exception as e:
        logger.warning("extract_text échoué sur %s : %s", image_path, e)
        return ""
|
||||
168
core/llm/t2a_decision.py
Normal file
168
core/llm/t2a_decision.py
Normal file
@@ -0,0 +1,168 @@
|
||||
"""Aide à la décision de facturation urgences T2A/PMSI via LLM local.
|
||||
|
||||
Décide si un passage aux urgences relève :
|
||||
- du FORFAIT_URGENCE (passage simple, retour à domicile)
|
||||
- de la REQUALIFICATION_HOSPITALISATION (séjour MCO, valorisation 1k-5k€+)
|
||||
|
||||
Le prompt impose une extraction littérale des faits du DPI (pas d'invention)
|
||||
et une modulation honnête de la confiance. Validé sur 15 DPI synthétiques :
|
||||
qwen2.5:7b atteint 100 % d'accuracy en ~5 s/cas avec 4,7 Go VRAM.
|
||||
|
||||
Voir docs/clients/ght_sud_95/ et demo/facturation_urgences/RESULTATS.md pour le
|
||||
bench comparatif des 11 LLMs évalués.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
from typing import Any, Dict
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434/api/generate")
|
||||
DEFAULT_MODEL = os.environ.get("T2A_MODEL", "qwen2.5:7b")
|
||||
DEFAULT_TIMEOUT = 60 # secondes
|
||||
|
||||
PROMPT_TEMPLATE = """Tu es médecin DIM (Département d'Information Médicale), expert en facturation T2A/PMSI aux urgences hospitalières en France.
|
||||
|
||||
Analyse le dossier patient ci-dessous pour déterminer si le passage relève :
|
||||
- FORFAIT_URGENCE : passage simple, retour à domicile, sans surveillance prolongée ni soins continus
|
||||
- REQUALIFICATION_HOSPITALISATION : séjour MCO requis selon les 3 critères PMSI/ATIH
|
||||
|
||||
LES 3 CRITÈRES UHCD (au moins 2 sur 3 validés ⇒ REQUALIFICATION) :
|
||||
1. Pathologie potentiellement évolutive (instabilité hémodynamique, terrain à risque, traitement nécessitant adaptation)
|
||||
2. Surveillance médicale et paramédicale prolongée (constantes itératives, observations IDE/médecin, durée > 6 h)
|
||||
3. Examens complémentaires ou actes thérapeutiques (biologie, imagerie, sutures, gestes techniques)
|
||||
|
||||
INSTRUCTIONS STRICTES :
|
||||
1. N'utilise QUE des éléments littéralement présents dans le dossier patient. N'invente AUCUN critère.
|
||||
2. Pour CHAQUE critère (1, 2, 3), tu DOIS produire un texte de preuve qui contient AU MOINS UNE CITATION LITTÉRALE du dossier entre guillemets français « ... ». Exemple : « FC à 110 bpm, TA 92/60 ».
|
||||
3. Si le critère est NON validé, ne renvoie JAMAIS un fallback creux : explique factuellement ce qui manque, en citant le dossier (ex: « Sortie à H+2 », « Aucun acte technique au compte-rendu »).
|
||||
4. Le texte de chaque preuve fait 2-3 phrases : (i) la citation littérale, (ii) l'analyse PMSI, (iii) la conclusion validé/non validé.
|
||||
5. Calcule la durée totale du passage en heures (admission → sortie/transfert) à partir des horaires du dossier.
|
||||
6. Module ta confiance honnêtement :
|
||||
- "elevee" uniquement si tous les indices convergent
|
||||
- "moyenne" si éléments ambivalents
|
||||
- "faible" si information manquante ou très atypique
|
||||
|
||||
Réponds STRICTEMENT en JSON valide, sans texte avant ni après :
|
||||
{{
|
||||
"duree_passage_heures": <nombre>,
|
||||
"elements_pour_hospitalisation": [<phrases littéralement extraites du dossier>],
|
||||
"elements_pour_forfait": [<phrases littéralement extraites du dossier>],
|
||||
"decision": "FORFAIT_URGENCE" | "REQUALIFICATION_HOSPITALISATION",
|
||||
"decision_court": "UHCD" | "Forfait Urgences",
|
||||
"preuve_critere1": "<2-3 phrases incluant AU MOINS UNE citation littérale entre « » (motif, symptôme, terrain à risque, traitement). Si non validé : factualise ce qui manque en citant le dossier.>",
|
||||
"critere1_valide": true | false,
|
||||
"preuve_critere2": "<2-3 phrases incluant AU MOINS UNE citation littérale entre « » (constantes, observations IDE, durée surveillance). Si non validé : factualise.>",
|
||||
"critere2_valide": true | false,
|
||||
"preuve_critere3": "<2-3 phrases incluant AU MOINS UNE citation littérale entre « » (actes/examens : biologie, imagerie, suture, etc.). Si non validé : factualise.>",
|
||||
"critere3_valide": true | false,
|
||||
"justification": "<2-3 phrases synthétiques s'appuyant explicitement sur les preuves ci-dessus, avec au moins une citation>",
|
||||
"confiance": "elevee" | "moyenne" | "faible"
|
||||
}}
|
||||
|
||||
DOSSIER PATIENT :
|
||||
{dpi}
|
||||
"""
|
||||
|
||||
|
||||
def analyze_dpi(
    dpi_text: str,
    model: str = DEFAULT_MODEL,
    timeout: int = DEFAULT_TIMEOUT,
    ollama_url: str = OLLAMA_URL,
) -> Dict[str, Any]:
    """Submit an emergency-department DPI to an Ollama LLM; return the JSON verdict.

    Args:
        dpi_text: patient record text (tab concatenation or raw DPI).
        model: Ollama model to use (default qwen2.5:7b — 100% accuracy bench).
        timeout: HTTP timeout in seconds.
        ollama_url: Ollama endpoint (default localhost:11434/api/generate).

    Returns:
        Dict with:
            decision: "FORFAIT_URGENCE" | "REQUALIFICATION_HOSPITALISATION"
            elements_pour_hospitalisation: List[str]
            elements_pour_forfait: List[str]
            duree_passage_heures: float
            justification: str
            confiance: "elevee" | "moyenne" | "faible"
            _elapsed_s: float (latency)
            _model: str
        On error:
            {"_error": str, "_elapsed_s": float} (network / Ollama unavailable)
            {"_parse_error": True, "_raw": str, "_elapsed_s": float} (invalid JSON)
    """
    payload = {
        "model": model,
        "prompt": PROMPT_TEMPLATE.format(dpi=dpi_text),
        "stream": False,
        # Ask Ollama to constrain the output to valid JSON.
        "format": "json",
        "keep_alive": "5m",
        "options": {
            # Low temperature: decisions must be reproducible, not creative.
            "temperature": 0.1,
            "num_predict": 1500,
            "num_ctx": 16384,
        },
    }
    data = json.dumps(payload).encode("utf-8")
    req = urllib.request.Request(
        ollama_url,
        data=data,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    t0 = time.time()
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            body = json.loads(resp.read().decode("utf-8"))
    except (urllib.error.URLError, TimeoutError, ConnectionError) as e:
        # Network-level failure: report how long we waited before giving up.
        elapsed = round(time.time() - t0, 1)
        logger.warning("analyze_dpi: Ollama indisponible (%s) après %.1fs", e, elapsed)
        return {"_error": str(e), "_elapsed_s": elapsed, "_model": model}

    elapsed = time.time() - t0

    raw_response = body.get("response", "").strip()
    raw_thinking = body.get("thinking", "").strip()

    # Candidate JSON payloads to parse, in priority order. If the model put
    # its answer in the "thinking" channel instead of "response", try the
    # last {...} span found there as a fallback candidate.
    candidates = [raw_response]
    if not raw_response and raw_thinking:
        last_close = raw_thinking.rfind("}")
        last_open = raw_thinking.rfind("{", 0, last_close)
        if last_open != -1 and last_close != -1:
            candidates.append(raw_thinking[last_open:last_close + 1])

    parsed = None
    for cand in candidates:
        # Strip a markdown code fence (```json ... ```) if the model added one.
        cleaned = cand
        if cleaned.startswith("```"):
            cleaned = cleaned.split("\n", 1)[-1]
        if cleaned.endswith("```"):
            cleaned = cleaned.rsplit("```", 1)[0]
        cleaned = cleaned.strip()
        try:
            parsed = json.loads(cleaned)
            break
        except json.JSONDecodeError:
            continue

    if parsed is None:
        # No candidate was valid JSON; return a truncated sample for debugging.
        return {
            "_parse_error": True,
            "_raw": (raw_response or raw_thinking)[:500],
            "_elapsed_s": round(elapsed, 1),
            "_model": model,
        }

    # Annotate the verdict with telemetry (underscore keys = metadata).
    parsed["_elapsed_s"] = round(elapsed, 1)
    parsed["_model"] = model
    parsed["_eval_count"] = body.get("eval_count")
    return parsed
|
||||
28
deploy/systemd/rpa-mockup-easily.service
Normal file
28
deploy/systemd/rpa-mockup-easily.service
Normal file
@@ -0,0 +1,28 @@
|
||||
[Unit]
|
||||
Description=Maquette Easily Assure (démo GHT Sud 95) - serveur statique HTTP
|
||||
After=network-online.target
|
||||
Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=dom
|
||||
Group=dom
|
||||
WorkingDirectory=/home/dom/ai/rpa_vision_v3/docs/clients/ght_sud_95/mockup_easily_assure
|
||||
ExecStart=/usr/bin/python3 -m http.server 8765 --bind 0.0.0.0
|
||||
|
||||
Restart=on-failure
|
||||
RestartSec=3
|
||||
TimeoutStopSec=10
|
||||
|
||||
NoNewPrivileges=true
|
||||
PrivateTmp=true
|
||||
ProtectSystem=strict
|
||||
ProtectHome=read-only
|
||||
ReadOnlyPaths=/home/dom/ai/rpa_vision_v3/docs/clients/ght_sud_95/mockup_easily_assure
|
||||
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
SyslogIdentifier=rpa-mockup-easily
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
2515
docs/superpowers/plans/2026-05-05-qw-suite-mai.md
Normal file
2515
docs/superpowers/plans/2026-05-05-qw-suite-mai.md
Normal file
File diff suppressed because it is too large
Load Diff
467
docs/superpowers/specs/2026-05-05-qw-suite-mai-design.md
Normal file
467
docs/superpowers/specs/2026-05-05-qw-suite-mai-design.md
Normal file
@@ -0,0 +1,467 @@
|
||||
# Spec — QW Suite Mai 2026
|
||||
|
||||
| Champ | Valeur |
|
||||
|---|---|
|
||||
| Date | 2026-05-05 |
|
||||
| Auteur | Dom + Claude (brainstorming structuré) |
|
||||
| Branche | `feature/qw-suite-mai` (depuis `feature/feedback-bus`) |
|
||||
| Backup | `backup/pre-qw-suite-mai-2026-05-05` à pousser sur Gitea avant 1er commit |
|
||||
| Statut | Design approuvé — spec à valider par Dom avant `writing-plans` |
|
||||
| Cibles démo | GHT Sud 95 (1ère sem mai 2026, date à confirmer) |
|
||||
| Contraintes inviolables | 100% vision · 100% local (Ollama) · backward compatible |
|
||||
|
||||
## 1. Contexte & motivation
|
||||
|
||||
Suite à l'exploration comparative de 5 frameworks computer-use (Simular Agent-S, browser-use, OpenAI CUA sample, Coasty open-cu, Showlab OOTB), trois quick wins ont été identifiés comme améliorations à fort ratio valeur/risque pour RPA Vision V3, alignés avec la philosophie du projet (vision pure, souveraineté, supervision médicale) :
|
||||
|
||||
- **QW1 — Multi-écrans propre** (inspiré OOTB) : capture et grounding sur l'écran cible plutôt que sur le composite tous écrans. Gain de perf grounding + correction des coordonnées.
|
||||
- **QW2 — LoopDetector composite** (inspiré browser-use) : détecter quand Léa exécute des actions techniquement valides mais que l'écran ne progresse pas, et escalader vers l'humain plutôt que de tourner en rond silencieusement.
|
||||
- **QW4 — Safety checks hybrides** (inspiré OpenAI CUA + browser-use Pydantic registry) : enrichir l'action `pause_for_human` avec une liste de vérifications à acquitter, mêlant déclaratif (workflow) et contextuel (LLM local).
|
||||
|
||||
Effet cumulé attendu : Léa devient observable, robuste et auditable sans rien céder sur le 100% local.
|
||||
|
||||
## 2. Décisions de design (récap)
|
||||
|
||||
| Sujet | Décision |
|
||||
|---|---|
|
||||
| Activation | Default-ON pour tous les workflows (Dom recréera ce qui en a besoin) |
|
||||
| QW1 — Stratégie ciblage écran | `monitor_index` enregistré à la capture → fallback focus actif → fallback composite (backward) |
|
||||
| QW1 — Niveau de stack | Client Agent V1 (capture) + serveur (routeur) + `core/execution/input_handler.py` (capture locale) |
|
||||
| QW2 — Signal de boucle | Composite OR : screen_static (CLIP) + action_repeat + retry_threshold |
|
||||
| QW2 — Sortie | `replay_state["status"] = "paused_need_help"` avec `pause_reason` structuré |
|
||||
| QW4 — Source des checks | Hybride : déclaratif workflow + LLM contextuel sur `safety_level: "medical_critical"` |
|
||||
| QW4 — Robustesse LLM | `medgemma:4b` + timeout 5s + `format=json` Ollama + JSON Schema strict + fallback safe (zéro check additionnel) + kill-switch env var |
|
||||
| QW4 — UX VWB | Bulle existante préservée + `<ChecklistPanel>` au-dessus de Continuer (bouton désactivé tant que required non cochés) |
|
||||
| Ordre de livraison | QW1 → QW2 → QW4 (du moins invasif au plus visible) |
|
||||
| Plan timing | Option A : QW1+QW2 avant démo ; QW4 enchaîné dès validation des deux premiers |
|
||||
| Kill-switches | Env vars sur QW2 et QW4, surchargeables par `systemctl edit` |
|
||||
| Backward compatibility | 100% — aucun champ obligatoire ajouté au DSL ; workflows existants se comportent comme avant |
|
||||
|
||||
## 3. Architecture globale
|
||||
|
||||
```
|
||||
┌─────────────────────────┐ ┌─────────────────────────────────┐
|
||||
│ Agent V1 (Windows) │ │ Serveur Streaming (5005) │
|
||||
│ │ │ │
|
||||
│ ┌──────────────────┐ │ │ ┌───────────────────────────┐ │
|
||||
│ │ ScreenCapture │ │ │ │ LoopDetector [QW2] │ │
|
||||
│ │ + monitor_index │───┼────────▶│ │ • screen_static (CLIP) │ │
|
||||
│ │ [QW1] │ │ HTTP │ │ • action_repeat │ │
|
||||
│ └──────────────────┘ │ │ │ • retry_threshold │ │
|
||||
│ │ │ │ → paused_need_help │ │
|
||||
│ ┌──────────────────┐ │ │ └───────────────────────────┘ │
|
||||
│ │ FeedbackBus lea:*│◀──┼─────────┤ │
|
||||
│ │ chat_window │ │ │ ┌───────────────────────────┐ │
|
||||
│ └──────────────────┘ │ │ │ SafetyChecksProvider │ │
|
||||
└─────────────────────────┘ │ │ [QW4] │ │
|
||||
│ │ • declarative (workflow) │ │
|
||||
│ │ • LLM contextual │ │
|
||||
│ │ ‒ medgemma:4b 5s/JSON │ │
|
||||
│ │ ‒ fallback safe │ │
|
||||
│ │ • kill-switch env var │ │
|
||||
│ └───────────────────────────┘ │
|
||||
│ │
|
||||
│ ┌───────────────────────────┐ │
|
||||
│ │ MonitorRouter [QW1] │ │
|
||||
│ │ • cible monitor_index │ │
|
||||
│ │ • fallback focus actif │ │
|
||||
│ └───────────────────────────┘ │
|
||||
└─────────────────────────────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────────────────────┐
|
||||
│ VWB Frontend (3002) │
|
||||
│ │
|
||||
│ PauseDialog (étendu) [QW4-UX] │
|
||||
│ • bulle existante préservée │
|
||||
│ • + ChecklistPanel │
|
||||
│ (cases à cocher acquittables)│
|
||||
│ • + pause_reason si loop │
|
||||
│ Continuer désactivé tant que │
|
||||
│ required-checks non cochés │
|
||||
└─────────────────────────────────┘
|
||||
```
|
||||
|
||||
### Principes invariants
|
||||
1. Aucun nouveau service, aucune nouvelle DB. Tout dans la stack existante (Agent V1 + serveur 5005 + VWB 3002).
|
||||
2. 3 modules serveur isolés (`monitor_router.py`, `loop_detector.py`, `safety_checks_provider.py`) — couplage faible, testables individuellement, désactivables par env var.
|
||||
3. Backward compatible : workflows sans nouveaux champs se comportent comme avant.
|
||||
4. Kill-switches env vars sur QW2 et QW4, override possible via `systemctl edit` pendant la démo.
|
||||
5. 100% vision : QW1 pure capture + grounding ; QW2 réutilise le `_clip_embedder` déjà chargé ; QW4 LLM = Ollama local strict.
|
||||
6. Bus `lea:*` étendu de 4 events d'observabilité : `lea:loop_detected`, `lea:safety_checks_generated`, `lea:safety_checks_llm_failed`, `lea:monitor_routed`.
|
||||
|
||||
### Surface de modification (ordre A)
|
||||
|
||||
| QW | Fichiers nouveaux | Fichiers modifiés |
|
||||
|---|---|---|
|
||||
| QW1 | `agent_v0/server_v1/monitor_router.py` | `agent_v0/agent_v1/capture/screen_capture.py`, `core/execution/input_handler.py`, `agent_v0/server_v1/api_stream.py` (~10 lignes) |
|
||||
| QW2 | `agent_v0/server_v1/loop_detector.py` | `agent_v0/server_v1/replay_engine.py` (~30 lignes), `agent_v0/server_v1/api_stream.py` (~20 lignes) |
|
||||
| QW4 | `agent_v0/server_v1/safety_checks_provider.py`, `visual_workflow_builder/frontend_v4/src/components/PauseDialog.tsx` | `agent_v0/server_v1/replay_engine.py`, `agent_v0/server_v1/api_stream.py` (`/replay/resume`), `visual_workflow_builder/frontend_v4/src/types.ts`, `visual_workflow_builder/frontend_v4/src/components/PropertiesPanel.tsx` |
|
||||
|
||||
## 4. QW1 — Multi-écrans
|
||||
|
||||
### 4.1 Composants
|
||||
|
||||
**Client Agent V1** — `agent_v0/agent_v1/capture/screen_capture.py` (existant à modifier)
|
||||
- Enrichit chaque heartbeat / event avec :
|
||||
- `monitor_index: int`
|
||||
- `monitors_geometry: [{idx, x, y, w, h, primary}]`
|
||||
- Détection via `screeninfo` (port direct depuis Showlab OOTB)
|
||||
- Capture de l'écran *actif uniquement* (poids réseau identique à aujourd'hui)
|
||||
- Si `screeninfo` indisponible côté Windows : envoie `monitors_geometry: []`, comportement composite préservé
|
||||
|
||||
**Serveur** — nouveau `agent_v0/server_v1/monitor_router.py` (~80 lignes)
|
||||
- API : `resolve_target_monitor(action: dict, session_state: dict) → MonitorTarget`
|
||||
- `MonitorTarget = {idx, offset_x, offset_y, w, h, source: "action" | "focus" | "composite_fallback"}`
|
||||
- Stratégie :
|
||||
1. Lit `action.get("monitor_index")` si présent → cible cet écran
|
||||
2. Sinon `session_state.get("last_focused_monitor")` → cible focus actif
|
||||
3. Sinon `monitors[0]` composite (comportement actuel — backward)
|
||||
|
||||
**Input local Linux** — `core/execution/input_handler.py` modifs ciblées
|
||||
- Signature changée : `_capture_screen(monitor_idx=None) → (image, w, h, offset_x, offset_y)`
|
||||
- Quand `monitor_idx` fourni : capture uniquement ce monitor
|
||||
- Toutes les fonctions `_grounding_*` (`_grounding_ocr`, `_grounding_ui_tars`, `_grounding_vlm`) propagent l'offset pour traduire les coords retournées en coords absolues écran
|
||||
|
||||
### 4.2 Data flow replay
|
||||
|
||||
```
|
||||
Action [monitor_index=1] reçue par serveur
|
||||
→ MonitorRouter.resolve()
|
||||
→ target_monitor = {idx:1, offset:(1920,0), w:1920, h:1080, source:"action"}
|
||||
→ grounding capture monitor 1 uniquement (image 1920×1080, pas 3840×1080)
|
||||
→ UI-TARS / OCR / VLM cherche cible → coords locales (640, 540)
|
||||
→ coords absolues = (640+1920, 540+0) = (2560, 540)
|
||||
→ pyautogui.click(2560, 540)
|
||||
→ bus.emit("lea:monitor_routed", {idx:1, source:"action"})
|
||||
```
|
||||
|
||||
### 4.3 Error handling
|
||||
|
||||
| Cas | Comportement |
|
||||
|---|---|
|
||||
| `monitor_index` absent (vieille session) | Fallback focus actif, log info `lea:monitor_routed source=focus` |
|
||||
| Monitor enregistré n'existe plus (2nd écran débranché) | Fallback focus actif, event `lea:monitor_unavailable` warning |
|
||||
| `mss.monitors[i]` hors limites | Fallback `monitors[0]` composite, event `lea:monitor_invalid_index` error |
|
||||
| `screeninfo` non installé côté Agent V1 | `monitors_geometry: []`, fallback composite (comportement actuel) — pas de blocage |
|
||||
|
||||
### 4.4 Tests QW1
|
||||
|
||||
- `tests/unit/test_monitor_router.py` : 4 cas (cible OK, fallback focus, fallback composite, monitor débranché)
|
||||
- `tests/integration/test_grounding_offset.py` : capture 1 monitor + clic résolu avec offset (mock pyautogui)
|
||||
- Smoke : 1 workflow Easily rejoué, vérification visuelle que le clic atterrit au bon endroit
|
||||
|
||||
### 4.5 Compat workflows existants
|
||||
|
||||
Aucune action n'a `monitor_index` aujourd'hui → 100% des workflows existants partent en fallback focus actif → comportement quasi-identique au composite actuel mais sur un seul écran (gain de perf grounding même sans recréation de workflow).
|
||||
|
||||
## 5. QW2 — LoopDetector composite
|
||||
|
||||
### 5.1 Composants
|
||||
|
||||
**Nouveau** `agent_v0/server_v1/loop_detector.py` (~150 lignes)
|
||||
- Classe `LoopDetector` avec 3 sous-détecteurs
|
||||
- API : `evaluate(replay_state, screenshot_history, action_history) → LoopVerdict`
|
||||
- `LoopVerdict = {detected: bool, reason: str, signal: str, evidence: dict}`
|
||||
|
||||
**Hook** dans `agent_v0/server_v1/api_stream.py`
|
||||
- Après chaque `report_action_result`, appel `loop_detector.evaluate(...)` si `RPA_LOOP_DETECTOR_ENABLED=1` (défaut)
|
||||
- Si `verdict.detected` :
|
||||
- `replay_state["status"] = "paused_need_help"`
|
||||
- `replay_state["pause_reason"] = verdict.reason`
|
||||
- `replay_state["pause_message"] = f"Léa semble bloquée — {verdict.signal}"`
|
||||
- bus.emit `lea:loop_detected` avec `{signal, evidence, replay_id}`
|
||||
|
||||
**Étendu** dans `replay_engine.py` :
|
||||
- `_create_replay_state()` ajoute :
|
||||
- `"_screenshot_history": []` (anneau de 5 derniers embeddings CLIP)
|
||||
- `"_action_history": []` (anneau des 5 dernières actions)
|
||||
- `_pre_check_screen_state()` continue indépendamment (signal différent : check pré-action vs détection post-action de stagnation)
|
||||
|
||||
### 5.2 Signaux composites
|
||||
|
||||
| Signal | Détecteur | Seuil par défaut | Source |
|
||||
|---|---|---|---|
|
||||
| `screen_static` | A | 4 captures consécutives avec CLIP similarity > 0.99 | `_clip_embedder` déjà chargé serveur |
|
||||
| `action_repeat` | B | 3 actions consécutives identiques (type + coords) | `_action_history` |
|
||||
| `retry_threshold` | C | 3 retries sur même `action_id` | `replay_state["retried_actions"]` (déjà existant) |
|
||||
|
||||
Un seul signal positif suffit à déclencher l'escalade.
|
||||
|
||||
### 5.3 Data flow
|
||||
|
||||
```
|
||||
Action exécutée → result reçu via /replay/result
|
||||
↓
|
||||
LoopDetector.evaluate(state, screenshots, actions) si RPA_LOOP_DETECTOR_ENABLED=1
|
||||
├─ A.check_screen_static() → embed(latest), compare aux N-1 derniers
|
||||
├─ B.check_action_repeat() → compare action_history[-3:]
|
||||
└─ C.check_retry_threshold() → state["retried_actions"] >= 3
|
||||
↓
|
||||
Si verdict.detected:
|
||||
state["status"] = "paused_need_help"
|
||||
state["pause_reason"] = verdict.reason
|
||||
state["pause_message"] = f"Léa semble bloquée — {verdict.signal} ({evidence})"
|
||||
bus.emit("lea:loop_detected", {signal, evidence, replay_id})
|
||||
```
|
||||
|
||||
### 5.4 Error handling
|
||||
|
||||
| Cas | Comportement |
|
||||
|---|---|
|
||||
| CLIP embedder unavailable | Signal A désactivé (warning log 1×), B+C continuent. Pas de blocage. |
|
||||
| `_screenshot_history` < N | Signal A skip silencieusement (pas assez d'historique) |
|
||||
| `embed_image()` lève une exception | Catch + log warning, replay continue (verdict = `detected=False`) |
|
||||
| `RPA_LOOP_DETECTOR_ENABLED=0` | Module entier bypassé, comportement antérieur |
|
||||
| Faux positif détecté en pleine démo | `RPA_LOOP_DETECTOR_ENABLED=0` via `systemctl edit rpa-streaming` + restart → reprise immédiate |
|
||||
|
||||
### 5.5 Configuration env vars
|
||||
|
||||
- `RPA_LOOP_DETECTOR_ENABLED=1` (défaut)
|
||||
- `RPA_LOOP_SCREEN_STATIC_THRESHOLD=0.99`
|
||||
- `RPA_LOOP_SCREEN_STATIC_N=4`
|
||||
- `RPA_LOOP_ACTION_REPEAT_N=3`
|
||||
- `RPA_LOOP_RETRY_THRESHOLD=3`
|
||||
|
||||
### 5.6 Tests QW2
|
||||
|
||||
- `tests/unit/test_loop_detector.py` : 8 cas (chaque signal isolé, chaque combinaison, kill-switch, embedder absent)
|
||||
- `tests/integration/test_loop_detector_replay.py` : 3 cas — replay simulé qui boucle → vérifier transition `running → paused_need_help` avec bonne raison
|
||||
- Pas de smoke démo (impossible à reproduire fiable, on s'appuie sur les tests intégration)
|
||||
|
||||
### 5.7 Compat VWB
|
||||
|
||||
Aucune côté frontend pour QW2 : la pause `paused_need_help` existe déjà. Le `pause_reason` enrichi sera affiché par le composant `PauseDialog` étendu en QW4. Avant la livraison de QW4, la raison s'affichera en texte dans le `pause_message` (donc utile dès le commit QW2).
|
||||
|
||||
## 6. QW4 — Safety checks hybrides
|
||||
|
||||
### 6.1 Contrat de l'action étendue (rétro-compatible)
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "pause_for_human",
|
||||
"parameters": {
|
||||
"message": "Validation T2A avant codage",
|
||||
"safety_level": "medical_critical",
|
||||
"safety_checks": [
|
||||
{"id": "check_ipp", "label": "Vérifier IPP patient", "required": true},
|
||||
{"id": "check_cim10", "label": "Confirmer code CIM-10", "required": true}
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
`safety_level` et `safety_checks` sont **optionnels**. Action sans ces champs → comportement actuel (bulle simple, aucun appel LLM).
|
||||
|
||||
### 6.2 Composants serveur
|
||||
|
||||
**Nouveau** `agent_v0/server_v1/safety_checks_provider.py` (~180 lignes)
|
||||
- API : `build_pause_payload(action, replay_state, last_screenshot) → PausePayload`
|
||||
- Concatène : checks déclaratifs (workflow) + checks contextuels (LLM si `safety_level == "medical_critical"`)
|
||||
- Chaque check porte sa source : `source: "declarative" | "llm_contextual"` et son `evidence` (vide pour déclaratif, justification courte pour LLM)
|
||||
- Format check final :
|
||||
```json
|
||||
{
|
||||
"id": "check_xxx",
|
||||
"label": "...",
|
||||
"required": true,
|
||||
"source": "declarative" | "llm_contextual",
|
||||
"evidence": null | "..."
|
||||
}
|
||||
```
|
||||
|
||||
**LLM contextual call** — sous-fonction `_call_llm_for_contextual_checks()`
|
||||
- Modèle : `medgemma:4b` (env `RPA_SAFETY_CHECKS_LLM_MODEL`)
|
||||
- Timeout dur : 5s (env `RPA_SAFETY_CHECKS_LLM_TIMEOUT_S`)
|
||||
- `format=json` natif Ollama + JSON Schema strict :
|
||||
```json
|
||||
{"additional_checks": [{"label": "string", "evidence": "string"}]}
|
||||
```
|
||||
- Max 3 checks ajoutés (env `RPA_SAFETY_CHECKS_LLM_MAX_CHECKS`)
|
||||
- Prompt : screenshot heartbeat actuel + workflow message + liste des checks déclaratifs (évite doublons)
|
||||
- Tout échec (timeout, exception, JSON invalide post-schema) → `additional_checks = []`, event `lea:safety_checks_llm_failed`, replay continue
|
||||
|
||||
**Hook** dans `replay_engine.py` — branche `action_type == "pause_for_human"`
|
||||
- Avant de basculer en `paused_need_help`, appel `safety_checks_provider.build_pause_payload(...)`
|
||||
- Stocke `replay_state["safety_checks"] = payload.checks`
|
||||
- Stocke `replay_state["pause_payload"] = payload` (pour debug/audit)
|
||||
|
||||
**Modif** `api_stream.py` — endpoint `/replay/resume`
|
||||
- Reçoit `{acknowledged_check_ids: [...]}` dans le body POST
|
||||
- Vérifie : tous les checks `required=true` doivent être dans `acknowledged_check_ids`
|
||||
- Sinon : `400 {error: "required_checks_missing", missing: [...]}`
|
||||
- Stocke `replay_state["checks_acknowledged"] = acknowledged_check_ids` (audit trail)
|
||||
- Reprise normale du replay
|
||||
|
||||
### 6.3 Composants frontend VWB
|
||||
|
||||
**Nouveau** `visual_workflow_builder/frontend_v4/src/components/PauseDialog.tsx` (~200 lignes)
|
||||
- Props : `pauseMessage`, `pauseReason`, `safetyChecks`, `onResume(ackIds)`, `onCancel`
|
||||
- Si `safetyChecks.length === 0` : rend la bulle existante (legacy, comportement actuel)
|
||||
- Sinon : bulle + `<ChecklistPanel>` avec checkboxes
|
||||
- Bouton Continuer disabled tant que `checks.filter(c => c.required && !checked).length > 0`
|
||||
- POST `/replay/resume` avec body `{acknowledged_check_ids: [...]}`
|
||||
- Visuel source :
|
||||
- Badge `[Léa]` pour `source: "llm_contextual"` (avec tooltip `evidence`)
|
||||
- Badge `[obligatoire]` pour `required: true`
|
||||
|
||||
**Étendu** `types.ts`
|
||||
- `PauseAction['parameters']` : ajout `safety_level?`, `safety_checks?`
|
||||
- `Execution` : ajout `pause_reason?`, `safety_checks?`
|
||||
|
||||
**Étendu** `PropertiesPanel.tsx:1356` — éditeur de l'action `pause_for_human`
|
||||
- Section "Niveau de sécurité" : dropdown `standard | medical_critical`
|
||||
- Section "Checks à valider" : liste éditable (id + label + required)
|
||||
|
||||
### 6.4 Data flow complet
|
||||
|
||||
```
|
||||
Action pause_for_human (medical_critical, 2 checks déclaratifs) atteinte
|
||||
↓
|
||||
SafetyChecksProvider.build_pause_payload()
|
||||
├─ checks = [...declarative] (2 entrées)
|
||||
├─ if safety_level == "medical_critical" and RPA_SAFETY_CHECKS_LLM_ENABLED=1:
|
||||
│ llm_checks = _call_llm_for_contextual_checks() (max 3, timeout 5s)
|
||||
│ checks += llm_checks
|
||||
└─ return PausePayload(checks, pause_reason, message)
|
||||
↓
|
||||
replay_state["status"] = "paused_need_help"
|
||||
replay_state["safety_checks"] = checks
|
||||
bus.emit("lea:safety_checks_generated", {count, sources})
|
||||
↓
|
||||
Frontend VWB poll /replay/state → reçoit pause_payload
|
||||
↓
|
||||
<PauseDialog> rend ChecklistPanel
|
||||
↓
|
||||
Médecin coche les 4 checks → clique Continuer
|
||||
↓
|
||||
POST /replay/resume {acknowledged_check_ids: [4 ids]}
|
||||
↓
|
||||
Serveur valide (tous required acquittés) → reprise du replay
|
||||
replay_state["checks_acknowledged"] = [...] (audit trail conservé)
|
||||
```
|
||||
|
||||
### 6.5 Error handling
|
||||
|
||||
| Cas | Comportement |
|
||||
|---|---|
|
||||
| `safety_level` absent | Pas d'appel LLM ; checks déclaratifs uniquement (peut être `[]`) → bulle simple si vide, checklist sinon |
|
||||
| Ollama timeout 5s | Event `lea:safety_checks_llm_failed`, `additional_checks=[]`, fallback safe (déclaratifs seuls) |
|
||||
| Ollama JSON malformé (post `format=json` — théoriquement impossible) | Idem timeout, fallback safe |
|
||||
| LLM produit un check absurde | Accepté tel quel, le superviseur ignore (pas de filtrage en V1) |
|
||||
| Frontend reçoit `safety_checks=[]` | Bulle simple, comportement legacy |
|
||||
| `RPA_SAFETY_CHECKS_LLM_ENABLED=0` | Couche LLM bypassée, déclaratifs gardés |
|
||||
| `/replay/resume` sans `acknowledged_check_ids` sur required | `400 required_checks_missing` |
|
||||
| Frontend POST `/replay/resume` rejeté | Toast d'erreur côté UI, état pause conservé, possibilité de cocher manquants et réessayer |
|
||||
|
||||
### 6.6 Configuration env vars
|
||||
|
||||
- `RPA_SAFETY_CHECKS_LLM_ENABLED=1` (défaut)
|
||||
- `RPA_SAFETY_CHECKS_LLM_MODEL=medgemma:4b`
|
||||
- `RPA_SAFETY_CHECKS_LLM_TIMEOUT_S=5`
|
||||
- `RPA_SAFETY_CHECKS_LLM_MAX_CHECKS=3`
|
||||
|
||||
### 6.7 Tests QW4
|
||||
|
||||
- `tests/unit/test_safety_checks_provider.py` : 7 cas (déclaratif seul, hybride réussi, LLM timeout, LLM JSON invalide, kill-switch, max_checks respecté, déclaratif vide)
|
||||
- `tests/integration/test_replay_resume_acknowledgments.py` : 3 cas (resume OK, missing required → 400, audit trail enregistré dans `checks_acknowledged`)
|
||||
- Frontend : `tests/components/PauseDialog.test.tsx` si suite Vitest existe (à confirmer pendant l'implémentation), sinon test manuel avec checklist écrite
|
||||
- Smoke : 1 workflow Easily avec `pause_for_human medical_critical` enrichi → vérification full chain
|
||||
|
||||
### 6.8 Compat workflows existants
|
||||
|
||||
100% backward — `pause_for_human` actuels n'ont ni `safety_level` ni `safety_checks` → comportement strictement identique. Aucune recréation forcée. Dom enrichira uniquement les workflows qu'il veut promouvoir au niveau `medical_critical`.
|
||||
|
||||
## 7. Tests, sécurité de la branche, livraison
|
||||
|
||||
### 7.1 Filet de sécurité avant TOUT commit sur `feature/qw-suite-mai`
|
||||
|
||||
1. Branche backup poussée Gitea : `backup/pre-qw-suite-mai-2026-05-05`
|
||||
2. Capture baseline E2E :
|
||||
```
|
||||
pytest tests/test_pipeline_e2e.py \
|
||||
tests/test_phase0_integration.py \
|
||||
tests/integration/test_stream_processor.py \
|
||||
-q 2>&1 | tee .qw-baseline.log
|
||||
```
|
||||
3. Smoke démo : 1 déroulé complet d'un workflow Easily Assure, archivage screenshot/vidéo de référence
|
||||
4. État VWB validé : démarrage Vite local, ouverture d'un workflow, lancement d'un replay simple, screenshot "tout va bien"
|
||||
|
||||
### 7.2 Discipline TDD légère par QW
|
||||
|
||||
- Test unitaire écrit AVANT le code de production (1 test rouge → 1 implémentation → vert)
|
||||
- Pas de TDD complet sur le frontend (Vitest + React = trop d'outillage à valider en parallèle), test manuel cadré avec checklist écrite
|
||||
- Re-run de la suite baseline après chaque commit QW, comparaison au log archivé
|
||||
- Toute régression bloque le passage au QW suivant tant qu'elle n'est pas comprise et résolue
|
||||
|
||||
### 7.3 Compat VWB — checklist explicite avant commit QW4
|
||||
|
||||
- [ ] Workflow ancien (sans `safety_checks`) → bulle simple s'affiche normalement
|
||||
- [ ] Workflow nouveau avec `safety_checks` déclaratifs uniquement → checklist visible, **pas** d'appel Ollama (vérification logs)
|
||||
- [ ] Workflow `medical_critical` → checklist + checks LLM apparaissent (vérification logs Ollama call dans les 5s)
|
||||
- [ ] Continuer désactivé tant que required non cochés
|
||||
- [ ] POST `/replay/resume` avec mauvais payload → toast d'erreur côté UI, pas de crash
|
||||
- [ ] PropertiesPanel : édition de `safety_checks` ne casse pas l'édition d'autres params de `pause_for_human`
|
||||
- [ ] DB `workflows.db` : ouverture après commit, aucune migration cassante (schéma JSON est libre)
|
||||
|
||||
### 7.4 Plan de commits
|
||||
|
||||
```
|
||||
1. test(qw1): tests monitor_router + grounding_offset (rouges)
|
||||
2. feat(qw1): multi-écrans piloté par monitor_index (verts)
|
||||
3. test(qw2): tests loop_detector composite (rouges)
|
||||
4. feat(qw2): LoopDetector composite avec kill-switch env
|
||||
5. test(qw4): tests safety_checks_provider + replay_resume (rouges)
|
||||
6. feat(qw4): safety_checks hybride déclaratif + LLM contextuel
|
||||
7. feat(vwb): PauseDialog + ChecklistPanel + extension PropertiesPanel
|
||||
8. docs(qw): docs/QW_SUITE_MAI.md + mise à jour MEMORY.md
|
||||
```
|
||||
|
||||
Chaque commit signé Co-Authored-By Claude. Branche poussée régulièrement sur Gitea pour backup distant.
|
||||
|
||||
### 7.5 Stratégie en cas de régression critique pendant la démo
|
||||
|
||||
Kill-switches env vars surchargeables sans redéploiement code :
|
||||
|
||||
```
|
||||
systemctl edit rpa-streaming
|
||||
# Ajouter sous [Service] :
|
||||
Environment=RPA_LOOP_DETECTOR_ENABLED=0
|
||||
Environment=RPA_SAFETY_CHECKS_LLM_ENABLED=0
|
||||
systemctl restart rpa-streaming
|
||||
```
|
||||
|
||||
Si problème grave au-delà des kill-switches : rollback à `backup/pre-qw-suite-mai-2026-05-05`.
|
||||
|
||||
```
|
||||
git checkout backup/pre-qw-suite-mai-2026-05-05
|
||||
./svc.sh restart
|
||||
```
|
||||
|
||||
### 7.6 Plan de livraison (Option A validée)
|
||||
|
||||
**Avant démo GHT (cette semaine) — Sprint priorité 1**
|
||||
- QW1 : tests + code + smoke (~1j)
|
||||
- QW2 : tests + code + tests intégration (~2j)
|
||||
- Capture baseline + replay smoke entre chaque
|
||||
- Si QW1+QW2 validés et probants → on enchaîne sur QW4 dès que possible (Dom accepte le weekend si "effet waouh" auprès de spécialistes RPA)
|
||||
|
||||
**Après démo / dès validation QW1+QW2 — Sprint priorité 2**
|
||||
- QW4 serveur (provider + LLM + endpoint resume) (~3j)
|
||||
- QW4 frontend (PauseDialog + PropertiesPanel) (~2j)
|
||||
- Doc + mise à jour MEMORY.md
|
||||
|
||||
**Total estimé** : ~8.5j-h ingénieur senior, étalable selon le retour démo.
|
||||
|
||||
## 8. Ce qui n'est PAS dans ce spec (out of scope)
|
||||
|
||||
- F1 (DSL d'actions Pydantic-first) : refactor de fond, sera son propre spec après la démo.
|
||||
- F2 (Mixture-of-Grounding routeur adaptatif) : nécessite F1, son propre spec.
|
||||
- F3 (Best-of-N + Reflection) : nécessite F1, son propre spec.
|
||||
- QW3 (`output_model_schema` Pydantic pour `extract_text`) : opportuniste, sera intégré quand on touchera `extract_text` pour autre chose.
|
||||
- Toute introduction de Pydantic-AI / instructor / Playwright / accessibility-tree : interdit (contraintes inviolables).
|
||||
- Refonte du composant pause en `<PauseDialog>` à 3 modes (option C de Q6) : reportée après démo si retour utilisateurs justifie l'investissement.
|
||||
|
||||
## 9. Open questions
|
||||
|
||||
Aucune. Toutes les décisions de design ont été tranchées via les 7 questions clarifiantes du brainstorming du 5 mai 2026.
|
||||
131
tests/integration/test_pause_for_human.py
Normal file
131
tests/integration/test_pause_for_human.py
Normal file
@@ -0,0 +1,131 @@
|
||||
"""Tests de l'action pause_for_human (C.5).
|
||||
|
||||
Vérifie la chaîne :
|
||||
- Validation côté replay_engine accepte le nouveau type
|
||||
- Conversion edge → action normalisée préserve le message
|
||||
- Bridge VWB → core mappe correctement
|
||||
- Le bridge VWB construit bien un edge avec action.type='pause_for_human'
|
||||
"""
|
||||
|
||||
from agent_v0.server_v1.replay_engine import (
|
||||
_ALLOWED_ACTION_TYPES,
|
||||
_validate_replay_action,
|
||||
_edge_to_normalized_actions,
|
||||
)
|
||||
from visual_workflow_builder.backend.services.learned_workflow_bridge import (
|
||||
VWB_ACTION_TO_CORE,
|
||||
convert_vwb_to_core_workflow,
|
||||
_vwb_params_to_core,
|
||||
)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Validation pipeline (replay_engine)
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
def test_pause_for_human_in_allowed_types():
    """The replay validator's whitelist must accept the pause action type."""
    assert "pause_for_human" in _ALLOWED_ACTION_TYPES
|
||||
|
||||
|
||||
def test_validate_pause_for_human_action_valid():
    """A well-formed pause_for_human action passes validation (None == OK)."""
    candidate = {
        "type": "pause_for_human",
        "parameters": {"message": "Valider UHCD ?"},
    }
    assert _validate_replay_action(candidate) is None
|
||||
|
||||
|
||||
def test_validate_pause_for_human_no_params_still_valid():
    """The validator must not require 'message' (the handler has a fallback)."""
    assert _validate_replay_action({"type": "pause_for_human"}) is None
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Conversion edge → action normalisée
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
class _FakeAction:
    """Duck-typed stand-in for a workflow edge's action object."""

    def __init__(self, type_, parameters=None):
        # Mirror only the attributes the conversion code reads.
        self.type = type_
        self.target = None
        self.parameters = parameters or {}
|
||||
|
||||
|
||||
class _FakeEdge:
    """Duck-typed stand-in for a workflow graph edge carrying one action."""

    def __init__(self, action, edge_id="e1", from_node="n1", to_node="n2"):
        self.action = action
        self.edge_id = edge_id
        self.from_node = from_node
        self.to_node = to_node
|
||||
|
||||
|
||||
def test_edge_to_action_pause_for_human_preserves_message():
    """Edge → normalized action keeps the pause message and attaches no coords."""
    message = "Tu valides UHCD ?"
    edge = _FakeEdge(_FakeAction("pause_for_human", parameters={"message": message}))

    normalized = _edge_to_normalized_actions(edge, params={})

    assert len(normalized) == 1
    action = normalized[0]
    assert action["type"] == "pause_for_human"
    assert action["parameters"]["message"] == message
    # Logical action: no screen coordinates should be present.
    assert "x_pct" not in action
    assert "y_pct" not in action
|
||||
|
||||
|
||||
def test_edge_to_action_pause_for_human_default_message():
    """An empty parameters dict falls back to the default pause message."""
    edge = _FakeEdge(_FakeAction("pause_for_human", parameters={}))
    normalized = _edge_to_normalized_actions(edge, params={})
    assert normalized[0]["parameters"]["message"] == "Validation requise"
|
||||
|
||||
|
||||
def test_edge_to_action_pause_for_human_carries_edge_metadata():
    """Edge identifiers must be propagated onto the normalized action."""
    edge = _FakeEdge(
        _FakeAction("pause_for_human", parameters={"message": "x"}),
        edge_id="edge_42",
        from_node="n_src",
        to_node="n_dst",
    )

    action = _edge_to_normalized_actions(edge, params={})[0]

    assert action["edge_id"] == "edge_42"
    assert action["from_node"] == "n_src"
    assert action["to_node"] == "n_dst"
    assert "action_id" in action
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Bridge VWB → core
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
def test_vwb_action_to_core_passthrough():
    """The VWB → core mapping must keep pause_for_human unchanged."""
    assert VWB_ACTION_TO_CORE["pause_for_human"] == "pause_for_human"
|
||||
|
||||
|
||||
def test_vwb_params_to_core_preserves_message():
    """Parameter conversion must hand the message through untouched."""
    converted = _vwb_params_to_core("pause_for_human", {"message": "Coucou"})
    assert converted == {"message": "Coucou"}
|
||||
|
||||
|
||||
def test_vwb_params_to_core_default_message():
    """A missing message gets the canonical default during conversion."""
    converted = _vwb_params_to_core("pause_for_human", {})
    assert converted["message"] == "Validation requise"
|
||||
|
||||
|
||||
def test_export_vwb_workflow_with_pause_step():
    """A VWB workflow containing a pause_for_human step must export an edge
    whose action.type is 'pause_for_human', with the message in parameters."""
    workflow_data = {"id": "wf_demo", "name": "Demo Urgences", "description": ""}
    steps_data = [
        {"id": "s1", "action_type": "click_anchor",
         "parameters": {"target_text": "25003284"}, "label": "Clic IPP"},
        {"id": "s2", "action_type": "pause_for_human",
         "parameters": {"message": "Valider UHCD ?"}, "label": "Pause"},
        {"id": "s3", "action_type": "click_anchor",
         "parameters": {"target_text": "Enregistrer"}, "label": "Clic Enregistrer"},
    ]

    core = convert_vwb_to_core_workflow(workflow_data, steps_data)

    # 3 steps → 3 nodes joined by 2 edges, starting in COACHING state.
    assert core["learning_state"] == "COACHING"
    assert len(core["nodes"]) == 3
    assert len(core["edges"]) == 2

    # Exactly one edge carries the pause action, with the right message.
    pause_edges = [
        e for e in core["edges"] if e["action"]["type"] == "pause_for_human"
    ]
    assert len(pause_edges) == 1
    assert pause_edges[0]["action"]["parameters"]["message"] == "Valider UHCD ?"
|
||||
282
tests/integration/test_t2a_extract.py
Normal file
282
tests/integration/test_t2a_extract.py
Normal file
@@ -0,0 +1,282 @@
|
||||
"""Tests des actions extract_text et t2a_decision (C+.5/.6).
|
||||
|
||||
Couvre :
|
||||
- _resolve_runtime_vars : templating {{var}} / {{var.field}}
|
||||
- _handle_extract_text_action : OCR mocké, stockage variable
|
||||
- _handle_t2a_decision_action : analyze_dpi mocké, stockage JSON
|
||||
- _edge_to_normalized_actions pour les 2 types
|
||||
- Bridge VWB → core (mapping + paramètres)
|
||||
"""
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from agent_v0.server_v1.replay_engine import (
|
||||
_ALLOWED_ACTION_TYPES,
|
||||
_SERVER_SIDE_ACTION_TYPES,
|
||||
_resolve_runtime_vars,
|
||||
_handle_extract_text_action,
|
||||
_handle_t2a_decision_action,
|
||||
_edge_to_normalized_actions,
|
||||
_create_replay_state,
|
||||
)
|
||||
from visual_workflow_builder.backend.services.learned_workflow_bridge import (
|
||||
VWB_ACTION_TO_CORE,
|
||||
convert_vwb_to_core_workflow,
|
||||
_vwb_params_to_core,
|
||||
)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Templating runtime
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
def test_resolve_simple_var():
|
||||
r = _resolve_runtime_vars("Patient {{ipp}}", {"ipp": "25003284"})
|
||||
assert r == "Patient 25003284"
|
||||
|
||||
|
||||
def test_resolve_field_access():
|
||||
r = _resolve_runtime_vars(
|
||||
"{{result.decision}} car {{result.justification}}",
|
||||
{"result": {"decision": "UHCD", "justification": "asthme + insuf coro"}},
|
||||
)
|
||||
assert "UHCD car asthme + insuf coro" == r
|
||||
|
||||
|
||||
def test_resolve_missing_var_kept_intact():
|
||||
r = _resolve_runtime_vars("Hello {{absent}} world", {"x": "y"})
|
||||
assert r == "Hello {{absent}} world"
|
||||
|
||||
|
||||
def test_resolve_missing_field_kept_intact():
|
||||
r = _resolve_runtime_vars("{{var.absent}}", {"var": {"present": "x"}})
|
||||
assert r == "{{var.absent}}"
|
||||
|
||||
|
||||
def test_resolve_in_dict_recursive():
|
||||
r = _resolve_runtime_vars(
|
||||
{"msg": "IPP {{ipp}}", "nested": {"k": "{{ipp}}"}, "list": ["{{age}}"]},
|
||||
{"ipp": "X", "age": 77},
|
||||
)
|
||||
assert r == {"msg": "IPP X", "nested": {"k": "X"}, "list": ["77"]}
|
||||
|
||||
|
||||
def test_resolve_empty_vars_noop():
|
||||
val = {"k": "{{var}}"}
|
||||
assert _resolve_runtime_vars(val, {}) == val
|
||||
assert _resolve_runtime_vars(val, None) == val
|
||||
|
||||
|
||||
def test_resolve_non_string_passthrough():
|
||||
assert _resolve_runtime_vars(42, {"x": "y"}) == 42
|
||||
assert _resolve_runtime_vars(None, {"x": "y"}) is None
|
||||
|
||||
|
||||
def test_resolve_handles_whitespace_in_braces():
|
||||
r = _resolve_runtime_vars("{{ ipp }}", {"ipp": "X"})
|
||||
assert r == "X"
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Action types & types serveur
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
def test_extract_text_in_allowed():
|
||||
assert "extract_text" in _ALLOWED_ACTION_TYPES
|
||||
|
||||
|
||||
def test_t2a_decision_in_allowed():
|
||||
assert "t2a_decision" in _ALLOWED_ACTION_TYPES
|
||||
|
||||
|
||||
def test_server_side_types():
|
||||
assert _SERVER_SIDE_ACTION_TYPES == {"extract_text", "t2a_decision"}
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Handler extract_text
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
def test_handle_extract_text_stores_variable():
|
||||
state = _create_replay_state("rep1", "wf", "sess", 3)
|
||||
last_hb = {"sess": {"path": "/fake/heartbeat.png", "timestamp": 0}}
|
||||
action = {
|
||||
"type": "extract_text",
|
||||
"parameters": {"output_var": "texte_motif", "paragraph": True},
|
||||
}
|
||||
with patch(
|
||||
"core.llm.extract_text_from_image",
|
||||
return_value="Patient asthme peakflow 260",
|
||||
):
|
||||
ok = _handle_extract_text_action(action, state, "sess", last_hb)
|
||||
assert ok is True
|
||||
assert state["variables"]["texte_motif"] == "Patient asthme peakflow 260"
|
||||
|
||||
|
||||
def test_handle_extract_text_no_heartbeat_stores_empty():
|
||||
state = _create_replay_state("rep1", "wf", "sess", 3)
|
||||
last_hb = {} # pas de heartbeat
|
||||
action = {"type": "extract_text", "parameters": {"output_var": "v"}}
|
||||
ok = _handle_extract_text_action(action, state, "sess", last_hb)
|
||||
assert ok is False
|
||||
assert state["variables"]["v"] == ""
|
||||
|
||||
|
||||
def test_handle_extract_text_default_var_name():
|
||||
state = _create_replay_state("rep1", "wf", "sess", 3)
|
||||
last_hb = {"sess": {"path": "/x.png", "timestamp": 0}}
|
||||
action = {"type": "extract_text", "parameters": {}}
|
||||
with patch("core.llm.extract_text_from_image", return_value="abc"):
|
||||
_handle_extract_text_action(action, state, "sess", last_hb)
|
||||
assert "extracted_text" in state["variables"]
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Handler t2a_decision
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
def test_handle_t2a_decision_stores_json():
|
||||
state = _create_replay_state("rep1", "wf", "sess", 3)
|
||||
action = {
|
||||
"type": "t2a_decision",
|
||||
"parameters": {
|
||||
"input_template": "Patient 78 ans, asthme, peakflow 260",
|
||||
"output_var": "decision_t2a",
|
||||
"model": "qwen2.5:7b",
|
||||
},
|
||||
}
|
||||
fake_result = {
|
||||
"decision": "REQUALIFICATION_HOSPITALISATION",
|
||||
"justification": "Surveillance continue requise",
|
||||
"confiance": "elevee",
|
||||
"_elapsed_s": 4.2,
|
||||
}
|
||||
with patch("core.llm.analyze_dpi", return_value=fake_result):
|
||||
ok = _handle_t2a_decision_action(action, state)
|
||||
assert ok is True
|
||||
assert state["variables"]["decision_t2a"]["decision"] == "REQUALIFICATION_HOSPITALISATION"
|
||||
|
||||
|
||||
def test_handle_t2a_decision_empty_input_returns_indetermine():
|
||||
state = _create_replay_state("rep1", "wf", "sess", 3)
|
||||
action = {"type": "t2a_decision", "parameters": {"input_template": "", "output_var": "r"}}
|
||||
ok = _handle_t2a_decision_action(action, state)
|
||||
assert ok is False
|
||||
assert state["variables"]["r"]["decision"] == "INDETERMINE"
|
||||
|
||||
|
||||
def test_handle_t2a_decision_analyze_exception():
|
||||
state = _create_replay_state("rep1", "wf", "sess", 3)
|
||||
action = {"type": "t2a_decision", "parameters": {"input_template": "x", "output_var": "r"}}
|
||||
with patch("core.llm.analyze_dpi", side_effect=RuntimeError("ollama down")):
|
||||
ok = _handle_t2a_decision_action(action, state)
|
||||
assert ok is False
|
||||
assert state["variables"]["r"]["decision"] == "INDETERMINE"
|
||||
assert "ollama down" in state["variables"]["r"]["_error"]
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Edge → action normalisée
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
class _FakeAction:
|
||||
def __init__(self, type_, parameters=None):
|
||||
self.type = type_
|
||||
self.target = None
|
||||
self.parameters = parameters or {}
|
||||
|
||||
|
||||
class _FakeEdge:
|
||||
def __init__(self, action, edge_id="e1", from_node="n1", to_node="n2"):
|
||||
self.edge_id = edge_id
|
||||
self.from_node = from_node
|
||||
self.to_node = to_node
|
||||
self.action = action
|
||||
|
||||
|
||||
def test_edge_to_action_extract_text():
|
||||
edge = _FakeEdge(_FakeAction(
|
||||
"extract_text",
|
||||
parameters={"output_var": "texte_examens", "paragraph": True},
|
||||
))
|
||||
actions = _edge_to_normalized_actions(edge, params={})
|
||||
assert len(actions) == 1
|
||||
a = actions[0]
|
||||
assert a["type"] == "extract_text"
|
||||
assert a["parameters"]["output_var"] == "texte_examens"
|
||||
assert a["parameters"]["paragraph"] is True
|
||||
|
||||
|
||||
def test_edge_to_action_t2a_decision():
|
||||
edge = _FakeEdge(_FakeAction(
|
||||
"t2a_decision",
|
||||
parameters={
|
||||
"input_template": "{{texte_motif}}",
|
||||
"output_var": "result",
|
||||
"model": "qwen2.5:7b",
|
||||
},
|
||||
))
|
||||
actions = _edge_to_normalized_actions(edge, params={})
|
||||
a = actions[0]
|
||||
assert a["type"] == "t2a_decision"
|
||||
assert a["parameters"]["input_template"] == "{{texte_motif}}"
|
||||
assert a["parameters"]["output_var"] == "result"
|
||||
assert a["parameters"]["model"] == "qwen2.5:7b"
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Bridge VWB → core
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
def test_vwb_extract_text_passthrough():
|
||||
assert VWB_ACTION_TO_CORE["extract_text"] == "extract_text"
|
||||
|
||||
|
||||
def test_vwb_t2a_decision_passthrough():
|
||||
assert VWB_ACTION_TO_CORE["t2a_decision"] == "t2a_decision"
|
||||
|
||||
|
||||
def test_vwb_params_extract_text_preserves_output_var():
|
||||
p = _vwb_params_to_core("extract_text", {"output_var": "v", "paragraph": False})
|
||||
assert p == {"output_var": "v", "paragraph": False}
|
||||
|
||||
|
||||
def test_vwb_params_extract_text_legacy_variable_name():
|
||||
"""Compat avec l'ancien paramètre variable_name côté VWB."""
|
||||
p = _vwb_params_to_core("extract_text", {"variable_name": "v_legacy"})
|
||||
assert p["output_var"] == "v_legacy"
|
||||
|
||||
|
||||
def test_vwb_params_t2a_decision_preserves_all():
|
||||
p = _vwb_params_to_core("t2a_decision", {
|
||||
"input_template": "DPI {{ipp}}",
|
||||
"output_var": "dec",
|
||||
"model": "qwen2.5:7b",
|
||||
})
|
||||
assert p == {"input_template": "DPI {{ipp}}", "output_var": "dec", "model": "qwen2.5:7b"}
|
||||
|
||||
|
||||
def test_export_workflow_with_t2a_chain():
|
||||
"""Workflow VWB extract_text → t2a_decision → pause_for_human export propre."""
|
||||
workflow_data = {"id": "wf_t2a", "name": "Demo T2A"}
|
||||
steps_data = [
|
||||
{"id": "s1", "action_type": "click_anchor", "parameters": {"target_text": "25003284"}, "label": "Clic IPP"},
|
||||
{"id": "s2", "action_type": "extract_text", "parameters": {"output_var": "dpi"}, "label": "OCR"},
|
||||
{"id": "s3", "action_type": "t2a_decision", "parameters": {
|
||||
"input_template": "{{dpi}}", "output_var": "dec", "model": "qwen2.5:7b",
|
||||
}, "label": "Analyse"},
|
||||
{"id": "s4", "action_type": "pause_for_human", "parameters": {
|
||||
"message": "Décision : {{dec.decision}} — {{dec.justification}}",
|
||||
}, "label": "Validation"},
|
||||
{"id": "s5", "action_type": "click_anchor", "parameters": {"target_text": "Enregistrer"}, "label": "Clic Enregistrer"},
|
||||
]
|
||||
core = convert_vwb_to_core_workflow(workflow_data, steps_data)
|
||||
edge_types = [e["action"]["type"] for e in core["edges"]]
|
||||
assert "extract_text" in edge_types
|
||||
assert "t2a_decision" in edge_types
|
||||
assert "pause_for_human" in edge_types
|
||||
# Vérifier que le templating est bien transporté
|
||||
t2a_edge = next(e for e in core["edges"] if e["action"]["type"] == "t2a_decision")
|
||||
assert t2a_edge["action"]["parameters"]["input_template"] == "{{dpi}}"
|
||||
@@ -868,6 +868,60 @@ def _load_anchor_metadata(anchor_id: str) -> Optional[Dict]:
|
||||
return None
|
||||
|
||||
|
||||
def _inject_anchor_targeting(action: Dict, anchor_id: str) -> None:
|
||||
"""Enrichit une action avec la cible visuelle (x_pct/y_pct + visual_mode/target_spec).
|
||||
|
||||
Mutation in-place de `action`. Utilisé pour click_anchor*, type_text et
|
||||
type_secret — toute action qui doit cibler une zone visuelle précise avant
|
||||
d'agir (clic ou frappe avec focus).
|
||||
|
||||
Sans cette injection, l'agent côté Windows ne peut pas faire le pre-click
|
||||
de focus avant `_type_text`, et le texte tape dans le vide.
|
||||
"""
|
||||
if not anchor_id:
|
||||
return
|
||||
|
||||
anchor_meta = _load_anchor_metadata(anchor_id)
|
||||
|
||||
# Coordonnées du centre du bbox (fallback si template matching échoue)
|
||||
if anchor_meta:
|
||||
bbox = anchor_meta.get('bounding_box', {})
|
||||
orig = anchor_meta.get('original_size', {})
|
||||
orig_w = orig.get('width', 1920)
|
||||
orig_h = orig.get('height', 1080)
|
||||
if bbox.get('x') is not None and orig_w > 0 and orig_h > 0:
|
||||
cx = (bbox['x'] + bbox.get('width', 0) / 2) / orig_w
|
||||
cy = (bbox['y'] + bbox.get('height', 0) / 2) / orig_h
|
||||
action['x_pct'] = round(cx, 4)
|
||||
action['y_pct'] = round(cy, 4)
|
||||
|
||||
# Image de l'ancre pour template matching côté agent
|
||||
anchor_b64 = _load_anchor_image_b64(anchor_id)
|
||||
if anchor_b64:
|
||||
target_spec = {
|
||||
'anchor_image_base64': anchor_b64,
|
||||
'anchor_id': anchor_id,
|
||||
}
|
||||
if anchor_meta:
|
||||
target_spec['anchor_bbox'] = anchor_meta.get('bounding_box', {})
|
||||
target_spec['original_size'] = anchor_meta.get('original_size', {})
|
||||
|
||||
action['visual_mode'] = True
|
||||
action['target_spec'] = target_spec
|
||||
logger.info(
|
||||
"Action %s : ancre '%s' chargée (%d Ko), visual_mode activé",
|
||||
action.get('action_id', '?'),
|
||||
anchor_id,
|
||||
len(anchor_b64) // 1024,
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
"Action %s : ancre '%s' introuvable, fallback blind mode",
|
||||
action.get('action_id', '?'),
|
||||
anchor_id,
|
||||
)
|
||||
|
||||
|
||||
@api_v3_bp.route('/execute-windows', methods=['POST'])
|
||||
def execute_windows():
|
||||
"""Proxy les actions du workflow vers le streaming server pour exécution sur Windows.
|
||||
@@ -932,45 +986,14 @@ def execute_windows():
|
||||
if vwb_type in _ANCHOR_CLICK_TYPES:
|
||||
anchor_id = action.get('anchor_id')
|
||||
if anchor_id:
|
||||
anchor_meta = _load_anchor_metadata(anchor_id)
|
||||
_inject_anchor_targeting(action, anchor_id)
|
||||
|
||||
# Calculer les coordonnées du centre du bbox (fallback si visual échoue)
|
||||
if anchor_meta:
|
||||
bbox = anchor_meta.get('bounding_box', {})
|
||||
orig = anchor_meta.get('original_size', {})
|
||||
orig_w = orig.get('width', 1920)
|
||||
orig_h = orig.get('height', 1080)
|
||||
if bbox.get('x') is not None and orig_w > 0 and orig_h > 0:
|
||||
cx = (bbox['x'] + bbox.get('width', 0) / 2) / orig_w
|
||||
cy = (bbox['y'] + bbox.get('height', 0) / 2) / orig_h
|
||||
action['x_pct'] = round(cx, 4)
|
||||
action['y_pct'] = round(cy, 4)
|
||||
|
||||
# Tenter aussi le visual_mode (template matching)
|
||||
anchor_b64 = _load_anchor_image_b64(anchor_id)
|
||||
if anchor_b64:
|
||||
target_spec = {
|
||||
'anchor_image_base64': anchor_b64,
|
||||
'anchor_id': anchor_id,
|
||||
}
|
||||
if anchor_meta:
|
||||
target_spec['anchor_bbox'] = anchor_meta.get('bounding_box', {})
|
||||
target_spec['original_size'] = anchor_meta.get('original_size', {})
|
||||
|
||||
action['visual_mode'] = True
|
||||
action['target_spec'] = target_spec
|
||||
logger.info(
|
||||
"Action %s : ancre '%s' chargée (%d Ko), visual_mode activé",
|
||||
action.get('action_id', '?'),
|
||||
anchor_id,
|
||||
len(anchor_b64) // 1024,
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
"Action %s : ancre '%s' introuvable, fallback blind mode",
|
||||
action.get('action_id', '?'),
|
||||
anchor_id,
|
||||
)
|
||||
# Propagation du by_text (ciblage textuel prioritaire sur template)
|
||||
_by_text = params.get('by_text', '')
|
||||
if _by_text:
|
||||
action['by_text'] = _by_text
|
||||
if 'target_spec' in action:
|
||||
action['target_spec']['by_text'] = _by_text
|
||||
|
||||
# Mapper le bouton selon le type de clic VWB
|
||||
if vwb_type == 'double_click_anchor':
|
||||
@@ -979,13 +1002,18 @@ def execute_windows():
|
||||
action['button'] = 'right'
|
||||
|
||||
# ---------------------------------------------------------------
|
||||
# type_text / type_secret → extraire le texte
|
||||
# type_text / type_secret → extraire le texte + cibler la zone
|
||||
# de saisie si une ancre visuelle est associée au step.
|
||||
# Sans ancre, l'agent tape là où le focus se trouve déjà
|
||||
# (compatibilité avec les workflows historiques sans anchor).
|
||||
# ---------------------------------------------------------------
|
||||
if vwb_type in ('type_text', 'type_secret') and 'text' in params:
|
||||
action['text'] = params['text']
|
||||
# Ne pas forcer un clic préalable à (0,0) si pas de coordonnées
|
||||
# L'exécuteur ne cliquera que si x_pct > 0 et y_pct > 0
|
||||
# (le clic de positionnement est fait par l'action click_anchor précédente)
|
||||
anchor_id = action.get('anchor_id') or (
|
||||
params.get('visual_anchor') or {}
|
||||
).get('anchor_id')
|
||||
if anchor_id:
|
||||
_inject_anchor_targeting(action, anchor_id)
|
||||
|
||||
# ---------------------------------------------------------------
|
||||
# keyboard_shortcut / hotkey → extraire les touches
|
||||
@@ -1043,11 +1071,26 @@ def execute_windows():
|
||||
# Sinon, retirer les actions fichiers du flux principal
|
||||
data['actions'] = non_file_actions
|
||||
|
||||
# Token Bearer pour le streaming server (auth obligatoire)
|
||||
_stream_token = os.environ.get('RPA_API_TOKEN', '')
|
||||
_stream_headers = {'Authorization': f'Bearer {_stream_token}'} if _stream_token else {}
|
||||
|
||||
# L'agent Windows poll sous session "agent_demo_user" (= agent_{user_id}, user_id="demo_user")
|
||||
# On injecte directement dans cette session pour éviter le transfer cross-session
|
||||
# et pour que /replay/raw ne tente pas l'auto-détection d'une session "sess_*"
|
||||
# (qui échoue avec "Aucune session Agent V1 active" si l'agent n'a pas créé de session V1).
|
||||
if not data.get('session_id'):
|
||||
data['session_id'] = 'agent_demo_user'
|
||||
|
||||
# Injecter le machine_id pour le ciblage multi-machine
|
||||
# Chercher la première machine Windows connectée si pas spécifié
|
||||
if 'machine_id' not in data or not data.get('machine_id'):
|
||||
try:
|
||||
machines_resp = req.get('http://localhost:5005/api/v1/traces/stream/machines', timeout=3)
|
||||
machines_resp = req.get(
|
||||
'http://localhost:5005/api/v1/traces/stream/machines',
|
||||
headers=_stream_headers,
|
||||
timeout=3,
|
||||
)
|
||||
if machines_resp.ok:
|
||||
machines = machines_resp.json().get('machines', [])
|
||||
for m in machines:
|
||||
@@ -1062,6 +1105,7 @@ def execute_windows():
|
||||
resp = req.post(
|
||||
'http://localhost:5005/api/v1/traces/stream/replay/raw',
|
||||
json=data,
|
||||
headers=_stream_headers,
|
||||
timeout=30,
|
||||
)
|
||||
return jsonify(resp.json()), resp.status_code
|
||||
|
||||
@@ -40,6 +40,17 @@ if _ROOT not in sys.path:
|
||||
STREAMING_SERVER_URL = "http://localhost:5005"
|
||||
|
||||
|
||||
def _stream_headers() -> Dict[str, str]:
|
||||
"""Bearer token pour les appels proxy VWB → streaming server.
|
||||
|
||||
Retourne un dict vide si RPA_API_TOKEN n'est pas défini ; dans ce cas
|
||||
les appels échoueront en 401 (auth obligatoire côté streaming).
|
||||
"""
|
||||
import os as _os
|
||||
token = _os.environ.get("RPA_API_TOKEN", "")
|
||||
return {"Authorization": f"Bearer {token}"} if token else {}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers — nom par défaut à l'import
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -162,6 +173,7 @@ def list_learned_workflows():
|
||||
resp = http_requests.get(
|
||||
f"{STREAMING_SERVER_URL}/api/v1/traces/stream/workflows",
|
||||
params=params,
|
||||
headers=_stream_headers(),
|
||||
timeout=3,
|
||||
)
|
||||
if resp.ok:
|
||||
@@ -526,6 +538,7 @@ def _load_core_workflow(
|
||||
resp = http_requests.get(
|
||||
f"{STREAMING_SERVER_URL}/api/v1/traces/stream/workflows",
|
||||
params=params,
|
||||
headers=_stream_headers(),
|
||||
timeout=3,
|
||||
)
|
||||
if resp.ok:
|
||||
@@ -538,6 +551,7 @@ def _load_core_workflow(
|
||||
try:
|
||||
detail_resp = http_requests.get(
|
||||
f"{STREAMING_SERVER_URL}/api/v1/traces/stream/workflow/{workflow_id}",
|
||||
headers=_stream_headers(),
|
||||
timeout=5,
|
||||
)
|
||||
if detail_resp.ok:
|
||||
@@ -573,6 +587,7 @@ def _notify_streaming_reload():
|
||||
try:
|
||||
http_requests.post(
|
||||
f"{STREAMING_SERVER_URL}/api/v1/traces/stream/reload-workflows",
|
||||
headers=_stream_headers(),
|
||||
timeout=2,
|
||||
)
|
||||
logger.debug("Streaming server notifié pour rechargement des workflows")
|
||||
|
||||
@@ -13,11 +13,17 @@ from flask_caching import Cache
|
||||
from flask_migrate import Migrate
|
||||
import os
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from logging.handlers import RotatingFileHandler
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Load environment variables
|
||||
load_dotenv()
|
||||
# Charger .env.local depuis la racine du projet AVANT tout : il contient
|
||||
# RPA_API_TOKEN utilisé pour le proxy VWB → streaming server. Sans cela,
|
||||
# le token est absent après chaque restart manuel du backend et tous les
|
||||
# appels proxy renvoient 401 « Token API invalide ».
|
||||
_PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
|
||||
load_dotenv(_PROJECT_ROOT / '.env.local')
|
||||
load_dotenv() # fallback .env dans cwd (n'écrase pas les vars déjà définies)
|
||||
|
||||
# Initialize Flask app
|
||||
app = Flask(__name__)
|
||||
|
||||
Binary file not shown.
@@ -57,7 +57,9 @@ VWB_ACTION_TO_CORE = {
|
||||
"scroll_to_anchor": "scroll",
|
||||
"visual_condition": "evaluate_condition",
|
||||
"screenshot_evidence": "screenshot",
|
||||
"extract_text": "extract_data",
|
||||
"extract_text": "extract_text", # passthrough — handler serveur OCR + variable
|
||||
"pause_for_human": "pause_for_human", # passthrough — intercepté par api_stream /replay/next
|
||||
"t2a_decision": "t2a_decision", # passthrough — handler serveur LLM T2A + variable
|
||||
}
|
||||
|
||||
|
||||
@@ -660,6 +662,23 @@ def _vwb_params_to_core(action_type: str, params: Dict[str, Any]) -> Dict[str, A
|
||||
elif action_type == "wait_for_anchor":
|
||||
core_params["duration_ms"] = params.get("duration_ms", 2000)
|
||||
|
||||
elif action_type == "pause_for_human":
|
||||
core_params["message"] = params.get("message", "Validation requise")
|
||||
|
||||
elif action_type == "extract_text":
|
||||
# variable_name côté VWB → output_var côté core (compat avec
|
||||
# le catalogue VWB existant qui utilise variable_name)
|
||||
var = params.get("output_var") or params.get("variable_name") or "extracted_text"
|
||||
core_params["output_var"] = var
|
||||
if "paragraph" in params:
|
||||
core_params["paragraph"] = bool(params["paragraph"])
|
||||
|
||||
elif action_type == "t2a_decision":
|
||||
core_params["input_template"] = params.get("input_template", "")
|
||||
core_params["output_var"] = params.get("output_var", "t2a_result")
|
||||
if params.get("model"):
|
||||
core_params["model"] = params["model"]
|
||||
|
||||
return core_params
|
||||
|
||||
|
||||
|
||||
@@ -4,7 +4,8 @@ import type { UIElement } from '../services/uiDetection';
|
||||
import {
|
||||
loadLibraryAsync,
|
||||
saveLibrary,
|
||||
compressThumbnail,
|
||||
addCaptureToLibrary,
|
||||
removeCaptureFromLibrary,
|
||||
} from '../services/captureLibraryStorage';
|
||||
|
||||
/**
|
||||
@@ -40,6 +41,8 @@ interface LibraryItem {
|
||||
timestamp: Date;
|
||||
sessionId?: string;
|
||||
favorite?: boolean;
|
||||
format?: 'v2';
|
||||
fullImageUrl?: string;
|
||||
}
|
||||
|
||||
export default function CapturePanel({
|
||||
@@ -55,7 +58,7 @@ export default function CapturePanel({
|
||||
const [showLibraryGallery, setShowLibraryGallery] = useState(false);
|
||||
const [library, setLibrary] = useState<LibraryItem[]>([]);
|
||||
const [currentCapture, setCurrentCapture] = useState<Capture | null>(null);
|
||||
const [timerSeconds, setTimerSeconds] = useState(0);
|
||||
const [timerSeconds, setTimerSeconds] = useState(5);
|
||||
const [countdown, setCountdown] = useState<number | null>(null);
|
||||
// Elements detectes sur l'apercu miniature
|
||||
const [previewElements, setPreviewElements] = useState<UIElement[]>([]);
|
||||
@@ -89,24 +92,35 @@ export default function CapturePanel({
|
||||
}
|
||||
}, [library, libraryLoaded]);
|
||||
|
||||
// Ajouter capture a la bibliotheque (thumbnail compresse JPEG 320x240)
|
||||
// Helper : ajoute une capture à la bibliothèque (PNG HD upload backend +
|
||||
// mise à jour de l'état local). Utilisé par le useEffect [capture] et par
|
||||
// doSmartCapture (capture locale Windows qui ne passe pas par la prop parente).
|
||||
const addToLibrary = useCallback(async (cap: Capture) => {
|
||||
try {
|
||||
const item = await addCaptureToLibrary(cap, { id: `cap_${Date.now()}` });
|
||||
setLibrary(prev => [
|
||||
{
|
||||
id: item.id,
|
||||
capture: item.capture,
|
||||
timestamp: typeof item.timestamp === 'string' ? new Date(item.timestamp) : item.timestamp,
|
||||
sessionId: item.sessionId,
|
||||
favorite: item.favorite ?? false,
|
||||
format: item.format,
|
||||
fullImageUrl: item.fullImageUrl,
|
||||
},
|
||||
...prev.slice(0, 19),
|
||||
]);
|
||||
} catch (e) {
|
||||
console.warn('[CapturePanel] Échec ajout bibliothèque', e);
|
||||
}
|
||||
}, []);
|
||||
|
||||
// Capture venant du parent (path "fallback local" via prop capture)
|
||||
useEffect(() => {
|
||||
if (!capture) return;
|
||||
setCurrentCapture(capture);
|
||||
let cancelled = false;
|
||||
(async () => {
|
||||
const compressed = await compressThumbnail(capture.screenshot_base64);
|
||||
if (cancelled) return;
|
||||
const newItem: LibraryItem = {
|
||||
id: `cap_${Date.now()}`,
|
||||
capture: { ...capture, screenshot_base64: compressed },
|
||||
timestamp: new Date(),
|
||||
favorite: false,
|
||||
};
|
||||
setLibrary(prev => [newItem, ...prev.slice(0, 19)]);
|
||||
})();
|
||||
return () => { cancelled = true; };
|
||||
}, [capture]);
|
||||
void addToLibrary(capture);
|
||||
}, [capture, addToLibrary]);
|
||||
|
||||
// Detecter les elements UI quand une capture arrive
|
||||
useEffect(() => {
|
||||
@@ -152,19 +166,24 @@ export default function CapturePanel({
|
||||
const resp = await fetch('/api/screen-capture/capture-windows', { method: 'POST' });
|
||||
const data = await resp.json();
|
||||
if (resp.ok && data.image) {
|
||||
setCurrentCapture({
|
||||
const cap: Capture = {
|
||||
screenshot_base64: data.image,
|
||||
width: data.width,
|
||||
height: data.height,
|
||||
source: data.source || 'windows',
|
||||
} as any);
|
||||
} as any;
|
||||
setCurrentCapture(cap);
|
||||
// Ajouter à la bibliothèque (le useEffect [capture] ne tire pas
|
||||
// ici car on ne passe pas par la prop parente)
|
||||
void addToLibrary(cap);
|
||||
return;
|
||||
}
|
||||
console.warn('Agent Windows indisponible, fallback local:', data.error);
|
||||
} catch (err) {
|
||||
console.warn('Erreur capture Windows, fallback local:', err);
|
||||
}
|
||||
// Fallback : capture locale (ecran du serveur Linux)
|
||||
// Fallback : capture locale (ecran du serveur Linux) — passe par la prop
|
||||
// parente, l'ajout se fera dans le useEffect [capture]
|
||||
onCapture();
|
||||
};
|
||||
|
||||
@@ -189,13 +208,44 @@ export default function CapturePanel({
|
||||
}, 1000);
|
||||
};
|
||||
|
||||
const handleLibrarySelect = (item: LibraryItem) => {
|
||||
setCurrentCapture(item.capture);
|
||||
const handleLibrarySelect = async (item: LibraryItem) => {
|
||||
// Format v2 : remplacer le thumbnail par le PNG HD téléchargé du backend
|
||||
// pour que la sélection d'ancre utilise une image non pixélisée.
|
||||
if (item.format === 'v2' && item.fullImageUrl) {
|
||||
try {
|
||||
const resp = await fetch(item.fullImageUrl);
|
||||
if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
|
||||
const blob = await resp.blob();
|
||||
const base64 = await new Promise<string>((resolve, reject) => {
|
||||
const reader = new FileReader();
|
||||
reader.onload = () => {
|
||||
const result = reader.result as string;
|
||||
// FileReader → "data:image/png;base64,..." → on retire le préfixe
|
||||
const idx = result.indexOf(',');
|
||||
resolve(idx >= 0 ? result.slice(idx + 1) : result);
|
||||
};
|
||||
reader.onerror = () => reject(reader.error);
|
||||
reader.readAsDataURL(blob);
|
||||
});
|
||||
setCurrentCapture({ ...item.capture, screenshot_base64: base64 });
|
||||
} catch (e) {
|
||||
console.warn('[CaptureLibrary] Échec chargement HD, fallback thumbnail', e);
|
||||
setCurrentCapture(item.capture);
|
||||
}
|
||||
} else {
|
||||
setCurrentCapture(item.capture);
|
||||
}
|
||||
setIsFullscreen(true);
|
||||
};
|
||||
|
||||
const handleDeleteLibraryItem = (id: string) => {
|
||||
const target = library.find(it => it.id === id);
|
||||
setLibrary(prev => prev.filter(item => item.id !== id));
|
||||
// v2 : supprimer aussi le PNG côté backend (le saveLibrary auto-déclenché
|
||||
// par le useEffect ne nettoie que le JSON, pas les fichiers PNG orphelins).
|
||||
if (target?.format === 'v2') {
|
||||
void removeCaptureFromLibrary(id, true);
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
@@ -204,17 +254,35 @@ export default function CapturePanel({
|
||||
|
||||
{/* Capture — auto-detection OS navigateur */}
|
||||
<div className="capture-controls">
|
||||
<button disabled={countdown !== null} onClick={doSmartCapture}>
|
||||
<button disabled={countdown !== null} onClick={doSmartCapture} title="Capture immédiate (sans délai)">
|
||||
Capturer
|
||||
</button>
|
||||
<select value={timerSeconds} onChange={(e) => setTimerSeconds(Number(e.target.value))}>
|
||||
<option value="0">Immediat</option>
|
||||
<option value="3">3 sec</option>
|
||||
<option value="5">5 sec</option>
|
||||
<option value="10">10 sec</option>
|
||||
</select>
|
||||
<button onClick={handleTimerCapture} disabled={countdown !== null}>
|
||||
{countdown !== null ? countdown : 'Timer'}
|
||||
<label style={{ display: 'flex', alignItems: 'center', gap: 4, fontSize: 12 }}>
|
||||
Délai :
|
||||
<select
|
||||
value={String(timerSeconds)}
|
||||
onChange={(e) => {
|
||||
const v = Number(e.target.value);
|
||||
console.log('[CapturePanel] timerSeconds →', v);
|
||||
setTimerSeconds(v);
|
||||
}}
|
||||
>
|
||||
<option value="0">Immediat</option>
|
||||
<option value="3">3 sec</option>
|
||||
<option value="5">5 sec</option>
|
||||
<option value="10">10 sec</option>
|
||||
</select>
|
||||
</label>
|
||||
<button
|
||||
onClick={handleTimerCapture}
|
||||
disabled={countdown !== null}
|
||||
title={`Capture après ${timerSeconds}s — utile pour préparer l'écran avant la prise`}
|
||||
>
|
||||
{countdown !== null
|
||||
? `${countdown}…`
|
||||
: timerSeconds === 0
|
||||
? 'Timer'
|
||||
: `Capturer dans ${timerSeconds}s`}
|
||||
</button>
|
||||
</div>
|
||||
|
||||
|
||||
@@ -54,11 +54,11 @@ export default function ToolPalette() {
|
||||
className="tool-item"
|
||||
draggable
|
||||
onDragStart={(e) => onDragStart(e, action.type)}
|
||||
title={action.label}
|
||||
title={`${action.label}\n\n${action.description}${action.needsAnchor ? '\n\n🎯 Nécessite une ancre visuelle' : ''}${action.params.length > 0 ? '\n\nParamètres : ' + action.params.map(p => p.name).join(', ') : ''}`}
|
||||
>
|
||||
<span className="tool-icon">{action.icon}</span>
|
||||
<span className="tool-label">{action.label}</span>
|
||||
{action.needsAnchor && <span className="tool-anchor-badge">🎯</span>}
|
||||
{action.needsAnchor && <span className="tool-anchor-badge" title="Nécessite de viser un élément à l'écran">🎯</span>}
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
|
||||
@@ -43,6 +43,8 @@ export type ActionType =
|
||||
| 'screenshot_evidence'
|
||||
| 'visual_condition'
|
||||
| 'loop_visual'
|
||||
| 'pause_for_human'
|
||||
| 't2a_decision'
|
||||
| 'download_to_folder'
|
||||
| 'ai_analyze_text'
|
||||
| 'ai_ocr'
|
||||
@@ -108,8 +110,9 @@ export const ACTIONS: ActionDefinition[] = [
|
||||
] },
|
||||
|
||||
// === EXTRACTION DE DONNÉES ===
|
||||
{ type: 'extract_text', label: 'Extraire texte', icon: '📋', description: 'Extrait le texte visible dans la zone de l\'ancre via OCR.', category: 'data', needsAnchor: true, params: [
|
||||
{ name: 'variable_name', type: 'string', description: 'Nom de la variable pour stocker le résultat' }
|
||||
{ type: 'extract_text', label: 'Extraire texte (OCR écran)', icon: '📋', description: 'OCR EasyOCR fr+en sur le dernier screenshot. Stocke le texte dans une variable réutilisable plus loin via {{output_var}}. Pas d\'ancre nécessaire — extrait toute la page visible.', category: 'data', needsAnchor: false, params: [
|
||||
{ name: 'output_var', type: 'string', description: 'Nom de la variable de sortie (ex: texte_motif). Réutilisable via {{nom}}.' },
|
||||
{ name: 'paragraph', type: 'boolean', description: 'Regrouper en paragraphes (true) ou lignes brutes (false)' }
|
||||
] },
|
||||
{ type: 'extract_table', label: 'Extraire tableau', icon: '📊', description: 'Extrait un tableau structuré depuis la zone de l\'ancre.', category: 'data', needsAnchor: true, params: [
|
||||
{ name: 'variable_name', type: 'string', description: 'Nom de la variable pour stocker le tableau' }
|
||||
@@ -129,6 +132,14 @@ export const ACTIONS: ActionDefinition[] = [
|
||||
{ type: 'loop_visual', label: 'Boucle visuelle', icon: '🔁', description: 'Répète les étapes connectées tant que l\'ancre est visible.', category: 'logic', needsAnchor: true, hidden: true, params: [
|
||||
{ name: 'max_iterations', type: 'number', description: 'Nombre maximum d\'itérations' }
|
||||
] },
|
||||
{ type: 'pause_for_human', label: 'Pause supervisée', icon: '⏸', description: 'Léa s\'arrête et demande validation humaine via une bulle interactive (boutons Continuer / Annuler).', category: 'logic', needsAnchor: false, params: [
|
||||
{ name: 'message', type: 'string', description: 'Message affiché dans la bulle (ex: "Je ne suis pas sûre du critère 3, validez-vous UHCD ?")' }
|
||||
] },
|
||||
{ type: 't2a_decision', label: 'Décision T2A (LLM)', icon: '🧠', description: 'Analyse un DPI urgences via LLM local (qwen2.5:7b par défaut) et propose FORFAIT_URGENCE ou REQUALIFICATION_HOSPITALISATION. Retourne JSON {decision, justification, elements_pour/contre, confiance}. Bench validé 100% accuracy.', category: 'logic', needsAnchor: false, params: [
|
||||
{ name: 'input_template', type: 'string', description: 'DPI à analyser. Supporte le templating {{var}} pour concaténer plusieurs extractions (ex: "{{texte_motif}}\\n{{texte_examens}}\\n{{texte_notes}}")' },
|
||||
{ name: 'output_var', type: 'string', description: 'Variable de sortie (ex: decision_t2a). Accès aux champs : {{decision_t2a.decision}}, {{decision_t2a.justification}}' },
|
||||
{ name: 'model', type: 'string', description: 'Modèle Ollama (default qwen2.5:7b). Autres : t2a-gemma3-27b-q4, gpt-oss:120b-cloud...' }
|
||||
] },
|
||||
|
||||
// === INTELLIGENCE ARTIFICIELLE ===
|
||||
{ type: 'ai_ocr', label: 'OCR Intelligent', icon: '📝', description: 'Reconnaissance de texte par IA sur la zone de l\'ancre.', category: 'ai', needsAnchor: true, params: [
|
||||
|
||||
Reference in New Issue
Block a user