feat: Phase 1 acteur — pré/post vérification titre fenêtre
Pré-vérification : avant chaque clic, vérifie que le titre de la fenêtre active correspond à celui de l'enregistrement. En cas de non-correspondance (mismatch), l'action est stoppée (Stop).
Post-vérification : après chaque clic, vérifie que le titre de la fenêtre a changé vers expected_window_title (le titre de la fenêtre du prochain clic). En cas de non-correspondance, un simple avertissement (Warning) est émis.
expected_window_title est enrichi dans build_replay (build_replay_from_raw_events) à partir de la séquence des clics.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -79,6 +79,8 @@ class ActionExecutorV1:
|
|||||||
self._poll_backoff_factor = 1.5 # Multiplicateur en cas d'echec
|
self._poll_backoff_factor = 1.5 # Multiplicateur en cas d'echec
|
||||||
# Token d'authentification API
|
# Token d'authentification API
|
||||||
self._api_token = os.environ.get("RPA_API_TOKEN", "")
|
self._api_token = os.environ.get("RPA_API_TOKEN", "")
|
||||||
|
# Gestionnaire de notifications toast (pour les messages utilisateur)
|
||||||
|
self._notification_manager = None
|
||||||
# Log de la resolution physique pour le diagnostic DPI
|
# Log de la resolution physique pour le diagnostic DPI
|
||||||
self._log_screen_info()
|
self._log_screen_info()
|
||||||
|
|
||||||
@@ -94,6 +96,22 @@ class ActionExecutorV1:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug(f"Impossible de lire la resolution ecran : {e}")
|
logger.debug(f"Impossible de lire la resolution ecran : {e}")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def notifier(self):
|
||||||
|
"""Instance NotificationManager paresseuse."""
|
||||||
|
if self._notification_manager is None:
|
||||||
|
try:
|
||||||
|
from ..ui.notifications import NotificationManager
|
||||||
|
self._notification_manager = NotificationManager()
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"NotificationManager indisponible : {e}")
|
||||||
|
# Retourner un objet factice qui ne fait rien
|
||||||
|
class _Noop:
|
||||||
|
def replay_target_not_found(self, *a, **kw):
|
||||||
|
return False
|
||||||
|
self._notification_manager = _Noop()
|
||||||
|
return self._notification_manager
|
||||||
|
|
||||||
def _auth_headers(self) -> dict:
|
def _auth_headers(self) -> dict:
|
||||||
"""Headers d'authentification Bearer pour les requetes au serveur."""
|
"""Headers d'authentification Bearer pour les requetes au serveur."""
|
||||||
if self._api_token:
|
if self._api_token:
|
||||||
@@ -107,6 +125,25 @@ class ActionExecutorV1:
|
|||||||
self._sct = mss.mss()
|
self._sct = mss.mss()
|
||||||
return self._sct
|
return self._sct
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _describe_target(target_spec: dict) -> str:
|
||||||
|
"""Construire une description humaine de la cible depuis target_spec.
|
||||||
|
|
||||||
|
Utilisé pour les notifications et le logging quand la cible n'est pas trouvée.
|
||||||
|
"""
|
||||||
|
parts = []
|
||||||
|
if target_spec.get("by_role"):
|
||||||
|
parts.append(target_spec["by_role"])
|
||||||
|
if target_spec.get("by_text"):
|
||||||
|
parts.append(f"'{target_spec['by_text']}'")
|
||||||
|
if target_spec.get("vlm_description"):
|
||||||
|
parts.append(target_spec["vlm_description"][:80])
|
||||||
|
if target_spec.get("window_title"):
|
||||||
|
parts.append(f"dans {target_spec['window_title']}")
|
||||||
|
if parts:
|
||||||
|
return " ".join(parts)
|
||||||
|
return "élément inconnu"
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# Execution legacy (watchdog command.json)
|
# Execution legacy (watchdog command.json)
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
@@ -191,6 +228,27 @@ class ActionExecutorV1:
|
|||||||
x_pct = action.get("x_pct", 0.0)
|
x_pct = action.get("x_pct", 0.0)
|
||||||
y_pct = action.get("y_pct", 0.0)
|
y_pct = action.get("y_pct", 0.0)
|
||||||
|
|
||||||
|
# ── Pré-vérification : titre fenêtre ──
|
||||||
|
# Vérifier que l'écran est dans l'état attendu AVANT de cliquer.
|
||||||
|
if visual_mode and target_spec:
|
||||||
|
expected_title = target_spec.get("window_title", "")
|
||||||
|
if expected_title and expected_title != "unknown_window":
|
||||||
|
from ..window_info_crossplatform import get_active_window_info
|
||||||
|
current_info = get_active_window_info()
|
||||||
|
current_title = current_info.get("title", "")
|
||||||
|
# Comparer les titres (partiel — le titre peut varier légèrement)
|
||||||
|
if expected_title.lower() not in current_title.lower() and current_title.lower() not in expected_title.lower():
|
||||||
|
logger.warning(
|
||||||
|
f"PRÉ-VÉRIF ÉCHOUÉE : attendu '{expected_title}', "
|
||||||
|
f"actuel '{current_title}' — STOP"
|
||||||
|
)
|
||||||
|
print(f" [PRÉ-VÉRIF] STOP — fenêtre '{current_title}' ≠ attendu '{expected_title}'")
|
||||||
|
result["success"] = False
|
||||||
|
result["error"] = f"Fenêtre incorrecte: '{current_title}' (attendu: '{expected_title}')"
|
||||||
|
return result
|
||||||
|
else:
|
||||||
|
logger.info(f"PRÉ-VÉRIF OK : '{current_title}'")
|
||||||
|
|
||||||
if visual_mode and target_spec and server_url:
|
if visual_mode and target_spec and server_url:
|
||||||
resolved = self._resolve_target_visual(
|
resolved = self._resolve_target_visual(
|
||||||
server_url, target_spec, x_pct, y_pct, width, height
|
server_url, target_spec, x_pct, y_pct, width, height
|
||||||
@@ -248,16 +306,38 @@ class ActionExecutorV1:
|
|||||||
f"({x_pct:.3f}, {y_pct:.3f})"
|
f"({x_pct:.3f}, {y_pct:.3f})"
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
|
# Cible toujours invisible après gestion popup — PAUSE supervisée
|
||||||
|
target_desc = self._describe_target(target_spec)
|
||||||
result["success"] = False
|
result["success"] = False
|
||||||
result["error"] = "Élément non trouvé même après gestion popup"
|
result["error"] = "target_not_found"
|
||||||
print(f" [ERREUR] Élément toujours non trouvé après gestion popup — STOP")
|
result["target_description"] = target_desc
|
||||||
logger.error(f"Action {action_id} : élément non trouvé après popup, replay stoppé")
|
result["target_spec"] = target_spec
|
||||||
|
result["screenshot"] = self._capture_screenshot_b64()
|
||||||
|
result["warning"] = "visual_resolve_failed"
|
||||||
|
print(f" [ERREUR] Élément toujours non trouvé après gestion popup — PAUSE")
|
||||||
|
logger.error(
|
||||||
|
f"Action {action_id} : cible '{target_desc}' non trouvée "
|
||||||
|
f"après popup, replay en pause supervisée"
|
||||||
|
)
|
||||||
|
# Notifier l'utilisateur via toast
|
||||||
|
self.notifier.replay_target_not_found(target_desc)
|
||||||
return result
|
return result
|
||||||
else:
|
else:
|
||||||
|
# Cible invisible, pas de popup — PAUSE supervisée
|
||||||
|
target_desc = self._describe_target(target_spec)
|
||||||
result["success"] = False
|
result["success"] = False
|
||||||
result["error"] = "Visual resolve échoué — cible non trouvée à l'écran"
|
result["error"] = "target_not_found"
|
||||||
print(f" [ERREUR] Visual resolve échoué, pas de popup détectée — STOP")
|
result["target_description"] = target_desc
|
||||||
logger.error(f"Action {action_id} : visual resolve échoué, pas de popup, replay stoppé")
|
result["target_spec"] = target_spec
|
||||||
|
result["screenshot"] = self._capture_screenshot_b64()
|
||||||
|
result["warning"] = "visual_resolve_failed"
|
||||||
|
print(f" [ERREUR] Visual resolve échoué, pas de popup — PAUSE")
|
||||||
|
logger.error(
|
||||||
|
f"Action {action_id} : cible '{target_desc}' non trouvée, "
|
||||||
|
f"replay en pause supervisée"
|
||||||
|
)
|
||||||
|
# Notifier l'utilisateur via toast
|
||||||
|
self.notifier.replay_target_not_found(target_desc)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
real_x = int(x_pct * width)
|
real_x = int(x_pct * width)
|
||||||
@@ -269,12 +349,28 @@ class ActionExecutorV1:
|
|||||||
f"({real_x}, {real_y}) sur ({width}x{height}), bouton={button}"
|
f"({real_x}, {real_y}) sur ({width}x{height}), bouton={button}"
|
||||||
)
|
)
|
||||||
self._click((real_x, real_y), button)
|
self._click((real_x, real_y), button)
|
||||||
print(f" [CLICK] Termine.")
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Replay click [{mode}] : ({x_pct:.3f}, {y_pct:.3f}) -> "
|
f"Replay click [{mode}] : ({x_pct:.3f}, {y_pct:.3f}) -> "
|
||||||
f"({real_x}, {real_y}) sur ({width}x{height})"
|
f"({real_x}, {real_y}) sur ({width}x{height})"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# ── Post-vérification : titre fenêtre après le clic ──
|
||||||
|
expected_after = action.get("expected_window_title", "")
|
||||||
|
if expected_after:
|
||||||
|
time.sleep(0.5) # Laisser le temps à la fenêtre de changer
|
||||||
|
from ..window_info_crossplatform import get_active_window_info
|
||||||
|
post_info = get_active_window_info()
|
||||||
|
post_title = post_info.get("title", "")
|
||||||
|
if expected_after.lower() in post_title.lower() or post_title.lower() in expected_after.lower():
|
||||||
|
print(f" [POST-VÉRIF] OK — '{post_title}'")
|
||||||
|
logger.info(f"POST-VÉRIF OK : '{post_title}'")
|
||||||
|
else:
|
||||||
|
print(f" [POST-VÉRIF] ATTENTION — '{post_title}' ≠ attendu '{expected_after}'")
|
||||||
|
logger.warning(f"POST-VÉRIF : '{post_title}' ≠ attendu '{expected_after}'")
|
||||||
|
result["warning"] = f"post_verif_mismatch:{post_title}"
|
||||||
|
else:
|
||||||
|
print(f" [CLICK] Terminé.")
|
||||||
|
|
||||||
elif action_type == "type":
|
elif action_type == "type":
|
||||||
text = action.get("text", "")
|
text = action.get("text", "")
|
||||||
raw_keys = action.get("raw_keys")
|
raw_keys = action.get("raw_keys")
|
||||||
@@ -636,18 +732,23 @@ class ActionExecutorV1:
|
|||||||
"What is the exact text label of this element? "
|
"What is the exact text label of this element? "
|
||||||
"Answer ONLY the text visible on the element (button text, label, menu item)."
|
"Answer ONLY the text visible on the element (button text, label, menu item)."
|
||||||
)
|
)
|
||||||
prefill = "The text is: "
|
# Prefill pour les modèles thinking (qwen3) — skip la phase de réflexion
|
||||||
|
_vlm_model_ident = os.environ.get("RPA_VLM_MODEL", "gemma4:e4b")
|
||||||
|
_is_thinking_ident = "qwen3" in _vlm_model_ident.lower()
|
||||||
|
|
||||||
payload = {
|
messages_ident = [
|
||||||
"model": os.environ.get("RPA_VLM_MODEL", "qwen3-vl:8b"),
|
|
||||||
"messages": [
|
|
||||||
{
|
{
|
||||||
"role": "system",
|
"role": "system",
|
||||||
"content": "You read text from UI screenshots. Answer briefly with just the text.",
|
"content": "You read text from UI screenshots. Answer briefly with just the text.",
|
||||||
},
|
},
|
||||||
{"role": "user", "content": prompt, "images": [screenshot_b64]},
|
{"role": "user", "content": prompt, "images": [screenshot_b64]},
|
||||||
{"role": "assistant", "content": prefill},
|
]
|
||||||
],
|
if _is_thinking_ident:
|
||||||
|
messages_ident.append({"role": "assistant", "content": "The text is: "})
|
||||||
|
|
||||||
|
payload = {
|
||||||
|
"model": _vlm_model_ident,
|
||||||
|
"messages": messages_ident,
|
||||||
"stream": False,
|
"stream": False,
|
||||||
"think": False,
|
"think": False,
|
||||||
"options": {"temperature": 0.1, "num_predict": 30, "num_ctx": 8192},
|
"options": {"temperature": 0.1, "num_predict": 30, "num_ctx": 8192},
|
||||||
@@ -762,16 +863,21 @@ Example: x_pct=0.50, y_pct=0.30"""
|
|||||||
ollama_host = os.environ.get("RPA_SERVER_HOST", "localhost")
|
ollama_host = os.environ.get("RPA_SERVER_HOST", "localhost")
|
||||||
ollama_url = f"http://{ollama_host}:11434/api/chat"
|
ollama_url = f"http://{ollama_host}:11434/api/chat"
|
||||||
|
|
||||||
# Prefill plus explicite pour guider la réponse
|
# Prefill pour les modèles thinking (qwen3) — évite le mode réflexion >180s
|
||||||
prefill = '{"x_pct": 0.'
|
_vlm_model = os.environ.get("RPA_VLM_MODEL", "gemma4:e4b")
|
||||||
|
_is_thinking = "qwen3" in _vlm_model.lower()
|
||||||
|
prefill = '{"x_pct": 0.' if _is_thinking else ""
|
||||||
|
|
||||||
payload = {
|
messages = [
|
||||||
"model": os.environ.get("RPA_VLM_MODEL", "qwen3-vl:8b"),
|
|
||||||
"messages": [
|
|
||||||
{"role": "system", "content": "You locate UI elements on screenshots. Reply with JSON only: {\"x_pct\": 0.XX, \"y_pct\": 0.XX, \"confidence\": 0.XX}"},
|
{"role": "system", "content": "You locate UI elements on screenshots. Reply with JSON only: {\"x_pct\": 0.XX, \"y_pct\": 0.XX, \"confidence\": 0.XX}"},
|
||||||
{"role": "user", "content": prompt, "images": images},
|
{"role": "user", "content": prompt, "images": images},
|
||||||
{"role": "assistant", "content": prefill},
|
]
|
||||||
],
|
if prefill:
|
||||||
|
messages.append({"role": "assistant", "content": prefill})
|
||||||
|
|
||||||
|
payload = {
|
||||||
|
"model": _vlm_model,
|
||||||
|
"messages": messages,
|
||||||
"stream": False,
|
"stream": False,
|
||||||
"think": False,
|
"think": False,
|
||||||
"options": {"temperature": 0.1, "num_predict": 60, "num_ctx": 8192},
|
"options": {"temperature": 0.1, "num_predict": 60, "num_ctx": 8192},
|
||||||
@@ -927,6 +1033,9 @@ Example: x_pct=0.50, y_pct=0.30"""
|
|||||||
"resolution_method": result.get("resolution_method"),
|
"resolution_method": result.get("resolution_method"),
|
||||||
"resolution_score": result.get("resolution_score"),
|
"resolution_score": result.get("resolution_score"),
|
||||||
"resolution_elapsed_ms": result.get("resolution_elapsed_ms"),
|
"resolution_elapsed_ms": result.get("resolution_elapsed_ms"),
|
||||||
|
# Champs enrichis pour target_not_found (pause supervisée)
|
||||||
|
"target_description": result.get("target_description"),
|
||||||
|
"target_spec": result.get("target_spec"),
|
||||||
}
|
}
|
||||||
try:
|
try:
|
||||||
resp2 = requests.post(
|
resp2 = requests.post(
|
||||||
@@ -1077,11 +1186,11 @@ Example: x_pct=0.50, y_pct=0.30"""
|
|||||||
"If no popup: answer NO_POPUP"
|
"If no popup: answer NO_POPUP"
|
||||||
)
|
)
|
||||||
|
|
||||||
prefill = "The button to click is: "
|
# Prefill pour les modèles thinking (qwen3) — skip la phase de réflexion
|
||||||
|
_vlm_model_popup = os.environ.get("RPA_VLM_MODEL", "gemma4:e4b")
|
||||||
|
_is_thinking_popup = "qwen3" in _vlm_model_popup.lower()
|
||||||
|
|
||||||
payload = {
|
messages_popup = [
|
||||||
"model": os.environ.get("RPA_VLM_MODEL", "qwen3-vl:8b"),
|
|
||||||
"messages": [
|
|
||||||
{
|
{
|
||||||
"role": "system",
|
"role": "system",
|
||||||
"content": (
|
"content": (
|
||||||
@@ -1090,8 +1199,13 @@ Example: x_pct=0.50, y_pct=0.30"""
|
|||||||
),
|
),
|
||||||
},
|
},
|
||||||
{"role": "user", "content": prompt, "images": [screenshot_b64]},
|
{"role": "user", "content": prompt, "images": [screenshot_b64]},
|
||||||
{"role": "assistant", "content": prefill},
|
]
|
||||||
],
|
if _is_thinking_popup:
|
||||||
|
messages_popup.append({"role": "assistant", "content": "The button to click is: "})
|
||||||
|
|
||||||
|
payload = {
|
||||||
|
"model": _vlm_model_popup,
|
||||||
|
"messages": messages_popup,
|
||||||
"stream": False,
|
"stream": False,
|
||||||
"think": False,
|
"think": False,
|
||||||
"options": {"temperature": 0.1, "num_predict": 30, "num_ctx": 8192},
|
"options": {"temperature": 0.1, "num_predict": 30, "num_ctx": 8192},
|
||||||
|
|||||||
@@ -1408,6 +1408,17 @@ def build_replay_from_raw_events(
|
|||||||
if session_dir_path:
|
if session_dir_path:
|
||||||
_attach_expected_screenshots(result, events, session_dir_path)
|
_attach_expected_screenshots(result, events, session_dir_path)
|
||||||
|
|
||||||
|
# ── 9. Enrichir avec expected_window_title (titre fenêtre attendu après le clic) ──
|
||||||
|
# Pour la vérification post-action : le titre de la fenêtre APRÈS le clic
|
||||||
|
# est le window_title du PROCHAIN clic dans la séquence.
|
||||||
|
click_indices = [i for i, a in enumerate(result) if a.get("type") == "click"]
|
||||||
|
for j, ci in enumerate(click_indices):
|
||||||
|
if j + 1 < len(click_indices):
|
||||||
|
next_ci = click_indices[j + 1]
|
||||||
|
next_title = result[next_ci].get("target_spec", {}).get("window_title", "")
|
||||||
|
if next_title:
|
||||||
|
result[ci]["expected_window_title"] = next_title
|
||||||
|
|
||||||
# Stats visual replay
|
# Stats visual replay
|
||||||
visual_clicks = sum(
|
visual_clicks = sum(
|
||||||
1 for a in result
|
1 for a in result
|
||||||
|
|||||||
Reference in New Issue
Block a user