feat: premier replay E2E + mode apprentissage supervisé
Premier replay fonctionnel de bout en bout (Bloc-notes, Chrome). Corrections critiques : - Fix double-lancement agent (Lea.bat start /b + verrou PID) - Sérialisation replay (threading.Lock dans poll_and_execute) - Garde UIA bbox >50% écran (rejet conteneurs "Bureau") - Filtre fenêtres bruit système (systray overflow) - Auto-nettoyage replays bloqués (paused_need_help) Cascade visuelle complète dans session_cleaner : - UIA local (10ms) → template matching (100ms) → serveur docTR/VLM - Nettoyage bureau pré-replay (clic "Afficher le bureau") - Crops 80x80 + vlm_description pour chaque clic Grounding contraint à la fenêtre active : - Capture croppée à la fenêtre au lieu de l'écran entier - Conversion coordonnées fenêtre → écran - Élimine les faux positifs taskbar/systray Mode apprentissage supervisé (SUPERVISE → capture humaine) : - Léa passe en mode capture quand elle est perdue - Capture mini-workflow humain (clics + frappes + combos) - Fin par Ctrl+Shift+L ou timeout inactivité 10s - Correction stockée dans target_memory.db via serveur Deploy Windows complet (grounding.py, policy.py, uia_helper.py). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -17,6 +17,7 @@ import base64
|
||||
import hashlib
|
||||
import io
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
import logging
|
||||
|
||||
@@ -72,6 +73,12 @@ class ActionExecutorV1:
|
||||
# different de celui qui utilise l'instance).
|
||||
self._sct = None
|
||||
self.running = True
|
||||
# ── Verrou de sérialisation replay ──
|
||||
# Garantit qu'UNE SEULE action de replay s'exécute à la fois.
|
||||
# Sans ce lock, deux threads (polling main.py + lea_ui) peuvent
|
||||
# consommer deux actions simultanément → race condition + mss
|
||||
# thread-unsafe retourne des résolutions fantômes (1024x768).
|
||||
self._replay_lock = threading.Lock()
|
||||
# Backoff exponentiel pour le polling replay (evite de marteler le serveur)
|
||||
self._poll_backoff = 1.0 # Delai actuel (secondes)
|
||||
self._poll_backoff_min = 1.0 # Delai minimal (reset apres succes)
|
||||
@@ -340,6 +347,25 @@ class ActionExecutorV1:
|
||||
)
|
||||
return None
|
||||
|
||||
# ── GARDE : rejeter les éléments géants (conteneurs) ──
|
||||
# Un élément qui couvre >50% de l'écran est un conteneur
|
||||
# (Bureau, Rechercher, liste), pas un bouton cliquable.
|
||||
# Cliquer au centre d'un conteneur = clic aveugle.
|
||||
elem_w = element.width()
|
||||
elem_h = element.height()
|
||||
screen_area = screen_width * screen_height
|
||||
elem_area = elem_w * elem_h
|
||||
if screen_area > 0 and elem_area / screen_area > 0.5:
|
||||
logger.warning(
|
||||
f"UIA REJET : '{name}' couvre {elem_area / screen_area * 100:.0f}% "
|
||||
f"de l'écran ({elem_w}x{elem_h}) — conteneur, pas un élément cliquable"
|
||||
)
|
||||
print(
|
||||
f" [UIA] REJET — '{name}' trop grand "
|
||||
f"({elem_w}x{elem_h}, {elem_area / screen_area * 100:.0f}% écran)"
|
||||
)
|
||||
return None
|
||||
|
||||
cx, cy = element.center()
|
||||
if screen_width <= 0 or screen_height <= 0:
|
||||
return None
|
||||
@@ -499,10 +525,25 @@ class ActionExecutorV1:
|
||||
"visual_resolved": False,
|
||||
}
|
||||
|
||||
# ── Délai inter-actions (anti race condition mss) ──
|
||||
wait_before = action.get("wait_before", 0.5)
|
||||
if wait_before > 0:
|
||||
time.sleep(wait_before)
|
||||
|
||||
try:
|
||||
monitor = self.sct.monitors[1]
|
||||
width, height = monitor["width"], monitor["height"]
|
||||
|
||||
# ── Diagnostic résolution ──
|
||||
logger.info(
|
||||
f"[REPLAY] Action {action_id} ({action_type}) — "
|
||||
f"écran replay: {width}x{height}, "
|
||||
f"x_pct={action.get('x_pct', 0):.4f}, "
|
||||
f"y_pct={action.get('y_pct', 0):.4f} "
|
||||
f"→ pixel ({int(action.get('x_pct', 0) * width)}, "
|
||||
f"{int(action.get('y_pct', 0) * height)})"
|
||||
)
|
||||
|
||||
# Resolution visuelle des coordonnees si demande
|
||||
x_pct = action.get("x_pct", 0.0)
|
||||
y_pct = action.get("y_pct", 0.0)
|
||||
@@ -526,7 +567,7 @@ class ActionExecutorV1:
|
||||
)
|
||||
if expected_title and expected_title != "unknown_window":
|
||||
from ..window_info_crossplatform import get_active_window_info
|
||||
from ..ui.messages import est_fenetre_lea
|
||||
from ..ui.messages import est_fenetre_lea, est_fenetre_bruit
|
||||
|
||||
# Polling court pour laisser le temps à la fenêtre de
|
||||
# se stabiliser (évite les faux négatifs sur transitions
|
||||
@@ -544,8 +585,9 @@ class ActionExecutorV1:
|
||||
time.sleep(0.3)
|
||||
continue
|
||||
|
||||
# Si on tombe sur unknown_window → on attend aussi
|
||||
if not current_title or current_title == "unknown_window":
|
||||
# Bruit système (systray overflow, taskbar, etc.)
|
||||
# → on attend que la vraie fenêtre reprenne le focus
|
||||
if est_fenetre_bruit(current_title):
|
||||
time.sleep(0.3)
|
||||
continue
|
||||
|
||||
@@ -686,8 +728,8 @@ class ActionExecutorV1:
|
||||
|
||||
if action_type == "click":
|
||||
# Si visual_mode est activé, le resolve DOIT réussir.
|
||||
# Pas de fallback blind — on arrête le replay si la cible
|
||||
# n'est pas trouvée visuellement. C'est un RPA VISUEL.
|
||||
# Pas de fallback blind — Léa VOIT l'écran et CHERCHE
|
||||
# l'élément. Si toute la cascade échoue → pause supervisée.
|
||||
if visual_mode and not result.get("visual_resolved"):
|
||||
# ── Policy : décider quoi faire quand grounding échoue ──
|
||||
from .policy import PolicyEngine, Decision
|
||||
@@ -709,7 +751,6 @@ class ActionExecutorV1:
|
||||
)
|
||||
|
||||
if policy_decision.decision == Decision.RETRY:
|
||||
# Re-tenter le grounding après correction (popup fermée, etc.)
|
||||
resolved2 = self._resolve_target_visual(
|
||||
server_url, target_spec, x_pct, y_pct, width, height
|
||||
)
|
||||
@@ -719,7 +760,6 @@ class ActionExecutorV1:
|
||||
result["visual_resolved"] = True
|
||||
print(f" [POLICY] Re-resolve OK après {policy_decision.action_taken}")
|
||||
else:
|
||||
# Re-resolve échoué — SUPERVISE (rendre la main)
|
||||
result["success"] = False
|
||||
result["error"] = "target_not_found"
|
||||
result["target_description"] = target_desc
|
||||
@@ -746,18 +786,55 @@ class ActionExecutorV1:
|
||||
)
|
||||
return result
|
||||
|
||||
else: # SUPERVISE ou CONTINUE
|
||||
result["success"] = False
|
||||
result["error"] = "target_not_found"
|
||||
result["target_description"] = target_desc
|
||||
result["target_spec"] = target_spec
|
||||
result["screenshot"] = self._capture_screenshot_b64()
|
||||
result["warning"] = "visual_resolve_failed"
|
||||
self.notifier.replay_target_not_found(
|
||||
target_desc,
|
||||
target_spec.get("window_title", ""),
|
||||
else: # SUPERVISE → mode apprentissage
|
||||
# Léa est perdue. Au lieu de s'arrêter, elle
|
||||
# passe en mode capture et enregistre ce que
|
||||
# l'humain fait (mini-workflow de correction).
|
||||
try:
|
||||
self.notifier.replay_target_not_found(
|
||||
target_desc,
|
||||
target_spec.get("window_title", ""),
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
human_actions = self._capture_human_correction(
|
||||
timeout_s=120,
|
||||
)
|
||||
return result
|
||||
if human_actions:
|
||||
# L'humain a montré un mini-workflow
|
||||
result["success"] = True
|
||||
result["resolution_method"] = "human_supervised"
|
||||
result["warning"] = "human_supervised"
|
||||
# Stocker le dernier clic comme position résolue
|
||||
last_click = None
|
||||
for ha in reversed(human_actions):
|
||||
if ha.get("type") == "click":
|
||||
last_click = ha
|
||||
break
|
||||
if last_click:
|
||||
result["actual_position"] = {
|
||||
"x_pct": last_click["x_pct"],
|
||||
"y_pct": last_click["y_pct"],
|
||||
}
|
||||
# Envoyer toute la correction au serveur
|
||||
result["correction"] = {
|
||||
"actions": human_actions,
|
||||
"action_count": len(human_actions),
|
||||
"last_click": last_click,
|
||||
}
|
||||
logger.info(
|
||||
f"[APPRENTISSAGE] Correction reçue : "
|
||||
f"{len(human_actions)} actions — je m'en souviendrai."
|
||||
)
|
||||
else:
|
||||
# Timeout — l'humain n'a pas répondu
|
||||
result["success"] = False
|
||||
result["error"] = "target_not_found"
|
||||
result["target_description"] = target_desc
|
||||
result["target_spec"] = target_spec
|
||||
result["screenshot"] = self._capture_screenshot_b64()
|
||||
result["warning"] = "visual_resolve_failed"
|
||||
|
||||
real_x = int(x_pct * width)
|
||||
real_y = int(y_pct * height)
|
||||
@@ -1417,15 +1494,24 @@ Example: x_pct=0.50, y_pct=0.30"""
|
||||
2. Execute l'action (clic, texte, etc.)
|
||||
3. POST /replay/result avec le resultat + screenshot
|
||||
|
||||
Args:
|
||||
session_id: Identifiant de la session courante
|
||||
server_url: URL de base du serveur streaming
|
||||
machine_id: Identifiant de la machine (pour le replay multi-machine)
|
||||
Sérialisé par _replay_lock — une seule action à la fois.
|
||||
Sans ce lock, deux threads concurrents consomment deux actions
|
||||
et mss retourne des résolutions fantômes (thread-unsafe).
|
||||
|
||||
Retourne True si une action a ete executee, False sinon.
|
||||
IMPORTANT: Si une action est recue, le resultat est TOUJOURS rapporte
|
||||
au serveur (meme en cas d'erreur d'execution).
|
||||
"""
|
||||
# Sérialisation stricte : si un autre thread exécute déjà une
|
||||
# action, on abandonne ce poll immédiatement (pas de file d'attente).
|
||||
if not self._replay_lock.acquire(blocking=False):
|
||||
return False
|
||||
|
||||
try:
|
||||
return self._poll_and_execute_inner(session_id, server_url, machine_id)
|
||||
finally:
|
||||
self._replay_lock.release()
|
||||
|
||||
def _poll_and_execute_inner(self, session_id: str, server_url: str, machine_id: str) -> bool:
|
||||
"""Implémentation interne de poll_and_execute (protégée par _replay_lock)."""
|
||||
import requests
|
||||
|
||||
replay_next_url = f"{server_url}/traces/stream/replay/next"
|
||||
@@ -1499,11 +1585,14 @@ Example: x_pct=0.50, y_pct=0.30"""
|
||||
print(f">>> ERREUR EXECUTION : {e}")
|
||||
logger.error(f"Erreur execute_replay_action: {e}")
|
||||
import traceback
|
||||
tb_str = traceback.format_exc()
|
||||
traceback.print_exc()
|
||||
result = {
|
||||
"action_id": action_id,
|
||||
"success": False,
|
||||
"error": f"Exception executor: {e}",
|
||||
# Inclure le traceback complet pour diagnostiquer
|
||||
# les crashes côté agent depuis les logs serveur
|
||||
"error": f"{e}\n---TRACEBACK---\n{tb_str[-500:]}",
|
||||
"screenshot": None,
|
||||
}
|
||||
|
||||
@@ -1525,6 +1614,8 @@ Example: x_pct=0.50, y_pct=0.30"""
|
||||
# Champs enrichis pour target_not_found (pause supervisée)
|
||||
"target_description": result.get("target_description"),
|
||||
"target_spec": result.get("target_spec"),
|
||||
# Correction humaine (mode apprentissage supervisé)
|
||||
"correction": result.get("correction"),
|
||||
}
|
||||
try:
|
||||
resp2 = requests.post(
|
||||
@@ -2007,6 +2098,159 @@ Example: x_pct=0.50, y_pct=0.30"""
|
||||
|
||||
logger.debug(f"Texte saisi char-by-char ({len(text)} chars)")
|
||||
|
||||
# =========================================================================
# Learning mode — the human demonstrates, Léa learns
# =========================================================================

# Hotkey signalling the end of the human correction.
# Kept for backward compatibility; the press handler additionally performs
# a modifier-aware match (see _capture_human_correction), because this raw
# key set rarely matches as-is (right-hand modifiers, control characters).
_LEARNING_DONE_HOTKEY = {Key.ctrl_l, Key.shift, KeyCode.from_char("l")}

def _capture_human_correction(self, timeout_s: float = 120.0) -> list[dict]:
    """Capture a mini-workflow of corrective actions from the human.

    Léa is lost — she switches to capture mode and records ALL of the
    human's actions (clicks, keystrokes, combos) until the human signals
    completion:
      - Ctrl+Shift+L (hotkey)
      - or an inactivity timeout (10 s without any action)
      - or the global timeout (*timeout_s*, default 120 s)

    Args:
        timeout_s: Global capture timeout, in seconds.

    Returns:
        The list of captured action dicts (may be empty on timeout).
        This is a mini-workflow, not just a single click.
    """
    done_event = threading.Event()
    actions: list[dict] = []
    # One-element list so the nested listener callbacks can rebind the
    # timestamp (a plain float would be read-only inside the closures).
    last_action_time = [time.time()]
    keys_pressed: set = set()
    INACTIVITY_TIMEOUT = 10.0  # seconds

    monitor = self.sct.monitors[1]
    screen_w, screen_h = monitor["width"], monitor["height"]

    # ── Modifier-aware hotkey matching (bug fix) ──
    # The raw check `_LEARNING_DONE_HOTKEY.issubset(keys_pressed)` is
    # unreliable with pynput: while Ctrl is held, the letter arrives as a
    # control character ('\x0c' for L, not 'l') on Windows, and the
    # right-hand modifiers (ctrl_r / shift_r) never equal ctrl_l / shift.
    # So we test the modifiers and the letter independently.
    _CTRL_KEYS = {Key.ctrl, Key.ctrl_l, Key.ctrl_r}
    _SHIFT_KEYS = {Key.shift, Key.shift_l, Key.shift_r}

    def _is_hotkey_letter(key) -> bool:
        """True when *key* is the letter L (plain, control-char or vk form)."""
        if isinstance(key, KeyCode):
            # '\x0c' == chr(ord('l') & 0x1f): Ctrl+L's control character.
            if key.char and key.char.lower() in ("l", "\x0c"):
                return True
            if key.vk == 0x4C:  # Windows virtual-key code for 'L'
                return True
        return False

    def _on_click(x, y, button, pressed):
        # Returning False stops the pynput listener.
        if done_event.is_set():
            return False
        if pressed and button.name in ("left", "right"):
            action = {
                "type": "click",
                "x_pct": round(x / screen_w, 6),
                "y_pct": round(y / screen_h, 6),
                "button": button.name,
                "timestamp": time.time(),
            }
            # Best-effort UIA snapshot of the element under the cursor.
            try:
                from .uia_helper import get_shared_helper
                helper = get_shared_helper()
                if helper.available:
                    elem = helper.query_at(int(x), int(y), with_parents=True)
                    if elem:
                        action["uia_snapshot"] = elem.to_dict()
            except Exception:
                pass
            actions.append(action)
            last_action_time[0] = time.time()
            logger.info(f"[APPRENTISSAGE] Clic ({x}, {y}) bouton={button.name}")

    def _on_key_press(key):
        if done_event.is_set():
            return False
        keys_pressed.add(key)
        # Check the Ctrl+Shift+L hotkey: legacy raw-set check kept for
        # compatibility, plus the modifier-aware match that actually
        # fires on Windows (control characters, right-hand modifiers).
        if self._LEARNING_DONE_HOTKEY.issubset(keys_pressed) or (
            _is_hotkey_letter(key)
            and _CTRL_KEYS & keys_pressed
            and _SHIFT_KEYS & keys_pressed
        ):
            logger.info("[APPRENTISSAGE] Hotkey Ctrl+Shift+L — fin de correction")
            print(" [APPRENTISSAGE] Ctrl+Shift+L reçu — merci !")
            done_event.set()
            return False

    def _on_key_release(key):
        keys_pressed.discard(key)
        if done_event.is_set():
            return False
        # Record text keystrokes (not bare modifiers).
        # NOTE(review): while Ctrl is held, key.char is a control
        # character and is recorded verbatim — confirm the replay side
        # tolerates that.
        if hasattr(key, "char") and key.char:
            actions.append({
                "type": "type",
                "text": key.char,
                "timestamp": time.time(),
            })
            last_action_time[0] = time.time()
        elif key == Key.enter:
            actions.append({
                "type": "key_combo",
                "keys": ["enter"],
                "timestamp": time.time(),
            })
            last_action_time[0] = time.time()

    from pynput.mouse import Listener as MouseListener
    from pynput.keyboard import Listener as KeyboardListener

    mouse_listener = MouseListener(on_click=_on_click)
    kbd_listener = KeyboardListener(
        on_press=_on_key_press, on_release=_on_key_release,
    )
    mouse_listener.start()
    kbd_listener.start()

    logger.info(
        f"[APPRENTISSAGE] Mode capture activé (timeout={timeout_s}s, "
        f"inactivité={INACTIVITY_TIMEOUT}s, hotkey=Ctrl+Shift+L)"
    )
    print(
        f" [APPRENTISSAGE] Montre-moi comment faire.\n"
        f" Quand tu as fini → Ctrl+Shift+L\n"
        f" (ou j'attends {INACTIVITY_TIMEOUT}s sans action)"
    )

    # Wait for: hotkey OR inactivity OR global timeout.
    start = time.time()
    while not done_event.is_set():
        elapsed = time.time() - start
        if elapsed > timeout_s:
            logger.info("[APPRENTISSAGE] Timeout global")
            break
        # Inactivity timeout: the human performed at least one action
        # and has done nothing for INACTIVITY_TIMEOUT seconds.
        if actions and (time.time() - last_action_time[0]) > INACTIVITY_TIMEOUT:
            logger.info(
                f"[APPRENTISSAGE] Inactivité {INACTIVITY_TIMEOUT}s — "
                f"fin automatique ({len(actions)} actions)"
            )
            print(f" [APPRENTISSAGE] Pas d'action depuis {INACTIVITY_TIMEOUT}s — je reprends.")
            break
        time.sleep(0.2)

    mouse_listener.stop()
    kbd_listener.stop()

    logger.info(f"[APPRENTISSAGE] {len(actions)} actions capturées")
    print(f" [APPRENTISSAGE] {len(actions)} actions capturées — merci !")
    return actions
|
||||
|
||||
def _capture_crop_at(self, x: int, y: int, size: int = 80) -> str:
    """Grab a square crop of the screen centred on (x, y).

    The crop is clamped to the screen bounds, so a point near an edge
    yields a smaller rectangle. Returns the crop as a base64-encoded
    JPEG string, or an empty string on any failure (missing PIL,
    capture error, …) — callers treat "" as "no crop available".
    """
    try:
        from PIL import Image

        with mss.mss() as grabber:
            shot = grabber.grab(grabber.monitors[1])
            image = Image.frombytes("RGB", shot.size, shot.bgra, "raw", "BGRX")

            radius = size // 2
            # Clamp the crop box to the image bounds.
            box = (
                max(0, x - radius),
                max(0, y - radius),
                min(image.width, x + radius),
                min(image.height, y + radius),
            )

            payload = io.BytesIO()
            image.crop(box).save(payload, format="JPEG", quality=85)
            return base64.b64encode(payload.getvalue()).decode("utf-8")
    except Exception:
        # Best-effort helper: never propagate capture errors.
        return ""
|
||||
|
||||
def _click(self, pos, button_name):
|
||||
"""Deplacer la souris via courbe de Bézier puis cliquer.
|
||||
|
||||
|
||||
@@ -15,6 +15,7 @@ Ref: docs/PLAN_ACTEUR_V1.md — Architecture MICRO (grounding + exécution)
|
||||
"""
|
||||
|
||||
import base64
|
||||
import io
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
@@ -126,19 +127,62 @@ class GroundingEngine:
|
||||
)
|
||||
|
||||
t_start = time.time()
|
||||
screenshot_b64 = self._executor._capture_screenshot_b64(max_width=0, quality=75)
|
||||
|
||||
# ── Capture contrainte à la fenêtre active ──
|
||||
# Le grounding ne voit QUE la fenêtre attendue — pas la taskbar,
|
||||
# pas le systray, pas les autres apps. Comme un humain qui regarde
|
||||
# l'application sur laquelle il travaille.
|
||||
window_rect = None
|
||||
try:
|
||||
from ..window_info_crossplatform import get_active_window_rect
|
||||
win_info = get_active_window_rect()
|
||||
if win_info and win_info.get("rect"):
|
||||
r = win_info["rect"] # [left, top, right, bottom]
|
||||
# Validation : fenêtre visible et pas minuscule
|
||||
w = r[2] - r[0]
|
||||
h = r[3] - r[1]
|
||||
if w > 50 and h > 50:
|
||||
window_rect = {
|
||||
"left": max(0, r[0]),
|
||||
"top": max(0, r[1]),
|
||||
"width": min(w, screen_width),
|
||||
"height": min(h, screen_height),
|
||||
}
|
||||
logger.info(
|
||||
f"Grounding contraint à la fenêtre : "
|
||||
f"{window_rect['width']}x{window_rect['height']} "
|
||||
f"à ({window_rect['left']}, {window_rect['top']})"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug(f"Pas de window rect disponible : {e}")
|
||||
|
||||
screenshot_b64 = self._capture_window_or_screen(window_rect)
|
||||
if not screenshot_b64:
|
||||
return GroundingResult(
|
||||
found=False, detail="Capture screenshot échouée",
|
||||
elapsed_ms=(time.time() - t_start) * 1000,
|
||||
)
|
||||
|
||||
# Dimensions de la zone capturée (fenêtre ou écran entier)
|
||||
cap_w = window_rect["width"] if window_rect else screen_width
|
||||
cap_h = window_rect["height"] if window_rect else screen_height
|
||||
|
||||
for strategy in strategies:
|
||||
result = self._try_strategy(
|
||||
strategy, server_url, screenshot_b64, target_spec,
|
||||
fallback_x, fallback_y, screen_width, screen_height,
|
||||
fallback_x, fallback_y, cap_w, cap_h,
|
||||
)
|
||||
if result.found:
|
||||
# ── Conversion coords fenêtre → coords écran ──
|
||||
if window_rect:
|
||||
# Le grounding a retourné des coords relatives à la fenêtre
|
||||
# On les convertit en coords relatives à l'écran entier
|
||||
abs_x = window_rect["left"] + result.x_pct * cap_w
|
||||
abs_y = window_rect["top"] + result.y_pct * cap_h
|
||||
result.x_pct = abs_x / screen_width
|
||||
result.y_pct = abs_y / screen_height
|
||||
result.detail = f"{result.detail} [fenêtre {cap_w}x{cap_h}]"
|
||||
|
||||
result.elapsed_ms = (time.time() - t_start) * 1000
|
||||
return result
|
||||
|
||||
@@ -148,6 +192,39 @@ class GroundingEngine:
|
||||
elapsed_ms=(time.time() - t_start) * 1000,
|
||||
)
|
||||
|
||||
def _capture_window_or_screen(self, window_rect: Optional[Dict]) -> str:
    """Capture either the active window (cropped) or the whole screen.

    When *window_rect* is provided (keys: left/top/width/height), only
    that region is grabbed. Otherwise the primary monitor is captured
    as a fallback. Returns a base64-encoded JPEG string; on any capture
    error, falls back to the executor's generic screenshot helper.
    """
    try:
        import mss as mss_lib
        from PIL import Image

        with mss_lib.mss() as grabber:
            if window_rect:
                # Grab only the window region.
                shot = grabber.grab({
                    "left": window_rect["left"],
                    "top": window_rect["top"],
                    "width": window_rect["width"],
                    "height": window_rect["height"],
                })
            else:
                # Full-screen fallback.
                shot = grabber.grab(grabber.monitors[1])

            image = Image.frombytes("RGB", shot.size, shot.bgra, "raw", "BGRX")
            payload = io.BytesIO()
            image.save(payload, format="JPEG", quality=75)
            return base64.b64encode(payload.getvalue()).decode("utf-8")
    except Exception as e:
        logger.warning(f"Capture échouée : {e}")
        # Fall back to the executor's existing capture method.
        return self._executor._capture_screenshot_b64(max_width=0, quality=75)
|
||||
|
||||
def _try_strategy(
|
||||
self,
|
||||
strategy: str,
|
||||
|
||||
@@ -568,6 +568,35 @@ def est_fenetre_lea(titre_fenetre: str) -> bool:
|
||||
return any(re.search(motif, titre_lower) for motif in _MOTIFS_FENETRE_LEA_REGEX)
|
||||
|
||||
|
||||
# Windows system windows to ignore during pre-checks.
# These are not application windows — they are system noise that grabs
# focus unpredictably.
# NOTE: entries are matched as lowercase *substrings* of the window
# title, so keep them specific. The previous bare "overflow" entry
# wrongly flagged legitimate windows (e.g. a "Stack Overflow" browser
# tab) as noise; it is replaced with the actual systray overflow titles.
_FENETRES_BRUIT_SYSTEME = (
    "fenêtre de dépassement de capacité",
    "overflow notification area",  # English systray overflow title
    "system tray overflow",        # Windows 11 variant
    "program manager",
    "barre des tâches",
    "task bar",
    "cortana",
    "action center",
    "centre de notifications",
)


def est_fenetre_bruit(titre_fenetre: str) -> bool:
    """Detect whether a window title is Windows system noise.

    These windows grab focus unpredictably (systray overflow, taskbar,
    Program Manager) and are never the target of a user action.

    Args:
        titre_fenetre: Window title to test (may be empty or None).

    Returns:
        True when the title is empty/None, "unknown_window", or matches
        a known system-noise pattern; False otherwise.
    """
    if not titre_fenetre:
        return True  # no title = noise
    titre_lower = titre_fenetre.lower().strip()
    if titre_lower == "unknown_window":
        return True
    return any(p in titre_lower for p in _FENETRES_BRUIT_SYSTEME)
|
||||
|
||||
|
||||
# Conservé pour rétro-compatibilité avec le code qui listait MOTIFS_FENETRE_LEA
|
||||
MOTIFS_FENETRE_LEA = (
|
||||
"léa",
|
||||
|
||||
Reference in New Issue
Block a user