Files
rpa_vision_v3/agent_rust/src/executor.rs
Dom aa39af327f feat: agent Rust Phase 2 — visual mode (template matching serveur)
- visual.rs : resolve via POST /replay/resolve_target
- executor.rs : resolve avant chaque clic si visual_mode=true
- Fallback blind si matching échoue
- Binaire toujours 1.8 MB (pas de nouvelle dépendance)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-18 22:29:26 +01:00

385 lines
13 KiB
Rust

//! Exécuteur d'actions pour le replay.
//!
//! Simule les clics souris, la saisie de texte, les combos clavier et les attentes.
//! Utilise enigo pour la simulation, compatible Windows et Linux.
//! Reproduit le comportement de agent_v1/core/executor.py.
use crate::config::Config;
use crate::network::{Action, ActionResult};
use crate::visual;
use enigo::{
Coordinate, Direction, Enigo, Key, Keyboard, Mouse, Settings,
};
use std::thread;
use std::time::Duration;
/// Exécute une action de replay et retourne le résultat.
///
/// Dispatche vers le bon handler selon le type d'action.
/// Les coordonnées x_pct/y_pct (0.0-1.0) sont converties en pixels
/// à partir des dimensions de l'écran.
/// Si visual_mode est activé, résout d'abord la cible via le serveur.
pub fn execute_action(
action: &Action,
screen_width: u32,
screen_height: u32,
config: &Config,
) -> ActionResult {
match action.action_type.as_str() {
"click" => execute_click(action, screen_width, screen_height, config),
"type" => execute_type(action, screen_width, screen_height, config),
"key_combo" => execute_key_combo(action),
"scroll" => execute_scroll(action, screen_width, screen_height),
"wait" => execute_wait(action),
_ => ActionResult::error(
&action.action_id,
&format!("Type d'action inconnu : {}", action.action_type),
),
}
}
/// Picks the coordinates an action should target.
///
/// Returns the action's own `(x_pct, y_pct)` unchanged when `visual_mode` is
/// off or `target_spec` is null. Otherwise asks `visual::resolve_target_visual`
/// for a position and returns it; if that call yields `None`, the action's own
/// ("blind") coordinates are returned as a fallback.
fn resolve_coordinates(
    action: &Action,
    screen_width: u32,
    screen_height: u32,
    config: &Config,
) -> (f64, f64) {
    let blind = (action.x_pct, action.y_pct);

    // Visual resolution only applies when requested and a target is described.
    if !action.visual_mode || action.target_spec.is_null() {
        return blind;
    }

    println!(
        " [VISUAL] Mode visuel active — resolution de la cible..."
    );
    let located = visual::resolve_target_visual(
        config,
        &action.target_spec,
        blind.0,
        blind.1,
        screen_width,
        screen_height,
    );

    if let Some((rx, ry)) = located {
        println!(" [VISUAL] Resolu : ({:.4}, {:.4})", rx, ry);
        (rx, ry)
    } else {
        println!(
            " [VISUAL] Echec — fallback coordonnees aveugles ({:.4}, {:.4})",
            blind.0, blind.1
        );
        blind
    }
}
/// Exécute un clic souris aux coordonnées normalisées.
/// Résout visuellement la cible si visual_mode est activé.
fn execute_click(action: &Action, screen_width: u32, screen_height: u32, config: &Config) -> ActionResult {
let (x_pct, y_pct) = resolve_coordinates(action, screen_width, screen_height, config);
let real_x = (x_pct * screen_width as f64) as i32;
let real_y = (y_pct * screen_height as f64) as i32;
println!(
" [CLICK] ({:.4}, {:.4}) -> ({}, {}) sur ({}x{}), bouton={}{}",
x_pct, y_pct, real_x, real_y, screen_width, screen_height, action.button,
if action.visual_mode { " [VISUAL]" } else { "" }
);
let mut enigo = match Enigo::new(&Settings::default()) {
Ok(e) => e,
Err(e) => {
return ActionResult::error(
&action.action_id,
&format!("Impossible d'initialiser enigo : {}", e),
);
}
};
// Déplacer la souris
if let Err(e) = enigo.move_mouse(real_x, real_y, Coordinate::Abs) {
return ActionResult::error(
&action.action_id,
&format!("Erreur deplacement souris : {}", e),
);
}
// Petit délai pour simuler le temps de réaction humain
thread::sleep(Duration::from_millis(100));
// Cliquer selon le bouton demandé
let button = match action.button.as_str() {
"right" => enigo::Button::Right,
"middle" => enigo::Button::Middle,
_ => enigo::Button::Left,
};
if action.button == "double" {
// Double-clic gauche
if let Err(e) = enigo.button(enigo::Button::Left, Direction::Click) {
return ActionResult::error(&action.action_id, &format!("Erreur clic : {}", e));
}
thread::sleep(Duration::from_millis(50));
if let Err(e) = enigo.button(enigo::Button::Left, Direction::Click) {
return ActionResult::error(&action.action_id, &format!("Erreur double-clic : {}", e));
}
} else if let Err(e) = enigo.button(button, Direction::Click) {
return ActionResult::error(&action.action_id, &format!("Erreur clic : {}", e));
}
println!(" [CLICK] Termine.");
ActionResult::ok(&action.action_id)
}
/// Types the action's text, optionally clicking the target field first.
///
/// Coordinates come from `resolve_coordinates`. When both are strictly
/// positive, the pointer is moved there and a left click is issued before
/// typing; failures of that preliminary move/click are only logged and typing
/// proceeds regardless. A failure to initialize enigo or to type the text
/// produces an error result.
///
/// The logged preview is limited to the first 50 characters. It is cut on a
/// character boundary: slicing at a fixed byte offset would panic whenever
/// that offset falls inside a multi-byte UTF-8 character (e.g. accented text).
fn execute_type(action: &Action, screen_width: u32, screen_height: u32, config: &Config) -> ActionResult {
    /// Maximum number of characters of the text echoed in the log line.
    const PREVIEW_CHARS: usize = 50;

    let text = &action.text;
    // Byte index where character number PREVIEW_CHARS starts, if the text is that long.
    let preview = match text.char_indices().nth(PREVIEW_CHARS) {
        Some((cut, _)) => &text[..cut],
        None => &text[..],
    };
    println!(
        " [TYPE] Texte: '{}' ({} chars)",
        preview,
        text.chars().count()
    );
    // Resolve the coordinates visually when visual_mode is enabled.
    let (x_pct, y_pct) = resolve_coordinates(action, screen_width, screen_height, config);
    let mut enigo = match Enigo::new(&Settings::default()) {
        Ok(e) => e,
        Err(e) => {
            return ActionResult::error(
                &action.action_id,
                &format!("Impossible d'initialiser enigo : {}", e),
            );
        }
    };
    // Click the field first when coordinates are available.
    if x_pct > 0.0 && y_pct > 0.0 {
        let real_x = (x_pct * screen_width as f64) as i32;
        let real_y = (y_pct * screen_height as f64) as i32;
        println!(" [TYPE] Clic prealable sur ({}, {}){}", real_x, real_y,
            if action.visual_mode { " [VISUAL]" } else { "" });
        // Best effort: a failed move or click is reported but does not abort typing.
        if let Err(e) = enigo.move_mouse(real_x, real_y, Coordinate::Abs) {
            eprintln!(" [TYPE] Erreur deplacement souris : {}", e);
        }
        thread::sleep(Duration::from_millis(100));
        if let Err(e) = enigo.button(enigo::Button::Left, Direction::Click) {
            eprintln!(" [TYPE] Erreur clic : {}", e);
        }
        // Pause after the click before sending keystrokes.
        thread::sleep(Duration::from_millis(300));
    }
    // Type the text.
    if let Err(e) = enigo.text(text) {
        return ActionResult::error(
            &action.action_id,
            &format!("Erreur saisie texte : {}", e),
        );
    }
    println!(" [TYPE] Termine.");
    ActionResult::ok(&action.action_id)
}
/// Presses a key or a key combination.
///
/// Key names are mapped through `resolve_key`; names it does not recognize are
/// dropped. With one resolved key, that key is clicked. With several, every
/// key but the last is held down, the last one is clicked, and the held keys
/// are released in reverse order.
///
/// Examples: `["ctrl", "a"]` holds Ctrl, clicks A, releases Ctrl;
/// `["enter"]` clicks Enter.
///
/// Returns an error result when no key is given, none is recognized, enigo
/// cannot be initialized, or a press/click fails. Whenever a failure occurs
/// after some modifiers were already pressed, those modifiers are released
/// before returning so no key is left stuck down.
fn execute_key_combo(action: &Action) -> ActionResult {
    let keys = &action.keys;
    println!(" [KEY_COMBO] Touches: {:?}", keys);
    if keys.is_empty() {
        return ActionResult::error(&action.action_id, "Aucune touche specifiee");
    }
    let mut enigo = match Enigo::new(&Settings::default()) {
        Ok(e) => e,
        Err(e) => {
            return ActionResult::error(
                &action.action_id,
                &format!("Impossible d'initialiser enigo : {}", e),
            );
        }
    };
    // Map key names to enigo keys, skipping unknown names.
    let resolved: Vec<Key> = keys
        .iter()
        .filter_map(|name| resolve_key(name))
        .collect();
    if resolved.is_empty() {
        return ActionResult::error(
            &action.action_id,
            &format!("Aucune touche reconnue dans {:?}", keys),
        );
    }
    if resolved.len() == 1 {
        // Single key: plain press and release.
        if let Err(e) = enigo.key(resolved[0], Direction::Click) {
            return ActionResult::error(&action.action_id, &format!("Erreur touche : {}", e));
        }
    } else {
        // Combo: hold the leading keys, click the last one, then release.
        let (modifiers, last) = resolved.split_at(resolved.len() - 1);
        for (held_count, modifier) in modifiers.iter().enumerate() {
            if let Err(e) = enigo.key(*modifier, Direction::Press) {
                // Undo the presses that already succeeded so they do not stay held.
                for held in modifiers[..held_count].iter().rev() {
                    let _ = enigo.key(*held, Direction::Release);
                }
                return ActionResult::error(
                    &action.action_id,
                    &format!("Erreur modifier press : {}", e),
                );
            }
        }
        thread::sleep(Duration::from_millis(50));
        if let Err(e) = enigo.key(last[0], Direction::Click) {
            // Always release the modifiers, even on error.
            for modifier in modifiers.iter().rev() {
                let _ = enigo.key(*modifier, Direction::Release);
            }
            return ActionResult::error(
                &action.action_id,
                &format!("Erreur touche finale : {}", e),
            );
        }
        // Release failures are logged only; the combo itself already went through.
        for modifier in modifiers.iter().rev() {
            if let Err(e) = enigo.key(*modifier, Direction::Release) {
                eprintln!(" [KEY_COMBO] Erreur release modifier : {}", e);
            }
        }
    }
    println!(" [KEY_COMBO] Termine.");
    ActionResult::ok(&action.action_id)
}
/// Exécute un scroll de souris.
fn execute_scroll(action: &Action, screen_width: u32, screen_height: u32) -> ActionResult {
let real_x = if action.x_pct > 0.0 {
(action.x_pct * screen_width as f64) as i32
} else {
(0.5 * screen_width as f64) as i32
};
let real_y = if action.y_pct > 0.0 {
(action.y_pct * screen_height as f64) as i32
} else {
(0.5 * screen_height as f64) as i32
};
let delta = action.delta;
println!(" [SCROLL] delta={} a ({}, {})", delta, real_x, real_y);
let mut enigo = match Enigo::new(&Settings::default()) {
Ok(e) => e,
Err(e) => {
return ActionResult::error(
&action.action_id,
&format!("Impossible d'initialiser enigo : {}", e),
);
}
};
if let Err(e) = enigo.move_mouse(real_x, real_y, Coordinate::Abs) {
return ActionResult::error(
&action.action_id,
&format!("Erreur deplacement souris : {}", e),
);
}
thread::sleep(Duration::from_millis(50));
if let Err(e) = enigo.scroll(delta, enigo::Axis::Vertical) {
return ActionResult::error(
&action.action_id,
&format!("Erreur scroll : {}", e),
);
}
println!(" [SCROLL] Termine.");
ActionResult::ok(&action.action_id)
}
/// Pauses the current thread for the action's `duration_ms` milliseconds.
///
/// Always returns a success result.
fn execute_wait(action: &Action) -> ActionResult {
    let pause = Duration::from_millis(action.duration_ms);
    println!(" [WAIT] {}ms...", action.duration_ms);
    thread::sleep(pause);
    println!(" [WAIT] Termine.");
    ActionResult::ok(&action.action_id)
}
/// Maps a key name to an `enigo::Key`.
///
/// Matching is case-insensitive (the name is lowercased first). Named keys
/// (control, navigation, function keys, modifiers) map to their enigo variant.
/// Any other name consisting of exactly one character maps to
/// `Key::Unicode` of that (lowercased) character; this is decided by character
/// count rather than byte length, so non-ASCII characters such as `é` are
/// accepted. Anything else is logged as unknown and yields `None`.
///
/// The original notes say this mapping mirrors the Python executor's
/// `_SPECIAL_KEYS` table.
fn resolve_key(name: &str) -> Option<Key> {
    match name.to_lowercase().as_str() {
        // Control keys
        "enter" | "return" => Some(Key::Return),
        "tab" => Some(Key::Tab),
        "escape" | "esc" => Some(Key::Escape),
        "backspace" => Some(Key::Backspace),
        "delete" => Some(Key::Delete),
        "space" => Some(Key::Space),
        // Navigation keys
        "up" => Some(Key::UpArrow),
        "down" => Some(Key::DownArrow),
        "left" => Some(Key::LeftArrow),
        "right" => Some(Key::RightArrow),
        "home" => Some(Key::Home),
        "end" => Some(Key::End),
        "page_up" | "pageup" => Some(Key::PageUp),
        "page_down" | "pagedown" => Some(Key::PageDown),
        // Function keys
        "f1" => Some(Key::F1),
        "f2" => Some(Key::F2),
        "f3" => Some(Key::F3),
        "f4" => Some(Key::F4),
        "f5" => Some(Key::F5),
        "f6" => Some(Key::F6),
        "f7" => Some(Key::F7),
        "f8" => Some(Key::F8),
        "f9" => Some(Key::F9),
        "f10" => Some(Key::F10),
        "f11" => Some(Key::F11),
        "f12" => Some(Key::F12),
        // Modifiers (left/right variants collapse onto the generic key)
        "ctrl" | "ctrl_l" | "ctrl_r" | "control" => Some(Key::Control),
        "alt" | "alt_l" | "alt_r" => Some(Key::Alt),
        "shift" | "shift_l" | "shift_r" => Some(Key::Shift),
        "cmd" | "win" | "super" | "super_l" | "super_r" | "windows" | "meta" => Some(Key::Meta),
        // Special keys
        // NOTE(review): 0x2D is the Windows VK_INSERT code; the raw code is
        // presumably interpreted differently on other platforms — confirm.
        "insert" => Some(Key::Other(0x2D)), // VK_INSERT
        "caps_lock" | "capslock" => Some(Key::CapsLock),
        other => {
            // Exactly one character -> send it as a Unicode key.
            let mut chars = other.chars();
            match (chars.next(), chars.next()) {
                (Some(c), None) => Some(Key::Unicode(c)),
                _ => {
                    eprintln!(" [KEY_COMBO] Touche inconnue : '{}', ignoree", name);
                    None
                }
            }
        }
    }
}