feat: agent Rust Phase 1 — POC headless fonctionnel

1527 lignes Rust, compile sans warnings, testé sur Linux.
- Capture d'écran (xcap) + JPEG base64 + hash dedup
- Heartbeat toutes les 5s vers streaming server
- Poll replay + exécution actions (clic, frappe, combos)
- Serveur HTTP port 5006 (capture, health, file-action)
- Compatible avec le streaming server Python existant

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dom
2026-03-18 20:22:04 +01:00
parent 792cc2aa9a
commit 757432ee19
10 changed files with 1702 additions and 0 deletions

331
agent_rust/src/executor.rs Normal file
View File

@@ -0,0 +1,331 @@
//! Exécuteur d'actions pour le replay.
//!
//! Simule les clics souris, la saisie de texte, les combos clavier et les attentes.
//! Utilise enigo pour la simulation, compatible Windows et Linux.
//! Reproduit le comportement de agent_v1/core/executor.py.
use crate::network::{Action, ActionResult};
use enigo::{
Coordinate, Direction, Enigo, Key, Keyboard, Mouse, Settings,
};
use std::thread;
use std::time::Duration;
/// Exécute une action de replay et retourne le résultat.
///
/// Dispatche vers le bon handler selon le type d'action.
/// Les coordonnées x_pct/y_pct (0.0-1.0) sont converties en pixels
/// à partir des dimensions de l'écran.
pub fn execute_action(
action: &Action,
screen_width: u32,
screen_height: u32,
) -> ActionResult {
match action.action_type.as_str() {
"click" => execute_click(action, screen_width, screen_height),
"type" => execute_type(action, screen_width, screen_height),
"key_combo" => execute_key_combo(action),
"scroll" => execute_scroll(action, screen_width, screen_height),
"wait" => execute_wait(action),
_ => ActionResult::error(
&action.action_id,
&format!("Type d'action inconnu : {}", action.action_type),
),
}
}
/// Exécute un clic souris aux coordonnées normalisées.
fn execute_click(action: &Action, screen_width: u32, screen_height: u32) -> ActionResult {
let real_x = (action.x_pct * screen_width as f64) as i32;
let real_y = (action.y_pct * screen_height as f64) as i32;
println!(
" [CLICK] ({:.3}, {:.3}) -> ({}, {}) sur ({}x{}), bouton={}",
action.x_pct, action.y_pct, real_x, real_y, screen_width, screen_height, action.button
);
let mut enigo = match Enigo::new(&Settings::default()) {
Ok(e) => e,
Err(e) => {
return ActionResult::error(
&action.action_id,
&format!("Impossible d'initialiser enigo : {}", e),
);
}
};
// Déplacer la souris
if let Err(e) = enigo.move_mouse(real_x, real_y, Coordinate::Abs) {
return ActionResult::error(
&action.action_id,
&format!("Erreur deplacement souris : {}", e),
);
}
// Petit délai pour simuler le temps de réaction humain
thread::sleep(Duration::from_millis(100));
// Cliquer selon le bouton demandé
let button = match action.button.as_str() {
"right" => enigo::Button::Right,
"middle" => enigo::Button::Middle,
_ => enigo::Button::Left,
};
if action.button == "double" {
// Double-clic gauche
if let Err(e) = enigo.button(enigo::Button::Left, Direction::Click) {
return ActionResult::error(&action.action_id, &format!("Erreur clic : {}", e));
}
thread::sleep(Duration::from_millis(50));
if let Err(e) = enigo.button(enigo::Button::Left, Direction::Click) {
return ActionResult::error(&action.action_id, &format!("Erreur double-clic : {}", e));
}
} else if let Err(e) = enigo.button(button, Direction::Click) {
return ActionResult::error(&action.action_id, &format!("Erreur clic : {}", e));
}
println!(" [CLICK] Termine.");
ActionResult::ok(&action.action_id)
}
/// Types the action's text through enigo.
///
/// When both `x_pct` and `y_pct` are strictly positive, the pointer is first
/// moved to that position and left-clicked so the target field gets focus.
/// Failures of that preliminary move/click are only logged; typing is still
/// attempted afterwards.
///
/// Returns an error result if enigo cannot be initialized or if the text
/// entry itself fails.
fn execute_type(action: &Action, screen_width: u32, screen_height: u32) -> ActionResult {
    /// Maximum number of characters of the text echoed in the log line.
    const PREVIEW_CHARS: usize = 50;

    let text = &action.text;

    // Cut the preview on a character boundary: slicing at a fixed byte offset
    // panics when that offset falls inside a multi-byte UTF-8 character.
    let preview = match text.char_indices().nth(PREVIEW_CHARS) {
        Some((cut, _)) => &text[..cut],
        None => text.as_str(),
    };
    println!(
        " [TYPE] Texte: '{}' ({} chars)",
        preview,
        // Count characters, not bytes, to match the "chars" label.
        text.chars().count()
    );

    let mut enigo = match Enigo::new(&Settings::default()) {
        Ok(e) => e,
        Err(e) => {
            return ActionResult::error(
                &action.action_id,
                &format!("Impossible d'initialiser enigo : {}", e),
            );
        }
    };

    // Preliminary click on the field when coordinates are available.
    if action.x_pct > 0.0 && action.y_pct > 0.0 {
        let real_x = (action.x_pct * screen_width as f64) as i32;
        let real_y = (action.y_pct * screen_height as f64) as i32;
        println!(" [TYPE] Clic prealable sur ({}, {})", real_x, real_y);
        // Best effort: a failed move or click is logged but does not abort the typing.
        if let Err(e) = enigo.move_mouse(real_x, real_y, Coordinate::Abs) {
            eprintln!(" [TYPE] Erreur deplacement souris : {}", e);
        }
        thread::sleep(Duration::from_millis(100));
        if let Err(e) = enigo.button(enigo::Button::Left, Direction::Click) {
            eprintln!(" [TYPE] Erreur clic : {}", e);
        }
        // Pause after the click before typing starts.
        thread::sleep(Duration::from_millis(300));
    }

    // Enter the text.
    if let Err(e) = enigo.text(text) {
        return ActionResult::error(
            &action.action_id,
            &format!("Erreur saisie texte : {}", e),
        );
    }

    println!(" [TYPE] Termine.");
    ActionResult::ok(&action.action_id)
}
/// Exécute une combinaison de touches.
///
/// Ex: ["ctrl", "a"] -> maintenir Ctrl, appuyer A, relâcher Ctrl
/// Ex: ["enter"] -> appuyer Enter
fn execute_key_combo(action: &Action) -> ActionResult {
let keys = &action.keys;
println!(" [KEY_COMBO] Touches: {:?}", keys);
if keys.is_empty() {
return ActionResult::error(&action.action_id, "Aucune touche specifiee");
}
let mut enigo = match Enigo::new(&Settings::default()) {
Ok(e) => e,
Err(e) => {
return ActionResult::error(
&action.action_id,
&format!("Impossible d'initialiser enigo : {}", e),
);
}
};
// Résoudre les noms de touches
let resolved: Vec<Key> = keys
.iter()
.filter_map(|name| resolve_key(name))
.collect();
if resolved.is_empty() {
return ActionResult::error(
&action.action_id,
&format!("Aucune touche reconnue dans {:?}", keys),
);
}
if resolved.len() == 1 {
// Une seule touche : simple press/release
if let Err(e) = enigo.key(resolved[0], Direction::Click) {
return ActionResult::error(&action.action_id, &format!("Erreur touche : {}", e));
}
} else {
// Combo : maintenir les modifieurs, taper la dernière touche, relâcher
let (modifiers, last) = resolved.split_at(resolved.len() - 1);
for modifier in modifiers {
if let Err(e) = enigo.key(*modifier, Direction::Press) {
return ActionResult::error(
&action.action_id,
&format!("Erreur modifier press : {}", e),
);
}
}
thread::sleep(Duration::from_millis(50));
if let Err(e) = enigo.key(last[0], Direction::Click) {
// Toujours relâcher les modifieurs même en cas d'erreur
for modifier in modifiers.iter().rev() {
let _ = enigo.key(*modifier, Direction::Release);
}
return ActionResult::error(
&action.action_id,
&format!("Erreur touche finale : {}", e),
);
}
for modifier in modifiers.iter().rev() {
if let Err(e) = enigo.key(*modifier, Direction::Release) {
eprintln!(" [KEY_COMBO] Erreur release modifier : {}", e);
}
}
}
println!(" [KEY_COMBO] Termine.");
ActionResult::ok(&action.action_id)
}
/// Exécute un scroll de souris.
fn execute_scroll(action: &Action, screen_width: u32, screen_height: u32) -> ActionResult {
let real_x = if action.x_pct > 0.0 {
(action.x_pct * screen_width as f64) as i32
} else {
(0.5 * screen_width as f64) as i32
};
let real_y = if action.y_pct > 0.0 {
(action.y_pct * screen_height as f64) as i32
} else {
(0.5 * screen_height as f64) as i32
};
let delta = action.delta;
println!(" [SCROLL] delta={} a ({}, {})", delta, real_x, real_y);
let mut enigo = match Enigo::new(&Settings::default()) {
Ok(e) => e,
Err(e) => {
return ActionResult::error(
&action.action_id,
&format!("Impossible d'initialiser enigo : {}", e),
);
}
};
if let Err(e) = enigo.move_mouse(real_x, real_y, Coordinate::Abs) {
return ActionResult::error(
&action.action_id,
&format!("Erreur deplacement souris : {}", e),
);
}
thread::sleep(Duration::from_millis(50));
if let Err(e) = enigo.scroll(delta, enigo::Axis::Vertical) {
return ActionResult::error(
&action.action_id,
&format!("Erreur scroll : {}", e),
);
}
println!(" [SCROLL] Termine.");
ActionResult::ok(&action.action_id)
}
/// Pauses the current thread for `action.duration_ms` milliseconds.
///
/// Always returns a success result for the action.
fn execute_wait(action: &Action) -> ActionResult {
    let pause = Duration::from_millis(action.duration_ms);
    println!(" [WAIT] {}ms...", action.duration_ms);
    thread::sleep(pause);
    println!(" [WAIT] Termine.");
    ActionResult::ok(&action.action_id)
}
/// Resolves a key name to an `enigo::Key`.
///
/// Matching is case-insensitive (the name is lowercased first). Named control,
/// navigation, function and modifier keys map to their enigo variants; left
/// and right modifier names (`ctrl_l`, `shift_r`, ...) map to the generic
/// modifier. Any other name made of exactly one character maps to
/// `Key::Unicode` of that (lowercased) character.
///
/// Returns `None`, after logging a warning, for names that are not recognized.
///
/// The original comment states the mapping mirrors `_SPECIAL_KEYS` of the
/// Python executor.
fn resolve_key(name: &str) -> Option<Key> {
    match name.to_lowercase().as_str() {
        // Control keys
        "enter" | "return" => Some(Key::Return),
        "tab" => Some(Key::Tab),
        "escape" | "esc" => Some(Key::Escape),
        "backspace" => Some(Key::Backspace),
        "delete" => Some(Key::Delete),
        "space" => Some(Key::Space),
        // Navigation keys
        "up" => Some(Key::UpArrow),
        "down" => Some(Key::DownArrow),
        "left" => Some(Key::LeftArrow),
        "right" => Some(Key::RightArrow),
        "home" => Some(Key::Home),
        "end" => Some(Key::End),
        "page_up" | "pageup" => Some(Key::PageUp),
        "page_down" | "pagedown" => Some(Key::PageDown),
        // Function keys
        "f1" => Some(Key::F1),
        "f2" => Some(Key::F2),
        "f3" => Some(Key::F3),
        "f4" => Some(Key::F4),
        "f5" => Some(Key::F5),
        "f6" => Some(Key::F6),
        "f7" => Some(Key::F7),
        "f8" => Some(Key::F8),
        "f9" => Some(Key::F9),
        "f10" => Some(Key::F10),
        "f11" => Some(Key::F11),
        "f12" => Some(Key::F12),
        // Modifiers
        "ctrl" | "ctrl_l" | "ctrl_r" | "control" => Some(Key::Control),
        "alt" | "alt_l" | "alt_r" => Some(Key::Alt),
        "shift" | "shift_l" | "shift_r" => Some(Key::Shift),
        "cmd" | "win" | "super" | "super_l" | "super_r" | "windows" | "meta" => Some(Key::Meta),
        // Special keys.
        // `Key::Other` carries a raw platform code, so Insert needs a
        // per-platform value: a virtual-key code on Windows, a keysym on
        // Linux/X11. On other platforms "insert" falls through to the
        // unknown-key arm rather than sending an unrelated key.
        #[cfg(target_os = "windows")]
        "insert" => Some(Key::Other(0x2D)), // VK_INSERT
        #[cfg(all(unix, not(target_os = "macos")))]
        "insert" => Some(Key::Other(0xFF63)), // XK_Insert
        "caps_lock" | "capslock" => Some(Key::CapsLock),
        other => {
            // Single character -> Unicode key. Count characters rather than
            // bytes so that non-ASCII characters such as 'é' are accepted.
            let mut chars = other.chars();
            match (chars.next(), chars.next()) {
                (Some(c), None) => Some(Key::Unicode(c)),
                _ => {
                    eprintln!(" [KEY_COMBO] Touche inconnue : '{}', ignoree", name);
                    None
                }
            }
        }
    }
}