feat: agent Rust Phase 2 — visual mode (template matching serveur)
- visual.rs : resolve via POST /replay/resolve_target
- executor.rs : resolve avant chaque clic si visual_mode=true
- Fallback blind si matching échoue
- Binaire toujours 1.8 MB (pas de nouvelle dépendance)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -4,7 +4,9 @@
|
||||
//! Utilise enigo pour la simulation, compatible Windows et Linux.
|
||||
//! Reproduit le comportement de agent_v1/core/executor.py.
|
||||
|
||||
use crate::config::Config;
|
||||
use crate::network::{Action, ActionResult};
|
||||
use crate::visual;
|
||||
use enigo::{
|
||||
Coordinate, Direction, Enigo, Key, Keyboard, Mouse, Settings,
|
||||
};
|
||||
@@ -16,14 +18,16 @@ use std::time::Duration;
|
||||
/// Dispatche vers le bon handler selon le type d'action.
|
||||
/// Les coordonnées x_pct/y_pct (0.0-1.0) sont converties en pixels
|
||||
/// à partir des dimensions de l'écran.
|
||||
/// Si visual_mode est activé, résout d'abord la cible via le serveur.
|
||||
pub fn execute_action(
|
||||
action: &Action,
|
||||
screen_width: u32,
|
||||
screen_height: u32,
|
||||
config: &Config,
|
||||
) -> ActionResult {
|
||||
match action.action_type.as_str() {
|
||||
"click" => execute_click(action, screen_width, screen_height),
|
||||
"type" => execute_type(action, screen_width, screen_height),
|
||||
"click" => execute_click(action, screen_width, screen_height, config),
|
||||
"type" => execute_type(action, screen_width, screen_height, config),
|
||||
"key_combo" => execute_key_combo(action),
|
||||
"scroll" => execute_scroll(action, screen_width, screen_height),
|
||||
"wait" => execute_wait(action),
|
||||
@@ -34,14 +38,59 @@ pub fn execute_action(
|
||||
}
|
||||
}
|
||||
|
||||
/// Résout les coordonnées visuellement si visual_mode est activé.
|
||||
///
|
||||
/// Si la résolution échoue, retourne les coordonnées de fallback (blind).
|
||||
/// Si visual_mode est désactivé ou target_spec absent, retourne les coordonnées originales.
|
||||
fn resolve_coordinates(
|
||||
action: &Action,
|
||||
screen_width: u32,
|
||||
screen_height: u32,
|
||||
config: &Config,
|
||||
) -> (f64, f64) {
|
||||
let mut x_pct = action.x_pct;
|
||||
let mut y_pct = action.y_pct;
|
||||
|
||||
if action.visual_mode && !action.target_spec.is_null() {
|
||||
println!(
|
||||
" [VISUAL] Mode visuel active — resolution de la cible..."
|
||||
);
|
||||
match visual::resolve_target_visual(
|
||||
config,
|
||||
&action.target_spec,
|
||||
x_pct,
|
||||
y_pct,
|
||||
screen_width,
|
||||
screen_height,
|
||||
) {
|
||||
Some((rx, ry)) => {
|
||||
println!(" [VISUAL] Resolu : ({:.4}, {:.4})", rx, ry);
|
||||
x_pct = rx;
|
||||
y_pct = ry;
|
||||
}
|
||||
None => {
|
||||
println!(
|
||||
" [VISUAL] Echec — fallback coordonnees aveugles ({:.4}, {:.4})",
|
||||
x_pct, y_pct
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(x_pct, y_pct)
|
||||
}
|
||||
|
||||
/// Exécute un clic souris aux coordonnées normalisées.
|
||||
fn execute_click(action: &Action, screen_width: u32, screen_height: u32) -> ActionResult {
|
||||
let real_x = (action.x_pct * screen_width as f64) as i32;
|
||||
let real_y = (action.y_pct * screen_height as f64) as i32;
|
||||
/// Résout visuellement la cible si visual_mode est activé.
|
||||
fn execute_click(action: &Action, screen_width: u32, screen_height: u32, config: &Config) -> ActionResult {
|
||||
let (x_pct, y_pct) = resolve_coordinates(action, screen_width, screen_height, config);
|
||||
let real_x = (x_pct * screen_width as f64) as i32;
|
||||
let real_y = (y_pct * screen_height as f64) as i32;
|
||||
|
||||
println!(
|
||||
" [CLICK] ({:.3}, {:.3}) -> ({}, {}) sur ({}x{}), bouton={}",
|
||||
action.x_pct, action.y_pct, real_x, real_y, screen_width, screen_height, action.button
|
||||
" [CLICK] ({:.4}, {:.4}) -> ({}, {}) sur ({}x{}), bouton={}{}",
|
||||
x_pct, y_pct, real_x, real_y, screen_width, screen_height, action.button,
|
||||
if action.visual_mode { " [VISUAL]" } else { "" }
|
||||
);
|
||||
|
||||
let mut enigo = match Enigo::new(&Settings::default()) {
|
||||
@@ -93,7 +142,7 @@ fn execute_click(action: &Action, screen_width: u32, screen_height: u32) -> Acti
|
||||
///
|
||||
/// Si des coordonnées sont fournies (x_pct > 0), clique d'abord
|
||||
/// sur le champ avant de taper (comme en Python).
|
||||
fn execute_type(action: &Action, screen_width: u32, screen_height: u32) -> ActionResult {
|
||||
fn execute_type(action: &Action, screen_width: u32, screen_height: u32, config: &Config) -> ActionResult {
|
||||
let text = &action.text;
|
||||
println!(
|
||||
" [TYPE] Texte: '{}' ({} chars)",
|
||||
@@ -101,6 +150,9 @@ fn execute_type(action: &Action, screen_width: u32, screen_height: u32) -> Actio
|
||||
text.len()
|
||||
);
|
||||
|
||||
// Résoudre visuellement les coordonnées si visual_mode est activé
|
||||
let (x_pct, y_pct) = resolve_coordinates(action, screen_width, screen_height, config);
|
||||
|
||||
let mut enigo = match Enigo::new(&Settings::default()) {
|
||||
Ok(e) => e,
|
||||
Err(e) => {
|
||||
@@ -112,10 +164,11 @@ fn execute_type(action: &Action, screen_width: u32, screen_height: u32) -> Actio
|
||||
};
|
||||
|
||||
// Clic préalable sur le champ si coordonnées disponibles
|
||||
if action.x_pct > 0.0 && action.y_pct > 0.0 {
|
||||
let real_x = (action.x_pct * screen_width as f64) as i32;
|
||||
let real_y = (action.y_pct * screen_height as f64) as i32;
|
||||
println!(" [TYPE] Clic prealable sur ({}, {})", real_x, real_y);
|
||||
if x_pct > 0.0 && y_pct > 0.0 {
|
||||
let real_x = (x_pct * screen_width as f64) as i32;
|
||||
let real_y = (y_pct * screen_height as f64) as i32;
|
||||
println!(" [TYPE] Clic prealable sur ({}, {}){}", real_x, real_y,
|
||||
if action.visual_mode { " [VISUAL]" } else { "" });
|
||||
|
||||
if let Err(e) = enigo.move_mouse(real_x, real_y, Coordinate::Abs) {
|
||||
eprintln!(" [TYPE] Erreur deplacement souris : {}", e);
|
||||
|
||||
Reference in New Issue
Block a user