Compare commits
39 Commits
13390a71e7
...
v3.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
99041f0117 | ||
|
|
72a9651b94 | ||
|
|
8589e87a13 | ||
|
|
8a1dfc6e8b | ||
|
|
3bcf59e16f | ||
|
|
46206d9396 | ||
|
|
d3e928bebe | ||
|
|
a679fbb62b | ||
|
|
f0b311306d | ||
|
|
1c5ff42006 | ||
|
|
b09a3df054 | ||
|
|
fceb76de1f | ||
|
|
6d4ff4f215 | ||
|
|
2486e43def | ||
|
|
20b74286f7 | ||
|
|
a1c97504ab | ||
|
|
d6c7346898 | ||
|
|
90ee8ca8f4 | ||
|
|
84a91630e9 | ||
|
|
91614fbff0 | ||
|
|
c1ce6a3964 | ||
|
|
0bd0fbb8c5 | ||
|
|
394342be7e | ||
|
|
6724f43950 | ||
|
|
d99b17394a | ||
|
|
875367dea9 | ||
|
|
a74056ca22 | ||
|
|
6937b94f2a | ||
|
|
4f5c518d3a | ||
|
|
7dec3ab63a | ||
|
|
68d5bb7dd1 | ||
|
|
ef5d595d98 | ||
|
|
5ceee9c393 | ||
|
|
5e0b53cfd1 | ||
|
|
e8a8a588c1 | ||
|
|
18792fd7b4 | ||
|
|
1e8e2dd9f3 | ||
|
|
1253a40051 | ||
|
|
a92d04621a |
2
agent_rust/.gitignore
vendored
2
agent_rust/.gitignore
vendored
@@ -1,2 +0,0 @@
|
|||||||
/target
|
|
||||||
Cargo.lock
|
|
||||||
@@ -1,85 +0,0 @@
|
|||||||
[package]
|
|
||||||
name = "rpa-agent"
|
|
||||||
version = "0.2.0"
|
|
||||||
edition = "2021"
|
|
||||||
description = "Agent RPA Vision - Lea (Phases 1-5)"
|
|
||||||
|
|
||||||
[dependencies]
|
|
||||||
# Capture d'ecran
|
|
||||||
xcap = "0.7"
|
|
||||||
|
|
||||||
# Simulation souris/clavier (replay)
|
|
||||||
enigo = { version = "0.3", features = ["serde"] }
|
|
||||||
|
|
||||||
# Capture evenements souris/clavier (recording) — Phase 5
|
|
||||||
rdev = "0.5"
|
|
||||||
|
|
||||||
# Client HTTP (mode bloquant, pas de tokio)
|
|
||||||
reqwest = { version = "0.12", features = ["blocking", "multipart", "json"] }
|
|
||||||
|
|
||||||
# Traitement d'images (JPEG encode, resize, crop)
|
|
||||||
image = "0.25"
|
|
||||||
|
|
||||||
# Floutage zones sensibles — Phase 5
|
|
||||||
imageproc = "0.25"
|
|
||||||
|
|
||||||
# Encodage base64
|
|
||||||
base64 = "0.22"
|
|
||||||
|
|
||||||
# Serialisation JSON
|
|
||||||
serde = { version = "1", features = ["derive"] }
|
|
||||||
serde_json = "1"
|
|
||||||
|
|
||||||
# Mini serveur HTTP synchrone (port 5006)
|
|
||||||
tiny_http = "0.12"
|
|
||||||
|
|
||||||
# Hostname de la machine
|
|
||||||
hostname = "0.4"
|
|
||||||
|
|
||||||
# Date/heure
|
|
||||||
chrono = "0.4"
|
|
||||||
|
|
||||||
# Canaux inter-threads performants
|
|
||||||
crossbeam-channel = "0.5"
|
|
||||||
|
|
||||||
# Logging
|
|
||||||
log = "0.4"
|
|
||||||
env_logger = "0.11"
|
|
||||||
|
|
||||||
# Signal handling Unix (Ctrl+C)
|
|
||||||
[target.'cfg(unix)'.dependencies]
|
|
||||||
libc = "0.2"
|
|
||||||
|
|
||||||
# Dependances Windows uniquement — Phases 3-5
|
|
||||||
[target.'cfg(windows)'.dependencies]
|
|
||||||
# Systray — Phase 3
|
|
||||||
tray-icon = "0.19"
|
|
||||||
muda = "0.15"
|
|
||||||
|
|
||||||
# Boucle d'evenements — Phase 3
|
|
||||||
winit = { version = "0.30", features = ["rwh_06"] }
|
|
||||||
|
|
||||||
# Notifications toast — Phase 3
|
|
||||||
winrt-notification = "0.5"
|
|
||||||
|
|
||||||
# Chat WebView2 — Phase 4
|
|
||||||
wry = "0.48"
|
|
||||||
|
|
||||||
# Raw window handle pour wry + fenetre native
|
|
||||||
raw-window-handle = "0.6"
|
|
||||||
|
|
||||||
# Win32 API (info fenetre, dialogues, etc.)
|
|
||||||
windows-sys = { version = "0.59", features = [
|
|
||||||
"Win32_UI_WindowsAndMessaging",
|
|
||||||
"Win32_System_Threading",
|
|
||||||
"Win32_System_LibraryLoader",
|
|
||||||
"Win32_Foundation",
|
|
||||||
"Win32_Graphics_Gdi",
|
|
||||||
] }
|
|
||||||
|
|
||||||
[profile.release]
|
|
||||||
opt-level = "z"
|
|
||||||
lto = true
|
|
||||||
strip = true
|
|
||||||
codegen-units = 1
|
|
||||||
panic = "abort"
|
|
||||||
@@ -1,34 +0,0 @@
|
|||||||
╔══════════════════════════════════════════╗
|
|
||||||
║ Léa — Assistante IA ║
|
|
||||||
║ Automatisation de tâches ║
|
|
||||||
╚══════════════════════════════════════════╝
|
|
||||||
|
|
||||||
INSTALLATION
|
|
||||||
────────────
|
|
||||||
1. Copiez le dossier "Lea" sur votre Bureau
|
|
||||||
2. Double-cliquez sur "Lea.exe" pour démarrer
|
|
||||||
|
|
||||||
PREMIÈRE UTILISATION
|
|
||||||
────────────────────
|
|
||||||
• Léa s'ouvre automatiquement dans votre navigateur
|
|
||||||
• Cliquez "Apprenez-moi une tâche" pour commencer
|
|
||||||
• Effectuez votre tâche normalement
|
|
||||||
• Cliquez "C'est terminé" quand vous avez fini
|
|
||||||
• Léa a appris ! Demandez-lui de refaire la tâche
|
|
||||||
|
|
||||||
ARRÊTER LÉA
|
|
||||||
────────────
|
|
||||||
• Fermez la fenêtre Léa dans la barre des tâches
|
|
||||||
• Ou appuyez Ctrl+C dans le terminal
|
|
||||||
|
|
||||||
BESOIN D'AIDE ?
|
|
||||||
───────────────
|
|
||||||
Contactez le support : [à compléter]
|
|
||||||
|
|
||||||
────────────────────────────────────────────
|
|
||||||
⚠ Cet outil utilise l'intelligence artificielle.
|
|
||||||
Article 50 du Règlement européen sur l'IA.
|
|
||||||
Vos données restent sur votre ordinateur et notre
|
|
||||||
serveur sécurisé. Aucune donnée n'est partagée
|
|
||||||
avec des tiers.
|
|
||||||
────────────────────────────────────────────
|
|
||||||
@@ -1,101 +0,0 @@
|
|||||||
# RPA Vision Agent (Rust) — Phases 1-5
|
|
||||||
|
|
||||||
Agent complet pour RPA Vision V3, ecrit en Rust.
|
|
||||||
Parite fonctionnelle avec l'agent Python (`agent_v0/agent_v1/`) en un seul executable de 2.4 Mo.
|
|
||||||
|
|
||||||
## Fonctionnalites
|
|
||||||
|
|
||||||
### Phase 1 — Agent minimal (headless)
|
|
||||||
- **Heartbeat** : capture ecran toutes les 5s, JPEG, dedup par hash perceptuel
|
|
||||||
- **Replay** : poll serveur, execute actions (click, type, key_combo, scroll, wait)
|
|
||||||
- **Resolution visuelle** : resolution de cibles via le serveur (template matching)
|
|
||||||
- **Serveur de capture** : port 5006 (GET /capture, GET /health, POST /file-action)
|
|
||||||
|
|
||||||
### Phase 3 — Systray + Notifications
|
|
||||||
- **Systray** : icone avec cercle colore (gris=idle, rouge=enregistrement, vert=connecte, bleu=replay)
|
|
||||||
- **Menu contextuel** : Machine ID, statut, Apprenez-moi, C'est termine, Mes taches, ARRET D'URGENCE, Chat, Fichiers, Quitter
|
|
||||||
- **Notifications toast** : via winrt-notification (bienvenue, session, replay, connexion)
|
|
||||||
- **Etat partage** : thread-safe via AtomicBool + Mutex
|
|
||||||
|
|
||||||
### Phase 4 — Chat WebView2
|
|
||||||
- **WebView2** : fenetre 520x720, charge http://{server}:5004/chat
|
|
||||||
- **Positionnement** : bas-droite pres du systray
|
|
||||||
- **Fallback** : HTML embarque si le serveur est indisponible
|
|
||||||
- **Toggle** : show/hide via menu systray
|
|
||||||
|
|
||||||
### Phase 5 — Parite complete
|
|
||||||
- **Enregistrement** : capture evenements souris/clavier via rdev, envoi au serveur
|
|
||||||
- **Floutage** : detection de champs de saisie + blur gaussien (protection donnees sensibles)
|
|
||||||
- **Configuration** : BLUR_SENSITIVE, LOG_RETENTION_DAYS, CHAT_PORT
|
|
||||||
- **Health check** : verification connexion serveur toutes les 30s
|
|
||||||
|
|
||||||
## Build
|
|
||||||
|
|
||||||
### Linux (pour tests)
|
|
||||||
|
|
||||||
```bash
|
|
||||||
sudo apt install libpipewire-0.3-dev libclang-dev libgbm-dev libxdo-dev
|
|
||||||
cargo build --release
|
|
||||||
```
|
|
||||||
|
|
||||||
### Cross-compilation vers Windows
|
|
||||||
|
|
||||||
```bash
|
|
||||||
rustup target add x86_64-pc-windows-gnu
|
|
||||||
sudo apt install gcc-mingw-w64-x86-64
|
|
||||||
cargo build --release --target x86_64-pc-windows-gnu
|
|
||||||
```
|
|
||||||
|
|
||||||
### Deploiement sur le PC cible
|
|
||||||
|
|
||||||
```bash
|
|
||||||
sshpass -p 'loli' scp -o StrictHostKeyChecking=no \
|
|
||||||
target/x86_64-pc-windows-gnu/release/rpa-agent.exe \
|
|
||||||
dom@192.168.1.11:"C:\\rpa_vision\\rpa-agent.exe"
|
|
||||||
```
|
|
||||||
|
|
||||||
## Configuration
|
|
||||||
|
|
||||||
| Variable | Defaut | Description |
|
|
||||||
|---|---|---|
|
|
||||||
| `RPA_SERVER_URL` | `http://localhost:5005/api/v1` | URL du serveur streaming |
|
|
||||||
| `RPA_MACHINE_ID` | `{hostname}_{os}` | Identifiant de la machine |
|
|
||||||
| `RPA_CAPTURE_PORT` | `5006` | Port du serveur de capture |
|
|
||||||
| `RPA_HEARTBEAT_INTERVAL` | `5` | Intervalle heartbeat (secondes) |
|
|
||||||
| `RPA_JPEG_QUALITY` | `85` | Qualite JPEG (1-100) |
|
|
||||||
| `RPA_BLUR_SENSITIVE` | `true` | Flouter les zones sensibles |
|
|
||||||
| `RPA_LOG_RETENTION_DAYS` | `180` | Retention des logs (jours) |
|
|
||||||
| `RPA_CHAT_PORT` | `5004` | Port du serveur de chat |
|
|
||||||
|
|
||||||
## Architecture
|
|
||||||
|
|
||||||
```
|
|
||||||
src/
|
|
||||||
├── main.rs — Orchestrateur, 7 threads (heartbeat, replay, serveur, health, recorder, chat, tray)
|
|
||||||
├── config.rs — Configuration (env vars + defauts)
|
|
||||||
├── state.rs — Etat partage thread-safe (AtomicBool, Mutex)
|
|
||||||
├── capture.rs — Capture ecran (xcap), JPEG, hash perceptuel
|
|
||||||
├── network.rs — Client HTTP (heartbeat, poll replay, rapport resultat)
|
|
||||||
├── replay.rs — Boucle de polling replay avec notifications
|
|
||||||
├── executor.rs — Execution actions (click, type, key_combo, scroll, wait)
|
|
||||||
├── visual.rs — Resolution visuelle des cibles via le serveur
|
|
||||||
├── server.rs — Mini serveur HTTP port 5006 (/capture, /health, /file-action)
|
|
||||||
├── tray.rs — Icone systray + menu contextuel (tray-icon, winit)
|
|
||||||
├── notifications.rs — Notifications toast Windows (winrt-notification)
|
|
||||||
├── chat.rs — Fenetre de chat WebView2 (wry)
|
|
||||||
├── recorder.rs — Capture evenements souris/clavier (rdev)
|
|
||||||
└── blur.rs — Floutage zones sensibles (detection + box blur)
|
|
||||||
```
|
|
||||||
|
|
||||||
## Taille du binaire
|
|
||||||
|
|
||||||
| Configuration | Taille |
|
|
||||||
|---|---|
|
|
||||||
| Release (LTO + strip + opt-level z) | **2.4 Mo** |
|
|
||||||
| Python equivalent (venv + packages) | ~200 Mo |
|
|
||||||
|
|
||||||
## Compatibilite
|
|
||||||
|
|
||||||
- **OS** : Windows 10/11 (systray, notifications, chat WebView2)
|
|
||||||
- **Fallback Linux** : mode console (heartbeat, replay, serveur)
|
|
||||||
- **Serveur** : compatible api_stream.py (port 5005)
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
#!/bin/bash
# Build the Windows demo kit.
#
# Cross-compiles the agent for x86_64-pc-windows-gnu, then assembles a
# self-contained "Lea" folder (executable + config + read-me) under demo_kit/.
#
# Strict mode: abort on any failing command (-e), on use of an unset
# variable (-u), and on a failure anywhere inside a pipeline (pipefail).
set -euo pipefail

echo "=== Build Léa pour Windows ==="
cargo build --release --target x86_64-pc-windows-gnu

# Start from an empty demo folder so stale files never leak into a kit.
DEMO_DIR="demo_kit/Lea"
rm -rf demo_kit
mkdir -p "$DEMO_DIR"

# Copy the deliverables. The binary is renamed to the user-facing name.
# NOTE(review): config.txt is copied verbatim, including any access token it
# contains — confirm that is intended before distributing the kit.
cp target/x86_64-pc-windows-gnu/release/rpa-agent.exe "$DEMO_DIR/Lea.exe"
cp config.txt "$DEMO_DIR/config.txt"
cp LISEZMOI.txt "$DEMO_DIR/LISEZMOI.txt"

echo ""
echo "=== Kit démo prêt dans demo_kit/Lea/ ==="
ls -lh "$DEMO_DIR/"
echo ""
echo "Copiez le dossier Lea/ sur le PC du docteur."
|
|
||||||
@@ -1,12 +0,0 @@
|
|||||||
# === Configuration Léa ===
|
|
||||||
# Adresse du serveur (ne pas modifier sauf instruction)
|
|
||||||
RPA_SERVER_URL=https://lea.labs.laurinebazin.design/api/v1
|
|
||||||
|
|
||||||
# Clé d'accès (ne pas modifier)
|
|
||||||
RPA_API_TOKEN=86031addb338e449fccdb1a983f61807aec15d42d482b9c7748ad607dc23caab
|
|
||||||
|
|
||||||
# Qualité des captures (1-100, défaut: 85)
|
|
||||||
RPA_JPEG_QUALITY=85
|
|
||||||
|
|
||||||
# Floutage des données sensibles (true/false)
|
|
||||||
RPA_BLUR_SENSITIVE=true
|
|
||||||
@@ -1,340 +0,0 @@
|
|||||||
//! Floutage des zones sensibles dans les captures d'ecran.
|
|
||||||
//!
|
|
||||||
//! Detecte les champs de saisie (zones claires rectangulaires) et applique
|
|
||||||
//! un flou gaussien pour proteger les donnees sensibles (mots de passe, etc.).
|
|
||||||
//! Equivalent de agent_v1/vision/blur_sensitive.py.
|
|
||||||
//!
|
|
||||||
//! Algorithme :
|
|
||||||
//! 1. Conversion en niveaux de gris
|
|
||||||
//! 2. Seuillage binaire (detecter les zones claires = champs de saisie)
|
|
||||||
//! 3. Detection de contours rectangulaires > 50px de large
|
|
||||||
//! 4. Application d'un flou gaussien sur les zones detectees
|
|
||||||
//!
|
|
||||||
//! Utilise le crate image pour le traitement et imageproc pour le flou.
|
|
||||||
|
|
||||||
use image::{DynamicImage, GrayImage, Rgba, RgbaImage};
|
|
||||||
|
|
||||||
/// Luminance cut-off (0-255) used when binarising a capture.
/// Pixels at or above this value are treated as candidate input-field pixels.
const BRIGHTNESS_THRESHOLD: u8 = 220;

/// Narrowest bright run, in pixels, that is accepted as an input field.
const MIN_FIELD_WIDTH: u32 = 50;

/// Shortest stack of bright rows, in pixels, that is accepted as an input field.
const MIN_FIELD_HEIGHT: u32 = 15;

/// Tallest accepted input field, in pixels (keeps the detector from blurring
/// the whole screen).
const MAX_FIELD_HEIGHT: u32 = 80;

/// Widest accepted input field, in pixels (guards against false positives on
/// large white areas).
const MAX_FIELD_WIDTH: u32 = 800;

/// Blur strength. The blur routine truncates this value to an integer and
/// uses it as the box-blur radius.
const BLUR_SIGMA: f32 = 10.0;
|
|
||||||
|
|
||||||
/// Axis-aligned rectangle, in image pixel coordinates, marking an area to blur.
#[derive(Debug, Clone)]
pub struct BlurRegion {
    /// Column of the left edge.
    pub x: u32,
    /// Row of the top edge.
    pub y: u32,
    /// Horizontal extent in pixels.
    pub width: u32,
    /// Vertical extent in pixels.
    pub height: u32,
}
|
|
||||||
|
|
||||||
/// Detecte les champs de saisie dans une image et les floute.
|
|
||||||
///
|
|
||||||
/// Retourne l'image modifiee avec les zones sensibles floutees.
|
|
||||||
/// Si aucun champ n'est detecte, retourne l'image inchangee.
|
|
||||||
pub fn blur_sensitive_fields(img: &DynamicImage) -> DynamicImage {
|
|
||||||
let regions = detect_input_fields(img);
|
|
||||||
|
|
||||||
if regions.is_empty() {
|
|
||||||
return img.clone();
|
|
||||||
}
|
|
||||||
|
|
||||||
println!(
|
|
||||||
"[BLUR] {} zone(s) sensible(s) detectee(s) — floutage...",
|
|
||||||
regions.len()
|
|
||||||
);
|
|
||||||
|
|
||||||
let mut result = img.to_rgba8();
|
|
||||||
|
|
||||||
for region in ®ions {
|
|
||||||
blur_region(&mut result, region);
|
|
||||||
}
|
|
||||||
|
|
||||||
DynamicImage::ImageRgba8(result)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Detecte les champs de saisie (zones claires rectangulaires).
|
|
||||||
///
|
|
||||||
/// Algorithme simplifie :
|
|
||||||
/// 1. Convertir en niveaux de gris
|
|
||||||
/// 2. Seuillage binaire
|
|
||||||
/// 3. Scanner les lignes horizontales pour trouver les series de pixels clairs
|
|
||||||
/// 4. Regrouper les series adjacentes en rectangles
|
|
||||||
pub fn detect_input_fields(img: &DynamicImage) -> Vec<BlurRegion> {
|
|
||||||
let gray = img.to_luma8();
|
|
||||||
let (width, height) = gray.dimensions();
|
|
||||||
let mut regions = Vec::new();
|
|
||||||
|
|
||||||
// Creer une image binaire (seuillage)
|
|
||||||
let binary = threshold_image(&gray, BRIGHTNESS_THRESHOLD);
|
|
||||||
|
|
||||||
// Scanner par bandes horizontales pour detecter les champs
|
|
||||||
// On cherche des sequences continues de pixels blancs sur plusieurs lignes
|
|
||||||
let mut y = 0;
|
|
||||||
while y < height {
|
|
||||||
// Pour chaque ligne, trouver les segments horizontaux blancs
|
|
||||||
let segments = find_white_segments(&binary, y, width);
|
|
||||||
|
|
||||||
for (seg_start, seg_end) in &segments {
|
|
||||||
let seg_width = seg_end - seg_start;
|
|
||||||
if seg_width < MIN_FIELD_WIDTH || seg_width > MAX_FIELD_WIDTH {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Verifier combien de lignes consecutives partagent ce segment
|
|
||||||
let field_height = count_vertical_extent(
|
|
||||||
&binary,
|
|
||||||
*seg_start,
|
|
||||||
*seg_end,
|
|
||||||
y,
|
|
||||||
height,
|
|
||||||
);
|
|
||||||
|
|
||||||
if field_height >= MIN_FIELD_HEIGHT && field_height <= MAX_FIELD_HEIGHT {
|
|
||||||
// Verifier que cette region ne chevauche pas une region existante
|
|
||||||
let new_region = BlurRegion {
|
|
||||||
x: *seg_start,
|
|
||||||
y,
|
|
||||||
width: seg_width,
|
|
||||||
height: field_height,
|
|
||||||
};
|
|
||||||
|
|
||||||
if !overlaps_existing(®ions, &new_region) {
|
|
||||||
regions.push(new_region);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Avancer de la hauteur du dernier champ detecte, ou de 1 ligne
|
|
||||||
y += 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Deduplication : fusionner les regions tres proches
|
|
||||||
merge_close_regions(&mut regions);
|
|
||||||
|
|
||||||
regions
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Applique un seuillage binaire simple.
|
|
||||||
fn threshold_image(gray: &GrayImage, threshold: u8) -> GrayImage {
|
|
||||||
let (width, height) = gray.dimensions();
|
|
||||||
let mut binary = GrayImage::new(width, height);
|
|
||||||
|
|
||||||
for y in 0..height {
|
|
||||||
for x in 0..width {
|
|
||||||
let pixel = gray.get_pixel(x, y).0[0];
|
|
||||||
if pixel >= threshold {
|
|
||||||
binary.put_pixel(x, y, image::Luma([255]));
|
|
||||||
} else {
|
|
||||||
binary.put_pixel(x, y, image::Luma([0]));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
binary
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Trouve les segments horizontaux de pixels blancs sur une ligne.
|
|
||||||
fn find_white_segments(binary: &GrayImage, y: u32, width: u32) -> Vec<(u32, u32)> {
|
|
||||||
let mut segments = Vec::new();
|
|
||||||
let mut in_segment = false;
|
|
||||||
let mut seg_start = 0u32;
|
|
||||||
|
|
||||||
for x in 0..width {
|
|
||||||
let is_white = binary.get_pixel(x, y).0[0] > 128;
|
|
||||||
|
|
||||||
if is_white && !in_segment {
|
|
||||||
seg_start = x;
|
|
||||||
in_segment = true;
|
|
||||||
} else if !is_white && in_segment {
|
|
||||||
segments.push((seg_start, x));
|
|
||||||
in_segment = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if in_segment {
|
|
||||||
segments.push((seg_start, width));
|
|
||||||
}
|
|
||||||
|
|
||||||
segments
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Compte le nombre de lignes consecutives ou le segment est blanc.
|
|
||||||
fn count_vertical_extent(
|
|
||||||
binary: &GrayImage,
|
|
||||||
seg_start: u32,
|
|
||||||
seg_end: u32,
|
|
||||||
start_y: u32,
|
|
||||||
max_y: u32,
|
|
||||||
) -> u32 {
|
|
||||||
let mut count = 0u32;
|
|
||||||
let check_width = seg_end - seg_start;
|
|
||||||
let threshold = (check_width as f64 * 0.7) as u32; // 70% doivent etre blancs
|
|
||||||
|
|
||||||
for y in start_y..max_y.min(start_y + MAX_FIELD_HEIGHT + 5) {
|
|
||||||
let mut white_count = 0u32;
|
|
||||||
for x in seg_start..seg_end {
|
|
||||||
if binary.get_pixel(x, y).0[0] > 128 {
|
|
||||||
white_count += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if white_count >= threshold {
|
|
||||||
count += 1;
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
count
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Verifie si une region chevauche une region existante.
|
|
||||||
fn overlaps_existing(regions: &[BlurRegion], new_region: &BlurRegion) -> bool {
|
|
||||||
for region in regions {
|
|
||||||
let x_overlap = new_region.x < region.x + region.width
|
|
||||||
&& new_region.x + new_region.width > region.x;
|
|
||||||
let y_overlap = new_region.y < region.y + region.height
|
|
||||||
&& new_region.y + new_region.height > region.y;
|
|
||||||
|
|
||||||
if x_overlap && y_overlap {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
false
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Fusionne les regions tres proches (< 10px de distance).
|
|
||||||
fn merge_close_regions(regions: &mut Vec<BlurRegion>) {
|
|
||||||
if regions.len() < 2 {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Tri par position (y, puis x)
|
|
||||||
regions.sort_by(|a, b| a.y.cmp(&b.y).then(a.x.cmp(&b.x)));
|
|
||||||
|
|
||||||
let mut merged = Vec::new();
|
|
||||||
let mut current = regions[0].clone();
|
|
||||||
|
|
||||||
for region in regions.iter().skip(1) {
|
|
||||||
let x_close = (current.x + current.width + 10 >= region.x)
|
|
||||||
&& (region.x + region.width + 10 >= current.x);
|
|
||||||
let y_close = (current.y + current.height + 5 >= region.y)
|
|
||||||
&& (region.y + region.height + 5 >= current.y);
|
|
||||||
|
|
||||||
if x_close && y_close {
|
|
||||||
// Fusionner
|
|
||||||
let min_x = current.x.min(region.x);
|
|
||||||
let min_y = current.y.min(region.y);
|
|
||||||
let max_x = (current.x + current.width).max(region.x + region.width);
|
|
||||||
let max_y = (current.y + current.height).max(region.y + region.height);
|
|
||||||
|
|
||||||
current = BlurRegion {
|
|
||||||
x: min_x,
|
|
||||||
y: min_y,
|
|
||||||
width: max_x - min_x,
|
|
||||||
height: max_y - min_y,
|
|
||||||
};
|
|
||||||
} else {
|
|
||||||
merged.push(current);
|
|
||||||
current = region.clone();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
merged.push(current);
|
|
||||||
|
|
||||||
*regions = merged;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Applique un flou gaussien sur une region de l'image.
|
|
||||||
///
|
|
||||||
/// Implementation simplifiee : box blur avec plusieurs passes
|
|
||||||
/// (approximation du gaussien, plus rapide que le vrai gaussien).
|
|
||||||
fn blur_region(img: &mut RgbaImage, region: &BlurRegion) {
|
|
||||||
let (img_w, img_h) = img.dimensions();
|
|
||||||
|
|
||||||
// Borner la region aux dimensions de l'image
|
|
||||||
let x_start = region.x.min(img_w);
|
|
||||||
let y_start = region.y.min(img_h);
|
|
||||||
let x_end = (region.x + region.width).min(img_w);
|
|
||||||
let y_end = (region.y + region.height).min(img_h);
|
|
||||||
|
|
||||||
if x_start >= x_end || y_start >= y_end {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
let radius = BLUR_SIGMA as u32;
|
|
||||||
let kernel_size = (radius * 2 + 1) as i32;
|
|
||||||
let kernel_area = (kernel_size * kernel_size) as u32;
|
|
||||||
|
|
||||||
// Box blur : moyenne des pixels dans un carre de rayon `radius`
|
|
||||||
// On fait 3 passes pour approximer un flou gaussien
|
|
||||||
for _pass in 0..3 {
|
|
||||||
// Copier les pixels de la region dans un buffer temporaire
|
|
||||||
let reg_w = (x_end - x_start) as usize;
|
|
||||||
let reg_h = (y_end - y_start) as usize;
|
|
||||||
let mut buffer: Vec<[u8; 4]> = Vec::with_capacity(reg_w * reg_h);
|
|
||||||
|
|
||||||
for y in y_start..y_end {
|
|
||||||
for x in x_start..x_end {
|
|
||||||
buffer.push(img.get_pixel(x, y).0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Appliquer le box blur
|
|
||||||
for y in y_start..y_end {
|
|
||||||
for x in x_start..x_end {
|
|
||||||
let mut sum_r = 0u32;
|
|
||||||
let mut sum_g = 0u32;
|
|
||||||
let mut sum_b = 0u32;
|
|
||||||
let mut count = 0u32;
|
|
||||||
|
|
||||||
for ky in -(radius as i32)..=(radius as i32) {
|
|
||||||
for kx in -(radius as i32)..=(radius as i32) {
|
|
||||||
let sx = x as i32 + kx;
|
|
||||||
let sy = y as i32 + ky;
|
|
||||||
|
|
||||||
if sx >= x_start as i32
|
|
||||||
&& sx < x_end as i32
|
|
||||||
&& sy >= y_start as i32
|
|
||||||
&& sy < y_end as i32
|
|
||||||
{
|
|
||||||
let bx = (sx - x_start as i32) as usize;
|
|
||||||
let by = (sy - y_start as i32) as usize;
|
|
||||||
let pixel = buffer[by * reg_w + bx];
|
|
||||||
sum_r += pixel[0] as u32;
|
|
||||||
sum_g += pixel[1] as u32;
|
|
||||||
sum_b += pixel[2] as u32;
|
|
||||||
count += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if count > 0 {
|
|
||||||
let pixel = Rgba([
|
|
||||||
(sum_r / count) as u8,
|
|
||||||
(sum_g / count) as u8,
|
|
||||||
(sum_b / count) as u8,
|
|
||||||
255,
|
|
||||||
]);
|
|
||||||
img.put_pixel(x, y, pixel);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let _ = kernel_area; // suppress unused warning
|
|
||||||
}
|
|
||||||
@@ -1,115 +0,0 @@
|
|||||||
//! Capture d'écran via xcap.
|
|
||||||
//!
|
|
||||||
//! Fournit la capture du moniteur principal, l'encodage JPEG en base64,
|
|
||||||
//! et un hash perceptuel rapide pour la déduplication des heartbeats.
|
|
||||||
|
|
||||||
use base64::Engine;
|
|
||||||
use image::codecs::jpeg::JpegEncoder;
|
|
||||||
use image::DynamicImage;
|
|
||||||
use std::io::Cursor;
|
|
||||||
|
|
||||||
/// Capture le moniteur principal et retourne un DynamicImage.
|
|
||||||
///
|
|
||||||
/// Utilise xcap pour la capture cross-platform (DXGI sur Windows, X11/Wayland sur Linux).
|
|
||||||
pub fn capture_screenshot() -> Option<DynamicImage> {
|
|
||||||
let monitors = match xcap::Monitor::all() {
|
|
||||||
Ok(m) => m,
|
|
||||||
Err(e) => {
|
|
||||||
eprintln!("[CAPTURE] Erreur enumeration moniteurs : {}", e);
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let primary = monitors
|
|
||||||
.into_iter()
|
|
||||||
.find(|m| m.is_primary().unwrap_or(false));
|
|
||||||
let monitor = match primary {
|
|
||||||
Some(m) => m,
|
|
||||||
None => {
|
|
||||||
eprintln!("[CAPTURE] Aucun moniteur principal trouve");
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
match monitor.capture_image() {
|
|
||||||
Ok(rgba_image) => Some(DynamicImage::ImageRgba8(rgba_image)),
|
|
||||||
Err(e) => {
|
|
||||||
eprintln!("[CAPTURE] Erreur capture ecran : {}", e);
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Encode une image en JPEG et retourne le résultat en base64.
|
|
||||||
///
|
|
||||||
/// La qualité doit être entre 1 (mauvaise) et 100 (excellente).
|
|
||||||
/// 85 est un bon compromis taille/qualité pour le streaming réseau.
|
|
||||||
pub fn screenshot_to_jpeg_base64(img: &DynamicImage, quality: u8) -> String {
|
|
||||||
let rgb = img.to_rgb8();
|
|
||||||
let mut buffer = Cursor::new(Vec::new());
|
|
||||||
|
|
||||||
let mut encoder = JpegEncoder::new_with_quality(&mut buffer, quality);
|
|
||||||
if let Err(e) = encoder.encode(
|
|
||||||
rgb.as_raw(),
|
|
||||||
rgb.width(),
|
|
||||||
rgb.height(),
|
|
||||||
image::ExtendedColorType::Rgb8,
|
|
||||||
) {
|
|
||||||
eprintln!("[CAPTURE] Erreur encodage JPEG : {}", e);
|
|
||||||
return String::new();
|
|
||||||
}
|
|
||||||
|
|
||||||
base64::engine::general_purpose::STANDARD.encode(buffer.into_inner())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Encode une image en JPEG et retourne les bytes bruts.
|
|
||||||
pub fn screenshot_to_jpeg_bytes(img: &DynamicImage, quality: u8) -> Vec<u8> {
|
|
||||||
let rgb = img.to_rgb8();
|
|
||||||
let mut buffer = Cursor::new(Vec::new());
|
|
||||||
|
|
||||||
let mut encoder = JpegEncoder::new_with_quality(&mut buffer, quality);
|
|
||||||
if let Err(e) = encoder.encode(
|
|
||||||
rgb.as_raw(),
|
|
||||||
rgb.width(),
|
|
||||||
rgb.height(),
|
|
||||||
image::ExtendedColorType::Rgb8,
|
|
||||||
) {
|
|
||||||
eprintln!("[CAPTURE] Erreur encodage JPEG : {}", e);
|
|
||||||
return Vec::new();
|
|
||||||
}
|
|
||||||
|
|
||||||
buffer.into_inner()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Calcule un hash perceptuel rapide pour la déduplication.
|
|
||||||
///
|
|
||||||
/// Réduit l'image à 16x16 en niveaux de gris, puis calcule
|
|
||||||
/// un hash simple basé sur les pixels. Identique à la logique
|
|
||||||
/// Python (_quick_hash) dans agent_v1.
|
|
||||||
pub fn image_hash(img: &DynamicImage) -> u64 {
|
|
||||||
let small = img.resize_exact(16, 16, image::imageops::FilterType::Nearest);
|
|
||||||
let gray = small.to_luma8();
|
|
||||||
|
|
||||||
// Hash FNV-1a simple sur les pixels (rapide, pas besoin de crypto)
|
|
||||||
let mut hash: u64 = 0xcbf29ce484222325;
|
|
||||||
for pixel in gray.as_raw() {
|
|
||||||
hash ^= *pixel as u64;
|
|
||||||
hash = hash.wrapping_mul(0x100000001b3);
|
|
||||||
}
|
|
||||||
hash
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Retourne les dimensions du moniteur principal (largeur, hauteur).
|
|
||||||
///
|
|
||||||
/// xcap utilise DXGI sur Windows qui retourne toujours les pixels physiques,
|
|
||||||
/// independamment du DPI awareness. Ceci est coherent avec les coordonnees
|
|
||||||
/// physiques d'enigo quand le process est DPI-aware.
|
|
||||||
pub fn screen_dimensions() -> Option<(u32, u32)> {
|
|
||||||
let monitors = xcap::Monitor::all().ok()?;
|
|
||||||
let primary = monitors
|
|
||||||
.into_iter()
|
|
||||||
.find(|m| m.is_primary().unwrap_or(false))?;
|
|
||||||
let w = primary.width().ok()?;
|
|
||||||
let h = primary.height().ok()?;
|
|
||||||
Some((w, h))
|
|
||||||
}
|
|
||||||
@@ -1,123 +0,0 @@
|
|||||||
//! Chat Léa via Edge en mode app (--app=URL).
|
|
||||||
//!
|
|
||||||
//! Ouvre Edge sans barre d'adresse — rendu propre et professionnel.
|
|
||||||
//! Equivalent de agent_v1/ui/chat_window.py (approche Edge mode app).
|
|
||||||
|
|
||||||
use crate::config::Config;
|
|
||||||
use crate::state::AgentState;
|
|
||||||
use std::sync::Arc;
|
|
||||||
use std::process::Command;
|
|
||||||
|
|
||||||
/// URL du serveur de chat
|
|
||||||
fn chat_url(config: &Config) -> String {
|
|
||||||
config.chat_url()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Locates the Microsoft Edge executable.
///
/// The two usual install locations are probed first. On Windows only, the
/// `App Paths\msedge.exe` registry key is then queried through `reg query`
/// and the first `REG_SZ` value that names an existing file is used.
/// Returns `None` when nothing is found.
fn find_edge() -> Option<String> {
    const KNOWN_LOCATIONS: [&str; 2] = [
        r"C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe",
        r"C:\Program Files\Microsoft\Edge\Application\msedge.exe",
    ];

    let installed = KNOWN_LOCATIONS
        .iter()
        .copied()
        .find(|candidate| std::path::Path::new(candidate).exists());
    if let Some(hit) = installed {
        return Some(hit.to_string());
    }

    // Fall back to the registry.
    #[cfg(target_os = "windows")]
    {
        let query = Command::new("reg")
            .args(&["query", r"HKLM\SOFTWARE\Microsoft\Windows\CurrentVersion\App Paths\msedge.exe", "/ve"])
            .output();

        if let Ok(output) = query {
            let stdout = String::from_utf8_lossy(&output.stdout);
            // The value follows the "REG_SZ" type marker on its line.
            let from_registry = stdout
                .lines()
                .filter(|line| line.contains("REG_SZ"))
                .filter_map(|line| line.split("REG_SZ").last())
                .map(str::trim)
                .find(|path| std::path::Path::new(path).exists());

            if let Some(path) = from_registry {
                return Some(path.to_string());
            }
        }
    }

    None
}
|
|
||||||
|
|
||||||
/// Lance le chat dans un thread.
|
|
||||||
///
|
|
||||||
/// Attend que `state.chat_visible` passe à true, puis ouvre Edge en mode app.
|
|
||||||
/// Quand la fenêtre est fermée, remet `chat_visible` à false.
|
|
||||||
pub fn run_chat_thread(config: &Config, state: Arc<AgentState>) {
|
|
||||||
let url = chat_url(config);
|
|
||||||
let edge_path = find_edge();
|
|
||||||
|
|
||||||
if let Some(ref path) = edge_path {
|
|
||||||
println!("[CHAT] Edge trouvé : {}", path);
|
|
||||||
} else {
|
|
||||||
println!("[CHAT] Edge non trouvé — fallback navigateur par défaut");
|
|
||||||
}
|
|
||||||
|
|
||||||
loop {
|
|
||||||
// Attendre l'activation
|
|
||||||
while !state.chat_visible.load(std::sync::atomic::Ordering::Relaxed) {
|
|
||||||
if !state.is_running() {
|
|
||||||
println!("[CHAT] Arrêt du thread chat");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
std::thread::sleep(std::time::Duration::from_millis(200));
|
|
||||||
}
|
|
||||||
|
|
||||||
println!("[CHAT] Ouverture du chat...");
|
|
||||||
println!("[CHAT] URL : {}", url);
|
|
||||||
|
|
||||||
let result = if let Some(ref path) = edge_path {
|
|
||||||
// Edge en mode app — fenêtre propre sans barre d'adresse
|
|
||||||
Command::new(path)
|
|
||||||
.args(&[
|
|
||||||
&format!("--app={}", url),
|
|
||||||
"--window-size=600,800",
|
|
||||||
"--window-position=1300,200",
|
|
||||||
"--disable-extensions",
|
|
||||||
"--no-first-run",
|
|
||||||
])
|
|
||||||
.spawn()
|
|
||||||
} else {
|
|
||||||
// Fallback : ouvrir dans le navigateur par défaut
|
|
||||||
#[cfg(target_os = "windows")]
|
|
||||||
{
|
|
||||||
Command::new("cmd")
|
|
||||||
.args(&["/C", "start", &url])
|
|
||||||
.spawn()
|
|
||||||
}
|
|
||||||
#[cfg(not(target_os = "windows"))]
|
|
||||||
{
|
|
||||||
Command::new("xdg-open")
|
|
||||||
.arg(&url)
|
|
||||||
.spawn()
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
match result {
|
|
||||||
Ok(mut child) => {
|
|
||||||
println!("[CHAT] Fenêtre ouverte (PID: {:?})", child.id());
|
|
||||||
// Attendre que la fenêtre se ferme
|
|
||||||
let _ = child.wait();
|
|
||||||
println!("[CHAT] Fenêtre fermée");
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
println!("[CHAT] Erreur ouverture : {}", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Marquer comme invisible
|
|
||||||
state.chat_visible.store(false, std::sync::atomic::Ordering::Relaxed);
|
|
||||||
|
|
||||||
// Petit délai avant de pouvoir réouvrir
|
|
||||||
std::thread::sleep(std::time::Duration::from_millis(500));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,246 +0,0 @@
|
|||||||
//! Configuration de l'agent RPA.
|
|
||||||
//!
|
|
||||||
//! Parametres charges depuis les variables d'environnement ou valeurs par defaut.
|
|
||||||
//! Un fichier `config.txt` (clé=valeur) peut être placé à côté de l'exécutable.
|
|
||||||
//! Les variables d'environnement ont priorité sur le fichier.
|
|
||||||
//! Compatible avec la configuration Python (agent_v1/config.py).
|
|
||||||
|
|
||||||
use std::env;
|
|
||||||
use std::fs;
|
|
||||||
use std::path::PathBuf;
|
|
||||||
|
|
||||||
/// Version string of the Rust agent.
pub const AGENT_VERSION: &str = "0.2.0-rust";

/// Complete runtime configuration of the agent.
#[derive(Debug, Clone)]
pub struct Config {
    /// Base URL of the streaming server (e.g. http://192.168.1.10:5005/api/v1).
    pub server_url: String,

    /// Unique identifier of this machine (`hostname_os` by default).
    pub machine_id: String,

    /// Port of the embedded capture HTTP server (default: 5006).
    pub capture_port: u16,

    /// Heartbeat interval, in seconds.
    pub heartbeat_interval_s: u64,

    /// Replay polling interval, in seconds.
    pub replay_poll_interval_s: f64,

    /// JPEG quality used for uploaded screenshots (1-100).
    pub jpeg_quality: u8,

    /// Whether sensitive areas are blurred in captures (default: true).
    pub blur_sensitive: bool,

    /// Log retention in days (Article 12, AI Regulation; default: 180).
    pub log_retention_days: u32,

    /// Port of the chat server (default: 5004).
    pub chat_port: u16,

    /// Bearer token for API authentication (default: empty = no authentication).
    pub api_token: String,
}
|
|
||||||
|
|
||||||
impl Config {
|
|
||||||
/// Charge le fichier `config.txt` situé à côté de l'exécutable (ou dans le dossier courant).
|
|
||||||
///
|
|
||||||
/// Format : une ligne par clé, `CLÉ=VALEUR`. Les lignes vides et celles commençant
|
|
||||||
/// par `#` sont ignorées. Seules les clés **absentes** de l'environnement sont injectées
|
|
||||||
/// (les variables d'environnement ont toujours priorité).
|
|
||||||
fn load_config_file() {
|
|
||||||
// 1. Chercher config.txt à côté de l'exécutable
|
|
||||||
let mut config_path: Option<PathBuf> = None;
|
|
||||||
|
|
||||||
if let Ok(exe) = env::current_exe() {
|
|
||||||
let candidate = exe.parent().map(|p| p.join("config.txt"));
|
|
||||||
if let Some(ref p) = candidate {
|
|
||||||
if p.is_file() {
|
|
||||||
config_path = candidate;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// 2. Fallback : dossier courant
|
|
||||||
if config_path.is_none() {
|
|
||||||
let cwd_candidate = PathBuf::from("config.txt");
|
|
||||||
if cwd_candidate.is_file() {
|
|
||||||
config_path = Some(cwd_candidate);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let path = match config_path {
|
|
||||||
Some(p) => p,
|
|
||||||
None => return, // Pas de fichier config — ce n'est pas une erreur
|
|
||||||
};
|
|
||||||
|
|
||||||
let content = match fs::read_to_string(&path) {
|
|
||||||
Ok(c) => c,
|
|
||||||
Err(e) => {
|
|
||||||
eprintln!("[config] Impossible de lire {} : {}", path.display(), e);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
eprintln!("[config] Chargement de {}", path.display());
|
|
||||||
|
|
||||||
for line in content.lines() {
|
|
||||||
let trimmed = line.trim();
|
|
||||||
|
|
||||||
// Ignorer les lignes vides et les commentaires
|
|
||||||
if trimmed.is_empty() || trimmed.starts_with('#') {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Séparer au premier '='
|
|
||||||
if let Some(eq_pos) = trimmed.find('=') {
|
|
||||||
let key = trimmed[..eq_pos].trim();
|
|
||||||
let value = trimmed[eq_pos + 1..].trim();
|
|
||||||
|
|
||||||
if key.is_empty() {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ne positionner que si la variable n'existe pas déjà
|
|
||||||
if env::var(key).is_err() {
|
|
||||||
// SAFETY: appelé une seule fois au démarrage, avant tout thread
|
|
||||||
unsafe {
|
|
||||||
env::set_var(key, value);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Charge la configuration depuis les variables d'environnement.
|
|
||||||
///
|
|
||||||
/// Le fichier `config.txt` est lu en premier (voir [`load_config_file`]) ;
|
|
||||||
/// les variables d'environnement déjà définies ne sont pas écrasées.
|
|
||||||
///
|
|
||||||
/// Variables supportees :
|
|
||||||
/// - `RPA_SERVER_URL` : URL du serveur (defaut: http://localhost:5005/api/v1)
|
|
||||||
/// - `RPA_MACHINE_ID` : Identifiant machine (defaut: hostname_os)
|
|
||||||
/// - `RPA_CAPTURE_PORT` : Port du serveur de capture (defaut: 5006)
|
|
||||||
/// - `RPA_HEARTBEAT_INTERVAL` : Intervalle heartbeat en secondes (defaut: 5)
|
|
||||||
/// - `RPA_JPEG_QUALITY` : Qualite JPEG (defaut: 85)
|
|
||||||
/// - `RPA_BLUR_SENSITIVE` : Flouter les zones sensibles (defaut: true)
|
|
||||||
/// - `RPA_LOG_RETENTION_DAYS` : Retention des logs en jours (defaut: 180)
|
|
||||||
/// - `RPA_CHAT_PORT` : Port du serveur de chat (defaut: 5004)
|
|
||||||
/// - `RPA_API_TOKEN` : Token Bearer pour l'authentification (defaut: vide)
|
|
||||||
pub fn from_env() -> Self {
|
|
||||||
// Charger config.txt AVANT de lire les variables d'environnement
|
|
||||||
Self::load_config_file();
|
|
||||||
let machine_id = env::var("RPA_MACHINE_ID").unwrap_or_else(|_| {
|
|
||||||
let host = hostname::get()
|
|
||||||
.map(|h| h.to_string_lossy().to_string())
|
|
||||||
.unwrap_or_else(|_| "unknown".to_string());
|
|
||||||
let os_name = if cfg!(target_os = "windows") {
|
|
||||||
"windows"
|
|
||||||
} else if cfg!(target_os = "linux") {
|
|
||||||
"linux"
|
|
||||||
} else {
|
|
||||||
"unknown"
|
|
||||||
};
|
|
||||||
format!("{}_{}", host, os_name)
|
|
||||||
});
|
|
||||||
|
|
||||||
let server_url = env::var("RPA_SERVER_URL")
|
|
||||||
.unwrap_or_else(|_| "http://localhost:5005/api/v1".to_string());
|
|
||||||
|
|
||||||
let capture_port = env::var("RPA_CAPTURE_PORT")
|
|
||||||
.ok()
|
|
||||||
.and_then(|v| v.parse().ok())
|
|
||||||
.unwrap_or(5006);
|
|
||||||
|
|
||||||
let heartbeat_interval_s = env::var("RPA_HEARTBEAT_INTERVAL")
|
|
||||||
.ok()
|
|
||||||
.and_then(|v| v.parse().ok())
|
|
||||||
.unwrap_or(5);
|
|
||||||
|
|
||||||
let jpeg_quality = env::var("RPA_JPEG_QUALITY")
|
|
||||||
.ok()
|
|
||||||
.and_then(|v| v.parse().ok())
|
|
||||||
.unwrap_or(85);
|
|
||||||
|
|
||||||
let blur_sensitive = env::var("RPA_BLUR_SENSITIVE")
|
|
||||||
.map(|v| v != "0" && v.to_lowercase() != "false")
|
|
||||||
.unwrap_or(true);
|
|
||||||
|
|
||||||
let log_retention_days = env::var("RPA_LOG_RETENTION_DAYS")
|
|
||||||
.ok()
|
|
||||||
.and_then(|v| v.parse().ok())
|
|
||||||
.unwrap_or(180);
|
|
||||||
|
|
||||||
let chat_port = env::var("RPA_CHAT_PORT")
|
|
||||||
.ok()
|
|
||||||
.and_then(|v| v.parse().ok())
|
|
||||||
.unwrap_or(5004);
|
|
||||||
|
|
||||||
let api_token = env::var("RPA_API_TOKEN").unwrap_or_default();
|
|
||||||
|
|
||||||
Config {
|
|
||||||
server_url,
|
|
||||||
machine_id,
|
|
||||||
capture_port,
|
|
||||||
heartbeat_interval_s,
|
|
||||||
replay_poll_interval_s: 1.0,
|
|
||||||
jpeg_quality,
|
|
||||||
blur_sensitive,
|
|
||||||
log_retention_days,
|
|
||||||
chat_port,
|
|
||||||
api_token,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// URL de base pour le streaming (ex: http://...:5005/api/v1/traces/stream)
|
|
||||||
pub fn streaming_url(&self) -> String {
|
|
||||||
format!("{}/traces/stream", self.server_url)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Session ID pour le heartbeat permanent (sans session active)
|
|
||||||
pub fn bg_session_id(&self) -> String {
|
|
||||||
format!("bg_{}", self.machine_id)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Session ID pour le polling replay (sans session active)
|
|
||||||
pub fn agent_session_id(&self) -> String {
|
|
||||||
format!("agent_{}", self.machine_id)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// URL du serveur de chat.
|
|
||||||
pub fn chat_url(&self) -> String {
|
|
||||||
// Extraire le host du server_url
|
|
||||||
let base = &self.server_url;
|
|
||||||
if let Some(host_start) = base.find("://") {
|
|
||||||
let after_scheme = &base[host_start + 3..];
|
|
||||||
if let Some(colon_pos) = after_scheme.find(':') {
|
|
||||||
let host = &after_scheme[..colon_pos];
|
|
||||||
return format!(
|
|
||||||
"http://{}:{}/?machine_id={}",
|
|
||||||
host, self.chat_port, self.machine_id
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
format!(
|
|
||||||
"http://localhost:{}/?machine_id={}",
|
|
||||||
self.chat_port, self.machine_id
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl std::fmt::Display for Config {
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
||||||
write!(
|
|
||||||
f,
|
|
||||||
"Config {{ server: {}, machine: {}, capture_port: {}, heartbeat: {}s, jpeg_q: {}, blur: {}, log_retention: {}j, chat_port: {}, auth: {} }}",
|
|
||||||
self.server_url, self.machine_id, self.capture_port,
|
|
||||||
self.heartbeat_interval_s, self.jpeg_quality,
|
|
||||||
self.blur_sensitive, self.log_retention_days, self.chat_port,
|
|
||||||
if self.api_token.is_empty() { "none" } else { "Bearer" },
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,384 +0,0 @@
|
|||||||
//! Exécuteur d'actions pour le replay.
|
|
||||||
//!
|
|
||||||
//! Simule les clics souris, la saisie de texte, les combos clavier et les attentes.
|
|
||||||
//! Utilise enigo pour la simulation, compatible Windows et Linux.
|
|
||||||
//! Reproduit le comportement de agent_v1/core/executor.py.
|
|
||||||
|
|
||||||
use crate::config::Config;
|
|
||||||
use crate::network::{Action, ActionResult};
|
|
||||||
use crate::visual;
|
|
||||||
use enigo::{
|
|
||||||
Coordinate, Direction, Enigo, Key, Keyboard, Mouse, Settings,
|
|
||||||
};
|
|
||||||
use std::thread;
|
|
||||||
use std::time::Duration;
|
|
||||||
|
|
||||||
/// Exécute une action de replay et retourne le résultat.
|
|
||||||
///
|
|
||||||
/// Dispatche vers le bon handler selon le type d'action.
|
|
||||||
/// Les coordonnées x_pct/y_pct (0.0-1.0) sont converties en pixels
|
|
||||||
/// à partir des dimensions de l'écran.
|
|
||||||
/// Si visual_mode est activé, résout d'abord la cible via le serveur.
|
|
||||||
pub fn execute_action(
|
|
||||||
action: &Action,
|
|
||||||
screen_width: u32,
|
|
||||||
screen_height: u32,
|
|
||||||
config: &Config,
|
|
||||||
) -> ActionResult {
|
|
||||||
match action.action_type.as_str() {
|
|
||||||
"click" => execute_click(action, screen_width, screen_height, config),
|
|
||||||
"type" => execute_type(action, screen_width, screen_height, config),
|
|
||||||
"key_combo" => execute_key_combo(action),
|
|
||||||
"scroll" => execute_scroll(action, screen_width, screen_height),
|
|
||||||
"wait" => execute_wait(action),
|
|
||||||
_ => ActionResult::error(
|
|
||||||
&action.action_id,
|
|
||||||
&format!("Type d'action inconnu : {}", action.action_type),
|
|
||||||
),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Résout les coordonnées visuellement si visual_mode est activé.
|
|
||||||
///
|
|
||||||
/// Si la résolution échoue, retourne les coordonnées de fallback (blind).
|
|
||||||
/// Si visual_mode est désactivé ou target_spec absent, retourne les coordonnées originales.
|
|
||||||
fn resolve_coordinates(
|
|
||||||
action: &Action,
|
|
||||||
screen_width: u32,
|
|
||||||
screen_height: u32,
|
|
||||||
config: &Config,
|
|
||||||
) -> (f64, f64) {
|
|
||||||
let mut x_pct = action.x_pct;
|
|
||||||
let mut y_pct = action.y_pct;
|
|
||||||
|
|
||||||
if action.visual_mode && !action.target_spec.is_null() {
|
|
||||||
println!(
|
|
||||||
" [VISUAL] Mode visuel active — resolution de la cible..."
|
|
||||||
);
|
|
||||||
match visual::resolve_target_visual(
|
|
||||||
config,
|
|
||||||
&action.target_spec,
|
|
||||||
x_pct,
|
|
||||||
y_pct,
|
|
||||||
screen_width,
|
|
||||||
screen_height,
|
|
||||||
) {
|
|
||||||
Some((rx, ry)) => {
|
|
||||||
println!(" [VISUAL] Resolu : ({:.4}, {:.4})", rx, ry);
|
|
||||||
x_pct = rx;
|
|
||||||
y_pct = ry;
|
|
||||||
}
|
|
||||||
None => {
|
|
||||||
println!(
|
|
||||||
" [VISUAL] Echec — fallback coordonnees aveugles ({:.4}, {:.4})",
|
|
||||||
x_pct, y_pct
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
(x_pct, y_pct)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Exécute un clic souris aux coordonnées normalisées.
|
|
||||||
/// Résout visuellement la cible si visual_mode est activé.
|
|
||||||
fn execute_click(action: &Action, screen_width: u32, screen_height: u32, config: &Config) -> ActionResult {
|
|
||||||
let (x_pct, y_pct) = resolve_coordinates(action, screen_width, screen_height, config);
|
|
||||||
let real_x = (x_pct * screen_width as f64) as i32;
|
|
||||||
let real_y = (y_pct * screen_height as f64) as i32;
|
|
||||||
|
|
||||||
println!(
|
|
||||||
" [CLICK] ({:.4}, {:.4}) -> ({}, {}) sur ({}x{}), bouton={}{}",
|
|
||||||
x_pct, y_pct, real_x, real_y, screen_width, screen_height, action.button,
|
|
||||||
if action.visual_mode { " [VISUAL]" } else { "" }
|
|
||||||
);
|
|
||||||
|
|
||||||
let mut enigo = match Enigo::new(&Settings::default()) {
|
|
||||||
Ok(e) => e,
|
|
||||||
Err(e) => {
|
|
||||||
return ActionResult::error(
|
|
||||||
&action.action_id,
|
|
||||||
&format!("Impossible d'initialiser enigo : {}", e),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Déplacer la souris
|
|
||||||
if let Err(e) = enigo.move_mouse(real_x, real_y, Coordinate::Abs) {
|
|
||||||
return ActionResult::error(
|
|
||||||
&action.action_id,
|
|
||||||
&format!("Erreur deplacement souris : {}", e),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Petit délai pour simuler le temps de réaction humain
|
|
||||||
thread::sleep(Duration::from_millis(100));
|
|
||||||
|
|
||||||
// Cliquer selon le bouton demandé
|
|
||||||
let button = match action.button.as_str() {
|
|
||||||
"right" => enigo::Button::Right,
|
|
||||||
"middle" => enigo::Button::Middle,
|
|
||||||
_ => enigo::Button::Left,
|
|
||||||
};
|
|
||||||
|
|
||||||
if action.button == "double" {
|
|
||||||
// Double-clic gauche
|
|
||||||
if let Err(e) = enigo.button(enigo::Button::Left, Direction::Click) {
|
|
||||||
return ActionResult::error(&action.action_id, &format!("Erreur clic : {}", e));
|
|
||||||
}
|
|
||||||
thread::sleep(Duration::from_millis(50));
|
|
||||||
if let Err(e) = enigo.button(enigo::Button::Left, Direction::Click) {
|
|
||||||
return ActionResult::error(&action.action_id, &format!("Erreur double-clic : {}", e));
|
|
||||||
}
|
|
||||||
} else if let Err(e) = enigo.button(button, Direction::Click) {
|
|
||||||
return ActionResult::error(&action.action_id, &format!("Erreur clic : {}", e));
|
|
||||||
}
|
|
||||||
|
|
||||||
println!(" [CLICK] Termine.");
|
|
||||||
ActionResult::ok(&action.action_id)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Exécute une saisie de texte.
|
|
||||||
///
|
|
||||||
/// Si des coordonnées sont fournies (x_pct > 0), clique d'abord
|
|
||||||
/// sur le champ avant de taper (comme en Python).
|
|
||||||
fn execute_type(action: &Action, screen_width: u32, screen_height: u32, config: &Config) -> ActionResult {
|
|
||||||
let text = &action.text;
|
|
||||||
println!(
|
|
||||||
" [TYPE] Texte: '{}' ({} chars)",
|
|
||||||
if text.len() > 50 { &text[..50] } else { text },
|
|
||||||
text.len()
|
|
||||||
);
|
|
||||||
|
|
||||||
// Résoudre visuellement les coordonnées si visual_mode est activé
|
|
||||||
let (x_pct, y_pct) = resolve_coordinates(action, screen_width, screen_height, config);
|
|
||||||
|
|
||||||
let mut enigo = match Enigo::new(&Settings::default()) {
|
|
||||||
Ok(e) => e,
|
|
||||||
Err(e) => {
|
|
||||||
return ActionResult::error(
|
|
||||||
&action.action_id,
|
|
||||||
&format!("Impossible d'initialiser enigo : {}", e),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Clic préalable sur le champ si coordonnées disponibles
|
|
||||||
if x_pct > 0.0 && y_pct > 0.0 {
|
|
||||||
let real_x = (x_pct * screen_width as f64) as i32;
|
|
||||||
let real_y = (y_pct * screen_height as f64) as i32;
|
|
||||||
println!(" [TYPE] Clic prealable sur ({}, {}){}", real_x, real_y,
|
|
||||||
if action.visual_mode { " [VISUAL]" } else { "" });
|
|
||||||
|
|
||||||
if let Err(e) = enigo.move_mouse(real_x, real_y, Coordinate::Abs) {
|
|
||||||
eprintln!(" [TYPE] Erreur deplacement souris : {}", e);
|
|
||||||
}
|
|
||||||
thread::sleep(Duration::from_millis(100));
|
|
||||||
if let Err(e) = enigo.button(enigo::Button::Left, Direction::Click) {
|
|
||||||
eprintln!(" [TYPE] Erreur clic : {}", e);
|
|
||||||
}
|
|
||||||
thread::sleep(Duration::from_millis(300));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Saisir le texte
|
|
||||||
if let Err(e) = enigo.text(text) {
|
|
||||||
return ActionResult::error(
|
|
||||||
&action.action_id,
|
|
||||||
&format!("Erreur saisie texte : {}", e),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
println!(" [TYPE] Termine.");
|
|
||||||
ActionResult::ok(&action.action_id)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Exécute une combinaison de touches.
|
|
||||||
///
|
|
||||||
/// Ex: ["ctrl", "a"] -> maintenir Ctrl, appuyer A, relâcher Ctrl
|
|
||||||
/// Ex: ["enter"] -> appuyer Enter
|
|
||||||
fn execute_key_combo(action: &Action) -> ActionResult {
|
|
||||||
let keys = &action.keys;
|
|
||||||
println!(" [KEY_COMBO] Touches: {:?}", keys);
|
|
||||||
|
|
||||||
if keys.is_empty() {
|
|
||||||
return ActionResult::error(&action.action_id, "Aucune touche specifiee");
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut enigo = match Enigo::new(&Settings::default()) {
|
|
||||||
Ok(e) => e,
|
|
||||||
Err(e) => {
|
|
||||||
return ActionResult::error(
|
|
||||||
&action.action_id,
|
|
||||||
&format!("Impossible d'initialiser enigo : {}", e),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Résoudre les noms de touches
|
|
||||||
let resolved: Vec<Key> = keys
|
|
||||||
.iter()
|
|
||||||
.filter_map(|name| resolve_key(name))
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
if resolved.is_empty() {
|
|
||||||
return ActionResult::error(
|
|
||||||
&action.action_id,
|
|
||||||
&format!("Aucune touche reconnue dans {:?}", keys),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
if resolved.len() == 1 {
|
|
||||||
// Une seule touche : simple press/release
|
|
||||||
if let Err(e) = enigo.key(resolved[0], Direction::Click) {
|
|
||||||
return ActionResult::error(&action.action_id, &format!("Erreur touche : {}", e));
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Combo : maintenir les modifieurs, taper la dernière touche, relâcher
|
|
||||||
let (modifiers, last) = resolved.split_at(resolved.len() - 1);
|
|
||||||
|
|
||||||
for modifier in modifiers {
|
|
||||||
if let Err(e) = enigo.key(*modifier, Direction::Press) {
|
|
||||||
return ActionResult::error(
|
|
||||||
&action.action_id,
|
|
||||||
&format!("Erreur modifier press : {}", e),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
thread::sleep(Duration::from_millis(50));
|
|
||||||
|
|
||||||
if let Err(e) = enigo.key(last[0], Direction::Click) {
|
|
||||||
// Toujours relâcher les modifieurs même en cas d'erreur
|
|
||||||
for modifier in modifiers.iter().rev() {
|
|
||||||
let _ = enigo.key(*modifier, Direction::Release);
|
|
||||||
}
|
|
||||||
return ActionResult::error(
|
|
||||||
&action.action_id,
|
|
||||||
&format!("Erreur touche finale : {}", e),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
for modifier in modifiers.iter().rev() {
|
|
||||||
if let Err(e) = enigo.key(*modifier, Direction::Release) {
|
|
||||||
eprintln!(" [KEY_COMBO] Erreur release modifier : {}", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
println!(" [KEY_COMBO] Termine.");
|
|
||||||
ActionResult::ok(&action.action_id)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Exécute un scroll de souris.
|
|
||||||
fn execute_scroll(action: &Action, screen_width: u32, screen_height: u32) -> ActionResult {
|
|
||||||
let real_x = if action.x_pct > 0.0 {
|
|
||||||
(action.x_pct * screen_width as f64) as i32
|
|
||||||
} else {
|
|
||||||
(0.5 * screen_width as f64) as i32
|
|
||||||
};
|
|
||||||
let real_y = if action.y_pct > 0.0 {
|
|
||||||
(action.y_pct * screen_height as f64) as i32
|
|
||||||
} else {
|
|
||||||
(0.5 * screen_height as f64) as i32
|
|
||||||
};
|
|
||||||
|
|
||||||
let delta = action.delta;
|
|
||||||
println!(" [SCROLL] delta={} a ({}, {})", delta, real_x, real_y);
|
|
||||||
|
|
||||||
let mut enigo = match Enigo::new(&Settings::default()) {
|
|
||||||
Ok(e) => e,
|
|
||||||
Err(e) => {
|
|
||||||
return ActionResult::error(
|
|
||||||
&action.action_id,
|
|
||||||
&format!("Impossible d'initialiser enigo : {}", e),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
if let Err(e) = enigo.move_mouse(real_x, real_y, Coordinate::Abs) {
|
|
||||||
return ActionResult::error(
|
|
||||||
&action.action_id,
|
|
||||||
&format!("Erreur deplacement souris : {}", e),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
thread::sleep(Duration::from_millis(50));
|
|
||||||
|
|
||||||
if let Err(e) = enigo.scroll(delta, enigo::Axis::Vertical) {
|
|
||||||
return ActionResult::error(
|
|
||||||
&action.action_id,
|
|
||||||
&format!("Erreur scroll : {}", e),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
println!(" [SCROLL] Termine.");
|
|
||||||
ActionResult::ok(&action.action_id)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Exécute une attente (pause).
|
|
||||||
fn execute_wait(action: &Action) -> ActionResult {
|
|
||||||
let duration_ms = action.duration_ms;
|
|
||||||
println!(" [WAIT] {}ms...", duration_ms);
|
|
||||||
thread::sleep(Duration::from_millis(duration_ms));
|
|
||||||
println!(" [WAIT] Termine.");
|
|
||||||
ActionResult::ok(&action.action_id)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Résout un nom de touche (string) vers un enigo::Key.
|
|
||||||
///
|
|
||||||
/// Mapping compatible avec le Python executor (_SPECIAL_KEYS).
|
|
||||||
fn resolve_key(name: &str) -> Option<Key> {
|
|
||||||
match name.to_lowercase().as_str() {
|
|
||||||
// Touches de contrôle
|
|
||||||
"enter" | "return" => Some(Key::Return),
|
|
||||||
"tab" => Some(Key::Tab),
|
|
||||||
"escape" | "esc" => Some(Key::Escape),
|
|
||||||
"backspace" => Some(Key::Backspace),
|
|
||||||
"delete" => Some(Key::Delete),
|
|
||||||
"space" => Some(Key::Space),
|
|
||||||
|
|
||||||
// Touches de navigation
|
|
||||||
"up" => Some(Key::UpArrow),
|
|
||||||
"down" => Some(Key::DownArrow),
|
|
||||||
"left" => Some(Key::LeftArrow),
|
|
||||||
"right" => Some(Key::RightArrow),
|
|
||||||
"home" => Some(Key::Home),
|
|
||||||
"end" => Some(Key::End),
|
|
||||||
"page_up" | "pageup" => Some(Key::PageUp),
|
|
||||||
"page_down" | "pagedown" => Some(Key::PageDown),
|
|
||||||
|
|
||||||
// Touches de fonction
|
|
||||||
"f1" => Some(Key::F1),
|
|
||||||
"f2" => Some(Key::F2),
|
|
||||||
"f3" => Some(Key::F3),
|
|
||||||
"f4" => Some(Key::F4),
|
|
||||||
"f5" => Some(Key::F5),
|
|
||||||
"f6" => Some(Key::F6),
|
|
||||||
"f7" => Some(Key::F7),
|
|
||||||
"f8" => Some(Key::F8),
|
|
||||||
"f9" => Some(Key::F9),
|
|
||||||
"f10" => Some(Key::F10),
|
|
||||||
"f11" => Some(Key::F11),
|
|
||||||
"f12" => Some(Key::F12),
|
|
||||||
|
|
||||||
// Modifieurs
|
|
||||||
"ctrl" | "ctrl_l" | "ctrl_r" | "control" => Some(Key::Control),
|
|
||||||
"alt" | "alt_l" | "alt_r" => Some(Key::Alt),
|
|
||||||
"shift" | "shift_l" | "shift_r" => Some(Key::Shift),
|
|
||||||
"cmd" | "win" | "super" | "super_l" | "super_r" | "windows" | "meta" => Some(Key::Meta),
|
|
||||||
|
|
||||||
// Touches spéciales
|
|
||||||
"insert" => Some(Key::Other(0x2D)), // VK_INSERT
|
|
||||||
"caps_lock" | "capslock" => Some(Key::CapsLock),
|
|
||||||
|
|
||||||
// Caractère unique -> Unicode
|
|
||||||
s if s.len() == 1 => {
|
|
||||||
let c = s.chars().next().unwrap();
|
|
||||||
Some(Key::Unicode(c))
|
|
||||||
}
|
|
||||||
|
|
||||||
_ => {
|
|
||||||
eprintln!(" [KEY_COMBO] Touche inconnue : '{}', ignoree", name);
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,430 +0,0 @@
|
|||||||
//! Agent RPA Vision — Phases 1-5 (parite complete)
|
|
||||||
//!
|
|
||||||
//! Point d'entree principal. Architecture multi-threads :
|
|
||||||
//!
|
|
||||||
//! - Thread principal : boucle d'evenements systray (Windows) ou attente console (Linux)
|
|
||||||
//! - Thread heartbeat : capture + envoi toutes les 5s (avec dedup par hash)
|
|
||||||
//! - Thread replay : poll toutes les 1s, execute les actions
|
|
||||||
//! - Thread serveur : HTTP port 5006 pour les captures a la demande
|
|
||||||
//! - Thread recorder : capture evenements souris/clavier (quand enregistrement actif)
|
|
||||||
//! - Thread chat : fenetre WebView2 (Windows, a la demande)
|
|
||||||
//! - Thread health : verification connexion serveur (toutes les 30s)
|
|
||||||
//!
|
|
||||||
//! Le thread principal gere le systray sur Windows via winit.
|
|
||||||
//! Sur Linux, le thread principal attend Ctrl+C (mode console).
|
|
||||||
//!
|
|
||||||
//! Configuration via variables d'environnement ou valeurs par defaut.
|
|
||||||
//! Compatible avec le serveur streaming existant (api_stream.py, port 5005).
|
|
||||||
|
|
||||||
#[allow(dead_code)]
|
|
||||||
mod blur;
|
|
||||||
mod capture;
|
|
||||||
mod chat;
|
|
||||||
mod config;
|
|
||||||
mod executor;
|
|
||||||
mod network;
|
|
||||||
#[allow(dead_code)]
|
|
||||||
mod notifications;
|
|
||||||
mod recorder;
|
|
||||||
mod replay;
|
|
||||||
mod server;
|
|
||||||
#[allow(dead_code)]
|
|
||||||
mod state;
|
|
||||||
mod sysinfo;
|
|
||||||
mod tray;
|
|
||||||
mod visual;
|
|
||||||
|
|
||||||
use config::Config;
|
|
||||||
use reqwest::blocking::Client;
|
|
||||||
use state::AgentState;
|
|
||||||
use std::sync::Arc;
|
|
||||||
use std::thread;
|
|
||||||
use std::time::Duration;
|
|
||||||
|
|
||||||
/// Returns the path of the first installed browser found on Windows, probing
/// a fixed list of install locations in order: Edge, Chrome, Brave, Firefox.
/// Returns `None` when none of those paths exists.
#[cfg(target_os = "windows")]
fn find_browser() -> Option<String> {
    const CANDIDATES: [&str; 6] = [
        // Edge
        r"C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe",
        r"C:\Program Files\Microsoft\Edge\Application\msedge.exe",
        // Chrome
        r"C:\Program Files\Google\Chrome\Application\chrome.exe",
        r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe",
        // Brave
        r"C:\Program Files\BraveSoftware\Brave-Browser\Application\brave.exe",
        // Firefox (per the original note: supports --kiosk but not --app)
        r"C:\Program Files\Mozilla Firefox\firefox.exe",
    ];

    CANDIDATES
        .iter()
        .copied()
        .find(|candidate| std::path::Path::new(candidate).exists())
        .map(str::to_owned)
}
|
|
||||||
|
|
||||||
fn main() {
|
|
||||||
// --- DPI awareness (DOIT etre appele avant toute operation graphique) ---
|
|
||||||
// Rend le process DPI-aware sur Windows pour que les API (enigo, xcap,
|
|
||||||
// GetSystemMetrics, etc.) travaillent en coordonnees physiques (pixels reels)
|
|
||||||
// au lieu de coordonnees logiques (virtualisees par le DPI scaling).
|
|
||||||
// Sans cet appel, un ecran 2560x1600 a 150% DPI apparait comme 1707x1067
|
|
||||||
// pour enigo et GetSystemMetrics, ce qui cause des erreurs de positionnement
|
|
||||||
// pendant le replay.
|
|
||||||
// PROCESS_PER_MONITOR_DPI_AWARE = 2 : le niveau le plus precis.
|
|
||||||
#[cfg(target_os = "windows")]
|
|
||||||
{
|
|
||||||
// SetProcessDpiAwareness (shcore.dll) et SetProcessDPIAware (user32.dll)
|
|
||||||
// ne sont pas toujours exposes par windows-sys selon les features.
|
|
||||||
// On utilise des appels FFI raw pour eviter d'ajouter des features.
|
|
||||||
#[link(name = "shcore")]
|
|
||||||
extern "system" {
|
|
||||||
fn SetProcessDpiAwareness(value: i32) -> i32;
|
|
||||||
}
|
|
||||||
#[link(name = "user32")]
|
|
||||||
extern "system" {
|
|
||||||
fn SetProcessDPIAware() -> i32;
|
|
||||||
}
|
|
||||||
unsafe {
|
|
||||||
// Tenter SetProcessDpiAwareness(2) = PROCESS_PER_MONITOR_DPI_AWARE
|
|
||||||
let hr = SetProcessDpiAwareness(2);
|
|
||||||
if hr != 0 {
|
|
||||||
// Fallback pour Windows < 8.1 : SetProcessDPIAware()
|
|
||||||
SetProcessDPIAware();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Initialiser le logging
|
|
||||||
env_logger::Builder::from_env(
|
|
||||||
env_logger::Env::default().default_filter_or("info"),
|
|
||||||
)
|
|
||||||
.format_timestamp_secs()
|
|
||||||
.init();
|
|
||||||
|
|
||||||
let config = Config::from_env();
|
|
||||||
let config = Arc::new(config);
|
|
||||||
|
|
||||||
// Etat partage thread-safe
|
|
||||||
let state = AgentState::new();
|
|
||||||
|
|
||||||
// Banniere de demarrage
|
|
||||||
print_banner(&config);
|
|
||||||
|
|
||||||
// Handler Ctrl+C pour arret propre
|
|
||||||
install_ctrlc_handler(state.clone());
|
|
||||||
|
|
||||||
// Verifier que la capture d'ecran fonctionne
|
|
||||||
print!("[MAIN] Test de capture d'ecran... ");
|
|
||||||
match capture::screen_dimensions() {
|
|
||||||
Some((w, h)) => println!("OK ({}x{})", w, h),
|
|
||||||
None => {
|
|
||||||
println!("ECHEC");
|
|
||||||
eprintln!("[MAIN] ATTENTION : Capture d'ecran non disponible.");
|
|
||||||
eprintln!("[MAIN] Sur Linux sans display, les heartbeats seront desactives.");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Thread 1 : Heartbeat loop
|
|
||||||
let hb_config = config.clone();
|
|
||||||
let hb_state = state.clone();
|
|
||||||
let _heartbeat_thread = thread::Builder::new()
|
|
||||||
.name("heartbeat".to_string())
|
|
||||||
.spawn(move || {
|
|
||||||
heartbeat_loop(&hb_config, &hb_state);
|
|
||||||
})
|
|
||||||
.expect("Impossible de demarrer le thread heartbeat");
|
|
||||||
|
|
||||||
// Thread 2 : Replay poll loop
|
|
||||||
let rp_config = config.clone();
|
|
||||||
let rp_state = state.clone();
|
|
||||||
let _replay_thread = thread::Builder::new()
|
|
||||||
.name("replay".to_string())
|
|
||||||
.spawn(move || {
|
|
||||||
replay::replay_poll_loop(&rp_config, &rp_state);
|
|
||||||
})
|
|
||||||
.expect("Impossible de demarrer le thread replay");
|
|
||||||
|
|
||||||
// Thread 3 : Capture HTTP server
|
|
||||||
let srv_port = config.capture_port;
|
|
||||||
let _server_thread = thread::Builder::new()
|
|
||||||
.name("capture-server".to_string())
|
|
||||||
.spawn(move || {
|
|
||||||
server::start_capture_server(srv_port);
|
|
||||||
})
|
|
||||||
.expect("Impossible de demarrer le thread serveur");
|
|
||||||
|
|
||||||
// Thread 4 : Health check (verification connexion serveur)
|
|
||||||
let hc_config = config.clone();
|
|
||||||
let hc_state = state.clone();
|
|
||||||
let _health_thread = thread::Builder::new()
|
|
||||||
.name("health-check".to_string())
|
|
||||||
.spawn(move || {
|
|
||||||
health_check_loop(&hc_config, &hc_state);
|
|
||||||
})
|
|
||||||
.expect("Impossible de demarrer le thread health check");
|
|
||||||
|
|
||||||
// Thread 5 : Recorder (capture evenements — inactif jusqu'a enregistrement)
|
|
||||||
let rec_config = config.clone();
|
|
||||||
let rec_state = state.clone();
|
|
||||||
let _recorder_rx = recorder::start_recorder(rec_config, rec_state);
|
|
||||||
|
|
||||||
// Thread 6 : Chat window (WebView2, a la demande)
|
|
||||||
let chat_config = config.clone();
|
|
||||||
let chat_state = state.clone();
|
|
||||||
chat::run_chat_thread(&chat_config, chat_state);
|
|
||||||
|
|
||||||
// Synchroniser les workflows disponibles depuis le serveur
|
|
||||||
let sync_config = config.clone();
|
|
||||||
let workflows = {
|
|
||||||
let client = Client::new();
|
|
||||||
network::fetch_workflows(&client, &sync_config)
|
|
||||||
};
|
|
||||||
if workflows.is_empty() {
|
|
||||||
println!("[MAIN] Aucun workflow disponible pour cette machine.");
|
|
||||||
} else {
|
|
||||||
println!(
|
|
||||||
"[MAIN] {} workflow(s) disponible(s) :",
|
|
||||||
workflows.len()
|
|
||||||
);
|
|
||||||
for wf in &workflows {
|
|
||||||
println!(
|
|
||||||
" - {} ({} noeuds, {} transitions)",
|
|
||||||
wf.name, wf.nodes, wf.edges
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
println!("\n[MAIN] Agent operationnel — tous les threads demarres.\n");
|
|
||||||
|
|
||||||
// Ouvrir Léa dans le navigateur disponible (mode app) au démarrage
|
|
||||||
#[cfg(target_os = "windows")]
|
|
||||||
{
|
|
||||||
let chat_url = config.chat_url();
|
|
||||||
if let Some(browser) = find_browser() {
|
|
||||||
let browser_name = if browser.contains("chrome") { "Chrome" }
|
|
||||||
else if browser.contains("edge") || browser.contains("Edge") { "Edge" }
|
|
||||||
else if browser.contains("brave") || browser.contains("Brave") { "Brave" }
|
|
||||||
else if browser.contains("firefox") || browser.contains("Firefox") { "Firefox" }
|
|
||||||
else { "navigateur" };
|
|
||||||
println!("[MAIN] Ouverture de Léa dans {}...", browser_name);
|
|
||||||
let _ = std::process::Command::new(&browser)
|
|
||||||
.args(&[
|
|
||||||
&format!("--app={}", chat_url),
|
|
||||||
"--window-size=600,800",
|
|
||||||
"--disable-extensions",
|
|
||||||
"--no-first-run",
|
|
||||||
])
|
|
||||||
.spawn();
|
|
||||||
} else {
|
|
||||||
println!("[MAIN] Aucun navigateur trouvé — ouvrez manuellement : {}", chat_url);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Attente principale : Ctrl+C pour arrêter
|
|
||||||
println!("[MAIN] Appuyez sur Ctrl+C pour quitter.\n");
|
|
||||||
loop {
|
|
||||||
if !state.is_running() {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
thread::sleep(Duration::from_millis(500));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Si on arrive ici, l'agent doit s'arreter
|
|
||||||
println!("\n[MAIN] Arret en cours...");
|
|
||||||
state.request_shutdown();
|
|
||||||
|
|
||||||
// Laisser le temps aux threads de se terminer
|
|
||||||
thread::sleep(Duration::from_millis(500));
|
|
||||||
|
|
||||||
println!("[MAIN] Agent arrete.");
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Installe un handler Ctrl+C qui met l'etat a "arret demande".
|
|
||||||
fn install_ctrlc_handler(state: Arc<AgentState>) {
|
|
||||||
#[cfg(unix)]
|
|
||||||
{
|
|
||||||
let mut fds = [0i32; 2];
|
|
||||||
unsafe {
|
|
||||||
if libc::pipe(fds.as_mut_ptr()) != 0 {
|
|
||||||
eprintln!("[MAIN] Impossible de creer le pipe pour Ctrl+C");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
static mut WRITE_FD: i32 = -1;
|
|
||||||
WRITE_FD = fds[1];
|
|
||||||
|
|
||||||
// Sauvegarder un pointeur vers l'etat dans une static
|
|
||||||
// pour pouvoir y acceder depuis le handler
|
|
||||||
static mut STATE_PTR: *const AgentState = std::ptr::null();
|
|
||||||
STATE_PTR = Arc::as_ptr(&state);
|
|
||||||
|
|
||||||
extern "C" fn sigint_handler(_sig: i32) {
|
|
||||||
unsafe {
|
|
||||||
if !STATE_PTR.is_null() {
|
|
||||||
(*STATE_PTR)
|
|
||||||
.running
|
|
||||||
.store(false, std::sync::atomic::Ordering::SeqCst);
|
|
||||||
}
|
|
||||||
let buf = [1u8];
|
|
||||||
let _ = libc::write(WRITE_FD, buf.as_ptr() as *const _, 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
libc::signal(libc::SIGINT, sigint_handler as *const () as libc::sighandler_t);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(not(unix))]
|
|
||||||
{
|
|
||||||
// Sur Windows, le systray gere l'arret via le menu "Quitter"
|
|
||||||
// Le handler console est un bonus pour le mode headless
|
|
||||||
let _ = state;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Boucle de heartbeat : capture un screenshot toutes les N secondes
|
|
||||||
/// et l'envoie au serveur si l'ecran a change.
|
|
||||||
/// Applique le floutage des zones sensibles si active dans la config.
|
|
||||||
fn heartbeat_loop(config: &Config, state: &AgentState) {
|
|
||||||
let client = Client::new();
|
|
||||||
let session_id = config.bg_session_id();
|
|
||||||
let mut last_hash: u64 = 0;
|
|
||||||
let mut consecutive_errors: u32 = 0;
|
|
||||||
|
|
||||||
println!(
|
|
||||||
"[HEARTBEAT] Boucle permanente demarree (session={}, intervalle={}s)",
|
|
||||||
session_id, config.heartbeat_interval_s
|
|
||||||
);
|
|
||||||
|
|
||||||
while state.is_running() {
|
|
||||||
// Verifier l'arret d'urgence
|
|
||||||
if state
|
|
||||||
.emergency_stop
|
|
||||||
.load(std::sync::atomic::Ordering::SeqCst)
|
|
||||||
{
|
|
||||||
thread::sleep(Duration::from_secs(1));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Capturer l'ecran
|
|
||||||
match capture::capture_screenshot() {
|
|
||||||
Some(img) => {
|
|
||||||
// Deduplication par hash perceptuel
|
|
||||||
let current_hash = capture::image_hash(&img);
|
|
||||||
if current_hash == last_hash {
|
|
||||||
thread::sleep(Duration::from_secs(config.heartbeat_interval_s));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
last_hash = current_hash;
|
|
||||||
|
|
||||||
// Appliquer le floutage des zones sensibles si active
|
|
||||||
let final_img = if config.blur_sensitive {
|
|
||||||
blur::blur_sensitive_fields(&img)
|
|
||||||
} else {
|
|
||||||
img
|
|
||||||
};
|
|
||||||
|
|
||||||
// Encoder en JPEG
|
|
||||||
let jpeg_bytes =
|
|
||||||
capture::screenshot_to_jpeg_bytes(&final_img, config.jpeg_quality);
|
|
||||||
if jpeg_bytes.is_empty() {
|
|
||||||
thread::sleep(Duration::from_secs(config.heartbeat_interval_s));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Envoyer au serveur
|
|
||||||
let success =
|
|
||||||
network::send_heartbeat(&client, config, &jpeg_bytes, &session_id);
|
|
||||||
if success {
|
|
||||||
consecutive_errors = 0;
|
|
||||||
} else {
|
|
||||||
consecutive_errors += 1;
|
|
||||||
if consecutive_errors == 1 || consecutive_errors % 12 == 0 {
|
|
||||||
eprintln!(
|
|
||||||
"[HEARTBEAT] {} erreur(s) consecutives",
|
|
||||||
consecutive_errors
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
None => {
|
|
||||||
thread::sleep(Duration::from_secs(config.heartbeat_interval_s * 2));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
thread::sleep(Duration::from_secs(config.heartbeat_interval_s));
|
|
||||||
}
|
|
||||||
|
|
||||||
println!("[HEARTBEAT] Boucle arretee.");
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Boucle de health check : verifie la connexion au serveur toutes les 30s.
|
|
||||||
/// Met a jour l'etat de connexion dans AgentState.
|
|
||||||
fn health_check_loop(config: &Config, state: &AgentState) {
|
|
||||||
let client = Client::new();
|
|
||||||
let check_interval = Duration::from_secs(30);
|
|
||||||
let timeout = Duration::from_secs(5);
|
|
||||||
|
|
||||||
println!("[HEALTH] Boucle health check demarree (intervalle=30s)");
|
|
||||||
|
|
||||||
while state.is_running() {
|
|
||||||
let url = format!("{}/stats", config.server_url);
|
|
||||||
let request = client.get(&url).timeout(timeout);
|
|
||||||
let connected = network::with_auth(request, config)
|
|
||||||
.send()
|
|
||||||
.map(|r| r.status().is_success())
|
|
||||||
.unwrap_or(false);
|
|
||||||
|
|
||||||
let was_connected = state.connected.load(std::sync::atomic::Ordering::SeqCst);
|
|
||||||
state.set_connected(connected);
|
|
||||||
|
|
||||||
// Notifier si le statut a change
|
|
||||||
if connected != was_connected {
|
|
||||||
notifications::connection_changed(connected);
|
|
||||||
}
|
|
||||||
|
|
||||||
thread::sleep(check_interval);
|
|
||||||
}
|
|
||||||
|
|
||||||
println!("[HEALTH] Boucle arretee.");
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Affiche la banniere de demarrage.
|
|
||||||
fn print_banner(config: &Config) {
|
|
||||||
let meta = sysinfo::get_screen_metadata();
|
|
||||||
|
|
||||||
println!("======================================================");
|
|
||||||
println!(
|
|
||||||
" RPA Vision Agent v{} (Rust)",
|
|
||||||
config::AGENT_VERSION
|
|
||||||
);
|
|
||||||
println!(" Phases 1-5 — Parite complete");
|
|
||||||
println!("------------------------------------------------------");
|
|
||||||
println!(" Machine : {}", config.machine_id);
|
|
||||||
println!(" Serveur : {}", config.server_url);
|
|
||||||
println!(" Capture : port {}", config.capture_port);
|
|
||||||
println!(" Chat : port {}", config.chat_port);
|
|
||||||
println!(" Heartbeat : toutes les {}s", config.heartbeat_interval_s);
|
|
||||||
println!(" JPEG : qualite {}", config.jpeg_quality);
|
|
||||||
println!(" Floutage : {}", if config.blur_sensitive { "actif" } else { "inactif" });
|
|
||||||
println!(" Logs : retention {} jours", config.log_retention_days);
|
|
||||||
println!(" Auth : {}", if config.api_token.is_empty() { "aucune" } else { "Bearer token" });
|
|
||||||
println!(" Workflows : synchronisation au demarrage");
|
|
||||||
println!(
|
|
||||||
" Ecran : {}x{} @ {}% DPI",
|
|
||||||
meta.screen_resolution[0], meta.screen_resolution[1], meta.dpi_scale
|
|
||||||
);
|
|
||||||
println!(
|
|
||||||
" Moniteur : #{} ({})",
|
|
||||||
meta.monitor_index,
|
|
||||||
if meta.monitor_index == 0 { "principal" } else { "secondaire" }
|
|
||||||
);
|
|
||||||
println!("======================================================");
|
|
||||||
println!();
|
|
||||||
println!(" [IA] Cet agent utilise l'intelligence artificielle.");
|
|
||||||
println!(" Article 50 du Reglement europeen sur l'IA.");
|
|
||||||
println!();
|
|
||||||
}
|
|
||||||
@@ -1,391 +0,0 @@
|
|||||||
//! Client HTTP pour la communication avec le serveur streaming.
|
|
||||||
//!
|
|
||||||
//! Gère l'envoi des heartbeats (screenshots périodiques),
|
|
||||||
//! le polling des actions replay, et le rapport des résultats.
|
|
||||||
//! Compatible avec l'API de agent_v0/server_v1/api_stream.py (port 5005).
|
|
||||||
|
|
||||||
use crate::config::Config;
|
|
||||||
use crate::sysinfo;
|
|
||||||
use reqwest::blocking::{Client, RequestBuilder};
|
|
||||||
use serde::{Deserialize, Serialize};
|
|
||||||
|
|
||||||
/// Ajoute le header Authorization Bearer si un token est configure.
|
|
||||||
///
|
|
||||||
/// Si `config.api_token` est vide, la requete est retournee telle quelle.
|
|
||||||
pub fn with_auth(request: RequestBuilder, config: &Config) -> RequestBuilder {
|
|
||||||
if config.api_token.is_empty() {
|
|
||||||
request
|
|
||||||
} else {
|
|
||||||
request.header("Authorization", format!("Bearer {}", config.api_token))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Action de replay reçue du serveur.
|
|
||||||
///
|
|
||||||
/// Format identique à celui du Python executor (agent_v1/core/executor.py).
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
||||||
pub struct Action {
|
|
||||||
/// Identifiant unique de l'action
|
|
||||||
#[serde(default)]
|
|
||||||
pub action_id: String,
|
|
||||||
|
|
||||||
/// Type d'action : "click", "type", "key_combo", "scroll", "wait"
|
|
||||||
#[serde(rename = "type")]
|
|
||||||
pub action_type: String,
|
|
||||||
|
|
||||||
/// Coordonnée X normalisée (0.0 à 1.0)
|
|
||||||
#[serde(default)]
|
|
||||||
pub x_pct: f64,
|
|
||||||
|
|
||||||
/// Coordonnée Y normalisée (0.0 à 1.0)
|
|
||||||
#[serde(default)]
|
|
||||||
pub y_pct: f64,
|
|
||||||
|
|
||||||
/// Texte à taper (pour action "type")
|
|
||||||
#[serde(default)]
|
|
||||||
pub text: String,
|
|
||||||
|
|
||||||
/// Liste de touches (pour action "key_combo")
|
|
||||||
#[serde(default)]
|
|
||||||
pub keys: Vec<String>,
|
|
||||||
|
|
||||||
/// Bouton de souris : "left", "right", "double"
|
|
||||||
#[serde(default = "default_button")]
|
|
||||||
pub button: String,
|
|
||||||
|
|
||||||
/// Durée d'attente en ms (pour action "wait")
|
|
||||||
#[serde(default = "default_duration")]
|
|
||||||
pub duration_ms: u64,
|
|
||||||
|
|
||||||
/// Delta de scroll (pour action "scroll")
|
|
||||||
#[serde(default)]
|
|
||||||
pub delta: i32,
|
|
||||||
|
|
||||||
/// Mode visuel (résolution par le serveur)
|
|
||||||
#[serde(default)]
|
|
||||||
pub visual_mode: bool,
|
|
||||||
|
|
||||||
/// Spécification de la cible visuelle
|
|
||||||
#[serde(default)]
|
|
||||||
pub target_spec: serde_json::Value,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Serde default for `Action::button`: the left mouse button.
fn default_button() -> String {
    String::from("left")
}
|
|
||||||
|
|
||||||
/// Serde default for `Action::duration_ms`: 500 milliseconds.
fn default_duration() -> u64 {
    500_u64
}
|
|
||||||
|
|
||||||
/// Résultat d'exécution d'une action.
|
|
||||||
#[derive(Debug, Serialize, Deserialize)]
|
|
||||||
pub struct ActionResult {
|
|
||||||
pub action_id: String,
|
|
||||||
pub success: bool,
|
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
|
||||||
pub error: Option<String>,
|
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
|
||||||
pub screenshot: Option<String>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl ActionResult {
|
|
||||||
/// Crée un résultat d'erreur.
|
|
||||||
pub fn error(action_id: &str, msg: &str) -> Self {
|
|
||||||
ActionResult {
|
|
||||||
action_id: action_id.to_string(),
|
|
||||||
success: false,
|
|
||||||
error: Some(msg.to_string()),
|
|
||||||
screenshot: None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Crée un résultat de succès.
|
|
||||||
pub fn ok(action_id: &str) -> Self {
|
|
||||||
ActionResult {
|
|
||||||
action_id: action_id.to_string(),
|
|
||||||
success: true,
|
|
||||||
error: None,
|
|
||||||
screenshot: None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Envoie un heartbeat (screenshot) au serveur streaming.
|
|
||||||
///
|
|
||||||
/// POST /traces/stream/image avec le screenshot en multipart.
|
|
||||||
/// Inclut les métadonnées système (DPI, résolution, fenêtre, moniteur)
|
|
||||||
/// dans les query params pour que le serveur puisse les exploiter.
|
|
||||||
/// Retourne true si l'envoi a réussi.
|
|
||||||
pub fn send_heartbeat(
|
|
||||||
client: &Client,
|
|
||||||
config: &Config,
|
|
||||||
jpeg_bytes: &[u8],
|
|
||||||
session_id: &str,
|
|
||||||
) -> bool {
|
|
||||||
let url = format!("{}/image", config.streaming_url());
|
|
||||||
let shot_id = format!("heartbeat_{}", chrono::Utc::now().timestamp());
|
|
||||||
|
|
||||||
// Collecter les métadonnées système
|
|
||||||
let meta = sysinfo::get_screen_metadata();
|
|
||||||
let dpi_str = meta.dpi_scale.to_string();
|
|
||||||
let screen_w_str = meta.screen_resolution[0].to_string();
|
|
||||||
let screen_h_str = meta.screen_resolution[1].to_string();
|
|
||||||
let monitor_str = meta.monitor_index.to_string();
|
|
||||||
|
|
||||||
// Sérialiser window_bounds en JSON compact (ou "null")
|
|
||||||
let wb_str = match meta.window_bounds {
|
|
||||||
Some(wb) => format!("[{},{},{},{}]", wb[0], wb[1], wb[2], wb[3]),
|
|
||||||
None => "null".to_string(),
|
|
||||||
};
|
|
||||||
|
|
||||||
let part = reqwest::blocking::multipart::Part::bytes(jpeg_bytes.to_vec())
|
|
||||||
.file_name("screenshot.jpg")
|
|
||||||
.mime_str("image/jpeg")
|
|
||||||
.unwrap_or_else(|_| {
|
|
||||||
reqwest::blocking::multipart::Part::bytes(jpeg_bytes.to_vec())
|
|
||||||
.file_name("screenshot.jpg")
|
|
||||||
});
|
|
||||||
|
|
||||||
let form = reqwest::blocking::multipart::Form::new().part("file", part);
|
|
||||||
|
|
||||||
let request = client
|
|
||||||
.post(&url)
|
|
||||||
.query(&[
|
|
||||||
("session_id", session_id),
|
|
||||||
("shot_id", &shot_id),
|
|
||||||
("machine_id", &config.machine_id),
|
|
||||||
("dpi_scale", &dpi_str),
|
|
||||||
("screen_w", &screen_w_str),
|
|
||||||
("screen_h", &screen_h_str),
|
|
||||||
("monitor_index", &monitor_str),
|
|
||||||
("window_bounds", &wb_str),
|
|
||||||
])
|
|
||||||
.multipart(form)
|
|
||||||
.timeout(std::time::Duration::from_secs(10));
|
|
||||||
|
|
||||||
match with_auth(request, config).send() {
|
|
||||||
Ok(resp) => {
|
|
||||||
if resp.status().is_success() {
|
|
||||||
true
|
|
||||||
} else {
|
|
||||||
eprintln!(
|
|
||||||
"[HEARTBEAT] Envoi echoue : HTTP {}",
|
|
||||||
resp.status()
|
|
||||||
);
|
|
||||||
false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
// Log discret pour ne pas spammer la console
|
|
||||||
eprintln!("[HEARTBEAT] Erreur reseau : {}", e);
|
|
||||||
false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Réponse du serveur pour GET /replay/next
|
|
||||||
#[derive(Debug, Deserialize)]
|
|
||||||
struct ReplayNextResponse {
|
|
||||||
action: Option<Action>,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Poll le serveur pour récupérer la prochaine action de replay.
|
|
||||||
///
|
|
||||||
/// GET /traces/stream/replay/next?session_id=...&machine_id=...
|
|
||||||
/// Retourne None si pas d'action en attente ou si le serveur est indisponible.
|
|
||||||
pub fn poll_next_action(client: &Client, config: &Config) -> Option<Action> {
|
|
||||||
let url = format!("{}/replay/next", config.streaming_url());
|
|
||||||
let session_id = config.agent_session_id();
|
|
||||||
|
|
||||||
let request = client
|
|
||||||
.get(&url)
|
|
||||||
.query(&[
|
|
||||||
("session_id", session_id.as_str()),
|
|
||||||
("machine_id", config.machine_id.as_str()),
|
|
||||||
])
|
|
||||||
.timeout(std::time::Duration::from_secs(5));
|
|
||||||
|
|
||||||
let resp = with_auth(request, config).send().ok()?;
|
|
||||||
|
|
||||||
if !resp.status().is_success() {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
|
|
||||||
let data: ReplayNextResponse = resp.json().ok()?;
|
|
||||||
data.action
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Informations résumées d'un workflow disponible.
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
||||||
pub struct WorkflowInfo {
|
|
||||||
/// Identifiant unique du workflow
|
|
||||||
pub workflow_id: String,
|
|
||||||
|
|
||||||
/// Nom lisible du workflow
|
|
||||||
#[serde(default)]
|
|
||||||
pub name: String,
|
|
||||||
|
|
||||||
/// Identifiant machine associé
|
|
||||||
#[serde(default)]
|
|
||||||
pub machine_id: String,
|
|
||||||
|
|
||||||
/// Nombre de nœuds
|
|
||||||
#[serde(default)]
|
|
||||||
pub nodes: u32,
|
|
||||||
|
|
||||||
/// Nombre de transitions
|
|
||||||
#[serde(default)]
|
|
||||||
pub edges: u32,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Réponse du serveur pour GET /traces/stream/workflows
|
|
||||||
#[derive(Debug, Deserialize)]
|
|
||||||
struct WorkflowsResponse {
|
|
||||||
#[serde(default)]
|
|
||||||
workflows: Vec<WorkflowInfo>,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Récupère la liste des workflows disponibles pour cette machine.
|
|
||||||
///
|
|
||||||
/// GET /traces/stream/workflows?machine_id=<machine_id>
|
|
||||||
/// Sauvegarde le résultat dans workflows.json à côté de l'exécutable.
|
|
||||||
/// Retourne la liste (éventuellement depuis le cache local si le serveur est indisponible).
|
|
||||||
pub fn fetch_workflows(client: &Client, config: &Config) -> Vec<WorkflowInfo> {
|
|
||||||
let url = format!("{}/workflows", config.streaming_url());
|
|
||||||
|
|
||||||
let request = client
|
|
||||||
.get(&url)
|
|
||||||
.query(&[("machine_id", config.machine_id.as_str())])
|
|
||||||
.timeout(std::time::Duration::from_secs(5));
|
|
||||||
|
|
||||||
let workflows = match with_auth(request, config).send() {
|
|
||||||
Ok(resp) if resp.status().is_success() => {
|
|
||||||
match resp.json::<WorkflowsResponse>() {
|
|
||||||
Ok(data) => data.workflows,
|
|
||||||
Err(e) => {
|
|
||||||
eprintln!("[WORKFLOWS] Erreur parsing reponse : {}", e);
|
|
||||||
Vec::new()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(resp) => {
|
|
||||||
eprintln!("[WORKFLOWS] Serveur HTTP {} — chargement cache local", resp.status());
|
|
||||||
return load_workflows_cache();
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
eprintln!("[WORKFLOWS] Serveur injoignable ({}) — chargement cache local", e);
|
|
||||||
return load_workflows_cache();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Sauvegarder dans le cache local
|
|
||||||
save_workflows_cache(&workflows);
|
|
||||||
|
|
||||||
workflows
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Location of the `workflows.json` cache file: next to the running
/// executable when its path and parent directory can be determined,
/// otherwise the relative path `workflows.json`.
fn workflows_cache_path() -> std::path::PathBuf {
    std::env::current_exe()
        .ok()
        .and_then(|exe| exe.parent().map(|dir| dir.join("workflows.json")))
        .unwrap_or_else(|| std::path::PathBuf::from("workflows.json"))
}
|
|
||||||
|
|
||||||
/// Sauvegarde les workflows dans le cache local.
|
|
||||||
fn save_workflows_cache(workflows: &[WorkflowInfo]) {
|
|
||||||
let path = workflows_cache_path();
|
|
||||||
match serde_json::to_string_pretty(workflows) {
|
|
||||||
Ok(json) => {
|
|
||||||
if let Err(e) = std::fs::write(&path, json) {
|
|
||||||
eprintln!("[WORKFLOWS] Erreur ecriture cache {} : {}", path.display(), e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
eprintln!("[WORKFLOWS] Erreur serialisation cache : {}", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Charge les workflows depuis le cache local.
|
|
||||||
fn load_workflows_cache() -> Vec<WorkflowInfo> {
|
|
||||||
let path = workflows_cache_path();
|
|
||||||
match std::fs::read_to_string(&path) {
|
|
||||||
Ok(content) => {
|
|
||||||
match serde_json::from_str::<Vec<WorkflowInfo>>(&content) {
|
|
||||||
Ok(workflows) => {
|
|
||||||
println!("[WORKFLOWS] {} workflow(s) charges depuis le cache local", workflows.len());
|
|
||||||
workflows
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
eprintln!("[WORKFLOWS] Erreur parsing cache : {}", e);
|
|
||||||
Vec::new()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Err(_) => Vec::new(), // Pas de cache, pas d'erreur
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Rapporte le résultat d'une action au serveur.
|
|
||||||
///
|
|
||||||
/// POST /traces/stream/replay/result avec le résultat en JSON.
|
|
||||||
pub fn report_result(client: &Client, config: &Config, result: &ActionResult) -> bool {
|
|
||||||
let url = format!("{}/replay/result", config.streaming_url());
|
|
||||||
let session_id = config.agent_session_id();
|
|
||||||
|
|
||||||
#[derive(Serialize)]
|
|
||||||
struct Report<'a> {
|
|
||||||
session_id: &'a str,
|
|
||||||
action_id: &'a str,
|
|
||||||
success: bool,
|
|
||||||
error: &'a Option<String>,
|
|
||||||
screenshot: &'a Option<String>,
|
|
||||||
}
|
|
||||||
|
|
||||||
let report = Report {
|
|
||||||
session_id: &session_id,
|
|
||||||
action_id: &result.action_id,
|
|
||||||
success: result.success,
|
|
||||||
error: &result.error,
|
|
||||||
screenshot: &result.screenshot,
|
|
||||||
};
|
|
||||||
|
|
||||||
let request = client
|
|
||||||
.post(&url)
|
|
||||||
.json(&report)
|
|
||||||
.timeout(std::time::Duration::from_secs(10));
|
|
||||||
|
|
||||||
match with_auth(request, config).send() {
|
|
||||||
Ok(resp) => {
|
|
||||||
if resp.status().is_success() {
|
|
||||||
if let Ok(data) = resp.json::<serde_json::Value>() {
|
|
||||||
let status = data.get("replay_status")
|
|
||||||
.and_then(|v| v.as_str())
|
|
||||||
.unwrap_or("?");
|
|
||||||
let remaining = data.get("remaining_actions")
|
|
||||||
.and_then(|v| v.as_i64())
|
|
||||||
.unwrap_or(-1);
|
|
||||||
println!(
|
|
||||||
" [RESULT] Rapporte : status={}, restant={}",
|
|
||||||
status, remaining
|
|
||||||
);
|
|
||||||
}
|
|
||||||
true
|
|
||||||
} else {
|
|
||||||
eprintln!(
|
|
||||||
" [RESULT] Rapport echoue : HTTP {}",
|
|
||||||
resp.status()
|
|
||||||
);
|
|
||||||
false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
eprintln!(" [RESULT] Erreur reseau : {}", e);
|
|
||||||
false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,135 +0,0 @@
|
|||||||
//! Notifications toast Windows.
|
|
||||||
//!
|
|
||||||
//! Affiche des notifications natives Windows via l'API WinRT (winrt-notification).
|
|
||||||
//! Equivalent de agent_v1/ui/notifications.py.
|
|
||||||
//!
|
|
||||||
//! Sur Linux/macOS : les notifications sont simplement affichees en console (log).
|
|
||||||
//! Le crate winrt-notification n'est disponible que sur Windows.
|
|
||||||
|
|
||||||
use std::sync::atomic::{AtomicU64, Ordering};
|
|
||||||
use std::time::{SystemTime, UNIX_EPOCH};
|
|
||||||
|
|
||||||
/// Minimum interval, in seconds, between two notifications (anti-spam).
|
|
||||||
/// NOTE(review): `notify` compares against one global timestamp, so this throttles every notification, not only identical ones.
|
|
||||||
const MIN_INTERVAL_SECS: u64 = 5;
|
|
||||||
|
|
||||||
/// Unix timestamp (seconds) of the last notification emitted, used for rate limiting; 0 until the first one.
|
|
||||||
static LAST_NOTIFY_TIME: AtomicU64 = AtomicU64::new(0);
|
|
||||||
|
|
||||||
/// Affiche une notification toast native.
|
|
||||||
///
|
|
||||||
/// Sur Windows : utilise winrt-notification pour les toasts natifs.
|
|
||||||
/// Sur les autres OS : affiche en console.
|
|
||||||
/// Rate-limited : pas plus d'une notification toutes les 5 secondes.
|
|
||||||
pub fn notify(title: &str, message: &str) {
|
|
||||||
// Rate limiting
|
|
||||||
let now = SystemTime::now()
|
|
||||||
.duration_since(UNIX_EPOCH)
|
|
||||||
.unwrap_or_default()
|
|
||||||
.as_secs();
|
|
||||||
let last = LAST_NOTIFY_TIME.load(Ordering::Relaxed);
|
|
||||||
if now - last < MIN_INTERVAL_SECS {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
LAST_NOTIFY_TIME.store(now, Ordering::Relaxed);
|
|
||||||
|
|
||||||
// Log console dans tous les cas
|
|
||||||
println!("[NOTIFICATION] {} : {}", title, message);
|
|
||||||
|
|
||||||
// Toast natif Windows
|
|
||||||
#[cfg(windows)]
|
|
||||||
{
|
|
||||||
notify_windows(title, message);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Windows implementation, built on the `winrt-notification` crate.
///
/// Shows a toast with the default sound under the PowerShell application id;
/// a failure is logged to stderr.
#[cfg(windows)]
fn notify_windows(title: &str, message: &str) {
    use winrt_notification::{Sound, Toast};

    let toast = Toast::new(Toast::POWERSHELL_APP_ID)
        .title(title)
        .text1(message)
        .sound(Some(Sound::Default));

    match toast.show() {
        Ok(_) => {}
        Err(e) => eprintln!("[NOTIFICATION] Erreur toast Windows : {:?}", e),
    }
}
|
|
||||||
|
|
||||||
// --- Notifications predefinies (equivalent Python) ---
|
|
||||||
|
|
||||||
/// Notification de bienvenue au demarrage.
|
|
||||||
pub fn greet() {
|
|
||||||
notify(
|
|
||||||
"Lea - Assistant IA",
|
|
||||||
"Bonjour ! Lea est prete. (IA)\nJe peux observer et automatiser vos taches.",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Notification de debut de session d'enregistrement.
|
|
||||||
pub fn session_started(name: &str) {
|
|
||||||
notify(
|
|
||||||
"Enregistrement demarre",
|
|
||||||
&format!(
|
|
||||||
"C'est parti ! Je regarde et je memorise.\nSession : {}",
|
|
||||||
name
|
|
||||||
),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Notification de fin de session d'enregistrement.
|
|
||||||
pub fn session_ended(actions_count: u32) {
|
|
||||||
notify(
|
|
||||||
"Enregistrement termine",
|
|
||||||
&format!(
|
|
||||||
"C'est note ! J'ai compris les {} etapes.",
|
|
||||||
actions_count
|
|
||||||
),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Notification de debut de replay.
|
|
||||||
pub fn replay_started(name: &str) {
|
|
||||||
notify(
|
|
||||||
"Replay en cours",
|
|
||||||
&format!(
|
|
||||||
"Le systeme d'IA execute la tache...\nWorkflow : {}",
|
|
||||||
name
|
|
||||||
),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Notification de fin de replay.
|
|
||||||
pub fn replay_finished(success: bool) {
|
|
||||||
if success {
|
|
||||||
notify("Replay termine", "C'est fait ! La tache a ete executee avec succes.");
|
|
||||||
} else {
|
|
||||||
notify(
|
|
||||||
"Replay echoue",
|
|
||||||
"Hmm, j'ai eu un souci. Verifiez le resultat.",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Notification de changement de connexion.
|
|
||||||
pub fn connection_changed(connected: bool) {
|
|
||||||
if connected {
|
|
||||||
notify("Connexion etablie", "Connectee au serveur RPA Vision.");
|
|
||||||
} else {
|
|
||||||
notify(
|
|
||||||
"Connexion perdue",
|
|
||||||
"Connexion au serveur perdue. Tentative de reconnexion...",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Notification d'arret d'urgence.
|
|
||||||
pub fn emergency_stop_activated() {
|
|
||||||
notify(
|
|
||||||
"ARRET D'URGENCE",
|
|
||||||
"Toutes les operations ont ete arretees immediatement.",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
@@ -1,713 +0,0 @@
|
|||||||
//! Capture d'evenements souris/clavier pour l'enregistrement de sessions.
|
|
||||||
//!
|
|
||||||
//! Utilise rdev pour intercepter les evenements globaux (sans focus).
|
|
||||||
//! Les evenements sont envoyes au serveur streaming via network.rs.
|
|
||||||
//! Equivalent de agent_v1/core/captor.py.
|
|
||||||
//!
|
|
||||||
//! Le recorder est actif uniquement quand state.recording == true.
|
|
||||||
//! Il capture :
|
|
||||||
//! - Clics souris (gauche, droit, double-clic)
|
|
||||||
//! - Saisie clavier (buffer de texte avec flush apres 500ms d'inactivite)
|
|
||||||
//! - Combos clavier (Ctrl+C, Alt+Tab, etc.)
|
|
||||||
//!
|
|
||||||
//! Sur les OS non-Windows, rdev fonctionne aussi (Linux via X11/evdev)
|
|
||||||
//! mais les tests doivent etre faits manuellement.
|
|
||||||
|
|
||||||
use crate::capture;
|
|
||||||
use crate::config::Config;
|
|
||||||
use crate::state::AgentState;
|
|
||||||
use crossbeam_channel::{bounded, Receiver, Sender};
|
|
||||||
use std::sync::Arc;
|
|
||||||
use std::thread;
|
|
||||||
use std::time::{Duration, Instant};
|
|
||||||
|
|
||||||
/// Event that has been captured and is ready to be sent to the server.
#[derive(Clone, Debug)]
pub enum CapturedEvent {
    /// Mouse click: normalised position, button name and window title.
    Click {
        x_pct: f64,
        y_pct: f64,
        button: String,
        window_title: String,
    },
    /// Double click: normalised position and window title.
    DoubleClick {
        x_pct: f64,
        y_pct: f64,
        window_title: String,
    },
    /// Typed text (accumulated through the keystroke buffer) and its position.
    Text {
        text: String,
        x_pct: f64,
        y_pct: f64,
    },
    /// Keyboard combo, e.g. `["ctrl", "c"]`.
    KeyCombo {
        keys: Vec<String>,
    },
    /// Scroll: delta and normalised position.
    Scroll {
        delta: i32,
        x_pct: f64,
        y_pct: f64,
    },
}
|
|
||||||
|
|
||||||
/// Internal recorder state: keystroke buffer, last-click tracking, modifier
/// keys and screen size.
struct RecorderState {
    /// Text currently being buffered (flushed after 500 ms of inactivity).
    text_buffer: String,
    /// Time of the last keystroke (drives the flush timeout).
    last_keystroke: Instant,
    /// Cursor position at the start of the typing sequence.
    text_start_x: f64,
    text_start_y: f64,
    /// Time and position of the last click (used for double-click detection).
    last_click_time: Instant,
    last_click_x: f64,
    last_click_y: f64,
    /// Modifier keys currently held down.
    ctrl_held: bool,
    alt_held: bool,
    shift_held: bool,
    meta_held: bool,
    /// Screen dimensions (used to normalise coordinates).
    screen_width: u32,
    screen_height: u32,
}

impl RecorderState {
    /// Creates a fresh state for a screen of the given size: empty text
    /// buffer, no modifier held, and a "last click" placed 10 seconds in the
    /// past.
    fn new(screen_width: u32, screen_height: u32) -> Self {
        let now = Instant::now();
        // `Instant - Duration` panics when the result cannot be represented
        // (possible when the monotonic clock started less than 10 s ago), so
        // subtract with `checked_sub` and fall back to `now`.
        let stale_click = now.checked_sub(Duration::from_secs(10)).unwrap_or(now);

        Self {
            text_buffer: String::new(),
            last_keystroke: now,
            text_start_x: 0.0,
            text_start_y: 0.0,
            last_click_time: stale_click,
            last_click_x: 0.0,
            last_click_y: 0.0,
            ctrl_held: false,
            alt_held: false,
            shift_held: false,
            meta_held: false,
            screen_width,
            screen_height,
        }
    }

    /// Converts absolute coordinates into fractions of the screen size.
    ///
    /// Returns `(0.0, 0.0)` when either screen dimension is zero.
    fn normalize(&self, x: f64, y: f64) -> (f64, f64) {
        if self.screen_width == 0 || self.screen_height == 0 {
            return (0.0, 0.0);
        }
        (
            x / self.screen_width as f64,
            y / self.screen_height as f64,
        )
    }

    /// Whether Ctrl, Alt or Meta is currently held. Shift is not taken into
    /// account.
    fn any_modifier_held(&self) -> bool {
        self.ctrl_held || self.alt_held || self.meta_held
    }
}
|
|
||||||
|
|
||||||
/// Text buffer flush delay, in milliseconds.
const TEXT_FLUSH_DELAY_MS: u64 = 500;

/// Maximum distance between two clicks for them to count as a double click,
/// in pixels.
const DOUBLE_CLICK_DIST_THRESHOLD: f64 = 10.0;

/// Maximum time between two clicks for them to count as a double click,
/// in milliseconds.
const DOUBLE_CLICK_TIME_MS: u64 = 400;
|
|
||||||
|
|
||||||
/// Demarre le thread de capture d'evenements.
|
|
||||||
///
|
|
||||||
/// Cree un canal crossbeam pour envoyer les evenements captures
|
|
||||||
/// vers le thread d'envoi reseau. Le listener rdev tourne dans
|
|
||||||
/// un thread dedie car il bloque (callback-based).
|
|
||||||
pub fn start_recorder(
|
|
||||||
config: Arc<Config>,
|
|
||||||
state: Arc<AgentState>,
|
|
||||||
) -> Receiver<CapturedEvent> {
|
|
||||||
let (tx, rx) = bounded::<CapturedEvent>(100);
|
|
||||||
|
|
||||||
// Thread du listener rdev
|
|
||||||
let listener_state = state.clone();
|
|
||||||
let listener_tx = tx.clone();
|
|
||||||
thread::Builder::new()
|
|
||||||
.name("event-listener".to_string())
|
|
||||||
.spawn(move || {
|
|
||||||
event_listener_loop(listener_tx, listener_state);
|
|
||||||
})
|
|
||||||
.expect("Impossible de demarrer le thread listener");
|
|
||||||
|
|
||||||
// Thread de flush du buffer de texte
|
|
||||||
let flush_tx = tx;
|
|
||||||
let flush_state = state.clone();
|
|
||||||
thread::Builder::new()
|
|
||||||
.name("text-flush".to_string())
|
|
||||||
.spawn(move || {
|
|
||||||
text_flush_loop(flush_tx, flush_state);
|
|
||||||
})
|
|
||||||
.expect("Impossible de demarrer le thread flush");
|
|
||||||
|
|
||||||
// Thread d'envoi des evenements captures vers le serveur
|
|
||||||
let send_state = state;
|
|
||||||
let send_rx = rx.clone();
|
|
||||||
let send_config = config;
|
|
||||||
thread::Builder::new()
|
|
||||||
.name("event-sender".to_string())
|
|
||||||
.spawn(move || {
|
|
||||||
event_sender_loop(send_rx, send_config, send_state);
|
|
||||||
})
|
|
||||||
.expect("Impossible de demarrer le thread sender");
|
|
||||||
|
|
||||||
rx
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Boucle du listener rdev — capture les evenements souris/clavier globaux.
|
|
||||||
///
|
|
||||||
/// rdev::listen est bloquant et appelle le callback pour chaque evenement.
|
|
||||||
/// On filtre et transforme les evenements pertinents, puis on les envoie
|
|
||||||
/// via le canal crossbeam.
|
|
||||||
fn event_listener_loop(tx: Sender<CapturedEvent>, state: Arc<AgentState>) {
|
|
||||||
let (screen_w, screen_h) = capture::screen_dimensions().unwrap_or((1920, 1080));
|
|
||||||
let rec_state = std::sync::Mutex::new(RecorderState::new(screen_w, screen_h));
|
|
||||||
|
|
||||||
println!(
|
|
||||||
"[RECORDER] Listener demarre (ecran {}x{})",
|
|
||||||
screen_w, screen_h
|
|
||||||
);
|
|
||||||
|
|
||||||
// rdev::listen prend un callback FnMut
|
|
||||||
let callback = move |event: rdev::Event| {
|
|
||||||
// Ne capturer que si l'enregistrement est actif
|
|
||||||
if !state.recording.load(std::sync::atomic::Ordering::SeqCst) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut rs = match rec_state.lock() {
|
|
||||||
Ok(s) => s,
|
|
||||||
Err(_) => return,
|
|
||||||
};
|
|
||||||
|
|
||||||
match event.event_type {
|
|
||||||
rdev::EventType::ButtonPress(button) => {
|
|
||||||
let btn_name = match button {
|
|
||||||
rdev::Button::Left => "left",
|
|
||||||
rdev::Button::Right => "right",
|
|
||||||
rdev::Button::Middle => "middle",
|
|
||||||
_ => return,
|
|
||||||
};
|
|
||||||
|
|
||||||
// Obtenir la position de la souris depuis l'evenement
|
|
||||||
// rdev ne fournit pas toujours les coordonnees dans ButtonPress,
|
|
||||||
// on utilise la derniere position connue via MouseMove.
|
|
||||||
// Pour simplifier, on capture la position courante du curseur.
|
|
||||||
let (mx, my) = get_cursor_position();
|
|
||||||
let (x_pct, y_pct) = rs.normalize(mx, my);
|
|
||||||
|
|
||||||
// Flush le buffer de texte avant le clic
|
|
||||||
if !rs.text_buffer.is_empty() {
|
|
||||||
let text_event = CapturedEvent::Text {
|
|
||||||
text: rs.text_buffer.clone(),
|
|
||||||
x_pct: rs.text_start_x,
|
|
||||||
y_pct: rs.text_start_y,
|
|
||||||
};
|
|
||||||
let _ = tx.try_send(text_event);
|
|
||||||
rs.text_buffer.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Detection double-clic
|
|
||||||
let now = Instant::now();
|
|
||||||
let dt = now.duration_since(rs.last_click_time);
|
|
||||||
let dx = (mx - rs.last_click_x).abs();
|
|
||||||
let dy = (my - rs.last_click_y).abs();
|
|
||||||
let dist = (dx * dx + dy * dy).sqrt();
|
|
||||||
|
|
||||||
if btn_name == "left"
|
|
||||||
&& dt < Duration::from_millis(DOUBLE_CLICK_TIME_MS)
|
|
||||||
&& dist < DOUBLE_CLICK_DIST_THRESHOLD
|
|
||||||
{
|
|
||||||
// Double-clic detecte
|
|
||||||
let event = CapturedEvent::DoubleClick {
|
|
||||||
x_pct,
|
|
||||||
y_pct,
|
|
||||||
window_title: get_active_window_title(),
|
|
||||||
};
|
|
||||||
let _ = tx.try_send(event);
|
|
||||||
} else {
|
|
||||||
// Clic simple
|
|
||||||
let event = CapturedEvent::Click {
|
|
||||||
x_pct,
|
|
||||||
y_pct,
|
|
||||||
button: btn_name.to_string(),
|
|
||||||
window_title: get_active_window_title(),
|
|
||||||
};
|
|
||||||
let _ = tx.try_send(event);
|
|
||||||
|
|
||||||
// Incrementer le compteur d'actions
|
|
||||||
state.increment_actions();
|
|
||||||
}
|
|
||||||
|
|
||||||
rs.last_click_time = now;
|
|
||||||
rs.last_click_x = mx;
|
|
||||||
rs.last_click_y = my;
|
|
||||||
}
|
|
||||||
|
|
||||||
rdev::EventType::KeyPress(key) => {
|
|
||||||
// Mettre a jour les modifieurs
|
|
||||||
match key {
|
|
||||||
rdev::Key::ControlLeft | rdev::Key::ControlRight => {
|
|
||||||
rs.ctrl_held = true;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
rdev::Key::Alt | rdev::Key::AltGr => {
|
|
||||||
rs.alt_held = true;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
rdev::Key::ShiftLeft | rdev::Key::ShiftRight => {
|
|
||||||
rs.shift_held = true;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
rdev::Key::MetaLeft | rdev::Key::MetaRight => {
|
|
||||||
rs.meta_held = true;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
_ => {}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Si un modifieur non-shift est enfonce, c'est un combo
|
|
||||||
if rs.any_modifier_held() {
|
|
||||||
let mut keys = Vec::new();
|
|
||||||
if rs.ctrl_held {
|
|
||||||
keys.push("ctrl".to_string());
|
|
||||||
}
|
|
||||||
if rs.alt_held {
|
|
||||||
keys.push("alt".to_string());
|
|
||||||
}
|
|
||||||
if rs.meta_held {
|
|
||||||
keys.push("win".to_string());
|
|
||||||
}
|
|
||||||
if rs.shift_held {
|
|
||||||
keys.push("shift".to_string());
|
|
||||||
}
|
|
||||||
keys.push(rdev_key_to_string(key));
|
|
||||||
|
|
||||||
// Flush le buffer avant le combo
|
|
||||||
if !rs.text_buffer.is_empty() {
|
|
||||||
let text_event = CapturedEvent::Text {
|
|
||||||
text: rs.text_buffer.clone(),
|
|
||||||
x_pct: rs.text_start_x,
|
|
||||||
y_pct: rs.text_start_y,
|
|
||||||
};
|
|
||||||
let _ = tx.try_send(text_event);
|
|
||||||
rs.text_buffer.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
let event = CapturedEvent::KeyCombo { keys };
|
|
||||||
let _ = tx.try_send(event);
|
|
||||||
state.increment_actions();
|
|
||||||
} else {
|
|
||||||
// Touche de saisie normale — ajouter au buffer
|
|
||||||
if let Some(c) = rdev_key_to_char(key) {
|
|
||||||
if rs.text_buffer.is_empty() {
|
|
||||||
let (mx, my) = get_cursor_position();
|
|
||||||
let (x, y) = rs.normalize(mx, my);
|
|
||||||
rs.text_start_x = x;
|
|
||||||
rs.text_start_y = y;
|
|
||||||
}
|
|
||||||
rs.text_buffer.push(c);
|
|
||||||
rs.last_keystroke = Instant::now();
|
|
||||||
} else {
|
|
||||||
// Touche speciale non-texte (Enter, Tab, etc.)
|
|
||||||
// Flush le buffer et envoyer comme combo simple
|
|
||||||
if !rs.text_buffer.is_empty() {
|
|
||||||
let text_event = CapturedEvent::Text {
|
|
||||||
text: rs.text_buffer.clone(),
|
|
||||||
x_pct: rs.text_start_x,
|
|
||||||
y_pct: rs.text_start_y,
|
|
||||||
};
|
|
||||||
let _ = tx.try_send(text_event);
|
|
||||||
rs.text_buffer.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
let key_name = rdev_key_to_string(key);
|
|
||||||
let event = CapturedEvent::KeyCombo {
|
|
||||||
keys: vec![key_name],
|
|
||||||
};
|
|
||||||
let _ = tx.try_send(event);
|
|
||||||
state.increment_actions();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
rdev::EventType::KeyRelease(key) => {
|
|
||||||
// Mettre a jour les modifieurs
|
|
||||||
match key {
|
|
||||||
rdev::Key::ControlLeft | rdev::Key::ControlRight => rs.ctrl_held = false,
|
|
||||||
rdev::Key::Alt | rdev::Key::AltGr => rs.alt_held = false,
|
|
||||||
rdev::Key::ShiftLeft | rdev::Key::ShiftRight => rs.shift_held = false,
|
|
||||||
rdev::Key::MetaLeft | rdev::Key::MetaRight => rs.meta_held = false,
|
|
||||||
_ => {}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
rdev::EventType::Wheel { delta_x: _, delta_y } => {
|
|
||||||
let (mx, my) = get_cursor_position();
|
|
||||||
let (x_pct, y_pct) = rs.normalize(mx, my);
|
|
||||||
let delta = if delta_y > 0 { 3 } else { -3 };
|
|
||||||
|
|
||||||
let event = CapturedEvent::Scroll {
|
|
||||||
delta,
|
|
||||||
x_pct,
|
|
||||||
y_pct,
|
|
||||||
};
|
|
||||||
let _ = tx.try_send(event);
|
|
||||||
state.increment_actions();
|
|
||||||
}
|
|
||||||
|
|
||||||
_ => {
|
|
||||||
// MouseMove et autres evenements ignores
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// rdev::listen est bloquant — il ne retourne qu'en cas d'erreur
|
|
||||||
if let Err(e) = rdev::listen(callback) {
|
|
||||||
eprintln!("[RECORDER] Erreur fatale du listener rdev : {:?}", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Boucle de flush periodique du buffer de texte.
|
|
||||||
///
|
|
||||||
/// Toutes les 100ms, verifie si le buffer de texte est non-vide
|
|
||||||
/// et si le delai de flush (500ms) est depasse. Si oui, flush le buffer
|
|
||||||
/// en envoyant un evenement Text.
|
|
||||||
fn text_flush_loop(_tx: Sender<CapturedEvent>, state: Arc<AgentState>) {
|
|
||||||
// Note: le flush est gere dans le callback rdev via le Mutex.
|
|
||||||
// Cette boucle est un filet de securite pour les cas ou le buffer
|
|
||||||
// resterait non-flush (timeout sans nouveau evenement).
|
|
||||||
// L'implementation complete necessiterait un acces partage au RecorderState.
|
|
||||||
// Pour l'instant, le flush est declenche par le prochain evenement (clic, combo).
|
|
||||||
|
|
||||||
while state.is_running() {
|
|
||||||
thread::sleep(Duration::from_millis(TEXT_FLUSH_DELAY_MS));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Boucle d'envoi des evenements captures vers le serveur streaming.
|
|
||||||
///
|
|
||||||
/// Lit les evenements du canal crossbeam et les envoie au serveur
|
|
||||||
/// via HTTP POST (format compatible avec le Python streamer).
|
|
||||||
fn event_sender_loop(
|
|
||||||
rx: Receiver<CapturedEvent>,
|
|
||||||
config: Arc<Config>,
|
|
||||||
state: Arc<AgentState>,
|
|
||||||
) {
|
|
||||||
let client = reqwest::blocking::Client::new();
|
|
||||||
|
|
||||||
println!("[RECORDER] Thread d'envoi d'evenements demarre");
|
|
||||||
|
|
||||||
loop {
|
|
||||||
// Bloquer jusqu'au prochain evenement (ou timeout)
|
|
||||||
match rx.recv_timeout(Duration::from_secs(1)) {
|
|
||||||
Ok(event) => {
|
|
||||||
if !state.recording.load(std::sync::atomic::Ordering::SeqCst) {
|
|
||||||
continue; // Enregistrement arrete entre-temps
|
|
||||||
}
|
|
||||||
|
|
||||||
let session_name = state.current_recording_name();
|
|
||||||
send_event_to_server(&client, &config, &event, &session_name);
|
|
||||||
}
|
|
||||||
Err(crossbeam_channel::RecvTimeoutError::Timeout) => {
|
|
||||||
if !state.is_running() {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Err(crossbeam_channel::RecvTimeoutError::Disconnected) => {
|
|
||||||
println!("[RECORDER] Canal deconnecte — arret du sender");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Envoie un evenement capture au serveur streaming.
|
|
||||||
///
|
|
||||||
/// Inclut la resolution de l'ecran dans chaque event pour que le serveur
|
|
||||||
/// puisse construire des ScreenStates avec la bonne resolution d'apprentissage
|
|
||||||
/// (au lieu du fallback 1920x1080).
|
|
||||||
fn send_event_to_server(
|
|
||||||
client: &reqwest::blocking::Client,
|
|
||||||
config: &Config,
|
|
||||||
event: &CapturedEvent,
|
|
||||||
session_name: &str,
|
|
||||||
) {
|
|
||||||
let url = format!("{}/traces/stream/event", config.server_url);
|
|
||||||
let timestamp = chrono::Utc::now().to_rfc3339();
|
|
||||||
let (screen_w, screen_h) = capture::screen_dimensions().unwrap_or((1920, 1080));
|
|
||||||
|
|
||||||
let payload = match event {
|
|
||||||
CapturedEvent::Click {
|
|
||||||
x_pct,
|
|
||||||
y_pct,
|
|
||||||
button,
|
|
||||||
window_title,
|
|
||||||
} => {
|
|
||||||
serde_json::json!({
|
|
||||||
"type": "click",
|
|
||||||
"x_pct": x_pct,
|
|
||||||
"y_pct": y_pct,
|
|
||||||
"button": button,
|
|
||||||
"window_title": window_title,
|
|
||||||
"session_name": session_name,
|
|
||||||
"machine_id": config.machine_id,
|
|
||||||
"timestamp": timestamp,
|
|
||||||
"screen_resolution": [screen_w, screen_h],
|
|
||||||
})
|
|
||||||
}
|
|
||||||
CapturedEvent::DoubleClick {
|
|
||||||
x_pct,
|
|
||||||
y_pct,
|
|
||||||
window_title,
|
|
||||||
} => {
|
|
||||||
serde_json::json!({
|
|
||||||
"type": "click",
|
|
||||||
"x_pct": x_pct,
|
|
||||||
"y_pct": y_pct,
|
|
||||||
"button": "double",
|
|
||||||
"window_title": window_title,
|
|
||||||
"session_name": session_name,
|
|
||||||
"machine_id": config.machine_id,
|
|
||||||
"timestamp": timestamp,
|
|
||||||
"screen_resolution": [screen_w, screen_h],
|
|
||||||
})
|
|
||||||
}
|
|
||||||
CapturedEvent::Text {
|
|
||||||
text,
|
|
||||||
x_pct,
|
|
||||||
y_pct,
|
|
||||||
} => {
|
|
||||||
serde_json::json!({
|
|
||||||
"type": "type",
|
|
||||||
"text": text,
|
|
||||||
"x_pct": x_pct,
|
|
||||||
"y_pct": y_pct,
|
|
||||||
"session_name": session_name,
|
|
||||||
"machine_id": config.machine_id,
|
|
||||||
"timestamp": timestamp,
|
|
||||||
"screen_resolution": [screen_w, screen_h],
|
|
||||||
})
|
|
||||||
}
|
|
||||||
CapturedEvent::KeyCombo { keys } => {
|
|
||||||
serde_json::json!({
|
|
||||||
"type": "key_combo",
|
|
||||||
"keys": keys,
|
|
||||||
"session_name": session_name,
|
|
||||||
"machine_id": config.machine_id,
|
|
||||||
"timestamp": timestamp,
|
|
||||||
"screen_resolution": [screen_w, screen_h],
|
|
||||||
})
|
|
||||||
}
|
|
||||||
CapturedEvent::Scroll {
|
|
||||||
delta,
|
|
||||||
x_pct,
|
|
||||||
y_pct,
|
|
||||||
} => {
|
|
||||||
serde_json::json!({
|
|
||||||
"type": "scroll",
|
|
||||||
"delta": delta,
|
|
||||||
"x_pct": x_pct,
|
|
||||||
"y_pct": y_pct,
|
|
||||||
"session_name": session_name,
|
|
||||||
"machine_id": config.machine_id,
|
|
||||||
"timestamp": timestamp,
|
|
||||||
"screen_resolution": [screen_w, screen_h],
|
|
||||||
})
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Envoi non-bloquant (on ne veut pas ralentir la capture)
|
|
||||||
match client
|
|
||||||
.post(&url)
|
|
||||||
.json(&payload)
|
|
||||||
.timeout(Duration::from_secs(5))
|
|
||||||
.send()
|
|
||||||
{
|
|
||||||
Ok(resp) => {
|
|
||||||
if !resp.status().is_success() {
|
|
||||||
eprintln!(
|
|
||||||
"[RECORDER] Envoi evenement echoue : HTTP {}",
|
|
||||||
resp.status()
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
eprintln!("[RECORDER] Erreur reseau : {}", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Capturer un screenshot pour les clics (dual: full + crop)
|
|
||||||
if matches!(
|
|
||||||
event,
|
|
||||||
CapturedEvent::Click { .. } | CapturedEvent::DoubleClick { .. }
|
|
||||||
) {
|
|
||||||
if let Some(img) = capture::capture_screenshot() {
|
|
||||||
let jpeg = capture::screenshot_to_jpeg_bytes(&img, 80);
|
|
||||||
if !jpeg.is_empty() {
|
|
||||||
let shot_id = format!("rec_{}", chrono::Utc::now().timestamp_millis());
|
|
||||||
let _ = crate::network::send_heartbeat(
|
|
||||||
&reqwest::blocking::Client::new(),
|
|
||||||
&crate::config::Config::from_env(),
|
|
||||||
&jpeg,
|
|
||||||
session_name,
|
|
||||||
);
|
|
||||||
let _ = shot_id; // utilise implicitement via send_heartbeat
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// --- Fonctions utilitaires ---
|
|
||||||
|
|
||||||
/// Returns the current mouse cursor position in screen coordinates.
///
/// On Windows the position comes from `GetCursorPos`. On other platforms, or
/// when the call fails, `(0.0, 0.0)` is returned.
fn get_cursor_position() -> (f64, f64) {
    #[cfg(windows)]
    {
        use windows_sys::Win32::Foundation::POINT;
        use windows_sys::Win32::UI::WindowsAndMessaging::GetCursorPos;

        let mut cursor = POINT { x: 0, y: 0 };
        // GetCursorPos returns non-zero on success.
        let ok = unsafe { GetCursorPos(&mut cursor) } != 0;
        if ok {
            return (cursor.x as f64, cursor.y as f64);
        }
    }

    // Fallback: position unknown.
    (0.0, 0.0)
}
|
|
||||||
|
|
||||||
/// Returns the title of the foreground window.
///
/// On Windows the title is read with `GetForegroundWindow` /
/// `GetWindowTextW` into a 256-element UTF-16 buffer. On other platforms, or
/// when there is no foreground window or the title is empty, the placeholder
/// `"Inconnu"` is returned.
fn get_active_window_title() -> String {
    #[cfg(windows)]
    {
        use windows_sys::Win32::UI::WindowsAndMessaging::{
            GetForegroundWindow, GetWindowTextW,
        };

        let mut title = [0u16; 256];
        let copied = unsafe {
            let foreground = GetForegroundWindow();
            if foreground.is_null() {
                0
            } else {
                GetWindowTextW(foreground, title.as_mut_ptr(), title.len() as i32)
            }
        };
        if copied > 0 {
            return String::from_utf16_lossy(&title[..copied as usize]);
        }
    }

    "Inconnu".to_string()
}
|
|
||||||
|
|
||||||
/// Convertit une touche rdev en caractere texte (pour le buffer de saisie).
|
|
||||||
/// Retourne None pour les touches speciales (Enter, Tab, etc.).
|
|
||||||
fn rdev_key_to_char(key: rdev::Key) -> Option<char> {
|
|
||||||
match key {
|
|
||||||
rdev::Key::KeyA => Some('a'),
|
|
||||||
rdev::Key::KeyB => Some('b'),
|
|
||||||
rdev::Key::KeyC => Some('c'),
|
|
||||||
rdev::Key::KeyD => Some('d'),
|
|
||||||
rdev::Key::KeyE => Some('e'),
|
|
||||||
rdev::Key::KeyF => Some('f'),
|
|
||||||
rdev::Key::KeyG => Some('g'),
|
|
||||||
rdev::Key::KeyH => Some('h'),
|
|
||||||
rdev::Key::KeyI => Some('i'),
|
|
||||||
rdev::Key::KeyJ => Some('j'),
|
|
||||||
rdev::Key::KeyK => Some('k'),
|
|
||||||
rdev::Key::KeyL => Some('l'),
|
|
||||||
rdev::Key::KeyM => Some('m'),
|
|
||||||
rdev::Key::KeyN => Some('n'),
|
|
||||||
rdev::Key::KeyO => Some('o'),
|
|
||||||
rdev::Key::KeyP => Some('p'),
|
|
||||||
rdev::Key::KeyQ => Some('q'),
|
|
||||||
rdev::Key::KeyR => Some('r'),
|
|
||||||
rdev::Key::KeyS => Some('s'),
|
|
||||||
rdev::Key::KeyT => Some('t'),
|
|
||||||
rdev::Key::KeyU => Some('u'),
|
|
||||||
rdev::Key::KeyV => Some('v'),
|
|
||||||
rdev::Key::KeyW => Some('w'),
|
|
||||||
rdev::Key::KeyX => Some('x'),
|
|
||||||
rdev::Key::KeyY => Some('y'),
|
|
||||||
rdev::Key::KeyZ => Some('z'),
|
|
||||||
rdev::Key::Num0 => Some('0'),
|
|
||||||
rdev::Key::Num1 => Some('1'),
|
|
||||||
rdev::Key::Num2 => Some('2'),
|
|
||||||
rdev::Key::Num3 => Some('3'),
|
|
||||||
rdev::Key::Num4 => Some('4'),
|
|
||||||
rdev::Key::Num5 => Some('5'),
|
|
||||||
rdev::Key::Num6 => Some('6'),
|
|
||||||
rdev::Key::Num7 => Some('7'),
|
|
||||||
rdev::Key::Num8 => Some('8'),
|
|
||||||
rdev::Key::Num9 => Some('9'),
|
|
||||||
rdev::Key::Space => Some(' '),
|
|
||||||
rdev::Key::Minus => Some('-'),
|
|
||||||
rdev::Key::Equal => Some('='),
|
|
||||||
rdev::Key::LeftBracket => Some('['),
|
|
||||||
rdev::Key::RightBracket => Some(']'),
|
|
||||||
rdev::Key::SemiColon => Some(';'),
|
|
||||||
rdev::Key::Quote => Some('\''),
|
|
||||||
rdev::Key::Comma => Some(','),
|
|
||||||
rdev::Key::Dot => Some('.'),
|
|
||||||
rdev::Key::Slash => Some('/'),
|
|
||||||
rdev::Key::BackSlash => Some('\\'),
|
|
||||||
_ => None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Convertit une touche rdev en nom de touche (pour les combos).
|
|
||||||
fn rdev_key_to_string(key: rdev::Key) -> String {
|
|
||||||
match key {
|
|
||||||
rdev::Key::Return => "enter".to_string(),
|
|
||||||
rdev::Key::Tab => "tab".to_string(),
|
|
||||||
rdev::Key::Escape => "escape".to_string(),
|
|
||||||
rdev::Key::Backspace => "backspace".to_string(),
|
|
||||||
rdev::Key::Delete => "delete".to_string(),
|
|
||||||
rdev::Key::Space => "space".to_string(),
|
|
||||||
rdev::Key::UpArrow => "up".to_string(),
|
|
||||||
rdev::Key::DownArrow => "down".to_string(),
|
|
||||||
rdev::Key::LeftArrow => "left".to_string(),
|
|
||||||
rdev::Key::RightArrow => "right".to_string(),
|
|
||||||
rdev::Key::Home => "home".to_string(),
|
|
||||||
rdev::Key::End => "end".to_string(),
|
|
||||||
rdev::Key::PageUp => "page_up".to_string(),
|
|
||||||
rdev::Key::PageDown => "page_down".to_string(),
|
|
||||||
rdev::Key::F1 => "f1".to_string(),
|
|
||||||
rdev::Key::F2 => "f2".to_string(),
|
|
||||||
rdev::Key::F3 => "f3".to_string(),
|
|
||||||
rdev::Key::F4 => "f4".to_string(),
|
|
||||||
rdev::Key::F5 => "f5".to_string(),
|
|
||||||
rdev::Key::F6 => "f6".to_string(),
|
|
||||||
rdev::Key::F7 => "f7".to_string(),
|
|
||||||
rdev::Key::F8 => "f8".to_string(),
|
|
||||||
rdev::Key::F9 => "f9".to_string(),
|
|
||||||
rdev::Key::F10 => "f10".to_string(),
|
|
||||||
rdev::Key::F11 => "f11".to_string(),
|
|
||||||
rdev::Key::F12 => "f12".to_string(),
|
|
||||||
rdev::Key::CapsLock => "caps_lock".to_string(),
|
|
||||||
rdev::Key::Insert => "insert".to_string(),
|
|
||||||
rdev::Key::PrintScreen => "print_screen".to_string(),
|
|
||||||
// Pour les lettres et chiffres, reutiliser rdev_key_to_char
|
|
||||||
other => {
|
|
||||||
if let Some(c) = rdev_key_to_char(other) {
|
|
||||||
c.to_string()
|
|
||||||
} else {
|
|
||||||
format!("{:?}", other).to_lowercase()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,125 +0,0 @@
|
|||||||
//! Boucle de polling replay.
|
|
||||||
//!
|
|
||||||
//! Poll le serveur toutes les secondes pour recuperer les actions a executer.
|
|
||||||
//! Quand une action est recue, l'execute via executor et rapporte le resultat.
|
|
||||||
//! NOTE : le backoff exponentiel n'est pas encore implemente ; l'intervalle de poll reste constant.
|
|
||||||
//!
|
|
||||||
//! Reproduit le comportement de _replay_poll_loop dans agent_v1/main.py.
|
|
||||||
|
|
||||||
use crate::capture;
|
|
||||||
use crate::config::Config;
|
|
||||||
use crate::executor;
|
|
||||||
use crate::network;
|
|
||||||
use crate::notifications;
|
|
||||||
use crate::state::AgentState;
|
|
||||||
use reqwest::blocking::Client;
|
|
||||||
use std::thread;
|
|
||||||
use std::time::Duration;
|
|
||||||
|
|
||||||
/// Boucle de polling replay (tourne dans un thread dedie).
|
|
||||||
///
|
|
||||||
/// - Poll GET /replay/next toutes les secondes
|
|
||||||
/// - Execute l'action via executor
|
|
||||||
/// - Capture un screenshot post-action
|
|
||||||
/// - Rapporte le resultat via POST /replay/result
|
|
||||||
/// - Backoff exponentiel si le serveur est indisponible
|
|
||||||
pub fn replay_poll_loop(config: &Config, state: &AgentState) {
|
|
||||||
let client = Client::new();
|
|
||||||
let mut poll_count: u64 = 0;
|
|
||||||
let backoff = config.replay_poll_interval_s;
|
|
||||||
let _backoff_max = 30.0_f64;
|
|
||||||
let _backoff_factor = 1.5_f64;
|
|
||||||
let mut replay_active = false;
|
|
||||||
|
|
||||||
println!(
|
|
||||||
"[REPLAY] Boucle replay demarree — poll toutes les {:.0}s sur {}",
|
|
||||||
config.replay_poll_interval_s, config.server_url
|
|
||||||
);
|
|
||||||
|
|
||||||
while state.is_running() {
|
|
||||||
// Verifier l'arret d'urgence
|
|
||||||
if state
|
|
||||||
.emergency_stop
|
|
||||||
.load(std::sync::atomic::Ordering::SeqCst)
|
|
||||||
{
|
|
||||||
if replay_active {
|
|
||||||
println!("[REPLAY] ARRET D'URGENCE — replay interrompu");
|
|
||||||
replay_active = false;
|
|
||||||
state.set_replay_active(false);
|
|
||||||
}
|
|
||||||
thread::sleep(Duration::from_secs(1));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
poll_count += 1;
|
|
||||||
|
|
||||||
// Log periodique toutes les 60s pour confirmer que la boucle tourne
|
|
||||||
let polls_per_minute = (60.0 / backoff).ceil() as u64;
|
|
||||||
if polls_per_minute > 0 && poll_count % polls_per_minute == 0 {
|
|
||||||
println!(
|
|
||||||
"[REPLAY] Poll #{} — session={} — serveur={}",
|
|
||||||
poll_count,
|
|
||||||
config.agent_session_id(),
|
|
||||||
config.server_url,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
match network::poll_next_action(&client, config) {
|
|
||||||
Some(action) => {
|
|
||||||
if !replay_active {
|
|
||||||
replay_active = true;
|
|
||||||
state.set_replay_active(true);
|
|
||||||
notifications::replay_started("workflow");
|
|
||||||
println!("[REPLAY] Replay demarre");
|
|
||||||
}
|
|
||||||
|
|
||||||
let action_type = action.action_type.clone();
|
|
||||||
let action_id = action.action_id.clone();
|
|
||||||
println!(
|
|
||||||
"\n>>> REPLAY ACTION RECUE : {} (id={})",
|
|
||||||
action_type, action_id
|
|
||||||
);
|
|
||||||
|
|
||||||
// Obtenir les dimensions de l'ecran
|
|
||||||
let (sw, sh) = capture::screen_dimensions().unwrap_or((1920, 1080));
|
|
||||||
|
|
||||||
// Executer l'action (avec config pour la resolution visuelle)
|
|
||||||
println!(">>> Execution de l'action {}...", action_type);
|
|
||||||
let mut result = executor::execute_action(&action, sw, sh, config);
|
|
||||||
println!(
|
|
||||||
">>> Resultat execution : success={}, error={:?}",
|
|
||||||
result.success, result.error
|
|
||||||
);
|
|
||||||
|
|
||||||
// Capture screenshot post-action (apres 500ms)
|
|
||||||
thread::sleep(Duration::from_millis(500));
|
|
||||||
if let Some(img) = capture::capture_screenshot() {
|
|
||||||
let b64 = capture::screenshot_to_jpeg_base64(&img, 60);
|
|
||||||
if !b64.is_empty() {
|
|
||||||
result.screenshot = Some(b64);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Rapporter le resultat au serveur (TOUJOURS, meme en erreur)
|
|
||||||
network::report_result(&client, config, &result);
|
|
||||||
|
|
||||||
// Poll plus rapidement pour enchainer les actions
|
|
||||||
thread::sleep(Duration::from_millis(200));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
None => {
|
|
||||||
if replay_active {
|
|
||||||
println!("[REPLAY] Replay termine — retour en mode capture");
|
|
||||||
replay_active = false;
|
|
||||||
state.set_replay_active(false);
|
|
||||||
notifications::replay_finished(true);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let sleep_duration = Duration::from_secs_f64(backoff);
|
|
||||||
thread::sleep(sleep_duration);
|
|
||||||
}
|
|
||||||
|
|
||||||
println!("[REPLAY] Boucle arretee.");
|
|
||||||
}
|
|
||||||
@@ -1,402 +0,0 @@
|
|||||||
//! Mini serveur HTTP pour les captures d'écran à la demande.
|
|
||||||
//!
|
|
||||||
//! Écoute sur le port 5006 (configurable via RPA_CAPTURE_PORT).
|
|
||||||
//! Endpoints :
|
|
||||||
//! GET /capture -> screenshot frais en JSON {image, width, height, format}
|
|
||||||
//! GET /health -> {"status": "ok"}
|
|
||||||
//! POST /file-action -> opérations fichiers (list, create, move, copy, sort)
|
|
||||||
//!
|
|
||||||
//! Reproduit le comportement de agent_v1/ui/capture_server.py.
|
|
||||||
|
|
||||||
use crate::capture;
|
|
||||||
use serde_json::json;
|
|
||||||
use tiny_http::{Header, Method, Response, Server};
|
|
||||||
|
|
||||||
/// Démarre le serveur de capture sur le port donné (bloquant).
|
|
||||||
///
|
|
||||||
/// Cette fonction tourne dans un thread dédié et ne retourne jamais.
|
|
||||||
pub fn start_capture_server(port: u16) {
|
|
||||||
let addr = format!("0.0.0.0:{}", port);
|
|
||||||
let server = match Server::http(&addr) {
|
|
||||||
Ok(s) => s,
|
|
||||||
Err(e) => {
|
|
||||||
eprintln!("[CAPTURE] Impossible de demarrer le serveur sur {} : {}", addr, e);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
println!("[CAPTURE] Serveur de capture demarre sur le port {}", port);
|
|
||||||
|
|
||||||
for request in server.incoming_requests() {
|
|
||||||
let url = request.url().to_string();
|
|
||||||
let method = request.method().clone();
|
|
||||||
|
|
||||||
match (method, url.as_str()) {
|
|
||||||
(Method::Get, "/capture") => handle_capture(request),
|
|
||||||
(Method::Get, "/health") => handle_health(request),
|
|
||||||
(Method::Post, "/file-action") => handle_file_action(request),
|
|
||||||
(Method::Options, _) => handle_options(request),
|
|
||||||
_ => {
|
|
||||||
let body = json!({"error": "not found"}).to_string();
|
|
||||||
let _ = send_json_response(request, 404, &body);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// GET /capture — Capture un screenshot frais et le renvoie en JSON base64.
|
|
||||||
fn handle_capture(request: tiny_http::Request) {
|
|
||||||
let start = std::time::Instant::now();
|
|
||||||
|
|
||||||
match capture::capture_screenshot() {
|
|
||||||
Some(img) => {
|
|
||||||
let width = img.width();
|
|
||||||
let height = img.height();
|
|
||||||
let b64 = capture::screenshot_to_jpeg_base64(&img, 80);
|
|
||||||
let elapsed_ms = start.elapsed().as_millis();
|
|
||||||
|
|
||||||
let body = json!({
|
|
||||||
"image": b64,
|
|
||||||
"width": width,
|
|
||||||
"height": height,
|
|
||||||
"format": "jpeg",
|
|
||||||
"source": "rust_agent",
|
|
||||||
"capture_ms": elapsed_ms,
|
|
||||||
})
|
|
||||||
.to_string();
|
|
||||||
|
|
||||||
let _ = send_json_response(request, 200, &body);
|
|
||||||
}
|
|
||||||
None => {
|
|
||||||
let body = json!({"error": "Capture echouee"}).to_string();
|
|
||||||
let _ = send_json_response(request, 500, &body);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// GET /health — Vérification de santé.
|
|
||||||
fn handle_health(request: tiny_http::Request) {
|
|
||||||
let body = json!({
|
|
||||||
"status": "ok",
|
|
||||||
"agent": "rust",
|
|
||||||
"version": crate::config::AGENT_VERSION,
|
|
||||||
})
|
|
||||||
.to_string();
|
|
||||||
let _ = send_json_response(request, 200, &body);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// POST /file-action — Opérations fichiers sur la machine locale.
|
|
||||||
///
|
|
||||||
/// Body JSON attendu : {"action": "file_list_dir", "params": {"path": "C:\\..."}}
|
|
||||||
/// Actions supportées : file_list_dir, file_create_dir, file_move, file_copy, file_sort_by_ext
|
|
||||||
fn handle_file_action(mut request: tiny_http::Request) {
|
|
||||||
// Lire le body
|
|
||||||
let mut body_str = String::new();
|
|
||||||
if let Err(e) = request.as_reader().read_to_string(&mut body_str) {
|
|
||||||
let resp = json!({"error": format!("Erreur lecture body : {}", e)}).to_string();
|
|
||||||
let _ = send_json_response(request, 400, &resp);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Parser le JSON
|
|
||||||
let data: serde_json::Value = match serde_json::from_str(&body_str) {
|
|
||||||
Ok(v) => v,
|
|
||||||
Err(_) => {
|
|
||||||
let resp = json!({"error": "JSON invalide"}).to_string();
|
|
||||||
let _ = send_json_response(request, 400, &resp);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let action = data.get("action").and_then(|v| v.as_str()).unwrap_or("");
|
|
||||||
let params = data.get("params").cloned().unwrap_or(json!({}));
|
|
||||||
|
|
||||||
if action.is_empty() {
|
|
||||||
let resp = json!({"error": "Parametre 'action' requis"}).to_string();
|
|
||||||
let _ = send_json_response(request, 400, &resp);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
let result = execute_file_action(action, ¶ms);
|
|
||||||
let code = if result.get("error").is_some() { 500 } else { 200 };
|
|
||||||
let _ = send_json_response(request, code, &result.to_string());
|
|
||||||
}
|
|
||||||
|
|
||||||
/// OPTIONS — Réponse CORS preflight.
|
|
||||||
fn handle_options(request: tiny_http::Request) {
|
|
||||||
let response = Response::empty(200)
|
|
||||||
.with_header(cors_origin())
|
|
||||||
.with_header(cors_methods())
|
|
||||||
.with_header(cors_headers());
|
|
||||||
let _ = request.respond(response);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Exécute une action fichier.
|
|
||||||
fn execute_file_action(action: &str, params: &serde_json::Value) -> serde_json::Value {
|
|
||||||
match action {
|
|
||||||
"file_list_dir" => {
|
|
||||||
let path = params.get("path").and_then(|v| v.as_str()).unwrap_or("");
|
|
||||||
let pattern = params
|
|
||||||
.get("pattern")
|
|
||||||
.and_then(|v| v.as_str())
|
|
||||||
.unwrap_or("*");
|
|
||||||
|
|
||||||
if path.is_empty() {
|
|
||||||
return json!({"error": "Parametre 'path' requis"});
|
|
||||||
}
|
|
||||||
if !is_safe_path(path) {
|
|
||||||
return json!({"error": format!("Chemin non autorise : {}", path)});
|
|
||||||
}
|
|
||||||
|
|
||||||
match std::fs::read_dir(path) {
|
|
||||||
Ok(entries) => {
|
|
||||||
let mut files = Vec::new();
|
|
||||||
let mut extensions: std::collections::HashMap<String, u32> =
|
|
||||||
std::collections::HashMap::new();
|
|
||||||
|
|
||||||
for entry in entries.flatten() {
|
|
||||||
if let Ok(metadata) = entry.metadata() {
|
|
||||||
if metadata.is_file() {
|
|
||||||
let name = entry.file_name().to_string_lossy().to_string();
|
|
||||||
|
|
||||||
// Filtrage par pattern (simple glob avec *)
|
|
||||||
if pattern != "*" && !simple_glob_match(pattern, &name) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
let ext = std::path::Path::new(&name)
|
|
||||||
.extension()
|
|
||||||
.map(|e| e.to_string_lossy().to_lowercase())
|
|
||||||
.unwrap_or_else(|| "sans_extension".to_string());
|
|
||||||
|
|
||||||
files.push(json!({
|
|
||||||
"name": name,
|
|
||||||
"extension": ext,
|
|
||||||
"size": metadata.len(),
|
|
||||||
"path": entry.path().to_string_lossy(),
|
|
||||||
}));
|
|
||||||
|
|
||||||
*extensions.entry(ext).or_insert(0) += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
json!({
|
|
||||||
"files": files,
|
|
||||||
"count": files.len(),
|
|
||||||
"extensions": extensions,
|
|
||||||
"path": path,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
Err(e) => json!({"error": format!("Erreur lecture dossier : {}", e)}),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
"file_create_dir" => {
|
|
||||||
let path = params.get("path").and_then(|v| v.as_str()).unwrap_or("");
|
|
||||||
if path.is_empty() {
|
|
||||||
return json!({"error": "Parametre 'path' requis"});
|
|
||||||
}
|
|
||||||
if !is_safe_path(path) {
|
|
||||||
return json!({"error": format!("Chemin non autorise : {}", path)});
|
|
||||||
}
|
|
||||||
|
|
||||||
let existed = std::path::Path::new(path).exists();
|
|
||||||
match std::fs::create_dir_all(path) {
|
|
||||||
Ok(_) => json!({
|
|
||||||
"created": !existed,
|
|
||||||
"path": path,
|
|
||||||
"already_existed": existed,
|
|
||||||
}),
|
|
||||||
Err(e) => json!({"error": format!("Erreur creation dossier : {}", e)}),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
"file_move" => {
|
|
||||||
let src = params.get("source").and_then(|v| v.as_str()).unwrap_or("");
|
|
||||||
let dst = params
|
|
||||||
.get("destination")
|
|
||||||
.and_then(|v| v.as_str())
|
|
||||||
.unwrap_or("");
|
|
||||||
|
|
||||||
if src.is_empty() || dst.is_empty() {
|
|
||||||
return json!({"error": "Parametres 'source' et 'destination' requis"});
|
|
||||||
}
|
|
||||||
if !is_safe_path(src) || !is_safe_path(dst) {
|
|
||||||
return json!({"error": "Chemin non autorise"});
|
|
||||||
}
|
|
||||||
|
|
||||||
// Créer le dossier parent de destination
|
|
||||||
if let Some(parent) = std::path::Path::new(dst).parent() {
|
|
||||||
let _ = std::fs::create_dir_all(parent);
|
|
||||||
}
|
|
||||||
|
|
||||||
match std::fs::rename(src, dst) {
|
|
||||||
Ok(_) => json!({"moved": true, "source": src, "destination": dst}),
|
|
||||||
Err(e) => json!({"error": format!("Erreur deplacement : {}", e)}),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
"file_copy" => {
|
|
||||||
let src = params.get("source").and_then(|v| v.as_str()).unwrap_or("");
|
|
||||||
let dst = params
|
|
||||||
.get("destination")
|
|
||||||
.and_then(|v| v.as_str())
|
|
||||||
.unwrap_or("");
|
|
||||||
|
|
||||||
if src.is_empty() || dst.is_empty() {
|
|
||||||
return json!({"error": "Parametres 'source' et 'destination' requis"});
|
|
||||||
}
|
|
||||||
if !is_safe_path(src) || !is_safe_path(dst) {
|
|
||||||
return json!({"error": "Chemin non autorise"});
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(parent) = std::path::Path::new(dst).parent() {
|
|
||||||
let _ = std::fs::create_dir_all(parent);
|
|
||||||
}
|
|
||||||
|
|
||||||
match std::fs::copy(src, dst) {
|
|
||||||
Ok(_) => json!({"copied": true, "source": src, "destination": dst}),
|
|
||||||
Err(e) => json!({"error": format!("Erreur copie : {}", e)}),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
"file_sort_by_ext" => {
|
|
||||||
let source_dir = params
|
|
||||||
.get("source_dir")
|
|
||||||
.and_then(|v| v.as_str())
|
|
||||||
.unwrap_or("");
|
|
||||||
let create_subdirs = params
|
|
||||||
.get("create_subdirs")
|
|
||||||
.and_then(|v| v.as_bool())
|
|
||||||
.unwrap_or(true);
|
|
||||||
|
|
||||||
if source_dir.is_empty() {
|
|
||||||
return json!({"error": "Parametre 'source_dir' requis"});
|
|
||||||
}
|
|
||||||
if !is_safe_path(source_dir) {
|
|
||||||
return json!({"error": format!("Chemin non autorise : {}", source_dir)});
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut moved = Vec::new();
|
|
||||||
let mut extensions: std::collections::HashMap<String, u32> =
|
|
||||||
std::collections::HashMap::new();
|
|
||||||
|
|
||||||
if let Ok(entries) = std::fs::read_dir(source_dir) {
|
|
||||||
for entry in entries.flatten() {
|
|
||||||
if let Ok(metadata) = entry.metadata() {
|
|
||||||
if metadata.is_file() {
|
|
||||||
let name = entry.file_name().to_string_lossy().to_string();
|
|
||||||
let ext = std::path::Path::new(&name)
|
|
||||||
.extension()
|
|
||||||
.map(|e| e.to_string_lossy().to_lowercase())
|
|
||||||
.unwrap_or_else(|| "sans_extension".to_string());
|
|
||||||
|
|
||||||
let target_dir =
|
|
||||||
std::path::Path::new(source_dir).join(&ext);
|
|
||||||
|
|
||||||
if create_subdirs {
|
|
||||||
let _ = std::fs::create_dir_all(&target_dir);
|
|
||||||
} else if !target_dir.exists() {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
let dest = target_dir.join(&name);
|
|
||||||
if let Err(e) = std::fs::rename(entry.path(), &dest) {
|
|
||||||
eprintln!("[FILE] Erreur deplacement {} : {}", name, e);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
moved.push(json!({
|
|
||||||
"file": name,
|
|
||||||
"to": ext,
|
|
||||||
"destination": dest.to_string_lossy(),
|
|
||||||
}));
|
|
||||||
*extensions.entry(ext).or_insert(0) += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
json!({
|
|
||||||
"moved": moved,
|
|
||||||
"count": moved.len(),
|
|
||||||
"extensions": extensions,
|
|
||||||
"source_dir": source_dir,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
_ => json!({"error": format!("Action fichier inconnue : {}", action)}),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Checks that a path lies inside an allowed area (anti-traversal guard).
///
/// Allowed roots (compared case-insensitively):
/// - Windows: `C:\Users`, `D:\`, `E:\`
/// - other systems: `/home`, `/tmp` (used for tests)
///
/// A path is rejected when it is empty, when it contains a `..` component
/// (which could climb out of an allowed root), or when it does not start with
/// an allowed root on a path-component boundary. `/homework` is therefore not
/// treated as being under `/home`.
///
/// This is a purely lexical check: the path is not resolved on disk, so
/// symbolic links are not followed or detected.
fn is_safe_path(path_str: &str) -> bool {
    if path_str.is_empty() {
        return false;
    }

    let path = std::path::Path::new(path_str);

    // Refuse any parent-directory component: `C:\Users\..\Windows` starts with
    // an allowed root but points outside of it.
    if path
        .components()
        .any(|part| matches!(part, std::path::Component::ParentDir))
    {
        return false;
    }

    // Upper-case so the comparison with the (upper-case) roots ignores case.
    let normalized = path.to_string_lossy().to_uppercase();

    let on_windows = cfg!(target_os = "windows");
    let allowed: &[&str] = if on_windows {
        &["C:\\USERS", "D:\\", "E:\\"]
    } else {
        &["/HOME", "/TMP"]
    };
    let is_separator = |c: char| c == '/' || (on_windows && c == '\\');

    allowed.iter().any(|root| match normalized.strip_prefix(*root) {
        // The root must be the whole path, end with a separator itself
        // (drive roots), or be followed by a separator.
        Some(rest) => {
            rest.is_empty() || root.ends_with(is_separator) || rest.starts_with(is_separator)
        }
        None => false,
    })
}
|
|
||||||
|
|
||||||
/// Simple case-insensitive glob matching where `*` matches any run of
/// characters (including none).
///
/// - a pattern without `*` must equal `name` (ignoring case);
/// - `*` alone matches everything;
/// - `*` may appear anywhere and several times: `*.txt`, `report*`,
///   `img_*_final.*`.
///
/// No other wildcard (`?`, character classes) is supported.
fn simple_glob_match(pattern: &str, name: &str) -> bool {
    let pattern = pattern.to_lowercase();
    let name = name.to_lowercase();

    if !pattern.contains('*') {
        return name == pattern;
    }

    let mut literals = pattern.split('*');

    // Text before the first `*` must be a prefix of the name.
    let head = literals.next().unwrap_or("");
    let mut remaining = match name.strip_prefix(head) {
        Some(rest) => rest,
        None => return false,
    };

    // The pattern contains at least one `*`, so at least one segment follows.
    let segments: Vec<&str> = literals.collect();
    let (tail, middle) = match segments.split_last() {
        Some(parts) => parts,
        None => return false,
    };

    // Middle literals are matched left to right at their earliest position,
    // which leaves as much text as possible for the following ones.
    for literal in middle {
        if literal.is_empty() {
            continue;
        }
        match remaining.find(literal) {
            Some(at) => remaining = &remaining[at + literal.len()..],
            None => return false,
        }
    }

    // Text after the last `*` must be a suffix of what is left.
    remaining.ends_with(tail)
}
|
|
||||||
|
|
||||||
// --- Headers CORS ---
|
|
||||||
|
|
||||||
fn cors_origin() -> Header {
|
|
||||||
Header::from_bytes("Access-Control-Allow-Origin", "*").unwrap()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn cors_methods() -> Header {
|
|
||||||
Header::from_bytes("Access-Control-Allow-Methods", "GET, POST, OPTIONS").unwrap()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn cors_headers() -> Header {
|
|
||||||
Header::from_bytes("Access-Control-Allow-Headers", "Content-Type").unwrap()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Envoie une réponse JSON avec les headers CORS.
|
|
||||||
fn send_json_response(
|
|
||||||
request: tiny_http::Request,
|
|
||||||
status_code: u16,
|
|
||||||
body: &str,
|
|
||||||
) -> Result<(), Box<dyn std::error::Error>> {
|
|
||||||
let status = tiny_http::StatusCode(status_code);
|
|
||||||
let content_type = Header::from_bytes("Content-Type", "application/json").unwrap();
|
|
||||||
|
|
||||||
let response = Response::from_string(body)
|
|
||||||
.with_status_code(status)
|
|
||||||
.with_header(content_type)
|
|
||||||
.with_header(cors_origin())
|
|
||||||
.with_header(cors_methods())
|
|
||||||
.with_header(cors_headers());
|
|
||||||
|
|
||||||
request.respond(response)?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
@@ -1,175 +0,0 @@
|
|||||||
//! Etat partage thread-safe de l'agent.
|
|
||||||
//!
|
|
||||||
//! Centralise l'etat courant (enregistrement, replay, connexion, etc.)
|
|
||||||
//! accessible depuis tous les threads (systray, heartbeat, replay, recorder).
|
|
||||||
//! Equivalent de agent_v1/ui/shared_state.py.
|
|
||||||
|
|
||||||
use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
|
|
||||||
use std::sync::{Arc, Mutex};
|
|
||||||
|
|
||||||
/// Possible states of the systray icon.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TrayState {
    /// Grey — waiting, no active session.
    Idle,
    /// Red — recording in progress.
    Recording,
    /// Green — connected to the server, ready.
    Connected,
    /// Blue — replay in progress.
    Replay,
}

/// Shared agent state, thread-safe through `Arc` + atomics.
///
/// Flags and counters are atomics so they can be read and written without a
/// lock. The string fields are guarded by a `Mutex`.
#[derive(Debug)]
pub struct AgentState {
    /// A recording (capture session) is in progress.
    pub recording: AtomicBool,

    /// Name of the current recording session.
    pub recording_name: Mutex<String>,

    /// A replay (execution of actions) is in progress.
    pub replay_active: AtomicBool,

    /// Connected to the streaming server.
    pub connected: AtomicBool,

    /// Number of actions captured in the current session.
    pub actions_count: AtomicU32,

    /// The agent is running (`false` = shutdown requested).
    pub running: AtomicBool,

    /// The chat window is visible.
    pub chat_visible: AtomicBool,

    /// The emergency stop has been triggered.
    pub emergency_stop: AtomicBool,

    /// Last notification message (used to avoid duplicates).
    #[allow(dead_code)]
    pub last_notification: Mutex<String>,
}

impl AgentState {
    /// Creates a new shared state with the default values (see `Default`).
    pub fn new() -> Arc<Self> {
        Arc::new(Self::default())
    }

    /// Locks the recording name, recovering the guard if the mutex is poisoned.
    ///
    /// The protected value is a plain `String`, which stays valid even if a
    /// thread panicked while holding the lock, so recovering is safe and
    /// avoids silently losing the session name.
    fn lock_recording_name(&self) -> std::sync::MutexGuard<'_, String> {
        self.recording_name
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner())
    }

    /// Starts a recording session named `name` and resets the action counter.
    pub fn start_recording(&self, name: &str) {
        // Publish the name and reset the counter BEFORE raising the flag, so a
        // thread that observes `recording == true` never sees the previous
        // session's name or counts an action that is then reset.
        *self.lock_recording_name() = name.to_string();
        self.actions_count.store(0, Ordering::SeqCst);
        self.recording.store(true, Ordering::SeqCst);
        println!("[STATE] Enregistrement demarre : '{}'", name);
    }

    /// Stops the current recording.
    ///
    /// Returns the session name and the number of actions captured.
    pub fn stop_recording(&self) -> (String, u32) {
        self.recording.store(false, Ordering::SeqCst);
        let count = self.actions_count.load(Ordering::SeqCst);
        let name = self.lock_recording_name().clone();
        println!("[STATE] Enregistrement arrete : '{}' ({} actions)", name, count);
        (name, count)
    }

    /// Increments the captured-actions counter and returns the new value.
    pub fn increment_actions(&self) -> u32 {
        self.actions_count.fetch_add(1, Ordering::SeqCst) + 1
    }

    /// Returns `true` while no shutdown has been requested.
    pub fn is_running(&self) -> bool {
        self.running.load(Ordering::SeqCst)
    }

    /// Requests the agent to shut down.
    pub fn request_shutdown(&self) {
        self.running.store(false, Ordering::SeqCst);
        println!("[STATE] Arret demande");
    }

    /// Enables or disables the replay flag.
    pub fn set_replay_active(&self, active: bool) {
        self.replay_active.store(active, Ordering::SeqCst);
    }

    /// Updates the server connection status; logs only when it changes.
    pub fn set_connected(&self, connected: bool) {
        let was_connected = self.connected.swap(connected, Ordering::SeqCst);
        if was_connected != connected {
            println!(
                "[STATE] Connexion serveur : {}",
                if connected { "CONNECTE" } else { "DECONNECTE" }
            );
        }
    }

    /// Triggers the emergency stop: sets the flag and clears both the
    /// recording and the replay flags.
    pub fn emergency_stop(&self) {
        self.emergency_stop.store(true, Ordering::SeqCst);
        self.recording.store(false, Ordering::SeqCst);
        self.replay_active.store(false, Ordering::SeqCst);
        println!("[STATE] === ARRET D'URGENCE ACTIVE ===");
    }

    /// Returns the state the systray icon should show.
    ///
    /// Priority: recording, then replay, then connected, otherwise idle.
    pub fn tray_state(&self) -> TrayState {
        if self.recording.load(Ordering::SeqCst) {
            TrayState::Recording
        } else if self.replay_active.load(Ordering::SeqCst) {
            TrayState::Replay
        } else if self.connected.load(Ordering::SeqCst) {
            TrayState::Connected
        } else {
            TrayState::Idle
        }
    }

    /// Returns the name of the current recording session.
    pub fn current_recording_name(&self) -> String {
        self.lock_recording_name().clone()
    }
}

impl Default for AgentState {
    /// Builds the raw default state: every flag cleared except `running`,
    /// counter at zero, empty strings.
    ///
    /// `Default` cannot return `Arc<Self>`; prefer `AgentState::new()` when a
    /// shared handle is needed.
    fn default() -> Self {
        Self {
            recording: AtomicBool::new(false),
            recording_name: Mutex::new(String::new()),
            replay_active: AtomicBool::new(false),
            connected: AtomicBool::new(false),
            actions_count: AtomicU32::new(0),
            running: AtomicBool::new(true),
            chat_visible: AtomicBool::new(false),
            emergency_stop: AtomicBool::new(false),
            last_notification: Mutex::new(String::new()),
        }
    }
}
|
|
||||||
@@ -1,274 +0,0 @@
|
|||||||
//! Métadonnées système : DPI, résolution, fenêtre active, moniteur.
|
|
||||||
//!
|
|
||||||
//! Expose des fonctions pour capturer les informations d'affichage
|
|
||||||
//! critiques qui seront envoyées au serveur avec chaque heartbeat.
|
|
||||||
//! Sur Windows, utilise les API Win32 (user32.dll).
|
|
||||||
//! Sur Linux, retourne des valeurs par défaut ou utilise xcap.
|
|
||||||
|
|
||||||
use serde::Serialize;
|
|
||||||
|
|
||||||
/// Métadonnées complètes de l'écran.
|
|
||||||
#[derive(Debug, Clone, Serialize)]
|
|
||||||
pub struct ScreenMetadata {
|
|
||||||
/// Facteur DPI en pourcentage (100 = normal, 150 = haute résolution)
|
|
||||||
pub dpi_scale: u32,
|
|
||||||
/// Résolution de l'écran principal [largeur, hauteur]
|
|
||||||
pub screen_resolution: [u32; 2],
|
|
||||||
/// Bounds de la fenêtre active [x, y, largeur, hauteur], None si pas de fenêtre
|
|
||||||
pub window_bounds: Option<[i32; 4]>,
|
|
||||||
/// Index du moniteur sur lequel se trouve la fenêtre active (0 = principal)
|
|
||||||
pub monitor_index: u32,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl std::fmt::Display for ScreenMetadata {
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
||||||
write!(
|
|
||||||
f,
|
|
||||||
"{}x{} @ {}% DPI, monitor #{}",
|
|
||||||
self.screen_resolution[0],
|
|
||||||
self.screen_resolution[1],
|
|
||||||
self.dpi_scale,
|
|
||||||
self.monitor_index,
|
|
||||||
)?;
|
|
||||||
if let Some(wb) = &self.window_bounds {
|
|
||||||
write!(f, ", fenetre [{}x{} @ ({},{})]", wb[2], wb[3], wb[0], wb[1])?;
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Windows : API Win32 via FFI
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
#[cfg(target_os = "windows")]
mod win {
    use windows_sys::Win32::Foundation::{BOOL, LPARAM, RECT};
    use windows_sys::Win32::Graphics::Gdi::{
        EnumDisplayMonitors, GetMonitorInfoW, MonitorFromWindow, HMONITOR, MONITORINFO,
        MONITOR_DEFAULTTOPRIMARY,
    };
    use windows_sys::Win32::UI::WindowsAndMessaging::{
        GetForegroundWindow, GetSystemMetrics, GetWindowRect, SM_CXSCREEN, SM_CYSCREEN,
    };

    // GetDpiForSystem lives in Win32_UI_HiDpi, a windows-sys feature that is
    // not enabled. A raw FFI declaration avoids adding that feature.
    extern "system" {
        fn GetDpiForSystem() -> u32;
    }

    /// Returns the DPI scale factor in % (100 = normal, 125, 150, 200...).
    ///
    /// The system DPI is converted with 96 DPI as the 100% reference.
    pub fn get_dpi_scale() -> u32 {
        unsafe {
            let dpi = GetDpiForSystem();
            if dpi == 0 {
                // Defensive fallback: treat 0 as "unknown" and report 100%.
                100
            } else {
                (dpi * 100) / 96
            }
        }
    }

    /// Returns `(width, height)` of the primary monitor via `GetSystemMetrics`.
    ///
    /// IMPORTANT: this is the physical resolution only if the process is
    /// DPI-aware. Otherwise the logical resolution (virtualized by DPI
    /// scaling) is returned.
    /// NOTE(review): DPI awareness is said to be enabled in main.rs — confirm.
    ///
    /// Returns `(0, 0)` when either metric is not positive.
    pub fn get_screen_resolution() -> (u32, u32) {
        unsafe {
            let w = GetSystemMetrics(SM_CXSCREEN);
            let h = GetSystemMetrics(SM_CYSCREEN);
            if w > 0 && h > 0 {
                (w as u32, h as u32)
            } else {
                (0, 0)
            }
        }
    }

    /// Returns `(x, y, width, height)` of the foreground window, or `None`
    /// when there is no foreground window or its rectangle cannot be read.
    pub fn get_window_bounds() -> Option<(i32, i32, i32, i32)> {
        unsafe {
            let hwnd = GetForegroundWindow();
            if hwnd.is_null() {
                return None;
            }
            let mut rect: RECT = std::mem::zeroed();
            if GetWindowRect(hwnd, &mut rect) != 0 {
                let w = rect.right - rect.left;
                let h = rect.bottom - rect.top;
                Some((rect.left, rect.top, w, h))
            } else {
                None
            }
        }
    }

    /// Flag marking the primary monitor in `MONITORINFO.dwFlags`.
    const MONITORINFOF_PRIMARY: u32 = 1;

    /// Returns the index of the monitor holding the foreground window.
    ///
    /// The primary monitor is always index 0. Non-primary monitors are
    /// numbered 1, 2, ... in enumeration order, so a secondary monitor can
    /// never be reported as 0 even when it is enumerated before the primary.
    /// Returns 0 when there is no foreground window or no monitor is found.
    pub fn get_monitor_index() -> u32 {
        unsafe {
            let hwnd = GetForegroundWindow();
            if hwnd.is_null() {
                return 0;
            }

            let target_hmon = MonitorFromWindow(hwnd, MONITOR_DEFAULTTOPRIMARY);
            if target_hmon.is_null() {
                return 0;
            }

            // State shared with the enumeration callback through LPARAM.
            struct CallbackData {
                target: HMONITOR,
                // Index to give to the next non-primary monitor (starts at 1
                // because 0 is reserved for the primary).
                next_secondary_index: u32,
                found_index: u32,
            }

            unsafe extern "system" fn enum_callback(
                hmonitor: HMONITOR,
                _hdc: windows_sys::Win32::Graphics::Gdi::HDC,
                _lprect: *mut RECT,
                lparam: LPARAM,
            ) -> BOOL {
                let data = &mut *(lparam as *mut CallbackData);

                // Query the monitor flags to detect the primary. If the call
                // fails, dwFlags stays 0 and the monitor counts as secondary.
                let mut info: MONITORINFO = std::mem::zeroed();
                info.cbSize = std::mem::size_of::<MONITORINFO>() as u32;
                GetMonitorInfoW(hmonitor, &mut info);

                if info.dwFlags & MONITORINFOF_PRIMARY != 0 {
                    // Primary monitor: always index 0, consumes no secondary slot.
                    if hmonitor == data.target {
                        data.found_index = 0;
                    }
                } else {
                    if hmonitor == data.target {
                        data.found_index = data.next_secondary_index;
                    }
                    data.next_secondary_index += 1;
                }

                1 // TRUE: continue the enumeration
            }

            let mut data = CallbackData {
                target: target_hmon,
                next_secondary_index: 1,
                found_index: 0,
            };

            EnumDisplayMonitors(
                std::ptr::null_mut(), // null HDC = all monitors
                std::ptr::null(),
                Some(enum_callback),
                &mut data as *mut CallbackData as LPARAM,
            );

            data.found_index
        }
    }
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// Linux / fallback : valeurs par défaut ou xcap
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
#[cfg(not(target_os = "windows"))]
|
|
||||||
mod fallback {
|
|
||||||
/// Sur Linux, pas de DPI système accessible simplement. Retourne 100%.
|
|
||||||
pub fn get_dpi_scale() -> u32 {
|
|
||||||
100
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Résolution via xcap (mêmes moniteurs que la capture).
|
|
||||||
pub fn get_screen_resolution() -> (u32, u32) {
|
|
||||||
if let Ok(monitors) = xcap::Monitor::all() {
|
|
||||||
if let Some(primary) = monitors.into_iter().find(|m| m.is_primary().unwrap_or(false)) {
|
|
||||||
let w = primary.width().unwrap_or(0);
|
|
||||||
let h = primary.height().unwrap_or(0);
|
|
||||||
return (w, h);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
(0, 0)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Pas d'API window bounds sur Linux en mode headless. Retourne None.
|
|
||||||
pub fn get_window_bounds() -> Option<(i32, i32, i32, i32)> {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Moniteur principal = index 0 (fallback).
|
|
||||||
pub fn get_monitor_index() -> u32 {
|
|
||||||
0
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// =============================================================================
|
|
||||||
// API publique
|
|
||||||
// =============================================================================
|
|
||||||
|
|
||||||
/// Retourne le facteur DPI en % (100 = normal, 150 = haute résolution).
|
|
||||||
pub fn get_dpi_scale() -> u32 {
|
|
||||||
#[cfg(target_os = "windows")]
|
|
||||||
{
|
|
||||||
win::get_dpi_scale()
|
|
||||||
}
|
|
||||||
#[cfg(not(target_os = "windows"))]
|
|
||||||
{
|
|
||||||
fallback::get_dpi_scale()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Retourne (largeur, hauteur) du moniteur principal.
|
|
||||||
pub fn get_screen_resolution() -> (u32, u32) {
|
|
||||||
#[cfg(target_os = "windows")]
|
|
||||||
{
|
|
||||||
win::get_screen_resolution()
|
|
||||||
}
|
|
||||||
#[cfg(not(target_os = "windows"))]
|
|
||||||
{
|
|
||||||
fallback::get_screen_resolution()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Retourne (x, y, largeur, hauteur) de la fenêtre active, ou None.
|
|
||||||
pub fn get_window_bounds() -> Option<(i32, i32, i32, i32)> {
|
|
||||||
#[cfg(target_os = "windows")]
|
|
||||||
{
|
|
||||||
win::get_window_bounds()
|
|
||||||
}
|
|
||||||
#[cfg(not(target_os = "windows"))]
|
|
||||||
{
|
|
||||||
fallback::get_window_bounds()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Retourne l'index du moniteur de la fenêtre active (0 = principal).
|
|
||||||
pub fn get_monitor_index() -> u32 {
|
|
||||||
#[cfg(target_os = "windows")]
|
|
||||||
{
|
|
||||||
win::get_monitor_index()
|
|
||||||
}
|
|
||||||
#[cfg(not(target_os = "windows"))]
|
|
||||||
{
|
|
||||||
fallback::get_monitor_index()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Collecte toutes les métadonnées système en une seule structure.
|
|
||||||
pub fn get_screen_metadata() -> ScreenMetadata {
|
|
||||||
let (sw, sh) = get_screen_resolution();
|
|
||||||
let wb = get_window_bounds().map(|(x, y, w, h)| [x, y, w, h]);
|
|
||||||
|
|
||||||
ScreenMetadata {
|
|
||||||
dpi_scale: get_dpi_scale(),
|
|
||||||
screen_resolution: [sw, sh],
|
|
||||||
window_bounds: wb,
|
|
||||||
monitor_index: get_monitor_index(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,336 +0,0 @@
|
|||||||
//! Icone systray avec menu contextuel.
|
|
||||||
//!
|
|
||||||
//! Affiche une icone dans la barre des taches Windows avec un menu contextuel
|
|
||||||
//! permettant de controler l'agent (enregistrement, replay, chat, etc.).
|
|
||||||
//! Equivalent de agent_v1/ui/smart_tray.py.
|
|
||||||
//!
|
|
||||||
//! Utilise tray-icon (crate Tauri) pour l'icone et le menu.
|
|
||||||
//! Necessite une boucle d'evenements Windows (winit ou Win32 message pump).
|
|
||||||
//!
|
|
||||||
//! Sur Linux : le systray n'est pas disponible, l'agent tourne en mode console.
|
|
||||||
|
|
||||||
#[allow(unused_imports)]
|
|
||||||
use crate::config::Config;
|
|
||||||
#[allow(unused_imports)]
|
|
||||||
use crate::notifications;
|
|
||||||
#[allow(unused_imports)]
|
|
||||||
use crate::state::{AgentState, TrayState};
|
|
||||||
use std::sync::Arc;
|
|
||||||
|
|
||||||
/// Handles to the tray menu entries, kept so menu events can be dispatched
/// to the right action.
///
/// Windows only. Each field is the menu item (or submenu) of the same name.
#[cfg(windows)]
pub struct TrayMenuIds {
    pub machine_info: tray_icon::menu::MenuItem,
    pub status_item: tray_icon::menu::MenuItem,
    pub start_recording: tray_icon::menu::MenuItem,
    pub stop_recording: tray_icon::menu::MenuItem,
    pub workflows_submenu: tray_icon::menu::Submenu,
    pub emergency_stop: tray_icon::menu::MenuItem,
    pub open_chat: tray_icon::menu::MenuItem,
    pub open_files: tray_icon::menu::MenuItem,
    pub quit: tray_icon::menu::MenuItem,
}
|
|
||||||
|
|
||||||
/// Creates the system-tray icon and runs the associated event loop.
///
/// This function blocks the calling thread (it must be the main thread on
/// Windows) until `state.is_running()` becomes false. If the tray icon or the
/// winit event loop cannot be created, it falls back to
/// `fallback_console_loop`.
#[cfg(windows)]
pub fn run_tray_loop(config: Arc<Config>, state: Arc<AgentState>) {
    use tray_icon::{
        menu::MenuEvent,
        TrayIconBuilder,
    };
    use winit::application::ApplicationHandler;
    use winit::event::WindowEvent;
    use winit::event_loop::{ActiveEventLoop, ControlFlow, EventLoop};
    use winit::window::WindowId;

    // Build the menu
    let menu_ids = create_menu(&config);
    let menu = build_tray_menu(&menu_ids);

    // Initial icon (grey = idle)
    let icon = generate_tray_icon(TrayState::Idle);

    // Create the tray icon
    let tray = match TrayIconBuilder::new()
        .with_menu(Box::new(menu))
        .with_tooltip("Lea - Agent RPA Vision (IA)")
        .with_icon(icon)
        .build()
    {
        Ok(t) => t,
        Err(e) => {
            eprintln!("[TRAY] Impossible de creer l'icone systray : {}", e);
            // Console-mode fallback
            fallback_console_loop(&state);
            return;
        }
    };

    println!("[TRAY] Icone systray creee — menu contextuel disponible");
    notifications::greet();

    // State carried by the winit ApplicationHandler
    struct TrayApp {
        config: Arc<Config>,
        state: Arc<AgentState>,
        tray: tray_icon::TrayIcon,
        menu_ids: TrayMenuIds,
        current_tray_state: TrayState,
    }

    impl ApplicationHandler for TrayApp {
        fn resumed(&mut self, _event_loop: &ActiveEventLoop) {
            // Nothing to do — no winit window is created
        }

        fn window_event(
            &mut self,
            _event_loop: &ActiveEventLoop,
            _window_id: WindowId,
            _event: WindowEvent,
        ) {
            // No winit window — ignore
        }

        fn about_to_wait(&mut self, event_loop: &ActiveEventLoop) {
            // Stop the loop once the agent has been asked to shut down
            if !self.state.is_running() {
                event_loop.exit();
                return;
            }

            // Drain every pending menu event. Handling a single event per
            // tick would delay queued clicks by one polling interval each.
            let menu_receiver = MenuEvent::receiver();
            while let Ok(event) = menu_receiver.try_recv() {
                handle_menu_event(&event, &self.menu_ids, &self.config, &self.state);
                if !self.state.is_running() {
                    // Shutdown requested: do not act on clicks queued after it
                    break;
                }
            }

            // Refresh tooltip and icon when the agent state changed
            let new_state = self.state.tray_state();
            if new_state != self.current_tray_state {
                self.current_tray_state = new_state;
                let tooltip = match new_state {
                    TrayState::Idle => "Lea - En attente",
                    TrayState::Recording => "Lea - ENREGISTREMENT EN COURS",
                    TrayState::Connected => "Lea - Connectee au serveur",
                    TrayState::Replay => "Lea - REPLAY EN COURS",
                };
                let _ = self.tray.set_tooltip(Some(tooltip));
                let icon = generate_tray_icon(new_state);
                let _ = self.tray.set_icon(Some(icon));
            }

            // Wake up again in 100 ms for the next polling cycle
            event_loop.set_control_flow(ControlFlow::WaitUntil(
                std::time::Instant::now() + std::time::Duration::from_millis(100),
            ));
        }
    }

    // Create and start the winit event loop
    let event_loop = match EventLoop::new() {
        Ok(el) => el,
        Err(e) => {
            eprintln!("[TRAY] Impossible de creer la boucle d'evenements : {}", e);
            fallback_console_loop(&state);
            return;
        }
    };

    let mut app = TrayApp {
        config,
        state,
        tray,
        menu_ids,
        current_tray_state: TrayState::Idle,
    };

    // Report an abnormal termination instead of discarding it silently
    if let Err(e) = event_loop.run_app(&mut app) {
        eprintln!("[TRAY] Boucle d'evenements terminee en erreur : {}", e);
    }
}
|
|
||||||
|
|
||||||
/// Builds every tray menu entry together with its label.
///
/// Entries are created in the same order as they are stored in the returned
/// `TrayMenuIds`; none of them has a keyboard accelerator.
#[cfg(windows)]
fn create_menu(config: &Config) -> TrayMenuIds {
    use tray_icon::menu::{MenuItem, Submenu};

    // Disabled entry: shown for information only
    let info = |label: &str| MenuItem::new(label, false, None);
    // Enabled, clickable entry
    let action = |label: &str| MenuItem::new(label, true, None);

    TrayMenuIds {
        machine_info: info(&format!("Machine : {}", config.machine_id)),
        status_item: info("Deconnectee"),
        start_recording: action("Apprenez-moi une tache"),
        stop_recording: action("C'est termine"),
        workflows_submenu: {
            // The submenu starts with a disabled placeholder entry
            let tasks = Submenu::new("Mes taches", true);
            let _ = tasks.append(&info("(chargement...)"));
            tasks
        },
        emergency_stop: action("ARRET D'URGENCE"),
        open_chat: action("Discuter avec Lea"),
        open_files: action("Mes fichiers"),
        quit: action("Quitter Lea"),
    }
}
|
|
||||||
|
|
||||||
/// Assembles the tray menu from the previously created entries.
///
/// Append failures are ignored entry by entry, so one failing item does not
/// prevent the following ones from being added.
#[cfg(windows)]
fn build_tray_menu(ids: &TrayMenuIds) -> tray_icon::menu::Menu {
    use tray_icon::menu::{IsMenuItem, Menu, PredefinedMenuItem};

    let menu = Menu::new();

    // Best-effort append: the result is deliberately discarded
    let add = |entry: &dyn IsMenuItem| {
        let _ = menu.append(entry);
    };

    // Machine / connection information
    add(&ids.machine_info);
    add(&ids.status_item);
    add(&PredefinedMenuItem::separator());

    // Recording controls
    add(&ids.start_recording);
    add(&ids.stop_recording);
    add(&PredefinedMenuItem::separator());

    // Known workflows
    add(&ids.workflows_submenu);
    add(&PredefinedMenuItem::separator());

    // Tools
    add(&ids.emergency_stop);
    add(&ids.open_chat);
    add(&ids.open_files);
    add(&PredefinedMenuItem::separator());

    add(&ids.quit);

    menu
}
|
|
||||||
|
|
||||||
/// Handles a click on one of the tray menu entries.
///
/// The event id is compared against the ids stored in `ids`; events that
/// match no known entry are ignored.
#[cfg(windows)]
fn handle_menu_event(
    event: &tray_icon::menu::MenuEvent,
    ids: &TrayMenuIds,
    _config: &Config,
    state: &AgentState,
) {
    use std::sync::atomic::Ordering;

    let event_id = event.id();

    if event_id == ids.start_recording.id() {
        // Start only when no recording is already in progress
        if !state.recording.load(Ordering::SeqCst) {
            let name = format!(
                "session_{}",
                chrono::Utc::now().format("%Y%m%d_%H%M%S")
            );
            state.start_recording(&name);
            notifications::session_started(&name);
            println!("[TRAY] Enregistrement demarre : {}", name);
        }
    } else if event_id == ids.stop_recording.id() {
        // Stop only when a recording is in progress
        if state.recording.load(Ordering::SeqCst) {
            let (name, count) = state.stop_recording();
            notifications::session_ended(count);
            println!(
                "[TRAY] Enregistrement arrete : {} ({} actions)",
                name, count
            );
        }
    } else if event_id == ids.emergency_stop.id() {
        state.emergency_stop();
        notifications::emergency_stop_activated();
        println!("[TRAY] ARRET D'URGENCE ACTIVE");
    } else if event_id == ids.open_chat.id() {
        state.chat_visible.store(true, Ordering::SeqCst);
        println!("[TRAY] Ouverture du chat demandee");
    } else if event_id == ids.open_files.id() {
        // This function is compiled on Windows only, so the Windows path is
        // the only reachable one (the former non-Windows branch was dead).
        let sessions_dir = "C:\\rpa_vision\\sessions";
        println!("[TRAY] Ouverture du dossier : {}", sessions_dir);
        // Report a failed launch instead of ignoring it silently
        if let Err(e) = std::process::Command::new("explorer")
            .arg(sessions_dir)
            .spawn()
        {
            eprintln!(
                "[TRAY] Impossible d'ouvrir le dossier {} : {}",
                sessions_dir, e
            );
        }
    } else if event_id == ids.quit.id() {
        println!("[TRAY] Fermeture demandee par l'utilisateur");
        state.request_shutdown();
    }
}
|
|
||||||
|
|
||||||
/// Generates a tray icon whose colour reflects the agent state.
///
/// Produces a 32x32 RGBA image containing a filled disc:
/// - Grey  (#808080): idle
/// - Red   (#FF0000): recording
/// - Green (#00CC00): connected
/// - Blue  (#0066FF): replay
///
/// A one-pixel ring around the disc fades the alpha channel out; everything
/// further away stays fully transparent.
#[cfg(windows)]
fn generate_tray_icon(tray_state: TrayState) -> tray_icon::Icon {
    let size = 32u32;
    let mut rgba = vec![0u8; (size * size * 4) as usize];

    let (r, g, b) = match tray_state {
        TrayState::Idle => (128u8, 128u8, 128u8),
        TrayState::Recording => (255u8, 0u8, 0u8),
        TrayState::Connected => (0u8, 204u8, 0u8),
        TrayState::Replay => (0u8, 102u8, 255u8),
    };

    let center = (size / 2) as f64;
    let radius = (size / 2 - 2) as f64;

    // Walk the buffer one RGBA pixel at a time (row-major order)
    for (index, pixel) in rgba.chunks_exact_mut(4).enumerate() {
        let x = index as u32 % size;
        let y = index as u32 / size;

        let dx = x as f64 - center;
        let dy = y as f64 - center;
        let dist = (dx * dx + dy * dy).sqrt();

        let alpha = if dist <= radius {
            // Inside the disc: opaque
            255
        } else if dist <= radius + 1.0 {
            // Fading ring: alpha decreases linearly with the distance
            ((radius + 1.0 - dist) * 255.0) as u8
        } else {
            // Outside: leave the pixel transparent
            continue;
        };

        pixel.copy_from_slice(&[r, g, b, alpha]);
    }

    tray_icon::Icon::from_rgba(rgba, size, size).expect("Erreur creation icone systray")
}
|
|
||||||
|
|
||||||
/// Mode console (Linux ou fallback si le systray echoue).
|
|
||||||
fn fallback_console_loop(state: &AgentState) {
|
|
||||||
println!("[TRAY] Mode console — Appuyez sur Ctrl+C pour quitter");
|
|
||||||
while state.is_running() {
|
|
||||||
std::thread::sleep(std::time::Duration::from_millis(500));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Version non-Windows : pas de systray, l'agent tourne en mode console.
|
|
||||||
#[cfg(not(windows))]
|
|
||||||
pub fn run_tray_loop(_config: Arc<Config>, state: Arc<AgentState>) {
|
|
||||||
println!("[TRAY] Systray non disponible sur cet OS — mode console");
|
|
||||||
fallback_console_loop(&state);
|
|
||||||
}
|
|
||||||
@@ -1,110 +0,0 @@
|
|||||||
//! Résolution visuelle des cibles via le serveur.
|
|
||||||
//!
|
|
||||||
//! Envoie un screenshot + target_spec au serveur qui effectue le template
|
|
||||||
//! matching OpenCV et retourne les coordonnées résolues (x_pct, y_pct).
|
|
||||||
//! Approche server-side : pas de dépendance OpenCV dans le binaire Rust.
|
|
||||||
|
|
||||||
use crate::capture;
|
|
||||||
use crate::config::Config;
|
|
||||||
use reqwest::blocking::Client;
|
|
||||||
|
|
||||||
/// Résout visuellement une cible en envoyant le screenshot courant au serveur.
|
|
||||||
///
|
|
||||||
/// Capture l'écran, l'encode en JPEG base64, envoie au endpoint
|
|
||||||
/// `/traces/stream/replay/resolve_target` qui fait le template matching.
|
|
||||||
///
|
|
||||||
/// Retourne Some((x_pct, y_pct)) si la cible est trouvée, None sinon.
|
|
||||||
pub fn resolve_target_visual(
|
|
||||||
config: &Config,
|
|
||||||
target_spec: &serde_json::Value,
|
|
||||||
fallback_x: f64,
|
|
||||||
fallback_y: f64,
|
|
||||||
screen_width: u32,
|
|
||||||
screen_height: u32,
|
|
||||||
) -> Option<(f64, f64)> {
|
|
||||||
// 1. Capturer le screenshot actuel
|
|
||||||
let screenshot = match capture::capture_screenshot() {
|
|
||||||
Some(img) => img,
|
|
||||||
None => {
|
|
||||||
eprintln!(" [VISUAL] Echec capture screenshot pour résolution visuelle");
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Encoder en JPEG base64 (qualité 75 — bon compromis taille/précision)
|
|
||||||
let screenshot_b64 = capture::screenshot_to_jpeg_base64(&screenshot, 75);
|
|
||||||
if screenshot_b64.is_empty() {
|
|
||||||
eprintln!(" [VISUAL] Echec encodage JPEG");
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
|
|
||||||
println!(
|
|
||||||
" [VISUAL] Screenshot capture ({}x{}), envoi au serveur...",
|
|
||||||
screen_width, screen_height
|
|
||||||
);
|
|
||||||
|
|
||||||
// 2. Envoyer au serveur /replay/resolve_target
|
|
||||||
let client = Client::new();
|
|
||||||
let payload = serde_json::json!({
|
|
||||||
"session_id": config.agent_session_id(),
|
|
||||||
"screenshot_b64": screenshot_b64,
|
|
||||||
"target_spec": target_spec,
|
|
||||||
"fallback_x_pct": fallback_x,
|
|
||||||
"fallback_y_pct": fallback_y,
|
|
||||||
"screen_width": screen_width,
|
|
||||||
"screen_height": screen_height,
|
|
||||||
});
|
|
||||||
|
|
||||||
let url = format!("{}/traces/stream/replay/resolve_target", config.server_url);
|
|
||||||
|
|
||||||
let resp = match client
|
|
||||||
.post(&url)
|
|
||||||
.json(&payload)
|
|
||||||
.timeout(std::time::Duration::from_secs(30))
|
|
||||||
.send()
|
|
||||||
{
|
|
||||||
Ok(r) => r,
|
|
||||||
Err(e) => {
|
|
||||||
eprintln!(" [VISUAL] Erreur reseau vers {} : {}", url, e);
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
if !resp.status().is_success() {
|
|
||||||
eprintln!(
|
|
||||||
" [VISUAL] Serveur a repondu HTTP {}",
|
|
||||||
resp.status()
|
|
||||||
);
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
|
|
||||||
// 3. Parser la réponse
|
|
||||||
let data: serde_json::Value = match resp.json() {
|
|
||||||
Ok(d) => d,
|
|
||||||
Err(e) => {
|
|
||||||
eprintln!(" [VISUAL] Erreur parsing reponse JSON : {}", e);
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let resolved = data["resolved"].as_bool().unwrap_or(false);
|
|
||||||
if resolved {
|
|
||||||
let x = data["x_pct"].as_f64()?;
|
|
||||||
let y = data["y_pct"].as_f64()?;
|
|
||||||
let method = data["method"].as_str().unwrap_or("?");
|
|
||||||
let score = data["score"].as_f64().unwrap_or(0.0);
|
|
||||||
println!(
|
|
||||||
" [VISUAL] Resolu par {} (score={:.3}) : ({:.4}, {:.4})",
|
|
||||||
method, score, x, y
|
|
||||||
);
|
|
||||||
Some((x, y))
|
|
||||||
} else {
|
|
||||||
let reason = data["reason"].as_str().unwrap_or("inconnu");
|
|
||||||
let method = data["method"].as_str().unwrap_or("?");
|
|
||||||
println!(
|
|
||||||
" [VISUAL] Non resolu (methode={}, raison={})",
|
|
||||||
method, reason
|
|
||||||
);
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -52,8 +52,9 @@ API_TOKEN = os.environ.get("RPA_API_TOKEN", "")
|
|||||||
MAX_SESSION_DURATION_S = 60 * 60 # 1 heure
|
MAX_SESSION_DURATION_S = 60 * 60 # 1 heure
|
||||||
SESSIONS_ROOT = BASE_DIR / "sessions"
|
SESSIONS_ROOT = BASE_DIR / "sessions"
|
||||||
|
|
||||||
# Paramètres Vision (Crops pour qwen3-vl)
|
# Paramètres Vision (Crops pour la résolution visuelle)
|
||||||
TARGETED_CROP_SIZE = (150, 150)
|
# 80x80 : assez petit pour être discriminant (icônes), assez grand pour le contexte
|
||||||
|
TARGETED_CROP_SIZE = (80, 80)
|
||||||
SCREENSHOT_QUALITY = 85
|
SCREENSHOT_QUALITY = 85
|
||||||
|
|
||||||
# Floutage des données sensibles (conformité AI Act)
|
# Floutage des données sensibles (conformité AI Act)
|
||||||
|
|||||||
@@ -79,6 +79,8 @@ class ActionExecutorV1:
|
|||||||
self._poll_backoff_factor = 1.5 # Multiplicateur en cas d'echec
|
self._poll_backoff_factor = 1.5 # Multiplicateur en cas d'echec
|
||||||
# Token d'authentification API
|
# Token d'authentification API
|
||||||
self._api_token = os.environ.get("RPA_API_TOKEN", "")
|
self._api_token = os.environ.get("RPA_API_TOKEN", "")
|
||||||
|
# Gestionnaire de notifications toast (pour les messages utilisateur)
|
||||||
|
self._notification_manager = None
|
||||||
# Log de la resolution physique pour le diagnostic DPI
|
# Log de la resolution physique pour le diagnostic DPI
|
||||||
self._log_screen_info()
|
self._log_screen_info()
|
||||||
|
|
||||||
@@ -94,6 +96,22 @@ class ActionExecutorV1:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug(f"Impossible de lire la resolution ecran : {e}")
|
logger.debug(f"Impossible de lire la resolution ecran : {e}")
|
||||||
|
|
||||||
|
@property
def notifier(self):
    """Lazily created notification manager.

    On first access, tries to import and instantiate ``NotificationManager``.
    If that fails for any reason, a do-nothing stand-in is stored instead so
    callers can invoke notification methods unconditionally.

    Returns:
        The cached ``NotificationManager`` instance, or a no-op object whose
        methods all return ``False``.
    """
    if self._notification_manager is None:
        try:
            from ..ui.notifications import NotificationManager
            self._notification_manager = NotificationManager()
        except Exception as e:
            logger.debug(f"NotificationManager indisponible : {e}")

            class _Noop:
                """Stand-in used when notifications are unavailable."""

                def replay_target_not_found(self, *a, **kw):
                    return False

                def __getattr__(self, name):
                    # Accept any other notification call and ignore it, so a
                    # missing notifier never raises AttributeError in callers.
                    # Dunder lookups keep their normal behaviour.
                    if name.startswith("__"):
                        raise AttributeError(name)
                    return lambda *a, **kw: False

            self._notification_manager = _Noop()
    return self._notification_manager
|
||||||
|
|
||||||
def _auth_headers(self) -> dict:
|
def _auth_headers(self) -> dict:
|
||||||
"""Headers d'authentification Bearer pour les requetes au serveur."""
|
"""Headers d'authentification Bearer pour les requetes au serveur."""
|
||||||
if self._api_token:
|
if self._api_token:
|
||||||
@@ -107,6 +125,30 @@ class ActionExecutorV1:
|
|||||||
self._sct = mss.mss()
|
self._sct = mss.mss()
|
||||||
return self._sct
|
return self._sct
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _describe_target(target_spec: dict) -> str:
|
||||||
|
"""Construire une description humaine de la cible depuis target_spec.
|
||||||
|
|
||||||
|
Utilisé pour les notifications et le logging quand la cible n'est pas trouvée.
|
||||||
|
"""
|
||||||
|
parts = []
|
||||||
|
by_text = target_spec.get("by_text", "").strip()
|
||||||
|
window = target_spec.get("window_title", "").strip()
|
||||||
|
if by_text:
|
||||||
|
parts.append(f"'{by_text}'")
|
||||||
|
if window:
|
||||||
|
parts.append(f"dans {window}")
|
||||||
|
if not parts:
|
||||||
|
# Fallback sur la vlm_description
|
||||||
|
vlm = target_spec.get("vlm_description", "")
|
||||||
|
if vlm:
|
||||||
|
parts.append(vlm[:60])
|
||||||
|
else:
|
||||||
|
parts.append("un élément")
|
||||||
|
if parts:
|
||||||
|
return " ".join(parts)
|
||||||
|
return "élément inconnu"
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# Execution legacy (watchdog command.json)
|
# Execution legacy (watchdog command.json)
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
@@ -135,6 +177,166 @@ class ActionExecutorV1:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Echec de l'ordre {action} : {e}")
|
logger.error(f"Echec de l'ordre {action} : {e}")
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Acteur intelligent — décision gemma4 quand le magnétoscope bloque
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
def _actor_decide(self, action: dict, target_spec: dict) -> str:
    """Ask gemma4 what to do when the target cannot be found.

    The model receives the planned click (``by_text`` / ``window_title`` from
    ``target_spec``) and the title of the currently active window, and must
    answer with one of:

    - PASSER   : the expected state is already reached
    - EXECUTER : the action is needed
    - STOPPER  : the state is inconsistent, the replay cannot continue

    The request goes to ``http://localhost:<GEMMA4_PORT>/api/chat`` (port
    11435 by default) with ``think`` enabled.

    Args:
        action: The action being replayed (currently not used in the prompt).
        target_spec: Target description of that action.

    Returns:
        ``"PASSER"``, ``"EXECUTER"`` or ``"STOPPER"``. Falls back to
        ``"EXECUTER"`` when the model is unreachable, answers with an HTTP
        error, or gives an answer containing none of the keywords.
    """
    import re
    import unicodedata

    import requests as _requests

    gemma4_port = os.environ.get("GEMMA4_PORT", "11435")
    by_text = target_spec.get("by_text", "")
    window_title = target_spec.get("window_title", "")

    # Title of the CURRENTLY active window (best effort)
    try:
        from ..window_info_crossplatform import get_active_window_info
        current_info = get_active_window_info()
        current_title = current_info.get("title", "")
    except Exception:
        current_title = ""

    prompt = (
        f"Tu es un robot RPA. L'action suivante est : cliquer sur '{by_text or 'un élément'}' "
        f"dans '{window_title}'.\n"
        f"La fenêtre active est \"{current_title}\".\n"
        f"Dois-je faire cette action ?\n"
        f"- EXECUTER : l'action est nécessaire\n"
        f"- PASSER : le résultat est déjà atteint\n"
        f"- STOPPER : état incohérent\n"
        f"Réponds UN SEUL MOT."
    )

    try:
        resp = _requests.post(
            f"http://localhost:{gemma4_port}/api/chat",
            json={
                "model": "gemma4:e4b",
                "messages": [{"role": "user", "content": prompt}],
                "stream": False,
                "think": True,
                "options": {"temperature": 0.1, "num_predict": 500},
            },
            timeout=30,
        )
        # An HTTP error body is not a model answer: treat it as unavailable
        resp.raise_for_status()
        message = resp.json().get("message") or {}
        content = (message.get("content") or "").strip().upper()

        # Drop accents so that "EXÉCUTER" is recognised as EXECUTER
        normalized = "".join(
            ch for ch in unicodedata.normalize("NFD", content)
            if not unicodedata.combining(ch)
        )
        # Take the first keyword that appears as a whole word. A fixed
        # priority with substring matching would pick PASSER out of
        # "EXECUTER, NE PAS PASSER" or out of "DEPASSER".
        match = re.search(r"\b(PASSER|EXECUTER|STOPPER)\b", normalized)
        if match:
            keyword = match.group(1)
            logger.info(f"Acteur gemma4 décide : {keyword}")
            return keyword
        logger.warning(f"Acteur gemma4 réponse inattendue : {content[:50]}")
        return "EXECUTER"
    except Exception as e:
        logger.warning(f"Acteur gemma4 indisponible : {e}")
        return "EXECUTER"
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Observer — pré-analyse écran avant chaque action
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
def _observe_screen(
    self, server_url: str, target_spec: dict,
    screen_width: int, screen_height: int,
) -> dict:
    """Observer: analyse the screen BEFORE resolving the target.

    Two-step strategy (fast, then server-assisted):
      1. Local check: the active window title is compared against a list of
         common popup/dialog words (French and English).
      2. If ``server_url`` is set: the screenshot is posted to
         ``/traces/stream/replay/pre_analyze`` for analysis.

    Args:
        server_url: Base URL of the server; empty disables step 2.
        target_spec: Target description; ``expected_state`` and
            ``window_title`` are forwarded to the server.
        screen_width: Screen width forwarded to the server.
        screen_height: Screen height forwarded to the server.

    Returns:
        ``None`` when nothing abnormal was detected or no analysis was
        possible. Otherwise a dict with ``screen_state`` (not ``"ok"``): the
        local check returns ``"popup"`` with ``popup_coords`` set to ``None``;
        the server answer is returned as received.
    """
    import requests as _requests

    # Common popup/dialog title fragments (Windows FR + EN)
    popup_patterns = (
        "enregistrer", "sauvegarder", "voulez-vous",
        "confirmer", "confirmation", "avertissement",
        "erreur", "error", "warning", "alert",
        "do you want", "save as", "are you sure",
    )

    # Step 1: fast local check (window title)
    try:
        from ..window_info_crossplatform import get_active_window_info
        current_info = get_active_window_info() or {}
        # A missing or None title is treated as empty
        current_title = (current_info.get("title") or "").lower()

        for pattern in popup_patterns:
            if pattern in current_title:
                logger.info(f"Observer : popup détectée par titre — '{current_title}'")
                # The title alone gives no coordinates: return the popup
                # without coords and let the caller handle it.
                return {
                    "screen_state": "popup",
                    "popup_label": current_title,
                    "popup_coords": None,
                    "detail": f"Popup détectée par titre : {current_title}",
                }
    except Exception as e:
        # Best effort: keep going, but leave a trace for diagnosis
        logger.debug(f"Observer : vérification locale impossible — {e}")

    # Step 2: server-side pre-analysis (if available)
    if not server_url:
        return None  # No server, no advanced pre-analysis

    screenshot_b64 = self._capture_screenshot_b64(max_width=0, quality=60)
    if not screenshot_b64:
        return None

    try:
        # Tolerate a base URL that ends with '/'
        url = f"{server_url.rstrip('/')}/traces/stream/replay/pre_analyze"
        from ..config import API_TOKEN
        headers = {"Content-Type": "application/json"}
        if API_TOKEN:
            headers["Authorization"] = f"Bearer {API_TOKEN}"

        resp = _requests.post(
            url,
            json={
                "screenshot_b64": screenshot_b64,
                "expected_state": target_spec.get("expected_state", ""),
                "window_title": target_spec.get("window_title", ""),
                "screen_width": screen_width,
                "screen_height": screen_height,
            },
            headers=headers,
            timeout=10,
        )

        if resp.ok:
            data = resp.json()
            # Only a JSON object can carry a screen_state
            if isinstance(data, dict):
                state = data.get("screen_state", "ok")
                if state != "ok":
                    logger.info(f"Observer serveur : {state} — {data.get('detail', '')}")
                    return data
        # A non-OK HTTP answer (e.g. endpoint not supported) is ignored silently
    except _requests.Timeout:
        logger.debug("Observer : serveur timeout (10s)")
    except _requests.ConnectionError:
        pass  # Server unreachable — not critical, continue without it
    except Exception as e:
        logger.debug(f"Observer : erreur serveur — {e}")

    return None  # Screen OK, or no pre-analysis possible
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# Execution replay (polling serveur)
|
# Execution replay (polling serveur)
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
@@ -191,41 +393,146 @@ class ActionExecutorV1:
|
|||||||
x_pct = action.get("x_pct", 0.0)
|
x_pct = action.get("x_pct", 0.0)
|
||||||
y_pct = action.get("y_pct", 0.0)
|
y_pct = action.get("y_pct", 0.0)
|
||||||
|
|
||||||
if visual_mode and target_spec and server_url:
|
# Extraire le nom de l'application depuis un titre de fenêtre
|
||||||
resolved = self._resolve_target_visual(
|
def _app_name(title):
|
||||||
server_url, target_spec, x_pct, y_pct, width, height
|
for sep in [" – ", " - ", " — "]:
|
||||||
)
|
if sep in title:
|
||||||
if resolved:
|
return title.split(sep)[-1].strip().lower()
|
||||||
x_pct = resolved["x_pct"]
|
return title.strip().lower()
|
||||||
y_pct = resolved["y_pct"]
|
|
||||||
result["visual_resolved"] = resolved.get("resolved", False)
|
|
||||||
if resolved.get("resolved"):
|
|
||||||
logger.info(
|
|
||||||
f"Visual resolve OK: {resolved.get('matched_element', {}).get('label', '?')} "
|
|
||||||
f"-> ({x_pct:.4f}, {y_pct:.4f})"
|
|
||||||
)
|
|
||||||
|
|
||||||
# ---- Hash AVANT l'action (pour verification post-action) ----
|
# ── Pré-vérification : titre fenêtre ──
|
||||||
# Seules les actions click et key_combo sont verifiees : elles
|
# Vérifier que l'écran est dans l'état attendu AVANT de cliquer.
|
||||||
# provoquent un changement visible de l'ecran (ouverture de fenetre,
|
if visual_mode and target_spec:
|
||||||
# focus, etc.). Les actions type/wait/scroll ne sont pas verifiees.
|
expected_title = target_spec.get("window_title", "")
|
||||||
|
if expected_title and expected_title != "unknown_window":
|
||||||
|
from ..window_info_crossplatform import get_active_window_info
|
||||||
|
current_info = get_active_window_info()
|
||||||
|
current_title = current_info.get("title", "")
|
||||||
|
|
||||||
|
current_app = _app_name(current_title)
|
||||||
|
expected_app = _app_name(expected_title)
|
||||||
|
title_match = (
|
||||||
|
current_app == expected_app
|
||||||
|
or expected_title.lower() in current_title.lower()
|
||||||
|
or current_title.lower() in expected_title.lower()
|
||||||
|
)
|
||||||
|
# Ignorer la fenêtre de Léa elle-même (overlay agent)
|
||||||
|
_lea_windows = ("léa", "lea —", "léa —", "lea -", "léa -", "lea assistante", "léa assistante")
|
||||||
|
is_lea_window = any(p in current_title.lower() for p in _lea_windows)
|
||||||
|
|
||||||
|
if not title_match and not is_lea_window:
|
||||||
|
logger.warning(
|
||||||
|
f"PRÉ-VÉRIF ÉCHOUÉE : attendu '{expected_title}', "
|
||||||
|
f"actuel '{current_title}' — STOP"
|
||||||
|
)
|
||||||
|
print(f" [PRÉ-VÉRIF] STOP — fenêtre '{current_title}' ≠ attendu '{expected_title}'")
|
||||||
|
result["success"] = False
|
||||||
|
result["error"] = f"Fenêtre incorrecte: '{current_title}' (attendu: '{expected_title}')"
|
||||||
|
return result
|
||||||
|
elif is_lea_window:
|
||||||
|
logger.info(f"PRÉ-VÉRIF : fenêtre Léa détectée, ignorée — on continue")
|
||||||
|
else:
|
||||||
|
logger.info(f"PRÉ-VÉRIF OK : '{current_title}'")
|
||||||
|
|
||||||
|
# ── OBSERVER : pré-analyse écran avant résolution ──
|
||||||
|
# Détecte popups, dialogues, états inattendus AVANT de chercher la cible.
|
||||||
|
# Si un problème est détecté, on le gère tout de suite (pas après l'échec).
|
||||||
|
# Ref: docs/VISION_RPA_INTELLIGENT.md — "Il observe"
|
||||||
|
if visual_mode and target_spec and action_type == "click":
|
||||||
|
observation = self._observe_screen(server_url, target_spec, width, height)
|
||||||
|
if observation:
|
||||||
|
obs_state = observation.get("screen_state", "ok")
|
||||||
|
|
||||||
|
if obs_state == "popup":
|
||||||
|
# Popup détectée AVANT la résolution — la fermer
|
||||||
|
popup_label = observation.get("popup_label", "popup")
|
||||||
|
popup_coords = observation.get("popup_coords")
|
||||||
|
print(f" [OBSERVER] Popup détectée : '{popup_label}' — fermeture")
|
||||||
|
logger.info(f"Observer : popup '{popup_label}' détectée avant résolution")
|
||||||
|
if popup_coords:
|
||||||
|
real_x = int(popup_coords["x_pct"] * width)
|
||||||
|
real_y = int(popup_coords["y_pct"] * height)
|
||||||
|
self._click((real_x, real_y), "left")
|
||||||
|
time.sleep(1.0)
|
||||||
|
print(f" [OBSERVER] Popup fermée — reprise du flow normal")
|
||||||
|
else:
|
||||||
|
# Pas de coordonnées → fallback sur handle_popup_vlm classique
|
||||||
|
self._handle_popup_vlm()
|
||||||
|
|
||||||
|
elif obs_state == "unexpected":
|
||||||
|
# État inattendu (pas la bonne page/écran)
|
||||||
|
detail = observation.get("detail", "état inattendu")
|
||||||
|
print(f" [OBSERVER] État inattendu : {detail}")
|
||||||
|
logger.warning(f"Observer : état inattendu — {detail}")
|
||||||
|
# Demander à l'acteur (gemma4) de décider
|
||||||
|
decision = self._actor_decide(action, target_spec)
|
||||||
|
if decision == "STOPPER":
|
||||||
|
result["success"] = False
|
||||||
|
result["error"] = f"observer_unexpected:{detail}"
|
||||||
|
return result
|
||||||
|
elif decision == "PASSER":
|
||||||
|
result["success"] = True
|
||||||
|
result["warning"] = "observer_skip"
|
||||||
|
return result
|
||||||
|
# EXECUTER → continuer normalement
|
||||||
|
|
||||||
|
if visual_mode and target_spec and server_url:
|
||||||
|
# ── GROUNDING : localisation pure via GroundingEngine ──
|
||||||
|
from .grounding import GroundingEngine
|
||||||
|
grounding = GroundingEngine(self)
|
||||||
|
grounding_result = grounding.locate(
|
||||||
|
server_url, target_spec, x_pct, y_pct, width, height,
|
||||||
|
)
|
||||||
|
if grounding_result.found:
|
||||||
|
x_pct = grounding_result.x_pct
|
||||||
|
y_pct = grounding_result.y_pct
|
||||||
|
result["visual_resolved"] = True
|
||||||
|
result["resolution_method"] = grounding_result.method
|
||||||
|
result["resolution_score"] = grounding_result.score
|
||||||
|
result["resolution_elapsed_ms"] = grounding_result.elapsed_ms
|
||||||
|
logger.info(
|
||||||
|
f"Grounding OK [{grounding_result.method}] "
|
||||||
|
f"{grounding_result.elapsed_ms:.0f}ms : "
|
||||||
|
f"{grounding_result.detail or '?'} "
|
||||||
|
f"-> ({x_pct:.4f}, {y_pct:.4f})"
|
||||||
|
)
|
||||||
|
|
||||||
|
# ---- Screenshot + hash AVANT l'action (pour le Critic post-action) ----
|
||||||
|
# Le serveur utilise screenshot_before + screenshot_after pour évaluer
|
||||||
|
# si l'action a eu l'effet attendu (Critic sémantique VLM).
|
||||||
needs_screen_check = action_type in ("click", "key_combo")
|
needs_screen_check = action_type in ("click", "key_combo")
|
||||||
hash_before = ""
|
hash_before = ""
|
||||||
|
screenshot_before_b64 = ""
|
||||||
if needs_screen_check:
|
if needs_screen_check:
|
||||||
hash_before = self._quick_screenshot_hash()
|
hash_before = self._quick_screenshot_hash()
|
||||||
|
screenshot_before_b64 = self._capture_screenshot_b64()
|
||||||
|
|
||||||
if action_type == "click":
|
if action_type == "click":
|
||||||
# Si visual_mode est activé, le resolve DOIT réussir.
|
# Si visual_mode est activé, le resolve DOIT réussir.
|
||||||
# Pas de fallback blind — on arrête le replay si la cible
|
# Pas de fallback blind — on arrête le replay si la cible
|
||||||
# n'est pas trouvée visuellement. C'est un RPA VISUEL.
|
# n'est pas trouvée visuellement. C'est un RPA VISUEL.
|
||||||
if visual_mode and not result.get("visual_resolved"):
|
if visual_mode and not result.get("visual_resolved"):
|
||||||
# Avant de STOP, vérifier s'il y a une popup imprévue via le VLM
|
# ── Policy : décider quoi faire quand grounding échoue ──
|
||||||
print(f" [POPUP-VLM] Cible non trouvée — vérification popup imprévue...")
|
from .policy import PolicyEngine, Decision
|
||||||
logger.info(f"Action {action_id} : cible non trouvée, tentative gestion popup VLM")
|
policy = PolicyEngine(self)
|
||||||
popup_handled = self._handle_popup_vlm()
|
target_desc = self._describe_target(target_spec)
|
||||||
if popup_handled:
|
retry_count = action.get("_retry_count", 0)
|
||||||
# Popup fermée — re-tenter le resolve
|
|
||||||
print(f" [POPUP-VLM] Popup gérée, re-tentative du resolve visuel...")
|
policy_decision = policy.decide(
|
||||||
|
action=action, target_spec=target_spec,
|
||||||
|
retry_count=retry_count, max_retries=1,
|
||||||
|
)
|
||||||
|
print(
|
||||||
|
f" [POLICY] {policy_decision.decision.value} — "
|
||||||
|
f"{policy_decision.reason}"
|
||||||
|
)
|
||||||
|
logger.info(
|
||||||
|
f"Action {action_id} : Policy → {policy_decision.decision.value} "
|
||||||
|
f"({policy_decision.reason})"
|
||||||
|
)
|
||||||
|
|
||||||
|
if policy_decision.decision == Decision.RETRY:
|
||||||
|
# Re-tenter le grounding après correction (popup fermée, etc.)
|
||||||
resolved2 = self._resolve_target_visual(
|
resolved2 = self._resolve_target_visual(
|
||||||
server_url, target_spec, x_pct, y_pct, width, height
|
server_url, target_spec, x_pct, y_pct, width, height
|
||||||
)
|
)
|
||||||
@@ -233,25 +540,37 @@ class ActionExecutorV1:
|
|||||||
x_pct = resolved2["x_pct"]
|
x_pct = resolved2["x_pct"]
|
||||||
y_pct = resolved2["y_pct"]
|
y_pct = resolved2["y_pct"]
|
||||||
result["visual_resolved"] = True
|
result["visual_resolved"] = True
|
||||||
print(
|
print(f" [POLICY] Re-resolve OK après {policy_decision.action_taken}")
|
||||||
f" [POPUP-VLM] Re-resolve OK après popup : "
|
|
||||||
f"({x_pct:.3f}, {y_pct:.3f})"
|
|
||||||
)
|
|
||||||
logger.info(
|
|
||||||
f"Action {action_id} : re-resolve OK après popup "
|
|
||||||
f"({x_pct:.3f}, {y_pct:.3f})"
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
|
# Re-resolve échoué — SUPERVISE (rendre la main)
|
||||||
result["success"] = False
|
result["success"] = False
|
||||||
result["error"] = "Élément non trouvé même après gestion popup"
|
result["error"] = "target_not_found"
|
||||||
print(f" [ERREUR] Élément toujours non trouvé après gestion popup — STOP")
|
result["target_description"] = target_desc
|
||||||
logger.error(f"Action {action_id} : élément non trouvé après popup, replay stoppé")
|
result["target_spec"] = target_spec
|
||||||
|
result["screenshot"] = self._capture_screenshot_b64()
|
||||||
|
result["warning"] = "visual_resolve_failed"
|
||||||
|
self.notifier.replay_target_not_found(target_desc)
|
||||||
return result
|
return result
|
||||||
else:
|
|
||||||
|
elif policy_decision.decision == Decision.SKIP:
|
||||||
|
result["success"] = True
|
||||||
|
result["warning"] = "policy_skip"
|
||||||
|
return result
|
||||||
|
|
||||||
|
elif policy_decision.decision == Decision.ABORT:
|
||||||
result["success"] = False
|
result["success"] = False
|
||||||
result["error"] = "Visual resolve échoué — cible non trouvée à l'écran"
|
result["error"] = f"policy_abort:{target_desc}"
|
||||||
print(f" [ERREUR] Visual resolve échoué, pas de popup détectée — STOP")
|
self.notifier.replay_target_not_found(target_desc)
|
||||||
logger.error(f"Action {action_id} : visual resolve échoué, pas de popup, replay stoppé")
|
return result
|
||||||
|
|
||||||
|
else: # SUPERVISE ou CONTINUE
|
||||||
|
result["success"] = False
|
||||||
|
result["error"] = "target_not_found"
|
||||||
|
result["target_description"] = target_desc
|
||||||
|
result["target_spec"] = target_spec
|
||||||
|
result["screenshot"] = self._capture_screenshot_b64()
|
||||||
|
result["warning"] = "visual_resolve_failed"
|
||||||
|
self.notifier.replay_target_not_found(target_desc)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
real_x = int(x_pct * width)
|
real_x = int(x_pct * width)
|
||||||
@@ -263,12 +582,43 @@ class ActionExecutorV1:
|
|||||||
f"({real_x}, {real_y}) sur ({width}x{height}), bouton={button}"
|
f"({real_x}, {real_y}) sur ({width}x{height}), bouton={button}"
|
||||||
)
|
)
|
||||||
self._click((real_x, real_y), button)
|
self._click((real_x, real_y), button)
|
||||||
print(f" [CLICK] Termine.")
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Replay click [{mode}] : ({x_pct:.3f}, {y_pct:.3f}) -> "
|
f"Replay click [{mode}] : ({x_pct:.3f}, {y_pct:.3f}) -> "
|
||||||
f"({real_x}, {real_y}) sur ({width}x{height})"
|
f"({real_x}, {real_y}) sur ({width}x{height})"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# ── Post-vérification : polling du titre fenêtre ──
|
||||||
|
# On attend que le titre change vers celui attendu (max 10s)
|
||||||
|
# C'est 100% visuel — pas de wait fixe arbitraire
|
||||||
|
expected_after = action.get("expected_window_title", "")
|
||||||
|
if expected_after:
|
||||||
|
from ..window_info_crossplatform import get_active_window_info
|
||||||
|
max_wait = 10.0
|
||||||
|
poll_interval = 0.3
|
||||||
|
elapsed_wait = 0.0
|
||||||
|
matched = False
|
||||||
|
while elapsed_wait < max_wait:
|
||||||
|
time.sleep(poll_interval)
|
||||||
|
elapsed_wait += poll_interval
|
||||||
|
post_info = get_active_window_info()
|
||||||
|
post_title = post_info.get("title", "")
|
||||||
|
post_app = _app_name(post_title)
|
||||||
|
expected_app_after = _app_name(expected_after)
|
||||||
|
if (post_app == expected_app_after
|
||||||
|
or expected_after.lower() in post_title.lower()
|
||||||
|
or post_title.lower() in expected_after.lower()):
|
||||||
|
matched = True
|
||||||
|
break
|
||||||
|
if matched:
|
||||||
|
print(f" [POST-VÉRIF] OK en {elapsed_wait:.1f}s — '{post_title}'")
|
||||||
|
logger.info(f"POST-VÉRIF OK en {elapsed_wait:.1f}s : '{post_title}'")
|
||||||
|
else:
|
||||||
|
print(f" [POST-VÉRIF] TIMEOUT {max_wait}s — '{post_title}' ≠ '{expected_after}'")
|
||||||
|
logger.warning(f"POST-VÉRIF TIMEOUT : '{post_title}' ≠ '{expected_after}'")
|
||||||
|
result["warning"] = f"post_verif_timeout:{post_title}"
|
||||||
|
else:
|
||||||
|
print(f" [CLICK] Terminé.")
|
||||||
|
|
||||||
elif action_type == "type":
|
elif action_type == "type":
|
||||||
text = action.get("text", "")
|
text = action.get("text", "")
|
||||||
raw_keys = action.get("raw_keys")
|
raw_keys = action.get("raw_keys")
|
||||||
@@ -347,6 +697,10 @@ class ActionExecutorV1:
|
|||||||
|
|
||||||
result["success"] = True
|
result["success"] = True
|
||||||
|
|
||||||
|
# Stocker le screenshot_before pour le Critic côté serveur
|
||||||
|
if screenshot_before_b64:
|
||||||
|
result["screenshot_before"] = screenshot_before_b64
|
||||||
|
|
||||||
# ---- Verification post-action : l'ecran a-t-il change ? ----
|
# ---- Verification post-action : l'ecran a-t-il change ? ----
|
||||||
# Verifie UNIQUEMENT, ne tente PAS de gerer les popups
|
# Verifie UNIQUEMENT, ne tente PAS de gerer les popups
|
||||||
# (Enter/Escape perturbent l'application).
|
# (Enter/Escape perturbent l'application).
|
||||||
@@ -356,6 +710,17 @@ class ActionExecutorV1:
|
|||||||
hash_before, timeout_ms=3000
|
hash_before, timeout_ms=3000
|
||||||
)
|
)
|
||||||
if not screen_changed:
|
if not screen_changed:
|
||||||
|
# ── Recovery : tenter un rollback si l'action n'a pas eu d'effet ──
|
||||||
|
from .recovery import RecoveryEngine
|
||||||
|
recovery = RecoveryEngine(self)
|
||||||
|
recovery_result = recovery.attempt(
|
||||||
|
failed_action=action,
|
||||||
|
critic_detail="L'écran n'a pas changé après l'action",
|
||||||
|
)
|
||||||
|
if recovery_result.success:
|
||||||
|
print(f" [RECOVERY] {recovery_result.detail}")
|
||||||
|
result["recovery"] = recovery_result.to_dict()
|
||||||
|
|
||||||
result["success"] = False
|
result["success"] = False
|
||||||
result["warning"] = "no_screen_change"
|
result["warning"] = "no_screen_change"
|
||||||
result["error"] = "Ecran inchange apres l'action"
|
result["error"] = "Ecran inchange apres l'action"
|
||||||
@@ -389,24 +754,52 @@ class ActionExecutorV1:
|
|||||||
) -> dict:
|
) -> dict:
|
||||||
"""Résoudre la position d'un clic visuellement.
|
"""Résoudre la position d'un clic visuellement.
|
||||||
|
|
||||||
Stratégie hybride en cascade :
|
Stratégie en cascade — compréhension sémantique d'abord :
|
||||||
1. Template matching avec le crop anchor (rapide, fiable si l'UI n'a pas changé)
|
1. Serveur resolve_target (SomEngine + VLM) — comprend CE QU'ON CHERCHE
|
||||||
2. VLM identifie l'élément + template matching texte (approche hybride)
|
2. Template matching local (fallback rapide si serveur indisponible)
|
||||||
3. VLM direct coordonnées (legacy, peu fiable avec qwen3-vl:8b)
|
3. VLM local (fallback dev/test Linux)
|
||||||
|
|
||||||
|
Le template matching compare des pixels et donne des faux positifs quand
|
||||||
|
l'écran n'est pas dans le même état que l'enregistrement. Le SomEngine
|
||||||
|
comprend sémantiquement les éléments UI (bouton, menu, texte) et trouve
|
||||||
|
le bon élément peu importe l'état de l'écran.
|
||||||
"""
|
"""
|
||||||
|
import time as _time
|
||||||
|
t_start = _time.time()
|
||||||
|
|
||||||
screenshot_b64 = self._capture_screenshot_b64(max_width=0, quality=75)
|
screenshot_b64 = self._capture_screenshot_b64(max_width=0, quality=75)
|
||||||
if not screenshot_b64:
|
if not screenshot_b64:
|
||||||
logger.warning("Capture screenshot echouee pour visual resolve")
|
logger.warning("Capture screenshot echouee pour visual resolve")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# ---- ÉTAPE 1 : Template matching avec le crop anchor ----
|
def _with_metrics(result, method_override=None):
|
||||||
|
"""Enrichir le résultat avec les métriques de résolution."""
|
||||||
|
if result is None:
|
||||||
|
return None
|
||||||
|
elapsed_ms = (_time.time() - t_start) * 1000
|
||||||
|
result["resolution_method"] = method_override or result.get("method", "unknown")
|
||||||
|
result["resolution_score"] = result.get("score", 0.0)
|
||||||
|
result["resolution_elapsed_ms"] = round(elapsed_ms, 1)
|
||||||
|
return result
|
||||||
|
|
||||||
|
# ---- ÉTAPE 1 : Résolution serveur (SomEngine + VLM) ----
|
||||||
|
# Le serveur comprend sémantiquement ce qu'on cherche. Pas de faux positifs.
|
||||||
|
if server_url:
|
||||||
|
server_result = self._server_resolve_target(
|
||||||
|
server_url, screenshot_b64, target_spec,
|
||||||
|
fallback_x, fallback_y, screen_width, screen_height,
|
||||||
|
)
|
||||||
|
if server_result and server_result.get("resolved"):
|
||||||
|
return _with_metrics(server_result)
|
||||||
|
|
||||||
|
# ---- ÉTAPE 2 : Template matching local (fallback si serveur down) ----
|
||||||
anchor_b64 = target_spec.get("anchor_image_base64", "")
|
anchor_b64 = target_spec.get("anchor_image_base64", "")
|
||||||
if anchor_b64:
|
if anchor_b64:
|
||||||
tm_result = self._template_match_anchor(screenshot_b64, anchor_b64, screen_width, screen_height)
|
tm_result = self._template_match_anchor(screenshot_b64, anchor_b64, screen_width, screen_height)
|
||||||
if tm_result and tm_result.get("resolved"):
|
if tm_result and tm_result.get("resolved"):
|
||||||
return tm_result
|
return _with_metrics(tm_result)
|
||||||
|
|
||||||
# ---- ÉTAPE 2 : Approche hybride VLM identifie + template matching texte ----
|
# ---- ÉTAPE 3 : VLM local (fallback dev/test Linux) ----
|
||||||
by_text = target_spec.get("by_text", "")
|
by_text = target_spec.get("by_text", "")
|
||||||
vlm_description = target_spec.get("vlm_description", "")
|
vlm_description = target_spec.get("vlm_description", "")
|
||||||
if vlm_description or by_text:
|
if vlm_description or by_text:
|
||||||
@@ -414,16 +807,74 @@ class ActionExecutorV1:
|
|||||||
screenshot_b64, target_spec, screen_width, screen_height
|
screenshot_b64, target_spec, screen_width, screen_height
|
||||||
)
|
)
|
||||||
if hybrid_result and hybrid_result.get("resolved"):
|
if hybrid_result and hybrid_result.get("resolved"):
|
||||||
return hybrid_result
|
return _with_metrics(hybrid_result)
|
||||||
|
|
||||||
# ---- ÉTAPE 3 : VLM direct coordonnées (legacy, peu fiable) ----
|
|
||||||
vlm_result = self._vlm_direct_resolve(screenshot_b64, target_spec)
|
|
||||||
if vlm_result and vlm_result.get("resolved"):
|
|
||||||
return vlm_result
|
|
||||||
|
|
||||||
print(" [VISUAL] Toutes les méthodes ont échoué")
|
print(" [VISUAL] Toutes les méthodes ont échoué")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def _server_resolve_target(
    self, server_url: str, screenshot_b64: str, target_spec: dict,
    fallback_x: float, fallback_y: float,
    screen_width: int, screen_height: int,
) -> dict:
    """Resolve a click target through the remote resolve_target endpoint.

    Posts the screenshot and ``target_spec`` as JSON to
    ``<server_url>/traces/stream/replay/resolve_target`` and returns the
    decoded reply. The server is expected to do the heavy lifting
    (SomEngine + VLM, per the project notes); this method only transports
    the request and sanity-checks the answer.

    Args:
        server_url: Base URL of the server; a trailing "/" is tolerated.
        screenshot_b64: Base64-encoded screenshot sent as ``screenshot_b64``.
        target_spec: Target description forwarded unchanged.
        fallback_x: Sent as ``fallback_x_pct``.
        fallback_y: Sent as ``fallback_y_pct``.
        screen_width: Sent as ``screen_width``.
        screen_height: Sent as ``screen_height``.

    Returns:
        The server's JSON object (callers test its ``"resolved"`` key), or
        ``None`` on HTTP error, timeout, any other exception, a non-object
        JSON reply, or a "resolved" reply without numeric coordinates.
        NOTE: ``None`` is returned despite the ``-> dict`` annotation, which
        is kept unchanged for interface compatibility.
    """
    import requests as _requests
    from ..config import API_TOKEN

    # Avoid a double slash when the configured base URL ends with "/".
    url = f"{server_url.rstrip('/')}/traces/stream/replay/resolve_target"
    payload = {
        "session_id": "",
        "screenshot_b64": screenshot_b64,
        "target_spec": target_spec,
        "fallback_x_pct": fallback_x,
        "fallback_y_pct": fallback_y,
        "screen_width": screen_width,
        "screen_height": screen_height,
        "strict_mode": True,
    }
    headers = {"Content-Type": "application/json"}
    if API_TOKEN:
        headers["Authorization"] = f"Bearer {API_TOKEN}"

    try:
        print(f" [SERVER-RESOLVE] Appel serveur {server_url}...")
        resp = _requests.post(url, json=payload, headers=headers, timeout=30)
        if not resp.ok:
            logger.warning(f"Server resolve HTTP {resp.status_code}")
            return None

        data = resp.json()
        if not isinstance(data, dict):
            # A list/str/null body cannot carry the expected keys.
            logger.warning("Server resolve : réponse JSON inattendue")
            return None

        resolved = data.get("resolved", False)
        method = data.get("method", "server_unknown")

        if resolved:
            x_pct = data.get("x_pct")
            y_pct = data.get("y_pct")
            # Callers index ["x_pct"] / ["y_pct"] directly, so a "resolved"
            # reply without usable coordinates must count as a failure.
            if not isinstance(x_pct, (int, float)) or not isinstance(y_pct, (int, float)):
                print(" [SERVER-RESOLVE] Échec (coordonnées manquantes)")
                logger.warning("Server resolve : coordonnées manquantes dans la réponse")
                return None

            # Display only: a null score must not discard a valid resolution.
            score = data.get("score") or 0
            print(
                f" [SERVER-RESOLVE] OK [{method}] "
                f"→ ({x_pct:.3f}, {y_pct:.3f}) "
                f"score={score:.2f}"
            )
            logger.info(f"Server resolve OK [{method}] score={score:.2f}")
        else:
            reason = data.get("reason", "unknown")
            print(f" [SERVER-RESOLVE] Échec ({reason})")
            logger.info(f"Server resolve échoué : {reason}")

        return data

    except _requests.Timeout:
        print(" [SERVER-RESOLVE] Timeout (30s)")
        logger.warning("Server resolve timeout")
        return None
    except Exception as e:
        # Best effort: any transport/decoding problem lets the caller fall
        # back to its local strategies.
        print(f" [SERVER-RESOLVE] Erreur : {e}")
        logger.warning(f"Server resolve erreur : {e}")
        return None
|
||||||
|
|
||||||
def _template_match_anchor(
|
def _template_match_anchor(
|
||||||
self, screenshot_b64: str, anchor_b64: str,
|
self, screenshot_b64: str, anchor_b64: str,
|
||||||
screen_width: int, screen_height: int,
|
screen_width: int, screen_height: int,
|
||||||
@@ -544,18 +995,23 @@ class ActionExecutorV1:
|
|||||||
"What is the exact text label of this element? "
|
"What is the exact text label of this element? "
|
||||||
"Answer ONLY the text visible on the element (button text, label, menu item)."
|
"Answer ONLY the text visible on the element (button text, label, menu item)."
|
||||||
)
|
)
|
||||||
prefill = "The text is: "
|
# Prefill pour les modèles thinking (qwen3) — skip la phase de réflexion
|
||||||
|
_vlm_model_ident = os.environ.get("RPA_VLM_MODEL", "gemma4:e4b")
|
||||||
|
_is_thinking_ident = "qwen3" in _vlm_model_ident.lower()
|
||||||
|
|
||||||
|
messages_ident = [
|
||||||
|
{
|
||||||
|
"role": "system",
|
||||||
|
"content": "You read text from UI screenshots. Answer briefly with just the text.",
|
||||||
|
},
|
||||||
|
{"role": "user", "content": prompt, "images": [screenshot_b64]},
|
||||||
|
]
|
||||||
|
if _is_thinking_ident:
|
||||||
|
messages_ident.append({"role": "assistant", "content": "The text is: "})
|
||||||
|
|
||||||
payload = {
|
payload = {
|
||||||
"model": os.environ.get("RPA_VLM_MODEL", "qwen3-vl:8b"),
|
"model": _vlm_model_ident,
|
||||||
"messages": [
|
"messages": messages_ident,
|
||||||
{
|
|
||||||
"role": "system",
|
|
||||||
"content": "You read text from UI screenshots. Answer briefly with just the text.",
|
|
||||||
},
|
|
||||||
{"role": "user", "content": prompt, "images": [screenshot_b64]},
|
|
||||||
{"role": "assistant", "content": prefill},
|
|
||||||
],
|
|
||||||
"stream": False,
|
"stream": False,
|
||||||
"think": False,
|
"think": False,
|
||||||
"options": {"temperature": 0.1, "num_predict": 30, "num_ctx": 8192},
|
"options": {"temperature": 0.1, "num_predict": 30, "num_ctx": 8192},
|
||||||
@@ -670,16 +1126,21 @@ Example: x_pct=0.50, y_pct=0.30"""
|
|||||||
ollama_host = os.environ.get("RPA_SERVER_HOST", "localhost")
|
ollama_host = os.environ.get("RPA_SERVER_HOST", "localhost")
|
||||||
ollama_url = f"http://{ollama_host}:11434/api/chat"
|
ollama_url = f"http://{ollama_host}:11434/api/chat"
|
||||||
|
|
||||||
# Prefill plus explicite pour guider la réponse
|
# Prefill pour les modèles thinking (qwen3) — évite le mode réflexion >180s
|
||||||
prefill = '{"x_pct": 0.'
|
_vlm_model = os.environ.get("RPA_VLM_MODEL", "gemma4:e4b")
|
||||||
|
_is_thinking = "qwen3" in _vlm_model.lower()
|
||||||
|
prefill = '{"x_pct": 0.' if _is_thinking else ""
|
||||||
|
|
||||||
|
messages = [
|
||||||
|
{"role": "system", "content": "You locate UI elements on screenshots. Reply with JSON only: {\"x_pct\": 0.XX, \"y_pct\": 0.XX, \"confidence\": 0.XX}"},
|
||||||
|
{"role": "user", "content": prompt, "images": images},
|
||||||
|
]
|
||||||
|
if prefill:
|
||||||
|
messages.append({"role": "assistant", "content": prefill})
|
||||||
|
|
||||||
payload = {
|
payload = {
|
||||||
"model": os.environ.get("RPA_VLM_MODEL", "qwen3-vl:8b"),
|
"model": _vlm_model,
|
||||||
"messages": [
|
"messages": messages,
|
||||||
{"role": "system", "content": "You locate UI elements on screenshots. Reply with JSON only: {\"x_pct\": 0.XX, \"y_pct\": 0.XX, \"confidence\": 0.XX}"},
|
|
||||||
{"role": "user", "content": prompt, "images": images},
|
|
||||||
{"role": "assistant", "content": prefill},
|
|
||||||
],
|
|
||||||
"stream": False,
|
"stream": False,
|
||||||
"think": False,
|
"think": False,
|
||||||
"options": {"temperature": 0.1, "num_predict": 60, "num_ctx": 8192},
|
"options": {"temperature": 0.1, "num_predict": 60, "num_ctx": 8192},
|
||||||
@@ -832,6 +1293,14 @@ Example: x_pct=0.50, y_pct=0.30"""
|
|||||||
"error": result.get("error"),
|
"error": result.get("error"),
|
||||||
"warning": result.get("warning"),
|
"warning": result.get("warning"),
|
||||||
"screenshot": result.get("screenshot"),
|
"screenshot": result.get("screenshot"),
|
||||||
|
"screenshot_after": result.get("screenshot"),
|
||||||
|
"screenshot_before": result.get("screenshot_before"),
|
||||||
|
"resolution_method": result.get("resolution_method"),
|
||||||
|
"resolution_score": result.get("resolution_score"),
|
||||||
|
"resolution_elapsed_ms": result.get("resolution_elapsed_ms"),
|
||||||
|
# Champs enrichis pour target_not_found (pause supervisée)
|
||||||
|
"target_description": result.get("target_description"),
|
||||||
|
"target_spec": result.get("target_spec"),
|
||||||
}
|
}
|
||||||
try:
|
try:
|
||||||
resp2 = requests.post(
|
resp2 = requests.post(
|
||||||
@@ -887,7 +1356,29 @@ Example: x_pct=0.50, y_pct=0.30"""
|
|||||||
logger.warning("[POPUP-VLM] Capture screenshot échouée")
|
logger.warning("[POPUP-VLM] Capture screenshot échouée")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Étape 1 : Le VLM identifie le bouton à cliquer
|
# Essayer la détection popup via le serveur d'abord
|
||||||
|
from ..config import SERVER_URL, API_TOKEN
|
||||||
|
if SERVER_URL:
|
||||||
|
monitor = self.sct.monitors[1]
|
||||||
|
sw, sh = monitor["width"], monitor["height"]
|
||||||
|
server_result = self._server_resolve_target(
|
||||||
|
SERVER_URL, screenshot_b64,
|
||||||
|
{"vlm_description": "popup, dialog box, confirmation, or error message button (Oui, OK, Yes, Non, Enregistrer, Annuler)"},
|
||||||
|
0.5, 0.5, sw, sh,
|
||||||
|
)
|
||||||
|
if server_result and server_result.get("resolved"):
|
||||||
|
x_pct = server_result["x_pct"]
|
||||||
|
y_pct = server_result["y_pct"]
|
||||||
|
real_x = int(x_pct * sw)
|
||||||
|
real_y = int(y_pct * sh)
|
||||||
|
label = server_result.get("matched_element", {}).get("label", "popup")
|
||||||
|
print(f" [POPUP-SERVER] Popup détectée ! Clic sur '{label}' → ({real_x}, {real_y})")
|
||||||
|
logger.info(f"[POPUP-SERVER] Clic popup '{label}' à ({real_x}, {real_y})")
|
||||||
|
self._click((real_x, real_y), "left")
|
||||||
|
time.sleep(1.0)
|
||||||
|
return True
|
||||||
|
|
||||||
|
# Fallback : VLM local identifie le bouton à cliquer
|
||||||
button_text = self._vlm_identify_popup_button(screenshot_b64)
|
button_text = self._vlm_identify_popup_button(screenshot_b64)
|
||||||
if not button_text:
|
if not button_text:
|
||||||
return False # Pas de popup ou VLM en échec
|
return False # Pas de popup ou VLM en échec
|
||||||
@@ -952,7 +1443,7 @@ Example: x_pct=0.50, y_pct=0.30"""
|
|||||||
ollama_url = f"http://{ollama_host}:11434/api/chat"
|
ollama_url = f"http://{ollama_host}:11434/api/chat"
|
||||||
|
|
||||||
prompt = (
|
prompt = (
|
||||||
"Look at this screenshot. Is there a popup dialog, confirmation dialog, "
|
"Regarde cette capture d'écran. Y a-t-il une popup, une boîte de dialogue, "
|
||||||
"error message, or modal window visible?\n"
|
"error message, or modal window visible?\n"
|
||||||
"If yes, what button should I click to proceed?\n"
|
"If yes, what button should I click to proceed?\n"
|
||||||
"Answer ONLY the button text (like: Oui, OK, Yes, Enregistrer, Non, "
|
"Answer ONLY the button text (like: Oui, OK, Yes, Enregistrer, Non, "
|
||||||
@@ -960,21 +1451,26 @@ Example: x_pct=0.50, y_pct=0.30"""
|
|||||||
"If no popup: answer NO_POPUP"
|
"If no popup: answer NO_POPUP"
|
||||||
)
|
)
|
||||||
|
|
||||||
prefill = "The button to click is: "
|
# Prefill pour les modèles thinking (qwen3) — skip la phase de réflexion
|
||||||
|
_vlm_model_popup = os.environ.get("RPA_VLM_MODEL", "gemma4:e4b")
|
||||||
|
_is_thinking_popup = "qwen3" in _vlm_model_popup.lower()
|
||||||
|
|
||||||
|
messages_popup = [
|
||||||
|
{
|
||||||
|
"role": "system",
|
||||||
|
"content": (
|
||||||
|
"You analyze screenshots to detect popup dialogs. "
|
||||||
|
"Answer briefly with just the button text. No JSON, no coordinates."
|
||||||
|
),
|
||||||
|
},
|
||||||
|
{"role": "user", "content": prompt, "images": [screenshot_b64]},
|
||||||
|
]
|
||||||
|
if _is_thinking_popup:
|
||||||
|
messages_popup.append({"role": "assistant", "content": "The button to click is: "})
|
||||||
|
|
||||||
payload = {
|
payload = {
|
||||||
"model": os.environ.get("RPA_VLM_MODEL", "qwen3-vl:8b"),
|
"model": _vlm_model_popup,
|
||||||
"messages": [
|
"messages": messages_popup,
|
||||||
{
|
|
||||||
"role": "system",
|
|
||||||
"content": (
|
|
||||||
"You analyze screenshots to detect popup dialogs. "
|
|
||||||
"Answer briefly with just the button text. No JSON, no coordinates."
|
|
||||||
),
|
|
||||||
},
|
|
||||||
{"role": "user", "content": prompt, "images": [screenshot_b64]},
|
|
||||||
{"role": "assistant", "content": prefill},
|
|
||||||
],
|
|
||||||
"stream": False,
|
"stream": False,
|
||||||
"think": False,
|
"think": False,
|
||||||
"options": {"temperature": 0.1, "num_predict": 30, "num_ctx": 8192},
|
"options": {"temperature": 0.1, "num_predict": 30, "num_ctx": 8192},
|
||||||
@@ -1083,7 +1579,7 @@ Example: x_pct=0.50, y_pct=0.30"""
|
|||||||
|
|
||||||
best_match = None
|
best_match = None
|
||||||
best_val = 0.0
|
best_val = 0.0
|
||||||
threshold = 0.55 # Seuil assez permissif pour le texte de bouton
|
threshold = 0.50 # Seuil équilibré
|
||||||
|
|
||||||
# Essayer plusieurs tailles de police pour couvrir différentes résolutions
|
# Essayer plusieurs tailles de police pour couvrir différentes résolutions
|
||||||
for font_size in [14, 16, 18, 20, 22, 24, 12, 26, 28, 10]:
|
for font_size in [14, 16, 18, 20, 22, 24, 12, 26, 28, 10]:
|
||||||
@@ -1258,58 +1754,43 @@ Example: x_pct=0.50, y_pct=0.30"""
|
|||||||
# =========================================================================
|
# =========================================================================
|
||||||
|
|
||||||
def _type_text(self, text: str):
|
def _type_text(self, text: str):
|
||||||
"""Saisir du texte via copier-coller (methode principale) ou keyboard.type (fallback).
|
"""Saisir du texte caractère par caractère (anti-détection robot).
|
||||||
|
|
||||||
Le copier-coller via le presse-papiers est la methode principale car
|
Chaque caractère est tapé individuellement avec un délai aléatoire
|
||||||
keyboard.type() de pynput envoie les scancodes QWERTY, ce qui produit
|
pour simuler une frappe humaine. Les caractères spéciaux AZERTY
|
||||||
des caracteres incorrects sur les claviers AZERTY (ex: "ce" -> "ci").
|
(@ # € etc.) utilisent les bons VK codes via KeyCode.from_char().
|
||||||
Le copier-coller est agnostique du layout clavier.
|
|
||||||
|
Pas de copier-coller (détectable par les systèmes anti-robot Citrix).
|
||||||
"""
|
"""
|
||||||
|
import random
|
||||||
|
|
||||||
if not text:
|
if not text:
|
||||||
return
|
return
|
||||||
|
|
||||||
clipboard_ok = False
|
for char in text:
|
||||||
try:
|
|
||||||
import pyperclip
|
|
||||||
# Sauvegarder le contenu actuel du presse-papiers
|
|
||||||
try:
|
try:
|
||||||
old_clipboard = pyperclip.paste()
|
# Taper le caractère via from_char (respecte le layout clavier)
|
||||||
|
self.keyboard.press(KeyCode.from_char(char))
|
||||||
|
self.keyboard.release(KeyCode.from_char(char))
|
||||||
except Exception:
|
except Exception:
|
||||||
old_clipboard = None
|
# Fallback : keyboard.type pour les cas spéciaux
|
||||||
|
|
||||||
pyperclip.copy(text)
|
|
||||||
# Ctrl+V pour coller
|
|
||||||
self.keyboard.press(Key.ctrl)
|
|
||||||
time.sleep(0.02)
|
|
||||||
self.keyboard.press('v')
|
|
||||||
self.keyboard.release('v')
|
|
||||||
self.keyboard.release(Key.ctrl)
|
|
||||||
time.sleep(0.1)
|
|
||||||
|
|
||||||
# Restaurer le presse-papiers original
|
|
||||||
if old_clipboard is not None:
|
|
||||||
try:
|
try:
|
||||||
pyperclip.copy(old_clipboard)
|
self.keyboard.type(char)
|
||||||
except Exception:
|
except Exception as e:
|
||||||
pass
|
logger.debug(f"Impossible de taper '{char}': {e}")
|
||||||
|
# Délai humain entre les frappes (40-120ms)
|
||||||
|
time.sleep(random.uniform(0.04, 0.12))
|
||||||
|
|
||||||
clipboard_ok = True
|
logger.debug(f"Texte saisi char-by-char ({len(text)} chars)")
|
||||||
logger.debug(f"Texte saisi via presse-papiers ({len(text)} chars)")
|
|
||||||
except ImportError:
|
|
||||||
logger.debug("pyperclip non disponible, fallback sur keyboard.type()")
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Copier-coller echoue ({e}), fallback sur keyboard.type()")
|
|
||||||
|
|
||||||
if not clipboard_ok:
|
|
||||||
self.keyboard.type(text)
|
|
||||||
|
|
||||||
def _click(self, pos, button_name):
|
def _click(self, pos, button_name):
|
||||||
"""Deplacer la souris et cliquer.
|
"""Deplacer la souris via courbe de Bézier puis cliquer.
|
||||||
|
|
||||||
Supporte les boutons : left, right, double (double-clic gauche).
|
Le mouvement en courbe de Bézier simule un déplacement humain
|
||||||
|
(anti-détection robot pour Citrix et systèmes surveillés).
|
||||||
"""
|
"""
|
||||||
self.mouse.position = pos
|
self._bezier_move(pos)
|
||||||
time.sleep(0.1) # Delai pour simuler le temps de reaction humain
|
time.sleep(0.05)
|
||||||
|
|
||||||
if button_name == "double":
|
if button_name == "double":
|
||||||
self.mouse.click(Button.left, 2)
|
self.mouse.click(Button.left, 2)
|
||||||
@@ -1318,6 +1799,35 @@ Example: x_pct=0.50, y_pct=0.30"""
|
|||||||
else:
|
else:
|
||||||
self.mouse.click(Button.left)
|
self.mouse.click(Button.left)
|
||||||
|
|
||||||
|
def _bezier_move(self, target, steps=25):
|
||||||
|
"""Déplacer la souris vers target via une courbe de Bézier cubique.
|
||||||
|
|
||||||
|
Génère un mouvement naturel avec un point de contrôle aléatoire
|
||||||
|
pour éviter les lignes droites détectables par les anti-bots.
|
||||||
|
"""
|
||||||
|
import random
|
||||||
|
|
||||||
|
start = self.mouse.position
|
||||||
|
sx, sy = start
|
||||||
|
tx, ty = target
|
||||||
|
|
||||||
|
# Point de contrôle aléatoire (déviation latérale)
|
||||||
|
dist = ((tx - sx) ** 2 + (ty - sy) ** 2) ** 0.5
|
||||||
|
deviation = max(20, dist * 0.2)
|
||||||
|
cx = (sx + tx) / 2 + random.uniform(-deviation, deviation)
|
||||||
|
cy = (sy + ty) / 2 + random.uniform(-deviation, deviation)
|
||||||
|
|
||||||
|
for i in range(1, steps + 1):
|
||||||
|
t = i / steps
|
||||||
|
# Bézier quadratique : B(t) = (1-t)²·S + 2(1-t)t·C + t²·T
|
||||||
|
inv_t = 1 - t
|
||||||
|
x = inv_t * inv_t * sx + 2 * inv_t * t * cx + t * t * tx
|
||||||
|
y = inv_t * inv_t * sy + 2 * inv_t * t * cy + t * t * ty
|
||||||
|
self.mouse.position = (int(x), int(y))
|
||||||
|
# Vitesse variable (plus lent au début et à la fin)
|
||||||
|
speed = 0.005 + 0.01 * (1 - abs(2 * t - 1))
|
||||||
|
time.sleep(speed)
|
||||||
|
|
||||||
def _execute_key_combo(self, keys: list):
|
def _execute_key_combo(self, keys: list):
|
||||||
"""
|
"""
|
||||||
Executer une combinaison de touches.
|
Executer une combinaison de touches.
|
||||||
|
|||||||
214
agent_v0/agent_v1/core/grounding.py
Normal file
214
agent_v0/agent_v1/core/grounding.py
Normal file
@@ -0,0 +1,214 @@
|
|||||||
|
# agent_v1/core/grounding.py
|
||||||
|
"""
|
||||||
|
Module Grounding — localisation pure d'éléments UI sur l'écran.
|
||||||
|
|
||||||
|
Responsabilité unique : "Trouve l'élément X sur l'écran et retourne ses coordonnées."
|
||||||
|
Ne prend AUCUNE décision. Si l'élément n'est pas trouvé → retourne NOT_FOUND.
|
||||||
|
|
||||||
|
Stratégies disponibles (cascade configurable) :
|
||||||
|
1. Serveur SomEngine + VLM (GPU distant)
|
||||||
|
2. Template matching local (CPU, ~10ms)
|
||||||
|
3. VLM local direct (CPU/GPU local)
|
||||||
|
|
||||||
|
Séparé de Policy (qui décide quoi faire quand grounding échoue).
|
||||||
|
Ref: docs/PLAN_ACTEUR_V1.md — Architecture MICRO (grounding + exécution)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class GroundingResult:
    """Outcome of one attempt to locate a UI element on screen."""

    # True when the element was located.
    found: bool
    # Horizontal position as a fraction of the screen width (0.0-1.0).
    x_pct: float = 0.0
    # Vertical position as a fraction of the screen height (0.0-1.0).
    y_pct: float = 0.0
    # Strategy that produced the result (server_som, anchor_template, vlm_direct...).
    method: str = ""
    # Confidence of the match (0.0-1.0).
    score: float = 0.0
    # Time spent resolving, in milliseconds.
    elapsed_ms: float = 0.0
    # Extra information (matched label, failure reason).
    detail: str = ""
    # Raw resolver payload, kept for debugging; not included in to_dict().
    raw: Optional[Dict] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict summary of the result.

        ``score`` is rounded to 3 decimals and ``elapsed_ms`` to 1 decimal;
        the ``raw`` payload is intentionally left out.
        """
        return {
            "found": self.found,
            "x_pct": self.x_pct,
            "y_pct": self.y_pct,
            "method": self.method,
            "score": round(self.score, 3),
            "elapsed_ms": round(self.elapsed_ms, 1),
            "detail": self.detail,
        }
|
||||||
|
|
||||||
|
|
||||||
|
# Shared sentinel for "element not found". GroundingResult is a plain
# (non-frozen) dataclass, so this instance must not be mutated by callers.
NOT_FOUND = GroundingResult(found=False, detail="Aucune méthode n'a trouvé l'élément")
|
||||||
|
|
||||||
|
|
||||||
|
class GroundingEngine:
    """Visual localisation engine for UI elements.

    Wraps the resolution cascade (server -> template -> local VLM) behind a
    single interface. It takes no decision when nothing is found; that is the
    role of PolicyEngine.

    Usage:
        engine = GroundingEngine(executor)
        result = engine.locate(
            server_url, target_spec, fallback_x, fallback_y, screen_w, screen_h
        )
        if result.found:
            click(result.x_pct, result.y_pct)
    """

    # Resolver method names (as stored under "_learned_strategy") mapped to the
    # cascade strategy that produces them.
    _LEARNED_TO_STRATEGY = {
        "som_text_match": "server",
        "grounding_vlm": "server",
        "server_som": "server",
        "anchor_template": "template",
        "template_matching": "template",
        "hybrid_text_direct": "vlm_local",
        "hybrid_vlm_text": "vlm_local",
        "vlm_direct": "vlm_local",
    }

    def __init__(self, executor):
        """
        Args:
            executor: ActionExecutorV1; supplies the existing resolution methods.
        """
        self._executor = executor

    def locate(
        self,
        server_url: str,
        target_spec: Dict[str, Any],
        fallback_x: float,
        fallback_y: float,
        screen_width: int,
        screen_height: int,
        strategies: Optional[List[str]] = None,
    ) -> "GroundingResult":
        """Locate a UI element on screen.

        Runs the strategies in order and returns as soon as one of them finds
        the element.

        Args:
            server_url: URL of the server (SomEngine + GPU VLM).
            target_spec: Target specification (by_text, anchor, vlm_description...).
                When it carries "_learned_strategy", the matching strategy is
                tried first.
            fallback_x, fallback_y: Fallback coordinates (from the recording).
            screen_width, screen_height: Screen resolution.
            strategies: Ordered list of strategies to try.
                Defaults to ["server", "template", "vlm_local"].

        Returns:
            A GroundingResult with found=True and coordinates, or a
            GroundingResult with found=False and a failure detail.
        """
        if strategies is None:
            strategies = ["server", "template", "vlm_local"]

        # Learning loop: when history says which method works for this target,
        # move the corresponding strategy to the front. A new list is built so
        # the caller's list is never mutated.
        learned = target_spec.get("_learned_strategy", "")
        if learned:
            preferred = self._LEARNED_TO_STRATEGY.get(learned, "")
            if preferred and preferred in strategies:
                strategies = [preferred] + [s for s in strategies if s != preferred]
                logger.info(
                    f"Grounding: stratégie réordonnée par l'apprentissage → "
                    f"{strategies} (learned={learned})"
                )

        # Monotonic clock: a wall-clock adjustment must not skew elapsed_ms.
        t_start = time.monotonic()

        screenshot_b64 = self._executor._capture_screenshot_b64(max_width=0, quality=75)
        if not screenshot_b64:
            return GroundingResult(
                found=False, detail="Capture screenshot échouée",
                elapsed_ms=(time.monotonic() - t_start) * 1000,
            )

        for strategy in strategies:
            result = self._try_strategy(
                strategy, server_url, screenshot_b64, target_spec,
                fallback_x, fallback_y, screen_width, screen_height,
            )
            if result.found:
                # Report the time of the whole cascade, not of the last strategy.
                result.elapsed_ms = (time.monotonic() - t_start) * 1000
                return result

        return GroundingResult(
            found=False,
            detail=f"Toutes les stratégies ont échoué ({', '.join(strategies)})",
            elapsed_ms=(time.monotonic() - t_start) * 1000,
        )

    @staticmethod
    def _matched_label(raw: Dict[str, Any]) -> str:
        """Return raw["matched_element"]["label"], or "" when it is unavailable.

        The resolver may send "matched_element" as null or omit it, so the value
        is only dereferenced when it really is a dict.
        """
        element = raw.get("matched_element")
        if isinstance(element, dict):
            return element.get("label", "")
        return ""

    def _try_strategy(
        self,
        strategy: str,
        server_url: str,
        screenshot_b64: str,
        target_spec: Dict[str, Any],
        fallback_x: float,
        fallback_y: float,
        screen_width: int,
        screen_height: int,
    ) -> "GroundingResult":
        """Try a single grounding strategy.

        Returns a found=True result when the executor's resolver reports
        "resolved", otherwise a found=False result tagged with the strategy
        name. Unknown strategy names, a "server" strategy without server_url,
        and strategies whose required target_spec keys are missing all end in
        the found=False result.
        """
        raw = None
        method = strategy
        detail = ""

        if strategy == "server" and server_url:
            raw = self._executor._server_resolve_target(
                server_url, screenshot_b64, target_spec,
                fallback_x, fallback_y, screen_width, screen_height,
            )
            if raw and raw.get("resolved"):
                method = raw.get("method", "server")
                detail = self._matched_label(raw)

        elif strategy == "template":
            anchor_b64 = target_spec.get("anchor_image_base64", "")
            if anchor_b64:
                raw = self._executor._template_match_anchor(
                    screenshot_b64, anchor_b64, screen_width, screen_height,
                )
                # Template results always carry this fixed method name and no label.
                method = "anchor_template"

        elif strategy == "vlm_local":
            by_text = target_spec.get("by_text", "")
            vlm_desc = target_spec.get("vlm_description", "")
            if vlm_desc or by_text:
                raw = self._executor._hybrid_vlm_resolve(
                    screenshot_b64, target_spec, screen_width, screen_height,
                )
                if raw and raw.get("resolved"):
                    method = raw.get("method", "vlm_local")
                    detail = self._matched_label(raw)

        if raw and raw.get("resolved"):
            return GroundingResult(
                found=True,
                x_pct=raw["x_pct"],
                y_pct=raw["y_pct"],
                method=method,
                score=raw.get("score", 0.0),
                detail=detail,
                raw=raw,
            )

        return GroundingResult(found=False, method=strategy, detail=f"{strategy}: pas trouvé")
|
||||||
152
agent_v0/agent_v1/core/policy.py
Normal file
152
agent_v0/agent_v1/core/policy.py
Normal file
@@ -0,0 +1,152 @@
|
|||||||
|
# agent_v1/core/policy.py
|
||||||
|
"""
|
||||||
|
Module Policy — décisions intelligentes quand le grounding échoue.
|
||||||
|
|
||||||
|
Responsabilité unique : "Le Grounding dit NOT_FOUND. Que fait-on ?"
|
||||||
|
Ne localise AUCUN élément — c'est le rôle du Grounding.
|
||||||
|
|
||||||
|
Décisions possibles :
|
||||||
|
- RETRY : re-tenter le grounding (après popup fermée, par exemple)
|
||||||
|
- SKIP : l'action n'est plus nécessaire (état déjà atteint)
|
||||||
|
- ABORT : arrêter le workflow (état incohérent)
|
||||||
|
- SUPERVISE : rendre la main à l'utilisateur
|
||||||
|
|
||||||
|
Séparé de Grounding (qui localise les éléments).
|
||||||
|
Ref: docs/PLAN_ACTEUR_V1.md — Architecture MÉSO (acteur intelligent)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from enum import Enum
|
||||||
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class Decision(Enum):
    """Possible decisions when grounding fails."""

    # Try again (after a correction: popup closed, navigation...).
    RETRY = "retry"
    # The action is unnecessary (state already reached).
    SKIP = "skip"
    # Stop the workflow (inconsistent state).
    ABORT = "abort"
    # Hand control back to the user (Léa says "I'm stuck").
    SUPERVISE = "supervise"
    # Carry on despite the failure (non-critical action).
    CONTINUE = "continue"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class PolicyDecision:
    """Result of a Policy decision."""

    decision: Decision
    # Explanation of the decision.
    reason: str
    # Corrective action that was performed (e.g. "popup fermée").
    action_taken: str = ""
    elapsed_ms: float = 0.0

    def to_dict(self) -> Dict[str, Any]:
        """Return a serialisable view; the decision is exported as its enum value."""
        return dict(
            decision=self.decision.value,
            reason=self.reason,
            action_taken=self.action_taken,
            elapsed_ms=round(self.elapsed_ms, 1),
        )
|
||||||
|
|
||||||
|
|
||||||
|
class PolicyEngine:
    """Decision engine invoked when grounding fails.

    Decision cascade:
        1. First attempt and a popup could be closed -> RETRY
        2. Retries exhausted -> ask the gemma4 actor -> SKIP / ABORT / SUPERVISE
        3. Retries still available -> RETRY

    Usage:
        policy = PolicyEngine(executor)
        decision = policy.decide(action, target_spec, retry_count=attempts)
        if decision.decision == Decision.RETRY:
            # retry the grounding
        elif decision.decision == Decision.SKIP:
            # mark as successful and move on
    """

    def __init__(self, executor):
        # Executor providing _handle_popup_vlm() and _actor_decide().
        self._executor = executor

    def decide(
        self,
        action: Dict[str, Any],
        target_spec: Dict[str, Any],
        retry_count: int = 0,
        max_retries: int = 1,
    ) -> "PolicyDecision":
        """Decide what to do after grounding has failed.

        Cascade:
            1. On the first attempt, try to close a popup -> RETRY if one was closed
            2. When retry_count >= max_retries, ask the gemma4 actor
            3. According to the actor: SKIP, ABORT or SUPERVISE
            4. Otherwise RETRY

        Args:
            action: The action that failed.
            target_spec: The target that was not found.
            retry_count: Number of retries already performed.
            max_retries: Maximum number of retries allowed.

        Returns:
            A PolicyDecision carrying the decision, its reason and the time spent.
        """
        # Monotonic clock: a wall-clock adjustment must not skew elapsed_ms.
        t_start = time.monotonic()

        def elapsed_ms() -> float:
            return (time.monotonic() - t_start) * 1000

        # Step 1: popup-closing attempt, only on the first try.
        if retry_count == 0 and self._try_close_popup():
            return PolicyDecision(
                decision=Decision.RETRY,
                reason="Popup détectée et fermée, re-tentative",
                action_taken="popup_closed",
                elapsed_ms=elapsed_ms(),
            )

        # Step 2: retries exhausted, let the actor decide.
        if retry_count >= max_retries:
            actor_decision = self._ask_actor(action, target_spec)
            # The verdict comes from a model: compare it case- and
            # whitespace-insensitively so "passer" or " PASSER\n" is honoured.
            # The raw text is kept for the reason message.
            if isinstance(actor_decision, str):
                verdict = actor_decision.strip().upper()
            else:
                verdict = ""

            if verdict == "PASSER":
                return PolicyDecision(
                    decision=Decision.SKIP,
                    reason="Acteur gemma4 : l'état est déjà atteint",
                    elapsed_ms=elapsed_ms(),
                )
            if verdict == "STOPPER":
                return PolicyDecision(
                    decision=Decision.ABORT,
                    reason="Acteur gemma4 : état incohérent, arrêt",
                    elapsed_ms=elapsed_ms(),
                )
            # EXECUTER or anything unknown -> supervised pause.
            return PolicyDecision(
                decision=Decision.SUPERVISE,
                reason=f"Acteur gemma4 : {actor_decision}, pause supervisée",
                elapsed_ms=elapsed_ms(),
            )

        # Step 3: retries still available.
        return PolicyDecision(
            decision=Decision.RETRY,
            reason=f"Retry {retry_count + 1}/{max_retries}",
            elapsed_ms=elapsed_ms(),
        )

    def _try_close_popup(self) -> bool:
        """Try to close a popup through the executor's VLM handler.

        Returns False when the handler raises; the error is logged at debug level.
        """
        try:
            return bool(self._executor._handle_popup_vlm())
        except Exception as e:
            logger.debug(f"Policy: popup handler échoué : {e}")
            return False

    def _ask_actor(self, action: Dict, target_spec: Dict) -> str:
        """Ask gemma4 to decide (PASSER/EXECUTER/STOPPER).

        Returns "EXECUTER" when the actor call raises, which decide() turns
        into a supervised pause.
        """
        try:
            return self._executor._actor_decide(action, target_spec)
        except Exception as e:
            logger.debug(f"Policy: acteur gemma4 échoué : {e}")
            return "EXECUTER"
|
||||||
215
agent_v0/agent_v1/core/recovery.py
Normal file
215
agent_v0/agent_v1/core/recovery.py
Normal file
@@ -0,0 +1,215 @@
|
|||||||
|
# agent_v1/core/recovery.py
|
||||||
|
"""
|
||||||
|
Module Recovery — mécanisme de rollback quand une action échoue.
|
||||||
|
|
||||||
|
Responsabilité : "L'action a échoué ou produit un résultat inattendu.
|
||||||
|
Comment revenir en arrière ?"
|
||||||
|
|
||||||
|
Stratégies de recovery :
|
||||||
|
1. Ctrl+Z (undo natif) — pour les frappes et modifications
|
||||||
|
2. Escape (fermer dialogue) — pour les popups/menus
|
||||||
|
3. Alt+F4 (fermer fenêtre) — si mauvaise application ouverte
|
||||||
|
4. Clic hors zone — fermer un menu déroulant
|
||||||
|
5. Navigation retour — retourner à l'écran précédent
|
||||||
|
|
||||||
|
Le Recovery est appelé par le Policy quand le Critic détecte un
|
||||||
|
résultat inattendu (pixel OK + sémantique NON = changement inattendu).
|
||||||
|
|
||||||
|
Ref: docs/VISION_RPA_INTELLIGENT.md — "Il se trompe" → correction
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from enum import Enum
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class RecoveryAction(Enum):
    """Possible recovery actions."""

    # Ctrl+Z
    UNDO = "undo"
    # Escape (close a dialog or menu)
    ESCAPE = "escape"
    # Alt+F4
    CLOSE_WINDOW = "close"
    # Click outside the area (close a menu)
    CLICK_AWAY = "click_away"
    # No recovery possible
    NONE = "none"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class RecoveryResult:
    """Result of a recovery attempt."""

    action_taken: RecoveryAction
    success: bool
    detail: str = ""

    def to_dict(self) -> Dict[str, Any]:
        """Return a serialisable view; the action is exported as its enum value."""
        return dict(
            action_taken=self.action_taken.value,
            success=self.success,
            detail=self.detail,
        )
|
||||||
|
|
||||||
|
|
||||||
|
class RecoveryEngine:
    """Recovery engine: tries to step back after a failed action.

    The recovery strategy is chosen from the type of the failed action and
    from the Critic's textual detail.

    Usage:
        recovery = RecoveryEngine(executor)
        result = recovery.attempt(failed_action, critic_detail)
        if result.success:
            # retry the action
    """

    def __init__(self, executor):
        # Executor providing .keyboard, .sct.monitors and ._click().
        self._executor = executor

    def attempt(
        self,
        failed_action: Dict[str, Any],
        critic_detail: str = "",
    ) -> "RecoveryResult":
        """Attempt a recovery after a failure.

        Strategy selection by action type:
            - click that opened the wrong thing -> Escape
            - type at the wrong place -> Ctrl+Z
            - unexpected key_combo -> Ctrl+Z
            - popup appeared -> Escape

        Args:
            failed_action: The action that failed (its "type" key is read).
            critic_detail: Critic detail (reason for the semantic failure).
                None is treated as an empty string.

        Returns:
            A RecoveryResult; success=False with action NONE when no strategy applies.
        """
        action_type = failed_action.get("type", "")
        detail_lower = (critic_detail or "").lower()

        strategy = self._select_strategy(action_type, detail_lower)
        if strategy == RecoveryAction.NONE:
            return RecoveryResult(
                action_taken=RecoveryAction.NONE,
                success=False,
                detail="Pas de stratégie de recovery applicable",
            )

        return self._execute_recovery(strategy)

    def _select_strategy(self, action_type: str, critic_detail: str) -> "RecoveryAction":
        """Select the recovery strategy.

        Priority: action type first (typing -> undo), then keywords found in
        critic_detail, which the caller passes lower-cased.
        """
        # Wrong keystrokes or edits: always Ctrl+Z.
        if action_type in ("type", "key_combo"):
            return RecoveryAction.UNDO

        # Popup / dialog detected.
        if any(w in critic_detail for w in ["popup", "dialog", "erreur", "error", "modal"]):
            return RecoveryAction.ESCAPE

        # Menu opened by mistake.
        if any(w in critic_detail for w in ["menu", "dropdown", "déroulant"]):
            return RecoveryAction.ESCAPE

        # Wrong window opened.
        if any(w in critic_detail for w in ["mauvaise fenêtre", "wrong window"]):
            return RecoveryAction.CLOSE_WINDOW

        # Click that produced an unexpected result.
        if action_type == "click":
            return RecoveryAction.ESCAPE

        return RecoveryAction.NONE

    @staticmethod
    def _tap(keyboard, key, modifier=None) -> None:
        """Press and release `key`, optionally while holding `modifier`.

        The modifier is released in a `finally` so it cannot stay held down
        when pressing or releasing `key` raises.
        """
        if modifier is None:
            keyboard.press(key)
            keyboard.release(key)
            return
        keyboard.press(modifier)
        try:
            keyboard.press(key)
            keyboard.release(key)
        finally:
            keyboard.release(modifier)

    def _execute_recovery(self, strategy: "RecoveryAction") -> "RecoveryResult":
        """Execute the chosen recovery strategy.

        Any exception, including a missing pynput or a missing executor
        attribute, is reported as a RecoveryResult with success=False instead
        of propagating to the caller.
        """
        try:
            if strategy == RecoveryAction.UNDO:
                from pynput.keyboard import Key

                logger.info("Recovery : Ctrl+Z (undo)")
                print(" [RECOVERY] Ctrl+Z — annulation de la dernière action")
                self._tap(self._executor.keyboard, 'z', modifier=Key.ctrl)
                time.sleep(0.5)
                return RecoveryResult(
                    action_taken=RecoveryAction.UNDO,
                    success=True,
                    detail="Ctrl+Z exécuté",
                )

            elif strategy == RecoveryAction.ESCAPE:
                from pynput.keyboard import Key

                logger.info("Recovery : Escape (fermer dialogue)")
                print(" [RECOVERY] Escape — fermeture dialogue/menu")
                self._tap(self._executor.keyboard, Key.esc)
                time.sleep(0.5)
                return RecoveryResult(
                    action_taken=RecoveryAction.ESCAPE,
                    success=True,
                    detail="Escape exécuté",
                )

            elif strategy == RecoveryAction.CLOSE_WINDOW:
                from pynput.keyboard import Key

                # NOTE(review): the active window title is only looked up and
                # logged here; Alt+F4 is sent whatever the title is. On a
                # hospital workstation this can close the patient record (DPI)
                # window — consider an allow/deny check before sending the keys.
                active_title = "?"
                try:
                    from ..window_info_crossplatform import get_active_window_info
                    active = get_active_window_info()
                    active_title = active.get("title", "")
                    logger.info(f"Recovery : Alt+F4 sur '{active_title}'")
                    print(f" [RECOVERY] Alt+F4 — fermeture de '{active_title}'")
                except Exception:
                    logger.info("Recovery : Alt+F4 (fenêtre active inconnue)")
                    print(" [RECOVERY] Alt+F4 — fermeture fenêtre indésirable")

                self._tap(self._executor.keyboard, Key.f4, modifier=Key.alt)
                time.sleep(1.0)
                return RecoveryResult(
                    action_taken=RecoveryAction.CLOSE_WINDOW,
                    success=True,
                    detail=f"Alt+F4 exécuté sur '{active_title}'",
                )

            elif strategy == RecoveryAction.CLICK_AWAY:
                logger.info("Recovery : clic hors zone")
                print(" [RECOVERY] Clic hors zone — fermeture menu")
                monitor = self._executor.sct.monitors[1]
                w, h = monitor["width"], monitor["height"]
                # Click at 10% of the width and height from the top-left corner.
                self._executor._click((int(w * 0.1), int(h * 0.1)), "left")
                time.sleep(0.5)
                return RecoveryResult(
                    action_taken=RecoveryAction.CLICK_AWAY,
                    success=True,
                    detail="Clic hors zone exécuté",
                )

        except Exception as e:
            logger.warning(f"Recovery échoué ({strategy.value}) : {e}")
            return RecoveryResult(
                action_taken=strategy,
                success=False,
                detail=f"Erreur : {e}",
            )

        # Reached only for a strategy none of the branches above handles.
        return RecoveryResult(
            action_taken=RecoveryAction.NONE,
            success=False,
            detail="Stratégie non implémentée",
        )
|
||||||
@@ -1,55 +0,0 @@
|
|||||||
# window_info.py
|
|
||||||
"""
|
|
||||||
Récupération des informations sur la fenêtre active (X11).
|
|
||||||
|
|
||||||
v0 :
|
|
||||||
- utilise xdotool pour obtenir :
|
|
||||||
- le titre de la fenêtre active
|
|
||||||
- le PID de la fenêtre active, puis le nom du process via ps
|
|
||||||
|
|
||||||
Si quelque chose ne fonctionne pas, on renvoie des valeurs "unknown".
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import subprocess
|
|
||||||
from typing import Dict, Optional
|
|
||||||
|
|
||||||
|
|
||||||
def _run_cmd(cmd: list[str]) -> Optional[str]:
|
|
||||||
"""Exécute une commande et renvoie la sortie texte (strippée), ou None en cas d'erreur."""
|
|
||||||
try:
|
|
||||||
out = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
|
|
||||||
return out.decode("utf-8", errors="ignore").strip()
|
|
||||||
except Exception:
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def get_active_window_info() -> Dict[str, str]:
    """Return {"title": ..., "app_name": ...} for the active window.

    Both values come from xdotool (the process name is then resolved with
    `ps`), so xdotool must be installed. Anything that cannot be obtained is
    replaced by "unknown_window" / "unknown_app".
    """
    window_title = _run_cmd(["xdotool", "getactivewindow", "getwindowname"])
    raw_pid = _run_cmd(["xdotool", "getactivewindow", "getwindowpid"])

    # Resolve the binary name through ps only when a PID was obtained.
    process_name: Optional[str] = (
        _run_cmd(["ps", "-p", raw_pid.strip(), "-o", "comm="]) if raw_pid else None
    )

    return {
        "title": window_title or "unknown_window",
        "app_name": process_name or "unknown_app",
    }
|
|
||||||
@@ -1,192 +0,0 @@
|
|||||||
# window_info_crossplatform.py
|
|
||||||
"""
|
|
||||||
Récupération des informations sur la fenêtre active - CROSS-PLATFORM
|
|
||||||
|
|
||||||
Supporte:
|
|
||||||
- Linux (X11 via xdotool)
|
|
||||||
- Windows (via pywin32)
|
|
||||||
- macOS (via pyobjc)
|
|
||||||
|
|
||||||
Installation des dépendances:
|
|
||||||
pip install pywin32 # Windows
|
|
||||||
pip install pyobjc-framework-Cocoa # macOS
|
|
||||||
pip install psutil # Tous OS
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import platform
|
|
||||||
import subprocess
|
|
||||||
from typing import Dict, Optional
|
|
||||||
|
|
||||||
|
|
||||||
def _run_cmd(cmd: list[str]) -> Optional[str]:
|
|
||||||
"""Exécute une commande et renvoie la sortie texte (strippée), ou None en cas d'erreur."""
|
|
||||||
try:
|
|
||||||
out = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
|
|
||||||
return out.decode("utf-8", errors="ignore").strip()
|
|
||||||
except Exception:
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def get_active_window_info() -> Dict[str, str]:
    """Return {"title": ..., "app_name": ...} for the active window.

    The OS is detected with platform.system() and the matching backend is
    called; an unrecognised OS yields the "unknown" placeholders.
    """
    backend_by_os = {
        "Linux": "_get_window_info_linux",
        "Windows": "_get_window_info_windows",
        "Darwin": "_get_window_info_macos",  # macOS
    }
    backend_name = backend_by_os.get(platform.system())
    if backend_name is None:
        return {"title": "unknown_window", "app_name": "unknown_app"}
    # Looked up by name at call time so only the selected backend is touched.
    return globals()[backend_name]()
|
|
||||||
|
|
||||||
|
|
||||||
def _get_window_info_linux() -> Dict[str, str]:
|
|
||||||
"""
|
|
||||||
Linux: utilise xdotool (X11)
|
|
||||||
|
|
||||||
Nécessite: sudo apt-get install xdotool
|
|
||||||
"""
|
|
||||||
title = _run_cmd(["xdotool", "getactivewindow", "getwindowname"])
|
|
||||||
pid_str = _run_cmd(["xdotool", "getactivewindow", "getwindowpid"])
|
|
||||||
|
|
||||||
app_name: Optional[str] = None
|
|
||||||
if pid_str:
|
|
||||||
pid_str = pid_str.strip()
|
|
||||||
# On récupère le nom du binaire via ps
|
|
||||||
app_name = _run_cmd(["ps", "-p", pid_str, "-o", "comm="])
|
|
||||||
|
|
||||||
if not title:
|
|
||||||
title = "unknown_window"
|
|
||||||
if not app_name:
|
|
||||||
app_name = "unknown_app"
|
|
||||||
|
|
||||||
return {
|
|
||||||
"title": title,
|
|
||||||
"app_name": app_name,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def _get_window_info_windows() -> Dict[str, str]:
|
|
||||||
"""
|
|
||||||
Windows: utilise pywin32 + psutil
|
|
||||||
|
|
||||||
Nécessite: pip install pywin32 psutil
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
import win32gui
|
|
||||||
import win32process
|
|
||||||
import psutil
|
|
||||||
|
|
||||||
# Fenêtre au premier plan
|
|
||||||
hwnd = win32gui.GetForegroundWindow()
|
|
||||||
|
|
||||||
# Titre de la fenêtre
|
|
||||||
title = win32gui.GetWindowText(hwnd)
|
|
||||||
if not title:
|
|
||||||
title = "unknown_window"
|
|
||||||
|
|
||||||
# PID du processus
|
|
||||||
_, pid = win32process.GetWindowThreadProcessId(hwnd)
|
|
||||||
|
|
||||||
# Nom du processus
|
|
||||||
try:
|
|
||||||
process = psutil.Process(pid)
|
|
||||||
app_name = process.name()
|
|
||||||
except (psutil.NoSuchProcess, psutil.AccessDenied):
|
|
||||||
app_name = "unknown_app"
|
|
||||||
|
|
||||||
return {
|
|
||||||
"title": title,
|
|
||||||
"app_name": app_name,
|
|
||||||
}
|
|
||||||
|
|
||||||
except ImportError:
|
|
||||||
# pywin32 ou psutil non installé
|
|
||||||
return {
|
|
||||||
"title": "unknown_window (pywin32 missing)",
|
|
||||||
"app_name": "unknown_app (pywin32 missing)",
|
|
||||||
}
|
|
||||||
except Exception as e:
|
|
||||||
return {
|
|
||||||
"title": f"error: {e}",
|
|
||||||
"app_name": "unknown_app",
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def _get_window_info_macos() -> Dict[str, str]:
|
|
||||||
"""
|
|
||||||
macOS: utilise pyobjc (AppKit)
|
|
||||||
|
|
||||||
Nécessite: pip install pyobjc-framework-Cocoa
|
|
||||||
|
|
||||||
Note: Nécessite les permissions "Accessibility" dans System Preferences
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
from AppKit import NSWorkspace
|
|
||||||
from Quartz import (
|
|
||||||
CGWindowListCopyWindowInfo,
|
|
||||||
kCGWindowListOptionOnScreenOnly,
|
|
||||||
kCGNullWindowID
|
|
||||||
)
|
|
||||||
|
|
||||||
# Application active
|
|
||||||
active_app = NSWorkspace.sharedWorkspace().activeApplication()
|
|
||||||
app_name = active_app.get('NSApplicationName', 'unknown_app')
|
|
||||||
|
|
||||||
# Titre de la fenêtre (via Quartz)
|
|
||||||
# On cherche la fenêtre de l'app active qui est au premier plan
|
|
||||||
window_list = CGWindowListCopyWindowInfo(
|
|
||||||
kCGWindowListOptionOnScreenOnly,
|
|
||||||
kCGNullWindowID
|
|
||||||
)
|
|
||||||
|
|
||||||
title = "unknown_window"
|
|
||||||
for window in window_list:
|
|
||||||
owner_name = window.get('kCGWindowOwnerName', '')
|
|
||||||
if owner_name == app_name:
|
|
||||||
window_title = window.get('kCGWindowName', '')
|
|
||||||
if window_title:
|
|
||||||
title = window_title
|
|
||||||
break
|
|
||||||
|
|
||||||
return {
|
|
||||||
"title": title,
|
|
||||||
"app_name": app_name,
|
|
||||||
}
|
|
||||||
|
|
||||||
except ImportError:
|
|
||||||
# pyobjc non installé
|
|
||||||
return {
|
|
||||||
"title": "unknown_window (pyobjc missing)",
|
|
||||||
"app_name": "unknown_app (pyobjc missing)",
|
|
||||||
}
|
|
||||||
except Exception as e:
|
|
||||||
return {
|
|
||||||
"title": f"error: {e}",
|
|
||||||
"app_name": "unknown_app",
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
# Quick manual check: print the active window once per second, five times.
if __name__ == "__main__":
    import time

    for banner in (
        f"OS détecté: {platform.system()}",
        "\nTest de capture fenêtre active (5 secondes)...",
        "Changez de fenêtre pour tester!\n",
    ):
        print(banner)

    i = 0
    while i < 5:
        info = get_active_window_info()
        print(f"[{i+1}] App: {info['app_name']:20s} | Title: {info['title']}")
        time.sleep(1)
        i += 1
|
|
||||||
@@ -1,55 +0,0 @@
|
|||||||
# window_info.py
|
|
||||||
"""
|
|
||||||
Récupération des informations sur la fenêtre active (X11).
|
|
||||||
|
|
||||||
v0 :
|
|
||||||
- utilise xdotool pour obtenir :
|
|
||||||
- le titre de la fenêtre active
|
|
||||||
- le PID de la fenêtre active, puis le nom du process via ps
|
|
||||||
|
|
||||||
Si quelque chose ne fonctionne pas, on renvoie des valeurs "unknown".
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import subprocess
|
|
||||||
from typing import Dict, Optional
|
|
||||||
|
|
||||||
|
|
||||||
def _run_cmd(cmd: list[str]) -> Optional[str]:
|
|
||||||
"""Exécute une commande et renvoie la sortie texte (strippée), ou None en cas d'erreur."""
|
|
||||||
try:
|
|
||||||
out = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
|
|
||||||
return out.decode("utf-8", errors="ignore").strip()
|
|
||||||
except Exception:
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def get_active_window_info() -> Dict[str, str]:
    """Return {"title": ..., "app_name": ...} for the active window.

    Both values come from xdotool (the process name is then resolved with
    `ps`), so xdotool must be installed. Anything that cannot be obtained is
    replaced by "unknown_window" / "unknown_app".
    """
    window_title = _run_cmd(["xdotool", "getactivewindow", "getwindowname"])
    raw_pid = _run_cmd(["xdotool", "getactivewindow", "getwindowpid"])

    # Resolve the binary name through ps only when a PID was obtained.
    process_name: Optional[str] = (
        _run_cmd(["ps", "-p", raw_pid.strip(), "-o", "comm="]) if raw_pid else None
    )

    return {
        "title": window_title or "unknown_window",
        "app_name": process_name or "unknown_app",
    }
|
|
||||||
@@ -1,192 +0,0 @@
|
|||||||
# window_info_crossplatform.py
|
|
||||||
"""
|
|
||||||
Récupération des informations sur la fenêtre active - CROSS-PLATFORM
|
|
||||||
|
|
||||||
Supporte:
|
|
||||||
- Linux (X11 via xdotool)
|
|
||||||
- Windows (via pywin32)
|
|
||||||
- macOS (via pyobjc)
|
|
||||||
|
|
||||||
Installation des dépendances:
|
|
||||||
pip install pywin32 # Windows
|
|
||||||
pip install pyobjc-framework-Cocoa # macOS
|
|
||||||
pip install psutil # Tous OS
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import platform
|
|
||||||
import subprocess
|
|
||||||
from typing import Dict, Optional
|
|
||||||
|
|
||||||
|
|
||||||
def _run_cmd(cmd: list[str]) -> Optional[str]:
|
|
||||||
"""Exécute une commande et renvoie la sortie texte (strippée), ou None en cas d'erreur."""
|
|
||||||
try:
|
|
||||||
out = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
|
|
||||||
return out.decode("utf-8", errors="ignore").strip()
|
|
||||||
except Exception:
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def get_active_window_info() -> Dict[str, str]:
    """Return the title and application name of the active window.

    The operating system is detected with ``platform.system()`` and the
    matching platform-specific helper is called.

    Returns:
        A dict with the keys ``"title"`` and ``"app_name"``. On an
        unrecognised system both hold the ``"unknown_*"`` placeholders.
    """
    backends = {
        "Linux": _get_window_info_linux,
        "Windows": _get_window_info_windows,
        "Darwin": _get_window_info_macos,  # macOS
    }
    backend = backends.get(platform.system())
    if backend is None:
        return {"title": "unknown_window", "app_name": "unknown_app"}
    return backend()
|
|
||||||
|
|
||||||
|
|
||||||
def _get_window_info_linux() -> Dict[str, str]:
    """Linux backend: query the active X11 window through xdotool.

    Requires: sudo apt-get install xdotool

    Returns:
        A dict with the keys ``"title"`` and ``"app_name"``. A value that
        comes back empty or ``None`` is replaced by ``"unknown_window"`` or
        ``"unknown_app"`` respectively.
    """
    window_title = _run_cmd(["xdotool", "getactivewindow", "getwindowname"])
    window_pid = _run_cmd(["xdotool", "getactivewindow", "getwindowpid"])

    # The binary name is resolved through ps, and only when a PID was obtained.
    process_name: Optional[str] = (
        _run_cmd(["ps", "-p", window_pid.strip(), "-o", "comm="])
        if window_pid
        else None
    )

    return {
        "title": window_title or "unknown_window",
        "app_name": process_name or "unknown_app",
    }
|
|
||||||
|
|
||||||
|
|
||||||
def _get_window_info_windows() -> Dict[str, str]:
|
|
||||||
"""
|
|
||||||
Windows: utilise pywin32 + psutil
|
|
||||||
|
|
||||||
Nécessite: pip install pywin32 psutil
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
import win32gui
|
|
||||||
import win32process
|
|
||||||
import psutil
|
|
||||||
|
|
||||||
# Fenêtre au premier plan
|
|
||||||
hwnd = win32gui.GetForegroundWindow()
|
|
||||||
|
|
||||||
# Titre de la fenêtre
|
|
||||||
title = win32gui.GetWindowText(hwnd)
|
|
||||||
if not title:
|
|
||||||
title = "unknown_window"
|
|
||||||
|
|
||||||
# PID du processus
|
|
||||||
_, pid = win32process.GetWindowThreadProcessId(hwnd)
|
|
||||||
|
|
||||||
# Nom du processus
|
|
||||||
try:
|
|
||||||
process = psutil.Process(pid)
|
|
||||||
app_name = process.name()
|
|
||||||
except (psutil.NoSuchProcess, psutil.AccessDenied):
|
|
||||||
app_name = "unknown_app"
|
|
||||||
|
|
||||||
return {
|
|
||||||
"title": title,
|
|
||||||
"app_name": app_name,
|
|
||||||
}
|
|
||||||
|
|
||||||
except ImportError:
|
|
||||||
# pywin32 ou psutil non installé
|
|
||||||
return {
|
|
||||||
"title": "unknown_window (pywin32 missing)",
|
|
||||||
"app_name": "unknown_app (pywin32 missing)",
|
|
||||||
}
|
|
||||||
except Exception as e:
|
|
||||||
return {
|
|
||||||
"title": f"error: {e}",
|
|
||||||
"app_name": "unknown_app",
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def _get_window_info_macos() -> Dict[str, str]:
|
|
||||||
"""
|
|
||||||
macOS: utilise pyobjc (AppKit)
|
|
||||||
|
|
||||||
Nécessite: pip install pyobjc-framework-Cocoa
|
|
||||||
|
|
||||||
Note: Nécessite les permissions "Accessibility" dans System Preferences
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
from AppKit import NSWorkspace
|
|
||||||
from Quartz import (
|
|
||||||
CGWindowListCopyWindowInfo,
|
|
||||||
kCGWindowListOptionOnScreenOnly,
|
|
||||||
kCGNullWindowID
|
|
||||||
)
|
|
||||||
|
|
||||||
# Application active
|
|
||||||
active_app = NSWorkspace.sharedWorkspace().activeApplication()
|
|
||||||
app_name = active_app.get('NSApplicationName', 'unknown_app')
|
|
||||||
|
|
||||||
# Titre de la fenêtre (via Quartz)
|
|
||||||
# On cherche la fenêtre de l'app active qui est au premier plan
|
|
||||||
window_list = CGWindowListCopyWindowInfo(
|
|
||||||
kCGWindowListOptionOnScreenOnly,
|
|
||||||
kCGNullWindowID
|
|
||||||
)
|
|
||||||
|
|
||||||
title = "unknown_window"
|
|
||||||
for window in window_list:
|
|
||||||
owner_name = window.get('kCGWindowOwnerName', '')
|
|
||||||
if owner_name == app_name:
|
|
||||||
window_title = window.get('kCGWindowName', '')
|
|
||||||
if window_title:
|
|
||||||
title = window_title
|
|
||||||
break
|
|
||||||
|
|
||||||
return {
|
|
||||||
"title": title,
|
|
||||||
"app_name": app_name,
|
|
||||||
}
|
|
||||||
|
|
||||||
except ImportError:
|
|
||||||
# pyobjc non installé
|
|
||||||
return {
|
|
||||||
"title": "unknown_window (pyobjc missing)",
|
|
||||||
"app_name": "unknown_app (pyobjc missing)",
|
|
||||||
}
|
|
||||||
except Exception as e:
|
|
||||||
return {
|
|
||||||
"title": f"error: {e}",
|
|
||||||
"app_name": "unknown_app",
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
# Test rapide
|
|
||||||
if __name__ == "__main__":
    import time

    # Manual smoke test: print the detected OS, then sample the active
    # window once per second, five times.
    banner = (
        f"OS détecté: {platform.system()}",
        "\nTest de capture fenêtre active (5 secondes)...",
        "Changez de fenêtre pour tester!\n",
    )
    for banner_line in banner:
        print(banner_line)

    for i in range(5):
        info = get_active_window_info()
        print(f"[{i+1}] App: {info['app_name']:20s} | Title: {info['title']}")
        time.sleep(1)
|
|
||||||
@@ -1,55 +0,0 @@
|
|||||||
# window_info.py
|
|
||||||
"""
|
|
||||||
Récupération des informations sur la fenêtre active (X11).
|
|
||||||
|
|
||||||
v0 :
|
|
||||||
- utilise xdotool pour obtenir :
|
|
||||||
- le titre de la fenêtre active
|
|
||||||
- le PID de la fenêtre active, puis le nom du process via ps
|
|
||||||
|
|
||||||
Si quelque chose ne fonctionne pas, on renvoie des valeurs "unknown".
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import subprocess
|
|
||||||
from typing import Dict, Optional
|
|
||||||
|
|
||||||
|
|
||||||
def _run_cmd(cmd: list[str]) -> Optional[str]:
|
|
||||||
"""Exécute une commande et renvoie la sortie texte (strippée), ou None en cas d'erreur."""
|
|
||||||
try:
|
|
||||||
out = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
|
|
||||||
return out.decode("utf-8", errors="ignore").strip()
|
|
||||||
except Exception:
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def get_active_window_info() -> Dict[str, str]:
    """Describe the currently active X11 window.

    Asks xdotool for the active window's title and PID, then asks ``ps`` for
    the command name of that PID. Requires xdotool to be installed.

    Returns:
        A dict with the keys ``"title"`` and ``"app_name"``. A value that
        comes back empty or ``None`` is replaced by ``"unknown_window"`` or
        ``"unknown_app"`` respectively.
    """
    window_title = _run_cmd(["xdotool", "getactivewindow", "getwindowname"])
    window_pid = _run_cmd(["xdotool", "getactivewindow", "getwindowpid"])

    # The binary name is resolved through ps, and only when a PID was obtained.
    process_name: Optional[str] = (
        _run_cmd(["ps", "-p", window_pid.strip(), "-o", "comm="])
        if window_pid
        else None
    )

    return {
        "title": window_title or "unknown_window",
        "app_name": process_name or "unknown_app",
    }
|
|
||||||
@@ -1,192 +0,0 @@
|
|||||||
# window_info_crossplatform.py
|
|
||||||
"""
|
|
||||||
Récupération des informations sur la fenêtre active - CROSS-PLATFORM
|
|
||||||
|
|
||||||
Supporte:
|
|
||||||
- Linux (X11 via xdotool)
|
|
||||||
- Windows (via pywin32)
|
|
||||||
- macOS (via pyobjc)
|
|
||||||
|
|
||||||
Installation des dépendances:
|
|
||||||
pip install pywin32 # Windows
|
|
||||||
pip install pyobjc-framework-Cocoa # macOS
|
|
||||||
pip install psutil # Tous OS
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import platform
|
|
||||||
import subprocess
|
|
||||||
from typing import Dict, Optional
|
|
||||||
|
|
||||||
|
|
||||||
def _run_cmd(cmd: list[str]) -> Optional[str]:
|
|
||||||
"""Exécute une commande et renvoie la sortie texte (strippée), ou None en cas d'erreur."""
|
|
||||||
try:
|
|
||||||
out = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
|
|
||||||
return out.decode("utf-8", errors="ignore").strip()
|
|
||||||
except Exception:
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def get_active_window_info() -> Dict[str, str]:
    """Return the title and application name of the active window.

    The operating system is detected with ``platform.system()`` and the
    matching platform-specific helper is called.

    Returns:
        A dict with the keys ``"title"`` and ``"app_name"``. On an
        unrecognised system both hold the ``"unknown_*"`` placeholders.
    """
    backends = {
        "Linux": _get_window_info_linux,
        "Windows": _get_window_info_windows,
        "Darwin": _get_window_info_macos,  # macOS
    }
    backend = backends.get(platform.system())
    if backend is None:
        return {"title": "unknown_window", "app_name": "unknown_app"}
    return backend()
|
|
||||||
|
|
||||||
|
|
||||||
def _get_window_info_linux() -> Dict[str, str]:
    """Linux backend: query the active X11 window through xdotool.

    Requires: sudo apt-get install xdotool

    Returns:
        A dict with the keys ``"title"`` and ``"app_name"``. A value that
        comes back empty or ``None`` is replaced by ``"unknown_window"`` or
        ``"unknown_app"`` respectively.
    """
    window_title = _run_cmd(["xdotool", "getactivewindow", "getwindowname"])
    window_pid = _run_cmd(["xdotool", "getactivewindow", "getwindowpid"])

    # The binary name is resolved through ps, and only when a PID was obtained.
    process_name: Optional[str] = (
        _run_cmd(["ps", "-p", window_pid.strip(), "-o", "comm="])
        if window_pid
        else None
    )

    return {
        "title": window_title or "unknown_window",
        "app_name": process_name or "unknown_app",
    }
|
|
||||||
|
|
||||||
|
|
||||||
def _get_window_info_windows() -> Dict[str, str]:
|
|
||||||
"""
|
|
||||||
Windows: utilise pywin32 + psutil
|
|
||||||
|
|
||||||
Nécessite: pip install pywin32 psutil
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
import win32gui
|
|
||||||
import win32process
|
|
||||||
import psutil
|
|
||||||
|
|
||||||
# Fenêtre au premier plan
|
|
||||||
hwnd = win32gui.GetForegroundWindow()
|
|
||||||
|
|
||||||
# Titre de la fenêtre
|
|
||||||
title = win32gui.GetWindowText(hwnd)
|
|
||||||
if not title:
|
|
||||||
title = "unknown_window"
|
|
||||||
|
|
||||||
# PID du processus
|
|
||||||
_, pid = win32process.GetWindowThreadProcessId(hwnd)
|
|
||||||
|
|
||||||
# Nom du processus
|
|
||||||
try:
|
|
||||||
process = psutil.Process(pid)
|
|
||||||
app_name = process.name()
|
|
||||||
except (psutil.NoSuchProcess, psutil.AccessDenied):
|
|
||||||
app_name = "unknown_app"
|
|
||||||
|
|
||||||
return {
|
|
||||||
"title": title,
|
|
||||||
"app_name": app_name,
|
|
||||||
}
|
|
||||||
|
|
||||||
except ImportError:
|
|
||||||
# pywin32 ou psutil non installé
|
|
||||||
return {
|
|
||||||
"title": "unknown_window (pywin32 missing)",
|
|
||||||
"app_name": "unknown_app (pywin32 missing)",
|
|
||||||
}
|
|
||||||
except Exception as e:
|
|
||||||
return {
|
|
||||||
"title": f"error: {e}",
|
|
||||||
"app_name": "unknown_app",
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def _get_window_info_macos() -> Dict[str, str]:
|
|
||||||
"""
|
|
||||||
macOS: utilise pyobjc (AppKit)
|
|
||||||
|
|
||||||
Nécessite: pip install pyobjc-framework-Cocoa
|
|
||||||
|
|
||||||
Note: Nécessite les permissions "Accessibility" dans System Preferences
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
from AppKit import NSWorkspace
|
|
||||||
from Quartz import (
|
|
||||||
CGWindowListCopyWindowInfo,
|
|
||||||
kCGWindowListOptionOnScreenOnly,
|
|
||||||
kCGNullWindowID
|
|
||||||
)
|
|
||||||
|
|
||||||
# Application active
|
|
||||||
active_app = NSWorkspace.sharedWorkspace().activeApplication()
|
|
||||||
app_name = active_app.get('NSApplicationName', 'unknown_app')
|
|
||||||
|
|
||||||
# Titre de la fenêtre (via Quartz)
|
|
||||||
# On cherche la fenêtre de l'app active qui est au premier plan
|
|
||||||
window_list = CGWindowListCopyWindowInfo(
|
|
||||||
kCGWindowListOptionOnScreenOnly,
|
|
||||||
kCGNullWindowID
|
|
||||||
)
|
|
||||||
|
|
||||||
title = "unknown_window"
|
|
||||||
for window in window_list:
|
|
||||||
owner_name = window.get('kCGWindowOwnerName', '')
|
|
||||||
if owner_name == app_name:
|
|
||||||
window_title = window.get('kCGWindowName', '')
|
|
||||||
if window_title:
|
|
||||||
title = window_title
|
|
||||||
break
|
|
||||||
|
|
||||||
return {
|
|
||||||
"title": title,
|
|
||||||
"app_name": app_name,
|
|
||||||
}
|
|
||||||
|
|
||||||
except ImportError:
|
|
||||||
# pyobjc non installé
|
|
||||||
return {
|
|
||||||
"title": "unknown_window (pyobjc missing)",
|
|
||||||
"app_name": "unknown_app (pyobjc missing)",
|
|
||||||
}
|
|
||||||
except Exception as e:
|
|
||||||
return {
|
|
||||||
"title": f"error: {e}",
|
|
||||||
"app_name": "unknown_app",
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
# Test rapide
|
|
||||||
if __name__ == "__main__":
    import time

    # Manual smoke test: print the detected OS, then sample the active
    # window once per second, five times.
    banner = (
        f"OS détecté: {platform.system()}",
        "\nTest de capture fenêtre active (5 secondes)...",
        "Changez de fenêtre pour tester!\n",
    )
    for banner_line in banner:
        print(banner_line)

    for i in range(5):
        info = get_active_window_info()
        print(f"[{i+1}] App: {info['app_name']:20s} | Title: {info['title']}")
        time.sleep(1)
|
|
||||||
@@ -41,8 +41,6 @@ FILE_MANIFEST: list[tuple[str, str]] = [
|
|||||||
("agent_v1/core/__init__.py", "agent_v1/core/__init__.py"),
|
("agent_v1/core/__init__.py", "agent_v1/core/__init__.py"),
|
||||||
("agent_v1/core/captor.py", "agent_v1/core/captor.py"),
|
("agent_v1/core/captor.py", "agent_v1/core/captor.py"),
|
||||||
("agent_v1/core/executor.py", "agent_v1/core/executor.py"),
|
("agent_v1/core/executor.py", "agent_v1/core/executor.py"),
|
||||||
("agent_v1/core/window_info.py", "agent_v1/core/window_info.py"),
|
|
||||||
("agent_v1/core/window_info_crossplatform.py", "agent_v1/core/window_info_crossplatform.py"),
|
|
||||||
|
|
||||||
# agent_v1/network
|
# agent_v1/network
|
||||||
("agent_v1/network/__init__.py", "agent_v1/network/__init__.py"),
|
("agent_v1/network/__init__.py", "agent_v1/network/__init__.py"),
|
||||||
|
|||||||
@@ -1,13 +1,6 @@
|
|||||||
# agent_v0.lea_ui — Interface utilisateur "Lea"
|
# agent_v0.lea_ui — Communication serveur pour l'agent Léa
|
||||||
#
|
#
|
||||||
# Panneau PyQt5 integre qui remplace le system tray + navigateur web
|
# Composant :
|
||||||
# par une interface unifiee pour piloter l'Agent RPA Vision V3.
|
|
||||||
#
|
|
||||||
# Composants :
|
|
||||||
# - LeaMainWindow : fenetre principale ancree a droite
|
|
||||||
# - ChatWidget : zone de conversation avec le serveur
|
|
||||||
# - OverlayWidget : feedback visuel pendant le replay
|
|
||||||
# - LeaServerClient : client API vers le serveur Linux
|
# - LeaServerClient : client API vers le serveur Linux
|
||||||
# - styles : theme et couleurs
|
|
||||||
|
|
||||||
__version__ = "0.1.0"
|
__version__ = "0.1.0"
|
||||||
|
|||||||
@@ -1,6 +0,0 @@
|
|||||||
# agent_v0/lea_ui/__main__.py
|
|
||||||
"""Permet le lancement via: python -m agent_v0.lea_ui"""
|
|
||||||
|
|
||||||
from .launcher import main
|
|
||||||
|
|
||||||
main()
|
|
||||||
@@ -1,250 +0,0 @@
|
|||||||
# agent_v0/lea_ui/chat_widget.py
|
|
||||||
"""
|
|
||||||
Widget de chat pour l'interface Lea.
|
|
||||||
|
|
||||||
Affiche les messages avec des bulles :
|
|
||||||
- Utilisateur a droite (fond indigo)
|
|
||||||
- Lea a gauche (fond blanc)
|
|
||||||
|
|
||||||
Communique avec le serveur Linux via LeaServerClient.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import logging
|
|
||||||
from typing import List, Optional
|
|
||||||
|
|
||||||
from PyQt5.QtCore import (
|
|
||||||
QPropertyAnimation,
|
|
||||||
QSize,
|
|
||||||
Qt,
|
|
||||||
QTimer,
|
|
||||||
pyqtSignal,
|
|
||||||
pyqtSlot,
|
|
||||||
)
|
|
||||||
from PyQt5.QtGui import QColor, QFont, QPainter, QPainterPath, QPen
|
|
||||||
from PyQt5.QtWidgets import (
|
|
||||||
QFrame,
|
|
||||||
QHBoxLayout,
|
|
||||||
QLabel,
|
|
||||||
QLineEdit,
|
|
||||||
QPushButton,
|
|
||||||
QScrollArea,
|
|
||||||
QSizePolicy,
|
|
||||||
QVBoxLayout,
|
|
||||||
QWidget,
|
|
||||||
)
|
|
||||||
|
|
||||||
from . import styles
|
|
||||||
|
|
||||||
logger = logging.getLogger("lea_ui.chat")
|
|
||||||
|
|
||||||
|
|
||||||
class ChatBubble(QFrame):
    """A single chat message rendered as a rounded bubble.

    User bubbles use the user bubble colour without a border; Lea bubbles use
    the Lea bubble colour with a 1px border.
    """

    def __init__(
        self,
        text: str,
        is_user: bool = False,
        parent: Optional[QWidget] = None,
    ) -> None:
        """Build the bubble frame and its text label.

        Args:
            text: Message content. User text is shown verbatim; Lea text is
                interpreted as rich text so links stay clickable.
            is_user: True for a message typed by the user, False for Lea.
            parent: Optional parent widget.
        """
        super().__init__(parent)
        self._is_user = is_user

        # Colours depend on who is speaking.
        if is_user:
            bg_color = styles.COLOR_BUBBLE_USER
            text_color = styles.COLOR_TEXT_ON_ACCENT
        else:
            bg_color = styles.COLOR_BUBBLE_LEA
            text_color = styles.COLOR_TEXT

        border_css = "none" if is_user else f"1px solid {styles.COLOR_BORDER}"
        self.setStyleSheet(f"""
            QFrame {{
                background-color: {bg_color};
                border-radius: {styles.BUBBLE_RADIUS}px;
                padding: {styles.PADDING}px;
                border: {border_css};
            }}
        """)

        layout = QVBoxLayout(self)
        layout.setContentsMargins(
            styles.PADDING, styles.PADDING // 2,
            styles.PADDING, styles.PADDING // 2,
        )

        label = QLabel(text)
        label.setWordWrap(True)
        label.setFont(QFont(styles.FONT_FAMILY, styles.FONT_SIZE_NORMAL))
        label.setStyleSheet(f"color: {text_color}; background: transparent; border: none;")
        # Text typed by the user must be displayed literally: with rich-text
        # rendering, input such as "a < b" or "<b>" is interpreted as markup.
        # NOTE(review): Lea messages stay rich text for links; if the server
        # can relay untrusted text it should be escaped upstream. Confirm.
        label.setTextFormat(Qt.PlainText if is_user else Qt.RichText)
        label.setOpenExternalLinks(True)
        layout.addWidget(label)

        self.setSizePolicy(QSizePolicy.Preferred, QSizePolicy.Minimum)
        self.setMaximumWidth(280)
|
|
||||||
|
|
||||||
|
|
||||||
class ChatWidget(QWidget):
    """Complete chat panel: a scrollable message area above an input row.

    Signals:
        message_sent(str): emitted when the user submits a non-empty message.
    """

    message_sent = pyqtSignal(str)

    def __init__(self, parent: Optional[QWidget] = None) -> None:
        """Create the widget with an empty message history."""
        super().__init__(parent)
        self._messages: List[dict] = []
        self._setup_ui()

    def _setup_ui(self) -> None:
        """Build the scroll area, the separator line and the input row."""
        root_layout = QVBoxLayout(self)
        root_layout.setContentsMargins(0, 0, 0, 0)
        root_layout.setSpacing(0)

        # --- Scrollable message area ---
        self._scroll_area = QScrollArea()
        self._scroll_area.setWidgetResizable(True)
        self._scroll_area.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self._scroll_area.setStyleSheet(styles.CHAT_AREA_STYLE)

        self._messages_container = QWidget()
        self._messages_container.setObjectName("ChatContainer")
        self._messages_layout = QVBoxLayout(self._messages_container)
        pad = styles.PADDING
        self._messages_layout.setContentsMargins(pad, pad, pad, pad)
        self._messages_layout.setSpacing(styles.SPACING)
        # Trailing stretch item; messages are always inserted just before it.
        self._messages_layout.addStretch()

        self._scroll_area.setWidget(self._messages_container)
        root_layout.addWidget(self._scroll_area, stretch=1)

        # --- Separator ---
        separator = QFrame()
        separator.setFrameShape(QFrame.HLine)
        separator.setStyleSheet(f"background-color: {styles.COLOR_BORDER}; max-height: 1px;")
        root_layout.addWidget(separator)

        # --- Input row: text field + send button ---
        input_row = QHBoxLayout()
        input_row.setContentsMargins(
            styles.PADDING, styles.SPACING,
            styles.PADDING, styles.SPACING,
        )
        input_row.setSpacing(styles.SPACING)

        self._input = QLineEdit()
        self._input.setObjectName("ChatInput")
        self._input.setPlaceholderText("Ecrivez un message...")
        self._input.setStyleSheet(styles.INPUT_STYLE)
        self._input.returnPressed.connect(self._on_send)
        input_row.addWidget(self._input, stretch=1)

        self._send_btn = QPushButton("Envoyer")
        self._send_btn.setObjectName("SendButton")
        self._send_btn.setStyleSheet(styles.SEND_BUTTON_STYLE)
        self._send_btn.setCursor(Qt.PointingHandCursor)
        self._send_btn.clicked.connect(self._on_send)
        input_row.addWidget(self._send_btn)

        root_layout.addLayout(input_row)

    def _on_send(self) -> None:
        """Submit the typed message: clear the field, show it, emit the signal."""
        typed = self._input.text().strip()
        if typed:
            self._input.clear()
            self.add_user_message(typed)
            self.message_sent.emit(typed)

    # ---------------------------------------------------------------------------
    # Public API
    # ---------------------------------------------------------------------------

    def add_user_message(self, text: str) -> None:
        """Add a user message (bubble on the right)."""
        self._add_bubble(text, is_user=True)

    def add_lea_message(self, text: str) -> None:
        """Add a message from Lea (bubble on the left)."""
        self._add_bubble(text, is_user=False)

    def add_system_message(self, text: str) -> None:
        """Add a centred, low-key system message (not recorded in the history)."""
        notice = QLabel(text)
        notice.setFont(QFont(styles.FONT_FAMILY, styles.FONT_SIZE_SMALL))
        notice.setStyleSheet(
            f"color: {styles.COLOR_TEXT_SECONDARY}; "
            f"background: transparent; padding: 4px;"
        )
        notice.setAlignment(Qt.AlignCenter)
        notice.setWordWrap(True)

        self._insert_before_stretch(notice)
        self._scroll_to_bottom()

    def set_input_enabled(self, enabled: bool) -> None:
        """Enable or disable the input row (e.g. while a reply is loading)."""
        for control in (self._input, self._send_btn):
            control.setEnabled(enabled)
        placeholder = "Ecrivez un message..." if enabled else "Lea reflechit..."
        self._input.setPlaceholderText(placeholder)

    def clear_messages(self) -> None:
        """Remove every message widget, keeping only the trailing stretch."""
        remaining = self._messages_layout.count()
        while remaining > 1:
            removed = self._messages_layout.takeAt(0).widget()
            if removed:
                removed.deleteLater()
            remaining = self._messages_layout.count()
        self._messages = []

    # ---------------------------------------------------------------------------
    # Internals
    # ---------------------------------------------------------------------------

    def _insert_before_stretch(self, widget: QWidget) -> None:
        """Insert `widget` just before the last item of the message layout."""
        position = self._messages_layout.count() - 1
        self._messages_layout.insertWidget(position, widget)

    def _add_bubble(self, text: str, is_user: bool) -> None:
        """Create a bubble, align it by sender, and record it in the history."""
        bubble = ChatBubble(text, is_user=is_user)

        # Alignment row: the stretch pushes user bubbles right, Lea bubbles left.
        row = QHBoxLayout()
        row.setContentsMargins(0, 0, 0, 0)
        if not is_user:
            row.addWidget(bubble)
            row.addStretch()
        else:
            row.addStretch()
            row.addWidget(bubble)

        wrapper = QWidget()
        wrapper.setLayout(row)
        wrapper.setStyleSheet("background: transparent;")
        self._insert_before_stretch(wrapper)

        self._messages.append({"text": text, "is_user": is_user})
        self._scroll_to_bottom()

    def _scroll_to_bottom(self) -> None:
        """Scroll to the newest message after a 50 ms delay."""

        def _jump() -> None:
            bar = self._scroll_area.verticalScrollBar()
            bar.setValue(self._scroll_area.verticalScrollBar().maximum())

        QTimer.singleShot(50, _jump)
|
|
||||||
@@ -1,218 +0,0 @@
|
|||||||
# agent_v0/lea_ui/launcher.py
|
|
||||||
"""
|
|
||||||
Point d'entree pour le panneau Lea.
|
|
||||||
|
|
||||||
Lancement autonome :
|
|
||||||
python -m agent_v0.lea_ui.launcher
|
|
||||||
|
|
||||||
Ou integre dans agent_v0/agent_v1/main.py avec flag --ui lea.
|
|
||||||
|
|
||||||
Ce module :
|
|
||||||
1. Cree l'application Qt
|
|
||||||
2. Instancie LeaServerClient
|
|
||||||
3. Instancie LeaMainWindow
|
|
||||||
4. Enregistre un raccourci global (Ctrl+Shift+L) via keyboard hook
|
|
||||||
5. Lance la boucle Qt
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
logger = logging.getLogger("lea_ui.launcher")
|
|
||||||
|
|
||||||
|
|
||||||
def _setup_logging(verbose: bool = False) -> None:
|
|
||||||
"""Configurer le logging pour le panneau Lea."""
|
|
||||||
level = logging.DEBUG if verbose else logging.INFO
|
|
||||||
logging.basicConfig(
|
|
||||||
level=level,
|
|
||||||
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
|
|
||||||
datefmt="%H:%M:%S",
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _setup_global_hotkey(window) -> Optional[object]:
    """Register the global Ctrl+Shift+L shortcut that shows/hides the panel.

    Uses the ``keyboard`` library when it is available.

    Args:
        window: Object exposing ``toggle_visibility``, called on each hotkey press.

    Returns:
        The handle returned by ``keyboard.add_hotkey`` (always truthy, so
        callers can test it), or ``None`` when the library is missing or the
        registration fails.
    """
    try:
        import keyboard

        def on_hotkey():
            # Hand the toggle over to Qt through a zero-delay timer.
            # NOTE(review): this callback presumably runs on the keyboard
            # library's own thread; confirm that a single-shot timer started
            # there is actually delivered to the Qt GUI thread.
            from PyQt5.QtCore import QTimer
            QTimer.singleShot(0, window.toggle_visibility)

        handle = keyboard.add_hotkey("ctrl+shift+l", on_hotkey)
        logger.info("Raccourci global Ctrl+Shift+L enregistre")
        # Return the real registration handle, as the contract promises;
        # fall back to True so the result stays truthy in every case.
        return handle or True
    except ImportError:
        logger.info(
            "Librairie 'keyboard' non disponible — "
            "raccourci global Ctrl+Shift+L non enregistre. "
            "Installez-la avec: pip install keyboard"
        )
        return None
    except Exception as e:
        logger.warning("Impossible d'enregistrer le raccourci global : %s", e)
        return None
|
|
||||||
|
|
||||||
|
|
||||||
def _load_environment() -> None:
    """Load environment variables from the first ``.env.local`` file found.

    Two locations are tried in order, relative to this module: two directory
    levels up, then one level up. ``python-dotenv`` is used when installed;
    otherwise the file is parsed by hand. In both cases variables that are
    already set in the environment are left untouched.
    """
    module_dir = os.path.dirname(__file__)
    candidates = [
        os.path.join(module_dir, "..", "..", ".env.local"),
        os.path.join(module_dir, "..", ".env.local"),
    ]
    for env_path in candidates:
        env_path = os.path.abspath(env_path)
        if not os.path.exists(env_path):
            continue

        try:
            from dotenv import load_dotenv
            load_dotenv(env_path)
            logger.info("Variables d'environnement chargees depuis %s", env_path)
            return
        except ImportError:
            # Fallback: minimal manual parsing of KEY=VALUE lines.
            with open(env_path, "r", encoding="utf-8") as f:
                for raw_line in f:
                    line = raw_line.strip()
                    if not line or line.startswith("#") or "=" not in line:
                        continue
                    key, value = line.split("=", 1)
                    key = key.strip()
                    if key.startswith("export "):
                        key = key[len("export "):].strip()
                    # Strip whitespace first so `KEY = "v"` loses its quotes,
                    # then remove one matching pair of surrounding quotes.
                    value = value.strip()
                    if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                        value = value[1:-1]
                    if not key:
                        # A line such as "=value" has no usable name.
                        continue
                    # Do not override existing variables, matching the
                    # default behaviour of load_dotenv above.
                    os.environ.setdefault(key, value)
            logger.info("Variables chargees manuellement depuis %s", env_path)
            return
|
|
||||||
|
|
||||||
|
|
||||||
def launch_lea(
    server_host: Optional[str] = None,
    chat_port: int = 5004,
    stream_port: int = 5005,
    verbose: bool = False,
    session_id: Optional[str] = None,
) -> None:
    """Start the Lea panel and run the Qt event loop until it exits.

    Args:
        server_host: Address of the Linux server (None = auto-detection).
        chat_port: Port of the chat server.
        stream_port: Port of the streaming server.
        verbose: Enable debug logging.
        session_id: Session identifier used for replay polling; polling is
            only started when it is provided.

    The function does not return: it ends with ``sys.exit``, using the Qt
    loop's exit code, or 1 when PyQt5 is not installed.
    """
    _setup_logging(verbose)
    _load_environment()

    # PyQt5 is imported here so that its absence yields a clear error message.
    try:
        from PyQt5.QtWidgets import QApplication
        from PyQt5.QtCore import Qt
    except ImportError:
        logger.error(
            "PyQt5 n'est pas installe. Installez-le avec :\n"
            "  pip install PyQt5"
        )
        sys.exit(1)

    from .server_client import LeaServerClient
    from .main_window import LeaMainWindow

    # Reuse the running Qt application if there is one, otherwise create it.
    qt_app = QApplication.instance()
    if qt_app is None:
        qt_app = QApplication(sys.argv)
    qt_app.setQuitOnLastWindowClosed(False)

    # Server client
    server_client = LeaServerClient(
        server_host=server_host,
        chat_port=chat_port,
        stream_port=stream_port,
    )

    # Main window
    main_window = LeaMainWindow(server_client=server_client)
    main_window.show()

    # Global shortcut
    hotkey_handle = _setup_global_hotkey(main_window)

    # Replay polling (only with a session id)
    if session_id:
        server_client.start_polling(session_id)

    logger.info(
        "Panneau Lea demarre — serveur=%s, chat_port=%d, stream_port=%d",
        server_client.server_host, chat_port, stream_port,
    )

    # Qt loop; the window is shut down and the hooks removed on the way out.
    try:
        exit_code = qt_app.exec_()
    finally:
        main_window.shutdown()
        if hotkey_handle:
            try:
                import keyboard
                keyboard.unhook_all()
            except Exception:
                pass

    sys.exit(exit_code)
|
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
    """CLI entry point: parse the command-line options and launch the panel."""
    parser = argparse.ArgumentParser(
        description="Panneau Lea — Interface utilisateur RPA Vision V3",
    )

    # (flags, keyword arguments) for each option, in declaration order.
    option_specs = [
        (
            ("--server", "-s"),
            {
                "dest": "server_host",
                "default": None,
                "help": "Adresse du serveur Linux (defaut: RPA_SERVER_HOST ou localhost)",
            },
        ),
        (
            ("--chat-port",),
            {
                "type": int,
                "default": 5004,
                "help": "Port du serveur chat (defaut: 5004)",
            },
        ),
        (
            ("--stream-port",),
            {
                "type": int,
                "default": 5005,
                "help": "Port du serveur streaming (defaut: 5005)",
            },
        ),
        (
            ("--session-id",),
            {
                "default": None,
                "help": "Identifiant de session pour le polling replay",
            },
        ),
        (
            ("--verbose", "-v"),
            {
                "action": "store_true",
                "help": "Mode debug (logs verbeux)",
            },
        ),
    ]
    for flags, options in option_specs:
        parser.add_argument(*flags, **options)

    parsed = parser.parse_args()

    launch_lea(
        server_host=parsed.server_host,
        chat_port=parsed.chat_port,
        stream_port=parsed.stream_port,
        verbose=parsed.verbose,
        session_id=parsed.session_id,
    )


if __name__ == "__main__":
    main()
|
|
||||||
@@ -1,772 +0,0 @@
|
|||||||
# agent_v0/lea_ui/main_window.py
|
|
||||||
"""
|
|
||||||
Fenetre principale du panneau Lea.
|
|
||||||
|
|
||||||
Panneau semi-transparent, ancre a droite de l'ecran, toujours visible.
|
|
||||||
Peut etre reduit en mini-barre flottante (avatar + indicateur status).
|
|
||||||
|
|
||||||
Sections :
|
|
||||||
- Header : avatar "L" + status connexion
|
|
||||||
- Zone de chat : messages entrants/sortants (natif PyQt5)
|
|
||||||
- Zone de status : progression du replay
|
|
||||||
- Boutons rapides : "Apprends-moi", "Que sais-tu faire ?"
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import logging
|
|
||||||
from typing import Dict, Any, Optional
|
|
||||||
|
|
||||||
from PyQt5.QtCore import (
|
|
||||||
QPoint,
|
|
||||||
QPropertyAnimation,
|
|
||||||
QRect,
|
|
||||||
QSize,
|
|
||||||
Qt,
|
|
||||||
QTimer,
|
|
||||||
pyqtSignal,
|
|
||||||
pyqtSlot,
|
|
||||||
)
|
|
||||||
from PyQt5.QtGui import (
|
|
||||||
QColor,
|
|
||||||
QFont,
|
|
||||||
QIcon,
|
|
||||||
QKeySequence,
|
|
||||||
QPainter,
|
|
||||||
QPainterPath,
|
|
||||||
QPen,
|
|
||||||
)
|
|
||||||
from PyQt5.QtWidgets import (
|
|
||||||
QAction,
|
|
||||||
QApplication,
|
|
||||||
QDesktopWidget,
|
|
||||||
QFrame,
|
|
||||||
QGraphicsDropShadowEffect,
|
|
||||||
QHBoxLayout,
|
|
||||||
QLabel,
|
|
||||||
QProgressBar,
|
|
||||||
QPushButton,
|
|
||||||
QShortcut,
|
|
||||||
QSizePolicy,
|
|
||||||
QVBoxLayout,
|
|
||||||
QWidget,
|
|
||||||
)
|
|
||||||
|
|
||||||
from . import styles
|
|
||||||
from .chat_widget import ChatWidget
|
|
||||||
from .overlay import OverlayWidget
|
|
||||||
from .server_client import LeaServerClient
|
|
||||||
|
|
||||||
logger = logging.getLogger("lea_ui.main_window")
|
|
||||||
|
|
||||||
|
|
||||||
class LeaAvatar(QWidget):
    """Round avatar widget showing the initial "L" and a connection dot.

    The dot in the bottom-right corner is drawn with the success colour when
    the avatar is marked connected and with the error colour otherwise.
    """

    def __init__(self, size: int = 40, parent: Optional[QWidget] = None) -> None:
        """Create a square widget of ``size`` pixels, initially disconnected."""
        super().__init__(parent)
        self._size = size
        self._connected = False
        self.setFixedSize(size, size)

    def set_connected(self, connected: bool) -> None:
        """Record the connection state and schedule a repaint."""
        self._connected = connected
        self.update()

    def paintEvent(self, event) -> None:  # noqa: N802
        """Paint the accent disc, the centred letter and the status dot."""
        side = self._size
        dot = 12
        # The dot sits one pixel inside the bottom-right corner.
        dot_origin = side - dot - 1

        brush = QPainter(self)
        brush.setRenderHint(QPainter.Antialiasing, True)

        # Background disc
        brush.setBrush(QColor(styles.COLOR_ACCENT))
        brush.setPen(Qt.NoPen)
        brush.drawEllipse(2, 2, side - 4, side - 4)

        # Initial "L"
        brush.setPen(QColor(styles.COLOR_TEXT_ON_ACCENT))
        brush.setFont(QFont(styles.FONT_FAMILY, side // 3, QFont.Bold))
        brush.drawText(QRect(0, 0, side, side), Qt.AlignCenter, "L")

        # Connection indicator, outlined with the panel background colour
        if self._connected:
            dot_color = QColor(styles.COLOR_SUCCESS)
        else:
            dot_color = QColor(styles.COLOR_ERROR)
        brush.setBrush(dot_color)
        brush.setPen(QPen(QColor(styles.COLOR_BG), 2))
        brush.drawEllipse(dot_origin, dot_origin, dot, dot)

        brush.end()
|
|
||||||
|
|
||||||
|
|
||||||
class LeaMainWindow(QWidget):
|
|
||||||
"""Panneau principal de l'interface Lea.
|
|
||||||
|
|
||||||
Fenetre semi-transparente, ancree a droite de l'ecran.
|
|
||||||
Peut basculer en mode mini-barre.
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Signal pour les actions de replay a afficher sur l'overlay
|
|
||||||
replay_action_received = pyqtSignal(dict)
|
|
||||||
|
|
||||||
def __init__(
    self,
    server_client: Optional[LeaServerClient] = None,
    parent: Optional[QWidget] = None,
) -> None:
    """Build the panel and wire it to the server client.

    Args:
        server_client: client used to talk to the server; a new
            ``LeaServerClient()`` is created when none (or a falsy value)
            is given.
        parent: optional parent widget.
    """
    super().__init__(parent)

    # Server client
    if server_client:
        self._client = server_client
    else:
        self._client = LeaServerClient()

    # Feedback overlay
    self._overlay = OverlayWidget()

    # Current display mode (False = full panel, True = mini bar)
    self._minimized = False

    # Setup steps, run in this exact order
    for step in (
        self._setup_window,
        self._setup_ui,
        self._setup_shortcuts,
        self._connect_signals,
        self._start_connection_check,
    ):
        step()

    # Welcome message, shown half a second after construction
    QTimer.singleShot(500, self._show_welcome)
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Setup
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def _setup_window(self) -> None:
    """Configure window flags, size constraints, position and drop shadow."""
    window_flags = Qt.WindowStaysOnTopHint | Qt.FramelessWindowHint | Qt.Tool
    self.setWindowFlags(window_flags)
    self.setAttribute(Qt.WA_TranslucentBackground, True)
    self.setObjectName("LeaMainWindow")

    # Size constraints and position (anchored to the right edge)
    self.setFixedWidth(styles.PANEL_WIDTH)
    self.setMinimumHeight(styles.PANEL_MIN_HEIGHT)
    self._anchor_to_right()

    # Drop shadow
    drop_shadow = QGraphicsDropShadowEffect()
    drop_shadow.setBlurRadius(20)
    drop_shadow.setColor(QColor(0, 0, 0, 60))
    drop_shadow.setOffset(0, 4)
    self.setGraphicsEffect(drop_shadow)
|
|
||||||
|
|
||||||
def _anchor_to_right(self) -> None:
    """Place the panel against the right edge of the primary screen.

    Uses the available geometry of the primary screen, leaving a 10 px
    margin on the right and 40 px above and below. Does nothing when no
    desktop object is available.
    """
    desktop = QApplication.desktop()
    if not desktop:
        return

    area = desktop.availableGeometry(desktop.primaryScreen())
    left = area.right() - styles.PANEL_WIDTH - 10
    top = area.top() + 40
    self.setGeometry(left, top, styles.PANEL_WIDTH, area.height() - 80)
|
|
||||||
|
|
||||||
def _setup_ui(self) -> None:
    """Assemble the panel: styled background, header, chat, status, buttons.

    The mini bar is created too, but stays hidden until the panel is
    minimized.
    """
    # Outer layout of the window itself
    outer = QVBoxLayout(self)
    outer.setContentsMargins(0, 0, 0, 0)
    outer.setSpacing(0)
    self._main_layout = outer

    # Background widget (carries the rounded, bordered style)
    panel_qss = f"""
        QWidget#LeaPanelBg {{
            background-color: {styles.COLOR_BG};
            border-radius: {styles.BORDER_RADIUS}px;
            border: 1px solid {styles.COLOR_BORDER};
        }}
    """
    self._bg_widget = QWidget()
    self._bg_widget.setObjectName("LeaPanelBg")
    self._bg_widget.setStyleSheet(panel_qss)

    column = QVBoxLayout(self._bg_widget)
    column.setContentsMargins(0, 0, 0, 0)
    column.setSpacing(0)

    # --- Header ---
    self._header = self._create_header()
    column.addWidget(self._header)

    # --- Chat (takes all the remaining vertical space) ---
    self._chat = ChatWidget()
    column.addWidget(self._chat, stretch=1)

    # --- Replay status area ---
    self._status_bar = self._create_status_bar()
    column.addWidget(self._status_bar)

    # --- Quick buttons ---
    self._quick_buttons = self._create_quick_buttons()
    column.addWidget(self._quick_buttons)

    outer.addWidget(self._bg_widget)

    # --- Mini bar (hidden by default) ---
    self._mini_bar = self._create_mini_bar()
    self._mini_bar.hide()
    outer.addWidget(self._mini_bar)
|
|
||||||
|
|
||||||
def _create_header(self) -> QWidget:
    """Build the header row: avatar, title + status label, minimize button.

    Stores the avatar in ``self._avatar`` and the status label in
    ``self._status_label``.

    Returns:
        The header widget.
    """
    bar = QWidget()
    bar.setObjectName("LeaHeader")
    bar.setStyleSheet(styles.HEADER_STYLE)
    bar.setFixedHeight(60)

    row = QHBoxLayout(bar)
    row.setContentsMargins(
        styles.PADDING, styles.SPACING,
        styles.PADDING, styles.SPACING,
    )

    # Avatar
    self._avatar = LeaAvatar(styles.AVATAR_SIZE)
    row.addWidget(self._avatar)

    # Title stacked above the connection status
    labels = QVBoxLayout()
    labels.setSpacing(2)

    name_label = QLabel("Lea")
    name_label.setObjectName("LeaTitle")
    name_label.setStyleSheet(styles.HEADER_STYLE)
    labels.addWidget(name_label)

    self._status_label = QLabel("Connexion...")
    self._status_label.setObjectName("LeaStatus")
    self._status_label.setStyleSheet(styles.HEADER_STYLE)
    labels.addWidget(self._status_label)

    row.addLayout(labels, stretch=1)

    # Minimize button
    button_qss = f"""
        QPushButton {{
            background: transparent;
            color: {styles.COLOR_TEXT_SECONDARY};
            border: none;
            border-radius: 15px;
            font-size: 16px;
            font-weight: bold;
        }}
        QPushButton:hover {{
            background-color: {styles.COLOR_BORDER};
        }}
    """
    shrink = QPushButton("_")
    shrink.setFixedSize(30, 30)
    shrink.setCursor(Qt.PointingHandCursor)
    shrink.setStyleSheet(button_qss)
    shrink.clicked.connect(self.toggle_minimize)
    row.addWidget(shrink)

    return bar
|
|
||||||
|
|
||||||
def _create_status_bar(self) -> QWidget:
    """Build the replay status area (text label above a progress bar).

    The container, the label and the progress bar all start hidden. They
    are stored in ``self._status_container``, ``self._replay_label`` and
    ``self._progress_bar``.

    Returns:
        The container widget.
    """
    box = QWidget()
    box.setFixedHeight(50)

    column = QVBoxLayout(box)
    column.setContentsMargins(
        styles.PADDING, styles.SPACING,
        styles.PADDING, styles.SPACING,
    )
    column.setSpacing(4)

    label = QLabel("")
    label.setObjectName("StatusLabel")
    label.setStyleSheet(styles.STATUS_LABEL_STYLE)
    label.hide()
    self._replay_label = label
    column.addWidget(label)

    progress = QProgressBar()
    progress.setStyleSheet(styles.PROGRESS_STYLE)
    progress.setTextVisible(False)
    progress.hide()
    self._progress_bar = progress
    column.addWidget(progress)

    box.hide()
    self._status_container = box
    return box
|
|
||||||
|
|
||||||
def _create_quick_buttons(self) -> QWidget:
    """Build the row holding the two quick-action buttons.

    Returns:
        The container widget with the "Apprends-moi" and
        "Que sais-tu faire ?" buttons, in that order.
    """
    box = QWidget()
    row = QHBoxLayout(box)
    row.setContentsMargins(
        styles.PADDING, styles.SPACING,
        styles.PADDING, styles.PADDING,
    )
    row.setSpacing(styles.SPACING)

    # (caption, click handler) in left-to-right display order.
    button_specs = (
        ("Apprends-moi", self._on_learn_clicked),
        ("Que sais-tu faire ?", self._on_list_clicked),
    )
    for caption, handler in button_specs:
        button = QPushButton(caption)
        button.setObjectName("QuickButton")
        button.setStyleSheet(styles.QUICK_BUTTON_STYLE)
        button.setCursor(Qt.PointingHandCursor)
        button.clicked.connect(handler)
        row.addWidget(button)

    return box
|
|
||||||
|
|
||||||
def _create_mini_bar(self) -> QWidget:
    """Build the floating mini bar used in minimized mode.

    The bar is 80x50 pixels and holds a small avatar (kept in
    ``self._mini_avatar``) and an expand button.

    Returns:
        The mini bar widget.
    """
    strip = QWidget()
    strip.setObjectName("MiniBar")
    strip.setStyleSheet(styles.MINI_BAR_STYLE)
    strip.setFixedSize(80, 50)

    row = QHBoxLayout(strip)
    row.setContentsMargins(8, 4, 8, 4)

    self._mini_avatar = LeaAvatar(32)
    row.addWidget(self._mini_avatar)

    expand_qss = f"""
        QPushButton {{
            background: transparent;
            color: {styles.COLOR_TEXT_SECONDARY};
            border: none;
            font-size: 14px;
            font-weight: bold;
        }}
        QPushButton:hover {{
            color: {styles.COLOR_ACCENT};
        }}
    """
    restore = QPushButton(">")
    restore.setFixedSize(24, 24)
    restore.setCursor(Qt.PointingHandCursor)
    restore.setStyleSheet(expand_qss)
    restore.clicked.connect(self.toggle_minimize)
    row.addWidget(restore)

    return strip
|
|
||||||
|
|
||||||
def _setup_shortcuts(self) -> None:
    """Register the Ctrl+Shift+L shortcut that shows/hides the panel."""
    # This is a widget-local shortcut: it fires only while the panel has
    # focus. On Windows a truly global shortcut needs an additional
    # keyboard hook; per the original note, that hook is to be added in
    # the launcher.
    toggle_keys = QKeySequence("Ctrl+Shift+L")
    QShortcut(toggle_keys, self).activated.connect(self.toggle_visibility)
|
|
||||||
|
|
||||||
def _connect_signals(self) -> None:
    """Wire the chat, server client, overlay and replay signal to handlers."""
    # Server client callbacks
    client = self._client

    # Chat: the user submitted a message
    self._chat.message_sent.connect(self._on_message_sent)

    client.set_on_connection_change(self._on_connection_changed)
    client.set_on_replay_action(self._on_replay_action)

    # Overlay: an action has finished being displayed
    self._overlay.action_display_finished.connect(self._on_overlay_finished)

    # Replay actions are relayed through a Qt signal so they are handled
    # by the slot rather than directly in the client callback
    self.replay_action_received.connect(self._handle_replay_action)
|
|
||||||
|
|
||||||
def _start_connection_check(self) -> None:
    """Start the periodic connection check (every 10 s, first after 1 s)."""
    poll = QTimer(self)
    poll.timeout.connect(self._check_connection)
    poll.start(10000)  # every 10 seconds
    self._conn_timer = poll

    # First check one second after start-up, without waiting for the
    # periodic timer's first tick
    QTimer.singleShot(1000, self._check_connection)
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Actions
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def _show_welcome(self) -> None:
|
|
||||||
"""Afficher le message d'accueil."""
|
|
||||||
self._chat.add_lea_message(
|
|
||||||
"Bonjour ! Je suis <b>Lea</b>, votre assistante RPA.<br>"
|
|
||||||
"Je peux apprendre vos taches, les rejouer, "
|
|
||||||
"et vous montrer ce que je fais.<br><br>"
|
|
||||||
"Que souhaitez-vous faire ?"
|
|
||||||
)
|
|
||||||
|
|
||||||
@pyqtSlot(str)
def _on_message_sent(self, message: str) -> None:
    """Handle a message submitted by the user.

    Disables the chat input, then schedules the send 100 ms later through
    a single-shot timer so this slot returns immediately.
    """
    def deliver() -> None:
        self._send_to_server(message)

    self._chat.set_input_enabled(False)
    QTimer.singleShot(100, deliver)
|
|
||||||
|
|
||||||
def _send_to_server(self, message: str) -> None:
|
|
||||||
"""Envoyer le message au serveur et afficher la reponse."""
|
|
||||||
response = self._client.send_chat_message(message)
|
|
||||||
|
|
||||||
if response is None:
|
|
||||||
self._chat.add_lea_message(
|
|
||||||
"Je n'arrive pas a joindre le serveur. "
|
|
||||||
"Verifiez que le serveur Linux est demarre."
|
|
||||||
)
|
|
||||||
elif "error" in response:
|
|
||||||
self._chat.add_lea_message(
|
|
||||||
f"Erreur : {response['error']}"
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
# Extraire la reponse textuelle
|
|
||||||
reply_text = response.get("response", "")
|
|
||||||
if not reply_text:
|
|
||||||
# Construire une reponse a partir des donnees structurees
|
|
||||||
reply_text = self._format_response(response)
|
|
||||||
|
|
||||||
self._chat.add_lea_message(reply_text)
|
|
||||||
|
|
||||||
# Si un workflow a ete lance, mettre a jour la status bar
|
|
||||||
if response.get("success") and response.get("workflow"):
|
|
||||||
self._show_replay_status(
|
|
||||||
f"Execution : {response['workflow']}",
|
|
||||||
0, 1,
|
|
||||||
)
|
|
||||||
|
|
||||||
self._chat.set_input_enabled(True)
|
|
||||||
|
|
||||||
def _format_response(self, data: Dict[str, Any]) -> str:
|
|
||||||
"""Formater une reponse structuree du serveur en texte lisible."""
|
|
||||||
# Reponse de confirmation
|
|
||||||
if data.get("needs_confirmation"):
|
|
||||||
conf = data.get("confirmation", {})
|
|
||||||
return (
|
|
||||||
f"Voulez-vous que j'execute <b>{conf.get('workflow_name', '?')}</b> ?<br>"
|
|
||||||
f"Risque : {conf.get('risk_level', 'normal')}<br>"
|
|
||||||
"Repondez <b>oui</b> ou <b>non</b>."
|
|
||||||
)
|
|
||||||
|
|
||||||
# Liste de workflows
|
|
||||||
if "workflows" in data:
|
|
||||||
workflows = data["workflows"]
|
|
||||||
if not workflows:
|
|
||||||
return "Je ne connais aucun workflow pour le moment."
|
|
||||||
items = []
|
|
||||||
for wf in workflows[:10]:
|
|
||||||
name = wf.get("name", wf.get("id", "?"))
|
|
||||||
desc = wf.get("description", "")
|
|
||||||
items.append(f"- <b>{name}</b>{': ' + desc if desc else ''}")
|
|
||||||
result = "Voici ce que je sais faire :<br>" + "<br>".join(items)
|
|
||||||
if len(workflows) > 10:
|
|
||||||
result += f"<br><i>... et {len(workflows) - 10} autres</i>"
|
|
||||||
return result
|
|
||||||
|
|
||||||
# Workflow non trouve
|
|
||||||
if data.get("not_found"):
|
|
||||||
return (
|
|
||||||
f"Je ne trouve pas de workflow correspondant a "
|
|
||||||
f"'{data.get('query', '?')}'.<br>"
|
|
||||||
"Essayez 'Que sais-tu faire ?' pour voir la liste."
|
|
||||||
)
|
|
||||||
|
|
||||||
# Execution reussie
|
|
||||||
if data.get("success"):
|
|
||||||
return (
|
|
||||||
f"C'est parti ! J'execute <b>{data.get('workflow', '?')}</b>.<br>"
|
|
||||||
"Regardez l'ecran, je vais vous montrer ce que je fais."
|
|
||||||
)
|
|
||||||
|
|
||||||
# Confirmation/refus
|
|
||||||
if data.get("confirmed"):
|
|
||||||
return f"D'accord, je lance <b>{data.get('workflow', '?')}</b> !"
|
|
||||||
if data.get("denied"):
|
|
||||||
return "Pas de probleme, j'annule."
|
|
||||||
|
|
||||||
# Fallback
|
|
||||||
return str(data)
|
|
||||||
|
|
||||||
def _on_learn_clicked(self) -> None:
|
|
||||||
"""Action du bouton 'Apprends-moi'."""
|
|
||||||
self._chat.add_user_message("Apprends-moi une nouvelle tache")
|
|
||||||
self._chat.add_lea_message(
|
|
||||||
"D'accord ! Pour m'apprendre une tache :<br>"
|
|
||||||
"1. Cliquez sur <b>Demarrer</b> dans le tray Agent V1<br>"
|
|
||||||
"2. Effectuez votre tache normalement<br>"
|
|
||||||
"3. Cliquez sur <b>Terminer</b> quand c'est fini<br><br>"
|
|
||||||
"Je vais observer et apprendre automatiquement."
|
|
||||||
)
|
|
||||||
|
|
||||||
def _on_list_clicked(self) -> None:
    """Handle the 'Que sais-tu faire ?' button.

    Echoes the question as a user message, disables the chat input and
    schedules the workflow fetch 100 ms later.
    """
    chat = self._chat
    chat.add_user_message("Que sais-tu faire ?")
    chat.set_input_enabled(False)
    QTimer.singleShot(100, self._fetch_workflows)
|
|
||||||
|
|
||||||
def _fetch_workflows(self) -> None:
|
|
||||||
"""Recuperer et afficher la liste des workflows."""
|
|
||||||
workflows = self._client.list_workflows()
|
|
||||||
if workflows:
|
|
||||||
items = []
|
|
||||||
for wf in workflows[:15]:
|
|
||||||
name = wf.get("name", wf.get("id", "?"))
|
|
||||||
desc = wf.get("description", "")
|
|
||||||
items.append(f"- <b>{name}</b>{': ' + desc if desc else ''}")
|
|
||||||
text = "Voici les workflows que je connais :<br>" + "<br>".join(items)
|
|
||||||
if len(workflows) > 15:
|
|
||||||
text += f"<br><i>... et {len(workflows) - 15} autres</i>"
|
|
||||||
else:
|
|
||||||
text = (
|
|
||||||
"Je ne connais aucun workflow pour le moment.<br>"
|
|
||||||
"Apprenez-moi une tache avec le bouton 'Apprends-moi' !"
|
|
||||||
)
|
|
||||||
self._chat.add_lea_message(text)
|
|
||||||
self._chat.set_input_enabled(True)
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Connexion
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def _check_connection(self) -> None:
|
|
||||||
"""Verifier la connexion au serveur (dans un timer)."""
|
|
||||||
connected = self._client.check_connection()
|
|
||||||
self._update_connection_ui(connected)
|
|
||||||
|
|
||||||
def _on_connection_changed(self, connected: bool) -> None:
    """Client callback fired when the connection state changes.

    The UI refresh is deferred through a zero-delay single-shot timer
    rather than performed inline.
    """
    # NOTE(review): the intent is to run the refresh in the main thread.
    # Which thread invokes this callback is not visible here; if it can be
    # a non-Qt thread, confirm that a single-shot timer started from that
    # thread actually fires.
    def refresh() -> None:
        self._update_connection_ui(connected)

    QTimer.singleShot(0, refresh)
|
|
||||||
|
|
||||||
def _update_connection_ui(self, connected: bool) -> None:
    """Reflect the connection state on the avatars and the status label.

    Args:
        connected: True when the server is reachable.
    """
    self._avatar.set_connected(connected)
    # The mini avatar is only updated if it has been created
    if hasattr(self, '_mini_avatar'):
        self._mini_avatar.set_connected(connected)

    if connected:
        caption = f"Connecte a {self._client.server_host}"
        tint = styles.COLOR_SUCCESS
    else:
        # Only the first 30 characters of the error are shown
        reason = self._client.last_error or "Serveur injoignable"
        caption = f"Deconnecte ({reason[:30]})"
        tint = styles.COLOR_ERROR

    self._status_label.setText(caption)
    self._status_label.setStyleSheet(
        f"color: {tint}; "
        f"font-family: '{styles.FONT_FAMILY}'; "
        f"font-size: {styles.FONT_SIZE_SMALL}px; "
        "background: transparent; border: none;"
    )
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Replay & Overlay
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def _on_replay_action(self, action: Dict[str, Any]) -> None:
|
|
||||||
"""Callback appelee depuis le thread de polling (pas thread-safe).
|
|
||||||
|
|
||||||
Emettre un signal pour traiter dans le thread Qt.
|
|
||||||
"""
|
|
||||||
self.replay_action_received.emit(action)
|
|
||||||
|
|
||||||
@pyqtSlot(dict)
def _handle_replay_action(self, action: Dict[str, Any]) -> None:
    """Process a replay action received through ``replay_action_received``.

    The overlay is shown BEFORE the action is executed so the user can
    see what is about to happen. The action is also reflected in the
    replay status bar and logged as a system message in the chat.

    Args:
        action: the replay action. ``x_pct`` / ``y_pct`` (default 0.5)
            are multiplied by the primary screen's width / height to get
            the target position.
    """
    action_text = self._describe_action(action)

    # Screen size used to convert the fractional coordinates to pixels
    desktop = QApplication.desktop()
    screen = desktop.screenGeometry(desktop.primaryScreen()) if desktop else None
    if screen:
        sw, sh = screen.width(), screen.height()
    else:
        # No desktop information available: fall back to a fixed size
        sw, sh = 1920, 1080

    target_x = int(action.get("x_pct", 0.5) * sw)
    target_y = int(action.get("y_pct", 0.5) * sh)

    # Progress comes from the client's replay status, when there is one
    replay = self._client.get_replay_status()
    step_current = 0
    step_total = 0
    if replay:
        step_total = replay.get("total_actions", 0)
        # The action being shown is the one after the completed ones
        step_current = replay.get("completed_actions", 0) + 1

    # Update the status bar
    self._show_replay_status(action_text, step_current, step_total)

    # Show the overlay
    self._overlay.show_action(
        target_x, target_y,
        action_text,
        step_current, step_total,
        duration_ms=1500,
    )

    # Log the step in the chat
    self._chat.add_system_message(
        f"Etape {step_current}/{step_total} : {action_text}"
    )
|
|
||||||
|
|
||||||
def _describe_action(self, action: Dict[str, Any]) -> str:
|
|
||||||
"""Generer une description lisible d'une action de replay."""
|
|
||||||
action_type = action.get("type", "?")
|
|
||||||
target_text = action.get("target_text", "")
|
|
||||||
target_role = action.get("target_role", "")
|
|
||||||
|
|
||||||
if action_type == "click":
|
|
||||||
target = target_text or target_role or "cet element"
|
|
||||||
return f"Je clique sur [{target}]"
|
|
||||||
elif action_type == "type":
|
|
||||||
text = action.get("text", "")
|
|
||||||
preview = text[:30] + "..." if len(text) > 30 else text
|
|
||||||
return f"Je tape : {preview}"
|
|
||||||
elif action_type == "key_combo":
|
|
||||||
keys = action.get("keys", [])
|
|
||||||
return f"Je tape : {'+'.join(keys)}"
|
|
||||||
elif action_type == "scroll":
|
|
||||||
return "Je fais defiler la page"
|
|
||||||
elif action_type == "wait":
|
|
||||||
ms = action.get("duration_ms", 500)
|
|
||||||
return f"J'attends {ms}ms"
|
|
||||||
else:
|
|
||||||
return f"Action : {action_type}"
|
|
||||||
|
|
||||||
def _on_overlay_finished(self) -> None:
|
|
||||||
"""Callback quand l'overlay a fini d'afficher une action."""
|
|
||||||
pass # L'executor continue de son cote
|
|
||||||
|
|
||||||
def _show_replay_status(
|
|
||||||
self, text: str, current: int, total: int,
|
|
||||||
) -> None:
|
|
||||||
"""Afficher la barre de progression du replay."""
|
|
||||||
self._status_container.show()
|
|
||||||
self._replay_label.show()
|
|
||||||
self._replay_label.setText(text)
|
|
||||||
|
|
||||||
if total > 0:
|
|
||||||
self._progress_bar.show()
|
|
||||||
self._progress_bar.setMaximum(total)
|
|
||||||
self._progress_bar.setValue(current)
|
|
||||||
else:
|
|
||||||
self._progress_bar.hide()
|
|
||||||
|
|
||||||
def hide_replay_status(self) -> None:
    """Hide the replay status area (label and progress bar container)."""
    container = self._status_container
    container.hide()
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Visibilite
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def toggle_visibility(self) -> None:
    """Show or hide the panel (bound to Ctrl+Shift+L).

    When the panel becomes visible it is also raised and activated.
    """
    if self.isVisible():
        self.hide()
        return

    self.show()
    self.raise_()
    self.activateWindow()
|
|
||||||
|
|
||||||
def toggle_minimize(self) -> None:
    """Switch between the full panel and the 80x50 mini bar.

    The window carries size constraints from ``_setup_window`` (fixed
    width ``styles.PANEL_WIDTH``, minimum height
    ``styles.PANEL_MIN_HEIGHT``). Qt adjusts any requested geometry to
    stay within the widget's minimum/maximum size, so those constraints
    are replaced explicitly on each switch; otherwise the window could
    not shrink to the mini bar's size.
    """
    if self._minimized:
        # Restore the full panel
        self._mini_bar.hide()
        self._bg_widget.show()
        self._minimized = False
        # Re-apply the panel constraints: fixed width, free height
        self.setFixedWidth(styles.PANEL_WIDTH)
        self.setMinimumHeight(styles.PANEL_MIN_HEIGHT)
        self.setMaximumHeight(16777215)  # Qt's QWIDGETSIZE_MAX
        self._anchor_to_right()
    else:
        # Collapse to the mini bar
        self._bg_widget.hide()
        self._mini_bar.show()
        self._minimized = True
        # Lift the panel constraints so the window can be 80x50
        self.setFixedSize(80, 50)
        # Place the mini bar in the top-right corner of the primary screen
        desktop = QApplication.desktop()
        if desktop:
            screen = desktop.availableGeometry(desktop.primaryScreen())
            x = screen.right() - 90
            y = screen.top() + 10
            self.setGeometry(x, y, 80, 50)
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Drag (deplacer la fenetre sans barre de titre)
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def mousePressEvent(self, event) -> None:  # noqa: N802
    """Start a window drag when the left mouse button is pressed.

    Records the offset between the cursor and the window's top-left
    corner in ``self._drag_pos``. Other buttons are ignored.
    """
    if event.button() != Qt.LeftButton:
        return

    window_origin = self.frameGeometry().topLeft()
    self._drag_pos = event.globalPos() - window_origin
    event.accept()
|
|
||||||
|
|
||||||
def mouseMoveEvent(self, event) -> None:  # noqa: N802
    """Move the window while the left button is held during a drag.

    Nothing happens unless only the left button is down and a drag offset
    has been recorded by ``mousePressEvent``.
    """
    dragging = event.buttons() == Qt.LeftButton and hasattr(self, '_drag_pos')
    if not dragging:
        return

    self.move(event.globalPos() - self._drag_pos)
    event.accept()
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Painting (fond arrondi semi-transparent)
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def paintEvent(self, event) -> None:  # noqa: N802
    """Paint the rounded, slightly translucent panel background and border."""
    corner = styles.BORDER_RADIUS

    canvas = QPainter(self)
    canvas.setRenderHint(QPainter.Antialiasing, True)

    # Rounded rectangle covering the whole widget
    outline = QPainterPath()
    outline.addRoundedRect(0, 0, self.width(), self.height(), corner, corner)

    # Background colour with alpha 245 out of 255 (slightly transparent)
    fill = QColor(styles.COLOR_BG)
    fill.setAlpha(245)
    canvas.fillPath(outline, fill)

    # 1 px border
    canvas.setPen(QPen(QColor(styles.COLOR_BORDER), 1))
    canvas.drawPath(outline)

    canvas.end()
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Lifecycle
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def closeEvent(self, event) -> None:  # noqa: N802
    """Refuse the close request and hide the panel instead."""
    # Ignoring the event keeps the window alive; it is only hidden.
    event.ignore()
    self.hide()
|
|
||||||
|
|
||||||
def shutdown(self) -> None:
    """Shut the panel down cleanly.

    Stops the connection timer, hides the overlay, shuts the server
    client down, then logs that the window has stopped.
    """
    # Teardown steps, in order
    for teardown in (
        self._conn_timer.stop,
        self._overlay.hide_overlay,
        self._client.shutdown,
    ):
        teardown()
    logger.info("LeaMainWindow arretee")
|
|
||||||
@@ -1,354 +0,0 @@
|
|||||||
# agent_v0/lea_ui/overlay.py
|
|
||||||
"""
|
|
||||||
Overlay de feedback visuel pour le replay.
|
|
||||||
|
|
||||||
Fenetre transparente plein ecran, click-through, qui affiche :
|
|
||||||
- Cercle rouge pulsant autour de la cible du clic
|
|
||||||
- Texte descriptif de l'action en cours
|
|
||||||
- Fleche pointant vers la cible
|
|
||||||
- Barre de progression etape X/Y
|
|
||||||
|
|
||||||
Le overlay ne capture JAMAIS les clics (Qt.WA_TransparentForMouseEvents).
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import math
|
|
||||||
from typing import Optional, Tuple
|
|
||||||
|
|
||||||
from PyQt5.QtCore import (
|
|
||||||
QPoint,
|
|
||||||
QPropertyAnimation,
|
|
||||||
QRect,
|
|
||||||
QRectF,
|
|
||||||
QSize,
|
|
||||||
Qt,
|
|
||||||
QTimer,
|
|
||||||
pyqtProperty,
|
|
||||||
pyqtSignal,
|
|
||||||
)
|
|
||||||
from PyQt5.QtGui import (
|
|
||||||
QBrush,
|
|
||||||
QColor,
|
|
||||||
QFont,
|
|
||||||
QFontMetrics,
|
|
||||||
QPainter,
|
|
||||||
QPainterPath,
|
|
||||||
QPen,
|
|
||||||
QPolygonF,
|
|
||||||
)
|
|
||||||
from PyQt5.QtWidgets import QApplication, QDesktopWidget, QWidget
|
|
||||||
|
|
||||||
from . import styles
|
|
||||||
|
|
||||||
logger = logging.getLogger("lea_ui.overlay")
|
|
||||||
|
|
||||||
|
|
||||||
class OverlayWidget(QWidget):
    """Transparent, full-screen, click-through overlay used as replay feedback.

    For the action being replayed it paints:
      - a pulsing circle around the click target,
      - a dashed line joining the label to the target,
      - a label describing the action,
      - a "step X/Y" progress bar at the bottom of the screen.

    Window flags / attributes set in ``__init__``:
      - WindowStaysOnTopHint         : always above other windows
      - FramelessWindowHint          : no window decoration
      - Tool                         : tool window (not a regular top-level app window)
      - WindowTransparentForInput    : the window system routes input through it
      - WA_TranslucentBackground     : transparent background
      - WA_TransparentForMouseEvents : Qt does not deliver mouse events to it

    NOTE(review): ``show_action`` arms the fade timer with ``duration_ms``;
    when that timer fires the target is cleared and the widget hidden, so a
    later ``show_done`` has nothing to paint. Callers that want the success
    mark must call ``show_done`` before the fade timer expires -- confirm
    against the replay integration timings.
    """

    # Emitted by _on_fade(), i.e. each time the fade timer expires.
    action_display_finished = pyqtSignal()

    # Pulse animation tuning (pixels / pixels per timer tick).
    _PULSE_START_RADIUS = 30.0
    _PULSE_MIN_RADIUS = 25.0
    _PULSE_MAX_RADIUS = 45.0
    _PULSE_STEP = 0.8

    def __init__(self, parent: Optional[QWidget] = None) -> None:
        """Create the (initially hidden) overlay and size it to the primary screen."""
        super().__init__(parent)

        # WA_TransparentForMouseEvents only affects event delivery inside the
        # Qt application; WindowTransparentForInput additionally tells the
        # window system that this window never takes input.
        self.setWindowFlags(
            Qt.WindowStaysOnTopHint
            | Qt.FramelessWindowHint
            | Qt.Tool
            | Qt.WindowTransparentForInput
        )
        self.setAttribute(Qt.WA_TranslucentBackground, True)
        self.setAttribute(Qt.WA_TransparentForMouseEvents, True)

        # Display state
        self._target_pos: Optional[Tuple[int, int]] = None
        self._action_text: str = ""
        self._progress_current: int = 0
        self._progress_total: int = 0
        self._action_done: bool = False
        self._visible = False

        # Pulsing circle state
        self._pulse_radius: float = self._PULSE_START_RADIUS
        self._pulse_growing = True
        self._pulse_opacity: float = 0.8

        # Animation timer (30 ms interval, ~33 FPS)
        self._anim_timer = QTimer(self)
        self._anim_timer.timeout.connect(self._animate_pulse)
        self._anim_timer.setInterval(30)

        # Single-shot timer that hides the overlay automatically
        self._fade_timer = QTimer(self)
        self._fade_timer.setSingleShot(True)
        self._fade_timer.timeout.connect(self._on_fade)

        self._update_geometry()

    def _update_geometry(self) -> None:
        """Resize/move the overlay so it covers the primary screen."""
        desktop = QApplication.desktop()
        if desktop:
            screen_rect = desktop.screenGeometry(desktop.primaryScreen())
            self.setGeometry(screen_rect)

    # ---------------------------------------------------------------------------
    # Public API
    # ---------------------------------------------------------------------------

    def show_action(
        self,
        target_x: int,
        target_y: int,
        text: str,
        step_current: int = 0,
        step_total: int = 0,
        duration_ms: int = 1500,
    ) -> None:
        """Display the feedback for one replay action.

        Args:
            target_x: X position of the click target (screen pixels).
            target_y: Y position of the click target (screen pixels).
            text: description of the action (e.g. "Je clique sur [Valider]").
            step_current: current step, shown as-is in the "X/Y" label.
            step_total: total number of steps; the progress bar is only
                drawn when this is > 0.
            duration_ms: delay before the overlay hides itself (default 1500).
        """
        self._target_pos = (target_x, target_y)
        self._action_text = text
        self._progress_current = step_current
        self._progress_total = step_total
        self._action_done = False
        self._visible = True

        # Restart the pulse from a known state so every action animates alike.
        self._pulse_radius = self._PULSE_START_RADIUS
        self._pulse_growing = True
        self._pulse_opacity = 0.8

        self._update_geometry()
        self.show()
        self.raise_()
        self._anim_timer.start()

        # Schedule the automatic hide.
        self._fade_timer.start(duration_ms)
        self.update()

    def show_done(self, text: Optional[str] = None) -> None:
        """Switch the current action to its "done" look (green disc + check mark).

        Args:
            text: optional replacement for the label; ignored when empty/None.

        The fade timer is restarted so the overlay hides 800 ms later.
        """
        self._action_done = True
        if text:
            self._action_text = text
        self.update()

        # Hide after 800 ms.
        self._fade_timer.start(800)

    def hide_overlay(self) -> None:
        """Hide the overlay immediately and stop both timers (no signal emitted)."""
        self._anim_timer.stop()
        self._fade_timer.stop()
        self._visible = False
        self._target_pos = None
        self.hide()

    # ---------------------------------------------------------------------------
    # Animation
    # ---------------------------------------------------------------------------

    def _animate_pulse(self) -> None:
        """Advance the pulsing circle by one timer tick and request a repaint."""
        if self._action_done:
            # The "done" indicator is static.
            return

        if self._pulse_growing:
            self._pulse_radius += self._PULSE_STEP
            if self._pulse_radius >= self._PULSE_MAX_RADIUS:
                self._pulse_growing = False
        else:
            self._pulse_radius -= self._PULSE_STEP
            if self._pulse_radius <= self._PULSE_MIN_RADIUS:
                self._pulse_growing = True

        # Opacity follows the radius: 0.5 at the minimum, 0.8 at the maximum.
        span = self._PULSE_MAX_RADIUS - self._PULSE_MIN_RADIUS
        self._pulse_opacity = 0.5 + 0.3 * (
            (self._pulse_radius - self._PULSE_MIN_RADIUS) / span
        )

        self.update()

    def _on_fade(self) -> None:
        """Fade-timer callback: hide the overlay and emit ``action_display_finished``."""
        self._anim_timer.stop()
        self._visible = False
        self._target_pos = None
        self.hide()
        self.action_display_finished.emit()

    # ---------------------------------------------------------------------------
    # Rendering
    # ---------------------------------------------------------------------------

    @staticmethod
    def _label_center_y(ty: int) -> int:
        """Vertical centre of the action label for a target at ``ty``.

        The label sits 90 px above the target, or 70 px below it when the
        target is too close to the top of the screen. Both the label and the
        connecting line use this single rule so they always agree.
        """
        return ty - 90 if ty > 140 else ty + 70

    def paintEvent(self, event) -> None:  # noqa: N802
        """Paint the overlay; does nothing when no action is being displayed."""
        if not self._visible or not self._target_pos:
            return

        painter = QPainter(self)
        try:
            painter.setRenderHint(QPainter.Antialiasing, True)

            tx, ty = self._target_pos

            if self._action_done:
                self._draw_done_indicator(painter, tx, ty)
            else:
                self._draw_pulse_circle(painter, tx, ty)
                self._draw_arrow(painter, tx, ty)

            self._draw_action_text(painter, tx, ty)
            self._draw_progress_bar(painter)
        finally:
            # Always release the paint device, even if a draw helper raises.
            painter.end()

    def _draw_pulse_circle(self, painter: QPainter, cx: int, cy: int) -> None:
        """Draw the pulsing target marker centred on (cx, cy)."""
        # Outer disc: animated radius, semi-transparent fill.
        color = QColor(styles.COLOR_OVERLAY_PULSE)
        color.setAlphaF(self._pulse_opacity * 0.4)
        painter.setBrush(QBrush(color))
        painter.setPen(Qt.NoPen)
        painter.drawEllipse(
            QPoint(cx, cy),
            int(self._pulse_radius),
            int(self._pulse_radius),
        )

        # Inner ring: fixed 20 px radius, more opaque outline.
        color_inner = QColor(styles.COLOR_OVERLAY_PULSE)
        color_inner.setAlphaF(0.7)
        pen = QPen(color_inner, 3)
        painter.setPen(pen)
        painter.setBrush(Qt.NoBrush)
        painter.drawEllipse(QPoint(cx, cy), 20, 20)

        # Centre dot.
        painter.setPen(Qt.NoPen)
        painter.setBrush(QBrush(QColor(styles.COLOR_OVERLAY_PULSE)))
        painter.drawEllipse(QPoint(cx, cy), 4, 4)

    def _draw_done_indicator(self, painter: QPainter, cx: int, cy: int) -> None:
        """Draw the success marker (filled disc + check mark) centred on (cx, cy)."""
        # Filled disc.
        color = QColor(styles.COLOR_SUCCESS)
        color.setAlphaF(0.8)
        painter.setBrush(QBrush(color))
        painter.setPen(Qt.NoPen)
        painter.drawEllipse(QPoint(cx, cy), 25, 25)

        # Check mark stroke.
        pen = QPen(QColor(styles.COLOR_TEXT_ON_ACCENT), 3)
        pen.setCapStyle(Qt.RoundCap)
        pen.setJoinStyle(Qt.RoundJoin)
        painter.setPen(pen)
        painter.setBrush(Qt.NoBrush)

        path = QPainterPath()
        path.moveTo(cx - 10, cy)
        path.lineTo(cx - 3, cy + 8)
        path.lineTo(cx + 12, cy - 8)
        painter.drawPath(path)

    def _draw_arrow(self, painter: QPainter, tx: int, ty: int) -> None:
        """Draw the dashed line joining the label area to the target (tx, ty)."""
        # Same placement rule as the label, so the line always starts on the
        # side of the target where the label is actually drawn.
        text_y = self._label_center_y(ty)
        text_x = max(100, min(tx, self.width() - 200))

        color = QColor(styles.COLOR_OVERLAY_PULSE)
        color.setAlphaF(0.6)
        pen = QPen(color, 2, Qt.DashLine)
        painter.setPen(pen)

        # Start 15 px from the label centre, on the side facing the target.
        start_y = text_y + 15 if text_y < ty else text_y - 15
        painter.drawLine(text_x, start_y, tx, ty)

    def _draw_action_text(self, painter: QPainter, tx: int, ty: int) -> None:
        """Draw the action label in a rounded box near the target."""
        if not self._action_text:
            return

        text_y = self._label_center_y(ty)

        font = QFont(styles.FONT_FAMILY, styles.FONT_SIZE_LARGE, QFont.Bold)
        painter.setFont(font)
        metrics = QFontMetrics(font)

        # Box size = text bounding rect + padding.
        text_rect = metrics.boundingRect(self._action_text)
        text_width = text_rect.width() + 30
        text_height = text_rect.height() + 16

        # Centre horizontally on the target, kept 10 px inside the widget.
        box_x = max(10, min(tx - text_width // 2, self.width() - text_width - 10))
        box_y = text_y - text_height // 2

        # Rounded semi-transparent dark background.
        bg_color = QColor(31, 41, 55, 200)
        painter.setBrush(QBrush(bg_color))
        painter.setPen(Qt.NoPen)
        painter.drawRoundedRect(box_x, box_y, text_width, text_height, 8, 8)

        # Label text.
        painter.setPen(QPen(QColor(styles.COLOR_OVERLAY_TEXT)))
        painter.drawText(
            QRect(box_x, box_y, text_width, text_height),
            Qt.AlignCenter,
            self._action_text,
        )

    def _draw_progress_bar(self, painter: QPainter) -> None:
        """Draw the "step X/Y" progress bar near the bottom of the widget."""
        if self._progress_total <= 0:
            return

        bar_width = 300
        bar_height = 6
        bar_x = (self.width() - bar_width) // 2
        bar_y = self.height() - 50

        # Track.
        bg_color = QColor(255, 255, 255, 80)
        painter.setBrush(QBrush(bg_color))
        painter.setPen(Qt.NoPen)
        painter.drawRoundedRect(bar_x, bar_y, bar_width, bar_height, 3, 3)

        # Fill, clamped so an out-of-range step can never overflow the track.
        ratio = self._progress_current / self._progress_total
        ratio = max(0.0, min(1.0, ratio))
        fill_width = int(bar_width * ratio)
        accent_color = QColor(styles.COLOR_ACCENT)
        accent_color.setAlphaF(0.9)
        painter.setBrush(QBrush(accent_color))
        painter.drawRoundedRect(bar_x, bar_y, fill_width, bar_height, 3, 3)

        # "Etape X/Y" caption under the bar.
        label_font = QFont(styles.FONT_FAMILY, styles.FONT_SIZE_SMALL)
        painter.setFont(label_font)
        painter.setPen(QPen(QColor(255, 255, 255, 200)))
        painter.drawText(
            QRect(bar_x, bar_y + bar_height + 4, bar_width, 20),
            Qt.AlignCenter,
            f"Etape {self._progress_current}/{self._progress_total}",
        )
|
|
||||||
@@ -1,191 +0,0 @@
|
|||||||
# agent_v0/lea_ui/replay_integration.py
|
|
||||||
"""
|
|
||||||
Integration du feedback visuel (overlay) dans la boucle de replay de l'Agent V1.
|
|
||||||
|
|
||||||
Ce module fournit un wrapper autour de ActionExecutorV1.execute_replay_action
|
|
||||||
qui affiche l'overlay AVANT chaque action et la marque comme terminee APRES.
|
|
||||||
|
|
||||||
Sequence pour chaque action :
|
|
||||||
1. Afficher l'overlay avec la description de l'action (1.5s)
|
|
||||||
2. Attendre que l'overlay ait ete vu par l'utilisateur
|
|
||||||
3. Executer l'action
|
|
||||||
4. Mettre a jour l'overlay (coche verte)
|
|
||||||
5. Passer a l'action suivante
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import time
|
|
||||||
from typing import Any, Callable, Dict, Optional, Tuple
|
|
||||||
|
|
||||||
logger = logging.getLogger("lea_ui.replay_integration")
|
|
||||||
|
|
||||||
# Delai d'affichage de l'overlay avant execution (secondes)
|
|
||||||
PRE_ACTION_DELAY = 1.5
|
|
||||||
# Delai apres la coche verte (secondes)
|
|
||||||
POST_ACTION_DELAY = 0.5
|
|
||||||
|
|
||||||
|
|
||||||
class ReplayOverlayBridge:
    """Bridge between the replay loop and the visual overlay.

    The bridge holds three optional callbacks (show / done / hide). While no
    overlay is connected, ``wrap_execute`` simply runs the action.

    The callbacks are invoked directly from the thread that calls
    ``wrap_execute``; if that is not the GUI thread, the callables supplied to
    ``connect_overlay`` must themselves be safe to call from it (for example
    by emitting Qt signals).
    """

    def __init__(self) -> None:
        """Create a disconnected bridge with the step counter at zero."""
        self._overlay = None
        self._show_callback: Optional[Callable] = None
        self._done_callback: Optional[Callable] = None
        self._hide_callback: Optional[Callable] = None
        self._enabled = False

        # Progress counters
        self._step_current = 0
        self._step_total = 0

    def connect_overlay(
        self,
        show_fn: Callable[[int, int, str, int, int, int], None],
        done_fn: Callable[[Optional[str]], None],
        hide_fn: Callable[[], None],
    ) -> None:
        """Register the overlay callbacks and enable the feedback.

        Args:
            show_fn: called as show_fn(target_x, target_y, text, step, total, duration_ms)
            done_fn: called as done_fn(text)
            hide_fn: called as hide_fn()
        """
        self._show_callback = show_fn
        self._done_callback = done_fn
        self._hide_callback = hide_fn
        self._enabled = True
        logger.info("Overlay connecte au bridge de replay")

    def disconnect_overlay(self) -> None:
        """Drop the callbacks and disable the feedback."""
        self._show_callback = None
        self._done_callback = None
        self._hide_callback = None
        self._enabled = False

    def set_total_steps(self, total: int) -> None:
        """Set the total number of steps of the replay and reset the counter."""
        self._step_total = total
        self._step_current = 0

    def wrap_execute(
        self,
        action: Dict[str, Any],
        executor_fn: Callable[[Dict[str, Any]], Dict[str, Any]],
        screen_width: int = 1920,
        screen_height: int = 1080,
    ) -> Dict[str, Any]:
        """Run one action through ``executor_fn`` with overlay feedback around it.

        Sequence when an overlay is connected: show the overlay, sleep
        ``PRE_ACTION_DELAY``, run the action, report OK/ECHEC, sleep
        ``POST_ACTION_DELAY``, then hide the overlay after the last step.
        Overlay callback failures are logged and never abort the replay.

        Args:
            action: normalized action (type, x_pct, y_pct, text, keys, ...).
            executor_fn: function that actually performs the action.
            screen_width: screen width in pixels.
            screen_height: screen height in pixels.

        Returns:
            Whatever ``executor_fn`` returned, unchanged.

        Raises:
            Any exception raised by ``executor_fn``; the overlay is hidden
            first so it is not left on screen.
        """
        self._step_current += 1

        if not self._enabled or not self._show_callback:
            # No overlay connected: plain execution.
            return executor_fn(action)

        # --- 1. Show the overlay ---
        action_text = self._describe_action(action)
        target_x, target_y = self._get_target_coords(action, screen_width, screen_height)

        try:
            self._show_callback(
                target_x, target_y,
                action_text,
                self._step_current,
                self._step_total,
                int(PRE_ACTION_DELAY * 1000),
            )
        except Exception as e:
            logger.warning("Erreur affichage overlay : %s", e)

        # --- 2. Give the user time to see it ---
        time.sleep(PRE_ACTION_DELAY)

        # --- 3. Execute the action ---
        try:
            result = executor_fn(action)
        except Exception:
            # Do not leave a stale overlay on screen when the action blows up.
            self._hide_quietly()
            raise

        # --- 4. Report the outcome ---
        # A result without a usable .get() (e.g. None) counts as a failure
        # instead of crashing the replay loop.
        try:
            succeeded = bool(result.get("success"))
        except AttributeError:
            succeeded = False

        if succeeded:
            done_text = f"{action_text} OK"
        else:
            done_text = f"{action_text} ECHEC"

        try:
            if self._done_callback:
                self._done_callback(done_text)
        except Exception as e:
            logger.warning("Erreur overlay done : %s", e)

        time.sleep(POST_ACTION_DELAY)

        # --- 5. Hide after the last step ---
        if self._step_current >= self._step_total:
            self._hide_quietly()

        return result

    def _hide_quietly(self) -> None:
        """Call the hide callback, if any; failures are logged, never raised."""
        if not self._hide_callback:
            return
        try:
            self._hide_callback()
        except Exception as e:
            logger.debug("Erreur overlay hide : %s", e)

    def _describe_action(self, action: Dict[str, Any]) -> str:
        """Build the human-readable label shown for an action.

        Missing or ``None`` fields are tolerated; key lists may contain
        non-string items and a single key may be given as a plain string.
        """
        action_type = action.get("type", "?")
        target_text = action.get("target_text", "")
        target_role = action.get("target_role", "")

        if action_type == "click":
            target = target_text or target_role or "cet element"
            return f"Je clique sur [{target}]"
        elif action_type == "type":
            text = str(action.get("text") or "")
            # Only the first 25 characters are shown.
            preview = text[:25] + "..." if len(text) > 25 else text
            return f"Je tape : {preview}"
        elif action_type == "key_combo":
            keys = action.get("keys") or []
            if isinstance(keys, str):
                # A bare string is one key, not a sequence of characters.
                keys = [keys]
            return f"Combinaison : {'+'.join(str(k) for k in keys)}"
        elif action_type == "scroll":
            return "Defilement"
        elif action_type == "wait":
            ms = action.get("duration_ms", 500)
            return f"Attente {ms}ms"
        else:
            return f"Action : {action_type}"

    @staticmethod
    def _normalized_pct(value: Any, default: float = 0.5) -> float:
        """Return ``value`` as a float clamped to [0, 1], or ``default`` if unusable."""
        try:
            pct = float(value)
        except (TypeError, ValueError):
            return default
        if pct != pct:  # NaN
            return default
        return min(1.0, max(0.0, pct))

    def _get_target_coords(
        self, action: Dict[str, Any], sw: int, sh: int,
    ) -> Tuple[int, int]:
        """Convert the action's ``x_pct`` / ``y_pct`` into pixel coordinates.

        Missing, ``None`` or non-numeric percentages fall back to the screen
        centre; out-of-range values are clamped to the screen.
        """
        x_pct = self._normalized_pct(action.get("x_pct", 0.5))
        y_pct = self._normalized_pct(action.get("y_pct", 0.5))
        return int(x_pct * sw), int(y_pct * sh)
|
|
||||||
|
|
||||||
|
|
||||||
# Instance globale (singleton) pour l'integration
|
|
||||||
_bridge: Optional[ReplayOverlayBridge] = None
|
|
||||||
|
|
||||||
|
|
||||||
def get_replay_bridge() -> ReplayOverlayBridge:
    """Return the module-wide overlay/replay bridge, creating it on first call."""
    global _bridge
    bridge = _bridge
    if bridge is None:
        # First access: build the shared instance and remember it.
        bridge = _bridge = ReplayOverlayBridge()
    return bridge
|
|
||||||
@@ -1,200 +0,0 @@
|
|||||||
# agent_v0/lea_ui/styles.py
"""
Theme and colours for the Lea user interface.

Central palette, typography and dimension constants, plus the Qt stylesheet
strings built from them. Stylesheets reference the palette constants instead
of literal colour codes so the theme can be changed in one place.
"""

# ---------------------------------------------------------------------------
# Colour palette
# ---------------------------------------------------------------------------

# Main background
COLOR_BG = "#F5F7FA"
# Secondary background (sidebar, header)
COLOR_BG_SECONDARY = "#EEF1F6"
# User chat-bubble background
COLOR_BUBBLE_USER = "#6366F1"
# Lea chat-bubble background
COLOR_BUBBLE_LEA = "#FFFFFF"
# Main accent (indigo)
COLOR_ACCENT = "#6366F1"
# Accent, hovered
COLOR_ACCENT_HOVER = "#4F46E5"
# Accent, pressed (previously a literal inside SEND_BUTTON_STYLE)
COLOR_ACCENT_PRESSED = "#3730A3"
# Main text
COLOR_TEXT = "#1F2937"
# Secondary text
COLOR_TEXT_SECONDARY = "#6B7280"
# Text drawn on the accent colour (white)
COLOR_TEXT_ON_ACCENT = "#FFFFFF"
# Light border
COLOR_BORDER = "#E5E7EB"
# Success (green)
COLOR_SUCCESS = "#10B981"
# Error (red)
COLOR_ERROR = "#EF4444"
# Warning (orange)
COLOR_WARNING = "#F59E0B"
# Overlay: pulsing red marker
COLOR_OVERLAY_PULSE = "#EF4444"
# Overlay: text
COLOR_OVERLAY_TEXT = "#FFFFFF"
# Overlay: info background (stylesheet rgba() syntax)
COLOR_OVERLAY_INFO_BG = "rgba(31, 41, 55, 200)"

# ---------------------------------------------------------------------------
# Typography
# ---------------------------------------------------------------------------

FONT_FAMILY = "Segoe UI"
FONT_SIZE_SMALL = 11
FONT_SIZE_NORMAL = 13
FONT_SIZE_LARGE = 15
FONT_SIZE_TITLE = 18

# ---------------------------------------------------------------------------
# Dimensions
# ---------------------------------------------------------------------------

# Width of the Lea panel
PANEL_WIDTH = 380
# Minimum height
PANEL_MIN_HEIGHT = 500
# Corner radius
BORDER_RADIUS = 12
# Chat-bubble corner radius
BUBBLE_RADIUS = 16
# Inner padding
PADDING = 12
# Avatar size
AVATAR_SIZE = 40
# Spacing between elements
SPACING = 8

# ---------------------------------------------------------------------------
# Stylesheets
# ---------------------------------------------------------------------------

MAIN_WINDOW_STYLE = f"""
QWidget#LeaMainWindow {{
    background-color: {COLOR_BG};
    border-radius: {BORDER_RADIUS}px;
    border: 1px solid {COLOR_BORDER};
}}
"""

HEADER_STYLE = f"""
QWidget#LeaHeader {{
    background-color: {COLOR_BG_SECONDARY};
    border-top-left-radius: {BORDER_RADIUS}px;
    border-top-right-radius: {BORDER_RADIUS}px;
    border-bottom: 1px solid {COLOR_BORDER};
}}
QLabel#LeaTitle {{
    color: {COLOR_TEXT};
    font-family: "{FONT_FAMILY}";
    font-size: {FONT_SIZE_TITLE}px;
    font-weight: bold;
}}
QLabel#LeaStatus {{
    color: {COLOR_TEXT_SECONDARY};
    font-family: "{FONT_FAMILY}";
    font-size: {FONT_SIZE_SMALL}px;
}}
"""

CHAT_AREA_STYLE = f"""
QScrollArea {{
    border: none;
    background-color: {COLOR_BG};
}}
QWidget#ChatContainer {{
    background-color: {COLOR_BG};
}}
"""

INPUT_STYLE = f"""
QLineEdit#ChatInput {{
    background-color: {COLOR_BUBBLE_LEA};
    border: 1px solid {COLOR_BORDER};
    border-radius: 20px;
    padding: 8px 16px;
    font-family: "{FONT_FAMILY}";
    font-size: {FONT_SIZE_NORMAL}px;
    color: {COLOR_TEXT};
}}
QLineEdit#ChatInput:focus {{
    border-color: {COLOR_ACCENT};
}}
"""

SEND_BUTTON_STYLE = f"""
QPushButton#SendButton {{
    background-color: {COLOR_ACCENT};
    color: {COLOR_TEXT_ON_ACCENT};
    border: none;
    border-radius: 20px;
    padding: 8px 16px;
    font-family: "{FONT_FAMILY}";
    font-size: {FONT_SIZE_NORMAL}px;
    font-weight: bold;
    min-width: 50px;
}}
QPushButton#SendButton:hover {{
    background-color: {COLOR_ACCENT_HOVER};
}}
QPushButton#SendButton:pressed {{
    background-color: {COLOR_ACCENT_PRESSED};
}}
"""

QUICK_BUTTON_STYLE = f"""
QPushButton#QuickButton {{
    background-color: {COLOR_BUBBLE_LEA};
    color: {COLOR_ACCENT};
    border: 1px solid {COLOR_ACCENT};
    border-radius: 18px;
    padding: 6px 14px;
    font-family: "{FONT_FAMILY}";
    font-size: {FONT_SIZE_SMALL}px;
}}
QPushButton#QuickButton:hover {{
    background-color: {COLOR_ACCENT};
    color: {COLOR_TEXT_ON_ACCENT};
}}
"""

PROGRESS_STYLE = f"""
QProgressBar {{
    border: none;
    border-radius: 4px;
    background-color: {COLOR_BORDER};
    text-align: center;
    font-family: "{FONT_FAMILY}";
    font-size: {FONT_SIZE_SMALL}px;
    color: {COLOR_TEXT};
    max-height: 8px;
}}
QProgressBar::chunk {{
    background-color: {COLOR_ACCENT};
    border-radius: 4px;
}}
"""

STATUS_LABEL_STYLE = f"""
QLabel#StatusLabel {{
    color: {COLOR_TEXT_SECONDARY};
    font-family: "{FONT_FAMILY}";
    font-size: {FONT_SIZE_SMALL}px;
    padding: 4px 8px;
}}
"""

MINI_BAR_STYLE = f"""
QWidget#MiniBar {{
    background-color: {COLOR_BG_SECONDARY};
    border-radius: 20px;
    border: 1px solid {COLOR_BORDER};
}}
"""
|
|
||||||
@@ -7,10 +7,43 @@ current_dir = os.path.dirname(os.path.abspath(__file__))
|
|||||||
if current_dir not in sys.path:
|
if current_dir not in sys.path:
|
||||||
sys.path.append(current_dir)
|
sys.path.append(current_dir)
|
||||||
|
|
||||||
|
# Load config.txt and .env as environment variables
# (same effect as the `set` lines in Lea.bat, but also works without the .bat).
# A variable that already exists in the environment is never overwritten.
for config_file in ("config.txt", ".env"):
    config_path = os.path.join(current_dir, config_file)
    if not os.path.isfile(config_path):
        continue
    # utf-8-sig drops the BOM some Windows editors prepend; with plain utf-8
    # the BOM would become part of the first key and that setting would be lost.
    with open(config_path, encoding="utf-8-sig", errors="ignore") as f:
        for line in f:
            line = line.strip()
            # Skip blank lines and "#" comments.
            if not line or line.startswith("#"):
                continue
            key, sep, value = line.partition("=")
            if not sep:
                # Not a KEY=VALUE line.
                continue
            key = key.strip()
            value = value.strip()
            if key and value and key not in os.environ:
                os.environ[key] = value

# Log to a file (also works under pythonw.exe, which has no console)
import logging
log_path = os.path.join(current_dir, "agent_debug.log")
logging.basicConfig(
    filename=log_path,
    level=logging.INFO,
    format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
)
logging.info("=== Agent V1 démarrage — config chargée ===")
logging.info("RPA_SERVER_URL=%s", os.environ.get("RPA_SERVER_URL", "(non défini)"))
logging.info("RPA_SERVER_HOST=%s", os.environ.get("RPA_SERVER_HOST", "(non défini)"))
# Never write any part of the API token to the log file: only record whether
# it is set.
logging.info(
    "RPA_API_TOKEN=%s",
    "(défini)" if os.environ.get("RPA_API_TOKEN") else "(non défini)",
)
logging.info("RPA_BLUR_SENSITIVE=%s", os.environ.get("RPA_BLUR_SENSITIVE", "(non défini)"))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from agent_v1.main import main
|
from agent_v1.main import main
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
except ImportError as e:
|
except ImportError as e:
|
||||||
|
logging.error("Erreur d'importation : %s", e)
|
||||||
print(f"Erreur d'importation : {e}")
|
print(f"Erreur d'importation : {e}")
|
||||||
print("Assurez-vous d'être dans le répertoire racine du projet et que agent_v1 est bien un package Python.")
|
except Exception as e:
|
||||||
|
logging.error("Erreur fatale : %s", e, exc_info=True)
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
393
agent_v0/server_v1/audit_trail.py
Normal file
393
agent_v0/server_v1/audit_trail.py
Normal file
@@ -0,0 +1,393 @@
|
|||||||
|
# agent_v0/server_v1/audit_trail.py
|
||||||
|
"""
|
||||||
|
Module Audit Trail — traçabilité complète des actions RPA.
|
||||||
|
|
||||||
|
Responsabilité : "Chaque action exécutée par Léa est tracée, datée, attribuée."
|
||||||
|
|
||||||
|
En milieu hospitalier (codage CIM-10 via DPI), la traçabilité est une obligation
|
||||||
|
légale. Ce module enregistre chaque action avec :
|
||||||
|
- L'identité du TIM (Technicien d'Information Médicale) superviseur
|
||||||
|
- Le mode d'exécution (autonome, assisté, shadow)
|
||||||
|
- Le résultat détaillé (succès, échec, correction)
|
||||||
|
- L'horodatage ISO 8601
|
||||||
|
|
||||||
|
Format de stockage : fichiers JSONL datés dans data/audit/ (un par jour).
|
||||||
|
Aucune dépendance externe (stdlib + dataclasses uniquement).
|
||||||
|
|
||||||
|
Usage :
|
||||||
|
audit = AuditTrail()
|
||||||
|
audit.record(AuditEntry(
|
||||||
|
session_id="sess_abc",
|
||||||
|
action_id="act_001",
|
||||||
|
user_id="tim_dupont",
|
||||||
|
user_name="Marie Dupont",
|
||||||
|
...
|
||||||
|
))
|
||||||
|
entries = audit.query(user_id="tim_dupont", date_from="2026-04-01")
|
||||||
|
csv_data = audit.export_csv(date_from="2026-04-01", date_to="2026-04-06")
|
||||||
|
summary = audit.get_summary("2026-04-05")
|
||||||
|
"""
|
||||||
|
|
||||||
|
import csv
|
||||||
|
import io
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import threading
|
||||||
|
from dataclasses import dataclass, asdict, fields
|
||||||
|
from datetime import datetime, date, timedelta
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Répertoire par défaut pour le stockage des fichiers d'audit
|
||||||
|
_DEFAULT_AUDIT_DIR = os.environ.get("RPA_AUDIT_DIR", "data/audit")
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class AuditEntry:
    """One audit record: a single traced event.

    Every field has a default, so an entry can be built from any subset of
    keyword arguments.
    """

    # ISO 8601 timestamp (e.g. 2026-04-05T14:23:01.456789)
    timestamp: str = ""

    # Session and action identifiers
    session_id: str = ""
    action_id: str = ""

    # Identity of the supervising user
    user_id: str = ""  # TIM identifier (Windows login or configured value)
    user_name: str = ""  # Display name (e.g. "Marie Dupont")
    machine_id: str = ""  # Client workstation ID (hostname or configured value)

    # Description of the action
    action_type: str = ""  # click, type, key_combo, wait, etc.
    action_detail: str = ""  # Human description ("Clic sur 'Enregistrer' dans DxCare")
    target_app: str = ""  # Target application (DxCare, Orbis, etc.)

    # Execution mode
    execution_mode: str = ""  # "autonomous", "assisted", "shadow"

    # Outcome
    result: str = ""  # "success", "failed", "skipped", "recovered"
    resolution_method: str = ""  # How the target was found (som_text_match, vlm_direct, etc.)
    critic_result: str = ""  # Result of the semantic verification
    recovery_action: str = ""  # Corrective action on failure (undo, escape, retry, none)

    # Business context
    domain: str = ""  # Business domain (tim_codage, generic, etc.)
    workflow_id: str = ""  # ID of the executed workflow
    workflow_name: str = ""  # Readable workflow name

    # Performance
    duration_ms: float = 0.0  # Duration of the action in milliseconds

    def to_dict(self) -> Dict[str, Any]:
        """Return the entry as a plain dict (field name -> value)."""
        as_mapping = asdict(self)
        return as_mapping

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "AuditEntry":
        """Build an entry from a dict.

        Keys that do not correspond to a declared field are ignored; fields
        missing from ``data`` keep their defaults.
        """
        declared = {spec.name for spec in fields(cls)}
        kwargs: Dict[str, Any] = {}
        for key, value in data.items():
            if key in declared:
                kwargs[key] = value
        return cls(**kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
class AuditTrail:
    """Audit trail manager: records audit entries and serves queries over them.

    Each entry is appended as one JSON object per line to a dated JSONL file
    (one file per day) inside ``audit_dir``. Writes are serialized with a lock.

    Files produced:
        <audit_dir>/audit_2026-04-05.jsonl
        <audit_dir>/audit_2026-04-06.jsonl
        ...
    """

    def __init__(self, audit_dir: str = ""):
        """Create the trail and make sure its directory exists.

        Args:
            audit_dir: Target directory; when empty, the module-level
                default directory is used.
        """
        self.audit_dir = Path(audit_dir or _DEFAULT_AUDIT_DIR)
        self.audit_dir.mkdir(parents=True, exist_ok=True)
        self._lock = threading.Lock()
        logger.info(f"Audit Trail initialisé : {self.audit_dir}")

    def _file_for_date(self, d: date) -> Path:
        """Return the path of the JSONL file holding the entries of day ``d``."""
        return self.audit_dir / f"audit_{d.isoformat()}.jsonl"

    def record(self, entry: "AuditEntry") -> None:
        """Append one audit entry to the JSONL file of its day.

        An ISO 8601 timestamp is filled in when the entry has none. The
        target file is derived from that timestamp; if it cannot be parsed,
        today's file is used. Write failures are logged, never raised.
        """
        # Automatic timestamp when the caller did not provide one.
        if not entry.timestamp:
            entry.timestamp = datetime.now().isoformat()

        # Pick the daily file from the entry's own timestamp.
        try:
            entry_date = datetime.fromisoformat(entry.timestamp).date()
        except (ValueError, TypeError):
            entry_date = date.today()

        audit_file = self._file_for_date(entry_date)

        with self._lock:
            try:
                with open(audit_file, "a", encoding="utf-8") as f:
                    f.write(json.dumps(entry.to_dict(), ensure_ascii=False) + "\n")
            except Exception as e:
                logger.error(f"Audit Trail: échec écriture {audit_file}: {e}")
                return

        logger.debug(
            f"Audit: {entry.result} {entry.action_type} "
            f"'{entry.action_detail[:50]}' "
            f"[user={entry.user_id}] [session={entry.session_id}]"
        )

    def _load_file(self, filepath: Path) -> List["AuditEntry"]:
        """Load every valid entry of one JSONL file.

        Blank lines are ignored. A line that is not valid JSON, or that is
        valid JSON but not an object, is logged and skipped so that the
        remaining lines of the file are still loaded. A failure to read the
        file itself is logged and whatever was parsed so far is returned.
        """
        if not filepath.is_file():
            return []

        entries = []
        try:
            with open(filepath, "r", encoding="utf-8") as f:
                for line_num, line in enumerate(f, 1):
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        data = json.loads(line)
                    except json.JSONDecodeError as e:
                        logger.warning(
                            f"Audit Trail: ligne {line_num} invalide dans "
                            f"{filepath.name}: {e}"
                        )
                        continue
                    # from_dict() needs a mapping; a JSON array/scalar here
                    # must not abort the rest of the file.
                    if not isinstance(data, dict):
                        logger.warning(
                            f"Audit Trail: ligne {line_num} invalide dans "
                            f"{filepath.name}: objet JSON attendu"
                        )
                        continue
                    entries.append(AuditEntry.from_dict(data))
        except Exception as e:
            logger.error(f"Audit Trail: échec lecture {filepath}: {e}")

        return entries

    def _date_range(self, date_from: str = "", date_to: str = "") -> List[date]:
        """Return every date from ``date_from`` to ``date_to``, inclusive.

        Expected format is YYYY-MM-DD. An empty or unparsable ``date_from``
        means today; an empty or unparsable ``date_to`` means ``date_from``.
        The bounds are swapped when given in reverse order.
        """
        if date_from:
            try:
                d_from = date.fromisoformat(date_from)
            except ValueError:
                d_from = date.today()
        else:
            d_from = date.today()

        if date_to:
            try:
                d_to = date.fromisoformat(date_to)
            except ValueError:
                d_to = d_from
        else:
            d_to = d_from

        # Guarantee chronological order.
        if d_to < d_from:
            d_from, d_to = d_to, d_from

        span = (d_to - d_from).days
        return [d_from + timedelta(days=i) for i in range(span + 1)]

    def _matching_entries(
        self,
        date_from: str = "",
        date_to: str = "",
        user_id: str = "",
        session_id: str = "",
        result: str = "",
        action_type: str = "",
        workflow_id: str = "",
        domain: str = "",
    ) -> List["AuditEntry"]:
        """Load, filter (AND of all non-empty filters) and sort entries, newest first."""
        criteria = {
            "user_id": user_id,
            "session_id": session_id,
            "result": result,
            "action_type": action_type,
            "workflow_id": workflow_id,
            "domain": domain,
        }
        # Only non-empty filters constrain the result.
        active = {name: wanted for name, wanted in criteria.items() if wanted}

        matches = []
        for d in self._date_range(date_from, date_to):
            for entry in self._load_file(self._file_for_date(d)):
                if all(getattr(entry, name) == wanted for name, wanted in active.items()):
                    matches.append(entry)

        # ISO 8601 strings sort chronologically; str() keeps the sort from
        # failing if a stored timestamp was not a string.
        matches.sort(key=lambda e: str(e.timestamp), reverse=True)
        return matches

    def query(
        self,
        date_from: str = "",
        date_to: str = "",
        user_id: str = "",
        session_id: str = "",
        result: str = "",
        action_type: str = "",
        workflow_id: str = "",
        domain: str = "",
        limit: int = 500,
        offset: int = 0,
    ) -> List[Dict[str, Any]]:
        """Search audit entries.

        All filters are optional and combined with AND. Entries are returned
        as dictionaries, sorted by timestamp descending (newest first), then
        paginated with ``offset``/``limit``. Negative ``offset`` or ``limit``
        values are treated as 0.
        """
        matches = self._matching_entries(
            date_from=date_from,
            date_to=date_to,
            user_id=user_id,
            session_id=session_id,
            result=result,
            action_type=action_type,
            workflow_id=workflow_id,
            domain=domain,
        )

        # Clamp so that negative values cannot turn into from-the-end slices.
        start = max(offset, 0)
        count = max(limit, 0)
        return [e.to_dict() for e in matches[start:start + count]]

    def get_summary(self, target_date: str = "") -> Dict[str, Any]:
        """Return aggregated statistics for one day.

        ``target_date`` is YYYY-MM-DD; empty or unparsable means today.
        The summary contains the total number of actions, the overall
        success rate, and breakdowns by user, result, action type, workflow
        and execution mode. Missing values are counted under "inconnu".
        """
        if not target_date:
            target_date = date.today().isoformat()

        try:
            d = date.fromisoformat(target_date)
        except ValueError:
            d = date.today()

        entries = self._load_file(self._file_for_date(d))

        if not entries:
            return {
                "date": d.isoformat(),
                "total_actions": 0,
                "success_rate": 0.0,
                "by_user": {},
                "by_result": {},
                "by_action_type": {},
                "by_workflow": {},
                "by_execution_mode": {},
            }

        total = len(entries)
        successes = sum(1 for e in entries if e.result == "success")

        by_user: Dict[str, Dict[str, Any]] = {}
        by_result: Dict[str, int] = {}
        by_action_type: Dict[str, int] = {}
        by_workflow: Dict[str, int] = {}
        by_execution_mode: Dict[str, int] = {}

        for entry in entries:
            # Per user (display name is taken from the user's first entry).
            uid = entry.user_id or "inconnu"
            if uid not in by_user:
                by_user[uid] = {
                    "user_name": entry.user_name,
                    "total": 0,
                    "success": 0,
                }
            by_user[uid]["total"] += 1
            if entry.result == "success":
                by_user[uid]["success"] += 1

            # Simple counters.
            r = entry.result or "inconnu"
            by_result[r] = by_result.get(r, 0) + 1

            at = entry.action_type or "inconnu"
            by_action_type[at] = by_action_type.get(at, 0) + 1

            wf = entry.workflow_id or "inconnu"
            by_workflow[wf] = by_workflow.get(wf, 0) + 1

            em = entry.execution_mode or "inconnu"
            by_execution_mode[em] = by_execution_mode.get(em, 0) + 1

        # Per-user success rate.
        for stats in by_user.values():
            stats["success_rate"] = round(
                stats["success"] / stats["total"], 3
            ) if stats["total"] > 0 else 0.0

        return {
            "date": d.isoformat(),
            "total_actions": total,
            "success_rate": round(successes / total, 3) if total > 0 else 0.0,
            "by_user": by_user,
            "by_result": by_result,
            "by_action_type": by_action_type,
            "by_workflow": by_workflow,
            "by_execution_mode": by_execution_mode,
        }

    def export_csv(
        self,
        date_from: str = "",
        date_to: str = "",
        user_id: str = "",
        session_id: str = "",
    ) -> str:
        """Export matching audit entries as CSV text.

        Optional filters: date range, user, session. Returns the complete
        CSV (header included), or an empty string when nothing matches.
        All matching entries are exported; there is no row cap.
        """
        entries = self._matching_entries(
            date_from=date_from,
            date_to=date_to,
            user_id=user_id,
            session_id=session_id,
        )

        if not entries:
            return ""

        # CSV columns follow the dataclass field order.
        fieldnames = [f.name for f in fields(AuditEntry)]

        output = io.StringIO()
        writer = csv.DictWriter(
            output,
            fieldnames=fieldnames,
            extrasaction="ignore",
            quoting=csv.QUOTE_MINIMAL,
        )
        writer.writeheader()
        for entry in entries:
            writer.writerow(entry.to_dict())

        return output.getvalue()
|
||||||
201
agent_v0/server_v1/domain_context.py
Normal file
201
agent_v0/server_v1/domain_context.py
Normal file
@@ -0,0 +1,201 @@
|
|||||||
|
# agent_v0/server_v1/domain_context.py
|
||||||
|
"""
|
||||||
|
Contexte métier pour les appels VLM — rend Léa experte du domaine.
|
||||||
|
|
||||||
|
Chaque workflow est associé à un domaine métier (médical, comptable, etc.)
|
||||||
|
qui enrichit TOUS les prompts VLM (Observer, Critic, acteur, enrichissement).
|
||||||
|
|
||||||
|
Un gemma4 qui sait qu'il regarde un DPI et que l'utilisateur fait du codage
|
||||||
|
CIM-10 prend des décisions bien meilleures qu'un VLM générique.
|
||||||
|
|
||||||
|
Premier domaine : TIM (Technicien d'Information Médicale)
|
||||||
|
- Logiciels DPI/DMS (dossier patient informatisé)
|
||||||
|
- Codage CIM-10 / CCAM / GHM
|
||||||
|
- Lecture de comptes rendus médicaux
|
||||||
|
- Validation des séjours / RSS / RSA
|
||||||
|
|
||||||
|
Usage :
|
||||||
|
ctx = get_domain_context("tim_codage")
|
||||||
|
prompt = f"{ctx.system_prompt}\n\n{user_prompt}"
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class DomainContext:
    """Business context attached to one specific domain."""

    domain_id: str  # Unique identifier (tim_codage, comptabilite, etc.)
    name: str  # Human-readable name (Codage médical TIM)
    description: str  # Short description of the profession

    # System prompt placed ahead of the prompts built by enrich_prompt()
    system_prompt: str = ""

    # Domain vocabulary (terms the VLM is expected to know)
    vocabulary: List[str] = field(default_factory=list)

    # Known applications (software names the VLM may come across)
    known_apps: List[str] = field(default_factory=list)

    # Typical screens (descriptions of the domain's common screens)
    screen_patterns: Dict[str, str] = field(default_factory=dict)

    def enrich_prompt(self, prompt: str, role: str = "") -> str:
        """Prefix a prompt with the domain context.

        The result is, in order and separated by blank lines: the system
        prompt (if any), the role-specific hint (if ``role`` is known), and
        the original prompt.

        Args:
            prompt: The original prompt.
            role: The VLM role (observer, critic, actor, enrichment).
        """
        sections = [self.system_prompt] if self.system_prompt else []

        # The hint table is only consulted when a role was actually given.
        hint_template = _ROLE_HINTS.get(role, "") if role else ""
        if hint_template:
            sections.append(hint_template.format(domain=self.name))

        sections.append(prompt)
        return "\n\n".join(sections)

    def to_dict(self) -> Dict[str, Any]:
        """Return a compact summary (prompt text and vocabulary terms are not included)."""
        summary: Dict[str, Any] = {}
        summary["domain_id"] = self.domain_id
        summary["name"] = self.name
        summary["description"] = self.description
        summary["known_apps"] = self.known_apps
        summary["vocabulary_count"] = len(self.vocabulary)
        return summary
|
||||||
|
|
||||||
|
|
||||||
|
# Hints par rôle VLM — adaptés au contexte métier
|
||||||
|
_ROLE_HINTS = {
|
||||||
|
"observer": (
|
||||||
|
"Tu observes un écran utilisé dans le domaine '{domain}'. "
|
||||||
|
"Cherche les popups, erreurs, ou états incohérents avec ce métier."
|
||||||
|
),
|
||||||
|
"critic": (
|
||||||
|
"Tu vérifies qu'une action dans le domaine '{domain}' a produit "
|
||||||
|
"le bon résultat. Sois précis sur ce que tu vois à l'écran."
|
||||||
|
),
|
||||||
|
"actor": (
|
||||||
|
"Tu décides si une action est nécessaire dans le contexte '{domain}'. "
|
||||||
|
"Utilise ta connaissance du métier pour juger si l'état est cohérent."
|
||||||
|
),
|
||||||
|
"enrichment": (
|
||||||
|
"Tu analyses un enregistrement de workflow dans le domaine '{domain}'. "
|
||||||
|
"Décris les intentions métier, pas juste les clics."
|
||||||
|
),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Domaines pré-configurés
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
_TIM_CODAGE = DomainContext(
|
||||||
|
domain_id="tim_codage",
|
||||||
|
name="Codage médical TIM",
|
||||||
|
description=(
|
||||||
|
"Technicien d'Information Médicale : lecture de comptes rendus médicaux, "
|
||||||
|
"codage des diagnostics en CIM-10, codage des actes en CCAM, "
|
||||||
|
"validation des groupes homogènes de malades (GHM), "
|
||||||
|
"gestion des résumés de sortie standardisés (RSS/RSA)."
|
||||||
|
),
|
||||||
|
system_prompt=(
|
||||||
|
"Tu es un assistant expert en codage médical hospitalier. "
|
||||||
|
"L'utilisateur est un TIM (Technicien d'Information Médicale) qui utilise "
|
||||||
|
"un logiciel DPI (Dossier Patient Informatisé) ou DIM (Département d'Information Médicale). "
|
||||||
|
"Son travail : lire les comptes rendus médicaux des patients et coder les diagnostics "
|
||||||
|
"en CIM-10, les actes en CCAM, et valider les séjours pour le PMSI.\n\n"
|
||||||
|
"Vocabulaire du métier :\n"
|
||||||
|
"- DPI/DMS : logiciel de dossier patient (ex: Orbis, DxCare, Crossway, Easily, Hopital Manager)\n"
|
||||||
|
"- CIM-10 : Classification Internationale des Maladies, 10ème révision (codes diagnostics)\n"
|
||||||
|
"- CCAM : Classification Commune des Actes Médicaux (codes actes chirurgicaux/médicaux)\n"
|
||||||
|
"- GHM : Groupe Homogène de Malades (regroupement tarifaire)\n"
|
||||||
|
"- RSS : Résumé de Sortie Standardisé (données du séjour)\n"
|
||||||
|
"- RSA : Résumé de Sortie Anonyme (RSS anonymisé pour la T2A)\n"
|
||||||
|
"- DP : Diagnostic Principal (le code CIM-10 principal du séjour)\n"
|
||||||
|
"- DAS : Diagnostics Associés Significatifs\n"
|
||||||
|
"- CMA : Complication ou Morbidité Associée (augmente la sévérité)\n"
|
||||||
|
"- T2A : Tarification À l'Activité (financement des hôpitaux)\n"
|
||||||
|
"- PMSI : Programme de Médicalisation des Systèmes d'Information\n"
|
||||||
|
"- UM : Unité Médicale (service hospitalier)\n"
|
||||||
|
"- CR : Compte Rendu (document médical)\n\n"
|
||||||
|
"Écrans courants :\n"
|
||||||
|
"- Liste de patients / dossiers à coder\n"
|
||||||
|
"- Fiche patient (identité, séjour, UM)\n"
|
||||||
|
"- Écran de codage CIM-10 (recherche de codes, saisie DP/DAS)\n"
|
||||||
|
"- Visualiseur de comptes rendus médicaux\n"
|
||||||
|
"- Écran de validation / groupage GHM\n"
|
||||||
|
"- Recherche de codes (arborescence CIM-10 ou recherche textuelle)"
|
||||||
|
),
|
||||||
|
vocabulary=[
|
||||||
|
"CIM-10", "CCAM", "GHM", "RSS", "RSA", "PMSI", "T2A",
|
||||||
|
"diagnostic principal", "DAS", "CMA", "compte rendu",
|
||||||
|
"dossier patient", "séjour", "unité médicale", "codage",
|
||||||
|
"groupage", "valorisation", "exhaustivité",
|
||||||
|
],
|
||||||
|
known_apps=[
|
||||||
|
"Orbis", "DxCare", "Crossway", "Easily", "Hopital Manager",
|
||||||
|
"CORA", "AGFA", "Dedalus", "Maincare", "Softway Medical",
|
||||||
|
"WebPIMS", "CEPAGE", "Medimust",
|
||||||
|
],
|
||||||
|
screen_patterns={
|
||||||
|
"liste_patients": "Liste de dossiers patients avec colonnes (nom, prénom, date entrée, UM, statut codage)",
|
||||||
|
"fiche_patient": "Fiche d'identité patient avec numéro IPP, séjour, dates, UM",
|
||||||
|
"codage_cim10": "Écran de saisie des codes CIM-10 avec diagnostic principal et DAS",
|
||||||
|
"compte_rendu": "Visualiseur de compte rendu médical (texte libre, souvent PDF intégré)",
|
||||||
|
"recherche_code": "Recherche de code CIM-10 ou CCAM (champ de recherche + arborescence)",
|
||||||
|
"validation_ghm": "Écran de validation du groupage avec GHM calculé et valorisation",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
_GENERIC = DomainContext(
|
||||||
|
domain_id="generic",
|
||||||
|
name="Bureautique générale",
|
||||||
|
description="Automatisation bureautique générale (Office, navigateur, etc.)",
|
||||||
|
system_prompt=(
|
||||||
|
"Tu es un assistant RPA qui observe des applications bureautiques. "
|
||||||
|
"Décris précisément ce que tu vois à l'écran."
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
# Registre des domaines disponibles
|
||||||
|
_DOMAINS: Dict[str, DomainContext] = {
|
||||||
|
"tim_codage": _TIM_CODAGE,
|
||||||
|
"generic": _GENERIC,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def get_domain_context(domain_id: str = "generic") -> DomainContext:
    """Look up a domain context by its ID.

    Args:
        domain_id: Domain identifier (tim_codage, generic, etc.)

    Returns:
        The matching DomainContext, or the generic one when the ID is
        not registered (a warning is logged in that case).
    """
    resolved = _DOMAINS.get(domain_id, _GENERIC)
    # Getting the generic context for a non-"generic" request means we fell back.
    fell_back = resolved is _GENERIC and domain_id != "generic"
    if fell_back:
        logger.warning(f"Domaine '{domain_id}' non trouvé, utilisation de 'generic'")
    return resolved
|
||||||
|
|
||||||
|
|
||||||
|
def register_domain(context: DomainContext) -> None:
    """Add a domain context to the registry under its ``domain_id``.

    An existing entry with the same ID is replaced.
    """
    _DOMAINS.update({context.domain_id: context})
    logger.info(f"Domaine '{context.domain_id}' enregistré ({context.name})")
|
||||||
|
|
||||||
|
|
||||||
|
def list_domains() -> List[Dict[str, Any]]:
    """Return the ``to_dict()`` summary of every registered domain."""
    summaries = []
    for registered in _DOMAINS.values():
        summaries.append(registered.to_dict())
    return summaries
|
||||||
346
agent_v0/server_v1/replay_learner.py
Normal file
346
agent_v0/server_v1/replay_learner.py
Normal file
@@ -0,0 +1,346 @@
|
|||||||
|
# agent_v0/server_v1/replay_learner.py
|
||||||
|
"""
|
||||||
|
Module Learning — apprentissage à partir des résultats de replay.
|
||||||
|
|
||||||
|
Responsabilité : "Chaque replay qui échoue enrichit notre base de connaissances."
|
||||||
|
|
||||||
|
Stocke les résultats structurés de chaque action (succès/échec, méthode,
|
||||||
|
screenshots, correction appliquée) pour :
|
||||||
|
1. Améliorer les décisions futures (Policy)
|
||||||
|
2. Affiner les stratégies de grounding (quelle méthode marche pour quel écran)
|
||||||
|
3. Détecter les patterns récurrents d'échec
|
||||||
|
4. Alimenter le fine-tuning futur du VLM
|
||||||
|
|
||||||
|
Format inspiré du cahier des charges (docs/VISION_RPA_INTELLIGENT.md) :
|
||||||
|
{
|
||||||
|
"screenshot_before": "base64...",
|
||||||
|
"action": {"type": "click", "target": "Bouton Valider", ...},
|
||||||
|
"screenshot_after": "base64...",
|
||||||
|
"success": true,
|
||||||
|
"resolution_method": "som_text_match",
|
||||||
|
"correction": null,
|
||||||
|
"human_validated": false
|
||||||
|
}
|
||||||
|
|
||||||
|
Ref: docs/VISION_RPA_INTELLIGENT.md — Boucle d'apprentissage (section 4)
|
||||||
|
Ref: docs/PLAN_ACTEUR_V1.md — Phase 3 : apprentissage continu
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from dataclasses import dataclass, field, asdict
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Répertoire par défaut pour le stockage des résultats d'apprentissage
|
||||||
|
_DEFAULT_LEARNING_DIR = os.environ.get(
|
||||||
|
"RPA_LEARNING_DIR", "data/learning/replay_results"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ActionOutcome:
    """Structured result of a single replay action."""

    # --- Identifiers ---
    session_id: str
    action_id: str
    action_type: str  # click, type, key_combo
    timestamp: float = 0.0  # Epoch seconds

    # --- Context ---
    target_description: str = ""  # "Clic sur 'Enregistrer' dans Bloc-notes"
    intention: str = ""  # "Sauvegarder le fichier"
    window_title: str = ""

    # --- Target resolution ---
    resolution_method: str = ""  # server_som, anchor_template, vlm_direct...
    resolution_score: float = 0.0
    resolution_elapsed_ms: float = 0.0

    # --- Result ---
    success: bool = False
    error: str = ""
    warning: str = ""

    # --- Verification (Critic); None means "not checked" ---
    pixel_verified: Optional[bool] = None
    semantic_verified: Optional[bool] = None
    critic_detail: str = ""

    # --- Recovery ---
    recovery_action: str = ""  # undo, escape, close, none
    recovery_success: bool = False

    # --- Screenshots (relative paths rather than base64, which is too heavy) ---
    screenshot_before_path: str = ""
    screenshot_after_path: str = ""

    # --- Human correction (feedback loop) ---
    human_validated: bool = False
    human_correction: str = ""  # Description of the correction

    def to_dict(self) -> Dict[str, Any]:
        """Return the outcome as a plain dictionary, one key per field."""
        as_mapping = asdict(self)
        return as_mapping
|
||||||
|
|
||||||
|
|
||||||
|
class ReplayLearner:
|
||||||
|
"""Apprentissage à partir des résultats de replay.
|
||||||
|
|
||||||
|
Stocke chaque action dans un fichier JSONL par session.
|
||||||
|
Fournit des requêtes pour améliorer les décisions futures.
|
||||||
|
|
||||||
|
Usage côté serveur (api_stream.py) :
|
||||||
|
learner = ReplayLearner()
|
||||||
|
learner.record(outcome)
|
||||||
|
|
||||||
|
Usage côté Policy :
|
||||||
|
history = learner.query_similar(target_description, window_title)
|
||||||
|
# → "La dernière fois, template matching a échoué mais SoM a trouvé"
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, learning_dir: str = ""):
|
||||||
|
self.learning_dir = Path(learning_dir or _DEFAULT_LEARNING_DIR)
|
||||||
|
self.learning_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
# Cache mémoire des derniers résultats (pour requêtes rapides)
|
||||||
|
self._recent: List[ActionOutcome] = []
|
||||||
|
self._max_recent = 500
|
||||||
|
|
||||||
|
def record(self, outcome: ActionOutcome) -> None:
|
||||||
|
"""Enregistrer le résultat d'une action.
|
||||||
|
|
||||||
|
Écrit en append dans un fichier JSONL par session.
|
||||||
|
Garde aussi en mémoire pour les requêtes rapides.
|
||||||
|
"""
|
||||||
|
if not outcome.timestamp:
|
||||||
|
outcome.timestamp = time.time()
|
||||||
|
|
||||||
|
# Fichier JSONL par session
|
||||||
|
session_file = self.learning_dir / f"{outcome.session_id}.jsonl"
|
||||||
|
try:
|
||||||
|
with open(session_file, "a") as f:
|
||||||
|
f.write(json.dumps(outcome.to_dict(), ensure_ascii=False) + "\n")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Learning: échec écriture {session_file}: {e}")
|
||||||
|
|
||||||
|
# Cache mémoire
|
||||||
|
self._recent.append(outcome)
|
||||||
|
if len(self._recent) > self._max_recent:
|
||||||
|
self._recent = self._recent[-self._max_recent:]
|
||||||
|
|
||||||
|
# Log résumé
|
||||||
|
status = "OK" if outcome.success else "ÉCHEC"
|
||||||
|
logger.info(
|
||||||
|
f"Learning: {status} {outcome.action_type} "
|
||||||
|
f"'{outcome.target_description[:40]}' "
|
||||||
|
f"[{outcome.resolution_method}] "
|
||||||
|
f"critic={'OK' if outcome.semantic_verified else 'NON' if outcome.semantic_verified is False else '?'}"
|
||||||
|
)
|
||||||
|
|
||||||
|
def record_from_replay_result(
|
||||||
|
self,
|
||||||
|
session_id: str,
|
||||||
|
action: Dict[str, Any],
|
||||||
|
result: Dict[str, Any],
|
||||||
|
verification: Optional[Dict] = None,
|
||||||
|
) -> None:
|
||||||
|
"""Enregistrer depuis les structures existantes du replay.
|
||||||
|
|
||||||
|
Convertit le format action/result du replay en ActionOutcome.
|
||||||
|
Appelé depuis api_stream.py après chaque action de replay.
|
||||||
|
"""
|
||||||
|
target_spec = action.get("target_spec", {})
|
||||||
|
outcome = ActionOutcome(
|
||||||
|
session_id=session_id,
|
||||||
|
action_id=action.get("action_id", ""),
|
||||||
|
action_type=action.get("type", ""),
|
||||||
|
target_description=target_spec.get("by_text", ""),
|
||||||
|
intention=action.get("intention", ""),
|
||||||
|
window_title=target_spec.get("window_title", ""),
|
||||||
|
resolution_method=result.get("resolution_method", ""),
|
||||||
|
resolution_score=result.get("resolution_score", 0.0),
|
||||||
|
resolution_elapsed_ms=result.get("resolution_elapsed_ms", 0.0),
|
||||||
|
success=result.get("success", False),
|
||||||
|
error=result.get("error", ""),
|
||||||
|
warning=result.get("warning", ""),
|
||||||
|
)
|
||||||
|
|
||||||
|
if verification:
|
||||||
|
outcome.pixel_verified = verification.get("verified")
|
||||||
|
outcome.semantic_verified = verification.get("semantic_verified")
|
||||||
|
outcome.critic_detail = verification.get("semantic_detail", "")
|
||||||
|
|
||||||
|
self.record(outcome)
|
||||||
|
|
||||||
|
def query_similar(
|
||||||
|
self,
|
||||||
|
target_description: str = "",
|
||||||
|
window_title: str = "",
|
||||||
|
limit: int = 10,
|
||||||
|
) -> List[Dict[str, Any]]:
|
||||||
|
"""Chercher des résultats similaires dans l'historique.
|
||||||
|
|
||||||
|
Recherche par correspondance textuelle sur la description de cible
|
||||||
|
et le titre de fenêtre. Retourne les plus récents en premier.
|
||||||
|
|
||||||
|
Utile pour le Policy : "qu'est-ce qui a marché avant pour cette cible ?"
|
||||||
|
"""
|
||||||
|
results = []
|
||||||
|
target_lower = target_description.lower()
|
||||||
|
window_lower = window_title.lower()
|
||||||
|
|
||||||
|
for outcome in reversed(self._recent):
|
||||||
|
score = 0
|
||||||
|
if target_lower and target_lower in outcome.target_description.lower():
|
||||||
|
score += 2
|
||||||
|
if window_lower and window_lower in outcome.window_title.lower():
|
||||||
|
score += 1
|
||||||
|
if score > 0:
|
||||||
|
results.append({
|
||||||
|
"outcome": outcome.to_dict(),
|
||||||
|
"relevance": score,
|
||||||
|
})
|
||||||
|
if len(results) >= limit:
|
||||||
|
break
|
||||||
|
|
||||||
|
return sorted(results, key=lambda x: x["relevance"], reverse=True)
|
||||||
|
|
||||||
|
def best_strategy_for(
|
||||||
|
self,
|
||||||
|
target_description: str = "",
|
||||||
|
window_title: str = "",
|
||||||
|
) -> Optional[str]:
|
||||||
|
"""Quelle méthode de grounding a le mieux marché pour cette cible ?
|
||||||
|
|
||||||
|
Consulte l'historique et retourne la méthode qui a le plus haut
|
||||||
|
taux de succès pour des cibles similaires. C'est la boucle
|
||||||
|
d'apprentissage : les replays passés améliorent les suivants.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Nom de la meilleure méthode (ex: "som_text_match") ou None
|
||||||
|
"""
|
||||||
|
similar = self.query_similar(target_description, window_title, limit=20)
|
||||||
|
if not similar:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Compter les succès par méthode
|
||||||
|
method_stats: Dict[str, List[int]] = {} # method → [successes, total]
|
||||||
|
for entry in similar:
|
||||||
|
outcome = entry["outcome"]
|
||||||
|
method = outcome.get("resolution_method", "")
|
||||||
|
if not method:
|
||||||
|
continue
|
||||||
|
if method not in method_stats:
|
||||||
|
method_stats[method] = [0, 0]
|
||||||
|
method_stats[method][1] += 1
|
||||||
|
if outcome.get("success"):
|
||||||
|
method_stats[method][0] += 1
|
||||||
|
|
||||||
|
if not method_stats:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Retourner la méthode avec le meilleur taux de succès (minimum 2 occurrences)
|
||||||
|
best = None
|
||||||
|
best_rate = 0.0
|
||||||
|
for method, (successes, total) in method_stats.items():
|
||||||
|
if total >= 2: # Au moins 2 essais pour être significatif
|
||||||
|
rate = successes / total
|
||||||
|
if rate > best_rate:
|
||||||
|
best_rate = rate
|
||||||
|
best = method
|
||||||
|
|
||||||
|
if best:
|
||||||
|
logger.info(
|
||||||
|
f"Learning: meilleure stratégie pour '{target_description[:30]}' → "
|
||||||
|
f"{best} ({best_rate:.0%} sur {method_stats[best][1]} essais)"
|
||||||
|
)
|
||||||
|
|
||||||
|
return best
|
||||||
|
|
||||||
|
def consolidate_workflow(
|
||||||
|
self,
|
||||||
|
actions: list,
|
||||||
|
session_id: str = "",
|
||||||
|
) -> int:
|
||||||
|
"""Consolider un workflow avec les apprentissages passés.
|
||||||
|
|
||||||
|
Pour chaque action du workflow, vérifie si l'historique suggère
|
||||||
|
une meilleure stratégie de résolution. Si oui, l'ajoute en
|
||||||
|
hint dans le target_spec de l'action.
|
||||||
|
|
||||||
|
Modifie les actions in-place. Retourne le nombre d'actions enrichies.
|
||||||
|
|
||||||
|
C'est la cross-pollination : un replay qui a réussi "Enregistrer"
|
||||||
|
via som_text améliore tous les futurs workflows qui cliquent sur "Enregistrer".
|
||||||
|
"""
|
||||||
|
enriched = 0
|
||||||
|
for action in actions:
|
||||||
|
if action.get("type") != "click":
|
||||||
|
continue
|
||||||
|
target_spec = action.get("target_spec", {})
|
||||||
|
by_text = target_spec.get("by_text", "")
|
||||||
|
window = target_spec.get("window_title", "")
|
||||||
|
if not by_text:
|
||||||
|
continue
|
||||||
|
|
||||||
|
best = self.best_strategy_for(by_text, window)
|
||||||
|
if best:
|
||||||
|
target_spec["_learned_strategy"] = best
|
||||||
|
enriched += 1
|
||||||
|
|
||||||
|
if enriched:
|
||||||
|
logger.info(
|
||||||
|
f"Consolidation : {enriched} actions enrichies par l'apprentissage "
|
||||||
|
f"(session {session_id})"
|
||||||
|
)
|
||||||
|
return enriched
|
||||||
|
|
||||||
|
def get_stats(self) -> Dict[str, Any]:
    """Summarise the replay outcomes currently held in ``self._recent``.

    Returns:
        ``{"total": 0}`` when there is no outcome. Otherwise a dict with the
        overall ``total`` and ``success_rate`` (rounded to 3 decimals) plus a
        ``methods`` mapping giving the same two figures per resolution
        method; outcomes without a method are grouped under ``"unknown"``.
    """
    outcomes = self._recent
    if not outcomes:
        return {"total": 0}

    count = len(outcomes)
    ok_count = 0
    per_method = {}
    for outcome in outcomes:
        key = outcome.resolution_method or "unknown"
        bucket = per_method.setdefault(key, {"total": 0, "success": 0})
        bucket["total"] += 1
        if outcome.success:
            ok_count += 1
            bucket["success"] += 1

    breakdown = {}
    for key, bucket in per_method.items():
        tries = bucket["total"]
        breakdown[key] = {
            "total": tries,
            "success_rate": round(bucket["success"] / tries, 3) if tries > 0 else 0,
        }

    return {
        "total": count,
        "success_rate": round(ok_count / count, 3) if count > 0 else 0,
        "methods": breakdown,
    }
|
||||||
|
|
||||||
|
def load_session(self, session_id: str) -> List[ActionOutcome]:
    """Read back every outcome stored for one session.

    The outcomes are read from ``<learning_dir>/<session_id>.jsonl``, one
    JSON object per non-blank line, each expanded into an ``ActionOutcome``.

    Args:
        session_id: Session whose JSONL file should be loaded.

    Returns:
        The outcomes parsed so far. An absent file yields an empty list; a
        read or parse error is logged as a warning and whatever was parsed
        before the error is still returned.
    """
    session_file = self.learning_dir / f"{session_id}.jsonl"
    loaded = []
    if session_file.is_file():
        try:
            with open(session_file) as handle:
                for raw in handle:
                    record = raw.strip()
                    if not record:
                        continue
                    fields = json.loads(record)
                    loaded.append(ActionOutcome(**fields))
        except Exception as e:
            logger.warning(f"Learning: échec lecture {session_file}: {e}")
    return loaded
|
||||||
@@ -1,20 +1,24 @@
|
|||||||
# agent_v0/server_v1/replay_verifier.py
|
# agent_v0/server_v1/replay_verifier.py
|
||||||
"""
|
"""
|
||||||
ReplayVerifier — Vérification post-action pour le replay de workflows.
|
ReplayVerifier — Vérification post-action (Critic) pour le replay de workflows.
|
||||||
|
|
||||||
Compare les screenshots avant/après une action pour détecter si elle a eu
|
Deux niveaux de vérification :
|
||||||
un effet visible. Utilisé par l'API de replay pour décider si une action
|
1. PIXEL : Différence d'image avant/après (rapide, ~10ms)
|
||||||
a réussi ou si un retry est nécessaire.
|
- L'écran a-t-il changé ? Où ? De combien ?
|
||||||
|
2. SÉMANTIQUE : VLM évalue si le résultat correspond à l'attendu (~2-5s)
|
||||||
|
- L'action a-t-elle eu l'EFFET voulu ? (pas juste "des pixels ont bougé")
|
||||||
|
|
||||||
Stratégies de vérification :
|
Le niveau pixel existait déjà. Le niveau sémantique (Critic) est le chaînon
|
||||||
1. Différence d'image globale (avant == après → probablement rien ne s'est passé)
|
manquant identifié par comparaison avec Claude Computer Use et OpenAdapt.
|
||||||
2. Zone locale autour du clic (si l'action est un clic)
|
|
||||||
3. Détection de texte apparu (si l'action est une frappe)
|
Ref: docs/VISION_RPA_INTELLIGENT.md — étape VERIFY du pipeline.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
|
import time
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from typing import Any, Dict, Optional, Tuple
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -35,9 +39,13 @@ class VerificationResult:
|
|||||||
suggestion: str # "retry", "skip", "abort", "continue"
|
suggestion: str # "retry", "skip", "abort", "continue"
|
||||||
detail: str = "" # Description humaine du résultat
|
detail: str = "" # Description humaine du résultat
|
||||||
local_change_pct: float = 0.0 # % de changement dans la zone locale (si applicable)
|
local_change_pct: float = 0.0 # % de changement dans la zone locale (si applicable)
|
||||||
|
# Critic sémantique (VLM)
|
||||||
|
semantic_verified: Optional[bool] = None # None = pas de vérif sémantique
|
||||||
|
semantic_detail: str = "" # Explication du VLM
|
||||||
|
semantic_elapsed_ms: float = 0.0 # Temps de la vérif sémantique
|
||||||
|
|
||||||
def to_dict(self) -> Dict[str, Any]:
|
def to_dict(self) -> Dict[str, Any]:
|
||||||
return {
|
d = {
|
||||||
"verified": self.verified,
|
"verified": self.verified,
|
||||||
"confidence": round(self.confidence, 3),
|
"confidence": round(self.confidence, 3),
|
||||||
"changes_detected": self.changes_detected,
|
"changes_detected": self.changes_detected,
|
||||||
@@ -46,6 +54,11 @@ class VerificationResult:
|
|||||||
"detail": self.detail,
|
"detail": self.detail,
|
||||||
"local_change_pct": round(self.local_change_pct, 3),
|
"local_change_pct": round(self.local_change_pct, 3),
|
||||||
}
|
}
|
||||||
|
if self.semantic_verified is not None:
|
||||||
|
d["semantic_verified"] = self.semantic_verified
|
||||||
|
d["semantic_detail"] = self.semantic_detail
|
||||||
|
d["semantic_elapsed_ms"] = round(self.semantic_elapsed_ms, 1)
|
||||||
|
return d
|
||||||
|
|
||||||
|
|
||||||
class ReplayVerifier:
|
class ReplayVerifier:
|
||||||
@@ -345,3 +358,275 @@ class ReplayVerifier:
|
|||||||
f"(global={global_change_pct:.3f}%, local={local_change_pct:.3f}%)"
|
f"(global={global_change_pct:.3f}%, local={local_change_pct:.3f}%)"
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Critic sémantique — VLM évalue si le résultat correspond à l'attendu
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
def verify_with_critic(
|
||||||
|
self,
|
||||||
|
action: Dict[str, Any],
|
||||||
|
result: Dict[str, Any],
|
||||||
|
screenshot_before: Optional[str] = None,
|
||||||
|
screenshot_after: Optional[str] = None,
|
||||||
|
expected_result: str = "",
|
||||||
|
action_intention: str = "",
|
||||||
|
workflow_context: str = "",
|
||||||
|
) -> VerificationResult:
|
||||||
|
"""Vérification complète : pixel + sémantique (Critic).
|
||||||
|
|
||||||
|
Étape 1 : Vérification pixel (rapide, ~10ms) — l'écran a-t-il changé ?
|
||||||
|
Étape 2 : Vérification sémantique (VLM, ~2-5s) — le changement est-il le bon ?
|
||||||
|
|
||||||
|
La vérification sémantique n'est lancée que si :
|
||||||
|
- expected_result est fourni (description de l'état attendu après l'action)
|
||||||
|
- La vérification pixel a détecté un changement (sinon, pas besoin du VLM)
|
||||||
|
|
||||||
|
Args:
|
||||||
|
action: L'action exécutée
|
||||||
|
result: Le résultat rapporté par l'agent
|
||||||
|
screenshot_before: Screenshot avant l'action (base64)
|
||||||
|
screenshot_after: Screenshot après l'action (base64)
|
||||||
|
expected_result: Description de l'état attendu après l'action
|
||||||
|
action_intention: Ce que l'action était censée faire
|
||||||
|
workflow_context: Contexte global (progression, objectif)
|
||||||
|
"""
|
||||||
|
# Étape 1 : vérification pixel (existante)
|
||||||
|
pixel_result = self.verify_action(
|
||||||
|
action=action,
|
||||||
|
result=result,
|
||||||
|
screenshot_before=screenshot_before,
|
||||||
|
screenshot_after=screenshot_after,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Pas de description attendue → retourner le résultat pixel seul
|
||||||
|
if not expected_result:
|
||||||
|
return pixel_result
|
||||||
|
|
||||||
|
# Si aucun changement pixel ET suggestion retry → pas besoin du VLM
|
||||||
|
if not pixel_result.changes_detected and pixel_result.suggestion == "retry":
|
||||||
|
return pixel_result
|
||||||
|
|
||||||
|
# Étape 2 : vérification sémantique via VLM
|
||||||
|
semantic = self._verify_semantic(
|
||||||
|
screenshot_before=screenshot_before,
|
||||||
|
screenshot_after=screenshot_after,
|
||||||
|
expected_result=expected_result,
|
||||||
|
action_intention=action_intention,
|
||||||
|
workflow_context=workflow_context,
|
||||||
|
)
|
||||||
|
|
||||||
|
if semantic is None:
|
||||||
|
# VLM indisponible → garder le résultat pixel seul
|
||||||
|
return pixel_result
|
||||||
|
|
||||||
|
# Fusionner les résultats pixel + sémantique
|
||||||
|
return self._merge_results(pixel_result, semantic)
|
||||||
|
|
||||||
|
def _verify_semantic(
    self,
    screenshot_before: Optional[str],
    screenshot_after: Optional[str],
    expected_result: str,
    action_intention: str = "",
    workflow_context: str = "",
) -> Optional[Dict[str, Any]]:
    """Ask the VLM whether the expected result is visible after the action.

    Sends the screenshot(s) and a French "Critic" prompt to the chat
    endpoint ``http://localhost:<GEMMA4_PORT>/api/chat`` (port read from the
    ``GEMMA4_PORT`` environment variable, default ``11435``; model
    ``gemma4:e4b``) and parses the ``VERDICT:`` / ``RAISON:`` lines of the
    reply. If no ``VERDICT:`` line is found, the raw text is searched for
    OUI / NON as a fallback.

    Args:
        screenshot_before: Screenshot before the action; when missing, only
            the "after" image is sent.
        screenshot_after: Screenshot after the action (required).
        expected_result: Description of the state expected after the action.
        action_intention: What the action was meant to do (optional).
        workflow_context: Overall workflow context (optional).

    Returns:
        ``{"verified": bool, "detail": str, "elapsed_ms": float}``, or
        ``None`` when there is no "after" screenshot, the HTTP call fails or
        times out, or the reply cannot be parsed.
    """
    import requests as _requests

    if not screenshot_after:
        return None

    gemma4_port = os.environ.get("GEMMA4_PORT", "11435")
    gemma4_url = f"http://localhost:{gemma4_port}/api/chat"

    # Build the Critic prompt.
    context_parts = []
    if action_intention:
        context_parts.append(f"Action effectuée : {action_intention}")
    if workflow_context:
        context_parts.append(f"Contexte : {workflow_context}")
    context_str = "\n".join(context_parts)

    # Two images (before + after) when available, otherwise "after" only.
    images = []
    prompt_images = ""
    if screenshot_before and screenshot_after:
        images = [screenshot_before, screenshot_after]
        prompt_images = (
            "Image 1 = écran AVANT l'action.\n"
            "Image 2 = écran APRÈS l'action.\n"
        )
    elif screenshot_after:
        images = [screenshot_after]
        prompt_images = "Image = écran APRÈS l'action.\n"

    prompt = (
        f"Tu es le VÉRIFICATEUR d'un robot RPA. Tu dois dire si l'action a réussi.\n\n"
        f"{prompt_images}"
        f"{context_str}\n\n"
        f"Résultat attendu : {expected_result}\n\n"
        f"Est-ce que le résultat attendu est visible à l'écran ?\n"
        f"Réponds EXACTEMENT dans ce format :\n"
        f"VERDICT: OUI ou NON\n"
        f"RAISON: explication courte (1 ligne)"
    )

    # Inject the domain system prompt when available. This is best-effort:
    # a missing or broken domain context must not make the whole
    # verification raise, so the Critic simply runs without it.
    messages = []
    try:
        from .domain_context import get_domain_context
        domain = get_domain_context(os.environ.get("RPA_DOMAIN", "generic"))
        if domain.system_prompt:
            messages.append({"role": "system", "content": domain.system_prompt})
    except Exception as e:
        logger.warning(f"Critic VLM contexte métier indisponible : {e}")
    messages.append({"role": "user", "content": prompt, "images": images})

    try:
        t_start = time.time()
        resp = _requests.post(
            gemma4_url,
            json={
                "model": "gemma4:e4b",
                "messages": messages,
                "stream": False,
                "think": True,
                "options": {"temperature": 0.1, "num_predict": 800},
            },
            timeout=30,
        )
        elapsed_ms = (time.time() - t_start) * 1000

        if not resp.ok:
            logger.warning(f"Critic VLM HTTP {resp.status_code}")
            return None

        content = resp.json().get("message", {}).get("content", "").strip()

        # Parse the verdict; `detail` defaults to the whole reply.
        verified = None
        detail = content
        reason_prefix = "RAISON:"
        for line in content.split("\n"):
            stripped = line.strip()
            line_upper = stripped.upper()
            if line_upper.startswith("VERDICT:"):
                verdict_text = line_upper.replace("VERDICT:", "").strip()
                if "OUI" in verdict_text or "YES" in verdict_text:
                    verified = True
                elif "NON" in verdict_text or "NO" in verdict_text:
                    verified = False
            elif line_upper.startswith(reason_prefix):
                # The prefix was matched case-insensitively, so drop it by
                # length: "Raison: ..." must lose its label too.
                detail = stripped[len(reason_prefix):].strip()

        if verified is None:
            # Fallback: look for OUI/NON anywhere in the raw text.
            upper = content.upper()
            if "OUI" in upper and "NON" not in upper:
                verified = True
            elif "NON" in upper:
                verified = False
            else:
                logger.warning(f"Critic VLM réponse non parsable : {content[:100]}")
                return None

        logger.info(
            f"Critic VLM : {'OUI' if verified else 'NON'} en {elapsed_ms:.0f}ms — {detail[:80]}"
        )
        return {
            "verified": verified,
            "detail": detail,
            "elapsed_ms": elapsed_ms,
        }

    except _requests.Timeout:
        logger.warning("Critic VLM timeout (30s)")
        return None
    except Exception as e:
        logger.warning(f"Critic VLM erreur : {e}")
        return None
|
||||||
|
|
||||||
|
def _merge_results(
    self,
    pixel: VerificationResult,
    semantic: Dict[str, Any],
) -> VerificationResult:
    """Combine the pixel verdict and the semantic (Critic) verdict.

    Decision matrix (screen changed / Critic agrees):
      - changed   + agrees    -> verified, confidence min(0.95, pixel + 0.2)
      - changed   + disagrees -> not verified (unexpected change), 0.7, retry
      - unchanged + agrees    -> verified anyway, 0.6
      - unchanged + disagrees -> not verified, 0.8, retry

    Args:
        pixel: Result of the pixel-level verification.
        semantic: Dict with ``verified``, ``detail`` and ``elapsed_ms`` keys.

    Returns:
        A new ``VerificationResult`` carrying both levels of information.
    """
    critic_ok = bool(semantic["verified"])
    screen_changed = bool(pixel.changes_detected)

    # (screen_changed, critic_ok) -> (fixed confidence or None, detail prefix)
    matrix = {
        (True, True): (None, "Pixel OK + Critic OK : "),
        (True, False): (0.7, "Pixel OK mais Critic NON : "),
        (False, True): (0.6, "Pixel inchangé mais Critic OK : "),
        (False, False): (0.8, "Pixel inchangé + Critic NON : "),
    }
    score, prefix = matrix[(screen_changed, critic_ok)]
    if score is None:
        # Both levels agree: boost the pixel confidence, capped at 0.95.
        score = min(0.95, pixel.confidence + 0.2)

    return VerificationResult(
        verified=critic_ok,
        confidence=score,
        changes_detected=screen_changed,
        change_area_pct=pixel.change_area_pct,
        local_change_pct=pixel.local_change_pct,
        suggestion="continue" if critic_ok else "retry",
        detail=f"{prefix}{semantic['detail']}",
        semantic_verified=critic_ok,
        semantic_detail=semantic["detail"],
        semantic_elapsed_ms=semantic["elapsed_ms"],
    )
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
596
agent_v0/server_v1/task_planner.py
Normal file
596
agent_v0/server_v1/task_planner.py
Normal file
@@ -0,0 +1,596 @@
|
|||||||
|
# agent_v0/server_v1/task_planner.py
|
||||||
|
"""
|
||||||
|
TaskPlanner — Planificateur MACRO pour RPA Vision V3.
|
||||||
|
|
||||||
|
Responsabilité : comprendre un ordre en langage naturel et l'exécuter.
|
||||||
|
|
||||||
|
"Traite les dossiers de janvier" →
|
||||||
|
1. Comprendre l'instruction (gemma4)
|
||||||
|
2. Trouver le workflow appris correspondant
|
||||||
|
3. Identifier les paramètres/variables
|
||||||
|
4. Exécuter (replay avec substitution) ou planifier (actions libres)
|
||||||
|
|
||||||
|
C'est le niveau MACRO de l'architecture 3 niveaux :
|
||||||
|
MACRO (TaskPlanner) → décompose et orchestre
|
||||||
|
MÉSO (Policy/Observer/Critic) → décide et vérifie
|
||||||
|
MICRO (Grounding/Executor) → localise et clique
|
||||||
|
|
||||||
|
Ref: docs/PLAN_ACTEUR_V1.md — Phase 3 : Planificateur
|
||||||
|
Ref: docs/VISION_RPA_INTELLIGENT.md — "Il observe" → "Il devient autonome"
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class TaskPlan:
    """Execution plan produced by the planner for one user instruction."""

    instruction: str  # Original user instruction
    understood: bool = False  # Whether the instruction was understood
    workflow_match: str = ""  # ID of the matching workflow (if any)
    workflow_name: str = ""  # Name of the matching workflow
    match_confidence: float = 0.0  # Confidence of the match (0-1)
    parameters: Dict[str, Any] = field(default_factory=dict)  # Extracted variables
    is_loop: bool = False  # Whether the task loops over a list of items
    loop_source: str = ""  # Where the items come from (screen, file, query)
    steps: List[Dict[str, Any]] = field(default_factory=list)  # Planned actions
    mode: str = ""  # "replay" (known workflow) or "free" (generated actions)
    error: str = ""

    def to_dict(self) -> Dict[str, Any]:
        """Return a serialisable summary; steps are reported as a count only."""
        summary: Dict[str, Any] = {}
        for key in ("instruction", "understood", "workflow_match", "workflow_name"):
            summary[key] = getattr(self, key)
        summary["match_confidence"] = round(self.match_confidence, 3)
        for key in ("parameters", "is_loop", "loop_source"):
            summary[key] = getattr(self, key)
        summary["steps_count"] = len(self.steps)
        summary["mode"] = self.mode
        summary["error"] = self.error
        return summary
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class TaskResult:
    """Outcome of executing a task."""

    instruction: str
    success: bool
    total_items: int = 1  # Number of items processed (1 when not looping)
    completed_items: int = 0
    failed_items: int = 0
    results: List[Dict[str, Any]] = field(default_factory=list)
    elapsed_s: float = 0.0
    summary: str = ""

    def to_dict(self) -> Dict[str, Any]:
        """Return a serialisable summary (the per-item ``results`` are not included)."""
        payload: Dict[str, Any] = {}
        for key in ("instruction", "success", "total_items", "completed_items", "failed_items"):
            payload[key] = getattr(self, key)
        payload["elapsed_s"] = round(self.elapsed_s, 1)
        payload["summary"] = self.summary
        return payload
|
||||||
|
|
||||||
|
|
||||||
|
class TaskPlanner:
|
||||||
|
"""Planificateur MACRO — comprend les instructions et orchestre l'exécution.
|
||||||
|
|
||||||
|
Usage :
|
||||||
|
planner = TaskPlanner()
|
||||||
|
plan = planner.understand("traite les dossiers de janvier")
|
||||||
|
result = planner.execute(plan, replay_callback=launch_replay)
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, gemma4_port: str = "", domain_id: str = ""):
|
||||||
|
self._gemma4_port = gemma4_port or os.environ.get("GEMMA4_PORT", "11435")
|
||||||
|
self._gemma4_url = f"http://localhost:{self._gemma4_port}/api/chat"
|
||||||
|
self._domain_id = domain_id or os.environ.get("RPA_DOMAIN", "generic")
|
||||||
|
|
||||||
|
# Charger le contexte métier
|
||||||
|
try:
|
||||||
|
from .domain_context import get_domain_context
|
||||||
|
self._domain = get_domain_context(self._domain_id)
|
||||||
|
except Exception:
|
||||||
|
self._domain = None
|
||||||
|
|
||||||
|
def understand(
    self,
    instruction: str,
    available_workflows: Optional[List[Dict[str, Any]]] = None,
    screen_context: str = "",
) -> TaskPlan:
    """Turn a natural-language instruction into a ``TaskPlan``.

    Builds a French planner prompt (known workflows, optional domain
    context, optional screen description, expected answer format), posts
    it to the chat endpoint configured in ``__init__`` and hands the reply
    to ``_parse_understanding``.

    Args:
        instruction: The user's order (e.g. "traite les dossiers de janvier").
        available_workflows: Known workflows as dicts with ``name``,
            ``description`` and ``session_id`` keys. Only the first 10 are
            listed in the prompt.
        screen_context: Description of the current screen. When non-empty
            it is added to the prompt; when empty the prompt is unchanged.

    Returns:
        The plan. On HTTP failure or any exception, ``plan.error`` is set
        and the plan is returned with ``understood`` left at ``False``.
    """
    import requests as _requests

    plan = TaskPlan(instruction=instruction)

    # List the available workflows for the prompt (top 10).
    workflows_desc = "Aucun workflow enregistré."
    if available_workflows:
        top_workflows = available_workflows[:10]
        lines = []
        for i, wf in enumerate(top_workflows):
            name = wf.get("name", wf.get("session_id", f"workflow_{i}"))
            desc = wf.get("description", "")
            sid = wf.get("session_id", "")
            # Show the business description to help semantic matching.
            label = f"{name}"
            if desc:
                label += f" — {desc}"
            lines.append(f" {i+1}. {label} (id={sid})")
        workflows_desc = "\n".join(lines)

    # Domain context.
    domain_prompt = ""
    if self._domain and self._domain.system_prompt:
        domain_prompt = f"\nCONTEXTE MÉTIER :\n{self._domain.system_prompt}\n"

    # Current screen description. The parameter was previously accepted
    # but never used; it is now passed on to the model when provided.
    screen_prompt = ""
    if screen_context:
        screen_prompt = f"\nÉCRAN ACTUEL :\n{screen_context}\n"

    prompt = (
        f"Tu es le PLANIFICATEUR d'un robot RPA (Léa). "
        f"Analyse l'ordre utilisateur et identifie le workflow correspondant.\n"
        f"{domain_prompt}{screen_prompt}\n"
        f"WORKFLOWS DISPONIBLES :\n{workflows_desc}\n\n"
        f"ORDRE : \"{instruction}\"\n\n"
        f"RÈGLE DE MATCHING :\n"
        f"- Compare l'INTENTION de l'ordre avec la DESCRIPTION de chaque workflow\n"
        f"- \"Ouvre le bloc-notes\" correspond à un workflow décrit \"Ouvrir Bloc-notes via recherche\"\n"
        f"- Un workflow qui utilise la même application EST un match même si les mots diffèrent\n"
        f"- Si aucun workflow ne correspond, réponds WORKFLOW: AUCUN\n\n"
        f"Réponds EXACTEMENT dans ce format (une ligne par champ) :\n"
        f"COMPRIS: OUI\n"
        f"WORKFLOW: <numéro> (ou AUCUN)\n"
        f"CONFIANCE: <0.0 à 1.0>\n"
        f"PARAMETRES: clé1=valeur1, clé2=valeur2 (ou AUCUN)\n"
        f"BOUCLE: OUI ou NON\n"
        f"SOURCE_BOUCLE: écran, fichier, ou aucun\n"
        f"PLAN:\n"
        f"1. première étape\n"
        f"2. deuxième étape\n"
    )

    try:
        resp = _requests.post(
            self._gemma4_url,
            json={
                "model": "gemma4:e4b",
                "messages": [{"role": "user", "content": prompt}],
                "stream": False,
                "think": True,
                "options": {"temperature": 0.2, "num_predict": 800},
            },
            timeout=120,
        )

        if not resp.ok:
            plan.error = f"gemma4 HTTP {resp.status_code}"
            return plan

        content = resp.json().get("message", {}).get("content", "").strip()
        logger.info(f"TaskPlanner: réponse gemma4 ({len(content)} chars)")

        # Parse the reply into the plan.
        plan = self._parse_understanding(plan, content, available_workflows)

    except Exception as e:
        plan.error = f"gemma4 erreur: {e}"
        logger.warning(f"TaskPlanner: {plan.error}")

    return plan
|
||||||
|
|
||||||
|
def _parse_understanding(
|
||||||
|
self,
|
||||||
|
plan: TaskPlan,
|
||||||
|
content: str,
|
||||||
|
available_workflows: Optional[List[Dict]] = None,
|
||||||
|
) -> TaskPlan:
|
||||||
|
"""Parser la réponse de gemma4 pour construire le plan.
|
||||||
|
|
||||||
|
Tolérant aux variations de format :
|
||||||
|
- "COMPRIS : OUI" ou "COMPRIS: oui" ou "**COMPRIS:** OUI"
|
||||||
|
- Numéros de workflow : "1", "1.", "#1", "Workflow 1"
|
||||||
|
- Paramètres : "clé=valeur" ou "clé: valeur" sur la même ligne ou les suivantes
|
||||||
|
"""
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Nettoyer le markdown (gras, italique)
|
||||||
|
content_clean = re.sub(r'\*{1,2}([^*]+)\*{1,2}', r'\1', content)
|
||||||
|
|
||||||
|
in_params_section = False
|
||||||
|
in_plan_section = False
|
||||||
|
|
||||||
|
for line in content_clean.split("\n"):
|
||||||
|
line_clean = line.strip()
|
||||||
|
if not line_clean:
|
||||||
|
continue
|
||||||
|
upper = line_clean.upper()
|
||||||
|
|
||||||
|
# --- COMPRIS ---
|
||||||
|
if re.match(r'^COMPRIS\s*[:=]', upper):
|
||||||
|
val = re.split(r'[:=]', upper, 1)[1].strip()
|
||||||
|
plan.understood = "OUI" in val or "YES" in val or "TRUE" in val
|
||||||
|
in_params_section = False
|
||||||
|
in_plan_section = False
|
||||||
|
|
||||||
|
# --- WORKFLOW ---
|
||||||
|
elif re.match(r'^WORKFLOW\s*[:=]', upper):
|
||||||
|
val = line_clean.split(":", 1)[1].strip() if ":" in line_clean else line_clean.split("=", 1)[1].strip()
|
||||||
|
val_upper = val.upper().strip()
|
||||||
|
in_params_section = False
|
||||||
|
in_plan_section = False
|
||||||
|
if val_upper in ("AUCUN", "NONE", "NON", "N/A", "-", ""):
|
||||||
|
continue
|
||||||
|
# Extraire le numéro : "1", "1.", "#1", "Workflow 1", "1 (Bloc-notes)"
|
||||||
|
num_match = re.search(r'(\d+)', val)
|
||||||
|
if num_match and available_workflows:
|
||||||
|
idx = int(num_match.group(1)) - 1
|
||||||
|
if 0 <= idx < len(available_workflows):
|
||||||
|
wf = available_workflows[idx]
|
||||||
|
plan.workflow_match = wf.get("session_id", "")
|
||||||
|
plan.workflow_name = wf.get("name", "")
|
||||||
|
plan.match_confidence = 0.8
|
||||||
|
plan.mode = "replay"
|
||||||
|
|
||||||
|
# --- CONFIANCE ---
|
||||||
|
elif re.match(r'^CONFIANCE\s*[:=]', upper):
|
||||||
|
val = re.split(r'[:=]', line_clean, 1)[1].strip()
|
||||||
|
in_params_section = False
|
||||||
|
in_plan_section = False
|
||||||
|
# Extraire un float : "0.9", "0,9", "90%"
|
||||||
|
float_match = re.search(r'(\d+[.,]\d+)', val)
|
||||||
|
if float_match:
|
||||||
|
try:
|
||||||
|
plan.match_confidence = float(float_match.group(1).replace(",", "."))
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
elif "%" in val:
|
||||||
|
pct_match = re.search(r'(\d+)', val)
|
||||||
|
if pct_match:
|
||||||
|
plan.match_confidence = int(pct_match.group(1)) / 100.0
|
||||||
|
|
||||||
|
# --- PARAMETRES ---
|
||||||
|
elif re.match(r'^PARAM[EÈ]TRES?\s*[:=]', upper):
|
||||||
|
val = re.split(r'[:=]', line_clean, 1)[1].strip()
|
||||||
|
in_plan_section = False
|
||||||
|
val_upper = val.upper().strip()
|
||||||
|
if val_upper in ("AUCUN", "NONE", "NON", "N/A", "-"):
|
||||||
|
in_params_section = False
|
||||||
|
continue
|
||||||
|
# Vide = paramètres sur les lignes suivantes
|
||||||
|
in_params_section = True
|
||||||
|
if val and val_upper not in ("", ):
|
||||||
|
# Paramètres sur la même ligne : "clé1=val1, clé2=val2"
|
||||||
|
self._extract_params_from_line(val, plan)
|
||||||
|
|
||||||
|
# --- BOUCLE ---
|
||||||
|
elif re.match(r'^BOUCLE\s*[:=]', upper):
|
||||||
|
val = re.split(r'[:=]', upper, 1)[1].strip()
|
||||||
|
plan.is_loop = "OUI" in val or "YES" in val or "TRUE" in val
|
||||||
|
in_params_section = False
|
||||||
|
in_plan_section = False
|
||||||
|
|
||||||
|
# --- SOURCE_BOUCLE ---
|
||||||
|
elif re.match(r'^SOURCE[_ ]BOUCLE\s*[:=]', upper):
|
||||||
|
plan.loop_source = re.split(r'[:=]', line_clean, 1)[1].strip()
|
||||||
|
in_params_section = False
|
||||||
|
in_plan_section = False
|
||||||
|
|
||||||
|
# --- PLAN ---
|
||||||
|
elif re.match(r'^PLAN\s*[:=]?\s*$', upper) or upper == "PLAN:":
|
||||||
|
in_plan_section = True
|
||||||
|
in_params_section = False
|
||||||
|
|
||||||
|
# --- Lignes de contenu (paramètres d'abord, puis étapes) ---
|
||||||
|
elif in_params_section and ("=" in line_clean or ": " in line_clean):
|
||||||
|
self._extract_params_from_line(line_clean, plan)
|
||||||
|
|
||||||
|
elif in_plan_section and re.match(r'^(\d+[.)]\s+|- )', line_clean):
|
||||||
|
plan.steps.append({"description": line_clean})
|
||||||
|
|
||||||
|
elif re.match(r'^(\d+[.)]\s+|- )', line_clean) and not in_params_section:
|
||||||
|
# Étape numérotée en dehors d'une section explicite
|
||||||
|
plan.steps.append({"description": line_clean})
|
||||||
|
|
||||||
|
# Si pas de workflow trouvé mais compris → mode libre
|
||||||
|
if plan.understood and not plan.workflow_match:
|
||||||
|
plan.mode = "free"
|
||||||
|
|
||||||
|
return plan
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_params_from_line(text: str, plan: TaskPlan) -> None:
|
||||||
|
"""Extraire des paramètres clé=valeur ou clé: valeur d'une ligne."""
|
||||||
|
import re
|
||||||
|
text = text.strip().strip("- ")
|
||||||
|
# Ignorer les labels de section
|
||||||
|
if re.match(r'^(COMPRIS|WORKFLOW|BOUCLE|SOURCE|PLAN|CONFIANCE)', text.upper()):
|
||||||
|
return
|
||||||
|
# Essayer clé=valeur d'abord
|
||||||
|
if "=" in text:
|
||||||
|
for part in text.split(","):
|
||||||
|
part = part.strip()
|
||||||
|
if "=" in part:
|
||||||
|
k, v = part.split("=", 1)
|
||||||
|
k, v = k.strip().strip("- "), v.strip()
|
||||||
|
if k and v and v.upper() not in ("AUCUN", "NONE"):
|
||||||
|
plan.parameters[k] = v
|
||||||
|
# Sinon clé: valeur (mais pas les labels de section)
|
||||||
|
elif ": " in text:
|
||||||
|
k, v = text.split(": ", 1)
|
||||||
|
k, v = k.strip().strip("- "), v.strip()
|
||||||
|
if k and v and len(k) < 30 and v.upper() not in ("AUCUN", "NONE"):
|
||||||
|
plan.parameters[k] = v
|
||||||
|
|
||||||
|
def execute(
|
||||||
|
self,
|
||||||
|
plan: TaskPlan,
|
||||||
|
replay_callback=None,
|
||||||
|
machine_id: str = "default",
|
||||||
|
) -> TaskResult:
|
||||||
|
"""Exécuter un plan.
|
||||||
|
|
||||||
|
Deux modes :
|
||||||
|
1. "replay" : relancer un workflow enregistré avec substitution de variables
|
||||||
|
2. "free" : exécuter les actions planifiées par gemma4
|
||||||
|
|
||||||
|
Args:
|
||||||
|
plan: Le plan généré par understand()
|
||||||
|
replay_callback: Fonction qui lance un replay
|
||||||
|
signature: (session_id, machine_id, params) → replay_id
|
||||||
|
machine_id: Machine cible pour l'exécution
|
||||||
|
"""
|
||||||
|
t_start = time.time()
|
||||||
|
result = TaskResult(instruction=plan.instruction, success=False)
|
||||||
|
|
||||||
|
if not plan.understood:
|
||||||
|
result.summary = f"Instruction non comprise : {plan.error or 'réponse gemma4 invalide'}"
|
||||||
|
return result
|
||||||
|
|
||||||
|
if plan.mode == "replay" and plan.workflow_match:
|
||||||
|
# Mode replay : relancer un workflow connu
|
||||||
|
result = self._execute_replay(plan, replay_callback, machine_id)
|
||||||
|
|
||||||
|
elif plan.mode == "free" and plan.steps:
|
||||||
|
# Mode libre : actions planifiées par gemma4
|
||||||
|
result = self._execute_free(plan, replay_callback, machine_id)
|
||||||
|
|
||||||
|
else:
|
||||||
|
result.summary = "Pas de workflow correspondant et pas d'actions planifiées"
|
||||||
|
|
||||||
|
result.elapsed_s = time.time() - t_start
|
||||||
|
return result
|
||||||
|
|
||||||
|
def _execute_replay(
    self,
    plan: TaskPlan,
    replay_callback,
    machine_id: str,
) -> TaskResult:
    """Execute a plan in replay mode (known, recorded workflow).

    Args:
        plan: Plan whose ``workflow_match`` identifies the session to replay
            and whose ``parameters`` are forwarded to the callback.
        replay_callback: Callable invoked with the keyword arguments
            ``session_id``, ``machine_id`` and ``params``; its return value
            is reported as the replay id. When falsy, nothing is launched.
        machine_id: Target machine for the replay.

    Returns:
        A TaskResult. ``success`` is True only when the callback returned
        without raising; any exception is captured in ``summary`` and logged.
    """
    result = TaskResult(instruction=plan.instruction, success=False)

    if not replay_callback:
        result.summary = "Pas de callback replay configuré"
        return result

    if plan.is_loop:
        # Loop: TODO — list the items, then iterate over them.
        # For now the workflow is simply executed once.
        logger.info(
            f"TaskPlanner: boucle détectée mais pas encore implémentée, "
            f"exécution simple du workflow {plan.workflow_name}"
        )

    try:
        replay_id = replay_callback(
            session_id=plan.workflow_match,
            machine_id=machine_id,
            params=plan.parameters,
        )
        result.success = True
        result.completed_items = 1
        result.total_items = 1

        # Build the summary in two steps. The previous single expression
        # `f"..." f"..." if plan.parameters else ""` applied the conditional
        # to the WHOLE concatenated string, producing an empty summary
        # whenever the plan had no parameters.
        summary = f"Workflow '{plan.workflow_name}' lancé (replay={replay_id})"
        if plan.parameters:
            summary += f" avec paramètres {plan.parameters}"
        result.summary = summary

        result.results.append({
            "replay_id": replay_id,
            "workflow": plan.workflow_name,
            "params": plan.parameters,
        })
    except Exception as e:
        result.summary = f"Erreur lancement replay : {e}"
        logger.error(f"TaskPlanner: {result.summary}")

    return result
|
||||||
|
|
||||||
|
def _execute_free(
    self,
    plan: TaskPlan,
    replay_callback,
    machine_id: str,
) -> TaskResult:
    """Execute a plan in free mode (steps planned by the model).

    The textual steps are first converted to replay actions with
    ``_steps_to_actions``; the actions are then handed to ``replay_callback``.

    Args:
        plan: Plan providing ``steps``, ``parameters`` and ``instruction``.
        replay_callback: Callable invoked with the keyword arguments
            ``actions``, ``machine_id`` and ``task_description``. When falsy,
            the converted actions are returned in ``result.results`` without
            being executed.
        machine_id: Target machine for the execution.

    Returns:
        A TaskResult. ``success`` is True only when the callback returned
        without raising.
    """
    result = TaskResult(instruction=plan.instruction, success=False)

    # Convert the steps into replay actions.
    actions = self._steps_to_actions(plan.steps, plan.parameters)
    if not actions:
        result.summary = "Impossible de convertir le plan en actions exécutables"
        return result

    if not replay_callback:
        # No executor available: expose the prepared actions to the caller.
        result.summary = f"Plan prêt ({len(actions)} actions) mais pas de callback"
        result.results = actions
        return result

    try:
        replay_id = replay_callback(
            actions=actions,
            machine_id=machine_id,
            task_description=plan.instruction,
        )
        result.success = True
        result.completed_items = 1
        result.summary = f"Plan libre exécuté ({len(actions)} actions, replay={replay_id})"
    except Exception as e:
        result.summary = f"Erreur exécution plan libre : {e}"
        # Log the failure like _execute_replay does, so it is not only
        # visible through the returned summary.
        logger.error(f"TaskPlanner: {result.summary}")

    return result
|
||||||
|
|
||||||
|
def _steps_to_actions(
    self,
    steps: List[Dict[str, Any]],
    parameters: Dict[str, Any],
) -> List[Dict[str, Any]]:
    """Convert textual steps into replay actions.

    A prompt listing the steps and parameters is sent to the chat endpoint
    at ``self._gemma4_url``; the reply is parsed by ``_parse_actions_json``.
    Supported action types: click, type, key_combo, wait.

    Args:
        steps: Planned steps. Dict steps contribute their ``"description"``
            (or their string form when the key is missing); any other value
            contributes its string form.
        parameters: Parameters serialised as JSON into the prompt.

    Returns:
        The parsed actions, or an empty list when the HTTP call fails,
        returns a non-OK status, or raises.
    """
    import requests as _requests

    # The original str(s) fallback shows steps are not guaranteed to be
    # well-formed dicts, so never call .get() on a non-dict step.
    steps_text = "\n".join(
        str(s.get("description", s)) if isinstance(s, dict) else str(s)
        for s in (steps or [])
    )

    prompt = (
        "Convertis ces étapes RPA en actions JSON.\n\n"
        f"ÉTAPES :\n{steps_text}\n\n"
        f"PARAMÈTRES : {json.dumps(parameters, ensure_ascii=False)}\n\n"
        "TYPES D'ACTIONS DISPONIBLES :\n"
        '- Cliquer : {"type": "click", "target_spec": {"by_text": "texte du bouton"}}\n'
        '- Taper du texte : {"type": "type", "text": "texte à taper"}\n'
        '- Raccourci clavier : {"type": "key_combo", "keys": ["ctrl", "s"]}\n'
        '- Attendre : {"type": "wait", "duration_ms": 2000}\n\n'
        "RÈGLES :\n"
        "- UNE action JSON par ligne\n"
        "- Pas de commentaires, pas de texte autour, JUSTE le JSON\n"
        "- Utilise les paramètres fournis dans les valeurs\n\n"
        "ACTIONS :\n"
    )

    try:
        resp = _requests.post(
            self._gemma4_url,
            json={
                "model": "gemma4:e4b",
                "messages": [{"role": "user", "content": prompt}],
                "stream": False,
                "think": True,
                "options": {"temperature": 0.1, "num_predict": 1500},
            },
            timeout=120,
        )

        if not resp.ok:
            # Surface the HTTP failure instead of returning [] silently.
            logger.warning(
                "TaskPlanner: conversion étapes échouée : HTTP %s", resp.status_code
            )
            return []

        content = resp.json().get("message", {}).get("content", "")
        return self._parse_actions_json(content)

    except Exception as e:
        logger.warning(f"TaskPlanner: conversion étapes échouée : {e}")
        return []
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _parse_actions_json(content: str) -> List[Dict[str, Any]]:
|
||||||
|
"""Parser des actions JSON depuis une réponse VLM.
|
||||||
|
|
||||||
|
Tolère :
|
||||||
|
- Un JSON par ligne
|
||||||
|
- Un tableau JSON [...]
|
||||||
|
- Du texte autour des JSON (markdown, commentaires)
|
||||||
|
- Des objets imbriqués (target_spec)
|
||||||
|
"""
|
||||||
|
import re
|
||||||
|
|
||||||
|
actions = []
|
||||||
|
valid_types = {"click", "type", "key_combo", "wait"}
|
||||||
|
|
||||||
|
# Stratégie 1 : essayer de parser comme un tableau JSON
|
||||||
|
array_match = re.search(r'\[[\s\S]*\]', content)
|
||||||
|
if array_match:
|
||||||
|
try:
|
||||||
|
parsed = json.loads(array_match.group())
|
||||||
|
if isinstance(parsed, list):
|
||||||
|
for item in parsed:
|
||||||
|
if isinstance(item, dict) and item.get("type") in valid_types:
|
||||||
|
if item["type"] == "click":
|
||||||
|
item["visual_mode"] = True
|
||||||
|
actions.append(item)
|
||||||
|
if actions:
|
||||||
|
return actions
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Stratégie 2 : extraire les objets JSON individuels (supporte imbrication)
|
||||||
|
# Trouver chaque { ... } en gérant les accolades imbriquées
|
||||||
|
i = 0
|
||||||
|
while i < len(content):
|
||||||
|
if content[i] == '{':
|
||||||
|
depth = 0
|
||||||
|
start = i
|
||||||
|
while i < len(content):
|
||||||
|
if content[i] == '{':
|
||||||
|
depth += 1
|
||||||
|
elif content[i] == '}':
|
||||||
|
depth -= 1
|
||||||
|
if depth == 0:
|
||||||
|
candidate = content[start:i+1]
|
||||||
|
try:
|
||||||
|
action = json.loads(candidate)
|
||||||
|
if isinstance(action, dict) and action.get("type") in valid_types:
|
||||||
|
if action["type"] == "click":
|
||||||
|
action["visual_mode"] = True
|
||||||
|
actions.append(action)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
pass
|
||||||
|
break
|
||||||
|
i += 1
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
return actions
|
||||||
|
|
||||||
|
def list_capabilities(
    self,
    available_workflows: List[Dict[str, Any]],
) -> str:
    """Describe, as user-facing text, which workflows are available.

    Args:
        available_workflows: Workflow dicts; ``"name"`` (default ``"?"``)
            and ``"description"`` (optional) are read from each.

    Returns:
        A multi-line message listing one workflow per line, or a fixed
        message when the list is empty.
    """
    if not available_workflows:
        return "Léa n'a pas encore appris de workflows. Enregistrez-en un d'abord."

    entries = []
    for workflow in available_workflows:
        label = workflow.get("name", "?")
        details = workflow.get("description", "")
        # The description is shown in parentheses only when it is non-empty.
        suffix = f" ({details})" if details else ""
        entries.append(f" - {label}" + suffix)

    message = ["Léa sait faire :"]
    message.extend(entries)
    message.append("")
    message.append("Dites-lui ce que vous voulez faire en langage naturel.")
    return "\n".join(message)
|
||||||
185
agent_v0/server_v1/workflow_replay.py
Normal file
185
agent_v0/server_v1/workflow_replay.py
Normal file
@@ -0,0 +1,185 @@
|
|||||||
|
"""
|
||||||
|
workflow_replay.py — Pont entre le WorkflowRunner et le replay Agent V1.
|
||||||
|
|
||||||
|
Convertit un Workflow enrichi (avec embeddings CLIP + FAISS) en actions
|
||||||
|
de replay pour l'Agent V1, avec vérification FAISS à chaque étape.
|
||||||
|
|
||||||
|
Architecture :
|
||||||
|
Workflow (nodes + edges + embeddings)
|
||||||
|
→ pour chaque edge : action + embedding du node source
|
||||||
|
→ FAISS vérifie que l'écran actuel correspond au node attendu
|
||||||
|
→ si OK : exécuter l'action normalement
|
||||||
|
→ si MISMATCH : stopper ou adapter
|
||||||
|
|
||||||
|
Auteur : Dom + Claude
|
||||||
|
Date : 5 avril 2026
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def build_workflow_replay(
    workflow_path: str,
    session_dir: str,
    faiss_manager=None,
    clip_embedder=None,
) -> List[Dict[str, Any]]:
    """Convert a workflow JSON file into a linear list of replay actions.

    The graph is walked from the first entry node (or the first node when no
    entry node is declared), always following the first outgoing edge, until
    a node repeats, is unknown, or has no outgoing edge. Only edges whose
    action type is ``"compound"`` produce actions: each of their steps becomes
    one replay action tagged with the source node id and its window title.

    Args:
        workflow_path: Path to the workflow JSON file.
        session_dir: Session directory, used to look up anchor crops.
        faiss_manager: Accepted for interface compatibility; not used by
            this function.
        clip_embedder: Accepted for interface compatibility; not used by
            this function.

    Returns:
        The list of replay actions (possibly empty).
    """
    import json
    import uuid

    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(workflow_path, encoding="utf-8") as f:
        wf_data = json.load(f)

    nodes = {n["node_id"]: n for n in wf_data.get("nodes", [])}
    edges = wf_data.get("edges", [])
    entry_nodes = wf_data.get("entry_nodes", [])

    if not nodes or not edges:
        logger.warning("Workflow vide : %d nodes, %d edges", len(nodes), len(edges))
        return []

    logger.info(
        "Workflow '%s' chargé : %d nodes, %d edges",
        wf_data.get("name", "?"), len(nodes), len(edges),
    )

    # Build the action sequence by walking the graph linearly.
    actions = []
    visited = set()
    current_node_id = entry_nodes[0] if entry_nodes else next(iter(nodes))

    while current_node_id and current_node_id not in visited:
        visited.add(current_node_id)
        node = nodes.get(current_node_id)
        if not node:
            break

        # First outgoing edge only (the walk is linear).
        edge = next(
            (e for e in edges if e.get("from_node") == current_node_id), None
        )
        if edge is None:
            break

        # `or {}` also covers fields that are present but null in the JSON.
        action_data = edge.get("action") or {}
        next_node_id = edge.get("to_node")
        node_title = (node.get("metadata") or {}).get("window_title", "")

        action_type = action_data.get("type", "unknown")
        target = action_data.get("target") or {}
        params = action_data.get("parameters") or {}

        if action_type == "compound":
            # Compound actions are expanded into one replay action per step.
            for step in params.get("steps") or []:
                step_type = step.get("type", "unknown")
                step_action = {
                    "action_id": f"wf_{uuid.uuid4().hex[:8]}",
                    "type": _map_action_type(step_type),
                    "workflow_node": current_node_id,
                    "expected_window_title": node_title,
                }

                if step_type == "mouse_click":
                    step_action["x_pct"] = step.get("x_pct", 0)
                    step_action["y_pct"] = step.get("y_pct", 0)
                    step_action["button"] = step.get("button", "left")
                    step_action["visual_mode"] = True
                    # Target spec used for grounding at replay time.
                    step_action["target_spec"] = {
                        "by_text": target.get("by_text", ""),
                        "by_role": target.get("by_role", ""),
                        "by_text_source": "ocr" if target.get("by_text") else "",
                        "window_title": node_title,
                        "original_position": {
                            "y_relative": "",
                            "x_relative": "",
                        },
                    }
                    # Attach the anchor crop when one exists on disk.
                    _attach_anchor(step_action, step, session_dir)

                elif step_type == "text_input":
                    step_action["type"] = "type"
                    step_action["text"] = step.get("text", "")

                elif step_type == "key_press":
                    step_action["type"] = "key_combo"
                    step_action["keys"] = step.get("keys", [])

                elif step_type == "wait":
                    step_action["type"] = "wait"
                    step_action["duration_ms"] = step.get("duration_ms", 500)

                actions.append(step_action)
        else:
            # Non-compound edges produce no action; leave a trace for debugging.
            logger.debug(
                "Edge depuis '%s' ignoré : type d'action '%s' non géré",
                current_node_id, action_type,
            )

        # Move on to the next node.
        current_node_id = next_node_id

    # Post-verification: each click expects the window title of the NEXT click
    # (the last click keeps its own source-node title).
    click_indices = [i for i, a in enumerate(actions) if a.get("type") == "click"]
    for ci, next_ci in zip(click_indices, click_indices[1:]):
        next_title = actions[next_ci].get("expected_window_title", "")
        if next_title:
            actions[ci]["expected_window_title"] = next_title

    logger.info("Workflow → %d actions de replay", len(actions))
    return actions
|
||||||
|
|
||||||
|
|
||||||
|
def _map_action_type(step_type: str) -> str:
|
||||||
|
"""Mapper les types d'action du workflow vers les types de replay."""
|
||||||
|
mapping = {
|
||||||
|
"mouse_click": "click",
|
||||||
|
"text_input": "type",
|
||||||
|
"key_press": "key_combo",
|
||||||
|
"wait": "wait",
|
||||||
|
"scroll": "scroll",
|
||||||
|
}
|
||||||
|
return mapping.get(step_type, step_type)
|
||||||
|
|
||||||
|
|
||||||
|
def _attach_anchor(action: dict, step: dict, session_dir: str) -> None:
|
||||||
|
"""Attacher le crop anchor au target_spec si disponible."""
|
||||||
|
import base64
|
||||||
|
|
||||||
|
# Chercher le crop dans le session_dir
|
||||||
|
screenshot_id = step.get("screenshot_id", "")
|
||||||
|
if screenshot_id and session_dir:
|
||||||
|
crop_path = Path(session_dir) / "shots" / f"{screenshot_id}_crop.png"
|
||||||
|
if crop_path.is_file():
|
||||||
|
action["target_spec"]["anchor_image_base64"] = base64.b64encode(
|
||||||
|
crop_path.read_bytes()
|
||||||
|
).decode()
|
||||||
@@ -1,55 +0,0 @@
|
|||||||
# window_info.py
|
|
||||||
"""
|
|
||||||
Récupération des informations sur la fenêtre active (X11).
|
|
||||||
|
|
||||||
v0 :
|
|
||||||
- utilise xdotool pour obtenir :
|
|
||||||
- le titre de la fenêtre active
|
|
||||||
- le PID de la fenêtre active, puis le nom du process via ps
|
|
||||||
|
|
||||||
Si quelque chose ne fonctionne pas, on renvoie des valeurs "unknown".
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import subprocess
|
|
||||||
from typing import Dict, Optional
|
|
||||||
|
|
||||||
|
|
||||||
def _run_cmd(cmd: list[str]) -> Optional[str]:
|
|
||||||
"""Exécute une commande et renvoie la sortie texte (strippée), ou None en cas d'erreur."""
|
|
||||||
try:
|
|
||||||
out = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
|
|
||||||
return out.decode("utf-8", errors="ignore").strip()
|
|
||||||
except Exception:
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def get_active_window_info() -> Dict[str, str]:
    """Return the title and application name of the active window.

    The result has the shape ``{"title": "...", "app_name": "..."}``.
    The title and PID are queried with xdotool (which must be installed);
    the application name is the ``comm`` value reported by ``ps`` for that
    PID. Values that cannot be obtained are replaced by ``"unknown_window"``
    and ``"unknown_app"``.
    """
    window_title = _run_cmd(["xdotool", "getactivewindow", "getwindowname"])
    window_pid = _run_cmd(["xdotool", "getactivewindow", "getwindowpid"])

    process_name: Optional[str] = None
    if window_pid:
        # Resolve the binary name of the owning process through ps.
        process_name = _run_cmd(["ps", "-p", window_pid.strip(), "-o", "comm="])

    return {
        "title": window_title if window_title else "unknown_window",
        "app_name": process_name if process_name else "unknown_app",
    }
|
|
||||||
@@ -1,192 +0,0 @@
|
|||||||
# window_info_crossplatform.py
|
|
||||||
"""
|
|
||||||
Récupération des informations sur la fenêtre active - CROSS-PLATFORM
|
|
||||||
|
|
||||||
Supporte:
|
|
||||||
- Linux (X11 via xdotool)
|
|
||||||
- Windows (via pywin32)
|
|
||||||
- macOS (via pyobjc)
|
|
||||||
|
|
||||||
Installation des dépendances:
|
|
||||||
pip install pywin32 # Windows
|
|
||||||
pip install pyobjc-framework-Cocoa # macOS
|
|
||||||
pip install psutil # Tous OS
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import platform
|
|
||||||
import subprocess
|
|
||||||
from typing import Dict, Optional
|
|
||||||
|
|
||||||
|
|
||||||
def _run_cmd(cmd: list[str]) -> Optional[str]:
|
|
||||||
"""Exécute une commande et renvoie la sortie texte (strippée), ou None en cas d'erreur."""
|
|
||||||
try:
|
|
||||||
out = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
|
|
||||||
return out.decode("utf-8", errors="ignore").strip()
|
|
||||||
except Exception:
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def get_active_window_info() -> Dict[str, str]:
    """Return ``{"title": "...", "app_name": "..."}`` for the active window.

    The operating system is detected with ``platform.system()`` and the
    matching platform-specific helper is called. Unsupported systems yield
    the ``"unknown_window"`` / ``"unknown_app"`` placeholders.
    """
    handlers = {
        "Linux": _get_window_info_linux,
        "Windows": _get_window_info_windows,
        "Darwin": _get_window_info_macos,  # macOS
    }
    handler = handlers.get(platform.system())
    if handler is None:
        return {"title": "unknown_window", "app_name": "unknown_app"}
    return handler()
|
|
||||||
|
|
||||||
|
|
||||||
def _get_window_info_linux() -> Dict[str, str]:
    """Linux implementation, based on xdotool (X11).

    Requires xdotool to be installed (``sudo apt-get install xdotool``).
    Missing values are replaced by ``"unknown_window"`` / ``"unknown_app"``.
    """
    info = {"title": "unknown_window", "app_name": "unknown_app"}

    active_title = _run_cmd(["xdotool", "getactivewindow", "getwindowname"])
    active_pid = _run_cmd(["xdotool", "getactivewindow", "getwindowpid"])

    if active_title:
        info["title"] = active_title

    if active_pid:
        # The binary name is obtained from ps for the window's PID.
        binary_name = _run_cmd(["ps", "-p", active_pid.strip(), "-o", "comm="])
        if binary_name:
            info["app_name"] = binary_name

    return info
|
|
||||||
|
|
||||||
|
|
||||||
def _get_window_info_windows() -> Dict[str, str]:
|
|
||||||
"""
|
|
||||||
Windows: utilise pywin32 + psutil
|
|
||||||
|
|
||||||
Nécessite: pip install pywin32 psutil
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
import win32gui
|
|
||||||
import win32process
|
|
||||||
import psutil
|
|
||||||
|
|
||||||
# Fenêtre au premier plan
|
|
||||||
hwnd = win32gui.GetForegroundWindow()
|
|
||||||
|
|
||||||
# Titre de la fenêtre
|
|
||||||
title = win32gui.GetWindowText(hwnd)
|
|
||||||
if not title:
|
|
||||||
title = "unknown_window"
|
|
||||||
|
|
||||||
# PID du processus
|
|
||||||
_, pid = win32process.GetWindowThreadProcessId(hwnd)
|
|
||||||
|
|
||||||
# Nom du processus
|
|
||||||
try:
|
|
||||||
process = psutil.Process(pid)
|
|
||||||
app_name = process.name()
|
|
||||||
except (psutil.NoSuchProcess, psutil.AccessDenied):
|
|
||||||
app_name = "unknown_app"
|
|
||||||
|
|
||||||
return {
|
|
||||||
"title": title,
|
|
||||||
"app_name": app_name,
|
|
||||||
}
|
|
||||||
|
|
||||||
except ImportError:
|
|
||||||
# pywin32 ou psutil non installé
|
|
||||||
return {
|
|
||||||
"title": "unknown_window (pywin32 missing)",
|
|
||||||
"app_name": "unknown_app (pywin32 missing)",
|
|
||||||
}
|
|
||||||
except Exception as e:
|
|
||||||
return {
|
|
||||||
"title": f"error: {e}",
|
|
||||||
"app_name": "unknown_app",
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def _get_window_info_macos() -> Dict[str, str]:
|
|
||||||
"""
|
|
||||||
macOS: utilise pyobjc (AppKit)
|
|
||||||
|
|
||||||
Nécessite: pip install pyobjc-framework-Cocoa
|
|
||||||
|
|
||||||
Note: Nécessite les permissions "Accessibility" dans System Preferences
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
from AppKit import NSWorkspace
|
|
||||||
from Quartz import (
|
|
||||||
CGWindowListCopyWindowInfo,
|
|
||||||
kCGWindowListOptionOnScreenOnly,
|
|
||||||
kCGNullWindowID
|
|
||||||
)
|
|
||||||
|
|
||||||
# Application active
|
|
||||||
active_app = NSWorkspace.sharedWorkspace().activeApplication()
|
|
||||||
app_name = active_app.get('NSApplicationName', 'unknown_app')
|
|
||||||
|
|
||||||
# Titre de la fenêtre (via Quartz)
|
|
||||||
# On cherche la fenêtre de l'app active qui est au premier plan
|
|
||||||
window_list = CGWindowListCopyWindowInfo(
|
|
||||||
kCGWindowListOptionOnScreenOnly,
|
|
||||||
kCGNullWindowID
|
|
||||||
)
|
|
||||||
|
|
||||||
title = "unknown_window"
|
|
||||||
for window in window_list:
|
|
||||||
owner_name = window.get('kCGWindowOwnerName', '')
|
|
||||||
if owner_name == app_name:
|
|
||||||
window_title = window.get('kCGWindowName', '')
|
|
||||||
if window_title:
|
|
||||||
title = window_title
|
|
||||||
break
|
|
||||||
|
|
||||||
return {
|
|
||||||
"title": title,
|
|
||||||
"app_name": app_name,
|
|
||||||
}
|
|
||||||
|
|
||||||
except ImportError:
|
|
||||||
# pyobjc non installé
|
|
||||||
return {
|
|
||||||
"title": "unknown_window (pyobjc missing)",
|
|
||||||
"app_name": "unknown_app (pyobjc missing)",
|
|
||||||
}
|
|
||||||
except Exception as e:
|
|
||||||
return {
|
|
||||||
"title": f"error: {e}",
|
|
||||||
"app_name": "unknown_app",
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
# Test rapide
|
|
||||||
if __name__ == "__main__":
    # Quick manual check: print the active window once per second, 5 times.
    import time

    print(f"OS détecté: {platform.system()}")
    print("\nTest de capture fenêtre active (5 secondes)...")
    print("Changez de fenêtre pour tester!\n")

    i = 0
    while i < 5:
        info = get_active_window_info()
        print(f"[{i+1}] App: {info['app_name']:20s} | Title: {info['title']}")
        time.sleep(1)
        i += 1
|
|
||||||
@@ -25,6 +25,7 @@ from __future__ import annotations
|
|||||||
import base64
|
import base64
|
||||||
import io
|
import io
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import List, Optional, Tuple
|
from typing import List, Optional, Tuple
|
||||||
@@ -33,8 +34,10 @@ from PIL import Image, ImageDraw, ImageFont
|
|||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# Chemin vers les poids YOLO d'OmniParser
|
# Chemin vers les poids YOLO d'OmniParser (configurable via env)
|
||||||
_YOLO_WEIGHTS = Path("/home/dom/ai/OmniParser/weights/icon_detect/model.pt")
|
_YOLO_WEIGHTS = Path(
|
||||||
|
os.environ.get("SOM_YOLO_WEIGHTS", "/home/dom/ai/OmniParser/weights/icon_detect/model.pt")
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -165,17 +168,17 @@ class SomEngine:
|
|||||||
# ── 2. docTR : OCR pour lire le texte ──
|
# ── 2. docTR : OCR pour lire le texte ──
|
||||||
if self._ocr is not None:
|
if self._ocr is not None:
|
||||||
try:
|
try:
|
||||||
import numpy as np
|
|
||||||
from doctr.io import DocumentFile
|
from doctr.io import DocumentFile
|
||||||
# Convertir PIL → fichier temporaire pour docTR
|
# Convertir PIL → fichier temporaire pour docTR
|
||||||
import tempfile
|
import tempfile
|
||||||
with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
|
with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
|
||||||
screenshot.save(tmp, format="JPEG", quality=90)
|
screenshot.save(tmp, format="JPEG", quality=90)
|
||||||
tmp_path = tmp.name
|
tmp_path = tmp.name
|
||||||
doc = DocumentFile.from_images([tmp_path])
|
try:
|
||||||
import os
|
doc = DocumentFile.from_images([tmp_path])
|
||||||
os.unlink(tmp_path)
|
result_ocr = self._ocr(doc)
|
||||||
result_ocr = self._ocr(doc)
|
finally:
|
||||||
|
os.unlink(tmp_path)
|
||||||
|
|
||||||
for page in result_ocr.pages:
|
for page in result_ocr.pages:
|
||||||
for block in page.blocks:
|
for block in page.blocks:
|
||||||
@@ -288,3 +291,25 @@ class SomEngine:
|
|||||||
buf = io.BytesIO()
|
buf = io.BytesIO()
|
||||||
image.save(buf, format="JPEG", quality=quality)
|
image.save(buf, format="JPEG", quality=quality)
|
||||||
return base64.b64encode(buf.getvalue()).decode()
|
return base64.b64encode(buf.getvalue()).decode()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Singleton partagé (lazy-loaded, thread-safe)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
_shared_engine: Optional[SomEngine] = None
|
||||||
|
_shared_lock = __import__("threading").Lock()
|
||||||
|
|
||||||
|
|
||||||
|
def get_shared_engine(device: str = "cpu") -> Optional[SomEngine]:
    """Return the module-wide SomEngine instance, creating it on first use.

    Creation happens under ``_shared_lock`` with a second check inside the
    lock. ``device`` is only used when the instance is actually constructed;
    once an engine exists, later calls return it regardless of ``device``.

    Returns:
        The shared engine, or None when constructing it raised (the error
        is logged as a warning and nothing is cached).
    """
    global _shared_engine

    # Fast path: already initialised, no locking needed.
    if _shared_engine is not None:
        return _shared_engine

    with _shared_lock:
        # Re-check inside the lock: another caller may have built it meanwhile.
        if _shared_engine is not None:
            return _shared_engine
        try:
            _shared_engine = SomEngine(device=device)
            logger.info("SomEngine singleton partagé initialisé")
        except Exception as exc:
            logger.warning("SomEngine non disponible : %s", exc)
            return None
        return _shared_engine
|
||||||
|
|||||||
@@ -68,12 +68,19 @@ class TokenManager:
|
|||||||
logger.info(f"Loading token config. RPA_TOKEN_ADMIN present: {bool(admin_token)}")
|
logger.info(f"Loading token config. RPA_TOKEN_ADMIN present: {bool(admin_token)}")
|
||||||
logger.info(f"Loading token config. RPA_TOKEN_READONLY present: {bool(readonly_token)}")
|
logger.info(f"Loading token config. RPA_TOKEN_READONLY present: {bool(readonly_token)}")
|
||||||
if admin_token:
|
if admin_token:
|
||||||
logger.info(f"RPA_TOKEN_ADMIN value: {admin_token[:8]}...")
|
logger.info("RPA_TOKEN_ADMIN configuré")
|
||||||
if readonly_token:
|
if readonly_token:
|
||||||
logger.info(f"RPA_TOKEN_READONLY value: {readonly_token[:8]}...")
|
logger.info("RPA_TOKEN_READONLY configuré")
|
||||||
|
|
||||||
# Clé secrète pour signer les tokens
|
# Clé secrète pour signer les tokens — OBLIGATOIRE en production
|
||||||
self.secret_key = os.getenv("TOKEN_SECRET_KEY", "dev-token-secret-change-in-production")
|
self.secret_key = os.getenv("TOKEN_SECRET_KEY", "")
|
||||||
|
if not self.secret_key:
|
||||||
|
logger.warning(
|
||||||
|
"TOKEN_SECRET_KEY non défini — utilisation d'une clé aléatoire. "
|
||||||
|
"Définir TOKEN_SECRET_KEY dans .env.local pour la production."
|
||||||
|
)
|
||||||
|
import secrets
|
||||||
|
self.secret_key = secrets.token_hex(32)
|
||||||
|
|
||||||
# Tokens statiques pour rétrocompatibilité
|
# Tokens statiques pour rétrocompatibilité
|
||||||
self.admin_tokens = set()
|
self.admin_tokens = set()
|
||||||
@@ -89,11 +96,13 @@ class TokenManager:
|
|||||||
self.admin_tokens.add(admin_token)
|
self.admin_tokens.add(admin_token)
|
||||||
logger.info(f"Added RPA_TOKEN_ADMIN to admin_tokens")
|
logger.info(f"Added RPA_TOKEN_ADMIN to admin_tokens")
|
||||||
|
|
||||||
# Temporary fix: Add production tokens directly
|
# Tokens de production : lus EXCLUSIVEMENT depuis les variables d'environnement.
|
||||||
prod_admin_token = "73cf0db73f9a5064e79afebba96c85338be65cc2060b9c1d42c3ea5dd7d4e490"
|
# Ne JAMAIS hardcoder de tokens dans le code source.
|
||||||
prod_readonly_token = "7eea1de415cc69c02381ce09ff63aeebf3e1d9b476d54aa6730ba9de849e3dc6"
|
prod_admin_token = os.getenv("RPA_PROD_ADMIN_TOKEN", "")
|
||||||
self.admin_tokens.add(prod_admin_token)
|
prod_readonly_token = os.getenv("RPA_PROD_READONLY_TOKEN", "")
|
||||||
logger.info(f"Added hardcoded production admin token")
|
if prod_admin_token:
|
||||||
|
self.admin_tokens.add(prod_admin_token)
|
||||||
|
logger.info("Added RPA_PROD_ADMIN_TOKEN to admin_tokens")
|
||||||
|
|
||||||
self.read_only_tokens = set()
|
self.read_only_tokens = set()
|
||||||
if os.getenv("READ_ONLY_TOKENS"):
|
if os.getenv("READ_ONLY_TOKENS"):
|
||||||
@@ -102,11 +111,11 @@ class TokenManager:
|
|||||||
# Support tokens RPA Vision V3 (Fiche #23)
|
# Support tokens RPA Vision V3 (Fiche #23)
|
||||||
if readonly_token:
|
if readonly_token:
|
||||||
self.read_only_tokens.add(readonly_token)
|
self.read_only_tokens.add(readonly_token)
|
||||||
logger.info(f"Added RPA_TOKEN_READONLY to read_only_tokens")
|
logger.info("Added RPA_TOKEN_READONLY to read_only_tokens")
|
||||||
|
|
||||||
# Temporary fix: Add production tokens directly
|
if prod_readonly_token:
|
||||||
self.read_only_tokens.add(prod_readonly_token)
|
self.read_only_tokens.add(prod_readonly_token)
|
||||||
logger.info(f"Added hardcoded production readonly token")
|
logger.info("Added RPA_PROD_READONLY_TOKEN to read_only_tokens")
|
||||||
|
|
||||||
# Configuration expiration
|
# Configuration expiration
|
||||||
self.default_expiry_hours = int(os.getenv("TOKEN_EXPIRY_HOURS", "24"))
|
self.default_expiry_hours = int(os.getenv("TOKEN_EXPIRY_HOURS", "24"))
|
||||||
|
|||||||
@@ -1,275 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
# ============================================================
|
|
||||||
# build_lea_exe.sh — Cree un executable Windows autonome via PyInstaller
|
|
||||||
#
|
|
||||||
# IMPORTANT : Ce script doit tourner SUR WINDOWS (ou dans Wine/WSL
|
|
||||||
# avec acces a un Python Windows). PyInstaller ne peut pas produire
|
|
||||||
# un .exe Windows depuis Linux natif.
|
|
||||||
#
|
|
||||||
# Procedure recommandee :
|
|
||||||
# 1. Sur le PC Windows (192.168.1.11 ou autre) :
|
|
||||||
# - Installer Python 3.12 (https://python.org)
|
|
||||||
# - pip install pyinstaller
|
|
||||||
# 2. Copier ce script et le dossier agent_v0/ sur le PC Windows
|
|
||||||
# 3. Executer depuis PowerShell/cmd :
|
|
||||||
# python -m PyInstaller --onefile --windowed ^
|
|
||||||
# --name "Lea" ^
|
|
||||||
# --add-data "agent_v1;agent_v1" ^
|
|
||||||
# --add-data "lea_ui;lea_ui" ^
|
|
||||||
# --add-data "config.txt;." ^
|
|
||||||
# --hidden-import "pynput.keyboard._win32" ^
|
|
||||||
# --hidden-import "pynput.mouse._win32" ^
|
|
||||||
# --hidden-import "pystray._win32" ^
|
|
||||||
# --hidden-import "plyer.platforms.win.notification" ^
|
|
||||||
# --hidden-import "win32api" ^
|
|
||||||
# --hidden-import "win32con" ^
|
|
||||||
# --hidden-import "win32gui" ^
|
|
||||||
# run_agent_v1.py
|
|
||||||
#
|
|
||||||
# Le .exe resultant sera dans dist/Lea.exe (~50-100 MB)
|
|
||||||
#
|
|
||||||
# ============================================================
|
|
||||||
#
|
|
||||||
# OPTION ALTERNATIVE : Python Embedded (recommandee)
|
|
||||||
#
|
|
||||||
# Python Embedded est un Python portable officiel (pas d'installation).
|
|
||||||
# Combine avec le code source, c'est la methode la plus fiable
|
|
||||||
# pour les non-informaticiens.
|
|
||||||
#
|
|
||||||
# Sur une machine Windows :
|
|
||||||
# 1. Telecharger Python Embedded 3.12 :
|
|
||||||
# https://www.python.org/ftp/python/3.12.9/python-3.12.9-embed-amd64.zip
|
|
||||||
#
|
|
||||||
# 2. Dezipper dans un dossier temporaire
|
|
||||||
#
|
|
||||||
# 3. Activer pip dans Python Embedded :
|
|
||||||
# - Editer python312._pth, decommenter "import site"
|
|
||||||
# - Telecharger get-pip.py : https://bootstrap.pypa.io/get-pip.py
|
|
||||||
# - Executer : python.exe get-pip.py
|
|
||||||
#
|
|
||||||
# 4. Installer les dependances :
|
|
||||||
# python.exe -m pip install -r requirements_agent.txt
|
|
||||||
#
|
|
||||||
# 5. Copier le code source (agent_v1/, lea_ui/, run_agent_v1.py)
|
|
||||||
#
|
|
||||||
# 6. Zipper le tout → Lea_Portable.zip (~40-60 MB)
|
|
||||||
#
|
|
||||||
# Le Lea.bat dans ce cas utiliserait :
|
|
||||||
# python\python.exe run_agent_v1.py
|
|
||||||
# au lieu de .venv\Scripts\python.exe
|
|
||||||
#
|
|
||||||
# ============================================================
|
|
||||||
|
|
||||||
set -euo pipefail
|
|
||||||
|
|
||||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
||||||
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
|
|
||||||
|
|
||||||
echo "============================================================"
|
|
||||||
echo " Build Lea.exe (PyInstaller)"
|
|
||||||
echo "============================================================"
|
|
||||||
echo ""
|
|
||||||
echo " Ce script ne peut pas produire un .exe Windows depuis Linux."
|
|
||||||
echo ""
|
|
||||||
echo " OPTIONS DISPONIBLES :"
|
|
||||||
echo ""
|
|
||||||
echo " 1. OPTION VIA PC WINDOWS (recommandee pour .exe) :"
|
|
||||||
echo " Copiez le dossier deploy/ sur le PC Windows"
|
|
||||||
echo " puis lancez la commande PyInstaller ci-dessous."
|
|
||||||
echo ""
|
|
||||||
echo " 2. OPTION ZIP + VENV (recommandee pour deploiement rapide) :"
|
|
||||||
echo " Lancez ./deploy/build_package.sh"
|
|
||||||
echo " Le zip resultant contient install.bat + Lea.bat"
|
|
||||||
echo ""
|
|
||||||
echo " 3. OPTION PYTHON EMBEDDED (recommandee pour zero install) :"
|
|
||||||
echo " Suivez les instructions dans ce script (section ALTERNATIVE)"
|
|
||||||
echo ""
|
|
||||||
echo "============================================================"
|
|
||||||
echo ""
|
|
||||||
|
|
||||||
# Generer le .spec PyInstaller pour reference
|
|
||||||
SPEC_FILE="$SCRIPT_DIR/Lea.spec"
|
|
||||||
cat > "$SPEC_FILE" << 'PYINSTALLER_SPEC'
|
|
||||||
# -*- mode: python ; coding: utf-8 -*-
|
|
||||||
# Lea.spec — Configuration PyInstaller pour l'agent Lea
|
|
||||||
#
|
|
||||||
# Usage sur Windows :
|
|
||||||
# pip install pyinstaller
|
|
||||||
# pyinstaller Lea.spec
|
|
||||||
#
|
|
||||||
# Le .exe resultant sera dans dist/Lea.exe
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
|
|
||||||
block_cipher = None
|
|
||||||
|
|
||||||
# Repertoire de travail (ou se trouve ce .spec)
|
|
||||||
# Directory containing this .spec file. PyInstaller injects SPEC as a plain string (the spec file path), so it must not be called; fall back to '.' when the name is absent.
SPEC_DIR = os.path.dirname(os.path.abspath(SPEC)) if 'SPEC' in dir() else '.'
|
|
||||||
|
|
||||||
a = Analysis(
|
|
||||||
['run_agent_v1.py'],
|
|
||||||
pathex=['.'],
|
|
||||||
binaries=[],
|
|
||||||
datas=[
|
|
||||||
('agent_v1', 'agent_v1'),
|
|
||||||
('lea_ui', 'lea_ui'),
|
|
||||||
('config.txt', '.'),
|
|
||||||
('LISEZMOI.txt', '.'),
|
|
||||||
],
|
|
||||||
hiddenimports=[
|
|
||||||
# pynput backends Windows
|
|
||||||
'pynput.keyboard._win32',
|
|
||||||
'pynput.mouse._win32',
|
|
||||||
# pystray backend Windows
|
|
||||||
'pystray._win32',
|
|
||||||
# plyer notification Windows
|
|
||||||
'plyer.platforms.win',
|
|
||||||
'plyer.platforms.win.notification',
|
|
||||||
# pywin32
|
|
||||||
'win32api',
|
|
||||||
'win32con',
|
|
||||||
'win32gui',
|
|
||||||
'win32com',
|
|
||||||
'pythoncom',
|
|
||||||
# tkinter (stdlib, parfois manquant dans PyInstaller)
|
|
||||||
'tkinter',
|
|
||||||
'tkinter.simpledialog',
|
|
||||||
'tkinter.messagebox',
|
|
||||||
'tkinter.filedialog',
|
|
||||||
],
|
|
||||||
hookspath=[],
|
|
||||||
hooksconfig={},
|
|
||||||
runtime_hooks=[],
|
|
||||||
excludes=[
|
|
||||||
# Exclure les modules lourds non necessaires cote client
|
|
||||||
'torch',
|
|
||||||
'torchvision',
|
|
||||||
'transformers',
|
|
||||||
'clip',
|
|
||||||
'open_clip',
|
|
||||||
'faiss',
|
|
||||||
'cv2', # opencv pas obligatoire (blur_sensitive a un fallback)
|
|
||||||
'numpy', # requis par PIL mais pas directement
|
|
||||||
'scipy',
|
|
||||||
'sklearn',
|
|
||||||
'matplotlib',
|
|
||||||
'pandas',
|
|
||||||
'tensorflow',
|
|
||||||
],
|
|
||||||
win_no_prefer_redirects=False,
|
|
||||||
win_private_assemblies=False,
|
|
||||||
cipher=block_cipher,
|
|
||||||
noarchive=False,
|
|
||||||
)
|
|
||||||
|
|
||||||
pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
|
|
||||||
|
|
||||||
exe = EXE(
|
|
||||||
pyz,
|
|
||||||
a.scripts,
|
|
||||||
a.binaries,
|
|
||||||
a.zipfiles,
|
|
||||||
a.datas,
|
|
||||||
[],
|
|
||||||
name='Lea',
|
|
||||||
debug=False,
|
|
||||||
bootloader_ignore_signals=False,
|
|
||||||
strip=False,
|
|
||||||
upx=True,
|
|
||||||
upx_exclude=[],
|
|
||||||
runtime_tmpdir=None,
|
|
||||||
console=False, # --windowed : pas de console visible
|
|
||||||
disable_windowed_traceback=False,
|
|
||||||
argv_emulation=False,
|
|
||||||
target_arch=None,
|
|
||||||
codesign_identity=None,
|
|
||||||
entitlements_file=None,
|
|
||||||
# icon='assets/lea_icon.ico', # Decommenter quand l'icone sera creee
|
|
||||||
)
|
|
||||||
PYINSTALLER_SPEC
|
|
||||||
|
|
||||||
echo " Fichier Lea.spec genere dans : $SPEC_FILE"
|
|
||||||
echo ""
|
|
||||||
echo " Pour builder sur Windows :"
|
|
||||||
echo " 1. Copier le dossier Lea/ (apres build_package.sh) sur le PC Windows"
|
|
||||||
echo " 2. pip install pyinstaller"
|
|
||||||
echo " 3. cd Lea"
|
|
||||||
echo " 4. pyinstaller ../Lea.spec"
|
|
||||||
echo " 5. Le .exe sera dans dist/Lea.exe"
|
|
||||||
echo ""
|
|
||||||
|
|
||||||
# Generer aussi un script batch pour builder sur Windows
|
|
||||||
WIN_BUILD="$SCRIPT_DIR/build_exe_windows.bat"
|
|
||||||
cat > "$WIN_BUILD" << 'WIN_BATCH'
|
|
||||||
@echo off
|
|
||||||
chcp 65001 >nul 2>&1
|
|
||||||
title Build Lea.exe
|
|
||||||
|
|
||||||
echo ============================================================
|
|
||||||
echo Build Lea.exe (PyInstaller)
|
|
||||||
echo ============================================================
|
|
||||||
echo.
|
|
||||||
|
|
||||||
:: Verifier PyInstaller
|
|
||||||
pip show pyinstaller >nul 2>&1
|
|
||||||
if errorlevel 1 (
|
|
||||||
echo Installation de PyInstaller...
|
|
||||||
pip install pyinstaller
|
|
||||||
)
|
|
||||||
|
|
||||||
:: Builder
|
|
||||||
echo Build en cours (cela prend 2-5 minutes)...
|
|
||||||
echo.
|
|
||||||
|
|
||||||
pyinstaller --onefile --windowed ^
|
|
||||||
--name "Lea" ^
|
|
||||||
--add-data "agent_v1;agent_v1" ^
|
|
||||||
--add-data "lea_ui;lea_ui" ^
|
|
||||||
--add-data "config.txt;." ^
|
|
||||||
--add-data "LISEZMOI.txt;." ^
|
|
||||||
--hidden-import "pynput.keyboard._win32" ^
|
|
||||||
--hidden-import "pynput.mouse._win32" ^
|
|
||||||
--hidden-import "pystray._win32" ^
|
|
||||||
--hidden-import "plyer.platforms.win.notification" ^
|
|
||||||
--hidden-import "win32api" ^
|
|
||||||
--hidden-import "win32con" ^
|
|
||||||
--hidden-import "win32gui" ^
|
|
||||||
--hidden-import "tkinter" ^
|
|
||||||
--hidden-import "tkinter.simpledialog" ^
|
|
||||||
--hidden-import "tkinter.messagebox" ^
|
|
||||||
--exclude-module "torch" ^
|
|
||||||
--exclude-module "torchvision" ^
|
|
||||||
--exclude-module "transformers" ^
|
|
||||||
--exclude-module "clip" ^
|
|
||||||
--exclude-module "faiss" ^
|
|
||||||
--exclude-module "scipy" ^
|
|
||||||
--exclude-module "sklearn" ^
|
|
||||||
--exclude-module "matplotlib" ^
|
|
||||||
--exclude-module "pandas" ^
|
|
||||||
--exclude-module "tensorflow" ^
|
|
||||||
run_agent_v1.py
|
|
||||||
|
|
||||||
if errorlevel 1 (
|
|
||||||
echo.
|
|
||||||
echo ERREUR : Le build a echoue.
|
|
||||||
pause
|
|
||||||
exit /b 1
|
|
||||||
)
|
|
||||||
|
|
||||||
echo.
|
|
||||||
echo ============================================================
|
|
||||||
echo Build termine !
|
|
||||||
echo.
|
|
||||||
echo Lea.exe est dans le dossier dist\
|
|
||||||
echo Taille :
|
|
||||||
dir dist\Lea.exe | findstr "Lea.exe"
|
|
||||||
echo.
|
|
||||||
echo Pour deployer : copiez dist\Lea.exe + config.txt + LISEZMOI.txt
|
|
||||||
echo ============================================================
|
|
||||||
pause
|
|
||||||
WIN_BATCH
|
|
||||||
|
|
||||||
echo " Script Windows genere : $WIN_BUILD"
|
|
||||||
echo ""
|
|
||||||
echo "============================================================"
|
|
||||||
@@ -85,7 +85,10 @@ echo ""
|
|||||||
# 4. Copier le package agent_v1 (code Python)
|
# 4. Copier le package agent_v1 (code Python)
|
||||||
# ---------------------------------------------------------------
|
# ---------------------------------------------------------------
|
||||||
echo "[4/7] Copie du code agent_v1..."
|
echo "[4/7] Copie du code agent_v1..."
|
||||||
# Copier tout le dossier en excluant les fichiers inutiles
|
# Copier tout le dossier en excluant uniquement les artefacts de build/test.
|
||||||
|
# IMPORTANT : ne PAS exclure les modules Python ui/ (shared_state, chat_window,
|
||||||
|
# capture_server) — ils sont requis par main.py et causent un crash au demarrage
|
||||||
|
# s'ils sont absents.
|
||||||
rsync -a \
|
rsync -a \
|
||||||
--exclude='__pycache__' \
|
--exclude='__pycache__' \
|
||||||
--exclude='*.pyc' \
|
--exclude='*.pyc' \
|
||||||
@@ -93,6 +96,7 @@ rsync -a \
|
|||||||
--exclude='sessions/' \
|
--exclude='sessions/' \
|
||||||
--exclude='logs/*.log' \
|
--exclude='logs/*.log' \
|
||||||
--exclude='.hypothesis' \
|
--exclude='.hypothesis' \
|
||||||
|
--exclude='*.md' \
|
||||||
"$PROJECT_ROOT/agent_v0/agent_v1/" \
|
"$PROJECT_ROOT/agent_v0/agent_v1/" \
|
||||||
"$PACKAGE_DIR/agent_v1/"
|
"$PACKAGE_DIR/agent_v1/"
|
||||||
|
|
||||||
@@ -108,8 +112,9 @@ echo ""
|
|||||||
# ---------------------------------------------------------------
|
# ---------------------------------------------------------------
|
||||||
echo "[5/7] Copie du module lea_ui..."
|
echo "[5/7] Copie du module lea_ui..."
|
||||||
mkdir -p "$PACKAGE_DIR/lea_ui"
|
mkdir -p "$PACKAGE_DIR/lea_ui"
|
||||||
cp "$PROJECT_ROOT/agent_v0/lea_ui/"*.py "$PACKAGE_DIR/lea_ui/"
|
cp "$PROJECT_ROOT/agent_v0/lea_ui/__init__.py" "$PACKAGE_DIR/lea_ui/"
|
||||||
echo " lea_ui/ copie ($(ls "$PACKAGE_DIR/lea_ui/"*.py | wc -l) fichiers)"
|
cp "$PROJECT_ROOT/agent_v0/lea_ui/server_client.py" "$PACKAGE_DIR/lea_ui/"
|
||||||
|
echo " lea_ui/ copie (2 fichiers)"
|
||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
# ---------------------------------------------------------------
|
# ---------------------------------------------------------------
|
||||||
@@ -127,6 +132,56 @@ echo "[6/7] Configuration des packages Python..."
|
|||||||
echo " Structure d'imports verifiee"
|
echo " Structure d'imports verifiee"
|
||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------
|
||||||
|
# 6b. Verification des modules requis
|
||||||
|
# ---------------------------------------------------------------
|
||||||
|
echo "[6b/7] Verification des modules Python requis..."
|
||||||
|
MISSING=0
|
||||||
|
REQUIRED_FILES=(
|
||||||
|
"agent_v1/__init__.py"
|
||||||
|
"agent_v1/main.py"
|
||||||
|
"agent_v1/config.py"
|
||||||
|
"agent_v1/window_info.py"
|
||||||
|
"agent_v1/window_info_crossplatform.py"
|
||||||
|
"agent_v1/core/__init__.py"
|
||||||
|
"agent_v1/core/captor.py"
|
||||||
|
"agent_v1/core/executor.py"
|
||||||
|
"agent_v1/network/__init__.py"
|
||||||
|
"agent_v1/network/streamer.py"
|
||||||
|
"agent_v1/session/__init__.py"
|
||||||
|
"agent_v1/session/storage.py"
|
||||||
|
"agent_v1/ui/__init__.py"
|
||||||
|
"agent_v1/ui/shared_state.py"
|
||||||
|
"agent_v1/ui/smart_tray.py"
|
||||||
|
"agent_v1/ui/chat_window.py"
|
||||||
|
"agent_v1/ui/capture_server.py"
|
||||||
|
"agent_v1/ui/notifications.py"
|
||||||
|
"agent_v1/vision/__init__.py"
|
||||||
|
"agent_v1/vision/capturer.py"
|
||||||
|
"agent_v1/vision/blur_sensitive.py"
|
||||||
|
"agent_v1/vision/system_info.py"
|
||||||
|
"agent_v1/monitoring/__init__.py"
|
||||||
|
"lea_ui/__init__.py"
|
||||||
|
"lea_ui/server_client.py"
|
||||||
|
"run_agent_v1.py"
|
||||||
|
)
|
||||||
|
|
||||||
|
for req_file in "${REQUIRED_FILES[@]}"; do
|
||||||
|
if [[ ! -f "$PACKAGE_DIR/$req_file" ]]; then
|
||||||
|
echo -e " ${RED}MANQUANT : $req_file${NC}"
|
||||||
|
MISSING=$((MISSING + 1))
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
if [[ $MISSING -gt 0 ]]; then
|
||||||
|
echo ""
|
||||||
|
echo -e "${RED} ERREUR : $MISSING fichier(s) requis manquant(s) !${NC}"
|
||||||
|
echo -e "${RED} Le package est INCOMPLET — corrigez build_package.sh avant de deployer.${NC}"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo -e " ${GREEN}Tous les ${#REQUIRED_FILES[@]} fichiers requis sont presents.${NC}"
|
||||||
|
echo ""
|
||||||
|
|
||||||
# ---------------------------------------------------------------
|
# ---------------------------------------------------------------
|
||||||
# 7. Creer le zip
|
# 7. Creer le zip
|
||||||
# ---------------------------------------------------------------
|
# ---------------------------------------------------------------
|
||||||
|
|||||||
77
docs/CONSOLIDATION_20260405.md
Normal file
77
docs/CONSOLIDATION_20260405.md
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
# Consolidation — 5 avril 2026
|
||||||
|
|
||||||
|
## Ce qui fonctionne
|
||||||
|
|
||||||
|
### Pipeline d'entraînement (mesuré)
|
||||||
|
| Étape | Temps/screenshot | Extrapolation 1h |
|
||||||
|
|-------|-----------------|------------------|
|
||||||
|
| ScreenAnalyzer (OCR docTR) | 1.05s | 9 min |
|
||||||
|
| CLIP Embeddings (ViT-B-32) | 0.093s | 1 min |
|
||||||
|
| FAISS Index | <0.01s | <1s |
|
||||||
|
| GraphBuilder | 0.7s total | <1 min |
|
||||||
|
| **Total** | **1.2s/shot** | **~10 min** |
|
||||||
|
|
||||||
|
### Résolution visuelle
|
||||||
|
- **Grounding qwen2.5vl** : fonctionne sur les fenêtres croppées (by_text OCR/VLM)
|
||||||
|
- **Template matching** : fonctionne pour les icônes/taskbar (crop 80x80)
|
||||||
|
- **gemma4 enrichissement** : lit le texte des éléments sans OCR (onglets, icônes)
|
||||||
|
|
||||||
|
### Vérifications
|
||||||
|
- **CLIP** : vérifie la bonne application (sim > 0.75 sur fenêtre)
|
||||||
|
- **Titre fenêtre** : vérifie l'état par nom d'app (polling 10s)
|
||||||
|
- **Pré-vérification** : stoppe si mauvaise fenêtre AVANT de cliquer
|
||||||
|
|
||||||
|
### Acteur intelligent
|
||||||
|
- **gemma4 think=True** : décide PASSER/EXECUTER/STOPPER (5s, 75% correct)
|
||||||
|
- Branché dans l'executor quand target_not_found
|
||||||
|
- Mode texte CPU (pas d'image, pas de VRAM)
|
||||||
|
|
||||||
|
### Infrastructure
|
||||||
|
- Ollama 0.16.3 host (port 11434) : qwen2.5vl:7b GPU 9.4GB
|
||||||
|
- Docker Ollama 0.20 (port 11435) : gemma4:e4b GPU 3.6GB
|
||||||
|
- Jamais simultanés (auto-unload gemma4 après build)
|
||||||
|
- VM Win11 (192.168.122.14) : SSH + agent Léa
|
||||||
|
- Anti-bot : Bézier mouse + frappe char-by-char
|
||||||
|
|
||||||
|
## Problèmes identifiés (non résolus)
|
||||||
|
|
||||||
|
### P1 : Ambiguïté "Rechercher" (taskbar vs explorateur)
|
||||||
|
Le crop 80x80 de la barre de recherche Windows ressemble à la barre
|
||||||
|
de recherche de l'explorateur. Le template matching clique au mauvais
|
||||||
|
endroit. L'acteur doit apprendre à distinguer les contextes.
|
||||||
|
|
||||||
|
### P2 : Éléments VLM (by_text_source="vlm")
|
||||||
|
Le grounding qwen2.5vl ne trouve pas toujours les textes lus par
|
||||||
|
gemma4 (ex: "voiture elec" — texte d'onglet). L'acteur prend le
|
||||||
|
relais et décide PASSER quand l'état est déjà atteint.
|
||||||
|
|
||||||
|
### P3 : Premier chargement VLM lent
|
||||||
|
Le premier appel à qwen2.5vl ou gemma4 après redémarrage prend 30-60s
|
||||||
|
(chargement en VRAM). Les appels suivants sont rapides (0.2-5s).
|
||||||
|
|
||||||
|
## Architecture validée
|
||||||
|
|
||||||
|
```
|
||||||
|
ENREGISTREMENT (une fois)
|
||||||
|
Agent capture → screenshots + events
|
||||||
|
↓
|
||||||
|
BUILD (une fois, ~15s)
|
||||||
|
ScreenAnalyzer (OCR) → CLIP → FAISS → GraphBuilder
|
||||||
|
gemma4 enrichit les éléments sans OCR
|
||||||
|
→ Workflow enrichi + actions avec embeddings CLIP
|
||||||
|
|
||||||
|
REPLAY (à chaque exécution)
|
||||||
|
Fast path :
|
||||||
|
Titre fenêtre OK → grounding qwen2.5vl → clic → polling titre → OK
|
||||||
|
Slow path (quand target_not_found) :
|
||||||
|
Acteur gemma4 → PASSER / STOPPER / EXECUTER
|
||||||
|
```
|
||||||
|
|
||||||
|
## Métriques de replay sur VM
|
||||||
|
|
||||||
|
| Session | Résultat | Détail |
|
||||||
|
|---------|----------|--------|
|
||||||
|
| Notepad (dernier test) | 4/32 actions | Recherche + ouverture Bloc-notes OK, bloque sur onglet |
|
||||||
|
| Grounding texte OCR | 100% | Rechercher, Ouvrir, Fichier, Enregistrer |
|
||||||
|
| CLIP vérification | 100% | sim 0.87-0.99 |
|
||||||
|
| Acteur gemma4 | Validé unitairement | PASSER correct pour onglet actif |
|
||||||
146
docs/PLAN_ACTEUR_V1.md
Normal file
146
docs/PLAN_ACTEUR_V1.md
Normal file
@@ -0,0 +1,146 @@
|
|||||||
|
# Plan Acteur Intelligent — RPA Vision V3
|
||||||
|
|
||||||
|
**Date** : 5 avril 2026
|
||||||
|
**Validé par** : Dom + Claude
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Vision finale
|
||||||
|
|
||||||
|
L'utilisateur dit : "Traite-moi tous les dossiers du mois de janvier"
|
||||||
|
Le robot exécute. Autonome, adaptatif, intelligent.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Architecture 3 niveaux
|
||||||
|
|
||||||
|
```
|
||||||
|
MACRO → Planificateur LLM
|
||||||
|
"traite les dossiers de janvier"
|
||||||
|
→ décompose en étapes
|
||||||
|
→ boucle sur les éléments
|
||||||
|
→ rend compte des résultats
|
||||||
|
|
||||||
|
MÉSO → Acteur intelligent
|
||||||
|
Pour chaque étape :
|
||||||
|
→ regarde l'écran (gemma4)
|
||||||
|
→ comprend l'état
|
||||||
|
→ décide : agir / adapter / passer
|
||||||
|
→ exécute l'action
|
||||||
|
|
||||||
|
MICRO → Grounding + exécution
|
||||||
|
→ qwen2.5vl localise l'élément (bbox_2d)
|
||||||
|
→ Bézier mouse + char-by-char typing
|
||||||
|
→ Polling titre pour vérification
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## État actuel (5 avril 2026)
|
||||||
|
|
||||||
|
### MICRO — Opérationnel
|
||||||
|
- Grounding qwen2.5vl:7b sur GPU (Ollama 0.16.3, port 11434)
|
||||||
|
- Grounding sur fenêtre active (crop depuis screenshot live)
|
||||||
|
- Template matching 80x80 pour icônes (seuil 0.90)
|
||||||
|
- Position hint pour désambiguïser (en bas, en haut, à gauche)
|
||||||
|
- Pré-vérification titre fenêtre (par nom d'application)
|
||||||
|
- Post-vérification polling titre (max 10s)
|
||||||
|
- Bézier mouse + frappe char-by-char (anti-bot)
|
||||||
|
|
||||||
|
### MÉSO — À construire
|
||||||
|
- gemma4:e4b disponible sur GPU (Docker Ollama 0.20, port 11435)
|
||||||
|
- Enrichissement build_replay : gemma4 lit les éléments sans OCR ✓
|
||||||
|
- Auto-déchargement gemma4 après build ✓
|
||||||
|
- Manque : boucle perception → compréhension → décision au replay
|
||||||
|
|
||||||
|
### MACRO — À concevoir
|
||||||
|
- Pas encore démarré
|
||||||
|
- Nécessite : workflows comme templates avec variables
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Plan d'implémentation
|
||||||
|
|
||||||
|
### Phase 1 : Workflow comme template (build_replay)
|
||||||
|
|
||||||
|
**Objectif** : l'enregistrement produit un template paramétrable
|
||||||
|
|
||||||
|
Pour chaque action, stocker :
|
||||||
|
- `intention` : ce que l'utilisateur veut faire (gemma4)
|
||||||
|
- `variables` : les données qui changent (nom de fichier, texte, date)
|
||||||
|
- `expected_state` : description de l'écran avant l'action (gemma4)
|
||||||
|
- `expected_result` : description de l'écran après l'action
|
||||||
|
|
||||||
|
Exemple :
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"intention": "Ouvrir un fichier existant dans le Bloc-notes",
|
||||||
|
"action": "click",
|
||||||
|
"by_text": "voiture elec",
|
||||||
|
"variables": {"filename": "voiture elec"},
|
||||||
|
"expected_state": "Bloc-notes ouvert avec plusieurs onglets",
|
||||||
|
"expected_result": "L'onglet du fichier est actif"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Moteur** : gemma4 (Docker, port 11435) — une seule fois pendant le build
|
||||||
|
**Impact** : le build_replay devient plus riche, pas de changement au replay
|
||||||
|
|
||||||
|
### Phase 2 : Acteur décisionnel (replay)
|
||||||
|
|
||||||
|
**Objectif** : l'acteur compare l'état attendu et décide
|
||||||
|
|
||||||
|
Avant chaque action :
|
||||||
|
1. Capturer la fenêtre active
|
||||||
|
2. Comparer titre de fenêtre (rapide, gratuit)
|
||||||
|
3. Si mismatch → décrire l'état via gemma4 (texte pur, pas d'image)
|
||||||
|
4. Comparer état décrit vs expected_state
|
||||||
|
5. Décider : exécuter / adapter / passer
|
||||||
|
|
||||||
|
La décision est prise par gemma4 en **mode texte** (pas d'image = pas de VRAM) :
|
||||||
|
```
|
||||||
|
État attendu : "Bloc-notes ouvert avec un document vide"
|
||||||
|
État actuel : "Bloc-notes ouvert avec du contenu existant"
|
||||||
|
Action prévue : "Taper du texte"
|
||||||
|
→ Décision : "Ouvrir un nouvel onglet avant de taper"
|
||||||
|
```
|
||||||
|
|
||||||
|
**Moteur** : gemma4 texte (CPU, rapide) pour les décisions
|
||||||
|
**Impact** : changement dans l'executor côté agent
|
||||||
|
|
||||||
|
### Phase 3 : Planificateur (macro)
|
||||||
|
|
||||||
|
**Objectif** : décomposer une instruction en étapes
|
||||||
|
|
||||||
|
L'utilisateur dit : "Traite les dossiers de janvier"
|
||||||
|
Le planificateur :
|
||||||
|
1. Identifie le workflow appris ("traiter un dossier")
|
||||||
|
2. Liste les éléments (fichiers de janvier)
|
||||||
|
3. Pour chaque élément, instancie le template avec les variables
|
||||||
|
4. Lance l'acteur sur chaque instance
|
||||||
|
5. Collecte les résultats
|
||||||
|
|
||||||
|
**Moteur** : LLM (gemma4 ou plus gros modèle)
|
||||||
|
**Impact** : nouveau module de planification
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Contraintes techniques
|
||||||
|
|
||||||
|
| Ressource | Utilisation |
|
||||||
|
|-----------|-------------|
|
||||||
|
| GPU (12 GB) | Un seul modèle VLM à la fois |
|
||||||
|
| Port 11434 (host) | qwen2.5vl:7b — grounding (replay) |
|
||||||
|
| Port 11435 (Docker) | gemma4:e4b — compréhension (build + décision) |
|
||||||
|
| Séquencement | Build (gemma4) → auto-unload → Replay (qwen2.5vl) |
|
||||||
|
| Décisions replay | gemma4 en mode texte (CPU, pas de VRAM) |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Ordre de réalisation
|
||||||
|
|
||||||
|
1. **Phase 1** d'abord — enrichir l'enregistrement (intentions + variables + état)
|
||||||
|
2. **Phase 2** ensuite — acteur qui décide pendant le replay
|
||||||
|
3. **Phase 3** après — planificateur macro
|
||||||
|
|
||||||
|
Chaque phase est testable indépendamment.
|
||||||
@@ -26,6 +26,7 @@ markers =
|
|||||||
fiche8: Tests Fiche #8 (anti-bugs terrain)
|
fiche8: Tests Fiche #8 (anti-bugs terrain)
|
||||||
fiche9: Tests Fiche #9 (postconditions retry backoff)
|
fiche9: Tests Fiche #9 (postconditions retry backoff)
|
||||||
fiche10: Tests Fiche #10 (precision metrics engine)
|
fiche10: Tests Fiche #10 (precision metrics engine)
|
||||||
|
visual: Tests visuels sur captures réelles (nécessite serveur GPU)
|
||||||
|
|
||||||
# Note: Chemins Python gérés par tests/conftest.py
|
# Note: Chemins Python gérés par tests/conftest.py
|
||||||
|
|
||||||
|
|||||||
683
tests/unit/test_audit_trail.py
Normal file
683
tests/unit/test_audit_trail.py
Normal file
@@ -0,0 +1,683 @@
|
|||||||
|
# tests/unit/test_audit_trail.py
|
||||||
|
"""
|
||||||
|
Tests unitaires du module Audit Trail.
|
||||||
|
|
||||||
|
Vérifie l'enregistrement, la recherche, l'export CSV et le résumé
|
||||||
|
journalier des entrées d'audit.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import csv
|
||||||
|
import io
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
from datetime import date, datetime, timedelta
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
# Importer depuis le bon chemin (agent_v0/server_v1/)
|
||||||
|
import sys
|
||||||
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
|
||||||
|
|
||||||
|
from agent_v0.server_v1.audit_trail import AuditEntry, AuditTrail
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Fixtures
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def audit_dir(tmp_path):
|
||||||
|
"""Répertoire temporaire pour les fichiers d'audit."""
|
||||||
|
d = tmp_path / "audit"
|
||||||
|
d.mkdir()
|
||||||
|
return str(d)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def audit(audit_dir):
|
||||||
|
"""Instance AuditTrail avec répertoire temporaire."""
|
||||||
|
return AuditTrail(audit_dir=audit_dir)
|
||||||
|
|
||||||
|
|
||||||
|
def _make_entry(**kwargs) -> AuditEntry:
|
||||||
|
"""Créer une entrée d'audit avec des valeurs par défaut."""
|
||||||
|
defaults = {
|
||||||
|
"timestamp": datetime.now().isoformat(),
|
||||||
|
"session_id": "sess_test_001",
|
||||||
|
"action_id": "act_001",
|
||||||
|
"user_id": "tim_dupont",
|
||||||
|
"user_name": "Marie Dupont",
|
||||||
|
"machine_id": "PC-TIM-01",
|
||||||
|
"action_type": "click",
|
||||||
|
"action_detail": "Clic sur 'Enregistrer' dans DxCare",
|
||||||
|
"target_app": "DxCare",
|
||||||
|
"execution_mode": "assisted",
|
||||||
|
"result": "success",
|
||||||
|
"resolution_method": "som_text_match",
|
||||||
|
"critic_result": "semantic_ok",
|
||||||
|
"recovery_action": "",
|
||||||
|
"domain": "tim_codage",
|
||||||
|
"workflow_id": "wf_codage_cim10",
|
||||||
|
"workflow_name": "Codage CIM-10 séjour",
|
||||||
|
"duration_ms": 234.5,
|
||||||
|
}
|
||||||
|
defaults.update(kwargs)
|
||||||
|
return AuditEntry(**defaults)
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Tests AuditEntry
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
class TestAuditEntry:
|
||||||
|
"""Tests de la structure AuditEntry."""
|
||||||
|
|
||||||
|
def test_creation_basique(self):
|
||||||
|
"""Créer une entrée avec tous les champs."""
|
||||||
|
entry = _make_entry()
|
||||||
|
assert entry.user_id == "tim_dupont"
|
||||||
|
assert entry.action_type == "click"
|
||||||
|
assert entry.result == "success"
|
||||||
|
assert entry.duration_ms == 234.5
|
||||||
|
|
||||||
|
def test_to_dict(self):
|
||||||
|
"""Sérialiser en dictionnaire."""
|
||||||
|
entry = _make_entry()
|
||||||
|
d = entry.to_dict()
|
||||||
|
assert isinstance(d, dict)
|
||||||
|
assert d["user_id"] == "tim_dupont"
|
||||||
|
assert d["domain"] == "tim_codage"
|
||||||
|
assert d["duration_ms"] == 234.5
|
||||||
|
|
||||||
|
def test_from_dict(self):
|
||||||
|
"""Désérialiser depuis un dictionnaire."""
|
||||||
|
entry = _make_entry()
|
||||||
|
d = entry.to_dict()
|
||||||
|
restored = AuditEntry.from_dict(d)
|
||||||
|
assert restored.user_id == entry.user_id
|
||||||
|
assert restored.action_detail == entry.action_detail
|
||||||
|
assert restored.duration_ms == entry.duration_ms
|
||||||
|
|
||||||
|
def test_from_dict_ignore_unknown_keys(self):
|
||||||
|
"""Les clés inconnues sont ignorées (compatibilité future)."""
|
||||||
|
d = {"user_id": "test", "unknown_field": "valeur", "future_key": 42}
|
||||||
|
entry = AuditEntry.from_dict(d)
|
||||||
|
assert entry.user_id == "test"
|
||||||
|
# Les champs inconnus ne lèvent pas d'erreur
|
||||||
|
|
||||||
|
def test_to_dict_json_serializable(self):
|
||||||
|
"""Le dictionnaire est sérialisable en JSON."""
|
||||||
|
entry = _make_entry(action_detail="Clic sur 'Validé' — accent français")
|
||||||
|
d = entry.to_dict()
|
||||||
|
json_str = json.dumps(d, ensure_ascii=False)
|
||||||
|
assert "accent français" in json_str
|
||||||
|
|
||||||
|
def test_default_values(self):
|
||||||
|
"""Une entrée vide a des valeurs par défaut cohérentes."""
|
||||||
|
entry = AuditEntry()
|
||||||
|
assert entry.timestamp == ""
|
||||||
|
assert entry.user_id == ""
|
||||||
|
assert entry.duration_ms == 0.0
|
||||||
|
assert entry.result == ""
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Tests AuditTrail — enregistrement et lecture
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
class TestAuditTrailRecord:
    """Tests for writing audit entries through ``AuditTrail.record``."""

    def test_record_and_reload(self, audit, audit_dir):
        """Record one entry, then read it back straight from the JSONL file."""
        entry = _make_entry()
        audit.record(entry)

        # The file named after today's date must exist.
        today = date.today().isoformat()
        filepath = Path(audit_dir) / f"audit_{today}.jsonl"
        assert filepath.exists()

        # Read the file directly, bypassing the AuditTrail query API.
        with open(filepath, "r", encoding="utf-8") as f:
            lines = f.readlines()
        assert len(lines) == 1

        data = json.loads(lines[0])
        assert data["user_id"] == "tim_dupont"
        assert data["action_detail"] == "Clic sur 'Enregistrer' dans DxCare"

    def test_record_multiple_entries(self, audit, audit_dir):
        """Several entries recorded in a row end up in the same file."""
        for i in range(5):
            entry = _make_entry(action_id=f"act_{i:03d}")
            audit.record(entry)

        today = date.today().isoformat()
        filepath = Path(audit_dir) / f"audit_{today}.jsonl"
        with open(filepath, "r", encoding="utf-8") as f:
            lines = f.readlines()
        assert len(lines) == 5

    def test_record_auto_timestamp(self, audit):
        """An entry recorded with an empty timestamp comes back with one filled in."""
        entry = _make_entry(timestamp="")
        audit.record(entry)

        # The stored timestamp must be non-empty...
        entries = audit.query()
        assert len(entries) == 1
        assert entries[0]["timestamp"] != ""
        # ...and parseable as ISO 8601 (fromisoformat raises otherwise).
        datetime.fromisoformat(entries[0]["timestamp"])

    def test_record_utf8_french(self, audit):
        """French accented characters survive a record/query round trip."""
        entry = _make_entry(
            action_detail="Saisie du diagnostic 'Hépatite à cytomégalovirus' — CIM-10: B25.1",
            user_name="François Müller",
            workflow_name="Codage séjour réanimation néonatale",
        )
        audit.record(entry)

        entries = audit.query()
        assert len(entries) == 1
        assert "Hépatite" in entries[0]["action_detail"]
        assert "François Müller" in entries[0]["user_name"]
        assert "néonatale" in entries[0]["workflow_name"]

    def test_record_creates_directory(self, tmp_path):
        """A missing (nested) audit directory exists after recording an entry."""
        new_dir = str(tmp_path / "sub" / "deep" / "audit")
        audit = AuditTrail(audit_dir=new_dir)
        entry = _make_entry()
        audit.record(entry)

        assert Path(new_dir).exists()
        entries = audit.query()
        assert len(entries) == 1

    def test_record_different_dates(self, audit, audit_dir):
        """Entries stamped on different days land in different files."""
        # Read the clock once: taking date.today() and datetime.now()
        # separately could straddle midnight and make the "today" entry
        # land in a file this test does not look for.
        now = datetime.now()
        today = now.date()
        yesterday = today - timedelta(days=1)

        entry_today = _make_entry(timestamp=now.isoformat())
        entry_yesterday = _make_entry(
            timestamp=datetime.combine(yesterday, datetime.min.time()).isoformat(),
            action_id="act_yesterday",
        )

        audit.record(entry_today)
        audit.record(entry_yesterday)

        # One file per calendar day is expected.
        file_today = Path(audit_dir) / f"audit_{today.isoformat()}.jsonl"
        file_yesterday = Path(audit_dir) / f"audit_{yesterday.isoformat()}.jsonl"
        assert file_today.exists()
        assert file_yesterday.exists()

    def test_jsonl_format(self, audit, audit_dir):
        """Every line of the audit file is a standalone JSON document (JSONL)."""
        for i in range(3):
            audit.record(_make_entry(action_id=f"act_{i}"))

        today = date.today().isoformat()
        filepath = Path(audit_dir) / f"audit_{today}.jsonl"
        with open(filepath, "r", encoding="utf-8") as f:
            for line_num, line in enumerate(f, 1):
                line = line.strip()
                assert line, f"Ligne {line_num} vide"
                data = json.loads(line)  # must not raise
                assert "action_id" in data
                assert "timestamp" in data
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Tests AuditTrail — requêtes avec filtres
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
class TestAuditTrailQuery:
    """Tests for searching and filtering through ``AuditTrail.query``."""

    def _seed_entries(self, audit):
        """Record five varied entries (two users, three results, two workflows)."""
        seed = [
            dict(action_id="act_001", user_id="tim_dupont", result="success",
                 action_type="click", workflow_id="wf_01", domain="tim_codage"),
            dict(action_id="act_002", user_id="tim_dupont", result="failed",
                 action_type="type", workflow_id="wf_01", domain="generic"),
            dict(action_id="act_003", user_id="tim_martin", user_name="Jean Martin",
                 result="success", action_type="click", workflow_id="wf_02",
                 domain="generic"),
            dict(action_id="act_004", user_id="tim_martin", user_name="Jean Martin",
                 result="recovered", action_type="key_combo", workflow_id="wf_02",
                 domain="generic"),
            dict(action_id="act_005", user_id="tim_dupont", result="success",
                 action_type="click", workflow_id="wf_01", domain="generic"),
        ]
        # Build every entry first, then record them, in list order.
        entries = [_make_entry(**kwargs) for kwargs in seed]
        for e in entries:
            audit.record(e)

    def test_query_all(self, audit):
        """A query without filters returns every seeded entry."""
        self._seed_entries(audit)
        results = audit.query()
        assert len(results) == 5

    def test_query_by_user(self, audit):
        """Filter on the user identifier."""
        self._seed_entries(audit)
        results = audit.query(user_id="tim_dupont")
        assert len(results) == 3
        assert all(r["user_id"] == "tim_dupont" for r in results)

    def test_query_by_result(self, audit):
        """Filter on the result value."""
        self._seed_entries(audit)
        results = audit.query(result="success")
        assert len(results) == 3
        assert all(r["result"] == "success" for r in results)

    def test_query_by_action_type(self, audit):
        """Filter on the action type."""
        self._seed_entries(audit)
        results = audit.query(action_type="click")
        assert len(results) == 3

    def test_query_by_workflow(self, audit):
        """Filter on the workflow identifier."""
        self._seed_entries(audit)
        results = audit.query(workflow_id="wf_02")
        assert len(results) == 2

    def test_query_by_domain(self, audit):
        """Filter on the business domain."""
        self._seed_entries(audit)
        results = audit.query(domain="tim_codage")
        assert len(results) == 1
        assert results[0]["action_id"] == "act_001"

    def test_query_by_session(self, audit):
        """Filter on the session identifier."""
        self._seed_entries(audit)
        results = audit.query(session_id="sess_test_001")
        # All five seeded entries are expected to share this session id.
        assert len(results) == 5

    def test_query_combined_filters(self, audit):
        """Several filters passed together are combined (AND)."""
        self._seed_entries(audit)
        results = audit.query(user_id="tim_dupont", result="success")
        assert len(results) == 2

    def test_query_no_match(self, audit):
        """A filter that matches nothing yields an empty list."""
        self._seed_entries(audit)
        results = audit.query(user_id="tim_inexistant")
        assert len(results) == 0

    def test_query_pagination_limit(self, audit):
        """``limit`` caps the number of returned entries."""
        self._seed_entries(audit)
        results = audit.query(limit=2)
        assert len(results) == 2

    def test_query_pagination_offset(self, audit):
        """``offset`` skips the first entries of the full result list."""
        self._seed_entries(audit)
        all_results = audit.query()
        offset_results = audit.query(offset=3)
        assert len(offset_results) == 2
        assert offset_results[0] == all_results[3]

    def test_query_sorted_by_timestamp_desc(self, audit):
        """Results come back ordered by descending timestamp."""
        now = datetime.now()
        for i in range(5):
            ts = (now - timedelta(minutes=i)).isoformat()
            audit.record(_make_entry(
                timestamp=ts,
                action_id=f"act_{i}",
            ))

        results = audit.query()
        timestamps = [r["timestamp"] for r in results]
        assert timestamps == sorted(timestamps, reverse=True)

    def test_query_date_range(self, audit):
        """Filter on a date range (``date_from`` / ``date_to``)."""
        # Read the clock once so "today" and the timestamp of the
        # today-entry cannot disagree if the test runs across midnight.
        now = datetime.now()
        today = now.date()
        yesterday = today - timedelta(days=1)

        # Entry stamped yesterday at 00:00.
        audit.record(_make_entry(
            timestamp=datetime.combine(yesterday, datetime.min.time()).isoformat(),
            action_id="act_yesterday",
        ))
        # Entry stamped now.
        audit.record(_make_entry(
            timestamp=now.isoformat(),
            action_id="act_today",
        ))

        # Yesterday only.
        results = audit.query(
            date_from=yesterday.isoformat(),
            date_to=yesterday.isoformat(),
        )
        assert len(results) == 1
        assert results[0]["action_id"] == "act_yesterday"

        # Both days.
        results = audit.query(
            date_from=yesterday.isoformat(),
            date_to=today.isoformat(),
        )
        assert len(results) == 2
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Tests AuditTrail — résumé journalier
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
class TestAuditTrailSummary:
    """Tests for the per-day summary returned by ``AuditTrail.get_summary``."""

    def test_summary_empty(self, audit):
        """Summary of a day for which nothing was recorded."""
        report = audit.get_summary("2025-01-01")
        assert report["total_actions"] == 0
        assert report["success_rate"] == 0.0
        assert report["by_user"] == {}

    def test_summary_basic(self, audit):
        """Summary over a handful of entries."""
        specs = (
            {"user_id": "tim_dupont", "result": "success"},
            {"user_id": "tim_dupont", "result": "failed"},
            {"user_id": "tim_martin", "user_name": "Jean Martin", "result": "success"},
        )
        for spec in specs:
            audit.record(_make_entry(**spec))

        report = audit.get_summary()
        assert report["total_actions"] == 3
        assert report["success_rate"] == round(2 / 3, 3)

    def test_summary_by_user(self, audit):
        """Per-user breakdown: totals, successes and success rate."""
        specs = (
            {"user_id": "tim_dupont", "result": "success"},
            {"user_id": "tim_dupont", "result": "success"},
            {"user_id": "tim_dupont", "result": "failed"},
            {"user_id": "tim_martin", "user_name": "Jean Martin", "result": "success"},
        )
        for spec in specs:
            audit.record(_make_entry(**spec))

        per_user = audit.get_summary()["by_user"]
        assert "tim_dupont" in per_user
        dupont = per_user["tim_dupont"]
        assert dupont["total"] == 3
        assert dupont["success"] == 2
        assert dupont["success_rate"] == round(2 / 3, 3)
        martin = per_user["tim_martin"]
        assert martin["total"] == 1
        assert martin["success_rate"] == 1.0

    def test_summary_by_result(self, audit):
        """Breakdown by result value."""
        for outcome in ("success", "success", "failed", "recovered"):
            audit.record(_make_entry(result=outcome))

        per_result = audit.get_summary()["by_result"]
        assert per_result["success"] == 2
        assert per_result["failed"] == 1
        assert per_result["recovered"] == 1

    def test_summary_by_action_type(self, audit):
        """Breakdown by action type."""
        for kind in ("click", "click", "type"):
            audit.record(_make_entry(action_type=kind))

        per_type = audit.get_summary()["by_action_type"]
        assert per_type["click"] == 2
        assert per_type["type"] == 1

    def test_summary_by_workflow(self, audit):
        """Breakdown by workflow identifier."""
        for wf in ("wf_01", "wf_01", "wf_02"):
            audit.record(_make_entry(workflow_id=wf))

        per_workflow = audit.get_summary()["by_workflow"]
        assert per_workflow["wf_01"] == 2
        assert per_workflow["wf_02"] == 1

    def test_summary_by_execution_mode(self, audit):
        """Breakdown by execution mode."""
        for mode in ("autonomous", "assisted", "assisted"):
            audit.record(_make_entry(execution_mode=mode))

        per_mode = audit.get_summary()["by_execution_mode"]
        assert per_mode["autonomous"] == 1
        assert per_mode["assisted"] == 2

    def test_summary_date_field(self, audit):
        """The summary echoes back the requested date."""
        requested = date.today().isoformat()
        report = audit.get_summary(requested)
        assert report["date"] == requested
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Tests AuditTrail — export CSV
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
class TestAuditTrailExportCSV:
    """Tests for ``AuditTrail.export_csv``."""

    def test_export_csv_empty(self, audit):
        """Exporting when nothing was recorded yields an empty string."""
        assert audit.export_csv(date_from="2025-01-01") == ""

    def test_export_csv_basic(self, audit):
        """Recorded action ids show up in the exported text."""
        wanted_ids = ("act_001", "act_002")
        for ident in wanted_ids:
            audit.record(_make_entry(action_id=ident))

        exported = audit.export_csv()
        assert exported
        for ident in wanted_ids:
            assert ident in exported

    def test_export_csv_header(self, audit):
        """The CSV header row lists the expected column names."""
        audit.record(_make_entry())

        exported = audit.export_csv()
        columns = csv.DictReader(io.StringIO(exported)).fieldnames
        for column in ("timestamp", "user_id", "action_detail", "domain", "duration_ms"):
            assert column in columns

    def test_export_csv_parseable(self, audit):
        """The export can be parsed back by the ``csv`` module."""
        for idx in range(5):
            audit.record(_make_entry(
                action_id=f"act_{idx}",
                action_detail=f"Action {idx} — avec des 'guillemets' et des, virgules",
            ))

        exported = audit.export_csv()
        parsed_rows = list(csv.DictReader(io.StringIO(exported)))
        assert len(parsed_rows) == 5

        # Quotes and commas inside the field must not break the column split.
        for parsed in parsed_rows:
            assert "virgules" in parsed["action_detail"]

    def test_export_csv_filter_by_user(self, audit):
        """Export restricted to one user."""
        audit.record(_make_entry(user_id="tim_dupont", action_id="act_001"))
        audit.record(_make_entry(user_id="tim_martin", action_id="act_002"))

        exported = audit.export_csv(user_id="tim_dupont")
        parsed_rows = list(csv.DictReader(io.StringIO(exported)))
        assert len(parsed_rows) == 1
        assert parsed_rows[0]["user_id"] == "tim_dupont"

    def test_export_csv_utf8(self, audit):
        """French accented characters are preserved in the export."""
        audit.record(_make_entry(
            action_detail="Saisie 'Hépatite à cytomégalovirus' — réanimation néonatale",
            user_name="François Müller",
        ))

        exported = audit.export_csv()
        assert "Hépatite" in exported
        assert "François Müller" in exported
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Tests de robustesse
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
class TestAuditTrailRobustness:
    """Robustness and edge-case tests for ``AuditTrail``."""

    def test_directory_auto_creation(self, tmp_path):
        """Constructing an AuditTrail creates its missing directory."""
        target = str(tmp_path / "nonexistent" / "deep" / "audit")
        assert not Path(target).exists()

        AuditTrail(audit_dir=target)
        assert Path(target).exists()

    def test_corrupted_jsonl_line(self, audit, audit_dir):
        """A corrupted line in the JSONL file does not break reading."""
        # Two valid entries first.
        for ident in ("act_001", "act_002"):
            audit.record(_make_entry(action_id=ident))

        # Append a line that is not valid JSON.
        day = date.today().isoformat()
        jsonl_path = Path(audit_dir) / f"audit_{day}.jsonl"
        with open(jsonl_path, "a", encoding="utf-8") as handle:
            handle.write("{invalid json line\n")

        # One more valid entry after the bad line.
        audit.record(_make_entry(action_id="act_003"))

        # Only the three valid entries are expected back.
        found = audit.query()
        assert len(found) == 3

    def test_empty_file(self, audit, audit_dir):
        """An empty audit file is read as zero entries."""
        day = date.today().isoformat()
        jsonl_path = Path(audit_dir) / f"audit_{day}.jsonl"
        jsonl_path.touch()  # create the file with no content

        assert len(audit.query()) == 0

    def test_concurrent_writes(self, audit):
        """Entries written from five threads at once are all recorded."""
        import threading

        failures = []

        def worker(thread_no):
            # Collect exceptions instead of letting them die with the thread.
            try:
                for seq in range(20):
                    audit.record(_make_entry(action_id=f"act_{thread_no}_{seq}"))
            except Exception as exc:
                failures.append(str(exc))

        pool = []
        for thread_no in range(5):
            pool.append(threading.Thread(target=worker, args=(thread_no,)))
        for th in pool:
            th.start()
        for th in pool:
            th.join()

        assert not failures, f"Erreurs concurrentes: {failures}"
        found = audit.query(limit=200)
        assert len(found) == 100  # 5 threads x 20 entries

    def test_query_invalid_date(self, audit):
        """An unparseable ``date_from`` still yields a list, not an exception."""
        outcome = audit.query(date_from="not-a-date")
        assert isinstance(outcome, list)

    def test_summary_invalid_date(self, audit):
        """An unparseable date passed to get_summary yields an empty summary."""
        report = audit.get_summary("not-a-date")
        assert report["total_actions"] == 0

    def test_entry_all_fields_present_in_export(self, audit):
        """The CSV columns are exactly the fields of the AuditEntry dataclass."""
        from dataclasses import fields as dc_fields

        audit.record(_make_entry())

        exported = audit.export_csv()
        first_row = next(csv.DictReader(io.StringIO(exported)))

        declared = {f.name for f in dc_fields(AuditEntry)}
        assert declared == set(first_row.keys())

    def test_date_range_reversed(self, audit):
        """A reversed range (date_to earlier than date_from) does not raise."""
        this_day = date.today()
        previous_day = this_day - timedelta(days=1)

        audit.record(_make_entry(
            timestamp=datetime.combine(previous_day, datetime.min.time()).isoformat(),
        ))

        # date_from is later than date_to on purpose.
        outcome = audit.query(
            date_from=this_day.isoformat(),
            date_to=previous_day.isoformat(),
        )
        # NOTE(review): the original comment says the implementation swaps
        # the dates automatically; only the return type is checked here.
        assert isinstance(outcome, list)
|
||||||
530
tests/unit/test_policy_grounding_recovery_learning.py
Normal file
530
tests/unit/test_policy_grounding_recovery_learning.py
Normal file
@@ -0,0 +1,530 @@
|
|||||||
|
"""
|
||||||
|
Tests fonctionnels pour P2 (Policy/Grounding), P3 (Recovery), P4 (Learning).
|
||||||
|
|
||||||
|
Vérifie que chaque module fait bien son travail :
|
||||||
|
- Grounding : localise ou retourne NOT_FOUND (pas de décision)
|
||||||
|
- Policy : décide RETRY/SKIP/ABORT/SUPERVISE (pas de localisation)
|
||||||
|
- Recovery : exécute Ctrl+Z / Escape / Alt+F4 selon le contexte
|
||||||
|
- Learning : enregistre et requête les résultats structurés
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import shutil
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
from pathlib import Path
|
||||||
|
from unittest.mock import MagicMock, patch, PropertyMock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
_ROOT = str(Path(__file__).resolve().parents[2])
|
||||||
|
if _ROOT not in sys.path:
|
||||||
|
sys.path.insert(0, _ROOT)
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# P2 : Grounding — localisation pure
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestGroundingEngine:
    """Tests for ``GroundingEngine.locate`` driven by a mocked executor."""

    def _make_engine(self):
        """Return ``(engine, executor)`` where the executor is a MagicMock
        whose screenshot capture returns a fixed fake payload."""
        from agent_v0.agent_v1.core.grounding import GroundingEngine

        fake_exec = MagicMock()
        fake_exec._capture_screenshot_b64.return_value = "fake_b64_data"
        engine = GroundingEngine(fake_exec)
        return engine, fake_exec

    def test_server_found_retourne_coordonnees(self):
        """When the server resolves the target, its coordinates are returned."""
        grounder, fake_exec = self._make_engine()
        server_answer = {
            "resolved": True, "x_pct": 0.5, "y_pct": 0.3,
            "method": "som_text", "score": 0.95,
            "matched_element": {"label": "Enregistrer"},
        }
        fake_exec._server_resolve_target.return_value = server_answer

        outcome = grounder.locate(
            "http://server", {"by_text": "Enregistrer"}, 0.5, 0.3, 1920, 1080
        )

        assert outcome.found is True
        assert outcome.x_pct == 0.5
        assert outcome.y_pct == 0.3
        assert outcome.method == "som_text"

    def test_server_not_found_cascade_template(self):
        """When the server returns nothing, template matching is used next."""
        grounder, fake_exec = self._make_engine()
        fake_exec._server_resolve_target.return_value = None
        fake_exec._template_match_anchor.return_value = {
            "resolved": True, "x_pct": 0.4, "y_pct": 0.6,
            "score": 0.85,
        }
        spec = {"by_text": "OK", "anchor_image_base64": "abc123"}

        outcome = grounder.locate("http://server", spec, 0.5, 0.3, 1920, 1080)

        assert outcome.found is True
        assert outcome.method == "anchor_template"

    def test_toutes_strategies_echouent_retourne_not_found(self):
        """When every mocked strategy returns None, the result is not found."""
        grounder, fake_exec = self._make_engine()
        fake_exec._server_resolve_target.return_value = None
        fake_exec._template_match_anchor.return_value = None
        fake_exec._hybrid_vlm_resolve.return_value = None
        spec = {"by_text": "Inexistant", "anchor_image_base64": "abc", "vlm_description": "bouton"}

        outcome = grounder.locate("http://server", spec, 0.5, 0.3, 1920, 1080)

        assert outcome.found is False
        assert "échoué" in outcome.detail

    def test_screenshot_echoue_retourne_not_found(self):
        """When the screenshot capture returns None, the result is not found."""
        grounder, fake_exec = self._make_engine()
        fake_exec._capture_screenshot_b64.return_value = None

        outcome = grounder.locate(
            "http://server", {"by_text": "OK"}, 0.5, 0.3, 1920, 1080
        )

        assert outcome.found is False
        assert "screenshot" in outcome.detail.lower()

    def test_strategies_custom(self):
        """The caller can restrict which strategies are tried."""
        grounder, fake_exec = self._make_engine()
        fake_exec._template_match_anchor.return_value = {
            "resolved": True, "x_pct": 0.2, "y_pct": 0.8, "score": 0.9,
        }

        # Template strategy only; empty server URL.
        outcome = grounder.locate(
            "", {"anchor_image_base64": "abc"}, 0.5, 0.3, 1920, 1080,
            strategies=["template"],
        )

        assert outcome.found is True
        # The server resolver must not have been invoked at all.
        fake_exec._server_resolve_target.assert_not_called()

    def test_grounding_result_to_dict(self):
        """``GroundingResult.to_dict`` exposes the fields it was built with."""
        from agent_v0.agent_v1.core.grounding import GroundingResult

        as_dict = GroundingResult(
            found=True, x_pct=0.5, y_pct=0.3, method="som", score=0.9
        ).to_dict()

        assert as_dict["found"] is True
        assert as_dict["x_pct"] == 0.5
        assert as_dict["method"] == "som"
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# P2 : Policy — décisions quand grounding échoue
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestPolicyEngine:
    """Tests for ``PolicyEngine.decide`` driven by a mocked executor."""

    def _make_engine(self):
        """Return ``(engine, executor)`` with a MagicMock executor."""
        from agent_v0.agent_v1.core.policy import PolicyEngine

        fake_exec = MagicMock()
        engine = PolicyEngine(fake_exec)
        return engine, fake_exec

    def test_premier_essai_popup_fermee_retry(self):
        """First failure with the popup handler reporting True -> RETRY."""
        from agent_v0.agent_v1.core.policy import Decision

        policy, fake_exec = self._make_engine()
        fake_exec._handle_popup_vlm.return_value = True  # popup closed

        verdict = policy.decide(
            action={"type": "click"},
            target_spec={"by_text": "OK"},
            retry_count=0,
        )

        assert verdict.decision == Decision.RETRY
        assert "popup" in verdict.reason.lower()

    def test_premier_essai_pas_de_popup_retry(self):
        """First failure, no popup, retries left (max_retries=2) -> RETRY."""
        from agent_v0.agent_v1.core.policy import Decision

        policy, fake_exec = self._make_engine()
        fake_exec._handle_popup_vlm.return_value = False

        verdict = policy.decide(
            action={"type": "click"},
            target_spec={"by_text": "OK"},
            retry_count=0,
            max_retries=2,
        )

        assert verdict.decision == Decision.RETRY

    def test_max_retries_acteur_passer_skip(self):
        """Retries exhausted and the actor answers "PASSER" -> SKIP."""
        from agent_v0.agent_v1.core.policy import Decision

        policy, fake_exec = self._make_engine()
        fake_exec._actor_decide.return_value = "PASSER"

        verdict = policy.decide(
            action={"type": "click"},
            target_spec={"by_text": "Onglet"},
            retry_count=1,
            max_retries=1,
        )

        assert verdict.decision == Decision.SKIP

    def test_max_retries_acteur_stopper_abort(self):
        """Retries exhausted and the actor answers "STOPPER" -> ABORT."""
        from agent_v0.agent_v1.core.policy import Decision

        policy, fake_exec = self._make_engine()
        fake_exec._actor_decide.return_value = "STOPPER"

        verdict = policy.decide(
            action={"type": "click"},
            target_spec={"by_text": "X"},
            retry_count=1,
            max_retries=1,
        )

        assert verdict.decision == Decision.ABORT

    def test_max_retries_acteur_executer_supervise(self):
        """Retries exhausted and the actor answers "EXECUTER" -> SUPERVISE."""
        from agent_v0.agent_v1.core.policy import Decision

        policy, fake_exec = self._make_engine()
        fake_exec._actor_decide.return_value = "EXECUTER"

        verdict = policy.decide(
            action={"type": "click"},
            target_spec={"by_text": "X"},
            retry_count=1,
            max_retries=1,
        )

        assert verdict.decision == Decision.SUPERVISE

    def test_policy_decision_to_dict(self):
        """``PolicyDecision.to_dict`` gives the decision as "skip" plus the reason."""
        from agent_v0.agent_v1.core.policy import PolicyDecision, Decision

        as_dict = PolicyDecision(decision=Decision.SKIP, reason="État atteint").to_dict()

        assert as_dict["decision"] == "skip"
        assert as_dict["reason"] == "État atteint"
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# P3 : Recovery — rollback après échec
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestRecoveryEngine:
|
||||||
|
|
||||||
|
def _make_engine(self):
    """Return ``(engine, executor)`` with a MagicMock executor exposing a
    mocked keyboard, a mocked ``sct`` with two monitor entries, and a
    mocked ``_click``."""
    from agent_v0.agent_v1.core.recovery import RecoveryEngine

    fake_exec = MagicMock()
    fake_exec.keyboard = MagicMock()
    fake_exec.sct = MagicMock()
    # Second monitor entry carries the 1920x1080 geometry; the first is empty.
    fake_exec.sct.monitors = [{}, {"width": 1920, "height": 1080}]
    fake_exec._click = MagicMock()
    engine = RecoveryEngine(fake_exec)
    return engine, fake_exec
|
||||||
|
|
||||||
|
def test_popup_detectee_escape(self):
    """A critic detail mentioning a popup leads to the ESCAPE recovery."""
    from agent_v0.agent_v1.core.recovery import RecoveryAction

    recovery, fake_exec = self._make_engine()

    outcome = recovery.attempt(
        failed_action={"type": "click"},
        critic_detail="Une popup d'erreur est apparue",
    )

    assert outcome.action_taken == RecoveryAction.ESCAPE
    assert outcome.success is True
    # A key press must have been sent through the mocked keyboard.
    fake_exec.keyboard.press.assert_called()
|
||||||
|
|
||||||
|
def test_frappe_incorrecte_undo(self):
    """A failed "type" action typed in the wrong place leads to UNDO."""
    from agent_v0.agent_v1.core.recovery import RecoveryAction

    recovery, _unused_exec = self._make_engine()

    outcome = recovery.attempt(
        failed_action={"type": "type"},
        critic_detail="Le texte a été tapé au mauvais endroit",
    )

    assert outcome.action_taken == RecoveryAction.UNDO
    assert outcome.success is True
|
||||||
|
|
||||||
|
def test_mauvaise_fenetre_close(self):
|
||||||
|
"""Mauvaise fenêtre → Recovery fait Alt+F4."""
|
||||||
|
from agent_v0.agent_v1.core.recovery import RecoveryAction
|
||||||
|
engine, executor = self._make_engine()
|
||||||
|
result = engine.attempt(
|
||||||
|
failed_action={"type": "click"},
|
||||||
|
critic_detail="Mauvaise fenêtre ouverte au lieu du bloc-notes",
|
||||||
|
)
|
||||||
|
assert result.action_taken == RecoveryAction.CLOSE_WINDOW
|
||||||
|
assert result.success is True
|
||||||
|
|
||||||
|
def test_menu_ouvert_escape(self):
|
||||||
|
"""Menu déroulant ouvert → Recovery fait Escape."""
|
||||||
|
from agent_v0.agent_v1.core.recovery import RecoveryAction
|
||||||
|
engine, executor = self._make_engine()
|
||||||
|
result = engine.attempt(
|
||||||
|
failed_action={"type": "click"},
|
||||||
|
critic_detail="Un menu déroulant s'est ouvert",
|
||||||
|
)
|
||||||
|
assert result.action_taken == RecoveryAction.ESCAPE
|
||||||
|
assert result.success is True
|
||||||
|
|
||||||
|
def test_aucune_strategie_applicable(self):
|
||||||
|
"""Pas de pattern reconnu → NONE."""
|
||||||
|
from agent_v0.agent_v1.core.recovery import RecoveryAction
|
||||||
|
engine, executor = self._make_engine()
|
||||||
|
result = engine.attempt(
|
||||||
|
failed_action={"type": "wait"},
|
||||||
|
critic_detail="Quelque chose d'inattendu",
|
||||||
|
)
|
||||||
|
assert result.action_taken == RecoveryAction.NONE
|
||||||
|
assert result.success is False
|
||||||
|
|
||||||
|
def test_recovery_result_to_dict(self):
|
||||||
|
"""RecoveryResult se sérialise correctement."""
|
||||||
|
from agent_v0.agent_v1.core.recovery import RecoveryResult, RecoveryAction
|
||||||
|
d = RecoveryResult(
|
||||||
|
action_taken=RecoveryAction.UNDO, success=True, detail="Ctrl+Z"
|
||||||
|
).to_dict()
|
||||||
|
assert d["action_taken"] == "undo"
|
||||||
|
assert d["success"] is True
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# P4 : Learning — apprentissage runtime
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestReplayLearner:
    """Record / reload / query / statistics checks for ReplayLearner on a temporary directory."""

    @pytest.fixture
    def learner(self):
        """Yield a ReplayLearner bound to a temp directory that is removed after the test."""
        storage_dir = tempfile.mkdtemp(prefix="test_learning_")
        from agent_v0.server_v1.replay_learner import ReplayLearner

        replay_learner = ReplayLearner(learning_dir=storage_dir)
        yield replay_learner
        shutil.rmtree(storage_dir, ignore_errors=True)

    def test_record_et_load_session(self, learner):
        """An outcome that was recorded can be read back through load_session()."""
        from agent_v0.server_v1.replay_learner import ActionOutcome

        fields = {
            "session_id": "test_session",
            "action_id": "act_001",
            "action_type": "click",
            "target_description": "Bouton Enregistrer",
            "resolution_method": "som_text",
            "resolution_score": 0.95,
            "success": True,
        }
        learner.record(ActionOutcome(**fields))

        # Read the session back.
        reloaded = learner.load_session("test_session")
        assert len(reloaded) == 1
        first = reloaded[0]
        assert first.action_id == "act_001"
        assert first.success is True
        assert first.resolution_method == "som_text"

    def test_record_from_replay_result(self, learner):
        """record_from_replay_result() stores replay-format dicts as a loadable outcome."""
        replay_action = {
            "action_id": "a1",
            "type": "click",
            "target_spec": {"by_text": "OK", "window_title": "App"},
        }
        replay_result = {
            "success": True,
            "resolution_method": "template",
            "resolution_score": 0.9,
        }
        replay_verification = {
            "verified": True,
            "semantic_verified": True,
            "semantic_detail": "OK",
        }

        learner.record_from_replay_result(
            session_id="s1",
            action=replay_action,
            result=replay_result,
            verification=replay_verification,
        )

        reloaded = learner.load_session("s1")
        assert len(reloaded) == 1
        assert reloaded[0].target_description == "OK"
        assert reloaded[0].semantic_verified is True

    def test_query_similar(self, learner):
        """query_similar() returns only the outcomes whose description matches the query."""
        from agent_v0.server_v1.replay_learner import ActionOutcome

        # Record several outcomes; two of them target "Bouton Enregistrer".
        samples = [
            ("Bouton Enregistrer", "som_text", True),
            ("Bouton Annuler", "template", True),
            ("Bouton Enregistrer", "vlm_direct", False),
            ("Menu Fichier", "som_text", True),
        ]
        for index, (description, method, succeeded) in enumerate(samples):
            outcome = ActionOutcome(
                session_id="s1",
                action_id=f"a{index}",
                action_type="click",
                target_description=description,
                resolution_method=method,
                success=succeeded,
            )
            learner.record(outcome)

        # Look up "Enregistrer".
        matches = learner.query_similar(target_description="Enregistrer")
        assert len(matches) == 2
        # Both hits must concern "Enregistrer".
        assert all(
            "enregistrer" in match["outcome"]["target_description"].lower()
            for match in matches
        )

    def test_get_stats(self, learner):
        """Global and per-method success rates are computed from the recorded outcomes."""
        from agent_v0.server_v1.replay_learner import ActionOutcome

        recorded = [(True, "som"), (True, "som"), (False, "template"), (True, "vlm")]
        for succeeded, method in recorded:
            outcome = ActionOutcome(
                session_id="s1",
                action_id="a",
                action_type="click",
                success=succeeded,
                resolution_method=method,
            )
            learner.record(outcome)

        summary = learner.get_stats()
        assert summary["total"] == 4
        assert summary["success_rate"] == 0.75
        per_method = summary["methods"]
        assert per_method["som"]["success_rate"] == 1.0
        assert per_method["template"]["success_rate"] == 0.0

    def test_gemma4_indisponible_pas_de_crash(self, learner):
        """Recording and statistics work for a plain failed outcome (no VLM involved here)."""
        from agent_v0.server_v1.replay_learner import ActionOutcome

        # A simple record must not raise.
        failed_outcome = ActionOutcome(
            session_id="s1",
            action_id="a1",
            action_type="click",
            success=False,
            error="target_not_found",
        )
        learner.record(failed_outcome)

        summary = learner.get_stats()
        assert summary["total"] == 1
        assert summary["success_rate"] == 0.0

    def test_fichier_jsonl_format(self, learner):
        """The session file holds one valid JSON document per line."""
        from agent_v0.server_v1.replay_learner import ActionOutcome

        for action_id, action_type, succeeded in (
            ("a1", "click", True),
            ("a2", "type", False),
        ):
            learner.record(ActionOutcome(
                session_id="s1",
                action_id=action_id,
                action_type=action_type,
                success=succeeded,
            ))

        session_file = learner.learning_dir / "s1.jsonl"
        assert session_file.is_file()

        with open(session_file) as handle:
            raw_lines = list(handle)
        assert len(raw_lines) == 2
        for raw_line in raw_lines:
            record = json.loads(raw_line)  # must parse as JSON
            assert "action_id" in record
            assert "success" in record
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Boucle d'apprentissage : consolidation cross-workflow
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestLearningLoop:
    """Learning-loop tests: outcomes of past replays influence the following ones."""

    @pytest.fixture
    def learner(self):
        """Yield a ReplayLearner bound to a temp directory that is removed after the test."""
        storage_dir = tempfile.mkdtemp(prefix="test_learning_loop_")
        from agent_v0.server_v1.replay_learner import ReplayLearner

        replay_learner = ReplayLearner(learning_dir=storage_dir)
        yield replay_learner
        shutil.rmtree(storage_dir, ignore_errors=True)

    def test_best_strategy_apprend_du_succes(self, learner):
        """The recommended strategy is the one that succeeded, not the one that failed."""
        from agent_v0.server_v1.replay_learner import ActionOutcome

        # anchor_template fails three times on "Enregistrer".
        for index in range(3):
            learner.record(ActionOutcome(
                session_id=f"s{index}",
                action_id=f"a{index}",
                action_type="click",
                target_description="Enregistrer",
                resolution_method="anchor_template",
                success=False,
            ))
        # som_text_match succeeds twice on "Enregistrer" (ids 10 and 11).
        for index in range(10, 12):
            learner.record(ActionOutcome(
                session_id=f"s{index}",
                action_id=f"a{index}",
                action_type="click",
                target_description="Enregistrer",
                resolution_method="som_text_match",
                success=True,
            ))

        recommended = learner.best_strategy_for("Enregistrer")
        assert recommended == "som_text_match"

    def test_best_strategy_minimum_2_essais(self, learner):
        """A single successful attempt is not enough to get a recommendation."""
        from agent_v0.server_v1.replay_learner import ActionOutcome

        # Only one success is recorded.
        single_success = ActionOutcome(
            session_id="s1",
            action_id="a1",
            action_type="click",
            target_description="OK",
            resolution_method="vlm_direct",
            success=True,
        )
        learner.record(single_success)

        recommended = learner.best_strategy_for("OK")
        assert recommended is None

    def test_best_strategy_rien_si_historique_vide(self, learner):
        """With no history at all, no strategy is recommended."""
        recommended = learner.best_strategy_for("Inexistant")
        assert recommended is None

    def test_consolidate_workflow_enrichit_les_actions(self, learner):
        """consolidate_workflow() writes _learned_strategy into matching target_spec dicts."""
        from agent_v0.server_v1.replay_learner import ActionOutcome

        # History: som_text_match succeeded three times for "Fichier".
        for index in range(3):
            learner.record(ActionOutcome(
                session_id=f"s{index}",
                action_id=f"a{index}",
                action_type="click",
                target_description="Fichier",
                resolution_method="som_text_match",
                success=True,
            ))

        # Workflow containing one "Fichier" click, a typing step and an unknown target.
        known_click = {
            "type": "click",
            "target_spec": {"by_text": "Fichier", "window_title": "Bloc-notes"},
        }
        typing_step = {"type": "type", "text": "bonjour"}
        unknown_click = {"type": "click", "target_spec": {"by_text": "Inconnu"}}
        actions = [known_click, typing_step, unknown_click]

        enriched_count = learner.consolidate_workflow(actions)

        assert enriched_count == 1  # only "Fichier" has a history
        assert actions[0]["target_spec"]["_learned_strategy"] == "som_text_match"
        assert "_learned_strategy" not in actions[2].get("target_spec", {})

    def test_consolidation_cross_workflow(self, learner):
        """Successes recorded under workflow A are applied when consolidating workflow B."""
        from agent_v0.server_v1.replay_learner import ActionOutcome

        # Workflow A: "Enregistrer" succeeds three times with grounding_vlm.
        for index in range(3):
            learner.record(ActionOutcome(
                session_id="workflow_A",
                action_id=f"a{index}",
                action_type="click",
                target_description="Enregistrer",
                window_title="Bloc-notes",
                resolution_method="grounding_vlm",
                success=True,
            ))

        # Workflow B also clicks "Enregistrer".
        workflow_b = [
            {
                "type": "click",
                "target_spec": {"by_text": "Enregistrer", "window_title": "Bloc-notes"},
            },
        ]
        enriched_count = learner.consolidate_workflow(workflow_b, "workflow_B")

        assert enriched_count == 1
        assert workflow_b[0]["target_spec"]["_learned_strategy"] == "grounding_vlm"

    def test_grounding_reordonne_strategies(self):
        """With _learned_strategy set, GroundingEngine resolves via the template first."""
        from agent_v0.agent_v1.core.grounding import GroundingEngine

        fake_executor = MagicMock()
        fake_executor._capture_screenshot_b64.return_value = "fake"
        # Only the template matcher returns a hit; the other resolvers return None.
        fake_executor._template_match_anchor.return_value = {
            "resolved": True,
            "x_pct": 0.5,
            "y_pct": 0.5,
            "score": 0.9,
        }
        fake_executor._server_resolve_target.return_value = None
        fake_executor._hybrid_vlm_resolve.return_value = None

        engine = GroundingEngine(fake_executor)

        # _learned_strategy = anchor_template -> the template is expected to be tried first.
        target_spec = {
            "by_text": "OK",
            "anchor_image_base64": "abc",
            "_learned_strategy": "anchor_template",
        }
        located = engine.locate("http://server", target_spec, 0.5, 0.3, 1920, 1080)

        assert located.found is True
        assert located.method == "anchor_template"
        # The server resolver must NOT have been called.
        fake_executor._server_resolve_target.assert_not_called()
|
||||||
441
tests/unit/test_replay_critic.py
Normal file
441
tests/unit/test_replay_critic.py
Normal file
@@ -0,0 +1,441 @@
|
|||||||
|
"""
|
||||||
|
Tests unitaires pour le Critic (ReplayVerifier.verify_with_critic)
|
||||||
|
et l'enrichissement des actions avec intentions.
|
||||||
|
|
||||||
|
Vérifie les FONCTIONNALITÉS, pas juste la non-régression :
|
||||||
|
1. Le Critic fusionne correctement pixel + sémantique
|
||||||
|
2. La matrice de décision (4 cas) est correcte
|
||||||
|
3. L'enrichissement intentions parse bien les réponses gemma4
|
||||||
|
4. Les fallbacks fonctionnent quand le VLM est indisponible
|
||||||
|
"""
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import io
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
from unittest.mock import MagicMock, patch, Mock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
_ROOT = str(Path(__file__).resolve().parents[2])
|
||||||
|
if _ROOT not in sys.path:
|
||||||
|
sys.path.insert(0, _ROOT)
|
||||||
|
|
||||||
|
from agent_v0.server_v1.replay_verifier import ReplayVerifier, VerificationResult
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Fixtures
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def _make_screenshot_b64(width=100, height=100, color=(128, 128, 128)):
    """Build a solid-colour RGB image, encode it as JPEG and return it as base64 text.

    Args:
        width: image width passed to ``Image.new``.
        height: image height passed to ``Image.new``.
        color: RGB fill colour passed to ``Image.new``.

    Returns:
        The base64-encoded JPEG bytes, decoded to ``str``.
    """
    from PIL import Image

    jpeg_buffer = io.BytesIO()
    Image.new("RGB", (width, height), color).save(
        jpeg_buffer, format="JPEG", quality=50
    )
    encoded = base64.b64encode(jpeg_buffer.getvalue())
    return encoded.decode()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def verifier():
    """Provide a ReplayVerifier constructed with no arguments."""
    replay_verifier = ReplayVerifier()
    return replay_verifier
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def screenshot_gray():
    """Base64 JPEG of a 100x100 image filled with (128, 128, 128)."""
    return _make_screenshot_b64(width=100, height=100, color=(128, 128, 128))
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def screenshot_white():
    """Base64 JPEG of a 100x100 image filled with (255, 255, 255)."""
    return _make_screenshot_b64(width=100, height=100, color=(255, 255, 255))
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Tests VerificationResult — nouveaux champs sémantiques
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestVerificationResult:
    """Serialisation of VerificationResult, with and without the semantic_* fields."""

    def test_to_dict_sans_semantique(self):
        """Without semantic data, to_dict() has no ``semantic_verified`` key."""
        pixel_only = VerificationResult(
            verified=True,
            confidence=0.8,
            changes_detected=True,
            change_area_pct=5.0,
            suggestion="continue",
            detail="test",
        )
        serialized = pixel_only.to_dict()

        assert "semantic_verified" not in serialized
        assert serialized["verified"] is True
        assert serialized["confidence"] == 0.8

    def test_to_dict_avec_semantique(self):
        """With semantic data, the three semantic_* fields are serialised."""
        with_semantic = VerificationResult(
            verified=True,
            confidence=0.9,
            changes_detected=True,
            change_area_pct=5.0,
            suggestion="continue",
            detail="test",
            semantic_verified=True,
            semantic_detail="Bouton visible",
            semantic_elapsed_ms=1500.0,
        )
        serialized = with_semantic.to_dict()

        assert serialized["semantic_verified"] is True
        assert serialized["semantic_detail"] == "Bouton visible"
        assert serialized["semantic_elapsed_ms"] == 1500.0

    def test_to_dict_semantique_false(self):
        """``semantic_verified=False`` must still show up in the dict."""
        rejected = VerificationResult(
            verified=False,
            confidence=0.7,
            changes_detected=True,
            change_area_pct=5.0,
            suggestion="retry",
            semantic_verified=False,
            semantic_detail="Mauvais écran",
            semantic_elapsed_ms=2000.0,
        )
        serialized = rejected.to_dict()

        assert serialized["semantic_verified"] is False
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Tests verify_with_critic — matrice de décision
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestVerifyWithCritic:
    """Decision matrix of ReplayVerifier.verify_with_critic (pixel check x semantic check)."""

    @staticmethod
    def _run_critic(verifier, before, after, expected, action=None):
        """Call verify_with_critic() with a successful result and the given screenshots."""
        if action is None:
            action = {"type": "click", "action_id": "test"}
        return verifier.verify_with_critic(
            action=action,
            result={"success": True},
            screenshot_before=before,
            screenshot_after=after,
            expected_result=expected,
        )

    def test_sans_expected_result_retourne_pixel_seul(self, verifier, screenshot_gray):
        """With an empty expected_result, no semantic verdict is produced."""
        outcome = self._run_critic(
            verifier,
            before=screenshot_gray,
            after=screenshot_gray,
            expected="",  # nothing expected
        )
        # Pixel-only result: the semantic field stays unset.
        assert outcome.semantic_verified is None

    def test_sans_screenshots_pas_de_semantique(self, verifier):
        """Without screenshots the result is verified, but with low confidence."""
        outcome = self._run_critic(
            verifier,
            before=None,
            after=None,
            expected="Le fichier est ouvert",
        )
        # No screenshots -> confidence below 0.5.
        assert outcome.verified is True
        assert outcome.confidence < 0.5

    def test_pixel_pas_change_et_expected_result_skip_vlm(
        self, verifier, screenshot_gray,
    ):
        """Identical screenshots plus an expected_result -> retry, no semantic verdict."""
        outcome = self._run_critic(
            verifier,
            before=screenshot_gray,
            after=screenshot_gray,  # same image -> no change
            expected="Le menu s'est ouvert",
            action={"type": "click", "action_id": "test", "x_pct": 0.5, "y_pct": 0.5},
        )
        # No pixel change -> retry, and the semantic field is left unset.
        assert outcome.verified is False
        assert outcome.suggestion == "retry"
        assert outcome.semantic_verified is None

    @patch("agent_v0.server_v1.replay_verifier.ReplayVerifier._verify_semantic")
    def test_pixel_ok_semantic_ok(
        self, mock_semantic, verifier, screenshot_gray, screenshot_white,
    ):
        """Pixel change detected and semantic check positive -> verified, confidence >= 0.7."""
        mock_semantic.return_value = dict(
            verified=True,
            detail="Le menu est bien ouvert",
            elapsed_ms=2000.0,
        )
        outcome = self._run_critic(
            verifier,
            before=screenshot_gray,
            after=screenshot_white,  # different image -> change detected
            expected="Le menu s'est ouvert",
        )
        assert outcome.verified is True
        assert outcome.semantic_verified is True
        assert outcome.confidence >= 0.7
        assert "Critic OK" in outcome.detail

    @patch("agent_v0.server_v1.replay_verifier.ReplayVerifier._verify_semantic")
    def test_pixel_ok_semantic_non(
        self, mock_semantic, verifier, screenshot_gray, screenshot_white,
    ):
        """Pixel change detected but semantic check negative -> unexpected change, retry."""
        mock_semantic.return_value = dict(
            verified=False,
            detail="Une erreur est apparue au lieu du menu",
            elapsed_ms=2500.0,
        )
        outcome = self._run_critic(
            verifier,
            before=screenshot_gray,
            after=screenshot_white,
            expected="Le menu s'est ouvert",
        )
        assert outcome.verified is False
        assert outcome.semantic_verified is False
        assert outcome.suggestion == "retry"
        assert "Critic NON" in outcome.detail

    @patch("agent_v0.server_v1.replay_verifier.ReplayVerifier._verify_semantic")
    def test_vlm_indisponible_fallback_pixel(
        self, mock_semantic, verifier, screenshot_gray, screenshot_white,
    ):
        """Semantic check returns None (VLM down) -> the pixel verdict alone is used."""
        mock_semantic.return_value = None  # VLM unavailable
        outcome = self._run_critic(
            verifier,
            before=screenshot_gray,
            after=screenshot_white,
            expected="Le menu s'est ouvert",
        )
        # Pixel-only fallback: the change is detected, no semantic verdict.
        assert outcome.verified is True
        assert outcome.semantic_verified is None
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Tests _verify_semantic — parsing de la réponse VLM
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestVerifySemantic:
    """Parsing of the mocked VLM reply by ReplayVerifier._verify_semantic."""

    @staticmethod
    def _chat_reply(content):
        """Build a mocked HTTP response with ``ok=True`` whose JSON carries ``content``."""
        reply = MagicMock()
        reply.ok = True
        reply.json.return_value = {"message": {"content": content}}
        return reply

    @patch("requests.post")
    def test_parse_verdict_oui(self, mock_post, verifier, screenshot_white):
        """A "VERDICT: OUI" reply is parsed as verified, and the reason is kept."""
        mock_post.return_value = self._chat_reply(
            "VERDICT: OUI\nRAISON: Le fichier est bien ouvert"
        )
        verdict = verifier._verify_semantic(
            screenshot_before=screenshot_white,
            screenshot_after=screenshot_white,
            expected_result="Le fichier est ouvert",
        )
        assert verdict is not None
        assert verdict["verified"] is True
        assert "ouvert" in verdict["detail"]

    @patch("requests.post")
    def test_parse_verdict_non(self, mock_post, verifier, screenshot_white):
        """A "VERDICT: NON" reply is parsed as not verified."""
        mock_post.return_value = self._chat_reply(
            "VERDICT: NON\nRAISON: L'écran n'a pas changé"
        )
        verdict = verifier._verify_semantic(
            screenshot_before=screenshot_white,
            screenshot_after=screenshot_white,
            expected_result="Le menu s'est ouvert",
        )
        assert verdict is not None
        assert verdict["verified"] is False

    @patch("requests.post")
    def test_vlm_timeout_retourne_none(self, mock_post, verifier, screenshot_white):
        """A requests.Timeout raised by the POST makes _verify_semantic return None."""
        import requests as _real_requests

        mock_post.side_effect = _real_requests.Timeout("timeout")
        verdict = verifier._verify_semantic(
            screenshot_before=screenshot_white,
            screenshot_after=screenshot_white,
            expected_result="Le fichier est ouvert",
        )
        assert verdict is None

    def test_sans_screenshot_after_retourne_none(self, verifier):
        """With no screenshots at all, _verify_semantic returns None."""
        verdict = verifier._verify_semantic(
            screenshot_before=None,
            screenshot_after=None,
            expected_result="Le fichier est ouvert",
        )
        assert verdict is None
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Tests _merge_results — matrice pixel x sémantique
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestMergeResults:
    """The four pixel x semantic combinations handled by ReplayVerifier._merge_results."""

    @staticmethod
    def _pixel_changed():
        """Pixel verdict for a detected change (verified, confidence 0.7, 5% area)."""
        return VerificationResult(
            verified=True,
            confidence=0.7,
            changes_detected=True,
            change_area_pct=5.0,
            suggestion="continue",
        )

    @staticmethod
    def _pixel_unchanged(change_area_pct):
        """Pixel verdict for no detected change (not verified, confidence 0.5)."""
        return VerificationResult(
            verified=False,
            confidence=0.5,
            changes_detected=False,
            change_area_pct=change_area_pct,
            suggestion="retry",
        )

    def test_pixel_ok_sem_ok(self, verifier):
        """Pixel OK + semantic OK -> verified with confidence of at least 0.7."""
        semantic = {"verified": True, "detail": "OK", "elapsed_ms": 1000}
        merged = verifier._merge_results(self._pixel_changed(), semantic)

        assert merged.verified is True
        assert merged.semantic_verified is True
        assert merged.confidence >= 0.7

    def test_pixel_ok_sem_non(self, verifier):
        """Pixel OK + semantic NO -> unexpected change, retry."""
        semantic = {"verified": False, "detail": "Erreur popup", "elapsed_ms": 2000}
        merged = verifier._merge_results(self._pixel_changed(), semantic)

        assert merged.verified is False
        assert merged.semantic_verified is False
        assert merged.suggestion == "retry"

    def test_pixel_non_sem_ok(self, verifier):
        """Pixel unchanged + semantic OK -> subtle state, continue."""
        semantic = {"verified": True, "detail": "Onglet déjà actif", "elapsed_ms": 1500}
        merged = verifier._merge_results(self._pixel_unchanged(0.1), semantic)

        assert merged.verified is True
        assert merged.semantic_verified is True
        assert merged.suggestion == "continue"

    def test_pixel_non_sem_non(self, verifier):
        """Pixel unchanged + semantic NO -> failure reported with confidence >= 0.7."""
        semantic = {"verified": False, "detail": "Rien ne s'est passé", "elapsed_ms": 3000}
        merged = verifier._merge_results(self._pixel_unchanged(0.0), semantic)

        assert merged.verified is False
        assert merged.semantic_verified is False
        assert merged.confidence >= 0.7  # high confidence in the failure
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Tests enrichissement intentions (stream_processor)
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestEnrichActionsWithIntentions:
    """Behaviour of stream_processor._enrich_actions_with_intentions with mocked HTTP calls."""

    @staticmethod
    def _enrich_in_temp_dir(enrich, actions):
        """Run ``enrich(actions, <temp dir containing "shots/">)`` and always delete the dir."""
        import shutil
        import tempfile

        session_dir = Path(tempfile.mkdtemp())
        try:
            (session_dir / "shots").mkdir()
            enrich(actions, session_dir)
        finally:
            shutil.rmtree(session_dir)

    @staticmethod
    def _ok_response(payload=None):
        """Mocked HTTP response with ``ok=True``; ``payload`` (if given) is its JSON body."""
        response = MagicMock()
        response.ok = True
        if payload is not None:
            response.json.return_value = payload
        return response

    @patch("requests.post")
    @patch("requests.get")
    def test_enrichissement_parse_reponse_gemma4(self, mock_get, mock_post):
        """A structured reply is parsed into intention / expected_state / expected_result."""
        from agent_v0.server_v1.stream_processor import _enrich_actions_with_intentions

        # The GET probe succeeds (model reported as available).
        mock_get.return_value = self._ok_response()

        reply_text = "\n".join([
            "INTENTION: Ouvrir le fichier client dans le logiciel",
            "AVANT: Le logiciel est ouvert sur la page d'accueil",
            "APRÈS: Le fichier client est affiché dans la fenêtre",
        ])
        mock_post.return_value = self._ok_response({"message": {"content": reply_text}})

        click_action = {
            "type": "click",
            "action_id": "act_001",
            "target_spec": {"by_text": "Ouvrir", "window_title": "Logiciel"},
        }
        wait_action = {
            "type": "wait",
            "action_id": "act_002",
            "duration_ms": 1000,
        }
        actions = [click_action, wait_action]

        self._enrich_in_temp_dir(_enrich_actions_with_intentions, actions)

        # The click action must be enriched.
        enriched = actions[0]
        assert enriched.get("intention") == "Ouvrir le fichier client dans le logiciel"
        assert enriched.get("expected_state") == "Le logiciel est ouvert sur la page d'accueil"
        assert enriched.get("expected_result") == "Le fichier client est affiché dans la fenêtre"
        # expected_state must also be copied into target_spec.
        assert enriched["target_spec"]["expected_state"] == "Le logiciel est ouvert sur la page d'accueil"

        # The wait action must NOT be enriched.
        assert "intention" not in actions[1]

    @patch("requests.get")
    def test_gemma4_indisponible_pas_de_crash(self, mock_get):
        """If the GET probe raises ConnectionError, enrichment adds nothing and does not raise."""
        from agent_v0.server_v1.stream_processor import _enrich_actions_with_intentions

        mock_get.side_effect = ConnectionError("gemma4 down")

        actions = [
            {"type": "click", "action_id": "act_001", "target_spec": {"by_text": "OK"}},
        ]

        self._enrich_in_temp_dir(_enrich_actions_with_intentions, actions)

        # No crash and no intention added.
        assert "intention" not in actions[0]

    @patch("requests.post")
    @patch("requests.get")
    def test_reponse_gemma4_malformee(self, mock_get, mock_post):
        """An unstructured reply does not raise and adds no intention."""
        from agent_v0.server_v1.stream_processor import _enrich_actions_with_intentions

        mock_get.return_value = self._ok_response()
        mock_post.return_value = self._ok_response(
            {"message": {"content": "Je ne comprends pas cette demande."}}
        )

        actions = [
            {"type": "click", "action_id": "act_001", "target_spec": {"by_text": "OK"}},
        ]

        self._enrich_in_temp_dir(_enrich_actions_with_intentions, actions)

        # No crash, but no intention either.
        assert "intention" not in actions[0]
|
||||||
762
tests/unit/test_task_planner.py
Normal file
762
tests/unit/test_task_planner.py
Normal file
@@ -0,0 +1,762 @@
|
|||||||
|
# tests/unit/test_task_planner.py
|
||||||
|
"""
|
||||||
|
Tests unitaires du TaskPlanner (planificateur MACRO).
|
||||||
|
|
||||||
|
Vérifie :
|
||||||
|
1. La compréhension d'ordres simples (understand)
|
||||||
|
2. Le matching de workflows par description sémantique
|
||||||
|
3. La détection de boucles et l'extraction de paramètres
|
||||||
|
4. La conversion étapes → actions JSON (format correct)
|
||||||
|
5. L'extraction de descriptions de session
|
||||||
|
|
||||||
|
Toutes les réponses gemma4 sont mockées pour la reproductibilité.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
from pathlib import Path
|
||||||
|
from unittest.mock import MagicMock, patch, Mock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
_ROOT = str(Path(__file__).resolve().parents[2])
|
||||||
|
if _ROOT not in sys.path:
|
||||||
|
sys.path.insert(0, _ROOT)
|
||||||
|
|
||||||
|
from agent_v0.server_v1.task_planner import TaskPlanner, TaskPlan
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Fixtures
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
@pytest.fixture
def planner():
    """Provide a TaskPlanner configured with a dummy gemma4 port."""
    settings = {"gemma4_port": "11435", "domain_id": "generic"}
    return TaskPlanner(**settings)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def sample_workflows():
    """Provide the workflows available to the matching tests."""
    fields = ("session_id", "name", "description", "machine", "event_count")
    rows = [
        (
            "sess_001",
            "Bloc-notes",
            "Ouvrir Bloc-notes via Exécuter (Win+R) et écrire du texte",
            "PC-01",
            25,
        ),
        (
            "sess_002",
            "Explorateur de fichiers",
            "Naviguer dans l'Explorateur de fichiers et ouvrir des images",
            "PC-01",
            40,
        ),
        (
            "sess_003",
            "DxCare, Codage CIM-10",
            "Ouvrir un dossier patient dans DxCare et coder les diagnostics CIM-10",
            "PC-TIM",
            80,
        ),
    ]
    # One dict per workflow, keys in the same order as `fields`.
    return [dict(zip(fields, row)) for row in rows]
|
||||||
|
|
||||||
|
|
||||||
|
def _mock_gemma4_response(content: str):
|
||||||
|
"""Créer un mock de réponse HTTP gemma4."""
|
||||||
|
mock_resp = MagicMock()
|
||||||
|
mock_resp.ok = True
|
||||||
|
mock_resp.status_code = 200
|
||||||
|
mock_resp.json.return_value = {
|
||||||
|
"message": {"content": content}
|
||||||
|
}
|
||||||
|
return mock_resp
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Tests : understand — ordre simple
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
class TestUnderstandOrdreSimple:
    """Check that understand() correctly parses gemma4 replies."""

    def test_understand_ordre_simple(self, planner, sample_workflows):
        """'Ouvre le bloc-notes' is reported as understood."""
        reply = (
            "COMPRIS: OUI\n"
            "WORKFLOW: 1\n"
            "CONFIANCE: 0.9\n"
            "PARAMETRES: AUCUN\n"
            "BOUCLE: NON\n"
            "SOURCE_BOUCLE: aucun\n"
            "PLAN:\n"
            "1. Ouvrir le Bloc-notes via Win+R\n"
            "2. Taper notepad et valider\n"
        )
        fake_response = _mock_gemma4_response(reply)

        with patch("requests.post", return_value=fake_response):
            plan = planner.understand(
                "Ouvre le bloc-notes",
                available_workflows=sample_workflows,
            )

        assert plan.understood is True
        assert plan.instruction == "Ouvre le bloc-notes"

    def test_understand_instruction_non_comprise(self, planner):
        """An incomprehensible instruction gives understood=False."""
        reply = "COMPRIS: NON\nWORKFLOW: AUCUN\nBOUCLE: NON\n"
        fake_response = _mock_gemma4_response(reply)

        with patch("requests.post", return_value=fake_response):
            plan = planner.understand("xyzzy blah blah")

        assert plan.understood is False

    def test_understand_gemma4_erreur_http(self, planner):
        """A gemma4 HTTP error is reported in plan.error."""
        failing_response = MagicMock(ok=False, status_code=500)

        with patch("requests.post", return_value=failing_response):
            plan = planner.understand("Ouvre le bloc-notes")

        assert plan.understood is False
        assert "500" in plan.error

    def test_understand_gemma4_timeout(self, planner):
        """A gemma4 timeout is reported in plan.error."""
        import requests

        with patch("requests.post", side_effect=requests.Timeout("timeout")):
            plan = planner.understand("Ouvre le bloc-notes")

        assert plan.understood is False
        error_text = plan.error.lower()
        assert "erreur" in error_text or "timeout" in error_text
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Tests : matching workflow
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
class TestUnderstandIdentifieWorkflow:
    """Check that workflow matching works."""

    def test_understand_identifie_workflow(self, planner, sample_workflows):
        """When a workflow matches, workflow_match is filled in."""
        reply = (
            "COMPRIS: OUI\n"
            "WORKFLOW: 1\n"
            "CONFIANCE: 0.9\n"
            "PARAMETRES: AUCUN\n"
            "BOUCLE: NON\n"
            "SOURCE_BOUCLE: aucun\n"
            "PLAN:\n"
            "1. Lancer le Bloc-notes\n"
        )
        fake_response = _mock_gemma4_response(reply)

        with patch("requests.post", return_value=fake_response):
            plan = planner.understand(
                "Ouvre le bloc-notes",
                available_workflows=sample_workflows,
            )

        assert plan.workflow_match == "sess_001"
        assert plan.workflow_name == "Bloc-notes"
        assert plan.mode == "replay"
        assert plan.match_confidence >= 0.8

    def test_understand_workflow_aucun_match(self, planner, sample_workflows):
        """No matching workflow results in free mode."""
        reply = (
            "COMPRIS: OUI\n"
            "WORKFLOW: AUCUN\n"
            "PARAMETRES: AUCUN\n"
            "BOUCLE: NON\n"
            "SOURCE_BOUCLE: aucun\n"
            "PLAN:\n"
            "1. Ouvrir Chrome\n"
            "2. Aller sur Google\n"
        )
        fake_response = _mock_gemma4_response(reply)

        with patch("requests.post", return_value=fake_response):
            plan = planner.understand(
                "Recherche voiture sur Google",
                available_workflows=sample_workflows,
            )

        assert plan.understood is True
        assert plan.workflow_match == ""
        assert plan.mode == "free"

    def test_understand_workflow_second_match(self, planner, sample_workflows):
        """Workflow 2 is selected correctly."""
        reply = (
            "COMPRIS: OUI\n"
            "WORKFLOW: 2\n"
            "CONFIANCE: 0.85\n"
            "BOUCLE: NON\n"
            "PLAN:\n"
            "1. Ouvrir l'explorateur de fichiers\n"
        )
        fake_response = _mock_gemma4_response(reply)

        with patch("requests.post", return_value=fake_response):
            plan = planner.understand(
                "Ouvre mes images",
                available_workflows=sample_workflows,
            )

        assert plan.workflow_match == "sess_002"
        assert plan.workflow_name == "Explorateur de fichiers"

    def test_understand_workflow_avec_description_dans_prompt(self, planner, sample_workflows):
        """The prompt sent to gemma4 includes the workflow descriptions."""
        sent_payload = {}

        def fake_post(url, json=None, **kwargs):
            # Record the JSON body handed to requests.post for inspection below.
            sent_payload.update(json or {})
            return _mock_gemma4_response("COMPRIS: OUI\nWORKFLOW: AUCUN\nBOUCLE: NON\n")

        with patch("requests.post", side_effect=fake_post):
            planner.understand(
                "Ouvre le bloc-notes",
                available_workflows=sample_workflows,
            )

        prompt_content = sent_payload["messages"][0]["content"]
        # The descriptions must appear in the prompt.
        assert "Ouvrir Bloc-notes via Exécuter" in prompt_content
        assert "Naviguer dans l'Explorateur" in prompt_content
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Tests : détection de boucle
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
class TestUnderstandDetecteBoucle:
    """Check loop detection."""

    def test_understand_detecte_boucle(self, planner, sample_workflows):
        """'traite TOUS les dossiers' gives is_loop=True."""
        reply = (
            "COMPRIS: OUI\n"
            "WORKFLOW: 3\n"
            "CONFIANCE: 0.8\n"
            "PARAMETRES: AUCUN\n"
            "BOUCLE: OUI\n"
            "SOURCE_BOUCLE: écran\n"
            "PLAN:\n"
            "1. Pour chaque dossier dans la liste\n"
            "2. Ouvrir le dossier\n"
            "3. Coder les diagnostics\n"
        )
        fake_response = _mock_gemma4_response(reply)

        with patch("requests.post", return_value=fake_response):
            plan = planner.understand(
                "Traite TOUS les dossiers de la liste",
                available_workflows=sample_workflows,
            )

        assert plan.is_loop is True
        assert plan.loop_source == "écran"

    def test_understand_pas_de_boucle(self, planner):
        """A simple order gives is_loop=False."""
        reply = (
            "COMPRIS: OUI\n"
            "WORKFLOW: AUCUN\n"
            "BOUCLE: NON\n"
            "SOURCE_BOUCLE: aucun\n"
            "PLAN:\n"
            "1. Ouvrir le navigateur\n"
        )
        fake_response = _mock_gemma4_response(reply)

        with patch("requests.post", return_value=fake_response):
            plan = planner.understand("Ouvre le navigateur")

        assert plan.is_loop is False
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Tests : extraction de paramètres
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
class TestUnderstandExtraitParametres:
    """Check parameter extraction."""

    def test_understand_extrait_parametres(self, planner, sample_workflows):
        """'dossiers de janvier' yields parameters containing mois=janvier."""
        reply = (
            "COMPRIS: OUI\n"
            "WORKFLOW: 3\n"
            "CONFIANCE: 0.85\n"
            "PARAMETRES: mois=janvier\n"
            "BOUCLE: OUI\n"
            "SOURCE_BOUCLE: écran\n"
            "PLAN:\n"
            "1. Filtrer les dossiers de janvier\n"
        )
        fake_response = _mock_gemma4_response(reply)

        with patch("requests.post", return_value=fake_response):
            plan = planner.understand(
                "Traite les dossiers de janvier",
                available_workflows=sample_workflows,
            )

        assert "mois" in plan.parameters
        assert plan.parameters["mois"] == "janvier"

    def test_understand_parametres_multiples(self, planner):
        """Several parameters given on separate lines."""
        reply = (
            "COMPRIS: OUI\n"
            "WORKFLOW: AUCUN\n"
            "PARAMETRES:\n"
            "- patient=DUPONT\n"
            "- date=2026-01-15\n"
            "BOUCLE: NON\n"
            "PLAN:\n"
            "1. Rechercher le patient DUPONT\n"
        )
        fake_response = _mock_gemma4_response(reply)

        with patch("requests.post", return_value=fake_response):
            plan = planner.understand("Cherche le dossier de DUPONT du 15 janvier")

        extracted = plan.parameters
        assert extracted.get("patient") == "DUPONT"
        assert extracted.get("date") == "2026-01-15"

    def test_understand_parametres_inline(self, planner):
        """Parameters given on the same line as PARAMETRES:."""
        reply = (
            "COMPRIS: OUI\n"
            "WORKFLOW: AUCUN\n"
            "PARAMETRES: nom=Martin, ville=Paris\n"
            "BOUCLE: NON\n"
            "PLAN:\n"
            "1. Chercher Martin à Paris\n"
        )
        fake_response = _mock_gemma4_response(reply)

        with patch("requests.post", return_value=fake_response):
            plan = planner.understand("Cherche Martin à Paris")

        extracted = plan.parameters
        assert extracted.get("nom") == "Martin"
        assert extracted.get("ville") == "Paris"
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Tests : _parse_understanding (parsing tolérant)
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
class TestParseUnderstanding:
    """Exercise the tolerant parsing of varied gemma4 replies."""

    def test_parse_markdown_gras(self, planner):
        """A reply using **bold** markers is parsed correctly."""
        blank_plan = TaskPlan(instruction="test")
        content = (
            "**COMPRIS:** OUI\n"
            "**WORKFLOW:** AUCUN\n"
            "**BOUCLE:** NON\n"
            "**PLAN:**\n"
            "1. Première étape\n"
        )
        result = planner._parse_understanding(blank_plan, content, [])
        assert result.understood is True
        assert result.mode == "free"

    def test_parse_confiance_pourcentage(self, planner, sample_workflows):
        """CONFIANCE: 90% gives match_confidence=0.9."""
        blank_plan = TaskPlan(instruction="test")
        content = (
            "COMPRIS: OUI\n"
            "WORKFLOW: 1\n"
            "CONFIANCE: 90%\n"
            "BOUCLE: NON\n"
        )
        result = planner._parse_understanding(blank_plan, content, sample_workflows)
        assert result.match_confidence == pytest.approx(0.9)

    def test_parse_confiance_virgule(self, planner, sample_workflows):
        """CONFIANCE: 0,85 gives match_confidence=0.85."""
        blank_plan = TaskPlan(instruction="test")
        content = (
            "COMPRIS: OUI\n"
            "WORKFLOW: 1\n"
            "CONFIANCE: 0,85\n"
            "BOUCLE: NON\n"
        )
        result = planner._parse_understanding(blank_plan, content, sample_workflows)
        assert result.match_confidence == pytest.approx(0.85)

    def test_parse_workflow_avec_parentheses(self, planner, sample_workflows):
        """WORKFLOW: 2 (Explorateur) resolves to the second workflow."""
        blank_plan = TaskPlan(instruction="test")
        content = (
            "COMPRIS: OUI\n"
            "WORKFLOW: 2 (Explorateur de fichiers)\n"
            "BOUCLE: NON\n"
        )
        result = planner._parse_understanding(blank_plan, content, sample_workflows)
        assert result.workflow_match == "sess_002"

    def test_parse_workflow_aucun_variantes(self, planner, sample_workflows):
        """Every variant of 'none' is recognised."""
        none_spellings = ("AUCUN", "None", "N/A", "-", "NON")
        for val in none_spellings:
            # A fresh plan per spelling, as each iteration is parsed separately.
            blank_plan = TaskPlan(instruction="test")
            content = f"COMPRIS: OUI\nWORKFLOW: {val}\nBOUCLE: NON\n"
            result = planner._parse_understanding(blank_plan, content, sample_workflows)
            assert result.workflow_match == "", f"Devrait être vide pour '{val}'"

    def test_parse_etapes_tirets(self, planner):
        """Steps written with dashes are added to the plan."""
        blank_plan = TaskPlan(instruction="test")
        content = (
            "COMPRIS: OUI\n"
            "WORKFLOW: AUCUN\n"
            "BOUCLE: NON\n"
            "PLAN:\n"
            "- Ouvrir l'application\n"
            "- Cliquer sur Fichier\n"
            "- Sauvegarder\n"
        )
        result = planner._parse_understanding(blank_plan, content, [])
        assert len(result.steps) == 3
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Tests : _steps_to_actions
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
class TestStepsToActions:
    """Check the conversion of plan steps into JSON actions."""

    def test_steps_to_actions_format(self, planner):
        """Generated actions have the expected format (type, target_spec, etc.)."""
        reply = (
            '{"type": "click", "target_spec": {"by_text": "Rechercher"}}\n'
            '{"type": "type", "text": "bloc-notes"}\n'
            '{"type": "key_combo", "keys": ["enter"]}\n'
            '{"type": "wait", "duration_ms": 2000}\n'
        )
        steps = [{"description": "1. Ouvrir le bloc-notes"}]
        fake_response = _mock_gemma4_response(reply)

        with patch("requests.post", return_value=fake_response):
            actions = planner._steps_to_actions(steps, {})

        assert len(actions) == 4
        click, typing, combo, pause = actions[0], actions[1], actions[2], actions[3]
        assert click["type"] == "click"
        # visual_mode is not in the mocked reply; the planner is expected to add it.
        assert click["visual_mode"] is True
        assert click["target_spec"]["by_text"] == "Rechercher"
        assert typing["type"] == "type"
        assert typing["text"] == "bloc-notes"
        assert combo["type"] == "key_combo"
        assert combo["keys"] == ["enter"]
        assert pause["type"] == "wait"
        assert pause["duration_ms"] == 2000

    def test_steps_to_actions_json_array(self, planner):
        """A JSON array returned by gemma4 is parsed correctly."""
        reply = (
            'Voici les actions :\n'
            '```json\n'
            '[\n'
            ' {"type": "click", "target_spec": {"by_text": "Fichier"}},\n'
            ' {"type": "click", "target_spec": {"by_text": "Ouvrir"}}\n'
            ']\n'
            '```\n'
        )
        steps = [{"description": "1. Ouvrir un fichier"}]
        fake_response = _mock_gemma4_response(reply)

        with patch("requests.post", return_value=fake_response):
            actions = planner._steps_to_actions(steps, {})

        assert len(actions) == 2
        assert actions[0]["target_spec"]["by_text"] == "Fichier"
        assert actions[1]["target_spec"]["by_text"] == "Ouvrir"

    def test_steps_to_actions_nested_json(self, planner):
        """Nested JSON (target_spec) is parsed correctly."""
        reply = (
            '{"type": "click", "target_spec": {"by_text": "OK", "window_title": "Confirmation"}}\n'
        )
        steps = [{"description": "1. Confirmer"}]
        fake_response = _mock_gemma4_response(reply)

        with patch("requests.post", return_value=fake_response):
            actions = planner._steps_to_actions(steps, {})

        assert len(actions) == 1
        assert actions[0]["target_spec"]["window_title"] == "Confirmation"

    def test_steps_to_actions_gemma4_erreur(self, planner):
        """A gemma4 error yields an empty list."""
        failing_response = MagicMock(ok=False)
        steps = [{"description": "1. Faire quelque chose"}]

        with patch("requests.post", return_value=failing_response):
            actions = planner._steps_to_actions(steps, {})

        assert actions == []

    def test_steps_to_actions_filtre_types_invalides(self, planner):
        """Only valid types (click, type, key_combo, wait) are accepted."""
        reply = (
            '{"type": "click", "target_spec": {"by_text": "OK"}}\n'
            '{"type": "invalid_action", "foo": "bar"}\n'
            '{"type": "wait", "duration_ms": 500}\n'
            '{"not_a_type": "test"}\n'
        )
        steps = [{"description": "1. Test"}]
        fake_response = _mock_gemma4_response(reply)

        with patch("requests.post", return_value=fake_response):
            actions = planner._steps_to_actions(steps, {})

        assert len(actions) == 2
        assert actions[0]["type"] == "click"
        assert actions[1]["type"] == "wait"
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Tests : _parse_actions_json (parsing robuste)
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
class TestParseActionsJson:
    """Exercise the robust parsing of JSON actions."""

    def test_parse_json_une_par_ligne(self):
        """One JSON action per line."""
        content = (
            '{"type": "click", "target_spec": {"by_text": "A"}}\n'
            '{"type": "type", "text": "hello"}\n'
        )
        parsed = TaskPlanner._parse_actions_json(content)
        assert len(parsed) == 2

    def test_parse_json_array(self):
        """A JSON array."""
        content = '[{"type": "click", "target_spec": {"by_text": "A"}}, {"type": "wait", "duration_ms": 1000}]'
        parsed = TaskPlanner._parse_actions_json(content)
        assert len(parsed) == 2

    def test_parse_json_avec_texte_autour(self):
        """JSON surrounded by free-text commentary."""
        content = (
            "Voici les actions RPA :\n\n"
            '{"type": "click", "target_spec": {"by_text": "Envoyer"}}\n'
            "\n"
            "C'est tout.\n"
        )
        parsed = TaskPlanner._parse_actions_json(content)
        assert len(parsed) == 1
        assert parsed[0]["target_spec"]["by_text"] == "Envoyer"

    def test_parse_json_vide(self):
        """Empty or JSON-free content gives an empty list."""
        assert TaskPlanner._parse_actions_json("") == []
        assert TaskPlanner._parse_actions_json("Pas de JSON ici") == []

    def test_parse_json_markdown_code_block(self):
        """JSON inside a markdown code block."""
        content = (
            "```json\n"
            '{"type": "type", "text": "bonjour"}\n'
            "```\n"
        )
        parsed = TaskPlanner._parse_actions_json(content)
        assert len(parsed) == 1
        assert parsed[0]["text"] == "bonjour"
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Tests : _extract_session_description
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
class TestExtractSessionDescription:
|
||||||
|
"""Vérifier que les descriptions de session sont lisibles et sémantiques."""
|
||||||
|
|
||||||
|
def _write_events(self, tmp_path, events):
|
||||||
|
"""Écrire des événements dans un fichier JSONL temporaire."""
|
||||||
|
events_file = tmp_path / "live_events.jsonl"
|
||||||
|
with open(events_file, "w") as f:
|
||||||
|
for evt in events:
|
||||||
|
f.write(json.dumps(evt, ensure_ascii=False) + "\n")
|
||||||
|
return events_file
|
||||||
|
|
||||||
|
def test_extract_session_description_bloc_notes(self, tmp_path):
|
||||||
|
"""Session Bloc-notes via Win+R → description sémantique."""
|
||||||
|
events = [
|
||||||
|
{"event": {"type": "key_combo", "keys": ["win", "r"],
|
||||||
|
"window": {"title": "Bureau"}}},
|
||||||
|
{"event": {"type": "window_focus_change",
|
||||||
|
"from": {"title": "Bureau"},
|
||||||
|
"to": {"title": "Exécuter"}}},
|
||||||
|
{"event": {"type": "text_input", "text": "notepad",
|
||||||
|
"window": {"title": "Exécuter"}}},
|
||||||
|
{"event": {"type": "mouse_click", "button": "left",
|
||||||
|
"window": {"title": "Exécuter"}}},
|
||||||
|
{"event": {"type": "window_focus_change",
|
||||||
|
"from": {"title": "Exécuter"},
|
||||||
|
"to": {"title": "Sans titre – Bloc-notes"}}},
|
||||||
|
{"event": {"type": "text_input", "text": "Bonjour le monde",
|
||||||
|
"window": {"title": "Sans titre – Bloc-notes"}}},
|
||||||
|
]
|
||||||
|
events_file = self._write_events(tmp_path, events)
|
||||||
|
|
||||||
|
# Importer depuis api_stream (la fonction est au niveau module)
|
||||||
|
from agent_v0.server_v1.api_stream import _extract_session_description
|
||||||
|
desc = _extract_session_description(events_file)
|
||||||
|
|
||||||
|
assert desc["event_count"] == 6
|
||||||
|
# La description doit être lisible et pas juste "Bloc-notes, Exécuter"
|
||||||
|
description = desc["description"]
|
||||||
|
assert "Bloc-notes" in description or "bloc-notes" in description.lower()
|
||||||
|
# Le nom doit contenir l'app
|
||||||
|
assert "Bloc-notes" in desc["name"]
|
||||||
|
|
||||||
|
def test_extract_session_description_explorateur(self, tmp_path):
|
||||||
|
"""Session Explorateur de fichiers → description pertinente."""
|
||||||
|
events = [
|
||||||
|
{"event": {"type": "window_focus_change",
|
||||||
|
"from": {"title": "Bureau"},
|
||||||
|
"to": {"title": "Images – Explorateur de fichiers"}}},
|
||||||
|
{"event": {"type": "mouse_click", "button": "left",
|
||||||
|
"window": {"title": "Images – Explorateur de fichiers"}}},
|
||||||
|
{"event": {"type": "mouse_click", "button": "left",
|
||||||
|
"window": {"title": "Images – Explorateur de fichiers"}}},
|
||||||
|
{"event": {"type": "mouse_click", "button": "left",
|
||||||
|
"window": {"title": "Images – Explorateur de fichiers"}}},
|
||||||
|
]
|
||||||
|
events_file = self._write_events(tmp_path, events)
|
||||||
|
|
||||||
|
from agent_v0.server_v1.api_stream import _extract_session_description
|
||||||
|
desc = _extract_session_description(events_file)
|
||||||
|
|
||||||
|
assert "Explorateur" in desc["name"] or "Explorateur" in desc["description"]
|
||||||
|
|
||||||
|
def test_extract_session_description_vide(self, tmp_path):
|
||||||
|
"""Fichier vide → description par défaut."""
|
||||||
|
events_file = self._write_events(tmp_path, [])
|
||||||
|
|
||||||
|
from agent_v0.server_v1.api_stream import _extract_session_description
|
||||||
|
desc = _extract_session_description(events_file)
|
||||||
|
|
||||||
|
assert desc["event_count"] == 0
|
||||||
|
assert desc["name"] == "Session sans nom"
|
||||||
|
|
||||||
|
def test_extract_session_description_cmd(self, tmp_path):
|
||||||
|
"""Session avec cmd.exe → description contient cmd."""
|
||||||
|
events = [
|
||||||
|
{"event": {"type": "window_focus_change",
|
||||||
|
"from": {"title": "Bureau"},
|
||||||
|
"to": {"title": "C:\\Windows\\system32\\cmd.exe"}}},
|
||||||
|
{"event": {"type": "text_input", "text": "dir",
|
||||||
|
"window": {"title": "C:\\Windows\\system32\\cmd.exe"}}},
|
||||||
|
{"event": {"type": "text_input", "text": "cd documents",
|
||||||
|
"window": {"title": "C:\\Windows\\system32\\cmd.exe"}}},
|
||||||
|
]
|
||||||
|
events_file = self._write_events(tmp_path, events)
|
||||||
|
|
||||||
|
from agent_v0.server_v1.api_stream import _extract_session_description
|
||||||
|
desc = _extract_session_description(events_file)
|
||||||
|
|
||||||
|
assert desc["event_count"] == 3
|
||||||
|
# Le nom ou la description doit mentionner cmd
|
||||||
|
full = f"{desc['name']} {desc['description']}"
|
||||||
|
assert "cmd" in full.lower()
|
||||||
|
|
||||||
|
def test_extract_session_description_recherche_windows(self, tmp_path):
|
||||||
|
"""Session avec recherche Windows (Win+S) → description mentionne recherche."""
|
||||||
|
events = [
|
||||||
|
{"event": {"type": "key_combo", "keys": ["win", "s"],
|
||||||
|
"window": {"title": "Bureau"}}},
|
||||||
|
{"event": {"type": "window_focus_change",
|
||||||
|
"from": {"title": "Bureau"},
|
||||||
|
"to": {"title": "Rechercher"}}},
|
||||||
|
{"event": {"type": "text_input", "text": "calculator",
|
||||||
|
"window": {"title": "Rechercher"}}},
|
||||||
|
]
|
||||||
|
events_file = self._write_events(tmp_path, events)
|
||||||
|
|
||||||
|
from agent_v0.server_v1.api_stream import _extract_session_description
|
||||||
|
desc = _extract_session_description(events_file)
|
||||||
|
|
||||||
|
# La description doit mentionner la recherche Windows
|
||||||
|
assert "recherche" in desc["description"].lower()
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Tests : list_capabilities
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
class TestListCapabilities:
    """Check the capability listing."""

    def test_list_capabilities_avec_workflows(self, planner, sample_workflows):
        """With workflows, the listing is readable text that names them."""
        listing = planner.list_capabilities(sample_workflows)
        for expected in ("Léa sait faire", "Bloc-notes"):
            assert expected in listing

    def test_list_capabilities_sans_workflows(self, planner):
        """Without workflows, a help message is returned."""
        listing = planner.list_capabilities([])
        assert "pas encore appris" in listing
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Tests : execute (mode replay et free)
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
class TestExecute:
    """Check plan execution."""

    def test_execute_replay(self, planner):
        """In replay mode the callback is called with the right session_id."""
        replay_plan = TaskPlan(
            instruction="Ouvre le bloc-notes",
            understood=True,
            workflow_match="sess_001",
            workflow_name="Bloc-notes",
            mode="replay",
        )
        on_replay = MagicMock(return_value="replay_123")

        outcome = planner.execute(replay_plan, replay_callback=on_replay)

        assert outcome.success is True
        on_replay.assert_called_once_with(
            session_id="sess_001",
            machine_id="default",
            params={},
        )

    def test_execute_non_compris(self, planner):
        """A plan that was not understood fails."""
        unclear_plan = TaskPlan(instruction="blah", understood=False)

        outcome = planner.execute(unclear_plan)

        assert outcome.success is False
        # A lowercase needle found in the raw summary is also found in its
        # lowercased form, so the single case-insensitive check covers both.
        assert "non comprise" in outcome.summary.lower()

    def test_execute_sans_callback(self, planner):
        """Replay mode without a callback fails."""
        replay_plan = TaskPlan(
            instruction="test",
            understood=True,
            workflow_match="sess_001",
            mode="replay",
        )

        outcome = planner.execute(replay_plan, replay_callback=None)

        assert outcome.success is False
|
||||||
419
tests/visual/test_grounding_benchmark.py
Normal file
419
tests/visual/test_grounding_benchmark.py
Normal file
@@ -0,0 +1,419 @@
|
|||||||
|
"""
|
||||||
|
Benchmark de grounding — 3 approches testées en boucle.
|
||||||
|
|
||||||
|
Compare la robustesse et la précision de :
|
||||||
|
1. Baseline : qwen2.5vl direct
|
||||||
|
2. Zoom progressif : 2 passes (full → crop → re-grounding)
|
||||||
|
3. OCR-first : docTR localise le texte, VLM seulement pour les icônes
|
||||||
|
|
||||||
|
Chaque approche est testée N fois sur les mêmes cibles.
|
||||||
|
Mesure : taux de détection, variance des coordonnées, temps moyen.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import io
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Dict, List, Optional, Tuple
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
# Two directory levels above this test file; made importable for project modules.
_ROOT = str(Path(__file__).resolve().parents[2])
if sys.path.count(_ROOT) == 0:
    sys.path.insert(0, _ROOT)

# Directory holding the recorded session screenshots used as benchmark inputs.
_SHOTS_DIR = Path(
    _ROOT,
    "data/training/live_sessions/DESKTOP-ST3VBSD_windows/sess_20260404T135010_cec5c8/shots",
)

# Number of iterations per benchmark test.
N_ITERATIONS = 5
|
||||||
|
|
||||||
|
|
||||||
|
def _load_screenshot(name: str) -> str:
    """Return the named screenshot base64-encoded; skip the test if it is missing."""
    shot_path = _SHOTS_DIR / name
    if not shot_path.is_file():
        pytest.skip(f"Screenshot {name} non disponible")
    raw = shot_path.read_bytes()
    return base64.b64encode(raw).decode()
|
||||||
|
|
||||||
|
|
||||||
|
def _load_screenshot_pil(name: str):
    """Open the named screenshot with PIL; skip the test if it is missing."""
    from PIL import Image

    shot_path = _SHOTS_DIR / name
    if not shot_path.is_file():
        pytest.skip(f"Screenshot {name} non disponible")
    image = Image.open(shot_path)
    return image
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Approche 1 : Baseline qwen2.5vl direct
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_bbox_2d(content: str) -> Optional[Tuple[int, int, int, int]]:
|
||||||
|
"""Parser les coordonnées bbox_2d depuis une réponse qwen2.5vl.
|
||||||
|
|
||||||
|
qwen2.5vl retourne du JSON :
|
||||||
|
```json
|
||||||
|
[{"bbox_2d": [x1, y1, x2, y2], "label": "..."}]
|
||||||
|
```
|
||||||
|
Les coordonnées sont en pixels relatifs à l'image envoyée.
|
||||||
|
"""
|
||||||
|
# Stratégie 1 : parser le JSON complet (le plus fiable)
|
||||||
|
# Nettoyer les fences markdown
|
||||||
|
cleaned = re.sub(r'```(?:json)?\s*', '', content).strip()
|
||||||
|
try:
|
||||||
|
data = json.loads(cleaned)
|
||||||
|
if isinstance(data, list) and len(data) > 0:
|
||||||
|
bbox = data[0].get("bbox_2d")
|
||||||
|
if bbox and len(bbox) >= 4:
|
||||||
|
return (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))
|
||||||
|
elif isinstance(data, dict):
|
||||||
|
bbox = data.get("bbox_2d")
|
||||||
|
if bbox and len(bbox) >= 4:
|
||||||
|
return (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))
|
||||||
|
except (json.JSONDecodeError, ValueError, TypeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Stratégie 2 : regex ciblé sur "bbox_2d": [x1, y1, x2, y2]
|
||||||
|
bbox_match = re.search(
|
||||||
|
r'"bbox_2d"\s*:\s*\[\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\]',
|
||||||
|
content,
|
||||||
|
)
|
||||||
|
if bbox_match:
|
||||||
|
return tuple(int(bbox_match.group(i)) for i in range(1, 5))
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def grounding_baseline(screenshot_b64: str, description: str, img_width: int = 1280, img_height: int = 800) -> Optional[Tuple[float, float]]:
    """Direct qwen2.5vl grounding; returns a normalised ``(x_pct, y_pct)`` centre.

    The reply's bounding box is treated as pixel coordinates of the image that
    was sent, and its centre is divided by ``img_width`` / ``img_height``.

    Args:
        screenshot_b64: Base64-encoded image sent to the model.
        description: Text inserted into the detection prompt.
        img_width: Width used to normalise the x coordinate.
        img_height: Height used to normalise the y coordinate.

    Returns:
        The box centre in the 0-1 range, or ``None`` on HTTP failure, when no
        box is parsed, when the centre is out of range, or on any exception.
    """
    import requests

    try:
        payload = {
            "model": "qwen2.5vl:7b",
            "messages": [{"role": "user", "content": f"Detect '{description}' with a bounding box.", "images": [screenshot_b64]}],
            "stream": False,
            "options": {"temperature": 0.0, "num_predict": 100},
        }
        resp = requests.post("http://localhost:11434/api/chat", json=payload, timeout=30)
        if not resp.ok:
            return None

        reply = resp.json().get("message", {}).get("content", "")
        bbox = _parse_bbox_2d(reply)
        if not bbox:
            return None

        left, top, right, bottom = bbox
        # Normalise the box centre by the image dimensions (pixels -> 0-1).
        cx = (left + right) / 2 / img_width
        cy = (top + bottom) / 2 / img_height
        if 0.0 <= cx <= 1.0 and 0.0 <= cy <= 1.0:
            return (cx, cy)
    except Exception:
        pass
    return None
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Approche 2 : Zoom progressif (2 passes)
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def grounding_zoom(screenshot_b64: str, description: str, img_width: int = 1280, img_height: int = 800) -> Optional[Tuple[float, float]]:
    """Two-pass progressive zoom grounding.

    Pass 1 grounds on the full image. Pass 2 crops a window around that hit
    (25% of the image size on each side of the point, clipped to the image
    bounds) and grounds again on the crop.

    Args:
        screenshot_b64: Base64-encoded full screenshot.
        description: Text of the element to locate.
        img_width: Width used to normalise the pass-1 result.
        img_height: Height used to normalise the pass-1 result.

    Returns:
        ``(x_pct, y_pct)`` normalised to the full image, the pass-1 result
        when pass 2 fails for any reason, or ``None`` when pass 1 finds
        nothing.
    """
    from PIL import Image

    # Pass 1: grounding on the full image.
    result1 = grounding_baseline(screenshot_b64, description, img_width, img_height)
    if result1 is None:
        return None

    x1_pct, y1_pct = result1

    # Pass 2: crop around the hit and ground again.
    try:
        img = Image.open(io.BytesIO(base64.b64decode(screenshot_b64)))
        w, h = img.size

        # Window of 25% of the image on each side of the pass-1 point.
        crop_size = 0.25
        half_w = int(crop_size * w)
        half_h = int(crop_size * h)
        cx_px = int(x1_pct * w)
        cy_px = int(y1_pct * h)
        x_left = max(0, cx_px - half_w)
        y_top = max(0, cy_px - half_h)
        x_right = min(w, cx_px + half_w)
        y_bottom = min(h, cy_px + half_h)

        cropped = img.crop((x_left, y_top, x_right, y_bottom))
        # JPEG cannot store alpha or palette images. Without this conversion
        # an RGBA/P screenshot makes save() raise, and the broad except below
        # would silently turn every zoom run into a plain baseline run.
        if cropped.mode != "RGB":
            cropped = cropped.convert("RGB")
        crop_w, crop_h = cropped.size

        # Encode the crop as base64 JPEG.
        buf = io.BytesIO()
        cropped.save(buf, format="JPEG", quality=85)
        crop_b64 = base64.b64encode(buf.getvalue()).decode()

        # Ground on the crop, normalising by the crop's own dimensions.
        result2 = grounding_baseline(crop_b64, description, crop_w, crop_h)
        if result2 is None:
            return result1  # Fall back to pass 1.

        # Map crop-relative coordinates back to the full image.
        x2_in_crop, y2_in_crop = result2
        x_final = (x_left + x2_in_crop * crop_w) / w
        y_final = (y_top + y2_in_crop * crop_h) / h
        return (x_final, y_final)

    except Exception:
        return result1  # Best effort: keep the pass-1 answer.
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Approche 3 : OCR-first (docTR)
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def grounding_ocr_first(screenshot_b64: str, description: str) -> Optional[Tuple[float, float]]:
    """OCR-first grounding: docTR locates text, the VLM is the fallback.

    Every OCR word that contains, or is contained in, the lowercased
    description is scored by ``len(word) / len(description)``. The centre of
    the best-scoring word is returned when its score exceeds 0.5.

    Args:
        screenshot_b64: Base64-encoded screenshot.
        description: Text of the element to locate.

    Returns:
        ``(x_pct, y_pct)`` from OCR when a good match exists; otherwise the
        result of ``grounding_baseline`` (also used when docTR is missing or
        fails).
    """
    try:
        from doctr.io import DocumentFile
        from doctr.models import ocr_predictor

        # Building the predictor loads the models. Cache it on the function so
        # repeated benchmark iterations time the OCR pass, not the model load.
        predictor = getattr(grounding_ocr_first, "_predictor", None)
        if predictor is None:
            predictor = ocr_predictor(det_arch='db_resnet50', reco_arch='crnn_vgg16_bn', pretrained=True)
            grounding_ocr_first._predictor = predictor

        # Decode the image and run OCR.
        img_bytes = base64.b64decode(screenshot_b64)
        doc = DocumentFile.from_images([img_bytes])
        result = predictor(doc)

        # Look for the target text among the recognised words.
        target_lower = description.lower()
        best_match = None
        best_score = 0

        for page in result.pages:
            for block in page.blocks:
                for line_obj in block.lines:
                    for word in line_obj.words:
                        word_text = word.value.lower()
                        # Exact or partial match, in either direction.
                        if target_lower in word_text or word_text in target_lower:
                            score = len(word_text) / max(len(target_lower), 1)
                            if score > best_score:
                                # word.geometry is read as ((x1, y1), (x2, y2))
                                # in 0-1 relative coordinates.
                                box = word.geometry
                                cx = (box[0][0] + box[1][0]) / 2
                                cy = (box[0][1] + box[1][1]) / 2
                                best_match = (cx, cy)
                                best_score = score

        if best_match and best_score > 0.5:
            return best_match

    except Exception:
        # Deliberate best effort: docTR not installed (ImportError) or any OCR
        # failure falls through to the VLM.
        pass

    # VLM fallback for elements without matching text.
    return grounding_baseline(screenshot_b64, description)
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Framework de benchmark
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def run_benchmark(
    approach_fn,
    approach_name: str,
    screenshot_b64: str,
    description: str,
    n_iterations: int = N_ITERATIONS,
) -> Dict:
    """Run one grounding approach repeatedly and summarise the outcomes.

    Args:
        approach_fn: Callable invoked as ``approach_fn(screenshot_b64,
            description)``; returns an ``(x, y)`` pair or ``None``.
        approach_name: Label stored under ``"approach"``.
        screenshot_b64: Base64 screenshot passed to ``approach_fn``.
        description: Target text passed to ``approach_fn`` and stored under
            ``"target"``.
        n_iterations: Number of calls to make. Zero or negative values make
            no calls and report a 0.0 detection rate instead of raising.

    Returns:
        Dict with ``approach``, ``target``, ``iterations``,
        ``detection_rate`` (0-1, 2 decimals), ``avg_time_ms`` and ``stable``.
        When at least one call succeeded it also has ``x_mean``, ``y_mean``,
        ``x_variance`` and ``y_variance``. Despite their names the two
        ``*_variance`` values are the range (max - min) of the coordinates.
    """
    results = []
    times = []

    for _ in range(n_iterations):
        # perf_counter is monotonic, unlike time.time().
        t_start = time.perf_counter()
        result = approach_fn(screenshot_b64, description)
        times.append(time.perf_counter() - t_start)

        if result is not None:
            results.append(result)

    # Statistics. n_runs is 0 when n_iterations <= 0, hence the guards.
    n_found = len(results)
    n_runs = len(times)

    stats = {
        "approach": approach_name,
        "target": description,
        "iterations": n_iterations,
        "detection_rate": round(n_found / n_runs, 2) if n_runs else 0.0,
        "avg_time_ms": round(sum(times) / n_runs * 1000, 0) if n_runs else 0.0,
    }

    if results:
        xs = [r[0] for r in results]
        ys = [r[1] for r in results]
        stats["x_mean"] = round(sum(xs) / n_found, 4)
        stats["y_mean"] = round(sum(ys) / n_found, 4)
        # Spread of the detections; a single detection has a spread of 0.
        stats["x_variance"] = round(max(xs) - min(xs), 4)
        stats["y_variance"] = round(max(ys) - min(ys), 4)
        stats["stable"] = stats["x_variance"] < 0.05 and stats["y_variance"] < 0.05
    else:
        stats["stable"] = False

    return stats
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Tests de benchmark comparatif
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
# Cibles à tester (screenshot, description, nom)
|
||||||
|
_TARGETS = [
|
||||||
|
("shot_0001_full.png", "Rechercher", "Rechercher taskbar"),
|
||||||
|
("shot_0001_full.png", "agent_v1", "Dossier agent_v1"),
|
||||||
|
("shot_0004_full.png", "Fichier", "Menu Fichier"),
|
||||||
|
("shot_0004_full.png", "Modifier", "Menu Modifier"),
|
||||||
|
("shot_0004_full.png", "Ceci est un test.txt", "Onglet fichier"),
|
||||||
|
("shot_0014_full.png", "Rechercher sur Google ou saisir une URL", "Recherche Google"),
|
||||||
|
("shot_0014_full.png", "Gmail", "Lien Gmail"),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.visual
class TestBenchmarkBaseline:
    """Benchmark of the baseline approach (direct qwen2.5vl grounding)."""

    @pytest.mark.parametrize("shot,desc,name", _TARGETS)
    def test_baseline_robustesse(self, shot, desc, name):
        """Run the baseline N_ITERATIONS times on one target; require >= 60% detection."""
        screenshot = _load_screenshot(shot)
        stats = run_benchmark(grounding_baseline, "baseline", screenshot, desc, N_ITERATIONS)

        # detection_rate is rounded to 2 decimals, so truncating rate * N with
        # int() can under-count (e.g. 0.33 * 3 -> 0); round() recovers the count.
        n_found = round(stats['detection_rate'] * N_ITERATIONS)

        print(f"\n [{stats['approach']}] {name}:")
        print(f" Détection: {stats['detection_rate']*100:.0f}% ({n_found}/{N_ITERATIONS})")
        print(f" Temps moyen: {stats['avg_time_ms']:.0f}ms")
        if stats.get("x_mean") is not None:
            print(f" Position: ({stats['x_mean']:.3f}, {stats['y_mean']:.3f})")
            print(f" Variance: X={stats['x_variance']:.4f} Y={stats['y_variance']:.4f}")
            print(f" Stable: {'OUI' if stats['stable'] else 'NON'}")

        assert stats["detection_rate"] >= 0.6, f"{name}: détection trop faible ({stats['detection_rate']})"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.visual
class TestBenchmarkZoom:
    """Benchmark of the progressive zoom approach."""

    @pytest.mark.parametrize("shot,desc,name", _TARGETS)
    def test_zoom_robustesse(self, shot, desc, name):
        """Run the zoom approach N_ITERATIONS times on one target; require >= 60% detection."""
        screenshot = _load_screenshot(shot)
        stats = run_benchmark(grounding_zoom, "zoom", screenshot, desc, N_ITERATIONS)

        # detection_rate is rounded to 2 decimals, so truncating rate * N with
        # int() can under-count (e.g. 0.33 * 3 -> 0); round() recovers the count.
        n_found = round(stats['detection_rate'] * N_ITERATIONS)

        print(f"\n [{stats['approach']}] {name}:")
        print(f" Détection: {stats['detection_rate']*100:.0f}% ({n_found}/{N_ITERATIONS})")
        print(f" Temps moyen: {stats['avg_time_ms']:.0f}ms")
        if stats.get("x_mean") is not None:
            print(f" Position: ({stats['x_mean']:.3f}, {stats['y_mean']:.3f})")
            print(f" Variance: X={stats['x_variance']:.4f} Y={stats['y_variance']:.4f}")
            print(f" Stable: {'OUI' if stats['stable'] else 'NON'}")

        assert stats["detection_rate"] >= 0.6, f"{name}: détection trop faible ({stats['detection_rate']})"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.visual
class TestBenchmarkCitrix:
    """Baseline benchmark on degraded images (Citrix simulation, JPEG Q20)."""

    def _degrade_citrix(self, screenshot_b64: str) -> str:
        """Simulate Citrix compression by re-encoding the image as JPEG quality 20.

        Args:
            screenshot_b64: Base64-encoded source image.

        Returns:
            The base64-encoded JPEG.
        """
        from PIL import Image
        img = Image.open(io.BytesIO(base64.b64decode(screenshot_b64)))
        # JPEG cannot store alpha or palette images; the screenshots are PNG
        # files and may be RGBA/P, which would make save() raise.
        if img.mode != "RGB":
            img = img.convert("RGB")
        buf = io.BytesIO()
        img.save(buf, "JPEG", quality=20)
        return base64.b64encode(buf.getvalue()).decode()

    @pytest.mark.parametrize("shot,desc,name", _TARGETS)
    def test_citrix_robustesse(self, shot, desc, name):
        """Run the baseline on the degraded image; require >= 40% detection."""
        screenshot = _load_screenshot(shot)
        citrix = self._degrade_citrix(screenshot)
        stats = run_benchmark(grounding_baseline, "citrix_q20", citrix, desc, N_ITERATIONS)

        print(f"\n [{stats['approach']}] {name}:")
        print(f" Détection: {stats['detection_rate']*100:.0f}%")
        print(f" Temps moyen: {stats['avg_time_ms']:.0f}ms")
        if stats.get("x_mean") is not None:
            print(f" Position: ({stats['x_mean']:.3f}, {stats['y_mean']:.3f})")
            print(f" Variance: X={stats['x_variance']:.4f} Y={stats['y_variance']:.4f}")
            print(f" Stable: {'OUI' if stats['stable'] else 'NON'}")

        # Degraded images may be less reliable, hence the lower threshold.
        assert stats["detection_rate"] >= 0.4, f"{name} Citrix: détection trop faible ({stats['detection_rate']})"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.visual
class TestRapportComparatif:
    """Print a comparative report of the benchmarked approaches."""

    def test_rapport_complet(self):
        """Run baseline, zoom and degraded-JPEG baseline on every target and print a table.

        Each combination is run 3 times. The test makes no assertion; it only
        prints the report.
        """
        from PIL import Image

        all_results = []

        for shot, desc, name in _TARGETS:
            screenshot = _load_screenshot(shot)

            # Citrix simulation: re-encode as JPEG quality 20.
            img = Image.open(io.BytesIO(base64.b64decode(screenshot)))
            # JPEG cannot store alpha or palette images; the screenshots are
            # PNG files and may be RGBA/P, which would make save() raise.
            if img.mode != "RGB":
                img = img.convert("RGB")
            buf = io.BytesIO()
            img.save(buf, "JPEG", quality=20)
            citrix = base64.b64encode(buf.getvalue()).decode()

            for approach_fn, approach_name, img_b64 in [
                (grounding_baseline, "baseline", screenshot),
                (grounding_zoom, "zoom", screenshot),
                (grounding_baseline, "citrix_q20", citrix),
            ]:
                stats = run_benchmark(approach_fn, approach_name, img_b64, desc, 3)
                stats["target_name"] = name
                all_results.append(stats)

        # Report.
        print("\n" + "=" * 80)
        print("RAPPORT COMPARATIF — GROUNDING BENCHMARK")
        print("=" * 80)
        print(f"{'Cible':<25s} {'Approche':<12s} {'Détect.':<8s} {'Temps':<8s} {'Position':<20s} {'Var X':<8s} {'Var Y':<8s} {'Stable'}")
        print("-" * 80)
        for r in all_results:
            # Position and spread keys are absent when nothing was detected.
            pos = f"({r.get('x_mean',0):.3f}, {r.get('y_mean',0):.3f})" if r.get('x_mean') is not None else "N/A"
            var_x = f"{r.get('x_variance',0):.4f}" if r.get('x_variance') is not None else "N/A"
            var_y = f"{r.get('y_variance',0):.4f}" if r.get('y_variance') is not None else "N/A"
            stable = "OUI" if r.get('stable') else "NON"
            print(f"{r['target_name']:<25s} {r['approach']:<12s} {r['detection_rate']*100:5.0f}% {r['avg_time_ms']:5.0f}ms {pos:<20s} {var_x:<8s} {var_y:<8s} {stable}")
        print("=" * 80)
|
||||||
445
tests/visual/test_visual_grounding.py
Normal file
445
tests/visual/test_visual_grounding.py
Normal file
@@ -0,0 +1,445 @@
|
|||||||
|
"""
|
||||||
|
Tests visuels sur captures d'écran réelles — Grounding benchmark.
|
||||||
|
|
||||||
|
Vérifie que le système trouve les bons éléments UI sur des screenshots
|
||||||
|
Windows réels. Pas besoin de VM — juste les images et le serveur.
|
||||||
|
|
||||||
|
Chaque test :
|
||||||
|
1. Charge un screenshot réel (sessions enregistrées)
|
||||||
|
2. Demande au serveur de localiser un élément (via /resolve_target)
|
||||||
|
3. Vérifie que les coordonnées retournées sont dans la zone attendue
|
||||||
|
|
||||||
|
C'est l'apprentissage de l'environnement Windows :
|
||||||
|
- Rechercher un programme
|
||||||
|
- Fermer/réduire/agrandir une fenêtre
|
||||||
|
- Naviguer dans les onglets
|
||||||
|
- Utiliser les menus
|
||||||
|
"""
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import io
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional, Tuple
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
_ROOT = str(Path(__file__).resolve().parents[2])
|
||||||
|
if _ROOT not in sys.path:
|
||||||
|
sys.path.insert(0, _ROOT)
|
||||||
|
|
||||||
|
# Répertoire des screenshots de test
|
||||||
|
_SHOTS_DIR = Path(_ROOT) / "data/training/live_sessions/DESKTOP-ST3VBSD_windows/sess_20260404T135010_cec5c8/shots"
|
||||||
|
|
||||||
|
# Résolution des screenshots
|
||||||
|
_SCREEN_W = 1280
|
||||||
|
_SCREEN_H = 800
|
||||||
|
|
||||||
|
|
||||||
|
def _load_screenshot(name: str) -> Optional[str]:
    """Load the named screenshot as base64; skip the test if it is missing."""
    shot_path = _SHOTS_DIR / name
    if not shot_path.is_file():
        pytest.skip(f"Screenshot {name} non disponible")
    raw = shot_path.read_bytes()
    return base64.b64encode(raw).decode()
|
||||||
|
|
||||||
|
|
||||||
|
def _in_zone(x_pct: float, y_pct: float, zone: dict) -> bool:
|
||||||
|
"""Vérifier si un point est dans une zone attendue.
|
||||||
|
|
||||||
|
zone = {"x_min": 0.3, "x_max": 0.5, "y_min": 0.9, "y_max": 1.0}
|
||||||
|
"""
|
||||||
|
return (
|
||||||
|
zone["x_min"] <= x_pct <= zone["x_max"]
|
||||||
|
and zone["y_min"] <= y_pct <= zone["y_max"]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_via_server(
    screenshot_b64: str,
    target_spec: dict,
    strict: bool = True,
) -> Optional[dict]:
    """Resolve a target visually, first via the VLM, then via the server.

    Strategy 1 asks qwen2.5vl directly for a ``bbox_2d`` using ``by_text``
    (or ``vlm_description`` when ``by_text`` is empty). If that yields no
    usable box, strategy 2 posts to the server's ``resolve_target`` endpoint.

    Args:
        screenshot_b64: Base64-encoded screenshot.
        target_spec: Dict read for ``by_text`` and ``vlm_description``, and
            forwarded as-is to the server.
        strict: Sent to the server as ``strict_mode``.

    Returns:
        A dict with ``resolved`` truthy and the position, or ``None`` when
        neither strategy resolves the target. The test is skipped when the
        VLM request times out or cannot connect.
    """
    import requests
    import re

    # -- Strategy 1: direct VLM grounding (qwen2.5vl) --
    search_text = target_spec.get("by_text", "") or target_spec.get("vlm_description", "")

    if search_text:
        try:
            prompt = f"Detect the element '{search_text}' with a bounding box."
            vlm_payload = {
                "model": "qwen2.5vl:7b",
                "messages": [{"role": "user", "content": prompt, "images": [screenshot_b64]}],
                "stream": False,
                "options": {"temperature": 0.0, "num_predict": 100},
            }
            resp = requests.post("http://localhost:11434/api/chat", json=vlm_payload, timeout=30)
            if resp.ok:
                reply = resp.json().get("message", {}).get("content", "")
                # The bbox is treated as pixels relative to the image,
                # NOT as a 1000x1000 grid.
                found = re.search(
                    r'"bbox_2d"\s*:\s*\[\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\]',
                    reply,
                )
                if found:
                    x1, y1, x2, y2 = map(int, found.groups())
                    # Normalise the box centre by the screenshot size (pixels -> 0-1).
                    cx = (x1 + x2) / 2 / _SCREEN_W
                    cy = (y1 + y2) / 2 / _SCREEN_H
                    if 0.0 <= cx <= 1.0 and 0.0 <= cy <= 1.0:
                        return {
                            "resolved": True,
                            "method": "vlm_grounding",
                            "x_pct": cx,
                            "y_pct": cy,
                            "score": 0.8,
                            "raw_bbox": [x1, y1, x2, y2],
                        }
        except requests.Timeout:
            pytest.skip("qwen2.5vl timeout — premier chargement ?")
        except requests.ConnectionError:
            pytest.skip("Ollama non disponible (localhost:11434)")

    # -- Strategy 2: server endpoint (fallback) --
    # Token comes from the environment, else from .env.local (last match wins).
    token = os.environ.get("RPA_API_TOKEN", "")
    env_file = Path(_ROOT) / ".env.local"
    if not token and env_file.is_file():
        for line in env_file.read_text().splitlines():
            if line.startswith("RPA_API_TOKEN="):
                token = line.partition("=")[2].strip()

    headers = {"Content-Type": "application/json"}
    if token:
        headers["Authorization"] = f"Bearer {token}"

    try:
        server_payload = {
            "session_id": "visual_test",
            "screenshot_b64": screenshot_b64,
            "target_spec": target_spec,
            "screen_width": _SCREEN_W,
            "screen_height": _SCREEN_H,
            "fallback_x_pct": 0.5,
            "fallback_y_pct": 0.5,
            "strict_mode": strict,
        }
        resp = requests.post(
            "http://localhost:5005/api/v1/traces/stream/replay/resolve_target",
            json=server_payload,
            headers=headers,
            timeout=30,
        )
        if resp.ok:
            data = resp.json()
            if data.get("resolved"):
                return data
    except Exception:
        pass

    return None
|
||||||
|
|
||||||
|
|
||||||
|
def _assert_found_in_zone(result: dict, zone: dict, element_name: str):
    """Assert that an element was resolved and lies inside the expected zone.

    Args:
        result: Resolution dict (``resolved``, ``x_pct``, ``y_pct``), or ``None``.
        zone: Expected bounds with keys ``x_min``, ``x_max``, ``y_min``, ``y_max``.
        element_name: Label used in the failure messages.
    """
    assert result is not None, f"{element_name}: pas de réponse du serveur"

    was_resolved = result.get("resolved")
    assert was_resolved, (
        f"{element_name}: non trouvé (reason={result.get('reason', '?')})"
    )

    x, y = result.get("x_pct", 0), result.get("y_pct", 0)
    inside = _in_zone(x, y, zone)
    assert inside, (
        f"{element_name}: trouvé à ({x:.3f}, {y:.3f}) "
        f"mais attendu dans zone x=[{zone['x_min']:.2f}-{zone['x_max']:.2f}] "
        f"y=[{zone['y_min']:.2f}-{zone['y_max']:.2f}]"
    )
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# shot_0001 : Explorateur de fichiers Windows
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.visual
class TestExplorateurFichiers:
    """Tests on the Windows File Explorer screenshot (shot_0001)."""

    @pytest.fixture
    def screenshot(self):
        return _load_screenshot("shot_0001_full.png")

    def test_trouver_rechercher_taskbar(self, screenshot):
        """Find 'Rechercher' in the taskbar."""
        spec = {
            "by_text": "Rechercher",
            "vlm_description": "La barre de recherche Windows dans la barre des tâches, en bas de l'écran",
        }
        zone = {"x_min": 0.20, "x_max": 0.50, "y_min": 0.90, "y_max": 1.00}
        found = _resolve_via_server(screenshot, spec)
        _assert_found_in_zone(found, zone, "Rechercher (taskbar)")

    def test_trouver_bouton_fermer_explorateur(self, screenshot):
        """Find the Explorer window's close (X) button."""
        spec = {
            "by_text": "",
            "vlm_description": "Le bouton fermer (X) de la fenêtre Explorateur de fichiers, en haut à droite",
        }
        zone = {"x_min": 0.90, "x_max": 1.00, "y_min": 0.00, "y_max": 0.05}
        found = _resolve_via_server(screenshot, spec)
        _assert_found_in_zone(found, zone, "Bouton fermer (X)")

    def test_trouver_bouton_reduire(self, screenshot):
        """Find the Explorer window's minimize (-) button."""
        spec = {
            "by_text": "",
            "vlm_description": "Le bouton réduire (minimize, -) de la fenêtre, en haut à droite à gauche du X",
        }
        zone = {"x_min": 0.85, "x_max": 0.95, "y_min": 0.00, "y_max": 0.05}
        found = _resolve_via_server(screenshot, spec)
        _assert_found_in_zone(found, zone, "Bouton réduire (-)")

    def test_trouver_dossier_agent_v1(self, screenshot):
        """Find the 'agent_v1' folder in the file list."""
        spec = {
            "by_text": "agent_v1",
            "vlm_description": "Le dossier agent_v1 dans la liste des fichiers de l'Explorateur",
        }
        zone = {"x_min": 0.05, "x_max": 0.50, "y_min": 0.10, "y_max": 0.30}
        found = _resolve_via_server(screenshot, spec)
        _assert_found_in_zone(found, zone, "Dossier agent_v1")

    def test_trouver_bouton_demarrer(self, screenshot):
        """Find the Start (Windows) button in the taskbar."""
        spec = {
            "by_text": "",
            "vlm_description": "Le bouton Démarrer (logo Windows) dans la barre des tâches, en bas",
        }
        zone = {"x_min": 0.18, "x_max": 0.30, "y_min": 0.90, "y_max": 1.00}
        found = _resolve_via_server(screenshot, spec)
        _assert_found_in_zone(found, zone, "Bouton Démarrer")

    def test_trouver_ce_pc(self, screenshot):
        """Find 'Ce PC' in the Explorer side panel."""
        spec = {
            "by_text": "Ce PC",
            "vlm_description": "L'élément 'Ce PC' dans le panneau de navigation gauche de l'Explorateur",
        }
        zone = {"x_min": 0.00, "x_max": 0.12, "y_min": 0.40, "y_max": 0.55}
        found = _resolve_via_server(screenshot, spec)
        _assert_found_in_zone(found, zone, "Ce PC")
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# shot_0004 : Bloc-notes avec onglets + Explorateur derrière
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.visual
|
||||||
|
class TestBlocNotesOnglets:
|
||||||
|
"""Tests sur le Bloc-notes avec plusieurs onglets (shot_0004)."""
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def screenshot(self):
|
||||||
|
return _load_screenshot("shot_0004_full.png")
|
||||||
|
|
||||||
|
def test_trouver_menu_fichier(self, screenshot):
|
||||||
|
"""Trouver le menu 'Fichier' du Bloc-notes."""
|
||||||
|
result = _resolve_via_server(screenshot, {
|
||||||
|
"by_text": "Fichier",
|
||||||
|
"vlm_description": "Le menu Fichier dans la barre de menus du Bloc-notes",
|
||||||
|
})
|
||||||
|
_assert_found_in_zone(result, {
|
||||||
|
"x_min": 0.02, "x_max": 0.10,
|
||||||
|
"y_min": 0.08, "y_max": 0.15,
|
||||||
|
}, "Menu Fichier")
|
||||||
|
|
||||||
|
def test_trouver_onglet_ceci_est_un_test(self, screenshot):
|
||||||
|
"""Trouver l'onglet 'Ceci est un test.txt' dans le Bloc-notes."""
|
||||||
|
result = _resolve_via_server(screenshot, {
|
||||||
|
"by_text": "Ceci est un test",
|
||||||
|
"vlm_description": "L'onglet 'Ceci est un test.txt' dans le Bloc-notes",
|
||||||
|
})
|
||||||
|
_assert_found_in_zone(result, {
|
||||||
|
"x_min": 0.40, "x_max": 0.70,
|
||||||
|
"y_min": 0.03, "y_max": 0.10,
|
||||||
|
}, "Onglet 'Ceci est un test.txt'")
|
||||||
|
|
||||||
|
def test_trouver_nouvel_onglet_plus(self, screenshot):
|
||||||
|
"""Trouver le bouton '+' pour ajouter un nouvel onglet."""
|
||||||
|
result = _resolve_via_server(screenshot, {
|
||||||
|
"by_text": "",
|
||||||
|
"vlm_description": "Le bouton + (plus) pour ajouter un nouvel onglet dans le Bloc-notes, à droite des onglets",
|
||||||
|
})
|
||||||
|
_assert_found_in_zone(result, {
|
||||||
|
"x_min": 0.55, "x_max": 0.70,
|
||||||
|
"y_min": 0.03, "y_max": 0.10,
|
||||||
|
}, "Bouton + (nouvel onglet)")
|
||||||
|
|
||||||
|
def test_trouver_bouton_fermer_onglet(self, screenshot):
|
||||||
|
"""Trouver le X de fermeture de l'onglet actif."""
|
||||||
|
result = _resolve_via_server(screenshot, {
|
||||||
|
"by_text": "",
|
||||||
|
"vlm_description": "Le bouton X pour fermer l'onglet actif 'Ceci est un test.txt' dans le Bloc-notes",
|
||||||
|
})
|
||||||
|
_assert_found_in_zone(result, {
|
||||||
|
"x_min": 0.50, "x_max": 0.65,
|
||||||
|
"y_min": 0.03, "y_max": 0.10,
|
||||||
|
}, "Fermer onglet (X)")
|
||||||
|
|
||||||
|
def test_trouver_menu_modifier(self, screenshot):
    """Locate the 'Modifier' menu of Notepad.

    Resolves the target through ``_resolve_via_server`` and checks the result
    against the expected zone with ``_assert_found_in_zone``.
    """
    target = {
        "by_text": "Modifier",
        "vlm_description": "Le menu Modifier dans la barre de menus du Bloc-notes",
    }
    # Presumably fractions of the screen size; confirm in _assert_found_in_zone.
    expected_zone = {
        "x_min": 0.07, "x_max": 0.16,
        "y_min": 0.08, "y_max": 0.15,
    }
    outcome = _resolve_via_server(screenshot, target)
    _assert_found_in_zone(outcome, expected_zone, "Menu Modifier")
|
||||||
|
|
||||||
|
def test_trouver_encodage_utf8(self, screenshot):
    """Locate the UTF-8 encoding indicator in the status bar.

    Resolves the target through ``_resolve_via_server`` and checks the result
    against the expected zone with ``_assert_found_in_zone``.
    """
    target = {
        "by_text": "UTF-8",
        "vlm_description": "L'indicateur d'encodage UTF-8 dans la barre de statut en bas du Bloc-notes",
    }
    # Presumably fractions of the screen size; confirm in _assert_found_in_zone.
    expected_zone = {
        "x_min": 0.60, "x_max": 0.80,
        "y_min": 0.90, "y_max": 1.00,
    }
    outcome = _resolve_via_server(screenshot, target)
    _assert_found_in_zone(outcome, expected_zone, "UTF-8 (barre de statut)")
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# shot_0014 : Google Chrome page d'accueil
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.visual
class TestGoogleChrome:
    """Tests on Google Chrome showing its home page (shot_0014).

    Every test passes the same screenshot and a target spec to
    ``_resolve_via_server`` and checks the result with
    ``_assert_found_in_zone``.
    """

    @pytest.fixture
    def screenshot(self):
        """Provide the shot_0014 screenshot via ``_load_screenshot``."""
        return _load_screenshot("shot_0014_full.png")

    def test_trouver_barre_recherche_google(self, screenshot):
        """Locate the Google search bar in the centre of the page."""
        target = dict(
            by_text="Rechercher sur Google",
            vlm_description="La barre de recherche Google au centre de la page d'accueil",
        )
        expected_zone = dict(x_min=0.10, x_max=0.60, y_min=0.30, y_max=0.50)
        outcome = _resolve_via_server(screenshot, target)
        _assert_found_in_zone(outcome, expected_zone, "Barre recherche Google")

    def test_trouver_barre_adresse_chrome(self, screenshot):
        """Locate Chrome's address bar at the top (description only)."""
        target = dict(
            by_text="",
            vlm_description="La barre d'adresse URL de Google Chrome, en haut du navigateur",
        )
        expected_zone = dict(x_min=0.10, x_max=0.60, y_min=0.05, y_max=0.15)
        outcome = _resolve_via_server(screenshot, target)
        _assert_found_in_zone(outcome, expected_zone, "Barre d'adresse Chrome")

    def test_trouver_nouvel_onglet_chrome(self, screenshot):
        """Locate the '+' button that opens a new Chrome tab."""
        target = dict(
            by_text="",
            vlm_description="Le bouton + pour ouvrir un nouvel onglet dans Google Chrome",
        )
        expected_zone = dict(x_min=0.15, x_max=0.25, y_min=0.00, y_max=0.06)
        outcome = _resolve_via_server(screenshot, target)
        _assert_found_in_zone(outcome, expected_zone, "Nouvel onglet (+) Chrome")

    def test_trouver_fermer_chrome(self, screenshot):
        """Locate the 'X' button that closes the Chrome window."""
        target = dict(
            by_text="",
            vlm_description="Le bouton fermer (X) de la fenêtre Google Chrome, en haut à droite",
        )
        expected_zone = dict(x_min=0.90, x_max=1.00, y_min=0.00, y_max=0.06)
        outcome = _resolve_via_server(screenshot, target)
        _assert_found_in_zone(outcome, expected_zone, "Fermer Chrome (X)")

    def test_trouver_gmail(self, screenshot):
        """Locate the Gmail link on the Google home page."""
        target = dict(
            by_text="Gmail",
            vlm_description="Le lien Gmail en haut à droite de la page Google",
        )
        expected_zone = dict(x_min=0.50, x_max=0.80, y_min=0.10, y_max=0.20)
        outcome = _resolve_via_server(screenshot, target)
        _assert_found_in_zone(outcome, expected_zone, "Gmail")
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Tests transversaux (connaissances de base Windows)
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.visual
class TestConnaissancesWindowsBase:
    """Basic Windows know-how that any user has.

    Unlike the per-application classes, these tests only require that the
    result of ``_resolve_via_server`` is truthy and has a truthy ``resolved``
    entry; no position is checked.
    """

    def test_rechercher_programme_depuis_explorateur(self):
        """From File Explorer, find the Windows search bar."""
        shot = _load_screenshot("shot_0001_full.png")
        target = {
            "by_text": "Rechercher",
            "vlm_description": "La barre de recherche dans la barre des tâches Windows en bas de l'écran",
        }
        outcome = _resolve_via_server(shot, target)
        assert outcome and outcome.get("resolved"), "Rechercher non trouvé"

    def test_fermer_programme_depuis_blocnotes(self):
        """From Notepad, find the close button."""
        shot = _load_screenshot("shot_0004_full.png")
        target = {
            "by_text": "",
            "vlm_description": "Le bouton X pour fermer la fenêtre du Bloc-notes, en haut à droite",
        }
        outcome = _resolve_via_server(shot, target)
        assert outcome and outcome.get("resolved"), "Bouton fermer non trouvé"

    def test_ajouter_onglet_blocnotes(self):
        """Find the button that adds a new tab in Notepad."""
        shot = _load_screenshot("shot_0004_full.png")
        target = {
            "by_text": "",
            "vlm_description": "Le bouton + pour ajouter un nouvel onglet dans le Bloc-notes",
        }
        outcome = _resolve_via_server(shot, target)
        assert outcome and outcome.get("resolved"), "Bouton + non trouvé"

    def test_rechercher_sur_google(self):
        """Find the Google search field to type into."""
        shot = _load_screenshot("shot_0014_full.png")
        target = {
            "by_text": "Rechercher sur Google",
            "vlm_description": "Le champ de recherche Google",
        }
        outcome = _resolve_via_server(shot, target)
        assert outcome and outcome.get("resolved"), "Recherche Google non trouvée"
|
||||||
864
tests/visual/test_visual_robustness.py
Normal file
864
tests/visual/test_visual_robustness.py
Normal file
@@ -0,0 +1,864 @@
|
|||||||
|
"""
|
||||||
|
Tests de robustesse visuelle — Grounding VLM qwen2.5vl:7b.
|
||||||
|
|
||||||
|
Objectifs :
|
||||||
|
1. Reproductibilité : même screenshot + même cible → même résultat 10 fois
|
||||||
|
2. Robustesse Citrix : screenshots compressés JPEG qualité 15-25 → ça marche
|
||||||
|
3. Mesure de variance : coordonnées stables à < 5% de l'écran
|
||||||
|
|
||||||
|
Architecture des coordonnées qwen2.5vl :
|
||||||
|
- Format bbox_2d : [x1, y1, x2, y2] en pixels relatifs à l'image envoyée
|
||||||
|
- Pour une image 1280x800, X va de 0 à 1280 et Y de 0 à 800
|
||||||
|
- Normalisation : diviser par les dimensions de l'image (pas par 1000)
|
||||||
|
|
||||||
|
Calibration mesurée (5 avril 2026) sur screenshots 1280x800 :
|
||||||
|
- shot_0001/Rechercher (taskbar) : cx=0.458, cy=0.789
|
||||||
|
- shot_0001/agent_v1 (dossier) : cx=0.247, cy=0.201
|
||||||
|
- shot_0004/Fichier (menu) : cx=0.095, cy=0.086
|
||||||
|
- shot_0004/Modifier (menu) : cx=0.142, cy=0.085
|
||||||
|
- shot_0004/Ceci est un test.txt (onglet): cx=0.694, cy=0.053
|
||||||
|
- shot_0004/Close X (Bloc-notes) : cx=0.990, cy=0.041
|
||||||
|
- shot_0014/Google search (centre) : cx=0.539, cy=0.389
|
||||||
|
- shot_0014/Gmail (haut-droite) : cx=0.913, cy=0.130
|
||||||
|
"""
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import io
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
import statistics
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Dict, List, Optional, Tuple
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
_ROOT = str(Path(__file__).resolve().parents[2])
|
||||||
|
if _ROOT not in sys.path:
|
||||||
|
sys.path.insert(0, _ROOT)
|
||||||
|
|
||||||
|
# Répertoire des screenshots de test
|
||||||
|
_SHOTS_DIR = (
|
||||||
|
Path(_ROOT)
|
||||||
|
/ "data/training/live_sessions/DESKTOP-ST3VBSD_windows"
|
||||||
|
/ "sess_20260404T135010_cec5c8/shots"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Résolution des screenshots
|
||||||
|
_SCREEN_W = 1280
|
||||||
|
_SCREEN_H = 800
|
||||||
|
|
||||||
|
# Nombre de répétitions pour les tests de reproductibilité
|
||||||
|
_N_REPEATS = 10
|
||||||
|
|
||||||
|
# Tolérance de variance maximale (en fraction de l'écran, 0.05 = 5%)
|
||||||
|
_MAX_VARIANCE = 0.05
|
||||||
|
|
||||||
|
# Taux de détection minimal (X sur _N_REPEATS)
|
||||||
|
_MIN_DETECTION_RATE = 8
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Utilitaires
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def _load_screenshot(name: str) -> Optional[str]:
    """Return the screenshot ``name`` from ``_SHOTS_DIR`` as base64 text.

    Calls ``pytest.skip`` when the file is not present, so tests that depend
    on a missing screenshot are skipped rather than failed.
    """
    shot_file = _SHOTS_DIR / name
    if not shot_file.is_file():
        pytest.skip(f"Screenshot {name} non disponible")
    encoded = base64.b64encode(shot_file.read_bytes())
    return encoded.decode()
|
||||||
|
|
||||||
|
|
||||||
|
def _degrade_citrix(screenshot_b64: str, quality: int = 20) -> str:
    """Simulate Citrix-style compression on a base64-encoded screenshot.

    The image is decoded, written as a low-quality JPEG (the lossy step),
    read back, and re-encoded as PNG so the compression artefacts are kept
    while the payload sent to the VLM is PNG again.

    Args:
        screenshot_b64: Base64 text of an image file that Pillow can open.
        quality: JPEG quality passed to ``Image.save`` (default 20).

    Returns:
        Base64 text of the degraded image, encoded as PNG.
    """
    from PIL import Image

    raw = base64.b64decode(screenshot_b64)
    img = Image.open(io.BytesIO(raw))

    # Pillow's JPEG writer raises OSError for modes it cannot store (RGBA,
    # LA, P, ...), and PNG screenshots are frequently RGBA. Convert those to
    # RGB; modes the writer already accepts are left untouched.
    if img.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
        img = img.convert("RGB")

    # Low-quality JPEG round trip (the Citrix simulation).
    buf_jpg = io.BytesIO()
    img.save(buf_jpg, "JPEG", quality=quality)
    buf_jpg.seek(0)
    citrix_img = Image.open(buf_jpg)

    # Re-encode as PNG for sending to the VLM.
    buf_png = io.BytesIO()
    citrix_img.save(buf_png, "PNG")
    return base64.b64encode(buf_png.getvalue()).decode()
|
||||||
|
|
||||||
|
|
||||||
|
def _grounding_vlm(
    screenshot_b64: str,
    element_description: str,
    timeout: int = 60,
) -> Tuple[Optional[float], Optional[float], Optional[List[int]], str]:
    """Ask qwen2.5vl, through the local Ollama chat endpoint, to locate an element.

    Args:
        screenshot_b64: Base64-encoded image sent in the ``images`` field.
        element_description: Text inserted into the detection prompt.
        timeout: Timeout in seconds passed to ``requests.post``.

    Returns:
        ``(cx, cy, [x1, y1, x2, y2], raw_content)``. ``cx`` and ``cy`` are the
        centre of the bounding box divided by ``_SCREEN_W`` and ``_SCREEN_H``
        (pixel coordinates mapped to fractions of the screen, not a 1000x1000
        grid). The bbox holds the integers as they appear in the reply. The
        first three items are ``None`` when the reply has no parsable
        ``"bbox_2d"`` entry or is not valid JSON.

    The current test is skipped via ``pytest.skip`` when Ollama cannot be
    reached, times out, or answers with an HTTP error status.
    """
    import requests

    payload = {
        "model": "qwen2.5vl:7b",
        "messages": [
            {
                "role": "user",
                "content": (
                    f"Detect the element '{element_description}' "
                    f"with a bounding box."
                ),
                "images": [screenshot_b64],
            }
        ],
        "stream": False,
        "options": {"temperature": 0.1, "num_predict": 100},
    }

    try:
        resp = requests.post(
            "http://localhost:11434/api/chat",
            json=payload,
            timeout=timeout,
        )
    except requests.ConnectionError:
        pytest.skip("Ollama non disponible (localhost:11434)")
    except requests.Timeout:
        pytest.skip("qwen2.5vl timeout — modèle en cours de chargement ?")

    # An HTTP error (e.g. model not pulled) is an environment problem, like an
    # unreachable server; skip instead of reporting it as a missed detection.
    if not resp.ok:
        pytest.skip(
            f"Ollama a répondu HTTP {resp.status_code} — "
            f"modèle qwen2.5vl:7b indisponible ?"
        )

    try:
        content = resp.json().get("message", {}).get("content", "")
    except ValueError:
        # Body is not JSON: report "not detected" and keep the raw text.
        return None, None, None, resp.text

    # Parse bbox_2d from the reply text. The coordinates are treated as pixels
    # of the submitted image, NOT as positions on a 1000x1000 grid.
    bbox_match = re.search(
        r'"bbox_2d"\s*:\s*\[(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\]',
        content,
    )
    if bbox_match:
        x1, y1, x2, y2 = [int(bbox_match.group(i)) for i in range(1, 5)]
        # Normalise by the screenshot dimensions (pixels -> 0-1).
        # NOTE(review): assumes every screenshot is _SCREEN_W x _SCREEN_H.
        cx = (x1 + x2) / 2 / _SCREEN_W
        cy = (y1 + y2) / 2 / _SCREEN_H
        return cx, cy, [x1, y1, x2, y2], content

    return None, None, None, content
|
||||||
|
|
||||||
|
|
||||||
|
def _run_n_times(
    screenshot_b64: str,
    description: str,
    n: int = _N_REPEATS,
    delay: float = 0.2,
) -> List[Dict]:
    """Call ``_grounding_vlm`` ``n`` times and collect one record per call.

    Each record holds the 1-based run number, the centre (``cx``, ``cy``),
    the bbox, a ``detected`` flag (``cx is not None``) and the raw reply.
    The function sleeps ``delay`` seconds between calls, but not after the
    last one.
    """
    records = []
    for run_no in range(1, n + 1):
        center_x, center_y, box, reply = _grounding_vlm(screenshot_b64, description)
        record = {
            "run": run_no,
            "cx": center_x,
            "cy": center_y,
            "bbox": box,
            "detected": center_x is not None,
            "raw": reply,
        }
        records.append(record)
        if run_no < n:
            time.sleep(delay)
    return records
|
||||||
|
|
||||||
|
|
||||||
|
def _compute_stats(results: List[Dict]) -> Dict:
|
||||||
|
"""Calculer les statistiques de détection et de variance."""
|
||||||
|
detected = [r for r in results if r["detected"]]
|
||||||
|
n_total = len(results)
|
||||||
|
n_detected = len(detected)
|
||||||
|
|
||||||
|
stats = {
|
||||||
|
"total": n_total,
|
||||||
|
"detected": n_detected,
|
||||||
|
"rate": n_detected / n_total if n_total > 0 else 0,
|
||||||
|
"rate_str": f"{n_detected}/{n_total}",
|
||||||
|
}
|
||||||
|
|
||||||
|
if n_detected >= 2:
|
||||||
|
xs = [r["cx"] for r in detected]
|
||||||
|
ys = [r["cy"] for r in detected]
|
||||||
|
stats.update({
|
||||||
|
"x_min": min(xs),
|
||||||
|
"x_max": max(xs),
|
||||||
|
"x_mean": statistics.mean(xs),
|
||||||
|
"x_range": max(xs) - min(xs),
|
||||||
|
"x_stdev": statistics.stdev(xs) if n_detected >= 2 else 0,
|
||||||
|
"y_min": min(ys),
|
||||||
|
"y_max": max(ys),
|
||||||
|
"y_mean": statistics.mean(ys),
|
||||||
|
"y_range": max(ys) - min(ys),
|
||||||
|
"y_stdev": statistics.stdev(ys) if n_detected >= 2 else 0,
|
||||||
|
})
|
||||||
|
elif n_detected == 1:
|
||||||
|
stats.update({
|
||||||
|
"x_min": detected[0]["cx"],
|
||||||
|
"x_max": detected[0]["cx"],
|
||||||
|
"x_mean": detected[0]["cx"],
|
||||||
|
"x_range": 0,
|
||||||
|
"x_stdev": 0,
|
||||||
|
"y_min": detected[0]["cy"],
|
||||||
|
"y_max": detected[0]["cy"],
|
||||||
|
"y_mean": detected[0]["cy"],
|
||||||
|
"y_range": 0,
|
||||||
|
"y_stdev": 0,
|
||||||
|
})
|
||||||
|
|
||||||
|
return stats
|
||||||
|
|
||||||
|
|
||||||
|
def _assert_reproducible(
    stats: Dict,
    element_name: str,
    min_rate: int = _MIN_DETECTION_RATE,
    max_var: float = _MAX_VARIANCE,
):
    """Assert that detections are frequent enough and tightly grouped.

    Args:
        stats: Summary dict with ``detected``, ``rate_str``, ``total`` and,
            when two or more runs detected the element, ``x_range`` and
            ``y_range``.
        element_name: Label used as the prefix of the failure messages.
        min_rate: Minimum number of detections (a count, not a ratio).
        max_var: Exclusive upper bound for ``x_range`` and ``y_range``.
    """
    hit_count = stats["detected"]
    assert hit_count >= min_rate, (
        f"{element_name}: seulement {stats['rate_str']} détections "
        f"(minimum requis: {min_rate}/{stats['total']})"
    )

    # The spread is only checked when there are at least two detections.
    if hit_count < 2:
        return

    assert stats["x_range"] < max_var, (
        f"{element_name}: variance X trop élevée: "
        f"{stats['x_range']:.4f} (max={max_var})"
    )
    assert stats["y_range"] < max_var, (
        f"{element_name}: variance Y trop élevée: "
        f"{stats['y_range']:.4f} (max={max_var})"
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _assert_in_zone(
|
||||||
|
stats: Dict,
|
||||||
|
zone: Dict[str, float],
|
||||||
|
element_name: str,
|
||||||
|
):
|
||||||
|
"""Vérifier que la position moyenne est dans la zone attendue."""
|
||||||
|
assert stats["detected"] >= 1, f"{element_name}: aucune détection"
|
||||||
|
cx = stats["x_mean"]
|
||||||
|
cy = stats["y_mean"]
|
||||||
|
assert zone["x_min"] <= cx <= zone["x_max"], (
|
||||||
|
f"{element_name}: X moyen {cx:.4f} hors zone "
|
||||||
|
f"[{zone['x_min']:.2f}-{zone['x_max']:.2f}]"
|
||||||
|
)
|
||||||
|
assert zone["y_min"] <= cy <= zone["y_max"], (
|
||||||
|
f"{element_name}: Y moyen {cy:.4f} hors zone "
|
||||||
|
f"[{zone['y_min']:.2f}-{zone['y_max']:.2f}]"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Zones calibrées (mesurées le 5 avril 2026)
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
# Expected zone per target, keyed by a short identifier. Bounds are compared
# against the mean cx/cy values by _assert_in_zone.
CALIBRATED_ZONES = {
    # shot_0001 - Windows File Explorer
    "rechercher_taskbar": dict(x_min=0.40, x_max=0.60, y_min=0.74, y_max=0.84),
    "agent_v1_folder": dict(x_min=0.18, x_max=0.30, y_min=0.16, y_max=0.26),
    # shot_0004 - Notepad with tabs
    "fichier_menu": dict(x_min=0.06, x_max=0.13, y_min=0.06, y_max=0.12),
    "modifier_menu": dict(x_min=0.11, x_max=0.18, y_min=0.06, y_max=0.12),
    "ceci_est_un_test_tab": dict(x_min=0.65, x_max=0.75, y_min=0.03, y_max=0.08),
    "close_x_notepad": dict(x_min=0.95, x_max=1.02, y_min=0.02, y_max=0.06),
    # shot_0014 - Google Chrome
    "google_search_bar": dict(x_min=0.48, x_max=0.60, y_min=0.35, y_max=0.43),
    "gmail_link": dict(x_min=0.87, x_max=0.95, y_min=0.10, y_max=0.16),
}
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Tests de reproductibilité — 10 appels consécutifs
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.visual
class TestReproductibilite:
    """Each test calls the VLM repeatedly and checks that the answers agree.

    Pass criteria for the zone-based tests:
    - at least ``_MIN_DETECTION_RATE`` detections out of ``_N_REPEATS`` runs
    - coordinate spread below ``_MAX_VARIANCE`` on each axis
    - mean position inside the calibrated zone
    """

    @staticmethod
    def _check_stable_detection(
        shot_b64, description, repro_label, zone_key, zone_label, tag
    ):
        """Run the grounding, apply both assertions, and print a summary line.

        ``repro_label`` and ``zone_label`` prefix the failure messages of
        ``_assert_reproducible`` and ``_assert_in_zone``. ``tag`` is the
        bracketed name in the printed report line.
        """
        results = _run_n_times(shot_b64, description)
        stats = _compute_stats(results)
        _assert_reproducible(stats, repro_label)
        _assert_in_zone(stats, CALIBRATED_ZONES[zone_key], zone_label)
        # Summary line for the report.
        print(f"\n [{tag}] {stats['rate_str']} détections, "
              f"X=[{stats.get('x_min', 0):.4f}-{stats.get('x_max', 0):.4f}], "
              f"Y=[{stats.get('y_min', 0):.4f}-{stats.get('y_max', 0):.4f}]")

    # -- shot_0001 : File Explorer --

    @pytest.fixture(scope="class")
    def shot_0001(self):
        """Provide shot_0001 as base64, loaded once per class."""
        return _load_screenshot("shot_0001_full.png")

    def test_rechercher_10_fois(self, shot_0001):
        """The VLM finds 'Rechercher' at the same place on every run."""
        self._check_stable_detection(
            shot_0001,
            "the 'Rechercher' search text in the Windows taskbar at the bottom",
            "Rechercher (taskbar)",
            "rechercher_taskbar",
            "Rechercher",
            "Rechercher",
        )

    def test_agent_v1_10_fois(self, shot_0001):
        """The VLM finds the 'agent_v1' folder at the same place on every run."""
        self._check_stable_detection(
            shot_0001,
            "the folder named 'agent_v1' in the file list",
            "agent_v1 (dossier)",
            "agent_v1_folder",
            "agent_v1",
            "agent_v1",
        )

    def test_close_x_explorateur_10_fois(self, shot_0001):
        """The window's close button is detected consistently at the right edge.

        The VLM may confuse the window's X with the tab's X (several close
        buttons on screen), so the test requires that the majority of
        detections land on the window button and that those agree.

        The earlier version classified window-button hits with ``cx > 1.0``,
        which only worked while coordinates were divided by 1000. Now that
        ``_grounding_vlm`` divides by ``_SCREEN_W``, a box inside the image
        cannot reach that value, so the boundary is expressed as the same
        pixel column (x = 1000) in screen fractions.
        """
        results = _run_n_times(
            shot_0001,
            "the X close button of the 'Lea' window",
        )

        # The VLM must detect something often enough.
        detected = [r for r in results if r["detected"]]
        assert len(detected) >= _MIN_DETECTION_RATE, (
            f"Close X: seulement {len(detected)}/{len(results)} détections"
        )

        # Split the detections: right of pixel column 1000 counts as the
        # window button, everything else as the tab button.
        window_min_cx = 1000 / _SCREEN_W
        window_hits = [r for r in detected if r["cx"] > window_min_cx]
        tab_hits = [r for r in detected if r["cx"] <= window_min_cx]

        # At least 60% of the detections must target the window button.
        assert len(window_hits) >= len(detected) * 0.6, (
            f"Close X: seulement {len(window_hits)}/{len(detected)} sur le bouton fenêtre. "
            f"Ambiguïté avec bouton onglet ({len(tab_hits)} détections)."
        )

        # The main cluster must agree on its top-left corner (in pixels).
        if len(window_hits) >= 2:
            bboxes = [r["bbox"] for r in window_hits]
            x1s = [b[0] for b in bboxes]
            y1s = [b[1] for b in bboxes]
            assert max(x1s) - min(x1s) < 20, (
                f"Close X fenêtre: x1 trop variable: {min(x1s)}-{max(x1s)}"
            )
            assert max(y1s) - min(y1s) < 20, (
                f"Close X fenêtre: y1 trop variable: {min(y1s)}-{max(y1s)}"
            )

        # Build the summary first so an empty cluster only drops the mean,
        # instead of blanking the whole line.
        summary = (
            f"\n [Close X Explorer] {len(detected)}/{len(results)} détections, "
            f"{len(window_hits)} bouton fenêtre, {len(tab_hits)} bouton onglet."
        )
        if window_hits:
            mean_cx = statistics.mean(r["cx"] for r in window_hits)
            summary += f" cx_mean_fenetre={mean_cx:.4f}"
        print(summary)

    # -- shot_0004 : Notepad --

    @pytest.fixture(scope="class")
    def shot_0004(self):
        """Provide shot_0004 as base64, loaded once per class."""
        return _load_screenshot("shot_0004_full.png")

    def test_fichier_10_fois(self, shot_0004):
        """The VLM finds the 'Fichier' menu at the same place on every run."""
        self._check_stable_detection(
            shot_0004,
            "the 'Fichier' menu item in the menu bar",
            "Fichier (menu)",
            "fichier_menu",
            "Fichier",
            "Fichier",
        )

    def test_modifier_10_fois(self, shot_0004):
        """The VLM finds the 'Modifier' menu at the same place on every run."""
        self._check_stable_detection(
            shot_0004,
            "the 'Modifier' menu item in the menu bar",
            "Modifier (menu)",
            "modifier_menu",
            "Modifier",
            "Modifier",
        )

    def test_ceci_est_un_test_10_fois(self, shot_0004):
        """The VLM finds the 'Ceci est un test.txt' tab at the same place on every run."""
        self._check_stable_detection(
            shot_0004,
            "the tab labeled 'Ceci est un test.txt'",
            "Ceci est un test.txt (onglet)",
            "ceci_est_un_test_tab",
            "Ceci est un test.txt",
            "Ceci est un test.txt",
        )

    # -- shot_0014 : Google Chrome --

    @pytest.fixture(scope="class")
    def shot_0014(self):
        """Provide shot_0014 as base64, loaded once per class."""
        return _load_screenshot("shot_0014_full.png")

    def test_google_search_10_fois(self, shot_0014):
        """The VLM finds the Google search bar at the same place on every run."""
        self._check_stable_detection(
            shot_0014,
            "the Google search bar 'Rechercher sur Google ou saisir une URL'",
            "Recherche Google",
            "google_search_bar",
            "Recherche Google",
            "Google search",
        )

    def test_gmail_10_fois(self, shot_0014):
        """The VLM finds the Gmail link at the same place on every run."""
        self._check_stable_detection(
            shot_0014,
            "the 'Gmail' link at the top of the page",
            "Gmail",
            "gmail_link",
            "Gmail",
            "Gmail",
        )
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Tests de robustesse Citrix — JPEG dégradé
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.visual
|
||||||
|
class TestCitrixRobustesse:
|
||||||
|
"""Vérifier que le grounding fonctionne sur des images compressées.
|
||||||
|
|
||||||
|
Simule un environnement Citrix/RDP avec compression JPEG qualité 15-25.
|
||||||
|
Compare les résultats original vs dégradé.
|
||||||
|
"""
|
||||||
|
|
||||||
|
@pytest.fixture(scope="class")
|
||||||
|
def shots_original(self):
|
||||||
|
return {
|
||||||
|
"shot_0001": _load_screenshot("shot_0001_full.png"),
|
||||||
|
"shot_0004": _load_screenshot("shot_0004_full.png"),
|
||||||
|
"shot_0014": _load_screenshot("shot_0014_full.png"),
|
||||||
|
}
|
||||||
|
|
||||||
|
@pytest.fixture(scope="class")
|
||||||
|
def shots_citrix(self, shots_original):
|
||||||
|
return {
|
||||||
|
name: _degrade_citrix(b64, quality=20)
|
||||||
|
for name, b64 in shots_original.items()
|
||||||
|
}
|
||||||
|
|
||||||
|
def _compare_original_vs_citrix(
|
||||||
|
self,
|
||||||
|
original_b64: str,
|
||||||
|
citrix_b64: str,
|
||||||
|
description: str,
|
||||||
|
element_name: str,
|
||||||
|
zone: Dict,
|
||||||
|
n_runs: int = 5,
|
||||||
|
) -> Dict:
|
||||||
|
"""Comparer les résultats original vs Citrix."""
|
||||||
|
# 5 runs sur l'original
|
||||||
|
results_orig = _run_n_times(original_b64, description, n=n_runs, delay=0.2)
|
||||||
|
stats_orig = _compute_stats(results_orig)
|
||||||
|
|
||||||
|
# 5 runs sur le Citrix
|
||||||
|
results_citrix = _run_n_times(citrix_b64, description, n=n_runs, delay=0.2)
|
||||||
|
stats_citrix = _compute_stats(results_citrix)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"original": stats_orig,
|
||||||
|
"citrix": stats_citrix,
|
||||||
|
}
|
||||||
|
|
||||||
|
def test_rechercher_citrix(self, shots_original, shots_citrix):
|
||||||
|
"""'Rechercher' détecté malgré compression JPEG Q20."""
|
||||||
|
comp = self._compare_original_vs_citrix(
|
||||||
|
shots_original["shot_0001"],
|
||||||
|
shots_citrix["shot_0001"],
|
||||||
|
"the 'Rechercher' search text in the Windows taskbar at the bottom",
|
||||||
|
"Rechercher",
|
||||||
|
CALIBRATED_ZONES["rechercher_taskbar"],
|
||||||
|
)
|
||||||
|
# Au moins 3/5 détections sur Citrix
|
||||||
|
assert comp["citrix"]["detected"] >= 3, (
|
||||||
|
f"Citrix Rechercher: seulement {comp['citrix']['rate_str']} détections"
|
||||||
|
)
|
||||||
|
# Position dans la zone calibrée
|
||||||
|
if comp["citrix"]["detected"] >= 1:
|
||||||
|
_assert_in_zone(comp["citrix"], CALIBRATED_ZONES["rechercher_taskbar"], "Rechercher (Citrix)")
|
||||||
|
print(f"\n [Rechercher Citrix] orig={comp['original']['rate_str']}, "
|
||||||
|
f"citrix={comp['citrix']['rate_str']}")
|
||||||
|
|
||||||
|
def test_fichier_citrix(self, shots_original, shots_citrix):
|
||||||
|
"""Menu 'Fichier' détecté malgré compression JPEG Q20."""
|
||||||
|
comp = self._compare_original_vs_citrix(
|
||||||
|
shots_original["shot_0004"],
|
||||||
|
shots_citrix["shot_0004"],
|
||||||
|
"the 'Fichier' menu item in the menu bar",
|
||||||
|
"Fichier",
|
||||||
|
CALIBRATED_ZONES["fichier_menu"],
|
||||||
|
)
|
||||||
|
assert comp["citrix"]["detected"] >= 3, (
|
||||||
|
f"Citrix Fichier: seulement {comp['citrix']['rate_str']} détections"
|
||||||
|
)
|
||||||
|
if comp["citrix"]["detected"] >= 1:
|
||||||
|
_assert_in_zone(comp["citrix"], CALIBRATED_ZONES["fichier_menu"], "Fichier (Citrix)")
|
||||||
|
print(f"\n [Fichier Citrix] orig={comp['original']['rate_str']}, "
|
||||||
|
f"citrix={comp['citrix']['rate_str']}")
|
||||||
|
|
||||||
|
def test_ceci_est_un_test_citrix(self, shots_original, shots_citrix):
|
||||||
|
"""Onglet 'Ceci est un test.txt' détecté malgré compression JPEG Q20."""
|
||||||
|
comp = self._compare_original_vs_citrix(
|
||||||
|
shots_original["shot_0004"],
|
||||||
|
shots_citrix["shot_0004"],
|
||||||
|
"the tab labeled 'Ceci est un test.txt'",
|
||||||
|
"Ceci est un test.txt",
|
||||||
|
CALIBRATED_ZONES["ceci_est_un_test_tab"],
|
||||||
|
)
|
||||||
|
assert comp["citrix"]["detected"] >= 3, (
|
||||||
|
f"Citrix tab: seulement {comp['citrix']['rate_str']} détections"
|
||||||
|
)
|
||||||
|
if comp["citrix"]["detected"] >= 1:
|
||||||
|
_assert_in_zone(
|
||||||
|
comp["citrix"],
|
||||||
|
CALIBRATED_ZONES["ceci_est_un_test_tab"],
|
||||||
|
"Ceci est un test.txt (Citrix)",
|
||||||
|
)
|
||||||
|
print(f"\n [Ceci est un test.txt Citrix] orig={comp['original']['rate_str']}, "
|
||||||
|
f"citrix={comp['citrix']['rate_str']}")
|
||||||
|
|
||||||
|
def test_google_search_citrix(self, shots_original, shots_citrix):
|
||||||
|
"""Barre de recherche Google détectée malgré compression JPEG Q20."""
|
||||||
|
comp = self._compare_original_vs_citrix(
|
||||||
|
shots_original["shot_0014"],
|
||||||
|
shots_citrix["shot_0014"],
|
||||||
|
"the Google search bar 'Rechercher sur Google ou saisir une URL'",
|
||||||
|
"Recherche Google",
|
||||||
|
CALIBRATED_ZONES["google_search_bar"],
|
||||||
|
)
|
||||||
|
assert comp["citrix"]["detected"] >= 3, (
|
||||||
|
f"Citrix Google: seulement {comp['citrix']['rate_str']} détections"
|
||||||
|
)
|
||||||
|
if comp["citrix"]["detected"] >= 1:
|
||||||
|
_assert_in_zone(
|
||||||
|
comp["citrix"],
|
||||||
|
CALIBRATED_ZONES["google_search_bar"],
|
||||||
|
"Recherche Google (Citrix)",
|
||||||
|
)
|
||||||
|
print(f"\n [Google search Citrix] orig={comp['original']['rate_str']}, "
|
||||||
|
f"citrix={comp['citrix']['rate_str']}")
|
||||||
|
|
||||||
|
def test_gmail_citrix(self, shots_original, shots_citrix):
    """The Gmail link is detected despite JPEG Q20 compression."""
    zone_key = "gmail_link"
    comparison = self._compare_original_vs_citrix(
        shots_original["shot_0014"],
        shots_citrix["shot_0014"],
        "the 'Gmail' link at the top of the page",
        "Gmail",
        CALIBRATED_ZONES[zone_key],
    )

    # Require at least 3 detections on the Citrix-degraded image.
    citrix_stats = comparison["citrix"]
    assert citrix_stats["detected"] >= 3, (
        f"Citrix Gmail: seulement {citrix_stats['rate_str']} détections"
    )

    # The position is only checked when something was detected.
    if citrix_stats["detected"] >= 1:
        _assert_in_zone(citrix_stats, CALIBRATED_ZONES[zone_key], "Gmail (Citrix)")

    # Print both detection rates side by side.
    original_rate = comparison["original"]["rate_str"]
    citrix_rate = citrix_stats["rate_str"]
    print(f"\n [Gmail Citrix] orig={original_rate}, "
          f"citrix={citrix_rate}")
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Tests de dégradation progressive — qualité JPEG 50 → 25 → 15 → 10 → 5
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.visual
class TestDegradationProgressive:
    """Measure the JPEG quality below which grounding starts to fail."""

    @pytest.fixture(scope="class")
    def shot_0004(self):
        """Load ``shot_0004_full.png`` (class-scoped fixture)."""
        return _load_screenshot("shot_0004_full.png")

    def test_fichier_degradation_progressive(self, shot_0004):
        """'Fichier' menu: probe JPEG Q50, Q25, Q15, Q10 and Q5."""
        quality_levels = [50, 25, 15, 10, 5]

        # For each quality: degrade the screenshot, run the detection
        # 3 times, then reduce the runs to statistics.
        stats_per_quality = {
            level: _compute_stats(
                _run_n_times(
                    _degrade_citrix(shot_0004, quality=level),
                    "the 'Fichier' menu item in the menu bar",
                    n=3,
                    delay=0.2,
                )
            )
            for level in quality_levels
        }

        # Print the degradation report, one line per quality level.
        print("\n === Dégradation progressive : Fichier menu ===")
        for level, stats in stats_per_quality.items():
            suffix = ""
            if stats["detected"] >= 1:
                mean_x, mean_y = stats["x_mean"], stats["y_mean"]
                bounds = CALIBRATED_ZONES["fichier_menu"]
                inside = (
                    bounds["x_min"] <= mean_x <= bounds["x_max"]
                    and bounds["y_min"] <= mean_y <= bounds["y_max"]
                )
                if inside:
                    suffix = " (in zone)"
                else:
                    suffix = f" (HORS zone: {mean_x:.3f},{mean_y:.3f})"
            print(f" Q{level:>2}: {stats['rate_str']} détections{suffix}")

        # Only Q50 and Q25 are required to work; lower qualities are
        # reported but not asserted.
        for required_level in (50, 25):
            assert stats_per_quality[required_level]["detected"] >= 2, (
                f"Q{required_level} devrait fonctionner"
            )
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Rapport final — exécuté en dernier, résume tout
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.visual
class TestRapportFinal:
    """Full report on the VLM grounding capabilities.

    Runs a battery of detections on the original screenshots and on their
    Citrix-degraded (JPEG) variants, then prints a structured report with
    detection rates, coordinate ranges and the original-vs-Citrix offset.
    """

    @staticmethod
    def _in_zone(stats, zone):
        """Return True when the mean detected point lies inside ``zone``.

        Returns False when nothing was detected; the mean coordinates are
        only read when ``stats["detected"] >= 1``.
        """
        if stats["detected"] < 1:
            return False
        return (
            zone["x_min"] <= stats["x_mean"] <= zone["x_max"]
            and zone["y_min"] <= stats["y_mean"] <= zone["y_max"]
        )

    @classmethod
    def _format_row(cls, label, stats, zone):
        """Format one table row: rate, mean X/Y, X/Y range and zone verdict.

        The verdict is "?" when nothing was detected, otherwise "OK" or
        "HORS" depending on whether the mean point is inside ``zone``.
        """
        if stats["detected"] >= 1:
            verdict = "OK" if cls._in_zone(stats, zone) else "HORS"
        else:
            verdict = "?"
        # `or 0.0` keeps the float format spec valid when a coordinate is
        # absent (or None) because nothing was detected.
        x_mean, y_mean, x_range, y_range = (
            stats.get(key) or 0.0
            for key in ("x_mean", "y_mean", "x_range", "y_range")
        )
        return (
            f"{label:<35} {stats['rate_str']:>6} "
            f"{x_mean:>8.4f} "
            f"{y_mean:>8.4f} "
            f"{x_range:>8.4f} "
            f"{y_range:>8.4f} "
            f"{verdict:>6}"
        )

    def test_rapport_complet(self):
        """Generate the final robustness report for VLM grounding.

        Fails when fewer than 80% of the detections on the original
        screenshots succeed.
        """
        runs = 5             # repetitions per target, for each variant
        citrix_quality = 20  # JPEG quality passed to _degrade_citrix

        shots = {
            "shot_0001": _load_screenshot("shot_0001_full.png"),
            "shot_0004": _load_screenshot("shot_0004_full.png"),
            "shot_0014": _load_screenshot("shot_0014_full.png"),
        }

        # (screenshot key, report label, prompt description, expected zone)
        targets = [
            ("shot_0001", "Rechercher (taskbar)",
             "the 'Rechercher' search text in the Windows taskbar at the bottom",
             CALIBRATED_ZONES["rechercher_taskbar"]),
            ("shot_0001", "agent_v1 (dossier)",
             "the folder named 'agent_v1' in the file list",
             CALIBRATED_ZONES["agent_v1_folder"]),
            ("shot_0004", "Fichier (menu)",
             "the 'Fichier' menu item in the menu bar",
             CALIBRATED_ZONES["fichier_menu"]),
            ("shot_0004", "Modifier (menu)",
             "the 'Modifier' menu item in the menu bar",
             CALIBRATED_ZONES["modifier_menu"]),
            ("shot_0004", "Ceci est un test.txt (onglet)",
             "the tab labeled 'Ceci est un test.txt'",
             CALIBRATED_ZONES["ceci_est_un_test_tab"]),
            ("shot_0004", "Close X (Bloc-notes)",
             "the close button X of the Notepad window at the top right",
             CALIBRATED_ZONES["close_x_notepad"]),
            ("shot_0014", "Recherche Google (barre)",
             "the Google search bar 'Rechercher sur Google ou saisir une URL'",
             CALIBRATED_ZONES["google_search_bar"]),
            ("shot_0014", "Gmail (lien)",
             "the 'Gmail' link at the top of the page",
             CALIBRATED_ZONES["gmail_link"]),
        ]

        report_lines = [
            "",
            "=" * 80,
            "RAPPORT DE ROBUSTESSE — Grounding VLM qwen2.5vl:7b",
            f"Date: {time.strftime('%Y-%m-%d %H:%M:%S')}",
            f"Screenshots: 1280x800 ({len(shots)} images, {len(targets)} cibles)",
            f"Répétitions: {runs} par cible (original + Citrix Q{citrix_quality})",
            "=" * 80,
            "",
            "--- ORIGINAL (PNG) ---",
            f"{'Élément':<35} {'Taux':>6} {'X moy':>8} {'Y moy':>8} "
            f"{'Var X':>8} {'Var Y':>8} {'Zone':>6}",
            "-" * 80,
        ]

        # Pass 1: original screenshots.
        all_original_stats = []
        for shot_name, label, desc, zone in targets:
            results_orig = _run_n_times(shots[shot_name], desc, n=runs, delay=0.2)
            stats_orig = _compute_stats(results_orig)
            all_original_stats.append((label, stats_orig, zone))
            report_lines.append(self._format_row(label, stats_orig, zone))

        report_lines.extend([
            "",
            f"--- CITRIX (JPEG Q{citrix_quality}) ---",
            f"{'Élément':<35} {'Taux':>6} {'X moy':>8} {'Y moy':>8} "
            f"{'Var X':>8} {'Var Y':>8} {'Zone':>6} {'Écart orig':>10}",
            "-" * 90,
        ])

        # Pass 2: Citrix-degraded screenshots. Each screenshot is degraded
        # once and reused for every target located on it.
        # NOTE(review): assumes _degrade_citrix is deterministic for a given
        # image and quality — confirm.
        degraded_shots = {}
        all_citrix_stats = []
        for (shot_name, label, desc, zone), (_, orig_stats, _) in zip(
            targets, all_original_stats
        ):
            if shot_name not in degraded_shots:
                degraded_shots[shot_name] = _degrade_citrix(
                    shots[shot_name], quality=citrix_quality
                )
            results_citrix = _run_n_times(
                degraded_shots[shot_name], desc, n=runs, delay=0.2
            )
            stats_citrix = _compute_stats(results_citrix)
            all_citrix_stats.append((label, stats_citrix, zone))

            # Offset between the Citrix and original mean positions; only
            # defined when both variants produced at least one detection.
            ecart = "N/A"
            if stats_citrix["detected"] >= 1 and orig_stats["detected"] >= 1:
                dx = abs(stats_citrix["x_mean"] - orig_stats["x_mean"])
                dy = abs(stats_citrix["y_mean"] - orig_stats["y_mean"])
                ecart = f"{dx:.4f}/{dy:.4f}"

            report_lines.append(
                self._format_row(label, stats_citrix, zone) + f" {ecart:>10}"
            )

        # Summary
        orig_total = sum(s["detected"] for _, s, _ in all_original_stats)
        orig_max = sum(s["total"] for _, s, _ in all_original_stats)
        citrix_total = sum(s["detected"] for _, s, _ in all_citrix_stats)
        citrix_max = sum(s["total"] for _, s, _ in all_citrix_stats)

        # Guard the ratios so an empty run reports 0% instead of raising
        # ZeroDivisionError before the report is printed.
        orig_rate = orig_total / orig_max if orig_max else 0.0
        citrix_rate = citrix_total / citrix_max if citrix_max else 0.0

        orig_in_zone = sum(
            1 for _, s, z in all_original_stats if self._in_zone(s, z)
        )
        citrix_in_zone = sum(
            1 for _, s, z in all_citrix_stats if self._in_zone(s, z)
        )

        # Unreliable elements, judged on the original screenshots only:
        # fewer than 3 detections, or a coordinate range at or above
        # _MAX_VARIANCE.
        unreliable = []
        for label, s, _ in all_original_stats:
            if s["detected"] < 3:
                unreliable.append(f"{label} (taux {s['rate_str']})")
            elif s.get("x_range", 0) >= _MAX_VARIANCE or s.get("y_range", 0) >= _MAX_VARIANCE:
                unreliable.append(
                    f"{label} (variance X={s.get('x_range', 0):.4f} "
                    f"Y={s.get('y_range', 0):.4f})"
                )

        report_lines.extend([
            "",
            "=" * 80,
            "RÉSUMÉ",
            "=" * 80,
            f" Détection original : {orig_total}/{orig_max} "
            f"({orig_rate*100:.0f}%)",
            f" Détection Citrix Q{citrix_quality}: {citrix_total}/{citrix_max} "
            f"({citrix_rate*100:.0f}%)",
            f" Positionnement correct (original) : {orig_in_zone}/{len(all_original_stats)}",
            f" Positionnement correct (Citrix) : {citrix_in_zone}/{len(all_citrix_stats)}",
            "",
        ])

        if unreliable:
            report_lines.append(" ÉLÉMENTS NON FIABLES :")
            report_lines.extend(f" - {u}" for u in unreliable)
        else:
            report_lines.append(" Tous les éléments sont fiables.")

        report_lines.extend([
            "",
            " NOTES TECHNIQUES :",
            " - qwen2.5vl bbox_2d retourne des pixels relatifs à l'image envoyée",
            " - Normalisation : diviser par les dimensions de l'image (W, H)",
            " - temperature=0.1 donne une variance < 0.003 typiquement",
            "=" * 80,
        ])

        print("\n".join(report_lines))

        # The test passes when at least 80% of the original detections work.
        assert orig_rate >= 0.80, (
            f"Taux de détection global trop bas: {orig_total}/{orig_max}"
        )
|
||||||
Reference in New Issue
Block a user