Compare commits
32 Commits
v3.0
...
203dc00d53
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
203dc00d53 | ||
|
|
e9a028134a | ||
|
|
01bba7bc6c | ||
|
|
d5285de99c | ||
|
|
33c198b827 | ||
|
|
816b37af98 | ||
|
|
d82aad984f | ||
|
|
057c37131f | ||
|
|
9bcce3fc68 | ||
|
|
f96f6322ec | ||
|
|
02ee2d7b5b | ||
|
|
47993e2ee9 | ||
|
|
7cc03f6f10 | ||
|
|
a21f1ea9fa | ||
|
|
9188bd7df1 | ||
|
|
f82753debe | ||
|
|
b92cb9db03 | ||
|
|
e66629ce1a | ||
|
|
cecdf417b7 | ||
|
|
56e3cc052a | ||
|
|
332366b58c | ||
|
|
ac9c207474 | ||
|
|
f85d56ac05 | ||
|
|
172167f6c0 | ||
|
|
42d49dd8bd | ||
|
|
f541bb8ce4 | ||
|
|
a6eb4c168f | ||
|
|
f6ad5ff2b2 | ||
|
|
2ac781343a | ||
|
|
bffcfb2db3 | ||
|
|
cc673755f7 | ||
|
|
4509038bf0 |
8
.gitignore
vendored
8
.gitignore
vendored
@@ -75,3 +75,11 @@ htmlcov/
|
||||
# === Backups ===
|
||||
*_backup_*
|
||||
backups/
|
||||
*.bak
|
||||
*.bak_*
|
||||
*.orig
|
||||
*.old
|
||||
|
||||
# === Legacy / Triage ===
|
||||
_a_trier/
|
||||
archives/
|
||||
|
||||
3
agent_rust/lea_uia/.gitignore
vendored
Normal file
3
agent_rust/lea_uia/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
target/
|
||||
**/target/
|
||||
|
||||
384
agent_rust/lea_uia/Cargo.lock
generated
Normal file
384
agent_rust/lea_uia/Cargo.lock
generated
Normal file
@@ -0,0 +1,384 @@
|
||||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 4
|
||||
|
||||
[[package]]
|
||||
name = "anstream"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"anstyle-parse",
|
||||
"anstyle-query",
|
||||
"anstyle-wincon",
|
||||
"colorchoice",
|
||||
"is_terminal_polyfill",
|
||||
"utf8parse",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle"
|
||||
version = "1.0.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000"
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-parse"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e"
|
||||
dependencies = [
|
||||
"utf8parse",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-query"
|
||||
version = "1.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc"
|
||||
dependencies = [
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-wincon"
|
||||
version = "3.0.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"once_cell_polyfill",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "4.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b193af5b67834b676abd72466a96c1024e6a6ad978a1f484bd90b85c94041351"
|
||||
dependencies = [
|
||||
"clap_builder",
|
||||
"clap_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_builder"
|
||||
version = "4.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f"
|
||||
dependencies = [
|
||||
"anstream",
|
||||
"anstyle",
|
||||
"clap_lex",
|
||||
"strsim",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_derive"
|
||||
version = "4.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1110bd8a634a1ab8cb04345d8d878267d57c3cf1b38d91b71af6686408bbca6a"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_lex"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9"
|
||||
|
||||
[[package]]
|
||||
name = "colorchoice"
|
||||
version = "1.0.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570"
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
||||
|
||||
[[package]]
|
||||
name = "is_terminal_polyfill"
|
||||
version = "1.70.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "1.0.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"
|
||||
|
||||
[[package]]
|
||||
name = "lea_uia"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"clap",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"windows",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
|
||||
|
||||
[[package]]
|
||||
name = "once_cell_polyfill"
|
||||
version = "1.70.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.106"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.45"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.228"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
|
||||
dependencies = [
|
||||
"serde_core",
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_core"
|
||||
version = "1.0.228"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.228"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.149"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
|
||||
dependencies = [
|
||||
"itoa",
|
||||
"memchr",
|
||||
"serde",
|
||||
"serde_core",
|
||||
"zmij",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strsim"
|
||||
version = "0.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.117"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
|
||||
|
||||
[[package]]
|
||||
name = "utf8parse"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
|
||||
|
||||
[[package]]
|
||||
name = "windows"
|
||||
version = "0.59.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7f919aee0a93304be7f62e8e5027811bbba96bcb1de84d6618be56e43f8a32a1"
|
||||
dependencies = [
|
||||
"windows-core",
|
||||
"windows-targets",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-core"
|
||||
version = "0.59.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "810ce18ed2112484b0d4e15d022e5f598113e220c53e373fb31e67e21670c1ce"
|
||||
dependencies = [
|
||||
"windows-implement",
|
||||
"windows-interface",
|
||||
"windows-result",
|
||||
"windows-strings",
|
||||
"windows-targets",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-implement"
|
||||
version = "0.59.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "83577b051e2f49a058c308f17f273b570a6a758386fc291b5f6a934dd84e48c1"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-interface"
|
||||
version = "0.59.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-link"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a"
|
||||
|
||||
[[package]]
|
||||
name = "windows-link"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
|
||||
|
||||
[[package]]
|
||||
name = "windows-result"
|
||||
version = "0.3.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6"
|
||||
dependencies = [
|
||||
"windows-link 0.1.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-strings"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "87fa48cc5d406560701792be122a10132491cff9d0aeb23583cc2dcafc847319"
|
||||
dependencies = [
|
||||
"windows-link 0.1.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.61.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
|
||||
dependencies = [
|
||||
"windows-link 0.2.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-targets"
|
||||
version = "0.53.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3"
|
||||
dependencies = [
|
||||
"windows-link 0.2.1",
|
||||
"windows_aarch64_gnullvm",
|
||||
"windows_aarch64_msvc",
|
||||
"windows_i686_gnu",
|
||||
"windows_i686_gnullvm",
|
||||
"windows_i686_msvc",
|
||||
"windows_x86_64_gnu",
|
||||
"windows_x86_64_gnullvm",
|
||||
"windows_x86_64_msvc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_gnullvm"
|
||||
version = "0.53.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53"
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_msvc"
|
||||
version = "0.53.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnu"
|
||||
version = "0.53.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnullvm"
|
||||
version = "0.53.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_msvc"
|
||||
version = "0.53.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnu"
|
||||
version = "0.53.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnullvm"
|
||||
version = "0.53.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_msvc"
|
||||
version = "0.53.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650"
|
||||
|
||||
[[package]]
|
||||
name = "zmij"
|
||||
version = "1.0.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"
|
||||
34
agent_rust/lea_uia/Cargo.toml
Normal file
34
agent_rust/lea_uia/Cargo.toml
Normal file
@@ -0,0 +1,34 @@
|
||||
[package]
|
||||
name = "lea_uia"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
authors = ["Dom <dom@rpa-vision-v3>"]
|
||||
description = "Helper Windows UI Automation pour Léa (agent RPA V3)"
|
||||
license = "Proprietary"
|
||||
|
||||
[[bin]]
|
||||
name = "lea_uia"
|
||||
path = "src/main.rs"
|
||||
|
||||
[dependencies]
|
||||
clap = { version = "4.5", features = ["derive"] }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
|
||||
[target.'cfg(windows)'.dependencies]
|
||||
windows = { version = "0.59", features = [
|
||||
"Win32_Foundation",
|
||||
"Win32_System_Com",
|
||||
"Win32_System_Ole",
|
||||
"Win32_System_Variant",
|
||||
"Win32_UI_Accessibility",
|
||||
"Win32_UI_WindowsAndMessaging",
|
||||
"Win32_Graphics_Gdi",
|
||||
] }
|
||||
|
||||
[profile.release]
|
||||
opt-level = "z" # Taille minimale
|
||||
lto = true # Link-time optimization
|
||||
codegen-units = 1 # Meilleure optimisation
|
||||
strip = true # Retirer les symboles
|
||||
panic = "abort" # Pas d'unwinding → binaire plus petit
|
||||
564
agent_rust/lea_uia/src/main.rs
Normal file
564
agent_rust/lea_uia/src/main.rs
Normal file
@@ -0,0 +1,564 @@
|
||||
// lea_uia — Helper Windows UI Automation pour Léa
|
||||
//
|
||||
// Binaire standalone qui expose 3 commandes UIA :
|
||||
// query → retourne l'élément UIA à une position (x, y)
|
||||
// find → retrouve un élément par son chemin logique
|
||||
// capture → liste les éléments visibles (debug)
|
||||
//
|
||||
// Communication avec l'agent Python via stdin/stdout JSON.
|
||||
// Tous les appels sont non-bloquants et retournent du JSON structuré.
|
||||
//
|
||||
// Sur Linux (développement) : retourne des stubs d'erreur.
|
||||
// Sur Windows : utilise UIAutomationCore via `windows-rs`.
|
||||
|
||||
use clap::{Parser, Subcommand};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Parser)]
|
||||
#[command(name = "lea_uia")]
|
||||
#[command(about = "Helper UI Automation pour Léa", long_about = None)]
|
||||
#[command(version)]
|
||||
struct Cli {
|
||||
#[command(subcommand)]
|
||||
command: Commands,
|
||||
}
|
||||
|
||||
#[derive(Subcommand)]
|
||||
enum Commands {
|
||||
/// Retourner l'élément UIA à une position donnée (x, y en pixels écran)
|
||||
Query {
|
||||
/// Coordonnée X (pixels)
|
||||
#[arg(long)]
|
||||
x: i32,
|
||||
/// Coordonnée Y (pixels)
|
||||
#[arg(long)]
|
||||
y: i32,
|
||||
/// Inclure la hiérarchie des parents (peut être lent)
|
||||
#[arg(long, default_value_t = true)]
|
||||
with_parents: bool,
|
||||
},
|
||||
/// Rechercher un élément par son chemin logique ou son nom
|
||||
Find {
|
||||
/// Nom de l'élément (Name property)
|
||||
#[arg(long)]
|
||||
name: Option<String>,
|
||||
/// Type de contrôle (Button, Edit, MenuItem, etc.)
|
||||
#[arg(long)]
|
||||
control_type: Option<String>,
|
||||
/// AutomationId
|
||||
#[arg(long)]
|
||||
automation_id: Option<String>,
|
||||
/// Limite la recherche à cette fenêtre (titre exact)
|
||||
#[arg(long)]
|
||||
window: Option<String>,
|
||||
/// Timeout en millisecondes
|
||||
#[arg(long, default_value_t = 2000)]
|
||||
timeout_ms: u32,
|
||||
},
|
||||
/// Lister tous les éléments visibles de la fenêtre active (debug)
|
||||
Capture {
|
||||
/// Profondeur maximale de l'arbre
|
||||
#[arg(long, default_value_t = 3)]
|
||||
max_depth: u32,
|
||||
},
|
||||
/// Vérifier que UIA est disponible et fonctionnel
|
||||
Health,
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Modèles de sortie JSON
|
||||
// =========================================================================
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
struct UiaElement {
|
||||
/// Nom visible de l'élément
|
||||
name: String,
|
||||
/// Type de contrôle (Button, Edit, MenuItem, Window, ...)
|
||||
control_type: String,
|
||||
/// Classe Windows (Edit, Static, #32770, ...)
|
||||
class_name: String,
|
||||
/// AutomationId (ID interne, parfois vide)
|
||||
automation_id: String,
|
||||
/// Rectangle absolu [x1, y1, x2, y2] en pixels écran
|
||||
bounding_rect: [i32; 4],
|
||||
/// Est-ce que l'élément est activable
|
||||
is_enabled: bool,
|
||||
/// Est-ce que l'élément est visible
|
||||
is_offscreen: bool,
|
||||
/// Hiérarchie des parents (chemin logique)
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
parent_path: Vec<ParentHint>,
|
||||
/// Process owning this element
|
||||
#[serde(skip_serializing_if = "String::is_empty")]
|
||||
process_name: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
struct ParentHint {
|
||||
name: String,
|
||||
control_type: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
#[serde(tag = "status")]
|
||||
enum UiaResponse {
|
||||
#[serde(rename = "ok")]
|
||||
Ok {
|
||||
element: Option<UiaElement>,
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
elements: Vec<UiaElement>,
|
||||
elapsed_ms: u64,
|
||||
},
|
||||
#[serde(rename = "not_found")]
|
||||
NotFound {
|
||||
reason: String,
|
||||
elapsed_ms: u64,
|
||||
},
|
||||
#[serde(rename = "error")]
|
||||
Error {
|
||||
message: String,
|
||||
code: String,
|
||||
},
|
||||
#[serde(rename = "unavailable")]
|
||||
Unavailable {
|
||||
reason: String,
|
||||
},
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Implémentation Windows
|
||||
// =========================================================================
|
||||
|
||||
#[cfg(windows)]
|
||||
mod uia_impl {
|
||||
use super::*;
|
||||
use std::time::Instant;
|
||||
use windows::Win32::Foundation::POINT;
|
||||
use windows::Win32::System::Com::{
|
||||
CoCreateInstance, CoInitializeEx, CoUninitialize, CLSCTX_INPROC_SERVER,
|
||||
COINIT_APARTMENTTHREADED,
|
||||
};
|
||||
use windows::Win32::UI::Accessibility::{
|
||||
CUIAutomation, IUIAutomation, IUIAutomationElement, IUIAutomationTreeWalker,
|
||||
};
|
||||
|
||||
struct ComGuard;
|
||||
impl ComGuard {
|
||||
fn new() -> windows::core::Result<Self> {
|
||||
unsafe {
|
||||
let hr = CoInitializeEx(None, COINIT_APARTMENTTHREADED);
|
||||
if hr.is_err() {
|
||||
// RPC_E_CHANGED_MODE : le thread est déjà initialisé → OK
|
||||
let code = hr.0 as u32;
|
||||
if code != 0x80010106 {
|
||||
return Err(windows::core::Error::from(hr));
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(Self)
|
||||
}
|
||||
}
|
||||
impl Drop for ComGuard {
|
||||
fn drop(&mut self) {
|
||||
unsafe { CoUninitialize() };
|
||||
}
|
||||
}
|
||||
|
||||
fn get_automation() -> windows::core::Result<IUIAutomation> {
|
||||
unsafe { CoCreateInstance(&CUIAutomation, None, CLSCTX_INPROC_SERVER) }
|
||||
}
|
||||
|
||||
fn element_to_struct(
|
||||
element: &IUIAutomationElement,
|
||||
with_parents: bool,
|
||||
) -> windows::core::Result<UiaElement> {
|
||||
let mut result = UiaElement {
|
||||
name: String::new(),
|
||||
control_type: String::new(),
|
||||
class_name: String::new(),
|
||||
automation_id: String::new(),
|
||||
bounding_rect: [0, 0, 0, 0],
|
||||
is_enabled: false,
|
||||
is_offscreen: true,
|
||||
parent_path: Vec::new(),
|
||||
process_name: String::new(),
|
||||
};
|
||||
|
||||
unsafe {
|
||||
if let Ok(name) = element.CurrentName() {
|
||||
result.name = name.to_string();
|
||||
}
|
||||
if let Ok(ct) = element.CurrentLocalizedControlType() {
|
||||
result.control_type = ct.to_string();
|
||||
}
|
||||
if let Ok(cn) = element.CurrentClassName() {
|
||||
result.class_name = cn.to_string();
|
||||
}
|
||||
if let Ok(aid) = element.CurrentAutomationId() {
|
||||
result.automation_id = aid.to_string();
|
||||
}
|
||||
if let Ok(rect) = element.CurrentBoundingRectangle() {
|
||||
result.bounding_rect = [rect.left, rect.top, rect.right, rect.bottom];
|
||||
}
|
||||
if let Ok(enabled) = element.CurrentIsEnabled() {
|
||||
result.is_enabled = enabled.as_bool();
|
||||
}
|
||||
if let Ok(offscreen) = element.CurrentIsOffscreen() {
|
||||
result.is_offscreen = offscreen.as_bool();
|
||||
}
|
||||
if with_parents {
|
||||
// Remonter la hiérarchie jusqu'à la Window root
|
||||
if let Ok(automation) = get_automation() {
|
||||
let walker = automation.ControlViewWalker();
|
||||
if let Ok(walker) = walker {
|
||||
let mut current = element.clone();
|
||||
for _ in 0..10 {
|
||||
match walker.GetParentElement(¤t) {
|
||||
Ok(parent) => {
|
||||
let name = parent
|
||||
.CurrentName()
|
||||
.map(|n| n.to_string())
|
||||
.unwrap_or_default();
|
||||
let ct = parent
|
||||
.CurrentLocalizedControlType()
|
||||
.map(|c| c.to_string())
|
||||
.unwrap_or_default();
|
||||
if name.is_empty() && ct.is_empty() {
|
||||
break;
|
||||
}
|
||||
result.parent_path.insert(
|
||||
0,
|
||||
ParentHint {
|
||||
name,
|
||||
control_type: ct,
|
||||
},
|
||||
);
|
||||
current = parent;
|
||||
}
|
||||
Err(_) => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub fn query_at_point(x: i32, y: i32, with_parents: bool) -> UiaResponse {
|
||||
let start = Instant::now();
|
||||
let _com = match ComGuard::new() {
|
||||
Ok(g) => g,
|
||||
Err(e) => {
|
||||
return UiaResponse::Error {
|
||||
message: format!("CoInitializeEx: {}", e),
|
||||
code: "com_init_failed".into(),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let automation = match get_automation() {
|
||||
Ok(a) => a,
|
||||
Err(e) => {
|
||||
return UiaResponse::Error {
|
||||
message: format!("CUIAutomation: {}", e),
|
||||
code: "automation_failed".into(),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let point = POINT { x, y };
|
||||
let element = unsafe { automation.ElementFromPoint(point) };
|
||||
match element {
|
||||
Ok(el) => match element_to_struct(&el, with_parents) {
|
||||
Ok(e) => UiaResponse::Ok {
|
||||
element: Some(e),
|
||||
elements: Vec::new(),
|
||||
elapsed_ms: start.elapsed().as_millis() as u64,
|
||||
},
|
||||
Err(e) => UiaResponse::Error {
|
||||
message: format!("element_to_struct: {}", e),
|
||||
code: "extract_failed".into(),
|
||||
},
|
||||
},
|
||||
Err(_) => UiaResponse::NotFound {
|
||||
reason: format!("Aucun élément UIA à ({}, {})", x, y),
|
||||
elapsed_ms: start.elapsed().as_millis() as u64,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn find_element(
|
||||
name: Option<String>,
|
||||
_control_type: Option<String>,
|
||||
_automation_id: Option<String>,
|
||||
_window: Option<String>,
|
||||
_timeout_ms: u32,
|
||||
) -> UiaResponse {
|
||||
let start = Instant::now();
|
||||
let _com = match ComGuard::new() {
|
||||
Ok(g) => g,
|
||||
Err(e) => {
|
||||
return UiaResponse::Error {
|
||||
message: format!("CoInitializeEx: {}", e),
|
||||
code: "com_init_failed".into(),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let automation = match get_automation() {
|
||||
Ok(a) => a,
|
||||
Err(e) => {
|
||||
return UiaResponse::Error {
|
||||
message: format!("CUIAutomation: {}", e),
|
||||
code: "automation_failed".into(),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let root = match unsafe { automation.GetRootElement() } {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
return UiaResponse::Error {
|
||||
message: format!("GetRootElement: {}", e),
|
||||
code: "root_failed".into(),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Recherche simple par parcours d'arbre (MVP)
|
||||
// L'arbre UIA peut être énorme → on limite la profondeur
|
||||
if let Some(target_name) = name {
|
||||
let walker = unsafe { automation.ControlViewWalker() };
|
||||
if let Ok(walker) = walker {
|
||||
if let Some(found) =
|
||||
walk_and_find(&walker, &root, &target_name, 0, 6, &_control_type, &_automation_id)
|
||||
{
|
||||
match element_to_struct(&found, true) {
|
||||
Ok(e) => {
|
||||
return UiaResponse::Ok {
|
||||
element: Some(e),
|
||||
elements: Vec::new(),
|
||||
elapsed_ms: start.elapsed().as_millis() as u64,
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
return UiaResponse::Error {
|
||||
message: format!("element_to_struct: {}", e),
|
||||
code: "extract_failed".into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
UiaResponse::NotFound {
|
||||
reason: "Aucun élément trouvé".into(),
|
||||
elapsed_ms: start.elapsed().as_millis() as u64,
|
||||
}
|
||||
}
|
||||
|
||||
/// Parcours récursif de l'arbre UIA pour trouver un élément par nom
|
||||
fn walk_and_find(
|
||||
walker: &IUIAutomationTreeWalker,
|
||||
element: &IUIAutomationElement,
|
||||
target_name: &str,
|
||||
depth: u32,
|
||||
max_depth: u32,
|
||||
target_control_type: &Option<String>,
|
||||
target_automation_id: &Option<String>,
|
||||
) -> Option<IUIAutomationElement> {
|
||||
if depth > max_depth {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Tester l'élément courant
|
||||
unsafe {
|
||||
if let Ok(name) = element.CurrentName() {
|
||||
if name.to_string() == target_name {
|
||||
// Vérifier les filtres additionnels
|
||||
let mut matches = true;
|
||||
if let Some(ct) = target_control_type {
|
||||
if let Ok(local_ct) = element.CurrentLocalizedControlType() {
|
||||
if !local_ct.to_string().to_lowercase().contains(&ct.to_lowercase()) {
|
||||
matches = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
if matches {
|
||||
if let Some(aid) = target_automation_id {
|
||||
if let Ok(local_aid) = element.CurrentAutomationId() {
|
||||
if local_aid.to_string() != *aid {
|
||||
matches = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if matches {
|
||||
return Some(element.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Parcourir les enfants
|
||||
if let Ok(first_child) = walker.GetFirstChildElement(element) {
|
||||
let mut current = first_child;
|
||||
loop {
|
||||
if let Some(found) = walk_and_find(
|
||||
walker,
|
||||
¤t,
|
||||
target_name,
|
||||
depth + 1,
|
||||
max_depth,
|
||||
target_control_type,
|
||||
target_automation_id,
|
||||
) {
|
||||
return Some(found);
|
||||
}
|
||||
match walker.GetNextSiblingElement(¤t) {
|
||||
Ok(next) => current = next,
|
||||
Err(_) => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
pub fn capture_tree(_max_depth: u32) -> UiaResponse {
|
||||
let start = Instant::now();
|
||||
let _com = match ComGuard::new() {
|
||||
Ok(g) => g,
|
||||
Err(e) => {
|
||||
return UiaResponse::Error {
|
||||
message: format!("CoInitializeEx: {}", e),
|
||||
code: "com_init_failed".into(),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let automation = match get_automation() {
|
||||
Ok(a) => a,
|
||||
Err(e) => {
|
||||
return UiaResponse::Error {
|
||||
message: format!("CUIAutomation: {}", e),
|
||||
code: "automation_failed".into(),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let focused = unsafe { automation.GetFocusedElement() };
|
||||
match focused {
|
||||
Ok(el) => match element_to_struct(&el, true) {
|
||||
Ok(e) => UiaResponse::Ok {
|
||||
element: Some(e),
|
||||
elements: Vec::new(),
|
||||
elapsed_ms: start.elapsed().as_millis() as u64,
|
||||
},
|
||||
Err(e) => UiaResponse::Error {
|
||||
message: format!("element_to_struct: {}", e),
|
||||
code: "extract_failed".into(),
|
||||
},
|
||||
},
|
||||
Err(e) => UiaResponse::Error {
|
||||
message: format!("GetFocusedElement: {}", e),
|
||||
code: "focused_failed".into(),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn health_check() -> UiaResponse {
|
||||
let _com = match ComGuard::new() {
|
||||
Ok(g) => g,
|
||||
Err(e) => {
|
||||
return UiaResponse::Unavailable {
|
||||
reason: format!("COM init failed: {}", e),
|
||||
}
|
||||
}
|
||||
};
|
||||
match get_automation() {
|
||||
Ok(_) => UiaResponse::Ok {
|
||||
element: None,
|
||||
elements: Vec::new(),
|
||||
elapsed_ms: 0,
|
||||
},
|
||||
Err(e) => UiaResponse::Unavailable {
|
||||
reason: format!("UIA not available: {}", e),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Stub Linux (pour développement et tests)
|
||||
// =========================================================================
|
||||
|
||||
#[cfg(not(windows))]
|
||||
mod uia_impl {
|
||||
use super::*;
|
||||
|
||||
pub fn query_at_point(_x: i32, _y: i32, _with_parents: bool) -> UiaResponse {
|
||||
UiaResponse::Unavailable {
|
||||
reason: "UIA n'est disponible que sur Windows".into(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn find_element(
|
||||
_name: Option<String>,
|
||||
_control_type: Option<String>,
|
||||
_automation_id: Option<String>,
|
||||
_window: Option<String>,
|
||||
_timeout_ms: u32,
|
||||
) -> UiaResponse {
|
||||
UiaResponse::Unavailable {
|
||||
reason: "UIA n'est disponible que sur Windows".into(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn capture_tree(_max_depth: u32) -> UiaResponse {
|
||||
UiaResponse::Unavailable {
|
||||
reason: "UIA n'est disponible que sur Windows".into(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn health_check() -> UiaResponse {
|
||||
UiaResponse::Unavailable {
|
||||
reason: "UIA n'est disponible que sur Windows".into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Main
|
||||
// =========================================================================
|
||||
|
||||
fn main() {
|
||||
let cli = Cli::parse();
|
||||
|
||||
let response = match cli.command {
|
||||
Commands::Query {
|
||||
x,
|
||||
y,
|
||||
with_parents,
|
||||
} => uia_impl::query_at_point(x, y, with_parents),
|
||||
Commands::Find {
|
||||
name,
|
||||
control_type,
|
||||
automation_id,
|
||||
window,
|
||||
timeout_ms,
|
||||
} => uia_impl::find_element(name, control_type, automation_id, window, timeout_ms),
|
||||
Commands::Capture { max_depth } => uia_impl::capture_tree(max_depth),
|
||||
Commands::Health => uia_impl::health_check(),
|
||||
};
|
||||
|
||||
// Sortie JSON sur stdout
|
||||
match serde_json::to_string(&response) {
|
||||
Ok(json) => println!("{}", json),
|
||||
Err(e) => {
|
||||
eprintln!("{{\"status\":\"error\",\"message\":\"JSON serialization: {}\"}}", e);
|
||||
std::process::exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -178,8 +178,41 @@ class EventCaptorV1:
|
||||
"timestamp": now,
|
||||
}
|
||||
self._inject_screen_metadata(event)
|
||||
# Capturer le snapshot UIA à la position du clic (si helper dispo)
|
||||
# Non-bloquant : si UIA échoue, l'event est enrichi uniquement
|
||||
# des données vision comme aujourd'hui.
|
||||
self._inject_uia_snapshot(event, x, y)
|
||||
self.on_event(event)
|
||||
|
||||
def _inject_uia_snapshot(self, event: dict, x: int, y: int) -> None:
|
||||
"""Ajouter un uia_snapshot à l'événement si le helper UIA est dispo.
|
||||
|
||||
Appelle lea_uia.exe query --x N --y N en ~10-20ms.
|
||||
Fallback silencieux si le helper n'est pas dispo ou échoue.
|
||||
"""
|
||||
try:
|
||||
from .uia_helper import get_shared_helper
|
||||
helper = get_shared_helper()
|
||||
if not helper.available:
|
||||
return
|
||||
element = helper.query_at(int(x), int(y), with_parents=True)
|
||||
if element is None:
|
||||
return
|
||||
event["uia_snapshot"] = {
|
||||
"name": element.name,
|
||||
"control_type": element.control_type,
|
||||
"class_name": element.class_name,
|
||||
"automation_id": element.automation_id,
|
||||
"bounding_rect": list(element.bounding_rect),
|
||||
"is_enabled": element.is_enabled,
|
||||
"is_offscreen": element.is_offscreen,
|
||||
"parent_path": element.parent_path,
|
||||
}
|
||||
except Exception as e:
|
||||
# Non bloquant — on continue sans UIA
|
||||
import logging
|
||||
logging.getLogger(__name__).debug(f"UIA snapshot skip: {e}")
|
||||
|
||||
def _on_scroll(self, x, y, dx, dy):
|
||||
event = {
|
||||
"type": "mouse_scroll",
|
||||
|
||||
@@ -17,6 +17,7 @@ import base64
|
||||
import hashlib
|
||||
import io
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
import logging
|
||||
|
||||
@@ -72,6 +73,12 @@ class ActionExecutorV1:
|
||||
# different de celui qui utilise l'instance).
|
||||
self._sct = None
|
||||
self.running = True
|
||||
# ── Verrou de sérialisation replay ──
|
||||
# Garantit qu'UNE SEULE action de replay s'exécute à la fois.
|
||||
# Sans ce lock, deux threads (polling main.py + lea_ui) peuvent
|
||||
# consommer deux actions simultanément → race condition + mss
|
||||
# thread-unsafe retourne des résolutions fantômes (1024x768).
|
||||
self._replay_lock = threading.Lock()
|
||||
# Backoff exponentiel pour le polling replay (evite de marteler le serveur)
|
||||
self._poll_backoff = 1.0 # Delai actuel (secondes)
|
||||
self._poll_backoff_min = 1.0 # Delai minimal (reset apres succes)
|
||||
@@ -98,20 +105,43 @@ class ActionExecutorV1:
|
||||
|
||||
@property
|
||||
def notifier(self):
|
||||
"""Instance NotificationManager paresseuse."""
|
||||
"""Instance NotificationManager paresseuse.
|
||||
|
||||
Retourne un objet avec des méthodes no-op si NotificationManager
|
||||
n'est pas disponible (tkinter / plyer absents), pour que l'executor
|
||||
ne plante jamais à cause de l'UI.
|
||||
"""
|
||||
if self._notification_manager is None:
|
||||
try:
|
||||
from ..ui.notifications import NotificationManager
|
||||
self._notification_manager = NotificationManager()
|
||||
except Exception as e:
|
||||
logger.debug(f"NotificationManager indisponible : {e}")
|
||||
# Retourner un objet factice qui ne fait rien
|
||||
# Retourner un objet factice qui ne fait rien — couvre toutes
|
||||
# les méthodes possibles via __getattr__.
|
||||
class _Noop:
|
||||
def replay_target_not_found(self, *a, **kw):
|
||||
return False
|
||||
def __getattr__(self, name):
|
||||
return lambda *a, **kw: False
|
||||
self._notification_manager = _Noop()
|
||||
return self._notification_manager
|
||||
|
||||
@property
|
||||
def activity_panel(self):
|
||||
"""Instance ActivityPanel paresseuse (singleton).
|
||||
|
||||
Fallback silencieux si le panel ne peut pas être créé.
|
||||
"""
|
||||
try:
|
||||
from ..ui.activity_panel import get_activity_panel
|
||||
return get_activity_panel()
|
||||
except Exception as e:
|
||||
logger.debug(f"ActivityPanel indisponible : {e}")
|
||||
|
||||
class _Noop:
|
||||
def __getattr__(self, name):
|
||||
return lambda *a, **kw: None
|
||||
return _Noop()
|
||||
|
||||
def _auth_headers(self) -> dict:
|
||||
"""Headers d'authentification Bearer pour les requetes au serveur."""
|
||||
if self._api_token:
|
||||
@@ -241,6 +271,128 @@ class ActionExecutorV1:
|
||||
logger.warning(f"Acteur gemma4 indisponible : {e}")
|
||||
return "EXECUTER"
|
||||
|
||||
# =========================================================================
|
||||
# UIA local — résolution via lea_uia.exe (helper Rust)
|
||||
# =========================================================================
|
||||
|
||||
def _resolve_via_uia_local(
|
||||
self, uia_target: dict, screen_width: int, screen_height: int,
|
||||
):
|
||||
"""Résoudre une cible via UIA local (lea_uia.exe).
|
||||
|
||||
Le plan V4 contient un uia_target (nom, control_type, parent_path).
|
||||
On appelle le helper Rust qui interroge UIAutomationCore.dll et
|
||||
retourne les coordonnées pixel-perfect de l'élément.
|
||||
|
||||
STRICT : si l'élément trouvé n'appartient pas à la bonne fenêtre
|
||||
parente (comparaison du parent_path), on REFUSE — sinon on clique
|
||||
au mauvais endroit (ex: 'Rechercher' de la taskbar au lieu de
|
||||
l'explorateur).
|
||||
|
||||
Retourne (x_pct, y_pct) si trouvé ET validé, None sinon.
|
||||
"""
|
||||
try:
|
||||
from .uia_helper import get_shared_helper
|
||||
helper = get_shared_helper()
|
||||
if not helper.available:
|
||||
return None
|
||||
|
||||
name = uia_target.get("name", "")
|
||||
control_type = uia_target.get("control_type", "") or None
|
||||
automation_id = uia_target.get("automation_id", "") or None
|
||||
expected_parent_path = uia_target.get("parent_path", []) or []
|
||||
|
||||
if not name:
|
||||
return None
|
||||
|
||||
element = helper.find_by_name(
|
||||
name=name,
|
||||
control_type=control_type,
|
||||
automation_id=automation_id,
|
||||
timeout_ms=1500,
|
||||
)
|
||||
if element is None or not element.is_clickable():
|
||||
logger.debug(f"UIA: '{name}' non trouvé ou non cliquable")
|
||||
return None
|
||||
|
||||
# ── VÉRIFICATION STRICTE du parent_path ──
|
||||
# Si l'élément a été enregistré dans une fenêtre spécifique,
|
||||
# il doit être trouvé dans la MÊME fenêtre au replay.
|
||||
# Sinon on clique sur un homonyme dans une autre app.
|
||||
if expected_parent_path:
|
||||
expected_root = None
|
||||
for p in expected_parent_path:
|
||||
if p.get("control_type", "").lower() in ("fenêtre", "window"):
|
||||
expected_root = p.get("name", "").strip()
|
||||
break
|
||||
|
||||
if expected_root:
|
||||
found_root = None
|
||||
for p in element.parent_path:
|
||||
if p.get("control_type", "").lower() in ("fenêtre", "window"):
|
||||
found_root = p.get("name", "").strip()
|
||||
break
|
||||
|
||||
if found_root and expected_root != found_root:
|
||||
# Match souple : même app (ex: "Bloc-notes")
|
||||
# Le titre peut changer (fichier différent) mais
|
||||
# l'app est la même → "Fichier" est au bon endroit.
|
||||
def _app_from(t):
|
||||
for s in [" – ", " - ", " — "]:
|
||||
if s in t:
|
||||
return t.split(s)[-1].strip().lower()
|
||||
return t.strip().lower()
|
||||
|
||||
same_app = _app_from(expected_root) == _app_from(found_root)
|
||||
substring_match = (
|
||||
expected_root.lower() in found_root.lower()
|
||||
or found_root.lower() in expected_root.lower()
|
||||
)
|
||||
if not same_app and not substring_match:
|
||||
logger.warning(
|
||||
f"UIA REJET : '{name}' trouvé dans '{found_root}' "
|
||||
f"mais attendu dans '{expected_root}'"
|
||||
)
|
||||
print(
|
||||
f" [UIA] REJET — '{name}' dans mauvaise app "
|
||||
f"({_app_from(found_root)} ≠ {_app_from(expected_root)})"
|
||||
)
|
||||
return None
|
||||
|
||||
# ── GARDE : rejeter les éléments géants (conteneurs) ──
|
||||
# Un élément qui couvre >50% de l'écran est un conteneur
|
||||
# (Bureau, Rechercher, liste), pas un bouton cliquable.
|
||||
# Cliquer au centre d'un conteneur = clic aveugle.
|
||||
elem_w = element.width()
|
||||
elem_h = element.height()
|
||||
screen_area = screen_width * screen_height
|
||||
elem_area = elem_w * elem_h
|
||||
if screen_area > 0 and elem_area / screen_area > 0.5:
|
||||
logger.warning(
|
||||
f"UIA REJET : '{name}' couvre {elem_area / screen_area * 100:.0f}% "
|
||||
f"de l'écran ({elem_w}x{elem_h}) — conteneur, pas un élément cliquable"
|
||||
)
|
||||
print(
|
||||
f" [UIA] REJET — '{name}' trop grand "
|
||||
f"({elem_w}x{elem_h}, {elem_area / screen_area * 100:.0f}% écran)"
|
||||
)
|
||||
return None
|
||||
|
||||
cx, cy = element.center()
|
||||
if screen_width <= 0 or screen_height <= 0:
|
||||
return None
|
||||
|
||||
x_pct = cx / screen_width
|
||||
y_pct = cy / screen_height
|
||||
if not (0.0 <= x_pct <= 1.0 and 0.0 <= y_pct <= 1.0):
|
||||
return None
|
||||
|
||||
return (x_pct, y_pct)
|
||||
|
||||
except Exception as e:
|
||||
logger.debug(f"UIA local resolve erreur : {e}")
|
||||
return None
|
||||
|
||||
# =========================================================================
|
||||
# Observer — pré-analyse écran avant chaque action
|
||||
# =========================================================================
|
||||
@@ -385,10 +537,63 @@ class ActionExecutorV1:
|
||||
"visual_resolved": False,
|
||||
}
|
||||
|
||||
# ── Bloc conditionnel : skip si le dialogue n'est pas apparu ──
|
||||
# Les actions marquées conditional_on_window ne s'exécutent que
|
||||
# si la fenêtre attendue est effectivement présente. Sinon → skip.
|
||||
# Ex: Ctrl+S a sauvé silencieusement → pas de "Enregistrer sous"
|
||||
# → les clics dans le dialogue sont skippés automatiquement.
|
||||
cond_window = action.get("conditional_on_window")
|
||||
if cond_window:
|
||||
try:
|
||||
from ..window_info_crossplatform import get_active_window_info
|
||||
current_info = get_active_window_info()
|
||||
current_title = current_info.get("title", "")
|
||||
|
||||
# Comparaison souple (sous-chaîne)
|
||||
cond_lower = cond_window.lower()
|
||||
current_lower = current_title.lower() if current_title else ""
|
||||
match = (
|
||||
cond_lower in current_lower
|
||||
or current_lower in cond_lower
|
||||
)
|
||||
if not match:
|
||||
logger.info(
|
||||
f"[CONDITIONNEL] Skip action {action_id} — "
|
||||
f"dialogue '{cond_window}' absent "
|
||||
f"(fenêtre actuelle: '{current_title}')"
|
||||
)
|
||||
print(
|
||||
f" [SKIP] Dialogue '{cond_window}' absent → action skippée"
|
||||
)
|
||||
result["success"] = True
|
||||
result["warning"] = "conditional_skipped"
|
||||
return result
|
||||
else:
|
||||
logger.info(
|
||||
f"[CONDITIONNEL] Dialogue '{cond_window}' présent → exécution"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug(f"Vérif conditionnelle échouée : {e}")
|
||||
|
||||
# ── Délai inter-actions (anti race condition mss) ──
|
||||
wait_before = action.get("wait_before", 0.5)
|
||||
if wait_before > 0:
|
||||
time.sleep(wait_before)
|
||||
|
||||
try:
|
||||
monitor = self.sct.monitors[1]
|
||||
width, height = monitor["width"], monitor["height"]
|
||||
|
||||
# ── Diagnostic résolution ──
|
||||
logger.info(
|
||||
f"[REPLAY] Action {action_id} ({action_type}) — "
|
||||
f"écran replay: {width}x{height}, "
|
||||
f"x_pct={action.get('x_pct', 0):.4f}, "
|
||||
f"y_pct={action.get('y_pct', 0):.4f} "
|
||||
f"→ pixel ({int(action.get('x_pct', 0) * width)}, "
|
||||
f"{int(action.get('y_pct', 0) * height)})"
|
||||
)
|
||||
|
||||
# Resolution visuelle des coordonnees si demande
|
||||
x_pct = action.get("x_pct", 0.0)
|
||||
y_pct = action.get("y_pct", 0.0)
|
||||
@@ -403,36 +608,119 @@ class ActionExecutorV1:
|
||||
# ── Pré-vérification : titre fenêtre ──
|
||||
# Vérifier que l'écran est dans l'état attendu AVANT de cliquer.
|
||||
if visual_mode and target_spec:
|
||||
expected_title = target_spec.get("window_title", "")
|
||||
# Le champ explicite `expected_window_before` a priorité
|
||||
# (il vient du plan V4 et indique la fenêtre STRICTEMENT
|
||||
# attendue avant l'action). Sinon fallback sur target_spec.
|
||||
expected_title = (
|
||||
action.get("expected_window_before", "")
|
||||
or target_spec.get("window_title", "")
|
||||
)
|
||||
if expected_title and expected_title != "unknown_window":
|
||||
from ..window_info_crossplatform import get_active_window_info
|
||||
current_info = get_active_window_info()
|
||||
current_title = current_info.get("title", "")
|
||||
from ..ui.messages import est_fenetre_lea, est_fenetre_bruit
|
||||
|
||||
current_app = _app_name(current_title)
|
||||
expected_app = _app_name(expected_title)
|
||||
title_match = (
|
||||
current_app == expected_app
|
||||
or expected_title.lower() in current_title.lower()
|
||||
or current_title.lower() in expected_title.lower()
|
||||
)
|
||||
# Ignorer la fenêtre de Léa elle-même (overlay agent)
|
||||
_lea_windows = ("léa", "lea —", "léa —", "lea -", "léa -", "lea assistante", "léa assistante")
|
||||
is_lea_window = any(p in current_title.lower() for p in _lea_windows)
|
||||
# Polling court pour laisser le temps à la fenêtre de
|
||||
# se stabiliser (évite les faux négatifs sur transitions
|
||||
# rapides : menu qui se ferme, taskbar qui perd le focus, etc.)
|
||||
current_title = ""
|
||||
title_match = False
|
||||
is_lea_window = False
|
||||
for attempt in range(5):
|
||||
current_info = get_active_window_info()
|
||||
current_title = current_info.get("title", "")
|
||||
|
||||
if not title_match and not is_lea_window:
|
||||
logger.warning(
|
||||
f"PRÉ-VÉRIF ÉCHOUÉE : attendu '{expected_title}', "
|
||||
f"actuel '{current_title}' — STOP"
|
||||
# Si on tombe sur Léa elle-même → on attend un peu
|
||||
if est_fenetre_lea(current_title):
|
||||
is_lea_window = True
|
||||
time.sleep(0.3)
|
||||
continue
|
||||
|
||||
# Bruit système (systray overflow, taskbar, etc.)
|
||||
# → on attend que la vraie fenêtre reprenne le focus
|
||||
if est_fenetre_bruit(current_title):
|
||||
time.sleep(0.3)
|
||||
continue
|
||||
|
||||
current_app = _app_name(current_title)
|
||||
expected_app = _app_name(expected_title)
|
||||
title_match = (
|
||||
current_app == expected_app
|
||||
or expected_title.lower() in current_title.lower()
|
||||
or current_title.lower() in expected_title.lower()
|
||||
)
|
||||
print(f" [PRÉ-VÉRIF] STOP — fenêtre '{current_title}' ≠ attendu '{expected_title}'")
|
||||
result["success"] = False
|
||||
result["error"] = f"Fenêtre incorrecte: '{current_title}' (attendu: '{expected_title}')"
|
||||
return result
|
||||
elif is_lea_window:
|
||||
logger.info(f"PRÉ-VÉRIF : fenêtre Léa détectée, ignorée — on continue")
|
||||
if title_match:
|
||||
break
|
||||
# Sinon on retente un peu au cas où la fenêtre
|
||||
# est en cours de transition
|
||||
time.sleep(0.3)
|
||||
|
||||
if not title_match:
|
||||
if is_lea_window:
|
||||
# Si après 5 essais on est encore sur Léa,
|
||||
# on ignore (l'utilisateur a Léa au premier plan)
|
||||
logger.info("[LEA] Fenêtre de Léa persistante — ignorée, on continue")
|
||||
elif not current_title or current_title == "unknown_window":
|
||||
# unknown_window persistant : on continue avec un
|
||||
# warning, UIA décidera peut-être
|
||||
logger.warning(
|
||||
f"[LEA] Fenêtre active inconnue — on tente quand même"
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
f"[LEA] Fenêtre incorrecte : attendu '{expected_title}', "
|
||||
f"actuel '{current_title}'"
|
||||
)
|
||||
print(
|
||||
f" [PRÉ-VÉRIF] Fenêtre '{current_title}' ≠ "
|
||||
f"attendu '{expected_title}' → mode apprentissage"
|
||||
)
|
||||
try:
|
||||
self.notifier.replay_wrong_window(current_title, expected_title)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Mode apprentissage : la fenêtre attendue n'est
|
||||
# pas là. Soit l'action précédente a changé l'état
|
||||
# (ex: Ctrl+S a sauvé sans dialogue), soit l'app
|
||||
# est dans un état différent. L'humain montre.
|
||||
human_actions = self._capture_human_correction(
|
||||
timeout_s=120,
|
||||
)
|
||||
if human_actions:
|
||||
result["success"] = True
|
||||
result["resolution_method"] = "human_supervised"
|
||||
result["warning"] = "human_supervised_wrong_window"
|
||||
last_click = None
|
||||
for ha in reversed(human_actions):
|
||||
if ha.get("type") == "click":
|
||||
last_click = ha
|
||||
break
|
||||
if last_click:
|
||||
result["actual_position"] = {
|
||||
"x_pct": last_click["x_pct"],
|
||||
"y_pct": last_click["y_pct"],
|
||||
}
|
||||
result["correction"] = {
|
||||
"actions": human_actions,
|
||||
"action_count": len(human_actions),
|
||||
"last_click": last_click,
|
||||
"trigger": "wrong_window",
|
||||
"expected_window": expected_title,
|
||||
"actual_window": current_title,
|
||||
}
|
||||
else:
|
||||
# Timeout ou pas d'action → skipper cette action
|
||||
# L'état est peut-être déjà correct (ex: Ctrl+S
|
||||
# a sauvé sans dialogue → action de dialogue inutile)
|
||||
result["success"] = True
|
||||
result["warning"] = "wrong_window_skipped"
|
||||
logger.info(
|
||||
f"[LEA] Wrong window sans correction → skip "
|
||||
f"(l'état est peut-être déjà atteint)"
|
||||
)
|
||||
return result
|
||||
else:
|
||||
logger.info(f"PRÉ-VÉRIF OK : '{current_title}'")
|
||||
logger.info(f"[LEA] Pré-vérif OK : '{current_title}'")
|
||||
|
||||
# ── OBSERVER : pré-analyse écran avant résolution ──
|
||||
# Détecte popups, dialogues, états inattendus AVANT de chercher la cible.
|
||||
@@ -476,8 +764,29 @@ class ActionExecutorV1:
|
||||
return result
|
||||
# EXECUTER → continuer normalement
|
||||
|
||||
if visual_mode and target_spec and server_url:
|
||||
# ── GROUNDING : localisation pure via GroundingEngine ──
|
||||
# ── V4 : UIA local (si resolve_order l'indique et helper dispo) ──
|
||||
# Court-circuite le grounding serveur pour les clicks sur Windows natif.
|
||||
# 10-20ms au lieu de 2-5s pour un clic — c'est le cœur du V4.
|
||||
uia_resolved = False
|
||||
if visual_mode and target_spec and action_type == "click":
|
||||
resolve_order = target_spec.get("resolve_order", [])
|
||||
uia_target = target_spec.get("uia_target")
|
||||
if resolve_order and resolve_order[0] == "uia" and uia_target:
|
||||
uia_coords = self._resolve_via_uia_local(uia_target, width, height)
|
||||
if uia_coords:
|
||||
x_pct, y_pct = uia_coords
|
||||
result["visual_resolved"] = True
|
||||
result["resolution_method"] = "v4_uia_local"
|
||||
result["resolution_score"] = 0.95
|
||||
uia_resolved = True
|
||||
print(f" [UIA] résolu en local: ({x_pct:.4f}, {y_pct:.4f})")
|
||||
logger.info(
|
||||
f"V4 UIA local OK : {uia_target.get('name', '?')} "
|
||||
f"→ ({x_pct:.4f}, {y_pct:.4f})"
|
||||
)
|
||||
|
||||
if not uia_resolved and visual_mode and target_spec and server_url:
|
||||
# ── GROUNDING : localisation pure via GroundingEngine (fallback) ──
|
||||
from .grounding import GroundingEngine
|
||||
grounding = GroundingEngine(self)
|
||||
grounding_result = grounding.locate(
|
||||
@@ -509,8 +818,8 @@ class ActionExecutorV1:
|
||||
|
||||
if action_type == "click":
|
||||
# Si visual_mode est activé, le resolve DOIT réussir.
|
||||
# Pas de fallback blind — on arrête le replay si la cible
|
||||
# n'est pas trouvée visuellement. C'est un RPA VISUEL.
|
||||
# Pas de fallback blind — Léa VOIT l'écran et CHERCHE
|
||||
# l'élément. Si toute la cascade échoue → pause supervisée.
|
||||
if visual_mode and not result.get("visual_resolved"):
|
||||
# ── Policy : décider quoi faire quand grounding échoue ──
|
||||
from .policy import PolicyEngine, Decision
|
||||
@@ -532,7 +841,6 @@ class ActionExecutorV1:
|
||||
)
|
||||
|
||||
if policy_decision.decision == Decision.RETRY:
|
||||
# Re-tenter le grounding après correction (popup fermée, etc.)
|
||||
resolved2 = self._resolve_target_visual(
|
||||
server_url, target_spec, x_pct, y_pct, width, height
|
||||
)
|
||||
@@ -542,14 +850,48 @@ class ActionExecutorV1:
|
||||
result["visual_resolved"] = True
|
||||
print(f" [POLICY] Re-resolve OK après {policy_decision.action_taken}")
|
||||
else:
|
||||
# Re-resolve échoué — SUPERVISE (rendre la main)
|
||||
result["success"] = False
|
||||
result["error"] = "target_not_found"
|
||||
result["target_description"] = target_desc
|
||||
result["target_spec"] = target_spec
|
||||
result["screenshot"] = self._capture_screenshot_b64()
|
||||
result["warning"] = "visual_resolve_failed"
|
||||
self.notifier.replay_target_not_found(target_desc)
|
||||
# Retry échoué → mode apprentissage
|
||||
# Léa a tout essayé (UIA, template, VLM, retry)
|
||||
# et ne trouve toujours pas. L'humain doit montrer.
|
||||
print(f" [POLICY] Retry échoué → mode apprentissage")
|
||||
try:
|
||||
self.notifier.replay_target_not_found(
|
||||
target_desc,
|
||||
target_spec.get("window_title", ""),
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
human_actions = self._capture_human_correction(
|
||||
timeout_s=120,
|
||||
)
|
||||
if human_actions:
|
||||
result["success"] = True
|
||||
result["resolution_method"] = "human_supervised"
|
||||
result["warning"] = "human_supervised_after_retry_failed"
|
||||
last_click = None
|
||||
for ha in reversed(human_actions):
|
||||
if ha.get("type") == "click":
|
||||
last_click = ha
|
||||
break
|
||||
if last_click:
|
||||
result["actual_position"] = {
|
||||
"x_pct": last_click["x_pct"],
|
||||
"y_pct": last_click["y_pct"],
|
||||
}
|
||||
result["correction"] = {
|
||||
"actions": human_actions,
|
||||
"action_count": len(human_actions),
|
||||
"last_click": last_click,
|
||||
"trigger": "retry_failed",
|
||||
}
|
||||
else:
|
||||
result["success"] = False
|
||||
result["error"] = "target_not_found"
|
||||
result["target_description"] = target_desc
|
||||
result["target_spec"] = target_spec
|
||||
result["screenshot"] = self._capture_screenshot_b64()
|
||||
result["warning"] = "visual_resolve_failed"
|
||||
return result
|
||||
|
||||
elif policy_decision.decision == Decision.SKIP:
|
||||
@@ -560,18 +902,61 @@ class ActionExecutorV1:
|
||||
elif policy_decision.decision == Decision.ABORT:
|
||||
result["success"] = False
|
||||
result["error"] = f"policy_abort:{target_desc}"
|
||||
self.notifier.replay_target_not_found(target_desc)
|
||||
self.notifier.replay_target_not_found(
|
||||
target_desc,
|
||||
target_spec.get("window_title", ""),
|
||||
)
|
||||
return result
|
||||
|
||||
else: # SUPERVISE ou CONTINUE
|
||||
result["success"] = False
|
||||
result["error"] = "target_not_found"
|
||||
result["target_description"] = target_desc
|
||||
result["target_spec"] = target_spec
|
||||
result["screenshot"] = self._capture_screenshot_b64()
|
||||
result["warning"] = "visual_resolve_failed"
|
||||
self.notifier.replay_target_not_found(target_desc)
|
||||
return result
|
||||
else: # SUPERVISE → mode apprentissage
|
||||
# Léa est perdue. Au lieu de s'arrêter, elle
|
||||
# passe en mode capture et enregistre ce que
|
||||
# l'humain fait (mini-workflow de correction).
|
||||
try:
|
||||
self.notifier.replay_target_not_found(
|
||||
target_desc,
|
||||
target_spec.get("window_title", ""),
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
human_actions = self._capture_human_correction(
|
||||
timeout_s=120,
|
||||
)
|
||||
if human_actions:
|
||||
# L'humain a montré un mini-workflow
|
||||
result["success"] = True
|
||||
result["resolution_method"] = "human_supervised"
|
||||
result["warning"] = "human_supervised"
|
||||
# Stocker le dernier clic comme position résolue
|
||||
last_click = None
|
||||
for ha in reversed(human_actions):
|
||||
if ha.get("type") == "click":
|
||||
last_click = ha
|
||||
break
|
||||
if last_click:
|
||||
result["actual_position"] = {
|
||||
"x_pct": last_click["x_pct"],
|
||||
"y_pct": last_click["y_pct"],
|
||||
}
|
||||
# Envoyer toute la correction au serveur
|
||||
result["correction"] = {
|
||||
"actions": human_actions,
|
||||
"action_count": len(human_actions),
|
||||
"last_click": last_click,
|
||||
}
|
||||
logger.info(
|
||||
f"[APPRENTISSAGE] Correction reçue : "
|
||||
f"{len(human_actions)} actions — je m'en souviendrai."
|
||||
)
|
||||
else:
|
||||
# Timeout — l'humain n'a pas répondu
|
||||
result["success"] = False
|
||||
result["error"] = "target_not_found"
|
||||
result["target_description"] = target_desc
|
||||
result["target_spec"] = target_spec
|
||||
result["screenshot"] = self._capture_screenshot_b64()
|
||||
result["warning"] = "visual_resolve_failed"
|
||||
|
||||
real_x = int(x_pct * width)
|
||||
real_y = int(y_pct * height)
|
||||
@@ -582,6 +967,15 @@ class ActionExecutorV1:
|
||||
f"({real_x}, {real_y}) sur ({width}x{height}), bouton={button}"
|
||||
)
|
||||
self._click((real_x, real_y), button)
|
||||
# Phase 1 apprentissage : exposer les coordonnées RÉSOLUES
|
||||
# utilisées pour le clic. Le serveur (/replay/result) les lit
|
||||
# directement comme source de vérité pour la mémoire.
|
||||
# On donne des percentages car la mémoire est indépendante
|
||||
# de la résolution écran du client.
|
||||
result["actual_position"] = {
|
||||
"x_pct": float(x_pct),
|
||||
"y_pct": float(y_pct),
|
||||
}
|
||||
logger.info(
|
||||
f"Replay click [{mode}] : ({x_pct:.3f}, {y_pct:.3f}) -> "
|
||||
f"({real_x}, {real_y}) sur ({width}x{height})"
|
||||
@@ -615,7 +1009,29 @@ class ActionExecutorV1:
|
||||
else:
|
||||
print(f" [POST-VÉRIF] TIMEOUT {max_wait}s — '{post_title}' ≠ '{expected_after}'")
|
||||
logger.warning(f"POST-VÉRIF TIMEOUT : '{post_title}' ≠ '{expected_after}'")
|
||||
result["warning"] = f"post_verif_timeout:{post_title}"
|
||||
# Contrôle strict : si success_strict, on STOP.
|
||||
# Sinon on continue avec un warning (legacy).
|
||||
is_strict = bool(action.get("success_strict"))
|
||||
if is_strict:
|
||||
result["success"] = False
|
||||
result["error"] = (
|
||||
f"Post-vérif échouée : fenêtre '{post_title}' "
|
||||
f"au lieu de '{expected_after}'"
|
||||
)
|
||||
result["warning"] = "wrong_window"
|
||||
print(
|
||||
f" [POST-VÉRIF] STOP STRICT — l'étape ne s'est "
|
||||
f"pas déroulée comme prévu, arrêt du replay"
|
||||
)
|
||||
try:
|
||||
self.notifier.replay_wrong_window(
|
||||
post_title, expected_after,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
return result
|
||||
else:
|
||||
result["warning"] = f"post_verif_timeout:{post_title}"
|
||||
else:
|
||||
print(f" [CLICK] Terminé.")
|
||||
|
||||
@@ -710,28 +1126,66 @@ class ActionExecutorV1:
|
||||
hash_before, timeout_ms=3000
|
||||
)
|
||||
if not screen_changed:
|
||||
# ── Recovery : tenter un rollback si l'action n'a pas eu d'effet ──
|
||||
from .recovery import RecoveryEngine
|
||||
recovery = RecoveryEngine(self)
|
||||
recovery_result = recovery.attempt(
|
||||
failed_action=action,
|
||||
critic_detail="L'écran n'a pas changé après l'action",
|
||||
)
|
||||
if recovery_result.success:
|
||||
print(f" [RECOVERY] {recovery_result.detail}")
|
||||
result["recovery"] = recovery_result.to_dict()
|
||||
|
||||
result["success"] = False
|
||||
result["warning"] = "no_screen_change"
|
||||
result["error"] = "Ecran inchange apres l'action"
|
||||
print(
|
||||
f" [ECHEC] Ecran inchange apres {action_type} — "
|
||||
f"l'action n'a pas eu d'effet visible"
|
||||
)
|
||||
logger.warning(
|
||||
f"Action {action_id} ({action_type}) : ecran inchange "
|
||||
f"— action sans effet visible"
|
||||
f"[LEA] Écran inchangé après {action_type} "
|
||||
f"(action_id={action_id}) — pas d'effet visible"
|
||||
)
|
||||
|
||||
# ── Mode apprentissage : clic sans effet = mauvais clic ──
|
||||
# Si l'action était un clic visuel, l'écran inchangé prouve
|
||||
# que le grounding a cliqué au mauvais endroit. Au lieu de
|
||||
# passer silencieusement à la suite, Léa demande à l'humain.
|
||||
if action_type == "click" and visual_mode:
|
||||
print(
|
||||
f" [ECHEC] Clic sans effet — "
|
||||
f"je demande de l'aide"
|
||||
)
|
||||
try:
|
||||
self.notifier.replay_no_screen_change(action_type)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
human_actions = self._capture_human_correction(
|
||||
timeout_s=120,
|
||||
)
|
||||
if human_actions:
|
||||
result["success"] = True
|
||||
result["resolution_method"] = "human_supervised"
|
||||
result["warning"] = "human_supervised_after_no_change"
|
||||
last_click = None
|
||||
for ha in reversed(human_actions):
|
||||
if ha.get("type") == "click":
|
||||
last_click = ha
|
||||
break
|
||||
if last_click:
|
||||
result["actual_position"] = {
|
||||
"x_pct": last_click["x_pct"],
|
||||
"y_pct": last_click["y_pct"],
|
||||
}
|
||||
result["correction"] = {
|
||||
"actions": human_actions,
|
||||
"action_count": len(human_actions),
|
||||
"last_click": last_click,
|
||||
"trigger": "no_screen_change",
|
||||
}
|
||||
else:
|
||||
# Timeout — l'humain n'a pas répondu
|
||||
result["success"] = False
|
||||
result["warning"] = "no_screen_change"
|
||||
result["error"] = "Ecran inchange apres l'action"
|
||||
else:
|
||||
# Actions non-visuelles : comportement existant
|
||||
result["success"] = False
|
||||
result["warning"] = "no_screen_change"
|
||||
result["error"] = "Ecran inchange apres l'action"
|
||||
print(
|
||||
f" [ECHEC] Ecran inchange apres {action_type} — "
|
||||
f"l'action n'a pas eu d'effet visible"
|
||||
)
|
||||
try:
|
||||
self.notifier.replay_no_screen_change(action_type)
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
print(f" [OK] Changement d'ecran detecte apres {action_type}")
|
||||
else:
|
||||
@@ -1195,15 +1649,24 @@ Example: x_pct=0.50, y_pct=0.30"""
|
||||
2. Execute l'action (clic, texte, etc.)
|
||||
3. POST /replay/result avec le resultat + screenshot
|
||||
|
||||
Args:
|
||||
session_id: Identifiant de la session courante
|
||||
server_url: URL de base du serveur streaming
|
||||
machine_id: Identifiant de la machine (pour le replay multi-machine)
|
||||
Sérialisé par _replay_lock — une seule action à la fois.
|
||||
Sans ce lock, deux threads concurrents consomment deux actions
|
||||
et mss retourne des résolutions fantômes (thread-unsafe).
|
||||
|
||||
Retourne True si une action a ete executee, False sinon.
|
||||
IMPORTANT: Si une action est recue, le resultat est TOUJOURS rapporte
|
||||
au serveur (meme en cas d'erreur d'execution).
|
||||
"""
|
||||
# Sérialisation stricte : si un autre thread exécute déjà une
|
||||
# action, on abandonne ce poll immédiatement (pas de file d'attente).
|
||||
if not self._replay_lock.acquire(blocking=False):
|
||||
return False
|
||||
|
||||
try:
|
||||
return self._poll_and_execute_inner(session_id, server_url, machine_id)
|
||||
finally:
|
||||
self._replay_lock.release()
|
||||
|
||||
def _poll_and_execute_inner(self, session_id: str, server_url: str, machine_id: str) -> bool:
|
||||
"""Implémentation interne de poll_and_execute (protégée par _replay_lock)."""
|
||||
import requests
|
||||
|
||||
replay_next_url = f"{server_url}/traces/stream/replay/next"
|
||||
@@ -1277,11 +1740,14 @@ Example: x_pct=0.50, y_pct=0.30"""
|
||||
print(f">>> ERREUR EXECUTION : {e}")
|
||||
logger.error(f"Erreur execute_replay_action: {e}")
|
||||
import traceback
|
||||
tb_str = traceback.format_exc()
|
||||
traceback.print_exc()
|
||||
result = {
|
||||
"action_id": action_id,
|
||||
"success": False,
|
||||
"error": f"Exception executor: {e}",
|
||||
# Inclure le traceback complet pour diagnostiquer
|
||||
# les crashes côté agent depuis les logs serveur
|
||||
"error": f"{e}\n---TRACEBACK---\n{tb_str[-500:]}",
|
||||
"screenshot": None,
|
||||
}
|
||||
|
||||
@@ -1298,9 +1764,13 @@ Example: x_pct=0.50, y_pct=0.30"""
|
||||
"resolution_method": result.get("resolution_method"),
|
||||
"resolution_score": result.get("resolution_score"),
|
||||
"resolution_elapsed_ms": result.get("resolution_elapsed_ms"),
|
||||
# Coordonnées RÉSOLUES effectivement cliquées (Phase 1 apprentissage)
|
||||
"actual_position": result.get("actual_position"),
|
||||
# Champs enrichis pour target_not_found (pause supervisée)
|
||||
"target_description": result.get("target_description"),
|
||||
"target_spec": result.get("target_spec"),
|
||||
# Correction humaine (mode apprentissage supervisé)
|
||||
"correction": result.get("correction"),
|
||||
}
|
||||
try:
|
||||
resp2 = requests.post(
|
||||
@@ -1783,6 +2253,159 @@ Example: x_pct=0.50, y_pct=0.30"""
|
||||
|
||||
logger.debug(f"Texte saisi char-by-char ({len(text)} chars)")
|
||||
|
||||
# =========================================================================
|
||||
# Mode apprentissage — l'humain montre, Léa apprend
|
||||
# =========================================================================
|
||||
|
||||
# Hotkey pour signaler la fin de la correction humaine
|
||||
_LEARNING_DONE_HOTKEY = {Key.ctrl_l, Key.shift, KeyCode.from_char("l")}
|
||||
|
||||
def _capture_human_correction(
    self,
    timeout_s: float = 120.0,
    inactivity_timeout_s: float = 10.0,
) -> list[dict]:
    """Record the mini-workflow a human performs to correct a failed step.

    Mouse and keyboard listeners are started and every left/right click,
    printable key release and Enter release is appended to the returned
    list until one of the following happens:

    - the "done" hotkey (``self._LEARNING_DONE_HOTKEY``, Ctrl+Shift+L) is
      pressed;
    - at least one action was recorded and nothing happened for
      ``inactivity_timeout_s`` seconds;
    - ``timeout_s`` seconds elapsed since the capture started.

    Args:
        timeout_s: Hard upper bound on the capture duration, in seconds.
        inactivity_timeout_s: Idle time after the last recorded action
            that ends the capture. It is not applied while the action
            list is still empty.

    Returns:
        The recorded actions in chronological order (possibly empty).
        Clicks carry ``x_pct``/``y_pct`` relative to the size of
        ``self.sct.monitors[1]`` and, when the UIA helper answers, a
        ``uia_snapshot`` dict.
    """
    from pynput.keyboard import Listener as KeyboardListener
    from pynput.mouse import Listener as MouseListener

    done_event = threading.Event()
    actions: list[dict] = []
    # Monotonic clock for the timeouts so that a wall-clock adjustment
    # cannot end (or prolong) the capture; action timestamps stay wall-clock.
    last_action_at = [time.monotonic()]
    keys_pressed: set = set()

    monitor = self.sct.monitors[1]
    screen_w, screen_h = monitor["width"], monitor["height"]

    def _canon(key):
        # Listener.canonical() strips modifier state from a key event and
        # merges left/right modifier variants, which is what pynput
        # recommends for hotkey matching. Older pynput versions may lack it.
        canonical = getattr(kbd_listener, "canonical", None)
        return canonical(key) if canonical is not None else key

    def _on_click(x, y, button, pressed):
        if done_event.is_set():
            return False
        if pressed and button.name in ("left", "right"):
            action = {
                "type": "click",
                "x_pct": round(x / screen_w, 6),
                "y_pct": round(y / screen_h, 6),
                "button": button.name,
                "timestamp": time.time(),
            }
            # Best-effort UIA snapshot of the clicked element.
            try:
                from .uia_helper import get_shared_helper

                helper = get_shared_helper()
                if helper.available:
                    elem = helper.query_at(int(x), int(y), with_parents=True)
                    if elem:
                        action["uia_snapshot"] = elem.to_dict()
            except Exception as exc:
                # The snapshot is optional: keep the click, but leave a trace.
                logger.debug(f"[APPRENTISSAGE] Snapshot UIA indisponible : {exc}")
            actions.append(action)
            last_action_at[0] = time.monotonic()
            logger.info(f"[APPRENTISSAGE] Clic ({x}, {y}) bouton={button.name}")

    def _on_key_press(key):
        if done_event.is_set():
            return False
        keys_pressed.add(_canon(key))
        # Compare canonical keys on both sides so the hotkey matches
        # regardless of which Ctrl/Shift is held and of the character the
        # platform reports while modifiers are down.
        if hotkey.issubset(keys_pressed):
            logger.info("[APPRENTISSAGE] Hotkey Ctrl+Shift+L — fin de correction")
            print("  [APPRENTISSAGE] Ctrl+Shift+L reçu — merci !")
            done_event.set()
            return False

    def _on_key_release(key):
        keys_pressed.discard(_canon(key))
        if done_event.is_set():
            return False
        # Record text keystrokes on release; bare modifiers have no char.
        if hasattr(key, "char") and key.char:
            actions.append({
                "type": "type",
                "text": key.char,
                "timestamp": time.time(),
            })
            last_action_at[0] = time.monotonic()
        elif key == Key.enter:
            actions.append({
                "type": "key_combo",
                "keys": ["enter"],
                "timestamp": time.time(),
            })
            last_action_at[0] = time.monotonic()

    mouse_listener = MouseListener(on_click=_on_click)
    kbd_listener = KeyboardListener(
        on_press=_on_key_press, on_release=_on_key_release,
    )
    # Built before the listeners start, so the callbacks always see it.
    hotkey = {_canon(k) for k in self._LEARNING_DONE_HOTKEY}

    # Nested try/finally: whatever happens while waiting, every listener
    # that was started is stopped again (they are OS-level input hooks).
    mouse_listener.start()
    try:
        kbd_listener.start()
        try:
            logger.info(
                f"[APPRENTISSAGE] Mode capture activé (timeout={timeout_s}s, "
                f"inactivité={inactivity_timeout_s}s, hotkey=Ctrl+Shift+L)"
            )
            print(
                f"  [APPRENTISSAGE] Montre-moi comment faire.\n"
                f"  Quand tu as fini → Ctrl+Shift+L\n"
                f"  (ou j'attends {inactivity_timeout_s}s sans action)"
            )

            # Wait for: hotkey OR inactivity OR global timeout.
            start = time.monotonic()
            while not done_event.is_set():
                now = time.monotonic()
                if now - start > timeout_s:
                    logger.info("[APPRENTISSAGE] Timeout global")
                    break
                # Inactivity only counts once the human did something.
                if actions and (now - last_action_at[0]) > inactivity_timeout_s:
                    logger.info(
                        f"[APPRENTISSAGE] Inactivité {inactivity_timeout_s}s — "
                        f"fin automatique ({len(actions)} actions)"
                    )
                    print(f"  [APPRENTISSAGE] Pas d'action depuis {inactivity_timeout_s}s — je reprends.")
                    break
                time.sleep(0.2)
        finally:
            kbd_listener.stop()
    finally:
        mouse_listener.stop()

    logger.info(f"[APPRENTISSAGE] {len(actions)} actions capturées")
    print(f"  [APPRENTISSAGE] {len(actions)} actions capturées — merci !")
    return actions
|
||||
|
||||
def _capture_crop_at(self, x: int, y: int, size: int = 80) -> str:
    """Grab monitor 1 and return a square crop around (x, y) as base64 JPEG.

    Args:
        x: Horizontal pixel position of the crop centre.
        y: Vertical pixel position of the crop centre.
        size: Edge length of the crop in pixels; the crop is clipped to the
            borders of the captured image.

    Returns:
        The crop encoded as JPEG (quality 85) then base64 text, or an empty
        string when anything goes wrong during capture or encoding.
    """
    try:
        from PIL import Image

        # A dedicated mss instance is opened for this single grab.
        with mss.mss() as grabber:
            shot = grabber.grab(grabber.monitors[1])
            frame = Image.frombytes("RGB", shot.size, shot.bgra, "raw", "BGRX")

        radius = size // 2
        box = (
            max(0, x - radius),
            max(0, y - radius),
            min(frame.width, x + radius),
            min(frame.height, y + radius),
        )

        jpeg = io.BytesIO()
        frame.crop(box).save(jpeg, format="JPEG", quality=85)
        return base64.b64encode(jpeg.getvalue()).decode("utf-8")
    except Exception:
        return ""
|
||||
|
||||
def _click(self, pos, button_name):
|
||||
"""Deplacer la souris via courbe de Bézier puis cliquer.
|
||||
|
||||
|
||||
@@ -15,6 +15,7 @@ Ref: docs/PLAN_ACTEUR_V1.md — Architecture MICRO (grounding + exécution)
|
||||
"""
|
||||
|
||||
import base64
|
||||
import io
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
@@ -126,19 +127,62 @@ class GroundingEngine:
|
||||
)
|
||||
|
||||
t_start = time.time()
|
||||
screenshot_b64 = self._executor._capture_screenshot_b64(max_width=0, quality=75)
|
||||
|
||||
# ── Capture contrainte à la fenêtre active ──
|
||||
# Le grounding ne voit QUE la fenêtre attendue — pas la taskbar,
|
||||
# pas le systray, pas les autres apps. Comme un humain qui regarde
|
||||
# l'application sur laquelle il travaille.
|
||||
window_rect = None
|
||||
try:
|
||||
from ..window_info_crossplatform import get_active_window_rect
|
||||
win_info = get_active_window_rect()
|
||||
if win_info and win_info.get("rect"):
|
||||
r = win_info["rect"] # [left, top, right, bottom]
|
||||
# Validation : fenêtre visible et pas minuscule
|
||||
w = r[2] - r[0]
|
||||
h = r[3] - r[1]
|
||||
if w > 50 and h > 50:
|
||||
window_rect = {
|
||||
"left": max(0, r[0]),
|
||||
"top": max(0, r[1]),
|
||||
"width": min(w, screen_width),
|
||||
"height": min(h, screen_height),
|
||||
}
|
||||
logger.info(
|
||||
f"Grounding contraint à la fenêtre : "
|
||||
f"{window_rect['width']}x{window_rect['height']} "
|
||||
f"à ({window_rect['left']}, {window_rect['top']})"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug(f"Pas de window rect disponible : {e}")
|
||||
|
||||
screenshot_b64 = self._capture_window_or_screen(window_rect)
|
||||
if not screenshot_b64:
|
||||
return GroundingResult(
|
||||
found=False, detail="Capture screenshot échouée",
|
||||
elapsed_ms=(time.time() - t_start) * 1000,
|
||||
)
|
||||
|
||||
# Dimensions de la zone capturée (fenêtre ou écran entier)
|
||||
cap_w = window_rect["width"] if window_rect else screen_width
|
||||
cap_h = window_rect["height"] if window_rect else screen_height
|
||||
|
||||
for strategy in strategies:
|
||||
result = self._try_strategy(
|
||||
strategy, server_url, screenshot_b64, target_spec,
|
||||
fallback_x, fallback_y, screen_width, screen_height,
|
||||
fallback_x, fallback_y, cap_w, cap_h,
|
||||
)
|
||||
if result.found:
|
||||
# ── Conversion coords fenêtre → coords écran ──
|
||||
if window_rect:
|
||||
# Le grounding a retourné des coords relatives à la fenêtre
|
||||
# On les convertit en coords relatives à l'écran entier
|
||||
abs_x = window_rect["left"] + result.x_pct * cap_w
|
||||
abs_y = window_rect["top"] + result.y_pct * cap_h
|
||||
result.x_pct = abs_x / screen_width
|
||||
result.y_pct = abs_y / screen_height
|
||||
result.detail = f"{result.detail} [fenêtre {cap_w}x{cap_h}]"
|
||||
|
||||
result.elapsed_ms = (time.time() - t_start) * 1000
|
||||
return result
|
||||
|
||||
@@ -148,6 +192,39 @@ class GroundingEngine:
|
||||
elapsed_ms=(time.time() - t_start) * 1000,
|
||||
)
|
||||
|
||||
def _capture_window_or_screen(self, window_rect: Optional[Dict]) -> str:
    """Capture either the given window region or the whole screen.

    Args:
        window_rect: Region with ``left``/``top``/``width``/``height``
            keys to grab, or ``None`` to grab ``monitors[1]`` entirely.

    Returns:
        The capture as base64-encoded JPEG (quality 75). An empty string
        means the capture failed; the grounding caller in this class treats
        a falsy result as "screenshot capture failed".
    """
    try:
        from PIL import Image
        import mss as mss_lib

        with mss_lib.mss() as local_sct:
            if window_rect:
                # Grab only the window area.
                region = {
                    key: window_rect[key]
                    for key in ("left", "top", "width", "height")
                }
                raw = local_sct.grab(region)
            else:
                # Full-screen fallback.
                raw = local_sct.grab(local_sct.monitors[1])

            img = Image.frombytes("RGB", raw.size, raw.bgra, "raw", "BGRX")

        buffer = io.BytesIO()
        img.save(buffer, format="JPEG", quality=75)
        return base64.b64encode(buffer.getvalue()).decode("utf-8")
    except Exception as e:
        logger.warning(f"Capture échouée : {e}")
        if window_rect:
            # The caller converts grounding results back to screen
            # coordinates using window_rect's offset and size. Handing it a
            # full-screen image here would silently shift every resolved
            # position, so report the failure instead.
            return ""
        # No window constraint: the executor's own full-screen capture is an
        # equivalent substitute.
        return self._executor._capture_screenshot_b64(max_width=0, quality=75)
|
||||
|
||||
def _try_strategy(
|
||||
self,
|
||||
strategy: str,
|
||||
|
||||
294
agent_v0/agent_v1/core/uia_helper.py
Normal file
294
agent_v0/agent_v1/core/uia_helper.py
Normal file
@@ -0,0 +1,294 @@
|
||||
# agent_v0/agent_v1/core/uia_helper.py
|
||||
"""
|
||||
UIAHelper — Wrapper Python pour lea_uia.exe (helper Rust UI Automation).
|
||||
|
||||
Expose une API Python simple pour interroger UIA via le binaire Rust.
|
||||
Communique via subprocess : arguments en ligne de commande, réponse JSON sur stdout.
|
||||
|
||||
Pourquoi un helper Rust ?
|
||||
- 5-10x plus rapide que pywinauto (10-20ms vs 50-200ms)
|
||||
- Binaire standalone ~500 Ko, aucune dépendance runtime
|
||||
- Pas de problèmes de threading COM en Python
|
||||
- Crash-safe (le crash du helper n'affecte pas l'agent Python)
|
||||
|
||||
Architecture :
|
||||
Python executor
|
||||
↓ subprocess.run
|
||||
lea_uia.exe query --x 812 --y 436
|
||||
↓ UIA API Windows
|
||||
JSON response
|
||||
↓ stdout
|
||||
Python executor parse JSON
|
||||
|
||||
Si lea_uia.exe n'est pas disponible (Linux, binaire absent, crash) :
|
||||
toutes les méthodes retournent None → fallback vision automatique.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import subprocess
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Timeout par défaut pour les appels UIA (en secondes)
|
||||
_DEFAULT_TIMEOUT = 5.0
|
||||
|
||||
# Masquer la fenêtre console lors du spawn de lea_uia.exe sur Windows.
|
||||
# Sans ce flag, chaque appel (à chaque clic utilisateur pendant
|
||||
# l'enregistrement) fait apparaître une fenêtre cmd noire brièvement
|
||||
# visible à l'écran → ralentit la souris et pollue les screenshots
|
||||
# capturés (le VLM peut "voir" le chemin lea_uia.exe comme texte cliqué).
|
||||
#
|
||||
# La valeur 0x08000000 correspond à CREATE_NO_WINDOW défini dans
|
||||
# l'API Windows. Sur Linux/Mac, la valeur est 0 et `creationflags`
|
||||
# est ignoré. getattr() gère le cas où Python expose déjà la constante
|
||||
# sur Windows.
|
||||
if platform.system() == "Windows":
|
||||
_SUBPROCESS_CREATION_FLAGS = getattr(subprocess, "CREATE_NO_WINDOW", 0x08000000)
|
||||
else:
|
||||
_SUBPROCESS_CREATION_FLAGS = 0
|
||||
|
||||
|
||||
@dataclass
class UiaElement:
    """Python-side representation of one UI Automation element.

    Instances are normally built with :meth:`from_dict` from the JSON
    emitted by the helper binary and serialised back with :meth:`to_dict`.
    """

    name: str = ""
    control_type: str = ""
    class_name: str = ""
    automation_id: str = ""
    # (x1, y1, x2, y2) in pixels; center()/width()/height() rely on this order.
    bounding_rect: Tuple[int, int, int, int] = (0, 0, 0, 0)
    # Defaults are pessimistic: an element built without data is not clickable.
    is_enabled: bool = False
    is_offscreen: bool = True
    # Ancestor descriptors; path_signature() reads their "control_type"/"name".
    parent_path: List[Dict[str, str]] = field(default_factory=list)
    process_name: str = ""

    def center(self) -> Tuple[int, int]:
        """Return the centre of the bounding rectangle (pixels)."""
        x1, y1, x2, y2 = self.bounding_rect
        return ((x1 + x2) // 2, (y1 + y2) // 2)

    def width(self) -> int:
        """Return the horizontal extent of the bounding rectangle."""
        return self.bounding_rect[2] - self.bounding_rect[0]

    def height(self) -> int:
        """Return the vertical extent of the bounding rectangle."""
        return self.bounding_rect[3] - self.bounding_rect[1]

    def is_clickable(self) -> bool:
        """Tell whether the element is enabled, on screen and non-empty."""
        return (
            self.is_enabled
            and not self.is_offscreen
            and self.width() > 0
            and self.height() > 0
        )

    def path_signature(self) -> str:
        """Build a ``Type[Name] > Type[Name]`` signature of the ancestry.

        Unnamed ancestors are skipped. An ancestor that has a name but no
        ``control_type`` key contributes an empty type instead of raising.
        """
        parts = [
            f"{p.get('control_type', '')}[{p['name']}]"
            for p in self.parent_path
            if p.get("name")
        ]
        parts.append(f"{self.control_type}[{self.name}]")
        return " > ".join(parts)

    def to_dict(self) -> Dict[str, Any]:
        """Serialise to a plain dict (``bounding_rect`` becomes a list)."""
        return {
            "name": self.name,
            "control_type": self.control_type,
            "class_name": self.class_name,
            "automation_id": self.automation_id,
            "bounding_rect": list(self.bounding_rect),
            "is_enabled": self.is_enabled,
            "is_offscreen": self.is_offscreen,
            "parent_path": self.parent_path,
            "process_name": self.process_name,
        }

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "UiaElement":
        """Build an element from a dict, tolerating missing fields.

        ``bounding_rect`` may be a list or a tuple with at least four items;
        anything else yields ``(0, 0, 0, 0)``. A missing or null
        ``parent_path`` yields an empty list.
        """
        rect = d.get("bounding_rect")
        if isinstance(rect, (list, tuple)) and len(rect) >= 4:
            rect = tuple(rect[:4])
        else:
            rect = (0, 0, 0, 0)
        return cls(
            name=d.get("name", ""),
            control_type=d.get("control_type", ""),
            class_name=d.get("class_name", ""),
            automation_id=d.get("automation_id", ""),
            bounding_rect=rect,
            is_enabled=d.get("is_enabled", False),
            is_offscreen=d.get("is_offscreen", True),
            # JSON null would otherwise leave None here and break iteration.
            parent_path=d.get("parent_path") or [],
            process_name=d.get("process_name", ""),
        )
|
||||
|
||||
|
||||
class UIAHelper:
    """Python wrapper around the ``lea_uia.exe`` command-line helper.

    Every query spawns the helper once, passes the request as command-line
    arguments and parses the JSON document printed on stdout. All public
    queries return ``None`` (``health`` returns ``False``) when the helper
    is unavailable or the call fails, so callers can fall back to another
    strategy.
    """

    def __init__(self, helper_path: str = "", timeout: float = _DEFAULT_TIMEOUT):
        """Locate the helper and record whether it can be used.

        Args:
            helper_path: Explicit path to the binary; when empty, the
                standard locations are searched.
            timeout: Default subprocess timeout in seconds.
        """
        self._helper_path = helper_path or self._find_helper()
        self._timeout = timeout
        self._available = self._check_available()

    def _find_helper(self) -> str:
        """Return the absolute path of the first existing candidate, or ""."""
        here = os.path.dirname(__file__)
        build_tail = (
            "agent_rust", "lea_uia", "target",
            "x86_64-pc-windows-gnu", "release", "lea_uia.exe",
        )
        candidates = [
            r"C:\Lea\helpers\lea_uia.exe",
            os.path.join(here, "..", "..", *build_tail),
            # NOTE(review): from agent_v0/agent_v1/core/ the repository root
            # (where agent_rust/ appears to live) is three levels up, not
            # two — confirm against the actual checkout layout.
            os.path.join(here, "..", "..", "..", *build_tail),
            "./helpers/lea_uia.exe",
            "lea_uia.exe",
        ]
        for path in candidates:
            if os.path.isfile(path):
                return os.path.abspath(path)
        return ""

    def _check_available(self) -> bool:
        """Check that the platform is Windows and the binary file exists.

        No ``health`` call is made here; use :meth:`health` to probe the
        helper itself.
        """
        if platform.system() != "Windows":
            logger.debug("UIAHelper: Linux/Mac — helper désactivé")
            return False
        if not self._helper_path:
            logger.debug("UIAHelper: lea_uia.exe introuvable")
            return False
        if not os.path.isfile(self._helper_path):
            logger.debug(f"UIAHelper: chemin invalide {self._helper_path}")
            return False
        return True

    @property
    def available(self) -> bool:
        """Whether the helper passed the availability check at construction."""
        return self._available

    @property
    def helper_path(self) -> str:
        """Path of the helper binary ("" when none was found)."""
        return self._helper_path

    def _run(
        self,
        args: List[str],
        timeout: Optional[float] = None,
    ) -> Optional[Dict[str, Any]]:
        """Run the helper with ``args`` and return its parsed JSON object.

        Args:
            args: Command-line arguments appended after the binary path.
            timeout: Subprocess timeout in seconds; defaults to the
                instance-wide timeout.

        Returns:
            The decoded JSON object, or ``None`` on unavailability, non-zero
            exit code, empty output, timeout, invalid JSON, JSON that is not
            an object, or any other error.
        """
        if not self._available:
            return None
        effective_timeout = self._timeout if timeout is None else timeout
        try:
            result = subprocess.run(
                [self._helper_path] + args,
                capture_output=True,
                text=True,
                timeout=effective_timeout,
                encoding="utf-8",
                errors="replace",
                creationflags=_SUBPROCESS_CREATION_FLAGS,
            )
            if result.returncode != 0:
                logger.debug(
                    f"UIAHelper: exit code {result.returncode}, "
                    f"stderr: {result.stderr[:200]}"
                )
                return None
            output = result.stdout.strip()
            if not output:
                return None
            parsed = json.loads(output)
            if not isinstance(parsed, dict):
                # Callers use .get(); a JSON list or scalar would crash them.
                logger.debug("UIAHelper: JSON inattendu (objet attendu)")
                return None
            return parsed
        except subprocess.TimeoutExpired:
            logger.debug(f"UIAHelper: timeout ({effective_timeout}s) sur {args}")
            return None
        except json.JSONDecodeError as e:
            logger.debug(f"UIAHelper: JSON invalide — {e}")
            return None
        except Exception as e:
            logger.debug(f"UIAHelper: erreur {e}")
            return None

    @staticmethod
    def _element_from(data: Optional[Dict[str, Any]]) -> Optional[UiaElement]:
        """Extract the ``element`` of an ``ok`` response, else ``None``."""
        if not data or data.get("status") != "ok":
            return None
        elem_data = data.get("element")
        if not elem_data:
            return None
        return UiaElement.from_dict(elem_data)

    def health(self) -> bool:
        """Return True when the helper answers ``health`` with status ok."""
        data = self._run(["health"])
        return data is not None and data.get("status") == "ok"

    def query_at(
        self,
        x: int,
        y: int,
        with_parents: bool = True,
    ) -> Optional[UiaElement]:
        """Fetch the UIA element at a screen position.

        Args:
            x, y: Absolute pixel coordinates.
            with_parents: Include the parent hierarchy in the result.

        Returns:
            The element, or ``None`` when nothing was found or the helper
            is unavailable.
        """
        args = ["query", "--x", str(x), "--y", str(y)]
        if not with_parents:
            args.append("--with-parents=false")
        return self._element_from(self._run(args))

    def find_by_name(
        self,
        name: str,
        control_type: Optional[str] = None,
        automation_id: Optional[str] = None,
        window: Optional[str] = None,
        timeout_ms: int = 2000,
    ) -> Optional[UiaElement]:
        """Search for an element by name, with optional extra filters.

        Args:
            name: Exact element name.
            control_type: Control type filter (Button, Edit, MenuItem...).
            automation_id: Automation id filter.
            window: Restrict the search to a specific window.
            timeout_ms: Search timeout handed to the helper, in milliseconds.

        Returns:
            The element, or ``None`` when not found or on any failure.
        """
        args = ["find", "--name", name, "--timeout-ms", str(timeout_ms)]
        if control_type:
            args.extend(["--control-type", control_type])
        if automation_id:
            args.extend(["--automation-id", automation_id])
        if window:
            args.extend(["--window", window])

        # The subprocess must outlive the helper's own search timeout,
        # otherwise a long timeout_ms is cut short by the default process
        # timeout. One extra second covers process start-up and output.
        process_timeout = max(self._timeout, timeout_ms / 1000.0 + 1.0)
        return self._element_from(self._run(args, timeout=process_timeout))

    def capture_focused(self, max_depth: int = 3) -> Optional[UiaElement]:
        """Capture the focused element (``capture --max-depth N``)."""
        data = self._run(["capture", "--max-depth", str(max_depth)])
        return self._element_from(data)
|
||||
|
||||
|
||||
# Instance globale partagée (singleton léger)
|
||||
_SHARED_HELPER: Optional[UIAHelper] = None
|
||||
|
||||
|
||||
def get_shared_helper() -> UIAHelper:
    """Return the module-wide UIAHelper, creating it on first use."""
    global _SHARED_HELPER
    helper = _SHARED_HELPER
    if helper is None:
        # First call: build the helper once and cache it for later callers.
        helper = UIAHelper()
        _SHARED_HELPER = helper
    return helper
|
||||
@@ -38,8 +38,19 @@ except (ImportError, ValueError):
|
||||
except ImportError:
|
||||
LeaServerClient = None
|
||||
|
||||
# Configuration du logging
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
|
||||
# Configuration du logging — format structuré et lisible pour un TIM
|
||||
# Niveau de détail : INFO par défaut, DEBUG si RPA_AGENT_DEBUG=1
|
||||
_log_level = logging.DEBUG if os.environ.get("RPA_AGENT_DEBUG") == "1" else logging.INFO
|
||||
logging.basicConfig(
|
||||
level=_log_level,
|
||||
format="%(asctime)s %(levelname)-7s %(name)-25s %(message)s",
|
||||
datefmt="%H:%M:%S",
|
||||
)
|
||||
|
||||
# Réduire le bruit de certaines libs
|
||||
for _noisy in ("urllib3", "requests.packages.urllib3", "PIL", "mss"):
|
||||
logging.getLogger(_noisy).setLevel(logging.WARNING)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Intervalle de polling replay (secondes)
|
||||
@@ -371,12 +382,22 @@ class AgentV1:
|
||||
time.sleep(5)
|
||||
|
||||
def stop_session(self):
|
||||
# Arrêter la capture et le streaming de la session d'enregistrement
|
||||
if self.captor: self.captor.stop()
|
||||
if self.streamer: self.streamer.stop()
|
||||
logger.info(f"Session {self.session_id} terminée.")
|
||||
# Sauvegarder le session_id avant de l'annuler (pour les logs)
|
||||
ended_session_id = self.session_id
|
||||
|
||||
# Reset le session_id pour que le poll replay utilise l'ID stable
|
||||
# Arrêter la capture d'abord (plus d'events entrants)
|
||||
if self.captor: self.captor.stop()
|
||||
|
||||
# Attendre que les events en cours de traitement dans _on_event_bridge
|
||||
# aient le temps d'être envoyés au streamer (capture duale + push)
|
||||
import time
|
||||
time.sleep(1.5)
|
||||
|
||||
# Maintenant arrêter le streamer (drain queue + finalize)
|
||||
if self.streamer: self.streamer.stop()
|
||||
logger.info(f"Session {ended_session_id} terminée.")
|
||||
|
||||
# Reset le session_id APRÈS le stop complet du streamer
|
||||
self.session_id = None
|
||||
|
||||
# Reset le backoff de l'executor pour reprendre le polling immédiatement
|
||||
@@ -403,6 +424,7 @@ class AgentV1:
|
||||
"""Capture périodique pour donner du contexte au stagiaire.
|
||||
Déduplication : n'envoie que si l'écran a changé.
|
||||
Tourne tant que session_id est défini (= enregistrement actif).
|
||||
Enrichi avec le titre de la fenêtre active pour contextualisation.
|
||||
"""
|
||||
while self.running and self.session_id:
|
||||
try:
|
||||
@@ -413,7 +435,17 @@ class AgentV1:
|
||||
if img_hash != self._last_heartbeat_hash:
|
||||
self._last_heartbeat_hash = img_hash
|
||||
self.streamer.push_image(full_path, f"heartbeat_{int(time.time())}")
|
||||
self.streamer.push_event({"type": "heartbeat", "image": full_path, "timestamp": time.time(), "machine_id": self.machine_id})
|
||||
heartbeat_event = {
|
||||
"type": "heartbeat",
|
||||
"image": full_path,
|
||||
"timestamp": time.time(),
|
||||
"machine_id": self.machine_id,
|
||||
}
|
||||
# Ajouter le titre de la fenêtre active (léger, pas de crop)
|
||||
window_title = self.vision.get_active_window_title()
|
||||
if window_title:
|
||||
heartbeat_event["active_window_title"] = window_title
|
||||
self.streamer.push_event(heartbeat_event)
|
||||
except Exception as e:
|
||||
logger.error(f"Heartbeat error: {e}")
|
||||
time.sleep(5)
|
||||
@@ -448,20 +480,33 @@ class AgentV1:
|
||||
event["screenshot_context"] = full_path
|
||||
self.streamer.push_image(full_path, f"focus_{int(time.time())}")
|
||||
|
||||
# 🔴 Capture Interactive (Dual)
|
||||
# Capture Interactive (Dual + Fenêtre active)
|
||||
if event["type"] in ["mouse_click", "key_combo"]:
|
||||
self.shot_counter += 1
|
||||
shot_id = f"shot_{self.shot_counter:04d}"
|
||||
|
||||
|
||||
pos = event.get("pos", (0, 0))
|
||||
capture_info = self.vision.capture_dual(pos[0], pos[1], shot_id)
|
||||
|
||||
|
||||
event["screenshot_id"] = shot_id
|
||||
event["vision_info"] = capture_info
|
||||
|
||||
|
||||
# Enrichir l'event avec les métadonnées de la fenêtre active
|
||||
# (titre, rect, coordonnées clic relatives, taille fenêtre)
|
||||
window_capture = capture_info.get("window_capture")
|
||||
if window_capture:
|
||||
event["window_capture"] = {
|
||||
"title": window_capture.get("window_title", ""),
|
||||
"app_name": window_capture.get("app_name", ""),
|
||||
"rect": window_capture.get("window_rect"),
|
||||
"click_relative": window_capture.get("click_in_window"),
|
||||
"window_size": window_capture.get("window_size"),
|
||||
"click_inside_window": window_capture.get("click_inside_window", True),
|
||||
}
|
||||
|
||||
self._stream_capture_info(capture_info, shot_id)
|
||||
|
||||
# 🕒 POST-ACTION : Capture du résultat après 1s (pour voir le résultat du clic)
|
||||
|
||||
# POST-ACTION : Capture du résultat après 1s (pour voir le résultat du clic)
|
||||
threading.Timer(1.0, self._capture_result, args=(shot_id,)).start()
|
||||
|
||||
self.ui.update_stats(self.shot_counter)
|
||||
@@ -481,6 +526,12 @@ class AgentV1:
|
||||
self.streamer.push_image(capture_info["full"], f"{shot_id}_full")
|
||||
if "crop" in capture_info:
|
||||
self.streamer.push_image(capture_info["crop"], f"{shot_id}_crop")
|
||||
# Streamer l'image de la fenêtre active si disponible
|
||||
window_capture = capture_info.get("window_capture")
|
||||
if window_capture and "window_image" in window_capture:
|
||||
self.streamer.push_image(
|
||||
window_capture["window_image"], f"{shot_id}_window"
|
||||
)
|
||||
|
||||
def run(self):
|
||||
self.ui.run()
|
||||
|
||||
418
agent_v0/agent_v1/ui/activity_panel.py
Normal file
418
agent_v0/agent_v1/ui/activity_panel.py
Normal file
@@ -0,0 +1,418 @@
|
||||
# agent_v1/ui/activity_panel.py
|
||||
"""
|
||||
Panel d'activité temps réel de Léa.
|
||||
|
||||
Affiche à l'utilisateur ce que Léa fait *maintenant* :
|
||||
- État courant (Observe / Cherche / Agit / Vérifie / Bloquée)
|
||||
- Action en cours (ex: "Clic sur Rechercher")
|
||||
- Progression (ex: "3/15")
|
||||
- Temps écoulé depuis le début du workflow
|
||||
|
||||
Contraintes :
|
||||
- Fallback silencieux si tkinter absent (ne crash jamais)
|
||||
- Thread-safe (mises à jour depuis les threads de replay)
|
||||
- Pas de dépendance à PyQt5 (seulement tkinter, déjà utilisé par chat_window)
|
||||
|
||||
Utilisation :
|
||||
panel = ActivityPanel()
|
||||
panel.definir_workflow("Saisie patient", nb_etapes=15)
|
||||
panel.mettre_a_jour(etat=EtatLea.AGIT, action="Clic sur Valider", etape=3)
|
||||
panel.masquer()
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class EtatLea(Enum):
    """High-level states Léa goes through during a replay.

    Each member's value is a ``(code, label, colour)`` triple that is also
    exposed through the ``code``, ``libelle`` and ``couleur`` attributes.
    """

    INACTIVE = ("inactive", "Prête", "#808080")  # grey
    OBSERVE = ("observe", "Observe", "#4A90E2")  # blue
    CHERCHE = ("cherche", "Cherche", "#F5A623")  # orange
    AGIT = ("agit", "Agit", "#7ED321")  # green
    VERIFIE = ("verifie", "Vérifie", "#9013FE")  # purple
    BLOQUEE = ("bloquee", "Bloquée", "#D0021B")  # red
    TERMINE = ("termine", "Terminé", "#50E3C2")  # turquoise

    def __init__(self, *champs: str) -> None:
        # Enum passes the member's tuple value as positional arguments.
        self.code, self.libelle, self.couleur = champs
|
||||
|
||||
|
||||
@dataclass
class EtatActivite:
    """Snapshot of what Léa is currently doing.

    Plain data plus a few formatting helpers; it has no UI dependency.
    """

    etat: EtatLea = EtatLea.INACTIVE
    action_courante: str = ""
    nom_workflow: str = ""
    etape: int = 0
    nb_etapes: int = 0
    # Wall-clock start of the workflow; values <= 0 mean "not started".
    debut_timestamp: float = 0.0
    dernier_message: str = ""

    def temps_ecoule_s(self) -> float:
        """Seconds elapsed since the workflow started (0.0 if not started)."""
        if self.debut_timestamp > 0:
            return max(0.0, time.time() - self.debut_timestamp)
        return 0.0

    def progression_texte(self) -> str:
        """Progress as 'step/total' (e.g. '3/15'), or '' without a total."""
        return f"{self.etape}/{self.nb_etapes}" if self.nb_etapes > 0 else ""

    def temps_ecoule_texte(self) -> str:
        """Elapsed time in human form (e.g. '12s', '1m24s')."""
        minutes, secondes = divmod(int(self.temps_ecoule_s()), 60)
        if minutes == 0:
            return f"{secondes}s"
        return f"{minutes}m{secondes:02d}s"

    def to_dict(self) -> dict:
        """Serialise the snapshot, including derived progress and elapsed time."""
        return dict(
            etat=self.etat.code,
            etat_libelle=self.etat.libelle,
            action_courante=self.action_courante,
            nom_workflow=self.nom_workflow,
            etape=self.etape,
            nb_etapes=self.nb_etapes,
            progression=self.progression_texte(),
            temps_ecoule_s=round(self.temps_ecoule_s(), 1),
            dernier_message=self.dernier_message,
        )
|
||||
|
||||
|
||||
class ActivityPanel:
    """Activity panel showing what Léa is currently doing.

    The activity state is guarded by a re-entrant lock. The tkinter window
    is built lazily, on first use, and only when ``activer_ui`` is true and
    tkinter can be imported and a window can be created. When the UI is not
    available, every UI-related call is a silent no-op while state tracking
    and listeners keep working.

    NOTE(review): tkinter calls are issued from whichever thread calls the
    public methods; label updates are deferred through ``after(0, ...)``.
    Confirm that callers respect tkinter's threading constraints.
    """

    # Background colour shared by the window and all of its labels.
    _COULEUR_FOND = "#1E1E1E"

    def __init__(self, activer_ui: bool = True) -> None:
        """Create an idle panel.

        Args:
            activer_ui: When False, no tkinter window is ever created.
        """
        self._lock = threading.RLock()
        self._etat = EtatActivite()
        self._activer_ui = activer_ui
        # tkinter UI, created on demand.
        self._tk_root = None
        self._tk_labels: dict = {}
        # Tri-state: None = not probed yet, True = window built,
        # False = tkinter missing or window creation failed.
        self._ui_disponible = None
        # Callbacks invoked with a state snapshot on every change.
        self._listeners = []

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def definir_workflow(self, nom: str, nb_etapes: int = 0) -> None:
        """Start tracking a new workflow.

        Replaces the whole state with a fresh one in the ``OBSERVE`` state,
        stamped with the current time, then notifies listeners and
        refreshes the UI.

        Args:
            nom: Workflow name.
            nb_etapes: Total number of steps (0 when unknown).
        """
        with self._lock:
            self._etat = EtatActivite(
                etat=EtatLea.OBSERVE,
                nom_workflow=nom,
                nb_etapes=nb_etapes,
                debut_timestamp=time.time(),
            )
        self._publier()
        logger.info(f"[ACTIVITY] Workflow démarré : {nom} ({nb_etapes} étapes)")

    def mettre_a_jour(
        self,
        etat: Optional[EtatLea] = None,
        action: Optional[str] = None,
        etape: Optional[int] = None,
        message: Optional[str] = None,
    ) -> None:
        """Update the displayed state.

        Every argument is optional: only the fields that are not ``None``
        are overwritten, the others keep their current value.

        Args:
            etat: New state.
            action: Description of the action in progress.
            etape: Current step number.
            message: Last message to show.
        """
        with self._lock:
            if etat is not None:
                self._etat.etat = etat
            if action is not None:
                self._etat.action_courante = action
            if etape is not None:
                self._etat.etape = etape
            if message is not None:
                self._etat.dernier_message = message
        self._publier()

    def terminer(self, succes: bool = True) -> None:
        """Mark the workflow as finished.

        Args:
            succes: True switches to ``TERMINE``; False switches to
                ``BLOQUEE`` and, when no message is set yet, stores a
                default hand-over message.
        """
        with self._lock:
            self._etat.etat = EtatLea.TERMINE if succes else EtatLea.BLOQUEE
            if not succes:
                self._etat.dernier_message = (
                    self._etat.dernier_message or "Léa a rendu la main"
                )
        self._publier()

    def reinitialiser(self) -> None:
        """Reset the panel to a default (idle) state."""
        with self._lock:
            self._etat = EtatActivite()
        self._publier()

    def snapshot(self) -> EtatActivite:
        """Return an independent copy of the current state.

        The copy is taken under the lock, so later updates to the panel do
        not affect the returned object.
        """
        with self._lock:
            courant = self._etat
            return EtatActivite(
                etat=courant.etat,
                action_courante=courant.action_courante,
                nom_workflow=courant.nom_workflow,
                etape=courant.etape,
                nb_etapes=courant.nb_etapes,
                debut_timestamp=courant.debut_timestamp,
                dernier_message=courant.dernier_message,
            )

    def masquer(self) -> None:
        """Hide the window if it exists (best effort, never raises)."""
        racine = self._tk_root
        if racine is None:
            return
        try:
            racine.withdraw()
        except Exception as e:
            logger.debug(f"[ACTIVITY] Impossible de masquer le panel : {e}")

    def afficher(self) -> None:
        """Show the window, creating it first if needed (best effort)."""
        self._creer_ui_si_besoin()
        racine = self._tk_root
        if racine is None:
            return
        try:
            racine.deiconify()
        except Exception as e:
            logger.debug(f"[ACTIVITY] Impossible d'afficher le panel : {e}")

    def on_change(self, callback) -> None:
        """Register a listener called with a state snapshot on every change."""
        with self._lock:
            self._listeners.append(callback)

    # ------------------------------------------------------------------
    # tkinter handling (lazy, silent fallback)
    # ------------------------------------------------------------------

    def _ajouter_label(
        self, tk, parent, *, texte, police, couleur, pady=None, wraplength=None
    ):
        """Create one label on ``parent``, pack it and return it."""
        options = {
            "text": texte,
            "font": police,
            "fg": couleur,
            "bg": self._COULEUR_FOND,
        }
        if wraplength is not None:
            options["wraplength"] = wraplength
        label = tk.Label(parent, **options)
        if pady is None:
            label.pack()
        else:
            label.pack(pady=pady)
        return label

    def _creer_ui_si_besoin(self) -> None:
        """Build the tkinter window on first use.

        Does nothing when the UI is disabled, already built, or already
        known to be unavailable. On any failure the half-built window is
        destroyed, the label table is emptied and the UI is flagged as
        unavailable so no further attempt is made.
        """
        if not self._activer_ui:
            return
        if self._tk_root is not None:
            return
        if self._ui_disponible is False:
            return  # Already probed and unavailable

        try:
            import tkinter as tk
        except Exception as e:
            logger.debug(f"[ACTIVITY] tkinter indisponible : {e}")
            self._ui_disponible = False
            return

        racine = None
        try:
            # Reuse an existing Tk root when there is one.
            racine = tk.Toplevel() if _tk_root_existe() else tk.Tk()
            racine.title("Léa — Activité")
            racine.geometry("340x180+40+40")
            racine.attributes("-topmost", True)
            racine.resizable(False, False)
            racine.configure(bg=self._COULEUR_FOND)

            # Title label: static, not kept in the label table.
            self._ajouter_label(
                tk, racine, texte="Léa", police=("Segoe UI", 14, "bold"),
                couleur="#FFFFFF", pady=(10, 2),
            )
            labels = {
                "etat": self._ajouter_label(
                    tk, racine, texte="Prête", police=("Segoe UI", 11),
                    couleur="#808080",
                ),
                "action": self._ajouter_label(
                    tk, racine, texte="", police=("Segoe UI", 10),
                    couleur="#FFFFFF", pady=(8, 2), wraplength=300,
                ),
                "progression": self._ajouter_label(
                    tk, racine, texte="", police=("Segoe UI", 9),
                    couleur="#B0B0B0",
                ),
                "temps": self._ajouter_label(
                    tk, racine, texte="", police=("Segoe UI", 9),
                    couleur="#808080", pady=(4, 0),
                ),
                "message": self._ajouter_label(
                    tk, racine, texte="", police=("Segoe UI", 9, "italic"),
                    couleur="#B0B0B0", pady=(6, 10), wraplength=300,
                ),
            }
            # Hidden by default: only shown while a workflow is running.
            racine.withdraw()
        except Exception as e:
            logger.debug(f"[ACTIVITY] Impossible de créer l'UI : {e}")
            if racine is not None:
                # Do not leave a half-built window behind.
                try:
                    racine.destroy()
                except Exception as e2:
                    logger.debug(f"[ACTIVITY] Nettoyage de l'UI impossible : {e2}")
            self._tk_labels.clear()
            self._ui_disponible = False
            self._tk_root = None
            return

        # Publish the window only once it is complete.
        self._tk_labels.clear()
        self._tk_labels.update(labels)
        self._tk_root = racine
        self._ui_disponible = True

    def _rafraichir_ui(self) -> None:
        """Push the current state to the tkinter labels.

        Safe to call when the UI does not exist. The label update itself is
        scheduled with ``after(0, ...)``. If scheduling fails (for instance
        because the root was destroyed), the UI is flagged as unavailable.
        """
        if not self._activer_ui or self._ui_disponible is False:
            return
        self._creer_ui_si_besoin()
        racine = self._tk_root
        if racine is None:
            return

        try:
            # snapshot() takes the lock itself.
            snap = self.snapshot()
            labels = self._tk_labels

            def _update() -> None:
                try:
                    labels["etat"].config(
                        text=snap.etat.libelle,
                        fg=snap.etat.couleur,
                    )
                    labels["action"].config(text=snap.action_courante or "")

                    prog = snap.progression_texte()
                    if prog and snap.nom_workflow:
                        texte_prog = f"« {snap.nom_workflow} » — {prog}"
                    elif snap.nom_workflow:
                        texte_prog = f"« {snap.nom_workflow} »"
                    else:
                        texte_prog = ""
                    labels["progression"].config(text=texte_prog)

                    if snap.debut_timestamp > 0:
                        texte_temps = f"⏱ {snap.temps_ecoule_texte()}"
                    else:
                        texte_temps = ""
                    labels["temps"].config(text=texte_temps)

                    labels["message"].config(text=snap.dernier_message)

                    # Show the window automatically as soon as Léa is active.
                    if snap.etat != EtatLea.INACTIVE:
                        racine.deiconify()
                except Exception as e:
                    logger.debug(f"[ACTIVITY] Erreur mise à jour des labels : {e}")

            try:
                racine.after(0, _update)
            except Exception as e:
                # The root is no longer usable: stop using the UI.
                logger.debug(f"[ACTIVITY] Fenêtre indisponible : {e}")
                self._tk_root = None
                self._tk_labels.clear()
                self._ui_disponible = False
        except Exception as e:
            logger.debug(f"[ACTIVITY] Erreur rafraîchissement UI : {e}")

    def _notifier_changement(self) -> None:
        """Call every registered listener with a snapshot of the state.

        The listener list and the snapshot are taken under the lock; the
        callbacks themselves run outside it. A failing listener is logged
        and does not prevent the others from running.
        """
        with self._lock:
            listeners = list(self._listeners)
            snap = self.snapshot()

        for cb in listeners:
            try:
                cb(snap)
            except Exception as e:
                logger.debug(f"[ACTIVITY] Listener erreur : {e}")

    def _publier(self) -> None:
        """Notify listeners, then refresh the UI (both outside the lock)."""
        self._notifier_changement()
        self._rafraichir_ui()
|
||||
|
||||
|
||||
def _tk_root_existe() -> bool:
|
||||
"""Vérifier si un root tkinter existe déjà (pour créer un Toplevel)."""
|
||||
try:
|
||||
import tkinter as tk
|
||||
|
||||
default_root = getattr(tk, "_default_root", None)
|
||||
return default_root is not None
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Singleton global (optionnel)
|
||||
# ============================================================================
|
||||
|
||||
|
||||
# Guards creation and reset of the shared panel instance.
_LOCK_SINGLETON = threading.Lock()

# Process-wide panel; stays None until it is first requested.
_INSTANCE_GLOBALE: Optional[ActivityPanel] = None
|
||||
|
||||
|
||||
def get_activity_panel(activer_ui: bool = True) -> ActivityPanel:
    """Return the shared activity panel, creating it on first call.

    Args:
        activer_ui: Passed to ``ActivityPanel`` only when the instance is
            created; it has no effect once an instance already exists.

    Returns:
        The process-wide ``ActivityPanel`` instance.
    """
    global _INSTANCE_GLOBALE
    with _LOCK_SINGLETON:
        panel = _INSTANCE_GLOBALE
        if panel is None:
            panel = ActivityPanel(activer_ui=activer_ui)
            _INSTANCE_GLOBALE = panel
        return panel
|
||||
|
||||
|
||||
def reset_activity_panel() -> None:
    """Drop the shared panel instance (useful in tests).

    The current instance, if any, is hidden first; an error while hiding
    it is ignored. The next ``get_activity_panel()`` call builds a new one.
    """
    global _INSTANCE_GLOBALE
    with _LOCK_SINGLETON:
        ancien, _INSTANCE_GLOBALE = _INSTANCE_GLOBALE, None
        if ancien is None:
            return
        try:
            ancien.masquer()
        except Exception:
            # Best effort: a failure to hide must not block the reset.
            pass
|
||||
612
agent_v0/agent_v1/ui/messages.py
Normal file
612
agent_v0/agent_v1/ui/messages.py
Normal file
@@ -0,0 +1,612 @@
|
||||
# agent_v1/ui/messages.py
|
||||
"""
|
||||
Formatage des messages utilisateur pour Léa.
|
||||
|
||||
Convertit les codes d'erreur techniques (`target_not_found`, `no_screen_change`...)
|
||||
en phrases en français naturel, orientées action, adaptées à un utilisateur non
|
||||
technique (secrétaire médicale, TIM).
|
||||
|
||||
Trois niveaux de sévérité sont définis :
|
||||
- INFO — Léa fait son travail normalement
|
||||
- ATTENTION — Quelque chose de léger (ralentissement, retry)
|
||||
- BLOCAGE — Léa a besoin d'aide, elle rend la main
|
||||
|
||||
Le module est 100% pur (pas d'I/O, pas d'UI) : testable sans mocks lourds.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import Any, Mapping, Optional
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Accès paresseux au DomainContext
|
||||
# ----------------------------------------------------------------------------
|
||||
#
|
||||
# On importe le module à l'appel pour éviter toute dépendance circulaire
|
||||
# avec `agent_v0.server_v1.domain_context` (qui ne doit pas importer l'UI).
|
||||
# Si l'import échoue (contexte client sans server_v1), on retombe sur None
|
||||
# et les formatters gardent leur comportement générique historique.
|
||||
|
||||
|
||||
def _get_domain_ctx(domain_id: Optional[str]):
|
||||
"""Récupérer un DomainContext si possible, sinon None (fallback)."""
|
||||
if not domain_id:
|
||||
return None
|
||||
try:
|
||||
from agent_v0.server_v1.domain_context import get_domain_context # lazy
|
||||
return get_domain_context(domain_id)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _friendly_target(description: str, domain_id: Optional[str] = None) -> str:
    """Turn a technical target description into business wording if possible.

    The description is first cleaned with ``_nettoyer_description_cible``.
    When a domain context is available and the cleaned text is not empty,
    the context's ``_apply_synonyms`` is applied to it.

    Args:
        description: Raw target description.
        domain_id: Optional business domain identifier.

    Returns:
        The synonym-expanded text, or the cleaned description when there
        is no domain context, the text is empty, or the expansion raises.
    """
    nettoyee = _nettoyer_description_cible(description)
    contexte = _get_domain_ctx(domain_id)
    if contexte is not None and nettoyee:
        try:
            return contexte._apply_synonyms(nettoyee)
        except Exception:
            pass
    return nettoyee
|
||||
|
||||
|
||||
class NiveauMessage(Enum):
    """Severity levels of the messages shown to the user."""

    # Light green background, dismisses itself after 3-5 s.
    INFO = "info"
    # Light orange background, dismisses itself after 7 s.
    ATTENTION = "attention"
    # Light red background, stays on screen 15 s or more.
    BLOCAGE = "blocage"
|
||||
|
||||
|
||||
# Default display duration, in seconds, for each level.
DUREE_PAR_NIVEAU: dict[NiveauMessage, int] = {
    NiveauMessage.INFO: 4,
    NiveauMessage.ATTENTION: 7,
    NiveauMessage.BLOCAGE: 15,
}

# Text icon for each level (compatible with plyer/Windows/Linux).
ICONE_PAR_NIVEAU: dict[NiveauMessage, str] = {
    NiveauMessage.INFO: "i",
    NiveauMessage.ATTENTION: "!",
    NiveauMessage.BLOCAGE: "?",
}
|
||||
|
||||
|
||||
@dataclass
class MessageUtilisateur:
    """A message ready to be displayed to the user.

    Attributes:
        niveau: Severity level (info / attention / blocage).
        titre: Short notification title (intended to be 60 characters or
            less; not enforced here).
        corps: Message body, in plain French.
        duree_s: Recommended display duration, in seconds.
        persistent: When True, the user is expected to dismiss it manually.
    """

    niveau: NiveauMessage
    titre: str
    corps: str
    duree_s: int
    persistent: bool = False

    def to_dict(self) -> dict:
        """Serialize the message; ``niveau`` is exported as its enum value."""
        resultat = {"niveau": self.niveau.value}
        for champ in ("titre", "corps", "duree_s", "persistent"):
            resultat[champ] = getattr(self, champ)
        return resultat
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Helpers d'extraction
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def _extraire_nom_application(titre_fenetre: str) -> str:
|
||||
"""Extraire le nom de l'application à partir d'un titre de fenêtre.
|
||||
|
||||
Les titres Windows suivent généralement le format :
|
||||
"Document.txt – Bloc-notes"
|
||||
"Ma Page - Google Chrome"
|
||||
"Sans titre — Paint"
|
||||
|
||||
On retourne la partie après le dernier séparateur, ou le titre entier.
|
||||
"""
|
||||
if not titre_fenetre:
|
||||
return ""
|
||||
titre = titre_fenetre.strip()
|
||||
# Chercher le dernier séparateur parmi " – ", " — ", " - "
|
||||
for sep in (" – ", " — ", " - "):
|
||||
if sep in titre:
|
||||
return titre.rsplit(sep, 1)[-1].strip()
|
||||
return titre
|
||||
|
||||
|
||||
def _nettoyer_description_cible(description: str) -> str:
|
||||
"""Nettoyer la description technique d'une cible pour l'afficher.
|
||||
|
||||
Supprime les caractères techniques (guillemets inutiles, ':').
|
||||
"""
|
||||
if not description:
|
||||
return ""
|
||||
desc = description.strip()
|
||||
# Retirer les guillemets encapsulants
|
||||
desc = desc.strip("'\"`")
|
||||
# Limiter la longueur
|
||||
if len(desc) > 80:
|
||||
desc = desc[:77] + "..."
|
||||
return desc
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Formattage des messages techniques → humains
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def formatter_cible_non_trouvee(
    description_cible: str,
    titre_fenetre: Optional[str] = None,
    domain_id: Optional[str] = None,
    params: Optional[Mapping[str, Any]] = None,
) -> MessageUtilisateur:
    """Build the message shown when Léa cannot find the element to act on.

    When a domain context exists and exposes clarification templates, its
    ``pose_clarification_question`` is tried first and, if it returns a
    non-empty text, that text becomes the message body. Otherwise a generic
    sentence is built from the target (reworded by ``_friendly_target``)
    and the application name extracted from the window title.

    Example, without a domain, for target 'bonjour' and window title
    "*bonjour, – Bloc-notes"::

        Léa a besoin d'aide
        Je ne trouve pas « bonjour » dans Bloc-notes. Peux-tu cliquer
        dessus toi-même ? Je reprends ensuite.

    Args:
        description_cible: Raw target description.
        titre_fenetre: Active window title, used to extract the app name.
        domain_id: Optional business domain used to enrich the wording.
        params: Workflow parameters forwarded to the clarification
            template.

    Returns:
        A persistent BLOCAGE message.
    """

    def _blocage(texte: str) -> MessageUtilisateur:
        # Both outcomes share the same title, level and duration.
        return MessageUtilisateur(
            niveau=NiveauMessage.BLOCAGE,
            titre="Léa a besoin d'aide",
            corps=texte,
            duree_s=DUREE_PAR_NIVEAU[NiveauMessage.BLOCAGE],
            persistent=True,
        )

    cible = _friendly_target(description_cible, domain_id) or "l'élément"
    app = _extraire_nom_application(titre_fenetre or "")

    # Prefer the business question over the generic sentence when the
    # domain provides one.
    ctx = _get_domain_ctx(domain_id)
    if ctx is not None and ctx.clarification_templates:
        demande = {
            "blocked_on": "target_not_found",
            "target": description_cible or "",
            "app": app,
            "params": dict(params or {}),
        }
        try:
            question = ctx.pose_clarification_question(demande)
        except Exception:
            question = ""
        if question:
            return _blocage(question)

    if not app:
        return _blocage(
            f"Je ne trouve pas « {cible} » à l'écran. "
            f"Peux-tu le faire toi-même ? Je reprends ensuite."
        )
    return _blocage(
        f"Je ne trouve pas « {cible} » dans {app}. "
        f"Peux-tu cliquer dessus toi-même ? Je reprends ensuite."
    )
|
||||
|
||||
|
||||
def formatter_fenetre_incorrecte(
    titre_actuel: str,
    titre_attendu: str,
) -> MessageUtilisateur:
    """Build the message shown when the active window is not the expected one.

    Both titles are reduced to an application name with
    ``_extraire_nom_application``. An empty current name falls back to
    "une autre fenêtre"; an empty expected name falls back to the raw
    expected title.

    Example, for current title 'Program Manager' and expected title
    'Doc – Explorateur de fichiers'::

        Léa attend une fenêtre
        J'attends « Explorateur de fichiers » mais c'est « Program Manager »
        qui est affiché. Peux-tu ouvrir la bonne fenêtre ?

    Args:
        titre_actuel: Title of the window currently in front.
        titre_attendu: Title of the window Léa expected.

    Returns:
        A persistent BLOCAGE message.
    """
    affichee = _extraire_nom_application(titre_actuel)
    if not affichee:
        affichee = "une autre fenêtre"
    voulue = _extraire_nom_application(titre_attendu)
    if not voulue:
        voulue = titre_attendu

    texte = (
        f"J'attends « {voulue} » mais c'est « {affichee} » "
        f"qui est affiché. Peux-tu ouvrir la bonne fenêtre ?"
    )
    niveau = NiveauMessage.BLOCAGE
    return MessageUtilisateur(
        niveau=niveau,
        titre="Léa attend une fenêtre",
        corps=texte,
        duree_s=DUREE_PAR_NIVEAU[niveau],
        persistent=True,
    )
|
||||
|
||||
|
||||
def formatter_ecran_inchange(action_type: str = "") -> MessageUtilisateur:
    """Build the message shown when an action had no visible effect.

    Example for ``action_type="click"``::

        Léa vérifie
        Mon clic n'a pas eu l'air de marcher. Je vais réessayer, ou te
        rendre la main si ça ne passe pas.

    Args:
        action_type: One of "click", "type", "key_combo", "scroll"; any
            other value is worded as "Mon action".

    Returns:
        An ATTENTION message.
    """
    libelles = {
        "click": "Mon clic",
        "type": "Ma saisie",
        "key_combo": "Mon raccourci clavier",
        "scroll": "Mon défilement",
    }
    sujet = libelles[action_type] if action_type in libelles else "Mon action"
    niveau = NiveauMessage.ATTENTION
    return MessageUtilisateur(
        niveau=niveau,
        titre="Léa vérifie",
        corps=(
            f"{sujet} n'a pas eu l'air de marcher. Je vais réessayer, "
            f"ou te rendre la main si ça ne passe pas."
        ),
        duree_s=DUREE_PAR_NIVEAU[niveau],
    )
|
||||
|
||||
|
||||
def formatter_connexion_perdue(hote_serveur: str = "") -> MessageUtilisateur:
    """Build the message shown when the server connection is lost.

    The wording is reassuring: it states that Léa retries on her own.

    Args:
        hote_serveur: Server host; accepted but not used in the text.

    Returns:
        An ATTENTION message.
    """
    niveau = NiveauMessage.ATTENTION
    texte = (
        "J'ai perdu le lien avec le serveur. Je retente automatiquement, "
        "pas besoin d'intervenir."
    )
    return MessageUtilisateur(
        niveau=niveau,
        titre="Léa est déconnectée",
        corps=texte,
        duree_s=DUREE_PAR_NIVEAU[niveau],
    )
|
||||
|
||||
|
||||
def formatter_connexion_retablie() -> MessageUtilisateur:
    """Build the INFO message shown when the server connection is back."""
    niveau = NiveauMessage.INFO
    return MessageUtilisateur(
        niveau=niveau,
        titre="Léa",
        corps="C'est bon, la connexion est revenue. Je continue.",
        duree_s=DUREE_PAR_NIVEAU[niveau],
    )
|
||||
|
||||
|
||||
def formatter_debut_workflow(nom_workflow: str, nb_etapes: int = 0) -> MessageUtilisateur:
    """Build the message shown when a replay workflow starts.

    Args:
        nom_workflow: Workflow name, quoted in the text.
        nb_etapes: Number of steps; mentioned only when greater than 0.

    Returns:
        An INFO message.
    """
    # The step count is inserted only when it is known.
    detail = f" ({nb_etapes} étapes)" if nb_etapes > 0 else ""
    texte = f"Je démarre « {nom_workflow} »{detail}. Je t'indique mon avancement."
    niveau = NiveauMessage.INFO
    return MessageUtilisateur(
        niveau=niveau,
        titre="Léa démarre",
        corps=texte,
        duree_s=DUREE_PAR_NIVEAU[niveau],
    )
|
||||
|
||||
|
||||
def formatter_etape_workflow(
    etape_actuelle: int,
    nb_etapes: int,
    description: str = "",
) -> MessageUtilisateur:
    """Build the progress message for one workflow step.

    Args:
        etape_actuelle: Current step number.
        nb_etapes: Total number of steps.
        description: Optional step description; when given, it is cleaned
            with ``_nettoyer_description_cible`` and appended after a dash.

    Returns:
        An INFO message displayed for 3 seconds.
    """
    texte = f"Étape {etape_actuelle}/{nb_etapes}"
    if description:
        texte = f"{texte} — {_nettoyer_description_cible(description)}"
    return MessageUtilisateur(
        niveau=NiveauMessage.INFO,
        titre="Léa avance",
        corps=texte,
        duree_s=3,
    )
|
||||
|
||||
|
||||
def formatter_retry(action_type: str = "", tentative: int = 2) -> MessageUtilisateur:
    """Build the message shown when Léa retries an action.

    Args:
        action_type: Action kind; accepted but not used in the text.
        tentative: Attempt number mentioned in the text.

    Returns:
        An ATTENTION message.
    """
    niveau = NiveauMessage.ATTENTION
    texte = (
        f"Je retente (tentative {tentative}). Ça arrive parfois, "
        f"l'écran était peut-être en cours de chargement."
    )
    return MessageUtilisateur(
        niveau=niveau,
        titre="Léa retente",
        corps=texte,
        duree_s=DUREE_PAR_NIVEAU[niveau],
    )
|
||||
|
||||
|
||||
def formatter_ralentissement() -> MessageUtilisateur:
    """Build the ATTENTION message shown when Léa is slower than expected."""
    niveau = NiveauMessage.ATTENTION
    return MessageUtilisateur(
        niveau=niveau,
        titre="Léa prend son temps",
        corps="Je vais plus lentement que prévu. L'écran met du temps à répondre.",
        duree_s=DUREE_PAR_NIVEAU[niveau],
    )
|
||||
|
||||
|
||||
def formatter_fin_workflow(
    succes: bool,
    nom_workflow: str = "",
    nb_etapes: int = 0,
    duree_s: float = 0.0,
    domain_id: Optional[str] = None,
    items_count: int = 0,
    failed_count: int = 0,
    params: Optional[Mapping[str, Any]] = None,
) -> MessageUtilisateur:
    """Build the message shown when a workflow ends.

    When a domain context exists and exposes summary templates, its
    ``describe_workflow_outcome`` is asked for a business-level report.
    If that report is non-empty it becomes the body, and the level depends
    on the outcome: INFO for a success without failed items, ATTENTION for
    a success with failed items, BLOCAGE otherwise. Without a usable
    report, a generic success or failure sentence is used.

    Args:
        succes: True when the whole workflow succeeded.
        nom_workflow: Workflow name.
        nb_etapes: Number of technical steps (used by the generic text and
            as a fallback item count).
        duree_s: Total duration in seconds.
        domain_id: Optional business domain.
        items_count: Number of business items processed; when 0,
            ``max(1, nb_etapes)`` is passed to the domain context instead.
        failed_count: Number of failed items.
        params: Extra data forwarded to the templates.

    Returns:
        The message matching the outcome.
    """

    def _termine(texte: str) -> MessageUtilisateur:
        return MessageUtilisateur(
            niveau=NiveauMessage.INFO,
            titre="Léa a terminé",
            corps=texte,
            duree_s=6,
        )

    def _arret(texte: str) -> MessageUtilisateur:
        return MessageUtilisateur(
            niveau=NiveauMessage.BLOCAGE,
            titre="Léa s'arrête",
            corps=texte,
            duree_s=DUREE_PAR_NIVEAU[NiveauMessage.BLOCAGE],
            persistent=True,
        )

    ctx = _get_domain_ctx(domain_id)
    if ctx is not None and ctx.summary_templates:
        try:
            rapport = ctx.describe_workflow_outcome(
                workflow_name=nom_workflow,
                success=succes,
                items_count=items_count or max(1, nb_etapes),
                failed_count=failed_count,
                elapsed_s=duree_s,
                extra=dict(params or {}),
            )
        except Exception:
            rapport = ""
        if rapport:
            if succes:
                if failed_count == 0:
                    return _termine(rapport)
                if failed_count > 0:
                    return MessageUtilisateur(
                        niveau=NiveauMessage.ATTENTION,
                        titre="Léa a terminé partiellement",
                        corps=rapport,
                        duree_s=DUREE_PAR_NIVEAU[NiveauMessage.ATTENTION],
                    )
            return _arret(rapport)

    # Generic wording when no domain report is available.
    if not succes:
        return _arret(
            "Je n'ai pas pu terminer. Je te rends la main, "
            "tu peux continuer à partir de là où je me suis arrêtée."
        )
    if nom_workflow and nb_etapes > 0:
        texte = (
            f"C'est fait ! « {nom_workflow} » est terminé "
            f"({nb_etapes} étapes en {int(duree_s)}s)."
        )
    else:
        texte = "C'est fait ! Tout s'est bien passé."
    return _termine(texte)
|
||||
|
||||
|
||||
def formatter_erreur_generique(
    message_technique: str,
    domain_id: Optional[str] = None,
    params: Optional[Mapping[str, Any]] = None,
) -> MessageUtilisateur:
    """Format an uncategorized technical error message.

    Known patterns are detected in the technical message and routed to the
    matching specialized formatter; anything else is wrapped, truncated to
    120 characters, in a generic ATTENTION message.

    Args:
        message_technique: Raw technical message (may be empty).
        domain_id: Optional business domain, forwarded to the specialized
            formatters that accept it.
        params: Workflow parameters, forwarded likewise.

    Returns:
        The user-facing message.
    """
    if not message_technique:
        return MessageUtilisateur(
            niveau=NiveauMessage.ATTENTION,
            titre="Léa",
            corps="J'ai rencontré un petit souci. Je continue.",
            duree_s=DUREE_PAR_NIVEAU[NiveauMessage.ATTENTION],
        )

    msg_lower = message_technique.lower()

    # target_not_found[:...]
    if "target_not_found" in msg_lower:
        # search(), not match(): the code may be preceded by a prefix
        # (e.g. "Erreur: target_not_found: 'x'") and the description after
        # it must still be extracted.
        match = re.search(
            r"target_not_found[:\s]*(.*)", message_technique, re.IGNORECASE
        )
        desc = match.group(1).strip() if match else ""
        return formatter_cible_non_trouvee(desc, domain_id=domain_id, params=params)

    # Fenêtre incorrecte: 'X' (attendu: 'Y')
    if "fenêtre incorrecte" in msg_lower or "fenetre incorrecte" in msg_lower:
        # First quoted value after ':' or ',' is the current title; the
        # quoted value after "attendu" is the expected one.
        m_actuel = re.search(r"[:,]\s*['\"]([^'\"]+)['\"]", message_technique)
        m_attendu = re.search(r"attendu[:\s]*['\"]([^'\"]+)['\"]", message_technique)
        actuel = m_actuel.group(1) if m_actuel else ""
        attendu = m_attendu.group(1) if m_attendu else ""
        return formatter_fenetre_incorrecte(actuel, attendu)

    # Unchanged screen
    if "inchang" in msg_lower or "no_screen_change" in msg_lower:
        return formatter_ecran_inchange()

    # Policy abort / failed visual resolution: the whole technical message
    # is passed as the target description.
    if "policy_abort" in msg_lower or "visual_resolve_failed" in msg_lower:
        return formatter_cible_non_trouvee(
            message_technique, domain_id=domain_id, params=params
        )

    # Fallback: truncated technical message
    msg_tronque = message_technique.strip()
    if len(msg_tronque) > 120:
        msg_tronque = msg_tronque[:117] + "..."

    return MessageUtilisateur(
        niveau=NiveauMessage.ATTENTION,
        titre="Léa",
        corps=f"J'ai rencontré un souci : {msg_tronque}",
        duree_s=DUREE_PAR_NIVEAU[NiveauMessage.ATTENTION],
    )
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Détection fenêtre Léa (utilisé par l'executor pour ignorer sa propre UI)
|
||||
# ============================================================================
|
||||
|
||||
|
||||
# Motifs qui identifient une fenêtre appartenant à Léa (l'agent lui-même).
|
||||
# On utilise des regex avec \b pour éviter les faux positifs sur des noms
|
||||
# contenant "lea" (ex: "cléa.txt", "leapfrog", "replay").
|
||||
_MOTIFS_FENETRE_LEA_REGEX = (
|
||||
r"\bléa\b",
|
||||
r"\blea\b(?!p)", # "lea" mot entier, pas "leapfrog"
|
||||
r"lea\s*[—–\-:]", # "Lea —", "Lea -", "Lea :"
|
||||
r"léa\s*[—–\-:]",
|
||||
r"\bassistante ia\b",
|
||||
r"\bléa ia\b",
|
||||
r"\blea ia\b",
|
||||
)
|
||||
|
||||
|
||||
def est_fenetre_lea(titre_fenetre: str) -> bool:
    """Tell whether a window title belongs to the Léa agent itself.

    Used so that Léa does not treat her own window as an intruder during
    her pre-checks. The title is lower-cased and stripped, then searched
    with each pattern of ``_MOTIFS_FENETRE_LEA_REGEX``.

    Args:
        titre_fenetre: Window title (may be empty or None).

    Returns:
        True when at least one pattern matches; False for an empty title.
    """
    if not titre_fenetre:
        return False
    titre_norm = titre_fenetre.lower().strip()
    for motif in _MOTIFS_FENETRE_LEA_REGEX:
        if re.search(motif, titre_norm):
            return True
    return False
|
||||
|
||||
|
||||
# Fenêtres parasites Windows à ignorer dans les pré-vérifications.
|
||||
# Ce ne sont pas des fenêtres applicatives — c'est du bruit système
|
||||
# qui prend le focus de manière imprévisible.
|
||||
_FENETRES_BRUIT_SYSTEME = (
|
||||
"fenêtre de dépassement de capacité",
|
||||
"overflow", # version anglaise systray
|
||||
"program manager",
|
||||
"barre des tâches",
|
||||
"task bar",
|
||||
"cortana",
|
||||
"action center",
|
||||
"centre de notifications",
|
||||
)
|
||||
|
||||
|
||||
def est_fenetre_bruit(titre_fenetre: str) -> bool:
    """Tell whether a window title is Windows system noise.

    Such windows (systray overflow, taskbar, Program Manager) grab the
    focus unpredictably and are never the target of a user action.

    Args:
        titre_fenetre: Window title (may be empty or None).

    Returns:
        True for an empty title, for "unknown_window", or when the
        lower-cased title contains any fragment of
        ``_FENETRES_BRUIT_SYSTEME``.

    NOTE(review): matching is by substring, so an application title that
    merely contains a fragment (e.g. "overflow") is also reported as noise.
    """
    if not titre_fenetre:
        return True  # no title = noise
    titre_norm = titre_fenetre.lower().strip()
    if titre_norm == "unknown_window":
        return True
    for fragment in _FENETRES_BRUIT_SYSTEME:
        if fragment in titre_norm:
            return True
    return False
|
||||
|
||||
|
||||
# Kept for backward compatibility with code that listed MOTIFS_FENETRE_LEA.
MOTIFS_FENETRE_LEA = (
    "léa",
    "lea —",
    "léa —",
    "lea -",
    "léa -",
    "lea assistante",
    "léa assistante",
    "lea : ",
    "léa : ",
    "assistante ia",
)
|
||||
@@ -5,6 +5,14 @@ Utilise plyer pour les notifications système, sans dépendance PyQt5.
|
||||
|
||||
Remplace les dialogues Qt par des toasts non-bloquants.
|
||||
Thread-safe avec rate limiting (1 notification / 2 secondes max).
|
||||
|
||||
Les messages utilisateur sont formatés via `agent_v1.ui.messages` qui convertit
|
||||
les codes techniques (target_not_found, etc.) en français naturel.
|
||||
|
||||
Hiérarchie des notifications (cf. messages.NiveauMessage) :
|
||||
- INFO : auto-dismiss en ~4s, rate-limité classique
|
||||
- ATTENTION : auto-dismiss en ~7s, rate-limité classique
|
||||
- BLOCAGE : persistant (15s+), bypass du rate limit
|
||||
"""
|
||||
|
||||
import logging
|
||||
@@ -12,6 +20,22 @@ import threading
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
from .messages import (
|
||||
MessageUtilisateur,
|
||||
NiveauMessage,
|
||||
formatter_cible_non_trouvee,
|
||||
formatter_connexion_perdue,
|
||||
formatter_connexion_retablie,
|
||||
formatter_debut_workflow,
|
||||
formatter_ecran_inchange,
|
||||
formatter_erreur_generique,
|
||||
formatter_etape_workflow,
|
||||
formatter_fenetre_incorrecte,
|
||||
formatter_fin_workflow,
|
||||
formatter_ralentissement,
|
||||
formatter_retry,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Import conditionnel de plyer — fallback silencieux si absent
|
||||
@@ -59,7 +83,13 @@ class NotificationManager:
|
||||
# Méthode générique
|
||||
# ------------------------------------------------------------------ #
|
||||
|
||||
def notify(self, title: str, message: str, timeout: int = 5) -> bool:
|
||||
def notify(
|
||||
self,
|
||||
title: str,
|
||||
message: str,
|
||||
timeout: int = 5,
|
||||
bypass_rate_limit: bool = False,
|
||||
) -> bool:
|
||||
"""
|
||||
Affiche une notification toast.
|
||||
|
||||
@@ -67,6 +97,8 @@ class NotificationManager:
|
||||
title: Titre de la notification.
|
||||
message: Corps du message.
|
||||
timeout: Durée d'affichage en secondes.
|
||||
bypass_rate_limit: Si True, ignore le rate limit (pour les blocages
|
||||
importants qui ne doivent pas être écrasés).
|
||||
|
||||
Returns:
|
||||
True si la notification a été envoyée, False sinon
|
||||
@@ -76,17 +108,21 @@ class NotificationManager:
|
||||
logger.debug("Notification ignorée (plyer absent) : %s", title)
|
||||
return False
|
||||
|
||||
with self._lock:
|
||||
now = time.monotonic()
|
||||
elapsed = now - self._last_notification_time
|
||||
if elapsed < RATE_LIMIT_SECONDS:
|
||||
logger.debug(
|
||||
"Notification ignorée (rate limit, %.1fs restantes) : %s",
|
||||
RATE_LIMIT_SECONDS - elapsed,
|
||||
title,
|
||||
)
|
||||
return False
|
||||
self._last_notification_time = now
|
||||
if not bypass_rate_limit:
|
||||
with self._lock:
|
||||
now = time.monotonic()
|
||||
elapsed = now - self._last_notification_time
|
||||
if elapsed < RATE_LIMIT_SECONDS:
|
||||
logger.debug(
|
||||
"Notification ignorée (rate limit, %.1fs restantes) : %s",
|
||||
RATE_LIMIT_SECONDS - elapsed,
|
||||
title,
|
||||
)
|
||||
return False
|
||||
self._last_notification_time = now
|
||||
else:
|
||||
with self._lock:
|
||||
self._last_notification_time = time.monotonic()
|
||||
|
||||
# Envoi dans un thread dédié pour ne jamais bloquer l'appelant
|
||||
thread = threading.Thread(
|
||||
@@ -97,6 +133,39 @@ class NotificationManager:
|
||||
thread.start()
|
||||
return True
|
||||
|
||||
def notify_message(self, msg: MessageUtilisateur) -> bool:
    """Send a structured MessageUtilisateur as a toast notification.

    The message is logged through `_log_message` and then handed to
    `notify` with its title, body and duration. When the message level
    equals `NiveauMessage.BLOCAGE`, `notify` is called with
    `bypass_rate_limit=True`.

    Args:
        msg: Message carrying `niveau`, `titre`, `corps` and `duree_s`.

    Returns:
        The value returned by `notify`.
    """
    is_blocking = msg.niveau == NiveauMessage.BLOCAGE

    # Mirror the message in the logs before attempting the toast.
    self._log_message(msg)

    toast_kwargs = {
        "title": msg.titre,
        "message": msg.corps,
        "timeout": msg.duree_s,
        "bypass_rate_limit": is_blocking,
    }
    return self.notify(**toast_kwargs)
|
||||
|
||||
@staticmethod
def _log_message(msg: MessageUtilisateur) -> None:
    """Write a user message to the module logger at a level matching it.

    INFO maps to `logger.info`, ATTENTION to `logger.warning`, BLOCAGE to
    `logger.error`; any other level falls back to `logger.info`. Each line
    is prefixed with "[LEA]".
    """
    line = f"[LEA] {msg.titre}: {msg.corps}"
    level = msg.niveau

    if level == NiveauMessage.INFO:
        emit = logger.info
    elif level == NiveauMessage.ATTENTION:
        emit = logger.warning
    elif level == NiveauMessage.BLOCAGE:
        emit = logger.error
    else:
        # Unknown level: log as plain information.
        emit = logger.info

    emit(line)
|
||||
|
||||
def _send(self, title: str, message: str, timeout: int) -> None:
|
||||
"""Envoi effectif de la notification (exécuté dans un thread dédié)."""
|
||||
try:
|
||||
@@ -180,40 +249,79 @@ class NotificationManager:
|
||||
timeout=3,
|
||||
)
|
||||
|
||||
def replay_finished(self, success: bool, workflow_name: str) -> bool:
|
||||
"""Notification de fin de replay (succès ou échec)."""
|
||||
if success:
|
||||
return self.notify(
|
||||
title=APP_NAME,
|
||||
message="C'est fait ! Tout s'est bien passé.",
|
||||
timeout=5,
|
||||
)
|
||||
else:
|
||||
return self.notify(
|
||||
title=APP_NAME,
|
||||
message="Hmm, j'ai eu un souci. Vous pouvez me remontrer ?",
|
||||
timeout=7,
|
||||
)
|
||||
def replay_target_not_found(
|
||||
self,
|
||||
target_description: str,
|
||||
window_title: Optional[str] = None,
|
||||
) -> bool:
|
||||
"""Notification quand un élément n'est pas trouvé pendant le replay.
|
||||
|
||||
def connection_changed(self, connected: bool, server_host: str) -> bool:
|
||||
Le replay est mis en pause et attend une intervention humaine.
|
||||
Utilise `messages.formatter_cible_non_trouvee` pour un message en
|
||||
français naturel.
|
||||
"""
|
||||
msg = formatter_cible_non_trouvee(target_description, window_title)
|
||||
return self.notify_message(msg)
|
||||
|
||||
def replay_wrong_window(self, current_title: str, expected_title: str) -> bool:
    """Notify that the active window is not the expected one.

    Builds the message with `formatter_fenetre_incorrecte` and sends it
    through `notify_message`.
    """
    return self.notify_message(
        formatter_fenetre_incorrecte(current_title, expected_title)
    )
|
||||
|
||||
def replay_no_screen_change(self, action_type: str = "") -> bool:
    """Notify that an action produced no visible effect on screen.

    Builds the message with `formatter_ecran_inchange` and sends it
    through `notify_message`.
    """
    return self.notify_message(formatter_ecran_inchange(action_type))
|
||||
|
||||
def replay_retry(self, action_type: str = "", tentative: int = 2) -> bool:
    """Notify that an action is being retried.

    Builds the message with `formatter_retry` and sends it through
    `notify_message`.
    """
    return self.notify_message(formatter_retry(action_type, tentative))
|
||||
|
||||
def replay_slow(self) -> bool:
    """Notify that the replay is running slower than expected.

    Builds the message with `formatter_ralentissement` and sends it
    through `notify_message`.
    """
    return self.notify_message(formatter_ralentissement())
|
||||
|
||||
def replay_finished(
    self,
    success: bool,
    workflow_name: str,
    step_count: int = 0,
    duration_s: float = 0.0,
) -> bool:
    """Notify the end of a replay, whether it succeeded or failed.

    Builds the message with `formatter_fin_workflow` and sends it through
    `notify_message`.
    """
    return self.notify_message(
        formatter_fin_workflow(success, workflow_name, step_count, duration_s)
    )
|
||||
|
||||
def replay_workflow_started(self, workflow_name: str, step_count: int = 0) -> bool:
    """Notify the start of a workflow (replaces `replay_started`).

    Builds the message with `formatter_debut_workflow` and sends it
    through `notify_message`.
    """
    return self.notify_message(formatter_debut_workflow(workflow_name, step_count))
|
||||
|
||||
def replay_step_progress(
    self,
    current: int,
    total: int,
    description: str = "",
) -> bool:
    """Notify progress on a workflow step.

    Builds the message with `formatter_etape_workflow` and sends it
    through `notify_message`.
    """
    return self.notify_message(
        formatter_etape_workflow(current, total, description)
    )
|
||||
|
||||
def connection_changed(self, connected: bool, server_host: str = "") -> bool:
|
||||
"""Notification de changement d'état de la connexion serveur."""
|
||||
if connected:
|
||||
return self.notify(
|
||||
title=APP_NAME,
|
||||
message="Connectée au serveur.",
|
||||
timeout=5,
|
||||
)
|
||||
msg = formatter_connexion_retablie()
|
||||
else:
|
||||
return self.notify(
|
||||
title=APP_NAME,
|
||||
message="J'ai perdu la connexion avec le serveur.",
|
||||
timeout=7,
|
||||
)
|
||||
msg = formatter_connexion_perdue(server_host)
|
||||
return self.notify_message(msg)
|
||||
|
||||
def error(self, message: str) -> bool:
|
||||
"""Notification d'erreur."""
|
||||
return self.notify(
|
||||
title=APP_NAME,
|
||||
message=f"Oups, un problème : {message}",
|
||||
timeout=10,
|
||||
)
|
||||
"""Notification d'erreur générique.
|
||||
|
||||
Essaie d'abord de détecter un motif technique connu et de formater
|
||||
correctement, sinon fallback sur un message générique aidant.
|
||||
"""
|
||||
msg = formatter_erreur_generique(message)
|
||||
return self.notify_message(msg)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
214
agent_v0/deploy/windows_client/agent_v1/core/grounding.py
Normal file
214
agent_v0/deploy/windows_client/agent_v1/core/grounding.py
Normal file
@@ -0,0 +1,214 @@
|
||||
# agent_v1/core/grounding.py
|
||||
"""
|
||||
Module Grounding — localisation pure d'éléments UI sur l'écran.
|
||||
|
||||
Responsabilité unique : "Trouve l'élément X sur l'écran et retourne ses coordonnées."
|
||||
Ne prend AUCUNE décision. Si l'élément n'est pas trouvé → retourne NOT_FOUND.
|
||||
|
||||
Stratégies disponibles (cascade configurable) :
|
||||
1. Serveur SomEngine + VLM (GPU distant)
|
||||
2. Template matching local (CPU, ~10ms)
|
||||
3. VLM local direct (CPU/GPU local)
|
||||
|
||||
Séparé de Policy (qui décide quoi faire quand grounding échoue).
|
||||
Ref: docs/PLAN_ACTEUR_V1.md — Architecture MICRO (grounding + exécution)
|
||||
"""
|
||||
|
||||
import base64
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class GroundingResult:
    """Outcome of one attempt to locate a UI element on screen."""

    found: bool                  # True when the element was located
    x_pct: float = 0.0           # X position as a fraction (0.0-1.0)
    y_pct: float = 0.0           # Y position as a fraction (0.0-1.0)
    method: str = ""             # Resolver used (server_som, anchor_template, vlm_direct...)
    score: float = 0.0           # Confidence (0.0-1.0)
    elapsed_ms: float = 0.0      # Resolution time in milliseconds
    detail: str = ""             # Extra info (matched label, failure reason)
    raw: Optional[Dict] = None   # Raw resolver payload, kept for debugging

    def to_dict(self) -> Dict[str, Any]:
        """Return a serialisable summary; `raw` is left out.

        `score` is rounded to 3 decimals and `elapsed_ms` to 1 decimal.
        """
        summary: Dict[str, Any] = {}
        summary["found"] = self.found
        summary["x_pct"] = self.x_pct
        summary["y_pct"] = self.y_pct
        summary["method"] = self.method
        summary["score"] = round(self.score, 3)
        summary["elapsed_ms"] = round(self.elapsed_ms, 1)
        summary["detail"] = self.detail
        return summary
|
||||
|
||||
|
||||
# Résultat singleton pour "pas trouvé"
|
||||
NOT_FOUND = GroundingResult(found=False, detail="Aucune méthode n'a trouvé l'élément")
|
||||
|
||||
|
||||
class GroundingEngine:
    """Visual localisation engine for UI elements.

    Wraps the executor's resolution methods (server, template matching,
    local VLM) behind a single cascade. It only locates elements; deciding
    what to do when nothing is found is left to the caller.

    Usage:
        engine = GroundingEngine(executor)
        result = engine.locate(
            server_url, target_spec, fallback_x, fallback_y,
            screen_width, screen_height,
        )
        if result.found:
            click(result.x_pct, result.y_pct)
    """

    # Maps a method name stored in target_spec["_learned_strategy"] to the
    # cascade strategy that should be tried first for that target.
    _LEARNED_TO_STRATEGY = {
        "som_text_match": "server",
        "grounding_vlm": "server",
        "server_som": "server",
        "anchor_template": "template",
        "template_matching": "template",
        "hybrid_text_direct": "vlm_local",
        "hybrid_vlm_text": "vlm_local",
        "vlm_direct": "vlm_local",
    }

    def __init__(self, executor):
        """
        Args:
            executor: Object providing `_capture_screenshot_b64`,
                `_server_resolve_target`, `_template_match_anchor` and
                `_hybrid_vlm_resolve`.
        """
        self._executor = executor

    def locate(
        self,
        server_url: str,
        target_spec: Dict[str, Any],
        fallback_x: float,
        fallback_y: float,
        screen_width: int,
        screen_height: int,
        strategies: Optional[List[str]] = None,
    ) -> GroundingResult:
        """Locate a UI element by running the strategy cascade.

        Strategies are tried in order and the first one that finds the
        element wins.

        Args:
            server_url: Server URL; the "server" strategy is skipped when empty.
            target_spec: Target description (by_text, anchor_image_base64,
                vlm_description, optional _learned_strategy...).
            fallback_x, fallback_y: Fallback coordinates forwarded to the
                server resolver.
            screen_width, screen_height: Screen resolution.
            strategies: Ordered strategies to try. Defaults to
                ["server", "template", "vlm_local"]. The list passed in is
                never mutated.

        Returns:
            A GroundingResult with found=True and coordinates, or a new
            GroundingResult with found=False and a `detail` explaining why.
        """
        if strategies is None:
            strategies = ["server", "template", "vlm_local"]

        # Learning loop: if a method is known to work for this target, try
        # the matching strategy first.
        learned = target_spec.get("_learned_strategy", "")
        if learned:
            preferred = self._LEARNED_TO_STRATEGY.get(learned, "")
            if preferred and preferred in strategies:
                strategies = [preferred] + [s for s in strategies if s != preferred]
                logger.info(
                    "Grounding: stratégie réordonnée par l'apprentissage → "
                    "%s (learned=%s)",
                    strategies,
                    learned,
                )

        # Monotonic clock: elapsed time must not be affected by wall-clock
        # adjustments.
        t_start = time.monotonic()

        def _elapsed_ms() -> float:
            return (time.monotonic() - t_start) * 1000

        screenshot_b64 = self._executor._capture_screenshot_b64(max_width=0, quality=75)
        if not screenshot_b64:
            return GroundingResult(
                found=False,
                detail="Capture screenshot échouée",
                elapsed_ms=_elapsed_ms(),
            )

        for strategy in strategies:
            result = self._try_strategy(
                strategy, server_url, screenshot_b64, target_spec,
                fallback_x, fallback_y, screen_width, screen_height,
            )
            if result.found:
                result.elapsed_ms = _elapsed_ms()
                return result

        return GroundingResult(
            found=False,
            detail=f"Toutes les stratégies ont échoué ({', '.join(strategies)})",
            elapsed_ms=_elapsed_ms(),
        )

    def _try_strategy(
        self,
        strategy: str,
        server_url: str,
        screenshot_b64: str,
        target_spec: Dict[str, Any],
        fallback_x: float,
        fallback_y: float,
        screen_width: int,
        screen_height: int,
    ) -> GroundingResult:
        """Run one grounding strategy and wrap its raw answer.

        A strategy whose prerequisites are missing (no server URL, no anchor
        image, no text or description) and any unknown strategy name yield a
        found=False result.
        """
        raw = None

        if strategy == "server":
            if server_url:
                raw = self._executor._server_resolve_target(
                    server_url, screenshot_b64, target_spec,
                    fallback_x, fallback_y, screen_width, screen_height,
                )
        elif strategy == "template":
            anchor_b64 = target_spec.get("anchor_image_base64", "")
            if anchor_b64:
                raw = self._executor._template_match_anchor(
                    screenshot_b64, anchor_b64, screen_width, screen_height,
                )
        elif strategy == "vlm_local":
            by_text = target_spec.get("by_text", "")
            vlm_desc = target_spec.get("vlm_description", "")
            if vlm_desc or by_text:
                raw = self._executor._hybrid_vlm_resolve(
                    screenshot_b64, target_spec, screen_width, screen_height,
                )

        if raw and raw.get("resolved"):
            if strategy == "template":
                # Template matching reports a fixed method name and no label.
                return GroundingResult(
                    found=True,
                    x_pct=raw["x_pct"],
                    y_pct=raw["y_pct"],
                    method="anchor_template",
                    score=raw.get("score", 0.0),
                    raw=raw,
                )

            # "matched_element" may be present but null (or not a mapping);
            # dict.get's default only covers a missing key.
            matched = raw.get("matched_element")
            label = matched.get("label", "") if isinstance(matched, dict) else ""
            return GroundingResult(
                found=True,
                x_pct=raw["x_pct"],
                y_pct=raw["y_pct"],
                method=raw.get("method", strategy),
                score=raw.get("score", 0.0),
                detail=label,
                raw=raw,
            )

        return GroundingResult(found=False, method=strategy, detail=f"{strategy}: pas trouvé")
|
||||
152
agent_v0/deploy/windows_client/agent_v1/core/policy.py
Normal file
152
agent_v0/deploy/windows_client/agent_v1/core/policy.py
Normal file
@@ -0,0 +1,152 @@
|
||||
# agent_v1/core/policy.py
|
||||
"""
|
||||
Module Policy — décisions intelligentes quand le grounding échoue.
|
||||
|
||||
Responsabilité unique : "Le Grounding dit NOT_FOUND. Que fait-on ?"
|
||||
Ne localise AUCUN élément — c'est le rôle du Grounding.
|
||||
|
||||
Décisions possibles :
|
||||
- RETRY : re-tenter le grounding (après popup fermée, par exemple)
|
||||
- SKIP : l'action n'est plus nécessaire (état déjà atteint)
|
||||
- ABORT : arrêter le workflow (état incohérent)
|
||||
- SUPERVISE : rendre la main à l'utilisateur
|
||||
|
||||
Séparé de Grounding (qui localise les éléments).
|
||||
Ref: docs/PLAN_ACTEUR_V1.md — Architecture MÉSO (acteur intelligent)
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Decision(Enum):
    """Possible decisions when grounding fails."""

    # Try again (after a corrective step: popup closed, navigation...)
    RETRY = "retry"
    # The action is unnecessary (state already reached)
    SKIP = "skip"
    # Stop the workflow (inconsistent state)
    ABORT = "abort"
    # Hand control back to the user
    SUPERVISE = "supervise"
    # Carry on despite the failure (non-critical action)
    CONTINUE = "continue"
|
||||
|
||||
|
||||
@dataclass
class PolicyDecision:
    """Result of a policy decision."""

    decision: Decision
    reason: str                # Explanation of the decision
    action_taken: str = ""     # Corrective action performed (e.g. "popup fermée")
    elapsed_ms: float = 0.0

    def to_dict(self) -> Dict[str, Any]:
        """Return a serialisable view.

        The decision is given as its string value and `elapsed_ms` is
        rounded to 1 decimal.
        """
        payload: Dict[str, Any] = {}
        payload["decision"] = self.decision.value
        payload["reason"] = self.reason
        payload["action_taken"] = self.action_taken
        payload["elapsed_ms"] = round(self.elapsed_ms, 1)
        return payload
|
||||
|
||||
|
||||
class PolicyEngine:
    """Decision engine used when grounding fails.

    Decision cascade:
      1. First attempt and a popup could be closed -> RETRY
      2. Retries exhausted -> ask the actor -> SKIP / ABORT / SUPERVISE
      3. Otherwise -> RETRY

    Usage:
        policy = PolicyEngine(executor)
        decision = policy.decide(action, target_spec, retry_count=0)
        if decision.decision == Decision.RETRY:
            # run grounding again
        elif decision.decision == Decision.SKIP:
            # mark as done and move on
    """

    def __init__(self, executor):
        self._executor = executor

    def decide(
        self,
        action: Dict[str, Any],
        target_spec: Dict[str, Any],
        retry_count: int = 0,
        max_retries: int = 1,
    ) -> PolicyDecision:
        """Decide what to do after a grounding failure.

        Args:
            action: The action that failed.
            target_spec: The target that was not found.
            retry_count: Number of retries already performed.
            max_retries: Maximum number of retries allowed.

        Returns:
            A PolicyDecision whose `elapsed_ms` covers the time spent here.
        """
        started = time.time()

        def _verdict(decision, reason, action_taken=""):
            return PolicyDecision(
                decision=decision,
                reason=reason,
                action_taken=action_taken,
                elapsed_ms=(time.time() - started) * 1000,
            )

        # Step 1: on the very first attempt, try to dismiss a popup.
        if retry_count == 0 and self._try_close_popup():
            return _verdict(
                Decision.RETRY,
                "Popup détectée et fermée, re-tentative",
                "popup_closed",
            )

        # Step 3 (handled early): retries are still available.
        if not retry_count >= max_retries:
            return _verdict(Decision.RETRY, f"Retry {retry_count + 1}/{max_retries}")

        # Step 2: retries exhausted, defer to the actor.
        answer = self._ask_actor(action, target_spec)
        if answer == "PASSER":
            return _verdict(Decision.SKIP, "Acteur gemma4 : l'état est déjà atteint")
        if answer == "STOPPER":
            return _verdict(Decision.ABORT, "Acteur gemma4 : état incohérent, arrêt")
        # "EXECUTER" or anything unexpected: supervised pause.
        return _verdict(
            Decision.SUPERVISE,
            f"Acteur gemma4 : {answer}, pause supervisée",
        )

    def _try_close_popup(self) -> bool:
        """Ask the executor's popup handler to close a popup.

        Returns False when the handler raises.
        """
        try:
            handled = self._executor._handle_popup_vlm()
        except Exception as e:
            logger.debug(f"Policy: popup handler échoué : {e}")
            return False
        return handled

    def _ask_actor(self, action: Dict, target_spec: Dict) -> str:
        """Ask the executor's actor for a verdict (PASSER/EXECUTER/STOPPER).

        Returns "EXECUTER" when the actor raises, which `decide` turns into
        a supervised pause.
        """
        try:
            verdict = self._executor._actor_decide(action, target_spec)
        except Exception as e:
            logger.debug(f"Policy: acteur gemma4 échoué : {e}")
            return "EXECUTER"
        return verdict
|
||||
294
agent_v0/deploy/windows_client/agent_v1/core/uia_helper.py
Normal file
294
agent_v0/deploy/windows_client/agent_v1/core/uia_helper.py
Normal file
@@ -0,0 +1,294 @@
|
||||
# core/workflow/uia_helper.py
|
||||
"""
|
||||
UIAHelper — Wrapper Python pour lea_uia.exe (helper Rust UI Automation).
|
||||
|
||||
Expose une API Python simple pour interroger UIA via le binaire Rust.
|
||||
Communique via subprocess + stdin/stdout JSON.
|
||||
|
||||
Pourquoi un helper Rust ?
|
||||
- 5-10x plus rapide que pywinauto (10-20ms vs 50-200ms)
|
||||
- Binaire standalone ~500 Ko, aucune dépendance runtime
|
||||
- Pas de problèmes de threading COM en Python
|
||||
- Crash-safe (le crash du helper n'affecte pas l'agent Python)
|
||||
|
||||
Architecture :
|
||||
Python executor
|
||||
↓ subprocess.run
|
||||
lea_uia.exe query --x 812 --y 436
|
||||
↓ UIA API Windows
|
||||
JSON response
|
||||
↓ stdout
|
||||
Python executor parse JSON
|
||||
|
||||
Si lea_uia.exe n'est pas disponible (Linux, binaire absent, crash) :
|
||||
toutes les méthodes retournent None → fallback vision automatique.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import subprocess
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Timeout par défaut pour les appels UIA (en secondes)
|
||||
_DEFAULT_TIMEOUT = 5.0
|
||||
|
||||
# Masquer la fenêtre console lors du spawn de lea_uia.exe sur Windows.
|
||||
# Sans ce flag, chaque appel (à chaque clic utilisateur pendant
|
||||
# l'enregistrement) fait apparaître une fenêtre cmd noire brièvement
|
||||
# visible à l'écran → ralentit la souris et pollue les screenshots
|
||||
# capturés (le VLM peut "voir" le chemin lea_uia.exe comme texte cliqué).
|
||||
#
|
||||
# La valeur 0x08000000 correspond à CREATE_NO_WINDOW défini dans
|
||||
# l'API Windows. Sur Linux/Mac, la valeur est 0 et `creationflags`
|
||||
# est ignoré. getattr() gère le cas où Python expose déjà la constante
|
||||
# sur Windows.
|
||||
if platform.system() == "Windows":
|
||||
_SUBPROCESS_CREATION_FLAGS = getattr(subprocess, "CREATE_NO_WINDOW", 0x08000000)
|
||||
else:
|
||||
_SUBPROCESS_CREATION_FLAGS = 0
|
||||
|
||||
|
||||
@dataclass
class UiaElement:
    """Python representation of a UI Automation element."""

    name: str = ""
    control_type: str = ""
    class_name: str = ""
    automation_id: str = ""
    # Rectangle stored as (x1, y1, x2, y2), as used by center/width/height.
    bounding_rect: Tuple[int, int, int, int] = (0, 0, 0, 0)
    is_enabled: bool = False
    is_offscreen: bool = True
    # Ancestors as dicts; path_signature reads "control_type" and "name".
    parent_path: List[Dict[str, str]] = field(default_factory=list)
    process_name: str = ""

    def center(self) -> Tuple[int, int]:
        """Return the centre of the bounding rectangle."""
        x1, y1, x2, y2 = self.bounding_rect
        return ((x1 + x2) // 2, (y1 + y2) // 2)

    def width(self) -> int:
        """Return the rectangle width (x2 - x1)."""
        return self.bounding_rect[2] - self.bounding_rect[0]

    def height(self) -> int:
        """Return the rectangle height (y2 - y1)."""
        return self.bounding_rect[3] - self.bounding_rect[1]

    def is_clickable(self) -> bool:
        """Return True when the element is enabled, on screen and non-empty."""
        return (
            self.is_enabled
            and not self.is_offscreen
            and self.width() > 0
            and self.height() > 0
        )

    def path_signature(self) -> str:
        """Return "Type[name] > ... > Type[name]" for the ancestors and self.

        Ancestors without a name are skipped. An ancestor lacking a
        "control_type" key contributes an empty type instead of raising.
        """
        parts = [
            f"{p.get('control_type', '')}[{p['name']}]"
            for p in self.parent_path
            if p.get("name")
        ]
        parts.append(f"{self.control_type}[{self.name}]")
        return " > ".join(parts)

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict view; the rectangle is emitted as a list."""
        return {
            "name": self.name,
            "control_type": self.control_type,
            "class_name": self.class_name,
            "automation_id": self.automation_id,
            "bounding_rect": list(self.bounding_rect),
            "is_enabled": self.is_enabled,
            "is_offscreen": self.is_offscreen,
            "parent_path": self.parent_path,
            "process_name": self.process_name,
        }

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "UiaElement":
        """Build an element from a dict such as the one `to_dict` produces.

        Missing keys fall back to the field defaults. Null values for the
        string fields and for `parent_path` are treated like missing keys.
        `bounding_rect` may be a list or a tuple with at least four items;
        anything else becomes (0, 0, 0, 0).
        """
        rect = d.get("bounding_rect")
        if isinstance(rect, (list, tuple)) and len(rect) >= 4:
            rect = tuple(rect[:4])
        else:
            rect = (0, 0, 0, 0)
        return cls(
            name=d.get("name") or "",
            control_type=d.get("control_type") or "",
            class_name=d.get("class_name") or "",
            automation_id=d.get("automation_id") or "",
            bounding_rect=rect,
            is_enabled=d.get("is_enabled", False),
            is_offscreen=d.get("is_offscreen", True),
            # A null parent_path would make path_signature fail on iteration.
            parent_path=d.get("parent_path") or [],
            process_name=d.get("process_name") or "",
        )
|
||||
|
||||
|
||||
class UIAHelper:
    """Python wrapper around the lea_uia.exe command-line helper.

    Every query spawns the helper with `subprocess.run` and parses the JSON
    it prints on stdout. When the helper is unavailable or a call fails,
    query methods return None and `health` returns False.
    """

    def __init__(self, helper_path: str = "", timeout: float = _DEFAULT_TIMEOUT):
        """
        Args:
            helper_path: Path to lea_uia.exe; searched in the standard
                locations when empty.
            timeout: Per-call subprocess timeout, in seconds.
        """
        self._helper_path = helper_path or self._find_helper()
        self._timeout = timeout
        self._available = self._check_available()

    def _find_helper(self) -> str:
        """Return the absolute path of the first existing candidate, or ""."""
        candidates = [
            r"C:\Lea\helpers\lea_uia.exe",
            os.path.join(os.path.dirname(__file__), "..", "..",
                         "agent_rust", "lea_uia", "target",
                         "x86_64-pc-windows-gnu", "release", "lea_uia.exe"),
            "./helpers/lea_uia.exe",
            "lea_uia.exe",
        ]
        for path in candidates:
            if os.path.isfile(path):
                return os.path.abspath(path)
        return ""

    def _check_available(self) -> bool:
        """Return True when running on Windows and the helper binary exists.

        No health call is made here; use `health()` to check that the
        helper actually answers.
        """
        if platform.system() != "Windows":
            logger.debug("UIAHelper: Linux/Mac — helper désactivé")
            return False
        if not self._helper_path:
            logger.debug("UIAHelper: lea_uia.exe introuvable")
            return False
        if not os.path.isfile(self._helper_path):
            logger.debug(f"UIAHelper: chemin invalide {self._helper_path}")
            return False
        return True

    @property
    def available(self) -> bool:
        """Whether the helper was found usable at construction time."""
        return self._available

    @property
    def helper_path(self) -> str:
        """Path of the helper binary ("" when none was found)."""
        return self._helper_path

    def _run(self, args: List[str]) -> Optional[Dict[str, Any]]:
        """Run the helper with `args` and return its JSON object.

        Returns None when the helper is unavailable, exits with a non-zero
        code, times out, prints nothing, prints invalid JSON, or prints JSON
        that is not an object.
        """
        if not self._available:
            return None
        try:
            result = subprocess.run(
                [self._helper_path] + args,
                capture_output=True,
                text=True,
                timeout=self._timeout,
                encoding="utf-8",
                errors="replace",
                creationflags=_SUBPROCESS_CREATION_FLAGS,
            )
            if result.returncode != 0:
                logger.debug(
                    f"UIAHelper: exit code {result.returncode}, "
                    f"stderr: {result.stderr[:200]}"
                )
                return None
            output = result.stdout.strip()
            if not output:
                return None
            data = json.loads(output)
        except subprocess.TimeoutExpired:
            logger.debug(f"UIAHelper: timeout ({self._timeout}s) sur {args}")
            return None
        except json.JSONDecodeError as e:
            logger.debug(f"UIAHelper: JSON invalide — {e}")
            return None
        except Exception as e:
            logger.debug(f"UIAHelper: erreur {e}")
            return None

        # json.loads can return a list, string or number; callers use .get()
        # on the result, so anything but an object counts as a failed call.
        if not isinstance(data, dict):
            logger.debug(
                f"UIAHelper: réponse JSON inattendue ({type(data).__name__})"
            )
            return None
        return data

    @staticmethod
    def _element_from(data: Optional[Dict[str, Any]]) -> Optional[UiaElement]:
        """Extract the element from a helper response with status "ok"."""
        if not data or data.get("status") != "ok":
            return None
        elem_data = data.get("element")
        if not elem_data:
            return None
        return UiaElement.from_dict(elem_data)

    def health(self) -> bool:
        """Return True when the helper answers `health` with status "ok"."""
        data = self._run(["health"])
        return data is not None and data.get("status") == "ok"

    def query_at(
        self,
        x: int,
        y: int,
        with_parents: bool = True,
    ) -> Optional[UiaElement]:
        """Return the element at a screen position.

        Args:
            x, y: Coordinates passed to the helper's `query` command.
            with_parents: When False, `--with-parents=false` is passed.

        Returns:
            The UiaElement, or None when nothing was returned or the call failed.
        """
        args = ["query", "--x", str(x), "--y", str(y)]
        if not with_parents:
            args.append("--with-parents=false")
        return self._element_from(self._run(args))

    def find_by_name(
        self,
        name: str,
        control_type: Optional[str] = None,
        automation_id: Optional[str] = None,
        window: Optional[str] = None,
        timeout_ms: int = 2000,
    ) -> Optional[UiaElement]:
        """Search an element by name, with optional filters.

        Args:
            name: Element name passed as `--name`.
            control_type: Optional `--control-type` filter.
            automation_id: Optional `--automation-id` filter.
            window: Optional `--window` restriction.
            timeout_ms: Search timeout passed as `--timeout-ms`.

        Returns:
            The UiaElement, or None when nothing was returned or the call failed.
        """
        args = ["find", "--name", name, "--timeout-ms", str(timeout_ms)]
        if control_type:
            args.extend(["--control-type", control_type])
        if automation_id:
            args.extend(["--automation-id", automation_id])
        if window:
            args.extend(["--window", window])
        return self._element_from(self._run(args))

    def capture_focused(self, max_depth: int = 3) -> Optional[UiaElement]:
        """Return the element reported by the helper's `capture` command.

        Args:
            max_depth: Value passed as `--max-depth`.
        """
        return self._element_from(
            self._run(["capture", "--max-depth", str(max_depth)])
        )
|
||||
|
||||
|
||||
# Instance globale partagée (singleton léger)
|
||||
_SHARED_HELPER: Optional[UIAHelper] = None
|
||||
|
||||
|
||||
def get_shared_helper() -> UIAHelper:
    """Return the module-wide UIAHelper, creating it on first use."""
    global _SHARED_HELPER
    if _SHARED_HELPER is not None:
        return _SHARED_HELPER
    _SHARED_HELPER = UIAHelper()
    return _SHARED_HELPER
|
||||
@@ -1,12 +1,97 @@
|
||||
# run_agent_v1.py
|
||||
import sys
|
||||
import os
|
||||
import atexit
|
||||
|
||||
# Ajout du répertoire courant au PYTHONPATH pour permettre les imports de modules
|
||||
current_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
if current_dir not in sys.path:
|
||||
sys.path.append(current_dir)
|
||||
|
||||
# ---------------------------------------------------------------
|
||||
# Verrou PID — empêche le lancement de plusieurs instances
|
||||
# Même si Lea.bat est double-cliqué ou lancé deux fois,
|
||||
# un seul agent tourne à la fois (defense-in-depth).
|
||||
# ---------------------------------------------------------------
|
||||
LOCK_FILE = os.path.join(current_dir, "lea_agent.lock")
|
||||
|
||||
|
||||
def _pid_is_alive(pid: int) -> bool:
|
||||
"""Vérifie si un processus avec ce PID existe encore (Windows + Unix)."""
|
||||
if sys.platform == "win32":
|
||||
try:
|
||||
import ctypes
|
||||
kernel32 = ctypes.windll.kernel32 # type: ignore[attr-defined]
|
||||
PROCESS_QUERY_LIMITED_INFORMATION = 0x1000
|
||||
handle = kernel32.OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, False, pid)
|
||||
if handle:
|
||||
kernel32.CloseHandle(handle)
|
||||
return True
|
||||
return False
|
||||
except Exception:
|
||||
# Fallback : tasklist
|
||||
try:
|
||||
import subprocess
|
||||
result = subprocess.run(
|
||||
["tasklist", "/FI", f"PID eq {pid}", "/NH"],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
)
|
||||
return str(pid) in result.stdout
|
||||
except Exception:
|
||||
return False
|
||||
else:
|
||||
# Unix/Linux — os.kill(pid, 0) ne tue pas le process
|
||||
try:
|
||||
os.kill(pid, 0)
|
||||
return True
|
||||
except (OSError, ProcessLookupError):
|
||||
return False
|
||||
|
||||
|
||||
def _acquire_lock() -> bool:
    """Try to take the PID lock stored in ``LOCK_FILE``.

    The lock file is created with ``O_CREAT | O_EXCL`` so that "create only
    if absent" is a single atomic step: two agents started at the same moment
    can no longer both pass a separate "read, then write" sequence.

    Returns:
        False when the lock file names a different PID that is still alive
        (another instance is running). True otherwise, including when the
        lock file cannot be written at all (best effort: the agent then runs
        without a lock).
    """
    my_pid = os.getpid()

    # At most two attempts: the second one runs right after a stale or
    # corrupt lock file has been removed.
    for _attempt in range(2):
        try:
            fd = os.open(LOCK_FILE, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
        except FileExistsError:
            # A lock file is already there: find out who owns it.
            try:
                with open(LOCK_FILE, "r", encoding="utf-8") as f:
                    old_pid = int(f.read().strip())
            except (ValueError, OSError):
                old_pid = None  # Corrupt or unreadable file: treated as stale

            if old_pid is not None and old_pid != my_pid and _pid_is_alive(old_pid):
                return False  # Another instance is already running

            # Stale lock (dead owner, our own PID, or garbage): drop it and
            # retry the exclusive creation.
            try:
                os.remove(LOCK_FILE)
            except OSError:
                pass
            continue
        except OSError:
            # Lock file cannot be created (read-only directory, ...).
            # Not blocking: continue without a lock.
            return True

        try:
            with os.fdopen(fd, "w", encoding="utf-8") as f:
                f.write(str(my_pid))
        except OSError:
            pass  # Not blocking: continue without a usable lock
        return True

    # The stale file could not be replaced: continue without a lock.
    return True
|
||||
|
||||
|
||||
def _release_lock():
    """Delete the lock file at shutdown, but only if it holds our own PID.

    Read errors, a non-numeric content and removal errors are ignored.
    """
    try:
        if not os.path.isfile(LOCK_FILE):
            return
        with open(LOCK_FILE, "r", encoding="utf-8") as lock_fh:
            owner_pid = int(lock_fh.read().strip())
        # Leave the file alone when it belongs to another process.
        if owner_pid != os.getpid():
            return
        os.remove(LOCK_FILE)
    except (ValueError, OSError):
        return
|
||||
|
||||
|
||||
# Vérification du lock AVANT toute initialisation lourde
|
||||
if not _acquire_lock():
|
||||
# Une autre instance de Léa tourne déjà — on quitte silencieusement
|
||||
sys.exit(0)
|
||||
|
||||
atexit.register(_release_lock)
|
||||
|
||||
# Charger config.txt et .env comme variables d'environnement
|
||||
# (équivalent du `set` dans Lea.bat, mais fonctionne aussi sans le .bat)
|
||||
for config_file in ("config.txt", ".env"):
|
||||
@@ -32,7 +117,7 @@ logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
|
||||
)
|
||||
logging.info("=== Agent V1 démarrage — config chargée ===")
|
||||
logging.info("=== Agent V1 démarrage — config chargée (PID %d) ===", os.getpid())
|
||||
logging.info("RPA_SERVER_URL=%s", os.environ.get("RPA_SERVER_URL", "(non défini)"))
logging.info("RPA_SERVER_HOST=%s", os.environ.get("RPA_SERVER_HOST", "(non défini)"))

# Never log a whole secret. The previous expression sliced whatever it got:
# the "(non défini)" placeholder came out as "(non déf...", and a token of
# 8 characters or fewer was written to the log in full.
_api_token = os.environ.get("RPA_API_TOKEN")
if not _api_token:
    _token_display = "(non défini)"
elif len(_api_token) > 16:
    # Long token: the first 8 characters are enough to tell tokens apart.
    _token_display = _api_token[:8] + "..."
else:
    # Short token: any prefix would reveal too much of it.
    _token_display = "(défini)"
logging.info("RPA_API_TOKEN=%s", _token_display)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
622
agent_v0/server_v1/chat_interface.py
Normal file
622
agent_v0/server_v1/chat_interface.py
Normal file
@@ -0,0 +1,622 @@
|
||||
"""
|
||||
ChatInterface — Interface de chat conversationnelle pour Léa.
|
||||
|
||||
Permet au TIM (Technicien Information Médicale) de parler à Léa en langage
|
||||
naturel :
|
||||
- "Ouvre le Bloc-notes et écris bonjour"
|
||||
- Léa comprend (TaskPlanner) et propose un plan
|
||||
- Le TIM confirme (ou refuse)
|
||||
- Léa exécute (replay) et envoie des updates de progression
|
||||
- Historique conversationnel conservé par session
|
||||
|
||||
C'est une couche LÉGÈRE au-dessus du TaskPlanner. Toute la logique de
|
||||
compréhension reste dans TaskPlanner — ChatInterface gère uniquement
|
||||
l'état conversationnel, la confirmation et le suivi d'exécution.
|
||||
|
||||
États de la session :
|
||||
idle → en attente d'un message
|
||||
planning → TaskPlanner.understand() en cours
|
||||
awaiting_confirmation → plan prêt, attend la confirmation du TIM
|
||||
executing → replay en cours
|
||||
done → dernier tour terminé (retour à idle au prochain message)
|
||||
error → erreur interne (instruction non comprise, exception…)
|
||||
|
||||
Langue : 100% français (c'est l'interface utilisateur).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Callable, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)

# =============================================================================
# States
# =============================================================================

STATE_IDLE = "idle"
STATE_PLANNING = "planning"
STATE_AWAITING_CONFIRMATION = "awaiting_confirmation"
STATE_EXECUTING = "executing"
STATE_DONE = "done"
STATE_ERROR = "error"

VALID_STATES = {
    STATE_IDLE,
    STATE_PLANNING,
    STATE_AWAITING_CONFIRMATION,
    STATE_EXECUTING,
    STATE_DONE,
    STATE_ERROR,
}

# Message roles
ROLE_USER = "user"
ROLE_LEA = "lea"
ROLE_SYSTEM = "system"


# =============================================================================
# Message
# =============================================================================

@dataclass
class ChatMessage:
    """One message in a conversation history."""

    role: str  # "user", "lea", "system"
    content: str  # Message text
    timestamp: float = field(default_factory=time.time)
    # Optional contextual data (plan, result, progress, ...)
    meta: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the message as a plain dict (role, content, timestamp, meta)."""
        return {
            "role": self.role,
            "content": self.content,
            "timestamp": self.timestamp,
            "meta": self.meta,
        }


# =============================================================================
# ChatSession
# =============================================================================

class ChatSession:
    """A conversation between one user and Léa.

    Keeps the message history, the current state and the plan waiting for
    confirmation. State is protected by one re-entrant lock per session; calls
    to the injected dependencies (planner, replay callback, status provider)
    are made without holding that lock when entered through the public API.

    Injected dependencies (all optional, which also makes testing easy):
      - task_planner: object exposing ``understand(instruction=...,
        available_workflows=...)`` and returning a plan object (or None)
      - workflows_provider: callable () -> List[Dict]
      - replay_callback: callable (session_id, machine_id, params) -> replay_id
      - status_provider: callable (replay_id) -> Dict

    When a dependency is missing the session answers with an error message
    instead of raising.
    """

    # Replies accepted as a confirmation / a refusal of the pending plan.
    _YES_TOKENS = frozenset({
        "oui", "yes", "ok", "y", "go", "vas-y", "allez", "allez-y",
        "confirme", "confirmer", "continue",
    })
    _NO_TOKENS = frozenset({
        "non", "no", "annule", "annuler", "stop", "arrête", "arrete",
        "abandonne", "abandonner",
    })

    def __init__(
        self,
        session_id: str = "",
        task_planner: Any = None,
        workflows_provider: Optional[Callable[[], List[Dict[str, Any]]]] = None,
        replay_callback: Optional[Callable[..., str]] = None,
        status_provider: Optional[Callable[[str], Dict[str, Any]]] = None,
        machine_id: str = "default",
    ):
        """Create a session and append Léa's welcome message to its history.

        Args:
            session_id: Identifier to use; a ``chat_<12 hex chars>`` id is
                generated when empty.
            task_planner: Planner used by ``send_message`` (see class doc).
            workflows_provider: Supplies the workflows passed to the planner.
            replay_callback: Starts a replay and returns its id.
            status_provider: Returns the status dict of a replay id.
            machine_id: Forwarded to ``replay_callback``.
        """
        self.session_id = session_id or f"chat_{uuid.uuid4().hex[:12]}"
        self.machine_id = machine_id
        self.created_at = time.time()
        self.updated_at = self.created_at

        self._task_planner = task_planner
        self._workflows_provider = workflows_provider
        self._replay_callback = replay_callback
        self._status_provider = status_provider

        self._state: str = STATE_IDLE
        self._messages: List[ChatMessage] = []
        self._pending_plan: Any = None  # Plan waiting for confirmation
        self._active_replay_id: str = ""  # Current replay (when executing)
        self._last_progress: Dict[str, Any] = {}

        self._lock = threading.RLock()

        # Welcome message
        self._append(
            ROLE_LEA,
            "Bonjour ! Je suis Léa. Dites-moi ce que vous voulez que je fasse.",
            meta={"welcome": True},
        )

    # ---------------------------------------------------------------------
    # Accessors
    # ---------------------------------------------------------------------

    @property
    def state(self) -> str:
        """Current state, read under the session lock."""
        with self._lock:
            return self._state

    def get_history(self) -> List[Dict[str, Any]]:
        """Return the full message history, each message serialized as a dict."""
        with self._lock:
            return [m.to_dict() for m in self._messages]

    def get_snapshot(self) -> Dict[str, Any]:
        """Return the complete state for the UI (history + state + progress)."""
        with self._lock:
            return {
                "session_id": self.session_id,
                "state": self._state,
                "machine_id": self.machine_id,
                "created_at": self.created_at,
                "updated_at": self.updated_at,
                "messages": [m.to_dict() for m in self._messages],
                "pending_plan": (
                    self._pending_plan.to_dict()
                    if self._pending_plan is not None
                    else None
                ),
                "active_replay_id": self._active_replay_id,
                "progress": dict(self._last_progress),
            }

    # ---------------------------------------------------------------------
    # Public API
    # ---------------------------------------------------------------------

    def send_message(self, text: str) -> Dict[str, Any]:
        """Handle a user message.

        Depending on the current state:
          1. awaiting_confirmation: the text is read as a yes/no answer
          2. executing: the message is recorded and Léa asks the user to wait
          3. any other state: the text is a new instruction for the planner

        Args:
            text: Raw user text; ``None`` and blank strings are rejected.

        Returns:
            A dict with at least ``"ok"`` and ``"state"``.
        """
        text = (text or "").strip()
        if not text:
            return {
                "ok": False,
                "error": "Message vide",
                "state": self.state,
            }

        response: Optional[Dict[str, Any]] = None
        plan: Any = None
        with self._lock:
            # Case 2: a replay is running, record the message, take no action
            if self._state == STATE_EXECUTING:
                self._append(ROLE_USER, text)
                self._append(
                    ROLE_LEA,
                    "Je suis en train d'exécuter le workflow. Un instant…",
                )
                return {"ok": True, "state": self._state}

            awaiting = self._state == STATE_AWAITING_CONFIRMATION
            if awaiting:
                # Case 1: only the state transition happens under the lock
                response, plan = self._resolve_confirmation_reply(text)
            else:
                # Case 3: new instruction
                self._append(ROLE_USER, text)
                self._set_state(STATE_PLANNING)

        # Potentially slow calls (planner, replay callback) run outside the
        # lock so that pollers and get_snapshot() are not blocked meanwhile.
        if not awaiting:
            return self._plan_and_reply(text)
        if response is not None:
            return response
        return self._execute_plan(plan)

    def confirm(self, confirmed: bool = True) -> Dict[str, Any]:
        """Confirm (or refuse) the execution of the pending plan.

        Args:
            confirmed: False cancels the pending plan and returns to idle.

        Returns:
            An error dict when no plan is awaiting confirmation; the
            cancellation acknowledgement; or the result of launching the plan.
        """
        with self._lock:
            if self._state != STATE_AWAITING_CONFIRMATION:
                return {
                    "ok": False,
                    "error": f"Pas de plan en attente (état={self._state})",
                    "state": self._state,
                }

            if not confirmed:
                self._append(
                    ROLE_LEA,
                    "D'accord, j'annule. Dites-moi autre chose quand vous voulez.",
                )
                self._pending_plan = None
                self._set_state(STATE_IDLE)
                return {"ok": True, "state": self._state, "confirmed": False}

            plan = self._pending_plan
            if plan is None:
                self._set_state(STATE_IDLE)
                return {
                    "ok": False,
                    "error": "Aucun plan à confirmer",
                    "state": self._state,
                }

            # The plan is consumed here, exactly like on the chat ("oui")
            # path; otherwise get_snapshot() keeps advertising a pending plan
            # while it is already executing.
            self._pending_plan = None
            self._set_state(STATE_EXECUTING)

        # Execution outside the lock
        return self._execute_plan(plan)

    def refresh_progress(self) -> Dict[str, Any]:
        """Refresh the progress of the running replay.

        Meant to be polled by the client. When the status provider reports a
        final status the session moves to done or error and Léa posts a
        summary message. A ``paused_need_help`` status posts one help request
        when the pause is first observed, not one per poll.

        Returns:
            ``{"ok": True, "state": ..., "progress": ...}``; ``progress`` is
            always a copy.
        """
        with self._lock:
            if self._state != STATE_EXECUTING or not self._active_replay_id:
                return {
                    "ok": True,
                    "state": self._state,
                    "progress": dict(self._last_progress),
                }
            replay_id = self._active_replay_id
            provider = self._status_provider

        if provider is None:
            return {"ok": True, "state": self.state, "progress": {}}

        try:
            status = provider(replay_id) or {}
        except Exception as e:
            logger.warning(f"ChatSession: status_provider erreur: {e}")
            status = {}

        with self._lock:
            # The provider was called without the lock: another poller may
            # have finalised this replay in the meantime. Applying the status
            # again would duplicate the closing message.
            if self._state != STATE_EXECUTING or self._active_replay_id != replay_id:
                return {
                    "ok": True,
                    "state": self._state,
                    "progress": dict(self._last_progress),
                }

            previous_status = str(self._last_progress.get("status", "")).lower()
            self._last_progress = status
            self.updated_at = time.time()

            # End detection
            replay_status = str(status.get("status", "")).lower()
            completed = status.get("completed_actions", 0)
            total = status.get("total_actions", 0)

            if replay_status in ("done", "completed", "finished", "success"):
                summary = (
                    f"Workflow terminé ! {completed}/{total} actions réussies."
                    if total
                    else "Workflow terminé."
                )
                self._append(ROLE_LEA, summary, meta={"progress": dict(status)})
                self._set_state(STATE_DONE)
                self._active_replay_id = ""
            elif replay_status in ("failed", "error", "aborted"):
                err = status.get("error") or status.get("message") or "Erreur inconnue"
                self._append(
                    ROLE_LEA,
                    f"Le workflow a échoué : {err}",
                    meta={"progress": dict(status)},
                )
                self._set_state(STATE_ERROR)
                self._active_replay_id = ""
            elif replay_status == "paused_need_help":
                # Stay in executing so the user can resume; announce the
                # pause only on the transition into it.
                if previous_status != "paused_need_help":
                    self._append(
                        ROLE_LEA,
                        "Je suis bloquée sur une action, j'ai besoin d'aide…",
                        meta={"progress": dict(status)},
                    )
            # else: still running, no message

            return {
                "ok": True,
                "state": self._state,
                "progress": dict(self._last_progress),
            }

    # ---------------------------------------------------------------------
    # Internal logic
    # ---------------------------------------------------------------------

    def _plan_and_reply(self, instruction: str) -> Dict[str, Any]:
        """Call ``task_planner.understand()`` and turn the result into a reply.

        Ends in the error state when there is no planner, the planner raises,
        returns None, or returns a plan whose ``understood`` is falsy;
        otherwise stores the plan and moves to awaiting_confirmation.
        """
        plan = None
        error_msg = ""

        if self._task_planner is None:
            error_msg = "Planificateur indisponible"
        else:
            try:
                workflows = []
                if self._workflows_provider is not None:
                    try:
                        workflows = self._workflows_provider() or []
                    except Exception as e:
                        logger.warning(f"ChatSession: workflows_provider erreur: {e}")
                        workflows = []

                plan = self._task_planner.understand(
                    instruction=instruction,
                    available_workflows=workflows,
                )
            except Exception as e:
                logger.warning(f"ChatSession: TaskPlanner.understand erreur: {e}")
                error_msg = f"Erreur de compréhension : {e}"

        # Graceful fallback when no plan could be produced
        if plan is None:
            with self._lock:
                self._append(
                    ROLE_LEA,
                    f"Désolée, je n'arrive pas à comprendre pour l'instant. {error_msg}".strip(),
                    meta={"error": error_msg},
                )
                self._set_state(STATE_ERROR)
                return {
                    "ok": False,
                    "state": self._state,
                    "error": error_msg,
                }

        # Instruction not understood
        if not plan.understood:
            reason = plan.error or "je n'ai pas compris votre demande"
            with self._lock:
                self._append(
                    ROLE_LEA,
                    (
                        f"Désolée, {reason}. "
                        "Pouvez-vous reformuler ? Je connais les workflows que vous m'avez appris."
                    ),
                    meta={"plan": plan.to_dict()},
                )
                self._set_state(STATE_ERROR)
                return {
                    "ok": False,
                    "state": self._state,
                    "plan": plan.to_dict(),
                    "error": reason,
                }

        # Understood: word the proposal and wait for confirmation
        proposal = self._format_proposal(plan)

        with self._lock:
            self._pending_plan = plan
            self._append(ROLE_LEA, proposal, meta={"plan": plan.to_dict()})
            self._set_state(STATE_AWAITING_CONFIRMATION)
            return {
                "ok": True,
                "state": self._state,
                "plan": plan.to_dict(),
                "message": proposal,
            }

    @staticmethod
    def _format_proposal(plan: Any) -> str:
        """Build the French proposal text shown to the user for a plan.

        Reads ``instruction``, ``workflow_name``, ``match_confidence``,
        ``mode``, ``steps``, ``parameters``, ``is_loop`` and ``loop_source``
        on the plan. At most five steps are listed.
        """
        lines = []
        lines.append(f"J'ai compris : « {plan.instruction} ».")

        if plan.workflow_name:
            conf_pct = int(round((plan.match_confidence or 0.0) * 100))
            lines.append(
                f"Je vais utiliser le workflow « {plan.workflow_name} »"
                f" (confiance {conf_pct}%)."
            )
        elif plan.mode == "free" and plan.steps:
            lines.append(
                f"Je n'ai pas de workflow enregistré pour ça, "
                f"mais j'ai planifié {len(plan.steps)} étape(s) :"
            )
            for i, step in enumerate(plan.steps[:5], 1):
                desc = step.get("description", "") if isinstance(step, dict) else str(step)
                lines.append(f"  {i}. {desc}")
            if len(plan.steps) > 5:
                lines.append(f"  … et {len(plan.steps) - 5} autre(s) étape(s).")
        else:
            lines.append("Je n'ai pas de plan d'action clair pour cette demande.")

        if plan.parameters:
            params_str = ", ".join(f"{k}={v}" for k, v in plan.parameters.items())
            lines.append(f"Paramètres détectés : {params_str}.")

        if plan.is_loop:
            src = plan.loop_source or "éléments à traiter"
            lines.append(f"Traitement en boucle sur : {src}.")

        lines.append("")
        lines.append("Est-ce que je peux y aller ? (oui / non)")
        return "\n".join(lines)

    @classmethod
    def _classify_confirmation(cls, text: str) -> Optional[bool]:
        """Classify a reply as yes (True), no (False) or unclear (None).

        A reply matches when it equals a token, or when its first
        space-separated word is a token. Two safeguards apply because a "yes"
        launches an execution:
          - single-letter tokens ("y") only match as the whole reply, so
            French phrases such as "y a un problème" are not a confirmation;
          - a reply that opens with a yes token but also contains a refusal
            token ("ok annule") is reported as unclear.
        """
        t = text.strip().lower().rstrip("!.?").strip()
        if not t:
            return None
        if t in cls._YES_TOKENS:
            return True
        if t in cls._NO_TOKENS:
            return False

        head, _, rest = t.partition(" ")
        if len(head) > 1 and head in cls._YES_TOKENS:
            if any(w.strip(",;:!.?") in cls._NO_TOKENS for w in rest.split()):
                return None
            return True
        if head in cls._NO_TOKENS:
            return False
        return None

    def _resolve_confirmation_reply(self, text: str) -> tuple:
        """Record a yes/no reply and apply the matching state transition.

        Returns:
            ``(response, plan)``. ``response`` is the final reply dict when
            nothing has to be executed (refusal or unclear answer). On a
            confirmation ``response`` is None, the pending plan has been
            consumed, the state is executing and ``plan`` is the plan to
            launch (it may be None if none was pending).
        """
        with self._lock:
            self._append(ROLE_USER, text)
            decision = self._classify_confirmation(text)

            if decision is False:
                self._append(
                    ROLE_LEA,
                    "D'accord, j'annule. Dites-moi autre chose quand vous voulez.",
                )
                self._pending_plan = None
                self._set_state(STATE_IDLE)
                return {"ok": True, "state": self._state, "confirmed": False}, None

            if decision is None:
                self._append(
                    ROLE_LEA,
                    "Je n'ai pas compris votre réponse. Répondez « oui » pour lancer ou « non » pour annuler.",
                )
                return {"ok": True, "state": self._state, "needs_clarification": True}, None

            plan = self._pending_plan
            self._pending_plan = None
            self._set_state(STATE_EXECUTING)
            return None, plan

    def _handle_confirmation_reply(self, text: str) -> Dict[str, Any]:
        """Interpret a user message as yes/no and act on it.

        Kept as a single-call entry point: the state change is done under the
        lock by ``_resolve_confirmation_reply`` and, on a confirmation, the
        plan is then launched through ``_execute_plan``.
        """
        response, plan = self._resolve_confirmation_reply(text)
        if response is not None:
            return response
        return self._execute_plan(plan)

    def _execute_plan(self, plan: Any) -> Dict[str, Any]:
        """Start the replay matching the plan.

        The replay callback is invoked without this method holding the
        session lock. Plans without ``workflow_match`` (free mode) are refused
        with an error message.

        Returns:
            ``{"ok": True, "state", "replay_id"}`` on success, otherwise an
            ``{"ok": False, "state", "error"}`` dict.
        """
        if plan is None:
            with self._lock:
                self._append(ROLE_LEA, "Rien à exécuter.", meta={})
                self._set_state(STATE_IDLE)
                return {"ok": False, "state": self._state, "error": "Aucun plan"}

        callback = self._replay_callback
        if callback is None:
            with self._lock:
                self._append(
                    ROLE_LEA,
                    "Je ne peux pas exécuter : aucun moteur d'exécution n'est configuré.",
                )
                self._set_state(STATE_ERROR)
                return {
                    "ok": False,
                    "state": self._state,
                    "error": "replay_callback non configuré",
                }

        # Announce the start
        with self._lock:
            self._append(
                ROLE_LEA,
                "C'est parti ! Je lance le workflow…",
                meta={"plan": plan.to_dict()},
            )

        # Call the callback
        try:
            if not plan.workflow_match:
                # Free mode is not wired to the chat yet: refuse cleanly
                raise RuntimeError(
                    "Mode libre non supporté pour l'instant — "
                    "entraînez un workflow pour cette tâche"
                )
            replay_id = callback(
                session_id=plan.workflow_match,
                machine_id=self.machine_id,
                params=plan.parameters,
            )
        except Exception as e:
            with self._lock:
                self._append(
                    ROLE_LEA,
                    f"Je n'ai pas pu lancer le workflow : {e}",
                    meta={"error": str(e)},
                )
                self._set_state(STATE_ERROR)
                return {"ok": False, "state": self._state, "error": str(e)}

        with self._lock:
            self._active_replay_id = replay_id or ""
            return {
                "ok": True,
                "state": self._state,
                "replay_id": self._active_replay_id,
            }

    # ---------------------------------------------------------------------
    # Helpers
    # ---------------------------------------------------------------------

    def _append(self, role: str, content: str, meta: Optional[Dict[str, Any]] = None) -> None:
        """Append a message to the history (must be called under the lock)."""
        msg = ChatMessage(role=role, content=content, meta=meta or {})
        self._messages.append(msg)
        self.updated_at = msg.timestamp

    def _set_state(self, new_state: str) -> None:
        """Change state (must be called under the lock).

        Raises:
            ValueError: if ``new_state`` is not one of ``VALID_STATES``.
        """
        if new_state not in VALID_STATES:
            raise ValueError(f"État invalide : {new_state}")
        old = self._state
        self._state = new_state
        self.updated_at = time.time()
        if old != new_state:
            logger.debug(
                f"ChatSession {self.session_id}: {old} -> {new_state}"
            )
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ChatManager — registre en mémoire des sessions
|
||||
# =============================================================================
|
||||
|
||||
class ChatManager:
    """In-memory registry of chat sessions.

    Every access to the registry goes through one re-entrant lock, so several
    conversations can be handled at the same time.
    """

    def __init__(
        self,
        task_planner: Any = None,
        workflows_provider: Optional[Callable[[], List[Dict[str, Any]]]] = None,
        replay_callback: Optional[Callable[..., str]] = None,
        status_provider: Optional[Callable[[str], Dict[str, Any]]] = None,
    ):
        """Store the dependencies handed to every session created later."""
        self._lock = threading.RLock()
        self._sessions: Dict[str, ChatSession] = {}
        self._task_planner = task_planner
        self._workflows_provider = workflows_provider
        self._replay_callback = replay_callback
        self._status_provider = status_provider

    def create_session(self, machine_id: str = "default") -> ChatSession:
        """Create a new chat session, register it and return it."""
        new_session = ChatSession(
            task_planner=self._task_planner,
            workflows_provider=self._workflows_provider,
            replay_callback=self._replay_callback,
            status_provider=self._status_provider,
            machine_id=machine_id,
        )
        with self._lock:
            self._sessions[new_session.session_id] = new_session
        logger.info(f"ChatManager: session créée {new_session.session_id}")
        return new_session

    def get_session(self, session_id: str) -> Optional[ChatSession]:
        """Return the session registered under ``session_id``, or None."""
        with self._lock:
            return self._sessions.get(session_id)

    def list_sessions(self) -> List[Dict[str, Any]]:
        """Return one summary dict per registered session."""
        summaries: List[Dict[str, Any]] = []
        with self._lock:
            for sess in self._sessions.values():
                summaries.append(
                    {
                        "session_id": sess.session_id,
                        "state": sess.state,
                        "machine_id": sess.machine_id,
                        "created_at": sess.created_at,
                        "updated_at": sess.updated_at,
                        "message_count": len(sess.get_history()),
                    }
                )
        return summaries

    def delete_session(self, session_id: str) -> bool:
        """Remove a session; return True when one was actually removed."""
        with self._lock:
            dropped = self._sessions.pop(session_id, None)
        return dropped is not None

    def cleanup_old(self, max_age_s: float = 3600 * 24) -> int:
        """Drop sessions whose ``updated_at`` is older than ``max_age_s`` seconds.

        Returns:
            The number of sessions removed.
        """
        reference = time.time()
        with self._lock:
            expired = [
                key
                for key, sess in self._sessions.items()
                if (reference - sess.updated_at) > max_age_s
            ]
            for key in expired:
                self._sessions.pop(key)
        return len(expired)
|
||||
@@ -3,35 +3,81 @@
|
||||
Contexte métier pour les appels VLM — rend Léa experte du domaine.
|
||||
|
||||
Chaque workflow est associé à un domaine métier (médical, comptable, etc.)
|
||||
qui enrichit TOUS les prompts VLM (Observer, Critic, acteur, enrichissement).
|
||||
qui enrichit TOUS les prompts VLM (Observer, Critic, acteur, enrichissement)
|
||||
ET la personnalité de Léa (résumés, questions de clarification, rapports).
|
||||
|
||||
Un gemma4 qui sait qu'il regarde un DPI et que l'utilisateur fait du codage
|
||||
CIM-10 prend des décisions bien meilleures qu'un VLM générique.
|
||||
CIM-10 prend des décisions bien meilleures qu'un VLM générique. Et Léa qui
|
||||
dit "J'ai codé 14 dossiers sur 15" plutôt que "J'ai exécuté 112 clics" est
|
||||
bien plus utile pour un TIM.
|
||||
|
||||
Premier domaine : TIM (Technicien d'Information Médicale)
|
||||
- Logiciels DPI/DMS (dossier patient informatisé)
|
||||
- Codage CIM-10 / CCAM / GHM
|
||||
- Lecture de comptes rendus médicaux
|
||||
- Validation des séjours / RSS / RSA
|
||||
Domaines pré-configurés :
|
||||
- tim_codage : TIM, codage CIM-10 / CCAM / PMSI, DPI
|
||||
- comptabilite : factures, TVA, OCR, plans comptables
|
||||
- rh_paie : fiches de paie, employés, charges sociales
|
||||
- stocks_logistique : bons, commandes, réceptions, inventaires
|
||||
- generic : fallback bureautique
|
||||
|
||||
Usage :
|
||||
Usage basique :
|
||||
ctx = get_domain_context("tim_codage")
|
||||
prompt = f"{ctx.system_prompt}\n\n{user_prompt}"
|
||||
prompt = ctx.enrich_prompt(user_prompt, role="actor")
|
||||
|
||||
Usage langage métier :
|
||||
ctx = get_domain_context("tim_codage")
|
||||
phrase = ctx.summarize_action("click", {"target": "DP"})
|
||||
# → "saisir le diagnostic principal"
|
||||
|
||||
question = ctx.pose_clarification_question(
|
||||
{"blocked_on": "target_not_found", "target": "Fichier patient",
|
||||
"params": {"nom_patient": "Mme Durand"}}
|
||||
)
|
||||
# → "Je ne trouve pas le dossier de Mme Durand..."
|
||||
|
||||
rapport = ctx.describe_workflow_outcome(
|
||||
workflow_name="Codage séjours janvier",
|
||||
success=True,
|
||||
items_count=15,
|
||||
failed_count=1,
|
||||
)
|
||||
# → "J'ai codé 14 dossiers sur 15..."
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import unicodedata
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import Any, Dict, List, Mapping, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _strip_accents(s: str) -> str:
|
||||
"""Supprimer les accents pour les comparaisons insensibles aux diacritiques."""
|
||||
if not s:
|
||||
return ""
|
||||
nkfd = unicodedata.normalize("NFKD", s)
|
||||
return "".join(c for c in nkfd if not unicodedata.combining(c))
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Data class
|
||||
# =========================================================================
|
||||
|
||||
|
||||
@dataclass
|
||||
class DomainContext:
|
||||
"""Contexte métier pour un domaine spécifique."""
|
||||
domain_id: str # Identifiant unique (tim_codage, comptabilite, etc.)
|
||||
name: str # Nom lisible (Codage médical TIM)
|
||||
description: str # Description courte du métier
|
||||
"""Contexte métier pour un domaine spécifique.
|
||||
|
||||
Contient à la fois les hints pour les prompts VLM et les éléments de
|
||||
personnalité de Léa (langage métier, questions, rapports).
|
||||
"""
|
||||
|
||||
domain_id: str # tim_codage, comptabilite, ...
|
||||
name: str # Nom lisible
|
||||
description: str # Description courte
|
||||
|
||||
# Prompt système injecté dans TOUS les appels VLM
|
||||
system_prompt: str = ""
|
||||
@@ -39,18 +85,47 @@ class DomainContext:
|
||||
# Vocabulaire métier (termes que le VLM doit connaître)
|
||||
vocabulary: List[str] = field(default_factory=list)
|
||||
|
||||
# Applications connues (noms de logiciels que le VLM peut rencontrer)
|
||||
# Applications connues
|
||||
known_apps: List[str] = field(default_factory=list)
|
||||
|
||||
# Écrans types (descriptions des écrans courants du métier)
|
||||
# Écrans types
|
||||
screen_patterns: Dict[str, str] = field(default_factory=dict)
|
||||
|
||||
# --- Personnalité Léa -------------------------------------------------
|
||||
|
||||
# Mapping d'actions techniques (click/type/key_combo) vers description métier,
|
||||
# indexé par un mot-clé lisible trouvé dans la cible/texte.
|
||||
# Format : { (action_type, keyword_lower) : "description métier" }
|
||||
# Exemple : ("click", "dp") → "saisir le diagnostic principal"
|
||||
common_actions: Dict[str, str] = field(default_factory=dict)
|
||||
|
||||
# Synonymes métier : technique → forme lisible
|
||||
# Exemple : {"dp": "diagnostic principal", "das": "diagnostics associés"}
|
||||
vocabulary_synonyms: Dict[str, str] = field(default_factory=dict)
|
||||
|
||||
# Templates de questions de clarification (selon la raison de blocage).
|
||||
# Clé = identifiant de blocage ("target_not_found", "ambiguous_field", ...)
|
||||
# Valeur = template f-string (champs: {target}, {app}, {nom_patient}, ...)
|
||||
clarification_templates: Dict[str, str] = field(default_factory=dict)
|
||||
|
||||
# Templates de résumés de fin de workflow.
|
||||
# Clés attendues :
|
||||
# - "success" : tout a marché
|
||||
# - "partial" : succès partiel (failed_count > 0)
|
||||
# - "failure" : échec complet
|
||||
# - "success_one" : cas 1 élément (pour éviter "1 dossiers")
|
||||
# - "item_singular" : libellé d'un item ("dossier")
|
||||
# - "item_plural" : libellé au pluriel ("dossiers")
|
||||
summary_templates: Dict[str, str] = field(default_factory=dict)
|
||||
|
||||
# ------------------------------------------------------------------ API
|
||||
|
||||
def enrich_prompt(self, prompt: str, role: str = "") -> str:
|
||||
"""Enrichir un prompt avec le contexte métier.
|
||||
|
||||
Args:
|
||||
prompt: Le prompt original
|
||||
role: Le rôle du VLM (observer, critic, actor, enrichment)
|
||||
role: Le rôle du VLM (observer, critic, actor, enrichment)
|
||||
"""
|
||||
parts = []
|
||||
|
||||
@@ -65,6 +140,310 @@ class DomainContext:
|
||||
parts.append(prompt)
|
||||
return "\n\n".join(parts)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Personnalité : résumé d'action en langage métier
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def summarize_action(
    self,
    action: str,
    params: Optional[Mapping[str, Any]] = None,
) -> str:
    """Summarize a technical action as a business-language phrase (French).

    Resolution order:
      1. ``common_actions`` entries keyed ``"<action>:<keyword>"``. The first
         entry (in dict order) whose action part equals ``action`` and whose
         keyword occurs as a whole word in the target/text wins.
      2. A generic phrase per action type, with ``vocabulary_synonyms``
         applied to the target.
      3. Last resort: ``"effectuer l'action <action>"``.

    Keyword matching is case-insensitive and accent-folded (both sides go
    through ``lower()`` + ``_strip_accents``). It is anchored on word
    boundaries, with an optional trailing plural "s"/"x", so that short
    sigles do not fire inside unrelated words ("cr" in "créer", "das" in
    "dashboard", "ht" in "http").

    Args:
        action: Action type ("click", "type", "key_combo", "wait", "scroll").
        params: Action parameters. Keys read here: "target" (falling back to
            "description"), "text" and "keys".

    Returns:
        A French, business-oriented phrase. Never empty.

    Examples (tim_codage domain):
        click on "DP"       -> "saisir le diagnostic principal"
        click on "Valider"  -> "valider le codage"
    """
    params = dict(params or {})
    target = str(params.get("target") or params.get("description") or "").strip()
    text = str(params.get("text") or "").strip()
    keys = params.get("keys") or []

    # Normalized text searched for keywords: target label + typed text.
    haystack = _strip_accents(f"{target} {text}".lower())

    # 1) Keyword lookup in common_actions ("click:word", "type:word", ...).
    for key, label in self.common_actions.items():
        k_action, sep, k_word = key.partition(":")
        if not sep or k_action != action:
            continue
        k_word_norm = _strip_accents(k_word.lower())
        if not k_word_norm:
            continue
        # Lookarounds instead of \b so keywords that start or end with a
        # non-word character still match; [sx]? tolerates French plurals.
        pattern = r"(?<!\w)" + re.escape(k_word_norm) + r"[sx]?(?!\w)"
        if re.search(pattern, haystack):
            return label

    # 2) Generic phrasing, with sigles expanded in the target.
    friendly_target = self._apply_synonyms(target)

    if action == "click":
        if friendly_target:
            return f"cliquer sur {friendly_target}"
        return "cliquer"

    if action == "type":
        if text and friendly_target:
            return f"saisir « {text} » dans {friendly_target}"
        if text:
            return f"saisir « {text} »"
        return "saisir du texte"

    if action == "key_combo":
        if isinstance(keys, (list, tuple)) and keys:
            return f"utiliser le raccourci {'+'.join(str(k) for k in keys)}"
        return "utiliser un raccourci clavier"

    if action == "wait":
        return "attendre le chargement de l'écran"

    if action == "scroll":
        return "faire défiler l'écran"

    # 3) Unknown action type.
    return f"effectuer l'action {action}"
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Personnalité : question de clarification
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def pose_clarification_question(
    self,
    context: Optional[Mapping[str, Any]] = None,
) -> str:
    """Build a clarification question (French) for when the agent is blocked.

    Template lookup in ``clarification_templates``, first hit wins:
      1. exact key ``context["blocked_on"]`` (e.g. "target_not_found");
      2. a ``"target:<keyword>"`` key whose keyword is contained in
         ``context["target"]``, compared case- and accent-insensitively so
         that "target:employe" matches a target labelled "Employé";
      3. the ``"default"`` key.
    If no template is found, or formatting it fails, a generic cross-domain
    question is returned.

    Placeholders available to templates: ``{target}``, ``{target_friendly}``
    (target with synonyms applied), ``{app}``, plus every entry of
    ``context["params"]``. Entries of ``params`` override the built-in keys.
    Unknown placeholders are rendered through ``_SafeDict``.

    Args:
        context: Free-form mapping. Keys read here: "blocked_on", "target",
            "app" and "params".

    Returns:
        A question in French. Never empty.
    """
    ctx = dict(context or {})
    blocked_on = str(ctx.get("blocked_on") or "").strip()
    target = str(ctx.get("target") or "").strip()
    params = dict(ctx.get("params") or {})

    # Substitution values: built-ins first, then workflow params on top.
    subs: Dict[str, Any] = {
        "target": target,
        "target_friendly": self._apply_synonyms(target) or target or "cet élément",
        "app": ctx.get("app", ""),
    }
    subs.update(params)

    # 1) Exact blocking reason.
    template = self.clarification_templates.get(blocked_on, "")

    # 2) Keyword found in the target ("target:<keyword>" entries).
    if not template and target:
        target_norm = _strip_accents(target.lower())
        for key, tpl in self.clarification_templates.items():
            prefix, sep, keyword = key.partition(":")
            if not sep or prefix != "target":
                continue
            keyword_norm = _strip_accents(keyword.lower())
            # An empty keyword would be contained in every target: skip it.
            if keyword_norm and keyword_norm in target_norm:
                template = tpl
                break

    # 3) Domain-level default.
    if not template:
        template = self.clarification_templates.get("default", "")

    if template:
        try:
            return template.format_map(_SafeDict(subs))
        except Exception as e:  # pragma: no cover - unexpected format
            # Fall through to the generic question below.
            logger.warning("clarification template format error: %s", e)

    # 4) Cross-domain fallback.
    friendly = subs["target_friendly"]
    return (
        f"Je ne trouve pas {friendly}. "
        f"Peux-tu me le montrer ou me confirmer que c'est le bon écran ?"
    )
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Personnalité : rapport final
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def describe_workflow_outcome(
    self,
    workflow_name: str = "",
    success: bool = True,
    items_count: int = 1,
    failed_count: int = 0,
    elapsed_s: float = 0.0,
    extra: Optional[Mapping[str, Any]] = None,
    use_llm: bool = False,
) -> str:
    """Produce the end-of-workflow report in business language (French).

    Template selection in ``summary_templates``:
      - "failure": ``success`` is False and either every item failed or no
        per-item failure was reported (``failed_count <= 0``). The second
        case matters: a failed run with default counts must never be
        reported with a success template.
      - "partial": some, but not all, items failed.
      - "success_one": exactly one item, no failure (falls back to
        "success" when the domain has no "success_one" template).
      - "success": everything else.
    When the selected template is missing or cannot be formatted, a generic
    sentence is built from ``success`` and the counts.

    Placeholders available to templates: ``workflow_name``, ``items_count``,
    ``done``, ``failed``, ``item_singular``, ``item_plural``, ``item_word``,
    ``elapsed_s`` (truncated to an int), plus every entry of ``extra``
    (which overrides the built-in keys).

    Args:
        workflow_name: Workflow name ("Codage janvier").
        success: True when the workflow succeeded overall.
        items_count: Number of items processed. Defaults to 1.
        failed_count: Number of failed items.
        elapsed_s: Total duration in seconds.
        extra: Additional substitution values.
        use_llm: When True, ``_llm_refine_summary`` is tried first and its
            non-empty result is returned as-is; otherwise templates apply.

    Returns:
        The report in French. Always a string, never None.
    """
    extra = dict(extra or {})
    done = max(0, items_count - failed_count)

    item_sg = self.summary_templates.get("item_singular", "élément")
    item_pl = self.summary_templates.get("item_plural", "éléments")
    item_word = item_sg if done <= 1 else item_pl

    # Values exposed to the templates.
    subs = {
        "workflow_name": workflow_name or "le workflow",
        "items_count": items_count,
        "done": done,
        "failed": failed_count,
        "item_singular": item_sg,
        "item_plural": item_pl,
        "item_word": item_word,
        "elapsed_s": int(elapsed_s),
    }
    subs.update(extra)

    # Pick the template. A failed run without per-item failure counts is a
    # plain failure, consistent with the generic fallback at the bottom.
    if not success and (failed_count >= items_count or failed_count <= 0):
        key = "failure"
    elif failed_count > 0:
        key = "partial"
    elif items_count == 1:
        key = "success_one" if "success_one" in self.summary_templates else "success"
    else:
        key = "success"

    template = self.summary_templates.get(key, "")

    # Optional LLM rewording; an empty result means "use the template".
    if use_llm:
        llm_text = self._llm_refine_summary(template, subs, success)
        if llm_text:
            return llm_text

    if template:
        try:
            return template.format_map(_SafeDict(subs))
        except Exception as e:  # pragma: no cover
            # Fall through to the generic sentence below.
            logger.warning("summary template format error: %s", e)

    # Generic fallback.
    if success:
        if items_count <= 1:
            return f"C'est fait, j'ai terminé « {workflow_name or 'le workflow'} »."
        return (
            f"J'ai traité {done} {item_word} sur {items_count}"
            + (f", {failed_count} en échec." if failed_count else ".")
        )
    return (
        f"Je n'ai pas pu terminer « {workflow_name or 'le workflow'} ». "
        f"Je te rends la main."
    )
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Helpers internes
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _apply_synonyms(self, text: str) -> str:
|
||||
"""Remplacer les sigles/termes techniques par leur forme métier.
|
||||
|
||||
Cherche mots entiers (word boundaries) en insensible à la casse.
|
||||
"""
|
||||
if not text or not self.vocabulary_synonyms:
|
||||
return text
|
||||
result = text
|
||||
for short, full in self.vocabulary_synonyms.items():
|
||||
if not short:
|
||||
continue
|
||||
pattern = r"\b" + re.escape(short) + r"\b"
|
||||
result = re.sub(pattern, full, result, flags=re.IGNORECASE)
|
||||
return result
|
||||
|
||||
def _llm_refine_summary(
    self,
    template: str,
    subs: Dict[str, Any],
    success: bool,
) -> str:
    """Ask the local gemma4 chat endpoint to reword the final report.

    Best effort only: a missing ``requests`` package, a non-OK HTTP status or
    any exception during the call yields ``""``, which the caller treats as
    "use the plain template". Kept as a separate method so tests can
    monkey-patch it.

    Args:
        template: Summary template selected by the caller (may be empty).
        subs: Substitution values; also used to fill the prompt.
        success: Overall workflow outcome, reported to the model.

    Returns:
        The model's text with surrounding whitespace and ASCII quotes
        removed, or ``""`` on any failure.
    """
    try:
        import requests as _requests
    except Exception:
        return ""

    # Port of the local server is configurable through GEMMA4_PORT.
    endpoint = "http://localhost:{}/api/chat".format(
        os.environ.get("GEMMA4_PORT", "11435")
    )

    # Pre-rendered template offered to the model as a starting point.
    suggestion = ""
    if template:
        try:
            suggestion = template.format_map(_SafeDict(subs))
        except Exception:
            suggestion = ""

    prompt = (
        f"Tu es Léa, une assistante RPA dans le domaine : {self.name}.\n"
        f"Tu viens de terminer un workflow. Résume en UNE à DEUX phrases "
        f"en langage métier, chaleureux mais professionnel, en français.\n\n"
        f"Données :\n"
        f"- workflow : {subs.get('workflow_name', '')}\n"
        f"- items traités : {subs.get('done', 0)} / {subs.get('items_count', 0)}\n"
        f"- échecs : {subs.get('failed', 0)}\n"
        f"- succès global : {'oui' if success else 'non'}\n"
        f"- durée : {subs.get('elapsed_s', 0)}s\n\n"
        f"Base suggérée (tu peux la reformuler) : {suggestion or '(aucune)'}\n\n"
        f"Ta phrase :"
    )

    payload = {
        "model": "gemma4:e4b",
        "messages": [{"role": "user", "content": prompt}],
        "stream": False,
        "options": {"temperature": 0.3, "num_predict": 200},
    }

    try:
        resp = _requests.post(endpoint, json=payload, timeout=30)
        if resp.ok:
            message = resp.json().get("message", {})
            # Trim whitespace, then ASCII quotes/spaces/newlines at the edges.
            return message.get("content", "").strip().strip("\"' \n")
        return ""
    except Exception as exc:
        logger.debug("gemma4 refine summary failed: %s", exc)
        return ""
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"domain_id": self.domain_id,
|
||||
@@ -72,9 +451,24 @@ class DomainContext:
|
||||
"description": self.description,
|
||||
"known_apps": self.known_apps,
|
||||
"vocabulary_count": len(self.vocabulary),
|
||||
"common_actions_count": len(self.common_actions),
|
||||
"has_clarification_templates": bool(self.clarification_templates),
|
||||
"has_summary_templates": bool(self.summary_templates),
|
||||
}
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Utilitaires
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class _SafeDict(dict):
|
||||
"""dict pour str.format_map qui retourne "" pour les clés manquantes."""
|
||||
|
||||
def __missing__(self, key): # type: ignore[override]
|
||||
return ""
|
||||
|
||||
|
||||
# Hints par rôle VLM — adaptés au contexte métier
|
||||
_ROLE_HINTS = {
|
||||
"observer": (
|
||||
@@ -100,6 +494,7 @@ _ROLE_HINTS = {
|
||||
# Domaines pré-configurés
|
||||
# =========================================================================
|
||||
|
||||
|
||||
_TIM_CODAGE = DomainContext(
|
||||
domain_id="tim_codage",
|
||||
name="Codage médical TIM",
|
||||
@@ -156,8 +551,405 @@ _TIM_CODAGE = DomainContext(
|
||||
"recherche_code": "Recherche de code CIM-10 ou CCAM (champ de recherche + arborescence)",
|
||||
"validation_ghm": "Écran de validation du groupage avec GHM calculé et valorisation",
|
||||
},
|
||||
vocabulary_synonyms={
|
||||
"DP": "diagnostic principal",
|
||||
"DAS": "diagnostics associés",
|
||||
"CMA": "complication associée",
|
||||
"UM": "unité médicale",
|
||||
"CR": "compte rendu",
|
||||
"RSS": "résumé de sortie",
|
||||
"RSA": "résumé anonymisé",
|
||||
"GHM": "groupe homogène de malades",
|
||||
"IPP": "identifiant patient",
|
||||
},
|
||||
common_actions={
|
||||
"click:dp": "saisir le diagnostic principal",
|
||||
"click:diagnostic principal": "saisir le diagnostic principal",
|
||||
"click:das": "ajouter un diagnostic associé",
|
||||
"click:ccam": "saisir un acte CCAM",
|
||||
"click:valider": "valider le codage",
|
||||
"click:valider le codage": "valider le codage",
|
||||
"click:grouper": "calculer le GHM",
|
||||
"click:ghm": "consulter le groupage GHM",
|
||||
"click:dossier patient": "ouvrir le dossier patient",
|
||||
"click:fiche patient": "ouvrir la fiche patient",
|
||||
"click:compte rendu": "consulter le compte rendu",
|
||||
"click:cr": "consulter le compte rendu",
|
||||
"click:rechercher": "rechercher un code CIM-10",
|
||||
"type:cim": "saisir un code CIM-10",
|
||||
},
|
||||
clarification_templates={
|
||||
"default": (
|
||||
"Je ne trouve pas {target_friendly}. "
|
||||
"Tu peux me montrer où il se trouve dans le dossier ?"
|
||||
),
|
||||
"target_not_found": (
|
||||
"Je ne trouve pas {target_friendly}. "
|
||||
"Le dossier de {nom_patient} est peut-être déjà codé ou archivé ?"
|
||||
),
|
||||
"target:fichier patient": (
|
||||
"Je ne trouve pas le dossier de {nom_patient}. "
|
||||
"Il est peut-être archivé ? Tu peux me le montrer ?"
|
||||
),
|
||||
"target:dossier": (
|
||||
"Je ne trouve pas le dossier de {nom_patient}. "
|
||||
"Il est peut-être archivé ? Tu peux me le montrer ?"
|
||||
),
|
||||
"ambiguous_code": (
|
||||
"Le compte rendu mentionne plusieurs codes possibles. "
|
||||
"Est-ce le code CIM-10 {code_a} ou {code_b} que tu préfères ?"
|
||||
),
|
||||
"no_cr": (
|
||||
"Je ne trouve pas de compte rendu pour {nom_patient}. "
|
||||
"Tu veux que je saute ce dossier ou que je continue sans ?"
|
||||
),
|
||||
},
|
||||
summary_templates={
|
||||
"item_singular": "dossier",
|
||||
"item_plural": "dossiers",
|
||||
"success_one": (
|
||||
"J'ai codé le dossier de {nom_patient} en {elapsed_s}s. "
|
||||
"Tu peux vérifier le groupage GHM."
|
||||
),
|
||||
"success": (
|
||||
"J'ai codé {done} dossiers sur {items_count}. "
|
||||
"Tout est passé sans erreur, tu peux valider le groupage."
|
||||
),
|
||||
"partial": (
|
||||
"J'ai codé {done} dossiers sur {items_count}. "
|
||||
"{failed} sont en attente — codes CIM-10 ambigus, à valider manuellement."
|
||||
),
|
||||
"failure": (
|
||||
"Je n'ai pas pu coder les dossiers de {workflow_name}. "
|
||||
"Je te rends la main, les comptes rendus sont peut-être inaccessibles."
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
_COMPTABILITE = DomainContext(
|
||||
domain_id="comptabilite",
|
||||
name="Comptabilité",
|
||||
description=(
|
||||
"Comptable : saisie de factures fournisseurs et clients, lettrage, "
|
||||
"rapprochement bancaire, déclarations de TVA, bilans, immobilisations."
|
||||
),
|
||||
system_prompt=(
|
||||
"Tu es un assistant expert en comptabilité d'entreprise. "
|
||||
"L'utilisateur est un comptable qui utilise un logiciel de saisie comptable "
|
||||
"(Sage, Cegid, EBP, Quadra, Isacompta) pour saisir des factures, faire "
|
||||
"les rapprochements bancaires, préparer la TVA et les bilans.\n\n"
|
||||
"Vocabulaire du métier :\n"
|
||||
"- Facture : justificatif de vente ou d'achat (numéro, date, HT, TVA, TTC)\n"
|
||||
"- HT/TVA/TTC : montants hors taxes, taxe, toutes taxes\n"
|
||||
"- Compte comptable : numéro du plan comptable général (PCG), ex 401 (fournisseurs), 411 (clients)\n"
|
||||
"- Journal : journal de saisie (achats, ventes, banque, OD)\n"
|
||||
"- Lettrage : association d'une facture avec son paiement\n"
|
||||
"- Rapprochement : comparaison compte comptable / relevé bancaire\n"
|
||||
"- OCR / LAD : reconnaissance automatique des factures scannées\n"
|
||||
"- Écriture : ligne comptable (débit/crédit)\n"
|
||||
"- Exercice : période comptable annuelle\n"
|
||||
"- Bilan / compte de résultat : états financiers\n"
|
||||
"- CA : chiffre d'affaires\n\n"
|
||||
"Écrans courants :\n"
|
||||
"- Saisie d'écritures (numéro de compte, libellé, débit, crédit)\n"
|
||||
"- Import OCR de factures fournisseurs\n"
|
||||
"- Lettrage / rapprochement\n"
|
||||
"- Brouillard / journal\n"
|
||||
"- Balance / grand livre"
|
||||
),
|
||||
vocabulary=[
|
||||
"facture", "HT", "TVA", "TTC", "compte", "journal", "lettrage",
|
||||
"rapprochement", "OCR", "LAD", "écriture", "débit", "crédit",
|
||||
"exercice", "bilan", "compte de résultat", "CA", "immobilisation",
|
||||
"fournisseur", "client", "PCG", "plan comptable",
|
||||
],
|
||||
known_apps=[
|
||||
"Sage", "Cegid", "EBP", "Quadra", "Isacompta", "Ciel Compta",
|
||||
"Odoo", "Pennylane", "Dext", "Agicap",
|
||||
],
|
||||
screen_patterns={
|
||||
"saisie_ecriture": "Saisie d'écriture comptable (compte, libellé, débit, crédit)",
|
||||
"ocr_facture": "Import OCR : zone image + champs extraits (numéro, date, HT, TVA, TTC, fournisseur)",
|
||||
"lettrage": "Liste d'écritures à lettrer (débit vs crédit)",
|
||||
"rapprochement": "Comparaison compte banque / relevé",
|
||||
"balance": "Balance comptable (comptes agrégés avec soldes)",
|
||||
},
|
||||
vocabulary_synonyms={
|
||||
"HT": "montant hors taxes",
|
||||
"TVA": "montant de TVA",
|
||||
"TTC": "montant toutes taxes",
|
||||
"CA": "chiffre d'affaires",
|
||||
"PCG": "plan comptable général",
|
||||
"OD": "opération diverse",
|
||||
},
|
||||
common_actions={
|
||||
"click:valider": "valider l'écriture",
|
||||
"click:enregistrer": "enregistrer la saisie",
|
||||
"click:lettrer": "lettrer les écritures",
|
||||
"click:rapprocher": "rapprocher avec la banque",
|
||||
"click:ocr": "lancer la reconnaissance OCR",
|
||||
"click:facture": "ouvrir la facture",
|
||||
"click:compte": "sélectionner le compte comptable",
|
||||
"type:ht": "saisir le montant hors taxes",
|
||||
"type:tva": "saisir le montant de TVA",
|
||||
"type:ttc": "saisir le montant toutes taxes",
|
||||
},
|
||||
clarification_templates={
|
||||
"default": (
|
||||
"Je ne trouve pas {target_friendly}. "
|
||||
"C'est bien la facture {num_facture} que tu veux saisir ?"
|
||||
),
|
||||
"target_not_found": (
|
||||
"Je ne trouve pas le champ {target_friendly}. "
|
||||
"C'est bien la facture {num_facture} qui doit être saisie ?"
|
||||
),
|
||||
"target:montant": (
|
||||
"Je ne trouve pas le champ « Montant HT ». "
|
||||
"C'est bien la facture {num_facture} que tu veux saisir ?"
|
||||
),
|
||||
"target:tva": (
|
||||
"Je ne trouve pas le champ TVA. Est-ce une facture à taux {taux_tva} % ?"
|
||||
),
|
||||
"ambiguous_account": (
|
||||
"Je ne sais pas sur quel compte imputer : {compte_a} ou {compte_b} ?"
|
||||
),
|
||||
},
|
||||
summary_templates={
|
||||
"item_singular": "facture",
|
||||
"item_plural": "factures",
|
||||
"success_one": (
|
||||
"J'ai saisi la facture {num_facture} en {elapsed_s}s."
|
||||
),
|
||||
"success": (
|
||||
"J'ai saisi {done} factures sur {items_count}. "
|
||||
"Tout est en brouillard, tu peux valider."
|
||||
),
|
||||
"partial": (
|
||||
"J'ai saisi {done} factures sur {items_count}. "
|
||||
"{failed} factures sont en attente — imputation comptable à vérifier."
|
||||
),
|
||||
"failure": (
|
||||
"Je n'ai pas pu saisir les factures de {workflow_name}. "
|
||||
"L'OCR n'a peut-être pas fonctionné, je te rends la main."
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
_RH_PAIE = DomainContext(
|
||||
domain_id="rh_paie",
|
||||
name="Ressources humaines et paie",
|
||||
description=(
|
||||
"Gestionnaire RH / paie : fiches employés, contrats, bulletins de salaire, "
|
||||
"déclarations sociales (DSN), charges, congés, absences."
|
||||
),
|
||||
system_prompt=(
|
||||
"Tu es un assistant expert en gestion RH et paie française. "
|
||||
"L'utilisateur est un gestionnaire RH ou de paie qui utilise un logiciel "
|
||||
"(Silae, Sage Paie, Cegid, ADP, PayFit) pour éditer des bulletins de salaire, "
|
||||
"gérer les contrats, les absences, et envoyer les DSN.\n\n"
|
||||
"Vocabulaire du métier :\n"
|
||||
"- Bulletin de paie : fiche de salaire mensuelle\n"
|
||||
"- DSN : Déclaration Sociale Nominative (mensuelle, transmise à l'URSSAF)\n"
|
||||
"- Brut / Net : salaire avant et après charges\n"
|
||||
"- Charges sociales / patronales : cotisations employeur et salarié\n"
|
||||
"- CDI / CDD : types de contrats\n"
|
||||
"- Période de paie : mois concerné par le bulletin\n"
|
||||
"- SMIC : salaire minimum\n"
|
||||
"- IJSS : indemnités journalières sécurité sociale\n"
|
||||
"- Congés payés : solde de congés\n"
|
||||
"- RTT : réduction du temps de travail\n"
|
||||
"- Saisie sur salaire : retenue judiciaire\n"
|
||||
"- Solde de tout compte : dernier bulletin d'un salarié qui part\n\n"
|
||||
"Écrans courants :\n"
|
||||
"- Fiche employé (identité, contrat, poste, salaire)\n"
|
||||
"- Saisie des variables (heures, absences, primes)\n"
|
||||
"- Bulletin de paie (aperçu avant validation)\n"
|
||||
"- Déclaration DSN\n"
|
||||
"- Gestion des absences / congés"
|
||||
),
|
||||
vocabulary=[
|
||||
"bulletin", "salaire", "brut", "net", "charges sociales", "DSN",
|
||||
"CDI", "CDD", "congés", "RTT", "SMIC", "IJSS", "URSSAF",
|
||||
"employé", "salarié", "contrat", "prime", "heures supplémentaires",
|
||||
"absence", "solde de tout compte", "STC",
|
||||
],
|
||||
known_apps=[
|
||||
"Silae", "Sage Paie", "Cegid Paie", "ADP", "PayFit", "Nibelis",
|
||||
"Cegedim SRH", "Lucca", "HR Access",
|
||||
],
|
||||
screen_patterns={
|
||||
"fiche_employe": "Fiche employé avec identité, contrat, poste",
|
||||
"saisie_variables": "Saisie des variables de paie (heures, absences, primes)",
|
||||
"apercu_bulletin": "Aperçu du bulletin de paie avant validation",
|
||||
"dsn": "Écran DSN (déclaration sociale nominative)",
|
||||
"conges": "Gestion des absences et congés",
|
||||
},
|
||||
vocabulary_synonyms={
|
||||
"DSN": "déclaration sociale",
|
||||
"RTT": "réduction du temps de travail",
|
||||
"STC": "solde de tout compte",
|
||||
"IJSS": "indemnités journalières",
|
||||
"CP": "congés payés",
|
||||
},
|
||||
common_actions={
|
||||
"click:valider": "valider le bulletin",
|
||||
"click:editer": "éditer le bulletin",
|
||||
"click:bulletin": "ouvrir le bulletin de paie",
|
||||
"click:employe": "ouvrir la fiche employé",
|
||||
"click:dsn": "lancer la DSN",
|
||||
"click:conges": "gérer les congés",
|
||||
"click:absence": "saisir une absence",
|
||||
"type:heures": "saisir les heures travaillées",
|
||||
"type:prime": "saisir une prime",
|
||||
},
|
||||
clarification_templates={
|
||||
"default": (
|
||||
"Je ne trouve pas {target_friendly} pour {nom_employe}. "
|
||||
"Tu peux me confirmer la période de paie ?"
|
||||
),
|
||||
"target_not_found": (
|
||||
"Je ne trouve pas {target_friendly} dans la fiche de {nom_employe}. "
|
||||
"Le contrat est peut-être clôturé ?"
|
||||
),
|
||||
"target:employe": (
|
||||
"Je ne trouve pas {nom_employe} dans la liste. "
|
||||
"Est-il encore actif dans l'entreprise ?"
|
||||
),
|
||||
"ambiguous_period": (
|
||||
"Est-ce la période {periode_a} ou {periode_b} que tu veux traiter ?"
|
||||
),
|
||||
},
|
||||
summary_templates={
|
||||
"item_singular": "bulletin",
|
||||
"item_plural": "bulletins",
|
||||
"success_one": (
|
||||
"J'ai édité le bulletin de {nom_employe} en {elapsed_s}s."
|
||||
),
|
||||
"success": (
|
||||
"J'ai édité {done} bulletins sur {items_count}. "
|
||||
"La paie est prête pour validation."
|
||||
),
|
||||
"partial": (
|
||||
"J'ai édité {done} bulletins sur {items_count}. "
|
||||
"{failed} sont en attente — variables de paie à compléter."
|
||||
),
|
||||
"failure": (
|
||||
"Je n'ai pas pu éditer les bulletins de {workflow_name}. "
|
||||
"Il y a peut-être un blocage côté logiciel de paie."
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
_STOCKS_LOGISTIQUE = DomainContext(
|
||||
domain_id="stocks_logistique",
|
||||
name="Stocks et logistique",
|
||||
description=(
|
||||
"Gestionnaire de stocks / logistique : bons de commande, bons de livraison, "
|
||||
"réceptions, inventaires, mouvements de stock, expéditions."
|
||||
),
|
||||
system_prompt=(
|
||||
"Tu es un assistant expert en gestion de stocks et logistique. "
|
||||
"L'utilisateur utilise un ERP ou WMS (SAP, Dynamics, Odoo, Sage, Divalto) "
|
||||
"pour gérer les commandes, les réceptions, les expéditions et les inventaires.\n\n"
|
||||
"Vocabulaire du métier :\n"
|
||||
"- BC : Bon de Commande (achat ou vente)\n"
|
||||
"- BL : Bon de Livraison\n"
|
||||
"- BR : Bon de Réception\n"
|
||||
"- Article / Référence / SKU : produit en stock\n"
|
||||
"- Emplacement : localisation physique (allée, rayon, emplacement)\n"
|
||||
"- Mouvement de stock : entrée, sortie, transfert\n"
|
||||
"- Inventaire : comptage physique pour recaler le stock théorique\n"
|
||||
"- FIFO / LIFO : ordre de sortie des stocks\n"
|
||||
"- ERP : progiciel de gestion intégré\n"
|
||||
"- WMS : Warehouse Management System\n"
|
||||
"- Picking : préparation de commande\n"
|
||||
"- Quantité en stock / disponible / réservée\n\n"
|
||||
"Écrans courants :\n"
|
||||
"- Saisie de bon de commande / réception\n"
|
||||
"- Liste des articles (avec photo, quantité, emplacement)\n"
|
||||
"- Inventaire (comptage)\n"
|
||||
"- Mouvements de stock\n"
|
||||
"- Picking list (liste de préparation)"
|
||||
),
|
||||
vocabulary=[
|
||||
"bon de commande", "BC", "bon de livraison", "BL", "bon de réception", "BR",
|
||||
"article", "référence", "SKU", "emplacement", "stock", "inventaire",
|
||||
"mouvement", "entrée", "sortie", "picking", "FIFO", "LIFO", "ERP", "WMS",
|
||||
"fournisseur", "client", "quantité", "disponible", "réservé",
|
||||
],
|
||||
known_apps=[
|
||||
"SAP", "Dynamics", "Odoo", "Sage X3", "Divalto", "Cegid",
|
||||
"Oracle NetSuite", "Reflex WMS", "Infolog",
|
||||
],
|
||||
screen_patterns={
|
||||
"bon_commande": "Saisie de bon de commande (fournisseur, lignes d'articles, quantités)",
|
||||
"reception": "Bon de réception (rapprochement avec la commande)",
|
||||
"inventaire": "Saisie d'inventaire (article, emplacement, quantité comptée)",
|
||||
"picking": "Liste de préparation avec articles et emplacements",
|
||||
"mouvement": "Mouvement de stock (entrée/sortie/transfert)",
|
||||
},
|
||||
vocabulary_synonyms={
|
||||
"BC": "bon de commande",
|
||||
"BL": "bon de livraison",
|
||||
"BR": "bon de réception",
|
||||
"SKU": "référence produit",
|
||||
"WMS": "gestion d'entrepôt",
|
||||
"ERP": "progiciel de gestion",
|
||||
},
|
||||
common_actions={
|
||||
"click:valider": "valider le bon",
|
||||
"click:commande": "ouvrir le bon de commande",
|
||||
"click:livraison": "ouvrir le bon de livraison",
|
||||
"click:reception": "saisir la réception",
|
||||
"click:inventaire": "démarrer l'inventaire",
|
||||
"click:article": "sélectionner un article",
|
||||
"click:picking": "démarrer la préparation",
|
||||
"type:quantite": "saisir la quantité",
|
||||
"type:reference": "saisir la référence article",
|
||||
},
|
||||
clarification_templates={
|
||||
"default": (
|
||||
"Je ne trouve pas {target_friendly}. "
|
||||
"C'est bien la commande {num_bc} qu'on traite ?"
|
||||
),
|
||||
"target_not_found": (
|
||||
"Je ne trouve pas {target_friendly}. "
|
||||
"La commande {num_bc} est peut-être déjà clôturée ?"
|
||||
),
|
||||
"target:article": (
|
||||
"Je ne trouve pas l'article {ref_article}. "
|
||||
"Il est peut-être archivé ou mal référencé ?"
|
||||
),
|
||||
"quantity_mismatch": (
|
||||
"La quantité reçue ({qte_recue}) ne correspond pas à la commande "
|
||||
"({qte_commandee}). Je saisis un écart ou tu vérifies ?"
|
||||
),
|
||||
},
|
||||
summary_templates={
|
||||
"item_singular": "bon",
|
||||
"item_plural": "bons",
|
||||
"success_one": (
|
||||
"J'ai traité le bon {num_bc} en {elapsed_s}s."
|
||||
),
|
||||
"success": (
|
||||
"J'ai traité {done} bons sur {items_count}. "
|
||||
"Les mouvements de stock sont validés."
|
||||
),
|
||||
"partial": (
|
||||
"J'ai traité {done} bons sur {items_count}. "
|
||||
"{failed} bons sont en attente — écarts de quantité à vérifier."
|
||||
),
|
||||
"failure": (
|
||||
"Je n'ai pas pu traiter les bons de {workflow_name}. "
|
||||
"L'ERP a peut-être refusé une ligne, je te rends la main."
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
_GENERIC = DomainContext(
|
||||
domain_id="generic",
|
||||
name="Bureautique générale",
|
||||
@@ -166,11 +958,37 @@ _GENERIC = DomainContext(
|
||||
"Tu es un assistant RPA qui observe des applications bureautiques. "
|
||||
"Décris précisément ce que tu vois à l'écran."
|
||||
),
|
||||
summary_templates={
|
||||
"item_singular": "action",
|
||||
"item_plural": "actions",
|
||||
"success_one": "C'est fait, j'ai terminé « {workflow_name} » en {elapsed_s}s.",
|
||||
"success": (
|
||||
"J'ai terminé « {workflow_name} » : {done} {item_word} exécutées "
|
||||
"sur {items_count}."
|
||||
),
|
||||
"partial": (
|
||||
"J'ai terminé « {workflow_name} » partiellement : "
|
||||
"{done} {item_word} sur {items_count} ({failed} en échec)."
|
||||
),
|
||||
"failure": (
|
||||
"Je n'ai pas pu terminer « {workflow_name} ». Je te rends la main."
|
||||
),
|
||||
},
|
||||
clarification_templates={
|
||||
"default": (
|
||||
"Je ne trouve pas {target_friendly} à l'écran. "
|
||||
"Tu peux me le montrer ?"
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
# Registre des domaines disponibles
|
||||
_DOMAINS: Dict[str, DomainContext] = {
|
||||
"tim_codage": _TIM_CODAGE,
|
||||
"comptabilite": _COMPTABILITE,
|
||||
"rh_paie": _RH_PAIE,
|
||||
"stocks_logistique": _STOCKS_LOGISTIQUE,
|
||||
"generic": _GENERIC,
|
||||
}
|
||||
|
||||
@@ -179,7 +997,8 @@ def get_domain_context(domain_id: str = "generic") -> DomainContext:
|
||||
"""Récupérer le contexte métier par ID.
|
||||
|
||||
Args:
|
||||
domain_id: Identifiant du domaine (tim_codage, generic, etc.)
|
||||
domain_id: Identifiant du domaine (tim_codage, comptabilite, rh_paie,
|
||||
stocks_logistique, generic, etc.)
|
||||
|
||||
Returns:
|
||||
DomainContext correspondant, ou generic si non trouvé.
|
||||
|
||||
373
agent_v0/server_v1/execution_plan_runner.py
Normal file
373
agent_v0/server_v1/execution_plan_runner.py
Normal file
@@ -0,0 +1,373 @@
|
||||
# agent_v0/server_v1/execution_plan_runner.py
|
||||
"""
|
||||
ExecutionPlanRunner — Adaptateur ExecutionPlan → actions replay.
|
||||
|
||||
Pièce d'intégration du pipeline V4 :
|
||||
RawTrace → IRBuilder → WorkflowIR → ExecutionCompiler → ExecutionPlan → Runtime
|
||||
|
||||
Ce module convertit un `ExecutionPlan` (plan pré-compilé, déterministe) en
|
||||
liste d'actions au format attendu par l'executor replay actuel (clé x_pct,
|
||||
y_pct, target_spec, etc.), puis les injecte dans `_replay_queues`.
|
||||
|
||||
L'ancien chemin `build_replay_from_raw_events()` dans stream_processor.py
|
||||
reste inchangé — les deux chemins coexistent pendant la transition.
|
||||
|
||||
Format d'action produit (compatible executor existant) :
|
||||
{
|
||||
"action_id": "act_...",
|
||||
"type": "click",
|
||||
"x_pct": 0.5,
|
||||
"y_pct": 0.3,
|
||||
"visual_mode": True,
|
||||
"target_spec": {
|
||||
"by_text": "...",
|
||||
"window_title": "...",
|
||||
"vlm_description": "...",
|
||||
"anchor_image_base64": "...",
|
||||
},
|
||||
"expected_window_title": "...",
|
||||
}
|
||||
|
||||
Auteur: Dom, Alice - Avril 2026
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
import threading
|
||||
import uuid
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from core.workflow.execution_plan import (
|
||||
ExecutionNode,
|
||||
ExecutionPlan,
|
||||
ResolutionStrategy,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Substitution de variables
|
||||
# =========================================================================
|
||||
# Le WorkflowIR utilise la syntaxe `{var}` dans les champs texte.
|
||||
# Ici on supporte les deux : `{var}` (IR natif) et `${var}` (replay legacy).
|
||||
_VARIABLE_RE_CURLY = re.compile(r"\{(\w+)\}")
|
||||
_VARIABLE_RE_DOLLAR = re.compile(r"\$\{(\w+)\}")
|
||||
|
||||
|
||||
def substitute_variables(text: str, variables: Dict[str, Any]) -> str:
    """Replace `{var}` and `${var}` placeholders with their values.

    Both syntaxes are resolved in a single regex pass. A substituted value
    is therefore inserted verbatim: if it happens to contain something that
    looks like a placeholder (e.g. the value of `a` is the text "{b}"), it
    is NOT expanded a second time.

    Args:
        text: Template text. Returned unchanged when empty/None.
        variables: Mapping of variable name to value; values are converted
            with `str()`. When empty, `text` is returned unchanged.

    Returns:
        The text with every known placeholder replaced. Unknown
        placeholders are left exactly as written.
    """
    if not text or not variables:
        return text

    def replacer(match: "re.Match[str]") -> str:
        var_name = match.group(1)
        if var_name in variables:
            return str(variables[var_name])
        # Unknown variable: keep the raw placeholder (with its `$` if any).
        return match.group(0)

    # The optional leading `$` lets one pattern cover `${var}` (legacy
    # replay syntax) and `{var}` (native IR syntax) at the same time.
    return re.sub(r"\$?\{(\w+)\}", replacer, text)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Conversion ExecutionNode → action replay
|
||||
# =========================================================================
|
||||
|
||||
|
||||
def _strategy_to_target_spec(
|
||||
strategy: Optional[ResolutionStrategy],
|
||||
fallbacks: Optional[List[ResolutionStrategy]] = None,
|
||||
intent: str = "",
|
||||
) -> Dict[str, Any]:
|
||||
"""Construire un `target_spec` depuis les stratégies de résolution.
|
||||
|
||||
Fusionne la primaire et les fallbacks pour donner un maximum d'indices
|
||||
au resolve_engine :
|
||||
- OCR → by_text
|
||||
- template → anchor_image_base64 (depuis anchor_b64)
|
||||
- VLM → vlm_description
|
||||
|
||||
Règle V4 : la stratégie primaire dicte la méthode préférée.
|
||||
Le champ `resolve_order` liste les méthodes dans l'ordre à essayer.
|
||||
Le resolve_engine honore cet ordre au lieu de sa cascade par défaut.
|
||||
|
||||
resolve_order est la clé du "zéro VLM au runtime" :
|
||||
- ["ocr", "template", "vlm"] → V4 typique (OCR rapide)
|
||||
- ["template", "ocr", "vlm"] → apprentissage : template marche mieux
|
||||
- ["vlm"] → éléments sans texte (icônes)
|
||||
"""
|
||||
spec: Dict[str, Any] = {}
|
||||
|
||||
all_strategies: List[ResolutionStrategy] = []
|
||||
if strategy is not None:
|
||||
all_strategies.append(strategy)
|
||||
if fallbacks:
|
||||
all_strategies.extend(fallbacks)
|
||||
|
||||
by_text_candidate = ""
|
||||
anchor_candidate = ""
|
||||
vlm_candidate = ""
|
||||
uia_data: Dict[str, Any] = {}
|
||||
dom_data: Dict[str, Any] = {}
|
||||
resolve_order: List[str] = []
|
||||
seen_methods: set = set()
|
||||
|
||||
for strat in all_strategies:
|
||||
if not strat:
|
||||
continue
|
||||
if strat.method == "ocr" and strat.target_text and not by_text_candidate:
|
||||
by_text_candidate = strat.target_text
|
||||
elif strat.method == "template":
|
||||
if strat.anchor_b64 and not anchor_candidate:
|
||||
anchor_candidate = strat.anchor_b64
|
||||
if strat.target_text and not by_text_candidate:
|
||||
by_text_candidate = strat.target_text
|
||||
elif strat.method == "vlm" and strat.vlm_description and not vlm_candidate:
|
||||
vlm_candidate = strat.vlm_description
|
||||
elif strat.method == "uia" and strat.uia_name and not uia_data:
|
||||
uia_data = {
|
||||
"name": strat.uia_name,
|
||||
"control_type": strat.uia_control_type,
|
||||
"automation_id": strat.uia_automation_id,
|
||||
"parent_path": strat.uia_parent_path,
|
||||
}
|
||||
elif strat.method == "dom" and strat.dom_selector and not dom_data:
|
||||
dom_data = {
|
||||
"selector": strat.dom_selector,
|
||||
"xpath": strat.dom_xpath,
|
||||
"url_pattern": strat.dom_url_pattern,
|
||||
}
|
||||
|
||||
# Construire l'ordre des méthodes (dans l'ordre primaire → fallbacks)
|
||||
if strat.method and strat.method not in seen_methods:
|
||||
resolve_order.append(strat.method)
|
||||
seen_methods.add(strat.method)
|
||||
|
||||
if by_text_candidate:
|
||||
spec["by_text"] = by_text_candidate
|
||||
if anchor_candidate:
|
||||
spec["anchor_image_base64"] = anchor_candidate
|
||||
if vlm_candidate:
|
||||
spec["vlm_description"] = vlm_candidate
|
||||
elif intent and "vlm_description" not in spec:
|
||||
# L'intention métier devient le prompt VLM de dernier recours
|
||||
spec["vlm_description"] = intent
|
||||
|
||||
# Données UIA — consommées par l'agent Windows via lea_uia.exe
|
||||
if uia_data:
|
||||
spec["uia_target"] = uia_data
|
||||
|
||||
# Données DOM — consommées par l'agent Windows via CDP (futur)
|
||||
if dom_data:
|
||||
spec["dom_target"] = dom_data
|
||||
|
||||
# Ordre de résolution pré-compilé — c'est LA pièce centrale du V4
|
||||
if resolve_order:
|
||||
spec["resolve_order"] = resolve_order
|
||||
|
||||
return spec
|
||||
|
||||
|
||||
def execution_node_to_action(
    node: ExecutionNode,
    variables: Optional[Dict[str, Any]] = None,
    id_prefix: str = "act_plan",
) -> Optional[Dict[str, Any]]:
    """Convert one `ExecutionNode` into a replay action dict.

    Args:
        node: The node to convert.
        variables: Values used to substitute placeholders in typed text.
        id_prefix: Prefix of the generated `action_id`.

    Returns:
        The action dict, or None when the node cannot be executed
        (unknown action type, or a key action without any key).
    """
    variables = variables or {}

    action: Dict[str, Any] = {
        "action_id": f"{id_prefix}_{uuid.uuid4().hex[:8]}",
        "plan_node_id": node.node_id,
    }

    # Optional descriptive fields, copied only when they carry a value.
    if node.intent:
        action["intention"] = node.intent
    if node.step_id:
        action["plan_step_id"] = node.step_id
    if node.is_optional:
        action["is_optional"] = True

    # Runtime metadata: same key name on the node and on the action.
    for field in ("timeout_ms", "max_retries", "recovery_action"):
        value = getattr(node, field)
        if value:
            action[field] = value
    if node.success_condition:
        action["success_condition"] = node.success_condition.to_dict()

    kind = node.action_type

    if kind == "click":
        # The real pixels are found at replay time from the target_spec;
        # a neutral centre (0.5, 0.5) is still exposed so the action has
        # coordinates for the existing queue validations.
        action.update(type="click", x_pct=0.5, y_pct=0.5, visual_mode=True)

        spec = _strategy_to_target_spec(
            strategy=node.strategy_primary,
            fallbacks=node.strategy_fallbacks or [],
            intent=node.intent,
        )

        # Window title expected BEFORE the click (pre-check). Without it
        # no pre-check key is emitted.
        window_before = node.expected_window_before
        if window_before:
            action["expected_window_before"] = window_before
            spec["window_title"] = window_before

        # Window title expected AFTER the click (post-check).
        condition = node.success_condition
        if condition and condition.expected_title:
            action["expected_window_title"] = condition.expected_title
            action["success_strict"] = condition.method == "title_match"
            spec.setdefault("window_title", condition.expected_title)

        if spec:
            action["target_spec"] = spec
        return action

    if kind == "type":
        action["type"] = "type"
        # Placeholders are resolved before sending (e.g. {patient}).
        action["text"] = substitute_variables(node.text or "", variables)
        if node.variable_name:
            action["variable_name"] = node.variable_name
        return action

    if kind in ("key_combo", "key_press"):
        action["type"] = "key_combo"
        pressed = list(node.keys or [])
        if not pressed:
            return None
        action["keys"] = pressed
        return action

    if kind == "wait":
        action["type"] = "wait"
        action["duration_ms"] = int(node.duration_ms or 1000)
        return action

    if kind == "scroll":
        action["type"] = "scroll"
        # Any zone carried by the strategies is not used here; a fixed
        # delta is emitted.
        action["delta"] = -3
        return action

    logger.debug("execution_node_to_action: type inconnu '%s' ignoré", kind)
    return None
|
||||
|
||||
|
||||
def execution_plan_to_actions(
    plan: ExecutionPlan,
    variables: Optional[Dict[str, Any]] = None,
    id_prefix: str = "act_plan",
) -> List[Dict[str, Any]]:
    """Convert a whole `ExecutionPlan` into a list of replay actions.

    Variables passed as argument override those stored on the plan. Nodes
    that convert to None are left out of the result.
    """
    effective_vars: Dict[str, Any] = dict(plan.variables or {})
    effective_vars.update(variables or {})

    converted = (
        execution_node_to_action(
            node=node,
            variables=effective_vars,
            id_prefix=id_prefix,
        )
        for node in plan.nodes
    )
    actions: List[Dict[str, Any]] = [item for item in converted if item is not None]

    logger.info(
        "execution_plan_to_actions(%s) : %d nœuds → %d actions replay "
        "(vars=%d)",
        plan.plan_id, plan.total_nodes, len(actions), len(effective_vars),
    )
    return actions
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Injection dans la queue de replay
|
||||
# =========================================================================
|
||||
|
||||
|
||||
def inject_plan_into_queue(
    plan: ExecutionPlan,
    session_id: str,
    replay_queues: Dict[str, List[Dict[str, Any]]],
    variables: Optional[Dict[str, Any]] = None,
    lock: Optional[threading.Lock] = None,
    replace: bool = True,
    id_prefix: str = "act_plan",
) -> List[Dict[str, Any]]:
    """Inject an `ExecutionPlan` into the replay queue of a session.

    Args:
        plan: The plan to execute.
        session_id: Key of the target session in `replay_queues`.
        replay_queues: Mapping session id -> list of pending actions;
            it is mutated in place.
        variables: Variables to substitute into the actions.
        lock: Optional lock held while `replay_queues` is written.
        replace: When True (default) the session queue is replaced.
            When False the actions are appended; a session that has no
            queue yet gets a new one instead of raising `KeyError`.
        id_prefix: Prefix for the generated action ids.

    Returns:
        The list of injected actions (after substitution).
    """
    actions = execution_plan_to_actions(
        plan=plan, variables=variables, id_prefix=id_prefix,
    )

    def _write() -> None:
        if replace:
            # Store a copy so the returned list and the queue are independent.
            replay_queues[session_id] = list(actions)
        else:
            # setdefault: appending to an unknown session must create the
            # queue rather than fail.
            replay_queues.setdefault(session_id, []).extend(actions)

    if lock is not None:
        with lock:
            _write()
    else:
        _write()

    logger.info(
        "inject_plan_into_queue(%s) : %d actions injectées dans la queue "
        "de la session '%s' (replace=%s)",
        plan.plan_id, len(actions), session_id, replace,
    )
    return actions
|
||||
1322
agent_v0/server_v1/replay_engine.py
Normal file
1322
agent_v0/server_v1/replay_engine.py
Normal file
File diff suppressed because it is too large
Load Diff
143
agent_v0/server_v1/replay_failure_logger.py
Normal file
143
agent_v0/server_v1/replay_failure_logger.py
Normal file
@@ -0,0 +1,143 @@
|
||||
# agent_v0/server_v1/replay_failure_logger.py
|
||||
"""
|
||||
Logger des echecs de replay pour l'apprentissage futur.
|
||||
|
||||
Chaque echec de resolution visuelle (target_not_found) est sauvegarde dans un
|
||||
fichier JSONL par session, avec le screenshot de ce que l'agent voit au moment
|
||||
de l'echec. Ces donnees alimentent le learning loop : re-entrainement des
|
||||
embeddings, ajustement des seuils, enrichissement des target_spec.
|
||||
|
||||
Structure :
|
||||
data/training/replay_failures/{replay_id}/failures.jsonl
|
||||
data/training/replay_failures/{replay_id}/screenshots/{action_id}.jpg
|
||||
"""
|
||||
|
||||
import base64
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger("replay_failure_logger")
|
||||
|
||||
# Repertoire racine des echecs de replay
|
||||
_FAILURES_BASE_DIR = Path("data/training/replay_failures")
|
||||
|
||||
# Lock pour les ecritures concurrentes
|
||||
_write_lock = threading.Lock()
|
||||
|
||||
|
||||
def log_replay_failure(
    replay_id: str,
    action_id: str,
    target_spec: Optional[Dict[str, Any]],
    screenshot_b64: Optional[str],
    resolution_attempts: Optional[List[Dict[str, Any]]] = None,
    error: str = "target_not_found",
    extra: Optional[Dict[str, Any]] = None,
) -> Optional[str]:
    """Persist one replay failure for later learning.

    A JSON line is appended to `<base>/<replay_id>/failures.jsonl`; when a
    screenshot is given it is decoded and written to
    `<base>/<replay_id>/screenshots/<action_id>.jpg`.

    Args:
        replay_id: Identifier of the running replay.
        action_id: Identifier of the failed action.
        target_spec: Spec of the searched target (base64 fields are
            replaced by a size marker before storage).
        screenshot_b64: Base64 screenshot, or None.
        resolution_attempts: Resolution attempts to store with the entry.
        error: Error type (default "target_not_found").
        extra: Extra fields merged into the entry (they override the
            standard fields on key collision).

    Returns:
        Path of the JSONL file, or None when anything went wrong.
    """
    try:
        session_dir = _FAILURES_BASE_DIR / replay_id
        session_dir.mkdir(parents=True, exist_ok=True)

        # A screenshot that cannot be saved does not prevent the entry
        # from being logged; its path is simply recorded as None.
        screenshot_path = None
        if screenshot_b64:
            shots_dir = session_dir / "screenshots"
            shots_dir.mkdir(exist_ok=True)
            shot_file = shots_dir / f"{action_id}.jpg"
            try:
                shot_file.write_bytes(base64.b64decode(screenshot_b64))
                screenshot_path = str(shot_file)
            except Exception as e:
                logger.warning(f"Impossible de sauvegarder le screenshot : {e}")

        record = {
            "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S"),
            "replay_id": replay_id,
            "action_id": action_id,
            "target_spec": _sanitize_target_spec(target_spec) if target_spec else None,
            "screenshot_path": screenshot_path,
            "resolution_attempts": resolution_attempts or [],
            "error": error,
        }
        record.update(extra or {})

        # Appends are serialized by the module-level lock.
        jsonl_path = session_dir / "failures.jsonl"
        serialized = json.dumps(record, ensure_ascii=False) + "\n"
        with _write_lock:
            with open(jsonl_path, "a", encoding="utf-8") as sink:
                sink.write(serialized)

        logger.info(
            f"Echec replay loggue : replay={replay_id} action={action_id} "
            f"error={error} -> {jsonl_path}"
        )
        return str(jsonl_path)

    except Exception as e:
        logger.error(f"Impossible de logger l'echec replay : {e}")
        return None
|
||||
|
||||
|
||||
def _sanitize_target_spec(target_spec: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Nettoyer le target_spec pour le stockage (retirer les images base64 volumineuses)."""
|
||||
cleaned = {}
|
||||
for key, value in target_spec.items():
|
||||
# Ne pas stocker les images base64 (trop volumineux pour le JSONL)
|
||||
if key.endswith("_base64") or key.endswith("_b64"):
|
||||
cleaned[key] = f"<{len(str(value))} chars>" if value else None
|
||||
else:
|
||||
cleaned[key] = value
|
||||
return cleaned
|
||||
|
||||
|
||||
def get_failure_count(replay_id: str) -> int:
    """Return the number of lines in the failure log of a replay.

    Returns 0 when the log file does not exist or cannot be read.
    """
    log_file = _FAILURES_BASE_DIR / replay_id / "failures.jsonl"
    if log_file.exists():
        try:
            with open(log_file, "r", encoding="utf-8") as handle:
                total = 0
                for _line in handle:
                    total += 1
                return total
        except Exception:
            # Best effort: an unreadable log counts as no failure.
            return 0
    return 0
|
||||
|
||||
|
||||
def get_failures(replay_id: str) -> List[Dict[str, Any]]:
    """Read every failure entry logged for a replay.

    Each non-blank line of `failures.jsonl` is parsed on its own, so a
    malformed line (for instance a last line truncated by an interrupted
    write) is skipped with a warning instead of hiding all later entries.

    Returns:
        The parsed entries in file order; an empty list when the file
        does not exist. If the file itself cannot be read, the entries
        collected so far are returned.
    """
    jsonl_path = _FAILURES_BASE_DIR / replay_id / "failures.jsonl"
    if not jsonl_path.exists():
        return []

    failures: List[Dict[str, Any]] = []
    try:
        with open(jsonl_path, "r", encoding="utf-8") as f:
            for line_no, line in enumerate(f, start=1):
                line = line.strip()
                if not line:
                    continue
                try:
                    failures.append(json.loads(line))
                except json.JSONDecodeError as e:
                    logger.warning(
                        f"Ligne {line_no} illisible ignoree dans {jsonl_path} : {e}"
                    )
    except Exception as e:
        logger.warning(f"Erreur lecture echecs replay {replay_id} : {e}")
    return failures
|
||||
@@ -175,6 +175,55 @@ class ReplayLearner:
|
||||
|
||||
self.record(outcome)
|
||||
|
||||
def record_human_correction(
    self,
    session_id: str,
    action: Dict[str, Any],
    correction: Dict[str, Any],
) -> None:
    """Record a human correction (supervised learning mode).

    The human showed where to click. The correction is first recorded as
    a successful `ActionOutcome` through `self.record`, then stored in the
    persistent target memory through `replay_memory.memory_record_success`,
    keyed by the window title found in the action's `target_spec` so that
    `memory_lookup` computes the same screen signature later.

    Args:
        session_id: Session in which the correction happened.
        action: The corrected action; its `target_spec` (optional) gives
            `by_text` and `window_title`.
        correction: Corrected position, keys `x_pct` and `y_pct`.
    """
    # `or {}` also covers an action whose target_spec is explicitly None.
    target_spec = action.get("target_spec") or {}
    by_text = target_spec.get("by_text", "")
    window_title = target_spec.get("window_title", "")
    x_pct = correction.get("x_pct", 0.0)
    y_pct = correction.get("y_pct", 0.0)

    # Record in the learning log.
    outcome = ActionOutcome(
        session_id=session_id,
        action_id=action.get("action_id", ""),
        action_type="click",
        target_description=by_text,
        window_title=window_title,
        resolution_method="human_supervised",
        resolution_score=1.0,  # Maximum confidence: the human showed it
        success=True,
    )
    self.record(outcome)

    # Store in the persistent target memory for future lookups. Failures
    # here are logged and never propagate to the caller.
    try:
        from .replay_memory import memory_record_success

        stored = memory_record_success(
            window_title=window_title,
            target_spec=target_spec,
            x_pct=x_pct,
            y_pct=y_pct,
            method="human_supervised",
            confidence=1.0,
        )
        if stored:
            logger.info(
                f"[APPRENTISSAGE] Correction stockée dans target_memory : "
                f"'{by_text}' → ({x_pct:.4f}, {y_pct:.4f})"
            )
        else:
            logger.debug(
                "Learning: correction non stockée dans target_memory "
                "(store indisponible, titre de fenêtre absent ou coords invalides)"
            )
    except Exception as e:
        logger.warning(f"Learning: échec stockage target_memory: {e}")
|
||||
|
||||
def query_similar(
|
||||
self,
|
||||
target_description: str = "",
|
||||
|
||||
316
agent_v0/server_v1/replay_memory.py
Normal file
316
agent_v0/server_v1/replay_memory.py
Normal file
@@ -0,0 +1,316 @@
|
||||
# agent_v0/server_v1/replay_memory.py
|
||||
"""
|
||||
replay_memory — Greffe de TargetMemoryStore (Fiche #18) sur le pipeline V4.
|
||||
|
||||
Phase 1 du plan apprentissage Léa (docs/PLAN_APPRENTISSAGE_LEA.md).
|
||||
|
||||
Le runtime V4 appelle :
|
||||
- `memory_lookup()` AVANT la cascade coûteuse (OCR/template/VLM)
|
||||
- `memory_record_success()` APRÈS validation post-condition (`title_match` strict)
|
||||
- `memory_record_failure()` sur les échecs
|
||||
|
||||
Fingerprint léger V4 : les coordonnées clic (x_pct, y_pct) sont stockées dans
|
||||
les deux premières valeurs de `TargetFingerprint.bbox`, et la méthode de
|
||||
résolution ayant réussi dans le champ `etype`.
|
||||
|
||||
Signature d'écran V4 : `sha256(normalize(window_title))[:16]`. Simple et
|
||||
robuste aux données variables car les titres de fenêtre restent stables.
|
||||
Les faux positifs (même titre, écrans différents) sont rattrapés par la
|
||||
post-condition qui décrémentera la fiabilité via `record_failure()`.
|
||||
|
||||
Critère de fiabilité : 2 succès minimum et < 30% d'échecs pour déclencher
|
||||
un hit (paramètres de `TargetMemoryStore.lookup`). C'est exactement la
|
||||
cristallisation par répétition que l'on veut — Léa est un stagiaire qui
|
||||
apprend de l'observation.
|
||||
|
||||
Auteur : Dom, Alice — avril 2026
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
import os
|
||||
import unicodedata
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# =========================================================================
|
||||
# Singleton du store persistant
|
||||
# =========================================================================
|
||||
|
||||
_MEMORY_SINGLETON: Optional[Any] = None
|
||||
_MEMORY_DISABLED = False
|
||||
|
||||
|
||||
def get_memory_store():
    """Return the shared `TargetMemoryStore`, or None when unavailable.

    The store is created lazily on the first call, so
    `core.learning.target_memory_store` is not imported when this module
    is imported. If creation fails, the store is marked as disabled and
    every later call returns None without retrying.
    """
    global _MEMORY_SINGLETON, _MEMORY_DISABLED

    if not _MEMORY_DISABLED and _MEMORY_SINGLETON is None:
        try:
            from core.learning.target_memory_store import TargetMemoryStore

            base_path = os.environ.get("RPA_LEARNING_DIR", "data/learning")
            _MEMORY_SINGLETON = TargetMemoryStore(base_path=base_path)
            logger.info(
                "replay_memory: TargetMemoryStore initialisé (base=%s)", base_path,
            )
        except Exception as exc:
            logger.warning(
                "replay_memory: TargetMemoryStore indisponible (%s) — "
                "l'apprentissage persistant est désactivé", exc,
            )
            _MEMORY_DISABLED = True

    return None if _MEMORY_DISABLED else _MEMORY_SINGLETON
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Normalisation de texte et hash
|
||||
# =========================================================================
|
||||
|
||||
|
||||
def _norm_text(s: str) -> str:
|
||||
"""Normalise un texte pour un hash stable (accents, casse, NBSP, espaces)."""
|
||||
if not s:
|
||||
return ""
|
||||
s = s.replace("\u00A0", " ").strip().lower()
|
||||
s = unicodedata.normalize("NFKD", s)
|
||||
s = "".join(ch for ch in s if not unicodedata.combining(ch))
|
||||
return " ".join(s.split())
|
||||
|
||||
|
||||
def compute_screen_sig(window_title: str) -> str:
    """Compute the screen signature from the window title.

    The signature is the first 16 hex characters of the SHA-256 of the
    normalized title. A title that normalizes to nothing gives "".
    """
    normalized = _norm_text(window_title)
    return hashlib.sha256(normalized.encode("utf-8")).hexdigest()[:16] if normalized else ""
|
||||
|
||||
|
||||
class _TargetSpecLike:
|
||||
"""Adaptateur dict → objet pour `TargetMemoryStore._hash_target_spec()`.
|
||||
|
||||
Le hash interne de TargetMemoryStore utilise `getattr(spec, "by_role", ...)`
|
||||
qui ne fonctionne pas avec un dict brut. On expose les attributs nécessaires.
|
||||
|
||||
On intègre aussi `resolve_order` et `vlm_description` dans `context_hints`
|
||||
pour qu'ils entrent dans le hash — deux actions avec le même `by_text`
|
||||
mais un `resolve_order` différent doivent avoir des hashes distincts.
|
||||
"""
|
||||
|
||||
__slots__ = ("by_role", "by_text", "by_position", "context_hints")
|
||||
|
||||
def __init__(self, d: Dict[str, Any]):
|
||||
self.by_role = d.get("by_role", "") or ""
|
||||
self.by_text = d.get("by_text", "") or ""
|
||||
self.by_position = d.get("by_position")
|
||||
|
||||
hints = dict(d.get("context_hints") or {})
|
||||
resolve_order = d.get("resolve_order")
|
||||
if resolve_order:
|
||||
hints["_resolve_order"] = "|".join(resolve_order) if isinstance(
|
||||
resolve_order, list
|
||||
) else str(resolve_order)
|
||||
if d.get("vlm_description"):
|
||||
hints["_vlm_desc"] = str(d["vlm_description"])
|
||||
if d.get("anchor_hint"):
|
||||
hints["_anchor_hint"] = str(d["anchor_hint"])
|
||||
self.context_hints = hints
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Lookup — consulté AVANT la cascade coûteuse
|
||||
# =========================================================================
|
||||
|
||||
|
||||
def memory_lookup(
    window_title: str,
    target_spec: Dict[str, Any],
) -> Optional[Dict[str, Any]]:
    """Look for a learned resolution of this target on this screen.

    Returns:
        A dict with the keys `resolved`, `method`, `x_pct`, `y_pct`,
        `score`, `from_memory` and `screen_sig` when the store returns a
        usable entry; None otherwise (store unavailable, empty window
        title, lookup error, no entry, or invalid stored coordinates).
    """
    store = get_memory_store()
    screen_sig = compute_screen_sig(window_title) if store is not None else ""
    if not screen_sig:
        return None

    try:
        fp = store.lookup(screen_sig, _TargetSpecLike(target_spec))
    except Exception as exc:
        logger.debug("memory_lookup: erreur lookup (%s)", exc)
        return None
    if fp is None:
        return None

    # Light fingerprint: bbox = (x_pct, y_pct, 0, 0)
    try:
        x_pct, y_pct = float(fp.bbox[0]), float(fp.bbox[1])
    except (TypeError, IndexError, ValueError):
        logger.debug("memory_lookup: fingerprint bbox invalide")
        return None

    # Sanity check: percentages must lie in [0, 1].
    inside_screen = 0.0 <= x_pct <= 1.0 and 0.0 <= y_pct <= 1.0
    if not inside_screen:
        logger.warning(
            "memory_lookup: coords invalides (%.3f, %.3f) pour sig=%s — "
            "entrée ignorée",
            x_pct, y_pct, screen_sig,
        )
        return None

    # The method that succeeded when the entry was recorded is kept in
    # the fingerprint's `etype` field.
    method = fp.etype or "memory"
    confidence = float(getattr(fp, "confidence", 0.9) or 0.9)

    logger.info(
        "memory_lookup HIT : sig=%s method=%s coords=(%.4f, %.4f) conf=%.2f "
        "target='%s'",
        screen_sig, method, x_pct, y_pct, confidence,
        (target_spec.get("by_text") or "")[:60],
    )

    hit: Dict[str, Any] = {
        "resolved": True,
        "method": f"memory_{method}",
        "x_pct": x_pct,
        "y_pct": y_pct,
        "score": confidence,
        "from_memory": True,
        "screen_sig": screen_sig,
    }
    return hit
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Record — appelé APRÈS validation post-condition
|
||||
# =========================================================================
|
||||
|
||||
|
||||
def memory_record_success(
    window_title: str,
    target_spec: Dict[str, Any],
    x_pct: float,
    y_pct: float,
    method: str,
    confidence: float = 0.9,
) -> bool:
    """Record a successful resolution in the persistent memory.

    Meant to be called once the post-condition has been validated.

    Returns:
        True when the entry was written; False when the store is
        unavailable, the window title is empty, the coordinates are not
        numbers in [0, 1], or the store raised.
    """
    store = get_memory_store()
    screen_sig = compute_screen_sig(window_title) if store is not None else ""
    if not screen_sig:
        return False

    # Sanity check: numeric coordinates inside [0, 1].
    try:
        x_pct, y_pct = float(x_pct), float(y_pct)
    except (TypeError, ValueError):
        logger.debug("memory_record_success: coords non numériques, skip")
        return False
    inside_screen = 0.0 <= x_pct <= 1.0 and 0.0 <= y_pct <= 1.0
    if not inside_screen:
        logger.debug(
            "memory_record_success: coords hors [0,1] (%.3f, %.3f), skip",
            x_pct, y_pct,
        )
        return False

    try:
        from core.learning.target_memory_store import TargetFingerprint

        method_clean = method or "v4_unknown"
        score = float(confidence)

        # Light fingerprint: the click position goes in the first two
        # bbox values and the winning method in `etype`.
        fingerprint = TargetFingerprint(
            element_id=f"v4_{method_clean}",
            bbox=(x_pct, y_pct, 0.0, 0.0),
            role=target_spec.get("by_role", "") or None,
            etype=method_clean,
            label=(target_spec.get("by_text") or "")[:200] or None,
            confidence=score,
        )

        store.record_success(
            screen_signature=screen_sig,
            target_spec=_TargetSpecLike(target_spec),
            fingerprint=fingerprint,
            strategy_used=method_clean,
            confidence=score,
        )

        logger.info(
            "memory_record_success: sig=%s method=%s coords=(%.4f, %.4f) "
            "target='%s'",
            screen_sig, method_clean, x_pct, y_pct,
            (target_spec.get("by_text") or "")[:60],
        )
        return True

    except Exception as exc:
        logger.warning("memory_record_success: échec (%s)", exc)
        return False
|
||||
|
||||
|
||||
def memory_record_failure(
    window_title: str,
    target_spec: Dict[str, Any],
    error_message: str,
) -> bool:
    """Report a failure for this (screen signature, target) pair.

    Meant to be called when the action fails or when the post-condition
    is not satisfied. The failure is forwarded to the store's
    `record_failure`, with the message truncated to 200 characters.

    Returns:
        True when the store accepted the report; False when the store is
        unavailable, the window title is empty, or the store raised.
    """
    store = get_memory_store()
    screen_sig = compute_screen_sig(window_title) if store is not None else ""
    if not screen_sig:
        return False

    try:
        store.record_failure(
            screen_signature=screen_sig,
            target_spec=_TargetSpecLike(target_spec),
            error_message=(error_message or "unknown")[:200],
        )
        logger.debug(
            "memory_record_failure: sig=%s error='%s'",
            screen_sig, (error_message or "")[:80],
        )
    except Exception as exc:
        logger.debug("memory_record_failure: échec (%s)", exc)
        return False
    return True
|
||||
2385
agent_v0/server_v1/resolve_engine.py
Normal file
2385
agent_v0/server_v1/resolve_engine.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,622 +0,0 @@
|
||||
"""
|
||||
UIDetector - Détection Sémantique d'Éléments UI avec VLM
|
||||
|
||||
Utilise un Vision-Language Model (VLM) pour détecter et classifier
|
||||
les éléments UI avec leurs types et rôles sémantiques.
|
||||
"""
|
||||
|
||||
from typing import List, Dict, Optional, Any, Tuple
|
||||
from pathlib import Path
|
||||
from dataclasses import dataclass
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
import json
|
||||
import re
|
||||
|
||||
from ..models.ui_element import UIElement, UIElementEmbeddings, VisualFeatures
|
||||
from .ollama_client import OllamaClient, check_ollama_available
|
||||
|
||||
|
||||
@dataclass
class DetectionConfig:
    """Settings for UI detection."""

    # VLM model to use (qwen3-vl:8b is the recommended one).
    vlm_model: str = "qwen3-vl:8b"
    # Ollama endpoint.
    vlm_endpoint: str = "http://localhost:11434"
    # Minimum confidence threshold.
    confidence_threshold: float = 0.7
    # Maximum number of elements to detect.
    max_elements: int = 50
    # Detect regions of interest first.
    detect_regions: bool = True
    # Generate dual embeddings.
    use_embeddings: bool = True
|
||||
|
||||
|
||||
class UIDetector:
|
||||
"""
|
||||
Détecteur d'éléments UI sémantique
|
||||
|
||||
Utilise un VLM (Vision-Language Model) pour :
|
||||
1. Détecter les régions d'intérêt dans un screenshot
|
||||
2. Classifier le type de chaque élément UI
|
||||
3. Déterminer le rôle sémantique
|
||||
4. Extraire les features visuelles
|
||||
5. Générer des embeddings duaux (image + texte)
|
||||
"""
|
||||
|
||||
def __init__(self, config: Optional[DetectionConfig] = None):
    """
    Initialize the detector.

    Args:
        config: Detection settings; a default DetectionConfig is used
            when None (or any falsy value) is passed.
    """
    self.config = config or DetectionConfig()
    self.vlm_client = None
    # Tries to set up the VLM client; vlm_client stays None if that fails.
    self._initialize_vlm()
|
||||
|
||||
def _initialize_vlm(self) -> None:
|
||||
"""Initialiser le client VLM (Ollama)"""
|
||||
try:
|
||||
# Vérifier si Ollama est disponible
|
||||
if check_ollama_available(self.config.vlm_endpoint):
|
||||
self.vlm_client = OllamaClient(
|
||||
endpoint=self.config.vlm_endpoint,
|
||||
model=self.config.vlm_model
|
||||
)
|
||||
print(f"✓ VLM initialized: {self.config.vlm_model} at {self.config.vlm_endpoint}")
|
||||
else:
|
||||
print(f"⚠ Ollama not available at {self.config.vlm_endpoint}, using simulation mode")
|
||||
self.vlm_client = None
|
||||
except Exception as e:
|
||||
print(f"⚠ Failed to initialize VLM: {e}, using simulation mode")
|
||||
self.vlm_client = None
|
||||
|
||||
def detect(self,
           screenshot_path: str,
           window_context: Optional[Dict[str, Any]] = None) -> List[UIElement]:
    """
    Detect every UI element in a screenshot.

    Args:
        screenshot_path: Path to the screenshot file.
        window_context: Window context (title, process, etc.).

    Returns:
        Detected UIElements that meet the configured confidence threshold,
        capped at ``config.max_elements`` (highest confidence kept).
    """
    screenshot = self._load_image(screenshot_path)
    if screenshot is None:
        return []

    # Either ask for regions of interest or treat the whole image as one region.
    if self.config.detect_regions:
        candidate_regions = self._detect_regions_of_interest(screenshot, window_context)
    else:
        candidate_regions = [
            {"bbox": (0, 0, screenshot.width, screenshot.height), "confidence": 1.0}
        ]

    collected = []
    for area in candidate_regions:
        collected.extend(
            self._detect_elements_in_region(
                screenshot, area, screenshot_path, window_context
            )
        )

    # Drop anything below the confidence threshold.
    min_confidence = self.config.confidence_threshold
    collected = [item for item in collected if item.confidence >= min_confidence]

    # Only when over the cap: rank by confidence and keep the best ones.
    cap = self.config.max_elements
    if len(collected) > cap:
        collected = sorted(collected, key=lambda item: item.confidence, reverse=True)[:cap]

    return collected
|
||||
|
||||
def _load_image(self, screenshot_path: str) -> Optional[Image.Image]:
    """Load an image from a file.

    Returns:
        The result of ``Image.open(screenshot_path)``, or None if opening
        raised any exception (the error is printed, not re-raised).
    """
    try:
        return Image.open(screenshot_path)
    except Exception as e:
        print(f"Error loading image {screenshot_path}: {e}")
        return None
|
||||
|
||||
def _detect_regions_of_interest(self,
|
||||
image: Image.Image,
|
||||
window_context: Optional[Dict] = None) -> List[Dict]:
|
||||
"""
|
||||
Détecter les régions d'intérêt dans l'image
|
||||
|
||||
Utilise le VLM pour identifier les zones contenant des éléments UI.
|
||||
|
||||
Args:
|
||||
image: Image PIL
|
||||
window_context: Contexte de la fenêtre
|
||||
|
||||
Returns:
|
||||
Liste de régions {bbox: (x, y, w, h), confidence: float}
|
||||
"""
|
||||
if self.vlm_client is None:
|
||||
# Mode simulation : diviser l'image en grille
|
||||
return self._simulate_region_detection(image)
|
||||
|
||||
# Utiliser VLM pour détecter régions
|
||||
# Pour l'instant, on utilise l'image complète (plus simple et efficace)
|
||||
width, height = image.size
|
||||
return [{
|
||||
"bbox": (0, 0, width, height),
|
||||
"confidence": 1.0
|
||||
}]
|
||||
|
||||
def _simulate_region_detection(self, image: Image.Image) -> List[Dict]:
|
||||
"""Simulation de détection de régions (pour développement)"""
|
||||
width, height = image.size
|
||||
|
||||
# Diviser en grille 3x3 pour simulation
|
||||
regions = []
|
||||
grid_size = 3
|
||||
cell_w = width // grid_size
|
||||
cell_h = height // grid_size
|
||||
|
||||
for i in range(grid_size):
|
||||
for j in range(grid_size):
|
||||
regions.append({
|
||||
"bbox": (j * cell_w, i * cell_h, cell_w, cell_h),
|
||||
"confidence": 0.8
|
||||
})
|
||||
|
||||
return regions
|
||||
|
||||
def _detect_elements_in_region(self,
|
||||
image: Image.Image,
|
||||
region: Dict,
|
||||
screenshot_path: str,
|
||||
window_context: Optional[Dict] = None) -> List[UIElement]:
|
||||
"""
|
||||
Détecter éléments UI dans une région spécifique
|
||||
|
||||
Args:
|
||||
image: Image complète
|
||||
region: Région à analyser
|
||||
screenshot_path: Chemin du screenshot
|
||||
window_context: Contexte de la fenêtre
|
||||
|
||||
Returns:
|
||||
Liste d'UIElements dans cette région
|
||||
"""
|
||||
bbox = region["bbox"]
|
||||
x, y, w, h = bbox
|
||||
|
||||
# Extraire crop de la région
|
||||
region_image = image.crop((x, y, x + w, y + h))
|
||||
|
||||
# Détecter éléments avec VLM
|
||||
if self.vlm_client is None:
|
||||
# Mode simulation
|
||||
return self._simulate_element_detection(
|
||||
region_image, bbox, screenshot_path, window_context
|
||||
)
|
||||
|
||||
# Vraie détection avec VLM !
|
||||
return self._detect_with_vlm(
|
||||
region_image, bbox, screenshot_path, window_context
|
||||
)
|
||||
|
||||
def _detect_with_vlm(self,
|
||||
region_image: Image.Image,
|
||||
region_bbox: Tuple[int, int, int, int],
|
||||
screenshot_path: str,
|
||||
window_context: Optional[Dict] = None) -> List[UIElement]:
|
||||
"""
|
||||
Détecter éléments UI avec le VLM (vraie détection)
|
||||
|
||||
Args:
|
||||
region_image: Image de la région
|
||||
region_bbox: Bbox de la région (x, y, w, h)
|
||||
screenshot_path: Chemin du screenshot
|
||||
window_context: Contexte de la fenêtre
|
||||
|
||||
Returns:
|
||||
Liste d'UIElements détectés
|
||||
"""
|
||||
x_offset, y_offset, w, h = region_bbox
|
||||
|
||||
# Construire le prompt pour le VLM
|
||||
context_str = ""
|
||||
if window_context:
|
||||
context_str = f"\nWindow context: {window_context.get('title', 'Unknown')}"
|
||||
|
||||
# Approche simplifiée : demander une description structurée
|
||||
prompt = f"""List all interactive UI elements in this screenshot.{context_str}
|
||||
|
||||
For each element, provide:
|
||||
- type (button, text_input, checkbox, link, etc.)
|
||||
- label (visible text)
|
||||
- approximate position (top/middle/bottom, left/center/right)
|
||||
|
||||
Format as JSON array:
|
||||
[{{"type": "button", "label": "Submit", "position": "middle-center"}}]
|
||||
|
||||
Return ONLY the JSON array, no other text."""
|
||||
|
||||
# Appeler le VLM
|
||||
# Note: Utiliser le chemin du screenshot complet plutôt que le crop
|
||||
# car certains VLM gèrent mieux les fichiers que les images PIL
|
||||
result = self.vlm_client.generate(
|
||||
prompt=prompt,
|
||||
image_path=screenshot_path, # Utiliser le chemin au lieu de l'image PIL
|
||||
temperature=0.1,
|
||||
max_tokens=1000
|
||||
)
|
||||
|
||||
if not result["success"]:
|
||||
print(f"❌ VLM detection failed: {result.get('error', 'Unknown error')}")
|
||||
return []
|
||||
|
||||
if not result["response"] or len(result["response"].strip()) == 0:
|
||||
print(f"⚠ VLM returned empty response")
|
||||
return []
|
||||
|
||||
# Parser la réponse JSON
|
||||
elements = self._parse_vlm_response(
|
||||
result["response"],
|
||||
region_bbox,
|
||||
screenshot_path,
|
||||
window_context
|
||||
)
|
||||
|
||||
return elements
|
||||
|
||||
def _parse_vlm_response(self,
|
||||
response: str,
|
||||
region_bbox: Tuple[int, int, int, int],
|
||||
screenshot_path: str,
|
||||
window_context: Optional[Dict] = None) -> List[UIElement]:
|
||||
"""
|
||||
Parser la réponse JSON du VLM
|
||||
|
||||
Args:
|
||||
response: Réponse texte du VLM
|
||||
region_bbox: Bbox de la région
|
||||
screenshot_path: Chemin du screenshot
|
||||
window_context: Contexte de la fenêtre
|
||||
|
||||
Returns:
|
||||
Liste d'UIElements
|
||||
"""
|
||||
x_offset, y_offset, region_w, region_h = region_bbox
|
||||
|
||||
try:
|
||||
# Extraire le JSON de la réponse (peut contenir du texte avant/après)
|
||||
json_match = re.search(r'\[.*\]', response, re.DOTALL)
|
||||
if not json_match:
|
||||
print(f"No JSON array found in VLM response")
|
||||
print(f"VLM response was: {response[:500]}...")
|
||||
return []
|
||||
|
||||
elements_data = json.loads(json_match.group(0))
|
||||
|
||||
if not isinstance(elements_data, list):
|
||||
print(f"VLM response is not a JSON array")
|
||||
return []
|
||||
|
||||
elements = []
|
||||
for i, elem_data in enumerate(elements_data):
|
||||
try:
|
||||
# Gérer les positions (pourcentages ou textuelles)
|
||||
if 'x' in elem_data and 'y' in elem_data:
|
||||
# Format avec pourcentages
|
||||
x_pct = float(elem_data.get('x', 0))
|
||||
y_pct = float(elem_data.get('y', 0))
|
||||
w_pct = float(elem_data.get('width', 10))
|
||||
h_pct = float(elem_data.get('height', 5))
|
||||
|
||||
elem_x = x_offset + int(region_w * x_pct / 100)
|
||||
elem_y = y_offset + int(region_h * y_pct / 100)
|
||||
elem_w = int(region_w * w_pct / 100)
|
||||
elem_h = int(region_h * h_pct / 100)
|
||||
else:
|
||||
# Format avec position textuelle (top/middle/bottom, left/center/right)
|
||||
position = elem_data.get('position', 'middle-center').lower()
|
||||
|
||||
# Parser la position
|
||||
if 'top' in position:
|
||||
elem_y = y_offset + region_h // 4
|
||||
elif 'bottom' in position:
|
||||
elem_y = y_offset + 3 * region_h // 4
|
||||
else: # middle
|
||||
elem_y = y_offset + region_h // 2
|
||||
|
||||
if 'left' in position:
|
||||
elem_x = x_offset + region_w // 4
|
||||
elif 'right' in position:
|
||||
elem_x = x_offset + 3 * region_w // 4
|
||||
else: # center
|
||||
elem_x = x_offset + region_w // 2
|
||||
|
||||
# Taille par défaut basée sur le type
|
||||
elem_type = elem_data.get('type', 'button')
|
||||
if elem_type == 'button':
|
||||
elem_w, elem_h = 100, 40
|
||||
elif elem_type == 'text_input':
|
||||
elem_w, elem_h = 200, 35
|
||||
elif elem_type == 'checkbox':
|
||||
elem_w, elem_h = 25, 25
|
||||
else:
|
||||
elem_w, elem_h = 80, 30
|
||||
|
||||
# Créer l'UIElement
|
||||
element = UIElement(
|
||||
element_id=f"vlm_{elem_x}_{elem_y}",
|
||||
type=elem_data.get('type', 'unknown'),
|
||||
role=elem_data.get('role', 'unknown'),
|
||||
bbox=(elem_x, elem_y, elem_w, elem_h),
|
||||
center=(elem_x + elem_w // 2, elem_y + elem_h // 2),
|
||||
label=elem_data.get('label', ''),
|
||||
label_confidence=0.85, # Confiance par défaut pour VLM
|
||||
embeddings=UIElementEmbeddings(),
|
||||
visual_features=VisualFeatures(
|
||||
dominant_color="rgb(128, 128, 128)",
|
||||
has_icon=elem_data.get('type') == 'icon',
|
||||
shape="rectangle",
|
||||
size_category="medium"
|
||||
),
|
||||
confidence=0.85, # Confiance par défaut pour VLM
|
||||
metadata={
|
||||
"detected_by": "vlm",
|
||||
"model": self.config.vlm_model,
|
||||
"screenshot_path": screenshot_path
|
||||
}
|
||||
)
|
||||
|
||||
elements.append(element)
|
||||
|
||||
except (KeyError, ValueError, TypeError) as e:
|
||||
print(f"Error parsing element {i}: {e}")
|
||||
continue
|
||||
|
||||
return elements
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
print(f"Failed to parse VLM JSON response: {e}")
|
||||
print(f"Response was: {response[:200]}...")
|
||||
return []
|
||||
|
||||
def _simulate_element_detection(self,
                                region_image: Image.Image,
                                region_bbox: Tuple[int, int, int, int],
                                screenshot_path: str,
                                window_context: Optional[Dict] = None) -> List[UIElement]:
    """Simulated element detection (for development).

    Produces randomly placed, randomly typed placeholder elements inside
    ``region_bbox``. ``region_image`` and ``window_context`` are not used.
    """
    # For simulation, create a few dummy elements
    elements = []

    x_offset, y_offset, w, h = region_bbox

    # Simulate 2-3 elements per region (randint's upper bound is exclusive)
    num_elements = np.random.randint(2, 4)

    for i in range(num_elements):
        # Random position inside the region
        elem_w = np.random.randint(50, 150)
        elem_h = np.random.randint(20, 60)
        # max(1, ...) keeps randint's range valid when the element is larger than the region
        elem_x = x_offset + np.random.randint(0, max(1, w - elem_w))
        elem_y = y_offset + np.random.randint(0, max(1, h - elem_h))

        # Random type and role
        types = ["button", "text_input", "checkbox", "link", "icon"]
        roles = ["primary_action", "cancel", "submit", "form_input", "navigation"]

        element = UIElement(
            element_id=f"elem_{elem_x}_{elem_y}",
            type=np.random.choice(types),
            role=np.random.choice(roles),
            bbox=(elem_x, elem_y, elem_w, elem_h),
            center=(elem_x + elem_w // 2, elem_y + elem_h // 2),
            label=f"Element {i}",
            label_confidence=np.random.uniform(0.7, 0.95),
            embeddings=UIElementEmbeddings(),  # Empty embeddings
            visual_features=VisualFeatures(
                dominant_color="rgb(128, 128, 128)",
                has_icon=np.random.choice([True, False]),
                shape="rectangle",
                size_category="medium"
            ),
            confidence=np.random.uniform(0.7, 0.95),
            metadata={"simulated": True, "screenshot_path": screenshot_path}
        )

        elements.append(element)

    return elements
|
||||
|
||||
def classify_type(self,
                  element_image: Image.Image,
                  context: Optional[Dict] = None) -> Tuple[str, float]:
    """
    Classify the type of a UI element.

    Args:
        element_image: Image of the element.
        context: Additional context.

    Returns:
        (type, confidence). Without a VLM client a random type and a random
        confidence in [0.7, 0.95) are returned; a failed VLM call yields
        ("unknown", 0.0).
    """
    client = self.vlm_client
    if client is not None:
        # Real classification through the VLM.
        outcome = client.classify_element_type(element_image, context)
        if not outcome["success"]:
            return "unknown", 0.0
        return outcome["type"], outcome["confidence"]

    # Simulation
    candidate_types = ["button", "text_input", "checkbox", "radio", "dropdown",
                       "tab", "link", "icon", "table_row", "menu_item"]
    return np.random.choice(candidate_types), np.random.uniform(0.7, 0.95)
|
||||
|
||||
def classify_role(self,
                  element_image: Image.Image,
                  element_type: str,
                  context: Optional[Dict] = None) -> Tuple[str, float]:
    """
    Classify the semantic role of an element.

    Args:
        element_image: Image of the element.
        element_type: Type of the element.
        context: Additional context.

    Returns:
        (role, confidence). Without a VLM client a random role and a random
        confidence in [0.7, 0.95) are returned; a failed VLM call yields
        ("unknown", 0.0).
    """
    client = self.vlm_client
    if client is not None:
        # Real classification through the VLM.
        outcome = client.classify_element_role(element_image, element_type, context)
        if not outcome["success"]:
            return "unknown", 0.0
        return outcome["role"], outcome["confidence"]

    # Simulation
    candidate_roles = ["primary_action", "cancel", "submit", "form_input",
                       "search_field", "navigation", "settings", "close"]
    return np.random.choice(candidate_roles), np.random.uniform(0.7, 0.95)
|
||||
|
||||
def extract_visual_features(self,
                            element_image: Image.Image) -> VisualFeatures:
    """
    Extract visual features from an element image.

    Computes an average colour, a coarse shape from the aspect ratio, a
    size category from the pixel area, and a simple icon heuristic
    (small and roughly square).

    Args:
        element_image: Image of the element.

    Returns:
        VisualFeatures
    """
    # Average colour over all pixels.
    img_array = np.array(element_image)
    if img_array.ndim == 3 and img_array.size > 0:
        # Use at most the first three channels so the alpha channel of an
        # RGBA image does not end up as a fourth "colour" component; this
        # keeps the result a 3-tuple like the fallback below.
        colour_channels = img_array[..., :3]
        dominant_color = tuple(colour_channels.mean(axis=(0, 1)).astype(int).tolist())
    else:
        # Non multi-channel (or empty) image: neutral grey fallback.
        dominant_color = (128, 128, 128)
    # NOTE(review): other call sites in this file pass dominant_color as an
    # "rgb(r, g, b)" string — confirm which representation VisualFeatures expects.

    # Shape (simplified), from the aspect ratio.
    width, height = element_image.size
    aspect_ratio = width / height if height > 0 else 1.0

    if aspect_ratio > 3:
        shape = "horizontal_bar"
    elif aspect_ratio < 0.33:
        shape = "vertical_bar"
    elif 0.8 <= aspect_ratio <= 1.2:
        shape = "square"
    else:
        shape = "rectangle"

    # Size category, from the pixel area.
    area = width * height
    if area < 1000:
        size_category = "small"
    elif area < 10000:
        size_category = "medium"
    else:
        size_category = "large"

    # Icon detection (simplified): small and roughly square.
    has_icon = width < 100 and height < 100 and 0.8 <= aspect_ratio <= 1.2

    return VisualFeatures(
        dominant_color=dominant_color,
        has_icon=has_icon,
        shape=shape,
        size_category=size_category
    )
|
||||
|
||||
def generate_embeddings(self,
                        element_image: Image.Image,
                        element_label: str,
                        embedder: Optional[Any] = None) -> Optional[UIElementEmbeddings]:
    """
    Generate dual embeddings (image + text) for an element.

    Both embedding steps are still TODO stubs: neither id is ever assigned,
    so as written this method returns None.

    Args:
        element_image: Image of the element.
        element_label: Text label of the element.
        embedder: Embedder to use (optional).

    Returns:
        UIElementEmbeddings or None
    """
    if not self.config.use_embeddings or embedder is None:
        return None

    try:
        # Generate image embedding
        image_embedding_id = None
        if hasattr(embedder, 'embed_image'):
            # Temporarily save the image
            # TODO: implement saving and embedding
            pass

        # Generate text embedding
        text_embedding_id = None
        if element_label and hasattr(embedder, 'embed_text'):
            # TODO: implement text embedding
            pass

        # Only build a result when at least one embedding id was produced.
        if image_embedding_id or text_embedding_id:
            return UIElementEmbeddings(
                image_embedding_id=image_embedding_id,
                text_embedding_id=text_embedding_id,
                provider="openclip_ViT-B-32",
                dimensions=512
            )
    except Exception as e:
        print(f"Warning: Failed to generate embeddings: {e}")

    return None
|
||||
|
||||
def set_vlm_client(self, client: Any) -> None:
    """Set the VLM client (replaces whatever ``self.vlm_client`` held)."""
    self.vlm_client = client
|
||||
|
||||
def get_config(self) -> DetectionConfig:
    """Return the detector's configuration object (not a copy)."""
    return self.config
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Fonctions utilitaires
|
||||
# ============================================================================
|
||||
|
||||
def create_detector(vlm_model: str = "qwen3-vl:8b",
                    confidence_threshold: float = 0.7) -> UIDetector:
    """
    Build a UIDetector from a custom model name and confidence threshold.

    All other DetectionConfig fields keep their defaults.

    Args:
        vlm_model: VLM model to use.
        confidence_threshold: Confidence threshold.

    Returns:
        Configured UIDetector.
    """
    return UIDetector(
        DetectionConfig(
            vlm_model=vlm_model,
            confidence_threshold=confidence_threshold,
        )
    )
|
||||
369
core/workflow/execution_compiler.py
Normal file
369
core/workflow/execution_compiler.py
Normal file
@@ -0,0 +1,369 @@
|
||||
# core/workflow/execution_compiler.py
|
||||
"""
|
||||
ExecutionCompiler — Compile un WorkflowIR en ExecutionPlan.
|
||||
|
||||
Pièce maîtresse de l'architecture V4.
|
||||
"Le LLM prépare et compile. Le runtime exécute."
|
||||
|
||||
Le compilateur :
|
||||
1. Prend chaque étape du WorkflowIR
|
||||
2. Compile une stratégie de résolution pour chaque action (OCR > template > VLM)
|
||||
3. Définit les timeouts, retries, fallbacks et recovery
|
||||
4. Produit un ExecutionPlan déterministe et borné
|
||||
|
||||
L'objectif : zéro VLM au runtime pour les cas normaux.
|
||||
Le VLM est un exception handler, pas le chemin principal.
|
||||
|
||||
Le compilateur utilise :
|
||||
- Les données de l'enregistrement (crops, textes OCR) pour pré-compiler
|
||||
- L'historique d'apprentissage (ReplayLearner) pour choisir la meilleure stratégie
|
||||
- Le contexte métier (DomainContext) pour adapter les paramètres
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import uuid
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from .workflow_ir import WorkflowIR, Step, Action
|
||||
from .execution_plan import (
|
||||
ExecutionPlan, ExecutionNode, ResolutionStrategy, SuccessCondition,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Estimated time per action type (ms)
_ACTION_TIME_ESTIMATES = {
    "click": 200,  # OCR lookup + click
    "type": 500,  # Char-by-char typing
    "key_combo": 100,
    "wait": 0,  # The duration_ms lives on the action itself
    "scroll": 200,
}
|
||||
|
||||
|
||||
class ExecutionCompiler:
|
||||
"""Compile un WorkflowIR en ExecutionPlan.
|
||||
|
||||
Usage :
|
||||
compiler = ExecutionCompiler()
|
||||
plan = compiler.compile(workflow_ir, target_machine="VM_Win11")
|
||||
plan.save("data/plans/")
|
||||
"""
|
||||
|
||||
def __init__(self, learning_dir: str = ""):
    # Directory passed to ReplayLearner; an empty value falls back to the default path.
    self._learning_dir = learning_dir or "data/learning/replay_results"
|
||||
|
||||
def compile(
    self,
    ir: WorkflowIR,
    target_machine: str = "",
    target_resolution: str = "1280x800",
    params: Optional[Dict[str, str]] = None,
    surface_profile=None,
) -> ExecutionPlan:
    """Compile a WorkflowIR into an ExecutionPlan.

    Args:
        ir: The WorkflowIR to compile.
        target_machine: Target machine (used to adapt strategies).
        target_resolution: Resolution of the target machine.
        params: Variables to substitute. They override the defaults declared
            in ``ir.variables``; variables not listed keep their default.
        surface_profile: Optional SurfaceProfile used to adapt parameters.
            If provided, timeouts/thresholds/retries are taken from it.

    Returns:
        The compiled ExecutionPlan, with per-method node counts and an
        estimated duration filled in.
    """
    t_start = time.time()

    # Start from the IR defaults, then apply caller overrides. Previously a
    # partial ``params`` replaced the whole mapping and silently dropped the
    # defaults of every variable the caller did not mention.
    variables = {v.name: v.default for v in ir.variables}
    if params:
        variables.update(params)

    plan = ExecutionPlan(
        plan_id=f"plan_{uuid.uuid4().hex[:8]}",
        workflow_id=ir.workflow_id,
        version=ir.version,
        created_at=time.time(),
        domain=ir.domain,
        target_machine=target_machine,
        target_resolution=target_resolution,
        variables=variables,
    )

    # Consult the learning history.
    learned_strategies = self._load_learned_strategies()

    # Compile each step.
    for step in ir.steps:
        nodes = self._compile_step(step, ir, learned_strategies, surface_profile)
        plan.nodes.extend(nodes)

    # Compilation statistics: count nodes by the method of their primary strategy.
    primary_methods = [
        n.strategy_primary.method for n in plan.nodes if n.strategy_primary
    ]
    plan.total_nodes = len(plan.nodes)
    plan.nodes_with_ocr = primary_methods.count("ocr")
    plan.nodes_with_template = primary_methods.count("template")
    plan.nodes_with_vlm = primary_methods.count("vlm")
    # Per-node estimate (default 200 ms for unknown types) plus any explicit
    # wait duration, converted from ms to seconds.
    plan.estimated_duration_s = sum(
        _ACTION_TIME_ESTIMATES.get(n.action_type, 200) + n.duration_ms
        for n in plan.nodes
    ) / 1000.0

    elapsed = time.time() - t_start
    logger.info(
        f"Compilation: {plan.total_nodes} nœuds en {elapsed:.1f}s — "
        f"OCR={plan.nodes_with_ocr}, template={plan.nodes_with_template}, "
        f"VLM={plan.nodes_with_vlm} (exception handler)"
    )

    return plan
|
||||
|
||||
def _compile_step(
|
||||
self,
|
||||
step: Step,
|
||||
ir: WorkflowIR,
|
||||
learned: Dict[str, str],
|
||||
surface_profile=None,
|
||||
) -> List[ExecutionNode]:
|
||||
"""Compiler une étape en nœuds d'exécution."""
|
||||
nodes = []
|
||||
|
||||
for i, action in enumerate(step.actions):
|
||||
node = self._compile_action(
|
||||
action=action,
|
||||
step=step,
|
||||
action_index=i,
|
||||
ir=ir,
|
||||
learned=learned,
|
||||
surface_profile=surface_profile,
|
||||
)
|
||||
nodes.append(node)
|
||||
|
||||
return nodes
|
||||
|
||||
def _compile_action(
    self,
    action: Action,
    step: Step,
    action_index: int,
    ir: WorkflowIR,
    learned: Dict[str, str],
    surface_profile=None,
) -> ExecutionNode:
    """Compile one action into an execution node with its resolution strategy.

    Args:
        action: The action to compile; ``action.type`` selects the branch.
        step: The step the action belongs to (supplies intent, id, postcondition).
        action_index: Position of the action inside the step (used in the node id).
        ir: The workflow being compiled (not read here).
        learned: Learned target -> method mapping, forwarded to click resolution.
        surface_profile: Optional profile overriding click timeout and retries.

    Returns:
        The populated ExecutionNode. Action types other than click, type,
        key_combo, wait and scroll get only the base fields.
    """
    node = ExecutionNode(
        node_id=f"n_{step.step_id}_{action_index}",
        action_type=action.type,
        intent=step.intent,
        step_id=step.step_id,
        is_optional=step.is_optional,
    )

    # Defaults, overridden by the surface_profile when one is provided.
    default_click_timeout = 10000
    default_click_retries = 2
    if surface_profile is not None:
        default_click_timeout = getattr(surface_profile, "timeout_click_ms", 10000)
        default_click_retries = getattr(surface_profile, "max_retries", 2)

    if action.type == "click":
        # Compile the resolution strategies for this click.
        node.strategy_primary, node.strategy_fallbacks = self._compile_click_resolution(
            action, step, learned, surface_profile,
        )
        node.timeout_ms = default_click_timeout
        node.max_retries = default_click_retries
        node.recovery_action = "escape"

        # STRICT success condition based on the expected window title.
        # If expected_window_after is set, use title_match (strict);
        # otherwise fall back to screen_changed (weak).
        expected_after = getattr(action, "expected_window_after", "")
        if expected_after and expected_after != "unknown_window":
            node.success_condition = SuccessCondition(
                method="title_match",
                expected_title=expected_after,
                description=step.postcondition or f"Fenêtre attendue: {expected_after}",
            )
        elif step.postcondition:
            node.success_condition = SuccessCondition(
                method="screen_changed",
                description=step.postcondition,
            )

        # Strict precondition: the window BEFORE the click must match.
        # Stored as a dedicated field on the node for the executor.
        expected_before = getattr(action, "expected_window_before", "")
        if expected_before and expected_before != "unknown_window":
            node.expected_window_before = expected_before

    elif action.type == "type":
        node.text = action.text
        node.variable_name = action.text.strip("{}") if action.variable else ""
        node.timeout_ms = 5000
        node.max_retries = 0  # No retry on typing
        node.recovery_action = "undo"

    elif action.type == "key_combo":
        node.keys = action.keys
        node.timeout_ms = 3000
        node.max_retries = 0
        node.recovery_action = "undo"

    elif action.type == "wait":
        node.duration_ms = action.duration_ms or 1000
        # Derive the timeout from the effective (defaulted) duration: using
        # the raw action value raised TypeError on None and produced a
        # timeout inconsistent with the 1000 ms default on 0.
        node.timeout_ms = node.duration_ms + 2000
        node.max_retries = 0
        node.recovery_action = "none"

    elif action.type == "scroll":
        node.timeout_ms = 3000
        node.max_retries = 0
        node.recovery_action = "none"

    return node
|
||||
|
||||
def _compile_click_resolution(
    self,
    action: Action,
    step: Step,
    learned: Dict[str, str],
    surface_profile=None,
) -> tuple:
    """Compile the resolution strategies for a click.

    Uses the visual enrichment data (action._enrichment) when available:
    - by_text (OCR)
    - anchor_image_base64 (template)
    - vlm_description (VLM)
    - uia_snapshot (UIA on native Windows)

    Priority order (varies with the surface):
    1. UIA (if a snapshot is present AND the surface is native AND the helper is available) — 10-20ms
    2. Exact OCR (if visible text) — 100-200ms
    3. Template matching (if a crop exists) — 10ms
    4. VLM — exception handler

    Learning can reorder this when a strategy worked better before.

    Returns:
        (primary, fallbacks): the primary ResolutionStrategy and the ordered
        list of fallback strategies.
    """
    primary = None
    fallbacks = []

    # Read the visual enrichment if present
    enrichment = getattr(action, "_enrichment", None) or {}
    by_text_from_enrich = enrichment.get("by_text", "")
    anchor_b64 = enrichment.get("anchor_image_base64", "")
    vlm_desc_from_enrich = enrichment.get("vlm_description", "")
    window_title = enrichment.get("window_title", "")
    uia_snapshot = enrichment.get("uia_snapshot") or {}

    # Text source: enrichment > anchor_hint > target
    target_text = by_text_from_enrich or action.anchor_hint or action.target
    # Do not use "unknown_window" as OCR text
    if target_text == "unknown_window":
        target_text = ""

    learned_method = learned.get(target_text, "")

    # Are we on a surface where UIA can be enabled?
    uia_eligible = False
    if surface_profile is not None:
        from .surface_classifier import SurfaceType
        surface_type = getattr(surface_profile, "surface_type", None)
        uia_available = getattr(surface_profile, "uia_available", False)
        uia_eligible = (
            uia_available
            and surface_type == SurfaceType.WINDOWS_NATIVE
        )
    else:
        # Without an explicit profile, enable UIA if the snapshot is present
        # (the agent decides at runtime whether it can use it)
        uia_eligible = bool(uia_snapshot)

    # UIA strategy — the fastest and most precise on native Windows
    if uia_snapshot and uia_snapshot.get("name") and uia_eligible:
        uia_strategy = ResolutionStrategy(
            method="uia",
            uia_name=uia_snapshot.get("name", ""),
            uia_control_type=uia_snapshot.get("control_type", ""),
            uia_automation_id=uia_snapshot.get("automation_id", ""),
            uia_parent_path=uia_snapshot.get("parent_path", []),
            threshold=0.95,
        )
        primary = uia_strategy

    # OCR strategy — visible text is the best anchor
    if target_text:
        ocr_strategy = ResolutionStrategy(
            method="ocr",
            target_text=target_text,
            threshold=0.7,
        )
        # OCR becomes primary only if nothing is primary yet and the learned
        # method is absent or one of the OCR-family methods; otherwise it is a fallback.
        if primary is None and (
            not learned_method
            or learned_method in ("ocr", "som_text_match", "hybrid_text_direct", "v4_ocr")
        ):
            primary = ocr_strategy
        else:
            fallbacks.append(ocr_strategy)

    # Template strategy — the visual crop from the recording
    if anchor_b64:
        template_strategy = ResolutionStrategy(
            method="template",
            target_text=target_text,
            anchor_b64=anchor_b64,
            threshold=0.85,
        )
        # Template becomes primary only when learning explicitly favoured a template method.
        if primary is None and learned_method in (
            "anchor_template", "template_matching", "v4_template"
        ):
            primary = template_strategy
        else:
            fallbacks.append(template_strategy)

    # VLM strategy — exception handler (last resort)
    vlm_description = vlm_desc_from_enrich or action.target or step.intent
    if vlm_description and vlm_description != "unknown_window":
        vlm_strategy = ResolutionStrategy(
            method="vlm",
            vlm_description=vlm_description,
            threshold=0.6,
        )
        fallbacks.append(vlm_strategy)

    # If no primary was found, promote the first fallback
    if primary is None:
        if fallbacks:
            primary = fallbacks.pop(0)
        else:
            # Last resort: VLM with the business intent
            primary = ResolutionStrategy(
                method="vlm",
                vlm_description=step.intent or "élément UI",
                threshold=0.5,
            )

    return primary, fallbacks
|
||||
|
||||
def _load_learned_strategies(self) -> Dict[str, str]:
|
||||
"""Charger les stratégies apprises (ReplayLearner)."""
|
||||
try:
|
||||
from agent_v0.server_v1.replay_learner import ReplayLearner
|
||||
learner = ReplayLearner(learning_dir=self._learning_dir)
|
||||
# Construire un mapping target → best_method depuis l'historique
|
||||
strategies = {}
|
||||
for outcome in learner._recent:
|
||||
if outcome.success and outcome.resolution_method and outcome.target_description:
|
||||
strategies[outcome.target_description] = outcome.resolution_method
|
||||
return strategies
|
||||
except Exception:
|
||||
return {}
|
||||
285
core/workflow/execution_plan.py
Normal file
285
core/workflow/execution_plan.py
Normal file
@@ -0,0 +1,285 @@
|
||||
# core/workflow/execution_plan.py
|
||||
"""
|
||||
ExecutionPlan — Plan d'exécution strict, borné et versionné.
|
||||
|
||||
C'est ce que le runtime exécute. Pas d'improvisation — tout est pré-compilé :
|
||||
- chaque nœud a une stratégie de résolution primaire + fallbacks
|
||||
- chaque nœud a un timeout, un retry policy, une condition de succès
|
||||
- le VLM n'intervient qu'en exception handler (pas en chemin principal)
|
||||
|
||||
Le runtime ne fait que : exécuter → observer → vérifier → suite ou fallback.
|
||||
|
||||
Cycle : WorkflowIR → ExecutionCompiler → ExecutionPlan → Runtime
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class ResolutionStrategy:
    """Pre-compiled description of how to locate one UI element at replay time.

    The strategy is compiled ahead of execution; per the module notes the
    runtime is not expected to need the VLM to resolve it.
    """
    method: str  # "uia", "ocr", "template", "position", "vlm", "dom"
    target_text: str = ""  # Text to look for (OCR)
    anchor_b64: str = ""  # Reference crop (template matching)
    zone: Dict[str, float] = field(default_factory=dict)  # Search zone {x_min, y_min, x_max, y_max}
    position_hint: str = ""  # Free-text location hint, e.g. "top right", "in the taskbar"
    vlm_description: str = ""  # VLM description (last resort)
    threshold: float = 0.8  # Confidence threshold

    # UIA strategy (Windows UI Automation).
    # Used when the recording captured a UIA snapshot at click time; at replay
    # the Windows agent looks the element up again through these properties.
    uia_name: str = ""  # Name property of the element
    uia_control_type: str = ""  # ControlType (Button, Edit, MenuItem, ...)
    uia_automation_id: str = ""  # AutomationId (optional)
    uia_parent_path: List[Dict[str, str]] = field(default_factory=list)

    # DOM strategy (web with CDP enabled) — groundwork for later use.
    dom_selector: str = ""  # CSS selector
    dom_xpath: str = ""  # XPath
    dom_url_pattern: str = ""  # URL pattern to match

    def to_dict(self) -> Dict[str, Any]:
        """Serialise to a compact dict.

        ``method`` and ``threshold`` are always present; every other field is
        emitted only when truthy. ``anchor_b64`` is cut to its first 50
        characters followed by ``"..."`` for readability.

        NOTE(review): because of that truncation, a dict produced here cannot
        be fed back to ``from_dict`` without losing the anchor image — confirm
        whether persisted plans are expected to be replayable.
        """
        payload: Dict[str, Any] = {"method": self.method}
        optional_fields = (
            "target_text",
            "anchor_b64",
            "zone",
            "position_hint",
            "vlm_description",
            "uia_name",
            "uia_control_type",
            "uia_automation_id",
            "uia_parent_path",
            "dom_selector",
            "dom_xpath",
            "dom_url_pattern",
        )
        for attr in optional_fields:
            value = getattr(self, attr)
            if not value:
                continue
            if attr == "anchor_b64":
                value = value[:50] + "..."  # Truncated for readability
            payload[attr] = value
        payload["threshold"] = self.threshold
        return payload

    @classmethod
    def from_dict(cls, d: Dict) -> "ResolutionStrategy":
        """Build a strategy from a dict, ignoring keys that are not dataclass fields."""
        known = cls.__dataclass_fields__
        kwargs = {}
        for key, value in d.items():
            if key in known:
                kwargs[key] = value
        return cls(**kwargs)
|
||||
|
||||
|
||||
@dataclass
class SuccessCondition:
    """Success condition of a node — how to check that the action worked."""
    method: str = "screen_changed"  # "screen_changed", "title_match", "text_visible", "none"
    expected_title: str = ""  # Window title expected after the action
    expected_text: str = ""  # Text that must appear
    description: str = ""  # Description for the VLM critic (exception handler)

    def to_dict(self) -> Dict[str, Any]:
        """Serialise to a dict: ``method`` always, the other fields only when non-empty."""
        payload: Dict[str, Any] = {"method": self.method}
        for attr in ("expected_title", "expected_text", "description"):
            value = getattr(self, attr)
            if value:
                payload[attr] = value
        return payload

    @classmethod
    def from_dict(cls, d: Dict) -> "SuccessCondition":
        """Build a condition from a dict, ignoring keys that are not dataclass fields."""
        known = cls.__dataclass_fields__
        kwargs = {}
        for key, value in d.items():
            if key in known:
                kwargs[key] = value
        return cls(**kwargs)
|
||||
|
||||
|
||||
@dataclass
class ExecutionNode:
    """Execution node — one action to run together with its full strategy.

    Serialisation contract: ``to_dict`` / ``from_dict`` round-trip every field
    (subject to what the nested strategy objects themselves emit).
    """
    node_id: str
    action_type: str  # click, type, key_combo, wait, scroll
    intent: str = ""  # Business intent (for logging / audit)

    # Pre-compiled visual resolution
    strategy_primary: Optional[ResolutionStrategy] = None
    strategy_fallbacks: List[ResolutionStrategy] = field(default_factory=list)

    # Action payload
    text: str = ""  # Text to type
    keys: List[str] = field(default_factory=list)
    duration_ms: int = 0
    variable_name: str = ""  # Set when the text is a variable

    # Execution bounds
    timeout_ms: int = 10000  # Timeout for this action
    max_retries: int = 1  # Number of retries allowed
    retry_delay_ms: int = 2000  # Delay between retries

    # Verification
    success_condition: Optional[SuccessCondition] = None

    # Strict window control (pre-condition)
    expected_window_before: str = ""  # The active window must match BEFORE the action

    # Recovery
    recovery_action: str = "escape"  # "escape", "undo", "close", "none"

    # Context
    step_id: str = ""  # Reference to the WorkflowIR step
    is_optional: bool = False

    def to_dict(self) -> Dict[str, Any]:
        """Serialise the node to a dict.

        ``node_id``, ``action_type``, ``timeout_ms``, ``max_retries``,
        ``retry_delay_ms`` and ``recovery_action`` are always emitted; the
        remaining fields only when they hold a truthy value.
        """
        d: Dict[str, Any] = {
            "node_id": self.node_id,
            "action_type": self.action_type,
        }
        if self.intent:
            d["intent"] = self.intent
        if self.strategy_primary:
            d["strategy_primary"] = self.strategy_primary.to_dict()
        if self.strategy_fallbacks:
            d["strategy_fallbacks"] = [s.to_dict() for s in self.strategy_fallbacks]
        if self.text:
            d["text"] = self.text
        if self.keys:
            d["keys"] = self.keys
        if self.duration_ms:
            d["duration_ms"] = self.duration_ms
        if self.variable_name:
            d["variable_name"] = self.variable_name
        d["timeout_ms"] = self.timeout_ms
        d["max_retries"] = self.max_retries
        # Emitted so that a save/load round trip keeps a customised retry delay
        # (from_dict already reads this key).
        d["retry_delay_ms"] = self.retry_delay_ms
        if self.success_condition:
            d["success_condition"] = self.success_condition.to_dict()
        if self.expected_window_before:
            d["expected_window_before"] = self.expected_window_before
        d["recovery_action"] = self.recovery_action
        if self.step_id:
            # Keeps the link back to the source step across serialisation.
            d["step_id"] = self.step_id
        if self.is_optional:
            d["is_optional"] = True
        return d

    @classmethod
    def from_dict(cls, d: Dict) -> "ExecutionNode":
        """Rebuild a node from a dict produced by ``to_dict``.

        Raises:
            KeyError: if ``node_id`` or ``action_type`` is missing.
        """
        primary = ResolutionStrategy.from_dict(d["strategy_primary"]) if d.get("strategy_primary") else None
        fallbacks = [ResolutionStrategy.from_dict(f) for f in d.get("strategy_fallbacks", [])]
        success = SuccessCondition.from_dict(d["success_condition"]) if d.get("success_condition") else None
        return cls(
            node_id=d["node_id"],
            action_type=d["action_type"],
            intent=d.get("intent", ""),
            strategy_primary=primary,
            strategy_fallbacks=fallbacks,
            text=d.get("text", ""),
            keys=d.get("keys", []),
            duration_ms=d.get("duration_ms", 0),
            variable_name=d.get("variable_name", ""),
            timeout_ms=d.get("timeout_ms", 10000),
            max_retries=d.get("max_retries", 1),
            retry_delay_ms=d.get("retry_delay_ms", 2000),
            success_condition=success,
            expected_window_before=d.get("expected_window_before", ""),
            recovery_action=d.get("recovery_action", "escape"),
            step_id=d.get("step_id", ""),
            is_optional=d.get("is_optional", False),
        )
|
||||
|
||||
|
||||
@dataclass
class ExecutionPlan:
    """Versioned execution plan — what the runtime executes."""
    plan_id: str
    workflow_id: str  # Reference to the source WorkflowIR
    version: int = 1
    created_at: float = 0.0

    # Execution nodes (ordered sequence)
    nodes: List[ExecutionNode] = field(default_factory=list)

    # Variables to substitute before execution
    variables: Dict[str, str] = field(default_factory=dict)

    # Global configuration
    domain: str = "generic"
    target_machine: str = ""  # Target machine
    target_resolution: str = ""  # e.g. "1280x800", "1920x1080"

    # Compilation metrics
    total_nodes: int = 0
    nodes_with_ocr: int = 0  # OCR resolution
    nodes_with_template: int = 0  # Template resolution
    nodes_with_vlm: int = 0  # VLM resolution (last resort)
    estimated_duration_s: float = 0.0

    def to_dict(self) -> Dict[str, Any]:
        """Serialise the plan; compilation metrics are grouped under ``"stats"``.

        ``estimated_duration_s`` is rounded to one decimal in the output.
        """
        stats = {
            "total_nodes": self.total_nodes,
            "nodes_with_ocr": self.nodes_with_ocr,
            "nodes_with_template": self.nodes_with_template,
            "nodes_with_vlm": self.nodes_with_vlm,
            "estimated_duration_s": round(self.estimated_duration_s, 1),
        }
        serialised_nodes = [node.to_dict() for node in self.nodes]
        return {
            "plan_id": self.plan_id,
            "workflow_id": self.workflow_id,
            "version": self.version,
            "created_at": self.created_at,
            "domain": self.domain,
            "target_machine": self.target_machine,
            "target_resolution": self.target_resolution,
            "variables": self.variables,
            "nodes": serialised_nodes,
            "stats": stats,
        }

    def to_json(self, indent: int = 2) -> str:
        """Return ``to_dict()`` as JSON text, keeping non-ASCII characters as-is."""
        as_dict = self.to_dict()
        return json.dumps(as_dict, ensure_ascii=False, indent=indent)

    @classmethod
    def from_dict(cls, d: Dict) -> "ExecutionPlan":
        """Rebuild a plan from a dict shaped like the output of ``to_dict``.

        When ``stats.total_nodes`` is absent it defaults to the number of
        nodes found.

        Raises:
            KeyError: if ``plan_id`` is missing.
        """
        loaded_nodes = [ExecutionNode.from_dict(raw) for raw in d.get("nodes", [])]
        metrics = d.get("stats", {})
        kwargs = dict(
            plan_id=d["plan_id"],
            workflow_id=d.get("workflow_id", ""),
            version=d.get("version", 1),
            created_at=d.get("created_at", 0),
            domain=d.get("domain", "generic"),
            target_machine=d.get("target_machine", ""),
            target_resolution=d.get("target_resolution", ""),
            variables=d.get("variables", {}),
            nodes=loaded_nodes,
            total_nodes=metrics.get("total_nodes", len(loaded_nodes)),
            nodes_with_ocr=metrics.get("nodes_with_ocr", 0),
            nodes_with_template=metrics.get("nodes_with_template", 0),
            nodes_with_vlm=metrics.get("nodes_with_vlm", 0),
            estimated_duration_s=metrics.get("estimated_duration_s", 0),
        )
        return cls(**kwargs)

    @classmethod
    def from_json(cls, json_str: str) -> "ExecutionPlan":
        """Parse JSON text and delegate to ``from_dict``."""
        parsed = json.loads(json_str)
        return cls.from_dict(parsed)

    def save(self, directory: str) -> Path:
        """Write the plan as ``<plan_id>.json`` under ``directory``.

        The directory (and its parents) is created when missing.

        Returns:
            The path of the file written.
        """
        target_dir = Path(directory)
        target_dir.mkdir(parents=True, exist_ok=True)
        destination = target_dir / f"{self.plan_id}.json"
        destination.write_text(self.to_json(), encoding="utf-8")
        return destination

    @classmethod
    def load(cls, file_path: str) -> "ExecutionPlan":
        """Read a UTF-8 JSON file and rebuild the plan it contains."""
        raw_text = Path(file_path).read_text(encoding="utf-8")
        return cls.from_json(raw_text)
|
||||
627
core/workflow/ir_builder.py
Normal file
627
core/workflow/ir_builder.py
Normal file
@@ -0,0 +1,627 @@
|
||||
# core/workflow/ir_builder.py
|
||||
"""
|
||||
IRBuilder — Transforme une RawTrace en WorkflowIR.
|
||||
|
||||
C'est le "compilateur de savoir-faire" :
|
||||
RawTrace (clics bruts) → WorkflowIR (connaissance structurée)
|
||||
|
||||
Le builder utilise gemma4 pour COMPRENDRE ce que l'utilisateur a fait :
|
||||
- Segmenter les actions en étapes logiques
|
||||
- Identifier l'intention de chaque étape
|
||||
- Détecter les variables (données qui changent entre les exécutions)
|
||||
- Définir les pré/postconditions
|
||||
|
||||
Le builder est appelé UNE SEULE FOIS après l'enregistrement.
|
||||
Le WorkflowIR produit est ensuite réutilisé pour chaque replay.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from .workflow_ir import WorkflowIR, Step, Action, Variable
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class IRBuilder:
|
||||
"""Construit un WorkflowIR depuis une RawTrace (événements bruts).
|
||||
|
||||
Usage :
|
||||
builder = IRBuilder()
|
||||
ir = builder.build(
|
||||
events=raw_events,
|
||||
session_id="sess_xxx",
|
||||
domain="tim_codage",
|
||||
)
|
||||
ir.save("data/workflows/")
|
||||
"""
|
||||
|
||||
def __init__(self, gemma4_port: str = ""):
|
||||
self._gemma4_port = gemma4_port or os.environ.get("GEMMA4_PORT", "11435")
|
||||
self._gemma4_url = f"http://localhost:{self._gemma4_port}/api/chat"
|
||||
|
||||
def build(
    self,
    events: List[Dict[str, Any]],
    session_id: str = "",
    session_dir: str = "",
    domain: str = "generic",
    name: str = "",
) -> WorkflowIR:
    """Build a WorkflowIR from raw recorded events.

    Pipeline:
      1. Filter out noise events.
      2. Detect the applications involved.
      3. Segment the remaining events into logical steps.
      4. Build one Step per segment (intent analysis happens there).
      5. Attach the expected window before/after each action.
      6. Detect variables.

    Args:
        events: Raw events of the recording session.
        session_id: Stored on the IR as ``learned_from``.
        session_dir: Session directory used for visual enrichment; when it
            is empty or not an existing directory, enrichment is disabled.
        domain: Business domain forwarded to the IR and to step building.
        name: Workflow name; defaults to a timestamped label.

    Returns:
        The populated WorkflowIR, or an empty one when no actionable event
        remains after filtering.
    """
    started_at = time.time()

    # Resolve the directory used for visual enrichment (None = disabled).
    visual_dir = None
    if session_dir:
        visual_dir = Path(session_dir)
        if not visual_dir.is_dir():
            logger.warning(
                f"IRBuilder: session_dir '{session_dir}' introuvable — "
                f"enrichissement visuel désactivé"
            )
            visual_dir = None

    # Start from an empty WorkflowIR.
    workflow_label = name or f"Workflow du {time.strftime('%d/%m/%Y %H:%M')}"
    ir = WorkflowIR.new(
        name=workflow_label,
        domain=domain,
        learned_from=session_id,
    )

    # 1. Keep only useful events.
    actionable = self._filter_events(events)
    if not actionable:
        logger.warning("IRBuilder: aucun événement actionable")
        return ir

    # 2. Applications in use.
    ir.applications = self._detect_applications(actionable)

    # 3. Logical segmentation.
    segments = self._segment_into_steps(actionable)
    segment_count = len(segments)

    # 4. One Step per segment.
    for index, segment in enumerate(segments):
        ir.steps.append(
            self._build_step(
                segment=segment,
                step_index=index,
                total_steps=segment_count,
                workflow_name=ir.name,
                domain=domain,
                session_dir_path=visual_dir,
            )
        )

    # 5. Strict control: record, for every action, the window it must run in
    #    and the window it must lead to.
    self._attach_window_expectations(ir, actionable)

    # 6. Variables.
    ir.variables = self._detect_variables(ir.steps, actionable)

    elapsed = time.time() - started_at
    logger.info(
        f"IRBuilder: WorkflowIR construit en {elapsed:.1f}s — "
        f"{len(ir.steps)} étapes, {len(ir.variables)} variables, "
        f"{len(ir.applications)} applications"
    )

    return ir
|
||||
|
||||
def _filter_events(self, events: List[Dict]) -> List[Dict]:
|
||||
"""Filtrer les événements parasites.
|
||||
|
||||
Exclusions :
|
||||
1. Types d'événements de bruit (heartbeat, focus_change, action_result)
|
||||
2. Clics dont la CIBLE UIA est dans Léa elle-même
|
||||
(via uia_snapshot.parent_path — on vérifie où va le clic, pas d'où
|
||||
il vient). Un clic "sur la taskbar" peut avoir window.title="Léa"
|
||||
si Léa avait le focus, mais sa cible UIA est la taskbar.
|
||||
"""
|
||||
ignored_types = {"heartbeat", "focus_change", "action_result", "window_focus_change"}
|
||||
lea_markers = (
|
||||
"léa", "lea -", "léa -", "lea —", "léa —",
|
||||
"lea assistante", "léa assistante",
|
||||
"agent v1",
|
||||
)
|
||||
|
||||
def _uia_target_is_lea(uia_snapshot: dict) -> bool:
|
||||
"""L'élément UIA cliqué est-il dans la fenêtre de Léa ?"""
|
||||
if not uia_snapshot:
|
||||
return False
|
||||
# Vérifier le nom de l'élément lui-même
|
||||
name = (uia_snapshot.get("name", "") or "").lower()
|
||||
if any(m in name for m in lea_markers):
|
||||
return True
|
||||
# Vérifier les parents
|
||||
for parent in uia_snapshot.get("parent_path", []):
|
||||
p_name = (parent.get("name", "") or "").lower()
|
||||
if any(m in p_name for m in lea_markers):
|
||||
return True
|
||||
return False
|
||||
|
||||
result = []
|
||||
filtered_lea = 0
|
||||
for raw_evt in events:
|
||||
evt = raw_evt.get("event", raw_evt)
|
||||
evt_type = evt.get("type", "")
|
||||
if evt_type in ignored_types:
|
||||
continue
|
||||
|
||||
# Filtrer uniquement les clics dont la CIBLE est dans Léa
|
||||
# (pas les clics depuis Léa vers l'extérieur)
|
||||
if evt_type == "mouse_click":
|
||||
uia = evt.get("uia_snapshot") or {}
|
||||
if _uia_target_is_lea(uia):
|
||||
filtered_lea += 1
|
||||
continue
|
||||
|
||||
result.append(evt)
|
||||
|
||||
if filtered_lea > 0:
|
||||
logger.info(
|
||||
f"IRBuilder: {filtered_lea} clic(s) filtré(s) "
|
||||
f"(cible UIA dans la fenêtre Léa)"
|
||||
)
|
||||
return result
|
||||
|
||||
def _attach_window_expectations(self, ir: WorkflowIR, events: List[Dict]) -> None:
|
||||
"""Remplir expected_window_before/after pour chaque action du workflow.
|
||||
|
||||
C'est LA clé du contrôle strict : chaque action connaît la fenêtre
|
||||
dans laquelle elle doit s'exécuter ET celle qui doit apparaître
|
||||
après. Toute divergence au replay → STOP immédiat.
|
||||
|
||||
On reconstruit la séquence d'événements "actionables" (clicks, type,
|
||||
key_combo) et on aligne chaque Action du workflow sur son événement
|
||||
source pour récupérer :
|
||||
- expected_window_before : titre de la fenêtre AU MOMENT du clic
|
||||
- expected_window_after : titre de la fenêtre du PROCHAIN click
|
||||
|
||||
Filtre critique : la fenêtre de Léa elle-même n'est JAMAIS une
|
||||
fenêtre cible valide (c'est l'overlay agent, pas l'app métier).
|
||||
Les fenêtres "unknown_window" et les titres vides sont ignorés.
|
||||
"""
|
||||
def _is_valid_target_window(title: str) -> bool:
|
||||
"""Un titre de fenêtre est valide comme expected_window_* si :
|
||||
- non vide, non "unknown_window"
|
||||
- pas la fenêtre de Léa elle-même
|
||||
"""
|
||||
if not title or title == "unknown_window":
|
||||
return False
|
||||
title_lower = title.lower()
|
||||
lea_markers = (
|
||||
"léa", "lea -", "léa -", "lea —", "léa —",
|
||||
"lea assistante", "léa assistante",
|
||||
"agent v1",
|
||||
)
|
||||
for marker in lea_markers:
|
||||
if marker in title_lower:
|
||||
return False
|
||||
return True
|
||||
|
||||
def _extract_uia_root_window(uia_snapshot: dict) -> str:
|
||||
"""Extraire le nom de la fenêtre racine depuis un snapshot UIA.
|
||||
|
||||
Le parent_path contient la hiérarchie de l'élément cliqué.
|
||||
La première entrée avec control_type="fenêtre" est la fenêtre
|
||||
qui CONTIENT l'élément cliqué — c'est la vraie cible.
|
||||
"""
|
||||
if not uia_snapshot:
|
||||
return ""
|
||||
for parent in uia_snapshot.get("parent_path", []):
|
||||
ct = (parent.get("control_type", "") or "").lower()
|
||||
if ct in ("fenêtre", "window"):
|
||||
name = (parent.get("name", "") or "").strip()
|
||||
if name:
|
||||
return name
|
||||
return ""
|
||||
|
||||
# Extraire la séquence des événements actionables avec leurs titres
|
||||
# Source de vérité pour les clics : parent_path UIA (où va vraiment
|
||||
# le clic), sinon window.title (fallback).
|
||||
# Pour les type/key_combo : window.title uniquement.
|
||||
event_sequence: List[Dict[str, Any]] = []
|
||||
for evt in events:
|
||||
t = evt.get("type", "")
|
||||
if t not in ("mouse_click", "text_input", "key_combo", "key_press", "scroll"):
|
||||
continue
|
||||
|
||||
# Titre de référence : priorité à la cible UIA pour les clics
|
||||
effective_title = ""
|
||||
if t == "mouse_click":
|
||||
uia = evt.get("uia_snapshot") or {}
|
||||
uia_root = _extract_uia_root_window(uia)
|
||||
if uia_root and _is_valid_target_window(uia_root):
|
||||
effective_title = uia_root
|
||||
|
||||
# Fallback sur window.title
|
||||
if not effective_title:
|
||||
raw_title = evt.get("window", {}).get("title", "") or ""
|
||||
if _is_valid_target_window(raw_title):
|
||||
effective_title = raw_title
|
||||
|
||||
event_sequence.append({"type": t, "title": effective_title})
|
||||
|
||||
# Aligner avec les actions du workflow
|
||||
flat_actions: List[tuple] = []
|
||||
for si, step in enumerate(ir.steps):
|
||||
for ai, action in enumerate(step.actions):
|
||||
if action.type in ("click", "type", "key_combo"):
|
||||
flat_actions.append((si, ai, action))
|
||||
|
||||
# Limite : on prend le min entre les 2 listes
|
||||
n = min(len(flat_actions), len(event_sequence))
|
||||
|
||||
for i in range(n):
|
||||
si, ai, action = flat_actions[i]
|
||||
title_now = event_sequence[i]["title"]
|
||||
if title_now:
|
||||
action.expected_window_before = title_now
|
||||
|
||||
# Chercher le prochain événement avec un titre valide
|
||||
# Et qui est DIFFÉRENT du titre actuel (sinon pas de transition à vérifier)
|
||||
for j in range(i + 1, len(event_sequence)):
|
||||
next_title = event_sequence[j]["title"]
|
||||
if next_title and next_title != title_now:
|
||||
action.expected_window_after = next_title
|
||||
break
|
||||
|
||||
def _detect_applications(self, events: List[Dict]) -> List[str]:
|
||||
"""Détecter les applications utilisées."""
|
||||
apps = set()
|
||||
for evt in events:
|
||||
title = evt.get("window", {}).get("title", "")
|
||||
if title and title != "unknown_window":
|
||||
for sep in [" – ", " - ", " — "]:
|
||||
if sep in title:
|
||||
apps.add(title.split(sep)[-1].strip())
|
||||
break
|
||||
return sorted(apps)
|
||||
|
||||
def _segment_into_steps(self, events: List[Dict]) -> List[List[Dict]]:
|
||||
"""Segmenter les événements en étapes logiques.
|
||||
|
||||
Critères de coupure :
|
||||
- Changement d'application (fenêtre différente)
|
||||
- Pause longue (> 5s entre deux événements)
|
||||
- Transition logique (clic → frappe → clic = étapes différentes)
|
||||
"""
|
||||
if not events:
|
||||
return []
|
||||
|
||||
segments = []
|
||||
current_segment = [events[0]]
|
||||
current_app = self._get_app_name(events[0])
|
||||
|
||||
for evt in events[1:]:
|
||||
app = self._get_app_name(evt)
|
||||
evt_type = evt.get("type", "")
|
||||
|
||||
# Coupure par changement d'application
|
||||
app_changed = app and current_app and app != current_app
|
||||
|
||||
# Coupure par pause longue
|
||||
prev_ts = float(current_segment[-1].get("timestamp", 0))
|
||||
curr_ts = float(evt.get("timestamp", 0))
|
||||
long_pause = (curr_ts - prev_ts) > 5.0 if prev_ts > 0 and curr_ts > 0 else False
|
||||
|
||||
# Coupure par transition clic → nouveau clic (nouvelle intention)
|
||||
transition = (
|
||||
evt_type == "mouse_click"
|
||||
and len(current_segment) >= 2
|
||||
and current_segment[-1].get("type") not in ("mouse_click",)
|
||||
)
|
||||
|
||||
if app_changed or long_pause:
|
||||
if current_segment:
|
||||
segments.append(current_segment)
|
||||
current_segment = [evt]
|
||||
current_app = app
|
||||
else:
|
||||
current_segment.append(evt)
|
||||
|
||||
if current_segment:
|
||||
segments.append(current_segment)
|
||||
|
||||
return segments
|
||||
|
||||
def _get_app_name(self, evt: Dict) -> str:
|
||||
"""Extraire le nom d'application depuis un événement."""
|
||||
title = evt.get("window", {}).get("title", "")
|
||||
for sep in [" – ", " - ", " — "]:
|
||||
if sep in title:
|
||||
return title.split(sep)[-1].strip()
|
||||
return title
|
||||
|
||||
def _build_step(
    self,
    segment: List[Dict],
    step_index: int,
    total_steps: int,
    workflow_name: str,
    domain: str,
    session_dir_path: Optional[Path] = None,
) -> Step:
    """Build one Step from a segment of events.

    Events are converted to actions (those that convert to nothing are
    dropped), the segment is described in text, and that description is
    passed to the intent analysis to obtain the intent and the
    pre/postconditions.

    Returns:
        A Step with id ``s<step_index + 1>``; its intent falls back to the
        textual description of the segment when the analysis returns none.
    """
    # Convert every event first, keeping only the ones that yield an action.
    converted = [
        self._event_to_action(evt, session_dir_path=session_dir_path)
        for evt in segment
    ]
    actions = [candidate for candidate in converted if candidate]

    # Textual description of the segment, used as the analysis input.
    segment_desc = self._describe_segment(segment)

    intent, precondition, postcondition = self._analyze_intent(
        segment_desc, step_index, total_steps, workflow_name, domain,
    )

    step_number = step_index + 1
    return Step(
        step_id=f"s{step_number}",
        intent=intent or segment_desc,
        precondition=precondition,
        postcondition=postcondition,
        actions=actions,
    )
|
||||
|
||||
def _event_to_action(self, evt: Dict, session_dir_path: Optional[Path] = None) -> Optional[Action]:
    """Convert a raw event into an (optionally enriched) Action.

    - ``mouse_click`` → ``click`` action targeting the window title, with
      ``anchor_hint`` taken from ``vision_info.text`` when ``vision_info``
      is a dict. When a session directory is available and ``pos`` is a
      2-item list, the click is enriched through ``_enrich_click``; a
      non-empty ``by_text`` replaces the anchor hint and the whole
      enrichment dict is stored on ``action._enrichment``. A dict
      ``uia_snapshot`` on the event is merged into that enrichment under
      the ``"uia_snapshot"`` key.
    - ``text_input`` with non-empty text → ``type`` action.
    - ``key_combo`` / ``key_press`` with non-empty keys → ``key_combo`` action.
    - ``scroll`` → ``scroll`` action.

    Returns:
        The Action, or None when the event yields nothing.
    """
    kind = evt.get("type", "")

    if kind == "text_input":
        typed = evt.get("text", "")
        return Action(type="type", text=typed) if typed else None

    if kind in ("key_combo", "key_press"):
        pressed = evt.get("keys", [])
        return Action(type="key_combo", keys=pressed) if pressed else None

    if kind == "scroll":
        return Action(type="scroll")

    if kind != "mouse_click":
        return None

    window = evt.get("window", {}).get("title", "")
    pos = evt.get("pos", [0, 0])
    vision = evt.get("vision_info")

    # Base action (what is returned when no enrichment is available).
    action = Action(
        type="click",
        target=window,
        anchor_hint=vision.get("text", "") if isinstance(vision, dict) else "",
    )

    # Visual enrichment: OCR text + anchor crop for the execution compiler.
    can_enrich = session_dir_path and isinstance(pos, list) and len(pos) == 2
    if can_enrich:
        enrichment = self._enrich_click(
            evt, session_dir_path, window, int(pos[0]), int(pos[1]),
        )
        if enrichment:
            # The OCR text becomes the anchor hint.
            ocr_text = enrichment.get("by_text", "")
            if ocr_text:
                action.anchor_hint = ocr_text
            # Enrichment metadata travels with the action.
            action._enrichment = enrichment

    # UIA snapshot captured by the Windows agent, if any. Expected shape:
    #   {"name": ..., "control_type": ..., "automation_id": ...,
    #    "parent_path": [{"name": ..., "control_type": ...}]}
    # It is merged into the enrichment so a UIA strategy can be derived.
    uia_snapshot = evt.get("uia_snapshot")
    if uia_snapshot and isinstance(uia_snapshot, dict):
        if getattr(action, "_enrichment", None) is None:
            action._enrichment = {}
        action._enrichment["uia_snapshot"] = uia_snapshot

    return action
|
||||
|
||||
def _enrich_click(
    self,
    evt: Dict,
    session_dir_path: Path,
    window_title: str,
    click_x: int,
    click_y: int,
) -> Optional[Dict[str, Any]]:
    """Enrich a click using the full screenshot recorded with it.

    Delegates to ``enrich_click_from_screenshot`` from the stream processor
    and returns whatever it returns.
    NOTE(review): the result is presumably a dict with ``by_text``,
    ``anchor_image_base64``, ``vlm_description``, ... — confirm against the
    helper.

    Returns:
        The helper's result, or None when the event has no
        ``screenshot_id``, when ``shots/<screenshot_id>_full.png`` does not
        exist under the session directory, or when anything raises (the
        error is logged at debug level).
    """
    try:
        from agent_v0.server_v1.stream_processor import enrich_click_from_screenshot

        # Locate the full screenshot.
        shot_id = evt.get("screenshot_id", "")
        if not shot_id:
            return None
        shot_file = session_dir_path / "shots" / f"{shot_id}_full.png"
        if not shot_file.is_file():
            return None

        # Screen resolution: at least 1280x800, grown by the window rect
        # (items 2 and 3) when one was captured.
        width, height = 1280, 800
        rect = evt.get("window_capture", {}).get("window_rect")
        if rect:
            width = max(width, rect[2])
            height = max(height, rect[3])

        vision = evt.get("vision_info")
        return enrich_click_from_screenshot(
            screenshot_path=shot_file,
            click_x=click_x,
            click_y=click_y,
            screen_w=width,
            screen_h=height,
            window_title=window_title,
            vision_info=vision if isinstance(vision, dict) else None,
            session_dir=session_dir_path,
            screenshot_id=shot_id,
        )
    except Exception as e:
        logger.debug(f"IRBuilder._enrich_click: {e}")
        return None
|
||||
|
||||
def _describe_segment(self, segment: List[Dict]) -> str:
|
||||
"""Décrire un segment en langage naturel (pour gemma4)."""
|
||||
parts = []
|
||||
window = ""
|
||||
for evt in segment:
|
||||
evt_type = evt.get("type", "")
|
||||
w = evt.get("window", {}).get("title", "")
|
||||
if w and w != window:
|
||||
window = w
|
||||
parts.append(f"[{w}]")
|
||||
if evt_type == "mouse_click":
|
||||
text = evt.get("vision_info", {}).get("text", "")
|
||||
parts.append(f"clic sur '{text}'" if text else "clic")
|
||||
elif evt_type == "text_input":
|
||||
text = evt.get("text", "")
|
||||
parts.append(f"saisie '{text[:30]}'")
|
||||
elif evt_type in ("key_combo", "key_press"):
|
||||
keys = evt.get("keys", [])
|
||||
parts.append(f"touche {'+'.join(keys)}")
|
||||
return " → ".join(parts) if parts else "action"
|
||||
|
||||
def _analyze_intent(
    self,
    segment_desc: str,
    step_index: int,
    total_steps: int,
    workflow_name: str,
    domain: str,
) -> tuple:
    """Ask the local gemma4 model what the user intended with a segment.

    A French prompt is posted to the chat endpoint configured on the
    builder; a successful response is parsed by ``_parse_intent_response``.

    Returns:
        ``(intent, precondition, postcondition)``. When the request fails
        or the response is not OK, ``(segment_desc, "", "")`` is returned.
    """
    import requests as _requests

    # Optional business-context header; best effort, silently skipped when
    # the domain context cannot be loaded.
    domain_prompt = ""
    try:
        from agent_v0.server_v1.domain_context import get_domain_context

        ctx = get_domain_context(domain)
        if ctx.system_prompt:
            domain_prompt = f"\nContexte métier : {ctx.name}\n"
    except Exception:
        pass

    prompt = "".join([
        f"{domain_prompt}",
        f"Workflow : {workflow_name} (étape {step_index + 1}/{total_steps})\n",
        f"Actions observées : {segment_desc}\n\n",
        f"Réponds en 3 lignes :\n",
        f"INTENTION: que veut faire l'utilisateur avec ces actions (1 phrase)\n",
        f"AVANT: état attendu de l'écran avant cette étape (1 phrase)\n",
        f"APRÈS: état attendu de l'écran après cette étape (1 phrase)",
    ])

    request_body = {
        "model": "gemma4:e4b",
        "messages": [{"role": "user", "content": prompt}],
        "stream": False,
        "think": True,
        "options": {"temperature": 0.1, "num_predict": 800},
    }

    fallback = (segment_desc, "", "")
    try:
        resp = _requests.post(self._gemma4_url, json=request_body, timeout=30)
        if not resp.ok:
            return fallback
        content = resp.json().get("message", {}).get("content", "")
        return self._parse_intent_response(content)
    except Exception as e:
        logger.debug(f"IRBuilder: gemma4 indisponible ({e})")

    return fallback
|
||||
|
||||
def _parse_intent_response(self, content: str) -> tuple:
|
||||
"""Parser la réponse gemma4 (INTENTION/AVANT/APRÈS)."""
|
||||
intent = ""
|
||||
precondition = ""
|
||||
postcondition = ""
|
||||
|
||||
for line in content.split("\n"):
|
||||
clean = line.strip()
|
||||
upper = clean.upper()
|
||||
if upper.startswith("INTENTION:"):
|
||||
intent = clean.split(":", 1)[1].strip()
|
||||
elif upper.startswith("AVANT:"):
|
||||
precondition = clean.split(":", 1)[1].strip()
|
||||
elif upper.startswith(("APRÈS:", "APRES:")):
|
||||
postcondition = clean.split(":", 1)[1].strip()
|
||||
|
||||
return (intent, precondition, postcondition)
|
||||
|
||||
def _detect_variables(self, steps: List[Step], events: List[Dict]) -> List[Variable]:
|
||||
"""Détecter les variables dans le workflow.
|
||||
|
||||
Une variable est une donnée qui change entre les exécutions :
|
||||
- Texte saisi par l'utilisateur (noms, codes, dates)
|
||||
- Données lues à l'écran (résultats de recherche)
|
||||
"""
|
||||
variables = []
|
||||
seen_texts = set()
|
||||
|
||||
for step in steps:
|
||||
for action in step.actions:
|
||||
if action.type == "type" and action.text:
|
||||
text = action.text.strip()
|
||||
if text and text not in seen_texts and len(text) > 2:
|
||||
seen_texts.add(text)
|
||||
var_name = f"texte_{len(variables) + 1}"
|
||||
variables.append(Variable(
|
||||
name=var_name,
|
||||
description=f"Texte saisi : '{text[:50]}'",
|
||||
source="user",
|
||||
default=text,
|
||||
))
|
||||
# Marquer l'action comme variable
|
||||
action.variable = True
|
||||
action.text = f"{{{var_name}}}"
|
||||
|
||||
return variables
|
||||
693
core/workflow/shadow_observer.py
Normal file
693
core/workflow/shadow_observer.py
Normal file
@@ -0,0 +1,693 @@
|
||||
# core/workflow/shadow_observer.py
|
||||
"""
|
||||
ShadowObserver — Observation en temps réel de ce que Léa comprend.
|
||||
|
||||
C'est le "mode Shadow amélioré" : pendant que l'utilisateur enregistre
|
||||
une démonstration, Léa lui dit ce qu'elle comprend au fur et à mesure.
|
||||
|
||||
Contrairement à l'IRBuilder (qui analyse TOUT à la fin en appelant gemma4),
|
||||
le ShadowObserver travaille en incrémental :
|
||||
- À chaque événement reçu, il met à jour sa compréhension locale.
|
||||
- Il segmente dès qu'un critère de coupure est détecté.
|
||||
- Il émet des notifications légères ("Léa a compris : tu viens d'ouvrir le
|
||||
Bloc-notes") via un callback.
|
||||
- Il détecte les variables (texte saisi) pendant la frappe.
|
||||
|
||||
Le ShadowObserver n'est pas la source de vérité — c'est une couche
|
||||
d'observation. La source de vérité reste `live_events.jsonl`.
|
||||
Le WorkflowIR final est toujours reconstruit par l'IRBuilder après
|
||||
validation, mais la compréhension temps réel accélère la boucle de
|
||||
rétroaction avec l'utilisateur.
|
||||
|
||||
Usage :
|
||||
|
||||
def on_notify(event):
|
||||
print(f"[{event.niveau}] {event.message}")
|
||||
|
||||
observer = ShadowObserver(notify_callback=on_notify)
|
||||
observer.start("sess_abc")
|
||||
observer.observe_event("sess_abc", event1)
|
||||
observer.observe_event("sess_abc", event2)
|
||||
...
|
||||
comprehension = observer.get_understanding("sess_abc")
|
||||
# → [{"step": 1, "intent": "Ouvrir le Bloc-notes", "confidence": 0.8}, ...]
|
||||
observer.stop("sess_abc")
|
||||
|
||||
Contraintes :
|
||||
- 100% asynchrone côté performance : la méthode observe_event() ne doit
|
||||
jamais bloquer la capture (pas d'appel réseau synchrone).
|
||||
- Optionnel : activable via paramètre, ne modifie pas la capture existante.
|
||||
- 100% français dans les messages utilisateur.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from typing import Any, Callable, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Types d'événements observationnels
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class NiveauNotification(str, Enum):
    """Importance level of a notification.

    - INFO: passive information ("Léa observe...")
    - DECOUVERTE: Léa has just understood something new
    - QUESTION: Léa would like a confirmation (non-blocking)
    - VARIABLE: a variable has been detected
    """

    INFO = "info"
    DECOUVERTE = "decouverte"
    QUESTION = "question"
    VARIABLE = "variable"
|
||||
|
||||
|
||||
@dataclass
class NotificationShadow:
    """Notification sent by the ShadowObserver to the user-facing GUI."""

    notif_id: str
    niveau: NiveauNotification
    message: str  # Text shown to the user (French)
    session_id: str
    step_index: int = -1  # Index of the step concerned, -1 when global
    data: Dict[str, Any] = field(default_factory=dict)
    timestamp: float = 0.0

    def to_dict(self) -> Dict[str, Any]:
        """Return the notification as a plain dict; ``niveau`` is exported as its ``.value``."""
        payload: Dict[str, Any] = dict(
            notif_id=self.notif_id,
            niveau=self.niveau.value,
            message=self.message,
            session_id=self.session_id,
            step_index=self.step_index,
            data=self.data,
            timestamp=self.timestamp,
        )
        return payload
|
||||
|
||||
|
||||
@dataclass
class UnderstoodStep:
    """Logical step understood in real time by the ShadowObserver.

    A simplified counterpart of ``Step`` (core.workflow.workflow_ir) meant
    for incremental construction; per the module design it is converted to
    a final ``Step`` by the ShadowValidator after validation.
    """

    step_index: int
    intent: str  # Human intent (e.g. "Ouvrir le Bloc-notes")
    intent_provisoire: bool = True  # True until gemma4 has confirmed it
    confidence: float = 0.5  # Confidence score (0..1)
    app_name: str = ""  # Main application
    window_title: str = ""  # Window title at the start of the segment
    events: List[Dict[str, Any]] = field(default_factory=list)
    variables_detectees: List[str] = field(default_factory=list)
    started_at: float = 0.0
    ended_at: float = 0.0
    validated: bool = False  # The user validated the step
    corrected: bool = False  # The user corrected the intent
    cancelled: bool = False  # The user cancelled the step

    def to_dict(self) -> Dict[str, Any]:
        """Return a summary dict.

        Raw events are not exported, only their count (``events_count``);
        ``confidence`` is rounded to 3 decimals and ``variables_detectees``
        is copied.
        """
        summary: Dict[str, Any] = {
            "step_index": self.step_index,
            "intent": self.intent,
            "intent_provisoire": self.intent_provisoire,
        }
        summary["confidence"] = round(self.confidence, 3)
        summary.update(
            app_name=self.app_name,
            window_title=self.window_title,
            events_count=len(self.events),
            variables_detectees=list(self.variables_detectees),
        )
        for attr in ("started_at", "ended_at", "validated", "corrected", "cancelled"):
            summary[attr] = getattr(self, attr)
        return summary
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Observer
|
||||
# =========================================================================
|
||||
|
||||
|
||||
# Segmentation constants. The IRBuilder's constants are deliberately not
# reused: observation is incremental, so shorter thresholds can be afforded
# for better responsiveness.
_SEUIL_PAUSE_LONGUE_S = 4.0  # seconds between two events before a "long pause" cut
_SEUIL_CONFIANCE_BASE = 0.5  # confidence given to a freshly opened step
_SEUIL_CONFIANCE_APP_CHANGE = 0.8  # NOTE(review): not referenced in the visible part of this module

# Event types that observe_event() drops without recording them
_EVENT_TYPES_IGNORES = {
    "heartbeat",
    "focus_change",
    "action_result",
    "window_focus_change",
}
|
||||
|
||||
|
||||
class ShadowObserver:
|
||||
"""Observe les événements en temps réel et met à jour la compréhension.
|
||||
|
||||
Thread-safe : peut être appelé depuis plusieurs threads (capture,
|
||||
API, worker).
|
||||
|
||||
Le callback `notify_callback` est appelé de manière synchrone mais les
|
||||
notifications sont extrêmement légères (juste un dataclass) — elles
|
||||
sont destinées à être envoyées via WebSocket/HTTP long-poll depuis la
|
||||
couche API.
|
||||
"""
|
||||
|
||||
NotifyCallback = Callable[[NotificationShadow], None]
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
notify_callback: Optional[NotifyCallback] = None,
|
||||
*,
|
||||
enable_gemma4: bool = False,
|
||||
gemma4_callback: Optional[Callable[[UnderstoodStep], None]] = None,
|
||||
):
|
||||
"""
|
||||
Args:
|
||||
notify_callback: Fonction appelée à chaque notification
|
||||
(doit être rapide, pas d'IO bloquant).
|
||||
enable_gemma4: Si True, une tâche asynchrone peut enrichir
|
||||
les intentions via gemma4 (non bloquant). En pratique,
|
||||
on laisse le caller le brancher via `gemma4_callback`.
|
||||
gemma4_callback: Fonction appelée en arrière-plan pour
|
||||
enrichir une étape (via gemma4 ou autre LLM). Non bloquant.
|
||||
"""
|
||||
self._notify_callback = notify_callback
|
||||
self._enable_gemma4 = enable_gemma4
|
||||
self._gemma4_callback = gemma4_callback
|
||||
|
||||
self._lock = threading.RLock()
|
||||
self._sessions: Dict[str, Dict[str, Any]] = {}
|
||||
|
||||
# ----- Cycle de vie --------------------------------------------------
|
||||
|
||||
def start(self, session_id: str) -> None:
    """Start observing a session.

    Installs a fresh state record for ``session_id`` (replacing any existing
    one) and then emits an INFO greeting notification.
    """
    fresh_state = {
        "steps": [],             # List[UnderstoodStep]
        "current_step": None,    # Optional[UnderstoodStep]
        "last_event_ts": 0.0,
        "last_notif_ts": 0.0,
        "total_events": 0,
        "notifications": [],     # Notification history
        "started_at": time.time(),
        "stopped_at": 0.0,
    }
    with self._lock:
        self._sessions[session_id] = fresh_state

    greeting = "Léa t'observe. Fais ta tâche normalement, je vais apprendre."
    self._notifier(session_id, NiveauNotification.INFO, greeting)
|
||||
|
||||
def stop(self, session_id: str) -> None:
    """Stop observing a session and close the step in progress.

    An in-progress step that holds at least one event is appended to the
    finished steps; its end time is the last event timestamp, or the current
    time when none was recorded. Unknown sessions are ignored. When at least
    one step was observed, a DECOUVERTE notification offers to review them.
    """
    with self._lock:
        session = self._sessions.get(session_id)
        if not session:
            return
        pending = session.get("current_step")
        if pending is not None and pending.events:
            pending.ended_at = session["last_event_ts"] or time.time()
            session["steps"].append(pending)
        session["current_step"] = None
        session["stopped_at"] = time.time()

    nb_steps = len(self.get_understanding(session_id))
    if nb_steps <= 0:
        return
    question = (
        f"J'ai observé {nb_steps} étape(s). Tu veux que je te les "
        "montre pour validation ?"
    )
    self._notifier(session_id, NiveauNotification.DECOUVERTE, question)
|
||||
|
||||
def reset(self, session_id: str) -> None:
    """Discard the stored state of a session; unknown sessions are ignored."""
    with self._lock:
        if session_id in self._sessions:
            del self._sessions[session_id]
|
||||
|
||||
# ----- Observation ---------------------------------------------------
|
||||
|
||||
def observe_event(self, session_id: str, event: Dict[str, Any]) -> None:
    """Record one captured event and update the live understanding.

    Events whose type is in ``_EVENT_TYPES_IGNORES`` are dropped. For any
    other event, under the lock: the session is auto-started if unknown, the
    current step is closed when ``_should_cut`` says so, a new step is
    opened when none is active, the event is appended, the provisional
    intent is refreshed, text inputs go through variable detection, and a
    periodic summary may be emitted.

    Must stay fast: no synchronous network I/O.
    """
    evt_type = event.get("type", "")
    if evt_type in _EVENT_TYPES_IGNORES:
        return

    with self._lock:
        session = self._sessions.get(session_id)
        if not session:
            # Robustness: start the session on first sight.
            self.start(session_id)
            session = self._sessions[session_id]

        session["total_events"] += 1

        # 1. Close the active step when a cut criterion is met.
        active = session.get("current_step")
        must_cut, why = self._should_cut(session, event)
        if must_cut and active is not None:
            active.ended_at = session["last_event_ts"] or time.time()
            session["steps"].append(active)
            self._emit_step_closed(session_id, active, why)
            active = None
            session["current_step"] = None

        # 2. Open a new step when none is active.
        if active is None:
            active = UnderstoodStep(
                step_index=len(session["steps"]) + 1,
                intent=self._initial_intent(event),
                intent_provisoire=True,
                confidence=_SEUIL_CONFIANCE_BASE,
                app_name=self._get_app_name(event),
                window_title=self._get_window_title(event),
                started_at=float(event.get("timestamp", 0)) or time.time(),
            )
            session["current_step"] = active

        # 3. Attach the event and remember its timestamp (wall clock when
        #    the event carries none).
        active.events.append(event)
        session["last_event_ts"] = float(event.get("timestamp", 0)) or time.time()

        # 4. Refresh the provisional intent from the accumulated context.
        active.intent = self._refine_intent(active, event)

        # 5. Variable detection while typing.
        if evt_type == "text_input":
            self._handle_text_input(session_id, active, event)

        # 6. Periodic summary.
        self._maybe_emit_heartbeat(session_id, session)
|
||||
|
||||
# ----- API publique --------------------------------------------------
|
||||
|
||||
def get_understanding(
    self, session_id: str, include_current: bool = True
) -> List[Dict[str, Any]]:
    """Return what Léa has understood so far for a session.

    Args:
        session_id: Session to inspect.
        include_current: Also include the step still under construction.

    Returns:
        One dict per step, built from ``to_dict()`` with the ``step_index``
        key renamed to ``step``, e.g.
            [{"intent": "Ouvrir le Bloc-notes", "confidence": 0.9,
              "app_name": "Bloc-notes", "events_count": 4, ..., "step": 1}, ...]
        An empty list for an unknown session.
    """
    with self._lock:
        session = self._sessions.get(session_id)
        if not session:
            return []
        selected = list(session["steps"])
        in_progress = session.get("current_step")
        if include_current and in_progress is not None:
            selected.append(in_progress)

    def _public_view(step):
        view = step.to_dict()
        view["step"] = view.pop("step_index")
        return view

    return [_public_view(step) for step in selected]
|
||||
|
||||
def get_notifications(
    self, session_id: str, since_ts: float = 0.0
) -> List[Dict[str, Any]]:
    """Return, as dicts, the notifications whose timestamp is >= ``since_ts``.

    Returns an empty list for an unknown session.
    """
    with self._lock:
        session = self._sessions.get(session_id)
        if not session:
            return []
        recent = []
        for notif in session["notifications"]:
            if notif.timestamp >= since_ts:
                recent.append(notif.to_dict())
        return recent
|
||||
|
||||
def get_current_step(
    self, session_id: str
) -> Optional[Dict[str, Any]]:
    """Return the step under construction as a dict, or ``None`` when the
    session is unknown or has no step in progress."""
    with self._lock:
        session = self._sessions.get(session_id)
        in_progress = session.get("current_step") if session else None
        return None if in_progress is None else in_progress.to_dict()
|
||||
|
||||
def get_steps_internal(
    self, session_id: str, include_current: bool = True
) -> List[UnderstoodStep]:
    """Internal variant returning ``UnderstoodStep`` objects.

    The steps are passed through ``_copy_step`` so callers cannot mutate the
    observer's own instances. Returns an empty list for an unknown session.
    """
    with self._lock:
        session = self._sessions.get(session_id)
        if not session:
            return []
        selected = list(session["steps"])
        in_progress = session.get("current_step")
        if include_current and in_progress is not None:
            selected.append(in_progress)
        copies = []
        for step in selected:
            copies.append(self._copy_step(step))
        return copies
|
||||
|
||||
def has_session(self, session_id: str) -> bool:
    """Tell whether a state record exists for ``session_id``."""
    with self._lock:
        known = session_id in self._sessions
    return known
|
||||
|
||||
# ----- Internals : segmentation --------------------------------------
|
||||
|
||||
def _should_cut(
|
||||
self, state: Dict[str, Any], event: Dict[str, Any]
|
||||
) -> tuple:
|
||||
"""Décider si l'événement doit démarrer un nouveau segment.
|
||||
|
||||
Returns:
|
||||
(should_cut, reason)
|
||||
"""
|
||||
current = state.get("current_step")
|
||||
if current is None or not current.events:
|
||||
return (False, "")
|
||||
|
||||
# Coupure : changement d'application
|
||||
new_app = self._get_app_name(event)
|
||||
if new_app and current.app_name and new_app != current.app_name:
|
||||
return (True, "changement_application")
|
||||
|
||||
# Coupure : pause longue entre deux événements
|
||||
prev_ts = float(current.events[-1].get("timestamp", 0))
|
||||
curr_ts = float(event.get("timestamp", 0))
|
||||
if prev_ts > 0 and curr_ts > 0:
|
||||
if (curr_ts - prev_ts) > _SEUIL_PAUSE_LONGUE_S:
|
||||
return (True, "pause_longue")
|
||||
|
||||
# Coupure : key_combo « lourd » type ctrl+s (sauvegarde) → fin logique
|
||||
evt_type = event.get("type", "")
|
||||
if evt_type in ("key_combo", "key_press"):
|
||||
keys = [str(k).lower() for k in event.get("keys", [])]
|
||||
if "ctrl" in keys and any(k in keys for k in ("s", "enter")):
|
||||
# On accroche le key_combo à l'étape courante, puis on coupe
|
||||
# APRÈS — retourner False ici, la coupure se fera au prochain
|
||||
# événement. C'est voulu.
|
||||
return (False, "")
|
||||
|
||||
return (False, "")
|
||||
|
||||
def _initial_intent(self, event: Dict[str, Any]) -> str:
|
||||
"""Intention provisoire d'un tout nouveau segment."""
|
||||
app = self._get_app_name(event) or self._get_window_title(event)
|
||||
evt_type = event.get("type", "")
|
||||
if evt_type == "mouse_click":
|
||||
hint = event.get("vision_info", {}).get("text", "")
|
||||
if hint:
|
||||
return f"Cliquer sur « {hint} »"
|
||||
if app:
|
||||
return f"Interagir avec {app}"
|
||||
return "Cliquer quelque part"
|
||||
if evt_type == "text_input":
|
||||
text = event.get("text", "")[:40]
|
||||
return f"Saisir du texte" + (f" « {text} »" if text else "")
|
||||
if evt_type in ("key_combo", "key_press"):
|
||||
keys = event.get("keys", [])
|
||||
return f"Appuyer sur {'+'.join(keys)}" if keys else "Raccourci clavier"
|
||||
return f"Action dans {app}" if app else "Action"
|
||||
|
||||
def _refine_intent(
|
||||
self, step: UnderstoodStep, event: Dict[str, Any]
|
||||
) -> str:
|
||||
"""Raffiner l'intention au fur et à mesure qu'on voit plus d'événements.
|
||||
|
||||
Heuristiques simples — pas de gemma4 ici pour rester rapide.
|
||||
"""
|
||||
types = [e.get("type", "") for e in step.events]
|
||||
has_click = "mouse_click" in types
|
||||
has_type = "text_input" in types
|
||||
has_key = any(t in ("key_combo", "key_press") for t in types)
|
||||
app = step.app_name or self._get_window_title(event)
|
||||
|
||||
# Cas 1 : clic + saisie + entrée → "Rechercher X"
|
||||
if has_click and has_type:
|
||||
texts = [e.get("text", "") for e in step.events if e.get("type") == "text_input"]
|
||||
if texts and any("enter" in [k.lower() for k in e.get("keys", [])]
|
||||
for e in step.events if e.get("type") in ("key_combo", "key_press")):
|
||||
premier_texte = next((t for t in texts if t), "")
|
||||
if premier_texte:
|
||||
step.confidence = min(0.85, step.confidence + 0.05)
|
||||
return f"Rechercher « {premier_texte[:30]} »"
|
||||
|
||||
# Cas 2 : saisie seule → "Écrire du texte"
|
||||
if has_type and not has_click:
|
||||
texts = [e.get("text", "") for e in step.events if e.get("type") == "text_input"]
|
||||
premier_texte = next((t for t in texts if t), "")
|
||||
if premier_texte:
|
||||
return f"Écrire « {premier_texte[:40]} »"
|
||||
return "Écrire du texte"
|
||||
|
||||
# Cas 3 : ctrl+s → "Sauvegarder"
|
||||
if has_key:
|
||||
for e in step.events:
|
||||
if e.get("type") in ("key_combo", "key_press"):
|
||||
keys = [str(k).lower() for k in e.get("keys", [])]
|
||||
if "ctrl" in keys and "s" in keys:
|
||||
step.confidence = min(0.9, step.confidence + 0.1)
|
||||
return f"Sauvegarder{' dans ' + app if app else ''}"
|
||||
if "ctrl" in keys and "c" in keys:
|
||||
return f"Copier{' depuis ' + app if app else ''}"
|
||||
if "ctrl" in keys and "v" in keys:
|
||||
return f"Coller{' dans ' + app if app else ''}"
|
||||
|
||||
# Cas 4 : clic seul + app identifiable
|
||||
if has_click and app:
|
||||
hint = ""
|
||||
for e in step.events:
|
||||
if e.get("type") == "mouse_click":
|
||||
hint = e.get("vision_info", {}).get("text", "")
|
||||
if hint:
|
||||
break
|
||||
if hint:
|
||||
return f"Cliquer sur « {hint} » dans {app}"
|
||||
return f"Interagir avec {app}"
|
||||
|
||||
return step.intent
|
||||
|
||||
def _handle_text_input(
|
||||
self,
|
||||
session_id: str,
|
||||
step: UnderstoodStep,
|
||||
event: Dict[str, Any],
|
||||
) -> None:
|
||||
"""Détecter et notifier une variable lors d'une saisie texte."""
|
||||
text = (event.get("text") or "").strip()
|
||||
if not text or len(text) < 3:
|
||||
return
|
||||
|
||||
# Déduire un nom de variable provisoire
|
||||
var_name = f"texte_{len(step.variables_detectees) + 1}"
|
||||
step.variables_detectees.append(var_name)
|
||||
|
||||
# Heuristique : détecter le type plausible
|
||||
var_type = self._guess_variable_type(text)
|
||||
|
||||
self._notifier(
|
||||
session_id,
|
||||
NiveauNotification.VARIABLE,
|
||||
f"Variable détectée : tu as tapé « {text[:40]} » — c'est {var_type} ?",
|
||||
step_index=step.step_index,
|
||||
data={
|
||||
"variable_name": var_name,
|
||||
"value": text,
|
||||
"variable_type": var_type,
|
||||
},
|
||||
)
|
||||
|
||||
def _guess_variable_type(self, text: str) -> str:
|
||||
"""Deviner le type d'une variable à partir de sa valeur."""
|
||||
t = text.strip()
|
||||
# Date (basique)
|
||||
if len(t) == 10 and t[2] in "/-" and t[5] in "/-":
|
||||
return "une date"
|
||||
if t.isdigit():
|
||||
return "un numéro"
|
||||
if "@" in t and "." in t:
|
||||
return "une adresse e-mail"
|
||||
if len(t) <= 10 and t.replace(" ", "").replace("-", "").isalnum() and not any(c.islower() for c in t):
|
||||
return "un code"
|
||||
if " " in t and len(t) > 10:
|
||||
return "un texte libre"
|
||||
return "un texte"
|
||||
|
||||
# ----- Internals : notifications -------------------------------------
|
||||
|
||||
def _notifier(
|
||||
self,
|
||||
session_id: str,
|
||||
niveau: NiveauNotification,
|
||||
message: str,
|
||||
*,
|
||||
step_index: int = -1,
|
||||
data: Optional[Dict[str, Any]] = None,
|
||||
) -> None:
|
||||
"""Créer et émettre une notification."""
|
||||
notif = NotificationShadow(
|
||||
notif_id=uuid.uuid4().hex[:12],
|
||||
niveau=niveau,
|
||||
message=message,
|
||||
session_id=session_id,
|
||||
step_index=step_index,
|
||||
data=data or {},
|
||||
timestamp=time.time(),
|
||||
)
|
||||
|
||||
with self._lock:
|
||||
state = self._sessions.get(session_id)
|
||||
if state is not None:
|
||||
state["notifications"].append(notif)
|
||||
state["last_notif_ts"] = notif.timestamp
|
||||
|
||||
if self._notify_callback is not None:
|
||||
try:
|
||||
self._notify_callback(notif)
|
||||
except Exception as e:
|
||||
logger.debug(f"ShadowObserver: callback a échoué : {e}")
|
||||
|
||||
def _emit_step_closed(
|
||||
self,
|
||||
session_id: str,
|
||||
step: UnderstoodStep,
|
||||
reason: str,
|
||||
) -> None:
|
||||
"""Émettre une notification quand une étape est fermée."""
|
||||
raison_humaine = {
|
||||
"changement_application": "tu es passé à une autre application",
|
||||
"pause_longue": "tu as fait une pause",
|
||||
}.get(reason, "")
|
||||
|
||||
suffixe = f" ({raison_humaine})" if raison_humaine else ""
|
||||
self._notifier(
|
||||
session_id,
|
||||
NiveauNotification.DECOUVERTE,
|
||||
f"Nouvelle étape comprise : {step.intent}{suffixe}",
|
||||
step_index=step.step_index,
|
||||
data={"step": step.to_dict()},
|
||||
)
|
||||
|
||||
if self._enable_gemma4 and self._gemma4_callback is not None:
|
||||
# Non bloquant : on délègue au caller (qui peut utiliser un thread)
|
||||
try:
|
||||
self._gemma4_callback(self._copy_step(step))
|
||||
except Exception as e:
|
||||
logger.debug(f"ShadowObserver: gemma4_callback a échoué : {e}")
|
||||
|
||||
def _maybe_emit_heartbeat(
|
||||
self,
|
||||
session_id: str,
|
||||
state: Dict[str, Any],
|
||||
) -> None:
|
||||
"""Émettre un résumé périodique (toutes les 5s env.)."""
|
||||
now = time.time()
|
||||
last = state.get("last_notif_ts", 0)
|
||||
if now - last < 5.0:
|
||||
return
|
||||
nb_steps = len(state["steps"])
|
||||
if state.get("current_step") is not None:
|
||||
nb_steps += 1
|
||||
if nb_steps == 0:
|
||||
return
|
||||
self._notifier(
|
||||
session_id,
|
||||
NiveauNotification.INFO,
|
||||
f"J'ai compris {nb_steps} étape(s) jusqu'ici.",
|
||||
data={"steps_count": nb_steps},
|
||||
)
|
||||
|
||||
# ----- Utilitaires ---------------------------------------------------
|
||||
|
||||
@staticmethod
|
||||
def _get_app_name(event: Dict[str, Any]) -> str:
|
||||
"""Extraire le nom d'application depuis un événement."""
|
||||
window = event.get("window") or {}
|
||||
if isinstance(window, dict):
|
||||
title = window.get("title", "")
|
||||
app_name = window.get("app_name", "")
|
||||
else:
|
||||
title = event.get("window_title", "")
|
||||
app_name = ""
|
||||
|
||||
# Préférer app_name si disponible
|
||||
if app_name and app_name != "unknown":
|
||||
return app_name
|
||||
|
||||
# Sinon, extraire depuis le titre
|
||||
for sep in [" – ", " - ", " — "]:
|
||||
if sep in title:
|
||||
return title.split(sep)[-1].strip()
|
||||
return title.strip() if title else ""
|
||||
|
||||
@staticmethod
|
||||
def _get_window_title(event: Dict[str, Any]) -> str:
|
||||
window = event.get("window") or {}
|
||||
if isinstance(window, dict):
|
||||
return window.get("title", "") or ""
|
||||
return event.get("window_title", "") or ""
|
||||
|
||||
@staticmethod
|
||||
def _copy_step(step: UnderstoodStep) -> UnderstoodStep:
|
||||
"""Copie superficielle pour éviter les fuites de mutation."""
|
||||
return UnderstoodStep(
|
||||
step_index=step.step_index,
|
||||
intent=step.intent,
|
||||
intent_provisoire=step.intent_provisoire,
|
||||
confidence=step.confidence,
|
||||
app_name=step.app_name,
|
||||
window_title=step.window_title,
|
||||
events=list(step.events),
|
||||
variables_detectees=list(step.variables_detectees),
|
||||
started_at=step.started_at,
|
||||
ended_at=step.ended_at,
|
||||
validated=step.validated,
|
||||
corrected=step.corrected,
|
||||
cancelled=step.cancelled,
|
||||
)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Singleton partagé (optionnel)
|
||||
# =========================================================================
|
||||
|
||||
|
||||
_shared_observer: Optional[ShadowObserver] = None
|
||||
_shared_lock = threading.Lock()
|
||||
|
||||
|
||||
def get_shared_observer() -> ShadowObserver:
    """Return the module-wide observer used by the API, creating it on first use.

    Creation and lookup both happen under ``_shared_lock``.
    """
    global _shared_observer
    with _shared_lock:
        instance = _shared_observer
        if instance is None:
            instance = ShadowObserver()
            _shared_observer = instance
        return instance
|
||||
468
core/workflow/shadow_validator.py
Normal file
468
core/workflow/shadow_validator.py
Normal file
@@ -0,0 +1,468 @@
|
||||
# core/workflow/shadow_validator.py
|
||||
"""
|
||||
ShadowValidator — Applique les feedbacks utilisateur et reconstruit un WorkflowIR.
|
||||
|
||||
Le ShadowObserver observe et comprend en temps réel. Le ShadowValidator,
|
||||
lui, prend les décisions de l'utilisateur (valider, corriger, annuler,
|
||||
combiner) et reconstruit un WorkflowIR final « propre » qui sera
|
||||
persisté et exécutable par le runtime.
|
||||
|
||||
Opérations supportées :
|
||||
- validate(step_index) : marquer l'étape comme validée
|
||||
- correct(step_index, new_intent) : corriger l'intention
|
||||
- undo(step_index) : annuler l'étape (elle sera exclue du WorkflowIR)
|
||||
- merge_next(step_index) : fusionner avec l'étape suivante
|
||||
- cancel() : annuler tout le workflow
|
||||
- split(step_index, at_event_index) : couper une étape en deux (bonus)
|
||||
|
||||
Le validator ne touche PAS aux événements bruts (events.jsonl) — il
|
||||
travaille sur la liste des `UnderstoodStep` fournie par le ShadowObserver.
|
||||
|
||||
Une fois toutes les actions appliquées, `build_workflow_ir()` produit
|
||||
un WorkflowIR exécutable à partir des étapes validées/corrigées.
|
||||
|
||||
Usage :
|
||||
|
||||
validator = ShadowValidator()
|
||||
validator.set_steps(observer.get_steps_internal(session_id))
|
||||
|
||||
validator.apply_feedback({"action": "validate", "step_index": 1})
|
||||
validator.apply_feedback({
|
||||
"action": "correct",
|
||||
"step_index": 2,
|
||||
"new_intent": "Sauvegarder le document",
|
||||
})
|
||||
validator.apply_feedback({"action": "undo", "step_index": 3})
|
||||
|
||||
ir = validator.build_workflow_ir(
|
||||
session_id="sess_abc",
|
||||
name="Mon workflow",
|
||||
domain="generic",
|
||||
)
|
||||
ir.save("data/workflows/")
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from .shadow_observer import UnderstoodStep
|
||||
from .workflow_ir import Action, Step, Variable, WorkflowIR
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Actions accepted in the "action" field of a feedback dict; anything else
# is rejected by ShadowValidator.apply_feedback.
FEEDBACK_ACTIONS = {
    "validate",
    "correct",
    "undo",
    "cancel",
    "merge_next",
    "split",
}
|
||||
|
||||
|
||||
@dataclass
class FeedbackResult:
    """Outcome of one feedback operation."""

    ok: bool
    action: str
    step_index: int
    message: str
    data: Dict[str, Any]

    def to_dict(self) -> Dict[str, Any]:
        """Return the result as a plain dict; ``data`` is shallow-copied."""
        exported: Dict[str, Any] = {}
        for name in ("ok", "action", "step_index", "message"):
            exported[name] = getattr(self, name)
        exported["data"] = dict(self.data)
        return exported
|
||||
|
||||
|
||||
class ShadowValidator:
|
||||
"""Applique les feedbacks utilisateur et produit un WorkflowIR."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._steps: List[UnderstoodStep] = []
|
||||
self._cancelled_workflow: bool = False
|
||||
self._history: List[FeedbackResult] = []
|
||||
|
||||
# ----- API -----------------------------------------------------------
|
||||
|
||||
def set_steps(self, steps: List[UnderstoodStep]) -> None:
    """Load the observed steps into the validator.

    Each step is cloned via ``self._clone``; the cancelled flag and the
    feedback history are reset.
    """
    cloned = []
    for observed in steps:
        cloned.append(self._clone(observed))
    self._steps = cloned
    self._cancelled_workflow = False
    self._history = []
|
||||
|
||||
@property
def steps(self) -> List[UnderstoodStep]:
    """Read view of the current steps (a new list on every access)."""
    snapshot = list(self._steps)
    return snapshot
|
||||
|
||||
@property
|
||||
def history(self) -> List[FeedbackResult]:
|
||||
"""Historique des feedbacks appliqués."""
|
||||
return list(self._history)
|
||||
|
||||
@property
|
||||
def is_cancelled(self) -> bool:
|
||||
return self._cancelled_workflow
|
||||
|
||||
def apply_feedback(self, feedback: Dict[str, Any]) -> FeedbackResult:
|
||||
"""Appliquer un feedback utilisateur.
|
||||
|
||||
Le `feedback` est un dict au format :
|
||||
{
|
||||
"action": "validate" | "correct" | "undo" | "cancel" | "merge_next" | "split",
|
||||
"step_index": 1, # Index 1-based (comme dans get_understanding)
|
||||
"new_intent": "...", # Pour correct
|
||||
"at_event_index": 3, # Pour split
|
||||
}
|
||||
|
||||
Returns:
|
||||
FeedbackResult
|
||||
"""
|
||||
action = (feedback.get("action") or "").strip()
|
||||
if action not in FEEDBACK_ACTIONS:
|
||||
return self._record(FeedbackResult(
|
||||
ok=False, action=action, step_index=-1,
|
||||
message=f"Action inconnue : « {action} »",
|
||||
data={"supported": sorted(FEEDBACK_ACTIONS)},
|
||||
))
|
||||
|
||||
if action == "cancel":
|
||||
return self._do_cancel()
|
||||
|
||||
step_index = int(feedback.get("step_index", -1))
|
||||
if not self._is_valid_step_index(step_index):
|
||||
return self._record(FeedbackResult(
|
||||
ok=False, action=action, step_index=step_index,
|
||||
message=f"Index d'étape invalide : {step_index}",
|
||||
data={"nb_steps": len(self._steps)},
|
||||
))
|
||||
|
||||
if action == "validate":
|
||||
return self._do_validate(step_index)
|
||||
if action == "correct":
|
||||
return self._do_correct(step_index, feedback.get("new_intent", ""))
|
||||
if action == "undo":
|
||||
return self._do_undo(step_index)
|
||||
if action == "merge_next":
|
||||
return self._do_merge_next(step_index)
|
||||
if action == "split":
|
||||
return self._do_split(
|
||||
step_index, int(feedback.get("at_event_index", -1))
|
||||
)
|
||||
|
||||
return self._record(FeedbackResult(
|
||||
ok=False, action=action, step_index=step_index,
|
||||
message="Action non implémentée", data={},
|
||||
))
|
||||
|
||||
def apply_feedbacks(
|
||||
self, feedbacks: List[Dict[str, Any]]
|
||||
) -> List[FeedbackResult]:
|
||||
"""Appliquer plusieurs feedbacks dans l'ordre."""
|
||||
return [self.apply_feedback(f) for f in feedbacks]
|
||||
|
||||
# ----- Opérations ---------------------------------------------------
|
||||
|
||||
def _do_validate(self, step_index: int) -> FeedbackResult:
|
||||
step = self._get_step(step_index)
|
||||
step.validated = True
|
||||
step.intent_provisoire = False
|
||||
step.confidence = max(step.confidence, 0.95)
|
||||
return self._record(FeedbackResult(
|
||||
ok=True, action="validate", step_index=step_index,
|
||||
message=f"Étape {step_index} validée : {step.intent}",
|
||||
data={"intent": step.intent},
|
||||
))
|
||||
|
||||
def _do_correct(
|
||||
self, step_index: int, new_intent: str
|
||||
) -> FeedbackResult:
|
||||
new_intent = (new_intent or "").strip()
|
||||
if not new_intent:
|
||||
return self._record(FeedbackResult(
|
||||
ok=False, action="correct", step_index=step_index,
|
||||
message="Nouvelle intention vide",
|
||||
data={},
|
||||
))
|
||||
step = self._get_step(step_index)
|
||||
old_intent = step.intent
|
||||
step.intent = new_intent
|
||||
step.corrected = True
|
||||
step.validated = True # Corriger = implicitement valider
|
||||
step.intent_provisoire = False
|
||||
step.confidence = 1.0
|
||||
return self._record(FeedbackResult(
|
||||
ok=True, action="correct", step_index=step_index,
|
||||
message=f"Étape {step_index} corrigée : « {old_intent} » → « {new_intent} »",
|
||||
data={"old_intent": old_intent, "new_intent": new_intent},
|
||||
))
|
||||
|
||||
def _do_undo(self, step_index: int) -> FeedbackResult:
|
||||
step = self._get_step(step_index)
|
||||
step.cancelled = True
|
||||
return self._record(FeedbackResult(
|
||||
ok=True, action="undo", step_index=step_index,
|
||||
message=f"Étape {step_index} annulée : {step.intent}",
|
||||
data={"intent": step.intent},
|
||||
))
|
||||
|
||||
def _do_merge_next(self, step_index: int) -> FeedbackResult:
|
||||
"""Fusionner l'étape avec la suivante."""
|
||||
if step_index >= len(self._steps):
|
||||
return self._record(FeedbackResult(
|
||||
ok=False, action="merge_next", step_index=step_index,
|
||||
message="Aucune étape suivante à fusionner",
|
||||
data={},
|
||||
))
|
||||
step = self._get_step(step_index)
|
||||
next_step = self._get_step(step_index + 1)
|
||||
|
||||
merged = UnderstoodStep(
|
||||
step_index=step.step_index,
|
||||
intent=step.intent if len(step.intent) >= len(next_step.intent) else next_step.intent,
|
||||
intent_provisoire=False,
|
||||
confidence=max(step.confidence, next_step.confidence),
|
||||
app_name=step.app_name or next_step.app_name,
|
||||
window_title=step.window_title or next_step.window_title,
|
||||
events=list(step.events) + list(next_step.events),
|
||||
variables_detectees=list(step.variables_detectees)
|
||||
+ list(next_step.variables_detectees),
|
||||
started_at=step.started_at or next_step.started_at,
|
||||
ended_at=next_step.ended_at or step.ended_at,
|
||||
validated=True,
|
||||
corrected=step.corrected or next_step.corrected,
|
||||
cancelled=False,
|
||||
)
|
||||
|
||||
# Remplacer [step, next_step] par [merged]
|
||||
idx0 = step_index - 1 # 1-based → 0-based
|
||||
self._steps.pop(idx0 + 1) # next_step
|
||||
self._steps[idx0] = merged
|
||||
self._renumber()
|
||||
|
||||
return self._record(FeedbackResult(
|
||||
ok=True, action="merge_next", step_index=step_index,
|
||||
message=f"Étapes {step_index} et {step_index + 1} fusionnées",
|
||||
data={"intent": merged.intent},
|
||||
))
|
||||
|
||||
def _do_split(
|
||||
self, step_index: int, at_event_index: int
|
||||
) -> FeedbackResult:
|
||||
"""Couper une étape en deux au niveau de l'événement at_event_index.
|
||||
|
||||
`at_event_index` est 0-based parmi les events de l'étape.
|
||||
"""
|
||||
step = self._get_step(step_index)
|
||||
if at_event_index <= 0 or at_event_index >= len(step.events):
|
||||
return self._record(FeedbackResult(
|
||||
ok=False, action="split", step_index=step_index,
|
||||
message=f"Index de coupe invalide : {at_event_index}",
|
||||
data={"nb_events": len(step.events)},
|
||||
))
|
||||
|
||||
left_events = step.events[:at_event_index]
|
||||
right_events = step.events[at_event_index:]
|
||||
|
||||
left = UnderstoodStep(
|
||||
step_index=step.step_index,
|
||||
intent=step.intent + " (1/2)",
|
||||
intent_provisoire=True,
|
||||
confidence=step.confidence * 0.9,
|
||||
app_name=step.app_name,
|
||||
window_title=step.window_title,
|
||||
events=left_events,
|
||||
started_at=step.started_at,
|
||||
)
|
||||
right = UnderstoodStep(
|
||||
step_index=step.step_index + 1,
|
||||
intent=step.intent + " (2/2)",
|
||||
intent_provisoire=True,
|
||||
confidence=step.confidence * 0.9,
|
||||
app_name=step.app_name,
|
||||
window_title=step.window_title,
|
||||
events=right_events,
|
||||
started_at=float(right_events[0].get("timestamp", 0))
|
||||
if right_events else step.started_at,
|
||||
ended_at=step.ended_at,
|
||||
)
|
||||
|
||||
idx0 = step_index - 1
|
||||
self._steps[idx0] = left
|
||||
self._steps.insert(idx0 + 1, right)
|
||||
self._renumber()
|
||||
|
||||
return self._record(FeedbackResult(
|
||||
ok=True, action="split", step_index=step_index,
|
||||
message=f"Étape {step_index} coupée en 2",
|
||||
data={"nb_steps": len(self._steps)},
|
||||
))
|
||||
|
||||
def _do_cancel(self) -> FeedbackResult:
|
||||
self._cancelled_workflow = True
|
||||
return self._record(FeedbackResult(
|
||||
ok=True, action="cancel", step_index=-1,
|
||||
message="Workflow annulé",
|
||||
data={},
|
||||
))
|
||||
|
||||
# ----- Construction du WorkflowIR -----------------------------------
|
||||
|
||||
def build_workflow_ir(
|
||||
self,
|
||||
session_id: str = "",
|
||||
name: str = "",
|
||||
domain: str = "generic",
|
||||
*,
|
||||
require_all_validated: bool = False,
|
||||
) -> Optional[WorkflowIR]:
|
||||
"""Construire un WorkflowIR à partir des étapes corrigées.
|
||||
|
||||
Args:
|
||||
session_id: Identifiant de la session source.
|
||||
name: Nom du workflow.
|
||||
domain: Domaine métier.
|
||||
require_all_validated: Si True, lève une erreur si au moins
|
||||
une étape n'a pas été validée explicitement.
|
||||
|
||||
Returns:
|
||||
WorkflowIR ou None si le workflow a été annulé.
|
||||
"""
|
||||
if self._cancelled_workflow:
|
||||
logger.info("ShadowValidator: workflow annulé, pas de build")
|
||||
return None
|
||||
|
||||
ir = WorkflowIR.new(
|
||||
name=name or f"Workflow du {time.strftime('%d/%m/%Y %H:%M')}",
|
||||
domain=domain,
|
||||
learned_from=session_id,
|
||||
)
|
||||
|
||||
variables: List[Variable] = []
|
||||
seen_texts = set()
|
||||
applications: set = set()
|
||||
|
||||
for step in self._steps:
|
||||
if step.cancelled:
|
||||
continue
|
||||
if require_all_validated and not step.validated:
|
||||
raise ValueError(
|
||||
f"Étape {step.step_index} non validée : {step.intent}"
|
||||
)
|
||||
|
||||
if step.app_name:
|
||||
applications.add(step.app_name)
|
||||
|
||||
actions = []
|
||||
for evt in step.events:
|
||||
action = self._event_to_action(evt)
|
||||
if action is None:
|
||||
continue
|
||||
|
||||
# Détection de variable (texte saisi)
|
||||
if action.type == "type" and action.text:
|
||||
text = action.text.strip()
|
||||
if text and text not in seen_texts and len(text) > 2:
|
||||
seen_texts.add(text)
|
||||
var_name = f"texte_{len(variables) + 1}"
|
||||
variables.append(Variable(
|
||||
name=var_name,
|
||||
description=f"Texte saisi : « {text[:50]} »",
|
||||
source="user",
|
||||
default=text,
|
||||
))
|
||||
action.variable = True
|
||||
action.text = "{" + var_name + "}"
|
||||
|
||||
actions.append(action)
|
||||
|
||||
ir_step = Step(
|
||||
step_id=f"s{len(ir.steps) + 1}",
|
||||
intent=step.intent,
|
||||
actions=actions,
|
||||
)
|
||||
ir.steps.append(ir_step)
|
||||
|
||||
ir.variables = variables
|
||||
ir.applications = sorted(applications)
|
||||
ir.updated_at = time.time()
|
||||
|
||||
logger.info(
|
||||
f"ShadowValidator: WorkflowIR construit — {len(ir.steps)} étapes, "
|
||||
f"{len(ir.variables)} variables"
|
||||
)
|
||||
return ir
|
||||
|
||||
# ----- Utilitaires --------------------------------------------------
|
||||
|
||||
def _is_valid_step_index(self, step_index: int) -> bool:
|
||||
return 1 <= step_index <= len(self._steps)
|
||||
|
||||
def _get_step(self, step_index: int) -> UnderstoodStep:
|
||||
return self._steps[step_index - 1]
|
||||
|
||||
def _renumber(self) -> None:
|
||||
for i, s in enumerate(self._steps, start=1):
|
||||
s.step_index = i
|
||||
|
||||
def _record(self, result: FeedbackResult) -> FeedbackResult:
|
||||
self._history.append(result)
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def _clone(step: UnderstoodStep) -> UnderstoodStep:
|
||||
return UnderstoodStep(
|
||||
step_index=step.step_index,
|
||||
intent=step.intent,
|
||||
intent_provisoire=step.intent_provisoire,
|
||||
confidence=step.confidence,
|
||||
app_name=step.app_name,
|
||||
window_title=step.window_title,
|
||||
events=list(step.events),
|
||||
variables_detectees=list(step.variables_detectees),
|
||||
started_at=step.started_at,
|
||||
ended_at=step.ended_at,
|
||||
validated=step.validated,
|
||||
corrected=step.corrected,
|
||||
cancelled=step.cancelled,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _event_to_action(evt: Dict[str, Any]) -> Optional[Action]:
|
||||
"""Convertir un événement brut en Action (miroir de IRBuilder)."""
|
||||
evt_type = evt.get("type", "")
|
||||
|
||||
if evt_type == "mouse_click":
|
||||
window = evt.get("window") or {}
|
||||
if isinstance(window, dict):
|
||||
target = window.get("title", "")
|
||||
else:
|
||||
target = evt.get("window_title", "")
|
||||
return Action(
|
||||
type="click",
|
||||
target=target or "",
|
||||
anchor_hint=(evt.get("vision_info") or {}).get("text", ""),
|
||||
)
|
||||
if evt_type == "text_input":
|
||||
text = evt.get("text", "")
|
||||
if text:
|
||||
return Action(type="type", text=text)
|
||||
if evt_type in ("key_combo", "key_press"):
|
||||
keys = evt.get("keys", [])
|
||||
if keys:
|
||||
return Action(type="key_combo", keys=list(keys))
|
||||
if evt_type == "scroll":
|
||||
return Action(type="scroll")
|
||||
return None
|
||||
337
core/workflow/surface_classifier.py
Normal file
337
core/workflow/surface_classifier.py
Normal file
@@ -0,0 +1,337 @@
|
||||
# core/workflow/surface_classifier.py
"""
SurfaceClassifier — detects the type of application surface at execution time.

Four surface types are recognised:
  - citrix         : Citrix/RDP/TSE session (wfica32.exe, mstsc.exe, CDViewer.exe)
                     → pure vision required, tolerant parameters
  - windows_native : native Windows application (notepad.exe, explorer.exe, DPI...)
                     → vision + UIA bonus, standard parameters
  - web_local      : local browser (chrome.exe, firefox.exe, msedge.exe)
                     → vision + DOM/CDP bonus (if enabled), fast parameters
  - unknown        : fallback → pure vision, default parameters

The classifier runs ONLY ONCE, at the start of a session or of a replay.
Its result determines:
  1. Which helpers are enabled (UIA? CDP?)
  2. The resolution parameters (timeouts, OCR thresholds)
  3. The recovery strategy

Principle: vision remains the foundation. The classifier only decides
which bonuses to enable and which parameters to tune.
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import subprocess
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SurfaceType(str, Enum):
    """Application surface types (string-valued enum)."""

    CITRIX = "citrix"
    WINDOWS_NATIVE = "windows_native"
    WEB_LOCAL = "web_local"
    UNKNOWN = "unknown"
|
||||
|
||||
|
||||
# Processus connus par type de surface
|
||||
_CITRIX_PROCESSES = {
|
||||
"wfica32.exe", # Citrix Workspace (Windows 10+)
|
||||
"cdviewer.exe", # Citrix Desktop Viewer
|
||||
"cdviewer.exe",
|
||||
"mstsc.exe", # Microsoft Remote Desktop
|
||||
"vmware-vmx.exe", # VMware (cas RDS)
|
||||
"xen.exe", # Citrix XenApp
|
||||
"receiver.exe", # Citrix Receiver (ancien)
|
||||
"selfservice.exe", # Citrix Self-Service Plug-in
|
||||
}
|
||||
|
||||
_BROWSER_PROCESSES = {
|
||||
"chrome.exe",
|
||||
"msedge.exe",
|
||||
"firefox.exe",
|
||||
"brave.exe",
|
||||
"opera.exe",
|
||||
"vivaldi.exe",
|
||||
}
|
||||
|
||||
# Processus système Windows qui ne sont PAS des surfaces applicatives
|
||||
_SYSTEM_PROCESSES = {
|
||||
"explorer.exe", # Shell Windows (cas spécial — on le compte comme natif)
|
||||
"searchhost.exe", # Recherche Windows
|
||||
"startmenuexperiencehost.exe",
|
||||
"shellexperiencehost.exe",
|
||||
"applicationframehost.exe",
|
||||
}
|
||||
|
||||
|
||||
@dataclass
class SurfaceProfile:
    """Complete profile of a detected surface."""

    surface_type: SurfaceType
    process_name: str = ""  # process of the active window
    window_title: str = ""  # title of the active window
    confidence: float = 1.0  # detection confidence (0-1)

    # Available capabilities
    uia_available: bool = False  # the UIA helper can be used
    cdp_available: bool = False  # Chrome DevTools Protocol reachable
    ocr_available: bool = True  # OCR always available (docTR)
    vlm_available: bool = True  # VLM always available (qwen2.5vl)

    # Parameters adapted to the surface
    timeout_click_ms: int = 10000
    timeout_resolve_ms: int = 5000
    ocr_threshold: float = 0.75
    template_threshold: float = 0.85
    max_retries: int = 2
    retry_delay_ms: int = 2000

    # Metadata
    detected_at: float = 0.0
    details: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the profile as a nested dict.

        `confidence` is rounded to 3 decimals, `detected_at` is not
        included, and `details` is returned as the same dict object.
        """
        return {
            "surface_type": self.surface_type.value,
            "process_name": self.process_name,
            "window_title": self.window_title,
            "confidence": round(self.confidence, 3),
            "capabilities": {
                "uia": self.uia_available,
                "cdp": self.cdp_available,
                "ocr": self.ocr_available,
                "vlm": self.vlm_available,
            },
            "parameters": {
                "timeout_click_ms": self.timeout_click_ms,
                "timeout_resolve_ms": self.timeout_resolve_ms,
                "ocr_threshold": self.ocr_threshold,
                "template_threshold": self.template_threshold,
                "max_retries": self.max_retries,
                "retry_delay_ms": self.retry_delay_ms,
            },
            "details": self.details,
        }

    def resolve_order(self) -> List[str]:
        """Build the resolution order from the surface and its capabilities.

        "uia" is tried first on native Windows surfaces and "dom" first on
        local web surfaces, each only when the matching capability is
        available; "ocr", "template" and "vlm" always follow.
        """
        order = []
        if self.uia_available and self.surface_type == SurfaceType.WINDOWS_NATIVE:
            order.append("uia")
        if self.cdp_available and self.surface_type == SurfaceType.WEB_LOCAL:
            order.append("dom")
        order.extend(["ocr", "template", "vlm"])
        return order
|
||||
|
||||
|
||||
class SurfaceClassifier:
    """Detect the surface and configure the matching parameters.

    Usage:
        classifier = SurfaceClassifier()
        profile = classifier.classify(
            process_name="notepad.exe", window_title="Sans titre – Bloc-notes"
        )
        if profile.uia_available:
            # Use lea_uia.exe
            ...
    """

    def __init__(self, uia_helper_path: str = ""):
        """
        Args:
            uia_helper_path: Path to lea_uia.exe (optional; auto-detected
                when empty).
        """
        self._uia_helper_path = uia_helper_path or self._find_uia_helper()

    def _find_uia_helper(self) -> str:
        """Find lea_uia.exe in the standard locations.

        Returns the absolute path of the first candidate that exists, or
        "" when none does.
        """
        candidates = [
            r"C:\Lea\helpers\lea_uia.exe",
            r".\helpers\lea_uia.exe",
            os.path.join(os.path.dirname(__file__), "..", "..", "agent_rust", "lea_uia",
                         "target", "x86_64-pc-windows-gnu", "release", "lea_uia.exe"),
        ]
        for path in candidates:
            if os.path.isfile(path):
                return os.path.abspath(path)
        return ""

    def classify(
        self,
        process_name: str = "",
        window_title: str = "",
        screen_info: Optional[Dict] = None,
    ) -> SurfaceProfile:
        """Classify a surface from the window context.

        Args:
            process_name: Process name (e.g. "notepad.exe"); None is
                treated as "".
            window_title: Title of the active window; None is treated as "".
            screen_info: Screen information (resolution, DPI, detected
                compression). Accepted but not used by this method.

        Returns:
            The SurfaceProfile for the first rule that matches, in this
            order: Citrix process, Citrix/remote-session title marker,
            browser, native Windows executable, unknown.
        """
        import time  # local import: `time` is not imported at module level

        process_name = process_name or ""
        window_title = window_title or ""
        process_lower = process_name.lower().strip()
        title_lower = window_title.lower()
        now = time.time()

        # Citrix detection — absolute priority.
        if process_lower in _CITRIX_PROCESSES:
            return self._build_citrix_profile(process_name, window_title, now)

        # Citrix title (e.g. "Session Citrix", "Citrix Receiver"). Because
        # this runs before the browser check, it also covers a browser
        # window that embeds a Citrix session.
        if any(marker in title_lower for marker in ["citrix", "ica session", "rdp session"]):
            return self._build_citrix_profile(process_name, window_title, now)

        # Browser
        if process_lower in _BROWSER_PROCESSES:
            return self._build_web_profile(process_name, window_title, now)

        # Native Windows application. Listed system processes are excluded,
        # except the shell (explorer.exe), which is counted as native.
        is_app_exe = process_lower.endswith(".exe") and process_lower not in _SYSTEM_PROCESSES
        if is_app_exe or process_lower == "explorer.exe":
            return self._build_windows_profile(process_name, window_title, now)

        # Unknown — safe fallback.
        return self._build_unknown_profile(process_name, window_title, now)

    def _build_citrix_profile(self, process: str, title: str, ts: float) -> SurfaceProfile:
        """Citrix profile — pure vision, tolerant parameters."""
        return SurfaceProfile(
            surface_type=SurfaceType.CITRIX,
            process_name=process,
            window_title=title,
            confidence=0.95,
            uia_available=False,  # UIA is not available inside Citrix
            cdp_available=False,
            ocr_available=True,
            vlm_available=True,
            # Citrix: JPEG compression, latency, aggressive retries
            timeout_click_ms=15000,
            timeout_resolve_ms=10000,
            ocr_threshold=0.65,  # more tolerant (compression)
            template_threshold=0.75,  # more tolerant
            max_retries=3,
            retry_delay_ms=3000,
            detected_at=ts,
            details={"reason": "citrix_process_or_title"},
        )

    def _build_windows_profile(self, process: str, title: str, ts: float) -> SurfaceProfile:
        """Native Windows profile — vision + UIA bonus."""
        uia_ok = self._check_uia_available()
        return SurfaceProfile(
            surface_type=SurfaceType.WINDOWS_NATIVE,
            process_name=process,
            window_title=title,
            confidence=0.9,
            uia_available=uia_ok,
            cdp_available=False,
            ocr_available=True,
            vlm_available=True,
            timeout_click_ms=8000,
            timeout_resolve_ms=5000,
            ocr_threshold=0.75,
            template_threshold=0.85,
            max_retries=2,
            retry_delay_ms=2000,
            detected_at=ts,
            details={
                "reason": "native_windows_process",
                "uia_helper": self._uia_helper_path if uia_ok else "",
            },
        )

    def _build_web_profile(self, process: str, title: str, ts: float) -> SurfaceProfile:
        """Local web profile — vision (+ CDP later)."""
        cdp_ok = self._check_cdp_available()
        return SurfaceProfile(
            surface_type=SurfaceType.WEB_LOCAL,
            process_name=process,
            window_title=title,
            confidence=0.9,
            uia_available=False,  # UIA is limited for browsers
            cdp_available=cdp_ok,
            ocr_available=True,
            vlm_available=True,
            # Local web: fast, well-rendered text
            timeout_click_ms=5000,
            timeout_resolve_ms=3000,
            ocr_threshold=0.80,
            template_threshold=0.88,
            max_retries=1,
            retry_delay_ms=1000,
            detected_at=ts,
            details={"reason": "browser_process"},
        )

    def _build_unknown_profile(self, process: str, title: str, ts: float) -> SurfaceProfile:
        """Unknown profile — safe default parameters."""
        return SurfaceProfile(
            surface_type=SurfaceType.UNKNOWN,
            process_name=process,
            window_title=title,
            confidence=0.5,
            uia_available=False,
            cdp_available=False,
            ocr_available=True,
            vlm_available=True,
            timeout_click_ms=10000,
            timeout_resolve_ms=5000,
            ocr_threshold=0.70,
            template_threshold=0.80,
            max_retries=2,
            retry_delay_ms=2000,
            detected_at=ts,
            details={"reason": "fallback"},
        )

    def _check_uia_available(self) -> bool:
        """Check that lea_uia.exe is present and working.

        On Windows: runs `lea_uia.exe health` and expects exit code 0 and a
        JSON body whose "status" is "ok".
        On any other platform: always False.
        Any failure (timeout, bad JSON, OS error) is logged at debug level
        and reported as False.
        """
        if platform.system() != "Windows":
            return False
        if not self._uia_helper_path or not os.path.isfile(self._uia_helper_path):
            return False
        try:
            result = subprocess.run(
                [self._uia_helper_path, "health"],
                capture_output=True,
                text=True,
                timeout=5,
                # Do not open a console window for the helper process.
                creationflags=getattr(subprocess, "CREATE_NO_WINDOW", 0),
            )
            if result.returncode != 0:
                return False
            import json
            data = json.loads(result.stdout.strip())
            return data.get("status") == "ok"
        except Exception as e:
            logger.debug(f"UIA health check failed: {e}")
            return False

    def _check_cdp_available(self) -> bool:
        """Check that the Chrome DevTools Protocol is reachable.

        Probes http://localhost:9222/json/version with a 1 second timeout;
        any error is reported as False.
        """
        try:
            import urllib.request
            with urllib.request.urlopen(
                "http://localhost:9222/json/version", timeout=1
            ) as resp:
                return resp.status == 200
        except Exception:
            return False
|
||||
294
core/workflow/uia_helper.py
Normal file
294
core/workflow/uia_helper.py
Normal file
@@ -0,0 +1,294 @@
|
||||
# core/workflow/uia_helper.py
"""
UIAHelper — Python wrapper for lea_uia.exe (Rust UI Automation helper).

Exposes a simple Python API to query UIA through the Rust binary.
Communicates via subprocess + stdin/stdout JSON.

Why a Rust helper?
- 5-10x faster than pywinauto (10-20 ms vs 50-200 ms)
- Standalone binary of ~500 KB, no runtime dependency
- No COM threading problems in Python
- Crash-safe (a crash of the helper does not affect the Python agent)

Architecture:
    Python executor
        ↓ subprocess.run
    lea_uia.exe query --x 812 --y 436
        ↓ UIA API Windows
    JSON response
        ↓ stdout
    Python executor parses the JSON

If lea_uia.exe is not available (Linux, missing binary, crash):
all methods return None → automatic fallback to vision.
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import subprocess
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Timeout par défaut pour les appels UIA (en secondes)
|
||||
_DEFAULT_TIMEOUT = 5.0
|
||||
|
||||
# Masquer la fenêtre console lors du spawn de lea_uia.exe sur Windows.
|
||||
# Sans ce flag, chaque appel (à chaque clic utilisateur pendant
|
||||
# l'enregistrement) fait apparaître une fenêtre cmd noire brièvement
|
||||
# visible à l'écran → ralentit la souris et pollue les screenshots
|
||||
# capturés (le VLM peut "voir" le chemin lea_uia.exe comme texte cliqué).
|
||||
#
|
||||
# La valeur 0x08000000 correspond à CREATE_NO_WINDOW défini dans
|
||||
# l'API Windows. Sur Linux/Mac, la valeur est 0 et `creationflags`
|
||||
# est ignoré. getattr() gère le cas où Python expose déjà la constante
|
||||
# sur Windows.
|
||||
if platform.system() == "Windows":
|
||||
_SUBPROCESS_CREATION_FLAGS = getattr(subprocess, "CREATE_NO_WINDOW", 0x08000000)
|
||||
else:
|
||||
_SUBPROCESS_CREATION_FLAGS = 0
|
||||
|
||||
|
||||
@dataclass
class UiaElement:
    """Python representation of a UIA element."""

    name: str = ""
    control_type: str = ""
    class_name: str = ""
    automation_id: str = ""
    # Read as (x1, y1, x2, y2) by center(), width() and height().
    bounding_rect: Tuple[int, int, int, int] = (0, 0, 0, 0)
    is_enabled: bool = False
    is_offscreen: bool = True
    parent_path: List[Dict[str, str]] = field(default_factory=list)
    process_name: str = ""

    def center(self) -> Tuple[int, int]:
        """Return the centre of the rectangle (pixels, integer division)."""
        x1, y1, x2, y2 = self.bounding_rect
        return ((x1 + x2) // 2, (y1 + y2) // 2)

    def width(self) -> int:
        """Rectangle width (x2 - x1)."""
        return self.bounding_rect[2] - self.bounding_rect[0]

    def height(self) -> int:
        """Rectangle height (y2 - y1)."""
        return self.bounding_rect[3] - self.bounding_rect[1]

    def is_clickable(self) -> bool:
        """True when the element is enabled, on screen and has a non-empty area."""
        return (
            self.is_enabled
            and not self.is_offscreen
            and self.width() > 0
            and self.height() > 0
        )

    def path_signature(self) -> str:
        """Signature of the parent path (used to find the element again).

        Parents without a name are skipped; a parent without a
        "control_type" key contributes an empty type instead of raising.
        """
        parts = [
            f"{p.get('control_type', '')}[{p['name']}]"
            for p in self.parent_path
            if p.get("name")
        ]
        parts.append(f"{self.control_type}[{self.name}]")
        return " > ".join(parts)

    def to_dict(self) -> Dict[str, Any]:
        """Return the element as a dict; `bounding_rect` becomes a list."""
        return {
            "name": self.name,
            "control_type": self.control_type,
            "class_name": self.class_name,
            "automation_id": self.automation_id,
            "bounding_rect": list(self.bounding_rect),
            "is_enabled": self.is_enabled,
            "is_offscreen": self.is_offscreen,
            "parent_path": self.parent_path,
            "process_name": self.process_name,
        }

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "UiaElement":
        """Build an element from a dict, using the field defaults for missing keys.

        `bounding_rect` may be a list or a tuple with at least 4 items
        (only the first 4 are kept); anything else yields (0, 0, 0, 0).
        """
        rect = d.get("bounding_rect", [0, 0, 0, 0])
        if isinstance(rect, (list, tuple)) and len(rect) >= 4:
            rect = tuple(rect[:4])
        else:
            rect = (0, 0, 0, 0)
        return cls(
            name=d.get("name", ""),
            control_type=d.get("control_type", ""),
            class_name=d.get("class_name", ""),
            automation_id=d.get("automation_id", ""),
            bounding_rect=rect,
            is_enabled=d.get("is_enabled", False),
            is_offscreen=d.get("is_offscreen", True),
            parent_path=d.get("parent_path", []),
            process_name=d.get("process_name", ""),
        )
|
||||
|
||||
|
||||
class UIAHelper:
|
||||
"""Wrapper Python pour lea_uia.exe."""
|
||||
|
||||
def __init__(self, helper_path: str = "", timeout: float = _DEFAULT_TIMEOUT):
    """Resolve the helper binary and record whether it can be used.

    Args:
        helper_path: Path to lea_uia.exe; auto-detected when empty.
        timeout: Timeout, in seconds, applied to each helper call.
    """
    self._helper_path = helper_path or self._find_helper()
    self._timeout = timeout
    # Evaluated once at construction time.
    self._available = self._check_available()
|
||||
|
||||
def _find_helper(self) -> str:
|
||||
"""Trouver lea_uia.exe dans les emplacements standards."""
|
||||
candidates = [
|
||||
r"C:\Lea\helpers\lea_uia.exe",
|
||||
os.path.join(os.path.dirname(__file__), "..", "..",
|
||||
"agent_rust", "lea_uia", "target",
|
||||
"x86_64-pc-windows-gnu", "release", "lea_uia.exe"),
|
||||
"./helpers/lea_uia.exe",
|
||||
"lea_uia.exe",
|
||||
]
|
||||
for path in candidates:
|
||||
if os.path.isfile(path):
|
||||
return os.path.abspath(path)
|
||||
return ""
|
||||
|
||||
def _check_available(self) -> bool:
|
||||
"""Vérifier que le helper est utilisable (Windows + binaire + health OK)."""
|
||||
if platform.system() != "Windows":
|
||||
logger.debug("UIAHelper: Linux/Mac — helper désactivé")
|
||||
return False
|
||||
if not self._helper_path:
|
||||
logger.debug("UIAHelper: lea_uia.exe introuvable")
|
||||
return False
|
||||
if not os.path.isfile(self._helper_path):
|
||||
logger.debug(f"UIAHelper: chemin invalide {self._helper_path}")
|
||||
return False
|
||||
return True
|
||||
|
||||
@property
def available(self) -> bool:
    """Availability flag stored in `_available`."""
    return self._available
|
||||
|
||||
@property
def helper_path(self) -> str:
    """Helper path stored in `_helper_path`."""
    return self._helper_path
|
||||
|
||||
def _run(self, args: List[str]) -> Optional[Dict[str, Any]]:
|
||||
"""Exécuter lea_uia.exe avec les arguments et parser le JSON."""
|
||||
if not self._available:
|
||||
return None
|
||||
try:
|
||||
result = subprocess.run(
|
||||
[self._helper_path] + args,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=self._timeout,
|
||||
encoding="utf-8",
|
||||
errors="replace",
|
||||
creationflags=_SUBPROCESS_CREATION_FLAGS,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
logger.debug(
|
||||
f"UIAHelper: exit code {result.returncode}, "
|
||||
f"stderr: {result.stderr[:200]}"
|
||||
)
|
||||
return None
|
||||
output = result.stdout.strip()
|
||||
if not output:
|
||||
return None
|
||||
return json.loads(output)
|
||||
except subprocess.TimeoutExpired:
|
||||
logger.debug(f"UIAHelper: timeout ({self._timeout}s) sur {args}")
|
||||
return None
|
||||
except json.JSONDecodeError as e:
|
||||
logger.debug(f"UIAHelper: JSON invalide — {e}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.debug(f"UIAHelper: erreur {e}")
|
||||
return None
|
||||
|
||||
def health(self) -> bool:
    """Report whether the helper's ``health`` command answers with status "ok"."""
    reply = self._run(["health"])
    if reply is None:
        return False
    return reply.get("status") == "ok"
|
||||
|
||||
def query_at(
    self,
    x: int,
    y: int,
    with_parents: bool = True,
) -> Optional[UiaElement]:
    """Fetch the UIA element located at a screen position.

    Args:
        x, y: Absolute pixel coordinates.
        with_parents: Include the parent hierarchy; when False the flag
            ``--with-parents=false`` is passed to the helper.

    Returns:
        A UiaElement when the helper answers with status "ok" and a
        non-empty "element" payload, None otherwise (no element or UIA
        unavailable).
    """
    cli_args = ["query", "--x", str(x), "--y", str(y)]
    if not with_parents:
        cli_args += ["--with-parents=false"]

    reply = self._run(cli_args)
    if reply and reply.get("status") == "ok":
        payload = reply.get("element")
        if payload:
            return UiaElement.from_dict(payload)
    return None
|
||||
|
||||
def find_by_name(
    self,
    name: str,
    control_type: Optional[str] = None,
    automation_id: Optional[str] = None,
    window: Optional[str] = None,
    timeout_ms: int = 2000,
) -> Optional[UiaElement]:
    """Search for an element by name, with optional extra filters.

    Args:
        name: Exact name of the element.
        control_type: Control type (Button, Edit, MenuItem...).
        automation_id: Automation ID.
        window: Restrict the search to a specific window.
        timeout_ms: Search timeout in milliseconds, forwarded to the helper
            as ``--timeout-ms``.

    Returns:
        A UiaElement when the helper answers with status "ok" and a
        non-empty "element" payload, None otherwise.
    """
    # NOTE(review): the subprocess itself is bounded by self._timeout inside
    # _run; presumably timeout_ms should stay below it — confirm.
    cli_args = ["find", "--name", name, "--timeout-ms", str(timeout_ms)]

    # Optional filters are appended only when provided, in a fixed order.
    optional_filters = (
        ("--control-type", control_type),
        ("--automation-id", automation_id),
        ("--window", window),
    )
    for flag, value in optional_filters:
        if value:
            cli_args.extend([flag, value])

    reply = self._run(cli_args)
    if reply and reply.get("status") == "ok":
        payload = reply.get("element")
        if payload:
            return UiaElement.from_dict(payload)
    return None
|
||||
|
||||
def capture_focused(self, max_depth: int = 3) -> Optional[UiaElement]:
    """Capture the element that currently has focus, plus its context.

    Args:
        max_depth: Value forwarded to the helper as ``--max-depth``.

    Returns:
        A UiaElement when the helper answers with status "ok" and a
        non-empty "element" payload, None otherwise.
    """
    reply = self._run(["capture", "--max-depth", str(max_depth)])
    if reply and reply.get("status") == "ok":
        payload = reply.get("element")
        if payload:
            return UiaElement.from_dict(payload)
    return None
|
||||
|
||||
|
||||
# Module-wide shared instance (lightweight singleton), created on first use.
_SHARED_HELPER: Optional[UIAHelper] = None


def get_shared_helper() -> UIAHelper:
    """Return the shared UIAHelper, instantiating it on the first call."""
    global _SHARED_HELPER
    helper = _SHARED_HELPER
    if helper is None:
        helper = UIAHelper()
        _SHARED_HELPER = helper
    return helper
|
||||
278
core/workflow/workflow_ir.py
Normal file
278
core/workflow/workflow_ir.py
Normal file
@@ -0,0 +1,278 @@
|
||||
# core/workflow/workflow_ir.py
|
||||
"""
|
||||
WorkflowIR — Représentation Intermédiaire d'un workflow.
|
||||
|
||||
C'est la CONNAISSANCE que Léa a acquise en observant un utilisateur.
|
||||
Pas les clics bruts (RawTrace), pas le plan d'exécution (ExecutionPlan).
|
||||
C'est ce que Léa a COMPRIS.
|
||||
|
||||
Format générique — fonctionne pour n'importe quel métier :
|
||||
- TIM qui code des dossiers patients
|
||||
- Comptable qui saisit des factures
|
||||
- RH qui édite des fiches de paie
|
||||
- Logisticien qui gère des stocks
|
||||
|
||||
Le domaine métier est une couche par-dessus (domain_context),
|
||||
pas dans le WorkflowIR lui-même.
|
||||
|
||||
Cycle de vie :
|
||||
RawTrace (capture) → WorkflowIR (compréhension) → ExecutionPlan (exécution)
|
||||
|
||||
Le WorkflowIR est :
|
||||
- versionné (chaque recompilation incrémente la version)
|
||||
- indépendant de la résolution d'écran
|
||||
- indépendant du poste cible
|
||||
- paramétrable (variables substituables)
|
||||
- enrichi par l'apprentissage (chaque replay améliore le IR)
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Structures de données
|
||||
# =========================================================================
|
||||
|
||||
|
||||
@dataclass
class Variable:
    """Substitutable variable of a workflow."""

    # Identifier (e.g. "patient", "facture_num")
    name: str
    # Human-readable description
    description: str = ""
    # Origin: "user", "screen", "file", "previous_step"
    source: str = "user"
    # Default value
    default: str = ""
    required: bool = True

    def to_dict(self) -> Dict[str, Any]:
        """Serialize every field into a plain dictionary."""
        field_names = ("name", "description", "source", "default", "required")
        return {key: getattr(self, key) for key in field_names}

    @classmethod
    def from_dict(cls, d: Dict) -> "Variable":
        """Build a Variable from a dictionary, ignoring unknown keys."""
        known = cls.__dataclass_fields__
        kwargs = {}
        for key, value in d.items():
            if key in known:
                kwargs[key] = value
        return cls(**kwargs)
|
||||
|
||||
|
||||
@dataclass
class Action:
    """Elementary action inside a step."""

    # click, type, key_combo, wait, scroll
    type: str
    # Description of the target ("bouton Enregistrer")
    target: str = ""
    # Text to type (for "type")
    text: str = ""
    # Keys (for "key_combo")
    keys: List[str] = field(default_factory=list)
    # Duration (for "wait")
    duration_ms: int = 0
    # True when the text contains a {var} placeholder
    variable: bool = False
    # Visual hint helping target resolution
    anchor_hint: str = ""
    # Strict step control: the action may only run when the active window
    # matches `expected_window_before`, and the workflow may only move on
    # when the resulting window matches `expected_window_after`.
    # These fields are extracted by the IRBuilder from the raw events.
    expected_window_before: str = ""
    expected_window_after: str = ""

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the action, omitting every optional field that is falsy."""
        optional_fields = (
            "target",
            "text",
            "keys",
            "duration_ms",
            "variable",
            "anchor_hint",
            "expected_window_before",
            "expected_window_after",
        )
        d = {"type": self.type}
        for attr in optional_fields:
            value = getattr(self, attr)
            if not value:
                continue
            # "variable" is always emitted as the literal True.
            d[attr] = True if attr == "variable" else value
        return d

    @classmethod
    def from_dict(cls, d: Dict) -> "Action":
        """Build an Action from a dictionary, ignoring unknown keys."""
        known = cls.__dataclass_fields__
        kwargs = {}
        for key, value in d.items():
            if key in known:
                kwargs[key] = value
        return cls(**kwargs)
|
||||
|
||||
|
||||
@dataclass
class Step:
    """Logical step of a workflow — one business intent."""

    step_id: str
    # e.g. "Ouvrir le dossier", "Saisir le code"
    intent: str
    # e.g. "L'application est sur l'écran de liste"
    precondition: str = ""
    # e.g. "Le dossier est affiché"
    postcondition: str = ""
    actions: List[Action] = field(default_factory=list)
    # Optional step (may be skipped)
    is_optional: bool = False
    # Repeated step (for each element)
    is_loop: bool = False
    # Loop variable
    loop_variable: str = ""

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the step; optional fields appear only when set."""
        d = {
            "step_id": self.step_id,
            "intent": self.intent,
            "actions": [action.to_dict() for action in self.actions],
        }
        for attr in ("precondition", "postcondition"):
            text = getattr(self, attr)
            if text:
                d[attr] = text
        if self.is_optional:
            d["is_optional"] = True
        if self.is_loop:
            # The loop variable is only meaningful (and emitted) for loops.
            d.update({"is_loop": True, "loop_variable": self.loop_variable})
        return d

    @classmethod
    def from_dict(cls, d: Dict) -> "Step":
        """Rebuild a Step from its dictionary form ("step_id" is mandatory)."""
        parsed_actions = []
        for raw_action in d.get("actions", []):
            parsed_actions.append(Action.from_dict(raw_action))
        kwargs = {
            "step_id": d["step_id"],
            "intent": d.get("intent", ""),
            "precondition": d.get("precondition", ""),
            "postcondition": d.get("postcondition", ""),
            "actions": parsed_actions,
            "is_optional": d.get("is_optional", False),
            "is_loop": d.get("is_loop", False),
            "loop_variable": d.get("loop_variable", ""),
        }
        return cls(**kwargs)
|
||||
|
||||
|
||||
@dataclass
class WorkflowIR:
    """Intermediate Representation of a workflow — the compiled knowledge.

    This is what Léa understood by observing the user.
    Independent of the workstation, the screen resolution and the runtime.
    """

    workflow_id: str
    version: int = 1
    name: str = ""
    description: str = ""
    # Business domain (tim_codage, compta, rh, stocks...)
    domain: str = "generic"
    # Source session_id
    learned_from: str = ""
    created_at: float = 0.0
    updated_at: float = 0.0

    # Content
    variables: List[Variable] = field(default_factory=list)
    steps: List[Step] = field(default_factory=list)

    # Learning metadata
    replay_count: int = 0        # Number of replays performed
    success_rate: float = 0.0    # Average success rate
    last_replay_at: float = 0.0

    # Applications used (detected during learning)
    applications: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the workflow; ``success_rate`` is rounded to 3 decimals."""
        return {
            "workflow_id": self.workflow_id,
            "version": self.version,
            "name": self.name,
            "description": self.description,
            "domain": self.domain,
            "learned_from": self.learned_from,
            "created_at": self.created_at,
            "updated_at": self.updated_at,
            "variables": [v.to_dict() for v in self.variables],
            "steps": [s.to_dict() for s in self.steps],
            "replay_count": self.replay_count,
            "success_rate": round(self.success_rate, 3),
            "last_replay_at": self.last_replay_at,
            "applications": self.applications,
        }

    def to_json(self, indent: int = 2) -> str:
        """Serialize to a JSON string, keeping non-ASCII characters as-is."""
        return json.dumps(self.to_dict(), ensure_ascii=False, indent=indent)

    @classmethod
    def from_dict(cls, d: Dict) -> "WorkflowIR":
        """Rebuild a WorkflowIR from its dictionary form.

        Raises:
            KeyError: if "workflow_id" is missing.
        """
        variables = [Variable.from_dict(v) for v in d.get("variables", [])]
        steps = [Step.from_dict(s) for s in d.get("steps", [])]
        return cls(
            workflow_id=d["workflow_id"],
            version=d.get("version", 1),
            name=d.get("name", ""),
            description=d.get("description", ""),
            domain=d.get("domain", "generic"),
            learned_from=d.get("learned_from", ""),
            created_at=d.get("created_at", 0),
            updated_at=d.get("updated_at", 0),
            variables=variables,
            steps=steps,
            replay_count=d.get("replay_count", 0),
            success_rate=d.get("success_rate", 0),
            last_replay_at=d.get("last_replay_at", 0),
            # Copy so the instance never shares (or mutates) the caller's
            # list; a null/missing value becomes an empty list.
            applications=list(d.get("applications") or []),
        )

    @classmethod
    def from_json(cls, json_str: str) -> "WorkflowIR":
        """Rebuild a WorkflowIR from a JSON string."""
        return cls.from_dict(json.loads(json_str))

    def save(self, directory: str) -> Path:
        """Write the workflow as ``<workflow_id>_v<version>.json`` in ``directory``.

        The directory is created when missing. Returns the written file path.
        """
        dir_path = Path(directory)
        dir_path.mkdir(parents=True, exist_ok=True)
        file_path = dir_path / f"{self.workflow_id}_v{self.version}.json"
        file_path.write_text(self.to_json(), encoding="utf-8")
        logger.info(f"WorkflowIR sauvegardé : {file_path}")
        return file_path

    @classmethod
    def load(cls, file_path: str) -> "WorkflowIR":
        """Load a WorkflowIR from a UTF-8 JSON file."""
        return cls.from_json(Path(file_path).read_text(encoding="utf-8"))

    def increment_version(self) -> "WorkflowIR":
        """Return a deep copy with ``version + 1`` and a refreshed ``updated_at``.

        The current instance is left untouched.
        """
        import copy

        new = copy.deepcopy(self)
        new.version += 1
        new.updated_at = time.time()
        return new

    def add_step(
        self,
        intent: str,
        actions: Optional[List[Dict]] = None,
        **kwargs,
    ) -> Step:
        """Append a step to the workflow and return it.

        Args:
            intent: Business intent of the step.
            actions: Action dictionaries, converted with ``Action.from_dict``;
                None means no action.
            **kwargs: Extra ``Step`` fields (precondition, is_optional, ...).
        """
        step = Step(
            # Ids are positional: "s1", "s2", ... in insertion order.
            step_id=f"s{len(self.steps) + 1}",
            intent=intent,
            actions=[Action.from_dict(a) for a in (actions or [])],
            **kwargs,
        )
        self.steps.append(step)
        return step

    def add_variable(self, name: str, **kwargs) -> Variable:
        """Append a variable to the workflow and return it."""
        var = Variable(name=name, **kwargs)
        self.variables.append(var)
        return var

    @staticmethod
    def new(name: str, domain: str = "generic", learned_from: str = "") -> "WorkflowIR":
        """Create a new, empty WorkflowIR with a random ``wf_<12 hex>`` id."""
        # Single clock read so created_at and updated_at are strictly equal
        # on a workflow that has never been modified.
        now = time.time()
        return WorkflowIR(
            workflow_id=f"wf_{uuid.uuid4().hex[:12]}",
            version=1,
            name=name,
            domain=domain,
            learned_from=learned_from,
            created_at=now,
            updated_at=now,
        )
|
||||
@@ -51,10 +51,14 @@ echo Pour arreter Lea : clic droit sur l'icone ^> "Quitter Lea"
|
||||
echo Vous pouvez fermer cette fenetre.
|
||||
echo.
|
||||
|
||||
.venv\Scripts\pythonw.exe run_agent_v1.py
|
||||
start "" /b .venv\Scripts\pythonw.exe run_agent_v1.py
|
||||
|
||||
:: Attendre 3s puis verifier que Lea tourne
|
||||
timeout /t 3 >nul
|
||||
tasklist /FI "IMAGENAME eq pythonw.exe" /NH 2>nul | findstr /I "pythonw" >nul
|
||||
if errorlevel 1 (
|
||||
echo.
|
||||
echo Lea a rencontre un probleme au demarrage.
|
||||
echo Lea n'a pas demarre correctement.
|
||||
echo Tentative avec affichage des erreurs...
|
||||
echo.
|
||||
.venv\Scripts\python.exe run_agent_v1.py
|
||||
|
||||
220
docs/PLAN_APPRENTISSAGE_LEA.md
Normal file
220
docs/PLAN_APPRENTISSAGE_LEA.md
Normal file
@@ -0,0 +1,220 @@
|
||||
# Plan Apprentissage Léa — Phase 1 / 2 / 3
|
||||
|
||||
**Date** : 10 avril 2026
|
||||
**Auteur** : Dom + Claude (session cartographie target_resolver)
|
||||
**Statut** : Plan validé par Dom, implémentation non commencée
|
||||
|
||||
---
|
||||
|
||||
## Contexte
|
||||
|
||||
Après deux semaines passées à déboguer le replay sur Windows et à écrire du code (V4 : surface_classifier, UIA, execution_plan, executor strict) qui **dupliquait sans le savoir** des concepts déjà présents dans le V3 legacy, une cartographie exhaustive a été lancée.
|
||||
|
||||
Fichiers lus en profondeur :
|
||||
- `core/execution/target_resolver.py` (3495 lignes)
|
||||
- `core/learning/target_memory_store.py` (545 lignes — Fiche #18)
|
||||
- `core/models/workflow_graph.py` (TargetSpec — lignes 570-640)
|
||||
- `core/detection/spatial_analyzer.py` (595 lignes)
|
||||
|
||||
## Découverte critique
|
||||
|
||||
**Les pipelines V3 et V4 sont complètement découplés au runtime de replay.**
|
||||
|
||||
```
|
||||
REPLAY V4 (actif aujourd'hui) LEGACY V3 (dormant au replay)
|
||||
============================= =============================
|
||||
stream_processor workflow_pipeline
|
||||
↓ ↓
|
||||
execution_plan_runner execution_loop
|
||||
↓ ↓
|
||||
agent_v1/core/executor.py action_executor
|
||||
↓ ↓
|
||||
OCR + template + VLM direct target_resolver
|
||||
↓
|
||||
target_memory_store (Fiche #18)
|
||||
↓
|
||||
SpatialAnalyzer
|
||||
```
|
||||
|
||||
Vérifié par `grep "from core.execution" agent_v0/` → **zéro import**.
|
||||
|
||||
Callers V3 encore vivants (mais pas sur le chemin de replay critique) :
|
||||
- `agent_chat/app.py`
|
||||
- `visual_workflow_builder/backend/api/workflows.py`
|
||||
- `core/evaluation/*`
|
||||
|
||||
## Modules dormants à valeur immédiate
|
||||
|
||||
### TargetMemoryStore — le Crystallizer qu'on pensait devoir écrire
|
||||
|
||||
- SQLite `data/learning/target_memory.db` + JSONL audit `data/learning/events/YYYY-MM-DD/*.jsonl`
|
||||
- API propre et testée :
|
||||
- `record_success(screen_sig, target_spec, fingerprint, strategy, confidence)`
|
||||
- `record_failure(screen_sig, target_spec, error)`
|
||||
- `lookup(screen_sig, target_spec, min_success_count=2, max_fail_ratio=0.3)` → fingerprint ou None
|
||||
- Clé unique : `(screen_signature, target_spec_hash)`
|
||||
- Fingerprint : `(element_id, bbox, role, etype, label, confidence)`
|
||||
- **Critère de fiabilité** : au moins 2 succès et < 30% d'échecs → c'est ça la "cristallisation par répétition"
|
||||
|
||||
### TargetSpec — vocabulaire déjà riche
|
||||
|
||||
Dans `core/models/workflow_graph.py:572` :
|
||||
- `context_hints` : `near_text`, `below_text`, `right_of_text`, `same_row_as_text`, `within_region`, `exclude_near_text`
|
||||
- `hard_constraints` : `within_container_text`, `min_area`
|
||||
- `weights` : `proximity`, `alignment`, `container`, `roi_iou`
|
||||
|
||||
### ResolutionStrategy V4 — vocabulaire pauvre (à enrichir)
|
||||
|
||||
Dans `core/workflow/execution_plan.py:27` :
|
||||
- `target_text`, `anchor_b64`, `zone`, `vlm_description`, `uia_*`, `dom_*`
|
||||
- Pas de context_hints, pas de hard_constraints → trou dans l'expressivité
|
||||
|
||||
## Décision validée
|
||||
|
||||
**Léa = stagiaire qui apprend de la répétition.** La mémoire précède la généralisation. Mais le raisonnement spatial reste indispensable comme filet de sécurité quand la mémoire ne suffit pas (décalages de layout, premier replay sur nouvel écran, généralisation entre écrans similaires).
|
||||
|
||||
## Plan séquencé
|
||||
|
||||
### Phase 1 — Mémoire sur V4 (≈1 jour, ~150 lignes)
|
||||
|
||||
**Objectif** : greffer `TargetMemoryStore` directement sur le resolve V4, sans passer par target_resolver ni UIElement.
|
||||
|
||||
**Lookup avant OCR/template/VLM**
|
||||
```python
|
||||
fp = memory.lookup(screen_sig, target_spec)
|
||||
if fp:
|
||||
# On a vu ce clic réussir ≥2 fois sur cet écran
|
||||
return fp.bbox # clic direct, <10ms
|
||||
```
|
||||
|
||||
**Record après validation post-condition (déjà en place — `title_match` strict)**
|
||||
```python
|
||||
if post_condition_passed:
|
||||
memory.record_success(screen_sig, target_spec, fingerprint, "v4_ocr", confidence)
|
||||
else:
|
||||
memory.record_failure(screen_sig, target_spec, reason)
|
||||
```
|
||||
|
||||
**À construire**
|
||||
- `screen_signature(screenshot)` → hash stable. Piste : `window_title` + tokens OCR dominants, ou réutiliser `core/execution/screen_signature.py` si compatible.
|
||||
- Fingerprint léger : `(x, y, w, h, method)`. Pas besoin de role/type/label en V4.
|
||||
- Point de branchement exact à confirmer avant implémentation :
|
||||
- Côté serveur dans `resolve_engine` (si resolve serveur)
|
||||
- Côté agent dans `agent_v1/core/executor.py` (si resolve local)
|
||||
|
||||
**Bénéfice observable**
|
||||
- 3ème passage d'un workflow sur le même écran : 10-15s de VLM remplacés par un lookup de <10ms
|
||||
- Léa **apprend** vraiment — pas parce qu'on a écrit un Crystallizer, parce qu'on a consommé celui qui dort depuis mars
|
||||
|
||||
**Tests de validation**
|
||||
- [ ] Rejouer un workflow 3 fois, mesurer le temps du 3ème passage
|
||||
- [ ] Vérifier que `data/learning/target_memory.db` se remplit
|
||||
- [ ] Vérifier que les événements JSONL s'écrivent
|
||||
|
||||
### Phase 2 light — Raisonnement spatial OCR-only (≈3-5 jours, ~300-400 lignes)
|
||||
|
||||
**Principe clé** : pur pixel/OCR. Pas d'`UIElement`, pas de role/type, pas de parser UI. On évite le piège "ressusciter V3 complet".
|
||||
|
||||
**À l'enregistrement (IRBuilder, côté serveur)**
|
||||
1. Pour chaque clic `(x, y)` dans la trace
|
||||
2. OCR la zone autour (±300px)
|
||||
3. Identifier les 3-5 textes les plus proches avec direction (left/right/above/below) et distance
|
||||
4. Renseigner `ResolutionStrategy.context_hints` :
|
||||
```python
|
||||
{
|
||||
"right_of_text": "Nom du patient", # 60px à gauche du clic
|
||||
"below_text": "Identité", # 120px au-dessus
|
||||
"near_text": "Enregistrer", # le texte du clic lui-même
|
||||
}
|
||||
```
|
||||
|
||||
**Au replay (resolve_engine)**, en cascade :
|
||||
1. Lookup mémoire (Phase 1) → si hit, clic direct
|
||||
2. Sinon : OCR de l'écran actuel
|
||||
3. Trouver les ancres de `context_hints` via OCR (normalisation accents + fuzzy Fiche #8)
|
||||
4. Calculer la zone candidate par intersection des contraintes spatiales
|
||||
5. Cliquer
|
||||
6. Si la post-condition échoue : repli sur le VLM (exception handler)
|
||||
|
||||
**Logique à porter depuis target_resolver.py**
|
||||
- `_apply_context_hints_to_candidates` (lignes 2601-2803) — adaptée à "candidats = zones OCR" au lieu de "candidats = UIElement"
|
||||
- `_find_element_by_text` + normalisation (`_norm_text`, `_fuzzy_ratio`) lignes 211-235
|
||||
- Healing profile (ligne 395) pour relaxation progressive
|
||||
|
||||
**Décision tranchée**
|
||||
- OCR **côté serveur Linux** (docTR déjà présent via SomEngine)
|
||||
- Zéro changement sur le client Windows
|
||||
- Le serveur reçoit le screenshot au moment du build IR, extrait les context_hints, les intègre dans `ResolutionStrategy`
|
||||
|
||||
**Enrichissement de `ResolutionStrategy` (execution_plan.py)**
|
||||
Ajouter au dataclass :
|
||||
```python
|
||||
context_hints: Dict[str, Any] = field(default_factory=dict)
|
||||
```
|
||||
|
||||
Et dans `execution_plan_runner._strategy_to_target_spec` : propager `context_hints` dans `target_spec`.
|
||||
|
||||
**Tests de validation**
|
||||
- [ ] Enregistrer un workflow, vérifier que le plan contient des `context_hints` cohérents
|
||||
- [ ] Modifier la résolution de la VM (1920→1280), rejouer, vérifier que les clics atteignent la bonne cible
|
||||
- [ ] Ajouter un champ au-dessus de la cible, rejouer, vérifier robustesse
|
||||
|
||||
### Phase 3 — Spatial V3 complet (pas maintenant)
|
||||
|
||||
**Correction 10 avril 2026** : une version précédente de ce document affirmait qu'OmniParser avait été retiré. **C'était faux.** OmniParser est toujours présent :
|
||||
- `core/detection/omniparser_adapter.py` — 429 lignes
|
||||
- `agent_v0/server_v1/resolve_engine.py:254` — `_get_omniparser()` singleton thread-safe, lazy-load
|
||||
- `agent_v0/server_v1/resolve_engine.py:293` — `_resolve_by_yolo()` défini et importé dans `api_stream.py`
|
||||
|
||||
Ce qui est vrai : `_resolve_by_yolo` **n'est jamais appelé** dans la cascade V4 (`_resolve_target_sync` ne l'invoque pas). C'est du code **dormant**, pas supprimé.
|
||||
|
||||
**Conséquence pour Phase 3** : on a potentiellement **déjà** un parser UI utilisable. Deux pistes :
|
||||
1. **Ré-activer `_resolve_by_yolo`** dans la cascade V4 (injecter un appel dans `_resolve_target_sync` comme fallback après OCR/template/VLM). Il produit déjà une liste d'éléments détectés avec bbox et role approximatif.
|
||||
2. **Pont `_resolve_by_yolo → List[UIElement]`** : adapter la sortie YOLO pour alimenter `target_resolver` V3. Un pont d'une centaine de lignes devrait suffire.
|
||||
|
||||
**Avant de lancer Phase 3**, vérifier :
|
||||
- Les modèles YOLO sont-ils toujours sur disque ? (`omniparser.detect()` lazy-loads)
|
||||
- Quelle qualité de détection sur des écrans Citrix/DPI réels ?
|
||||
- Les tests `tests/integration/test_auto_healing_integration.py` et `tests/unit/test_fiche11_*` passent-ils encore ?
|
||||
|
||||
**Tant qu'on n'a pas fait cette vérification, Phase 3 reste pending.**
|
||||
|
||||
## Ce qu'on ne fait PAS
|
||||
|
||||
| Tentation | Pourquoi on résiste |
|
||||
|-----------|---------------------|
|
||||
| Refactorer `target_resolver.py` pour le rendre V4-compatible | 3495 lignes couplées à `UIElement` disparu — plus économique de le laisser dormir et recoder l'essentiel minimal dans V4 |
|
||||
| Brancher `action_executor` sur le streaming replay | 2000 lignes de pipeline pour un bénéfice qu'on a en 150 lignes avec TargetMemoryStore seul |
|
||||
| Ressusciter `SpatialAnalyzer` maintenant | Zéro valeur sans `UIElement` riches en amont |
|
||||
| Faire Phase 2 avant Phase 1 | Léa raisonnerait à chaque clic, lent et coûteux — pas un "stagiaire qui apprend", juste un agent qui réfléchit en boucle |
|
||||
|
||||
## Suivi d'avancement
|
||||
|
||||
### Phase 1 — Mémoire sur V4
|
||||
- [ ] Identifier le point de branchement exact (serveur vs agent)
|
||||
- [ ] Définir `screen_signature` stable pour V4
|
||||
- [ ] Définir le format fingerprint léger
|
||||
- [ ] Brancher `memory.lookup()` avant cascade OCR/template/VLM
|
||||
- [ ] Brancher `memory.record_success()` après post-cond validée
|
||||
- [ ] Brancher `memory.record_failure()` sur échec
|
||||
- [ ] Test : workflow rejoué 3 fois, 3ème en <100ms sur le resolve
|
||||
- [ ] Vérifier remplissage de `data/learning/target_memory.db`
|
||||
|
||||
### Phase 2 light — Spatial OCR-only
|
||||
- [ ] Enrichir `ResolutionStrategy` avec `context_hints`
|
||||
- [ ] IRBuilder : extraire context_hints via OCR au build
|
||||
- [ ] `execution_plan_runner` : propager context_hints dans target_spec
|
||||
- [ ] resolve_engine : implémenter fallback spatial OCR
|
||||
- [ ] Porter `_apply_context_hints_to_candidates` adapté
|
||||
- [ ] Porter normalisation texte (`_norm_text`, `_fuzzy_ratio`)
|
||||
- [ ] Test : résolution VM modifiée, clic atteint toujours la cible
|
||||
- [ ] Test : champ ajouté dans le formulaire, robustesse préservée
|
||||
|
||||
### Phase 3 — Spatial V3 complet
|
||||
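- [ ] **BLOQUÉ** jusqu'à ce qu'un parser UI produise des `UIElement` — voir la section « Phase 3 — Spatial V3 complet » : vérifier d'abord si `_resolve_by_yolo` (OmniParser, dormant) peut jouer ce rôle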
|
||||
|
||||
## Liens
|
||||
|
||||
- Code de référence : `core/execution/target_resolver.py`, `core/learning/target_memory_store.py`
|
||||
- Architecture V4 : `core/workflow/execution_plan.py`, `core/workflow/execution_compiler.py`, `agent_v0/server_v1/execution_plan_runner.py`
|
||||
- Replay runtime : `agent_v0/agent_v1/core/executor.py`
|
||||
441
tests/unit/test_chat_interface.py
Normal file
441
tests/unit/test_chat_interface.py
Normal file
@@ -0,0 +1,441 @@
|
||||
# tests/unit/test_chat_interface.py
|
||||
"""
|
||||
Tests unitaires du module chat_interface (Léa conversationnelle).
|
||||
|
||||
Vérifie :
|
||||
1. Création de session (état initial, message d'accueil)
|
||||
2. Envoi de message → appel TaskPlanner mocké
|
||||
3. Historique (get_history)
|
||||
4. Transitions d'états idle → planning → awaiting_confirmation → executing → done
|
||||
5. Abandon (utilisateur répond "non")
|
||||
6. Fallback gracieux quand gemma4/TaskPlanner indisponible
|
||||
7. ChatManager (création, listing, cleanup)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
_ROOT = str(Path(__file__).resolve().parents[2])
|
||||
if _ROOT not in sys.path:
|
||||
sys.path.insert(0, _ROOT)
|
||||
|
||||
from agent_v0.server_v1.chat_interface import (
|
||||
ChatSession,
|
||||
ChatManager,
|
||||
STATE_IDLE,
|
||||
STATE_PLANNING,
|
||||
STATE_AWAITING_CONFIRMATION,
|
||||
STATE_EXECUTING,
|
||||
STATE_DONE,
|
||||
STATE_ERROR,
|
||||
ROLE_USER,
|
||||
ROLE_LEA,
|
||||
)
|
||||
from agent_v0.server_v1.task_planner import TaskPlan
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Fixtures
|
||||
# =============================================================================
|
||||
|
||||
@pytest.fixture
def sample_workflows():
    """Single-entry workflow catalogue handed to the session under test."""
    notepad_workflow = {
        "session_id": "sess_bloc_notes",
        "name": "Bloc-notes",
        "description": "Ouvrir Bloc-notes via Exécuter (Win+R) et écrire du texte",
    }
    return [notepad_workflow]
|
||||
|
||||
|
||||
@pytest.fixture
def understood_plan():
    """TaskPlan that understood the order and matched a workflow."""
    plan_fields = {
        "instruction": "ouvre le bloc-notes et écris bonjour",
        "understood": True,
        "workflow_match": "sess_bloc_notes",
        "workflow_name": "Bloc-notes",
        "match_confidence": 0.9,
        "parameters": {"texte": "bonjour"},
        "is_loop": False,
        "mode": "replay",
    }
    return TaskPlan(**plan_fields)
|
||||
|
||||
|
||||
@pytest.fixture
def unknown_plan():
    """TaskPlan for an order that was not understood."""
    plan_fields = {
        "instruction": "fais le café",
        "understood": False,
        "error": "aucun workflow ne correspond",
    }
    return TaskPlan(**plan_fields)
|
||||
|
||||
|
||||
@pytest.fixture
def mock_task_planner(understood_plan):
    """Mock planner whose ``understand`` always returns ``understood_plan``."""
    return MagicMock(**{"understand.return_value": understood_plan})
|
||||
|
||||
|
||||
@pytest.fixture
def mock_replay_callback():
    """Mock replay launcher that always returns the id "replay_abc123"."""
    callback = MagicMock()
    callback.return_value = "replay_abc123"
    return callback
|
||||
|
||||
|
||||
@pytest.fixture
def mock_status_provider():
    """Mock status provider returning a 'running' dict by default.

    Tests may change its return value.
    """
    running_status = {
        "status": "running",
        "completed_actions": 1,
        "total_actions": 5,
    }
    provider = MagicMock()
    provider.return_value = running_status
    return provider
|
||||
|
||||
|
||||
@pytest.fixture
def session(mock_task_planner, sample_workflows, mock_replay_callback, mock_status_provider):
    """ChatSession wired with the mocked planner, workflows, replay and status."""

    def _list_workflows():
        return sample_workflows

    return ChatSession(
        task_planner=mock_task_planner,
        workflows_provider=_list_workflows,
        replay_callback=mock_replay_callback,
        status_provider=mock_status_provider,
    )
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests création session
|
||||
# =============================================================================
|
||||
|
||||
class TestSessionCreation:
    """Initial state of a freshly created ChatSession."""

    def test_session_id_generated(self):
        generated_id = ChatSession().session_id
        assert generated_id.startswith("chat_")

    def test_initial_state_is_idle(self):
        fresh = ChatSession()
        assert fresh.state == STATE_IDLE

    def test_welcome_message_present(self):
        messages = ChatSession().get_history()
        # Exactly one message: the greeting sent by Léa.
        assert len(messages) == 1
        welcome = messages[0]
        assert welcome["role"] == ROLE_LEA
        assert "Bonjour" in welcome["content"] or "Léa" in welcome["content"]

    def test_session_id_custom(self):
        custom = ChatSession(session_id="custom_42")
        assert custom.session_id == "custom_42"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests send_message
|
||||
# =============================================================================
|
||||
|
||||
class TestSendMessage:
    """Behaviour of ChatSession.send_message with a mocked planner."""

    def test_empty_message_rejected(self, session):
        outcome = session.send_message("")
        assert outcome["ok"] is False

    def test_send_message_calls_planner(self, session, mock_task_planner):
        session.send_message("ouvre le bloc-notes")
        understand = mock_task_planner.understand
        understand.assert_called_once()
        passed = understand.call_args.kwargs
        assert passed["instruction"] == "ouvre le bloc-notes"
        # workflows_provider was called and its result forwarded
        assert "available_workflows" in passed
        assert len(passed["available_workflows"]) == 1

    def test_send_message_transitions_to_awaiting_confirmation(self, session):
        outcome = session.send_message("ouvre le bloc-notes")
        assert outcome["ok"] is True
        assert session.state == STATE_AWAITING_CONFIRMATION
        assert outcome["state"] == STATE_AWAITING_CONFIRMATION

    def test_user_message_added_to_history(self, session):
        session.send_message("ouvre le bloc-notes")
        from_user = []
        for message in session.get_history():
            if message["role"] == ROLE_USER:
                from_user.append(message)
        assert len(from_user) == 1
        assert from_user[0]["content"] == "ouvre le bloc-notes"

    def test_lea_proposal_added_to_history(self, session):
        session.send_message("ouvre le bloc-notes")
        from_lea = []
        for message in session.get_history():
            if message["role"] == ROLE_LEA:
                from_lea.append(message)
        # Welcome message + proposal
        assert len(from_lea) == 2
        proposal = from_lea[-1]["content"]
        assert "Bloc-notes" in proposal
        assert "oui" in proposal.lower() or "y aller" in proposal.lower()

    def test_proposal_contains_confidence(self, session):
        session.send_message("ouvre le bloc-notes")
        proposal = session.get_history()[-1]["content"]
        # 0.9 → 90%
        assert "90" in proposal

    def test_proposal_contains_parameters(self, session):
        session.send_message("ouvre le bloc-notes")
        proposal = session.get_history()[-1]["content"]
        assert "texte" in proposal
        assert "bonjour" in proposal
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests get_history
|
||||
# =============================================================================
|
||||
|
||||
class TestGetHistory:
    """Shape and growth of the list returned by ChatSession.get_history."""

    def test_history_returns_list_of_dicts(self, session):
        """The history is a list whose items are all dicts."""
        entries = session.get_history()
        assert isinstance(entries, list)
        assert all(isinstance(entry, dict) for entry in entries)

    def test_history_message_structure(self, session):
        """The first message exposes the four expected keys."""
        first_message = session.get_history()[0]
        for field in ("role", "content", "timestamp", "meta"):
            assert field in first_message

    def test_history_grows_with_messages(self, session):
        """Sending a message makes the history strictly longer."""
        size_before = len(session.get_history())
        session.send_message("ouvre le bloc-notes")
        size_after = len(session.get_history())
        assert size_after > size_before
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests transitions d'états
|
||||
# =============================================================================
|
||||
|
||||
class TestStateTransitions:
    """State-machine transitions of ChatSession around confirmation and replay."""

    def test_full_happy_path(self, session, mock_task_planner, mock_replay_callback):
        """Nominal path: idle, awaiting_confirmation, executing, then done."""
        # Starting point: idle.
        assert session.state == STATE_IDLE

        # Sending an instruction must end in awaiting_confirmation.
        session.send_message("ouvre le bloc-notes")
        assert session.state == STATE_AWAITING_CONFIRMATION

        # Confirming switches to executing and triggers the replay callback.
        confirmation = session.confirm(confirmed=True)
        assert confirmation["ok"] is True
        assert session.state == STATE_EXECUTING
        mock_replay_callback.assert_called_once()
        replay_kwargs = mock_replay_callback.call_args.kwargs
        assert replay_kwargs["session_id"] == "sess_bloc_notes"

        # Simulate a finished replay through the status provider.
        finished_status = {
            "status": "done",
            "completed_actions": 5,
            "total_actions": 5,
        }
        session._status_provider.return_value = finished_status
        session.refresh_progress()
        assert session.state == STATE_DONE

    def test_confirm_via_message_oui(self, session, mock_replay_callback):
        """The user may confirm by sending 'oui' as a message instead of calling confirm()."""
        session.send_message("ouvre le bloc-notes")
        assert session.state == STATE_AWAITING_CONFIRMATION

        session.send_message("oui")
        assert session.state == STATE_EXECUTING
        mock_replay_callback.assert_called_once()

    def test_refusal_via_confirm_false(self, session, mock_replay_callback):
        """confirm(False) returns to idle and never triggers the replay callback."""
        session.send_message("ouvre le bloc-notes")
        refusal = session.confirm(confirmed=False)
        assert refusal["ok"] is True
        assert refusal["confirmed"] is False
        assert session.state == STATE_IDLE
        mock_replay_callback.assert_not_called()

    def test_refusal_via_message_non(self, session, mock_replay_callback):
        """Answering 'non' cancels the pending plan."""
        session.send_message("ouvre le bloc-notes")
        session.send_message("non")
        assert session.state == STATE_IDLE
        mock_replay_callback.assert_not_called()
        # A cancellation message must appear somewhere in the history.
        assert any(
            "annule" in entry["content"].lower() for entry in session.get_history()
        )

    def test_ambiguous_confirmation_reply(self, session):
        """An ambiguous reply keeps awaiting_confirmation and asks for clarification."""
        session.send_message("ouvre le bloc-notes")
        reply = session.send_message("peut-être")
        assert session.state == STATE_AWAITING_CONFIRMATION
        assert reply.get("needs_clarification") is True

    def test_failed_replay_transitions_to_error(self, session):
        """A replay callback that raises puts the session in the error state."""
        exploding_callback = MagicMock(side_effect=RuntimeError("boom"))
        session._replay_callback = exploding_callback
        session.send_message("ouvre le bloc-notes")
        confirmation = session.confirm(confirmed=True)
        assert confirmation["ok"] is False
        assert session.state == STATE_ERROR

    def test_replay_failure_from_status(self, session):
        """A 'failed' status reported during execution leads to the error state."""
        session.send_message("ouvre le bloc-notes")
        session.confirm(confirmed=True)
        assert session.state == STATE_EXECUTING

        failed_status = {"status": "failed", "error": "element introuvable"}
        session._status_provider.return_value = failed_status
        session.refresh_progress()
        assert session.state == STATE_ERROR
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests fallback / résilience
|
||||
# =============================================================================
|
||||
|
||||
class TestResilience:
    """Fallback behaviour when collaborators are missing or failing."""

    def test_no_task_planner_graceful(self):
        """Without a task planner, send_message fails cleanly and explains itself."""
        orphan = ChatSession(task_planner=None)
        outcome = orphan.send_message("test")
        assert outcome["ok"] is False
        assert orphan.state == STATE_ERROR
        # An apology / unavailability message must be present in the history.
        lowered_contents = (
            entry["content"].lower() for entry in orphan.get_history()
        )
        assert any(
            "désolée" in text or "indisponible" in text for text in lowered_contents
        )

    def test_task_planner_exception_graceful(self, mock_replay_callback):
        """A planner that raises leaves the session in a clean error state."""
        failing_planner = MagicMock()
        failing_planner.understand.side_effect = RuntimeError("gemma4 offline")

        chat = ChatSession(
            task_planner=failing_planner,
            workflows_provider=lambda: [],
            replay_callback=mock_replay_callback,
        )
        outcome = chat.send_message("test")
        assert outcome["ok"] is False
        assert chat.state == STATE_ERROR

    def test_instruction_not_understood(self, unknown_plan, mock_replay_callback):
        """A plan that was not understood produces an explicit 'reformuler' message."""
        clueless_planner = MagicMock()
        clueless_planner.understand.return_value = unknown_plan

        chat = ChatSession(
            task_planner=clueless_planner,
            workflows_provider=lambda: [],
            replay_callback=mock_replay_callback,
        )
        outcome = chat.send_message("fais le café")
        assert outcome["ok"] is False
        assert chat.state == STATE_ERROR
        assert any(
            "reformuler" in entry["content"].lower() for entry in chat.get_history()
        )

    def test_no_replay_callback(self, mock_task_planner, sample_workflows):
        """Without a replay callback, confirming is refused and ends in error."""
        chat = ChatSession(
            task_planner=mock_task_planner,
            workflows_provider=lambda: sample_workflows,
            replay_callback=None,
        )
        chat.send_message("ouvre le bloc-notes")
        confirmation = chat.confirm(confirmed=True)
        assert confirmation["ok"] is False
        assert chat.state == STATE_ERROR
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests snapshot
|
||||
# =============================================================================
|
||||
|
||||
class TestSnapshot:
    """Content of the dict returned by ChatSession.get_snapshot."""

    def test_snapshot_structure(self, session):
        """The snapshot exposes every expected top-level key."""
        snapshot = session.get_snapshot()
        expected_keys = (
            "session_id",
            "state",
            "messages",
            "pending_plan",
            "active_replay_id",
            "progress",
        )
        for key in expected_keys:
            assert key in snapshot

    def test_snapshot_includes_pending_plan_when_awaiting(self, session):
        """While awaiting confirmation, the snapshot carries the pending plan."""
        session.send_message("ouvre le bloc-notes")
        snapshot = session.get_snapshot()
        assert snapshot["state"] == STATE_AWAITING_CONFIRMATION
        pending = snapshot["pending_plan"]
        assert pending is not None
        assert pending["workflow_name"] == "Bloc-notes"

    def test_snapshot_no_pending_plan_in_idle(self, session):
        """Before any message is sent, there is no pending plan."""
        assert session.get_snapshot()["pending_plan"] is None
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests ChatManager
|
||||
# =============================================================================
|
||||
|
||||
class TestChatManager:
    """Session bookkeeping done by ChatManager: create, get, delete, cleanup, list."""

    def test_create_session(self, mock_task_planner, sample_workflows):
        """A created session is returned and shows up in list_sessions()."""
        manager = ChatManager(
            task_planner=mock_task_planner,
            workflows_provider=lambda: sample_workflows,
        )
        created = manager.create_session()
        assert created is not None
        listed_ids = [info["session_id"] for info in manager.list_sessions()]
        assert created.session_id in listed_ids

    def test_get_session(self, mock_task_planner):
        """get_session returns the very same object that was created."""
        manager = ChatManager(task_planner=mock_task_planner)
        created = manager.create_session()
        assert manager.get_session(created.session_id) is created

    def test_get_session_not_found(self):
        """An unknown id yields None."""
        assert ChatManager().get_session("unknown") is None

    def test_delete_session(self, mock_task_planner):
        """delete_session returns True and the session is no longer retrievable."""
        manager = ChatManager(task_planner=mock_task_planner)
        doomed_id = manager.create_session().session_id
        assert manager.delete_session(doomed_id) is True
        assert manager.get_session(doomed_id) is None

    def test_cleanup_old_sessions(self, mock_task_planner):
        """cleanup_old removes a session whose updated_at is older than max_age_s."""
        manager = ChatManager(task_planner=mock_task_planner)
        stale = manager.create_session()
        # Backdate the session far beyond the one-hour limit used below.
        stale.updated_at = time.time() - 100000
        removed_count = manager.cleanup_old(max_age_s=3600)
        assert removed_count == 1
        assert manager.get_session(stale.session_id) is None

    def test_list_sessions_structure(self, mock_task_planner):
        """Each listed entry carries session_id, state and the given machine_id."""
        manager = ChatManager(task_planner=mock_task_planner)
        manager.create_session(machine_id="pc-01")
        listing = manager.list_sessions()
        assert len(listing) == 1
        entry = listing[0]
        for key in ("session_id", "state", "machine_id"):
            assert key in entry
        assert entry["machine_id"] == "pc-01"
|
||||
543
tests/unit/test_domain_personality.py
Normal file
543
tests/unit/test_domain_personality.py
Normal file
@@ -0,0 +1,543 @@
|
||||
"""Tests unitaires pour la personnalité métier de Léa.
|
||||
|
||||
Couvre :
|
||||
- summarize_action : résumé d'actions en langage métier par domaine
|
||||
- pose_clarification_question : questions contextuelles quand Léa bloque
|
||||
- describe_workflow_outcome : rapports de fin en langage métier
|
||||
- Fallback domaine inconnu / vocabulaire synonyme
|
||||
- Intégration avec agent_v0.agent_v1.ui.messages (formatters enrichis)
|
||||
- Appel gemma4 mocké pour le raffinement de résumé
|
||||
|
||||
Tous les tests sont 100% offline : aucun appel réseau réel.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
# Assurer que la racine du projet est dans le path (comme les autres tests unit)
|
||||
# Put the directory two levels above this file's folder at the front of
# sys.path, unless it is already listed, so the imports below resolve.
ROOT = Path(__file__).resolve().parents[2]
if sys.path.count(str(ROOT)) == 0:
    sys.path[:0] = [str(ROOT)]
|
||||
|
||||
from agent_v0.server_v1.domain_context import (
|
||||
DomainContext,
|
||||
get_domain_context,
|
||||
list_domains,
|
||||
register_domain,
|
||||
)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Domaines pré-configurés
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class TestDomainesDisponibles:
    """Every planned business domain must be registered."""

    def test_tim_codage_present(self):
        """tim_codage exists and has vocabulary, actions and templates."""
        domain = get_domain_context("tim_codage")
        assert domain.domain_id == "tim_codage"
        assert "CIM-10" in domain.vocabulary
        assert domain.common_actions  # must not be empty
        assert domain.clarification_templates
        assert domain.summary_templates

    def test_comptabilite_present(self):
        """comptabilite exists and counts its items as 'factures'."""
        domain = get_domain_context("comptabilite")
        assert domain.domain_id == "comptabilite"
        assert "facture" in domain.vocabulary
        assert domain.summary_templates["item_plural"] == "factures"

    def test_rh_paie_present(self):
        """rh_paie exists and counts its items as 'bulletins'."""
        domain = get_domain_context("rh_paie")
        assert domain.domain_id == "rh_paie"
        assert "bulletin" in domain.vocabulary
        assert domain.summary_templates["item_plural"] == "bulletins"

    def test_stocks_logistique_present(self):
        """stocks_logistique exists and counts its items as 'bons'."""
        domain = get_domain_context("stocks_logistique")
        assert domain.domain_id == "stocks_logistique"
        vocabulary = domain.vocabulary
        assert "BC" in vocabulary or "bon de commande" in vocabulary
        assert domain.summary_templates["item_plural"] == "bons"

    def test_generic_fallback(self):
        """An unknown domain id resolves to the generic context."""
        fallback = get_domain_context("n_existe_pas_42")
        assert fallback.domain_id == "generic"

    def test_list_domains_contains_all(self):
        """list_domains() reports at least the five built-in domain ids."""
        expected_ids = {
            "tim_codage",
            "comptabilite",
            "rh_paie",
            "stocks_logistique",
            "generic",
        }
        registered_ids = {info["domain_id"] for info in list_domains()}
        assert expected_ids <= registered_ids
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# summarize_action — résumé d'actions
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class TestSummarizeAction:
    """Business-language action summaries, per domain."""

    @staticmethod
    def _phrase(domain_id, action, params):
        """Return summarize_action(action, params) for the given domain id."""
        return get_domain_context(domain_id).summarize_action(action, params)

    def test_tim_click_dp_saisir_diagnostic_principal(self):
        """Clicking 'DP' in tim_codage maps to entering the main diagnosis."""
        summary = self._phrase("tim_codage", "click", {"target": "DP"})
        assert summary == "saisir le diagnostic principal"

    def test_tim_click_valider_codage(self):
        """Clicking 'Valider le codage' maps to its lowercase business phrase."""
        summary = self._phrase("tim_codage", "click", {"target": "Valider le codage"})
        assert summary == "valider le codage"

    def test_tim_click_dossier_patient(self):
        """Clicking 'Ouvrir le dossier patient' maps to its lowercase business phrase."""
        summary = self._phrase(
            "tim_codage", "click", {"target": "Ouvrir le dossier patient"}
        )
        assert summary == "ouvrir le dossier patient"

    def test_compta_type_ht(self):
        """Typing into 'Montant HT' is summarized as entering the pre-tax amount."""
        params = {"target": "Montant HT", "text": "1500"}
        summary = self._phrase("comptabilite", "type", params)
        # Per the original note, the "ht" mention in the target triggers the mapping.
        assert summary == "saisir le montant hors taxes"

    def test_compta_click_lettrer(self):
        """Clicking 'Lettrer' in comptabilite maps to reconciling entries."""
        summary = self._phrase("comptabilite", "click", {"target": "Lettrer"})
        assert summary == "lettrer les écritures"

    def test_rh_click_bulletin(self):
        """Clicking 'Bulletin de paie' in rh_paie maps to opening the payslip."""
        summary = self._phrase("rh_paie", "click", {"target": "Bulletin de paie"})
        assert summary == "ouvrir le bulletin de paie"

    def test_stocks_type_quantite(self):
        """Typing into 'Quantité reçue' is summarized as entering the quantity."""
        params = {"target": "Quantité reçue", "text": "42"}
        summary = self._phrase("stocks_logistique", "type", params)
        assert summary == "saisir la quantité"

    def test_generic_click_fallback(self):
        """With no mapping, a generic click is described as 'cliquer sur ...'."""
        summary = self._phrase("generic", "click", {"target": "Bouton quelconque"})
        assert "cliquer sur" in summary

    def test_unknown_domain_click(self):
        """An unknown domain does not crash and still describes the click."""
        summary = self._phrase("inconnu", "click", {"target": "Quelque chose"})
        assert summary  # must not be empty
        assert "cliquer" in summary

    def test_tim_synonymes_dp_dans_cible_longue(self):
        """'DP' inside a longer target is expected to be expanded to its full wording."""
        # Per the original note: no direct mapping for "saisir le", but "DP"
        # is among the domain synonyms.
        summary = self._phrase("tim_codage", "click", {"target": "Saisir le DP"})
        assert summary == "saisir le diagnostic principal"

    def test_key_combo_generic(self):
        """A key combo summary contains the keys joined with '+'."""
        summary = self._phrase("generic", "key_combo", {"keys": ["ctrl", "s"]})
        assert "ctrl+s" in summary

    def test_wait_and_scroll(self):
        """wait and scroll actions are described with their French verbs."""
        domain = get_domain_context("tim_codage")
        wait_summary = domain.summarize_action("wait", {})
        assert "attendre" in wait_summary
        scroll_summary = domain.summarize_action("scroll", {})
        assert "défiler" in scroll_summary

    def test_type_no_target(self):
        """A type action without target still mentions the typed text."""
        summary = self._phrase("generic", "type", {"text": "hello"})
        assert "hello" in summary
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# pose_clarification_question — questions de blocage
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class TestClarification:
    """Questions asked by Léa when she is blocked."""

    @staticmethod
    def _ask(domain_id, blocking_context):
        """Return pose_clarification_question(blocking_context) for the given domain id."""
        return get_domain_context(domain_id).pose_clarification_question(
            blocking_context
        )

    def test_tim_fichier_patient_avec_nom(self):
        """The tim_codage question names the patient and speaks of a 'dossier'."""
        blocking_context = {
            "blocked_on": "target_not_found",
            "target": "Fichier patient",
            "params": {"nom_patient": "Mme Durand"},
        }
        question = self._ask("tim_codage", blocking_context)
        assert "Mme Durand" in question
        # Business wording: "dossier" must appear, not only "fichier".
        assert "dossier" in question.lower()

    def test_compta_montant_avec_num_facture(self):
        """The comptabilite question quotes the invoice number and the amount field."""
        blocking_context = {
            "blocked_on": "target_not_found",
            "target": "Montant HT",
            "params": {"num_facture": "F2026-0145"},
        }
        question = self._ask("comptabilite", blocking_context)
        assert "F2026-0145" in question
        assert "Montant HT" in question or "Montant" in question

    def test_rh_employe_non_trouve(self):
        """The rh_paie question names the employee."""
        blocking_context = {
            "blocked_on": "target_not_found",
            "target": "Fiche employé",
            "params": {"nom_employe": "Jean Martin"},
        }
        question = self._ask("rh_paie", blocking_context)
        assert "Jean Martin" in question

    def test_stocks_article_non_trouve(self):
        """The stocks question shows at least one of the two identifiers."""
        blocking_context = {
            "blocked_on": "target_not_found",
            "target": "Article",
            "params": {"ref_article": "REF-4242", "num_bc": "BC-2026-042"},
        }
        question = self._ask("stocks_logistique", blocking_context)
        assert "REF-4242" in question or "BC-2026-042" in question

    def test_ambiguous_code_tim(self):
        """An ambiguous-code question quotes both candidate codes."""
        blocking_context = {
            "blocked_on": "ambiguous_code",
            "params": {"code_a": "E11.9", "code_b": "E11.8"},
        }
        question = self._ask("tim_codage", blocking_context)
        for code in ("E11.9", "E11.8"):
            assert code in question

    def test_clarification_unknown_domain_fallback(self):
        """An unknown domain yields a generic, non-empty question without crashing."""
        question = self._ask(
            "inconnu", {"blocked_on": "target_not_found", "target": "Un champ"}
        )
        assert question
        assert "trouve pas" in question.lower()

    def test_clarification_empty_context(self):
        """Passing None as context still returns a non-empty string."""
        question = self._ask("tim_codage", None)
        assert question  # must not be empty
        assert isinstance(question, str)

    def test_clarification_missing_params_no_crash(self):
        """A context lacking 'params' must not crash.

        The original note says a template may mention {nom_patient}; when it is
        not supplied the missing fields are expected to be rendered empty.
        """
        blocking_context = {
            "blocked_on": "target_not_found",
            "target": "Fichier patient",
            # deliberately no nom_patient
        }
        question = self._ask("tim_codage", blocking_context)
        assert isinstance(question, str)
        assert question
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# describe_workflow_outcome — rapports finaux
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class TestWorkflowOutcome:
    """End-of-workflow reports written in business language."""

    @staticmethod
    def _report(domain_id, **outcome):
        """Return describe_workflow_outcome(**outcome) for the given domain id."""
        return get_domain_context(domain_id).describe_workflow_outcome(**outcome)

    def test_tim_succes_complet(self):
        """Full success in tim_codage reports all 15 records as coded."""
        report = self._report(
            "tim_codage",
            workflow_name="Codage janvier",
            success=True,
            items_count=15,
            failed_count=0,
        )
        assert "15 dossiers" in report
        assert "codé" in report

    def test_tim_succes_partiel(self):
        """Partial success reports 14 done out of 15."""
        report = self._report(
            "tim_codage",
            workflow_name="Codage janvier",
            success=True,
            items_count=15,
            failed_count=1,
        )
        assert "14 dossiers" in report
        assert "15" in report
        # NOTE(review): meant to check the pending count, but any report that
        # contains "14" or "15" already satisfies this - consider tightening.
        assert "1" in report

    def test_tim_echec_complet(self):
        """Total failure names the workflow and uses a failure wording."""
        report = self._report(
            "tim_codage",
            workflow_name="Codage janvier",
            success=False,
            items_count=15,
            failed_count=15,
        )
        assert "Codage janvier" in report
        lowered = report.lower()
        assert "pas" in lowered or "rends la main" in lowered

    def test_compta_success_factures(self):
        """comptabilite counts its items as 'factures'."""
        report = self._report(
            "comptabilite",
            workflow_name="Saisie factures mars",
            success=True,
            items_count=30,
            failed_count=0,
        )
        assert "30 factures" in report

    def test_rh_success_bulletins(self):
        """rh_paie reports 48 of 50 'bulletins' when two failed."""
        report = self._report(
            "rh_paie",
            workflow_name="Paie avril",
            success=True,
            items_count=50,
            failed_count=2,
        )
        for fragment in ("48", "50", "bulletins"):
            assert fragment in report

    def test_stocks_success_bons(self):
        """stocks_logistique counts its items as 'bons'."""
        report = self._report(
            "stocks_logistique",
            workflow_name="Réceptions semaine 14",
            success=True,
            items_count=12,
            failed_count=0,
        )
        assert "12 bons" in report

    def test_generic_fallback(self):
        """An unknown domain still yields a coherent, non-empty report."""
        report = self._report(
            "inconnu",
            workflow_name="Mon workflow",
            success=True,
            items_count=5,
            failed_count=0,
        )
        assert report
        assert "Mon workflow" in report or "5" in report

    def test_tim_success_one_avec_nom_patient(self):
        """Single-item case: the report shows the extra parameter and the elapsed time."""
        report = self._report(
            "tim_codage",
            workflow_name="Codage urgent",
            success=True,
            items_count=1,
            failed_count=0,
            elapsed_s=42,
            extra={"nom_patient": "M. Dupont"},
        )
        assert "M. Dupont" in report
        assert "42" in report
|
||||
|
||||
|
||||
class TestWorkflowOutcomeLLM:
    """LLM refinement of the final report, with the LLM call always mocked."""

    def test_use_llm_success_mocked(self):
        """With use_llm=True and a refiner that answers, its text is used."""
        domain = get_domain_context("tim_codage")

        def canned_refine(self, template, subs, success):
            return "Voilà, j'ai codé tous tes dossiers, bon café !"

        with patch.object(DomainContext, "_llm_refine_summary", canned_refine):
            report = domain.describe_workflow_outcome(
                workflow_name="Codage",
                success=True,
                items_count=10,
                use_llm=True,
            )
        assert "bon café" in report

    def test_use_llm_failure_falls_back_to_template(self):
        """When the refiner returns "", the template-based report is used."""
        domain = get_domain_context("tim_codage")

        def empty_refine(self, template, subs, success):
            # An empty string stands for a failed LLM call.
            return ""

        with patch.object(DomainContext, "_llm_refine_summary", empty_refine):
            report = domain.describe_workflow_outcome(
                workflow_name="Codage",
                success=True,
                items_count=10,
                failed_count=0,
                use_llm=True,
            )
        assert "10 dossiers" in report

    def test_llm_refine_network_error_safe(self):
        """_llm_refine_summary must return "" instead of raising when requests fails."""
        domain = get_domain_context("tim_codage")

        broken_requests = MagicMock()
        broken_requests.post.side_effect = RuntimeError("boom")

        # Swap the 'requests' module entry for the duration of the call.
        with patch.dict("sys.modules", {"requests": broken_requests}):
            refined = domain._llm_refine_summary(
                template="ok", subs={"workflow_name": "x"}, success=True
            )
        assert refined == ""
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Domaine custom enregistré dynamiquement
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class TestRegisterDomain:
    """Registering a custom DomainContext at runtime."""

    def test_register_custom_domain(self):
        """A registered domain can be fetched back and its action mapping applies."""
        templates = {
            "item_singular": "truc",
            "item_plural": "trucs",
            "success": "J'ai fait {done} trucs sur {items_count}.",
            "partial": "Partiel : {done}/{items_count}.",
            "failure": "Echec.",
        }
        custom_domain = DomainContext(
            domain_id="test_custom_xyz",
            name="Test",
            description="test",
            common_actions={"click:foo": "faire foo"},
            summary_templates=templates,
        )
        # NOTE(review): nothing here un-registers the domain afterwards;
        # confirm whether the registry outlives this test.
        register_domain(custom_domain)

        fetched = get_domain_context("test_custom_xyz")
        assert fetched.name == "Test"
        # The upper-case target "FOO" is expected to match the "click:foo" entry.
        assert fetched.summarize_action("click", {"target": "FOO"}) == "faire foo"
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Intégration avec ui.messages
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class TestMessagesIntegration:
    """Message formatters use the business domain when one is supplied."""

    def test_cible_non_trouvee_domain_tim(self):
        """With tim_codage, the 'target not found' body names the patient."""
        from agent_v0.agent_v1.ui.messages import formatter_cible_non_trouvee

        message = formatter_cible_non_trouvee(
            description_cible="Fichier patient",
            titre_fenetre="DxCare",
            domain_id="tim_codage",
            params={"nom_patient": "Mme Durand"},
        )
        assert "Mme Durand" in message.corps

    def test_cible_non_trouvee_domain_comptabilite(self):
        """With comptabilite, the 'target not found' body quotes the invoice number."""
        from agent_v0.agent_v1.ui.messages import formatter_cible_non_trouvee

        message = formatter_cible_non_trouvee(
            description_cible="Montant HT",
            titre_fenetre="Sage",
            domain_id="comptabilite",
            params={"num_facture": "F2026-007"},
        )
        assert "F2026-007" in message.corps

    def test_cible_non_trouvee_sans_domain_retrocompat(self):
        """Without domain_id, the body still shows the target and the window name."""
        from agent_v0.agent_v1.ui.messages import formatter_cible_non_trouvee

        message = formatter_cible_non_trouvee(
            description_cible="bonjour",
            titre_fenetre="Test – Bloc-notes",
        )
        body = message.corps
        assert "bonjour" in body
        assert "Bloc-notes" in body

    def test_fin_workflow_tim_partiel(self):
        """A partial tim_codage run reports 14 records at ATTENTION level."""
        from agent_v0.agent_v1.ui.messages import (
            NiveauMessage,
            formatter_fin_workflow,
        )

        message = formatter_fin_workflow(
            succes=True,
            nom_workflow="Codage janvier",
            nb_etapes=120,
            duree_s=900,
            domain_id="tim_codage",
            items_count=15,
            failed_count=1,
        )
        # Business wording is expected here, not "120 étapes".
        assert "14 dossiers" in message.corps
        # Partial success is flagged as ATTENTION.
        assert message.niveau == NiveauMessage.ATTENTION

    def test_fin_workflow_tim_complet(self):
        """A fully successful tim_codage run reports 15 records at INFO level."""
        from agent_v0.agent_v1.ui.messages import (
            NiveauMessage,
            formatter_fin_workflow,
        )

        message = formatter_fin_workflow(
            succes=True,
            nom_workflow="Codage janvier",
            nb_etapes=120,
            duree_s=900,
            domain_id="tim_codage",
            items_count=15,
            failed_count=0,
        )
        assert "15 dossiers" in message.corps
        assert message.niveau == NiveauMessage.INFO

    def test_fin_workflow_sans_domain_retrocompat(self):
        """Without domain_id, the body mentions the workflow name and step count."""
        from agent_v0.agent_v1.ui.messages import formatter_fin_workflow

        message = formatter_fin_workflow(
            succes=True, nom_workflow="Demo", nb_etapes=5, duree_s=10
        )
        body = message.corps
        assert "Demo" in body
        assert "5 étapes" in body

    def test_erreur_generique_propagate_domain(self):
        """The generic error formatter passes domain and params through to the body."""
        from agent_v0.agent_v1.ui.messages import formatter_erreur_generique

        message = formatter_erreur_generique(
            "target_not_found: Montant HT",
            domain_id="comptabilite",
            params={"num_facture": "F-001"},
        )
        assert "F-001" in message.corps

    def test_friendly_target_tim_synonyme(self):
        """_friendly_target expands 'DP' only for a domain that knows the synonym."""
        from agent_v0.agent_v1.ui.messages import _friendly_target

        expanded = _friendly_target("DP", "tim_codage")
        assert expanded == "diagnostic principal"
        # No domain, or an unknown one: the target comes back unchanged.
        for domain_id in (None, "domaine_inexistant"):
            assert _friendly_target("DP", domain_id) == "DP"
|
||||
264
tests/unit/test_execution_compiler.py
Normal file
264
tests/unit/test_execution_compiler.py
Normal file
@@ -0,0 +1,264 @@
|
||||
"""
|
||||
Tests de l'ExecutionCompiler et de l'ExecutionPlan.
|
||||
|
||||
Vérifie que :
|
||||
- Le compilateur produit un plan déterministe depuis un WorkflowIR
|
||||
- Les stratégies de résolution sont correctement compilées (OCR > template > VLM)
|
||||
- Les timeouts, retries et recovery sont définis
|
||||
- Le plan est sérialisable et versionné
|
||||
"""
|
||||
|
||||
import json
|
||||
import shutil
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
# Put the directory two levels above this file's folder at the front of
# sys.path, unless it is already listed, so the imports below resolve.
_ROOT = str(Path(__file__).resolve().parents[2])
if sys.path.count(_ROOT) == 0:
    sys.path[:0] = [_ROOT]
|
||||
|
||||
from core.workflow.workflow_ir import WorkflowIR
|
||||
from core.workflow.execution_plan import ExecutionPlan, ExecutionNode, ResolutionStrategy, SuccessCondition
|
||||
from core.workflow.execution_compiler import ExecutionCompiler
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# ExecutionPlan — format et sérialisation
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestExecutionPlan:
    """Format and serialization checks for ExecutionPlan / ExecutionNode."""

    def test_serialisation_roundtrip(self):
        """A plan serialized with to_json() and reloaded with from_json()
        keeps its id, its node, and the node's strategies/success condition."""
        plan = ExecutionPlan(
            plan_id="plan_test",
            workflow_id="wf_123",
            version=1,
        )
        plan.nodes.append(ExecutionNode(
            node_id="n1",
            action_type="click",
            intent="Cliquer sur Enregistrer",
            strategy_primary=ResolutionStrategy(method="ocr", target_text="Enregistrer"),
            strategy_fallbacks=[ResolutionStrategy(method="vlm", vlm_description="bouton Enregistrer")],
            success_condition=SuccessCondition(method="title_match", expected_title="Fichier sauvegardé"),
        ))

        reloaded = ExecutionPlan.from_json(plan.to_json())

        assert reloaded.plan_id == "plan_test"
        assert len(reloaded.nodes) == 1
        node = reloaded.nodes[0]
        assert node.strategy_primary.method == "ocr"
        assert len(node.strategy_fallbacks) == 1
        assert node.success_condition.method == "title_match"

    def test_save_load(self):
        """save() followed by load() gives back the same plan id and node count."""
        # TemporaryDirectory removes the directory on exit, including when an
        # assertion fails, so no manual try/finally + rmtree is needed.
        with tempfile.TemporaryDirectory() as tmpdir:
            plan = ExecutionPlan(plan_id="plan_save", workflow_id="wf_1")
            plan.nodes.append(ExecutionNode(node_id="n1", action_type="click"))
            path = plan.save(tmpdir)

            reloaded = ExecutionPlan.load(str(path))
            assert reloaded.plan_id == "plan_save"
            assert len(reloaded.nodes) == 1

    def test_node_avec_variable(self):
        """to_dict() exposes the variable name and the raw templated text."""
        node = ExecutionNode(
            node_id="n1",
            action_type="type",
            text="{patient}",
            variable_name="patient",
        )
        as_dict = node.to_dict()
        assert as_dict["variable_name"] == "patient"
        assert as_dict["text"] == "{patient}"

    def test_node_timeout_et_retry(self):
        """Timeout, retry count and recovery action are stored as given."""
        node = ExecutionNode(
            node_id="n1",
            action_type="click",
            timeout_ms=5000,
            max_retries=3,
            recovery_action="undo",
        )
        assert node.timeout_ms == 5000
        assert node.max_retries == 3
        assert node.recovery_action == "undo"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# ExecutionCompiler — compilation WorkflowIR → ExecutionPlan
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestExecutionCompiler:
    """Compilation of a WorkflowIR into an ExecutionPlan."""

    def _make_ir(self):
        """Build the reference WorkflowIR: two steps, four actions, one variable."""
        ir = WorkflowIR.new("Test workflow", domain="generic")
        ir.add_step(
            "Ouvrir le fichier",
            actions=[
                {"type": "click", "target": "bouton Ouvrir", "anchor_hint": "Ouvrir"},
                {"type": "wait", "duration_ms": 2000},
            ],
            precondition="L'application est ouverte",
            postcondition="La fenêtre Ouvrir est affichée",
        )
        ir.add_step(
            "Saisir le nom",
            actions=[
                {"type": "type", "text": "{nom_fichier}", "variable": True},
                {"type": "key_combo", "keys": ["enter"]},
            ],
        )
        ir.add_variable("nom_fichier", description="Nom du fichier", default="rapport.pdf")
        return ir

    def _compile_reference(self, **compile_kwargs):
        """Compile the reference IR with a fresh compiler.

        Returns the ``(ir, plan)`` pair; extra keyword arguments are forwarded
        to ``ExecutionCompiler.compile`` unchanged.
        """
        ir = self._make_ir()
        return ir, ExecutionCompiler().compile(ir, **compile_kwargs)

    @staticmethod
    def _nodes_of_type(plan, action_type):
        """Return the plan nodes whose ``action_type`` equals ``action_type``."""
        return [n for n in plan.nodes if n.action_type == action_type]

    def _first_click(self, plan):
        """Return the first click node of ``plan`` (IndexError if there is none)."""
        return self._nodes_of_type(plan, "click")[0]

    def test_compilation_basique(self):
        """The plan keeps the workflow id and domain and has one node per action."""
        ir, plan = self._compile_reference()

        assert plan.workflow_id == ir.workflow_id
        assert plan.total_nodes == 4  # click + wait + type + key_combo
        assert plan.domain == "generic"

    def test_click_a_strategie_resolution(self):
        """A click must have a primary strategy and at least one fallback."""
        _, plan = self._compile_reference()

        click_nodes = self._nodes_of_type(plan, "click")
        assert len(click_nodes) == 1

        click = click_nodes[0]
        assert click.strategy_primary is not None
        assert click.strategy_primary.method in ("ocr", "template", "vlm")
        assert len(click.strategy_fallbacks) >= 1

    def test_ocr_est_prioritaire(self):
        """When text is available, OCR is the primary strategy."""
        _, plan = self._compile_reference()

        click = self._first_click(plan)
        assert click.strategy_primary.method == "ocr"
        assert click.strategy_primary.target_text == "Ouvrir"

    def test_vlm_est_fallback(self):
        """The click node carries at least one VLM fallback strategy."""
        _, plan = self._compile_reference()

        click = self._first_click(plan)
        vlm_fallbacks = [f for f in click.strategy_fallbacks if f.method == "vlm"]
        assert len(vlm_fallbacks) >= 1

    def test_type_a_variable(self):
        """A templated type action gets the matching variable_name."""
        _, plan = self._compile_reference()

        type_nodes = self._nodes_of_type(plan, "type")
        assert len(type_nodes) == 1
        assert type_nodes[0].variable_name == "nom_fichier"

    def test_wait_a_duration(self):
        """A wait node keeps the duration declared in the IR."""
        _, plan = self._compile_reference()

        wait_nodes = self._nodes_of_type(plan, "wait")
        assert len(wait_nodes) == 1
        assert wait_nodes[0].duration_ms == 2000

    def test_click_a_recovery(self):
        """A click has a recovery policy and at least one retry."""
        _, plan = self._compile_reference()

        click = self._first_click(plan)
        assert click.recovery_action in ("escape", "undo", "close", "none")
        assert click.max_retries >= 1

    def test_postcondition_devient_success_condition(self):
        """The step postcondition becomes the node's success condition."""
        _, plan = self._compile_reference()

        click = self._first_click(plan)
        assert click.success_condition is not None
        assert "Ouvrir" in click.success_condition.description

    def test_statistiques_compilation(self):
        """Compilation statistics are filled in."""
        _, plan = self._compile_reference()

        assert plan.total_nodes == 4
        assert plan.nodes_with_ocr >= 1
        assert plan.estimated_duration_s > 0

    def test_variables_dans_le_plan(self):
        """IR variables land in the plan with their default values."""
        _, plan = self._compile_reference()

        assert "nom_fichier" in plan.variables
        assert plan.variables["nom_fichier"] == "rapport.pdf"

    def test_params_override_defaults(self):
        """Params passed to compile() override the variable defaults."""
        _, plan = self._compile_reference(params={"nom_fichier": "facture_mars.pdf"})

        assert plan.variables["nom_fichier"] == "facture_mars.pdf"

    def test_plan_json_roundtrip(self):
        """Compile -> JSON -> reload keeps node count and workflow id."""
        _, plan = self._compile_reference()

        reloaded = ExecutionPlan.from_json(plan.to_json())

        assert reloaded.total_nodes == plan.total_nodes
        assert reloaded.workflow_id == plan.workflow_id
        assert len(reloaded.nodes) == len(plan.nodes)

    def test_compilation_workflow_vide(self):
        """An empty workflow compiles to an empty plan."""
        plan = ExecutionCompiler().compile(WorkflowIR.new("Vide"))

        assert plan.total_nodes == 0
        assert plan.nodes == []

    # Parametrized rather than looped: each domain is reported as its own test
    # case, and a failure on one domain no longer hides the others.
    @pytest.mark.parametrize("domain", ["tim_codage", "comptabilite", "rh_paie", "generic"])
    def test_plusieurs_domaines(self, domain):
        """The compiler propagates the IR domain to the plan."""
        ir = WorkflowIR.new("Test", domain=domain)
        ir.add_step("Action", actions=[{"type": "click", "target": "bouton"}])
        plan = ExecutionCompiler().compile(ir)
        assert plan.domain == domain
|
||||
565
tests/unit/test_execution_plan_runner.py
Normal file
565
tests/unit/test_execution_plan_runner.py
Normal file
@@ -0,0 +1,565 @@
|
||||
"""
|
||||
Tests de execution_plan_runner — adaptateur ExecutionPlan → queue de replay.
|
||||
|
||||
Vérifie que :
|
||||
- Un ExecutionNode est correctement converti en action replay
|
||||
- Les stratégies de résolution (OCR / template / VLM) produisent le bon target_spec
|
||||
- Les variables {var} et ${var} sont substituées dans les textes
|
||||
- L'injection dans la queue _replay_queues est correcte (avec et sans lock)
|
||||
- La conversion d'un plan complet respecte l'ordre et les limites
|
||||
- Les types d'actions non exécutables sont ignorés
|
||||
|
||||
Ces tests sont isolés et ne dépendent pas du serveur FastAPI (on importe
|
||||
uniquement execution_plan_runner et les dataclasses du core).
|
||||
"""
|
||||
|
||||
import sys
|
||||
import threading
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
_ROOT = str(Path(__file__).resolve().parents[2])
|
||||
if _ROOT not in sys.path:
|
||||
sys.path.insert(0, _ROOT)
|
||||
|
||||
from core.workflow.execution_plan import (
|
||||
ExecutionNode,
|
||||
ExecutionPlan,
|
||||
ResolutionStrategy,
|
||||
SuccessCondition,
|
||||
)
|
||||
from core.workflow.execution_compiler import ExecutionCompiler
|
||||
from core.workflow.workflow_ir import WorkflowIR
|
||||
|
||||
from agent_v0.server_v1.execution_plan_runner import (
|
||||
execution_node_to_action,
|
||||
execution_plan_to_actions,
|
||||
inject_plan_into_queue,
|
||||
substitute_variables,
|
||||
)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Substitution de variables
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestSubstituteVariables:
    """Placeholder substitution for the ``{var}`` and ``${var}`` syntaxes."""

    def test_substitution_curly(self):
        """A ``{nom}`` placeholder is replaced by its value."""
        resultat = substitute_variables("{nom}", {"nom": "Dupont"})
        assert resultat == "Dupont"

    def test_substitution_dollar(self):
        """A ``${nom}`` placeholder is replaced by its value."""
        resultat = substitute_variables("${nom}", {"nom": "Dupont"})
        assert resultat == "Dupont"

    def test_substitution_dans_phrase(self):
        """Both syntaxes can be mixed inside a longer sentence."""
        gabarit = "Bonjour {nom}, votre code est ${code}"
        valeurs = {"nom": "Alice", "code": "A42"}
        assert substitute_variables(gabarit, valeurs) == "Bonjour Alice, votre code est A42"

    def test_variable_inconnue_inchangee(self):
        """An unknown variable stays in the text (no KeyError)."""
        resultat = substitute_variables("{inconnu}", {"autre": "val"})
        assert resultat == "{inconnu}"

    def test_texte_sans_variable(self):
        """Text without placeholders is returned as-is."""
        resultat = substitute_variables("texte simple", {"x": "1"})
        assert resultat == "texte simple"

    def test_texte_vide(self):
        """An empty string stays empty."""
        resultat = substitute_variables("", {"x": "1"})
        assert resultat == ""

    def test_variables_vides(self):
        """With no variables supplied, the placeholder is left untouched."""
        resultat = substitute_variables("{x}", {})
        assert resultat == "{x}"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Conversion ExecutionNode → action replay
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestExecutionNodeToAction:
    """Conversion of a single ExecutionNode into a replay action dict."""

    def test_click_avec_strategie_ocr(self):
        """An OCR-strategy click yields a visual click action carrying by_text."""
        ocr = ResolutionStrategy(
            method="ocr",
            target_text="Enregistrer",
            threshold=0.8,
        )
        noeud = ExecutionNode(
            node_id="n1",
            action_type="click",
            intent="Cliquer sur Enregistrer",
            strategy_primary=ocr,
        )
        resultat = execution_node_to_action(noeud)

        assert resultat is not None
        assert resultat["type"] == "click"
        assert resultat["action_id"].startswith("act_plan_")
        assert resultat["plan_node_id"] == "n1"
        assert resultat["intention"] == "Cliquer sur Enregistrer"
        assert resultat["visual_mode"] is True
        assert all(cle in resultat for cle in ("x_pct", "y_pct"))
        assert resultat["target_spec"]["by_text"] == "Enregistrer"

    def test_click_avec_strategie_template(self):
        """A template-strategy click exposes anchor_image_base64."""
        gabarit = ResolutionStrategy(
            method="template",
            anchor_b64="AAABBBCCCDDD",
            target_text="Ouvrir",
        )
        noeud = ExecutionNode(node_id="n2", action_type="click", strategy_primary=gabarit)
        resultat = execution_node_to_action(noeud)

        assert resultat is not None
        assert resultat["type"] == "click"
        cible = resultat["target_spec"]
        assert cible["anchor_image_base64"] == "AAABBBCCCDDD"
        assert cible["by_text"] == "Ouvrir"
        assert resultat["visual_mode"] is True

    def test_click_avec_strategie_vlm(self):
        """A VLM-strategy click exposes vlm_description."""
        vlm = ResolutionStrategy(
            method="vlm",
            vlm_description="bouton rouge en haut à droite",
        )
        noeud = ExecutionNode(node_id="n3", action_type="click", strategy_primary=vlm)
        resultat = execution_node_to_action(noeud)

        assert resultat is not None
        assert resultat["target_spec"]["vlm_description"] == "bouton rouge en haut à droite"
        assert resultat["visual_mode"] is True

    def test_click_avec_fallbacks_ajoute_hints(self):
        """Fallback strategies add their anchors to the target_spec."""
        replis = [
            ResolutionStrategy(
                method="template", anchor_b64="XYZ", target_text="Menu",
            ),
            ResolutionStrategy(
                method="vlm", vlm_description="menu déroulant",
            ),
        ]
        noeud = ExecutionNode(
            node_id="n4",
            action_type="click",
            intent="Ouvrir le menu",
            strategy_primary=ResolutionStrategy(method="ocr", target_text="Menu"),
            strategy_fallbacks=replis,
        )
        resultat = execution_node_to_action(noeud)

        cible = resultat["target_spec"]
        assert cible["by_text"] == "Menu"
        assert cible["anchor_image_base64"] == "XYZ"
        assert cible["vlm_description"] == "menu déroulant"

    def test_click_avec_success_condition_expected_title(self):
        """A success condition with expected_title feeds expected_window_title."""
        condition = SuccessCondition(
            method="title_match",
            expected_title="Document sauvegardé",
        )
        noeud = ExecutionNode(
            node_id="n5",
            action_type="click",
            strategy_primary=ResolutionStrategy(method="ocr", target_text="OK"),
            success_condition=condition,
        )
        resultat = execution_node_to_action(noeud)

        assert resultat["expected_window_title"] == "Document sauvegardé"
        assert resultat["target_spec"]["window_title"] == "Document sauvegardé"

    def test_type_avec_variable_substitution(self):
        """A type node whose text is {patient} gets the value substituted."""
        noeud = ExecutionNode(
            node_id="n6",
            action_type="type",
            text="{patient}",
            variable_name="patient",
        )
        resultat = execution_node_to_action(noeud, variables={"patient": "DUPONT"})

        assert resultat["type"] == "type"
        assert resultat["text"] == "DUPONT"
        assert resultat["variable_name"] == "patient"

    def test_type_sans_variable(self):
        """Text without a placeholder is passed through unchanged."""
        noeud = ExecutionNode(node_id="n7", action_type="type", text="Bonjour")
        resultat = execution_node_to_action(noeud)
        assert resultat["text"] == "Bonjour"

    def test_key_combo(self):
        """A key_combo action exposes its keys."""
        noeud = ExecutionNode(node_id="n8", action_type="key_combo", keys=["ctrl", "s"])
        resultat = execution_node_to_action(noeud)

        assert resultat["type"] == "key_combo"
        assert resultat["keys"] == ["ctrl", "s"]

    def test_key_combo_vide_retourne_none(self):
        """A key_combo with no keys is dropped (None)."""
        noeud = ExecutionNode(node_id="n9", action_type="key_combo", keys=[])
        resultat = execution_node_to_action(noeud)
        assert resultat is None

    def test_wait(self):
        """A wait action exposes duration_ms."""
        noeud = ExecutionNode(node_id="n10", action_type="wait", duration_ms=2500)
        resultat = execution_node_to_action(noeud)

        assert resultat["type"] == "wait"
        assert resultat["duration_ms"] == 2500

    def test_wait_sans_duration_default(self):
        """A wait built without a duration converts to 1000 ms."""
        noeud = ExecutionNode(node_id="n11", action_type="wait")
        resultat = execution_node_to_action(noeud)
        assert resultat["duration_ms"] == 1000

    def test_scroll(self):
        """A scroll node produces a scroll action with a delta."""
        noeud = ExecutionNode(node_id="n12", action_type="scroll")
        resultat = execution_node_to_action(noeud)

        assert resultat["type"] == "scroll"
        assert "delta" in resultat

    def test_type_inconnu_retourne_none(self):
        """An unknown action type is dropped (None)."""
        noeud = ExecutionNode(node_id="n13", action_type="unknown_thing")
        resultat = execution_node_to_action(noeud)
        assert resultat is None

    def test_metadonnees_execution_propagees(self):
        """timeout_ms, max_retries and recovery_action reach the action."""
        noeud = ExecutionNode(
            node_id="n14",
            action_type="click",
            strategy_primary=ResolutionStrategy(method="ocr", target_text="X"),
            timeout_ms=15000,
            max_retries=3,
            recovery_action="undo",
        )
        resultat = execution_node_to_action(noeud)

        attendus = {"timeout_ms": 15000, "max_retries": 3, "recovery_action": "undo"}
        for cle, valeur in attendus.items():
            assert resultat[cle] == valeur

    def test_node_optionnel(self):
        """is_optional is propagated to the action."""
        noeud = ExecutionNode(
            node_id="n15",
            action_type="click",
            strategy_primary=ResolutionStrategy(method="ocr", target_text="X"),
            is_optional=True,
        )
        resultat = execution_node_to_action(noeud)
        assert resultat["is_optional"] is True

    def test_id_prefix_custom(self):
        """The action id prefix can be customised."""
        noeud = ExecutionNode(
            node_id="n16",
            action_type="click",
            strategy_primary=ResolutionStrategy(method="ocr", target_text="X"),
        )
        resultat = execution_node_to_action(noeud, id_prefix="act_custom")
        assert resultat["action_id"].startswith("act_custom_")
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Conversion ExecutionPlan → liste d'actions
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestExecutionPlanToActions:
    """Conversion of a whole ExecutionPlan into an ordered action list."""

    def _make_plan(self) -> ExecutionPlan:
        """Build a four-node plan (click, type, key_combo, wait) with one variable."""
        clic = ExecutionNode(
            node_id="n1",
            action_type="click",
            strategy_primary=ResolutionStrategy(method="ocr", target_text="Ouvrir"),
        )
        saisie = ExecutionNode(
            node_id="n2",
            action_type="type",
            text="{nom_fichier}",
            variable_name="nom_fichier",
        )
        touches = ExecutionNode(
            node_id="n3",
            action_type="key_combo",
            keys=["enter"],
        )
        attente = ExecutionNode(
            node_id="n4",
            action_type="wait",
            duration_ms=1500,
        )

        plan = ExecutionPlan(
            plan_id="plan_test",
            workflow_id="wf_test",
            version=1,
            variables={"nom_fichier": "rapport.pdf"},
        )
        plan.nodes = [clic, saisie, touches, attente]
        plan.total_nodes = 4
        return plan

    def test_conversion_ordre_respecte(self):
        """Actions come out in node order."""
        resultat = execution_plan_to_actions(self._make_plan())

        assert len(resultat) == 4
        for position, attendu in enumerate(("click", "type", "key_combo", "wait")):
            assert resultat[position]["type"] == attendu

    def test_variables_du_plan_appliquees(self):
        """The plan's own variables are substituted into type actions."""
        resultat = execution_plan_to_actions(self._make_plan())
        saisie = next(a for a in resultat if a["type"] == "type")
        assert saisie["text"] == "rapport.pdf"

    def test_variables_override(self):
        """Variables passed as an argument override the plan's."""
        resultat = execution_plan_to_actions(
            self._make_plan(), variables={"nom_fichier": "facture.pdf"},
        )
        saisie = next(a for a in resultat if a["type"] == "type")
        assert saisie["text"] == "facture.pdf"

    def test_plan_vide(self):
        """A plan without nodes converts to an empty list."""
        vide = ExecutionPlan(plan_id="empty", workflow_id="wf_empty")
        assert execution_plan_to_actions(vide) == []

    def test_noeud_non_convertible_ignore(self):
        """An unknown node does not block the conversion of the others."""
        plan = ExecutionPlan(plan_id="p", workflow_id="wf")
        plan.nodes = [
            ExecutionNode(
                node_id="n1",
                action_type="click",
                strategy_primary=ResolutionStrategy(method="ocr", target_text="OK"),
            ),
            ExecutionNode(node_id="n2", action_type="unknown_type"),
            ExecutionNode(node_id="n3", action_type="type", text="hello"),
        ]
        resultat = execution_plan_to_actions(plan)

        assert len(resultat) == 2
        premier, second = resultat[0], resultat[1]
        assert premier["type"] == "click"
        assert second["type"] == "type"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Injection dans la queue de replay
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestInjectPlanIntoQueue:
    """Injection of a plan's actions into the per-session replay queues."""

    def _make_simple_plan(self) -> ExecutionPlan:
        """Build a two-node plan: one OCR click followed by a 500 ms wait."""
        clic = ExecutionNode(
            node_id="n1",
            action_type="click",
            strategy_primary=ResolutionStrategy(method="ocr", target_text="Go"),
        )
        attente = ExecutionNode(node_id="n2", action_type="wait", duration_ms=500)

        plan = ExecutionPlan(plan_id="p_inj", workflow_id="wf_inj")
        plan.nodes = [clic, attente]
        return plan

    def test_injection_replace(self):
        """By default the existing queue content is replaced."""
        files: dict = defaultdict(list)
        files["sess_abc"] = [{"type": "click", "action_id": "old"}]

        injectees = inject_plan_into_queue(
            plan=self._make_simple_plan(),
            session_id="sess_abc",
            replay_queues=files,
        )

        assert len(injectees) == 2
        assert len(files["sess_abc"]) == 2
        # The pre-existing action must be gone.
        assert not any(a["action_id"] == "old" for a in files["sess_abc"])

    def test_injection_append(self):
        """With replace=False the new actions are appended to existing ones."""
        files: dict = defaultdict(list)
        files["sess_abc"] = [{"type": "click", "action_id": "existing"}]

        inject_plan_into_queue(
            plan=self._make_simple_plan(),
            session_id="sess_abc",
            replay_queues=files,
            replace=False,
        )

        file_session = files["sess_abc"]
        assert len(file_session) == 3
        assert file_session[0]["action_id"] == "existing"

    def test_injection_avec_lock(self):
        """Injection works with a lock and leaves it released afterwards."""
        verrou = threading.Lock()
        files: dict = defaultdict(list)

        injectees = inject_plan_into_queue(
            plan=self._make_simple_plan(),
            session_id="sess_x",
            replay_queues=files,
            lock=verrou,
        )

        assert len(injectees) == 2
        assert len(files["sess_x"]) == 2
        # A non-blocking acquire succeeds only if the lock was released.
        acquis = verrou.acquire(blocking=False)
        assert acquis is True
        verrou.release()

    def test_injection_avec_variables(self):
        """Variables are substituted during injection."""
        plan = ExecutionPlan(plan_id="p_var", workflow_id="wf_var")
        plan.nodes = [
            ExecutionNode(
                node_id="n1",
                action_type="type",
                text="{patient}",
                variable_name="patient",
            ),
        ]

        files: dict = defaultdict(list)
        injectees = inject_plan_into_queue(
            plan=plan,
            session_id="sess_v",
            replay_queues=files,
            variables={"patient": "MARTIN"},
        )

        assert injectees[0]["text"] == "MARTIN"
        assert files["sess_v"][0]["text"] == "MARTIN"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Intégration : pipeline complet IR → Plan → Actions
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestFullPipelineV4:
    """End-to-end pipeline: WorkflowIR -> ExecutionPlan -> replay actions."""

    def test_pipeline_complet_ir_vers_actions(self):
        """A two-step IR compiles to four nodes and converts to four actions."""
        # 1. Build the WorkflowIR.
        etape_ouverture = [
            {"type": "click", "target": "bouton Ouvrir", "anchor_hint": "Ouvrir"},
            {"type": "wait", "duration_ms": 1000},
        ]
        etape_saisie = [
            {"type": "type", "text": "{nom_fichier}", "variable": True},
            {"type": "key_combo", "keys": ["enter"]},
        ]
        ir = WorkflowIR.new("Test pipeline V4", domain="generic")
        ir.add_step(
            "Ouvrir le fichier",
            actions=etape_ouverture,
            postcondition="La fenêtre Ouvrir est visible",
        )
        ir.add_step("Saisir le nom", actions=etape_saisie)
        ir.add_variable("nom_fichier", description="Fichier", default="doc.pdf")

        # 2. Compile into an ExecutionPlan.
        plan = ExecutionCompiler().compile(ir)
        assert plan.total_nodes == 4

        # 3. Convert into replay actions.
        resultat = execution_plan_to_actions(plan)
        assert len(resultat) == 4
        assert [a["type"] for a in resultat] == ["click", "wait", "type", "key_combo"]

        clic, _, saisie, touches = resultat

        # The click uses an OCR strategy -> by_text.
        assert clic["visual_mode"] is True
        assert clic["target_spec"].get("by_text") == "Ouvrir"

        # The type action got the variable default from the plan.
        assert saisie["text"] == "doc.pdf"

        # The key_combo carries its keys.
        assert touches["keys"] == ["enter"]

    def test_pipeline_avec_params_override(self):
        """Variables passed at conversion time take precedence over the plan."""
        ir = WorkflowIR.new("Variables override")
        ir.add_step("Saisie", actions=[
            {"type": "type", "text": "{code}", "variable": True},
        ])
        ir.add_variable("code", default="DEFAULT")

        plan = ExecutionCompiler().compile(ir)

        resultat = execution_plan_to_actions(
            plan, variables={"code": "RUNTIME"},
        )
        assert resultat[0]["text"] == "RUNTIME"

    def test_pipeline_plan_serialise_et_recharge(self):
        """A plan can be serialized, reloaded, then converted to actions."""
        ir = WorkflowIR.new("Roundtrip")
        ir.add_step("X", actions=[
            {"type": "click", "target": "btn", "anchor_hint": "Valider"},
        ])
        plan = ExecutionCompiler().compile(ir)

        recharge = ExecutionPlan.from_json(plan.to_json())

        resultat = execution_plan_to_actions(recharge)
        assert len(resultat) == 1
        assert resultat[0]["type"] == "click"
|
||||
727
tests/unit/test_lea_notifications.py
Normal file
727
tests/unit/test_lea_notifications.py
Normal file
@@ -0,0 +1,727 @@
|
||||
"""Tests unitaires pour l'UX de Léa (notifications, messages, activity panel).
|
||||
|
||||
Couvre :
|
||||
- Formatage des messages techniques → français naturel (module messages.py)
|
||||
- Hiérarchie info/attention/blocage
|
||||
- Détection de la fenêtre Léa
|
||||
- NotificationManager avec plyer mocké
|
||||
- ActivityPanel sans tkinter (fallback silencieux)
|
||||
|
||||
Ces tests ne nécessitent ni tkinter ni plyer : tout est mocké ou géré en
|
||||
fallback silencieux. Ils doivent passer sur toutes les plateformes.
|
||||
|
||||
Auteur: Dom, avril 2026
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
# Assurer que la racine du projet est dans le path (comme conftest)
|
||||
ROOT = Path(__file__).resolve().parents[2]
|
||||
if str(ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(ROOT))
|
||||
|
||||
from agent_v0.agent_v1.ui import activity_panel, messages, notifications
|
||||
from agent_v0.agent_v1.ui.activity_panel import ActivityPanel, EtatLea, reset_activity_panel
|
||||
from agent_v0.agent_v1.ui.messages import (
|
||||
MessageUtilisateur,
|
||||
NiveauMessage,
|
||||
_extraire_nom_application,
|
||||
_nettoyer_description_cible,
|
||||
est_fenetre_lea,
|
||||
formatter_cible_non_trouvee,
|
||||
formatter_connexion_perdue,
|
||||
formatter_connexion_retablie,
|
||||
formatter_debut_workflow,
|
||||
formatter_ecran_inchange,
|
||||
formatter_erreur_generique,
|
||||
formatter_etape_workflow,
|
||||
formatter_fenetre_incorrecte,
|
||||
formatter_fin_workflow,
|
||||
formatter_ralentissement,
|
||||
formatter_retry,
|
||||
)
|
||||
from agent_v0.agent_v1.ui.notifications import NotificationManager
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Tests : helpers d'extraction
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class TestExtraction:
    """Checks for the _extraire_nom_application and _nettoyer_description_cible helpers."""

    def test_extraire_app_avec_em_dash(self):
        # Despite the test name, the separator in this title is an en dash.
        titre = "Document.txt – Bloc-notes"
        assert _extraire_nom_application(titre) == "Bloc-notes"

    def test_extraire_app_avec_em_dash_long(self):
        # Em dash separator.
        titre = "Ma Page — Google Chrome"
        assert _extraire_nom_application(titre) == "Google Chrome"

    def test_extraire_app_avec_dash_simple(self):
        titre = "Session 1 - Firefox"
        assert _extraire_nom_application(titre) == "Firefox"

    def test_extraire_app_sans_separateur(self):
        # A hyphen without surrounding spaces is expected to stay in the name.
        titre = "Bloc-notes"
        assert _extraire_nom_application(titre) == "Bloc-notes"

    def test_extraire_app_vide(self):
        for entree in ("", None):
            assert _extraire_nom_application(entree) == ""

    def test_extraire_app_garde_dernier_separateur(self):
        # With several separators, the last segment is kept.
        resultat = _extraire_nom_application("A - B - C")
        assert resultat == "C"

    def test_nettoyer_description_retire_guillemets(self):
        cas = (
            ("'bonjour'", "bonjour"),
            ('"bonjour"', "bonjour"),
            ("`code`", "code"),
        )
        for brut, attendu in cas:
            assert _nettoyer_description_cible(brut) == attendu

    def test_nettoyer_description_vide(self):
        for entree in ("", None):
            assert _nettoyer_description_cible(entree) == ""

    def test_nettoyer_description_tronque(self):
        # A 200-character input must come back capped at 80 characters
        # and end with an ellipsis.
        nettoyee = _nettoyer_description_cible("x" * 200)
        assert len(nettoyee) <= 80
        assert nettoyee.endswith("...")
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Tests : détection fenêtre Léa
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class TestFenetreLea:
    """Tests for `est_fenetre_lea`, which decides whether a window title belongs to Léa."""

    # Titles that must be recognised as Léa's own windows.
    TITRES_LEA = [
        "Léa",
        "Léa — Assistante IA",
        "Lea - Assistante",
        "Léa — Activité",
        "Lea : Explorateur de fichiers",
        "LÉA — ASSISTANTE IA",  # all-uppercase variant
        "Léa assistante",
        "Assistante IA",
    ]

    # Titles that must NOT be recognised, including look-alikes embedding "lea".
    TITRES_AUTRES = [
        "Bloc-notes",
        "Google Chrome",
        "Program Manager",
        "Microsoft Word - Document1",
        "Sans titre - Paint",
        "",
        "cléa.txt",  # contains "léa" but is a file name
        "replay.log",  # contains "lea"
        "leapfrog.exe",  # contains "lea"
        "nucleaire.pdf",  # contains "lea"
    ]

    @pytest.mark.parametrize("titre", TITRES_LEA)
    def test_detecte_fenetres_lea(self, titre):
        detecte = est_fenetre_lea(titre)
        assert detecte, f"Devrait détecter : {titre!r}"

    @pytest.mark.parametrize("titre", TITRES_AUTRES)
    def test_ignore_fenetres_non_lea(self, titre):
        """False positives on names that merely contain 'lea' must be avoided
        (the original test attributes this to regex word boundaries)."""
        detecte = est_fenetre_lea(titre)
        assert not detecte, f"Ne devrait pas détecter : {titre!r}"

    def test_titre_none(self):
        # None must give the literal False, not just a falsy value.
        resultat = est_fenetre_lea(None)
        assert resultat is False

    def test_espaces_en_trop(self):
        # Surrounding whitespace must not prevent detection.
        resultat = est_fenetre_lea(" Léa — Assistante IA ")
        assert resultat is True
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Tests : formatage des messages techniques → humains
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class TestFormatterCibleNonTrouvee:
    """Formatting of the message shown when a target element cannot be found."""

    def test_message_blocage(self):
        message = formatter_cible_non_trouvee("bonjour", "Document – Bloc-notes")
        assert message.niveau == NiveauMessage.BLOCAGE
        assert message.persistent is True
        titre_minuscule = message.titre.lower()
        assert "besoin d'aide" in titre_minuscule

    def test_message_contient_nom_element(self):
        message = formatter_cible_non_trouvee("Rechercher", "Chrome")
        corps_minuscule = message.corps.lower()
        assert "rechercher" in corps_minuscule

    def test_message_contient_nom_application(self):
        message = formatter_cible_non_trouvee("bonjour", "Doc – Bloc-notes")
        corps_minuscule = message.corps.lower()
        assert "bloc-notes" in corps_minuscule

    def test_message_action_orientee(self):
        """The message must suggest an action to the user."""
        corps_minuscule = formatter_cible_non_trouvee("bouton", "App").corps.lower()
        # At least one action-oriented fragment such as "cliquer" or "faire" is expected.
        fragments_action = ["cliqu", "faire", "peux-tu"]
        assert any(fragment in corps_minuscule for fragment in fragments_action)

    def test_sans_fenetre(self):
        message = formatter_cible_non_trouvee("Submit", None)
        assert message.niveau == NiveauMessage.BLOCAGE
        assert "submit" in message.corps.lower()

    def test_description_vide(self):
        message = formatter_cible_non_trouvee("", "App")
        # A usable body must still be produced for an empty description.
        assert message.corps
        assert message.niveau == NiveauMessage.BLOCAGE

    def test_message_techniques_nettoyes(self):
        """No '__target_not_found__' marker or other technical code may be visible."""
        corps = formatter_cible_non_trouvee("'bonjour'", "Bloc-notes").corps
        assert "target_not_found" not in corps
        # Technical quotes are stripped (French ones may be added), the word itself stays.
        assert "bonjour" in corps
|
||||
|
||||
|
||||
class TestFormatterFenetreIncorrecte:
    """Formatting of the message shown when the wrong window is active."""

    def test_message_blocage_persistent(self):
        fenetre_actuelle = "Program Manager"
        fenetre_attendue = "Lea : Explorateur de fichiers"
        message = formatter_fenetre_incorrecte(fenetre_actuelle, fenetre_attendue)
        assert message.niveau == NiveauMessage.BLOCAGE
        assert message.persistent is True

    def test_mentionne_fenetre_attendue(self):
        corps_minuscule = formatter_fenetre_incorrecte("Program Manager", "Chrome").corps.lower()
        assert "chrome" in corps_minuscule

    def test_mentionne_fenetre_actuelle(self):
        corps_minuscule = formatter_fenetre_incorrecte("Program Manager", "Chrome").corps.lower()
        assert "program manager" in corps_minuscule

    def test_suggere_action(self):
        corps_minuscule = formatter_fenetre_incorrecte("A", "B").corps.lower()
        # The body should suggest opening the right window.
        assert any(indice in corps_minuscule for indice in ("ouvr", "fenêtre"))
|
||||
|
||||
|
||||
class TestFormatterEcranInchange:
    """Formatting of the message shown when the screen does not change after an action."""

    def test_niveau_attention(self):
        """An unchanged screen is reported at ATTENTION level, not BLOCAGE."""
        message = formatter_ecran_inchange("click")
        assert message.niveau == NiveauMessage.ATTENTION

    def test_message_pour_click(self):
        corps_minuscule = formatter_ecran_inchange("click").corps.lower()
        assert "clic" in corps_minuscule

    def test_message_pour_type(self):
        corps_minuscule = formatter_ecran_inchange("type").corps.lower()
        assert "saisie" in corps_minuscule

    def test_message_pour_key_combo(self):
        corps_minuscule = formatter_ecran_inchange("key_combo").corps.lower()
        assert "raccourci" in corps_minuscule

    def test_sans_type_action(self):
        message = formatter_ecran_inchange("")
        # A non-empty body is still expected without an action type.
        assert message.corps

    def test_pas_persistent(self):
        message = formatter_ecran_inchange("click")
        assert message.persistent is False
|
||||
|
||||
|
||||
class TestFormatterConnexion:
    """Server-connection messages (lost / restored)."""

    def test_connexion_perdue_attention(self):
        message = formatter_connexion_perdue("localhost")
        assert message.niveau == NiveauMessage.ATTENTION

    def test_connexion_perdue_rassurante(self):
        """The message must be reassuring (automatic reconnection)."""
        corps_minuscule = formatter_connexion_perdue().corps.lower()
        assert any(mot in corps_minuscule for mot in ("automatique", "retent"))

    def test_connexion_retablie_info(self):
        message = formatter_connexion_retablie()
        assert message.niveau == NiveauMessage.INFO

    def test_connexion_retablie_positive(self):
        corps_minuscule = formatter_connexion_retablie().corps.lower()
        assert any(mot in corps_minuscule for mot in ("bon", "revenue"))
|
||||
|
||||
|
||||
class TestFormatterWorkflow:
    """Workflow messages: start, step progress and end."""

    def test_debut_avec_etapes(self):
        message = formatter_debut_workflow("Saisie patient", 15)
        assert message.niveau == NiveauMessage.INFO
        corps = message.corps
        assert "saisie patient" in corps.lower()
        assert "15" in corps

    def test_debut_sans_etapes(self):
        message = formatter_debut_workflow("Backup")
        assert message.niveau == NiveauMessage.INFO
        assert "backup" in message.corps.lower()

    def test_etape_progression(self):
        corps = formatter_etape_workflow(3, 15, "Clic sur Valider").corps
        # Both the current step and the total must be visible, plus the description.
        for nombre in ("3", "15"):
            assert nombre in corps
        assert "valider" in corps.lower()

    def test_etape_sans_description(self):
        corps = formatter_etape_workflow(5, 20).corps
        for nombre in ("5", "20"):
            assert nombre in corps

    def test_fin_succes(self):
        message = formatter_fin_workflow(True, "Ma tâche", 10, 45.0)
        assert message.niveau == NiveauMessage.INFO
        corps_minuscule = message.corps.lower()
        assert any(mot in corps_minuscule for mot in ("terminé", "fait"))

    def test_fin_echec_blocage(self):
        message = formatter_fin_workflow(False, "Ma tâche")
        assert message.niveau == NiveauMessage.BLOCAGE
        assert message.persistent is True
|
||||
|
||||
|
||||
class TestFormatterRetryRalentissement:
    """Retry and slowdown messages."""

    def test_retry_attention(self):
        message = formatter_retry("click", 2)
        assert message.niveau == NiveauMessage.ATTENTION
        # The attempt number must appear in the body.
        assert "2" in message.corps

    def test_ralentissement_attention(self):
        message = formatter_ralentissement()
        assert message.niveau == NiveauMessage.ATTENTION
        corps_minuscule = message.corps.lower()
        assert "lent" in corps_minuscule
|
||||
|
||||
|
||||
class TestFormatterErreurGenerique:
    """`formatter_erreur_generique` must route raw errors to the specialised formatters."""

    def test_detecte_target_not_found(self):
        message = formatter_erreur_generique("target_not_found: 'bouton'")
        assert message.niveau == NiveauMessage.BLOCAGE
        assert "bouton" in message.corps.lower()

    def test_detecte_fenetre_incorrecte(self):
        erreur_brute = "Fenêtre incorrecte: 'Program Manager' (attendu: 'Chrome')"
        message = formatter_erreur_generique(erreur_brute)
        assert message.niveau == NiveauMessage.BLOCAGE
        corps_minuscule = message.corps.lower()
        assert any(nom in corps_minuscule for nom in ("chrome", "program manager"))

    def test_detecte_ecran_inchange(self):
        message = formatter_erreur_generique("Ecran inchange apres l'action")
        assert message.niveau == NiveauMessage.ATTENTION

    def test_detecte_no_screen_change(self):
        message = formatter_erreur_generique("no_screen_change after click")
        assert message.niveau == NiveauMessage.ATTENTION

    def test_detecte_policy_abort(self):
        message = formatter_erreur_generique("policy_abort:target_desc_x")
        assert message.niveau == NiveauMessage.BLOCAGE

    def test_message_vide(self):
        message = formatter_erreur_generique("")
        assert message.corps
        assert message.niveau == NiveauMessage.ATTENTION

    def test_message_inconnu_tronque(self):
        erreur_longue = "erreur très longue " * 20
        corps = formatter_erreur_generique(erreur_longue).corps
        # An unknown, overly long message must be truncated.
        assert len(corps) <= 200

    def test_pas_de_code_technique_dans_message_utilisateur(self):
        """User-facing messages must not contain variable names, function names
        or Python type names."""
        corps = formatter_erreur_generique("target_not_found: 'bouton'").corps
        # The raw technical code must not leak into the body verbatim.
        assert "target_not_found" not in corps
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Tests : hiérarchie NiveauMessage
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class TestHierarchieNiveau:
    """Checks on the info / attention / blocage hierarchy."""

    def test_niveau_info_duree_courte(self):
        message = formatter_connexion_retablie()
        assert message.niveau == NiveauMessage.INFO
        assert message.duree_s <= 6

    def test_niveau_attention_duree_moyenne(self):
        message = formatter_ecran_inchange("click")
        assert message.niveau == NiveauMessage.ATTENTION
        assert 5 <= message.duree_s <= 10

    def test_niveau_blocage_duree_longue_persistent(self):
        message = formatter_cible_non_trouvee("x", "y")
        assert message.niveau == NiveauMessage.BLOCAGE
        assert message.duree_s >= 10
        assert message.persistent is True

    def test_niveau_info_non_persistent(self):
        message = formatter_debut_workflow("test")
        assert message.persistent is False

    def test_to_dict_serialisation(self):
        message = MessageUtilisateur(
            niveau=NiveauMessage.INFO,
            titre="Test",
            corps="Corps",
            duree_s=5,
        )
        serialise = message.to_dict()
        # Field-by-field comparison of the serialised form.
        attendus = (
            ("niveau", "info"),
            ("titre", "Test"),
            ("corps", "Corps"),
            ("duree_s", 5),
        )
        for cle, valeur in attendus:
            assert serialise[cle] == valeur
        # `persistent` was not passed to the constructor and must serialise as False.
        assert serialise["persistent"] is False
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Tests : NotificationManager (avec plyer mocké)
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class TestNotificationManager:
    """NotificationManager tests with the plyer backend mocked out.

    These tests do not depend on the environment: the module-level plyer
    hooks of ``notifications`` are patched so calls can be inspected without
    displaying real notifications.

    The original tests state that sending is asynchronous (it happens on a
    thread). Instead of a fixed ``time.sleep(0.1)``, the tests poll the mock
    for a bounded time. This avoids spurious failures on slow machines and
    replaces the opaque ``TypeError`` raised when unpacking a ``None``
    ``call_args`` with an explicit assertion.
    """

    @staticmethod
    def _brancher_plyer_mocke(monkeypatch):
        """Mark plyer as available and install a MagicMock as its notifier.

        Returns the mock so the caller can inspect ``mock.notify`` calls.
        """
        faux_plyer = MagicMock()
        monkeypatch.setattr(notifications, "_PLYER_AVAILABLE", True)
        monkeypatch.setattr(notifications, "_plyer_notification", faux_plyer)
        return faux_plyer

    @staticmethod
    def _attendre_notification(faux_plyer, delai_max_s=2.0):
        """Poll until ``faux_plyer.notify`` has been called or the timeout expires.

        Returns True if the call was observed, False on timeout.
        """
        echeance = time.monotonic() + delai_max_s
        while not faux_plyer.notify.called and time.monotonic() < echeance:
            time.sleep(0.01)
        return faux_plyer.notify.called

    def test_instanciation(self):
        mgr = NotificationManager()
        assert mgr is not None

    def test_notify_sans_plyer(self, monkeypatch):
        """When plyer is unavailable, notify() returns False without crashing."""
        monkeypatch.setattr(notifications, "_PLYER_AVAILABLE", False)
        mgr = NotificationManager()
        assert mgr.notify("titre", "message") is False

    def test_notify_avec_plyer_mocke(self, monkeypatch):
        faux_plyer = self._brancher_plyer_mocke(monkeypatch)

        mgr = NotificationManager()
        result = mgr.notify("Titre", "Message", timeout=5)
        assert result is True
        # Sending is asynchronous: wait for the worker instead of sleeping blindly.
        self._attendre_notification(faux_plyer)
        faux_plyer.notify.assert_called_once()

    def test_rate_limit(self, monkeypatch):
        """The rate limit blocks notifications that are too close together."""
        self._brancher_plyer_mocke(monkeypatch)

        mgr = NotificationManager()
        assert mgr.notify("T1", "M1") is True
        # Immediately afterwards: blocked.
        assert mgr.notify("T2", "M2") is False

    def test_bypass_rate_limit_pour_blocage(self, monkeypatch):
        """BLOCAGE messages bypass the rate limit."""
        self._brancher_plyer_mocke(monkeypatch)

        mgr = NotificationManager()
        assert mgr.notify("T1", "M1") is True
        # Without bypass: blocked.
        assert mgr.notify("T2", "M2") is False
        # With bypass: goes through.
        assert mgr.notify("T3", "M3", bypass_rate_limit=True) is True

    def test_notify_message_niveau_blocage_bypass(self, monkeypatch):
        self._brancher_plyer_mocke(monkeypatch)

        mgr = NotificationManager()
        # Consume the rate-limit window first.
        mgr.notify("T0", "M0")
        # A BLOCAGE message must still go through during the rate-limit window.
        msg_blocage = formatter_cible_non_trouvee("x", "y")
        assert mgr.notify_message(msg_blocage) is True

    def test_replay_target_not_found_avec_titre(self, monkeypatch):
        """The specialised API produces a message that contains the application name."""
        faux_plyer = self._brancher_plyer_mocke(monkeypatch)

        mgr = NotificationManager()
        mgr.replay_target_not_found("Rechercher", "Document – Bloc-notes")
        assert self._attendre_notification(faux_plyer), "plyer.notify was never called"
        # The sent message must mention both the application and the element.
        _args, kwargs = faux_plyer.notify.call_args
        message_envoye = kwargs.get("message", "")
        assert "bloc-notes" in message_envoye.lower()
        assert "rechercher" in message_envoye.lower()

    def test_replay_wrong_window(self, monkeypatch):
        faux_plyer = self._brancher_plyer_mocke(monkeypatch)

        mgr = NotificationManager()
        mgr.replay_wrong_window("Program Manager", "Chrome")
        assert self._attendre_notification(faux_plyer), "plyer.notify was never called"
        _args, kwargs = faux_plyer.notify.call_args
        titre = kwargs.get("title", "")
        # The title must convey that a window is being waited for.
        assert "fenêtre" in titre.lower() or "attend" in titre.lower()

    def test_error_route_vers_formatter_specialise(self, monkeypatch):
        """error() detects 'target_not_found' and sends a humanised message."""
        faux_plyer = self._brancher_plyer_mocke(monkeypatch)

        mgr = NotificationManager()
        mgr.error("target_not_found: 'bonjour'")
        self._attendre_notification(faux_plyer)
        faux_plyer.notify.assert_called_once()
        _args, kwargs = faux_plyer.notify.call_args
        # The sent message must be natural French, not the raw technical code.
        message_envoye = kwargs.get("message", "")
        assert "target_not_found" not in message_envoye

    def test_backward_compat_connection_changed(self, monkeypatch):
        """The existing connection_changed API keeps working."""
        faux_plyer = self._brancher_plyer_mocke(monkeypatch)

        mgr = NotificationManager()
        # Disconnection event.
        mgr.connection_changed(False, "localhost")
        assert self._attendre_notification(faux_plyer)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Tests : ActivityPanel (sans tkinter)
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class TestActivityPanelFallback:
    """Activity panel tests in fallback mode (no tkinter UI)."""

    def setup_method(self):
        # Start every test from a fresh global panel.
        reset_activity_panel()

    def teardown_method(self):
        # Leave no global panel behind for other tests.
        reset_activity_panel()

    def test_creation_sans_ui(self):
        """The panel can be created without a UI (activer_ui=False)."""
        panneau = ActivityPanel(activer_ui=False)
        assert panneau is not None

    def test_snapshot_initial(self):
        instantane = ActivityPanel(activer_ui=False).snapshot()
        assert instantane.etat == EtatLea.INACTIVE
        assert instantane.nom_workflow == ""
        assert instantane.etape == 0

    def test_definir_workflow(self):
        panneau = ActivityPanel(activer_ui=False)
        panneau.definir_workflow("Test", nb_etapes=10)
        instantane = panneau.snapshot()
        assert instantane.nom_workflow == "Test"
        assert instantane.nb_etapes == 10
        assert instantane.etat == EtatLea.OBSERVE
        assert instantane.debut_timestamp > 0

    def test_mettre_a_jour_etape(self):
        panneau = ActivityPanel(activer_ui=False)
        panneau.definir_workflow("Test", 10)
        panneau.mettre_a_jour(etat=EtatLea.AGIT, action="Clic", etape=3)
        instantane = panneau.snapshot()
        assert instantane.etat == EtatLea.AGIT
        assert instantane.action_courante == "Clic"
        assert instantane.etape == 3

    def test_mettre_a_jour_partiel(self):
        panneau = ActivityPanel(activer_ui=False)
        panneau.definir_workflow("Test", 10)
        panneau.mettre_a_jour(etape=5)
        instantane = panneau.snapshot()
        assert instantane.etape == 5
        # The state was not part of the update and must still be OBSERVE.
        assert instantane.etat == EtatLea.OBSERVE

    def test_progression_texte(self):
        panneau = ActivityPanel(activer_ui=False)
        panneau.definir_workflow("Test", 10)
        panneau.mettre_a_jour(etape=3)
        texte = panneau.snapshot().progression_texte()
        assert texte == "3/10"

    def test_progression_texte_sans_nb_etapes(self):
        panneau = ActivityPanel(activer_ui=False)
        panneau.definir_workflow("Test", nb_etapes=0)
        texte = panneau.snapshot().progression_texte()
        assert texte == ""

    def test_temps_ecoule(self):
        panneau = ActivityPanel(activer_ui=False)
        panneau.definir_workflow("Test", 10)
        time.sleep(0.05)
        instantane = panneau.snapshot()
        assert instantane.temps_ecoule_s() >= 0.05

    def test_temps_ecoule_texte_secondes(self):
        panneau = ActivityPanel(activer_ui=False)
        panneau.definir_workflow("Test", 10)
        # Under 60 s the text is expected to use the "Xs" format.
        texte = panneau.snapshot().temps_ecoule_texte()
        assert texte.endswith("s")

    def test_terminer_succes(self):
        panneau = ActivityPanel(activer_ui=False)
        panneau.definir_workflow("Test", 10)
        panneau.terminer(succes=True)
        assert panneau.snapshot().etat == EtatLea.TERMINE

    def test_terminer_echec(self):
        panneau = ActivityPanel(activer_ui=False)
        panneau.definir_workflow("Test", 10)
        panneau.terminer(succes=False)
        instantane = panneau.snapshot()
        assert instantane.etat == EtatLea.BLOQUEE
        # A default message is expected to be set on failure.
        assert instantane.dernier_message

    def test_reinitialiser(self):
        panneau = ActivityPanel(activer_ui=False)
        panneau.definir_workflow("Test", 10)
        panneau.reinitialiser()
        instantane = panneau.snapshot()
        assert instantane.etat == EtatLea.INACTIVE
        assert instantane.nom_workflow == ""

    def test_listener_appele_sur_changement(self):
        panneau = ActivityPanel(activer_ui=False)
        etats_recus = []

        def enregistrer_etat(instantane):
            etats_recus.append(instantane.etat)

        panneau.on_change(enregistrer_etat)

        panneau.definir_workflow("Test", 5)
        panneau.mettre_a_jour(etat=EtatLea.AGIT)

        for etat_attendu in (EtatLea.OBSERVE, EtatLea.AGIT):
            assert etat_attendu in etats_recus

    def test_listener_erreur_nintervient_pas(self):
        """A crashing listener must not break the panel."""
        panneau = ActivityPanel(activer_ui=False)

        def listener_casse(snap):
            raise RuntimeError("boom")

        panneau.on_change(listener_casse)
        # Must not raise despite the faulty listener.
        panneau.definir_workflow("Test", 5)
        assert panneau.snapshot().nom_workflow == "Test"

    def test_to_dict_serialisation(self):
        panneau = ActivityPanel(activer_ui=False)
        panneau.definir_workflow("Ma tâche", 10)
        panneau.mettre_a_jour(
            etat=EtatLea.AGIT,
            action="Clic sur Valider",
            etape=3,
        )
        serialise = panneau.snapshot().to_dict()
        attendus = (
            ("nom_workflow", "Ma tâche"),
            ("etat", "agit"),
            ("etat_libelle", "Agit"),
            ("progression", "3/10"),
            ("action_courante", "Clic sur Valider"),
        )
        for cle, valeur in attendus:
            assert serialise[cle] == valeur

    def test_masquer_sans_ui_ne_crash_pas(self):
        panneau = ActivityPanel(activer_ui=False)
        # Both calls must simply not raise when no UI is active.
        panneau.masquer()
        panneau.afficher()

    def test_etats_ont_couleurs_et_libelles(self):
        """Every state must expose a label, a '#'-prefixed colour and a code."""
        for etat in EtatLea:
            assert etat.libelle
            assert etat.couleur.startswith("#")
            assert etat.code

    def test_singleton_global(self):
        premier = activity_panel.get_activity_panel(activer_ui=False)
        second = activity_panel.get_activity_panel(activer_ui=False)
        assert premier is second

    def test_reset_singleton(self):
        avant = activity_panel.get_activity_panel(activer_ui=False)
        activity_panel.reset_activity_panel()
        apres = activity_panel.get_activity_panel(activer_ui=False)
        assert avant is not apres
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Tests : intégration executor ↔ notifier
|
||||
# ============================================================================
|
||||
|
||||
|
||||
class TestExecutorNotifierFallback:
    """Check that a no-op notifier fallback answers every method call."""

    def test_executor_noop_supporte_toutes_methodes(self):
        """The `_Noop` fallback must respond to any method name."""
        # Simulate the case where NotificationManager raises on construction.
        cible = "agent_v0.agent_v1.ui.notifications.NotificationManager"
        with patch(cible, side_effect=RuntimeError("UI indisponible")):
            from agent_v0.agent_v1.core.executor import ActionExecutorV1
            # The executor is deliberately not instantiated (mss/pynput
            # dependencies); the stub logic is checked on an inline copy below.
            # NOTE(review): the imported class is otherwise unused, so this test
            # does not exercise the executor's own fallback — confirm intent.

        # Direct test of the no-op pattern used in the executor.
        class _Noop:
            def __getattr__(self, name):
                return lambda *a, **kw: False

        bouchon = _Noop()
        # Every call below must return False without raising.
        resultats = (
            bouchon.replay_target_not_found("x"),
            bouchon.replay_wrong_window("x", "y"),
            bouchon.replay_no_screen_change("click"),
            bouchon.notify_message(None),
            bouchon.nimporte_quelle_methode(),
        )
        for resultat in resultats:
            assert resultat is False
|
||||
430
tests/unit/test_shadow_observer.py
Normal file
430
tests/unit/test_shadow_observer.py
Normal file
@@ -0,0 +1,430 @@
|
||||
"""
|
||||
Tests du ShadowObserver — observation temps réel de Léa.
|
||||
|
||||
Vérifie que :
|
||||
- L'observer démarre et s'arrête correctement
|
||||
- Les événements sont segmentés en étapes logiques
|
||||
- Les variables sont détectées pendant la frappe
|
||||
- Les notifications sont émises avec le bon niveau
|
||||
- La compréhension est accessible en temps réel
|
||||
- Le callback de notification est appelé
|
||||
- Les événements parasites (heartbeat) sont ignorés
|
||||
"""
|
||||
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
_ROOT = str(Path(__file__).resolve().parents[2])
|
||||
if _ROOT not in sys.path:
|
||||
sys.path.insert(0, _ROOT)
|
||||
|
||||
from core.workflow.shadow_observer import (
|
||||
NiveauNotification,
|
||||
NotificationShadow,
|
||||
ShadowObserver,
|
||||
UnderstoodStep,
|
||||
get_shared_observer,
|
||||
)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Fixtures
|
||||
# =========================================================================
|
||||
|
||||
|
||||
def _evt_click(text="Rechercher", title="Explorateur", ts=100.0):
|
||||
return {
|
||||
"type": "mouse_click",
|
||||
"pos": [400, 580],
|
||||
"window": {"title": title, "app_name": title.split(" - ")[-1] if " - " in title else title},
|
||||
"timestamp": ts,
|
||||
"vision_info": {"text": text},
|
||||
}
|
||||
|
||||
|
||||
def _evt_type(text="bonjour", title="Bloc-notes", ts=101.0):
|
||||
return {
|
||||
"type": "text_input",
|
||||
"text": text,
|
||||
"window": {"title": title, "app_name": title},
|
||||
"timestamp": ts,
|
||||
}
|
||||
|
||||
|
||||
def _evt_key(keys=None, title="Bloc-notes", ts=102.0):
|
||||
return {
|
||||
"type": "key_combo",
|
||||
"keys": keys or ["enter"],
|
||||
"window": {"title": title, "app_name": title},
|
||||
"timestamp": ts,
|
||||
}
|
||||
|
||||
|
||||
def _evt_heartbeat(ts=103.0):
|
||||
return {"type": "heartbeat", "timestamp": ts}
|
||||
|
||||
|
||||
def _make_session_events():
    """Typical scenario: open Notepad from the Start menu, type text, save.

    Returns the ordered list of synthetic events, including one heartbeat
    that observers are expected to ignore.
    """
    menu = "Menu Démarrer"
    bloc_notes = "Sans titre - Bloc-notes"

    # Phase 1: search for the application from the Start menu.
    evenements = [
        _evt_click(text="Rechercher", title=menu, ts=100.0),
        _evt_type(text="blocnote", title=menu, ts=101.0),
        _evt_key(keys=["enter"], title=menu, ts=102.0),
        _evt_heartbeat(ts=103.0),  # must be ignored
    ]
    # Phase 2: application change, so a new segment is expected.
    evenements += [
        _evt_click(text="", title=bloc_notes, ts=105.0),
        _evt_type(text="Bonjour le monde", title=bloc_notes, ts=106.0),
        _evt_key(keys=["ctrl", "s"], title=bloc_notes, ts=108.0),
    ]
    return evenements
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Tests de base
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestShadowObserverBase:
    """Session lifecycle and ignored event types of ShadowObserver."""

    def test_start_et_stop(self):
        observateur = ShadowObserver()
        session = "sess_test"
        observateur.start(session)
        assert observateur.has_session(session)
        observateur.stop(session)
        # stop() does not remove the session; only reset() does.
        assert observateur.has_session(session)
        observateur.reset(session)
        assert not observateur.has_session(session)

    def test_auto_start_sur_premier_event(self):
        """observe_event() without a prior start() must auto-start the session."""
        observateur = ShadowObserver()
        observateur.observe_event("sess_auto", _evt_click())
        assert observateur.has_session("sess_auto")
        etapes = observateur.get_understanding("sess_auto")
        assert len(etapes) >= 1

    def test_heartbeat_ignore(self):
        observateur = ShadowObserver()
        observateur.start("s1")
        observateur.observe_event("s1", _evt_heartbeat())
        etapes = observateur.get_understanding("s1")
        # A heartbeat must not create any step.
        assert len(etapes) == 0

    def test_focus_change_ignore(self):
        observateur = ShadowObserver()
        observateur.start("s1")
        evenement_focus = {"type": "focus_change", "timestamp": 100}
        observateur.observe_event("s1", evenement_focus)
        assert len(observateur.get_understanding("s1")) == 0
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Segmentation
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestShadowObserverSegmentation:
    """How ShadowObserver splits an event stream into steps."""

    def test_segmentation_par_changement_app(self):
        """Switching application creates a new segment."""
        observateur = ShadowObserver()
        observateur.start("s1")
        sequence = (
            _evt_click(title="Firefox"),
            _evt_click(title="Firefox", ts=100.5),
            _evt_click(title="Bloc-notes", ts=101.0),
        )
        for evenement in sequence:
            observateur.observe_event("s1", evenement)

        etapes = observateur.get_understanding("s1")
        # At least two segments are expected.
        assert len(etapes) >= 2

    def test_segmentation_par_pause_longue(self):
        """A pause longer than 4 s cuts the segment."""
        observateur = ShadowObserver()
        observateur.start("s1")
        observateur.observe_event("s1", _evt_click(title="App1", ts=100.0))
        observateur.observe_event("s1", _evt_click(title="App1", ts=100.5))
        # Ten-second gap before the next click.
        observateur.observe_event("s1", _evt_click(title="App1", ts=110.5))

        etapes = observateur.get_understanding("s1")
        assert len(etapes) >= 2

    def test_segment_complet(self):
        """Full scenario: Notepad + text + save."""
        observateur = ShadowObserver()
        observateur.start("s1")
        for evenement in _make_session_events():
            observateur.observe_event("s1", evenement)
        observateur.stop("s1")

        etapes = observateur.get_understanding("s1")
        # At least the Start-menu segment and the Notepad segment.
        assert len(etapes) >= 2

        # At least one step must relate to Notepad, saving or writing text.
        intentions = " ".join(etape["intent"].lower() for etape in etapes)
        mots_cles = ("bloc", "enregistr", "sauvegard", "écrir", "text")
        assert any(mot in intentions for mot in mots_cles)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Intention et raffinement
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestShadowObserverIntent:
    """Intent inference and confidence refinement of the current step."""

    def test_intent_recherche(self):
        """Click + typed text + Enter is understood as a search for the typed text."""
        observateur = ShadowObserver()
        observateur.start("s1")
        sequence = (
            _evt_click(text="Champ recherche", title="Explorateur", ts=100.0),
            _evt_type(text="calculatrice", title="Explorateur", ts=100.5),
            _evt_key(keys=["enter"], title="Explorateur", ts=101.0),
        )
        for evenement in sequence:
            observateur.observe_event("s1", evenement)

        etape_courante = observateur.get_current_step("s1")
        assert etape_courante is not None
        intention = etape_courante["intent"].lower()
        assert "recherch" in intention or "calculatrice" in intention

    def test_intent_ctrl_s(self):
        """Ctrl+S is understood as a save."""
        observateur = ShadowObserver()
        observateur.start("s1")
        observateur.observe_event("s1", _evt_click(title="Bloc-notes", ts=100.0))
        observateur.observe_event(
            "s1", _evt_key(keys=["ctrl", "s"], title="Bloc-notes", ts=100.5)
        )

        etape_courante = observateur.get_current_step("s1")
        assert etape_courante is not None
        assert "sauvegard" in etape_courante["intent"].lower()

    def test_intent_ecrire(self):
        """Text input alone is understood as writing."""
        observateur = ShadowObserver()
        observateur.start("s1")
        observateur.observe_event("s1", _evt_type(text="Un texte libre", title="Bloc-notes"))

        etape_courante = observateur.get_current_step("s1")
        assert etape_courante is not None
        intention = etape_courante["intent"].lower()
        assert "écri" in intention or "text" in intention

    def test_confidence_augmente_avec_contexte(self):
        """Confidence must not decrease once the context becomes clearer."""
        observateur = ShadowObserver()
        observateur.start("s1")
        observateur.observe_event("s1", _evt_click(title="App"))
        confiance_avant = observateur.get_current_step("s1")["confidence"]

        observateur.observe_event("s1", _evt_key(keys=["ctrl", "s"], title="App"))
        confiance_apres = observateur.get_current_step("s1")["confidence"]

        assert confiance_apres >= confiance_avant
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Détection de variables
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestShadowObserverVariables:
    """Checks variable detection on typed text and the VARIABLE notifications."""

    def test_variable_detectee_lors_saisie(self):
        """Typed text longer than 3 characters creates a variable."""
        captured = []
        observer = ShadowObserver()
        observer._notify_callback = captured.append
        observer.start("s1")

        observer.observe_event("s1", _evt_type(text="Jean Dupont", title="Formulaire"))

        variables = [
            item for item in captured
            if item.niveau == NiveauNotification.VARIABLE
        ]
        assert len(variables) == 1
        only = variables[0]
        assert "Jean Dupont" in only.message
        assert only.data["variable_name"].startswith("texte_")

    def test_variable_type_date(self):
        """A typed date is recorded as exactly one detected variable on the step."""
        observer = ShadowObserver()
        observer.start("s1")
        observer.observe_event("s1", _evt_type(text="15/03/2026", title="Formulaire"))

        step = observer.get_current_step("s1")
        assert len(step["variables_detectees"]) == 1

    def test_variable_type_email(self):
        """A typed e-mail address yields one VARIABLE notification mentioning mail."""
        captured = []
        observer = ShadowObserver()
        observer._notify_callback = captured.append
        observer.start("s1")

        observer.observe_event(
            "s1", _evt_type(text="jean@example.com", title="Formulaire")
        )

        variables = [
            item for item in captured
            if item.niveau == NiveauNotification.VARIABLE
        ]
        assert len(variables) == 1
        text = variables[0].message
        assert "e-mail" in text or "mail" in text

    def test_texte_court_ignore(self):
        """Text shorter than 3 characters is not treated as a variable."""
        captured = []
        observer = ShadowObserver()
        observer._notify_callback = captured.append
        observer.start("s1")

        observer.observe_event("s1", _evt_type(text="ab", title="App"))

        variables = [
            item for item in captured
            if item.niveau == NiveauNotification.VARIABLE
        ]
        assert len(variables) == 0
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Notifications et callbacks
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestShadowObserverNotifications:
    """Checks what the observer emits via notify_callback and get_notifications()."""

    def test_notification_au_demarrage(self):
        """start() emits an INFO notification whose message mentions observing."""
        captured = []
        observer = ShadowObserver(notify_callback=captured.append)
        observer.start("s1")

        assert len(captured) >= 1
        first = captured[0]
        assert first.niveau == NiveauNotification.INFO
        assert "observe" in first.message.lower()

    def test_notification_nouvelle_etape(self):
        """Switching application emits a DECOUVERTE notification."""
        captured = []
        observer = ShadowObserver(notify_callback=captured.append)
        observer.start("s1")

        observer.observe_event("s1", _evt_click(title="Firefox", ts=100.0))
        observer.observe_event("s1", _evt_click(title="Bloc-notes", ts=101.0))

        discoveries = [
            item for item in captured
            if item.niveau == NiveauNotification.DECOUVERTE
        ]
        assert len(discoveries) >= 1

    def test_notification_stop_resume(self):
        """On stop, a summary mentioning the steps/observation is emitted."""
        captured = []
        observer = ShadowObserver(notify_callback=captured.append)
        observer.start("s1")
        for event in _make_session_events():
            observer.observe_event("s1", event)
        observer.stop("s1")

        lowered = [item.message.lower() for item in captured]
        assert any("étape" in text or "observ" in text for text in lowered)

    def test_notifications_since_ts(self):
        """get_notifications(since_ts=...) filters out older notifications."""
        observer = ShadowObserver()
        observer.start("s1")

        # Take the cut-off strictly between start() and the following events.
        time.sleep(0.01)
        cutoff = time.time()
        time.sleep(0.01)

        observer.observe_event("s1", _evt_click(title="Firefox"))
        observer.observe_event("s1", _evt_click(title="Bloc-notes"))

        recent = observer.get_notifications("s1", since_ts=cutoff)
        everything = observer.get_notifications("s1", since_ts=0)
        assert len(recent) < len(everything)

    def test_callback_erreur_ne_plante_pas(self):
        """A callback that raises must not crash the observer."""
        def exploding(notif):
            raise RuntimeError("boom")

        observer = ShadowObserver(notify_callback=exploding)
        # start() is expected to emit a notification, so the callback raises here.
        observer.start("s1")
        # Reaching this point means the error was contained.
        observer.observe_event("s1", _evt_click())
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Compréhension et API publique
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestShadowObserverUnderstanding:
    """Checks the observer's public read API for understood steps."""

    def test_get_understanding_format(self):
        """The returned structure has the expected keys and value types."""
        observer = ShadowObserver()
        observer.start("s1")
        observer.observe_event("s1", _evt_click(title="App"))

        understood = observer.get_understanding("s1")
        assert isinstance(understood, list)
        assert len(understood) >= 1

        first = understood[0]
        for key in ("step", "intent", "confidence"):
            assert key in first
        assert isinstance(first["step"], int)
        assert 0.0 <= first["confidence"] <= 1.0

    def test_get_understanding_sans_session(self):
        """An unknown session id yields an empty list."""
        observer = ShadowObserver()
        assert observer.get_understanding("inexistante") == []

    def test_get_current_step(self):
        """No current step before any event; step_index is 1 after the first."""
        observer = ShadowObserver()
        observer.start("s1")
        assert observer.get_current_step("s1") is None

        observer.observe_event("s1", _evt_click(title="App"))
        step = observer.get_current_step("s1")
        assert step is not None
        assert step["step_index"] == 1

    def test_get_steps_internal(self):
        """get_steps_internal returns UnderstoodStep copies."""
        observer = ShadowObserver()
        observer.start("s1")
        observer.observe_event("s1", _evt_click(title="App"))

        snapshot = observer.get_steps_internal("s1")
        assert len(snapshot) >= 1
        assert isinstance(snapshot[0], UnderstoodStep)

        # Mutating the returned object must not leak back into the observer.
        snapshot[0].intent = "HACKED"
        fresh = observer.get_steps_internal("s1")
        assert fresh[0].intent != "HACKED"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Singleton partagé
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestSharedObserver:
    """Checks the shared observer accessor."""

    def test_singleton(self):
        """Two consecutive calls return the very same instance."""
        first = get_shared_observer()
        second = get_shared_observer()
        assert second is first
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Performance (contrainte : observe_event doit être rapide)
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestShadowObserverPerformance:
    """Throughput guard for observe_event()."""

    def test_observe_event_rapide(self):
        """observe_event() must process 1000 events in under 500 ms."""
        observer = ShadowObserver()
        observer.start("s_perf")

        # Build every event up front so that only observe_event() is timed.
        events = [
            _evt_click(title="App", ts=100.0 + i * 0.01)
            for i in range(1000)
        ]

        # perf_counter() is monotonic and high-resolution. time.time() is a
        # wall clock that can jump (NTP/DST adjustments) and may be too coarse
        # for a sub-second measurement, which makes the threshold flaky.
        started = time.perf_counter()
        for event in events:
            observer.observe_event("s_perf", event)
        elapsed = time.perf_counter() - started

        assert elapsed < 0.5, f"Trop lent : {elapsed:.2f}s pour 1000 events"
|
||||
531
tests/unit/test_shadow_validator.py
Normal file
531
tests/unit/test_shadow_validator.py
Normal file
@@ -0,0 +1,531 @@
|
||||
"""
|
||||
Tests du ShadowValidator — feedback utilisateur et reconstruction WorkflowIR.
|
||||
|
||||
Vérifie que :
|
||||
- Les feedbacks (validate/correct/undo/merge_next/split/cancel) sont appliqués
|
||||
- Le WorkflowIR final est bien reconstruit à partir des étapes corrigées
|
||||
- Les variables sont détectées dans les actions finales
|
||||
- L'historique des feedbacks est conservé
|
||||
- Les erreurs (index invalide, action inconnue) sont gérées proprement
|
||||
"""
|
||||
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
_ROOT = str(Path(__file__).resolve().parents[2])
|
||||
if _ROOT not in sys.path:
|
||||
sys.path.insert(0, _ROOT)
|
||||
|
||||
from core.workflow.shadow_observer import ShadowObserver, UnderstoodStep
|
||||
from core.workflow.shadow_validator import FeedbackResult, ShadowValidator
|
||||
from core.workflow.workflow_ir import WorkflowIR
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Fixtures
|
||||
# =========================================================================
|
||||
|
||||
|
||||
def _make_step(step_index=1, intent="Ouvrir Firefox", app="Firefox",
               events=None, **kwargs):
    """Build an UnderstoodStep for tests.

    ``app`` is used both as the application name and the window title.
    When ``events`` is missing or empty, a single mouse click on ``app`` is
    used. Extra keyword arguments are forwarded to UnderstoodStep unchanged.
    """
    if not events:
        events = [
            {
                "type": "mouse_click",
                "window": {"title": app},
                "vision_info": {"text": "bouton"},
                "timestamp": 100.0,
            }
        ]
    return UnderstoodStep(
        step_index=step_index,
        intent=intent,
        app_name=app,
        window_title=app,
        events=events,
        started_at=100.0,
        ended_at=101.0,
        **kwargs,
    )
|
||||
|
||||
|
||||
def _make_type_step(texte="Bonjour", app="Bloc-notes", step_index=1):
    """Build an UnderstoodStep holding a single text_input event for ``texte``."""
    typed_event = {
        "type": "text_input",
        "text": texte,
        "window": {"title": app},
        "timestamp": 100.0,
    }
    return UnderstoodStep(
        step_index=step_index,
        intent=f"Écrire « {texte} »",
        app_name=app,
        window_title=app,
        events=[typed_event],
        started_at=100.0,
    )
|
||||
|
||||
|
||||
def _make_3_steps():
    """Return three steps: open Notepad, type a sentence, then press Ctrl+S."""
    opening = _make_step(1, "Ouvrir le Bloc-notes", "Bloc-notes")
    typing = _make_type_step("Bonjour le monde", step_index=2)
    saving = _make_step(
        3,
        "Sauvegarder",
        "Bloc-notes",
        events=[
            {
                "type": "key_combo",
                "keys": ["ctrl", "s"],
                "window": {"title": "Bloc-notes"},
                "timestamp": 103.0,
            }
        ],
    )
    return [opening, typing, saving]
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Initialisation
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestShadowValidatorBase:
    """Checks the validator's initial state and set_steps()."""

    def test_creation(self):
        """A new validator has no steps, no history and is not cancelled."""
        validator = ShadowValidator()
        assert validator.steps == []
        assert validator.history == []
        assert not validator.is_cancelled

    def test_set_steps(self):
        """set_steps() stores the given steps in order."""
        validator = ShadowValidator()
        validator.set_steps(_make_3_steps())
        assert len(validator.steps) == 3
        assert validator.steps[0].intent == "Ouvrir le Bloc-notes"

    def test_clone_protege_mutation(self):
        """set_steps clones the steps so external mutations do not leak in."""
        validator = ShadowValidator()
        originals = _make_3_steps()
        validator.set_steps(originals)

        originals[0].intent = "HACKED"
        assert validator.steps[0].intent == "Ouvrir le Bloc-notes"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# validate
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestValidatorValidate:
    """Checks the "validate" feedback action."""

    def test_validate_etape(self):
        """Validating a step marks it validated, confident and non-provisional."""
        validator = ShadowValidator()
        validator.set_steps(_make_3_steps())

        outcome = validator.apply_feedback({"action": "validate", "step_index": 1})

        assert outcome.ok is True
        assert validator.steps[0].validated is True
        assert validator.steps[0].confidence >= 0.95
        assert validator.steps[0].intent_provisoire is False

    def test_validate_index_invalide(self):
        """An out-of-range step index is rejected with an 'invalide' message."""
        validator = ShadowValidator()
        validator.set_steps(_make_3_steps())

        outcome = validator.apply_feedback({"action": "validate", "step_index": 99})

        assert outcome.ok is False
        assert "invalide" in outcome.message.lower()

    def test_validate_toutes_les_etapes(self):
        """Validating steps 1 to 3 leaves every step validated."""
        validator = ShadowValidator()
        validator.set_steps(_make_3_steps())

        for index in (1, 2, 3):
            validator.apply_feedback({"action": "validate", "step_index": index})

        assert all(step.validated for step in validator.steps)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# correct
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestValidatorCorrect:
    """Checks the "correct" feedback action."""

    def test_correct_intent(self):
        """Correcting a step replaces its intent and implicitly validates it."""
        validator = ShadowValidator()
        validator.set_steps(_make_3_steps())

        feedback = {
            "action": "correct",
            "step_index": 1,
            "new_intent": "Démarrer la rédaction d'un email",
        }
        outcome = validator.apply_feedback(feedback)

        assert outcome.ok is True
        first = validator.steps[0]
        assert first.intent == "Démarrer la rédaction d'un email"
        assert first.corrected is True
        assert first.validated is True  # Correcting implies validating
        assert "old_intent" in outcome.data

    def test_correct_intent_vide(self):
        """An empty new intent is rejected and the step stays uncorrected."""
        validator = ShadowValidator()
        validator.set_steps(_make_3_steps())

        feedback = {
            "action": "correct",
            "step_index": 1,
            "new_intent": "",
        }
        outcome = validator.apply_feedback(feedback)

        assert outcome.ok is False
        assert validator.steps[0].corrected is False
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# undo
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestValidatorUndo:
    """Checks the "undo" feedback action."""

    def test_undo_etape(self):
        """Undoing step 2 flags that step as cancelled."""
        validator = ShadowValidator()
        validator.set_steps(_make_3_steps())

        outcome = validator.apply_feedback({"action": "undo", "step_index": 2})

        assert outcome.ok is True
        assert validator.steps[1].cancelled is True

    def test_undo_exclut_etape_du_workflow(self):
        """An undone step must not appear in the final WorkflowIR."""
        validator = ShadowValidator()
        validator.set_steps(_make_3_steps())
        validator.apply_feedback({"action": "undo", "step_index": 2})

        workflow = validator.build_workflow_ir(session_id="s1", name="Test")

        assert workflow is not None
        assert len(workflow.steps) == 2  # 3 - 1 = 2
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# merge_next
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestValidatorMergeNext:
    """Checks the "merge_next" feedback action."""

    def test_merge_deux_etapes(self):
        """Merging step 1 with its successor leaves two steps."""
        validator = ShadowValidator()
        validator.set_steps(_make_3_steps())

        outcome = validator.apply_feedback({"action": "merge_next", "step_index": 1})

        assert outcome.ok is True
        assert len(validator.steps) == 2  # 3 - 1 = 2

    def test_merge_conserve_les_events(self):
        """Merging keeps the total number of events unchanged."""
        validator = ShadowValidator()
        validator.set_steps(_make_3_steps())

        count_before = sum(len(step.events) for step in validator.steps)
        validator.apply_feedback({"action": "merge_next", "step_index": 2})
        count_after = sum(len(step.events) for step in validator.steps)

        assert count_before == count_after

    def test_merge_derniere_etape_echoue(self):
        """Merging the last step fails: it has no successor."""
        validator = ShadowValidator()
        validator.set_steps(_make_3_steps())

        outcome = validator.apply_feedback({"action": "merge_next", "step_index": 3})

        assert outcome.ok is False
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# split
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestValidatorSplit:
    """Checks the "split" feedback action."""

    def test_split_en_deux(self):
        """Splitting a 3-event step at event index 2 yields steps of 2 and 1 events."""
        validator = ShadowValidator()
        composite = _make_step(
            1,
            "Étape composite",
            events=[
                {
                    "type": "mouse_click",
                    "window": {"title": "App"},
                    "timestamp": 100.0,
                    "vision_info": {},
                },
                {
                    "type": "text_input",
                    "text": "partie 1",
                    "window": {"title": "App"},
                    "timestamp": 101.0,
                },
                {
                    "type": "text_input",
                    "text": "partie 2",
                    "window": {"title": "App"},
                    "timestamp": 102.0,
                },
            ],
        )
        validator.set_steps([composite])

        feedback = {
            "action": "split",
            "step_index": 1,
            "at_event_index": 2,
        }
        outcome = validator.apply_feedback(feedback)

        assert outcome.ok is True
        assert len(validator.steps) == 2
        assert len(validator.steps[0].events) == 2
        assert len(validator.steps[1].events) == 1

    def test_split_index_invalide(self):
        """An out-of-range event index is rejected."""
        validator = ShadowValidator()
        validator.set_steps(_make_3_steps())

        feedback = {
            "action": "split",
            "step_index": 1,
            "at_event_index": 99,
        }
        outcome = validator.apply_feedback(feedback)

        assert outcome.ok is False
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# cancel
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestValidatorCancel:
    """Checks the "cancel" feedback action."""

    def test_cancel_workflow(self):
        """Cancelling succeeds and flags the validator as cancelled."""
        validator = ShadowValidator()
        validator.set_steps(_make_3_steps())

        outcome = validator.apply_feedback({"action": "cancel"})

        assert outcome.ok is True
        assert validator.is_cancelled

    def test_cancel_build_retourne_none(self):
        """After a cancel, build_workflow_ir() returns None."""
        validator = ShadowValidator()
        validator.set_steps(_make_3_steps())
        validator.apply_feedback({"action": "cancel"})

        workflow = validator.build_workflow_ir(session_id="s1", name="Test")

        assert workflow is None
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Action inconnue
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestValidatorUnknownAction:
    """Checks how an unsupported feedback action is handled."""

    def test_action_inconnue(self):
        """An unknown action is rejected with an 'inconnue' message."""
        validator = ShadowValidator()
        validator.set_steps(_make_3_steps())

        outcome = validator.apply_feedback({"action": "do_magic"})

        assert outcome.ok is False
        assert "inconnue" in outcome.message.lower()
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Construction du WorkflowIR
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestValidatorBuild:
    """Checks build_workflow_ir(): the WorkflowIR produced from validator steps."""

    def test_build_workflow_ir(self):
        """The built WorkflowIR carries the name, source session and all steps."""
        v = ShadowValidator()
        v.set_steps(_make_3_steps())

        ir = v.build_workflow_ir(
            session_id="sess_test",
            name="Mon workflow",
            domain="generic",
        )

        assert ir is not None
        assert isinstance(ir, WorkflowIR)
        assert ir.name == "Mon workflow"
        assert ir.learned_from == "sess_test"
        assert len(ir.steps) == 3

    def test_build_with_variables(self):
        """Typed texts become variables in the WorkflowIR."""
        v = ShadowValidator()
        v.set_steps([
            _make_type_step("Jean Dupont", step_index=1),
            _make_type_step("jean@example.com", app="Email", step_index=2),
        ])

        ir = v.build_workflow_ir(session_id="s1", name="Test")
        # build_workflow_ir() may return None; fail with a clear assertion
        # rather than an AttributeError on the next line.
        assert ir is not None
        assert len(ir.variables) == 2

        # Collect the "type" actions first: asserting only inside a filtered
        # loop would pass vacuously if the builder produced no such action.
        type_actions = [
            action
            for step in ir.steps
            for action in step.actions
            if action.type == "type"
        ]
        assert type_actions, "no 'type' action found in the built workflow"

        # "type" actions must reference the variables.
        for action in type_actions:
            assert action.variable is True
            assert action.text.startswith("{")

    def test_build_respecte_corrections(self):
        """Corrected intents are carried over into the WorkflowIR."""
        v = ShadowValidator()
        v.set_steps(_make_3_steps())

        v.apply_feedback({
            "action": "correct",
            "step_index": 1,
            "new_intent": "Lancer l'application de prise de notes",
        })

        ir = v.build_workflow_ir(session_id="s1", name="Test")
        assert ir.steps[0].intent == "Lancer l'application de prise de notes"

    def test_build_exclut_etapes_annulees(self):
        """Undone steps are left out of the built WorkflowIR."""
        v = ShadowValidator()
        v.set_steps(_make_3_steps())
        v.apply_feedback({"action": "undo", "step_index": 2})

        ir = v.build_workflow_ir(session_id="s1", name="Test")
        assert len(ir.steps) == 2

    def test_build_require_all_validated(self):
        """With require_all_validated, building fails if a step is not validated."""
        v = ShadowValidator()
        v.set_steps(_make_3_steps())
        v.apply_feedback({"action": "validate", "step_index": 1})
        # Steps 2 and 3 are left unvalidated.

        with pytest.raises(ValueError):
            v.build_workflow_ir(
                session_id="s1", name="Test", require_all_validated=True
            )

    def test_build_applications_detectees(self):
        """Each step's application name is listed in the WorkflowIR applications."""
        v = ShadowValidator()
        v.set_steps([
            _make_step(1, "Ouvrir Firefox", "Firefox"),
            _make_step(2, "Écrire", "Bloc-notes"),
        ])

        ir = v.build_workflow_ir(session_id="s1", name="Test")
        assert "Firefox" in ir.applications
        assert "Bloc-notes" in ir.applications
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Historique
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestValidatorHistory:
    """Checks feedback history recording and batch application."""

    def test_historique_feedbacks(self):
        """Each applied feedback is recorded in order in the history."""
        validator = ShadowValidator()
        validator.set_steps(_make_3_steps())

        validator.apply_feedback({"action": "validate", "step_index": 1})
        validator.apply_feedback({
            "action": "correct",
            "step_index": 2,
            "new_intent": "Écrire le texte",
        })
        validator.apply_feedback({"action": "undo", "step_index": 3})

        recorded = validator.history
        assert len(recorded) == 3
        expected_actions = ("validate", "correct", "undo")
        for position, expected in enumerate(expected_actions):
            assert recorded[position].action == expected

    def test_apply_feedbacks_batch(self):
        """apply_feedbacks() returns one successful result per feedback."""
        validator = ShadowValidator()
        validator.set_steps(_make_3_steps())

        batch = [
            {"action": "validate", "step_index": 1},
            {"action": "validate", "step_index": 2},
            {"action": "undo", "step_index": 3},
        ]
        outcomes = validator.apply_feedbacks(batch)

        assert len(outcomes) == 3
        assert all(outcome.ok for outcome in outcomes)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Intégration ShadowObserver + ShadowValidator
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestShadowObserverValidatorIntegration:
    """End-to-end checks feeding ShadowObserver steps into ShadowValidator."""

    def test_observer_vers_validator(self):
        """Full flow: Observer -> Validator -> WorkflowIR."""
        session = "sess_flow"

        def start_menu():
            # Fresh dict on every call so no two events share a window object.
            return {"title": "Menu Démarrer", "app_name": "Menu Démarrer"}

        observer = ShadowObserver()
        observer.start(session)

        # Simulated events: click, type and confirm in the Start menu.
        observer.observe_event(session, {
            "type": "mouse_click",
            "window": start_menu(),
            "vision_info": {"text": "Rechercher"},
            "timestamp": 100.0,
        })
        observer.observe_event(session, {
            "type": "text_input",
            "text": "blocnote",
            "window": start_menu(),
            "timestamp": 100.5,
        })
        observer.observe_event(session, {
            "type": "key_combo",
            "keys": ["enter"],
            "window": start_menu(),
            "timestamp": 101.0,
        })
        # Application change: typing now happens in Notepad.
        observer.observe_event(session, {
            "type": "text_input",
            "text": "Hello world",
            "window": {"title": "Sans titre - Bloc-notes", "app_name": "Bloc-notes"},
            "timestamp": 105.0,
        })
        observer.stop(session)

        # Fetch the understood steps.
        understood = observer.get_steps_internal(session)
        assert len(understood) >= 2

        # Hand them over to the validator.
        checker = ShadowValidator()
        checker.set_steps(understood)

        # Validate the first step, correct the second one.
        checker.apply_feedback({"action": "validate", "step_index": 1})
        checker.apply_feedback({
            "action": "correct",
            "step_index": 2,
            "new_intent": "Écrire un texte de démonstration",
        })

        # Build the WorkflowIR.
        workflow = checker.build_workflow_ir(
            session_id=session,
            name="Flow de test",
            domain="generic",
        )

        assert workflow is not None
        assert len(workflow.steps) >= 2
        assert workflow.steps[1].intent == "Écrire un texte de démonstration"
        assert len(workflow.variables) >= 1  # At least "blocnote" or "Hello world"

    def test_undo_puis_build(self):
        """Undoing one observed step removes exactly one step from the WorkflowIR."""
        session = "sess_undo"
        observer = ShadowObserver()
        observer.start(session)
        for offset in range(3):
            observer.observe_event(session, {
                "type": "mouse_click",
                "window": {"title": f"App{offset}"},
                "vision_info": {"text": "bouton"},
                "timestamp": 100.0 + offset * 6.0,  # > 4 s apart to create segments
            })
        observer.stop(session)

        checker = ShadowValidator()
        checker.set_steps(observer.get_steps_internal(session))
        count_before = len(checker.steps)
        assert count_before >= 2

        checker.apply_feedback({"action": "undo", "step_index": 1})

        workflow = checker.build_workflow_ir(session_id=session, name="Test")
        assert len(workflow.steps) == count_before - 1
|
||||
353
tests/unit/test_surface_and_uia.py
Normal file
353
tests/unit/test_surface_and_uia.py
Normal file
@@ -0,0 +1,353 @@
|
||||
"""
|
||||
Tests du SurfaceClassifier et du UIAHelper.
|
||||
|
||||
Vérifie :
|
||||
- Détection correcte des 4 types de surfaces (citrix, windows_native, web, unknown)
|
||||
- Paramètres adaptés par surface (timeouts, seuils)
|
||||
- Fallback gracieux si helper UIA absent
|
||||
- Sérialisation des profils
|
||||
- Wrapper UIAHelper avec mocks subprocess
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
_ROOT = str(Path(__file__).resolve().parents[2])
|
||||
if _ROOT not in sys.path:
|
||||
sys.path.insert(0, _ROOT)
|
||||
|
||||
from core.workflow.surface_classifier import (
|
||||
SurfaceClassifier,
|
||||
SurfaceProfile,
|
||||
SurfaceType,
|
||||
)
|
||||
from core.workflow.uia_helper import UIAHelper, UiaElement
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# SurfaceClassifier — détection par processus
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestSurfaceClassifier:
    """Surface detection tests."""

    def _classifier(self):
        """Build a classifier with an empty UIA helper path (keeps tests reproducible)."""
        return SurfaceClassifier(uia_helper_path="")

    def test_detection_citrix_wfica(self):
        """wfica32.exe -> Citrix."""
        detector = self._classifier()
        result = detector.classify(
            process_name="wfica32.exe", window_title="Session Citrix"
        )
        assert result.surface_type == SurfaceType.CITRIX
        assert result.uia_available is False
        assert result.ocr_threshold < 0.75  # More tolerant
        assert result.max_retries >= 3

    def test_detection_citrix_mstsc(self):
        """mstsc.exe -> Citrix (RDP)."""
        detector = self._classifier()
        result = detector.classify(
            process_name="mstsc.exe", window_title="Remote Desktop"
        )
        assert result.surface_type == SurfaceType.CITRIX

    def test_detection_citrix_par_titre(self):
        """A 'Citrix' title -> Citrix even when the process is not a listed one."""
        detector = self._classifier()
        result = detector.classify(
            process_name="chrome.exe", window_title="DxCare - Citrix Receiver"
        )
        assert result.surface_type == SurfaceType.CITRIX

    def test_detection_windows_natif_notepad(self):
        """notepad.exe -> native Windows."""
        detector = self._classifier()
        result = detector.classify(
            process_name="notepad.exe", window_title="Sans titre – Bloc-notes"
        )
        assert result.surface_type == SurfaceType.WINDOWS_NATIVE
        assert result.ocr_threshold == 0.75
        assert result.timeout_click_ms == 8000

    def test_detection_windows_natif_explorer(self):
        """explorer.exe -> native Windows (special case)."""
        detector = self._classifier()
        result = detector.classify(process_name="explorer.exe", window_title="Lea")
        assert result.surface_type == SurfaceType.WINDOWS_NATIVE

    def test_detection_windows_natif_dxcare(self):
        """dxcare.exe (hospital patient-record software) -> native Windows."""
        detector = self._classifier()
        result = detector.classify(
            process_name="dxcare.exe", window_title="DxCare - Dossier 12345"
        )
        assert result.surface_type == SurfaceType.WINDOWS_NATIVE

    def test_detection_web_chrome(self):
        """chrome.exe -> local web."""
        detector = self._classifier()
        result = detector.classify(
            process_name="chrome.exe", window_title="Google - Google Chrome"
        )
        assert result.surface_type == SurfaceType.WEB_LOCAL
        assert result.ocr_threshold == 0.80  # Stricter (text is rendered cleanly)
        assert result.max_retries == 1  # Fast

    def test_detection_web_edge(self):
        """msedge.exe -> local web."""
        detector = self._classifier()
        result = detector.classify(process_name="msedge.exe", window_title="Edge")
        assert result.surface_type == SurfaceType.WEB_LOCAL

    def test_detection_web_firefox(self):
        """firefox.exe -> local web."""
        detector = self._classifier()
        result = detector.classify(process_name="firefox.exe", window_title="Firefox")
        assert result.surface_type == SurfaceType.WEB_LOCAL

    def test_detection_unknown_fallback(self):
        """Unrecognised process -> unknown, with safe parameters."""
        detector = self._classifier()
        result = detector.classify(process_name="", window_title="")
        assert result.surface_type == SurfaceType.UNKNOWN
        assert result.confidence < 1.0
        assert result.ocr_available is True  # OCR is always available

    def test_citrix_dans_navigateur(self):
        """Citrix embedded in Chrome -> Citrix."""
        detector = self._classifier()
        result = detector.classify(
            process_name="chrome.exe",
            window_title="Citrix Workspace - DxCare",
        )
        assert result.surface_type == SurfaceType.CITRIX

    def test_resolve_order_par_surface(self):
        """The resolution order is consistent with the detected surface."""
        detector = self._classifier()

        citrix_profile = detector.classify("wfica32.exe", "Session")
        assert "uia" not in citrix_profile.resolve_order()
        assert "ocr" in citrix_profile.resolve_order()

        # UIA is unavailable here (empty helper path), so only OCR is checked.
        native_profile = detector.classify("notepad.exe", "Bloc-notes")
        assert "ocr" in native_profile.resolve_order()

        web_profile = detector.classify("chrome.exe", "Google")
        assert "ocr" in web_profile.resolve_order()
|
||||
|
||||
|
||||
class TestSurfaceProfile:
|
||||
"""Tests du dataclass SurfaceProfile."""
|
||||
|
||||
def test_to_dict_structure(self):
|
||||
p = SurfaceProfile(
|
||||
surface_type=SurfaceType.WINDOWS_NATIVE,
|
||||
process_name="notepad.exe",
|
||||
window_title="Test",
|
||||
)
|
||||
d = p.to_dict()
|
||||
assert d["surface_type"] == "windows_native"
|
||||
assert "capabilities" in d
|
||||
assert "parameters" in d
|
||||
assert d["capabilities"]["ocr"] is True
|
||||
assert d["capabilities"]["uia"] is False # Par défaut
|
||||
|
||||
def test_resolve_order_construction(self):
|
||||
"""L'ordre de résolution utilise les capacités dispo."""
|
||||
p = SurfaceProfile(
|
||||
surface_type=SurfaceType.WINDOWS_NATIVE,
|
||||
uia_available=True,
|
||||
)
|
||||
order = p.resolve_order()
|
||||
assert order[0] == "uia" # UIA en premier si dispo
|
||||
assert "ocr" in order
|
||||
assert "vlm" in order
|
||||
|
||||
def test_resolve_order_sans_uia(self):
|
||||
p = SurfaceProfile(
|
||||
surface_type=SurfaceType.CITRIX,
|
||||
uia_available=False,
|
||||
)
|
||||
order = p.resolve_order()
|
||||
assert "uia" not in order
|
||||
assert order[0] == "ocr" # OCR en premier
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# UIAHelper — wrapper Python
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestUIAHelper:
|
||||
"""Tests du wrapper UIAHelper."""
|
||||
|
||||
def test_initialization_sans_helper(self):
|
||||
"""Sans helper trouvé, available = False."""
|
||||
helper = UIAHelper(helper_path="/chemin/inexistant.exe")
|
||||
assert helper.available is False
|
||||
|
||||
def test_query_retourne_none_si_indispo(self):
|
||||
"""Si le helper n'est pas dispo, query retourne None."""
|
||||
helper = UIAHelper(helper_path="/chemin/inexistant.exe")
|
||||
result = helper.query_at(100, 200)
|
||||
assert result is None
|
||||
|
||||
def test_find_retourne_none_si_indispo(self):
|
||||
helper = UIAHelper(helper_path="/chemin/inexistant.exe")
|
||||
result = helper.find_by_name("Enregistrer")
|
||||
assert result is None
|
||||
|
||||
def test_health_retourne_false_si_indispo(self):
|
||||
helper = UIAHelper(helper_path="/chemin/inexistant.exe")
|
||||
assert helper.health() is False
|
||||
|
||||
@patch("core.workflow.uia_helper.os.path.isfile", return_value=True)
|
||||
@patch("core.workflow.uia_helper.platform.system", return_value="Windows")
|
||||
@patch("core.workflow.uia_helper.subprocess.run")
|
||||
def test_query_success_mock(self, mock_run, mock_platform, mock_isfile):
|
||||
"""Query avec mock subprocess retourne un UiaElement."""
|
||||
mock_result = MagicMock()
|
||||
mock_result.returncode = 0
|
||||
mock_result.stdout = json.dumps({
|
||||
"status": "ok",
|
||||
"element": {
|
||||
"name": "Enregistrer",
|
||||
"control_type": "bouton",
|
||||
"class_name": "Button",
|
||||
"automation_id": "btnSave",
|
||||
"bounding_rect": [100, 200, 200, 250],
|
||||
"is_enabled": True,
|
||||
"is_offscreen": False,
|
||||
"parent_path": [
|
||||
{"name": "Bloc-notes", "control_type": "fenêtre"}
|
||||
],
|
||||
},
|
||||
"elapsed_ms": 15,
|
||||
})
|
||||
mock_run.return_value = mock_result
|
||||
|
||||
helper = UIAHelper(helper_path="fake_lea_uia.exe")
|
||||
element = helper.query_at(150, 225)
|
||||
|
||||
assert element is not None
|
||||
assert element.name == "Enregistrer"
|
||||
assert element.control_type == "bouton"
|
||||
assert element.bounding_rect == (100, 200, 200, 250)
|
||||
assert element.center() == (150, 225)
|
||||
assert element.is_clickable() is True
|
||||
assert len(element.parent_path) == 1
|
||||
|
||||
@patch("core.workflow.uia_helper.os.path.isfile", return_value=True)
|
||||
@patch("core.workflow.uia_helper.platform.system", return_value="Windows")
|
||||
@patch("core.workflow.uia_helper.subprocess.run")
|
||||
def test_find_success_mock(self, mock_run, mock_platform, mock_isfile):
|
||||
mock_result = MagicMock()
|
||||
mock_result.returncode = 0
|
||||
mock_result.stdout = json.dumps({
|
||||
"status": "ok",
|
||||
"element": {
|
||||
"name": "Fichier",
|
||||
"control_type": "menu",
|
||||
"bounding_rect": [0, 20, 50, 40],
|
||||
"is_enabled": True,
|
||||
"is_offscreen": False,
|
||||
},
|
||||
})
|
||||
mock_run.return_value = mock_result
|
||||
|
||||
helper = UIAHelper(helper_path="fake.exe")
|
||||
element = helper.find_by_name("Fichier", control_type="menu")
|
||||
assert element is not None
|
||||
assert element.name == "Fichier"
|
||||
|
||||
@patch("core.workflow.uia_helper.os.path.isfile", return_value=True)
|
||||
@patch("core.workflow.uia_helper.platform.system", return_value="Windows")
|
||||
@patch("core.workflow.uia_helper.subprocess.run")
|
||||
def test_not_found(self, mock_run, mock_platform, mock_isfile):
|
||||
mock_result = MagicMock()
|
||||
mock_result.returncode = 0
|
||||
mock_result.stdout = json.dumps({
|
||||
"status": "not_found",
|
||||
"reason": "Pas d'élément",
|
||||
"elapsed_ms": 5,
|
||||
})
|
||||
mock_run.return_value = mock_result
|
||||
|
||||
helper = UIAHelper(helper_path="fake.exe")
|
||||
assert helper.query_at(999, 999) is None
|
||||
|
||||
@patch("core.workflow.uia_helper.os.path.isfile", return_value=True)
|
||||
@patch("core.workflow.uia_helper.platform.system", return_value="Windows")
|
||||
@patch("core.workflow.uia_helper.subprocess.run")
|
||||
def test_timeout(self, mock_run, mock_platform, mock_isfile):
|
||||
"""Un timeout subprocess ne fait pas crash le helper."""
|
||||
import subprocess as sp
|
||||
mock_run.side_effect = sp.TimeoutExpired("lea_uia", 5)
|
||||
|
||||
helper = UIAHelper(helper_path="fake.exe")
|
||||
assert helper.query_at(100, 100) is None
|
||||
|
||||
@patch("core.workflow.uia_helper.os.path.isfile", return_value=True)
|
||||
@patch("core.workflow.uia_helper.platform.system", return_value="Windows")
|
||||
@patch("core.workflow.uia_helper.subprocess.run")
|
||||
def test_json_invalide(self, mock_run, mock_platform, mock_isfile):
|
||||
"""Une sortie non-JSON ne fait pas crash."""
|
||||
mock_result = MagicMock()
|
||||
mock_result.returncode = 0
|
||||
mock_result.stdout = "pas du JSON"
|
||||
mock_run.return_value = mock_result
|
||||
|
||||
helper = UIAHelper(helper_path="fake.exe")
|
||||
assert helper.query_at(100, 100) is None
|
||||
|
||||
|
||||
class TestUiaElement:
|
||||
"""Tests du dataclass UiaElement."""
|
||||
|
||||
def test_from_dict_minimal(self):
|
||||
e = UiaElement.from_dict({"name": "test"})
|
||||
assert e.name == "test"
|
||||
assert e.bounding_rect == (0, 0, 0, 0)
|
||||
|
||||
def test_center(self):
|
||||
e = UiaElement(bounding_rect=(100, 200, 200, 300))
|
||||
assert e.center() == (150, 250)
|
||||
|
||||
def test_is_clickable(self):
|
||||
e = UiaElement(
|
||||
bounding_rect=(100, 100, 200, 150),
|
||||
is_enabled=True,
|
||||
is_offscreen=False,
|
||||
)
|
||||
assert e.is_clickable() is True
|
||||
|
||||
e2 = UiaElement(
|
||||
bounding_rect=(100, 100, 200, 150),
|
||||
is_enabled=False,
|
||||
is_offscreen=False,
|
||||
)
|
||||
assert e2.is_clickable() is False
|
||||
|
||||
def test_path_signature(self):
|
||||
e = UiaElement(
|
||||
name="Enregistrer",
|
||||
control_type="bouton",
|
||||
parent_path=[
|
||||
{"name": "Bloc-notes", "control_type": "fenêtre"},
|
||||
{"name": "Fichier", "control_type": "menu"},
|
||||
],
|
||||
)
|
||||
sig = e.path_signature()
|
||||
assert "Bloc-notes" in sig
|
||||
assert "Enregistrer" in sig
|
||||
assert " > " in sig
|
||||
|
||||
def test_roundtrip_dict(self):
|
||||
original = UiaElement(
|
||||
name="test",
|
||||
control_type="bouton",
|
||||
bounding_rect=(10, 20, 30, 40),
|
||||
is_enabled=True,
|
||||
is_offscreen=False,
|
||||
)
|
||||
d = original.to_dict()
|
||||
copy = UiaElement.from_dict(d)
|
||||
assert copy.name == original.name
|
||||
assert copy.bounding_rect == original.bounding_rect
|
||||
assert copy.is_enabled == original.is_enabled
|
||||
305
tests/unit/test_v4_resolve_order.py
Normal file
305
tests/unit/test_v4_resolve_order.py
Normal file
@@ -0,0 +1,305 @@
|
||||
"""
|
||||
Tests du mécanisme V4 : résolution pilotée par l'ordre pré-compilé.
|
||||
|
||||
Vérifie que :
|
||||
- Le resolve_order est bien propagé du plan vers le target_spec
|
||||
- Le resolve_engine honore l'ordre au lieu de sa cascade par défaut
|
||||
- Les méthodes sont essayées dans l'ordre spécifié
|
||||
- Si toutes échouent, fallback sur la cascade legacy
|
||||
"""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
_ROOT = str(Path(__file__).resolve().parents[2])
|
||||
if _ROOT not in sys.path:
|
||||
sys.path.insert(0, _ROOT)
|
||||
|
||||
from core.workflow.workflow_ir import WorkflowIR
|
||||
from core.workflow.execution_plan import ExecutionNode, ResolutionStrategy, ExecutionPlan
|
||||
from core.workflow.execution_compiler import ExecutionCompiler
|
||||
from agent_v0.server_v1.execution_plan_runner import (
|
||||
execution_node_to_action,
|
||||
execution_plan_to_actions,
|
||||
_strategy_to_target_spec,
|
||||
)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Test 1 : le resolve_order est propagé du plan au target_spec
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestResolveOrderPropagation:
|
||||
"""Le resolve_order doit être présent dans le target_spec."""
|
||||
|
||||
def test_ocr_primary_produit_resolve_order(self):
|
||||
"""OCR primaire → resolve_order commence par 'ocr'."""
|
||||
primary = ResolutionStrategy(method="ocr", target_text="Enregistrer")
|
||||
fallbacks = [
|
||||
ResolutionStrategy(method="template", anchor_b64="abc123"),
|
||||
ResolutionStrategy(method="vlm", vlm_description="bouton Enregistrer"),
|
||||
]
|
||||
spec = _strategy_to_target_spec(primary, fallbacks)
|
||||
|
||||
assert "resolve_order" in spec
|
||||
assert spec["resolve_order"] == ["ocr", "template", "vlm"]
|
||||
|
||||
def test_template_primary_produit_resolve_order(self):
|
||||
"""Template primaire → resolve_order commence par 'template'."""
|
||||
primary = ResolutionStrategy(method="template", anchor_b64="abc")
|
||||
fallbacks = [ResolutionStrategy(method="vlm", vlm_description="icône")]
|
||||
spec = _strategy_to_target_spec(primary, fallbacks)
|
||||
|
||||
assert spec["resolve_order"][0] == "template"
|
||||
|
||||
def test_vlm_only(self):
|
||||
"""Juste VLM → resolve_order = ['vlm']."""
|
||||
primary = ResolutionStrategy(method="vlm", vlm_description="popup")
|
||||
spec = _strategy_to_target_spec(primary, [])
|
||||
|
||||
assert spec["resolve_order"] == ["vlm"]
|
||||
|
||||
def test_pas_de_doublons(self):
|
||||
"""Chaque méthode apparaît une seule fois dans l'ordre."""
|
||||
primary = ResolutionStrategy(method="ocr", target_text="test")
|
||||
fallbacks = [
|
||||
ResolutionStrategy(method="template", anchor_b64="abc"),
|
||||
ResolutionStrategy(method="ocr", target_text="autre"), # Doublon
|
||||
]
|
||||
spec = _strategy_to_target_spec(primary, fallbacks)
|
||||
|
||||
assert spec["resolve_order"].count("ocr") == 1
|
||||
assert spec["resolve_order"].count("template") == 1
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Test 2 : execution_node_to_action propage bien le resolve_order
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestExecutionNodeConversion:
|
||||
"""Les actions générées contiennent le resolve_order."""
|
||||
|
||||
def test_click_node_a_resolve_order(self):
|
||||
"""Un ExecutionNode click produit une action avec resolve_order."""
|
||||
node = ExecutionNode(
|
||||
node_id="n1",
|
||||
action_type="click",
|
||||
intent="Cliquer sur Fichier",
|
||||
strategy_primary=ResolutionStrategy(method="ocr", target_text="Fichier"),
|
||||
strategy_fallbacks=[
|
||||
ResolutionStrategy(method="vlm", vlm_description="menu Fichier"),
|
||||
],
|
||||
)
|
||||
action = execution_node_to_action(node)
|
||||
|
||||
assert action is not None
|
||||
assert action["type"] == "click"
|
||||
assert "target_spec" in action
|
||||
assert "resolve_order" in action["target_spec"]
|
||||
assert action["target_spec"]["resolve_order"] == ["ocr", "vlm"]
|
||||
assert action["target_spec"]["by_text"] == "Fichier"
|
||||
assert action["target_spec"]["vlm_description"] == "menu Fichier"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Test 3 : le compilateur produit des plans avec resolve_order correct
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestCompilerProduitResolveOrder:
|
||||
"""Le ExecutionCompiler produit des plans avec resolve_order."""
|
||||
|
||||
def test_workflow_complet_avec_resolve_order(self):
|
||||
"""Un workflow compilé a des actions avec resolve_order."""
|
||||
ir = WorkflowIR.new("Test", domain="generic")
|
||||
ir.add_step(
|
||||
"Cliquer sur Enregistrer",
|
||||
actions=[{
|
||||
"type": "click",
|
||||
"target": "bouton Enregistrer",
|
||||
"anchor_hint": "Enregistrer",
|
||||
}],
|
||||
)
|
||||
|
||||
compiler = ExecutionCompiler()
|
||||
plan = compiler.compile(ir)
|
||||
|
||||
actions = execution_plan_to_actions(plan)
|
||||
assert len(actions) == 1
|
||||
assert "resolve_order" in actions[0]["target_spec"]
|
||||
# OCR doit être en premier (stratégie primaire quand texte dispo)
|
||||
assert actions[0]["target_spec"]["resolve_order"][0] == "ocr"
|
||||
|
||||
def test_fallback_vlm_toujours_present(self):
|
||||
"""Le VLM est toujours présent en fallback."""
|
||||
ir = WorkflowIR.new("Test")
|
||||
ir.add_step("Clic", actions=[{"type": "click", "target": "X", "anchor_hint": "X"}])
|
||||
|
||||
compiler = ExecutionCompiler()
|
||||
plan = compiler.compile(ir)
|
||||
|
||||
actions = execution_plan_to_actions(plan)
|
||||
assert "vlm" in actions[0]["target_spec"]["resolve_order"]
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Test 4 : _resolve_with_precompiled_order respecte l'ordre
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestResolveWithPrecompiledOrder:
|
||||
"""Le mécanisme V4 de résolution honore l'ordre."""
|
||||
|
||||
@patch("agent_v0.server_v1.resolve_engine._resolve_by_ocr_text")
|
||||
def test_ocr_appele_en_premier(self, mock_ocr):
|
||||
"""Si resolve_order=['ocr', 'vlm'], OCR est appelé en premier."""
|
||||
from agent_v0.server_v1.resolve_engine import _resolve_with_precompiled_order
|
||||
|
||||
mock_ocr.return_value = {
|
||||
"resolved": True,
|
||||
"x_pct": 0.5,
|
||||
"y_pct": 0.3,
|
||||
"score": 0.9,
|
||||
}
|
||||
|
||||
result = _resolve_with_precompiled_order(
|
||||
screenshot_path="/fake.png",
|
||||
target_spec={
|
||||
"by_text": "Enregistrer",
|
||||
"resolve_order": ["ocr", "vlm"],
|
||||
},
|
||||
resolve_order=["ocr", "vlm"],
|
||||
screen_width=1280,
|
||||
screen_height=800,
|
||||
fallback_x_pct=0.5,
|
||||
fallback_y_pct=0.5,
|
||||
)
|
||||
|
||||
assert result is not None
|
||||
assert result.get("resolved") is True
|
||||
assert result.get("resolve_method") == "v4_ocr"
|
||||
mock_ocr.assert_called_once()
|
||||
|
||||
@patch("agent_v0.server_v1.resolve_engine._vlm_quick_find")
|
||||
@patch("agent_v0.server_v1.resolve_engine._resolve_by_ocr_text")
|
||||
def test_cascade_ocr_vers_vlm(self, mock_ocr, mock_vlm):
|
||||
"""Si OCR échoue, VLM est essayé."""
|
||||
from agent_v0.server_v1.resolve_engine import _resolve_with_precompiled_order
|
||||
|
||||
mock_ocr.return_value = None # OCR échoue
|
||||
mock_vlm.return_value = {
|
||||
"resolved": True,
|
||||
"x_pct": 0.5,
|
||||
"y_pct": 0.3,
|
||||
}
|
||||
|
||||
result = _resolve_with_precompiled_order(
|
||||
screenshot_path="/fake.png",
|
||||
target_spec={
|
||||
"by_text": "Enregistrer",
|
||||
"vlm_description": "bouton Enregistrer",
|
||||
"resolve_order": ["ocr", "vlm"],
|
||||
},
|
||||
resolve_order=["ocr", "vlm"],
|
||||
screen_width=1280,
|
||||
screen_height=800,
|
||||
fallback_x_pct=0.5,
|
||||
fallback_y_pct=0.5,
|
||||
)
|
||||
|
||||
assert result is not None
|
||||
assert result.get("resolve_method") == "v4_vlm"
|
||||
mock_ocr.assert_called_once()
|
||||
mock_vlm.assert_called_once()
|
||||
|
||||
@patch("agent_v0.server_v1.resolve_engine._vlm_quick_find")
|
||||
@patch("agent_v0.server_v1.resolve_engine._resolve_by_ocr_text")
|
||||
def test_toutes_methodes_echouent(self, mock_ocr, mock_vlm):
|
||||
"""Si toutes les méthodes échouent, retourne None."""
|
||||
from agent_v0.server_v1.resolve_engine import _resolve_with_precompiled_order
|
||||
|
||||
mock_ocr.return_value = None
|
||||
mock_vlm.return_value = None
|
||||
|
||||
result = _resolve_with_precompiled_order(
|
||||
screenshot_path="/fake.png",
|
||||
target_spec={
|
||||
"by_text": "Inexistant",
|
||||
"vlm_description": "truc inexistant",
|
||||
"resolve_order": ["ocr", "vlm"],
|
||||
},
|
||||
resolve_order=["ocr", "vlm"],
|
||||
screen_width=1280,
|
||||
screen_height=800,
|
||||
fallback_x_pct=0.5,
|
||||
fallback_y_pct=0.5,
|
||||
)
|
||||
|
||||
assert result is None
|
||||
|
||||
def test_resolve_order_vide(self):
|
||||
"""Un resolve_order vide ne plante pas."""
|
||||
from agent_v0.server_v1.resolve_engine import _resolve_with_precompiled_order
|
||||
|
||||
result = _resolve_with_precompiled_order(
|
||||
screenshot_path="/fake.png",
|
||||
target_spec={"by_text": "test"},
|
||||
resolve_order=[],
|
||||
screen_width=1280,
|
||||
screen_height=800,
|
||||
fallback_x_pct=0.5,
|
||||
fallback_y_pct=0.5,
|
||||
)
|
||||
|
||||
assert result is None
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Test 5 : pipeline complet — IR → Plan → action avec resolve_order
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestPipelineCompletV4:
|
||||
"""Test du pipeline V4 complet de bout en bout (sans runtime réel)."""
|
||||
|
||||
def test_ir_vers_action_avec_resolve_order(self):
|
||||
"""Un WorkflowIR produit des actions avec resolve_order correctement."""
|
||||
ir = WorkflowIR.new("Workflow complet", domain="tim_codage")
|
||||
ir.add_step(
|
||||
"Ouvrir le fichier",
|
||||
actions=[{
|
||||
"type": "click",
|
||||
"target": "bouton Ouvrir",
|
||||
"anchor_hint": "Ouvrir",
|
||||
}],
|
||||
)
|
||||
ir.add_step(
|
||||
"Saisir le nom",
|
||||
actions=[
|
||||
{"type": "type", "text": "rapport.pdf"},
|
||||
{"type": "key_combo", "keys": ["enter"]},
|
||||
],
|
||||
)
|
||||
|
||||
compiler = ExecutionCompiler()
|
||||
plan = compiler.compile(ir)
|
||||
actions = execution_plan_to_actions(plan)
|
||||
|
||||
# On doit avoir 3 actions : click, type, key_combo
|
||||
assert len(actions) == 3
|
||||
|
||||
click_action = actions[0]
|
||||
assert click_action["type"] == "click"
|
||||
assert "resolve_order" in click_action["target_spec"]
|
||||
assert click_action["target_spec"]["resolve_order"][0] == "ocr"
|
||||
assert click_action["target_spec"]["by_text"] == "Ouvrir"
|
||||
|
||||
# type et key_combo n'ont pas de target_spec
|
||||
assert actions[1]["type"] == "type"
|
||||
assert "target_spec" not in actions[1]
|
||||
assert actions[2]["type"] == "key_combo"
|
||||
349
tests/unit/test_v4_wiring.py
Normal file
349
tests/unit/test_v4_wiring.py
Normal file
@@ -0,0 +1,349 @@
|
||||
"""
|
||||
Tests de câblage complet V4 :
|
||||
- SurfaceClassifier + ExecutionCompiler : paramètres adaptés par surface
|
||||
- IRBuilder lit uia_snapshot depuis les événements
|
||||
- ExecutionCompiler crée une stratégie UIA quand dispo
|
||||
- execution_plan_runner propage uia_target dans target_spec
|
||||
- Pipeline E2E : RawTrace (avec UIA) → WorkflowIR → Plan → action runtime
|
||||
"""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
_ROOT = str(Path(__file__).resolve().parents[2])
|
||||
if _ROOT not in sys.path:
|
||||
sys.path.insert(0, _ROOT)
|
||||
|
||||
from core.workflow.workflow_ir import WorkflowIR, Step, Action
|
||||
from core.workflow.execution_plan import ExecutionPlan, ExecutionNode, ResolutionStrategy
|
||||
from core.workflow.execution_compiler import ExecutionCompiler
|
||||
from core.workflow.surface_classifier import SurfaceClassifier, SurfaceProfile, SurfaceType
|
||||
from core.workflow.ir_builder import IRBuilder
|
||||
from agent_v0.server_v1.execution_plan_runner import (
|
||||
execution_node_to_action,
|
||||
execution_plan_to_actions,
|
||||
_strategy_to_target_spec,
|
||||
)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# ExecutionCompiler avec SurfaceProfile
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestCompilerWithSurfaceProfile:
|
||||
|
||||
def test_profil_citrix_impose_timeouts_longs(self):
|
||||
"""Profil Citrix → timeouts longs, retries 3x."""
|
||||
ir = WorkflowIR.new("Test")
|
||||
ir.add_step("Clic", actions=[{"type": "click", "target": "Bouton", "anchor_hint": "OK"}])
|
||||
|
||||
profile = SurfaceProfile(
|
||||
surface_type=SurfaceType.CITRIX,
|
||||
timeout_click_ms=15000,
|
||||
max_retries=3,
|
||||
ocr_threshold=0.65,
|
||||
)
|
||||
|
||||
compiler = ExecutionCompiler()
|
||||
plan = compiler.compile(ir, surface_profile=profile)
|
||||
|
||||
click_node = [n for n in plan.nodes if n.action_type == "click"][0]
|
||||
assert click_node.timeout_ms == 15000
|
||||
assert click_node.max_retries == 3
|
||||
|
||||
def test_profil_web_impose_timeouts_courts(self):
|
||||
"""Profil web → timeouts courts, 1 retry."""
|
||||
ir = WorkflowIR.new("Test")
|
||||
ir.add_step("Clic", actions=[{"type": "click", "target": "X", "anchor_hint": "Login"}])
|
||||
|
||||
profile = SurfaceProfile(
|
||||
surface_type=SurfaceType.WEB_LOCAL,
|
||||
timeout_click_ms=5000,
|
||||
max_retries=1,
|
||||
)
|
||||
|
||||
compiler = ExecutionCompiler()
|
||||
plan = compiler.compile(ir, surface_profile=profile)
|
||||
|
||||
click_node = [n for n in plan.nodes if n.action_type == "click"][0]
|
||||
assert click_node.timeout_ms == 5000
|
||||
assert click_node.max_retries == 1
|
||||
|
||||
def test_sans_profil_utilise_defauts(self):
|
||||
"""Sans surface_profile, comportement par défaut."""
|
||||
ir = WorkflowIR.new("Test")
|
||||
ir.add_step("Clic", actions=[{"type": "click", "target": "X", "anchor_hint": "Y"}])
|
||||
|
||||
compiler = ExecutionCompiler()
|
||||
plan = compiler.compile(ir)
|
||||
|
||||
click_node = [n for n in plan.nodes if n.action_type == "click"][0]
|
||||
assert click_node.timeout_ms == 10000 # Défaut
|
||||
assert click_node.max_retries == 2 # Défaut
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Stratégie UIA dans la compilation
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestUiaStrategyCompilation:
|
||||
|
||||
def _make_ir_with_uia(self):
|
||||
"""Créer un WorkflowIR avec une action portant un uia_snapshot."""
|
||||
ir = WorkflowIR.new("Test UIA")
|
||||
action = Action(
|
||||
type="click",
|
||||
target="Bloc-notes",
|
||||
anchor_hint="Enregistrer",
|
||||
)
|
||||
# Simuler l'enrichissement avec UIA
|
||||
action._enrichment = {
|
||||
"by_text": "Enregistrer",
|
||||
"anchor_image_base64": "fake_crop_data",
|
||||
"vlm_description": "Le bouton Enregistrer du menu Fichier",
|
||||
"uia_snapshot": {
|
||||
"name": "Enregistrer",
|
||||
"control_type": "bouton",
|
||||
"automation_id": "btnSave",
|
||||
"parent_path": [
|
||||
{"name": "Bloc-notes", "control_type": "fenêtre"},
|
||||
{"name": "Fichier", "control_type": "menu"},
|
||||
],
|
||||
},
|
||||
}
|
||||
step = Step(step_id="s1", intent="Sauvegarder", actions=[action])
|
||||
ir.steps.append(step)
|
||||
return ir
|
||||
|
||||
def test_uia_strategie_creee_si_surface_windows(self):
|
||||
"""Sur Windows natif avec UIA dispo, la stratégie UIA est primaire."""
|
||||
ir = self._make_ir_with_uia()
|
||||
profile = SurfaceProfile(
|
||||
surface_type=SurfaceType.WINDOWS_NATIVE,
|
||||
uia_available=True,
|
||||
)
|
||||
|
||||
compiler = ExecutionCompiler()
|
||||
plan = compiler.compile(ir, surface_profile=profile)
|
||||
|
||||
click = [n for n in plan.nodes if n.action_type == "click"][0]
|
||||
assert click.strategy_primary is not None
|
||||
assert click.strategy_primary.method == "uia"
|
||||
assert click.strategy_primary.uia_name == "Enregistrer"
|
||||
assert click.strategy_primary.uia_control_type == "bouton"
|
||||
|
||||
def test_uia_desactive_sur_citrix(self):
|
||||
"""Sur Citrix, UIA est ignoré même si snapshot présent."""
|
||||
ir = self._make_ir_with_uia()
|
||||
profile = SurfaceProfile(
|
||||
surface_type=SurfaceType.CITRIX,
|
||||
uia_available=False,
|
||||
)
|
||||
|
||||
compiler = ExecutionCompiler()
|
||||
plan = compiler.compile(ir, surface_profile=profile)
|
||||
|
||||
click = [n for n in plan.nodes if n.action_type == "click"][0]
|
||||
assert click.strategy_primary.method != "uia"
|
||||
# OCR est la primaire (texte dispo)
|
||||
assert click.strategy_primary.method == "ocr"
|
||||
|
||||
def test_uia_fallback_sur_ocr_si_uia_manquant(self):
|
||||
"""Sans uia_snapshot, OCR primaire."""
|
||||
ir = WorkflowIR.new("Test")
|
||||
action = Action(
|
||||
type="click",
|
||||
target="Fichier",
|
||||
anchor_hint="Fichier",
|
||||
)
|
||||
action._enrichment = {
|
||||
"by_text": "Fichier",
|
||||
"vlm_description": "Menu Fichier",
|
||||
}
|
||||
step = Step(step_id="s1", intent="Ouvrir menu", actions=[action])
|
||||
ir.steps.append(step)
|
||||
|
||||
profile = SurfaceProfile(
|
||||
surface_type=SurfaceType.WINDOWS_NATIVE,
|
||||
uia_available=True,
|
||||
)
|
||||
|
||||
compiler = ExecutionCompiler()
|
||||
plan = compiler.compile(ir, surface_profile=profile)
|
||||
|
||||
click = [n for n in plan.nodes if n.action_type == "click"][0]
|
||||
assert click.strategy_primary.method == "ocr"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# IRBuilder lit uia_snapshot depuis les événements
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestIRBuilderLitUiaSnapshot:
|
||||
|
||||
def test_ir_builder_propage_uia_snapshot(self):
|
||||
"""Un event avec uia_snapshot → Action._enrichment contient uia_snapshot."""
|
||||
events = [
|
||||
{
|
||||
"event": {
|
||||
"type": "mouse_click",
|
||||
"pos": [500, 300],
|
||||
"window": {"title": "Bloc-notes"},
|
||||
"timestamp": 100.0,
|
||||
"uia_snapshot": {
|
||||
"name": "Enregistrer",
|
||||
"control_type": "bouton",
|
||||
"automation_id": "btnSave",
|
||||
"parent_path": [{"name": "Fichier", "control_type": "menu"}],
|
||||
},
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
builder = IRBuilder(gemma4_port="99999")
|
||||
ir = builder.build(events, name="Test")
|
||||
|
||||
# Parcourir les steps pour trouver le clic
|
||||
found_action = None
|
||||
for step in ir.steps:
|
||||
for action in step.actions:
|
||||
if action.type == "click":
|
||||
found_action = action
|
||||
break
|
||||
|
||||
assert found_action is not None
|
||||
enrichment = getattr(found_action, "_enrichment", None) or {}
|
||||
assert "uia_snapshot" in enrichment
|
||||
assert enrichment["uia_snapshot"]["name"] == "Enregistrer"
|
||||
assert enrichment["uia_snapshot"]["control_type"] == "bouton"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# execution_plan_runner propage uia_target dans target_spec
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestUiaTargetPropagation:
|
||||
|
||||
def test_strategy_uia_produit_uia_target(self):
|
||||
"""Une stratégie UIA primaire → target_spec contient uia_target."""
|
||||
primary = ResolutionStrategy(
|
||||
method="uia",
|
||||
uia_name="Enregistrer",
|
||||
uia_control_type="bouton",
|
||||
uia_automation_id="btnSave",
|
||||
uia_parent_path=[{"name": "Fichier", "control_type": "menu"}],
|
||||
)
|
||||
fallbacks = [
|
||||
ResolutionStrategy(method="ocr", target_text="Enregistrer"),
|
||||
ResolutionStrategy(method="vlm", vlm_description="bouton Enregistrer"),
|
||||
]
|
||||
|
||||
spec = _strategy_to_target_spec(primary, fallbacks)
|
||||
|
||||
assert "uia_target" in spec
|
||||
assert spec["uia_target"]["name"] == "Enregistrer"
|
||||
assert spec["uia_target"]["control_type"] == "bouton"
|
||||
assert spec["uia_target"]["automation_id"] == "btnSave"
|
||||
assert spec["resolve_order"][0] == "uia"
|
||||
assert "ocr" in spec["resolve_order"]
|
||||
assert "vlm" in spec["resolve_order"]
|
||||
|
||||
def test_pas_de_uia_target_si_pas_de_stratégie(self):
|
||||
"""Sans stratégie UIA → pas de uia_target."""
|
||||
primary = ResolutionStrategy(method="ocr", target_text="test")
|
||||
spec = _strategy_to_target_spec(primary, [])
|
||||
|
||||
assert "uia_target" not in spec
|
||||
assert "uia" not in spec.get("resolve_order", [])
|
||||
|
||||
def test_execution_node_to_action_avec_uia(self):
|
||||
"""Un ExecutionNode avec stratégie UIA produit une action complète."""
|
||||
node = ExecutionNode(
|
||||
node_id="n1",
|
||||
action_type="click",
|
||||
intent="Cliquer Enregistrer",
|
||||
strategy_primary=ResolutionStrategy(
|
||||
method="uia",
|
||||
uia_name="Enregistrer",
|
||||
uia_control_type="bouton",
|
||||
),
|
||||
strategy_fallbacks=[
|
||||
ResolutionStrategy(method="ocr", target_text="Enregistrer"),
|
||||
],
|
||||
)
|
||||
|
||||
action = execution_node_to_action(node)
|
||||
assert action is not None
|
||||
assert action["type"] == "click"
|
||||
assert "uia_target" in action["target_spec"]
|
||||
assert action["target_spec"]["uia_target"]["name"] == "Enregistrer"
|
||||
assert action["target_spec"]["resolve_order"] == ["uia", "ocr"]
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Pipeline E2E : événement avec UIA → action runtime avec uia_target
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestPipelineE2EUia:
|
||||
|
||||
def test_pipeline_complet_uia(self):
|
||||
"""RawTrace (avec uia_snapshot) → WorkflowIR → Plan → action runtime."""
|
||||
# Événements simulés d'un enregistrement sur Windows natif
|
||||
events = [
|
||||
{
|
||||
"event": {
|
||||
"type": "mouse_click",
|
||||
"pos": [500, 300],
|
||||
"window": {"title": "Bloc-notes"},
|
||||
"timestamp": 100.0,
|
||||
"uia_snapshot": {
|
||||
"name": "Enregistrer",
|
||||
"control_type": "bouton",
|
||||
"automation_id": "btnSave",
|
||||
"parent_path": [
|
||||
{"name": "Bloc-notes", "control_type": "fenêtre"},
|
||||
],
|
||||
},
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
# Pipeline complet
|
||||
builder = IRBuilder(gemma4_port="99999")
|
||||
ir = builder.build(events, name="Test E2E UIA")
|
||||
|
||||
profile = SurfaceProfile(
|
||||
surface_type=SurfaceType.WINDOWS_NATIVE,
|
||||
uia_available=True,
|
||||
timeout_click_ms=8000,
|
||||
max_retries=2,
|
||||
)
|
||||
|
||||
compiler = ExecutionCompiler()
|
||||
plan = compiler.compile(ir, surface_profile=profile)
|
||||
|
||||
actions = execution_plan_to_actions(plan)
|
||||
|
||||
# Vérifier que l'action finale a toutes les données UIA
|
||||
click_actions = [a for a in actions if a["type"] == "click"]
|
||||
assert len(click_actions) == 1
|
||||
|
||||
action = click_actions[0]
|
||||
assert "target_spec" in action
|
||||
spec = action["target_spec"]
|
||||
|
||||
assert "resolve_order" in spec
|
||||
assert spec["resolve_order"][0] == "uia"
|
||||
assert "uia_target" in spec
|
||||
assert spec["uia_target"]["name"] == "Enregistrer"
|
||||
assert spec["uia_target"]["control_type"] == "bouton"
|
||||
assert action.get("timeout_ms") == 8000
|
||||
assert action.get("max_retries") == 2
|
||||
261
tests/unit/test_workflow_ir.py
Normal file
261
tests/unit/test_workflow_ir.py
Normal file
@@ -0,0 +1,261 @@
|
||||
"""
|
||||
Tests du WorkflowIR et de l'IRBuilder.
|
||||
|
||||
Vérifie que :
|
||||
- Le format WorkflowIR est correct (sérialisation, désérialisation, versioning)
|
||||
- L'IRBuilder segmente et comprend les traces brutes
|
||||
- Les variables sont détectées et substituables
|
||||
- Le tout fonctionne sans gemma4 (fallback gracieux)
|
||||
"""
|
||||
|
||||
import json
|
||||
import shutil
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
_ROOT = str(Path(__file__).resolve().parents[2])
|
||||
if _ROOT not in sys.path:
|
||||
sys.path.insert(0, _ROOT)
|
||||
|
||||
from core.workflow.workflow_ir import WorkflowIR, Step, Action, Variable
|
||||
from core.workflow.ir_builder import IRBuilder
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# WorkflowIR — format et sérialisation
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestWorkflowIR:
    """Checks on the WorkflowIR container: creation, mutation, (de)serialisation, versioning."""

    def test_creation_vide(self):
        """A new workflow has a 'wf_' id, version 1, the given name and no content."""
        workflow = WorkflowIR.new("Test workflow")
        assert workflow.workflow_id.startswith("wf_")
        assert workflow.version == 1
        assert workflow.name == "Test workflow"
        assert workflow.steps == []
        assert workflow.variables == []

    def test_ajout_etapes(self):
        """Steps are appended in order and keep their actions."""
        workflow = WorkflowIR.new("Test")
        opening_actions = [
            {"type": "click", "target": "icône app"},
            {"type": "wait", "duration_ms": 2000},
        ]
        typing_actions = [
            {"type": "type", "text": "bonjour"},
        ]
        workflow.add_step("Ouvrir l'application", actions=opening_actions)
        workflow.add_step("Saisir les données", actions=typing_actions)

        assert len(workflow.steps) == 2
        first_step = workflow.steps[0]
        assert first_step.intent == "Ouvrir l'application"
        assert len(first_step.actions) == 2
        assert first_step.actions[0].type == "click"

    def test_ajout_variables(self):
        """Variables are appended in order and keep their name and default."""
        workflow = WorkflowIR.new("Test")
        workflow.add_variable("patient", description="Nom du patient", source="screen")
        workflow.add_variable("code", description="Code à saisir", default="A00.0")

        assert len(workflow.variables) == 2
        assert workflow.variables[0].name == "patient"
        assert workflow.variables[1].default == "A00.0"

    def test_serialisation_json(self):
        """to_json() emits name, domain, steps and variables."""
        workflow = WorkflowIR.new("Mon workflow", domain="tim_codage")
        workflow.add_step("Étape 1")
        workflow.add_variable("var1", description="Une variable")

        payload = json.loads(workflow.to_json())

        assert payload["name"] == "Mon workflow"
        assert payload["domain"] == "tim_codage"
        assert len(payload["steps"]) == 1
        assert len(payload["variables"]) == 1

    def test_deserialisation_json(self):
        """from_json(to_json()) restores name, steps, actions and variables."""
        source = WorkflowIR.new("Test roundtrip")
        source.add_step("Ouvrir", actions=[{"type": "click", "target": "bouton"}])
        source.add_variable("v1", description="test")

        restored = WorkflowIR.from_json(source.to_json())

        assert restored.name == "Test roundtrip"
        assert len(restored.steps) == 1
        assert restored.steps[0].intent == "Ouvrir"
        assert restored.steps[0].actions[0].type == "click"
        assert len(restored.variables) == 1

    def test_save_et_load(self):
        """save() writes a file that load() reads back with the same content."""
        workdir = tempfile.mkdtemp()
        try:
            source = WorkflowIR.new("Save test")
            source.add_step("Étape 1")
            saved_path = source.save(workdir)

            assert saved_path.is_file()

            reloaded = WorkflowIR.load(str(saved_path))
            assert reloaded.name == "Save test"
            assert len(reloaded.steps) == 1
        finally:
            # Always remove the scratch directory, even when an assertion fails.
            shutil.rmtree(workdir)

    def test_increment_version(self):
        """increment_version() returns a bumped copy and leaves the original untouched."""
        base = WorkflowIR.new("Versionning")
        assert base.version == 1

        bumped = base.increment_version()
        assert bumped.version == 2
        assert base.version == 1  # original is unchanged
        assert bumped.name == "Versionning"

    def test_domaine_generique(self):
        """WorkflowIR is generic: any domain label is stored as given."""
        domains = ("tim_codage", "comptabilite", "rh_paie", "stocks", "generic")
        for label in domains:
            workflow = WorkflowIR.new("Test", domain=label)
            assert workflow.domain == label

    def test_etape_optionnelle(self):
        """A step added with is_optional=True reports is_optional as True."""
        workflow = WorkflowIR.new("Test")
        workflow.add_step("Vérification facultative", is_optional=True)
        assert workflow.steps[0].is_optional is True

    def test_etape_boucle(self):
        """A loop step keeps both its loop flag and its loop variable."""
        workflow = WorkflowIR.new("Test")
        workflow.add_step("Traiter chaque dossier", is_loop=True, loop_variable="dossier")
        loop_step = workflow.steps[0]
        assert loop_step.is_loop is True
        assert loop_step.loop_variable == "dossier"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# IRBuilder — construction depuis RawTrace
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestIRBuilder:
    """Checks that IRBuilder turns a raw event trace into a WorkflowIR."""

    def _make_events(self):
        """Return simulated raw events (same shape as live_events.jsonl entries)."""
        def wrap(**fields):
            # Every raw record nests its payload under the "event" key.
            return {"event": fields}

        def window(title):
            return {"title": title}

        return [
            wrap(type="mouse_click", pos=[400, 580], window=window("Lea : Explorateur"), timestamp=100.0, vision_info={"text": "Rechercher"}),
            wrap(type="text_input", text="blocnote", window=window("Rechercher"), timestamp=102.0),
            wrap(type="key_combo", keys=["enter"], window=window("Rechercher"), timestamp=103.0),
            wrap(type="heartbeat", timestamp=104.0),  # noise: must be filtered out
            wrap(type="mouse_click", pos=[300, 200], window=window("Rechercher"), timestamp=105.0, vision_info={"text": "Bloc-notes"}),
            wrap(type="mouse_click", pos=[500, 300], window=window("Sans titre – Bloc-notes"), timestamp=112.0, vision_info={"text": ""}),
            wrap(type="text_input", text="Bonjour le monde", window=window("*Sans titre – Bloc-notes"), timestamp=113.0),
            wrap(type="key_combo", keys=["ctrl", "s"], window=window("*Sans titre – Bloc-notes"), timestamp=115.0),
        ]

    def _build_offline(self, **build_kwargs):
        """Build the sample trace with a builder pointed at an invalid gemma4 port."""
        offline_builder = IRBuilder(gemma4_port="99999")  # invalid port
        trace = self._make_events()
        return offline_builder.build(trace, **build_kwargs)

    def test_builder_sans_gemma4(self):
        """The builder still works without gemma4 (graceful fallback)."""
        workflow = self._build_offline(session_id="test_sess", domain="generic", name="Test")

        assert workflow.name == "Test"
        assert workflow.learned_from == "test_sess"
        assert len(workflow.steps) >= 1
        assert len(workflow.applications) >= 1

    def test_filtre_heartbeat(self):
        """Heartbeat events are filtered out."""
        workflow = self._build_offline(name="Test")

        # No action of any step may be of type heartbeat.
        for stage in workflow.steps:
            for item in stage.actions:
                assert item.type != "heartbeat"

    def test_detection_applications(self):
        """The applications used in the trace are detected."""
        workflow = self._build_offline(name="Test")

        detected = workflow.applications
        assert "Bloc-notes" in detected or "Explorateur" in detected

    def test_detection_variables(self):
        """Typed text is detected as a variable."""
        workflow = self._build_offline(name="Test")

        # "blocnote" and "Bonjour le monde" are the texts typed in the trace.
        assert len(workflow.variables) >= 1
        defaults = [variable.default for variable in workflow.variables]
        assert any("blocnote" in value or "Bonjour" in value for value in defaults)

    def test_segmentation_par_application(self):
        """Events are segmented when the application changes."""
        workflow = self._build_offline(name="Test")

        # At least 2 steps (Explorateur -> Bloc-notes).
        assert len(workflow.steps) >= 2

    def test_actions_dans_les_etapes(self):
        """The steps together contain click, type and key_combo actions."""
        workflow = self._build_offline(name="Test")

        seen_types = [item.type for stage in workflow.steps for item in stage.actions]
        assert "click" in seen_types
        assert "type" in seen_types
        assert "key_combo" in seen_types

    def test_workflow_ir_complet_roundtrip(self):
        """Build -> JSON -> reload keeps name, domain and element counts."""
        built = self._build_offline(name="Roundtrip test", domain="compta")
        restored = WorkflowIR.from_json(built.to_json())

        assert restored.name == "Roundtrip test"
        assert restored.domain == "compta"
        assert len(restored.steps) == len(built.steps)
        assert len(restored.variables) == len(built.variables)

    @patch("requests.post")
    def test_builder_avec_gemma4_mock(self, mock_post):
        """With a (mocked) gemma4 reply, the builder may enrich step intents."""
        reply_text = "\n".join([
            "INTENTION: Rechercher et ouvrir le Bloc-notes",
            "AVANT: L'explorateur de fichiers est ouvert",
            "APRÈS: Le Bloc-notes est ouvert et actif",
        ])
        fake_response = MagicMock()
        fake_response.ok = True
        fake_response.json.return_value = {"message": {"content": reply_text}}
        mock_post.return_value = fake_response

        default_builder = IRBuilder()
        trace = self._make_events()

        workflow = default_builder.build(trace, name="Test gemma4")

        # At least one step should carry an enriched intent; the fallback is accepted too.
        step_intents = [stage.intent for stage in workflow.steps]
        enriched = any("Bloc-notes" in text or "Rechercher" in text for text in step_intents)
        assert enriched or len(workflow.steps) >= 1  # fallback acceptable
|
||||
13
tools/run_session_cleaner.sh
Executable file
13
tools/run_session_cleaner.sh
Executable file
@@ -0,0 +1,13 @@
|
||||
#!/bin/bash
# Quick launcher for the Session Cleaner.
# Usage: ./tools/run_session_cleaner.sh [--port 5006] [--debug]

# Run from the repository root (parent of this script's directory) so the
# relative paths below resolve. Abort rather than continue in the wrong place.
cd "$(dirname "$0")/.." || exit 1

# Virtualenv activation is best-effort: fall back to the ambient python.
source .venv/bin/activate 2>/dev/null || true

# Load the API token from .env.local when present.
# Only an actual assignment line is accepted (optionally prefixed by "export"),
# and nothing is exported when no such line exists: a bare `export` would
# print every exported variable to stdout.
if [ -f .env.local ]; then
    token_line="$(grep -m1 -E '^[[:space:]]*(export[[:space:]]+)?RPA_API_TOKEN=' .env.local 2>/dev/null)"
    if [ -n "$token_line" ]; then
        token_value="${token_line#*=}"
        # Drop one pair of surrounding quotes, as dotenv-style files often use.
        case "$token_value" in
            \"*\") token_value="${token_value#\"}"; token_value="${token_value%\"}" ;;
            \'*\') token_value="${token_value#\'}"; token_value="${token_value%\'}" ;;
        esac
        export RPA_API_TOKEN="$token_value"
    fi
    unset token_line token_value
fi

python tools/session_cleaner.py "$@"
|
||||
1263
tools/session_cleaner.py
Normal file
1263
tools/session_cleaner.py
Normal file
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
@@ -1876,7 +1876,7 @@ def load_system_config():
|
||||
"version": "1.0.0",
|
||||
"services": {},
|
||||
"llm": {"provider": "ollama", "base_url": "http://localhost:11434", "model": "qwen2.5:7b"},
|
||||
"vlm": {"provider": "ollama", "base_url": "http://localhost:11434", "model": "qwen2.5vl:7b"},
|
||||
"vlm": {"provider": "ollama", "base_url": "http://localhost:11434", "model": "gemma4:e4b"},
|
||||
"detection": {"owl_model": "google/owlv2-base-patch16-ensemble", "confidence_threshold": 0.3},
|
||||
"database": {"type": "sqlite", "path": "data/training/workflows.db"},
|
||||
"security": {"enable_encryption": True, "require_authentication": False}
|
||||
@@ -2371,6 +2371,93 @@ def proxy_streaming(endpoint):
|
||||
return jsonify({'error': str(e)}), 500
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Chat conversationnel — Léa
|
||||
# =============================================================================
|
||||
|
||||
CHAT_BASE_URL = 'http://localhost:5005/api/v1/chat'
|
||||
|
||||
|
||||
@app.route('/chat')
def chat_page():
    """Render the conversational chat page (chat.html)."""
    template_name = 'chat.html'
    return render_template(template_name)
|
||||
|
||||
|
||||
@app.route('/api/chat/session', methods=['POST'])
def proxy_chat_session():
    """Proxy: forward a session-creation request (POST /session) to the chat backend."""
    # A missing or invalid JSON body is forwarded as an empty object.
    body = request.get_json(silent=True) or {}
    return _proxy_chat('POST', '/session', payload=body)
|
||||
|
||||
|
||||
@app.route('/api/chat/<session_id>/message', methods=['POST'])
def proxy_chat_message(session_id):
    """Proxy: forward a message (POST /<session_id>/message) to the chat backend."""
    # A missing or invalid JSON body is forwarded as an empty object.
    body = request.get_json(silent=True) or {}
    upstream_path = f'/{session_id}/message'
    return _proxy_chat('POST', upstream_path, payload=body)
|
||||
|
||||
|
||||
@app.route('/api/chat/<session_id>/history', methods=['GET'])
def proxy_chat_history(session_id):
    """Proxy: fetch a session's history (GET /<session_id>/history) from the chat backend."""
    upstream_path = f'/{session_id}/history'
    return _proxy_chat('GET', upstream_path)
|
||||
|
||||
|
||||
@app.route('/api/chat/<session_id>/confirm', methods=['POST'])
def proxy_chat_confirm(session_id):
    """Proxy: forward a plan confirmation (POST /<session_id>/confirm) to the chat backend."""
    # A missing or invalid JSON body is forwarded as an empty object.
    body = request.get_json(silent=True) or {}
    upstream_path = f'/{session_id}/confirm'
    return _proxy_chat('POST', upstream_path, payload=body)
|
||||
|
||||
|
||||
def _proxy_chat(method, path, payload=None):
    """Forward a request to the chat backend (CHAT_BASE_URL) and relay its reply.

    Args:
        method: HTTP verb used for the upstream request (e.g. 'GET', 'POST').
        path: Path appended to CHAT_BASE_URL, e.g. '/<session_id>/message'.
            It is percent-encoded here, so callers may pass raw route values.
        payload: Optional JSON-serialisable body. It is not sent for GET requests.

    Returns:
        A Flask response, or a (response, status[, headers]) tuple:
        - the upstream JSON body on success (or the raw body if it is not JSON);
        - the upstream error body with the upstream status on an HTTP error;
        - a 502 JSON error when the backend cannot be reached;
        - a 500 JSON error for any other failure.
    """
    import urllib.error
    import urllib.parse
    import urllib.request

    # The path embeds client-supplied route segments (session ids). Encode
    # everything except '/' so characters such as '?', '#' or spaces cannot
    # rewrite the upstream URL (which carries the Bearer token) or make it invalid.
    safe_path = urllib.parse.quote(path, safe='/')
    url = f'{CHAT_BASE_URL}{safe_path}'
    headers = {
        'Accept': 'application/json',
        'Content-Type': 'application/json',
    }
    # Bearer token read from the RPA_API_TOKEN environment variable; the header
    # is omitted when the variable is unset or empty.
    token = os.environ.get('RPA_API_TOKEN', '')
    if token:
        headers['Authorization'] = f'Bearer {token}'

    try:
        data_bytes = None
        if payload is not None and method != 'GET':
            data_bytes = json.dumps(payload).encode('utf-8')
        req = urllib.request.Request(url, data=data_bytes, headers=headers, method=method)
        with urllib.request.urlopen(req, timeout=15) as response:
            body = response.read().decode('utf-8')
            try:
                return jsonify(json.loads(body))
            except json.JSONDecodeError:
                # Not JSON: pass the body through unchanged with the upstream status.
                return body, response.status, {'Content-Type': 'application/json'}
    except urllib.error.HTTPError as e:
        # Relay the upstream error payload when it is JSON, else a generic message.
        try:
            detail = json.loads(e.read().decode('utf-8'))
        except Exception:
            detail = {'error': str(e)}
        return jsonify(detail), e.code
    except urllib.error.URLError as e:
        return jsonify({'error': f'Serveur chat inaccessible : {e}'}), 502
    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Main
|
||||
# =============================================================================
|
||||
|
||||
240
web_dashboard/static/js/chat.js
Normal file
240
web_dashboard/static/js/chat.js
Normal file
@@ -0,0 +1,240 @@
|
||||
// chat.js — Client Léa conversationnelle
|
||||
// Logique minimaliste : pas de framework, fetch + polling.
|
||||
|
||||
const API_BASE = "/api/chat"; // Proxyfié par le dashboard Flask vers :5005
|
||||
|
||||
let sessionId = null;
|
||||
let pollTimer = null;
|
||||
let lastMessageCount = 0;
|
||||
let currentState = "idle";
|
||||
|
||||
const STATE_LABELS = {
|
||||
idle: "En attente",
|
||||
planning: "Léa réfléchit…",
|
||||
awaiting_confirmation: "En attente de confirmation",
|
||||
executing: "Léa exécute le workflow…",
|
||||
done: "Terminé",
|
||||
error: "Erreur",
|
||||
};
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Initialisation
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
async function initChat() {
    // Create a chat session through the proxy, render its initial state and
    // start polling. Any failure is logged and shown as a system message.
    const request = {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ machine_id: "default" }),
    };

    try {
        const response = await fetch(`${API_BASE}/session`, request);
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}`);
        }
        const session = await response.json();

        sessionId = session.session_id;
        currentState = session.state || "idle";
        updateStatus(currentState);
        renderMessages(session.history || []);

        const sessionLabel = document.getElementById("sessionInfo");
        sessionLabel.textContent = `Session ${sessionId}`;
        startPolling();
    } catch (failure) {
        console.error("Impossible de créer la session chat :", failure);
        showSystemMessage(`Impossible de créer la session chat : ${failure.message}. Vérifiez que le serveur streaming (5005) est démarré.`);
    }
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Envoi de messages
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
async function sendMessage() {
    // Send the composer text to the current session and render the reply.
    // Does nothing when the text is blank or no session exists yet.
    const input = document.getElementById("composerInput");
    const text = (input.value || "").trim();
    if (!text || !sessionId) return;

    const sendBtn = document.getElementById("sendBtn");
    sendBtn.disabled = true;
    input.value = "";
    autosizeTextarea();

    // Optimistic display: show the user's message before the server answers.
    appendMessage({
        role: "user",
        content: text,
        timestamp: Date.now() / 1000,
    });

    try {
        updateStatus("planning");
        const resp = await fetch(`${API_BASE}/${sessionId}/message`, {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify({ message: text }),
        });
        if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
        const data = await resp.json();
        currentState = data.state || "idle";
        updateStatus(currentState);
        renderMessages(data.history || []);
    } catch (err) {
        console.error("Erreur envoi message :", err);
        showSystemMessage(`Erreur : ${err.message}`);
        updateStatus("error");
    } finally {
        // updateStatus() has already locked or unlocked the composer for the
        // state we ended in. Mirror it instead of always re-enabling Send,
        // which left the button active while the state was planning/executing.
        sendBtn.disabled = input.disabled;
        if (!input.disabled) input.focus();
    }
}
|
||||
|
||||
async function confirmPlan(confirmed) {
    // Send the user's decision (confirmed: boolean) for the pending plan and
    // render the resulting state. Does nothing when no session exists.
    if (!sessionId) return;
    const confirmBar = document.getElementById("confirmBar");
    // Hide the bar while the request is in flight so it cannot be sent twice.
    confirmBar.classList.remove("visible");

    try {
        const resp = await fetch(`${API_BASE}/${sessionId}/confirm`, {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify({ confirmed }),
        });
        if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
        const data = await resp.json();
        currentState = data.state || "idle";
        updateStatus(currentState);
        renderMessages(data.history || []);
    } catch (err) {
        console.error("Erreur confirmation :", err);
        showSystemMessage(`Erreur confirmation : ${err.message}`);
        // currentState is only updated on success. If we are still waiting for
        // a decision, bring the bar back so the user can retry.
        if (currentState === "awaiting_confirmation") {
            confirmBar.classList.add("visible");
        }
    }
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Polling
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
function startPolling() {
    // (Re)start the history poll: cancel any running timer, then poll every 2 s.
    const POLL_INTERVAL_MS = 2000;
    if (pollTimer) {
        clearInterval(pollTimer);
    }
    pollTimer = setInterval(pollHistory, POLL_INTERVAL_MS);
}
|
||||
|
||||
async function pollHistory() {
    // Fetch the session snapshot, refresh the status, and re-render the
    // messages only when their count changed. Failures are ignored.
    if (!sessionId) return;

    try {
        const response = await fetch(`${API_BASE}/${sessionId}/history`);
        if (!response.ok) return;

        const payload = await response.json();
        const snapshot = payload.snapshot || {};

        currentState = snapshot.state || "idle";
        updateStatus(currentState, snapshot.progress || {});

        const latest = snapshot.messages || [];
        const countChanged = latest.length !== lastMessageCount;
        if (countChanged) {
            renderMessages(latest);
        }
    } catch (ignored) {
        // Silent on purpose: the next tick will retry.
    }
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Rendu
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
function renderMessages(messages) {
    // Replace the message list with `messages`, remember its length, scroll
    // to the bottom, and sync the confirmation bar with the current state.
    const list = document.getElementById("messages");
    list.innerHTML = "";
    for (const entry of messages) {
        appendMessage(entry, false);
    }
    lastMessageCount = messages.length;
    list.scrollTop = list.scrollHeight;

    // Show the confirmation bar only while a decision is awaited.
    const bar = document.getElementById("confirmBar");
    const awaiting = currentState === "awaiting_confirmation";
    if (awaiting) {
        bar.classList.add("visible");
    } else {
        bar.classList.remove("visible");
    }
}
|
||||
|
||||
function appendMessage(msg, autoscroll = true) {
    // Append one message row (avatar + bubble + timestamp) to the list.
    // msg: { role, content, timestamp }, where timestamp is multiplied by
    // 1000 before being handed to Date. Scrolls to the bottom when
    // `autoscroll` is truthy.
    const make = (className) => {
        const node = document.createElement("div");
        node.className = className;
        return node;
    };

    const list = document.getElementById("messages");
    const row = make(`message ${msg.role}`);

    const avatar = make("avatar");
    switch (msg.role) {
        case "user":
            avatar.textContent = "Vous";
            break;
        case "lea":
            avatar.textContent = "L";
            break;
        default:
            avatar.textContent = "i";
    }

    const column = document.createElement("div");
    const bubble = make("bubble");
    // textContent (not innerHTML): message text is never parsed as markup.
    bubble.textContent = msg.content || "";
    column.appendChild(bubble);

    const stamp = make("timestamp");
    try {
        const when = new Date((msg.timestamp || 0) * 1000);
        stamp.textContent = when.toLocaleTimeString("fr-FR");
    } catch (e) {
        stamp.textContent = "";
    }
    column.appendChild(stamp);

    row.appendChild(avatar);
    row.appendChild(column);
    list.appendChild(row);

    if (autoscroll) {
        list.scrollTop = list.scrollHeight;
    }
}
|
||||
|
||||
function showSystemMessage(text) {
    // Append a local "system" message stamped with the current time.
    const nowSeconds = Date.now() / 1000;
    const notice = { role: "system", content: text, timestamp: nowSeconds };
    appendMessage(notice);
}
|
||||
|
||||
function updateStatus(state, progress = {}) {
    // Reflect `state` in the status dot and label, and lock the composer
    // while the state is "planning" or "executing". When executing with a
    // known total, the label shows completed/total instead.
    const statusDot = document.getElementById("statusDot");
    const statusText = document.getElementById("statusText");
    statusDot.className = `status-dot ${state}`;

    const hasProgress = state === "executing" && progress && progress.total_actions;
    let label;
    if (hasProgress) {
        const done = progress.completed_actions || 0;
        const total = progress.total_actions || 0;
        label = `Léa exécute… ${done}/${total}`;
    } else {
        // Unknown states fall back to the raw state name.
        label = STATE_LABELS[state] || state;
    }
    statusText.textContent = label;

    // Block typing and sending during planning/executing.
    const composer = document.getElementById("composerInput");
    const sendButton = document.getElementById("sendBtn");
    const locked = ["planning", "executing"].includes(state);
    composer.disabled = locked;
    sendButton.disabled = locked;
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// UX composer
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
function handleKeydown(event) {
    // Enter sends the message; Shift+Enter and every other key are left alone.
    const isPlainEnter = event.key === "Enter" && !event.shiftKey;
    if (!isPlainEnter) {
        return;
    }
    event.preventDefault();
    sendMessage();
}
|
||||
|
||||
function autosizeTextarea() {
    // Fit the composer to its content, capped at 120px.
    const MAX_HEIGHT_PX = 120;
    const composer = document.getElementById("composerInput");
    // Reset to "auto" before scrollHeight is read.
    composer.style.height = "auto";
    const fitted = Math.min(composer.scrollHeight, MAX_HEIGHT_PX);
    composer.style.height = fitted + "px";
}
|
||||
|
||||
// Once the DOM is ready: wire composer auto-sizing, then open the chat session.
document.addEventListener("DOMContentLoaded", function onReady() {
    const composer = document.getElementById("composerInput");
    composer.addEventListener("input", autosizeTextarea);
    initChat();
});
|
||||
309
web_dashboard/templates/chat.html
Normal file
309
web_dashboard/templates/chat.html
Normal file
@@ -0,0 +1,309 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="fr">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Léa — Chat RPA Vision V3</title>
|
||||
<style>
|
||||
* { margin: 0; padding: 0; box-sizing: border-box; }
|
||||
body {
|
||||
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
|
||||
background: #0f172a;
|
||||
color: #e2e8f0;
|
||||
min-height: 100vh;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
}
|
||||
.header {
|
||||
background: linear-gradient(135deg, #3b82f6 0%, #8b5cf6 100%);
|
||||
color: white;
|
||||
padding: 16px 24px;
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
box-shadow: 0 2px 8px rgba(0,0,0,0.3);
|
||||
}
|
||||
.header h1 {
|
||||
font-size: 20px;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 10px;
|
||||
}
|
||||
.header .home-link {
|
||||
color: rgba(255,255,255,0.85);
|
||||
text-decoration: none;
|
||||
font-size: 13px;
|
||||
padding: 6px 14px;
|
||||
border-radius: 6px;
|
||||
background: rgba(255,255,255,0.1);
|
||||
transition: background 0.2s;
|
||||
}
|
||||
.header .home-link:hover { background: rgba(255,255,255,0.2); }
|
||||
|
||||
.chat-wrapper {
|
||||
flex: 1;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
max-width: 900px;
|
||||
width: 100%;
|
||||
margin: 0 auto;
|
||||
padding: 20px;
|
||||
gap: 16px;
|
||||
}
|
||||
|
||||
.status-bar {
|
||||
background: #1e293b;
|
||||
border: 1px solid #334155;
|
||||
border-radius: 12px;
|
||||
padding: 12px 18px;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
gap: 12px;
|
||||
font-size: 14px;
|
||||
}
|
||||
.status-bar .status-label {
|
||||
color: #94a3b8;
|
||||
}
|
||||
.status-bar .status-value {
|
||||
font-weight: 600;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 8px;
|
||||
}
|
||||
.status-dot {
|
||||
width: 10px;
|
||||
height: 10px;
|
||||
border-radius: 50%;
|
||||
background: #64748b;
|
||||
}
|
||||
.status-dot.idle { background: #64748b; }
|
||||
.status-dot.planning { background: #f59e0b; animation: pulse 1.2s infinite; }
|
||||
.status-dot.awaiting_confirmation { background: #3b82f6; animation: pulse 1.8s infinite; }
|
||||
.status-dot.executing { background: #22c55e; animation: pulse 1s infinite; }
|
||||
.status-dot.done { background: #22c55e; }
|
||||
.status-dot.error { background: #ef4444; }
|
||||
@keyframes pulse {
|
||||
0%, 100% { opacity: 1; transform: scale(1); }
|
||||
50% { opacity: 0.5; transform: scale(1.15); }
|
||||
}
|
||||
|
||||
.messages {
|
||||
flex: 1;
|
||||
background: #1e293b;
|
||||
border: 1px solid #334155;
|
||||
border-radius: 12px;
|
||||
padding: 20px;
|
||||
overflow-y: auto;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 14px;
|
||||
min-height: 400px;
|
||||
max-height: calc(100vh - 320px);
|
||||
}
|
||||
.messages::-webkit-scrollbar { width: 8px; }
|
||||
.messages::-webkit-scrollbar-thumb { background: #334155; border-radius: 4px; }
|
||||
.messages::-webkit-scrollbar-track { background: transparent; }
|
||||
|
||||
.message {
|
||||
display: flex;
|
||||
gap: 12px;
|
||||
max-width: 85%;
|
||||
animation: fadeIn 0.25s ease-out;
|
||||
}
|
||||
@keyframes fadeIn {
|
||||
from { opacity: 0; transform: translateY(6px); }
|
||||
to { opacity: 1; transform: translateY(0); }
|
||||
}
|
||||
.message.user {
|
||||
align-self: flex-end;
|
||||
flex-direction: row-reverse;
|
||||
}
|
||||
.message .avatar {
|
||||
width: 34px;
|
||||
height: 34px;
|
||||
border-radius: 50%;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
font-size: 16px;
|
||||
flex-shrink: 0;
|
||||
font-weight: 600;
|
||||
}
|
||||
.message.user .avatar {
|
||||
background: #3b82f6;
|
||||
color: white;
|
||||
}
|
||||
.message.lea .avatar {
|
||||
background: linear-gradient(135deg, #8b5cf6, #ec4899);
|
||||
color: white;
|
||||
}
|
||||
.message.system .avatar {
|
||||
background: #475569;
|
||||
color: #cbd5e1;
|
||||
}
|
||||
.message .bubble {
|
||||
background: #334155;
|
||||
padding: 12px 16px;
|
||||
border-radius: 14px;
|
||||
line-height: 1.5;
|
||||
font-size: 14px;
|
||||
white-space: pre-wrap;
|
||||
word-wrap: break-word;
|
||||
box-shadow: 0 1px 3px rgba(0,0,0,0.2);
|
||||
}
|
||||
.message.user .bubble {
|
||||
background: #1d4ed8;
|
||||
color: white;
|
||||
}
|
||||
.message.lea .bubble {
|
||||
background: #334155;
|
||||
}
|
||||
.message.system .bubble {
|
||||
background: transparent;
|
||||
border: 1px dashed #475569;
|
||||
color: #94a3b8;
|
||||
font-style: italic;
|
||||
}
|
||||
.message .timestamp {
|
||||
font-size: 11px;
|
||||
color: #64748b;
|
||||
margin-top: 4px;
|
||||
}
|
||||
|
||||
.confirm-bar {
|
||||
background: #1e293b;
|
||||
border: 1px solid #3b82f6;
|
||||
border-radius: 12px;
|
||||
padding: 14px 18px;
|
||||
display: none;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
gap: 12px;
|
||||
}
|
||||
.confirm-bar.visible { display: flex; }
|
||||
.confirm-bar .label {
|
||||
font-size: 14px;
|
||||
color: #93c5fd;
|
||||
font-weight: 500;
|
||||
}
|
||||
.confirm-bar .actions { display: flex; gap: 10px; }
|
||||
.btn {
|
||||
padding: 9px 20px;
|
||||
border-radius: 8px;
|
||||
border: none;
|
||||
font-size: 14px;
|
||||
font-weight: 600;
|
||||
cursor: pointer;
|
||||
transition: all 0.15s;
|
||||
}
|
||||
.btn-confirm {
|
||||
background: #22c55e;
|
||||
color: white;
|
||||
}
|
||||
.btn-confirm:hover { background: #16a34a; }
|
||||
.btn-cancel {
|
||||
background: #475569;
|
||||
color: #e2e8f0;
|
||||
}
|
||||
.btn-cancel:hover { background: #64748b; }
|
||||
.btn:disabled {
|
||||
opacity: 0.5;
|
||||
cursor: not-allowed;
|
||||
}
|
||||
|
||||
.composer {
|
||||
background: #1e293b;
|
||||
border: 1px solid #334155;
|
||||
border-radius: 12px;
|
||||
padding: 14px;
|
||||
display: flex;
|
||||
gap: 10px;
|
||||
align-items: flex-end;
|
||||
}
|
||||
.composer textarea {
|
||||
flex: 1;
|
||||
background: #0f172a;
|
||||
border: 1px solid #334155;
|
||||
border-radius: 8px;
|
||||
padding: 10px 14px;
|
||||
color: #e2e8f0;
|
||||
font-family: inherit;
|
||||
font-size: 14px;
|
||||
resize: none;
|
||||
min-height: 42px;
|
||||
max-height: 120px;
|
||||
line-height: 1.5;
|
||||
}
|
||||
.composer textarea:focus {
|
||||
outline: none;
|
||||
border-color: #3b82f6;
|
||||
}
|
||||
.composer textarea:disabled {
|
||||
opacity: 0.6;
|
||||
cursor: not-allowed;
|
||||
}
|
||||
.composer .btn-send {
|
||||
background: #3b82f6;
|
||||
color: white;
|
||||
padding: 10px 22px;
|
||||
}
|
||||
.composer .btn-send:hover { background: #2563eb; }
|
||||
|
||||
.progress-bar {
|
||||
margin-top: 8px;
|
||||
height: 6px;
|
||||
background: #0f172a;
|
||||
border-radius: 3px;
|
||||
overflow: hidden;
|
||||
}
|
||||
.progress-bar .fill {
|
||||
height: 100%;
|
||||
background: linear-gradient(90deg, #3b82f6, #22c55e);
|
||||
width: 0%;
|
||||
transition: width 0.4s ease;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="header">
|
||||
<h1>Léa — Assistant RPA Vision V3</h1>
|
||||
<a href="/" class="home-link">Retour au dashboard</a>
|
||||
</div>
|
||||
|
||||
<div class="chat-wrapper">
|
||||
<div class="status-bar">
|
||||
<div>
|
||||
<span class="status-label">État :</span>
|
||||
<span class="status-value">
|
||||
<span class="status-dot idle" id="statusDot"></span>
|
||||
<span id="statusText">En attente</span>
|
||||
</span>
|
||||
</div>
|
||||
<div id="sessionInfo" style="color:#64748b;font-size:12px;">Aucune session</div>
|
||||
</div>
|
||||
|
||||
<div class="messages" id="messages"></div>
|
||||
|
||||
<div class="confirm-bar" id="confirmBar">
|
||||
<div class="label">Léa propose un plan. Confirmer l'exécution ?</div>
|
||||
<div class="actions">
|
||||
<button class="btn btn-cancel" onclick="confirmPlan(false)">Non, annuler</button>
|
||||
<button class="btn btn-confirm" onclick="confirmPlan(true)">Oui, y aller</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="composer">
|
||||
<textarea
|
||||
id="composerInput"
|
||||
placeholder="Dites à Léa ce que vous voulez faire (ex. « Ouvre le Bloc-notes et écris bonjour »)…"
|
||||
rows="1"
|
||||
onkeydown="handleKeydown(event)"
|
||||
></textarea>
|
||||
<button class="btn btn-send" id="sendBtn" onclick="sendMessage()">Envoyer</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script src="/static/js/chat.js"></script>
|
||||
</body>
|
||||
</html>
|
||||
Reference in New Issue
Block a user