Compare commits
352 Commits
ef5d595d98
...
sp1/anchor
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2cabc6cb7e | ||
|
|
d686c3ac22 | ||
|
|
e212f4141c | ||
|
|
33ddb51c3c | ||
|
|
1d6efdb1b7 | ||
|
|
cf81ce4c7b | ||
|
|
ec1fb81054 | ||
|
|
6d5ef51c60 | ||
|
|
d0c794d923 | ||
|
|
9605cc9d95 | ||
|
|
667575c3ad | ||
|
|
787dbfb0eb | ||
|
|
86b5ec18c6 | ||
|
|
b8b963059e | ||
|
|
2b1743c206 | ||
|
|
48879fb849 | ||
|
|
c12fd8e1c1 | ||
|
|
cbd3d40e39 | ||
|
|
33c1e2e0d1 | ||
|
|
c0e4c382be | ||
|
|
5c5ce747b0 | ||
|
|
b20d17882e | ||
|
|
9fb2c7bfee | ||
|
|
f7f6926410 | ||
|
|
09f65cecbe | ||
|
|
0ee54157e5 | ||
|
|
6d34b3cb68 | ||
|
|
f18de016d7 | ||
|
|
549ea0631b | ||
|
|
0e215da842 | ||
|
|
d00fe7b00b | ||
|
|
5b2afa3629 | ||
|
|
0f122a512f | ||
|
|
806cc04b82 | ||
|
|
4dc7d840d6 | ||
|
|
4e7c2a7628 | ||
|
|
3697e3ba0e | ||
|
|
5289f3de48 | ||
|
|
4b3d5ce0d7 | ||
|
|
9b8bdfdbbe | ||
|
|
f2e9aac6b7 | ||
|
|
18ed6cb751 | ||
|
|
d38f0b0f2f | ||
|
|
86b3c8f7e7 | ||
|
|
7a1a5cb6fd | ||
|
|
2dd306724c | ||
|
|
335d576830 | ||
|
|
1a58a0d1f1 | ||
|
|
eb2df539f1 | ||
|
|
c9f848273b | ||
|
|
45ec5fe969 | ||
|
|
8b6c397531 | ||
|
|
6a300a4298 | ||
|
|
0587036c17 | ||
|
|
f2a9e40502 | ||
|
|
34527b5cc5 | ||
|
|
bd3aaf7d64 | ||
|
|
05a30f2d1d | ||
|
|
47377226f2 | ||
|
|
d515b22d1b | ||
|
|
aba849324a | ||
|
|
7ad260d02f | ||
|
|
794a248dae | ||
|
|
8332b2cd37 | ||
|
|
9a45e61e2a | ||
|
|
e66bc6d452 | ||
|
|
7b1f30af1a | ||
|
|
488d14240a | ||
|
|
45b6da5e3f | ||
|
|
02211fddf2 | ||
|
|
ed36bc2b37 | ||
|
|
9677738f32 | ||
|
|
d422aa119c | ||
|
|
7b943926db | ||
|
|
99f89317cb | ||
|
|
6b8114eb97 | ||
|
|
7ef98d8089 | ||
|
|
8ea4ed0ad2 | ||
|
|
a49f59b4d6 | ||
|
|
762e75a077 | ||
|
|
c1a144c673 | ||
|
|
e8a0fb0e42 | ||
|
|
4ba426c205 | ||
|
|
7bb8d543ab | ||
|
|
debd7b423c | ||
|
|
6544ebe3f0 | ||
|
|
10136f0ee0 | ||
|
|
054279feb4 | ||
|
|
ea1f57afb1 | ||
|
|
345762330b | ||
|
|
b1b32187ba | ||
|
|
ad24d16d83 | ||
|
|
a76f3db682 | ||
|
|
9a029a221d | ||
|
|
5ed1810ef3 | ||
|
|
c9878f0a76 | ||
|
|
08701761e6 | ||
|
|
a13d6d0052 | ||
|
|
84d2d4a667 | ||
|
|
1b4e64960b | ||
|
|
bd100bc538 | ||
|
|
1647e42d32 | ||
|
|
7df51d2c79 | ||
|
|
5ea4960e65 | ||
|
|
f2212e77e3 | ||
|
|
9872f4510c | ||
|
|
2eeaa806bb | ||
|
|
df5ad59330 | ||
|
|
bfbf0f9c3e | ||
|
|
ecc5a233a7 | ||
|
|
293e54b4e6 | ||
|
|
0d7bcd18ac | ||
|
|
4df1ba5779 | ||
|
|
e9702b4df9 | ||
|
|
e0b47e4518 | ||
|
|
5dc20cc85b | ||
|
|
88ed103de5 | ||
|
|
194853cebb | ||
|
|
626823d327 | ||
|
|
2e76b44ff3 | ||
|
|
731b5bcae2 | ||
|
|
8648e375fe | ||
|
|
56e869c467 | ||
|
|
f8dc3c3af4 | ||
|
|
ca81850a20 | ||
|
|
35fd6cf4c5 | ||
|
|
7847a0e829 | ||
|
|
40440f1ca0 | ||
|
|
7233df2bb9 | ||
|
|
f62fda575f | ||
|
|
22c0a2ba61 | ||
|
|
6fdedbfe9d | ||
|
|
c969f93a23 | ||
|
|
1cbec2806e | ||
|
|
864530c851 | ||
|
|
d1ebf62217 | ||
|
|
87dbe8c5ff | ||
|
|
0a02a6ec9c | ||
|
|
83be93e121 | ||
|
|
f5c33477f0 | ||
|
|
b1a3aa16f1 | ||
|
|
0bcfddbbc4 | ||
|
|
aa47172f0f | ||
|
|
65da557310 | ||
|
|
af13cd80ff | ||
|
|
7c6945171e | ||
|
|
ca0b436a61 | ||
|
|
fc01afa59c | ||
|
|
2a51a844b9 | ||
|
|
2d71e2a249 | ||
|
|
fae95c5366 | ||
|
|
6582a69d31 | ||
|
|
5543e25f9d | ||
|
|
2a07d8084b | ||
|
|
35b27ae492 | ||
|
|
b584bbabc3 | ||
|
|
8817f527e7 | ||
|
|
964856ab30 | ||
|
|
a67d896104 | ||
|
|
90c1d8036f | ||
|
|
6261002039 | ||
|
|
0e6e61f2b1 | ||
|
|
41c1250c99 | ||
|
|
2af3bc3b93 | ||
|
|
6154423a91 | ||
|
|
41eba898c0 | ||
|
|
9452e86fd1 | ||
|
|
5e31cdf666 | ||
|
|
487bcb8618 | ||
|
|
3d6868f029 | ||
|
|
f73a2a59a9 | ||
|
|
77faa03ec9 | ||
|
|
343d6fbe95 | ||
|
|
cc64439738 | ||
|
|
90007cc7c1 | ||
|
|
73cea2385e | ||
|
|
e2046837cf | ||
|
|
b30d4b6656 | ||
|
|
e4a48e78bf | ||
|
|
ea36bba5cc | ||
|
|
9da589c8c2 | ||
|
|
16ff396dbf | ||
|
|
e44fd7b328 | ||
|
|
66815b7a1a | ||
|
|
c6b695eca8 | ||
|
|
99d2083dea | ||
|
|
a718086140 | ||
|
|
c82979e72b | ||
|
|
2185c41cc1 | ||
|
|
26804eb123 | ||
|
|
d71d5df4a8 | ||
|
|
6829ad8e79 | ||
|
|
8903f35433 | ||
|
|
4ab2c15e5c | ||
|
|
eba6fea779 | ||
|
|
f04398d5a7 | ||
|
|
4ce9c47f45 | ||
|
|
9dfcdb5fb0 | ||
|
|
3efe15d2c7 | ||
|
|
9d87ed64c5 | ||
|
|
00134963e5 | ||
|
|
0ec5e2a25b | ||
|
|
0c5fffe951 | ||
|
|
5027ed9a23 | ||
|
|
6caab2c600 | ||
|
|
552e66dbf6 | ||
|
|
de1026ee2e | ||
|
|
7b50725bf8 | ||
|
|
7feef3b6a9 | ||
|
|
0b06db222d | ||
|
|
74ee0dadee | ||
|
|
0b452f975a | ||
|
|
6ab385d671 | ||
|
|
b3eab83a0f | ||
|
|
27490849a8 | ||
|
|
cebbf0809a | ||
|
|
3e227d28ad | ||
|
|
8ce63fcba2 | ||
|
|
4202431421 | ||
|
|
4923623dd4 | ||
|
|
84181cc982 | ||
|
|
7355d315a3 | ||
|
|
c50adab3a1 | ||
|
|
2fbb305f65 | ||
|
|
ff581be397 | ||
|
|
203e5cc6c1 | ||
|
|
d1b556b6cd | ||
|
|
729cd67743 | ||
|
|
73ddcdb29d | ||
|
|
14a9442343 | ||
|
|
5da4581e76 | ||
|
|
cbe8dc95d2 | ||
|
|
04a14a56b2 | ||
|
|
2290f1846b | ||
|
|
c57b40ae1d | ||
|
|
bc21b27da7 | ||
|
|
6a2248ddcd | ||
|
|
82d7b38cff | ||
|
|
6c7f88c05d | ||
|
|
447fbb2c6e | ||
|
|
623be15bfe | ||
|
|
55d5aebbd2 | ||
|
|
73b731fef8 | ||
|
|
ffd97ae9a5 | ||
|
|
d168833609 | ||
|
|
23a06a744c | ||
|
|
af4eae28b9 | ||
|
|
c198c930a1 | ||
|
|
e3efef2fe7 | ||
|
|
95fddeebb3 | ||
|
|
71523cebd3 | ||
|
|
3aa806a630 | ||
|
|
588c8f22c1 | ||
|
|
3d243d731d | ||
|
|
2431a6c9e9 | ||
|
|
969236da03 | ||
|
|
f30461b88c | ||
|
|
f34eca20f9 | ||
|
|
309dfd5287 | ||
|
|
f5a672d7b9 | ||
|
|
1acea85fa6 | ||
|
|
4f61741420 | ||
|
|
2fa864b5c7 | ||
|
|
10739c33fa | ||
|
|
39bea1b042 | ||
|
|
26b4e6d8ce | ||
|
|
4fb84b1090 | ||
|
|
7f2bc6fe97 | ||
|
|
eded968c70 | ||
|
|
53d29d9b24 | ||
|
|
690053bd57 | ||
|
|
c7b0649716 | ||
|
|
2bfcfa4535 | ||
|
|
b808e48b1f | ||
|
|
78ee962918 | ||
|
|
c8a3618e27 | ||
|
|
9ca277a63f | ||
|
|
8c7b6e5696 | ||
|
|
af4ffa189a | ||
|
|
42f571d496 | ||
|
|
36737cfe9d | ||
|
|
93ef93e563 | ||
|
|
376e4a88b3 | ||
|
|
bb4ed2a75d | ||
|
|
f7b8cddd2b | ||
|
|
a9a99953dd | ||
|
|
aee64f54b1 | ||
|
|
c77844fa9a | ||
|
|
013fe071a2 | ||
|
|
203dc00d53 | ||
|
|
e9a028134a | ||
|
|
01bba7bc6c | ||
|
|
d5285de99c | ||
|
|
33c198b827 | ||
|
|
816b37af98 | ||
|
|
d82aad984f | ||
|
|
057c37131f | ||
|
|
9bcce3fc68 | ||
|
|
f96f6322ec | ||
|
|
02ee2d7b5b | ||
|
|
47993e2ee9 | ||
|
|
7cc03f6f10 | ||
|
|
a21f1ea9fa | ||
|
|
9188bd7df1 | ||
|
|
f82753debe | ||
|
|
b92cb9db03 | ||
|
|
e66629ce1a | ||
|
|
cecdf417b7 | ||
|
|
56e3cc052a | ||
|
|
332366b58c | ||
|
|
ac9c207474 | ||
|
|
f85d56ac05 | ||
|
|
172167f6c0 | ||
|
|
42d49dd8bd | ||
|
|
f541bb8ce4 | ||
|
|
a6eb4c168f | ||
|
|
f6ad5ff2b2 | ||
|
|
2ac781343a | ||
|
|
bffcfb2db3 | ||
|
|
cc673755f7 | ||
|
|
4509038bf0 | ||
|
|
99041f0117 | ||
|
|
72a9651b94 | ||
|
|
8589e87a13 | ||
|
|
8a1dfc6e8b | ||
|
|
3bcf59e16f | ||
|
|
46206d9396 | ||
|
|
d3e928bebe | ||
|
|
a679fbb62b | ||
|
|
f0b311306d | ||
|
|
1c5ff42006 | ||
|
|
b09a3df054 | ||
|
|
fceb76de1f | ||
|
|
6d4ff4f215 | ||
|
|
2486e43def | ||
|
|
20b74286f7 | ||
|
|
a1c97504ab | ||
|
|
d6c7346898 | ||
|
|
90ee8ca8f4 | ||
|
|
84a91630e9 | ||
|
|
91614fbff0 | ||
|
|
c1ce6a3964 | ||
|
|
0bd0fbb8c5 | ||
|
|
394342be7e | ||
|
|
6724f43950 | ||
|
|
d99b17394a | ||
|
|
875367dea9 | ||
|
|
a74056ca22 | ||
|
|
6937b94f2a | ||
|
|
4f5c518d3a | ||
|
|
7dec3ab63a | ||
|
|
68d5bb7dd1 |
12
.env.example
12
.env.example
@@ -30,7 +30,9 @@ DASHBOARD_PORT=5001
|
||||
CLIP_MODEL=ViT-B-32
|
||||
CLIP_PRETRAINED=openai
|
||||
CLIP_DEVICE=cpu # cpu or cuda
|
||||
VLM_MODEL=qwen3-vl:8b
|
||||
RPA_VLM_MODEL=gemma4:latest # gemma4:latest (défaut), qwen3-vl:8b, ui-tars (fallback)
|
||||
VLM_MODEL=gemma4:latest # alias de compatibilité
|
||||
# VLM_ALLOW_CLOUD=false # true pour activer les APIs cloud en fallback (OpenAI, Gemini, Anthropic)
|
||||
VLM_ENDPOINT=http://localhost:11434
|
||||
OWL_MODEL=google/owlv2-base-patch16-ensemble
|
||||
OWL_CONFIDENCE_THRESHOLD=0.1
|
||||
@@ -44,6 +46,14 @@ LOGS_PATH=logs
|
||||
UPLOADS_PATH=data/training/uploads
|
||||
SESSIONS_PATH=data/training/sessions
|
||||
|
||||
# ============================================================================
|
||||
# Feedback Bus (Léa parle pendant exécution)
|
||||
# ============================================================================
|
||||
# Bus SocketIO unifié 'lea:*' (action_started, action_done, need_confirm, paused).
|
||||
# Désactivé par défaut. Mettre à 1 pour activer les bulles temps réel dans ChatWindow.
|
||||
# Si la connexion bus échoue, l'exécution continue normalement (fail-safe).
|
||||
LEA_FEEDBACK_BUS=0
|
||||
|
||||
# ============================================================================
|
||||
# FAISS
|
||||
# ============================================================================
|
||||
|
||||
207
.gitea/workflows/security-audit.yml
Normal file
207
.gitea/workflows/security-audit.yml
Normal file
@@ -0,0 +1,207 @@
|
||||
# ------------------------------------------------------------------
|
||||
# Audit sécurité — bandit + pip-audit + scan secrets
|
||||
# ------------------------------------------------------------------
|
||||
# Jamais bloquant : on reporte les warnings, on ne casse pas la CI.
|
||||
# Utile pour détecter les dérives progressives (nouveaux CVE, secrets
|
||||
# oubliés dans un commit, patterns risqués).
|
||||
#
|
||||
# Fréquence : à chaque push sur main + hebdo (cron).
|
||||
# ------------------------------------------------------------------
|
||||
name: security-audit
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
schedule:
|
||||
# Tous les lundis à 6h UTC (8h Paris hiver, 7h Paris été).
|
||||
- cron: "0 6 * * 1"
|
||||
workflow_dispatch: {}
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
# ----------------------------------------------------------------
|
||||
# Job 1 — bandit (bonnes pratiques sécu Python)
|
||||
# ----------------------------------------------------------------
|
||||
bandit:
|
||||
name: Bandit (scan statique)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
continue-on-error: true
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Python 3.12
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.12"
|
||||
cache: "pip"
|
||||
|
||||
- name: Installation bandit
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install "bandit[toml]==1.7.10"
|
||||
|
||||
- name: Scan bandit sur core/
|
||||
run: |
|
||||
# -ll : niveau LOW minimum (remonte tout)
|
||||
# -ii : confiance LOW minimum
|
||||
# --skip B101 : on ignore les asserts (usuels en tests/validation)
|
||||
bandit -r core/ \
|
||||
--skip B101,B404,B603 \
|
||||
--format txt \
|
||||
--exit-zero \
|
||||
--output bandit-report.txt
|
||||
echo "=== RAPPORT BANDIT ==="
|
||||
cat bandit-report.txt
|
||||
|
||||
- name: Upload rapport bandit
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: bandit-report
|
||||
path: bandit-report.txt
|
||||
retention-days: 30
|
||||
if-no-files-found: ignore
|
||||
|
||||
# ----------------------------------------------------------------
|
||||
# Job 2 — pip-audit (CVE sur requirements)
|
||||
# ----------------------------------------------------------------
|
||||
pip-audit:
|
||||
name: pip-audit (CVE dépendances)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
continue-on-error: true
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Python 3.12
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.12"
|
||||
cache: "pip"
|
||||
|
||||
- name: Installation pip-audit
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install "pip-audit==2.7.3"
|
||||
|
||||
- name: Audit CVE sur requirements-ci.txt
|
||||
run: |
|
||||
if [ -f requirements-ci.txt ]; then
|
||||
pip-audit -r requirements-ci.txt \
|
||||
--format json \
|
||||
--output pip-audit-ci.json \
|
||||
--progress-spinner off \
|
||||
--disable-pip || echo "::warning::CVE détectées dans requirements-ci.txt"
|
||||
echo "=== RAPPORT pip-audit (CI) ==="
|
||||
cat pip-audit-ci.json || true
|
||||
else
|
||||
echo "::notice::requirements-ci.txt absent — skip"
|
||||
fi
|
||||
|
||||
- name: Audit CVE sur requirements.txt (best-effort)
|
||||
run: |
|
||||
# Timeout généreux car requirements.txt est massif (torch, CUDA).
|
||||
timeout 120 pip-audit -r requirements.txt \
|
||||
--format json \
|
||||
--output pip-audit-full.json \
|
||||
--progress-spinner off \
|
||||
--disable-pip 2>&1 | head -200 || \
|
||||
echo "::warning::pip-audit sur requirements.txt a timeout ou échoué (non bloquant)"
|
||||
|
||||
- name: Upload rapports pip-audit
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: pip-audit-reports
|
||||
path: |
|
||||
pip-audit-ci.json
|
||||
pip-audit-full.json
|
||||
retention-days: 30
|
||||
if-no-files-found: ignore
|
||||
|
||||
# ----------------------------------------------------------------
|
||||
# Job 3 — Scan secrets en clair (grep simple)
|
||||
# ----------------------------------------------------------------
|
||||
# Patterns recherchés : clés API Anthropic (sk-ant-), OpenAI (sk-),
|
||||
# Google (AIzaSy), AWS (AKIA), tokens Hugging Face (hf_).
|
||||
# Ne cherche QUE dans les fichiers trackés (pas .env, pas .venv).
|
||||
# ----------------------------------------------------------------
|
||||
secrets-scan:
|
||||
name: Scan secrets (grep)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 3
|
||||
continue-on-error: true
|
||||
|
||||
steps:
|
||||
- name: Checkout (historique complet)
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Scan patterns de secrets
|
||||
run: |
|
||||
# Chemins exclus : venvs, caches, data, htmlcov, models.
|
||||
EXCLUDES='--exclude-dir=.venv --exclude-dir=venv_v3 --exclude-dir=.git \
|
||||
--exclude-dir=node_modules --exclude-dir=htmlcov --exclude-dir=models \
|
||||
--exclude-dir=data --exclude-dir=__pycache__ --exclude-dir=.pytest_cache \
|
||||
--exclude=*.lock --exclude=*.log --exclude=*.md'
|
||||
|
||||
echo "=== Recherche de secrets potentiels ==="
|
||||
FOUND=0
|
||||
|
||||
# Anthropic
|
||||
if grep -rnI $EXCLUDES -E 'sk-ant-[a-zA-Z0-9_-]{20,}' . 2>/dev/null; then
|
||||
echo "::warning::Clé Anthropic potentielle détectée"
|
||||
FOUND=1
|
||||
fi
|
||||
|
||||
# OpenAI
|
||||
if grep -rnI $EXCLUDES -E 'sk-proj-[a-zA-Z0-9_-]{20,}|sk-[a-zA-Z0-9]{40,}' . 2>/dev/null; then
|
||||
echo "::warning::Clé OpenAI potentielle détectée"
|
||||
FOUND=1
|
||||
fi
|
||||
|
||||
# Google Cloud / API Keys
|
||||
if grep -rnI $EXCLUDES -E 'AIzaSy[a-zA-Z0-9_-]{33}' . 2>/dev/null; then
|
||||
echo "::warning::Clé Google API potentielle détectée"
|
||||
FOUND=1
|
||||
fi
|
||||
|
||||
# AWS
|
||||
if grep -rnI $EXCLUDES -E 'AKIA[0-9A-Z]{16}' . 2>/dev/null; then
|
||||
echo "::warning::Clé AWS potentielle détectée"
|
||||
FOUND=1
|
||||
fi
|
||||
|
||||
# Hugging Face
|
||||
if grep -rnI $EXCLUDES -E 'hf_[a-zA-Z0-9]{30,}' . 2>/dev/null; then
|
||||
echo "::warning::Token Hugging Face potentiel détecté"
|
||||
FOUND=1
|
||||
fi
|
||||
|
||||
# Mots-clés suspects à côté d'assignations
|
||||
if grep -rnI $EXCLUDES -E '(password|passwd|secret|api_key|apikey|token)\s*=\s*["\x27][a-zA-Z0-9_\-!@#\$%]{12,}["\x27]' . 2>/dev/null \
|
||||
| grep -viE '(example|dummy|placeholder|test|fake|xxx|changeme|\$\{)' 2>/dev/null; then
|
||||
echo "::warning::Assignation suspecte d'un secret détectée"
|
||||
FOUND=1
|
||||
fi
|
||||
|
||||
if [ "$FOUND" -eq 0 ]; then
|
||||
echo "Aucun secret détecté par les patterns de base."
|
||||
else
|
||||
echo ""
|
||||
echo "::notice::Vérifier manuellement les occurrences ci-dessus."
|
||||
echo "::notice::Si faux positif : ajouter le fichier aux exclusions ou reformater."
|
||||
fi
|
||||
|
||||
# Toujours succès (job non bloquant).
|
||||
exit 0
|
||||
214
.gitea/workflows/tests.yml
Normal file
214
.gitea/workflows/tests.yml
Normal file
@@ -0,0 +1,214 @@
|
||||
# ------------------------------------------------------------------
|
||||
# CI principale — Tests unitaires + lint léger
|
||||
# ------------------------------------------------------------------
|
||||
# Déclenchement : push / pull_request sur n'importe quelle branche.
|
||||
# Objectif : feedback rapide (< 3 min) sans GPU ni Ollama.
|
||||
# Runner : self-hosted (label "ubuntu-latest" ou équivalent).
|
||||
#
|
||||
# Les tests marqués `slow`, `gpu`, `integration`, `performance`,
|
||||
# `visual` et `smoke` sont exclus volontairement — ils nécessitent
|
||||
# CUDA, Ollama, ou des captures d'écran réelles.
|
||||
# ------------------------------------------------------------------
|
||||
name: tests
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- "**"
|
||||
pull_request:
|
||||
branches:
|
||||
- "**"
|
||||
|
||||
# Permet à une nouvelle exécution d'annuler les précédentes
|
||||
# sur la même branche (évite l'engorgement du runner local).
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
# Empêche l'import accidentel de torch/CUDA pendant la CI.
|
||||
PYTHONDONTWRITEBYTECODE: "1"
|
||||
PIP_DISABLE_PIP_VERSION_CHECK: "1"
|
||||
PIP_NO_PYTHON_VERSION_WARNING: "1"
|
||||
# Les modules d'exécution lisent parfois ces vars ; valeurs neutres en CI.
|
||||
RPA_VISION_CI: "1"
|
||||
RPA_AUTH_VAULT_PATH: "/tmp/ci_vault.enc"
|
||||
# api_stream.py a un fail-closed P0-C : si RPA_API_TOKEN absent, sys.exit(1)
|
||||
# au module load. On fournit un token bidon pour que les imports passent en CI.
|
||||
# (Le token n'est jamais utilisé réellement — les tests mockent les requêtes.)
|
||||
RPA_API_TOKEN: "ci_test_token_not_used_for_real_auth_just_to_pass_import_check_0123456789"
|
||||
|
||||
jobs:
|
||||
# ----------------------------------------------------------------
|
||||
# Job 1 — Lint (ruff + black --check)
|
||||
# ----------------------------------------------------------------
|
||||
# Non-bloquant : si ruff/black ne sont pas installables, on log
|
||||
# un warning et on continue. L'objectif ici est d'alerter, pas de
|
||||
# casser la CI pour des espaces en trop.
|
||||
# ----------------------------------------------------------------
|
||||
lint:
|
||||
name: Lint (ruff + black)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
continue-on-error: true
|
||||
|
||||
steps:
|
||||
- name: Checkout du code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Python 3.12
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.12"
|
||||
cache: "pip"
|
||||
|
||||
- name: Installation des linters
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install "ruff==0.6.9" "black==23.12.1" || {
|
||||
echo "::warning::Impossible d'installer ruff/black — job ignoré"
|
||||
exit 0
|
||||
}
|
||||
|
||||
- name: Ruff (lint rapide)
|
||||
run: |
|
||||
if command -v ruff >/dev/null 2>&1; then
|
||||
# Ruff : erreurs critiques uniquement (E9 syntax, F63 invalid print,
|
||||
# F7 syntax, F82 undefined in __all__).
|
||||
# F821 (undefined name) volontairement exclu le temps de nettoyer
|
||||
# la dette technique préexistante (voir docs/STATUS.md).
|
||||
# Dossiers legacy exclus :
|
||||
# - agent_v0/deploy/windows_client/ : clone obsolète (marqué OBSOLÈTE)
|
||||
# - tests/property/ : tests cassés connus (cf. MEMORY.md)
|
||||
ruff check --select=E9,F63,F7,F82 --output-format=github \
|
||||
--exclude "agent_v0/deploy/windows_client" \
|
||||
--exclude "tests/property" \
|
||||
--exclude "tests/integration/test_visual_rpa_checkpoint.py" \
|
||||
core/ agent_v0/ tests/ || {
|
||||
echo "::warning::Ruff a trouvé des erreurs critiques"
|
||||
exit 1
|
||||
}
|
||||
else
|
||||
echo "::warning::ruff indisponible — skip"
|
||||
fi
|
||||
|
||||
- name: Black (format check)
|
||||
run: |
|
||||
if command -v black >/dev/null 2>&1; then
|
||||
# --check : ne modifie pas, signale juste.
|
||||
# Dossiers legacy exclus (cohérent avec ruff).
|
||||
black --check --diff \
|
||||
--exclude "agent_v0/deploy/windows_client|tests/property" \
|
||||
core/ agent_v0/ tests/ || {
|
||||
echo "::warning::Black suggère un reformatage — non bloquant"
|
||||
exit 0
|
||||
}
|
||||
else
|
||||
echo "::warning::black indisponible — skip"
|
||||
fi
|
||||
|
||||
# ----------------------------------------------------------------
|
||||
# Job 2 — Tests unitaires
|
||||
# ----------------------------------------------------------------
|
||||
# Exclut tous les marqueurs lourds. Utilise requirements-ci.txt
|
||||
# pour éviter torch/CUDA (économie ~3 Go + ~2 min).
|
||||
# ----------------------------------------------------------------
|
||||
unit-tests:
|
||||
name: Tests unitaires (sans GPU)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
|
||||
steps:
|
||||
- name: Checkout du code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Python 3.12
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.12"
|
||||
cache: "pip"
|
||||
cache-dependency-path: |
|
||||
requirements-ci.txt
|
||||
requirements.txt
|
||||
|
||||
- name: Installation des dépendances CI
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
if [ -f requirements-ci.txt ]; then
|
||||
echo "Utilisation de requirements-ci.txt (léger, sans torch)"
|
||||
pip install -r requirements-ci.txt
|
||||
else
|
||||
echo "::warning::requirements-ci.txt absent — fallback requirements.txt (lourd)"
|
||||
pip install -r requirements.txt
|
||||
fi
|
||||
|
||||
- name: Vérification imports critiques
|
||||
run: |
|
||||
python -c "import pytest; print(f'pytest {pytest.__version__}')"
|
||||
python -c "import sys; sys.path.insert(0, '.'); import core; print('core OK')" || {
|
||||
echo "::error::Impossible d'importer core.*"
|
||||
exit 1
|
||||
}
|
||||
|
||||
- name: Tests unitaires (hors slow/gpu/integration)
|
||||
run: |
|
||||
python -m pytest tests/unit/ \
|
||||
-m "not slow and not gpu and not integration and not performance and not visual" \
|
||||
--tb=short \
|
||||
--strict-markers \
|
||||
-q \
|
||||
--maxfail=10 \
|
||||
-o cache_dir=/tmp/.pytest_cache_ci
|
||||
|
||||
- name: Upload logs si échec
|
||||
if: failure()
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: pytest-logs
|
||||
path: |
|
||||
/tmp/.pytest_cache_ci
|
||||
logs/
|
||||
retention-days: 3
|
||||
if-no-files-found: ignore
|
||||
|
||||
# ----------------------------------------------------------------
|
||||
# Job 3 — Tests sécurité (bloquant)
|
||||
# ----------------------------------------------------------------
|
||||
# Les tests `test_security_*` valident des invariants critiques
|
||||
# (évaluation sûre, sérialisation signée). Aucune régression tolérée.
|
||||
# ----------------------------------------------------------------
|
||||
security-tests:
|
||||
name: Tests sécurité (critique)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
needs: [unit-tests]
|
||||
|
||||
steps:
|
||||
- name: Checkout du code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Python 3.12
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.12"
|
||||
cache: "pip"
|
||||
cache-dependency-path: |
|
||||
requirements-ci.txt
|
||||
requirements.txt
|
||||
|
||||
- name: Installation des dépendances CI
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
if [ -f requirements-ci.txt ]; then
|
||||
pip install -r requirements-ci.txt
|
||||
else
|
||||
pip install -r requirements.txt
|
||||
fi
|
||||
|
||||
- name: Tests sécurité (test_security_*)
|
||||
run: |
|
||||
python -m pytest tests/unit/test_security_*.py \
|
||||
--tb=long \
|
||||
--strict-markers \
|
||||
-v \
|
||||
-o cache_dir=/tmp/.pytest_cache_ci_sec
|
||||
59
.gitignore
vendored
59
.gitignore
vendored
@@ -74,4 +74,63 @@ htmlcov/
|
||||
|
||||
# === Backups ===
|
||||
*_backup_*
|
||||
*.db.backup_*
|
||||
backups/
|
||||
*.bak
|
||||
*.bak_*
|
||||
*.orig
|
||||
*.old
|
||||
|
||||
# === Legacy / Triage ===
|
||||
_a_trier/
|
||||
archives/
|
||||
|
||||
# === Claude Code — worktrees et données locales ===
|
||||
# Worktrees générés par la CLI Claude Code lors d'exécutions d'agents
|
||||
# parallèles. Peuvent atteindre plusieurs centaines de Mo chacun.
|
||||
# Ne jamais committer — gérer via `git worktree list` / `git worktree remove`.
|
||||
.claude/
|
||||
.kiro/
|
||||
.antigravitycli/
|
||||
.playwright-cli/
|
||||
.qwen/
|
||||
.mcp.json
|
||||
.snapshots/
|
||||
|
||||
# === Données runtime (sessions, learning, buffer, config local) ===
|
||||
data/
|
||||
**/capture_library.json
|
||||
.hypothesis/
|
||||
.deps_installed
|
||||
# Buffers SQLite locaux (streamer, cache)
|
||||
**/buffer/
|
||||
**/pending_events.db
|
||||
# Databases applicatives (instance Flask)
|
||||
**/instance/*.db
|
||||
**/instance/*.sqlite
|
||||
**/instance/*.sqlite3
|
||||
# Caches et index locaux
|
||||
*.sqlite
|
||||
*.sqlite3
|
||||
*.db-journal
|
||||
*.db-wal
|
||||
*.db-shm
|
||||
web_dashboard/static/analytics/*.bpmn
|
||||
results_vlm_bench.json
|
||||
|
||||
# Scripts locaux one-shot d'intervention/bench, non réutilisables tels quels.
|
||||
tools/bench_qwen35_evidence.py
|
||||
tools/codex_windows_correction_rapport.py
|
||||
|
||||
# Verbatims clients (sensibles, à valider avant push)
|
||||
docs/clients/
|
||||
|
||||
.qw-baseline.log
|
||||
docs/coordination/.loop_state/
|
||||
|
||||
# Runtime Python embedded pour l'installateur Inno Setup (local, ~11M, non versionné)
|
||||
deploy/installer/python-3.12-embed/
|
||||
deploy/installer/python-3.12.8-embed-amd64.zip
|
||||
# Artefacts de build installateur (EXE compilés + staging) — non versionnés
|
||||
deploy/releases/*.exe
|
||||
deploy/build/
|
||||
|
||||
8
.vite/deps/_metadata.json
Normal file
8
.vite/deps/_metadata.json
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"hash": "cccc2566",
|
||||
"configHash": "0c083961",
|
||||
"lockfileHash": "e3b0c442",
|
||||
"browserHash": "764a8433",
|
||||
"optimized": {},
|
||||
"chunks": {}
|
||||
}
|
||||
3
.vite/deps/package.json
Normal file
3
.vite/deps/package.json
Normal file
@@ -0,0 +1,3 @@
|
||||
{
|
||||
"type": "module"
|
||||
}
|
||||
12
AGENTS.md
Normal file
12
AGENTS.md
Normal file
@@ -0,0 +1,12 @@
|
||||
## graphify
|
||||
|
||||
This project has a knowledge graph at graphify-out/ with god nodes, community structure, and cross-file relationships.
|
||||
|
||||
When the user types `/graphify`, invoke the `skill` tool with `skill: "graphify"` before doing anything else.
|
||||
|
||||
Rules:
|
||||
- For codebase questions, first run `graphify query "<question>"` when graphify-out/graph.json exists. Use `graphify path "<A>" "<B>"` for relationships and `graphify explain "<concept>"` for focused concepts. These return a scoped subgraph, usually much smaller than GRAPH_REPORT.md or raw grep output.
|
||||
- Dirty graphify-out/ files are expected after hooks or incremental updates; dirty graph files are not a reason to skip graphify. Only skip graphify if the task is about stale or incorrect graph output, or the user explicitly says not to use it.
|
||||
- If graphify-out/wiki/index.md exists, use it for broad navigation instead of raw source browsing.
|
||||
- Read graphify-out/GRAPH_REPORT.md only for broad architecture review or when query/path/explain do not surface enough context.
|
||||
- After modifying code, run `graphify update .` to keep the graph current (AST-only, no API cost).
|
||||
106
CLAUDE.md
Normal file
106
CLAUDE.md
Normal file
@@ -0,0 +1,106 @@
|
||||
# CLAUDE.md — rpa_vision_v3
|
||||
|
||||
Ce fichier prime sur le CLAUDE.md racine (`~/ai/CLAUDE.md`) pour tout travail dans ce projet.
|
||||
|
||||
## Rôle de Claude Code sur ce projet
|
||||
|
||||
Exécutant supervisé, pas architecte. Mission : garantir la **cohérence** de chaque modification avec la vision globale du projet et le **contrat "100% vision"** (résolution UI par la vue, pas par les sélecteurs DOM/API). Quand tu touches un fichier, vérifie que tu ne casses rien ailleurs.
|
||||
|
||||
Tu n'es pas en autonomie. Dom valide avant chaque étape. Tu proposes, il décide.
|
||||
|
||||
## Priorité absolue
|
||||
|
||||
**La démo Urgence_aiva_demo doit fonctionner.** Workflow 22+ steps sur Easily Assure, patiente MOREL Catherine, audience mixte DG/DSI/médecins/DIM/TIM. Tout arbitrage technique se tranche par : "est-ce que ça rapproche ou éloigne de la démo qui tourne ?"
|
||||
|
||||
## Méthode obligatoire — non négociable
|
||||
|
||||
- **Chirurgie itérative supervisée** : une modification, un test (≤ 2 min), validation explicite de Dom avant la suivante.
|
||||
- **Pas de batch** : jamais plusieurs changements groupés sans validation intermédiaire.
|
||||
- **Rustine interdite** : tu corriges la cause, pas le symptôme. Si tu ne comprends pas la cause, tu le dis et tu arrêtes.
|
||||
- **Lire la doc avant d'agir** : code existant, `docs/`, specs. Pas de proposition basée sur des suppositions.
|
||||
- **Un commit = une intention** : message explicite, daté.
|
||||
- **Diff review systématique** sur tout code de production avant commit.
|
||||
|
||||
## Anti-patterns à proscrire
|
||||
|
||||
- Réponses longues. Si Dom dit "trop long" ou "déjà vu", tu raccourcis sans débattre.
|
||||
- Propositions structurelles avant d'avoir compris l'intention de Dom.
|
||||
- Re-proposer ce qui est déjà en place dans le code.
|
||||
- Raisonner sur un composant trouvé via grep **sans vérifier qu'il est effectivement appelé au runtime**. Le projet contient beaucoup de code écrit mais non wired.
|
||||
- Présenter la première solution qui marche. Toujours explorer 2-3 approches, présenter la meilleure avec justification.
|
||||
|
||||
## Architecture runtime réelle (à valider/raffiner avec Dom)
|
||||
|
||||
```
|
||||
[VWB frontend React :3002]
|
||||
↓ (HTTP)
|
||||
[VWB backend Flask + SQLite]
|
||||
↓ (envoi step par step)
|
||||
[agent_v1 — Linux]
|
||||
↓ (SSH vers Windows)
|
||||
[Léa — chatbot exécutant — PC Windows]
|
||||
↓
|
||||
[Easily Assure — interface cible]
|
||||
```
|
||||
|
||||
**Ollama** : sert le ou les modèles utilisés pour la résolution VLM, l'extraction texte, et la décision t2a. Sert aussi de **proxy vers cloud** pour certains appels.
|
||||
|
||||
**Cascade de résolution UI** (à confirmer composant par composant au runtime) :
|
||||
1. OCR (docTR ou EasyOCR selon module)
|
||||
2. cv2 template matching
|
||||
3. YOLO v4 grounding
|
||||
4. VLM grounding
|
||||
|
||||
**UI-DETR-1** : utilisé par VWB **au recording** pour overlays numérotés (équivalent OmniParser). `crop_hash` volontairement non persisté.
|
||||
|
||||
**Asymétrie connue, sujet ouvert post-démo** : VWB direct utilise UI-DETR-1 au runtime, le replay sur Léa ne l'utilise pas (cascade OCR/template/VLM seulement). Ne pas tenter de "fixer" cette asymétrie maintenant.
|
||||
|
||||
## ⚠️ Champs de mines — code orphelin
|
||||
|
||||
`core/` contient ~40 sous-modules. **Beaucoup ne sont pas wired au runtime actif.** Avant de raisonner sur un composant trouvé dans `core/` (coaching, healing, federation, learning, cognition, etc.) :
|
||||
|
||||
1. Vérifier qu'il est importé par un point d'entrée actif.
|
||||
2. Vérifier qu'il est effectivement appelé en runtime (traces, logs).
|
||||
3. Si doute, demander à Dom.
|
||||
|
||||
**Cas spécifique agent_v1** : suspicion de code orphelin à rebrancher. Si tu trouves un appel codé mais non exécuté en runtime (ex. appel Ollama de commentaire d'action présent dans le code mais jamais déclenché), c'est prioritaire à signaler.
|
||||
|
||||
## Debug — où regarder en premier
|
||||
|
||||
- `logs/` (racine projet) — logs runtime généraux
|
||||
- `logs/audit/` — traces d'exécution
|
||||
- `logs/healing/` — si concerne le healing
|
||||
- `data/runner_captures/` — captures d'exécution
|
||||
- `visual_workflow_builder/logs/` — logs VWB
|
||||
- `server/logs/` — logs serveur
|
||||
|
||||
**Vérifier qu'un appel Ollama se déclenche vraiment au runtime** : ne pas se fier à la présence de l'appel dans le code. Tracer effectivement (log d'entrée de fonction, requête vue côté Ollama `:11434`).
|
||||
|
||||
## Inspirations externes
|
||||
|
||||
Voir `docs/INSPIRATION_FRAMEWORKS_2026-05-10.md` pour les patterns convergents (OpenAdapt, Skyvern, OmniParser : Policy/Grounding, Safety Gate, Abstraction Ladder, Planner-Actor-Validator). Le projet est techniquement plus mature que sa documentation ne le suggère — s'inspirer des bons patterns sans complexe.
|
||||
|
||||
## Recherche d'information
|
||||
|
||||
Ta connaissance interne est datée. Pour tout sujet technique évoluant vite (modèles VLM, frameworks RPA visuels, librairies de grounding, versions d'outils), **chercher sur internet d'abord**. Privilégier les sources de moins de 6 mois.
|
||||
|
||||
## Stack
|
||||
|
||||
- Python 3.10-3.12, venv `venv_v3/`
|
||||
- Backend VWB : Flask + SQLite
|
||||
- Frontend VWB : React (port 3002), dashboard :5001, API :8000
|
||||
- LLM local : Ollama `:11434`
|
||||
- GUI legacy : PyQt5
|
||||
- Tests : pytest avec marqueurs (unit/integration/slow/smoke)
|
||||
- Langue : français (code, commentaires, logs, GUI)
|
||||
|
||||
## Commandes utiles
|
||||
|
||||
```bash
|
||||
cd ~/ai/rpa_vision_v3 && source venv_v3/bin/activate
|
||||
./run.sh --full # Écosystème complet
|
||||
./run.sh --gui # GUI PyQt5 seule
|
||||
./run.sh --test # Tests complets
|
||||
make test-fast # Tests rapides
|
||||
make check # Validation imports + tests rapides
|
||||
```
|
||||
@@ -21,7 +21,12 @@ ollama serve
|
||||
### 3. Télécharger le modèle VLM
|
||||
|
||||
```bash
|
||||
ollama pull qwen3-vl:8b
|
||||
# Modèle par défaut du projet (voir .env.example)
|
||||
ollama pull gemma4:latest
|
||||
|
||||
# Alternatives supportées
|
||||
# ollama pull qwen3-vl:8b
|
||||
# ollama pull 0000/ui-tars-1.5-7b-q8_0:7b # grounder visuel
|
||||
```
|
||||
|
||||
## Utilisation
|
||||
|
||||
339
README.md
339
README.md
@@ -1,207 +1,204 @@
|
||||
# RPA Vision V3 - 100% Vision-Based Workflow Automation
|
||||
# RPA Vision V3 — Automatisation basée sur la compréhension visuelle des interfaces
|
||||
|
||||
## 📊 Status
|
||||
> ⚠️ **Projet en phase POC** — voir [`docs/STATUS.md`](docs/STATUS.md) pour l'état
|
||||
> réel par module. Certaines briques sont opérationnelles bout en bout,
|
||||
> d'autres sont en cours de stabilisation. Ce dépôt n'est pas production-ready.
|
||||
|
||||
🚀 **PRODUCTION-READY** - Phase 12 Complete (77% System Completion) ✅
|
||||
*Dernière mise à jour : 14 avril 2026*
|
||||
|
||||
**Latest Update**: 14 Décembre 2024
|
||||
- ✅ **10/13 Phases Complétées** - Système mature et fonctionnel
|
||||
- ✅ **Performance Exceptionnelle** - 500-6250x plus rapide que requis
|
||||
- ✅ **Architecture Entreprise** - 148k+ lignes, 19 modules, 6 specs complètes
|
||||
- ✅ **Innovations Techniques** - Self-healing, Multi-modal, GPU management
|
||||
- 📊 **Audit Complet** - [Rapport détaillé](AUDIT_COMPLET_SYSTEME_RPA_VISION_V3.md)
|
||||
## Intention
|
||||
|
||||
**Quick Test**: `bash test_clip.sh`
|
||||
Automatiser des workflows métier par **compréhension sémantique de l'écran**
|
||||
plutôt que par coordonnées de clic fixes. Le système observe l'utilisateur,
|
||||
reconstruit un graphe d'états de l'interface, et cherche à rejouer la
|
||||
procédure en reconnaissant visuellement les éléments cibles — y compris
|
||||
quand l'UI change légèrement.
|
||||
|
||||
## 🎯 Vision
|
||||
Terrain cible principal : postes hospitaliers (Citrix, applications métier
|
||||
web et desktop). Contrainte forte : **100 % local**, pas d'appel à un LLM
|
||||
cloud dans le pipeline par défaut.
|
||||
|
||||
RPA basé sur la **compréhension sémantique** des interfaces, pas sur des coordonnées de clics.
|
||||
|
||||
Le système apprend des workflows en observant l'utilisateur et les automatise de manière robuste grâce à une architecture en 5 couches.
|
||||
|
||||
## 🏗️ Architecture en 5 Couches
|
||||
## Architecture en couches
|
||||
|
||||
```
|
||||
RawSession (Couche 0)
|
||||
↓
|
||||
ScreenState (Couche 1) - 4 niveaux d'abstraction
|
||||
↓
|
||||
UIElement Detection (Couche 2) - Types + Rôles sémantiques
|
||||
↓
|
||||
State Embedding (Couche 3) - Fusion multi-modale
|
||||
↓
|
||||
Workflow Graph (Couche 4) - Nodes + Edges + Learning States
|
||||
RawSession (couche 0) — capture événements + screenshots
|
||||
↓
|
||||
ScreenState (couche 1) — états d'écran à plusieurs niveaux d'abstraction
|
||||
↓
|
||||
UIElement (couche 2) — détection sémantique (cascade OCR + templates + VLM)
|
||||
↓
|
||||
State Embedding (couche 3) — fusion multi-modale + index FAISS
|
||||
↓
|
||||
Workflow Graph (couche 4) — nœuds, transitions, résolution de cibles
|
||||
```
|
||||
|
||||
## 📁 Structure
|
||||
## État des fonctionnalités (synthèse)
|
||||
|
||||
```
|
||||
rpa_vision_v3/
|
||||
├── core/
|
||||
│ ├── models/ # Couches 0-4 : Structures de données
|
||||
│ ├── capture/ # Couche 0 : Capture événements + screenshots
|
||||
│ ├── detection/ # Couche 2 : Détection UI sémantique
|
||||
│ ├── embedding/ # Couche 3 : Fusion multi-modale + FAISS
|
||||
│ ├── graph/ # Couche 4 : Construction + Matching + Exécution
|
||||
│ └── persistence/ # Sauvegarde/Chargement
|
||||
├── data/
|
||||
│ ├── sessions/ # RawSessions
|
||||
│ ├── screen_states/ # ScreenStates
|
||||
│ ├── embeddings/ # Vecteurs .npy
|
||||
│ ├── faiss_index/ # Index FAISS
|
||||
│ └── workflows/ # Workflow Graphs
|
||||
└── tests/ # Tests unitaires + intégration
|
||||
```
|
||||
Le détail par module est dans [`docs/STATUS.md`](docs/STATUS.md).
|
||||
|
||||
## 🚀 Démarrage Rapide
|
||||
**Opérationnel**
|
||||
- Capture Windows (Agent V1) + streaming vers serveur Linux
|
||||
- Stockage des sessions brutes (screenshots + événements)
|
||||
- Streaming server FastAPI, sessions en mémoire
|
||||
- Build du package Windows (`deploy/build_package.sh`)
|
||||
|
||||
**Alpha (fonctionnel sur un cas de référence, encore peu généralisé)**
|
||||
- Détection UI par cascade VLM + OCR + templates
|
||||
- Construction de workflow graph depuis une session
|
||||
- Replay E2E supervisé — premier succès sur Notepad le 13 avril 2026
|
||||
- Mode apprentissage : pause et demande d'aide humaine quand la résolution échoue
|
||||
- Embeddings CLIP + index FAISS
|
||||
- Module auth (Fernet + TOTP), federation (LearningPack)
|
||||
- Web Dashboard, Agent Chat
|
||||
|
||||
**En cours**
|
||||
- Visual Workflow Builder (VWB) — bugs DB runtime connus
|
||||
- Self-healing / recovery global
|
||||
- Analytics / reporting
|
||||
- Worker de compilation sessions → ExecutionPlan
|
||||
- Tests E2E multi-applications
|
||||
|
||||
## Limitations connues
|
||||
|
||||
- Le pipeline de replay est validé sur un nombre très restreint d'applications.
|
||||
- `TargetMemoryStore` (apprentissage Phase 1) est câblé mais sa base reste
|
||||
vide tant qu'un replay complet n'a pas été cristallisé.
|
||||
- Certaines asymétries entre chemins stricts et legacy dans le serveur de
|
||||
streaming peuvent provoquer des arrêts au lieu de pauses d'apprentissage.
|
||||
- VWB n'est pas encore stable en écriture ; un outil dédié plus simple est
|
||||
envisagé.
|
||||
|
||||
## Démarrage
|
||||
|
||||
### Prérequis
|
||||
|
||||
- Python 3.10 à 3.12
|
||||
- [Ollama](https://ollama.ai) installé et démarré localement
|
||||
- Recommandé : GPU NVIDIA pour l'inférence VLM
|
||||
- Windows 10/11 uniquement pour le client Agent V1
|
||||
|
||||
### Installation
|
||||
|
||||
```bash
|
||||
# 1. Installer Ollama
|
||||
curl -fsSL https://ollama.ai/install.sh | sh # Linux
|
||||
# ou
|
||||
brew install ollama # macOS
|
||||
|
||||
# 2. Démarrer Ollama
|
||||
ollama serve
|
||||
|
||||
# 3. Télécharger le modèle VLM
|
||||
ollama pull qwen3-vl:8b
|
||||
|
||||
# 4. Installer dépendances Python
|
||||
# 1) Cloner puis créer le venv
|
||||
python3 -m venv .venv
|
||||
source .venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
|
||||
# 2) Démarrer Ollama et récupérer le modèle VLM par défaut
|
||||
ollama serve &
|
||||
ollama pull gemma4:latest # défaut du projet
|
||||
# Alternatives supportées :
|
||||
# ollama pull qwen3-vl:8b
|
||||
# ollama pull 0000/ui-tars-1.5-7b-q8_0:7b # grounder visuel
|
||||
|
||||
# 3) Copier et ajuster la configuration
|
||||
cp .env.example .env
|
||||
# éditer .env pour vérifier RPA_VLM_MODEL, VLM_ENDPOINT, ports, etc.
|
||||
```
|
||||
|
||||
### Test Rapide
|
||||
### Lancer les services
|
||||
|
||||
Tous les services sont pilotés par `svc.sh` (source de vérité des ports :
|
||||
`services.conf`).
|
||||
|
||||
```bash
|
||||
# Diagnostic système
|
||||
python3 rpa_vision_v3/examples/diagnostic_vlm.py
|
||||
|
||||
# Test de détection
|
||||
./rpa_vision_v3/test_quick.sh
|
||||
./svc.sh status # État de tous les services
|
||||
./svc.sh start # Tout démarrer
|
||||
./svc.sh start streaming # Streaming server uniquement (port 5005)
|
||||
./svc.sh restart api # Redémarrer l'API (port 8000)
|
||||
./svc.sh stop # Tout arrêter
|
||||
```
|
||||
|
||||
### Utilisation - Détection UI
|
||||
| Port | Service |
|
||||
|---|---|
|
||||
| 8000 | API Server (upload / traitement core) |
|
||||
| 5001 | Web Dashboard |
|
||||
| 5002 | VWB Backend (Flask) |
|
||||
| 5003 | Monitoring |
|
||||
| 5004 | Agent Chat |
|
||||
| 5005 | Streaming Server (Agent V1 → pipeline core) |
|
||||
| 5006 | Session Cleaner |
|
||||
| 5099 | Worker de compilation (optionnel) |
|
||||
| 3002 | VWB Frontend (Vite/React) |
|
||||
|
||||
```python
|
||||
from rpa_vision_v3.core.detection import create_detector
|
||||
### Client Windows (Agent V1)
|
||||
|
||||
# Créer le détecteur
|
||||
detector = create_detector()
|
||||
|
||||
# Détecter les éléments UI
|
||||
elements = detector.detect("screenshot.png")
|
||||
|
||||
# Utiliser les résultats
|
||||
for elem in elements:
|
||||
print(f"{elem.type:15s} | {elem.role:20s} | {elem.label}")
|
||||
```
|
||||
|
||||
### Utilisation - Workflow (Phase 4 - À venir)
|
||||
|
||||
```python
|
||||
from rpa_vision_v3.core.models import RawSession, ScreenState, Workflow
|
||||
from rpa_vision_v3.core.graph import GraphBuilder, NodeMatcher
|
||||
|
||||
# 1. Capturer une session
|
||||
session = RawSession(...)
|
||||
# ... capturer événements et screenshots
|
||||
|
||||
# 2. Construire workflow automatiquement
|
||||
builder = GraphBuilder(...)
|
||||
workflow = builder.build_from_session(session)
|
||||
|
||||
# 3. Matcher état actuel
|
||||
matcher = NodeMatcher(...)
|
||||
current_state = ScreenState(...)
|
||||
match = matcher.match(current_state, workflow)
|
||||
|
||||
# 4. Exécuter action
|
||||
if match:
|
||||
edge = workflow.get_outgoing_edges(match.node.node_id)[0]
|
||||
executor.execute_edge(edge, current_state)
|
||||
```
|
||||
|
||||
## 📚 Documentation
|
||||
|
||||
### Guides Principaux
|
||||
- **Quick Start** : `QUICK_START.md` - Démarrage rapide
|
||||
- **Prochaines Étapes** : `NEXT_STEPS.md` - Roadmap et Phase 4
|
||||
- **Phase 3 Complète** : `PHASE3_COMPLETE.md` - Résumé Phase 3
|
||||
|
||||
### Documentation Technique
|
||||
- **Spec complète** : `.kiro/specs/workflow-graph-implementation/`
|
||||
- **Architecture** : `docs/reference/ARCHITECTURE_VISION_COMPLETE.md`
|
||||
- **Détection Hybride** : `HYBRID_DETECTION_SUMMARY.md`
|
||||
- **Intégration Ollama** : `docs/OLLAMA_INTEGRATION.md`
|
||||
|
||||
## 🎓 Concepts Clés
|
||||
|
||||
### RPA 100% Vision
|
||||
|
||||
- ❌ Pas de coordonnées (x, y) fixes
|
||||
- ✅ Rôles sémantiques (primary_action, form_input, etc.)
|
||||
- ✅ Matching par similarité visuelle et textuelle
|
||||
- ✅ Robuste aux changements d'UI
|
||||
|
||||
### Apprentissage Progressif
|
||||
|
||||
```
|
||||
OBSERVATION (5+ exécutions)
|
||||
↓
|
||||
COACHING (10+ assistances, succès >90%)
|
||||
↓
|
||||
AUTO_CANDIDATE (20+ exécutions, succès >95%)
|
||||
↓
|
||||
AUTO_CONFIRMÉ (validation utilisateur)
|
||||
```
|
||||
|
||||
### State Embedding
|
||||
|
||||
Fusion multi-modale :
|
||||
- 50% Image (screenshot complet)
|
||||
- 30% Texte (texte détecté)
|
||||
- 10% Titre (fenêtre)
|
||||
- 10% UI (éléments détectés)
|
||||
|
||||
## 🧪 Tests
|
||||
Le client capture souris, clavier et écran sur le poste Windows et envoie
|
||||
les données au streaming server Linux.
|
||||
|
||||
```bash
|
||||
# Tests unitaires
|
||||
pytest tests/unit/
|
||||
|
||||
# Tests d'intégration
|
||||
pytest tests/integration/
|
||||
|
||||
# Tests de performance
|
||||
pytest tests/performance/ --benchmark-only
|
||||
# Build du package Windows depuis le repo Linux
|
||||
./deploy/build_package.sh
|
||||
# produit deploy/Lea_v<version>.zip
|
||||
```
|
||||
|
||||
## 📈 Roadmap - 77% Complété (10/13 Phases)
|
||||
Voir [`docs/DEV_SETUP.md`](docs/DEV_SETUP.md) pour la maintenance du dépôt
|
||||
(worktrees, build, services).
|
||||
|
||||
### ✅ **Phases Complétées**
|
||||
- [x] **Phase 1-2** : Fondations + Embeddings FAISS ✅
|
||||
- [x] **Phase 4-6** : Détection UI + Workflow Graphs + Action Execution ✅
|
||||
- [x] **Phase 7-8** : Learning System + Training System ✅
|
||||
- [x] **Phase 10-12** : GPU Management + Performance + Monitoring ✅
|
||||
## Arborescence du dépôt
|
||||
|
||||
### 🎯 **Phases Restantes**
|
||||
- [ ] **Phase 3** : Checkpoint Final (tests storage)
|
||||
- [ ] **Phase 9** : Visual Workflow Builder (90% → 100%)
|
||||
- [ ] **Phase 13** : Tests End-to-End + Documentation finale
|
||||
```
|
||||
rpa_vision_v3/
|
||||
├── agent_v0/ # Agent V1 (client Windows) + serveur de streaming
|
||||
│ ├── agent_v1/ # Source de l'agent (capture, UI tray, exécution)
|
||||
│ └── server_v1/ # FastAPI streaming + processeurs
|
||||
├── core/ # Pipeline core
|
||||
│ ├── detection/ # Cascade VLM + OCR + templates
|
||||
│ ├── embedding/ # CLIP + FAISS
|
||||
│ ├── graph/ # Construction / matching de workflow graphs
|
||||
│ ├── execution/ # Résolution de cibles, actions LLM
|
||||
│ ├── learning/ # TargetMemoryStore (apprentissage)
|
||||
│ ├── auth/ # Vault Fernet + TOTP
|
||||
│ └── federation/ # Export/import de LearningPacks
|
||||
├── visual_workflow_builder/ # VWB (backend Flask + frontend React Vite)
|
||||
├── web_dashboard/ # Dashboard Flask + SocketIO
|
||||
├── agent_chat/ # Interface conversationnelle + planner
|
||||
├── deploy/ # Scripts de build et unités systemd
|
||||
├── data/ # Sessions, embeddings, index FAISS, apprentissage
|
||||
├── docs/ # Documentation technique
|
||||
├── tests/ # pytest (unit, integration, e2e)
|
||||
├── services.conf # Source de vérité des ports
|
||||
├── svc.sh # Orchestrateur des services
|
||||
└── run.sh # Démarrage tout-en-un (legacy, préférer svc.sh)
|
||||
```
|
||||
|
||||
### 🚀 **Composants Production-Ready**
|
||||
- **Agent V0** : Capture cross-platform + Encryption ✅
|
||||
- **Server API** : Processing pipeline + Web dashboard ✅
|
||||
- **Analytics System** : Monitoring + Insights + Reporting ✅
|
||||
- **Self-Healing** : Automatic adaptation + Recovery ✅
|
||||
## Tests
|
||||
|
||||
## 🤝 Contribution
|
||||
```bash
|
||||
source .venv/bin/activate
|
||||
|
||||
Voir `.kiro/specs/workflow-graph-implementation/tasks.md` pour les tâches en cours.
|
||||
# Tests rapides (hors marqueur slow)
|
||||
pytest -m "not slow" -q
|
||||
|
||||
## 📄 Licence
|
||||
# Tests d'intégration (streaming, pipeline)
|
||||
pytest tests/integration/ -q
|
||||
|
||||
Propriétaire - Tous droits réservés
|
||||
# Tests E2E
|
||||
pytest tests/test_pipeline_e2e.py -q
|
||||
```
|
||||
|
||||
Quelques tests legacy sont connus comme cassés — voir la mémoire projet et
|
||||
`docs/` pour la liste.
|
||||
|
||||
## Documentation
|
||||
|
||||
- [`docs/STATUS.md`](docs/STATUS.md) — état réel par module
|
||||
- [`docs/DEV_SETUP.md`](docs/DEV_SETUP.md) — tâches d'administration (worktrees, build)
|
||||
- [`docs/EXECUTION_LOOP_FLAGS.md`](docs/EXECUTION_LOOP_FLAGS.md) — flags C1 vision-aware (`enable_ui_detection`, `enable_ocr`, `analyze_timeout_ms`, `window_info_provider`)
|
||||
- [`docs/VISION_RPA_INTELLIGENT.md`](docs/VISION_RPA_INTELLIGENT.md) — cahier des charges
|
||||
- [`docs/PLAN_ACTEUR_V1.md`](docs/PLAN_ACTEUR_V1.md) — architecture 3 niveaux (Macro / Méso / Micro)
|
||||
- [`docs/CONFORMITE_AI_ACT.md`](docs/CONFORMITE_AI_ACT.md) — journalisation, floutage, rétention
|
||||
|
||||
## Concepts clés
|
||||
|
||||
- **RPA 100 % vision** : pas de coordonnées fixes ; l'agent localise un
|
||||
élément par ce qu'il voit (label + contexte visuel), pas par `x,y`.
|
||||
- **Apprentissage progressif** : mode shadow → assisté → autonome, validé
|
||||
par supervision humaine sur les échecs.
|
||||
- **LLM 100 % local** : Ollama sur la machine. Aucun appel cloud dans le
|
||||
pipeline par défaut (cf. feedback projet `feedback_local_only.md`).
|
||||
|
||||
## Licence
|
||||
|
||||
Propriétaire — tous droits réservés.
|
||||
|
||||
@@ -125,25 +125,47 @@ class WorkflowPipelineEnhanced:
|
||||
current_node_id = match_result["node_id"]
|
||||
logger.info(f"Matched current state to node: {current_node_id} (confidence: {match_result['confidence']:.3f})")
|
||||
|
||||
# 2. Obtenir la prochaine action
|
||||
# 2. Obtenir la prochaine action (contrat dict avec status explicite)
|
||||
action_info = self.get_next_action(workflow_id, current_node_id)
|
||||
|
||||
if not action_info:
|
||||
# Workflow terminé
|
||||
action_status = action_info.get("status")
|
||||
|
||||
if action_status == "terminal":
|
||||
# Workflow terminé (aucun outgoing_edge = fin légitime)
|
||||
performance_metrics.total_execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000
|
||||
|
||||
|
||||
result = WorkflowExecutionResult.workflow_complete(
|
||||
execution_id=execution_id,
|
||||
workflow_id=workflow_id,
|
||||
current_node=current_node_id,
|
||||
performance_metrics=performance_metrics
|
||||
performance_metrics=performance_metrics,
|
||||
)
|
||||
result.correlation_id = correlation_id
|
||||
result.match_result = match_result
|
||||
|
||||
|
||||
logger.info(f"Workflow {workflow_id} completed at node {current_node_id}")
|
||||
return result
|
||||
|
||||
|
||||
if action_status == "blocked":
|
||||
# Des edges existent mais aucun ne passe les filtres :
|
||||
# c'est un blocage, pas une fin de workflow.
|
||||
performance_metrics.total_execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000
|
||||
|
||||
result = WorkflowExecutionResult.error(
|
||||
execution_id=execution_id,
|
||||
workflow_id=workflow_id,
|
||||
error_message=f"No valid edge: {action_info.get('reason', 'unknown')}",
|
||||
step_type="action_selection",
|
||||
current_node=current_node_id,
|
||||
performance_metrics=performance_metrics,
|
||||
)
|
||||
result.correlation_id = correlation_id
|
||||
|
||||
logger.warning(
|
||||
f"Workflow {workflow_id} blocked at node {current_node_id}: "
|
||||
f"{action_info.get('reason')}"
|
||||
)
|
||||
return result
|
||||
|
||||
logger.info(f"Next action: {action_info['action']['type']} -> {action_info['target_node']}")
|
||||
|
||||
# 3. Charger le workflow pour obtenir l'edge complet
|
||||
@@ -14,8 +14,9 @@ import asyncio
|
||||
import logging
|
||||
import json
|
||||
import base64
|
||||
import pickle
|
||||
import gzip
|
||||
import pickle # noqa: S403 - usage legacy restreint au fallback de migration
|
||||
import io
|
||||
from typing import Dict, List, Optional, Any, Tuple
|
||||
from dataclasses import dataclass, asdict
|
||||
from datetime import datetime
|
||||
@@ -24,6 +25,12 @@ import numpy as np
|
||||
|
||||
from core.visual.visual_target_manager import VisualTarget, VisualTargetManager
|
||||
from core.visual.screenshot_validation_manager import ScreenshotValidationManager, ValidationResult
|
||||
from core.security.signed_serializer import (
|
||||
SignatureVerificationError,
|
||||
UnsupportedFormatError,
|
||||
dumps_signed,
|
||||
loads_signed,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -435,19 +442,19 @@ class VisualPersistenceManager:
|
||||
return None
|
||||
|
||||
async def _serialize_workflow_data(self, workflow_data: VisualWorkflowData) -> bytes:
|
||||
"""Sérialise les données d'un workflow"""
|
||||
"""Sérialise les données d'un workflow en JSON signé HMAC."""
|
||||
# Convertir en dictionnaire
|
||||
data_dict = asdict(workflow_data)
|
||||
|
||||
|
||||
# Traiter les types spéciaux
|
||||
data_dict['created_at'] = workflow_data.created_at.isoformat()
|
||||
|
||||
|
||||
# Sérialiser les cibles visuelles
|
||||
serialized_targets = {}
|
||||
for signature, target in workflow_data.visual_targets.items():
|
||||
serialized_targets[signature] = await self._serialize_visual_target(target)
|
||||
data_dict['visual_targets'] = serialized_targets
|
||||
|
||||
|
||||
# Sérialiser l'historique de validation
|
||||
serialized_history = {}
|
||||
for signature, history in workflow_data.validation_history.items():
|
||||
@@ -455,15 +462,30 @@ class VisualPersistenceManager:
|
||||
self._serialize_validation_result(result) for result in history
|
||||
]
|
||||
data_dict['validation_history'] = serialized_history
|
||||
|
||||
# Convertir en bytes
|
||||
return pickle.dumps(data_dict)
|
||||
|
||||
|
||||
# JSON signé HMAC (cf. core.security.signed_serializer)
|
||||
return dumps_signed(data_dict)
|
||||
|
||||
async def _deserialize_workflow_data(self, data: bytes) -> VisualWorkflowData:
|
||||
"""Désérialise les données d'un workflow"""
|
||||
# Désérialiser le dictionnaire
|
||||
data_dict = pickle.loads(data)
|
||||
|
||||
"""Désérialise les données d'un workflow (JSON signé HMAC ;
|
||||
fallback pickle legacy avec WARNING pour migrer les anciens fichiers)."""
|
||||
try:
|
||||
data_dict = loads_signed(data)
|
||||
except SignatureVerificationError:
|
||||
# Fichier altéré ou clé différente : on refuse sans fallback.
|
||||
logger.error("Workflow visuel : signature HMAC invalide — refus.")
|
||||
raise
|
||||
except UnsupportedFormatError:
|
||||
# Ancien format pickle : fallback explicite et bruyant.
|
||||
import os
|
||||
if os.getenv("RPA_ALLOW_PICKLE_FALLBACK", "1") == "0":
|
||||
raise
|
||||
logger.warning(
|
||||
"Workflow visuel au format pickle legacy — lecture de compat, "
|
||||
"ré-écrire en JSON signé dès que possible."
|
||||
)
|
||||
data_dict = pickle.loads(data) # noqa: S301 - fallback legacy
|
||||
|
||||
# Reconstruire les objets
|
||||
workflow_data = VisualWorkflowData(
|
||||
workflow_id=data_dict['workflow_id'],
|
||||
@@ -38,6 +38,7 @@ from werkzeug.utils import secure_filename
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from core.workflow import SemanticMatcher, VariableManager
|
||||
from core.detection.vlm_config import get_reasoning_model
|
||||
|
||||
# Import des composants conversationnels
|
||||
from .intent_parser import IntentParser, IntentType, get_intent_parser
|
||||
@@ -83,9 +84,24 @@ app.config['MAX_CONTENT_LENGTH'] = 50 * 1024 * 1024 # 50 MB max upload (sécuri
|
||||
_ALLOWED_ORIGINS = [
|
||||
"http://localhost:3002",
|
||||
"http://localhost:5002",
|
||||
"http://localhost:5004",
|
||||
"https://vwb.labs.laurinebazin.design",
|
||||
"https://lea.labs.laurinebazin.design",
|
||||
# LAN local : serveur Linux (192.168.1.40) + Léa Windows (192.168.1.11).
|
||||
# Sans ces origines, engineio rejette la ChatWindow tkinter Windows et
|
||||
# même les requêtes self-loopback (cf. journal 2026-05-24 11:00:47).
|
||||
"http://192.168.1.40:5004",
|
||||
"http://192.168.1.40:5005",
|
||||
"http://192.168.1.11:5004",
|
||||
"http://192.168.1.11:5005",
|
||||
]
|
||||
# Override possible via LEA_CORS_ALLOWED_ORIGINS=comma,separated,list pour
|
||||
# environnements non-LAN. Vide ou absent → garde la liste par défaut ci-dessus.
|
||||
_extra_origins = os.environ.get("LEA_CORS_ALLOWED_ORIGINS", "").strip()
|
||||
if _extra_origins:
|
||||
_ALLOWED_ORIGINS.extend(
|
||||
o.strip() for o in _extra_origins.split(",") if o.strip()
|
||||
)
|
||||
socketio = SocketIO(app, cors_allowed_origins=_ALLOWED_ORIGINS)
|
||||
|
||||
|
||||
@@ -133,6 +149,28 @@ def _streaming_headers() -> dict:
|
||||
headers["Authorization"] = f"Bearer {_STREAMING_API_TOKEN}"
|
||||
return headers
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Feedback Bus — events 'lea:*' temps réel vers ChatWindow
|
||||
# ============================================================
|
||||
LEA_FEEDBACK_BUS = os.environ.get("LEA_FEEDBACK_BUS", "0").lower() in ("1", "true", "yes", "on")
|
||||
|
||||
|
||||
def _emit_lea(event: str, payload: Dict[str, Any]) -> None:
|
||||
"""Émet 'lea:{event}' sur le bus SocketIO. No-op silencieux si flag off ou erreur."""
|
||||
if not LEA_FEEDBACK_BUS:
|
||||
return
|
||||
try:
|
||||
socketio.emit(f"lea:{event}", payload)
|
||||
except Exception:
|
||||
logger.debug("_emit_lea silenced", exc_info=True)
|
||||
|
||||
|
||||
def _emit_dual(legacy_event: str, lea_event: str, payload: Dict[str, Any], **kwargs) -> None:
|
||||
"""Émet l'event legacy (compat dashboard) ET l'alias lea:* (ChatWindow tkinter)."""
|
||||
socketio.emit(legacy_event, payload, **kwargs)
|
||||
_emit_lea(lea_event, payload)
|
||||
|
||||
execution_status = {
|
||||
"running": False,
|
||||
"workflow": None,
|
||||
@@ -177,6 +215,9 @@ _pending_imports: Dict[str, Dict[str, Any]] = {}
|
||||
# Copilot state — suivi du mode pas-à-pas
|
||||
_copilot_sessions: Dict[str, Dict[str, Any]] = {}
|
||||
|
||||
# LearnActionOrchestrator — P1-LEA SHADOW (apprentissage Léa-first)
|
||||
learn_action_orchestrator = None # injecté par init_system()
|
||||
|
||||
_COPILOT_KEYWORDS = [
|
||||
"copilot", "co-pilot",
|
||||
"pas à pas", "pas-à-pas", "pas a pas",
|
||||
@@ -197,6 +238,7 @@ def init_system():
|
||||
global matcher, gpu_manager
|
||||
global intent_parser, confirmation_loop, response_generator, conversation_manager
|
||||
global autonomous_planner
|
||||
reasoning_model = get_reasoning_model()
|
||||
|
||||
# 1. SemanticMatcher — multi-répertoires (P0-6) + matching LLM (P0-7)
|
||||
# Scan data/workflows/ + data/training/workflows/ + data/training/live_sessions/workflows/
|
||||
@@ -204,7 +246,7 @@ def init_system():
|
||||
matcher = SemanticMatcher(
|
||||
workflows_dir=None, # None = scan tous les répertoires par défaut
|
||||
use_llm=True, # Matching sémantique via Ollama (P0-7)
|
||||
llm_model="qwen2.5:7b",
|
||||
llm_model=reasoning_model,
|
||||
)
|
||||
dirs_info = matcher.get_directories()
|
||||
dirs_summary = ", ".join(
|
||||
@@ -229,7 +271,10 @@ def init_system():
|
||||
|
||||
# 3. Composants conversationnels
|
||||
try:
|
||||
intent_parser = get_intent_parser(use_llm=True) # LLM activé (Ollama)
|
||||
intent_parser = get_intent_parser(
|
||||
use_llm=True,
|
||||
llm_model=reasoning_model,
|
||||
) # LLM activé (Ollama)
|
||||
confirmation_loop = get_confirmation_loop()
|
||||
response_generator = get_response_generator()
|
||||
conversation_manager = get_conversation_manager()
|
||||
@@ -256,8 +301,24 @@ def init_system():
|
||||
if EXECUTION_AVAILABLE:
|
||||
try:
|
||||
# Pipeline de workflow (matching + actions)
|
||||
workflow_pipeline = WorkflowPipeline()
|
||||
logger.info("✓ WorkflowPipeline initialisé")
|
||||
# Depuis C1c 2026-05-25 : désactiver UI detection (OWL/VLM côté
|
||||
# UIDetector via DetectionConfig) par défaut pour économiser
|
||||
# ~900 MiB VRAM au boot du chat service. Le chemin SocketIO 5004
|
||||
# / narration ChatWindow / ExecutionLoop n'utilise pas
|
||||
# workflow_pipeline.ui_detector (grep confirmé). Activation
|
||||
# explicite : AGENT_CHAT_ENABLE_UI_DETECTION=1.
|
||||
_ui_detection_enabled = os.environ.get(
|
||||
"AGENT_CHAT_ENABLE_UI_DETECTION", "0"
|
||||
).strip() in ("1", "true", "yes")
|
||||
workflow_pipeline = WorkflowPipeline(
|
||||
enable_ui_detection=_ui_detection_enabled,
|
||||
enable_vlm=_ui_detection_enabled,
|
||||
)
|
||||
logger.info(
|
||||
f"✓ WorkflowPipeline initialisé "
|
||||
f"(ui_detection={_ui_detection_enabled}, "
|
||||
f"économie ~900 MiB VRAM si False)"
|
||||
)
|
||||
|
||||
# Capture d'écran
|
||||
screen_capturer = ScreenCapturer()
|
||||
@@ -294,7 +355,7 @@ def init_system():
|
||||
|
||||
# 5. Autonomous Planner (Agent Libre)
|
||||
try:
|
||||
autonomous_planner = get_autonomous_planner(llm_model="qwen2.5:7b")
|
||||
autonomous_planner = get_autonomous_planner(llm_model=reasoning_model)
|
||||
|
||||
# Configurer les callbacks pour l'exécution
|
||||
if screen_capturer:
|
||||
@@ -334,6 +395,26 @@ def init_system():
|
||||
else:
|
||||
logger.info("ℹ Import Excel non disponible (openpyxl manquant ?)")
|
||||
|
||||
# 8. LearnActionOrchestrator (P1-LEA SHADOW) — apprentissage Léa-first
|
||||
global learn_action_orchestrator
|
||||
try:
|
||||
from .handlers.learn_action import get_learn_action_orchestrator
|
||||
|
||||
def _learn_emit(event: str, payload: Dict[str, Any]) -> None:
|
||||
try:
|
||||
socketio.emit(event, payload)
|
||||
except Exception:
|
||||
logger.debug("learn emit silenced", exc_info=True)
|
||||
|
||||
learn_action_orchestrator = get_learn_action_orchestrator(emit=_learn_emit)
|
||||
resumed = learn_action_orchestrator.resume_sessions()
|
||||
logger.info(
|
||||
f"✓ LearnActionOrchestrator initialisé (sessions reprises: {len(resumed)})"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"⚠ LearnActionOrchestrator: {e}")
|
||||
learn_action_orchestrator = None
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Routes Web
|
||||
@@ -623,7 +704,7 @@ def api_execute():
|
||||
}
|
||||
|
||||
# Notifier via WebSocket
|
||||
socketio.emit('execution_started', {
|
||||
_emit_dual('execution_started', 'action_started', {
|
||||
"workflow": match.workflow_name,
|
||||
"params": all_params
|
||||
})
|
||||
@@ -650,7 +731,7 @@ def api_history():
|
||||
# =============================================================================
|
||||
|
||||
# Modèle texte pour les réponses conversationnelles (pas besoin de vision)
|
||||
_LEA_LLM_MODEL = os.environ.get("LEA_LLM_MODEL", "qwen3:8b")
|
||||
_LEA_LLM_MODEL = os.environ.get("LEA_LLM_MODEL") or get_reasoning_model()
|
||||
|
||||
_LEA_SYSTEM_PROMPT = """Tu es Léa, une assistante professionnelle chaleureuse et bienveillante.
|
||||
|
||||
@@ -746,6 +827,24 @@ def api_chat():
|
||||
if not message:
|
||||
return jsonify({"error": "Message vide"}), 400
|
||||
|
||||
# 0. Routage P1-LEA : si une session d'apprentissage est active pour ce
|
||||
# session_id, l'orchestrateur traite le message ; sinon on tombe sur le
|
||||
# flux normal (intent_parser / matcher / confirmation).
|
||||
if learn_action_orchestrator is not None and session_id:
|
||||
try:
|
||||
learn_reply = learn_action_orchestrator.handle_chat_message(
|
||||
session_id, message
|
||||
)
|
||||
except Exception:
|
||||
logger.exception("learn_action_orchestrator error")
|
||||
learn_reply = None
|
||||
if learn_reply is not None:
|
||||
return jsonify({
|
||||
"session_id": session_id,
|
||||
"response": learn_reply,
|
||||
"handler": "learn_action",
|
||||
})
|
||||
|
||||
# 1. Obtenir ou créer la session
|
||||
session = conversation_manager.get_or_create_session(session_id=session_id)
|
||||
|
||||
@@ -1181,28 +1280,28 @@ def _execute_gesture(gesture):
|
||||
)
|
||||
|
||||
if resp.status_code == 200:
|
||||
socketio.emit('execution_completed', {
|
||||
_emit_dual('execution_completed', 'done', {
|
||||
"workflow": gesture.name,
|
||||
"success": True,
|
||||
"message": f"Geste '{gesture.name}' ({'+'.join(gesture.keys)}) envoyé",
|
||||
})
|
||||
else:
|
||||
error = resp.text[:200]
|
||||
socketio.emit('execution_completed', {
|
||||
_emit_dual('execution_completed', 'done', {
|
||||
"workflow": gesture.name,
|
||||
"success": False,
|
||||
"message": f"Erreur: {error}",
|
||||
})
|
||||
|
||||
except http_requests.ConnectionError:
|
||||
socketio.emit('execution_completed', {
|
||||
_emit_dual('execution_completed', 'done', {
|
||||
"workflow": gesture.name,
|
||||
"success": False,
|
||||
"message": "Serveur de streaming non disponible (port 5005).",
|
||||
})
|
||||
except Exception as e:
|
||||
logger.error(f"Gesture execution error: {e}")
|
||||
socketio.emit('execution_completed', {
|
||||
_emit_dual('execution_completed', 'done', {
|
||||
"workflow": gesture.name,
|
||||
"success": False,
|
||||
"message": f"Erreur: {str(e)}",
|
||||
@@ -1661,6 +1760,52 @@ def handle_copilot_abort():
|
||||
})
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Bulle paused_need_help — handlers SocketIO depuis ChatWindow (J3.5)
|
||||
# =============================================================================
|
||||
|
||||
@socketio.on('lea:replay_resume')
|
||||
def handle_lea_replay_resume(data):
|
||||
"""Bouton Continuer : relayer le resume vers le streaming server."""
|
||||
replay_id = (data or {}).get("replay_id")
|
||||
if not replay_id:
|
||||
_emit_lea("resume_acked", {"status": "error", "detail": "replay_id manquant"})
|
||||
return
|
||||
try:
|
||||
resp = http_requests.post(
|
||||
f"{STREAMING_SERVER_URL}/api/v1/traces/stream/replay/{replay_id}/resume",
|
||||
headers=_streaming_headers(),
|
||||
timeout=5,
|
||||
)
|
||||
if resp.ok:
|
||||
logger.info(f"Replay {replay_id} resume relayé OK")
|
||||
_emit_lea("resume_acked", {"replay_id": replay_id, "status": "ok"})
|
||||
else:
|
||||
detail = resp.text[:200]
|
||||
logger.warning(f"Resume échoué (HTTP {resp.status_code}): {detail}")
|
||||
_emit_lea("resume_acked", {
|
||||
"replay_id": replay_id, "status": "error",
|
||||
"http_status": resp.status_code, "detail": detail,
|
||||
})
|
||||
except Exception as e:
|
||||
logger.warning(f"Resume relay error: {e}")
|
||||
_emit_lea("resume_acked", {
|
||||
"replay_id": replay_id, "status": "error", "detail": str(e),
|
||||
})
|
||||
|
||||
|
||||
@socketio.on('lea:replay_abort')
|
||||
def handle_lea_replay_abort(data):
|
||||
"""Bouton Annuler : arrêter le polling local. Le replay côté streaming sera
|
||||
cleaned up naturellement au prochain replay (cf api_stream._replay_states stale)."""
|
||||
global execution_status
|
||||
replay_id = (data or {}).get("replay_id")
|
||||
execution_status["running"] = False
|
||||
execution_status["message"] = "Annulé par l'utilisateur"
|
||||
logger.info(f"Replay {replay_id or '?'} abort par l'utilisateur (paused bubble)")
|
||||
_emit_lea("abort_acked", {"replay_id": replay_id, "status": "ok"})
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Exécution de workflow
|
||||
# =============================================================================
|
||||
@@ -1730,14 +1875,20 @@ def _poll_replay_progress(replay_id: str, workflow_name: str, total_actions: int
|
||||
"""Suivre la progression d'un replay distant via polling."""
|
||||
import time
|
||||
|
||||
max_wait = 120 # 2 minutes max
|
||||
max_wait_running = 120 # 2 min en exécution active
|
||||
max_wait_paused = 600 # 10 min en pause supervisée (humain peut prendre son temps)
|
||||
poll_interval = 2.0
|
||||
elapsed = 0
|
||||
was_paused = False
|
||||
|
||||
while elapsed < max_wait and execution_status.get("running"):
|
||||
while execution_status.get("running"):
|
||||
time.sleep(poll_interval)
|
||||
elapsed += poll_interval
|
||||
|
||||
cap = max_wait_paused if was_paused else max_wait_running
|
||||
if elapsed >= cap:
|
||||
break
|
||||
|
||||
try:
|
||||
resp = http_requests.get(
|
||||
f"{STREAMING_SERVER_URL}/api/v1/traces/stream/replay/{replay_id}",
|
||||
@@ -1753,7 +1904,32 @@ def _poll_replay_progress(replay_id: str, workflow_name: str, total_actions: int
|
||||
failed = data.get("failed_actions", 0)
|
||||
progress = int(10 + (completed / max(total_actions, 1)) * 80)
|
||||
|
||||
socketio.emit('execution_progress', {
|
||||
if status == "paused_need_help" and not was_paused:
|
||||
_emit_lea("paused", {
|
||||
"workflow": workflow_name,
|
||||
"replay_id": replay_id,
|
||||
"completed": completed,
|
||||
"total": total_actions,
|
||||
"failed_action": data.get("failed_action"),
|
||||
"reason": (
|
||||
data.get("pause_message")
|
||||
or data.get("message")
|
||||
or data.get("error")
|
||||
or "Action incertaine"
|
||||
),
|
||||
"safety_checks": data.get("safety_checks") or [],
|
||||
})
|
||||
was_paused = True
|
||||
elapsed = 0
|
||||
elif was_paused and status != "paused_need_help":
|
||||
_emit_lea("resumed", {
|
||||
"workflow": workflow_name,
|
||||
"replay_id": replay_id,
|
||||
"status_after": status,
|
||||
})
|
||||
was_paused = False
|
||||
|
||||
_emit_dual('execution_progress', 'action_progress', {
|
||||
"progress": progress,
|
||||
"step": f"Action {completed}/{total_actions} exécutée",
|
||||
"current": completed,
|
||||
@@ -1922,7 +2098,7 @@ def execute_workflow_copilot(match, params: Dict[str, Any]):
|
||||
|
||||
actions = _build_actions_from_workflow(match, params)
|
||||
if not actions:
|
||||
socketio.emit('copilot_complete', {
|
||||
_emit_dual('copilot_complete', 'done', {
|
||||
"workflow": workflow_name,
|
||||
"status": "error",
|
||||
"message": "Aucune action exécutable dans ce workflow.",
|
||||
@@ -1959,7 +2135,7 @@ def execute_workflow_copilot(match, params: Dict[str, Any]):
|
||||
break
|
||||
|
||||
copilot_state["status"] = "waiting_approval"
|
||||
socketio.emit('copilot_step', {
|
||||
_emit_dual('copilot_step', 'need_confirm', {
|
||||
"workflow": workflow_name,
|
||||
"step_index": idx,
|
||||
"total": total,
|
||||
@@ -1982,7 +2158,7 @@ def execute_workflow_copilot(match, params: Dict[str, Any]):
|
||||
|
||||
if waited >= max_wait:
|
||||
copilot_state["status"] = "aborted"
|
||||
socketio.emit('copilot_complete', {
|
||||
_emit_dual('copilot_complete', 'done', {
|
||||
"workflow": workflow_name,
|
||||
"status": "timeout",
|
||||
"message": f"Timeout : pas de réponse après {max_wait}s.",
|
||||
@@ -1999,7 +2175,7 @@ def execute_workflow_copilot(match, params: Dict[str, Any]):
|
||||
elif decision == "skipped":
|
||||
copilot_state["skipped"] += 1
|
||||
logger.info(f"Copilot skip étape {idx + 1}/{total}")
|
||||
socketio.emit('copilot_step_result', {
|
||||
_emit_dual('copilot_step_result', 'step_result', {
|
||||
"step_index": idx,
|
||||
"total": total,
|
||||
"status": "skipped",
|
||||
@@ -2034,7 +2210,7 @@ def execute_workflow_copilot(match, params: Dict[str, Any]):
|
||||
|
||||
if action_success:
|
||||
copilot_state["completed"] += 1
|
||||
socketio.emit('copilot_step_result', {
|
||||
_emit_dual('copilot_step_result', 'step_result', {
|
||||
"step_index": idx,
|
||||
"total": total,
|
||||
"status": "completed",
|
||||
@@ -2042,7 +2218,7 @@ def execute_workflow_copilot(match, params: Dict[str, Any]):
|
||||
})
|
||||
else:
|
||||
copilot_state["failed"] += 1
|
||||
socketio.emit('copilot_step_result', {
|
||||
_emit_dual('copilot_step_result', 'step_result', {
|
||||
"step_index": idx,
|
||||
"total": total,
|
||||
"status": "failed",
|
||||
@@ -2051,7 +2227,7 @@ def execute_workflow_copilot(match, params: Dict[str, Any]):
|
||||
else:
|
||||
error = resp.text[:200]
|
||||
copilot_state["failed"] += 1
|
||||
socketio.emit('copilot_step_result', {
|
||||
_emit_dual('copilot_step_result', 'step_result', {
|
||||
"step_index": idx,
|
||||
"total": total,
|
||||
"status": "failed",
|
||||
@@ -2060,7 +2236,7 @@ def execute_workflow_copilot(match, params: Dict[str, Any]):
|
||||
|
||||
except http_requests.ConnectionError:
|
||||
copilot_state["failed"] += 1
|
||||
socketio.emit('copilot_step_result', {
|
||||
_emit_dual('copilot_step_result', 'step_result', {
|
||||
"step_index": idx,
|
||||
"total": total,
|
||||
"status": "failed",
|
||||
@@ -2070,7 +2246,7 @@ def execute_workflow_copilot(match, params: Dict[str, Any]):
|
||||
except Exception as e:
|
||||
copilot_state["failed"] += 1
|
||||
logger.error(f"Copilot action error: {e}")
|
||||
socketio.emit('copilot_step_result', {
|
||||
_emit_dual('copilot_step_result', 'step_result', {
|
||||
"step_index": idx,
|
||||
"total": total,
|
||||
"status": "failed",
|
||||
@@ -2098,7 +2274,7 @@ def execute_workflow_copilot(match, params: Dict[str, Any]):
|
||||
f"Copilot terminé : {completed} réussies, "
|
||||
f"{skipped} passées, {failed} échouées sur {total} étapes."
|
||||
)
|
||||
socketio.emit('copilot_complete', {
|
||||
_emit_dual('copilot_complete', 'done', {
|
||||
"workflow": workflow_name,
|
||||
"status": "completed" if success else "partial",
|
||||
"message": message,
|
||||
@@ -2175,7 +2351,7 @@ def execute_workflow(match, params):
|
||||
execution_status["progress"] = 10
|
||||
execution_status["message"] = f"Envoyé à l'Agent V1 ({target_session})"
|
||||
|
||||
socketio.emit('execution_progress', {
|
||||
_emit_dual('execution_progress', 'action_progress', {
|
||||
"progress": 10,
|
||||
"step": f"Replay envoyé à l'Agent V1 — {total_actions} actions en attente",
|
||||
"current": 0,
|
||||
@@ -2523,7 +2699,7 @@ def update_progress(progress: int, message: str, current: int, total: int):
|
||||
execution_status["progress"] = progress
|
||||
execution_status["message"] = message
|
||||
|
||||
socketio.emit('execution_progress', {
|
||||
_emit_dual('execution_progress', 'action_progress', {
|
||||
"progress": progress,
|
||||
"step": message,
|
||||
"current": current,
|
||||
@@ -2543,13 +2719,149 @@ def finish_execution(workflow_name: str, success: bool, message: str):
|
||||
if command_history:
|
||||
command_history[-1]["status"] = "completed" if success else "failed"
|
||||
|
||||
socketio.emit('execution_completed', {
|
||||
_emit_dual('execution_completed', 'done', {
|
||||
"workflow": workflow_name,
|
||||
"success": success,
|
||||
"message": message
|
||||
})
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Orchestration démo GHT Sud 95 — "traite N dossiers"
|
||||
# =============================================================================
|
||||
# Délégué à agent_chat.urgences_orchestrator (gemma3:1b NLP + thread orchestrateur).
|
||||
# Routes :
|
||||
# POST /api/urgences/parse — test parsing intent (debug)
|
||||
# POST /api/urgences/start — démarrer une orchestration
|
||||
# GET /api/urgences/status/<id>— état d'une orchestration
|
||||
# GET /api/urgences/list — toutes les orchestrations en mémoire
|
||||
|
||||
try:
|
||||
from agent_chat.urgences_orchestrator import (
|
||||
parse_lea_command,
|
||||
start_orchestration,
|
||||
get_orchestration,
|
||||
list_orchestrations,
|
||||
)
|
||||
_URGENCES_AVAILABLE = True
|
||||
except Exception as _e_urg:
|
||||
logger.warning("Module urgences_orchestrator indisponible : %s", _e_urg)
|
||||
_URGENCES_AVAILABLE = False
|
||||
|
||||
|
||||
@app.route('/api/urgences/parse', methods=['POST'])
|
||||
def urgences_parse():
|
||||
if not _URGENCES_AVAILABLE:
|
||||
return jsonify({"error": "module urgences_orchestrator indisponible"}), 503
|
||||
payload = request.get_json(silent=True) or {}
|
||||
text = (payload.get("text") or "").strip()
|
||||
if not text:
|
||||
return jsonify({"error": "champ 'text' manquant"}), 400
|
||||
intent = parse_lea_command(text)
|
||||
return jsonify(intent)
|
||||
|
||||
|
||||
@app.route('/api/urgences/start', methods=['POST'])
|
||||
def urgences_start():
|
||||
if not _URGENCES_AVAILABLE:
|
||||
return jsonify({"error": "module urgences_orchestrator indisponible"}), 503
|
||||
payload = request.get_json(silent=True) or {}
|
||||
text = (payload.get("text") or "").strip()
|
||||
session_id = payload.get("session_id") or ""
|
||||
machine_id = payload.get("machine_id") or None
|
||||
if not text:
|
||||
return jsonify({"error": "champ 'text' manquant"}), 400
|
||||
intent = parse_lea_command(text)
|
||||
if intent.get("action") != "process_patients":
|
||||
return jsonify({"intent": intent, "started": False,
|
||||
"reply": "Je n'ai pas compris la commande. Exemples : 'traite-moi 3 dossiers', 'code les 5 premiers'."})
|
||||
state = start_orchestration(intent, session_id=session_id, machine_id=machine_id)
|
||||
return jsonify({"intent": intent, "started": True, "orchestration": state.to_dict()})
|
||||
|
||||
|
||||
@app.route('/api/urgences/status/<orch_id>')
|
||||
def urgences_status(orch_id):
|
||||
if not _URGENCES_AVAILABLE:
|
||||
return jsonify({"error": "module urgences_orchestrator indisponible"}), 503
|
||||
state = get_orchestration(orch_id)
|
||||
if not state:
|
||||
return jsonify({"error": f"orchestration {orch_id} introuvable"}), 404
|
||||
return jsonify(state.to_dict())
|
||||
|
||||
|
||||
@app.route('/api/urgences/list')
|
||||
def urgences_list():
|
||||
if not _URGENCES_AVAILABLE:
|
||||
return jsonify({"error": "module urgences_orchestrator indisponible"}), 503
|
||||
return jsonify({"orchestrations": list_orchestrations()})
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# P1-LEA SHADOW — déclenchement d'apprentissage depuis l'extérieur
|
||||
# =============================================================================
|
||||
|
||||
@app.route('/api/learn/start', methods=['POST'])
|
||||
def api_learn_start():
|
||||
"""Déclenche une session d'apprentissage Léa-first.
|
||||
|
||||
Endpoint utilisé par le bouton Windows (ChatWindow tkinter) ou tout autre
|
||||
client externe pour démarrer le cycle Shadow → Persist côté agent-chat.
|
||||
|
||||
Payload JSON :
|
||||
- machine_id (str, obligatoire) : identifiant de la machine où
|
||||
l'apprentissage est en cours (sera repris pour le persist).
|
||||
- session_name (str | None, optionnel) : nom d'affichage de la
|
||||
session (ignoré pour l'instant — réservé futur).
|
||||
- user_id (str | None, optionnel) : défaut "default".
|
||||
- trigger_source (str, optionnel) : défaut "windows_button".
|
||||
Utilisé pour distinguer du "magic_phrase" ou "proactive".
|
||||
|
||||
Retours :
|
||||
- 200 : {"session_id": str, "state": str, "message": str}
|
||||
- 400 : machine_id absent ou vide
|
||||
- 503 : orchestrateur non initialisé (init_system pas appelé)
|
||||
- 500 : exception interne (shadow_start, état illégal, etc.)
|
||||
|
||||
Auth/CORS : suit le pattern des autres routes API du module (pas d'auth
|
||||
Flask explicite — l'API est en LAN derrière le reverse proxy /
|
||||
SocketIO cors_allowed_origins).
|
||||
"""
|
||||
if learn_action_orchestrator is None:
|
||||
return jsonify({
|
||||
"error": "LearnActionOrchestrator non initialisé",
|
||||
}), 503
|
||||
|
||||
data = request.get_json(silent=True) or {}
|
||||
machine_id = (data.get("machine_id") or "").strip()
|
||||
if not machine_id:
|
||||
return jsonify({
|
||||
"error": "machine_id requis (str non vide)",
|
||||
}), 400
|
||||
|
||||
user_id = (data.get("user_id") or "default").strip() or "default"
|
||||
trigger_source = (data.get("trigger_source") or "windows_button").strip() or "windows_button"
|
||||
# session_name reçu mais non utilisé pour l'instant (réservé futur)
|
||||
_session_name = data.get("session_name")
|
||||
|
||||
try:
|
||||
st, reply = learn_action_orchestrator.start_session(
|
||||
user_id=user_id,
|
||||
trigger_source=trigger_source,
|
||||
machine_id=machine_id,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.exception("api_learn_start failed")
|
||||
return jsonify({
|
||||
"error": f"démarrage apprentissage impossible: {exc}",
|
||||
}), 500
|
||||
|
||||
return jsonify({
|
||||
"session_id": st.session_id,
|
||||
"state": st.state.value if hasattr(st.state, "value") else str(st.state),
|
||||
"message": reply,
|
||||
})
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Main
|
||||
# =============================================================================
|
||||
|
||||
@@ -27,6 +27,8 @@ import requests
|
||||
# Ajouter le chemin du projet pour les imports core
|
||||
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
||||
|
||||
from core.detection.vlm_config import get_reasoning_model
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Essayer d'importer les composants de détection visuelle
|
||||
@@ -49,7 +51,10 @@ try:
|
||||
from PIL import Image as PILImage
|
||||
import pyautogui
|
||||
PYAUTOGUI_AVAILABLE = True
|
||||
except ImportError:
|
||||
except Exception:
|
||||
# pyautogui peut lever Xlib.error.DisplayConnectionError (pas un ImportError)
|
||||
# quand X n'est pas accessible — typique d'un service systemd headless côté
|
||||
# serveur. Le serveur n'a pas besoin de pyautogui (utilisé côté client agent).
|
||||
PYAUTOGUI_AVAILABLE = False
|
||||
PILImage = None
|
||||
pyautogui = None
|
||||
@@ -110,11 +115,11 @@ class AutonomousPlanner:
|
||||
def __init__(
|
||||
self,
|
||||
llm_endpoint: str = "http://localhost:11434/api/generate",
|
||||
llm_model: str = "qwen2.5:7b",
|
||||
llm_model: Optional[str] = None,
|
||||
timeout: int = 60
|
||||
):
|
||||
self.llm_endpoint = llm_endpoint
|
||||
self.llm_model = llm_model
|
||||
self.llm_model = llm_model or get_reasoning_model()
|
||||
self.timeout = timeout
|
||||
self.llm_available = self._check_llm()
|
||||
|
||||
@@ -134,11 +139,31 @@ class AutonomousPlanner:
|
||||
logger.info(f"AutonomousPlanner initialized (LLM: {self.llm_model}, available: {self.llm_available}, visual: {self._owl_detector is not None}, vlm: {self._vlm_client is not None})")
|
||||
|
||||
def _init_visual_detection(self):
|
||||
"""Initialise le détecteur visuel OWL-v2."""
|
||||
"""Initialise le détecteur visuel OWL-v2.
|
||||
|
||||
Désactivé par défaut depuis 2026-05-25 (C1b) : OWL-v2 chargeait sur
|
||||
CUDA au boot et retenait ~600 MiB VRAM même en cas d'OOM silencieux,
|
||||
fausssant les benchs perf et contribuant à l'offload Ollama VLM.
|
||||
Comme `autonomous_planner` est largement non-wired au runtime actif
|
||||
(cf. mémoire projet : HTTP 410 dépréciés), le défaut est skip.
|
||||
|
||||
Activation : `AGENT_CHAT_ENABLE_OWL=1` (env var).
|
||||
Device : `AGENT_CHAT_OWL_DEVICE=cuda|cpu` (override l'auto-détect).
|
||||
"""
|
||||
if os.environ.get("AGENT_CHAT_ENABLE_OWL", "0").strip() not in ("1", "true", "yes"):
|
||||
logger.info(
|
||||
"OWL-v2 visual detector skipped at boot "
|
||||
"(AGENT_CHAT_ENABLE_OWL!=1, économie ~600 MiB VRAM)"
|
||||
)
|
||||
return
|
||||
if VISUAL_DETECTION_AVAILABLE and OwlDetector:
|
||||
try:
|
||||
self._owl_detector = OwlDetector(confidence_threshold=0.1)
|
||||
logger.info("OWL-v2 visual detector initialized")
|
||||
device = os.environ.get("AGENT_CHAT_OWL_DEVICE", "").strip() or None
|
||||
self._owl_detector = OwlDetector(
|
||||
confidence_threshold=0.1,
|
||||
device=device,
|
||||
)
|
||||
logger.info(f"OWL-v2 visual detector initialized (device={device or 'auto'})")
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not initialize OWL detector: {e}")
|
||||
self._owl_detector = None
|
||||
@@ -147,8 +172,10 @@ class AutonomousPlanner:
|
||||
"""Initialise le client VLM pour analyse intelligente."""
|
||||
if VLM_AVAILABLE and OllamaClient:
|
||||
try:
|
||||
self._vlm_client = OllamaClient(model="qwen2.5vl:7b")
|
||||
logger.info("VLM client initialized (qwen2.5vl:7b)")
|
||||
from core.detection.vlm_config import get_vlm_model
|
||||
_planner_vlm = get_vlm_model()
|
||||
self._vlm_client = OllamaClient(model=_planner_vlm)
|
||||
logger.info("VLM client initialized (%s)", _planner_vlm)
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not initialize VLM client: {e}")
|
||||
self._vlm_client = None
|
||||
@@ -1003,12 +1030,12 @@ _planner_instance: Optional[AutonomousPlanner] = None
|
||||
|
||||
|
||||
def get_autonomous_planner(
|
||||
llm_model: str = "qwen2.5:7b"
|
||||
llm_model: Optional[str] = None
|
||||
) -> AutonomousPlanner:
|
||||
"""Retourne l'instance singleton du planner."""
|
||||
global _planner_instance
|
||||
|
||||
if _planner_instance is None:
|
||||
_planner_instance = AutonomousPlanner(llm_model=llm_model)
|
||||
_planner_instance = AutonomousPlanner(llm_model=llm_model or get_reasoning_model())
|
||||
|
||||
return _planner_instance
|
||||
|
||||
@@ -16,6 +16,7 @@ Auteur: Dom — Mars 2026
|
||||
|
||||
import logging
|
||||
import re
|
||||
import unicodedata
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from difflib import SequenceMatcher
|
||||
@@ -24,6 +25,11 @@ from typing import Dict, List, Optional, Tuple
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
SAVE_COMMAND_LABELS = {"enregistrer", "save", "sauvegarder"}
|
||||
SAVE_AS_LABELS = {"enregistrer sous", "save as", "sauvegarder sous"}
|
||||
FILE_MENU_LABELS = {"fichier", "file", "menu fichier", "file menu"}
|
||||
|
||||
|
||||
@dataclass
|
||||
class Gesture:
|
||||
"""Un geste primitif universel."""
|
||||
@@ -564,6 +570,7 @@ class GestureCatalog:
|
||||
Patterns :
|
||||
- Clic en haut à droite de la fenêtre (x > 95%, y < 5%) → fermer
|
||||
- target_text contenant ✕, ×, X, □, ─, etc.
|
||||
- Commande applicative "Enregistrer" sûre → Ctrl+S
|
||||
"""
|
||||
# Vérifier le target_text
|
||||
target_text = (
|
||||
@@ -583,6 +590,9 @@ class GestureCatalog:
|
||||
if target_lower in ("─", "—", "_", "minimize", "réduire"):
|
||||
return self._by_id.get("win_minimize")
|
||||
|
||||
if self._is_save_command_action(action):
|
||||
return self._by_id.get("edit_save")
|
||||
|
||||
# Vérifier la position relative (coin haut-droite = fermer)
|
||||
x_pct = action.get("x_pct", 0)
|
||||
y_pct = action.get("y_pct", 0)
|
||||
@@ -596,6 +606,128 @@ class GestureCatalog:
|
||||
|
||||
return None
|
||||
|
||||
def _normalize_ui_text(self, value: str) -> str:
|
||||
"""Normaliser un libellé UI pour comparer accents, casse et raccourcis."""
|
||||
text = str(value or "").strip().lower()
|
||||
text = unicodedata.normalize("NFKD", text)
|
||||
text = "".join(ch for ch in text if not unicodedata.combining(ch))
|
||||
text = text.replace("’", "'")
|
||||
text = re.sub(r"\s+", " ", text)
|
||||
text = re.sub(r"\s*\([^)]*ctrl\s*\+?\s*s[^)]*\)\s*$", "", text)
|
||||
text = re.sub(r"\s+ctrl\s*\+?\s*s\s*$", "", text)
|
||||
return text.strip()
|
||||
|
||||
def _action_text_candidates(self, action: Dict) -> List[str]:
|
||||
"""Retourner les libellés utiles d'une action et de son target_spec."""
|
||||
target_spec = action.get("target_spec") or {}
|
||||
candidates = [
|
||||
action.get("target_text", ""),
|
||||
action.get("target_description", ""),
|
||||
action.get("description", ""),
|
||||
target_spec.get("by_text", ""),
|
||||
target_spec.get("target_text", ""),
|
||||
target_spec.get("vlm_description", ""),
|
||||
]
|
||||
return [str(c) for c in candidates if c]
|
||||
|
||||
def _action_role_text(self, action: Dict) -> str:
|
||||
target_spec = action.get("target_spec") or {}
|
||||
uia = action.get("uia_snapshot") or {}
|
||||
role_parts = [
|
||||
action.get("role", ""),
|
||||
action.get("control_type", ""),
|
||||
target_spec.get("by_role", ""),
|
||||
target_spec.get("role", ""),
|
||||
target_spec.get("control_type", ""),
|
||||
uia.get("control_type", ""),
|
||||
uia.get("class_name", ""),
|
||||
]
|
||||
return " ".join(self._normalize_ui_text(part) for part in role_parts if part)
|
||||
|
||||
def _action_context_text(self, action: Dict) -> str:
|
||||
target_spec = action.get("target_spec") or {}
|
||||
hints = target_spec.get("context_hints") or {}
|
||||
context_parts = [
|
||||
action.get("window_title", ""),
|
||||
target_spec.get("window_title", ""),
|
||||
target_spec.get("vlm_description", ""),
|
||||
hints.get("window_title", ""),
|
||||
hints.get("interaction", ""),
|
||||
hints.get("source", ""),
|
||||
hints.get("menu_path", ""),
|
||||
]
|
||||
return " ".join(self._normalize_ui_text(part) for part in context_parts if part)
|
||||
|
||||
def _is_file_menu_action(self, action: Dict) -> bool:
|
||||
labels = {self._normalize_ui_text(text) for text in self._action_text_candidates(action)}
|
||||
return bool(labels & FILE_MENU_LABELS)
|
||||
|
||||
def _is_save_command_label(self, action: Dict) -> bool:
|
||||
for text in self._action_text_candidates(action):
|
||||
label = self._normalize_ui_text(text)
|
||||
if not label:
|
||||
continue
|
||||
if any(save_as in label for save_as in SAVE_AS_LABELS):
|
||||
return False
|
||||
if label in SAVE_COMMAND_LABELS:
|
||||
return True
|
||||
return False
|
||||
|
||||
def _is_save_dialog_action(self, action: Dict) -> bool:
|
||||
context = self._action_context_text(action)
|
||||
if any(save_as in context for save_as in SAVE_AS_LABELS):
|
||||
return True
|
||||
dialog_markers = (
|
||||
"save dialog",
|
||||
"save_dialog",
|
||||
"dialog",
|
||||
"boite de dialogue",
|
||||
"fenetre enregistrer sous",
|
||||
"confirmer l'enregistrement",
|
||||
"save changes",
|
||||
)
|
||||
return any(marker in context for marker in dialog_markers)
|
||||
|
||||
def _is_save_command_action(self, action: Dict) -> bool:
|
||||
if not self._is_save_command_label(action):
|
||||
return False
|
||||
if self._is_save_dialog_action(action):
|
||||
return False
|
||||
|
||||
role = self._action_role_text(action)
|
||||
context = self._action_context_text(action)
|
||||
command_markers = (
|
||||
"menu",
|
||||
"menuitem",
|
||||
"item de menu",
|
||||
"toolbar",
|
||||
"barre d'outils",
|
||||
"tool bar",
|
||||
"ruban",
|
||||
"ribbon",
|
||||
"commande",
|
||||
"command",
|
||||
)
|
||||
return any(marker in role or marker in context for marker in command_markers)
|
||||
|
||||
def _substitute_action(
|
||||
self,
|
||||
action: Dict,
|
||||
gesture: Gesture,
|
||||
*,
|
||||
original_type: str,
|
||||
source_action_ids: Optional[List[str]] = None,
|
||||
reason: str = "",
|
||||
) -> Dict:
|
||||
new_action = gesture.to_replay_action()
|
||||
new_action["action_id"] = action.get("action_id", new_action["action_id"])
|
||||
new_action["original_type"] = original_type
|
||||
if source_action_ids:
|
||||
new_action["substitution_source_action_ids"] = source_action_ids
|
||||
if reason:
|
||||
new_action["substitution_reason"] = reason
|
||||
return new_action
|
||||
|
||||
def optimize_replay_actions(self, actions: List[Dict]) -> List[Dict]:
|
||||
"""
|
||||
Optimiser une liste d'actions de replay en substituant les gestes connus.
|
||||
@@ -610,13 +742,45 @@ class GestureCatalog:
|
||||
substitutions = 0
|
||||
|
||||
for action in actions:
|
||||
if (
|
||||
action.get("type") == "click"
|
||||
and optimized
|
||||
and optimized[-1].get("type") == "click"
|
||||
and self._is_file_menu_action(optimized[-1])
|
||||
and self._is_save_command_label(action)
|
||||
and not self._is_save_dialog_action(action)
|
||||
):
|
||||
gesture = self._by_id.get("edit_save")
|
||||
previous = optimized.pop()
|
||||
source_ids = [
|
||||
source_id for source_id in (
|
||||
previous.get("action_id"),
|
||||
action.get("action_id"),
|
||||
)
|
||||
if source_id
|
||||
]
|
||||
optimized.append(
|
||||
self._substitute_action(
|
||||
action,
|
||||
gesture,
|
||||
original_type="click_sequence",
|
||||
source_action_ids=source_ids,
|
||||
reason="file_menu_save_to_ctrl_s",
|
||||
)
|
||||
)
|
||||
substitutions += 1
|
||||
logger.debug("Séquence Fichier > Enregistrer substituée par Ctrl+S")
|
||||
continue
|
||||
|
||||
gesture = self.match_action(action)
|
||||
if gesture and action.get("type") != "key_combo":
|
||||
# Substituer par le raccourci clavier
|
||||
new_action = gesture.to_replay_action()
|
||||
# Conserver l'action_id original pour le tracking
|
||||
new_action["action_id"] = action.get("action_id", new_action["action_id"])
|
||||
new_action["original_type"] = action.get("type")
|
||||
new_action = self._substitute_action(
|
||||
action,
|
||||
gesture,
|
||||
original_type=action.get("type", ""),
|
||||
reason=f"{gesture.id}_gesture_substitution",
|
||||
)
|
||||
optimized.append(new_action)
|
||||
substitutions += 1
|
||||
logger.debug(
|
||||
|
||||
29
agent_chat/handlers/__init__.py
Normal file
29
agent_chat/handlers/__init__.py
Normal file
@@ -0,0 +1,29 @@
|
||||
"""Agent-chat handlers package.
|
||||
|
||||
Contient les orchestrateurs spécialisés (apprentissage Léa, etc.) appelés
|
||||
par `agent_chat.app` quand le routage normal d'intent ne suffit pas.
|
||||
"""
|
||||
|
||||
from .learn_action import (
|
||||
LearnActionOrchestrator,
|
||||
LearnState,
|
||||
LearnIntent,
|
||||
LearnIntentParser,
|
||||
OptionCFormatter,
|
||||
StreamingClient,
|
||||
StateStore,
|
||||
PersistPayloadBuilder,
|
||||
get_learn_action_orchestrator,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"LearnActionOrchestrator",
|
||||
"LearnState",
|
||||
"LearnIntent",
|
||||
"LearnIntentParser",
|
||||
"OptionCFormatter",
|
||||
"StreamingClient",
|
||||
"StateStore",
|
||||
"PersistPayloadBuilder",
|
||||
"get_learn_action_orchestrator",
|
||||
]
|
||||
1192
agent_chat/handlers/learn_action.py
Normal file
1192
agent_chat/handlers/learn_action.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -19,6 +19,8 @@ from enum import Enum
|
||||
from typing import Dict, Any, List, Optional, Tuple
|
||||
from pathlib import Path
|
||||
|
||||
from core.detection.vlm_config import get_reasoning_model
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -280,7 +282,7 @@ class IntentParser:
|
||||
self,
|
||||
use_llm: bool = False,
|
||||
llm_endpoint: str = "http://localhost:11434",
|
||||
llm_model: str = "qwen2.5:7b"
|
||||
llm_model: Optional[str] = None
|
||||
):
|
||||
"""
|
||||
Initialiser le parseur d'intentions.
|
||||
@@ -292,7 +294,7 @@ class IntentParser:
|
||||
"""
|
||||
self.use_llm = use_llm
|
||||
self.llm_endpoint = llm_endpoint
|
||||
self.llm_model = llm_model
|
||||
self.llm_model = llm_model or get_reasoning_model()
|
||||
self.llm_available = False
|
||||
self._workflows_cache: List[Dict[str, Any]] = []
|
||||
|
||||
@@ -687,7 +689,7 @@ _intent_parser: Optional[IntentParser] = None
|
||||
|
||||
def get_intent_parser(
|
||||
use_llm: bool = False,
|
||||
llm_model: str = "qwen2.5:7b",
|
||||
llm_model: Optional[str] = None,
|
||||
llm_endpoint: str = "http://localhost:11434"
|
||||
) -> IntentParser:
|
||||
"""
|
||||
@@ -695,20 +697,21 @@ def get_intent_parser(
|
||||
|
||||
Args:
|
||||
use_llm: Activer le LLM (Ollama)
|
||||
llm_model: Modèle à utiliser (qwen2.5:7b par défaut)
|
||||
llm_model: Modèle à utiliser (défaut: modèle reasoning central)
|
||||
llm_endpoint: URL de l'endpoint Ollama
|
||||
"""
|
||||
global _intent_parser
|
||||
resolved_model = llm_model or get_reasoning_model()
|
||||
if _intent_parser is None:
|
||||
_intent_parser = IntentParser(
|
||||
use_llm=use_llm,
|
||||
llm_endpoint=llm_endpoint,
|
||||
llm_model=llm_model
|
||||
llm_model=resolved_model
|
||||
)
|
||||
elif use_llm and not _intent_parser.use_llm:
|
||||
# Réactiver le LLM si demandé
|
||||
_intent_parser.use_llm = True
|
||||
_intent_parser.llm_model = llm_model
|
||||
_intent_parser.llm_model = resolved_model
|
||||
_intent_parser._check_llm_availability()
|
||||
return _intent_parser
|
||||
|
||||
|
||||
518
agent_chat/urgences_orchestrator.py
Normal file
518
agent_chat/urgences_orchestrator.py
Normal file
@@ -0,0 +1,518 @@
|
||||
"""Orchestrateur démo GHT Sud 95 — pilotage du scénario "traite N dossiers".
|
||||
|
||||
Reçoit une commande naturelle de Léa (chat) et orchestre :
|
||||
1. Parsing intent via gemma3:1b (mini-LLM local, ~400 ms)
|
||||
2. Setup Chrome (Win+R → URL maquette → Enter) via /replay/raw
|
||||
3. extract_table sur la liste des patients (regex IPP, limit=N)
|
||||
4. Boucle : pour chaque IPP, lance le workflow "Urgence_unit" via /replay
|
||||
avec `variables={"patient_id": ipp}` pour la résolution `{{patient_id}}`
|
||||
5. Synthèse finale postée dans le chat
|
||||
|
||||
L'orchestration tourne dans un thread daemon. L'état est stocké en mémoire,
|
||||
poll-able via /api/urgences/status/<orch_id>.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import threading
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Chargement explicite de .env.local du repo (le service systemd peut ne pas
|
||||
# voir cet env file). Cherche dans le parent de agent_chat/.
|
||||
def _load_env_local() -> None:
|
||||
env_path = Path(__file__).resolve().parent.parent / ".env.local"
|
||||
if not env_path.is_file():
|
||||
return
|
||||
try:
|
||||
for line in env_path.read_text().splitlines():
|
||||
line = line.strip()
|
||||
if not line or line.startswith("#") or "=" not in line:
|
||||
continue
|
||||
k, v = line.split("=", 1)
|
||||
k = k.strip()
|
||||
v = v.strip().strip('"').strip("'")
|
||||
os.environ.setdefault(k, v)
|
||||
except Exception as e:
|
||||
logger.warning("Erreur chargement .env.local: %s", e)
|
||||
|
||||
|
||||
_load_env_local()
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
# Config
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
STREAM_BASE = os.environ.get("RPA_STREAM_BASE", "http://localhost:5005")
|
||||
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434/api/generate")
|
||||
NLP_MODEL = os.environ.get("LEA_NLP_MODEL", "gemma3:1b")
|
||||
RPA_API_TOKEN = os.environ.get("RPA_API_TOKEN", "")
|
||||
|
||||
URGENCE_WORKFLOW_ID = os.environ.get("LEA_URGENCE_WORKFLOW_ID", "wf_urgence_unit")
|
||||
# URL LAN locale (sans Basic Auth ni HTTPS) pour éviter le prompt Windows Hello
|
||||
# de Chrome (lecteur d'empreintes digitales) qui bloque le replay automatique.
|
||||
# L'URL publique HTTPS reste disponible (https://urgence.labs.laurinebazin.design)
|
||||
# pour usage humain, mais n'est PAS utilisée par Léa pendant la démo.
|
||||
MAQUETTE_URL = os.environ.get("LEA_MAQUETTE_URL", "http://192.168.1.40:8765/index.html")
|
||||
|
||||
|
||||
|
||||
# Session de replay stable de l'agent V1. L'agent polle /replay/next sur
|
||||
# `agent_<user_id>` indépendamment des sessions d'enregistrement (sess_*).
|
||||
# user_id default côté agent V1 = "demo_user" (cf. agent_v1/main.py:62).
|
||||
AGENT_SESSION_ID = os.environ.get("LEA_AGENT_SESSION_ID", "agent_demo_user")
|
||||
|
||||
# machine_id de l'agent V1 cible. DOIT matcher self.machine_id côté agent V1
|
||||
# (sinon /replay/next ne distribue pas la queue à cette machine — le serveur
|
||||
# isole les machines pour éviter le vol cross-machine d'actions).
|
||||
# Valeur par défaut = hostname du PC Windows de démo GHT.
|
||||
AGENT_MACHINE_ID = os.environ.get("LEA_AGENT_MACHINE_ID", "DESKTOP-58D5CAC_windows")
|
||||
|
||||
# Pattern IPP : 8 chiffres, premier groupe "25" (cohort 2025), reste libre
|
||||
IPP_PATTERN = r"^25\d{6}$"
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
# NLP : parsing de commande naturelle via gemma3:1b
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
NLP_PROMPT = """Tu es un parseur d'intentions pour Léa, assistant RPA médical.
|
||||
Réponds UNIQUEMENT en JSON valide, sans texte avant/après, selon ce schéma :
|
||||
{"action": "process_patients" | "stop" | "unknown", "count": <int|null>, "order": "first" | "last" | "all" | "specific" | null, "ipp": "<string>" | null}
|
||||
|
||||
Règles :
|
||||
- "traite N dossiers" / "code N dossiers" / "fais les N premiers" → action=process_patients, count=N, order="first"
|
||||
- "traite tous les dossiers" → action=process_patients, count=null, order="all"
|
||||
- "traite le dossier 25003364" → action=process_patients, count=1, order="specific", ipp="25003364"
|
||||
- "stop" / "arrête" / "annule" → action=stop
|
||||
- Question ("comment", "pourquoi") → action=unknown
|
||||
- Si tu ne comprends pas → action=unknown"""
|
||||
|
||||
|
||||
def parse_lea_command(text: str, model: str = NLP_MODEL, timeout: int = 8) -> Dict[str, Any]:
|
||||
"""Parse une commande naturelle en intent structuré via gemma3:1b.
|
||||
|
||||
Fallback regex si Ollama est indisponible — pour ne pas bloquer la démo.
|
||||
Returns : dict {action, count, order, ipp} ou {action: "unknown"}.
|
||||
"""
|
||||
payload = {
|
||||
"model": model,
|
||||
"prompt": NLP_PROMPT + "\n\nUtilisateur : " + text + "\n\nJSON :",
|
||||
"stream": False,
|
||||
"format": "json",
|
||||
"options": {"temperature": 0.0, "num_predict": 120, "num_ctx": 1024},
|
||||
}
|
||||
data = json.dumps(payload).encode("utf-8")
|
||||
req = urllib.request.Request(OLLAMA_URL, data=data, headers={"Content-Type": "application/json"})
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
||||
body = json.loads(resp.read().decode("utf-8"))
|
||||
raw = (body.get("response") or "").strip()
|
||||
if raw.startswith("```"):
|
||||
raw = raw.split("\n", 1)[-1].rsplit("```", 1)[0].strip()
|
||||
intent = json.loads(raw)
|
||||
except (urllib.error.URLError, TimeoutError, json.JSONDecodeError) as e:
|
||||
logger.warning("parse_lea_command: gemma3:1b indisponible (%s), fallback regex", e)
|
||||
return _parse_fallback_regex(text)
|
||||
|
||||
# Post-processing : gemma3:1b a tendance à remplir tous les champs même
|
||||
# quand non pertinent. On nettoie :
|
||||
# - ipp ne doit être conservé que si présent LITTÉRALEMENT dans le texte source
|
||||
# (sinon le LLM hallucine un IPP plausible)
|
||||
if intent.get("ipp") and str(intent["ipp"]) not in text:
|
||||
intent["ipp"] = None
|
||||
# Si le LLM a forcé order=specific sans vrai IPP, on bascule en first
|
||||
if intent.get("order") == "specific":
|
||||
intent["order"] = "first"
|
||||
# - ipp ne doit être conservé que si order="specific" ET format IPP valide
|
||||
if intent.get("ipp") and intent.get("order") != "specific":
|
||||
intent["ipp"] = None
|
||||
if intent.get("ipp") and not re.match(r"^\d{8,10}$", str(intent["ipp"])):
|
||||
intent["ipp"] = None
|
||||
# - si count est défini ET order="all", l'humain demande "N dossiers" et
|
||||
# non "tous les dossiers" : on bascule en "first" (cohérence sémantique)
|
||||
if intent.get("count") and intent.get("order") == "all":
|
||||
intent["order"] = "first"
|
||||
return intent
|
||||
|
||||
|
||||
def _parse_fallback_regex(text: str) -> Dict[str, Any]:
|
||||
"""Fallback regex robuste si LLM HS — couvre les phrasings classiques."""
|
||||
t = text.lower()
|
||||
if any(w in t for w in ("stop", "arrête", "annule", "annuler")):
|
||||
return {"action": "stop", "count": None, "order": None, "ipp": None}
|
||||
# IPP spécifique : "traite le dossier 25003364"
|
||||
m = re.search(r"\b(25\d{6})\b", text)
|
||||
if m and any(w in t for w in ("traite", "code", "analyse")):
|
||||
return {"action": "process_patients", "count": 1, "order": "specific", "ipp": m.group(1)}
|
||||
if any(w in t for w in ("tous", "toutes")) and any(w in t for w in ("traite", "code")):
|
||||
return {"action": "process_patients", "count": None, "order": "all", "ipp": None}
|
||||
# Quantifié : "traite 3 dossiers"
|
||||
m = re.search(r"(\d+)\s*(?:premiers?\s*)?(?:dossiers?|cas|patients?)", t)
|
||||
if m and any(w in t for w in ("traite", "code", "fais", "analyse")):
|
||||
return {"action": "process_patients", "count": int(m.group(1)), "order": "first", "ipp": None}
|
||||
return {"action": "unknown", "count": None, "order": None, "ipp": None}
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
# Helpers HTTP vers le streaming server (port 5005)
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
def _stream_headers() -> Dict[str, str]:
|
||||
h = {"Content-Type": "application/json"}
|
||||
if RPA_API_TOKEN:
|
||||
h["Authorization"] = f"Bearer {RPA_API_TOKEN}"
|
||||
return h
|
||||
|
||||
|
||||
def _post(path: str, body: dict, timeout: int = 30) -> dict:
|
||||
req = urllib.request.Request(
|
||||
STREAM_BASE + path,
|
||||
data=json.dumps(body).encode("utf-8"),
|
||||
headers=_stream_headers(),
|
||||
method="POST",
|
||||
)
|
||||
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
||||
return json.loads(resp.read().decode("utf-8"))
|
||||
|
||||
|
||||
def _get(path: str, timeout: int = 10) -> dict:
|
||||
req = urllib.request.Request(
|
||||
STREAM_BASE + path,
|
||||
headers=_stream_headers(),
|
||||
method="GET",
|
||||
)
|
||||
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
||||
return json.loads(resp.read().decode("utf-8"))
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
# Orchestration : état + thread d'exécution
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
@dataclass
|
||||
class DossierResult:
|
||||
ipp: str
|
||||
decision: Optional[str] = None # "REQUALIFICATION_HOSPITALISATION" | "FORFAIT_URGENCE"
|
||||
decision_court: Optional[str] = None # "UHCD" | "Forfait Urgences"
|
||||
confiance: Optional[str] = None
|
||||
duree_passage_heures: Optional[float] = None
|
||||
concordance: Optional[bool] = None
|
||||
error: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class OrchestrationState:
|
||||
orch_id: str
|
||||
status: str = "starting" # starting | running | done | error | cancelled
|
||||
progress: int = 0 # 0 → count
|
||||
count: int = 0
|
||||
current_step: str = "" # "setup_chrome" | "extract_table" | "process_dossier_X" | "synthese"
|
||||
intent: Dict[str, Any] = field(default_factory=dict)
|
||||
patients: List[str] = field(default_factory=list)
|
||||
results: List[DossierResult] = field(default_factory=list)
|
||||
synthese: Optional[str] = None
|
||||
error: Optional[str] = None
|
||||
started_at: float = field(default_factory=time.time)
|
||||
finished_at: Optional[float] = None
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"orch_id": self.orch_id,
|
||||
"status": self.status,
|
||||
"progress": self.progress,
|
||||
"count": self.count,
|
||||
"current_step": self.current_step,
|
||||
"intent": self.intent,
|
||||
"patients": self.patients,
|
||||
"results": [r.__dict__ for r in self.results],
|
||||
"synthese": self.synthese,
|
||||
"error": self.error,
|
||||
"elapsed_s": round((self.finished_at or time.time()) - self.started_at, 1),
|
||||
}
|
||||
|
||||
|
||||
# Registry global des orchestrations en cours (thread-safe via lock)
|
||||
_ORCH_REGISTRY: Dict[str, OrchestrationState] = {}
|
||||
_ORCH_LOCK = threading.Lock()
|
||||
|
||||
|
||||
def get_orchestration(orch_id: str) -> Optional[OrchestrationState]:
|
||||
with _ORCH_LOCK:
|
||||
return _ORCH_REGISTRY.get(orch_id)
|
||||
|
||||
|
||||
def list_orchestrations() -> List[Dict[str, Any]]:
|
||||
with _ORCH_LOCK:
|
||||
return [s.to_dict() for s in _ORCH_REGISTRY.values()]
|
||||
|
||||
|
||||
def start_orchestration(
|
||||
intent: Dict[str, Any],
|
||||
session_id: str = "",
|
||||
machine_id: Optional[str] = None,
|
||||
) -> OrchestrationState:
|
||||
"""Lance une orchestration en thread daemon. Retourne l'état initial.
|
||||
|
||||
Args:
|
||||
intent: dict {action, count, order, ipp} (sortie de parse_lea_command)
|
||||
session_id: session de replay (default: agent_demo_user, le canal stable
|
||||
sur lequel l'agent V1 polle /replay/next)
|
||||
machine_id: machine cible (optionnel, pour multi-machines futurs)
|
||||
"""
|
||||
if not session_id:
|
||||
session_id = AGENT_SESSION_ID
|
||||
if not machine_id:
|
||||
machine_id = AGENT_MACHINE_ID
|
||||
orch_id = "orch_" + uuid.uuid4().hex[:10]
|
||||
count = intent.get("count") or 3 # default 3 si "tous" ou "first" sans nombre
|
||||
state = OrchestrationState(
|
||||
orch_id=orch_id,
|
||||
status="starting",
|
||||
count=count,
|
||||
intent=intent,
|
||||
)
|
||||
with _ORCH_LOCK:
|
||||
_ORCH_REGISTRY[orch_id] = state
|
||||
|
||||
th = threading.Thread(
|
||||
target=_run_orchestration,
|
||||
args=(state, session_id, machine_id),
|
||||
daemon=True,
|
||||
name=f"orch-{orch_id}",
|
||||
)
|
||||
th.start()
|
||||
return state
|
||||
|
||||
|
||||
def _run_orchestration(state: OrchestrationState, session_id: str, machine_id: Optional[str]) -> None:
|
||||
"""Boucle d'orchestration exécutée dans un thread.
|
||||
|
||||
Phases :
|
||||
1. Setup Chrome (raw actions Win+R)
|
||||
2. extract_table sur liste patients
|
||||
3. Boucle workflow Urgence_unit
|
||||
4. Synthèse
|
||||
"""
|
||||
try:
|
||||
state.status = "running"
|
||||
intent = state.intent
|
||||
|
||||
# Cas "specific" : court-circuiter, juste 1 IPP
|
||||
if intent.get("order") == "specific" and intent.get("ipp"):
|
||||
state.patients = [intent["ipp"]]
|
||||
state.count = 1
|
||||
state.current_step = "process_dossier"
|
||||
_process_dossiers(state, session_id, machine_id)
|
||||
else:
|
||||
# 1. Setup Chrome → URL maquette
|
||||
state.current_step = "setup_chrome"
|
||||
_setup_chrome(session_id, machine_id)
|
||||
|
||||
# 2. Lire la liste des IPP via extract_table
|
||||
state.current_step = "extract_table"
|
||||
patients = _extract_patient_list(session_id, machine_id, limit=state.count)
|
||||
state.patients = patients
|
||||
if not patients:
|
||||
raise RuntimeError("extract_table n'a trouvé aucun IPP — vérifier que Chrome est sur index.html")
|
||||
|
||||
# 3. Pour chaque IPP : lancer workflow Urgence_unit
|
||||
_process_dossiers(state, session_id, machine_id)
|
||||
|
||||
# 4. Synthèse
|
||||
state.current_step = "synthese"
|
||||
state.synthese = _build_synthese(state)
|
||||
state.status = "done"
|
||||
except Exception as e:
|
||||
logger.exception("Orchestration %s : erreur fatale", state.orch_id)
|
||||
state.status = "error"
|
||||
state.error = str(e)
|
||||
finally:
|
||||
state.finished_at = time.time()
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
# Phases de l'orchestration
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
def _setup_chrome(session_id: str, machine_id: Optional[str]) -> None:
|
||||
"""Composer "ouvrir Chrome sur l'URL maquette" via le catalogue de réflexes.
|
||||
|
||||
Léa ne fait PAS un workflow appris pour cette étape : c'est une composition
|
||||
de primitives natives (réflexes du catalogue) + une saisie texte.
|
||||
|
||||
Séquence :
|
||||
1. réflexe `sys_run` (Win+R) ← gesture_catalog
|
||||
2. type "chrome.exe <URL>" ← saisie atomique
|
||||
3. réflexe `nav_enter` (Entrée) ← gesture_catalog
|
||||
"""
|
||||
from agent_chat.gesture_catalog import get_gesture_catalog
|
||||
|
||||
catalog = get_gesture_catalog()
|
||||
show_desktop = catalog.get_by_id("win_minimize_all") # Win+D — minimise tout (Léa incl.)
|
||||
sys_run = catalog.get_by_id("sys_run")
|
||||
nav_enter = catalog.get_by_id("nav_enter")
|
||||
if sys_run is None or nav_enter is None or show_desktop is None:
|
||||
raise RuntimeError("Réflexes catalogue manquants : win_minimize_all / sys_run / nav_enter")
|
||||
|
||||
actions = [
|
||||
show_desktop.to_replay_action(), # réflexe Win+D — Léa se réduit complètement
|
||||
{
|
||||
"action_id": f"setup_wait_desktop_{uuid.uuid4().hex[:6]}",
|
||||
"type": "wait",
|
||||
"duration_ms": 400,
|
||||
"intention": "Attendre que le bureau soit affiché",
|
||||
},
|
||||
sys_run.to_replay_action(), # réflexe Win+R
|
||||
{
|
||||
"action_id": f"setup_wait_{uuid.uuid4().hex[:6]}",
|
||||
"type": "wait",
|
||||
"duration_ms": 800,
|
||||
"intention": "Attendre que la boîte Exécuter soit prête",
|
||||
},
|
||||
{
|
||||
"action_id": f"setup_typeurl_{uuid.uuid4().hex[:6]}",
|
||||
"type": "type",
|
||||
"text": f"chrome.exe {MAQUETTE_URL}",
|
||||
"intention": "Taper la commande Chrome + URL maquette",
|
||||
},
|
||||
nav_enter.to_replay_action(), # réflexe Entrée
|
||||
{
|
||||
"action_id": f"setup_wait_load_{uuid.uuid4().hex[:6]}",
|
||||
"type": "wait",
|
||||
"duration_ms": 3500,
|
||||
"intention": "Attendre le chargement de la maquette",
|
||||
},
|
||||
]
|
||||
payload = {
|
||||
"actions": actions,
|
||||
"session_id": session_id,
|
||||
"task_description": "Setup démo GHT — composition réflexes (sys_run + type + nav_enter)",
|
||||
}
|
||||
if machine_id:
|
||||
payload["machine_id"] = machine_id
|
||||
resp = _post("/api/v1/traces/stream/replay/raw", payload, timeout=20)
|
||||
replay_id = resp.get("replay_id")
|
||||
if not replay_id:
|
||||
raise RuntimeError(f"setup_chrome : pas de replay_id ({resp})")
|
||||
# Setup Chrome ≈ 13s observé (Win+D + Win+R + type URL + Enter + wait 3500ms),
|
||||
# mais le PC peut être chargé → 60s donne de la marge.
|
||||
_wait_replay_done(replay_id, timeout_s=60)
|
||||
|
||||
|
||||
def _extract_patient_list(session_id: str, machine_id: Optional[str], limit: int) -> List[str]:
|
||||
"""Lance une action extract_table seule pour lire la liste des IPP."""
|
||||
actions = [
|
||||
{
|
||||
"action_id": f"extract_table_{uuid.uuid4().hex[:6]}",
|
||||
"type": "extract_table",
|
||||
"parameters": {
|
||||
"output_var": "patients_list",
|
||||
"pattern": IPP_PATTERN,
|
||||
"limit": limit,
|
||||
},
|
||||
"intention": "Lire la liste des IPP visible à l'écran",
|
||||
},
|
||||
]
|
||||
payload = {
|
||||
"actions": actions,
|
||||
"session_id": session_id,
|
||||
"task_description": "Extraction liste patients GHT",
|
||||
}
|
||||
if machine_id:
|
||||
payload["machine_id"] = machine_id
|
||||
resp = _post("/api/v1/traces/stream/replay/raw", payload, timeout=15)
|
||||
replay_id = resp.get("replay_id")
|
||||
if not replay_id:
|
||||
raise RuntimeError(f"extract_table : pas de replay_id ({resp})")
|
||||
final = _wait_replay_done(replay_id, timeout_s=20)
|
||||
return list(final.get("variables", {}).get("patients_list") or [])
|
||||
|
||||
|
||||
def _process_dossiers(state: OrchestrationState, session_id: str, machine_id: Optional[str]) -> None:
|
||||
"""Boucle : pour chaque IPP, lance le workflow Urgence_unit."""
|
||||
for i, ipp in enumerate(state.patients):
|
||||
state.current_step = f"process_dossier_{i+1}_of_{len(state.patients)}"
|
||||
result = DossierResult(ipp=ipp)
|
||||
try:
|
||||
payload = {
|
||||
"workflow_id": URGENCE_WORKFLOW_ID,
|
||||
"session_id": session_id,
|
||||
"variables": {"patient_id": ipp},
|
||||
}
|
||||
if machine_id:
|
||||
payload["machine_id"] = machine_id
|
||||
resp = _post("/api/v1/traces/stream/replay", payload, timeout=20)
|
||||
replay_id = resp.get("replay_id")
|
||||
if not replay_id:
|
||||
raise RuntimeError(f"replay_id manquant ({resp})")
|
||||
final = _wait_replay_done(replay_id, timeout_s=180)
|
||||
t2a = final.get("variables", {}).get("t2a_result") or {}
|
||||
result.decision = t2a.get("decision")
|
||||
result.decision_court = t2a.get("decision_court")
|
||||
result.confiance = t2a.get("confiance")
|
||||
result.duree_passage_heures = t2a.get("duree_passage_heures")
|
||||
result.concordance = t2a.get("concordance")
|
||||
except Exception as e:
|
||||
result.error = str(e)
|
||||
logger.warning("Dossier %s : erreur %s", ipp, e)
|
||||
state.results.append(result)
|
||||
state.progress = i + 1
|
||||
|
||||
|
||||
def _wait_replay_done(replay_id: str, timeout_s: int = 60, poll_s: float = 1.0) -> Dict[str, Any]:
|
||||
"""Poll /replay/<id> jusqu'à status terminal."""
|
||||
deadline = time.time() + timeout_s
|
||||
last = {}
|
||||
while time.time() < deadline:
|
||||
try:
|
||||
last = _get(f"/api/v1/traces/stream/replay/{replay_id}", timeout=5)
|
||||
except Exception as e:
|
||||
logger.warning("poll replay %s : %s", replay_id, e)
|
||||
status = last.get("status", "")
|
||||
if status in ("done", "completed", "finished", "error", "cancelled", "paused_need_help"):
|
||||
return last
|
||||
time.sleep(poll_s)
|
||||
raise TimeoutError(f"replay {replay_id} non terminé après {timeout_s}s (status={last.get('status')})")
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
# Synthèse finale
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
def _build_synthese(state: OrchestrationState) -> str:
|
||||
"""Construit le message de synthèse posté dans le chat à la fin."""
|
||||
n = len(state.results)
|
||||
if n == 0:
|
||||
return "Aucun dossier traité."
|
||||
n_uhcd = sum(1 for r in state.results if r.decision == "REQUALIFICATION_HOSPITALISATION")
|
||||
n_forfait = sum(1 for r in state.results if r.decision == "FORFAIT_URGENCE")
|
||||
n_concord = sum(1 for r in state.results if r.concordance is True)
|
||||
lines = [f"✅ Terminé. {n} dossier(s) traité(s) : {n_forfait} forfait(s) urgences, {n_uhcd} UHCD."]
|
||||
if any(r.concordance is not None for r in state.results):
|
||||
lines.append(f"Concordance vérité-terrain : {n_concord}/{n}.")
|
||||
lines.append("")
|
||||
for r in state.results:
|
||||
if r.error:
|
||||
lines.append(f" • {r.ipp} : ❌ erreur — {r.error}")
|
||||
continue
|
||||
decision_label = r.decision_court or r.decision or "—"
|
||||
conf = f"confiance {r.confiance}" if r.confiance else ""
|
||||
duree = f"{r.duree_passage_heures:.1f}h" if r.duree_passage_heures else ""
|
||||
concord_mark = ""
|
||||
if r.concordance is True:
|
||||
concord_mark = " ✓"
|
||||
elif r.concordance is False:
|
||||
concord_mark = " ⚠ écart vérité-terrain"
|
||||
details = ", ".join(x for x in (conf, duree) if x)
|
||||
lines.append(f" • {r.ipp} : {decision_label}{concord_mark}" + (f" ({details})" if details else ""))
|
||||
return "\n".join(lines)
|
||||
3
agent_rust/lea_uia/.gitignore
vendored
Normal file
3
agent_rust/lea_uia/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
target/
|
||||
**/target/
|
||||
|
||||
384
agent_rust/lea_uia/Cargo.lock
generated
Normal file
384
agent_rust/lea_uia/Cargo.lock
generated
Normal file
@@ -0,0 +1,384 @@
|
||||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 4
|
||||
|
||||
[[package]]
|
||||
name = "anstream"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"anstyle-parse",
|
||||
"anstyle-query",
|
||||
"anstyle-wincon",
|
||||
"colorchoice",
|
||||
"is_terminal_polyfill",
|
||||
"utf8parse",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle"
|
||||
version = "1.0.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000"
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-parse"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e"
|
||||
dependencies = [
|
||||
"utf8parse",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-query"
|
||||
version = "1.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc"
|
||||
dependencies = [
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-wincon"
|
||||
version = "3.0.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"once_cell_polyfill",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "4.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b193af5b67834b676abd72466a96c1024e6a6ad978a1f484bd90b85c94041351"
|
||||
dependencies = [
|
||||
"clap_builder",
|
||||
"clap_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_builder"
|
||||
version = "4.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f"
|
||||
dependencies = [
|
||||
"anstream",
|
||||
"anstyle",
|
||||
"clap_lex",
|
||||
"strsim",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_derive"
|
||||
version = "4.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1110bd8a634a1ab8cb04345d8d878267d57c3cf1b38d91b71af6686408bbca6a"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_lex"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9"
|
||||
|
||||
[[package]]
|
||||
name = "colorchoice"
|
||||
version = "1.0.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570"
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
||||
|
||||
[[package]]
|
||||
name = "is_terminal_polyfill"
|
||||
version = "1.70.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "1.0.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"
|
||||
|
||||
[[package]]
|
||||
name = "lea_uia"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"clap",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"windows",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
|
||||
|
||||
[[package]]
|
||||
name = "once_cell_polyfill"
|
||||
version = "1.70.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.106"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.45"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.228"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
|
||||
dependencies = [
|
||||
"serde_core",
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_core"
|
||||
version = "1.0.228"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.228"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.149"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
|
||||
dependencies = [
|
||||
"itoa",
|
||||
"memchr",
|
||||
"serde",
|
||||
"serde_core",
|
||||
"zmij",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strsim"
|
||||
version = "0.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.117"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
|
||||
|
||||
[[package]]
|
||||
name = "utf8parse"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
|
||||
|
||||
[[package]]
|
||||
name = "windows"
|
||||
version = "0.59.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7f919aee0a93304be7f62e8e5027811bbba96bcb1de84d6618be56e43f8a32a1"
|
||||
dependencies = [
|
||||
"windows-core",
|
||||
"windows-targets",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-core"
|
||||
version = "0.59.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "810ce18ed2112484b0d4e15d022e5f598113e220c53e373fb31e67e21670c1ce"
|
||||
dependencies = [
|
||||
"windows-implement",
|
||||
"windows-interface",
|
||||
"windows-result",
|
||||
"windows-strings",
|
||||
"windows-targets",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-implement"
|
||||
version = "0.59.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "83577b051e2f49a058c308f17f273b570a6a758386fc291b5f6a934dd84e48c1"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-interface"
|
||||
version = "0.59.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-link"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a"
|
||||
|
||||
[[package]]
|
||||
name = "windows-link"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
|
||||
|
||||
[[package]]
|
||||
name = "windows-result"
|
||||
version = "0.3.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6"
|
||||
dependencies = [
|
||||
"windows-link 0.1.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-strings"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "87fa48cc5d406560701792be122a10132491cff9d0aeb23583cc2dcafc847319"
|
||||
dependencies = [
|
||||
"windows-link 0.1.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.61.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
|
||||
dependencies = [
|
||||
"windows-link 0.2.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-targets"
|
||||
version = "0.53.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3"
|
||||
dependencies = [
|
||||
"windows-link 0.2.1",
|
||||
"windows_aarch64_gnullvm",
|
||||
"windows_aarch64_msvc",
|
||||
"windows_i686_gnu",
|
||||
"windows_i686_gnullvm",
|
||||
"windows_i686_msvc",
|
||||
"windows_x86_64_gnu",
|
||||
"windows_x86_64_gnullvm",
|
||||
"windows_x86_64_msvc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_gnullvm"
|
||||
version = "0.53.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53"
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_msvc"
|
||||
version = "0.53.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnu"
|
||||
version = "0.53.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnullvm"
|
||||
version = "0.53.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_msvc"
|
||||
version = "0.53.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnu"
|
||||
version = "0.53.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnullvm"
|
||||
version = "0.53.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_msvc"
|
||||
version = "0.53.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650"
|
||||
|
||||
[[package]]
|
||||
name = "zmij"
|
||||
version = "1.0.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"
|
||||
34
agent_rust/lea_uia/Cargo.toml
Normal file
34
agent_rust/lea_uia/Cargo.toml
Normal file
@@ -0,0 +1,34 @@
|
||||
[package]
|
||||
name = "lea_uia"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
authors = ["Dom <dom@rpa-vision-v3>"]
|
||||
description = "Helper Windows UI Automation pour Léa (agent RPA V3)"
|
||||
license = "Proprietary"
|
||||
|
||||
[[bin]]
|
||||
name = "lea_uia"
|
||||
path = "src/main.rs"
|
||||
|
||||
[dependencies]
|
||||
clap = { version = "4.5", features = ["derive"] }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
|
||||
[target.'cfg(windows)'.dependencies]
|
||||
windows = { version = "0.59", features = [
|
||||
"Win32_Foundation",
|
||||
"Win32_System_Com",
|
||||
"Win32_System_Ole",
|
||||
"Win32_System_Variant",
|
||||
"Win32_UI_Accessibility",
|
||||
"Win32_UI_WindowsAndMessaging",
|
||||
"Win32_Graphics_Gdi",
|
||||
] }
|
||||
|
||||
[profile.release]
|
||||
opt-level = "z" # Taille minimale
|
||||
lto = true # Link-time optimization
|
||||
codegen-units = 1 # Meilleure optimisation
|
||||
strip = true # Retirer les symboles
|
||||
panic = "abort" # Pas d'unwinding → binaire plus petit
|
||||
564
agent_rust/lea_uia/src/main.rs
Normal file
564
agent_rust/lea_uia/src/main.rs
Normal file
@@ -0,0 +1,564 @@
|
||||
// lea_uia — Helper Windows UI Automation pour Léa
|
||||
//
|
||||
// Binaire standalone qui expose 3 commandes UIA :
|
||||
// query → retourne l'élément UIA à une position (x, y)
|
||||
// find → retrouve un élément par son chemin logique
|
||||
// capture → liste les éléments visibles (debug)
|
||||
//
|
||||
// Communication avec l'agent Python via stdin/stdout JSON.
|
||||
// Tous les appels sont non-bloquants et retournent du JSON structuré.
|
||||
//
|
||||
// Sur Linux (développement) : retourne des stubs d'erreur.
|
||||
// Sur Windows : utilise UIAutomationCore via `windows-rs`.
|
||||
|
||||
use clap::{Parser, Subcommand};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Parser)]
|
||||
#[command(name = "lea_uia")]
|
||||
#[command(about = "Helper UI Automation pour Léa", long_about = None)]
|
||||
#[command(version)]
|
||||
struct Cli {
|
||||
#[command(subcommand)]
|
||||
command: Commands,
|
||||
}
|
||||
|
||||
#[derive(Subcommand)]
|
||||
enum Commands {
|
||||
/// Retourner l'élément UIA à une position donnée (x, y en pixels écran)
|
||||
Query {
|
||||
/// Coordonnée X (pixels)
|
||||
#[arg(long)]
|
||||
x: i32,
|
||||
/// Coordonnée Y (pixels)
|
||||
#[arg(long)]
|
||||
y: i32,
|
||||
/// Inclure la hiérarchie des parents (peut être lent)
|
||||
#[arg(long, default_value_t = true)]
|
||||
with_parents: bool,
|
||||
},
|
||||
/// Rechercher un élément par son chemin logique ou son nom
|
||||
Find {
|
||||
/// Nom de l'élément (Name property)
|
||||
#[arg(long)]
|
||||
name: Option<String>,
|
||||
/// Type de contrôle (Button, Edit, MenuItem, etc.)
|
||||
#[arg(long)]
|
||||
control_type: Option<String>,
|
||||
/// AutomationId
|
||||
#[arg(long)]
|
||||
automation_id: Option<String>,
|
||||
/// Limite la recherche à cette fenêtre (titre exact)
|
||||
#[arg(long)]
|
||||
window: Option<String>,
|
||||
/// Timeout en millisecondes
|
||||
#[arg(long, default_value_t = 2000)]
|
||||
timeout_ms: u32,
|
||||
},
|
||||
/// Lister tous les éléments visibles de la fenêtre active (debug)
|
||||
Capture {
|
||||
/// Profondeur maximale de l'arbre
|
||||
#[arg(long, default_value_t = 3)]
|
||||
max_depth: u32,
|
||||
},
|
||||
/// Vérifier que UIA est disponible et fonctionnel
|
||||
Health,
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Modèles de sortie JSON
|
||||
// =========================================================================
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
struct UiaElement {
|
||||
/// Nom visible de l'élément
|
||||
name: String,
|
||||
/// Type de contrôle (Button, Edit, MenuItem, Window, ...)
|
||||
control_type: String,
|
||||
/// Classe Windows (Edit, Static, #32770, ...)
|
||||
class_name: String,
|
||||
/// AutomationId (ID interne, parfois vide)
|
||||
automation_id: String,
|
||||
/// Rectangle absolu [x1, y1, x2, y2] en pixels écran
|
||||
bounding_rect: [i32; 4],
|
||||
/// Est-ce que l'élément est activable
|
||||
is_enabled: bool,
|
||||
/// Est-ce que l'élément est visible
|
||||
is_offscreen: bool,
|
||||
/// Hiérarchie des parents (chemin logique)
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
parent_path: Vec<ParentHint>,
|
||||
/// Process owning this element
|
||||
#[serde(skip_serializing_if = "String::is_empty")]
|
||||
process_name: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
struct ParentHint {
|
||||
name: String,
|
||||
control_type: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
#[serde(tag = "status")]
|
||||
enum UiaResponse {
|
||||
#[serde(rename = "ok")]
|
||||
Ok {
|
||||
element: Option<UiaElement>,
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
elements: Vec<UiaElement>,
|
||||
elapsed_ms: u64,
|
||||
},
|
||||
#[serde(rename = "not_found")]
|
||||
NotFound {
|
||||
reason: String,
|
||||
elapsed_ms: u64,
|
||||
},
|
||||
#[serde(rename = "error")]
|
||||
Error {
|
||||
message: String,
|
||||
code: String,
|
||||
},
|
||||
#[serde(rename = "unavailable")]
|
||||
Unavailable {
|
||||
reason: String,
|
||||
},
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Implémentation Windows
|
||||
// =========================================================================
|
||||
|
||||
#[cfg(windows)]
|
||||
mod uia_impl {
|
||||
use super::*;
|
||||
use std::time::Instant;
|
||||
use windows::Win32::Foundation::POINT;
|
||||
use windows::Win32::System::Com::{
|
||||
CoCreateInstance, CoInitializeEx, CoUninitialize, CLSCTX_INPROC_SERVER,
|
||||
COINIT_APARTMENTTHREADED,
|
||||
};
|
||||
use windows::Win32::UI::Accessibility::{
|
||||
CUIAutomation, IUIAutomation, IUIAutomationElement, IUIAutomationTreeWalker,
|
||||
};
|
||||
|
||||
struct ComGuard;
|
||||
impl ComGuard {
|
||||
fn new() -> windows::core::Result<Self> {
|
||||
unsafe {
|
||||
let hr = CoInitializeEx(None, COINIT_APARTMENTTHREADED);
|
||||
if hr.is_err() {
|
||||
// RPC_E_CHANGED_MODE : le thread est déjà initialisé → OK
|
||||
let code = hr.0 as u32;
|
||||
if code != 0x80010106 {
|
||||
return Err(windows::core::Error::from(hr));
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(Self)
|
||||
}
|
||||
}
|
||||
impl Drop for ComGuard {
|
||||
fn drop(&mut self) {
|
||||
unsafe { CoUninitialize() };
|
||||
}
|
||||
}
|
||||
|
||||
fn get_automation() -> windows::core::Result<IUIAutomation> {
|
||||
unsafe { CoCreateInstance(&CUIAutomation, None, CLSCTX_INPROC_SERVER) }
|
||||
}
|
||||
|
||||
fn element_to_struct(
|
||||
element: &IUIAutomationElement,
|
||||
with_parents: bool,
|
||||
) -> windows::core::Result<UiaElement> {
|
||||
let mut result = UiaElement {
|
||||
name: String::new(),
|
||||
control_type: String::new(),
|
||||
class_name: String::new(),
|
||||
automation_id: String::new(),
|
||||
bounding_rect: [0, 0, 0, 0],
|
||||
is_enabled: false,
|
||||
is_offscreen: true,
|
||||
parent_path: Vec::new(),
|
||||
process_name: String::new(),
|
||||
};
|
||||
|
||||
unsafe {
|
||||
if let Ok(name) = element.CurrentName() {
|
||||
result.name = name.to_string();
|
||||
}
|
||||
if let Ok(ct) = element.CurrentLocalizedControlType() {
|
||||
result.control_type = ct.to_string();
|
||||
}
|
||||
if let Ok(cn) = element.CurrentClassName() {
|
||||
result.class_name = cn.to_string();
|
||||
}
|
||||
if let Ok(aid) = element.CurrentAutomationId() {
|
||||
result.automation_id = aid.to_string();
|
||||
}
|
||||
if let Ok(rect) = element.CurrentBoundingRectangle() {
|
||||
result.bounding_rect = [rect.left, rect.top, rect.right, rect.bottom];
|
||||
}
|
||||
if let Ok(enabled) = element.CurrentIsEnabled() {
|
||||
result.is_enabled = enabled.as_bool();
|
||||
}
|
||||
if let Ok(offscreen) = element.CurrentIsOffscreen() {
|
||||
result.is_offscreen = offscreen.as_bool();
|
||||
}
|
||||
if with_parents {
|
||||
// Remonter la hiérarchie jusqu'à la Window root
|
||||
if let Ok(automation) = get_automation() {
|
||||
let walker = automation.ControlViewWalker();
|
||||
if let Ok(walker) = walker {
|
||||
let mut current = element.clone();
|
||||
for _ in 0..10 {
|
||||
match walker.GetParentElement(¤t) {
|
||||
Ok(parent) => {
|
||||
let name = parent
|
||||
.CurrentName()
|
||||
.map(|n| n.to_string())
|
||||
.unwrap_or_default();
|
||||
let ct = parent
|
||||
.CurrentLocalizedControlType()
|
||||
.map(|c| c.to_string())
|
||||
.unwrap_or_default();
|
||||
if name.is_empty() && ct.is_empty() {
|
||||
break;
|
||||
}
|
||||
result.parent_path.insert(
|
||||
0,
|
||||
ParentHint {
|
||||
name,
|
||||
control_type: ct,
|
||||
},
|
||||
);
|
||||
current = parent;
|
||||
}
|
||||
Err(_) => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub fn query_at_point(x: i32, y: i32, with_parents: bool) -> UiaResponse {
|
||||
let start = Instant::now();
|
||||
let _com = match ComGuard::new() {
|
||||
Ok(g) => g,
|
||||
Err(e) => {
|
||||
return UiaResponse::Error {
|
||||
message: format!("CoInitializeEx: {}", e),
|
||||
code: "com_init_failed".into(),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let automation = match get_automation() {
|
||||
Ok(a) => a,
|
||||
Err(e) => {
|
||||
return UiaResponse::Error {
|
||||
message: format!("CUIAutomation: {}", e),
|
||||
code: "automation_failed".into(),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let point = POINT { x, y };
|
||||
let element = unsafe { automation.ElementFromPoint(point) };
|
||||
match element {
|
||||
Ok(el) => match element_to_struct(&el, with_parents) {
|
||||
Ok(e) => UiaResponse::Ok {
|
||||
element: Some(e),
|
||||
elements: Vec::new(),
|
||||
elapsed_ms: start.elapsed().as_millis() as u64,
|
||||
},
|
||||
Err(e) => UiaResponse::Error {
|
||||
message: format!("element_to_struct: {}", e),
|
||||
code: "extract_failed".into(),
|
||||
},
|
||||
},
|
||||
Err(_) => UiaResponse::NotFound {
|
||||
reason: format!("Aucun élément UIA à ({}, {})", x, y),
|
||||
elapsed_ms: start.elapsed().as_millis() as u64,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn find_element(
|
||||
name: Option<String>,
|
||||
_control_type: Option<String>,
|
||||
_automation_id: Option<String>,
|
||||
_window: Option<String>,
|
||||
_timeout_ms: u32,
|
||||
) -> UiaResponse {
|
||||
let start = Instant::now();
|
||||
let _com = match ComGuard::new() {
|
||||
Ok(g) => g,
|
||||
Err(e) => {
|
||||
return UiaResponse::Error {
|
||||
message: format!("CoInitializeEx: {}", e),
|
||||
code: "com_init_failed".into(),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let automation = match get_automation() {
|
||||
Ok(a) => a,
|
||||
Err(e) => {
|
||||
return UiaResponse::Error {
|
||||
message: format!("CUIAutomation: {}", e),
|
||||
code: "automation_failed".into(),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let root = match unsafe { automation.GetRootElement() } {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
return UiaResponse::Error {
|
||||
message: format!("GetRootElement: {}", e),
|
||||
code: "root_failed".into(),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Recherche simple par parcours d'arbre (MVP)
|
||||
// L'arbre UIA peut être énorme → on limite la profondeur
|
||||
if let Some(target_name) = name {
|
||||
let walker = unsafe { automation.ControlViewWalker() };
|
||||
if let Ok(walker) = walker {
|
||||
if let Some(found) =
|
||||
walk_and_find(&walker, &root, &target_name, 0, 6, &_control_type, &_automation_id)
|
||||
{
|
||||
match element_to_struct(&found, true) {
|
||||
Ok(e) => {
|
||||
return UiaResponse::Ok {
|
||||
element: Some(e),
|
||||
elements: Vec::new(),
|
||||
elapsed_ms: start.elapsed().as_millis() as u64,
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
return UiaResponse::Error {
|
||||
message: format!("element_to_struct: {}", e),
|
||||
code: "extract_failed".into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
UiaResponse::NotFound {
|
||||
reason: "Aucun élément trouvé".into(),
|
||||
elapsed_ms: start.elapsed().as_millis() as u64,
|
||||
}
|
||||
}
|
||||
|
||||
/// Parcours récursif de l'arbre UIA pour trouver un élément par nom
|
||||
fn walk_and_find(
|
||||
walker: &IUIAutomationTreeWalker,
|
||||
element: &IUIAutomationElement,
|
||||
target_name: &str,
|
||||
depth: u32,
|
||||
max_depth: u32,
|
||||
target_control_type: &Option<String>,
|
||||
target_automation_id: &Option<String>,
|
||||
) -> Option<IUIAutomationElement> {
|
||||
if depth > max_depth {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Tester l'élément courant
|
||||
unsafe {
|
||||
if let Ok(name) = element.CurrentName() {
|
||||
if name.to_string() == target_name {
|
||||
// Vérifier les filtres additionnels
|
||||
let mut matches = true;
|
||||
if let Some(ct) = target_control_type {
|
||||
if let Ok(local_ct) = element.CurrentLocalizedControlType() {
|
||||
if !local_ct.to_string().to_lowercase().contains(&ct.to_lowercase()) {
|
||||
matches = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
if matches {
|
||||
if let Some(aid) = target_automation_id {
|
||||
if let Ok(local_aid) = element.CurrentAutomationId() {
|
||||
if local_aid.to_string() != *aid {
|
||||
matches = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if matches {
|
||||
return Some(element.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Parcourir les enfants
|
||||
if let Ok(first_child) = walker.GetFirstChildElement(element) {
|
||||
let mut current = first_child;
|
||||
loop {
|
||||
if let Some(found) = walk_and_find(
|
||||
walker,
|
||||
¤t,
|
||||
target_name,
|
||||
depth + 1,
|
||||
max_depth,
|
||||
target_control_type,
|
||||
target_automation_id,
|
||||
) {
|
||||
return Some(found);
|
||||
}
|
||||
match walker.GetNextSiblingElement(¤t) {
|
||||
Ok(next) => current = next,
|
||||
Err(_) => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
pub fn capture_tree(_max_depth: u32) -> UiaResponse {
|
||||
let start = Instant::now();
|
||||
let _com = match ComGuard::new() {
|
||||
Ok(g) => g,
|
||||
Err(e) => {
|
||||
return UiaResponse::Error {
|
||||
message: format!("CoInitializeEx: {}", e),
|
||||
code: "com_init_failed".into(),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let automation = match get_automation() {
|
||||
Ok(a) => a,
|
||||
Err(e) => {
|
||||
return UiaResponse::Error {
|
||||
message: format!("CUIAutomation: {}", e),
|
||||
code: "automation_failed".into(),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let focused = unsafe { automation.GetFocusedElement() };
|
||||
match focused {
|
||||
Ok(el) => match element_to_struct(&el, true) {
|
||||
Ok(e) => UiaResponse::Ok {
|
||||
element: Some(e),
|
||||
elements: Vec::new(),
|
||||
elapsed_ms: start.elapsed().as_millis() as u64,
|
||||
},
|
||||
Err(e) => UiaResponse::Error {
|
||||
message: format!("element_to_struct: {}", e),
|
||||
code: "extract_failed".into(),
|
||||
},
|
||||
},
|
||||
Err(e) => UiaResponse::Error {
|
||||
message: format!("GetFocusedElement: {}", e),
|
||||
code: "focused_failed".into(),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn health_check() -> UiaResponse {
|
||||
let _com = match ComGuard::new() {
|
||||
Ok(g) => g,
|
||||
Err(e) => {
|
||||
return UiaResponse::Unavailable {
|
||||
reason: format!("COM init failed: {}", e),
|
||||
}
|
||||
}
|
||||
};
|
||||
match get_automation() {
|
||||
Ok(_) => UiaResponse::Ok {
|
||||
element: None,
|
||||
elements: Vec::new(),
|
||||
elapsed_ms: 0,
|
||||
},
|
||||
Err(e) => UiaResponse::Unavailable {
|
||||
reason: format!("UIA not available: {}", e),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Stub Linux (pour développement et tests)
|
||||
// =========================================================================
|
||||
|
||||
#[cfg(not(windows))]
|
||||
mod uia_impl {
|
||||
use super::*;
|
||||
|
||||
pub fn query_at_point(_x: i32, _y: i32, _with_parents: bool) -> UiaResponse {
|
||||
UiaResponse::Unavailable {
|
||||
reason: "UIA n'est disponible que sur Windows".into(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn find_element(
|
||||
_name: Option<String>,
|
||||
_control_type: Option<String>,
|
||||
_automation_id: Option<String>,
|
||||
_window: Option<String>,
|
||||
_timeout_ms: u32,
|
||||
) -> UiaResponse {
|
||||
UiaResponse::Unavailable {
|
||||
reason: "UIA n'est disponible que sur Windows".into(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn capture_tree(_max_depth: u32) -> UiaResponse {
|
||||
UiaResponse::Unavailable {
|
||||
reason: "UIA n'est disponible que sur Windows".into(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn health_check() -> UiaResponse {
|
||||
UiaResponse::Unavailable {
|
||||
reason: "UIA n'est disponible que sur Windows".into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Main
|
||||
// =========================================================================
|
||||
|
||||
fn main() {
|
||||
let cli = Cli::parse();
|
||||
|
||||
let response = match cli.command {
|
||||
Commands::Query {
|
||||
x,
|
||||
y,
|
||||
with_parents,
|
||||
} => uia_impl::query_at_point(x, y, with_parents),
|
||||
Commands::Find {
|
||||
name,
|
||||
control_type,
|
||||
automation_id,
|
||||
window,
|
||||
timeout_ms,
|
||||
} => uia_impl::find_element(name, control_type, automation_id, window, timeout_ms),
|
||||
Commands::Capture { max_depth } => uia_impl::capture_tree(max_depth),
|
||||
Commands::Health => uia_impl::health_check(),
|
||||
};
|
||||
|
||||
// Sortie JSON sur stdout
|
||||
match serde_json::to_string(&response) {
|
||||
Ok(json) => println!("{}", json),
|
||||
Err(e) => {
|
||||
eprintln!("{{\"status\":\"error\",\"message\":\"JSON serialization: {}\"}}", e);
|
||||
std::process::exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -27,7 +27,7 @@ if platform.system() == "Windows":
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
AGENT_VERSION = "1.0.0"
|
||||
AGENT_VERSION = "1.0.1"
|
||||
|
||||
# Identifiant unique de la machine (utilisé pour le multi-machine)
|
||||
# Configurable via variable d'environnement, sinon auto-généré depuis hostname + OS
|
||||
@@ -40,20 +40,36 @@ MACHINE_ID = os.environ.get(
|
||||
BASE_DIR = Path(__file__).resolve().parent
|
||||
|
||||
# Endpoint du serveur Streaming (port 5005)
|
||||
# SERVER_URL contient TOUJOURS /api/v1 à la fin (convention unifiée).
|
||||
SERVER_URL = os.getenv("RPA_SERVER_URL", "http://localhost:5005/api/v1")
|
||||
# Base sans /api/v1 — pour les routes à la racine (/health)
|
||||
SERVER_BASE = SERVER_URL.rsplit("/api/v1", 1)[0]
|
||||
UPLOAD_ENDPOINT = f"{SERVER_URL}/traces/upload"
|
||||
STREAMING_ENDPOINT = f"{SERVER_URL}/traces/stream"
|
||||
|
||||
# Host Ollama — SÉPARÉ du serveur RPA.
|
||||
# Ollama tourne en local sur la machine serveur, jamais exposé via le reverse proxy.
|
||||
# Défaut : localhost (exécution locale ou accès LAN direct).
|
||||
OLLAMA_HOST = os.getenv("RPA_OLLAMA_HOST", "localhost")
|
||||
|
||||
# Token d'authentification API (doit correspondre au token du serveur)
|
||||
# Configurable via variable d'environnement RPA_API_TOKEN
|
||||
API_TOKEN = os.environ.get("RPA_API_TOKEN", "")
|
||||
|
||||
# --- Orchestrateur Léa-first (agent-chat Linux) ---
|
||||
# Endpoint racine du service agent-chat qui héberge POST /api/learn/start
|
||||
# (P1-LEA-SHADOW). Configurable via RPA_AGENT_CHAT_URL.
|
||||
# Défaut : localhost:5004 (même machine en dev). En POC clinique, doit
|
||||
# pointer vers le DGX Spark (ex. http://agent-chat.dgx-local:5004).
|
||||
AGENT_CHAT_URL = os.environ.get("RPA_AGENT_CHAT_URL", "http://localhost:5004")
|
||||
|
||||
# Paramètres de session
|
||||
MAX_SESSION_DURATION_S = 60 * 60 # 1 heure
|
||||
SESSIONS_ROOT = BASE_DIR / "sessions"
|
||||
|
||||
# Paramètres Vision (Crops pour qwen3-vl)
|
||||
TARGETED_CROP_SIZE = (150, 150)
|
||||
# Paramètres Vision (Crops pour la résolution visuelle)
|
||||
# 80x80 : assez petit pour être discriminant (icônes), assez grand pour le contexte
|
||||
TARGETED_CROP_SIZE = (80, 80)
|
||||
SCREENSHOT_QUALITY = 85
|
||||
|
||||
# Floutage des données sensibles (conformité AI Act)
|
||||
|
||||
82
agent_v0/agent_v1/core/anchor_catalog.py
Normal file
82
agent_v0/agent_v1/core/anchor_catalog.py
Normal file
@@ -0,0 +1,82 @@
|
||||
"""Catalog d'ancres visuelles — Phase 1 standalone.
|
||||
|
||||
Ce module fournit un catalog Python (pas YAML) listant les trios
|
||||
(window_title, anchor_label, target_label) connus pour lesquels la
|
||||
résolution par triangulation visuelle est applicable.
|
||||
|
||||
Phase 1 : non branché au runtime, prouvé sur fixtures par
|
||||
`tests/unit/test_anchor_relative.py`.
|
||||
|
||||
Edition simple : ajouter une entrée à `ANCHOR_ENTRIES`.
|
||||
Validation : `find_entry_for_title(title)` retourne la première entrée
|
||||
dont un `title_patterns` matche (case-insensitive, substring).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
# Catalog des entrées d'ancres visuelles connues.
|
||||
#
|
||||
# Format d'une entrée :
|
||||
# id (str) : identifiant stable pour audit
|
||||
# title_patterns (tuple) : sous-chaines case-insensitive du titre fenêtre
|
||||
# anchor_label (list) : labels d'ancres a essayer dans l'ordre (FR puis EN)
|
||||
# target_label (str) : libelle cible (ex. "Enregistrer")
|
||||
# geometry_hint (dict) :
|
||||
# region (str) : indicatif ("bottom-right", "bottom-center", ...)
|
||||
# min_x_norm/min_y_norm/max_x_norm/max_y_norm (float) : zone valide
|
||||
# (normalisée 0..1 sur la fenêtre/écran)
|
||||
# offset_from_anchor (dict) : {"x_px": int, "y_px": int} delta ancre→cible
|
||||
ANCHOR_ENTRIES: List[Dict[str, Any]] = [
|
||||
{
|
||||
"id": "notepad_save_as_enregistrer",
|
||||
"title_patterns": ("enregistrer sous", "save as"),
|
||||
"anchor_label": ["Annuler", "Cancel"],
|
||||
"target_label": "Enregistrer",
|
||||
"geometry_hint": {
|
||||
"region": "bottom-right",
|
||||
"min_x_norm": 0.55,
|
||||
"min_y_norm": 0.75,
|
||||
"max_x_norm": 1.0,
|
||||
"max_y_norm": 1.0,
|
||||
"offset_from_anchor": {"x_px": -100, "y_px": 0},
|
||||
},
|
||||
},
|
||||
{
|
||||
"id": "notepad_unsaved_changes_enregistrer",
|
||||
"title_patterns": ("bloc-notes", "notepad"),
|
||||
"anchor_label": ["Ne pas enregistrer", "Don't Save"],
|
||||
"target_label": "Enregistrer",
|
||||
"geometry_hint": {
|
||||
"region": "bottom-center",
|
||||
"min_x_norm": 0.30,
|
||||
"min_y_norm": 0.50,
|
||||
"max_x_norm": 0.85,
|
||||
"max_y_norm": 1.0,
|
||||
"offset_from_anchor": {"x_px": -120, "y_px": 0},
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
def find_entry_for_title(title: str) -> Optional[Dict[str, Any]]:
|
||||
"""Retourne la première entrée dont un title_pattern matche (substring CI).
|
||||
|
||||
Args:
|
||||
title: titre de fenêtre courant (ex. "Enregistrer sous").
|
||||
|
||||
Returns:
|
||||
L'entrée catalog matchante, ou None si aucun match.
|
||||
Aucun raise — l'absence de match est un cas normal.
|
||||
"""
|
||||
if not title:
|
||||
return None
|
||||
title_lower = title.lower()
|
||||
for entry in ANCHOR_ENTRIES:
|
||||
patterns = entry.get("title_patterns") or ()
|
||||
for pat in patterns:
|
||||
if pat and pat.lower() in title_lower:
|
||||
return entry
|
||||
return None
|
||||
292
agent_v0/agent_v1/core/anchor_relative.py
Normal file
292
agent_v0/agent_v1/core/anchor_relative.py
Normal file
@@ -0,0 +1,292 @@
|
||||
"""Localisation par triangulation depuis une ancre visuelle.
|
||||
|
||||
Module standalone Phase 1 — non branché au runtime.
|
||||
|
||||
Principe : étant donnée une ancre texte fiable (ex. "Annuler"),
|
||||
localiser une cible voisine ("Enregistrer") par offset géométrique.
|
||||
Validation optionnelle par cross-check du label cible.
|
||||
|
||||
Détecteur injectable (`detector=`) pour faciliter les tests offline ;
|
||||
au runtime (Phase 2), on injectera `ActionExecutorV1._find_text_on_screen`.
|
||||
|
||||
Pas de dépendance nouvelle. Pas de VLM, pas d'UIA, pas de persistance.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Callable, Dict, Optional, Tuple
|
||||
|
||||
# Type alias : un détecteur prend (screenshot_b64, label) et retourne
|
||||
# (x_px, y_px) ou None.
|
||||
DetectorFn = Callable[[str, str], Optional[Tuple[int, int]]]
|
||||
|
||||
|
||||
@dataclass
|
||||
class AnchorMatch:
|
||||
"""Résultat d'une recherche par ancre relative.
|
||||
|
||||
Tous les champs sont remplis même si `found=False` (zéros pour les
|
||||
coordonnées, reason explicite, evidence pour audit).
|
||||
"""
|
||||
|
||||
found: bool
|
||||
target_x_pct: float
|
||||
target_y_pct: float
|
||||
anchor_x_pct: float
|
||||
anchor_y_pct: float
|
||||
confidence: float
|
||||
reason: str
|
||||
evidence: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
def _default_detector(screenshot_b64: str, label: str) -> Optional[Tuple[int, int]]:
|
||||
"""Détecteur OCR par défaut : rendu TTF + cv2.matchTemplate.
|
||||
|
||||
Reprend la logique de `ActionExecutorV1._find_text_on_screen`
|
||||
(executor.py:3277) sans dépendre de l'instance ActionExecutorV1
|
||||
(qui amène mss/pynput inutiles ici).
|
||||
"""
|
||||
try:
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
import cv2
|
||||
import numpy as np
|
||||
except ImportError:
|
||||
return None
|
||||
|
||||
if not label or not screenshot_b64:
|
||||
return None
|
||||
|
||||
try:
|
||||
img_bytes = base64.b64decode(screenshot_b64)
|
||||
img_array = np.frombuffer(img_bytes, dtype=np.uint8)
|
||||
screenshot_bgr = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
|
||||
if screenshot_bgr is None:
|
||||
return None
|
||||
gray = cv2.cvtColor(screenshot_bgr, cv2.COLOR_BGR2GRAY)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
font_paths = [
|
||||
"C:/Windows/Fonts/arial.ttf",
|
||||
"C:/Windows/Fonts/segoeui.ttf",
|
||||
"C:/Windows/Fonts/tahoma.ttf",
|
||||
"/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
|
||||
"/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf",
|
||||
]
|
||||
|
||||
def _get_font(size: int):
|
||||
for fp in font_paths:
|
||||
try:
|
||||
return ImageFont.truetype(fp, size)
|
||||
except (OSError, IOError):
|
||||
continue
|
||||
return ImageFont.load_default()
|
||||
|
||||
best_match: Optional[Tuple[int, int]] = None
|
||||
best_val = 0.0
|
||||
threshold = 0.75
|
||||
|
||||
for font_size in (14, 16, 18, 20, 22, 24, 12, 26, 28, 10):
|
||||
font = _get_font(font_size)
|
||||
tmp = Image.new("L", (1, 1), 255)
|
||||
tmp_draw = ImageDraw.Draw(tmp)
|
||||
bbox = tmp_draw.textbbox((0, 0), label, font=font)
|
||||
text_w = bbox[2] - bbox[0] + 6
|
||||
text_h = bbox[3] - bbox[1] + 6
|
||||
if text_w <= 0 or text_h <= 0:
|
||||
continue
|
||||
if text_w >= gray.shape[1] or text_h >= gray.shape[0]:
|
||||
continue
|
||||
text_img = Image.new("L", (text_w, text_h), 255)
|
||||
draw = ImageDraw.Draw(text_img)
|
||||
draw.text((3, 3), label, fill=0, font=font)
|
||||
template = np.array(text_img)
|
||||
result = cv2.matchTemplate(gray, template, cv2.TM_CCOEFF_NORMED)
|
||||
_, max_val, _, max_loc = cv2.minMaxLoc(result)
|
||||
if max_val > best_val:
|
||||
best_val = max_val
|
||||
best_match = (
|
||||
max_loc[0] + template.shape[1] // 2,
|
||||
max_loc[1] + template.shape[0] // 2,
|
||||
)
|
||||
if max_val > 0.75:
|
||||
break
|
||||
|
||||
if best_match and best_val >= threshold:
|
||||
return best_match
|
||||
return None
|
||||
|
||||
|
||||
def _try_detect(
|
||||
detector: DetectorFn,
|
||||
screenshot_b64: str,
|
||||
labels: Any,
|
||||
) -> Tuple[Optional[Tuple[int, int]], str]:
|
||||
"""Essaye chaque label de la liste (ou string unique) jusqu'à un hit.
|
||||
|
||||
Retourne (position_px, label_qui_a_matche) ou (None, "").
|
||||
"""
|
||||
if isinstance(labels, str):
|
||||
labels_list = [labels]
|
||||
else:
|
||||
labels_list = list(labels or [])
|
||||
for label in labels_list:
|
||||
pos = detector(screenshot_b64, label)
|
||||
if pos:
|
||||
return pos, label
|
||||
return None, ""
|
||||
|
||||
|
||||
def _is_in_zone(
|
||||
x_norm: float,
|
||||
y_norm: float,
|
||||
geometry_hint: Dict[str, Any],
|
||||
) -> bool:
|
||||
"""Vérifie que (x_norm, y_norm) tombe dans la zone du geometry_hint."""
|
||||
min_x = float(geometry_hint.get("min_x_norm", 0.0))
|
||||
max_x = float(geometry_hint.get("max_x_norm", 1.0))
|
||||
min_y = float(geometry_hint.get("min_y_norm", 0.0))
|
||||
max_y = float(geometry_hint.get("max_y_norm", 1.0))
|
||||
return (min_x <= x_norm <= max_x) and (min_y <= y_norm <= max_y)
|
||||
|
||||
|
||||
def find_target_via_anchor(
|
||||
anchor_label: Any,
|
||||
target_label: str,
|
||||
geometry_hint: Dict[str, Any],
|
||||
screenshot_b64: str,
|
||||
screen_width: int,
|
||||
screen_height: int,
|
||||
detector: Optional[DetectorFn] = None,
|
||||
cross_check_target: bool = True,
|
||||
) -> AnchorMatch:
|
||||
"""Localise `target_label` par triangulation depuis `anchor_label`.
|
||||
|
||||
Args:
|
||||
anchor_label: label (str) ou liste de labels essayés dans l'ordre
|
||||
(ex. ["Annuler", "Cancel"] pour fallback FR→EN).
|
||||
target_label: libellé cible (ex. "Enregistrer"). Utilisé pour le
|
||||
cross-check uniquement.
|
||||
geometry_hint: dict décrivant la zone valide pour l'ancre et
|
||||
l'offset ancre→cible. Voir `anchor_catalog.ANCHOR_ENTRIES`
|
||||
pour le format exact.
|
||||
screenshot_b64: capture encodée base64 (JPEG/PNG).
|
||||
screen_width: largeur de référence en pixels (écran ou fenêtre).
|
||||
screen_height: hauteur de référence en pixels.
|
||||
detector: callable (b64, label) → (x_px, y_px) | None. Si None,
|
||||
utilise un détecteur OCR par défaut (rendu TTF + cv2).
|
||||
Pour les tests, injecter un mock.
|
||||
cross_check_target: si True (défaut), tente de détecter aussi
|
||||
`target_label` près de la position candidate et ajuste la
|
||||
confidence en conséquence.
|
||||
|
||||
Returns:
|
||||
AnchorMatch toujours retourné (jamais None). `found=False` si
|
||||
l'ancre n'est pas trouvée ou hors zone ; `reason` explique.
|
||||
"""
|
||||
det = detector or _default_detector
|
||||
ev: Dict[str, Any] = {
|
||||
"anchor_candidates_tried": (
|
||||
list(anchor_label) if not isinstance(anchor_label, str) else [anchor_label]
|
||||
),
|
||||
"target_label": target_label,
|
||||
"geometry_hint": geometry_hint,
|
||||
}
|
||||
|
||||
# 1. Détection ancre (FR puis EN)
|
||||
anchor_px, matched_anchor_label = _try_detect(det, screenshot_b64, anchor_label)
|
||||
if not anchor_px:
|
||||
return AnchorMatch(
|
||||
found=False,
|
||||
target_x_pct=0.0,
|
||||
target_y_pct=0.0,
|
||||
anchor_x_pct=0.0,
|
||||
anchor_y_pct=0.0,
|
||||
confidence=0.0,
|
||||
reason="anchor_not_found",
|
||||
evidence=ev,
|
||||
)
|
||||
|
||||
ax, ay = anchor_px
|
||||
anchor_x_pct = ax / float(screen_width) if screen_width else 0.0
|
||||
anchor_y_pct = ay / float(screen_height) if screen_height else 0.0
|
||||
ev["anchor_matched_label"] = matched_anchor_label
|
||||
ev["anchor_px"] = [ax, ay]
|
||||
ev["anchor_norm"] = [anchor_x_pct, anchor_y_pct]
|
||||
|
||||
# 2. Garde géométrique : ancre dans la zone autorisée
|
||||
if not _is_in_zone(anchor_x_pct, anchor_y_pct, geometry_hint):
|
||||
return AnchorMatch(
|
||||
found=False,
|
||||
target_x_pct=0.0,
|
||||
target_y_pct=0.0,
|
||||
anchor_x_pct=anchor_x_pct,
|
||||
anchor_y_pct=anchor_y_pct,
|
||||
confidence=0.0,
|
||||
reason="anchor_out_of_zone",
|
||||
evidence=ev,
|
||||
)
|
||||
|
||||
# 3. Déduction position cible par offset
|
||||
offset = geometry_hint.get("offset_from_anchor", {}) or {}
|
||||
dx = int(offset.get("x_px", 0))
|
||||
dy = int(offset.get("y_px", 0))
|
||||
target_x_px = ax + dx
|
||||
target_y_px = ay + dy
|
||||
target_x_pct = target_x_px / float(screen_width) if screen_width else 0.0
|
||||
target_y_pct = target_y_px / float(screen_height) if screen_height else 0.0
|
||||
ev["target_px_from_offset"] = [target_x_px, target_y_px]
|
||||
|
||||
if not (0.0 <= target_x_pct <= 1.0 and 0.0 <= target_y_pct <= 1.0):
|
||||
return AnchorMatch(
|
||||
found=False,
|
||||
target_x_pct=target_x_pct,
|
||||
target_y_pct=target_y_pct,
|
||||
anchor_x_pct=anchor_x_pct,
|
||||
anchor_y_pct=anchor_y_pct,
|
||||
confidence=0.0,
|
||||
reason="target_out_of_bounds",
|
||||
evidence=ev,
|
||||
)
|
||||
|
||||
# 4. Cross-check : tenter de détecter target_label
|
||||
confidence = 0.5 # ancre seule
|
||||
reason = "anchor_only"
|
||||
if cross_check_target and target_label:
|
||||
target_pos = det(screenshot_b64, target_label)
|
||||
if target_pos:
|
||||
tx, ty = target_pos
|
||||
dist_px = ((tx - target_x_px) ** 2 + (ty - target_y_px) ** 2) ** 0.5
|
||||
ev["target_detected_px"] = [tx, ty]
|
||||
ev["target_cross_check_dist_px"] = round(dist_px, 1)
|
||||
# Tolerance proche de l'offset (cf. design 2200 §3.2)
|
||||
if dist_px <= 50:
|
||||
# Cross-check OK : on raffine sur la position détectée
|
||||
target_x_px, target_y_px = tx, ty
|
||||
target_x_pct = tx / float(screen_width) if screen_width else 0.0
|
||||
target_y_pct = ty / float(screen_height) if screen_height else 0.0
|
||||
confidence = 0.85
|
||||
reason = "anchor_plus_target_cross_check"
|
||||
else:
|
||||
# target_label détecté mais loin de l'offset attendu : suspect.
|
||||
# On garde la position offset mais on dégrade confidence.
|
||||
confidence = 0.4
|
||||
reason = "anchor_ok_target_drift_high"
|
||||
else:
|
||||
# Cross-check absent : comportement documenté (cf. test 7).
|
||||
# On garde la position offset mais confidence reste à 0.5.
|
||||
ev["target_cross_check_dist_px"] = None
|
||||
reason = "anchor_only_target_not_visible"
|
||||
|
||||
return AnchorMatch(
|
||||
found=True,
|
||||
target_x_pct=target_x_pct,
|
||||
target_y_pct=target_y_pct,
|
||||
anchor_x_pct=anchor_x_pct,
|
||||
anchor_y_pct=anchor_y_pct,
|
||||
confidence=confidence,
|
||||
reason=reason,
|
||||
evidence=ev,
|
||||
)
|
||||
@@ -56,6 +56,8 @@ class EventCaptorV1:
|
||||
|
||||
# État des touches modificatrices
|
||||
self.modifiers = set()
|
||||
self._pending_standalone_win = False
|
||||
self._suppress_release_only_win_combo = False
|
||||
|
||||
# Tracking du focus fenêtre
|
||||
self.last_window = None
|
||||
@@ -178,8 +180,41 @@ class EventCaptorV1:
|
||||
"timestamp": now,
|
||||
}
|
||||
self._inject_screen_metadata(event)
|
||||
# Capturer le snapshot UIA à la position du clic (si helper dispo)
|
||||
# Non-bloquant : si UIA échoue, l'event est enrichi uniquement
|
||||
# des données vision comme aujourd'hui.
|
||||
self._inject_uia_snapshot(event, x, y)
|
||||
self.on_event(event)
|
||||
|
||||
def _inject_uia_snapshot(self, event: dict, x: int, y: int) -> None:
|
||||
"""Ajouter un uia_snapshot à l'événement si le helper UIA est dispo.
|
||||
|
||||
Appelle lea_uia.exe query --x N --y N en ~10-20ms.
|
||||
Fallback silencieux si le helper n'est pas dispo ou échoue.
|
||||
"""
|
||||
try:
|
||||
from .uia_helper import get_shared_helper
|
||||
helper = get_shared_helper()
|
||||
if not helper.available:
|
||||
return
|
||||
element = helper.query_at(int(x), int(y), with_parents=True)
|
||||
if element is None:
|
||||
return
|
||||
event["uia_snapshot"] = {
|
||||
"name": element.name,
|
||||
"control_type": element.control_type,
|
||||
"class_name": element.class_name,
|
||||
"automation_id": element.automation_id,
|
||||
"bounding_rect": list(element.bounding_rect),
|
||||
"is_enabled": element.is_enabled,
|
||||
"is_offscreen": element.is_offscreen,
|
||||
"parent_path": element.parent_path,
|
||||
}
|
||||
except Exception as e:
|
||||
# Non bloquant — on continue sans UIA
|
||||
import logging
|
||||
logging.getLogger(__name__).debug(f"UIA snapshot skip: {e}")
|
||||
|
||||
def _on_scroll(self, x, y, dx, dy):
|
||||
event = {
|
||||
"type": "mouse_scroll",
|
||||
@@ -294,6 +329,56 @@ class EventCaptorV1:
|
||||
return {"kind": "key", "name": key.name}
|
||||
return {"kind": "unknown", "str": str(key)}
|
||||
|
||||
@staticmethod
|
||||
def _raw_key_name(raw_key: Dict[str, Any]) -> Optional[str]:
|
||||
"""Nom lisible depuis un raw_key sérialisé."""
|
||||
if raw_key.get("kind") == "vk":
|
||||
char = raw_key.get("char")
|
||||
if char and len(str(char)) == 1:
|
||||
return str(char).lower()
|
||||
if raw_key.get("kind") == "key":
|
||||
name = raw_key.get("name")
|
||||
return str(name).lower() if name else None
|
||||
return None
|
||||
|
||||
def _emit_release_only_windows_combo(self) -> bool:
|
||||
"""Infère Win+<touche> si Windows/NoMachine n'a livré que les releases.
|
||||
|
||||
Certaines sessions ne remontent pas les press de Win+S via pynput,
|
||||
mais livrent ensuite release('s') puis release('cmd'). Sans cette
|
||||
inférence ciblée, le geste système est perdu et les releases polluent
|
||||
le prochain text_input.
|
||||
"""
|
||||
with self._text_lock:
|
||||
raw_keys = list(self._raw_key_buffer)
|
||||
if len(raw_keys) < 2:
|
||||
return False
|
||||
cmd_names = {"cmd", "cmd_l", "cmd_r"}
|
||||
last = raw_keys[-1]
|
||||
if last.get("action") != "release" or self._raw_key_name(last) not in cmd_names:
|
||||
return False
|
||||
combo_key = None
|
||||
for raw in reversed(raw_keys[:-1]):
|
||||
if raw.get("action") != "release":
|
||||
continue
|
||||
name = self._raw_key_name(raw)
|
||||
if name and name not in self._MODIFIER_KEY_NAMES:
|
||||
combo_key = name
|
||||
break
|
||||
if not combo_key:
|
||||
return False
|
||||
self._raw_key_buffer.clear()
|
||||
|
||||
event = {
|
||||
"type": "key_combo",
|
||||
"keys": ["win", combo_key],
|
||||
"raw_keys": raw_keys,
|
||||
"timestamp": time.time(),
|
||||
}
|
||||
self._inject_screen_metadata(event)
|
||||
self.on_event(event)
|
||||
return True
|
||||
|
||||
def _on_press(self, key):
|
||||
# TOUJOURS enregistrer le press brut dans le buffer raw_keys
|
||||
with self._text_lock:
|
||||
@@ -311,6 +396,7 @@ class EventCaptorV1:
|
||||
self.modifiers.add("shift")
|
||||
elif key in (Key.cmd, Key.cmd_l, Key.cmd_r):
|
||||
self.modifiers.add("win")
|
||||
self._pending_standalone_win = True
|
||||
|
||||
# --- Combos avec modificateur (sauf Shift seul) ---
|
||||
# Shift seul n'est pas un « vrai » modificateur pour les combos :
|
||||
@@ -336,6 +422,9 @@ class EventCaptorV1:
|
||||
# Ne PAS émettre de combo si c'est un modificateur seul
|
||||
# (ex: appui sur Ctrl sans autre touche = pas de combo)
|
||||
if key_name and key_name not in self._MODIFIER_KEY_NAMES:
|
||||
self._pending_standalone_win = False
|
||||
if "win" in self.modifiers:
|
||||
self._suppress_release_only_win_combo = True
|
||||
# Un combo interrompt la saisie texte en cours
|
||||
self._flush_text_buffer()
|
||||
# Attacher les raw_keys accumulés (press des modificateurs + press de la touche)
|
||||
@@ -367,6 +456,7 @@ class EventCaptorV1:
|
||||
- Enter / Tab : flush immédiat + émission de l'événement
|
||||
- Escape : vide le buffer sans émettre
|
||||
"""
|
||||
escape_raw_keys = None
|
||||
with self._text_lock:
|
||||
# --- Touches spéciales ---
|
||||
if key == Key.backspace:
|
||||
@@ -378,12 +468,14 @@ class EventCaptorV1:
|
||||
if key == Key.esc:
|
||||
# Annuler la saisie en cours
|
||||
self._text_buffer.clear()
|
||||
self._raw_key_buffer.clear()
|
||||
self._text_start_pos = None
|
||||
self._cancel_flush_timer()
|
||||
return
|
||||
escape_raw_keys = list(self._raw_key_buffer)
|
||||
self._raw_key_buffer.clear()
|
||||
# Émettre hors lock après le bloc critique.
|
||||
pass
|
||||
|
||||
if key in (Key.enter, Key.tab):
|
||||
elif key in (Key.enter, Key.tab):
|
||||
# Flush immédiat — on relâche le lock avant d'appeler
|
||||
# _flush_text_buffer (qui prend aussi le lock)
|
||||
pass # on sort du with et on flush après
|
||||
@@ -421,6 +513,18 @@ class EventCaptorV1:
|
||||
# Touche spéciale non gérée (F1, Insert, etc.) — on ignore
|
||||
return
|
||||
|
||||
if escape_raw_keys is not None:
|
||||
event = {
|
||||
"type": "key_combo",
|
||||
"keys": ["escape"],
|
||||
"timestamp": time.time(),
|
||||
}
|
||||
if escape_raw_keys:
|
||||
event["raw_keys"] = escape_raw_keys
|
||||
self._inject_screen_metadata(event)
|
||||
self.on_event(event)
|
||||
return
|
||||
|
||||
# Si on arrive ici, c'est Enter ou Tab → flush le buffer en cours
|
||||
# puis émettre le caractère spécial comme text_input séparé
|
||||
self._flush_text_buffer()
|
||||
@@ -518,6 +622,35 @@ class EventCaptorV1:
|
||||
**self._encode_key(key),
|
||||
})
|
||||
|
||||
if key in (Key.cmd, Key.cmd_l, Key.cmd_r) and self._suppress_release_only_win_combo:
|
||||
with self._text_lock:
|
||||
self._raw_key_buffer.clear()
|
||||
self._pending_standalone_win = False
|
||||
self._suppress_release_only_win_combo = False
|
||||
self.modifiers.discard("win")
|
||||
return
|
||||
|
||||
if key in (Key.cmd, Key.cmd_l, Key.cmd_r) and self._emit_release_only_windows_combo():
|
||||
self._pending_standalone_win = False
|
||||
self._suppress_release_only_win_combo = False
|
||||
self.modifiers.discard("win")
|
||||
return
|
||||
|
||||
if key in (Key.cmd, Key.cmd_l, Key.cmd_r) and self._pending_standalone_win:
|
||||
with self._text_lock:
|
||||
raw_keys = list(self._raw_key_buffer)
|
||||
self._raw_key_buffer.clear()
|
||||
event = {
|
||||
"type": "key_combo",
|
||||
"keys": ["win"],
|
||||
"raw_keys": raw_keys,
|
||||
"timestamp": time.time(),
|
||||
}
|
||||
self._inject_screen_metadata(event)
|
||||
self.on_event(event)
|
||||
self._pending_standalone_win = False
|
||||
self._suppress_release_only_win_combo = False
|
||||
|
||||
if key in (Key.ctrl, Key.ctrl_l, Key.ctrl_r):
|
||||
self.modifiers.discard("ctrl")
|
||||
elif key in (Key.alt, Key.alt_l, Key.alt_r):
|
||||
@@ -526,6 +659,8 @@ class EventCaptorV1:
|
||||
self.modifiers.discard("shift")
|
||||
elif key in (Key.cmd, Key.cmd_l, Key.cmd_r):
|
||||
self.modifiers.discard("win")
|
||||
self._pending_standalone_win = False
|
||||
self._suppress_release_only_win_combo = False
|
||||
|
||||
# ----------------------------------------------------------------
|
||||
# Métadonnées système
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
529
agent_v0/agent_v1/core/grounding.py
Normal file
529
agent_v0/agent_v1/core/grounding.py
Normal file
@@ -0,0 +1,529 @@
|
||||
# agent_v1/core/grounding.py
|
||||
"""
|
||||
Module Grounding — localisation pure d'éléments UI sur l'écran.
|
||||
|
||||
Responsabilité unique : "Trouve l'élément X sur l'écran et retourne ses coordonnées."
|
||||
Ne prend AUCUNE décision. Si l'élément n'est pas trouvé → retourne NOT_FOUND.
|
||||
|
||||
Stratégies disponibles (cascade configurable) :
|
||||
1. Serveur SomEngine + VLM (GPU distant)
|
||||
2. Template matching local (CPU, ~10ms)
|
||||
3. VLM local direct (CPU/GPU local)
|
||||
|
||||
Séparé de Policy (qui décide quoi faire quand grounding échoue).
|
||||
Ref: docs/PLAN_ACTEUR_V1.md — Architecture MICRO (grounding + exécution)
|
||||
"""
|
||||
|
||||
import base64
|
||||
import io
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class GroundingResult:
|
||||
"""Résultat d'une tentative de localisation visuelle."""
|
||||
found: bool # L'élément a été trouvé
|
||||
x_pct: float = 0.0 # Position X en % (0.0-1.0)
|
||||
y_pct: float = 0.0 # Position Y en % (0.0-1.0)
|
||||
method: str = "" # Méthode utilisée (server_som, anchor_template, vlm_direct...)
|
||||
score: float = 0.0 # Confiance (0.0-1.0)
|
||||
elapsed_ms: float = 0.0 # Temps de résolution
|
||||
detail: str = "" # Info supplémentaire (label trouvé, raison échec)
|
||||
raw: Optional[Dict] = None # Données brutes du resolver (pour debug)
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"found": self.found,
|
||||
"x_pct": self.x_pct,
|
||||
"y_pct": self.y_pct,
|
||||
"method": self.method,
|
||||
"score": round(self.score, 3),
|
||||
"elapsed_ms": round(self.elapsed_ms, 1),
|
||||
"detail": self.detail,
|
||||
}
|
||||
|
||||
|
||||
# Résultat singleton pour "pas trouvé"
|
||||
NOT_FOUND = GroundingResult(found=False, detail="Aucune méthode n'a trouvé l'élément")
|
||||
|
||||
|
||||
class GroundingEngine:
|
||||
"""Moteur de localisation visuelle d'éléments UI.
|
||||
|
||||
Encapsule la cascade de résolution (serveur → template → VLM local)
|
||||
avec une interface unifiée. Ne prend aucune décision — c'est le rôle
|
||||
de PolicyEngine.
|
||||
|
||||
Usage :
|
||||
engine = GroundingEngine(executor)
|
||||
result = engine.locate(screenshot_b64, target_spec, screen_w, screen_h)
|
||||
if result.found:
|
||||
click(result.x_pct, result.y_pct)
|
||||
"""
|
||||
|
||||
def __init__(self, executor):
|
||||
"""
|
||||
Args:
|
||||
executor: ActionExecutorV1 — fournit les méthodes de résolution existantes.
|
||||
"""
|
||||
self._executor = executor
|
||||
|
||||
@staticmethod
|
||||
def _should_scope_to_active_window(target_spec: Dict[str, Any]) -> bool:
|
||||
"""Déterminer si le grounding doit être limité à la fenêtre active."""
|
||||
if str(target_spec.get("screen_scope", "")).strip().lower() == "full_screen":
|
||||
return False
|
||||
|
||||
by_role = str(target_spec.get("by_role", "")).strip().lower()
|
||||
if by_role in {"start_button"}:
|
||||
return False
|
||||
|
||||
has_anchor = bool(target_spec.get("anchor_image_base64"))
|
||||
context_hints = target_spec.get("context_hints") or {}
|
||||
has_window_or_text_hint = any(
|
||||
str(target_spec.get(key, "") or "").strip()
|
||||
for key in ("window_title", "by_text", "vlm_description")
|
||||
) or bool(str(context_hints.get("window_title", "") or "").strip())
|
||||
if has_anchor and not has_window_or_text_hint and not by_role:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
@staticmethod
|
||||
def _targets_lea_window(target_spec: Dict[str, Any]) -> bool:
|
||||
"""Déterminer si la cible pointe explicitement vers l'UI de Léa."""
|
||||
try:
|
||||
from ..ui.messages import est_fenetre_lea
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
context_hints = target_spec.get("context_hints") or {}
|
||||
hints = [
|
||||
target_spec.get("window_title", ""),
|
||||
context_hints.get("window_title", ""),
|
||||
target_spec.get("vlm_description", ""),
|
||||
target_spec.get("by_text", ""),
|
||||
]
|
||||
return any(est_fenetre_lea(str(hint)) for hint in hints if hint)
|
||||
|
||||
@staticmethod
|
||||
def _is_plausible_window_rect(
|
||||
rect: Optional[List[int]],
|
||||
title: str,
|
||||
screen_width: int,
|
||||
screen_height: int,
|
||||
) -> bool:
|
||||
"""Valider qu'un rect actif ressemble à une vraie fenêtre utilisable.
|
||||
|
||||
Rejette explicitement les zones système "bar-like" (taskbar, systray)
|
||||
et les titres inconnus/bruités. Le grounding ne doit jamais se
|
||||
contraindre à une zone non validée.
|
||||
"""
|
||||
if not rect or len(rect) != 4:
|
||||
return False
|
||||
|
||||
try:
|
||||
from ..ui.messages import est_fenetre_bruit
|
||||
except Exception:
|
||||
def est_fenetre_bruit(_title: str) -> bool:
|
||||
return not _title or _title.strip().lower() == "unknown_window"
|
||||
|
||||
w = rect[2] - rect[0]
|
||||
h = rect[3] - rect[1]
|
||||
title_clean = str(title or "").strip()
|
||||
if w <= 50 or h <= 50:
|
||||
return False
|
||||
title_lower = title_clean.lower()
|
||||
is_unknown_title = not title_clean or title_lower == "unknown_window"
|
||||
if not is_unknown_title and est_fenetre_bruit(title_clean):
|
||||
return False
|
||||
|
||||
# Une zone très plate, surtout en bas d'écran et très large, est
|
||||
# typiquement une barre des tâches / systray, pas une vraie fenêtre.
|
||||
# On réduit le seuil de hauteur à 120px pour ne pas rejeter les petits modaux.
|
||||
is_bar_like = (
|
||||
h < 120
|
||||
or (w > 0.9 * screen_width and h < 0.15 * screen_height)
|
||||
)
|
||||
|
||||
# Exception : si le titre contient un mot-clé de dialogue connu,
|
||||
# on considère que c'est plausible même si c'est petit.
|
||||
keywords = ["enregistrer sous", "save as", "voulez-vous", "confirm", "attention", "error", "erreur"]
|
||||
if any(k in title_lower for k in keywords):
|
||||
return h >= 80 # Un dialogue fait au moins 80px (titre + bouton)
|
||||
|
||||
return not is_bar_like
|
||||
|
||||
@staticmethod
|
||||
def _visual_scope_hints(target_spec: Dict[str, Any]) -> List[str]:
|
||||
"""Construire des indices textuels à chercher dans le crop fenêtre."""
|
||||
hints: List[str] = []
|
||||
raw_hints = [
|
||||
target_spec.get("window_title", ""),
|
||||
(target_spec.get("context_hints") or {}).get("window_title", ""),
|
||||
target_spec.get("by_text", ""),
|
||||
]
|
||||
for raw in raw_hints:
|
||||
text = str(raw or "").strip()
|
||||
if not text:
|
||||
continue
|
||||
text = text.lstrip("*").strip()
|
||||
variants = [text]
|
||||
for sep in (" – ", " - ", " — "):
|
||||
if sep in text:
|
||||
variants.extend(part.strip().lstrip("*") for part in text.split(sep))
|
||||
for variant in variants:
|
||||
if variant and len(variant) >= 3 and variant not in hints:
|
||||
hints.append(variant)
|
||||
return hints
|
||||
|
||||
@staticmethod
|
||||
def _server_rejects_text_fallback(raw: Optional[Dict[str, Any]]) -> bool:
|
||||
"""Dire si un rejet serveur doit bloquer le fallback texte local.
|
||||
|
||||
Un rejet explicite n'est pas un simple "non trouvé": le serveur a vu
|
||||
un candidat et l'a refusé pour une raison de qualité/zone. Refaire une
|
||||
recherche OCR large côté client contournerait ce garde-fou.
|
||||
"""
|
||||
if not raw or raw.get("resolved"):
|
||||
return False
|
||||
|
||||
reason = str(raw.get("reason") or "")
|
||||
method = str(raw.get("method") or "")
|
||||
return (
|
||||
method.startswith("rejected_")
|
||||
or reason.startswith("close_tab_")
|
||||
or reason.startswith("drift_")
|
||||
or "below_threshold" in reason
|
||||
)
|
||||
|
||||
def _window_crop_matches_target_visually(
|
||||
self,
|
||||
screenshot_b64: str,
|
||||
target_spec: Dict[str, Any],
|
||||
) -> bool:
|
||||
"""Vérifier visuellement qu'un crop contraint contient la bonne cible.
|
||||
|
||||
Principe: ne jamais faire confiance au rect système seul. Si aucun
|
||||
indice textuel n'est disponible, on laisse passer le crop plausible
|
||||
pour ne pas sur-bloquer les cibles purement iconiques.
|
||||
"""
|
||||
hints = self._visual_scope_hints(target_spec)
|
||||
if not hints:
|
||||
return True
|
||||
|
||||
finder = getattr(self._executor, "_find_text_on_screen", None)
|
||||
if not callable(finder):
|
||||
return True
|
||||
|
||||
for hint in hints:
|
||||
try:
|
||||
if finder(screenshot_b64, hint):
|
||||
logger.info(
|
||||
"Grounding fenêtre validé visuellement via '%s'",
|
||||
hint,
|
||||
)
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.debug("Validation visuelle du crop échouée pour '%s': %s", hint, e)
|
||||
logger.info(
|
||||
"Grounding plein écran : crop fenêtre rejeté par validation visuelle "
|
||||
"(hints=%s)",
|
||||
hints,
|
||||
)
|
||||
return False
|
||||
|
||||
def locate(
|
||||
self,
|
||||
server_url: str,
|
||||
target_spec: Dict[str, Any],
|
||||
fallback_x: float,
|
||||
fallback_y: float,
|
||||
screen_width: int,
|
||||
screen_height: int,
|
||||
strategies: Optional[List[str]] = None,
|
||||
) -> GroundingResult:
|
||||
"""Localiser un élément UI sur l'écran.
|
||||
|
||||
Exécute la cascade de stratégies dans l'ordre et retourne
|
||||
dès qu'une stratégie trouve l'élément.
|
||||
|
||||
Args:
|
||||
server_url: URL du serveur (SomEngine + VLM GPU)
|
||||
target_spec: Spécification de la cible (by_text, anchor, vlm_description...)
|
||||
fallback_x, fallback_y: Coordonnées de fallback (enregistrement)
|
||||
screen_width, screen_height: Résolution écran
|
||||
strategies: Liste ordonnée de stratégies à essayer.
|
||||
Par défaut : ["server", "template", "vlm_local"]
|
||||
|
||||
Returns:
|
||||
GroundingResult avec found=True et coordonnées, ou NOT_FOUND
|
||||
"""
|
||||
if strategies is None:
|
||||
strategies = ["server", "template", "vlm_local"]
|
||||
|
||||
# ── Apprentissage : réordonner les stratégies selon l'historique ──
|
||||
# Si le Learning sait quelle méthode marche pour cette cible,
|
||||
# la mettre en premier. C'est la boucle d'apprentissage.
|
||||
learned = target_spec.get("_learned_strategy", "")
|
||||
if learned:
|
||||
strategy_map = {
|
||||
"som_text_match": "server",
|
||||
"grounding_vlm": "server",
|
||||
"server_som": "server",
|
||||
"anchor_template": "template",
|
||||
"template_matching": "template",
|
||||
"hybrid_text_direct": "vlm_local",
|
||||
"hybrid_vlm_text": "vlm_local",
|
||||
"vlm_direct": "vlm_local",
|
||||
}
|
||||
preferred = strategy_map.get(learned, "")
|
||||
if preferred and preferred in strategies:
|
||||
strategies = [preferred] + [s for s in strategies if s != preferred]
|
||||
logger.info(
|
||||
f"Grounding: stratégie réordonnée par l'apprentissage → "
|
||||
f"{strategies} (learned={learned})"
|
||||
)
|
||||
|
||||
t_start = time.time()
|
||||
|
||||
window_rect = None
|
||||
active_title = ""
|
||||
if self._should_scope_to_active_window(target_spec):
|
||||
# ── Capture contrainte à la fenêtre active ──
|
||||
# Le grounding ne voit QUE la fenêtre attendue — pas la taskbar,
|
||||
# pas le systray, pas les autres apps. Comme un humain qui regarde
|
||||
# l'application sur laquelle il travaille.
|
||||
try:
|
||||
from ..window_info_crossplatform import get_active_window_rect
|
||||
from ..ui.messages import est_fenetre_lea
|
||||
|
||||
win_info = get_active_window_rect()
|
||||
if win_info and win_info.get("rect"):
|
||||
active_title = str(win_info.get("title", "") or "")
|
||||
if est_fenetre_lea(active_title) and not self._targets_lea_window(target_spec):
|
||||
logger.info(
|
||||
"Grounding plein écran : fenêtre active Léa ignorée pour "
|
||||
"cible externe (%s)",
|
||||
target_spec.get("by_text", "") or target_spec.get("by_role", ""),
|
||||
)
|
||||
win_info = None
|
||||
if win_info and win_info.get("rect"):
|
||||
r = win_info["rect"] # [left, top, right, bottom]
|
||||
if self._is_plausible_window_rect(r, active_title, screen_width, screen_height):
|
||||
w = r[2] - r[0]
|
||||
h = r[3] - r[1]
|
||||
window_rect = {
|
||||
"left": max(0, r[0]),
|
||||
"top": max(0, r[1]),
|
||||
"width": min(w, screen_width),
|
||||
"height": min(h, screen_height),
|
||||
}
|
||||
logger.info(
|
||||
f"Grounding contraint à la fenêtre : "
|
||||
f"{window_rect['width']}x{window_rect['height']} "
|
||||
f"à ({window_rect['left']}, {window_rect['top']})"
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
"Grounding plein écran : rect actif rejeté "
|
||||
"(title='%s', rect=%s)",
|
||||
active_title,
|
||||
r,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug(f"Pas de window rect disponible : {e}")
|
||||
else:
|
||||
logger.info(
|
||||
"Grounding plein écran pour by_role='%s'",
|
||||
target_spec.get("by_role", ""),
|
||||
)
|
||||
|
||||
screenshot_b64 = self._capture_window_or_screen(window_rect)
|
||||
if window_rect and screenshot_b64:
|
||||
if not self._window_crop_matches_target_visually(screenshot_b64, target_spec):
|
||||
window_rect = None
|
||||
screenshot_b64 = self._capture_window_or_screen(None)
|
||||
if not screenshot_b64:
|
||||
return GroundingResult(
|
||||
found=False, detail="Capture screenshot échouée",
|
||||
elapsed_ms=(time.time() - t_start) * 1000,
|
||||
)
|
||||
|
||||
# Dimensions de la zone capturée (fenêtre ou écran entier)
|
||||
cap_w = window_rect["width"] if window_rect else screen_width
|
||||
cap_h = window_rect["height"] if window_rect else screen_height
|
||||
|
||||
skip_text_fallback_after_server_reject = False
|
||||
for strategy in strategies:
|
||||
if (
|
||||
strategy == "vlm_local"
|
||||
and skip_text_fallback_after_server_reject
|
||||
and target_spec.get("by_text")
|
||||
):
|
||||
by_text = target_spec.get("by_text", "")
|
||||
logger.info(
|
||||
"[GROUNDING] Rejet serveur explicite pour '%s' — "
|
||||
"skip fallback local hybrid_text_direct",
|
||||
by_text,
|
||||
)
|
||||
print(
|
||||
f" [GROUNDING] Rejet serveur explicite pour '{by_text}' "
|
||||
"→ pas de fallback texte local"
|
||||
)
|
||||
continue
|
||||
|
||||
result = self._try_strategy(
|
||||
strategy, server_url, screenshot_b64, target_spec,
|
||||
fallback_x, fallback_y, cap_w, cap_h,
|
||||
)
|
||||
if strategy == "server" and self._server_rejects_text_fallback(result.raw):
|
||||
skip_text_fallback_after_server_reject = True
|
||||
if result.found:
|
||||
# ── Conversion coords fenêtre → coords écran ──
|
||||
if window_rect:
|
||||
# Le grounding a retourné des coords relatives à la fenêtre
|
||||
# On les convertit en coords relatives à l'écran entier
|
||||
abs_x = window_rect["left"] + result.x_pct * cap_w
|
||||
abs_y = window_rect["top"] + result.y_pct * cap_h
|
||||
result.x_pct = abs_x / screen_width
|
||||
result.y_pct = abs_y / screen_height
|
||||
result.detail = f"{result.detail} [fenêtre {cap_w}x{cap_h}]"
|
||||
|
||||
result.elapsed_ms = (time.time() - t_start) * 1000
|
||||
return result
|
||||
|
||||
if target_spec.get("allow_position_fallback"):
|
||||
if 0.0 <= fallback_x <= 1.0 and 0.0 <= fallback_y <= 1.0:
|
||||
return GroundingResult(
|
||||
found=True,
|
||||
x_pct=fallback_x,
|
||||
y_pct=fallback_y,
|
||||
method="position_fallback",
|
||||
score=0.2,
|
||||
detail="fallback positionnel explicite",
|
||||
elapsed_ms=(time.time() - t_start) * 1000,
|
||||
)
|
||||
|
||||
return GroundingResult(
|
||||
found=False,
|
||||
detail=f"Toutes les stratégies ont échoué ({', '.join(strategies)})",
|
||||
elapsed_ms=(time.time() - t_start) * 1000,
|
||||
)
|
||||
|
||||
def _capture_window_or_screen(self, window_rect: Optional[Dict]) -> str:
|
||||
"""Capturer soit la fenêtre active (croppée), soit l'écran entier.
|
||||
|
||||
Si window_rect est fourni, capture uniquement cette zone.
|
||||
Sinon, capture l'écran entier (fallback).
|
||||
"""
|
||||
try:
|
||||
from PIL import Image
|
||||
import mss as mss_lib
|
||||
|
||||
with mss_lib.mss() as local_sct:
|
||||
if window_rect:
|
||||
# Capture de la zone fenêtre uniquement
|
||||
region = {
|
||||
"left": window_rect["left"],
|
||||
"top": window_rect["top"],
|
||||
"width": window_rect["width"],
|
||||
"height": window_rect["height"],
|
||||
}
|
||||
raw = local_sct.grab(region)
|
||||
else:
|
||||
# Fallback écran entier
|
||||
raw = local_sct.grab(local_sct.monitors[1])
|
||||
|
||||
img = Image.frombytes("RGB", raw.size, raw.bgra, "raw", "BGRX")
|
||||
buffer = io.BytesIO()
|
||||
img.save(buffer, format="JPEG", quality=75)
|
||||
return base64.b64encode(buffer.getvalue()).decode("utf-8")
|
||||
except Exception as e:
|
||||
logger.warning(f"Capture échouée : {e}")
|
||||
# Fallback sur la méthode existante de l'executor
|
||||
return self._executor._capture_screenshot_b64(max_width=0, quality=75)
|
||||
|
||||
def _try_strategy(
|
||||
self,
|
||||
strategy: str,
|
||||
server_url: str,
|
||||
screenshot_b64: str,
|
||||
target_spec: Dict[str, Any],
|
||||
fallback_x: float,
|
||||
fallback_y: float,
|
||||
screen_width: int,
|
||||
screen_height: int,
|
||||
) -> GroundingResult:
|
||||
"""Essayer une stratégie de grounding unique."""
|
||||
|
||||
if strategy == "server" and server_url:
|
||||
raw = self._executor._server_resolve_target(
|
||||
server_url, screenshot_b64, target_spec,
|
||||
fallback_x, fallback_y, screen_width, screen_height,
|
||||
)
|
||||
if raw and raw.get("resolved"):
|
||||
return GroundingResult(
|
||||
found=True,
|
||||
x_pct=raw["x_pct"],
|
||||
y_pct=raw["y_pct"],
|
||||
method=raw.get("method", "server"),
|
||||
score=raw.get("score", 0.0),
|
||||
detail=raw.get("matched_element", {}).get("label", ""),
|
||||
raw=raw,
|
||||
)
|
||||
if raw:
|
||||
return GroundingResult(
|
||||
found=False,
|
||||
method=raw.get("method", "server"),
|
||||
score=raw.get("score", 0.0),
|
||||
detail=raw.get("reason", "server: pas trouvé"),
|
||||
raw=raw,
|
||||
)
|
||||
|
||||
elif strategy == "template":
|
||||
anchor_b64 = target_spec.get("anchor_image_base64", "")
|
||||
if anchor_b64:
|
||||
raw = self._executor._template_match_anchor(
|
||||
screenshot_b64,
|
||||
anchor_b64,
|
||||
screen_width,
|
||||
screen_height,
|
||||
fallback_x_pct=fallback_x,
|
||||
fallback_y_pct=fallback_y,
|
||||
)
|
||||
if raw and raw.get("resolved"):
|
||||
return GroundingResult(
|
||||
found=True,
|
||||
x_pct=raw["x_pct"],
|
||||
y_pct=raw["y_pct"],
|
||||
method="anchor_template",
|
||||
score=raw.get("score", 0.0),
|
||||
raw=raw,
|
||||
)
|
||||
|
||||
elif strategy == "vlm_local":
|
||||
by_text = target_spec.get("by_text", "")
|
||||
vlm_desc = target_spec.get("vlm_description", "")
|
||||
if vlm_desc or by_text:
|
||||
raw = self._executor._hybrid_vlm_resolve(
|
||||
screenshot_b64, target_spec, screen_width, screen_height,
|
||||
)
|
||||
if raw and raw.get("resolved"):
|
||||
return GroundingResult(
|
||||
found=True,
|
||||
x_pct=raw["x_pct"],
|
||||
y_pct=raw["y_pct"],
|
||||
method=raw.get("method", "vlm_local"),
|
||||
score=raw.get("score", 0.0),
|
||||
detail=raw.get("matched_element", {}).get("label", ""),
|
||||
raw=raw,
|
||||
)
|
||||
|
||||
return GroundingResult(found=False, method=strategy, detail=f"{strategy}: pas trouvé")
|
||||
172
agent_v0/agent_v1/core/policy.py
Normal file
172
agent_v0/agent_v1/core/policy.py
Normal file
@@ -0,0 +1,172 @@
|
||||
# agent_v1/core/policy.py
|
||||
"""
|
||||
Module Policy — décisions intelligentes quand le grounding échoue.
|
||||
|
||||
Responsabilité unique : "Le Grounding dit NOT_FOUND. Que fait-on ?"
|
||||
Ne localise AUCUN élément — c'est le rôle du Grounding.
|
||||
|
||||
Décisions possibles :
|
||||
- RETRY : re-tenter le grounding (après popup fermée, par exemple)
|
||||
- SKIP : l'action n'est plus nécessaire (état déjà atteint)
|
||||
- ABORT : arrêter le workflow (état incohérent)
|
||||
- SUPERVISE : rendre la main à l'utilisateur
|
||||
|
||||
Séparé de Grounding (qui localise les éléments).
|
||||
Ref: docs/PLAN_ACTEUR_V1.md — Architecture MÉSO (acteur intelligent)
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Decision(Enum):
|
||||
"""Décisions possibles quand le grounding échoue."""
|
||||
RETRY = "retry" # Re-tenter (après correction : popup fermée, navigation...)
|
||||
SKIP = "skip" # Action inutile (état déjà atteint)
|
||||
ABORT = "abort" # Arrêter le workflow (état incohérent)
|
||||
SUPERVISE = "supervise" # Rendre la main à l'utilisateur (Léa dit "je bloque")
|
||||
CONTINUE = "continue" # Continuer malgré l'échec (action non critique)
|
||||
|
||||
|
||||
@dataclass
|
||||
class PolicyDecision:
|
||||
"""Résultat d'une décision Policy."""
|
||||
decision: Decision
|
||||
reason: str # Explication de la décision
|
||||
action_taken: str = "" # Action corrective effectuée (ex: "popup fermée")
|
||||
elapsed_ms: float = 0.0
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"decision": self.decision.value,
|
||||
"reason": self.reason,
|
||||
"action_taken": self.action_taken,
|
||||
"elapsed_ms": round(self.elapsed_ms, 1),
|
||||
}
|
||||
|
||||
|
||||
class PolicyEngine:
|
||||
"""Moteur de décision quand le grounding échoue.
|
||||
|
||||
Cascade de décision :
|
||||
1. Popup détectée ? → fermer et RETRY
|
||||
2. Acteur gemma4 → SKIP / ABORT / SUPERVISE
|
||||
3. Fallback → SUPERVISE (rendre la main)
|
||||
|
||||
Usage :
|
||||
policy = PolicyEngine(executor)
|
||||
decision = policy.decide(action, target_spec, grounding_result)
|
||||
if decision.decision == Decision.RETRY:
|
||||
# re-tenter le grounding
|
||||
elif decision.decision == Decision.SKIP:
|
||||
# marquer comme réussi, passer à la suite
|
||||
"""
|
||||
|
||||
def __init__(self, executor):
|
||||
self._executor = executor
|
||||
|
||||
def decide(
|
||||
self,
|
||||
action: Dict[str, Any],
|
||||
target_spec: Dict[str, Any],
|
||||
retry_count: int = 0,
|
||||
max_retries: int = 1,
|
||||
) -> PolicyDecision:
|
||||
"""Décider quoi faire quand le grounding a échoué.
|
||||
|
||||
Cascade :
|
||||
1. Si c'est le premier essai → tenter de fermer une popup → RETRY
|
||||
2. Si retry déjà fait → demander à l'acteur gemma4
|
||||
3. Selon gemma4 : SKIP, ABORT, ou SUPERVISE
|
||||
|
||||
**SÉCURITÉ** : si, pendant l'étape 1, le handler popup détecte un
|
||||
dialogue système Windows (UAC, CredUI, SmartScreen…), on bascule
|
||||
immédiatement en SUPERVISE. Cf. system_dialog_guard.py.
|
||||
|
||||
Args:
|
||||
action: L'action qui a échoué
|
||||
target_spec: La cible non trouvée
|
||||
retry_count: Nombre de retries déjà faits
|
||||
max_retries: Maximum de retries autorisés
|
||||
"""
|
||||
t_start = time.time()
|
||||
|
||||
# ── Étape 1 : Tentative de fermeture popup (premier essai) ──
|
||||
if retry_count == 0:
|
||||
popup_handled = self._try_close_popup()
|
||||
|
||||
# Si le popup handler a détecté un dialogue système, on
|
||||
# bascule immédiatement en SUPERVISE — pas de retry, pas de
|
||||
# gemma4 : on rend la main à l'humain.
|
||||
if getattr(self._executor, "_system_dialog_pause", None):
|
||||
sd = self._executor._system_dialog_pause
|
||||
return PolicyDecision(
|
||||
decision=Decision.SUPERVISE,
|
||||
reason=(
|
||||
f"Dialogue système détecté ({sd.get('category', '?')}) — "
|
||||
f"refus d'interaction automatique"
|
||||
),
|
||||
action_taken="system_dialog_blocked",
|
||||
elapsed_ms=(time.time() - t_start) * 1000,
|
||||
)
|
||||
|
||||
if popup_handled:
|
||||
return PolicyDecision(
|
||||
decision=Decision.RETRY,
|
||||
reason="Popup détectée et fermée, re-tentative",
|
||||
action_taken="popup_closed",
|
||||
elapsed_ms=(time.time() - t_start) * 1000,
|
||||
)
|
||||
|
||||
# ── Étape 2 : Max retries atteint → acteur gemma4 ──
|
||||
if retry_count >= max_retries:
|
||||
actor_decision = self._ask_actor(action, target_spec)
|
||||
|
||||
if actor_decision == "PASSER":
|
||||
return PolicyDecision(
|
||||
decision=Decision.SKIP,
|
||||
reason="Acteur gemma4 : l'état est déjà atteint",
|
||||
elapsed_ms=(time.time() - t_start) * 1000,
|
||||
)
|
||||
elif actor_decision == "STOPPER":
|
||||
return PolicyDecision(
|
||||
decision=Decision.ABORT,
|
||||
reason="Acteur gemma4 : état incohérent, arrêt",
|
||||
elapsed_ms=(time.time() - t_start) * 1000,
|
||||
)
|
||||
else:
|
||||
# EXECUTER ou inconnu → pause supervisée
|
||||
return PolicyDecision(
|
||||
decision=Decision.SUPERVISE,
|
||||
reason=f"Acteur gemma4 : {actor_decision}, pause supervisée",
|
||||
elapsed_ms=(time.time() - t_start) * 1000,
|
||||
)
|
||||
|
||||
# ── Étape 3 : Encore des retries disponibles → RETRY ──
|
||||
return PolicyDecision(
|
||||
decision=Decision.RETRY,
|
||||
reason=f"Retry {retry_count + 1}/{max_retries}",
|
||||
elapsed_ms=(time.time() - t_start) * 1000,
|
||||
)
|
||||
|
||||
def _try_close_popup(self) -> bool:
|
||||
"""Tenter de fermer une popup via le handler VLM existant."""
|
||||
try:
|
||||
return self._executor._handle_popup_vlm()
|
||||
except Exception as e:
|
||||
logger.debug(f"Policy: popup handler échoué : {e}")
|
||||
return False
|
||||
|
||||
def _ask_actor(self, action: Dict, target_spec: Dict) -> str:
|
||||
"""Demander à gemma4 de décider (PASSER/EXECUTER/STOPPER)."""
|
||||
try:
|
||||
return self._executor._actor_decide(action, target_spec)
|
||||
except Exception as e:
|
||||
logger.debug(f"Policy: acteur gemma4 échoué : {e}")
|
||||
return "EXECUTER" # Fallback → supervisé
|
||||
215
agent_v0/agent_v1/core/recovery.py
Normal file
215
agent_v0/agent_v1/core/recovery.py
Normal file
@@ -0,0 +1,215 @@
|
||||
# agent_v1/core/recovery.py
|
||||
"""
|
||||
Module Recovery — mécanisme de rollback quand une action échoue.
|
||||
|
||||
Responsabilité : "L'action a échoué ou produit un résultat inattendu.
|
||||
Comment revenir en arrière ?"
|
||||
|
||||
Stratégies de recovery :
|
||||
1. Ctrl+Z (undo natif) — pour les frappes et modifications
|
||||
2. Escape (fermer dialogue) — pour les popups/menus
|
||||
3. Alt+F4 (fermer fenêtre) — si mauvaise application ouverte
|
||||
4. Clic hors zone — fermer un menu déroulant
|
||||
5. Navigation retour — retourner à l'écran précédent
|
||||
|
||||
Le Recovery est appelé par le Policy quand le Critic détecte un
|
||||
résultat inattendu (pixel OK + sémantique NON = changement inattendu).
|
||||
|
||||
Ref: docs/VISION_RPA_INTELLIGENT.md — "Il se trompe" → correction
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class RecoveryAction(Enum):
|
||||
"""Actions de recovery possibles."""
|
||||
UNDO = "undo" # Ctrl+Z
|
||||
ESCAPE = "escape" # Echap (fermer dialogue/menu)
|
||||
CLOSE_WINDOW = "close" # Alt+F4
|
||||
CLICK_AWAY = "click_away" # Clic hors zone (fermer menu)
|
||||
NONE = "none" # Pas de recovery possible
|
||||
|
||||
|
||||
@dataclass
|
||||
class RecoveryResult:
|
||||
"""Résultat d'une tentative de recovery."""
|
||||
action_taken: RecoveryAction
|
||||
success: bool
|
||||
detail: str = ""
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"action_taken": self.action_taken.value,
|
||||
"success": self.success,
|
||||
"detail": self.detail,
|
||||
}
|
||||
|
||||
|
||||
class RecoveryEngine:
|
||||
"""Moteur de recovery — tente de revenir en arrière après un échec.
|
||||
|
||||
Choisit la stratégie de recovery en fonction du type d'action qui a échoué
|
||||
et de l'état actuel de l'écran.
|
||||
|
||||
Usage :
|
||||
recovery = RecoveryEngine(executor)
|
||||
result = recovery.attempt(failed_action, critic_result)
|
||||
if result.success:
|
||||
# re-tenter l'action
|
||||
"""
|
||||
|
||||
def __init__(self, executor):
|
||||
self._executor = executor
|
||||
|
||||
def attempt(
|
||||
self,
|
||||
failed_action: Dict[str, Any],
|
||||
critic_detail: str = "",
|
||||
) -> RecoveryResult:
|
||||
"""Tenter une recovery après un échec.
|
||||
|
||||
Sélectionne la stratégie appropriée selon le type d'action :
|
||||
- click qui ouvre la mauvaise chose → Escape ou Ctrl+Z
|
||||
- type qui tape au mauvais endroit → Ctrl+Z
|
||||
- key_combo inattendu → Ctrl+Z
|
||||
- popup apparue → Escape
|
||||
|
||||
Args:
|
||||
failed_action: L'action qui a échoué
|
||||
critic_detail: Détail du Critic (raison de l'échec sémantique)
|
||||
"""
|
||||
action_type = failed_action.get("type", "")
|
||||
detail_lower = critic_detail.lower()
|
||||
|
||||
# Choisir la stratégie de recovery
|
||||
strategy = self._select_strategy(action_type, detail_lower)
|
||||
|
||||
if strategy == RecoveryAction.NONE:
|
||||
return RecoveryResult(
|
||||
action_taken=RecoveryAction.NONE,
|
||||
success=False,
|
||||
detail="Pas de stratégie de recovery applicable",
|
||||
)
|
||||
|
||||
return self._execute_recovery(strategy)
|
||||
|
||||
def _select_strategy(self, action_type: str, critic_detail: str) -> RecoveryAction:
|
||||
"""Sélectionner la meilleure stratégie de recovery.
|
||||
|
||||
Priorité : type d'action d'abord (frappe → undo), puis contexte.
|
||||
"""
|
||||
# Frappe ou modification incorrecte → toujours Ctrl+Z
|
||||
if action_type in ("type", "key_combo"):
|
||||
return RecoveryAction.UNDO
|
||||
|
||||
# Popup/dialogue détecté
|
||||
if any(w in critic_detail for w in ["popup", "dialog", "erreur", "error", "modal"]):
|
||||
return RecoveryAction.ESCAPE
|
||||
|
||||
# Menu ouvert par erreur
|
||||
if any(w in critic_detail for w in ["menu", "dropdown", "déroulant"]):
|
||||
return RecoveryAction.ESCAPE
|
||||
|
||||
# Mauvaise fenêtre ouverte
|
||||
if any(w in critic_detail for w in ["mauvaise fenêtre", "wrong window"]):
|
||||
return RecoveryAction.CLOSE_WINDOW
|
||||
|
||||
# Clic qui a produit un résultat inattendu
|
||||
if action_type == "click":
|
||||
return RecoveryAction.ESCAPE
|
||||
|
||||
return RecoveryAction.NONE
|
||||
|
||||
def _execute_recovery(self, strategy: RecoveryAction) -> RecoveryResult:
|
||||
"""Exécuter la stratégie de recovery choisie."""
|
||||
from pynput.keyboard import Controller as KeyboardController, Key
|
||||
|
||||
keyboard = self._executor.keyboard
|
||||
|
||||
try:
|
||||
if strategy == RecoveryAction.UNDO:
|
||||
# Ctrl+Z
|
||||
logger.info("Recovery : Ctrl+Z (undo)")
|
||||
print(" [RECOVERY] Ctrl+Z — annulation de la dernière action")
|
||||
keyboard.press(Key.ctrl)
|
||||
keyboard.press('z')
|
||||
keyboard.release('z')
|
||||
keyboard.release(Key.ctrl)
|
||||
time.sleep(0.5)
|
||||
return RecoveryResult(
|
||||
action_taken=RecoveryAction.UNDO,
|
||||
success=True,
|
||||
detail="Ctrl+Z exécuté",
|
||||
)
|
||||
|
||||
elif strategy == RecoveryAction.ESCAPE:
|
||||
# Echap
|
||||
logger.info("Recovery : Escape (fermer dialogue)")
|
||||
print(" [RECOVERY] Escape — fermeture dialogue/menu")
|
||||
keyboard.press(Key.esc)
|
||||
keyboard.release(Key.esc)
|
||||
time.sleep(0.5)
|
||||
return RecoveryResult(
|
||||
action_taken=RecoveryAction.ESCAPE,
|
||||
success=True,
|
||||
detail="Escape exécuté",
|
||||
)
|
||||
|
||||
elif strategy == RecoveryAction.CLOSE_WINDOW:
|
||||
# Alt+F4 — AVEC vérification fenêtre active
|
||||
# Sur un poste hospitalier, Alt+F4 sans vérif peut fermer le DPI patient
|
||||
try:
|
||||
from ..window_info_crossplatform import get_active_window_info
|
||||
active = get_active_window_info()
|
||||
active_title = active.get("title", "")
|
||||
logger.info(f"Recovery : Alt+F4 sur '{active_title}'")
|
||||
print(f" [RECOVERY] Alt+F4 — fermeture de '{active_title}'")
|
||||
except Exception:
|
||||
logger.info("Recovery : Alt+F4 (fenêtre active inconnue)")
|
||||
print(" [RECOVERY] Alt+F4 — fermeture fenêtre indésirable")
|
||||
|
||||
keyboard.press(Key.alt)
|
||||
keyboard.press(Key.f4)
|
||||
keyboard.release(Key.f4)
|
||||
keyboard.release(Key.alt)
|
||||
time.sleep(1.0)
|
||||
return RecoveryResult(
|
||||
action_taken=RecoveryAction.CLOSE_WINDOW,
|
||||
success=True,
|
||||
detail=f"Alt+F4 exécuté sur '{active_title if 'active_title' in dir() else '?'}'",
|
||||
)
|
||||
|
||||
elif strategy == RecoveryAction.CLICK_AWAY:
|
||||
# Clic au centre de l'écran (hors popup)
|
||||
logger.info("Recovery : clic hors zone")
|
||||
print(" [RECOVERY] Clic hors zone — fermeture menu")
|
||||
monitor = self._executor.sct.monitors[1]
|
||||
w, h = monitor["width"], monitor["height"]
|
||||
# Cliquer dans un coin neutre (10% depuis le haut-gauche)
|
||||
self._executor._click((int(w * 0.1), int(h * 0.1)), "left")
|
||||
time.sleep(0.5)
|
||||
return RecoveryResult(
|
||||
action_taken=RecoveryAction.CLICK_AWAY,
|
||||
success=True,
|
||||
detail="Clic hors zone exécuté",
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Recovery échoué ({strategy.value}) : {e}")
|
||||
return RecoveryResult(
|
||||
action_taken=strategy,
|
||||
success=False,
|
||||
detail=f"Erreur : {e}",
|
||||
)
|
||||
|
||||
return RecoveryResult(
|
||||
action_taken=RecoveryAction.NONE,
|
||||
success=False,
|
||||
detail="Stratégie non implémentée",
|
||||
)
|
||||
448
agent_v0/agent_v1/core/system_dialog_guard.py
Normal file
448
agent_v0/agent_v1/core/system_dialog_guard.py
Normal file
@@ -0,0 +1,448 @@
|
||||
# agent_v1/core/system_dialog_guard.py
|
||||
"""
|
||||
Garde-fou sécurité : détection des dialogues système Windows critiques.
|
||||
|
||||
==============================================================================
|
||||
POURQUOI ?
|
||||
==============================================================================
|
||||
|
||||
Pendant un replay, si un dialogue UAC, CredUI (mot de passe Windows),
|
||||
SmartScreen ou une notification de sécurité Windows apparaît, Léa pourrait
|
||||
demander au VLM "quel bouton cliquer" et recevoir "Oui" en réponse.
|
||||
|
||||
→ **Léa cliquerait OUI sur une élévation UAC** → vecteur d'attaque ransomware.
|
||||
|
||||
Ce module fournit la détection de ces dialogues pour que l'exécuteur
|
||||
**ne clique JAMAIS dessus automatiquement**. La décision est renvoyée à
|
||||
l'humain (pause supervisée).
|
||||
|
||||
==============================================================================
|
||||
PRINCIPE
|
||||
==============================================================================
|
||||
|
||||
- **Faux positif tolérable** : on préfère pauser pour rien plutôt que cliquer
|
||||
sur un UAC.
|
||||
- **Faux négatif catastrophique** : mieux vaut être trop prudent.
|
||||
- **Multi-signal** : titre, ClassName UIA, nom de processus, parent_path.
|
||||
Un seul signal suffit à bloquer.
|
||||
- **Compatible Citrix** : les dialogues UAC d'un client Citrix apparaissent
|
||||
aussi dans la VM distante — la détection par classe UIA fonctionne.
|
||||
|
||||
==============================================================================
|
||||
PATTERNS DE DÉTECTION (ordre de criticité décroissant)
|
||||
==============================================================================
|
||||
|
||||
1. UAC Consent (élévation de privilèges)
|
||||
- ClassName : `$$$Secure UAP Dummy Window Class$$$`
|
||||
- Process : `consent.exe`
|
||||
- Titre : "Contrôle de compte d'utilisateur", "User Account Control"
|
||||
|
||||
2. CredUI (prompt mot de passe Windows)
|
||||
- ClassName : `Credential Dialog Xaml Host`
|
||||
- Process : `credentialuibroker.exe`, `credui.exe`
|
||||
- Titre : "Sécurité Windows", "Windows Security"
|
||||
|
||||
3. SmartScreen (protection contre applications inconnues)
|
||||
- Process : `smartscreen.exe`
|
||||
- Titre : "Windows a protégé votre ordinateur", "Windows protected your PC"
|
||||
|
||||
4. Windows Defender / Security Center
|
||||
- Process : `securityhealthhost.exe`, `msmpeng.exe`
|
||||
- Titre : "Sécurité Windows", "Windows Defender"
|
||||
|
||||
5. Signatures pilotes / driver install
|
||||
- Titre : "Installer ce pilote", "Driver signature"
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Catégories de dialogues système (pour logging + messages)
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class SystemDialogCategory:
|
||||
"""Catégories de dialogues système à bloquer absolument."""
|
||||
UAC = "uac_consent" # Élévation de privilèges
|
||||
CREDUI = "windows_credential_prompt" # Prompt de mot de passe
|
||||
SMARTSCREEN = "smartscreen" # Protection SmartScreen
|
||||
DEFENDER = "windows_defender" # Alerte Windows Defender
|
||||
DRIVER = "driver_install" # Installation pilote signé
|
||||
SECURITY_TOAST = "security_toast" # Toast de sécurité Windows
|
||||
UNKNOWN_DIALOG = "unknown_system_dialog" # Dialogue #32770 sans app connue
|
||||
|
||||
|
||||
@dataclass
|
||||
class SystemDialogDetection:
|
||||
"""Résultat d'une analyse de dialogue système."""
|
||||
is_system_dialog: bool
|
||||
category: str = "" # Valeur de SystemDialogCategory
|
||||
matched_signal: str = "" # Ex: "class_name=Consent.exe"
|
||||
matched_value: str = "" # La valeur qui a matché
|
||||
reason: str = "" # Explication lisible
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"is_system_dialog": self.is_system_dialog,
|
||||
"category": self.category,
|
||||
"matched_signal": self.matched_signal,
|
||||
"matched_value": self.matched_value,
|
||||
"reason": self.reason,
|
||||
}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Signatures de détection
|
||||
# =============================================================================
|
||||
|
||||
|
||||
# ClassName UIA (casse préservée — Windows exposées telle quelle par UIA).
|
||||
# Utilisées telles quelles puis en minuscules pour matcher avec souplesse.
|
||||
_CLASS_NAMES_SYSTEM = {
|
||||
# UAC Consent
|
||||
"$$$Secure UAP Dummy Window Class$$$": SystemDialogCategory.UAC,
|
||||
"Credential Dialog Xaml Host": SystemDialogCategory.CREDUI,
|
||||
# Windows Credential UI ancien nom
|
||||
"CredentialDialogXamlHost": SystemDialogCategory.CREDUI,
|
||||
}
|
||||
|
||||
# Nom de processus (comparaison insensible à la casse, .exe normalisé)
|
||||
_PROCESS_NAMES_SYSTEM = {
|
||||
"consent.exe": SystemDialogCategory.UAC,
|
||||
"credentialuibroker.exe": SystemDialogCategory.CREDUI,
|
||||
"credui.exe": SystemDialogCategory.CREDUI,
|
||||
"credwiz.exe": SystemDialogCategory.CREDUI,
|
||||
"smartscreen.exe": SystemDialogCategory.SMARTSCREEN,
|
||||
"securityhealthhost.exe": SystemDialogCategory.DEFENDER,
|
||||
"securityhealthui.exe": SystemDialogCategory.DEFENDER,
|
||||
"securityhealthsystray.exe": SystemDialogCategory.DEFENDER,
|
||||
"msmpeng.exe": SystemDialogCategory.DEFENDER,
|
||||
"windowsdefender.exe": SystemDialogCategory.DEFENDER,
|
||||
"msiexec.exe": SystemDialogCategory.DRIVER, # prompts pilotes signés
|
||||
"drvinst.exe": SystemDialogCategory.DRIVER,
|
||||
}
|
||||
|
||||
# Motifs titre (insensibles à la casse, regex avec word boundaries)
|
||||
# On ne matche pas les titres génériques trop larges pour limiter les faux
|
||||
# positifs sur OSIRIS/OBSIUS/MEDSPHERE.
|
||||
_TITLE_PATTERNS_SYSTEM: Tuple[Tuple[re.Pattern, str], ...] = (
|
||||
# UAC
|
||||
(re.compile(r"contr[oô]le\s+de\s+compte\s+d'?utilisateur", re.IGNORECASE),
|
||||
SystemDialogCategory.UAC),
|
||||
(re.compile(r"\buser\s+account\s+control\b", re.IGNORECASE),
|
||||
SystemDialogCategory.UAC),
|
||||
(re.compile(r"voulez-vous\s+autoriser\s+cette\s+application", re.IGNORECASE),
|
||||
SystemDialogCategory.UAC),
|
||||
(re.compile(r"do\s+you\s+want\s+to\s+allow\s+this\s+app", re.IGNORECASE),
|
||||
SystemDialogCategory.UAC),
|
||||
|
||||
# CredUI / Sécurité Windows
|
||||
(re.compile(r"\bs[eé]curit[eé]\s+windows\b", re.IGNORECASE),
|
||||
SystemDialogCategory.CREDUI),
|
||||
(re.compile(r"\bwindows\s+security\b", re.IGNORECASE),
|
||||
SystemDialogCategory.CREDUI),
|
||||
(re.compile(r"entrer\s+les\s+informations\s+d'?identification", re.IGNORECASE),
|
||||
SystemDialogCategory.CREDUI),
|
||||
(re.compile(r"enter\s+(?:your\s+)?credentials?", re.IGNORECASE),
|
||||
SystemDialogCategory.CREDUI),
|
||||
(re.compile(r"connectez-vous\s+[aà]\s+votre\s+compte", re.IGNORECASE),
|
||||
SystemDialogCategory.CREDUI),
|
||||
(re.compile(r"\bsign\s+in\s+to\s+your\s+account\b", re.IGNORECASE),
|
||||
SystemDialogCategory.CREDUI),
|
||||
|
||||
# SmartScreen
|
||||
(re.compile(r"windows\s+a\s+prot[eé]g[eé]", re.IGNORECASE),
|
||||
SystemDialogCategory.SMARTSCREEN),
|
||||
(re.compile(r"windows\s+protected\s+your\s+pc", re.IGNORECASE),
|
||||
SystemDialogCategory.SMARTSCREEN),
|
||||
(re.compile(r"\bsmartscreen\b", re.IGNORECASE),
|
||||
SystemDialogCategory.SMARTSCREEN),
|
||||
(re.compile(r"\b[eé]diteur\s+inconnu\b", re.IGNORECASE),
|
||||
SystemDialogCategory.SMARTSCREEN),
|
||||
(re.compile(r"\bunknown\s+publisher\b", re.IGNORECASE),
|
||||
SystemDialogCategory.SMARTSCREEN),
|
||||
|
||||
# Windows Defender
|
||||
(re.compile(r"windows\s+defender", re.IGNORECASE),
|
||||
SystemDialogCategory.DEFENDER),
|
||||
(re.compile(r"menace\s+d[eé]tect[eé]e", re.IGNORECASE),
|
||||
SystemDialogCategory.DEFENDER),
|
||||
(re.compile(r"threat\s+detected", re.IGNORECASE),
|
||||
SystemDialogCategory.DEFENDER),
|
||||
|
||||
# Driver
|
||||
(re.compile(r"installer\s+ce\s+pilote", re.IGNORECASE),
|
||||
SystemDialogCategory.DRIVER),
|
||||
(re.compile(r"install\s+this\s+driver", re.IGNORECASE),
|
||||
SystemDialogCategory.DRIVER),
|
||||
(re.compile(r"signature\s+num[eé]rique\s+du\s+pilote", re.IGNORECASE),
|
||||
SystemDialogCategory.DRIVER),
|
||||
)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Fonctions de détection
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def _normalize_process(name: str) -> str:
|
||||
"""Normaliser un nom de processus pour comparaison."""
|
||||
if not name:
|
||||
return ""
|
||||
name = name.strip().lower()
|
||||
# Enlever le chemin éventuel
|
||||
if "\\" in name or "/" in name:
|
||||
name = name.replace("\\", "/").split("/")[-1]
|
||||
# Assurer suffixe .exe pour matcher le dictionnaire
|
||||
if not name.endswith(".exe") and name:
|
||||
# Les process_name peuvent venir sans .exe (psutil) — on ajoute
|
||||
# pour avoir une clé uniforme
|
||||
name_with_exe = name + ".exe"
|
||||
if name_with_exe in _PROCESS_NAMES_SYSTEM:
|
||||
return name_with_exe
|
||||
return name
|
||||
|
||||
|
||||
def _check_class_name(class_name: str) -> Optional[Tuple[str, str, str]]:
|
||||
"""Vérifier si un ClassName UIA matche un dialogue système.
|
||||
|
||||
Returns:
|
||||
(category, matched_class, reason) si match, None sinon.
|
||||
"""
|
||||
if not class_name:
|
||||
return None
|
||||
|
||||
# Match exact
|
||||
if class_name in _CLASS_NAMES_SYSTEM:
|
||||
cat = _CLASS_NAMES_SYSTEM[class_name]
|
||||
return (cat, class_name, f"ClassName UIA '{class_name}' = dialogue système {cat}")
|
||||
|
||||
# Match insensible à la casse + normalisation espaces
|
||||
cn_norm = class_name.strip()
|
||||
for known, cat in _CLASS_NAMES_SYSTEM.items():
|
||||
if cn_norm.lower() == known.lower():
|
||||
return (cat, class_name, f"ClassName UIA ~= '{known}' ({cat})")
|
||||
|
||||
# Détection souple UAC (il existe quelques variantes de la classe secure)
|
||||
if "secure uap" in class_name.lower() or "uap dummy" in class_name.lower():
|
||||
return (SystemDialogCategory.UAC, class_name,
|
||||
f"ClassName '{class_name}' contient 'Secure UAP' → UAC")
|
||||
|
||||
# Credential XAML Host
|
||||
if "credential" in class_name.lower() and "xaml" in class_name.lower():
|
||||
return (SystemDialogCategory.CREDUI, class_name,
|
||||
f"ClassName '{class_name}' contient Credential+Xaml → CredUI")
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _check_process_name(process_name: str) -> Optional[Tuple[str, str, str]]:
|
||||
"""Vérifier si un nom de processus est un dialogue système.
|
||||
|
||||
Returns:
|
||||
(category, matched_process, reason) si match, None sinon.
|
||||
"""
|
||||
if not process_name:
|
||||
return None
|
||||
|
||||
norm = _normalize_process(process_name)
|
||||
if norm in _PROCESS_NAMES_SYSTEM:
|
||||
cat = _PROCESS_NAMES_SYSTEM[norm]
|
||||
return (cat, process_name, f"Processus '{norm}' = {cat}")
|
||||
return None
|
||||
|
||||
|
||||
def _check_title(title: str) -> Optional[Tuple[str, str, str]]:
|
||||
"""Vérifier si un titre de fenêtre matche un dialogue système.
|
||||
|
||||
Returns:
|
||||
(category, matched_pattern, reason) si match, None sinon.
|
||||
"""
|
||||
if not title:
|
||||
return None
|
||||
|
||||
for pattern, cat in _TITLE_PATTERNS_SYSTEM:
|
||||
m = pattern.search(title)
|
||||
if m:
|
||||
return (cat, m.group(0),
|
||||
f"Titre '{title[:60]}' matche '{pattern.pattern}' → {cat}")
|
||||
return None
|
||||
|
||||
|
||||
def is_system_dialog(
|
||||
uia_snapshot: Optional[Dict[str, Any]] = None,
|
||||
window_info: Optional[Dict[str, Any]] = None,
|
||||
) -> SystemDialogDetection:
|
||||
"""Déterminer si la fenêtre active est un dialogue système critique.
|
||||
|
||||
La détection combine plusieurs signaux — **un seul suffit à bloquer**.
|
||||
On préfère un faux positif (pause inutile) à un faux négatif (clic UAC).
|
||||
|
||||
Args:
|
||||
uia_snapshot: Dict avec champs `class_name`, `process_name`,
|
||||
`parent_path`, `name`. Peut être None si UIA indisponible.
|
||||
window_info: Dict avec champs `title`, `app_name`. Peut être None.
|
||||
|
||||
Returns:
|
||||
SystemDialogDetection avec is_system_dialog=True si un dialogue
|
||||
système est détecté.
|
||||
|
||||
Exemples::
|
||||
|
||||
det = is_system_dialog(window_info={"title": "User Account Control"})
|
||||
assert det.is_system_dialog # UAC détecté
|
||||
|
||||
det = is_system_dialog(uia_snapshot={"class_name": "$$$Secure UAP Dummy Window Class$$$"})
|
||||
assert det.is_system_dialog # UAC via ClassName
|
||||
|
||||
det = is_system_dialog(window_info={"title": "OSIRIS - Patient Dupont"})
|
||||
assert not det.is_system_dialog # Application métier → OK
|
||||
"""
|
||||
# ── Signal 1 : ClassName UIA ──
|
||||
if uia_snapshot:
|
||||
cn = uia_snapshot.get("class_name", "") or ""
|
||||
r = _check_class_name(cn)
|
||||
if r:
|
||||
cat, matched, reason = r
|
||||
return SystemDialogDetection(
|
||||
is_system_dialog=True,
|
||||
category=cat,
|
||||
matched_signal="class_name",
|
||||
matched_value=matched,
|
||||
reason=reason,
|
||||
)
|
||||
|
||||
# Explorer aussi les parents (le champ cliqué peut être un bouton
|
||||
# interne dont la ClassName est "Button", mais le root de la fenêtre
|
||||
# est le Consent.exe).
|
||||
for parent in uia_snapshot.get("parent_path", []) or []:
|
||||
p_cn = parent.get("class_name", "") or ""
|
||||
r = _check_class_name(p_cn)
|
||||
if r:
|
||||
cat, matched, reason = r
|
||||
return SystemDialogDetection(
|
||||
is_system_dialog=True,
|
||||
category=cat,
|
||||
matched_signal="parent_class_name",
|
||||
matched_value=matched,
|
||||
reason=f"Parent : {reason}",
|
||||
)
|
||||
|
||||
# ── Signal 2 : Process name ──
|
||||
if uia_snapshot:
|
||||
pn = uia_snapshot.get("process_name", "") or ""
|
||||
r = _check_process_name(pn)
|
||||
if r:
|
||||
cat, matched, reason = r
|
||||
return SystemDialogDetection(
|
||||
is_system_dialog=True,
|
||||
category=cat,
|
||||
matched_signal="process_name",
|
||||
matched_value=matched,
|
||||
reason=reason,
|
||||
)
|
||||
|
||||
if window_info:
|
||||
app = window_info.get("app_name", "") or ""
|
||||
r = _check_process_name(app)
|
||||
if r:
|
||||
cat, matched, reason = r
|
||||
return SystemDialogDetection(
|
||||
is_system_dialog=True,
|
||||
category=cat,
|
||||
matched_signal="app_name",
|
||||
matched_value=matched,
|
||||
reason=reason,
|
||||
)
|
||||
|
||||
# ── Signal 3 : Titre de fenêtre ──
|
||||
if window_info:
|
||||
title = window_info.get("title", "") or ""
|
||||
r = _check_title(title)
|
||||
if r:
|
||||
cat, matched, reason = r
|
||||
return SystemDialogDetection(
|
||||
is_system_dialog=True,
|
||||
category=cat,
|
||||
matched_signal="window_title",
|
||||
matched_value=matched,
|
||||
reason=reason,
|
||||
)
|
||||
|
||||
if uia_snapshot:
|
||||
# Certains dialogues système remontent leur titre dans uia.name
|
||||
uia_name = uia_snapshot.get("name", "") or ""
|
||||
r = _check_title(uia_name)
|
||||
if r:
|
||||
cat, matched, reason = r
|
||||
return SystemDialogDetection(
|
||||
is_system_dialog=True,
|
||||
category=cat,
|
||||
matched_signal="uia_name",
|
||||
matched_value=matched,
|
||||
reason=reason,
|
||||
)
|
||||
|
||||
return SystemDialogDetection(is_system_dialog=False)
|
||||
|
||||
|
||||
def detect_current_system_dialog() -> SystemDialogDetection:
|
||||
"""Analyser l'écran actuel et détecter un dialogue système.
|
||||
|
||||
Helper autonome qui interroge à la fois `get_active_window_info()` et
|
||||
le helper UIA (si dispo) pour obtenir la détection la plus fiable.
|
||||
|
||||
Returns:
|
||||
SystemDialogDetection. Si un signal matche, is_system_dialog=True.
|
||||
Si rien n'est disponible (Linux, UIA absent), is_system_dialog=False
|
||||
mais le caller peut encore fallback sur une analyse par titre.
|
||||
"""
|
||||
window_info: Optional[Dict[str, Any]] = None
|
||||
uia_snapshot: Optional[Dict[str, Any]] = None
|
||||
|
||||
# Fenêtre active (cross-platform)
|
||||
try:
|
||||
from ..window_info_crossplatform import get_active_window_info
|
||||
window_info = get_active_window_info()
|
||||
except Exception as e: # pragma: no cover — best-effort
|
||||
logger.debug(f"[SYS-DIALOG] window_info indisponible : {e}")
|
||||
|
||||
# UIA local (Windows uniquement, via lea_uia.exe)
|
||||
try:
|
||||
from .uia_helper import get_shared_helper
|
||||
helper = get_shared_helper()
|
||||
if helper.available:
|
||||
# On capture l'élément focalisé (root = fenêtre active)
|
||||
element = helper.capture_focused(max_depth=2)
|
||||
if element is not None:
|
||||
uia_snapshot = element.to_dict()
|
||||
except Exception as e: # pragma: no cover
|
||||
logger.debug(f"[SYS-DIALOG] UIA indisponible : {e}")
|
||||
|
||||
detection = is_system_dialog(
|
||||
uia_snapshot=uia_snapshot, window_info=window_info,
|
||||
)
|
||||
|
||||
if detection.is_system_dialog:
|
||||
logger.warning(
|
||||
f"[SYS-DIALOG] BLOCAGE — dialogue système détecté "
|
||||
f"[{detection.category}] via {detection.matched_signal}='{detection.matched_value}' "
|
||||
f"— {detection.reason}"
|
||||
)
|
||||
return detection
|
||||
|
||||
|
||||
__all__ = [
|
||||
"SystemDialogCategory",
|
||||
"SystemDialogDetection",
|
||||
"is_system_dialog",
|
||||
"detect_current_system_dialog",
|
||||
]
|
||||
294
agent_v0/agent_v1/core/uia_helper.py
Normal file
294
agent_v0/agent_v1/core/uia_helper.py
Normal file
@@ -0,0 +1,294 @@
|
||||
# core/workflow/uia_helper.py
|
||||
"""
|
||||
UIAHelper — Wrapper Python pour lea_uia.exe (helper Rust UI Automation).
|
||||
|
||||
Expose une API Python simple pour interroger UIA via le binaire Rust.
|
||||
Communique via subprocess + stdin/stdout JSON.
|
||||
|
||||
Pourquoi un helper Rust ?
|
||||
- 5-10x plus rapide que pywinauto (10-20ms vs 50-200ms)
|
||||
- Binaire standalone ~500 Ko, aucune dépendance runtime
|
||||
- Pas de problèmes de threading COM en Python
|
||||
- Crash-safe (le crash du helper n'affecte pas l'agent Python)
|
||||
|
||||
Architecture :
|
||||
Python executor
|
||||
↓ subprocess.run
|
||||
lea_uia.exe query --x 812 --y 436
|
||||
↓ UIA API Windows
|
||||
JSON response
|
||||
↓ stdout
|
||||
Python executor parse JSON
|
||||
|
||||
Si lea_uia.exe n'est pas disponible (Linux, binaire absent, crash) :
|
||||
toutes les méthodes retournent None → fallback vision automatique.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import subprocess
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Timeout par défaut pour les appels UIA (en secondes)
|
||||
_DEFAULT_TIMEOUT = 5.0
|
||||
|
||||
# Masquer la fenêtre console lors du spawn de lea_uia.exe sur Windows.
|
||||
# Sans ce flag, chaque appel (à chaque clic utilisateur pendant
|
||||
# l'enregistrement) fait apparaître une fenêtre cmd noire brièvement
|
||||
# visible à l'écran → ralentit la souris et pollue les screenshots
|
||||
# capturés (le VLM peut "voir" le chemin lea_uia.exe comme texte cliqué).
|
||||
#
|
||||
# La valeur 0x08000000 correspond à CREATE_NO_WINDOW défini dans
|
||||
# l'API Windows. Sur Linux/Mac, la valeur est 0 et `creationflags`
|
||||
# est ignoré. getattr() gère le cas où Python expose déjà la constante
|
||||
# sur Windows.
|
||||
if platform.system() == "Windows":
|
||||
_SUBPROCESS_CREATION_FLAGS = getattr(subprocess, "CREATE_NO_WINDOW", 0x08000000)
|
||||
else:
|
||||
_SUBPROCESS_CREATION_FLAGS = 0
|
||||
|
||||
|
||||
@dataclass
|
||||
class UiaElement:
|
||||
"""Représentation Python d'un élément UIA."""
|
||||
name: str = ""
|
||||
control_type: str = ""
|
||||
class_name: str = ""
|
||||
automation_id: str = ""
|
||||
bounding_rect: Tuple[int, int, int, int] = (0, 0, 0, 0)
|
||||
is_enabled: bool = False
|
||||
is_offscreen: bool = True
|
||||
parent_path: List[Dict[str, str]] = field(default_factory=list)
|
||||
process_name: str = ""
|
||||
|
||||
def center(self) -> Tuple[int, int]:
|
||||
"""Retourner le centre du rectangle (pixels)."""
|
||||
x1, y1, x2, y2 = self.bounding_rect
|
||||
return ((x1 + x2) // 2, (y1 + y2) // 2)
|
||||
|
||||
def width(self) -> int:
|
||||
return self.bounding_rect[2] - self.bounding_rect[0]
|
||||
|
||||
def height(self) -> int:
|
||||
return self.bounding_rect[3] - self.bounding_rect[1]
|
||||
|
||||
def is_clickable(self) -> bool:
|
||||
"""Peut-on cliquer dessus ?"""
|
||||
return (
|
||||
self.is_enabled
|
||||
and not self.is_offscreen
|
||||
and self.width() > 0
|
||||
and self.height() > 0
|
||||
)
|
||||
|
||||
def path_signature(self) -> str:
|
||||
"""Signature du chemin parent (pour retrouver l'élément)."""
|
||||
parts = [f"{p['control_type']}[{p['name']}]" for p in self.parent_path if p.get("name")]
|
||||
parts.append(f"{self.control_type}[{self.name}]")
|
||||
return " > ".join(parts)
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"name": self.name,
|
||||
"control_type": self.control_type,
|
||||
"class_name": self.class_name,
|
||||
"automation_id": self.automation_id,
|
||||
"bounding_rect": list(self.bounding_rect),
|
||||
"is_enabled": self.is_enabled,
|
||||
"is_offscreen": self.is_offscreen,
|
||||
"parent_path": self.parent_path,
|
||||
"process_name": self.process_name,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, d: Dict[str, Any]) -> "UiaElement":
|
||||
rect = d.get("bounding_rect", [0, 0, 0, 0])
|
||||
if isinstance(rect, list) and len(rect) >= 4:
|
||||
rect = tuple(rect[:4])
|
||||
else:
|
||||
rect = (0, 0, 0, 0)
|
||||
return cls(
|
||||
name=d.get("name", ""),
|
||||
control_type=d.get("control_type", ""),
|
||||
class_name=d.get("class_name", ""),
|
||||
automation_id=d.get("automation_id", ""),
|
||||
bounding_rect=rect,
|
||||
is_enabled=d.get("is_enabled", False),
|
||||
is_offscreen=d.get("is_offscreen", True),
|
||||
parent_path=d.get("parent_path", []),
|
||||
process_name=d.get("process_name", ""),
|
||||
)
|
||||
|
||||
|
||||
class UIAHelper:
|
||||
"""Wrapper Python pour lea_uia.exe."""
|
||||
|
||||
def __init__(self, helper_path: str = "", timeout: float = _DEFAULT_TIMEOUT):
|
||||
self._helper_path = helper_path or self._find_helper()
|
||||
self._timeout = timeout
|
||||
self._available = self._check_available()
|
||||
|
||||
def _find_helper(self) -> str:
|
||||
"""Trouver lea_uia.exe dans les emplacements standards."""
|
||||
candidates = [
|
||||
r"C:\Lea\helpers\lea_uia.exe",
|
||||
os.path.join(os.path.dirname(__file__), "..", "..",
|
||||
"agent_rust", "lea_uia", "target",
|
||||
"x86_64-pc-windows-gnu", "release", "lea_uia.exe"),
|
||||
"./helpers/lea_uia.exe",
|
||||
"lea_uia.exe",
|
||||
]
|
||||
for path in candidates:
|
||||
if os.path.isfile(path):
|
||||
return os.path.abspath(path)
|
||||
return ""
|
||||
|
||||
def _check_available(self) -> bool:
|
||||
"""Vérifier que le helper est utilisable (Windows + binaire + health OK)."""
|
||||
if platform.system() != "Windows":
|
||||
logger.debug("UIAHelper: Linux/Mac — helper désactivé")
|
||||
return False
|
||||
if not self._helper_path:
|
||||
logger.debug("UIAHelper: lea_uia.exe introuvable")
|
||||
return False
|
||||
if not os.path.isfile(self._helper_path):
|
||||
logger.debug(f"UIAHelper: chemin invalide {self._helper_path}")
|
||||
return False
|
||||
return True
|
||||
|
||||
@property
|
||||
def available(self) -> bool:
|
||||
return self._available
|
||||
|
||||
@property
|
||||
def helper_path(self) -> str:
|
||||
return self._helper_path
|
||||
|
||||
def _run(self, args: List[str]) -> Optional[Dict[str, Any]]:
|
||||
"""Exécuter lea_uia.exe avec les arguments et parser le JSON."""
|
||||
if not self._available:
|
||||
return None
|
||||
try:
|
||||
result = subprocess.run(
|
||||
[self._helper_path] + args,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=self._timeout,
|
||||
encoding="utf-8",
|
||||
errors="replace",
|
||||
creationflags=_SUBPROCESS_CREATION_FLAGS,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
logger.debug(
|
||||
f"UIAHelper: exit code {result.returncode}, "
|
||||
f"stderr: {result.stderr[:200]}"
|
||||
)
|
||||
return None
|
||||
output = result.stdout.strip()
|
||||
if not output:
|
||||
return None
|
||||
return json.loads(output)
|
||||
except subprocess.TimeoutExpired:
|
||||
logger.debug(f"UIAHelper: timeout ({self._timeout}s) sur {args}")
|
||||
return None
|
||||
except json.JSONDecodeError as e:
|
||||
logger.debug(f"UIAHelper: JSON invalide — {e}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.debug(f"UIAHelper: erreur {e}")
|
||||
return None
|
||||
|
||||
def health(self) -> bool:
|
||||
"""Vérifier que UIA répond."""
|
||||
data = self._run(["health"])
|
||||
return data is not None and data.get("status") == "ok"
|
||||
|
||||
def query_at(
|
||||
self,
|
||||
x: int,
|
||||
y: int,
|
||||
with_parents: bool = True,
|
||||
) -> Optional[UiaElement]:
|
||||
"""Récupérer l'élément UIA à une position écran.
|
||||
|
||||
Args:
|
||||
x, y: Coordonnées pixel absolues
|
||||
with_parents: Inclure la hiérarchie des parents
|
||||
|
||||
Returns:
|
||||
UiaElement si trouvé, None sinon (pas d'élément ou UIA indispo)
|
||||
"""
|
||||
args = ["query", "--x", str(x), "--y", str(y)]
|
||||
if not with_parents:
|
||||
args.append("--with-parents=false")
|
||||
|
||||
data = self._run(args)
|
||||
if not data or data.get("status") != "ok":
|
||||
return None
|
||||
|
||||
elem_data = data.get("element")
|
||||
if not elem_data:
|
||||
return None
|
||||
return UiaElement.from_dict(elem_data)
|
||||
|
||||
def find_by_name(
|
||||
self,
|
||||
name: str,
|
||||
control_type: Optional[str] = None,
|
||||
automation_id: Optional[str] = None,
|
||||
window: Optional[str] = None,
|
||||
timeout_ms: int = 2000,
|
||||
) -> Optional[UiaElement]:
|
||||
"""Rechercher un élément par son nom (+ filtres optionnels).
|
||||
|
||||
Args:
|
||||
name: Nom exact de l'élément
|
||||
control_type: Type de contrôle (Button, Edit, MenuItem...)
|
||||
automation_id: ID d'automation
|
||||
window: Restreindre à une fenêtre spécifique
|
||||
timeout_ms: Timeout de recherche en millisecondes
|
||||
"""
|
||||
args = ["find", "--name", name, "--timeout-ms", str(timeout_ms)]
|
||||
if control_type:
|
||||
args.extend(["--control-type", control_type])
|
||||
if automation_id:
|
||||
args.extend(["--automation-id", automation_id])
|
||||
if window:
|
||||
args.extend(["--window", window])
|
||||
|
||||
data = self._run(args)
|
||||
if not data or data.get("status") != "ok":
|
||||
return None
|
||||
|
||||
elem_data = data.get("element")
|
||||
if not elem_data:
|
||||
return None
|
||||
return UiaElement.from_dict(elem_data)
|
||||
|
||||
def capture_focused(self, max_depth: int = 3) -> Optional[UiaElement]:
|
||||
"""Capturer l'élément ayant le focus + son contexte."""
|
||||
data = self._run(["capture", "--max-depth", str(max_depth)])
|
||||
if not data or data.get("status") != "ok":
|
||||
return None
|
||||
|
||||
elem_data = data.get("element")
|
||||
if not elem_data:
|
||||
return None
|
||||
return UiaElement.from_dict(elem_data)
|
||||
|
||||
|
||||
# Instance globale partagée (singleton léger)
|
||||
_SHARED_HELPER: Optional[UIAHelper] = None
|
||||
|
||||
|
||||
def get_shared_helper() -> UIAHelper:
|
||||
"""Retourner une instance partagée de UIAHelper."""
|
||||
global _SHARED_HELPER
|
||||
if _SHARED_HELPER is None:
|
||||
_SHARED_HELPER = UIAHelper()
|
||||
return _SHARED_HELPER
|
||||
39
agent_v0/agent_v1/finalize_contract.py
Normal file
39
agent_v0/agent_v1/finalize_contract.py
Normal file
@@ -0,0 +1,39 @@
|
||||
"""Dispatch léger du contrat enrichi de /finalize côté agent."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any, Dict
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def dispatch_finalize_result(ui: Any, payload: Dict[str, Any], replay_name: str) -> None:
|
||||
"""Router le résultat de /finalize vers la bonne surface UI agent."""
|
||||
if not isinstance(payload, dict):
|
||||
return
|
||||
|
||||
replay_request = payload.get("replay_request") or {}
|
||||
replay_launch = payload.get("replay_launch") or {}
|
||||
|
||||
if replay_launch.get("status") == "started":
|
||||
logger.info("Replay direct déjà lancé par le serveur après finalize")
|
||||
return
|
||||
|
||||
if not payload.get("replay_ready") or not replay_request:
|
||||
return
|
||||
|
||||
if replay_launch.get("status") == "failed":
|
||||
logger.warning(
|
||||
"Auto-replay serveur échoué après finalize, proposition manuelle"
|
||||
)
|
||||
|
||||
if ui is None or not hasattr(ui, "offer_finalize_replay"):
|
||||
logger.info("UI indisponible pour proposer un test immédiat")
|
||||
return
|
||||
|
||||
ui.offer_finalize_replay(
|
||||
replay_request,
|
||||
replay_name or "la tâche que vous venez d'enregistrer",
|
||||
)
|
||||
@@ -17,6 +17,7 @@ import threading
|
||||
from .config import (
|
||||
SESSIONS_ROOT, AGENT_VERSION, SERVER_URL, MACHINE_ID, LOG_RETENTION_DAYS,
|
||||
SCREEN_RESOLUTION, DPI_SCALE, OS_THEME, API_TOKEN, MAX_SESSION_DURATION_S,
|
||||
STREAMING_ENDPOINT,
|
||||
)
|
||||
from .core.captor import EventCaptorV1
|
||||
from .core.executor import ActionExecutorV1
|
||||
@@ -27,6 +28,7 @@ from .ui.chat_window import ChatWindow
|
||||
from .ui.capture_server import CaptureServer
|
||||
from .session.storage import SessionStorage
|
||||
from .vision.capturer import VisionCapturer
|
||||
from .finalize_contract import dispatch_finalize_result
|
||||
|
||||
# Import optionnel du client serveur (pour le chat et les workflows)
|
||||
# Deux chemins : relatif (depuis agent_v0.agent_v1) ou absolu (depuis C:\rpa_vision\agent_v1)
|
||||
@@ -38,8 +40,19 @@ except (ImportError, ValueError):
|
||||
except ImportError:
|
||||
LeaServerClient = None
|
||||
|
||||
# Configuration du logging
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
|
||||
# Configuration du logging — format structuré et lisible pour un TIM
|
||||
# Niveau de détail : INFO par défaut, DEBUG si RPA_AGENT_DEBUG=1
|
||||
_log_level = logging.DEBUG if os.environ.get("RPA_AGENT_DEBUG") == "1" else logging.INFO
|
||||
logging.basicConfig(
|
||||
level=_log_level,
|
||||
format="%(asctime)s %(levelname)-7s %(name)-25s %(message)s",
|
||||
datefmt="%H:%M:%S",
|
||||
)
|
||||
|
||||
# Réduire le bruit de certaines libs
|
||||
for _noisy in ("urllib3", "requests.packages.urllib3", "PIL", "mss"):
|
||||
logging.getLogger(_noisy).setLevel(logging.WARNING)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Intervalle de polling replay (secondes)
|
||||
@@ -68,6 +81,7 @@ class AgentV1:
|
||||
self._executor = None
|
||||
# Flag pour indiquer qu'un replay est en cours (eviter les conflits)
|
||||
self._replay_active = False
|
||||
self._last_recording_name = ""
|
||||
|
||||
# Etat partage entre systray et chat (source de verite unique)
|
||||
self._state = AgentState()
|
||||
@@ -75,22 +89,23 @@ class AgentV1:
|
||||
self._state.set_on_stop(self.stop_session)
|
||||
|
||||
# Client serveur pour le chat et les workflows
|
||||
# Plus de RPA_SERVER_HOST : le LeaServerClient derive tout de SERVER_URL
|
||||
self._server_client = None
|
||||
if LeaServerClient is not None:
|
||||
# Forcer le token API pour éviter les 401
|
||||
# (le token est set par start.bat dans l'environnement)
|
||||
from .config import API_TOKEN as _token
|
||||
server_host = os.getenv("RPA_SERVER_HOST", "localhost")
|
||||
self._server_client = LeaServerClient(server_host=server_host)
|
||||
self._server_client = LeaServerClient()
|
||||
if _token and not self._server_client._api_token:
|
||||
self._server_client._api_token = _token
|
||||
logger.info("Token API forcé dans LeaServerClient")
|
||||
|
||||
# Fenetre de chat Lea (tkinter natif)
|
||||
# Le host est derive de SERVER_URL (plus de RPA_SERVER_HOST)
|
||||
server_host = (
|
||||
self._server_client.server_host
|
||||
if self._server_client is not None
|
||||
else os.getenv("RPA_SERVER_HOST", "localhost")
|
||||
else "localhost"
|
||||
)
|
||||
self._chat_window = ChatWindow(
|
||||
server_client=self._server_client,
|
||||
@@ -103,6 +118,11 @@ class AgentV1:
|
||||
# Executeur pour le replay (doit exister avant le poll)
|
||||
self._executor = ActionExecutorV1()
|
||||
|
||||
# Wiring ChatWindow → Executor pour Plan B (pause_message → bulle interactive)
|
||||
# Permet à l'executor d'afficher une bulle paused dans la fenêtre Léa V1
|
||||
# quand le serveur signale replay_paused=True via /replay/next.
|
||||
self._wire_chat_window_to_executor()
|
||||
|
||||
# Boucles permanentes (pas besoin de session active)
|
||||
self.running = True
|
||||
self._bg_vision = VisionCapturer(str(SESSIONS_ROOT / "_background"))
|
||||
@@ -131,6 +151,15 @@ class AgentV1:
|
||||
shared_state=self._state,
|
||||
)
|
||||
|
||||
def _wire_chat_window_to_executor(self) -> None:
|
||||
"""Relie l'executor courant à la ChatWindow pour les pauses supervisees."""
|
||||
if self._executor is None or self._chat_window is None:
|
||||
return
|
||||
try:
|
||||
self._executor._chat_window_ref = self._chat_window
|
||||
except Exception:
|
||||
logger.debug("Wiring chat_window->executor echoue (non bloquant)", exc_info=True)
|
||||
|
||||
def _delayed_cleanup(self):
|
||||
"""Nettoyage en arrière-plan après 30s pour ne pas bloquer le démarrage."""
|
||||
time.sleep(30)
|
||||
@@ -189,16 +218,19 @@ class AgentV1:
|
||||
time.sleep(30) # Vérifier toutes les 30s
|
||||
|
||||
def start_session(self, workflow_name):
|
||||
self._last_recording_name = workflow_name
|
||||
self.session_id = f"sess_{time.strftime('%Y%m%dT%H%M%S')}_{uuid.uuid4().hex[:6]}"
|
||||
self.session_dir = self.storage.get_session_dir(self.session_id)
|
||||
|
||||
self.vision = VisionCapturer(str(self.session_dir))
|
||||
|
||||
self.streamer = TraceStreamer(self.session_id, machine_id=self.machine_id)
|
||||
self.streamer.set_on_finalize_result(self._on_finalize_result)
|
||||
self.captor = EventCaptorV1(self._on_event_bridge)
|
||||
|
||||
# Initialiser l'executeur partage
|
||||
self._executor = ActionExecutorV1()
|
||||
self._wire_chat_window_to_executor()
|
||||
|
||||
self.shot_counter = 0
|
||||
self.running = True
|
||||
@@ -304,6 +336,15 @@ class AgentV1:
|
||||
# pour enchainer les actions du workflow
|
||||
time.sleep(0.2)
|
||||
else:
|
||||
if getattr(self._executor, "_replay_paused", False):
|
||||
if not self._replay_active:
|
||||
self._replay_active = True
|
||||
self.ui.set_replay_active(True)
|
||||
self._state.set_replay_active(True)
|
||||
poll_delay = getattr(self._executor, '_poll_backoff', REPLAY_POLL_INTERVAL)
|
||||
time.sleep(max(poll_delay, REPLAY_POLL_INTERVAL))
|
||||
continue
|
||||
|
||||
# Pas d'action en attente — utiliser le backoff de l'executor
|
||||
# (augmente si le serveur est indisponible, reset a 1s sinon)
|
||||
if self._replay_active:
|
||||
@@ -352,11 +393,11 @@ class AgentV1:
|
||||
continue
|
||||
self._last_bg_hash = img_hash
|
||||
|
||||
# Envoyer au streaming server (avec token auth)
|
||||
# Envoyer au streaming server (via STREAMING_ENDPOINT unifié)
|
||||
headers = {"Authorization": f"Bearer {API_TOKEN}"} if API_TOKEN else {}
|
||||
with open(full_path, 'rb') as f:
|
||||
req.post(
|
||||
f"{SERVER_URL}/traces/stream/image",
|
||||
f"{STREAMING_ENDPOINT}/image",
|
||||
params={
|
||||
"session_id": bg_session,
|
||||
"shot_id": f"heartbeat_{int(time.time())}",
|
||||
@@ -365,18 +406,29 @@ class AgentV1:
|
||||
headers=headers,
|
||||
files={"file": ("screenshot.png", f, "image/png")},
|
||||
timeout=10,
|
||||
allow_redirects=False,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug(f"[HEARTBEAT] Erreur: {e}")
|
||||
time.sleep(5)
|
||||
|
||||
def stop_session(self):
|
||||
# Arrêter la capture et le streaming de la session d'enregistrement
|
||||
if self.captor: self.captor.stop()
|
||||
if self.streamer: self.streamer.stop()
|
||||
logger.info(f"Session {self.session_id} terminée.")
|
||||
# Sauvegarder le session_id avant de l'annuler (pour les logs)
|
||||
ended_session_id = self.session_id
|
||||
|
||||
# Reset le session_id pour que le poll replay utilise l'ID stable
|
||||
# Arrêter la capture d'abord (plus d'events entrants)
|
||||
if self.captor: self.captor.stop()
|
||||
|
||||
# Attendre que les events en cours de traitement dans _on_event_bridge
|
||||
# aient le temps d'être envoyés au streamer (capture duale + push)
|
||||
import time
|
||||
time.sleep(1.5)
|
||||
|
||||
# Maintenant arrêter le streamer (drain queue + finalize)
|
||||
if self.streamer: self.streamer.stop()
|
||||
logger.info(f"Session {ended_session_id} terminée.")
|
||||
|
||||
# Reset le session_id APRÈS le stop complet du streamer
|
||||
self.session_id = None
|
||||
|
||||
# Reset le backoff de l'executor pour reprendre le polling immédiatement
|
||||
@@ -397,12 +449,18 @@ class AgentV1:
|
||||
f"agent_{self.user_id}"
|
||||
)
|
||||
|
||||
def _on_finalize_result(self, payload: dict) -> None:
|
||||
"""Réagir au contrat enrichi de /finalize côté agent."""
|
||||
replay_name = self._last_recording_name or "la tâche que vous venez d'enregistrer"
|
||||
dispatch_finalize_result(self.ui, payload, replay_name)
|
||||
|
||||
_last_heartbeat_hash: str = ""
|
||||
|
||||
def _heartbeat_loop(self):
|
||||
"""Capture périodique pour donner du contexte au stagiaire.
|
||||
Déduplication : n'envoie que si l'écran a changé.
|
||||
Tourne tant que session_id est défini (= enregistrement actif).
|
||||
Enrichi avec le titre de la fenêtre active pour contextualisation.
|
||||
"""
|
||||
while self.running and self.session_id:
|
||||
try:
|
||||
@@ -413,7 +471,23 @@ class AgentV1:
|
||||
if img_hash != self._last_heartbeat_hash:
|
||||
self._last_heartbeat_hash = img_hash
|
||||
self.streamer.push_image(full_path, f"heartbeat_{int(time.time())}")
|
||||
self.streamer.push_event({"type": "heartbeat", "image": full_path, "timestamp": time.time(), "machine_id": self.machine_id})
|
||||
heartbeat_event = {
|
||||
"type": "heartbeat",
|
||||
"image": full_path,
|
||||
"timestamp": time.time(),
|
||||
"machine_id": self.machine_id,
|
||||
}
|
||||
# Ajouter le titre de la fenêtre active (léger, pas de crop)
|
||||
window_title = self.vision.get_active_window_title()
|
||||
if window_title:
|
||||
heartbeat_event["active_window_title"] = window_title
|
||||
# QW1 — enrichissement multi-écrans (additif, fallback gracieux)
|
||||
try:
|
||||
from .vision.capturer import _enrich_with_monitor_info
|
||||
_enrich_with_monitor_info(heartbeat_event)
|
||||
except Exception:
|
||||
pass
|
||||
self.streamer.push_event(heartbeat_event)
|
||||
except Exception as e:
|
||||
logger.error(f"Heartbeat error: {e}")
|
||||
time.sleep(5)
|
||||
@@ -448,20 +522,33 @@ class AgentV1:
|
||||
event["screenshot_context"] = full_path
|
||||
self.streamer.push_image(full_path, f"focus_{int(time.time())}")
|
||||
|
||||
# 🔴 Capture Interactive (Dual)
|
||||
# Capture Interactive (Dual + Fenêtre active)
|
||||
if event["type"] in ["mouse_click", "key_combo"]:
|
||||
self.shot_counter += 1
|
||||
shot_id = f"shot_{self.shot_counter:04d}"
|
||||
|
||||
|
||||
pos = event.get("pos", (0, 0))
|
||||
capture_info = self.vision.capture_dual(pos[0], pos[1], shot_id)
|
||||
|
||||
|
||||
event["screenshot_id"] = shot_id
|
||||
event["vision_info"] = capture_info
|
||||
|
||||
|
||||
# Enrichir l'event avec les métadonnées de la fenêtre active
|
||||
# (titre, rect, coordonnées clic relatives, taille fenêtre)
|
||||
window_capture = capture_info.get("window_capture")
|
||||
if window_capture:
|
||||
event["window_capture"] = {
|
||||
"title": window_capture.get("window_title", ""),
|
||||
"app_name": window_capture.get("app_name", ""),
|
||||
"rect": window_capture.get("window_rect"),
|
||||
"click_relative": window_capture.get("click_in_window"),
|
||||
"window_size": window_capture.get("window_size"),
|
||||
"click_inside_window": window_capture.get("click_inside_window", True),
|
||||
}
|
||||
|
||||
self._stream_capture_info(capture_info, shot_id)
|
||||
|
||||
# 🕒 POST-ACTION : Capture du résultat après 1s (pour voir le résultat du clic)
|
||||
|
||||
# POST-ACTION : Capture du résultat après 1s (pour voir le résultat du clic)
|
||||
threading.Timer(1.0, self._capture_result, args=(shot_id,)).start()
|
||||
|
||||
self.ui.update_stats(self.shot_counter)
|
||||
@@ -481,13 +568,77 @@ class AgentV1:
|
||||
self.streamer.push_image(capture_info["full"], f"{shot_id}_full")
|
||||
if "crop" in capture_info:
|
||||
self.streamer.push_image(capture_info["crop"], f"{shot_id}_crop")
|
||||
# Streamer l'image de la fenêtre active si disponible
|
||||
window_capture = capture_info.get("window_capture")
|
||||
if window_capture and "window_image" in window_capture:
|
||||
self.streamer.push_image(
|
||||
window_capture["window_image"], f"{shot_id}_window"
|
||||
)
|
||||
|
||||
def run(self):
|
||||
self.ui.run()
|
||||
|
||||
def _headless_keepalive(agent):
|
||||
"""Maintient le main thread vivant quand l'UI tray ne peut pas tourner.
|
||||
|
||||
Sans cela, ``agent.run()`` retourne immédiatement (pystray échoue quand
|
||||
Léa est lancée via SSH sans session interactive Windows), le main thread
|
||||
se termine, et TOUS les daemon threads — y compris ``_replay_poll_loop``
|
||||
— meurent avec lui. Observé 3 fois en 24h les 24/05 :
|
||||
- SSH ``Permission denied`` (1231)
|
||||
- polls morts après relance distante (1620)
|
||||
- polls morts ``replay_sess_506d6fa2`` (1627)
|
||||
|
||||
Le keepalive ne se déclenche QUE si ``agent.run()`` est sorti tout en
|
||||
laissant ``agent.running=True`` (cas anormal). En mode interactif
|
||||
normal, ``pystray.Icon.run()`` ne sort jamais, donc ce code est
|
||||
invisible.
|
||||
"""
|
||||
import signal as _sig
|
||||
_stop = threading.Event()
|
||||
|
||||
def _handler(sig, frame):
|
||||
logger.info(f"[MAIN] Signal {sig} recu — arret propre")
|
||||
_stop.set()
|
||||
agent.running = False
|
||||
|
||||
for sig_name in ("SIGTERM", "SIGINT", "SIGBREAK"):
|
||||
sig_obj = getattr(_sig, sig_name, None)
|
||||
if sig_obj is None:
|
||||
continue
|
||||
try:
|
||||
_sig.signal(sig_obj, _handler)
|
||||
except (ValueError, OSError):
|
||||
pass
|
||||
|
||||
logger.info(
|
||||
"[MAIN] Keepalive headless actif — main thread bloque pour maintenir "
|
||||
"les daemon threads (_replay_poll_loop, heartbeat, capture_server) vivants. "
|
||||
"Pour stopper Lea : kill -TERM <pid> ou Ctrl+C."
|
||||
)
|
||||
try:
|
||||
_stop.wait()
|
||||
except KeyboardInterrupt:
|
||||
pass
|
||||
agent.running = False
|
||||
logger.info("[MAIN] Keepalive termine — agent.running=False, daemon threads vont s'arreter")
|
||||
|
||||
|
||||
def main():
|
||||
agent = AgentV1()
|
||||
agent.run()
|
||||
try:
|
||||
agent.run()
|
||||
except Exception:
|
||||
logger.exception("[MAIN] agent.run() a leve une exception")
|
||||
|
||||
if getattr(agent, "running", False):
|
||||
logger.warning(
|
||||
"[MAIN] agent.run() est sorti mais agent.running=True — "
|
||||
"probablement pystray sans session interactive (SSH). "
|
||||
"Bascule en keepalive headless."
|
||||
)
|
||||
_headless_keepalive(agent)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
149
agent_v0/agent_v1/network/feedback_bus.py
Normal file
149
agent_v0/agent_v1/network/feedback_bus.py
Normal file
@@ -0,0 +1,149 @@
|
||||
# agent_v1/network/feedback_bus.py
|
||||
"""Client SocketIO pour le bus feedback Léa.
|
||||
|
||||
Consomme les events 'lea:*' émis par agent_chat (port 5004) et les dispatche
|
||||
vers ChatWindow pour affichage en bulles temps réel.
|
||||
|
||||
Events écoutés :
|
||||
lea:action_started — début d'un workflow ou d'une action
|
||||
lea:action_progress — progression dans le workflow
|
||||
lea:done — fin d'un workflow ou d'un copilot
|
||||
lea:need_confirm — étape copilot en attente de validation
|
||||
lea:step_result — résultat d'une étape copilot
|
||||
lea:paused — basculement en paused_need_help (asset démo)
|
||||
lea:resumed — sortie de pause supervisée
|
||||
|
||||
Fail-safe : toute erreur de connexion ou de dispatch est silencieusement
|
||||
loggée. Le ChatWindow continue de fonctionner même si le bus est mort
|
||||
(comportement strictement identique au pré-J3).
|
||||
|
||||
Usage :
|
||||
bus = FeedbackBusClient(
|
||||
server_url="http://localhost:5004",
|
||||
token=os.environ.get("RPA_API_TOKEN", ""),
|
||||
on_event=lambda event, payload: print(event, payload),
|
||||
)
|
||||
bus.start() # connexion en arrière-plan, non-bloquant
|
||||
# ... ChatWindow tourne ...
|
||||
bus.stop()
|
||||
"""
|
||||
|
||||
import logging
|
||||
import threading
|
||||
from typing import Callable, Optional
|
||||
|
||||
import socketio
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
LEA_EVENTS = (
|
||||
'lea:action_started',
|
||||
'lea:action_progress',
|
||||
'lea:done',
|
||||
'lea:need_confirm',
|
||||
'lea:step_result',
|
||||
'lea:paused',
|
||||
'lea:resumed',
|
||||
)
|
||||
|
||||
EventCallback = Callable[[str, dict], None]
|
||||
|
||||
|
||||
class FeedbackBusClient:
|
||||
"""Client SocketIO non-bloquant pour le bus 'lea:*'."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
server_url: str,
|
||||
token: Optional[str] = None,
|
||||
on_event: Optional[EventCallback] = None,
|
||||
):
|
||||
self._url = server_url.rstrip('/')
|
||||
self._token = token or None
|
||||
self._on_event: EventCallback = on_event or (lambda e, p: None)
|
||||
self._sio = socketio.Client(
|
||||
reconnection=True,
|
||||
reconnection_attempts=0, # 0 = illimité
|
||||
reconnection_delay=2,
|
||||
reconnection_delay_max=30,
|
||||
logger=False,
|
||||
engineio_logger=False,
|
||||
)
|
||||
self._thread: Optional[threading.Thread] = None
|
||||
self._register_handlers()
|
||||
|
||||
def _register_handlers(self) -> None:
|
||||
@self._sio.event
|
||||
def connect():
|
||||
logger.info("FeedbackBus connecté à %s", self._url)
|
||||
|
||||
@self._sio.event
|
||||
def disconnect():
|
||||
logger.info("FeedbackBus déconnecté")
|
||||
|
||||
for ev in LEA_EVENTS:
|
||||
self._sio.on(ev, lambda data, e=ev: self._dispatch(e, data))
|
||||
|
||||
def _dispatch(self, event: str, payload: Optional[dict]) -> None:
|
||||
try:
|
||||
self._on_event(event, payload or {})
|
||||
except Exception:
|
||||
logger.debug("FeedbackBus dispatch silenced", exc_info=True)
|
||||
|
||||
def start(self) -> None:
|
||||
"""Démarrer la connexion en arrière-plan (idempotent, non-bloquant)."""
|
||||
if self._thread is not None and self._thread.is_alive():
|
||||
return
|
||||
self._thread = threading.Thread(
|
||||
target=self._run, daemon=True, name="LeaFeedbackBus",
|
||||
)
|
||||
self._thread.start()
|
||||
|
||||
def _run(self) -> None:
|
||||
headers = {}
|
||||
if self._token:
|
||||
headers['Authorization'] = f'Bearer {self._token}'
|
||||
try:
|
||||
self._sio.connect(self._url, headers=headers, wait=True)
|
||||
self._sio.wait()
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"FeedbackBus connect échoué (%s) — ChatWindow continue normalement", e,
|
||||
)
|
||||
|
||||
def stop(self) -> None:
|
||||
"""Arrêter proprement la connexion (idempotent, fail-safe)."""
|
||||
try:
|
||||
if self._sio.connected:
|
||||
self._sio.disconnect()
|
||||
except Exception:
|
||||
logger.debug("FeedbackBus stop silenced", exc_info=True)
|
||||
|
||||
@property
|
||||
def connected(self) -> bool:
|
||||
return bool(self._sio.connected)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Actions utilisateur depuis la bulle paused_need_help (J3.5)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def resume_replay(self, replay_id: str) -> bool:
|
||||
"""Bouton Continuer : émet 'lea:replay_resume' vers agent_chat.
|
||||
|
||||
Retourne True si l'event a pu être émis, False sinon (déconnecté/erreur).
|
||||
"""
|
||||
return self._safe_emit("lea:replay_resume", {"replay_id": replay_id})
|
||||
|
||||
def abort_replay(self, replay_id: str) -> bool:
|
||||
"""Bouton Annuler : émet 'lea:replay_abort' vers agent_chat."""
|
||||
return self._safe_emit("lea:replay_abort", {"replay_id": replay_id})
|
||||
|
||||
def _safe_emit(self, event: str, payload: dict) -> bool:
|
||||
try:
|
||||
if not self._sio.connected:
|
||||
return False
|
||||
self._sio.emit(event, payload)
|
||||
return True
|
||||
except Exception:
|
||||
logger.debug("FeedbackBus _safe_emit silenced", exc_info=True)
|
||||
return False
|
||||
147
agent_v0/agent_v1/network/lea_orchestrator_client.py
Normal file
147
agent_v0/agent_v1/network/lea_orchestrator_client.py
Normal file
@@ -0,0 +1,147 @@
|
||||
"""
|
||||
Client HTTP minimal pour l'orchestrateur Léa-first (agent-chat Linux).
|
||||
|
||||
Rebranchement P1-LEA-SHADOW : le bouton "Apprenez-moi" côté Windows déclenche
|
||||
la création d'une session d'apprentissage côté agent-chat (REST) AVANT de
|
||||
lancer la capture locale. Le pipeline streaming (capture frames/événements
|
||||
via start_recording) n'est PAS modifié — seule la prise de contact initiale
|
||||
avec Léa change.
|
||||
|
||||
Contrat :
|
||||
POST {AGENT_CHAT_URL}/api/learn/start
|
||||
Headers : Authorization: Bearer <RPA_API_TOKEN>, Content-Type: application/json
|
||||
Body : { machine_id, session_name, user_id?, trigger_source }
|
||||
Réponse : { session_id, state, message }
|
||||
|
||||
Politique :
|
||||
- Timeout 10s (connect + read)
|
||||
- Retry x2 avec backoff 0.5s puis 1.0s
|
||||
- En cas d'échec définitif : lève LeaOrchestratorError (le caller doit
|
||||
basculer en mode dégradé : start_recording local sans assistance).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Timeout HTTP (connect + read) — 10s comme spec
|
||||
_HTTP_TIMEOUT_S = 10.0
|
||||
# Nombre de tentatives totales (1 + 2 retry)
|
||||
_MAX_ATTEMPTS = 3
|
||||
# Backoff progressif entre les tentatives
|
||||
_BACKOFF_S = (0.5, 1.0)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class LearnStartResponse:
|
||||
"""Réponse normalisée de POST /api/learn/start."""
|
||||
|
||||
session_id: str
|
||||
state: str
|
||||
message: str
|
||||
|
||||
|
||||
class LeaOrchestratorError(RuntimeError):
|
||||
"""Erreur définitive de communication avec l'orchestrateur Léa."""
|
||||
|
||||
|
||||
def start_learning_session(
|
||||
base_url: str,
|
||||
*,
|
||||
machine_id: str,
|
||||
session_name: str,
|
||||
api_token: str = "",
|
||||
user_id: Optional[str] = None,
|
||||
trigger_source: str = "windows_button",
|
||||
timeout_s: float = _HTTP_TIMEOUT_S,
|
||||
max_attempts: int = _MAX_ATTEMPTS,
|
||||
backoff_s: tuple = _BACKOFF_S,
|
||||
) -> LearnStartResponse:
|
||||
"""Démarre une session d'apprentissage via l'orchestrateur agent-chat.
|
||||
|
||||
Args:
|
||||
base_url: URL racine de l'agent-chat (ex. http://localhost:5004).
|
||||
machine_id: Identifiant unique du poste Windows.
|
||||
session_name: Nom humain de la tâche (saisi par l'utilisateur).
|
||||
api_token: Bearer token (RPA_API_TOKEN). Vide => header omis.
|
||||
user_id: Identifiant utilisateur optionnel.
|
||||
trigger_source: Source du déclenchement (windows_button, tray, ...).
|
||||
timeout_s: Timeout total connect+read par tentative.
|
||||
max_attempts: Nombre total de tentatives (1 + retry).
|
||||
backoff_s: Tuple des délais en secondes entre tentatives (len = max_attempts-1).
|
||||
|
||||
Returns:
|
||||
LearnStartResponse normalisée.
|
||||
|
||||
Raises:
|
||||
LeaOrchestratorError: si toutes les tentatives échouent.
|
||||
"""
|
||||
# Import local : httpx peut ne pas être installé sur tous les postes
|
||||
# Windows historiques. On veut un message d'erreur clair plutôt qu'un
|
||||
# ImportError en chaîne au moment du clic bouton.
|
||||
try:
|
||||
import httpx
|
||||
except ImportError as exc: # pragma: no cover (dépend du venv)
|
||||
raise LeaOrchestratorError(
|
||||
"httpx non disponible — installer httpx>=0.27 sur le poste Windows."
|
||||
) from exc
|
||||
|
||||
url = base_url.rstrip("/") + "/api/learn/start"
|
||||
payload = {
|
||||
"machine_id": machine_id,
|
||||
"session_name": session_name,
|
||||
"trigger_source": trigger_source,
|
||||
}
|
||||
if user_id:
|
||||
payload["user_id"] = user_id
|
||||
|
||||
headers = {"Content-Type": "application/json"}
|
||||
if api_token:
|
||||
headers["Authorization"] = f"Bearer {api_token}"
|
||||
|
||||
last_exc: Optional[Exception] = None
|
||||
for attempt in range(max_attempts):
|
||||
try:
|
||||
logger.info(
|
||||
"POST %s (tentative %d/%d) machine_id=%s session=%s",
|
||||
url, attempt + 1, max_attempts, machine_id, session_name,
|
||||
)
|
||||
with httpx.Client(timeout=timeout_s) as client:
|
||||
resp = client.post(url, json=payload, headers=headers)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
session_id = data.get("session_id", "")
|
||||
state = data.get("state", "")
|
||||
message = data.get("message", "")
|
||||
if not session_id:
|
||||
raise LeaOrchestratorError(
|
||||
f"Réponse invalide (pas de session_id) : {data!r}"
|
||||
)
|
||||
logger.info(
|
||||
"Session Léa démarrée : session_id=%s state=%s",
|
||||
session_id, state,
|
||||
)
|
||||
return LearnStartResponse(
|
||||
session_id=str(session_id),
|
||||
state=str(state),
|
||||
message=str(message),
|
||||
)
|
||||
except Exception as exc: # noqa: BLE001 — on retry sur toute erreur réseau/HTTP
|
||||
last_exc = exc
|
||||
logger.warning(
|
||||
"Echec tentative %d/%d POST %s : %s",
|
||||
attempt + 1, max_attempts, url, exc,
|
||||
)
|
||||
if attempt < max_attempts - 1:
|
||||
delay = backoff_s[attempt] if attempt < len(backoff_s) else backoff_s[-1]
|
||||
time.sleep(delay)
|
||||
|
||||
raise LeaOrchestratorError(
|
||||
f"Echec définitif POST {url} après {max_attempts} tentatives : {last_exc}"
|
||||
)
|
||||
380
agent_v0/agent_v1/network/persistent_buffer.py
Normal file
380
agent_v0/agent_v1/network/persistent_buffer.py
Normal file
@@ -0,0 +1,380 @@
|
||||
# agent_v1/network/persistent_buffer.py
|
||||
"""
|
||||
Buffer persistant SQLite pour les événements/images qui n'ont pas pu être envoyés.
|
||||
|
||||
Résout le bloquant AI Act Article 12 : en cas de coupure serveur ou de queue pleine,
|
||||
les événements prioritaires (click, key, action, screenshot) sont persistés sur disque
|
||||
au lieu d'être silencieusement perdus. Ils sont rejoués à la reconnexion.
|
||||
|
||||
Caractéristiques :
|
||||
- SQLite fichier unique (agent_v1/buffer/pending_events.db), thread-safe
|
||||
- Async : les écritures se font depuis un thread daemon, jamais bloquant
|
||||
- Quota : compteur d'attempts par item, abandon après MAX_ATTEMPTS
|
||||
- Robustesse : un fichier corrompu est renommé et recréé vide
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sqlite3
|
||||
import threading
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Nombre max de tentatives avant abandon définitif d'un item
|
||||
MAX_ATTEMPTS = 10
|
||||
|
||||
# Taille max du buffer en items pour éviter une explosion disque
|
||||
# (typiquement : 1000 events + 1000 images = quelques Mo de SQLite)
|
||||
MAX_BUFFER_ITEMS = 2000
|
||||
|
||||
|
||||
class PersistentBuffer:
|
||||
"""Buffer SQLite pour événements/images en attente d'envoi.
|
||||
|
||||
Deux tables :
|
||||
- pending_events (id, session_id, payload_json, attempts, created_at)
|
||||
- pending_images (id, session_id, shot_id, image_path, attempts, created_at)
|
||||
|
||||
Usage :
|
||||
buf = PersistentBuffer(base_dir / "buffer")
|
||||
buf.add_event(session_id, event_dict) # persiste un event
|
||||
buf.add_image(session_id, image_path, shot_id) # persiste une image
|
||||
for row in buf.drain_events(): # itère sur les events
|
||||
if envoyer(row): buf.delete_event(row["id"])
|
||||
else: buf.mark_attempt(row["id"], "event")
|
||||
"""
|
||||
|
||||
def __init__(self, buffer_dir: Path):
|
||||
self.buffer_dir = Path(buffer_dir)
|
||||
self.buffer_dir.mkdir(parents=True, exist_ok=True)
|
||||
self.db_path = self.buffer_dir / "pending_events.db"
|
||||
self._lock = threading.Lock()
|
||||
self._init_db()
|
||||
|
||||
# ---------------------------------------------------------------
|
||||
# Initialisation / gestion corruption
|
||||
# ---------------------------------------------------------------
|
||||
|
||||
def _init_db(self):
|
||||
"""Crée les tables si elles n'existent pas.
|
||||
|
||||
En cas de fichier corrompu, on le renomme en .corrupted et on recrée
|
||||
un buffer vide. On préfère perdre un buffer non lisible plutôt que
|
||||
de crasher l'agent au démarrage.
|
||||
"""
|
||||
try:
|
||||
with self._connect() as conn:
|
||||
conn.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS pending_events (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
session_id TEXT NOT NULL,
|
||||
payload TEXT NOT NULL,
|
||||
attempts INTEGER NOT NULL DEFAULT 0,
|
||||
created_at REAL NOT NULL
|
||||
)
|
||||
"""
|
||||
)
|
||||
conn.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS pending_images (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
session_id TEXT NOT NULL,
|
||||
shot_id TEXT NOT NULL,
|
||||
image_path TEXT NOT NULL,
|
||||
attempts INTEGER NOT NULL DEFAULT 0,
|
||||
created_at REAL NOT NULL
|
||||
)
|
||||
"""
|
||||
)
|
||||
conn.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_events_created "
|
||||
"ON pending_events(created_at)"
|
||||
)
|
||||
conn.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_images_created "
|
||||
"ON pending_images(created_at)"
|
||||
)
|
||||
conn.commit()
|
||||
except sqlite3.DatabaseError as e:
|
||||
logger.warning(
|
||||
f"Buffer SQLite corrompu ({e}) — renommage en .corrupted "
|
||||
f"et recréation d'un buffer vide"
|
||||
)
|
||||
try:
|
||||
corrupted = self.db_path.with_suffix(
|
||||
f".corrupted.{int(time.time())}"
|
||||
)
|
||||
os.rename(self.db_path, corrupted)
|
||||
except OSError:
|
||||
# Si le rename échoue, on tente la suppression directe
|
||||
try:
|
||||
os.remove(self.db_path)
|
||||
except OSError:
|
||||
pass
|
||||
# Nouvelle tentative (table vide)
|
||||
with self._connect() as conn:
|
||||
conn.execute(
|
||||
"CREATE TABLE IF NOT EXISTS pending_events ("
|
||||
"id INTEGER PRIMARY KEY AUTOINCREMENT, "
|
||||
"session_id TEXT NOT NULL, payload TEXT NOT NULL, "
|
||||
"attempts INTEGER NOT NULL DEFAULT 0, "
|
||||
"created_at REAL NOT NULL)"
|
||||
)
|
||||
conn.execute(
|
||||
"CREATE TABLE IF NOT EXISTS pending_images ("
|
||||
"id INTEGER PRIMARY KEY AUTOINCREMENT, "
|
||||
"session_id TEXT NOT NULL, shot_id TEXT NOT NULL, "
|
||||
"image_path TEXT NOT NULL, "
|
||||
"attempts INTEGER NOT NULL DEFAULT 0, "
|
||||
"created_at REAL NOT NULL)"
|
||||
)
|
||||
conn.commit()
|
||||
|
||||
def _connect(self) -> sqlite3.Connection:
|
||||
"""Connexion SQLite en mode WAL (meilleure concurrence)."""
|
||||
conn = sqlite3.connect(
|
||||
str(self.db_path),
|
||||
timeout=5.0,
|
||||
check_same_thread=False,
|
||||
isolation_level=None, # autocommit — on gère les transactions
|
||||
)
|
||||
try:
|
||||
conn.execute("PRAGMA journal_mode=WAL")
|
||||
conn.execute("PRAGMA synchronous=NORMAL")
|
||||
except sqlite3.DatabaseError:
|
||||
pass
|
||||
conn.row_factory = sqlite3.Row
|
||||
return conn
|
||||
|
||||
# ---------------------------------------------------------------
|
||||
# Écriture — persiste un item
|
||||
# ---------------------------------------------------------------
|
||||
|
||||
def add_event(self, session_id: str, event: dict) -> bool:
|
||||
"""Persiste un événement. Retourne True si écrit, False sinon.
|
||||
|
||||
Si le buffer dépasse MAX_BUFFER_ITEMS, on drop l'insertion (plutôt
|
||||
que saturer le disque). On log un warning au premier dépassement.
|
||||
"""
|
||||
with self._lock:
|
||||
try:
|
||||
with self._connect() as conn:
|
||||
count = conn.execute(
|
||||
"SELECT COUNT(*) FROM pending_events"
|
||||
).fetchone()[0]
|
||||
if count >= MAX_BUFFER_ITEMS:
|
||||
logger.warning(
|
||||
f"Buffer persistant saturé ({count} events) "
|
||||
f"— event droppé"
|
||||
)
|
||||
return False
|
||||
conn.execute(
|
||||
"INSERT INTO pending_events "
|
||||
"(session_id, payload, attempts, created_at) "
|
||||
"VALUES (?, ?, 0, ?)",
|
||||
(session_id, json.dumps(event), time.time()),
|
||||
)
|
||||
return True
|
||||
except (sqlite3.DatabaseError, TypeError, ValueError) as e:
|
||||
logger.error(f"Buffer add_event échoué : {e}")
|
||||
return False
|
||||
|
||||
def add_image(
|
||||
self, session_id: str, image_path: str, shot_id: str
|
||||
) -> bool:
|
||||
"""Persiste une référence image (chemin fichier + shot_id).
|
||||
|
||||
On ne stocke PAS les bytes de l'image (risque de faire gonfler la DB) :
|
||||
uniquement le chemin. Donc l'image doit rester présente sur disque
|
||||
tant qu'elle n'a pas été envoyée avec succès au serveur.
|
||||
"""
|
||||
with self._lock:
|
||||
try:
|
||||
with self._connect() as conn:
|
||||
count = conn.execute(
|
||||
"SELECT COUNT(*) FROM pending_images"
|
||||
).fetchone()[0]
|
||||
if count >= MAX_BUFFER_ITEMS:
|
||||
logger.warning(
|
||||
f"Buffer persistant saturé ({count} images) "
|
||||
f"— image droppée"
|
||||
)
|
||||
return False
|
||||
conn.execute(
|
||||
"INSERT INTO pending_images "
|
||||
"(session_id, shot_id, image_path, attempts, created_at) "
|
||||
"VALUES (?, ?, ?, 0, ?)",
|
||||
(session_id, shot_id, image_path, time.time()),
|
||||
)
|
||||
return True
|
||||
except sqlite3.DatabaseError as e:
|
||||
logger.error(f"Buffer add_image échoué : {e}")
|
||||
return False
|
||||
|
||||
# ---------------------------------------------------------------
|
||||
# Lecture — drain dans l'ordre chronologique
|
||||
# ---------------------------------------------------------------
|
||||
|
||||
def drain_events(self, limit: int = 100) -> list:
|
||||
"""Retourne les events en attente, triés par date de création."""
|
||||
with self._lock:
|
||||
try:
|
||||
with self._connect() as conn:
|
||||
rows = conn.execute(
|
||||
"SELECT id, session_id, payload, attempts "
|
||||
"FROM pending_events "
|
||||
"ORDER BY created_at ASC LIMIT ?",
|
||||
(limit,),
|
||||
).fetchall()
|
||||
return [dict(r) for r in rows]
|
||||
except sqlite3.DatabaseError as e:
|
||||
logger.error(f"Buffer drain_events échoué : {e}")
|
||||
return []
|
||||
|
||||
def drain_images(self, limit: int = 50) -> list:
|
||||
"""Retourne les images en attente, triées par date de création."""
|
||||
with self._lock:
|
||||
try:
|
||||
with self._connect() as conn:
|
||||
rows = conn.execute(
|
||||
"SELECT id, session_id, shot_id, image_path, attempts "
|
||||
"FROM pending_images "
|
||||
"ORDER BY created_at ASC LIMIT ?",
|
||||
(limit,),
|
||||
).fetchall()
|
||||
return [dict(r) for r in rows]
|
||||
except sqlite3.DatabaseError as e:
|
||||
logger.error(f"Buffer drain_images échoué : {e}")
|
||||
return []
|
||||
|
||||
# ---------------------------------------------------------------
|
||||
# Marquage — succès, échec, abandon
|
||||
# ---------------------------------------------------------------
|
||||
|
||||
def delete_event(self, row_id: int):
|
||||
"""Supprime un event après envoi réussi."""
|
||||
with self._lock:
|
||||
try:
|
||||
with self._connect() as conn:
|
||||
conn.execute(
|
||||
"DELETE FROM pending_events WHERE id = ?", (row_id,)
|
||||
)
|
||||
except sqlite3.DatabaseError as e:
|
||||
logger.error(f"Buffer delete_event échoué : {e}")
|
||||
|
||||
def delete_image(self, row_id: int):
|
||||
"""Supprime une image après envoi réussi."""
|
||||
with self._lock:
|
||||
try:
|
||||
with self._connect() as conn:
|
||||
conn.execute(
|
||||
"DELETE FROM pending_images WHERE id = ?", (row_id,)
|
||||
)
|
||||
except sqlite3.DatabaseError as e:
|
||||
logger.error(f"Buffer delete_image échoué : {e}")
|
||||
|
||||
def increment_attempts(self, row_id: int, kind: str) -> int:
|
||||
"""Incrémente le compteur d'attempts. Retourne la nouvelle valeur.
|
||||
|
||||
kind : "event" ou "image"
|
||||
"""
|
||||
table = "pending_events" if kind == "event" else "pending_images"
|
||||
with self._lock:
|
||||
try:
|
||||
with self._connect() as conn:
|
||||
conn.execute(
|
||||
f"UPDATE {table} SET attempts = attempts + 1 "
|
||||
"WHERE id = ?",
|
||||
(row_id,),
|
||||
)
|
||||
row = conn.execute(
|
||||
f"SELECT attempts FROM {table} WHERE id = ?", (row_id,)
|
||||
).fetchone()
|
||||
return int(row["attempts"]) if row else MAX_ATTEMPTS
|
||||
except sqlite3.DatabaseError as e:
|
||||
logger.error(f"Buffer increment_attempts échoué : {e}")
|
||||
return MAX_ATTEMPTS
|
||||
|
||||
def abandon_exceeded(self) -> int:
|
||||
"""Supprime les items ayant dépassé MAX_ATTEMPTS.
|
||||
|
||||
Un item abandonné est logué en erreur (trace AI Act) puis supprimé.
|
||||
Retourne le nombre d'items abandonnés.
|
||||
"""
|
||||
abandoned = 0
|
||||
with self._lock:
|
||||
try:
|
||||
with self._connect() as conn:
|
||||
# Events abandonnés
|
||||
rows = conn.execute(
|
||||
"SELECT id, session_id, payload FROM pending_events "
|
||||
"WHERE attempts >= ?",
|
||||
(MAX_ATTEMPTS,),
|
||||
).fetchall()
|
||||
for r in rows:
|
||||
try:
|
||||
event_type = json.loads(r["payload"]).get(
|
||||
"type", "?"
|
||||
)
|
||||
except (ValueError, TypeError):
|
||||
event_type = "?"
|
||||
logger.error(
|
||||
f"Buffer : event abandonné après {MAX_ATTEMPTS} "
|
||||
f"tentatives — session={r['session_id']} "
|
||||
f"type={event_type}"
|
||||
)
|
||||
abandoned += 1
|
||||
conn.execute(
|
||||
"DELETE FROM pending_events WHERE attempts >= ?",
|
||||
(MAX_ATTEMPTS,),
|
||||
)
|
||||
|
||||
# Images abandonnées
|
||||
rows = conn.execute(
|
||||
"SELECT id, session_id, shot_id FROM pending_images "
|
||||
"WHERE attempts >= ?",
|
||||
(MAX_ATTEMPTS,),
|
||||
).fetchall()
|
||||
for r in rows:
|
||||
logger.error(
|
||||
f"Buffer : image abandonnée après {MAX_ATTEMPTS} "
|
||||
f"tentatives — session={r['session_id']} "
|
||||
f"shot_id={r['shot_id']}"
|
||||
)
|
||||
abandoned += 1
|
||||
conn.execute(
|
||||
"DELETE FROM pending_images WHERE attempts >= ?",
|
||||
(MAX_ATTEMPTS,),
|
||||
)
|
||||
except sqlite3.DatabaseError as e:
|
||||
logger.error(f"Buffer abandon_exceeded échoué : {e}")
|
||||
return abandoned
|
||||
|
||||
# ---------------------------------------------------------------
|
||||
# Introspection
|
||||
# ---------------------------------------------------------------
|
||||
|
||||
def counts(self) -> dict:
|
||||
"""Retourne (events_count, images_count) pour diagnostic."""
|
||||
with self._lock:
|
||||
try:
|
||||
with self._connect() as conn:
|
||||
ev = conn.execute(
|
||||
"SELECT COUNT(*) FROM pending_events"
|
||||
).fetchone()[0]
|
||||
im = conn.execute(
|
||||
"SELECT COUNT(*) FROM pending_images"
|
||||
).fetchone()[0]
|
||||
return {"events": ev, "images": im}
|
||||
except sqlite3.DatabaseError:
|
||||
return {"events": 0, "images": 0}
|
||||
|
||||
def is_empty(self) -> bool:
|
||||
c = self.counts()
|
||||
return c["events"] == 0 and c["images"] == 0
|
||||
@@ -14,18 +14,39 @@ Robustesse (P0-2) :
|
||||
- Health-check périodique (30s) pour recovery du flag _server_available
|
||||
- Compression JPEG qualité 85 pour les images (réduction ~5-10x)
|
||||
- Backpressure : queue bornée (maxsize=100), drop des heartbeat si pleine
|
||||
|
||||
Conformité AI Act (Article 12 — journalisation automatique) :
|
||||
- Purge après ACK : les screenshots locaux sont supprimés après HTTP 200
|
||||
du serveur (par défaut). Le serveur devient la source de vérité.
|
||||
- Buffer persistant : les events/images prioritaires non envoyés sont
|
||||
persistés dans un SQLite local (agent_v1/buffer/pending_events.db)
|
||||
et rejoués au démarrage et à la reconnexion.
|
||||
"""
|
||||
|
||||
import enum
|
||||
import io
|
||||
import logging
|
||||
import os
|
||||
import queue
|
||||
import threading
|
||||
import time
|
||||
from typing import Callable, Optional
|
||||
|
||||
import requests
|
||||
from PIL import Image
|
||||
|
||||
from ..config import API_TOKEN, STREAMING_ENDPOINT
|
||||
from ..config import API_TOKEN, BASE_DIR, STREAMING_ENDPOINT
|
||||
from .persistent_buffer import MAX_ATTEMPTS, PersistentBuffer
|
||||
|
||||
|
||||
# Fix P0-E : résultat d'envoi d'image trivaleur (succès / échec réseau / fichier
|
||||
# disparu). On ne doit PAS considérer un FileNotFoundError comme un succès
|
||||
# HTTP 200 — sinon le buffer SQLite supprime l'entrée alors que le serveur n'a
|
||||
# jamais reçu l'image (perte silencieuse).
|
||||
class ImageSendResult(enum.Enum):
|
||||
OK = "ok" # HTTP 200, serveur a accusé réception
|
||||
FAILED = "failed" # Erreur réseau/serveur récupérable (retry OK)
|
||||
FILE_GONE = "file_gone" # Fichier local introuvable (abandon, pas retry)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -42,8 +63,28 @@ JPEG_QUALITY = 85
|
||||
# Taille max de la queue (backpressure)
|
||||
QUEUE_MAX_SIZE = 100
|
||||
|
||||
# Types d'événements à ne jamais dropper
|
||||
PRIORITY_EVENT_TYPES = {"click", "key", "scroll", "action", "screenshot"}
|
||||
# Types d'événements à ne jamais dropper.
|
||||
# Les noms historiques sont conservés, mais les événements réels du captor
|
||||
# Agent V1 sont mouse_click/key_combo/text_input/mouse_scroll.
|
||||
PRIORITY_EVENT_TYPES = {
|
||||
"click", "key", "scroll", "action", "screenshot",
|
||||
"mouse_click", "double_click", "key_combo", "key_press",
|
||||
"text_input", "mouse_scroll",
|
||||
}
|
||||
|
||||
# Purge locale après ACK serveur (Partie A de l'audit)
|
||||
# Activé par défaut : le serveur conserve déjà les screenshots 180 jours
|
||||
# (conformité AI Act Article 12). Désactivable via RPA_PURGE_AFTER_ACK=0
|
||||
# pour debugging local.
|
||||
PURGE_AFTER_ACK = os.environ.get("RPA_PURGE_AFTER_ACK", "1").lower() in (
|
||||
"1", "true", "yes",
|
||||
)
|
||||
|
||||
# Chemin du buffer persistant (Partie B de l'audit)
|
||||
BUFFER_DIR = BASE_DIR / "buffer"
|
||||
|
||||
# Intervalle entre deux tentatives de drain du buffer (secondes)
|
||||
BUFFER_DRAIN_INTERVAL_S = 15
|
||||
|
||||
|
||||
class TraceStreamer:
|
||||
@@ -54,8 +95,25 @@ class TraceStreamer:
|
||||
self.running = False
|
||||
self._thread = None
|
||||
self._health_thread = None
|
||||
self._drain_thread = None
|
||||
self._server_available = True # Désactivé après trop d'échecs
|
||||
|
||||
# Buffer persistant — partagé entre sessions (survit au redémarrage)
|
||||
# Initialisé paresseusement pour ne pas payer le coût SQLite en dehors
|
||||
# d'un streaming actif.
|
||||
self._buffer: PersistentBuffer | None = None
|
||||
self._on_finalize_result: Optional[Callable[[dict], None]] = None
|
||||
|
||||
def set_on_finalize_result(self, callback: Optional[Callable[[dict], None]]) -> None:
|
||||
"""Définir un callback appelé avec le payload JSON de /finalize."""
|
||||
self._on_finalize_result = callback
|
||||
|
||||
def _get_buffer(self) -> PersistentBuffer:
|
||||
"""Retourne le buffer persistant, en l'initialisant au besoin."""
|
||||
if self._buffer is None:
|
||||
self._buffer = PersistentBuffer(BUFFER_DIR)
|
||||
return self._buffer
|
||||
|
||||
@staticmethod
|
||||
def _auth_headers() -> dict:
|
||||
"""Headers d'authentification Bearer pour les requêtes API."""
|
||||
@@ -75,6 +133,11 @@ class TraceStreamer:
|
||||
target=self._health_check_loop, daemon=True
|
||||
)
|
||||
self._health_thread.start()
|
||||
# Thread de drain du buffer persistant (rejoue les items en attente)
|
||||
self._drain_thread = threading.Thread(
|
||||
target=self._buffer_drain_loop, daemon=True
|
||||
)
|
||||
self._drain_thread.start()
|
||||
logger.info(f"Streamer pour {self.session_id} démarré")
|
||||
|
||||
def stop(self):
|
||||
@@ -99,6 +162,9 @@ class TraceStreamer:
|
||||
if self._health_thread:
|
||||
self._health_thread.join(timeout=2.0)
|
||||
|
||||
if self._drain_thread:
|
||||
self._drain_thread.join(timeout=2.0)
|
||||
|
||||
self._finalize_session()
|
||||
logger.info(f"Streamer pour {self.session_id} arrêté")
|
||||
|
||||
@@ -126,11 +192,21 @@ class TraceStreamer:
|
||||
|
||||
Quand la queue est pleine :
|
||||
- Les événements prioritaires (click, key, action, screenshot) sont
|
||||
ajoutés en bloquant brièvement (0.5s)
|
||||
- Les heartbeat sont silencieusement droppés
|
||||
ajoutés en bloquant brièvement (0.5s). Si toujours pleine → persistés
|
||||
dans le buffer SQLite pour rejeu ultérieur.
|
||||
- Les heartbeat sont silencieusement droppés.
|
||||
- Si le serveur est marqué indisponible, on persiste immédiatement les
|
||||
items prioritaires (évite de remplir la queue inutilement).
|
||||
"""
|
||||
is_priority = self._is_priority_item(item_type, data)
|
||||
|
||||
# Serveur indisponible + item prioritaire → on persiste directement
|
||||
# sans polluer la queue RAM (qui ne sera jamais vidée tant que le
|
||||
# serveur est down).
|
||||
if is_priority and not self._server_available:
|
||||
self._persist_to_buffer(item_type, data)
|
||||
return
|
||||
|
||||
try:
|
||||
self.queue.put_nowait((item_type, data))
|
||||
except queue.Full:
|
||||
@@ -139,10 +215,18 @@ class TraceStreamer:
|
||||
try:
|
||||
self.queue.put((item_type, data), timeout=0.5)
|
||||
except queue.Full:
|
||||
logger.warning(
|
||||
f"Queue pleine — événement prioritaire droppé "
|
||||
f"(type={item_type})"
|
||||
)
|
||||
# Persistance disque (ne JAMAIS dropper un prioritaire)
|
||||
persisted = self._persist_to_buffer(item_type, data)
|
||||
if persisted:
|
||||
logger.warning(
|
||||
f"Queue pleine — événement prioritaire persisté "
|
||||
f"sur disque (type={item_type})"
|
||||
)
|
||||
else:
|
||||
logger.error(
|
||||
f"Queue pleine ET buffer saturé — événement "
|
||||
f"prioritaire perdu (type={item_type})"
|
||||
)
|
||||
else:
|
||||
# Heartbeat ou événement non-critique : on drop silencieusement
|
||||
logger.debug(
|
||||
@@ -163,6 +247,23 @@ class TraceStreamer:
|
||||
return event_type in PRIORITY_EVENT_TYPES
|
||||
return False
|
||||
|
||||
def _persist_to_buffer(self, item_type: str, data) -> bool:
|
||||
"""Persiste un item dans le buffer SQLite. Retourne True si OK.
|
||||
|
||||
Utilisé quand la queue est pleine ou le serveur indisponible.
|
||||
"""
|
||||
try:
|
||||
buf = self._get_buffer()
|
||||
if item_type == "event" and isinstance(data, dict):
|
||||
return buf.add_event(self.session_id, data)
|
||||
if item_type == "image":
|
||||
path, shot_id = data
|
||||
return buf.add_image(self.session_id, path, shot_id)
|
||||
except Exception as e:
|
||||
# On n'arrête jamais l'agent si le buffer échoue
|
||||
logger.error(f"Persistance buffer échouée : {e}")
|
||||
return False
|
||||
|
||||
# =========================================================================
|
||||
# Boucle d'envoi
|
||||
# =========================================================================
|
||||
@@ -174,16 +275,36 @@ class TraceStreamer:
|
||||
try:
|
||||
item_type, data = self.queue.get(timeout=0.5)
|
||||
success = False
|
||||
is_file_gone = False
|
||||
if item_type == "event":
|
||||
success = self._send_with_retry(self._send_event, data)
|
||||
elif item_type == "image":
|
||||
success = self._send_with_retry(self._send_image, *data)
|
||||
result = self._send_with_retry(self._send_image, *data)
|
||||
# Fix P0-E : distinguer FILE_GONE du vrai succès HTTP.
|
||||
if result is ImageSendResult.OK:
|
||||
success = True
|
||||
elif result is ImageSendResult.FILE_GONE:
|
||||
# Fichier disparu : pas de retry, pas de persistance
|
||||
# (on ne peut plus le renvoyer). On considère l'item
|
||||
# comme traité sans comptabiliser un succès réseau.
|
||||
is_file_gone = True
|
||||
success = False
|
||||
else:
|
||||
success = False
|
||||
self.queue.task_done()
|
||||
|
||||
if success:
|
||||
consecutive_failures = 0
|
||||
elif is_file_gone:
|
||||
# Fichier introuvable — déjà logué ERROR dans _send_image.
|
||||
# On ne persiste PAS dans le buffer (retry voué à échouer).
|
||||
consecutive_failures = 0
|
||||
else:
|
||||
consecutive_failures += 1
|
||||
# Après 3 retries infructueux, si l'item est prioritaire,
|
||||
# on le persiste pour ne pas le perdre définitivement.
|
||||
if self._is_priority_item(item_type, data):
|
||||
self._persist_to_buffer(item_type, data)
|
||||
if consecutive_failures >= 10:
|
||||
logger.warning(
|
||||
"10 échecs consécutifs — serveur marqué indisponible"
|
||||
@@ -200,15 +321,22 @@ class TraceStreamer:
|
||||
# Retry avec backoff exponentiel
|
||||
# =========================================================================
|
||||
|
||||
def _send_with_retry(self, send_fn, *args) -> bool:
|
||||
def _send_with_retry(self, send_fn, *args):
|
||||
"""Tente l'envoi avec retry et backoff exponentiel.
|
||||
|
||||
3 tentatives max avec délais de 1s, 2s, 4s entre chaque.
|
||||
Retourne True si l'envoi a réussi, False sinon.
|
||||
Retourne :
|
||||
- True / ImageSendResult.OK si l'envoi a réussi
|
||||
- ImageSendResult.FILE_GONE (images uniquement) — pas de retry
|
||||
- False / ImageSendResult.FAILED sinon
|
||||
"""
|
||||
# Première tentative (sans délai)
|
||||
if send_fn(*args):
|
||||
return True
|
||||
first = send_fn(*args)
|
||||
if first is ImageSendResult.OK or first is True:
|
||||
return first
|
||||
# Fix P0-E : FILE_GONE → pas de retry, l'erreur est permanente.
|
||||
if first is ImageSendResult.FILE_GONE:
|
||||
return first
|
||||
|
||||
# Retries avec backoff
|
||||
for attempt, delay in enumerate(RETRY_DELAYS, start=1):
|
||||
@@ -219,9 +347,13 @@ class TraceStreamer:
|
||||
f"Retry {attempt}/{MAX_RETRIES} dans {delay}s..."
|
||||
)
|
||||
time.sleep(delay)
|
||||
if send_fn(*args):
|
||||
result = send_fn(*args)
|
||||
if result is ImageSendResult.OK or result is True:
|
||||
logger.debug(f"Retry {attempt} réussi")
|
||||
return True
|
||||
return result
|
||||
# FILE_GONE pendant un retry — idem, on arrête
|
||||
if result is ImageSendResult.FILE_GONE:
|
||||
return result
|
||||
|
||||
logger.debug(f"Envoi échoué après {MAX_RETRIES} retries")
|
||||
return False
|
||||
@@ -260,6 +392,115 @@ class TraceStreamer:
|
||||
except Exception:
|
||||
logger.debug("Health-check échoué — serveur toujours indisponible")
|
||||
|
||||
# =========================================================================
|
||||
# Drain du buffer persistant (Partie B)
|
||||
# =========================================================================
|
||||
|
||||
def _buffer_drain_loop(self):
|
||||
"""Rejoue les items persistés en arrière-plan.
|
||||
|
||||
Tourne tant que self.running. Essaie de drainer le buffer toutes les
|
||||
BUFFER_DRAIN_INTERVAL_S secondes, mais seulement si :
|
||||
- le serveur est disponible,
|
||||
- il y a effectivement des items en attente.
|
||||
|
||||
Au premier passage (démarrage agent), on draine immédiatement pour
|
||||
rejouer tout ce qui a été persisté lors de la session précédente.
|
||||
"""
|
||||
# Au démarrage : drain immédiat (pas d'attente)
|
||||
first_pass = True
|
||||
while self.running:
|
||||
if not first_pass:
|
||||
time.sleep(BUFFER_DRAIN_INTERVAL_S)
|
||||
if not self.running:
|
||||
break
|
||||
first_pass = False
|
||||
|
||||
if not self._server_available:
|
||||
continue
|
||||
|
||||
try:
|
||||
buf = self._get_buffer()
|
||||
# Abandonner d'abord les items exceeded (évite de les retenter)
|
||||
abandoned = buf.abandon_exceeded()
|
||||
if abandoned:
|
||||
logger.warning(
|
||||
f"Buffer : {abandoned} items abandonnés "
|
||||
f"après {MAX_ATTEMPTS} tentatives"
|
||||
)
|
||||
|
||||
counts = buf.counts()
|
||||
if counts["events"] == 0 and counts["images"] == 0:
|
||||
continue
|
||||
|
||||
logger.info(
|
||||
f"Buffer drain : {counts['events']} events, "
|
||||
f"{counts['images']} images en attente — rejeu"
|
||||
)
|
||||
self._drain_buffer_once(buf)
|
||||
except Exception as e:
|
||||
logger.error(f"Buffer drain loop échoué : {e}")
|
||||
|
||||
def _drain_buffer_once(self, buf: PersistentBuffer):
|
||||
"""Une passe de drain : envoie ce qui peut l'être, incrémente le reste.
|
||||
|
||||
On arrête dès qu'un envoi échoue (serveur probablement down).
|
||||
"""
|
||||
# Events d'abord (plus légers, priorité métier AI Act)
|
||||
for row in buf.drain_events(limit=50):
|
||||
if not self._server_available:
|
||||
return
|
||||
try:
|
||||
import json as _json
|
||||
event = _json.loads(row["payload"])
|
||||
except (ValueError, TypeError):
|
||||
logger.error(
|
||||
f"Buffer : payload event #{row['id']} corrompu, suppression"
|
||||
)
|
||||
buf.delete_event(row["id"])
|
||||
continue
|
||||
if self._send_event(event):
|
||||
buf.delete_event(row["id"])
|
||||
else:
|
||||
buf.increment_attempts(row["id"], "event")
|
||||
# Serveur répond mal — on arrête la passe
|
||||
return
|
||||
|
||||
# Puis images
|
||||
for row in buf.drain_images(limit=20):
|
||||
if not self._server_available:
|
||||
return
|
||||
image_path = row["image_path"]
|
||||
shot_id = row["shot_id"]
|
||||
if not os.path.exists(image_path):
|
||||
# Fichier local disparu (purge, clean-up) — on abandonne.
|
||||
# Fix P0-E : log ERROR (pas warning) — c'est une perte de donnée.
|
||||
logger.error(
|
||||
f"Buffer : image #{row['id']} introuvable sur disque "
|
||||
f"({image_path}) — entrée abandonnée (le serveur n'a "
|
||||
f"jamais reçu cette image, session={row['session_id']}, "
|
||||
f"shot={shot_id})"
|
||||
)
|
||||
buf.delete_image(row["id"])
|
||||
continue
|
||||
result = self._send_image(image_path, shot_id)
|
||||
if result is ImageSendResult.OK or result is True:
|
||||
buf.delete_image(row["id"])
|
||||
elif result is ImageSendResult.FILE_GONE:
|
||||
# Fix P0-E : fichier disparu pendant l'envoi.
|
||||
# Ce n'est PAS un succès HTTP — ne pas considérer comme tel.
|
||||
# On supprime néanmoins l'entrée (retry voué à échouer)
|
||||
# mais avec un log ERROR explicite.
|
||||
logger.error(
|
||||
f"Buffer : image #{row['id']} disparue pendant l'envoi "
|
||||
f"({image_path}) — entrée abandonnée, pas de retry "
|
||||
f"(session={row['session_id']}, shot={shot_id})"
|
||||
)
|
||||
buf.delete_image(row["id"])
|
||||
else:
|
||||
buf.increment_attempts(row["id"], "image")
|
||||
return
|
||||
|
||||
# =========================================================================
|
||||
# Compression JPEG
|
||||
# =========================================================================
|
||||
@@ -287,6 +528,56 @@ class TraceStreamer:
|
||||
logger.warning(f"Compression JPEG échouée, envoi PNG brut: {e}")
|
||||
return None, None, None
|
||||
|
||||
# =========================================================================
|
||||
# Purge locale après ACK (Partie A)
|
||||
# =========================================================================
|
||||
|
||||
@staticmethod
|
||||
def _purge_local_image(path: str):
|
||||
"""Supprime un screenshot local après ACK 200 du serveur.
|
||||
|
||||
Ne crashe JAMAIS si le fichier est verrouillé (cas Windows) ou
|
||||
déjà supprimé : on log en debug et on continue. L'auto-cleanup
|
||||
de SessionStorage repassera plus tard.
|
||||
"""
|
||||
if not PURGE_AFTER_ACK:
|
||||
return
|
||||
try:
|
||||
os.remove(path)
|
||||
logger.debug(f"Screenshot local purgé après ACK : {path}")
|
||||
except FileNotFoundError:
|
||||
# Déjà supprimé ou chemin invalide — silencieux
|
||||
pass
|
||||
except PermissionError as e:
|
||||
# Windows verrouille parfois les fichiers (antivirus, indexation...)
|
||||
logger.debug(
|
||||
f"Purge différée (fichier verrouillé) : {path} — {e}"
|
||||
)
|
||||
except OSError as e:
|
||||
logger.debug(f"Purge échouée : {path} — {e}")
|
||||
|
||||
# =========================================================================
|
||||
# Protection redirect POST→GET (INC-7)
|
||||
# =========================================================================
|
||||
|
||||
@staticmethod
|
||||
def _check_redirect(resp, url: str):
|
||||
"""Detecter et logger une redirection sur un POST.
|
||||
|
||||
La lib requests transforme un POST en GET sur 301/302 (RFC 7231).
|
||||
Avec allow_redirects=False, on recoit le 301/302 directement.
|
||||
On log un WARNING explicite pour que l'admin corrige l'URL.
|
||||
"""
|
||||
if resp.status_code in (301, 302, 307, 308):
|
||||
location = resp.headers.get("Location", "?")
|
||||
logger.warning(
|
||||
f"Redirection {resp.status_code} detectee sur POST {url} "
|
||||
f"→ {location}. Verifiez que RPA_SERVER_URL utilise "
|
||||
f"https:// si le serveur redirige."
|
||||
)
|
||||
return True
|
||||
return False
|
||||
|
||||
# =========================================================================
|
||||
# Envois HTTP
|
||||
# =========================================================================
|
||||
@@ -294,15 +585,20 @@ class TraceStreamer:
|
||||
def _register_session(self):
|
||||
"""Enregistrer la session auprès du serveur (avec identifiant machine)."""
|
||||
try:
|
||||
url = f"{STREAMING_ENDPOINT}/register"
|
||||
resp = requests.post(
|
||||
f"{STREAMING_ENDPOINT}/register",
|
||||
url,
|
||||
params={
|
||||
"session_id": self.session_id,
|
||||
"machine_id": self.machine_id,
|
||||
},
|
||||
headers=self._auth_headers(),
|
||||
timeout=3,
|
||||
allow_redirects=False,
|
||||
)
|
||||
if self._check_redirect(resp, url):
|
||||
logger.warning("Enregistrement session échoué (redirect)")
|
||||
return
|
||||
if resp.ok:
|
||||
logger.info(
|
||||
f"Session {self.session_id} enregistrée sur le serveur "
|
||||
@@ -322,28 +618,40 @@ class TraceStreamer:
|
||||
C'est la dernière chance de sauver les données de la session.
|
||||
"""
|
||||
try:
|
||||
url = f"{STREAMING_ENDPOINT}/finalize"
|
||||
resp = requests.post(
|
||||
f"{STREAMING_ENDPOINT}/finalize",
|
||||
url,
|
||||
params={
|
||||
"session_id": self.session_id,
|
||||
"machine_id": self.machine_id,
|
||||
},
|
||||
headers=self._auth_headers(),
|
||||
timeout=30, # Le build workflow peut prendre du temps
|
||||
allow_redirects=False,
|
||||
)
|
||||
self._check_redirect(resp, url)
|
||||
if resp.ok:
|
||||
result = resp.json()
|
||||
logger.info(f"Session finalisée: {result}")
|
||||
if self._on_finalize_result is not None:
|
||||
try:
|
||||
self._on_finalize_result(result)
|
||||
except Exception as cb_error:
|
||||
logger.warning(
|
||||
"Callback finalize ignoré après erreur: %s",
|
||||
cb_error,
|
||||
)
|
||||
else:
|
||||
logger.warning(f"Finalisation échouée: {resp.status_code}")
|
||||
except Exception as e:
|
||||
logger.debug(f"Finalisation échouée: {e}")
|
||||
logger.warning(f"Finalisation échouée: {e}")
|
||||
|
||||
def _send_event(self, event: dict) -> bool:
|
||||
"""Envoyer un événement au serveur (avec identifiant machine)."""
|
||||
if not self._server_available:
|
||||
return False
|
||||
try:
|
||||
url = f"{STREAMING_ENDPOINT}/event"
|
||||
payload = {
|
||||
"session_id": self.session_id,
|
||||
"timestamp": time.time(),
|
||||
@@ -351,24 +659,36 @@ class TraceStreamer:
|
||||
"machine_id": self.machine_id,
|
||||
}
|
||||
resp = requests.post(
|
||||
f"{STREAMING_ENDPOINT}/event",
|
||||
url,
|
||||
json=payload,
|
||||
headers=self._auth_headers(),
|
||||
timeout=2,
|
||||
allow_redirects=False,
|
||||
)
|
||||
if self._check_redirect(resp, url):
|
||||
return False
|
||||
return resp.ok
|
||||
except Exception as e:
|
||||
logger.debug(f"Streaming Event échoué: {e}")
|
||||
return False
|
||||
|
||||
def _send_image(self, path: str, shot_id: str) -> bool:
|
||||
def _send_image(self, path: str, shot_id: str):
|
||||
"""Envoyer un screenshot au serveur, compressé en JPEG.
|
||||
|
||||
Utilise un context manager pour le fallback PNG afin d'éviter
|
||||
les fuites de descripteurs de fichier.
|
||||
|
||||
Partie A (purge après ACK) : en cas de HTTP 200 confirmé, le fichier
|
||||
local est supprimé (le serveur devient la source de vérité).
|
||||
|
||||
Fix P0-E : retourne `ImageSendResult` (OK / FAILED / FILE_GONE).
|
||||
Les appelants historiques qui attendaient un bool continuent de
|
||||
fonctionner grâce à la truthiness du enum (OK → True, reste → False),
|
||||
MAIS le drain du buffer doit désormais discriminer FILE_GONE pour
|
||||
ne pas confondre "fichier disparu" avec "envoyé avec succès".
|
||||
"""
|
||||
if not self._server_available:
|
||||
return False
|
||||
return ImageSendResult.FAILED
|
||||
try:
|
||||
# Tenter la compression JPEG (réduction ~5-10x vs PNG)
|
||||
jpeg_buf, content_type, suffix = self._compress_image_to_jpeg(path)
|
||||
@@ -379,19 +699,26 @@ class TraceStreamer:
|
||||
"machine_id": self.machine_id,
|
||||
}
|
||||
|
||||
url = f"{STREAMING_ENDPOINT}/image"
|
||||
if jpeg_buf is not None:
|
||||
# Envoi du JPEG compressé (BytesIO, pas de fuite possible)
|
||||
files = {
|
||||
"file": (f"{shot_id}{suffix}", jpeg_buf, content_type)
|
||||
}
|
||||
resp = requests.post(
|
||||
f"{STREAMING_ENDPOINT}/image",
|
||||
url,
|
||||
files=files,
|
||||
params=params,
|
||||
headers=self._auth_headers(),
|
||||
timeout=5,
|
||||
allow_redirects=False,
|
||||
)
|
||||
return resp.ok
|
||||
if self._check_redirect(resp, url):
|
||||
return ImageSendResult.FAILED
|
||||
if resp.ok:
|
||||
self._purge_local_image(path)
|
||||
return ImageSendResult.OK
|
||||
return ImageSendResult.FAILED
|
||||
else:
|
||||
# Fallback : envoi PNG original avec context manager
|
||||
with open(path, "rb") as f:
|
||||
@@ -399,13 +726,29 @@ class TraceStreamer:
|
||||
"file": (f"{shot_id}.png", f, "image/png")
|
||||
}
|
||||
resp = requests.post(
|
||||
f"{STREAMING_ENDPOINT}/image",
|
||||
url,
|
||||
files=files,
|
||||
params=params,
|
||||
headers=self._auth_headers(),
|
||||
timeout=5,
|
||||
allow_redirects=False,
|
||||
)
|
||||
return resp.ok
|
||||
if self._check_redirect(resp, url):
|
||||
return ImageSendResult.FAILED
|
||||
if resp.ok:
|
||||
self._purge_local_image(path)
|
||||
return ImageSendResult.OK
|
||||
return ImageSendResult.FAILED
|
||||
except FileNotFoundError:
|
||||
# Fix P0-E : fichier local disparu. On NE doit PAS considérer ça
|
||||
# comme un succès HTTP 200. Le serveur n'a rien reçu. On signale
|
||||
# `FILE_GONE` pour que le drain du buffer supprime l'entrée
|
||||
# (pas de retry possible) tout en loguant ERROR (pas debug).
|
||||
logger.error(
|
||||
f"Image {shot_id} introuvable sur disque ({path}) — "
|
||||
f"abandon (serveur n'a rien reçu)"
|
||||
)
|
||||
return ImageSendResult.FILE_GONE
|
||||
except Exception as e:
|
||||
logger.debug(f"Streaming Image échoué: {e}")
|
||||
return False
|
||||
return ImageSendResult.FAILED
|
||||
|
||||
@@ -3,7 +3,9 @@ mss>=9.0.1 # Capture d'écran haute performance
|
||||
pynput>=1.7.7 # Clavier/Souris Cross-plateforme
|
||||
Pillow>=10.0.0 # Crops et processing image
|
||||
requests>=2.31.0 # Streaming réseau
|
||||
python-socketio[client]>=5.10,<6.0 # Bus feedback Léa 'lea:*' (compat Flask-SocketIO 5.3.x serveur)
|
||||
psutil>=5.9.0 # Monitoring CPU/RAM
|
||||
screeninfo>=0.8 # QW1 — détection des monitors physiques + offsets
|
||||
pystray>=0.19.5 # Icône Tray UI
|
||||
plyer>=2.1.0 # Notifications toast natives (remplace PyQt5)
|
||||
pywebview>=5.0 # Fenêtre de chat Léa intégrée (Edge WebView2 sur Windows)
|
||||
|
||||
0
agent_v0/agent_v1/tools/__init__.py
Normal file
0
agent_v0/agent_v1/tools/__init__.py
Normal file
88
agent_v0/agent_v1/tools/test_lea_pause_flow.py
Normal file
88
agent_v0/agent_v1/tools/test_lea_pause_flow.py
Normal file
@@ -0,0 +1,88 @@
|
||||
# agent_v1/tools/test_lea_pause_flow.py
|
||||
"""Smoke test : simuler un lea:paused localement et vérifier la bulle ChatWindow.
|
||||
|
||||
À lancer SUR WINDOWS (PC démo) :
|
||||
cd C:/rpa_vision
|
||||
.venv\\Scripts\\python.exe -m agent_v1.tools.test_lea_pause_flow
|
||||
|
||||
Ce script ouvre une ChatWindow, simule l'arrivée d'un payload paused_need_help
|
||||
avec un message LONG (350+ chars pour tester le scroll interne), puis attend
|
||||
les clics utilisateur sur Continuer/Annuler. Le test vérifie qu'il y a UN SEUL
|
||||
affichage (la bulle chat), pas de toast supplémentaire.
|
||||
|
||||
Exit code 0 si succès. Logs dans la console.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
|
||||
# Configurer le logging avant tout import du package
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s [%(name)s] %(levelname)s: %(message)s',
|
||||
)
|
||||
logger = logging.getLogger("test_lea_pause_flow")
|
||||
|
||||
# Forcer le bus feedback (pour que les boutons puissent émettre vers
|
||||
# rpa-agent-chat — port 5004). Si on ne veut PAS du bus, mettre LEA_FEEDBACK_BUS=0.
|
||||
os.environ.setdefault("LEA_FEEDBACK_BUS", "1")
|
||||
os.environ.setdefault("RPA_API_TOKEN", "") # à remplir si serveur exige Bearer
|
||||
|
||||
|
||||
def main() -> int:
|
||||
try:
|
||||
from agent_v1.ui.chat_window import ChatWindow
|
||||
except ImportError as e:
|
||||
print(f"[TEST] Import ChatWindow KO : {e}")
|
||||
return 2
|
||||
|
||||
print("[TEST] Création ChatWindow...")
|
||||
cw = ChatWindow(
|
||||
server_client=None,
|
||||
on_start_callback=None,
|
||||
server_host=os.environ.get("RPA_SERVER_HOST", "192.168.1.40"),
|
||||
chat_port=5004,
|
||||
)
|
||||
# Attendre que le tk loop soit prêt
|
||||
time.sleep(2.0)
|
||||
cw.show()
|
||||
time.sleep(0.5)
|
||||
|
||||
print("[TEST] Simulation lea:paused avec long message (350 chars)...")
|
||||
long_msg = (
|
||||
"Je n'arrive pas à trouver le champ « Numéro de dossier patient » "
|
||||
"sur l'écran courant. J'ai essayé 3 stratégies de grounding visuel "
|
||||
"(template matching, OCR, VLM) sans succès. Pouvez-vous me montrer "
|
||||
"l'emplacement exact du champ, ou cliquer dessus à ma place ? "
|
||||
"Quand vous avez fini, cliquez sur Continuer pour que je reprenne."
|
||||
)
|
||||
payload = {
|
||||
"replay_id": "test_replay_pause_flow_001",
|
||||
"workflow": "Démo UHCD",
|
||||
"reason": long_msg,
|
||||
"completed": 5,
|
||||
"total": 12,
|
||||
}
|
||||
cw._add_paused_bubble(payload)
|
||||
print(f"[TEST] Bulle envoyée. Message len={len(long_msg)} chars.")
|
||||
print("[TEST] Vérifiez visuellement :")
|
||||
print(" 1) UN SEUL popup (la bulle chat dans la fenêtre Léa)")
|
||||
print(" 2) Le message long s'affiche en intégralité (scroll interne si besoin)")
|
||||
print(" 3) Boutons Continuer / Annuler visibles")
|
||||
print(" 4) Cliquez sur Annuler → bulle fermée + feedback '✗ Annulé'")
|
||||
print("[TEST] La fenêtre reste ouverte 30s. Ctrl+C pour quitter avant.")
|
||||
|
||||
try:
|
||||
for i in range(30):
|
||||
time.sleep(1.0)
|
||||
except KeyboardInterrupt:
|
||||
print("[TEST] Interruption clavier.")
|
||||
|
||||
print("[TEST] Test terminé. Vérifier visuellement les 4 points ci-dessus.")
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
87
agent_v0/agent_v1/tools/test_lea_toast.py
Normal file
87
agent_v0/agent_v1/tools/test_lea_toast.py
Normal file
@@ -0,0 +1,87 @@
|
||||
# agent_v1/tools/test_lea_toast.py
|
||||
"""
|
||||
Test visuel rapide du toast Léa (démo GHT 8 mai 2026).
|
||||
|
||||
Lance trois scénarios de toast successifs pour valider l'affichage Windows :
|
||||
1. Toast simple « pause supervisée »
|
||||
2. Toast avec message long (vérifier wraplength)
|
||||
3. Toast type BLOCAGE (= ce que voit l'utilisateur quand Léa est perdue)
|
||||
|
||||
Usage Windows :
|
||||
C:\\rpa_vision\\.venv\\Scripts\\python.exe C:\\rpa_vision\\agent_v1\\tools\\test_lea_toast.py
|
||||
|
||||
Le script s'attend à voir trois toasts successifs en haut-droite de l'écran
|
||||
principal, espacés de ~6 s, fond bleu Léa, autodismiss après 15 s ou clic.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _bootstrap_path() -> None:
|
||||
"""Autoriser l'exécution directe sans -m : ajouter C:\\rpa_vision au sys.path."""
|
||||
here = Path(__file__).resolve()
|
||||
# On remonte : tools -> agent_v1 -> rpa_vision (parent du package agent_v1)
|
||||
rpa_root = here.parent.parent.parent
|
||||
if str(rpa_root) not in sys.path:
|
||||
sys.path.insert(0, str(rpa_root))
|
||||
|
||||
|
||||
def main() -> int:
|
||||
_bootstrap_path()
|
||||
|
||||
# Import après ajout du path (les deux variantes fonctionnent)
|
||||
try:
|
||||
from agent_v1.ui.paused_toast import show_paused_toast
|
||||
except Exception as e: # pragma: no cover (debug only)
|
||||
print(f"[TEST] ERREUR import agent_v1.ui.paused_toast : {e}")
|
||||
return 1
|
||||
|
||||
scenarios = [
|
||||
(
|
||||
"Toast 1/3 : pause simple",
|
||||
"Léa a besoin de votre aide",
|
||||
"Test 1/3 — Pause supervisée. Cliquez sur 'Continuer' dans la chat.",
|
||||
),
|
||||
(
|
||||
"Toast 2/3 : message long",
|
||||
"Léa — j'attends votre validation",
|
||||
(
|
||||
"Test 2/3 — J'ai trouvé 11 dossiers correspondant à vos critères "
|
||||
"(UHCD, Forfait 1, PE2). Je vais traiter le dossier de M. DUPONT "
|
||||
"Jean en premier. Pouvez-vous valider que c'est le bon ordre "
|
||||
"avant que je continue ?"
|
||||
),
|
||||
),
|
||||
(
|
||||
"Toast 3/3 : blocage cible non trouvée",
|
||||
"Léa — je ne vois pas l'élément",
|
||||
(
|
||||
"Test 3/3 — Je n'arrive pas à trouver « Examens cliniques » à "
|
||||
"l'écran. Pouvez-vous me montrer où cliquer ?"
|
||||
),
|
||||
),
|
||||
]
|
||||
|
||||
for label, title, message in scenarios:
|
||||
print(f"[TEST] {label}")
|
||||
ok = show_paused_toast(title=title, message=message)
|
||||
print(f" show_paused_toast() = {ok}")
|
||||
if not ok:
|
||||
print(f" ECHEC : {label}")
|
||||
# Espacer pour que Dom voit chaque toast distinctement
|
||||
# (rate limit interne = 3s pour message identique, mais ici les
|
||||
# messages diffèrent, le rate limit ne s'applique pas)
|
||||
time.sleep(6)
|
||||
|
||||
print("[TEST] Attente 12s supplémentaires pour laisser le dernier toast vivre...")
|
||||
time.sleep(12)
|
||||
print("[TEST] OK — fin du test. Si vous avez vu 3 toasts bleus en haut-droite,")
|
||||
print(" le mécanisme Léa pause est validé.")
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
53
agent_v0/agent_v1/ui/_test_paused_toast.py
Normal file
53
agent_v0/agent_v1/ui/_test_paused_toast.py
Normal file
@@ -0,0 +1,53 @@
|
||||
# agent_v1/ui/_test_paused_toast.py
|
||||
"""
|
||||
Test isolé du toast paused — à exécuter directement sur Windows.
|
||||
|
||||
Usage (sur Windows, depuis C:\\rpa_vision\\agent_v1) :
|
||||
python -m agent_v1.ui._test_paused_toast
|
||||
|
||||
OU plus simple :
|
||||
python C:\\rpa_vision\\agent_v1\\ui\\_test_paused_toast.py
|
||||
|
||||
Le toast doit s'afficher en haut à droite de l'écran principal pendant ~15s.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import time
|
||||
|
||||
|
||||
def main() -> int:
|
||||
print("[TEST] Lancement du toast paused...")
|
||||
|
||||
try:
|
||||
# Import flexible : essai relatif puis absolu
|
||||
try:
|
||||
from .paused_toast import show_paused_toast
|
||||
except ImportError:
|
||||
from paused_toast import show_paused_toast
|
||||
except Exception as e:
|
||||
print(f"[TEST] ERREUR import : {e}")
|
||||
return 1
|
||||
|
||||
ok = show_paused_toast(
|
||||
title="Léa a besoin de votre aide",
|
||||
message=(
|
||||
"Test isolé — démo GHT 8 mai 2026.\n"
|
||||
"Si vous voyez ce toast, le mécanisme de pause supervisée "
|
||||
"fonctionne correctement."
|
||||
),
|
||||
)
|
||||
print(f"[TEST] show_paused_toast() retour = {ok}")
|
||||
|
||||
if not ok:
|
||||
print("[TEST] ÉCHEC : toast non déclenché.")
|
||||
return 2
|
||||
|
||||
print("[TEST] Toast déclenché. Attente de 18s pour le voir s'afficher puis se fermer...")
|
||||
time.sleep(18)
|
||||
print("[TEST] OK — fin du test.")
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
418
agent_v0/agent_v1/ui/activity_panel.py
Normal file
418
agent_v0/agent_v1/ui/activity_panel.py
Normal file
@@ -0,0 +1,418 @@
|
||||
# agent_v1/ui/activity_panel.py
|
||||
"""
|
||||
Panel d'activité temps réel de Léa.
|
||||
|
||||
Affiche à l'utilisateur ce que Léa fait *maintenant* :
|
||||
- État courant (Observe / Cherche / Agit / Vérifie / Bloquée)
|
||||
- Action en cours (ex: "Clic sur Rechercher")
|
||||
- Progression (ex: "3/15")
|
||||
- Temps écoulé depuis le début du workflow
|
||||
|
||||
Contraintes :
|
||||
- Fallback silencieux si tkinter absent (ne crash jamais)
|
||||
- Thread-safe (mises à jour depuis les threads de replay)
|
||||
- Pas de dépendance à PyQt5 (seulement tkinter, déjà utilisé par chat_window)
|
||||
|
||||
Utilisation :
|
||||
panel = ActivityPanel()
|
||||
panel.definir_workflow("Saisie patient", nb_etapes=15)
|
||||
panel.mettre_a_jour(etat=EtatLea.AGIT, action="Clic sur Valider", etape=3)
|
||||
panel.masquer()
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class EtatLea(Enum):
|
||||
"""États macroscopiques de Léa pendant un replay."""
|
||||
|
||||
INACTIVE = ("inactive", "Prête", "#808080") # Gris
|
||||
OBSERVE = ("observe", "Observe", "#4A90E2") # Bleu
|
||||
CHERCHE = ("cherche", "Cherche", "#F5A623") # Orange
|
||||
AGIT = ("agit", "Agit", "#7ED321") # Vert
|
||||
VERIFIE = ("verifie", "Vérifie", "#9013FE") # Violet
|
||||
BLOQUEE = ("bloquee", "Bloquée", "#D0021B") # Rouge
|
||||
TERMINE = ("termine", "Terminé", "#50E3C2") # Turquoise
|
||||
|
||||
def __init__(self, code: str, libelle: str, couleur: str) -> None:
|
||||
self.code = code
|
||||
self.libelle = libelle
|
||||
self.couleur = couleur
|
||||
|
||||
|
||||
@dataclass
|
||||
class EtatActivite:
|
||||
"""Instantané de l'activité courante de Léa.
|
||||
|
||||
Utilisé par le panel et exposé par `ActivityPanel.snapshot()` pour les
|
||||
tests (sans dépendre de tkinter).
|
||||
"""
|
||||
|
||||
etat: EtatLea = EtatLea.INACTIVE
|
||||
action_courante: str = ""
|
||||
nom_workflow: str = ""
|
||||
etape: int = 0
|
||||
nb_etapes: int = 0
|
||||
debut_timestamp: float = 0.0
|
||||
dernier_message: str = ""
|
||||
|
||||
def temps_ecoule_s(self) -> float:
|
||||
"""Temps écoulé depuis le début du workflow (secondes)."""
|
||||
if self.debut_timestamp <= 0:
|
||||
return 0.0
|
||||
return max(0.0, time.time() - self.debut_timestamp)
|
||||
|
||||
def progression_texte(self) -> str:
|
||||
"""Représentation textuelle de la progression (ex: '3/15')."""
|
||||
if self.nb_etapes <= 0:
|
||||
return ""
|
||||
return f"{self.etape}/{self.nb_etapes}"
|
||||
|
||||
def temps_ecoule_texte(self) -> str:
|
||||
"""Représentation humaine du temps écoulé (ex: '12s', '1m24s')."""
|
||||
s = int(self.temps_ecoule_s())
|
||||
if s < 60:
|
||||
return f"{s}s"
|
||||
return f"{s // 60}m{s % 60:02d}s"
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
"""Sérialiser pour le logging et les tests."""
|
||||
return {
|
||||
"etat": self.etat.code,
|
||||
"etat_libelle": self.etat.libelle,
|
||||
"action_courante": self.action_courante,
|
||||
"nom_workflow": self.nom_workflow,
|
||||
"etape": self.etape,
|
||||
"nb_etapes": self.nb_etapes,
|
||||
"progression": self.progression_texte(),
|
||||
"temps_ecoule_s": round(self.temps_ecoule_s(), 1),
|
||||
"dernier_message": self.dernier_message,
|
||||
}
|
||||
|
||||
|
||||
class ActivityPanel:
|
||||
"""Panel d'activité de Léa.
|
||||
|
||||
Thread-safe. Le panel tkinter est créé à la demande (lazy) et uniquement
|
||||
si tkinter est disponible. Toutes les méthodes sont safe à appeler même
|
||||
si l'UI n'est pas dispo (fallback silencieux).
|
||||
"""
|
||||
|
||||
def __init__(self, activer_ui: bool = True) -> None:
|
||||
self._lock = threading.RLock()
|
||||
self._etat = EtatActivite()
|
||||
self._activer_ui = activer_ui
|
||||
# UI tkinter (créée à la demande dans le thread UI)
|
||||
self._tk_root = None
|
||||
self._tk_labels: dict = {}
|
||||
self._ui_disponible = None # Lazy : résolu au premier usage
|
||||
self._listeners = [] # Callbacks pour les changements d'état
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# API publique (thread-safe)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def definir_workflow(self, nom: str, nb_etapes: int = 0) -> None:
|
||||
"""Démarrer le suivi d'un nouveau workflow."""
|
||||
with self._lock:
|
||||
self._etat = EtatActivite(
|
||||
etat=EtatLea.OBSERVE,
|
||||
nom_workflow=nom,
|
||||
nb_etapes=nb_etapes,
|
||||
debut_timestamp=time.time(),
|
||||
)
|
||||
self._notifier_changement()
|
||||
self._rafraichir_ui()
|
||||
logger.info(f"[ACTIVITY] Workflow démarré : {nom} ({nb_etapes} étapes)")
|
||||
|
||||
def mettre_a_jour(
|
||||
self,
|
||||
etat: Optional[EtatLea] = None,
|
||||
action: Optional[str] = None,
|
||||
etape: Optional[int] = None,
|
||||
message: Optional[str] = None,
|
||||
) -> None:
|
||||
"""Mettre à jour l'état affiché.
|
||||
|
||||
Tous les paramètres sont optionnels — on ne met à jour que ce qui est
|
||||
fourni. Les autres champs conservent leur valeur actuelle.
|
||||
"""
|
||||
with self._lock:
|
||||
if etat is not None:
|
||||
self._etat.etat = etat
|
||||
if action is not None:
|
||||
self._etat.action_courante = action
|
||||
if etape is not None:
|
||||
self._etat.etape = etape
|
||||
if message is not None:
|
||||
self._etat.dernier_message = message
|
||||
|
||||
self._notifier_changement()
|
||||
self._rafraichir_ui()
|
||||
|
||||
def terminer(self, succes: bool = True) -> None:
|
||||
"""Marquer le workflow comme terminé."""
|
||||
with self._lock:
|
||||
self._etat.etat = EtatLea.TERMINE if succes else EtatLea.BLOQUEE
|
||||
if not succes:
|
||||
self._etat.dernier_message = (
|
||||
self._etat.dernier_message or "Léa a rendu la main"
|
||||
)
|
||||
self._notifier_changement()
|
||||
self._rafraichir_ui()
|
||||
|
||||
def reinitialiser(self) -> None:
|
||||
"""Remettre le panel en état inactif."""
|
||||
with self._lock:
|
||||
self._etat = EtatActivite()
|
||||
self._notifier_changement()
|
||||
self._rafraichir_ui()
|
||||
|
||||
def snapshot(self) -> EtatActivite:
|
||||
"""Obtenir un instantané immuable de l'état courant (pour les tests)."""
|
||||
with self._lock:
|
||||
return EtatActivite(
|
||||
etat=self._etat.etat,
|
||||
action_courante=self._etat.action_courante,
|
||||
nom_workflow=self._etat.nom_workflow,
|
||||
etape=self._etat.etape,
|
||||
nb_etapes=self._etat.nb_etapes,
|
||||
debut_timestamp=self._etat.debut_timestamp,
|
||||
dernier_message=self._etat.dernier_message,
|
||||
)
|
||||
|
||||
def masquer(self) -> None:
|
||||
"""Masquer le panel UI si affiché."""
|
||||
if self._tk_root is not None:
|
||||
try:
|
||||
self._tk_root.withdraw()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def afficher(self) -> None:
|
||||
"""Afficher le panel UI si disponible."""
|
||||
self._creer_ui_si_besoin()
|
||||
if self._tk_root is not None:
|
||||
try:
|
||||
self._tk_root.deiconify()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def on_change(self, callback) -> None:
|
||||
"""Enregistrer un listener appelé à chaque changement d'état."""
|
||||
with self._lock:
|
||||
self._listeners.append(callback)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Gestion UI tkinter (lazy, fallback silencieux)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _creer_ui_si_besoin(self) -> None:
|
||||
"""Créer la fenêtre tkinter au premier usage (lazy)."""
|
||||
if not self._activer_ui:
|
||||
return
|
||||
if self._tk_root is not None:
|
||||
return
|
||||
if self._ui_disponible is False:
|
||||
return # Déjà testé et indisponible
|
||||
|
||||
try:
|
||||
import tkinter as tk
|
||||
except Exception as e:
|
||||
logger.debug(f"[ACTIVITY] tkinter indisponible : {e}")
|
||||
self._ui_disponible = False
|
||||
return
|
||||
|
||||
try:
|
||||
self._tk_root = tk.Toplevel() if _tk_root_existe() else tk.Tk()
|
||||
self._tk_root.title("Léa — Activité")
|
||||
self._tk_root.geometry("340x180+40+40")
|
||||
self._tk_root.attributes("-topmost", True)
|
||||
self._tk_root.resizable(False, False)
|
||||
self._tk_root.configure(bg="#1E1E1E")
|
||||
|
||||
titre = tk.Label(
|
||||
self._tk_root,
|
||||
text="Léa",
|
||||
font=("Segoe UI", 14, "bold"),
|
||||
fg="#FFFFFF",
|
||||
bg="#1E1E1E",
|
||||
)
|
||||
titre.pack(pady=(10, 2))
|
||||
|
||||
self._tk_labels["etat"] = tk.Label(
|
||||
self._tk_root,
|
||||
text="Prête",
|
||||
font=("Segoe UI", 11),
|
||||
fg="#808080",
|
||||
bg="#1E1E1E",
|
||||
)
|
||||
self._tk_labels["etat"].pack()
|
||||
|
||||
self._tk_labels["action"] = tk.Label(
|
||||
self._tk_root,
|
||||
text="",
|
||||
font=("Segoe UI", 10),
|
||||
fg="#FFFFFF",
|
||||
bg="#1E1E1E",
|
||||
wraplength=300,
|
||||
)
|
||||
self._tk_labels["action"].pack(pady=(8, 2))
|
||||
|
||||
self._tk_labels["progression"] = tk.Label(
|
||||
self._tk_root,
|
||||
text="",
|
||||
font=("Segoe UI", 9),
|
||||
fg="#B0B0B0",
|
||||
bg="#1E1E1E",
|
||||
)
|
||||
self._tk_labels["progression"].pack()
|
||||
|
||||
self._tk_labels["temps"] = tk.Label(
|
||||
self._tk_root,
|
||||
text="",
|
||||
font=("Segoe UI", 9),
|
||||
fg="#808080",
|
||||
bg="#1E1E1E",
|
||||
)
|
||||
self._tk_labels["temps"].pack(pady=(4, 0))
|
||||
|
||||
self._tk_labels["message"] = tk.Label(
|
||||
self._tk_root,
|
||||
text="",
|
||||
font=("Segoe UI", 9, "italic"),
|
||||
fg="#B0B0B0",
|
||||
bg="#1E1E1E",
|
||||
wraplength=300,
|
||||
)
|
||||
self._tk_labels["message"].pack(pady=(6, 10))
|
||||
|
||||
# Masquer par défaut : on affiche seulement pendant un workflow
|
||||
self._tk_root.withdraw()
|
||||
self._ui_disponible = True
|
||||
except Exception as e:
|
||||
logger.debug(f"[ACTIVITY] Impossible de créer l'UI : {e}")
|
||||
self._ui_disponible = False
|
||||
self._tk_root = None
|
||||
|
||||
def _rafraichir_ui(self) -> None:
|
||||
"""Mettre à jour les labels tkinter (safe si l'UI n'existe pas)."""
|
||||
if not self._activer_ui or self._ui_disponible is False:
|
||||
return
|
||||
self._creer_ui_si_besoin()
|
||||
if self._tk_root is None:
|
||||
return
|
||||
|
||||
try:
|
||||
with self._lock:
|
||||
snap = self.snapshot()
|
||||
|
||||
# Utiliser after(0) pour rester dans le thread UI tkinter
|
||||
def _update():
|
||||
try:
|
||||
self._tk_labels["etat"].config(
|
||||
text=snap.etat.libelle,
|
||||
fg=snap.etat.couleur,
|
||||
)
|
||||
if snap.action_courante:
|
||||
self._tk_labels["action"].config(text=snap.action_courante)
|
||||
else:
|
||||
self._tk_labels["action"].config(text="")
|
||||
|
||||
prog = snap.progression_texte()
|
||||
if prog and snap.nom_workflow:
|
||||
self._tk_labels["progression"].config(
|
||||
text=f"« {snap.nom_workflow} » — {prog}"
|
||||
)
|
||||
elif snap.nom_workflow:
|
||||
self._tk_labels["progression"].config(
|
||||
text=f"« {snap.nom_workflow} »"
|
||||
)
|
||||
else:
|
||||
self._tk_labels["progression"].config(text="")
|
||||
|
||||
if snap.debut_timestamp > 0:
|
||||
self._tk_labels["temps"].config(
|
||||
text=f"⏱ {snap.temps_ecoule_texte()}"
|
||||
)
|
||||
else:
|
||||
self._tk_labels["temps"].config(text="")
|
||||
|
||||
self._tk_labels["message"].config(text=snap.dernier_message)
|
||||
|
||||
# Afficher automatiquement si actif
|
||||
if snap.etat != EtatLea.INACTIVE:
|
||||
self._tk_root.deiconify()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
self._tk_root.after(0, _update)
|
||||
except Exception:
|
||||
# Si le root a été détruit
|
||||
self._tk_root = None
|
||||
self._ui_disponible = False
|
||||
except Exception as e:
|
||||
logger.debug(f"[ACTIVITY] Erreur rafraîchissement UI : {e}")
|
||||
|
||||
def _notifier_changement(self) -> None:
|
||||
"""Notifier tous les listeners du changement d'état."""
|
||||
with self._lock:
|
||||
listeners = list(self._listeners)
|
||||
snap = self.snapshot()
|
||||
|
||||
for cb in listeners:
|
||||
try:
|
||||
cb(snap)
|
||||
except Exception as e:
|
||||
logger.debug(f"[ACTIVITY] Listener erreur : {e}")
|
||||
|
||||
|
||||
def _tk_root_existe() -> bool:
|
||||
"""Vérifier si un root tkinter existe déjà (pour créer un Toplevel)."""
|
||||
try:
|
||||
import tkinter as tk
|
||||
|
||||
default_root = getattr(tk, "_default_root", None)
|
||||
return default_root is not None
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Singleton global (optionnel)
|
||||
# ============================================================================
|
||||
|
||||
|
||||
_INSTANCE_GLOBALE: Optional[ActivityPanel] = None
|
||||
_LOCK_SINGLETON = threading.Lock()
|
||||
|
||||
|
||||
def get_activity_panel(activer_ui: bool = True) -> ActivityPanel:
|
||||
"""Obtenir l'instance globale du panel d'activité (lazy)."""
|
||||
global _INSTANCE_GLOBALE
|
||||
with _LOCK_SINGLETON:
|
||||
if _INSTANCE_GLOBALE is None:
|
||||
_INSTANCE_GLOBALE = ActivityPanel(activer_ui=activer_ui)
|
||||
return _INSTANCE_GLOBALE
|
||||
|
||||
|
||||
def reset_activity_panel() -> None:
|
||||
"""Réinitialiser le singleton (utile pour les tests)."""
|
||||
global _INSTANCE_GLOBALE
|
||||
with _LOCK_SINGLETON:
|
||||
if _INSTANCE_GLOBALE is not None:
|
||||
try:
|
||||
_INSTANCE_GLOBALE.masquer()
|
||||
except Exception:
|
||||
pass
|
||||
_INSTANCE_GLOBALE = None
|
||||
@@ -3,15 +3,25 @@ Mini serveur HTTP sur l'agent Windows pour les captures d'ecran a la demande
|
||||
et les operations fichiers.
|
||||
|
||||
Ecoute sur le port 5006 (configurable via RPA_CAPTURE_PORT).
|
||||
Bind par defaut sur 127.0.0.1 (configurable via RPA_CAPTURE_BIND).
|
||||
Endpoints :
|
||||
GET /capture -> screenshot frais en base64 (JPEG)
|
||||
GET /health -> {"status": "ok"}
|
||||
GET /health -> {"status": "ok"} (pas d'auth — sonde liveness)
|
||||
POST /file-action -> operations fichiers (list, create, move, copy, sort)
|
||||
|
||||
Securite :
|
||||
- Authentification Bearer obligatoire (RPA_API_TOKEN) pour /capture et
|
||||
/file-action. Sans token configure, ces endpoints sont desactives.
|
||||
- Les tentatives non authentifiees sont loguees (WARNING) avec l'IP source.
|
||||
- Bind defaut localhost. Pour exposer sur le LAN (cas VWB backend qui
|
||||
appelle l'agent a distance), definir explicitement
|
||||
RPA_CAPTURE_BIND=0.0.0.0. L'auth reste alors la seule protection.
|
||||
"""
|
||||
import threading
|
||||
import logging
|
||||
import json
|
||||
import base64
|
||||
import hmac
|
||||
import io
|
||||
import os
|
||||
import time
|
||||
@@ -20,6 +30,17 @@ from http.server import HTTPServer, BaseHTTPRequestHandler
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
CAPTURE_PORT = int(os.environ.get("RPA_CAPTURE_PORT", "5006"))
|
||||
# Bind par defaut sur localhost — defense en profondeur.
|
||||
# Pour le deploiement VWB (backend Linux -> agent Windows), definir
|
||||
# RPA_CAPTURE_BIND=0.0.0.0 explicitement. L'auth par token reste requise.
|
||||
CAPTURE_BIND = os.environ.get("RPA_CAPTURE_BIND", "127.0.0.1")
|
||||
|
||||
# Token d'authentification (partage avec le streaming). Doit etre defini pour
|
||||
# que /capture et /file-action soient accessibles.
|
||||
CAPTURE_TOKEN = os.environ.get("RPA_API_TOKEN", "")
|
||||
|
||||
# Endpoints ouverts (pas d'auth requise — sondes techniques uniquement)
|
||||
_PUBLIC_PATHS = {"/health"}
|
||||
|
||||
# Floutage des données sensibles (conformité AI Act)
|
||||
BLUR_SENSITIVE = os.environ.get("RPA_BLUR_SENSITIVE", "true").lower() in ("true", "1", "yes")
|
||||
@@ -33,6 +54,8 @@ class CaptureHandler(BaseHTTPRequestHandler):
|
||||
|
||||
def do_GET(self):
|
||||
if self.path == "/capture":
|
||||
if not self._check_auth():
|
||||
return
|
||||
self._handle_capture()
|
||||
elif self.path == "/health":
|
||||
self._send_json(200, {"status": "ok"})
|
||||
@@ -41,10 +64,56 @@ class CaptureHandler(BaseHTTPRequestHandler):
|
||||
|
||||
def do_POST(self):
|
||||
if self.path == "/file-action":
|
||||
if not self._check_auth():
|
||||
return
|
||||
self._handle_file_action()
|
||||
else:
|
||||
self._send_json(404, {"error": "not found"})
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _check_auth(self) -> bool:
|
||||
"""Valide le Bearer token. Renvoie 401/503 si invalide.
|
||||
|
||||
- Si aucun token n'est configure cote serveur (RPA_API_TOKEN vide),
|
||||
on refuse toutes les requetes sensibles (503) — fail-closed.
|
||||
- Sinon, on compare en temps constant via hmac.compare_digest.
|
||||
- Les tentatives echouees sont loguees avec l'IP source.
|
||||
"""
|
||||
# Autoriser les endpoints publics
|
||||
if self.path in _PUBLIC_PATHS:
|
||||
return True
|
||||
|
||||
peer = self.client_address[0] if self.client_address else "?"
|
||||
|
||||
if not CAPTURE_TOKEN:
|
||||
logger.error(
|
||||
"Refus %s depuis %s : RPA_API_TOKEN non configure "
|
||||
"(capture server en mode fail-closed)",
|
||||
self.path, peer,
|
||||
)
|
||||
self._send_json(503, {
|
||||
"error": "capture server non configure (token manquant)",
|
||||
})
|
||||
return False
|
||||
|
||||
auth_header = self.headers.get("Authorization", "")
|
||||
token = ""
|
||||
if auth_header.startswith("Bearer "):
|
||||
token = auth_header[len("Bearer "):].strip()
|
||||
|
||||
if not token or not hmac.compare_digest(token, CAPTURE_TOKEN):
|
||||
logger.warning(
|
||||
"Tentative d'acces non autorisee a %s depuis %s "
|
||||
"(token %s)",
|
||||
self.path, peer,
|
||||
"absent" if not token else "invalide",
|
||||
)
|
||||
self._send_json(401, {"error": "unauthorized"})
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def do_OPTIONS(self):
|
||||
"""Gestion CORS preflight."""
|
||||
self.send_response(200)
|
||||
@@ -89,14 +158,25 @@ class CaptureHandler(BaseHTTPRequestHandler):
|
||||
"""Capture l'ecran principal et le renvoie en base64 JPEG."""
|
||||
t0 = time.perf_counter()
|
||||
try:
|
||||
import mss
|
||||
from PIL import Image
|
||||
from ..vision.capturer import (
|
||||
capture_foreground_window_image,
|
||||
capture_screen_image,
|
||||
)
|
||||
|
||||
with mss.mss() as sct:
|
||||
monitor = sct.monitors[1] # ecran principal
|
||||
raw = sct.grab(monitor)
|
||||
|
||||
img = Image.frombytes("RGB", raw.size, raw.bgra, "raw", "BGRX")
|
||||
_monitor, img, meta = capture_screen_image()
|
||||
if img is None:
|
||||
img, win_meta = capture_foreground_window_image()
|
||||
meta.update(win_meta)
|
||||
if img is None:
|
||||
elapsed_ms = (time.perf_counter() - t0) * 1000
|
||||
logger.error("Erreur capture : aucun backend exploitable (%s)", meta)
|
||||
self._send_json(503, {
|
||||
"error": "capture_unavailable",
|
||||
"source": meta.get("backend", "unknown"),
|
||||
"capture_ms": round(elapsed_ms),
|
||||
"diagnostics": meta,
|
||||
})
|
||||
return
|
||||
|
||||
# Floutage des données sensibles (conformité AI Act)
|
||||
if BLUR_SENSITIVE:
|
||||
@@ -111,15 +191,22 @@ class CaptureHandler(BaseHTTPRequestHandler):
|
||||
img_b64 = base64.b64encode(buf.getvalue()).decode()
|
||||
|
||||
elapsed_ms = (time.perf_counter() - t0) * 1000
|
||||
logger.info(f"Capture {img.width}x{img.height} en {elapsed_ms:.0f}ms")
|
||||
logger.info(
|
||||
"Capture %sx%s via %s en %.0fms",
|
||||
img.width,
|
||||
img.height,
|
||||
meta.get("backend", "unknown"),
|
||||
elapsed_ms,
|
||||
)
|
||||
|
||||
self._send_json(200, {
|
||||
"image": img_b64,
|
||||
"width": img.width,
|
||||
"height": img.height,
|
||||
"format": "jpeg",
|
||||
"source": "windows_live",
|
||||
"source": meta.get("backend", "windows_live"),
|
||||
"capture_ms": round(elapsed_ms),
|
||||
"diagnostics": meta,
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
@@ -351,21 +438,46 @@ class _FileActionHandlerLocal:
|
||||
class CaptureServer:
|
||||
"""Serveur de capture d'ecran en temps reel (thread daemon)."""
|
||||
|
||||
def __init__(self, port: int = CAPTURE_PORT):
|
||||
def __init__(self, port: int = CAPTURE_PORT, bind: str = CAPTURE_BIND):
|
||||
self._port = port
|
||||
self._bind = bind
|
||||
self._server: HTTPServer | None = None
|
||||
self._thread: threading.Thread | None = None
|
||||
|
||||
def start(self):
|
||||
"""Demarre le serveur dans un thread daemon."""
|
||||
"""Demarre le serveur dans un thread daemon.
|
||||
|
||||
Avertit si le serveur est expose sur le LAN sans token configure.
|
||||
"""
|
||||
# Defense en profondeur : refus de demarrer si expose LAN sans auth
|
||||
exposed_lan = self._bind not in ("127.0.0.1", "localhost", "::1")
|
||||
if exposed_lan and not CAPTURE_TOKEN:
|
||||
logger.error(
|
||||
"REFUS demarrage capture server : bind=%s (LAN) sans "
|
||||
"RPA_API_TOKEN. Definir le token ou RPA_CAPTURE_BIND=127.0.0.1.",
|
||||
self._bind,
|
||||
)
|
||||
print(
|
||||
f"[CAPTURE] REFUS demarrage : bind={self._bind} sans token. "
|
||||
f"Definir RPA_API_TOKEN ou RPA_CAPTURE_BIND=127.0.0.1."
|
||||
)
|
||||
return
|
||||
|
||||
try:
|
||||
self._server = HTTPServer(("0.0.0.0", self._port), CaptureHandler)
|
||||
self._server = HTTPServer((self._bind, self._port), CaptureHandler)
|
||||
self._thread = threading.Thread(
|
||||
target=self._server.serve_forever, daemon=True
|
||||
)
|
||||
self._thread.start()
|
||||
logger.info(f"Capture server demarre sur le port {self._port}")
|
||||
print(f"[CAPTURE] Serveur de capture demarre sur le port {self._port}")
|
||||
auth_mode = "token requis" if CAPTURE_TOKEN else "token absent (fail-closed)"
|
||||
logger.info(
|
||||
"Capture server demarre sur %s:%s (%s)",
|
||||
self._bind, self._port, auth_mode,
|
||||
)
|
||||
print(
|
||||
f"[CAPTURE] Serveur de capture demarre sur "
|
||||
f"{self._bind}:{self._port} ({auth_mode})"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Impossible de demarrer le capture server : {e}")
|
||||
print(f"[CAPTURE] ERREUR demarrage : {e}")
|
||||
|
||||
@@ -5,17 +5,32 @@ Fenetre de chat Lea integree au systray — version tkinter native.
|
||||
Remplace l'approche Edge browser par une vraie fenetre tkinter integree.
|
||||
Design professionnel, theme clair, ancree en bas a droite de l'ecran.
|
||||
Tourne dans son propre thread daemon pour ne pas bloquer pystray.
|
||||
|
||||
Le runtime Python embedded Windows ne contient pas toujours Tcl/Tk. Dans ce
|
||||
cas, le menu "Discuter avec Lea" ouvre le chat DGX dans le navigateur.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import math
|
||||
import threading
|
||||
import time
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
from typing import Any, Callable, Dict, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# FeedbackBus : import fail-safe (le ChatWindow doit tourner même si python-socketio
|
||||
# n'est pas installé sur le poste client, par exemple ancienne installation Pauline)
|
||||
try:
|
||||
from ..network.feedback_bus import FeedbackBusClient
|
||||
_HAS_FEEDBACK_BUS = True
|
||||
except Exception:
|
||||
FeedbackBusClient = None # type: ignore
|
||||
_HAS_FEEDBACK_BUS = False
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Theme — palette professionnelle claire
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -42,6 +57,25 @@ SCROLLBAR_BG = "#E5E7EB" # Fond scrollbar
|
||||
SCROLLBAR_FG = "#9CA3AF" # Curseur scrollbar
|
||||
MSG_BORDER_COLOR = "#D1D5DB" # Bordure subtile des bulles de messages
|
||||
|
||||
# Bulle paused_need_help (J3.5) — alerte non bloquante, asset démo majeur
|
||||
PAUSED_BG = "#FEF3C7" # Jaune pâle
|
||||
PAUSED_BORDER = "#F59E0B" # Orange ambré
|
||||
PAUSED_FG = "#92400E" # Brun foncé (lisible sur fond jaune)
|
||||
PAUSED_BTN_RESUME_BG = "#22C55E" # Vert
|
||||
PAUSED_BTN_RESUME_HOVER = "#16A34A"
|
||||
PAUSED_BTN_ABORT_BG = "#9CA3AF" # Gris neutre (pas dramatique)
|
||||
PAUSED_BTN_ABORT_HOVER = "#6B7280"
|
||||
|
||||
# Bulle "Léa exécute" (J3.4) — distincte des bulles chat normales
|
||||
ACTION_BG = "#F1F5F9" # Gris très clair (différencie d'une réponse chat)
|
||||
ACTION_BORDER = "#CBD5E1" # Gris pâle
|
||||
ACTION_FG = "#1E293B" # Gris foncé
|
||||
ACTION_META_FG = "#94A3B8" # Métadonnées en gris discret
|
||||
ACTION_ICON_RUN = "#3B82F6" # Bleu (en cours)
|
||||
ACTION_ICON_OK = "#22C55E" # Vert (succès)
|
||||
ACTION_ICON_ERR = "#EF4444" # Rouge (échec)
|
||||
ACTION_ICON_INFO = "#64748B" # Gris (neutre)
|
||||
|
||||
# Dimensions — confortables
|
||||
WIN_WIDTH = 600
|
||||
WIN_HEIGHT = 800
|
||||
@@ -62,6 +96,80 @@ FONT_SEND_BTN = ("Segoe UI", 13)
|
||||
FONT_RESIZE_GRIP = ("Segoe UI", 10)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Templates de bulles "Léa exécute" (J3.4)
|
||||
# Chaque template prend un payload et retourne (icon, icon_color, title).
|
||||
# Les libellés sont volontairement neutres : le contexte métier vient du
|
||||
# payload (workflow, action, message), pas de hardcoding.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _tpl_action_started(payload: Dict[str, Any]) -> tuple:
|
||||
wf = payload.get("workflow") or "?"
|
||||
return ("▶", ACTION_ICON_RUN, f"Démarrage : {wf}")
|
||||
|
||||
|
||||
def _tpl_action_progress(payload: Dict[str, Any]) -> tuple:
|
||||
cur = payload.get("current", "?")
|
||||
tot = payload.get("total", "?")
|
||||
step = payload.get("step")
|
||||
title = step if step else f"Étape {cur}/{tot}"
|
||||
return ("⋯", ACTION_ICON_RUN, str(title))
|
||||
|
||||
|
||||
def _tpl_done(payload: Dict[str, Any]) -> tuple:
|
||||
success = bool(payload.get("success", True))
|
||||
msg = payload.get("message") or ("Terminé" if success else "Échec")
|
||||
if success:
|
||||
return ("✓", ACTION_ICON_OK, str(msg))
|
||||
return ("✗", ACTION_ICON_ERR, str(msg))
|
||||
|
||||
|
||||
def _tpl_need_confirm(payload: Dict[str, Any]) -> tuple:
|
||||
action = payload.get("action") or {}
|
||||
desc = action.get("description") if isinstance(action, dict) else None
|
||||
title = desc or "J'attends ton accord avant de continuer"
|
||||
return ("?", ACTION_ICON_RUN, str(title))
|
||||
|
||||
|
||||
def _tpl_step_result(payload: Dict[str, Any]) -> tuple:
|
||||
status = (payload.get("status") or "").lower()
|
||||
msg = payload.get("message") or status or "Étape terminée"
|
||||
if status in ("ok", "success", "approved"):
|
||||
return ("✓", ACTION_ICON_OK, str(msg))
|
||||
if status in ("error", "failed"):
|
||||
return ("✗", ACTION_ICON_ERR, str(msg))
|
||||
return ("·", ACTION_ICON_INFO, str(msg))
|
||||
|
||||
|
||||
def _tpl_resumed(payload: Dict[str, Any]) -> tuple:
|
||||
return ("→", ACTION_ICON_OK, "Reprise")
|
||||
|
||||
|
||||
_ACTION_TEMPLATES = {
|
||||
"lea:action_started": _tpl_action_started,
|
||||
"lea:action_progress": _tpl_action_progress,
|
||||
"lea:done": _tpl_done,
|
||||
"lea:need_confirm": _tpl_need_confirm,
|
||||
"lea:step_result": _tpl_step_result,
|
||||
"lea:resumed": _tpl_resumed,
|
||||
}
|
||||
|
||||
|
||||
def _extract_meta(payload: Dict[str, Any]) -> str:
|
||||
"""Métadonnées techniques en pied de bulle (workflow, étape, replay_id court)."""
|
||||
parts = []
|
||||
wf = payload.get("workflow")
|
||||
if wf:
|
||||
parts.append(str(wf))
|
||||
cur, tot = payload.get("current"), payload.get("total")
|
||||
if cur is not None and tot is not None:
|
||||
parts.append(f"étape {cur}/{tot}")
|
||||
rid = payload.get("replay_id")
|
||||
if rid:
|
||||
parts.append(f"#{str(rid)[-6:]}")
|
||||
return " • ".join(parts)
|
||||
|
||||
|
||||
class ChatWindow:
|
||||
"""Fenetre de chat Lea en tkinter natif.
|
||||
|
||||
@@ -91,6 +199,8 @@ class ChatWindow:
|
||||
self._root = None
|
||||
self._ready = threading.Event()
|
||||
self._messages = [] # historique local
|
||||
self._bus: Optional[Any] = None # FeedbackBusClient (J3.3, peut rester None)
|
||||
self._active_paused_bubble: Optional[Dict[str, Any]] = None # bulle paused active (J3.5)
|
||||
|
||||
# S'abonner aux changements de l'etat partage
|
||||
if self._shared_state is not None:
|
||||
@@ -114,7 +224,10 @@ class ChatWindow:
|
||||
|
||||
def toggle(self) -> None:
|
||||
"""Afficher/masquer la fenetre de chat."""
|
||||
if self._destroyed or self._root is None:
|
||||
if self._destroyed:
|
||||
return
|
||||
if self._root is None:
|
||||
self._open_browser_fallback()
|
||||
return
|
||||
if self._visible:
|
||||
self.hide()
|
||||
@@ -123,7 +236,10 @@ class ChatWindow:
|
||||
|
||||
def show(self) -> None:
|
||||
"""Afficher la fenetre."""
|
||||
if self._destroyed or self._root is None:
|
||||
if self._destroyed:
|
||||
return
|
||||
if self._root is None:
|
||||
self._open_browser_fallback()
|
||||
return
|
||||
self._root.after(0, self._do_show)
|
||||
|
||||
@@ -152,6 +268,79 @@ class ChatWindow:
|
||||
"""Mettre a jour le client serveur (appele si cree apres la fenetre)."""
|
||||
self._server_client = server_client
|
||||
|
||||
def _chat_url(self) -> str:
|
||||
"""Retourne l'URL web du chat, derivee de la config serveur."""
|
||||
configured_url = self._chat_url_from_server_url(self._configured_server_url())
|
||||
if self._server_client is not None:
|
||||
chat_base = getattr(self._server_client, "_chat_base", None)
|
||||
if chat_base:
|
||||
chat_base = str(chat_base).rstrip("/")
|
||||
if not self._is_local_url(chat_base):
|
||||
return chat_base
|
||||
if configured_url:
|
||||
return configured_url
|
||||
|
||||
if configured_url:
|
||||
return configured_url
|
||||
|
||||
host = (self._server_host or "localhost").strip()
|
||||
if host.startswith(("http://", "https://")):
|
||||
parsed = urlparse(host)
|
||||
scheme = parsed.scheme or "http"
|
||||
hostname = parsed.hostname or "localhost"
|
||||
return f"{scheme}://{hostname}:{self._chat_port}"
|
||||
|
||||
return f"http://{host}:{self._chat_port}"
|
||||
|
||||
@staticmethod
|
||||
def _is_local_url(url: str) -> bool:
|
||||
try:
|
||||
host = urlparse(url).hostname
|
||||
except Exception:
|
||||
return False
|
||||
return host in {"localhost", "127.0.0.1", "::1"}
|
||||
|
||||
def _chat_url_from_server_url(self, server_url: Optional[str]) -> Optional[str]:
|
||||
if not server_url:
|
||||
return None
|
||||
try:
|
||||
parsed = urlparse(server_url.strip())
|
||||
except Exception:
|
||||
return None
|
||||
if not parsed.hostname or parsed.hostname in {"localhost", "127.0.0.1", "::1"}:
|
||||
return None
|
||||
scheme = parsed.scheme or "http"
|
||||
return f"{scheme}://{parsed.hostname}:{self._chat_port}"
|
||||
|
||||
def _configured_server_url(self) -> Optional[str]:
|
||||
env_url = os.environ.get("RPA_SERVER_URL", "").strip()
|
||||
if env_url:
|
||||
return env_url
|
||||
|
||||
try:
|
||||
# Installed layout: <app>/agent_v1/ui/chat_window.py.
|
||||
for parent in Path(__file__).resolve().parents:
|
||||
cfg = parent / "config.txt"
|
||||
if cfg.exists():
|
||||
for line in cfg.read_text(encoding="utf-8", errors="ignore").splitlines():
|
||||
if line.startswith("RPA_SERVER_URL="):
|
||||
return line.split("=", 1)[1].strip()
|
||||
except Exception:
|
||||
logger.debug("Lecture config.txt pour chat_url impossible", exc_info=True)
|
||||
return None
|
||||
|
||||
def _open_browser_fallback(self) -> None:
|
||||
"""Fallback POC quand tkinter est absent du Python embedded."""
|
||||
url = self._chat_url()
|
||||
try:
|
||||
import webbrowser
|
||||
if webbrowser.open(url, new=1):
|
||||
logger.info("ChatWindow indisponible, chat ouvert dans le navigateur: %s", url)
|
||||
else:
|
||||
logger.warning("ChatWindow indisponible, ouverture navigateur refusee: %s", url)
|
||||
except Exception as exc:
|
||||
logger.error("Impossible d'ouvrir le chat dans le navigateur (%s): %s", url, exc)
|
||||
|
||||
def _on_shared_state_change(self, state) -> None:
|
||||
"""Callback appele quand l'etat partage change (depuis le systray ou ailleurs).
|
||||
|
||||
@@ -266,6 +455,9 @@ class ChatWindow:
|
||||
# Signaler que la fenetre est prete
|
||||
self._ready.set()
|
||||
|
||||
# Demarrer le bus feedback Lea (events 'lea:*' temps reel)
|
||||
self._start_feedback_bus()
|
||||
|
||||
# Boucle tkinter
|
||||
root.mainloop()
|
||||
|
||||
@@ -608,6 +800,12 @@ class ChatWindow:
|
||||
|
||||
def _do_destroy(self) -> None:
|
||||
"""Detruit la fenetre (appele dans le thread tkinter)."""
|
||||
if self._bus is not None:
|
||||
try:
|
||||
self._bus.stop()
|
||||
except Exception:
|
||||
pass
|
||||
self._bus = None
|
||||
if self._root is not None:
|
||||
try:
|
||||
self._root.quit()
|
||||
@@ -617,6 +815,608 @@ class ChatWindow:
|
||||
self._root = None
|
||||
self._visible = False
|
||||
|
||||
# ======================================================================
|
||||
# FeedbackBus — bulles temps reel pendant l'execution (J3.3)
|
||||
# ======================================================================
|
||||
|
||||
def _start_feedback_bus(self) -> None:
|
||||
"""Demarrer la connexion au bus 'lea:*' si flag actif et lib disponible."""
|
||||
if not _HAS_FEEDBACK_BUS:
|
||||
logger.debug("FeedbackBus non disponible (python-socketio manquant)")
|
||||
return
|
||||
flag = os.environ.get("LEA_FEEDBACK_BUS", "0").lower()
|
||||
if flag not in ("1", "true", "yes", "on"):
|
||||
return
|
||||
try:
|
||||
url = f"http://{self._server_host}:{self._chat_port}"
|
||||
token = os.environ.get("RPA_API_TOKEN", "") or None
|
||||
self._bus = FeedbackBusClient(url, token=token, on_event=self._on_lea_event)
|
||||
self._bus.start()
|
||||
logger.info("FeedbackBus demarre : %s", url)
|
||||
except Exception:
|
||||
logger.debug("FeedbackBus init silenced", exc_info=True)
|
||||
self._bus = None
|
||||
|
||||
def _on_lea_event(self, event: str, payload: Dict[str, Any]) -> None:
|
||||
"""Callback bus → bulle Lea. Thread-safe : helpers utilisent root.after."""
|
||||
payload = payload or {}
|
||||
|
||||
# J3.5 : la pause supervisée a sa propre bulle interactive
|
||||
if event == "lea:paused":
|
||||
self._add_paused_bubble(payload)
|
||||
return
|
||||
if event in ("lea:resumed", "lea:done"):
|
||||
self._close_active_paused_bubble(reason=event)
|
||||
# on continue pour afficher la bulle d'action (cf. dispatch ci-dessous)
|
||||
|
||||
# Acks bus (resume_acked, abort_acked) : silencieux côté UI
|
||||
if event in ("lea:resume_acked", "lea:abort_acked"):
|
||||
return
|
||||
|
||||
# J3.4 : bulle "Léa exécute" stylisée (séparée des bulles chat normales)
|
||||
rendered = _ACTION_TEMPLATES.get(event)
|
||||
if rendered is None:
|
||||
# Event inconnu : on affiche en bulle d'action neutre
|
||||
self._add_action_bubble(
|
||||
icon="·", icon_color=ACTION_ICON_INFO,
|
||||
title=event.removeprefix("lea:"),
|
||||
meta=_extract_meta(payload),
|
||||
)
|
||||
return
|
||||
icon, icon_color, title = rendered(payload)
|
||||
self._add_action_bubble(
|
||||
icon=icon, icon_color=icon_color, title=title,
|
||||
meta=_extract_meta(payload),
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Bulle "Léa exécute" stylisée (J3.4)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _add_action_bubble(
|
||||
self, icon: str, icon_color: str, title: str, meta: str = "",
|
||||
) -> None:
|
||||
if self._root is None:
|
||||
return
|
||||
self._root.after(0, lambda: self._render_action_bubble(icon, icon_color, title, meta))
|
||||
|
||||
def _render_action_bubble(
|
||||
self, icon: str, icon_color: str, title: str, meta: str,
|
||||
) -> None:
|
||||
tk = self._tk
|
||||
if getattr(self, "_msg_frame", None) is None:
|
||||
return
|
||||
now = datetime.now().strftime("%H:%M")
|
||||
|
||||
container = tk.Frame(self._msg_frame, bg=BG_COLOR)
|
||||
container.pack(fill=tk.X, padx=MARGIN, pady=3)
|
||||
|
||||
inner = tk.Frame(
|
||||
container, bg=ACTION_BG, padx=10, pady=6,
|
||||
highlightbackground=ACTION_BORDER, highlightthickness=1,
|
||||
)
|
||||
inner.pack(anchor=tk.W, padx=(0, 70), fill=tk.X)
|
||||
|
||||
row = tk.Frame(inner, bg=ACTION_BG)
|
||||
row.pack(fill=tk.X, anchor=tk.W)
|
||||
|
||||
tk.Label(
|
||||
row, text=icon, bg=ACTION_BG, fg=icon_color,
|
||||
font=("Segoe UI", 13, "bold"), padx=4,
|
||||
).pack(side=tk.LEFT)
|
||||
|
||||
tk.Label(
|
||||
row, text=title, bg=ACTION_BG, fg=ACTION_FG,
|
||||
font=FONT_MSG, anchor="w", justify=tk.LEFT,
|
||||
wraplength=MSG_WRAP_WIDTH - 60,
|
||||
).pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(2, 0))
|
||||
|
||||
if meta:
|
||||
tk.Label(
|
||||
inner, text=f"{meta} • {now}",
|
||||
bg=ACTION_BG, fg=ACTION_META_FG,
|
||||
font=FONT_TIMESTAMP, anchor="w",
|
||||
).pack(fill=tk.X, anchor=tk.W, pady=(2, 0))
|
||||
|
||||
# UX fix 8 mai 2026 : auto-scroll après chaque bulle d'action
|
||||
self._scroll_to_bottom()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Bulle paused_need_help interactive (J3.5)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _add_paused_bubble(self, payload: Dict[str, Any]) -> None:
|
||||
"""Ajouter une bulle paused interactive (asset démo : Léa demande de l'aide).
|
||||
|
||||
IMPORTANT (8 mai 2026, démo GHT) : par défaut la fenêtre démarre cachée
|
||||
(`root.withdraw()`). Il FAUT la rendre visible et la forcer au premier
|
||||
plan, sinon Dom ne voit jamais la bulle. On exécute dans le thread
|
||||
tkinter via `root.after(0, ...)`.
|
||||
|
||||
UX fix 8 mai 2026 : un seul affichage (la bulle chat). Plus de toast
|
||||
en double — on force juste la chat window au premier plan.
|
||||
"""
|
||||
if self._root is None:
|
||||
return
|
||||
|
||||
def _show_and_render():
|
||||
try:
|
||||
self._do_show()
|
||||
# Re-pin topmost pour passer devant les apps actives
|
||||
self._root.attributes("-topmost", True)
|
||||
self._root.lift()
|
||||
# Bell sonore léger pour attirer l'attention (Tkinter natif)
|
||||
try:
|
||||
self._root.bell()
|
||||
except Exception:
|
||||
pass
|
||||
except Exception:
|
||||
logger.debug("force-show chat_window silenced", exc_info=True)
|
||||
try:
|
||||
# UX fix mai 2026 : repartir d'un chat vide pour focaliser
|
||||
# l'attention sur la question (clear visuel uniquement,
|
||||
# self._messages reste intact pour la traçabilité debug).
|
||||
self._clear_chat_history()
|
||||
self._render_paused_bubble(payload)
|
||||
except Exception:
|
||||
logger.exception("render paused bubble failed; using fallback")
|
||||
try:
|
||||
self._clear_chat_history()
|
||||
self._render_paused_fallback_bubble(payload)
|
||||
except Exception:
|
||||
logger.debug("render paused fallback silenced", exc_info=True)
|
||||
|
||||
self._root.after(0, _show_and_render)
|
||||
|
||||
def _clear_chat_history(self) -> None:
|
||||
"""Vide la zone d'affichage du chat (widgets enfants de _msg_frame).
|
||||
|
||||
Détruit uniquement les widgets visuels — self._messages (liste mémoire)
|
||||
reste intact pour la traçabilité debug. Cohérent avec _do_remove_typing
|
||||
qui détruit aussi le widget sans toucher à l'historique.
|
||||
"""
|
||||
if getattr(self, "_msg_frame", None) is None:
|
||||
return
|
||||
try:
|
||||
for child in list(self._msg_frame.winfo_children()):
|
||||
child.destroy()
|
||||
self._active_paused_bubble = None
|
||||
if hasattr(self, "_typing_frame"):
|
||||
self._typing_frame = None
|
||||
self._scroll_to_bottom()
|
||||
except Exception:
|
||||
logger.debug("clear chat history silenced", exc_info=True)
|
||||
|
||||
@staticmethod
|
||||
def _compute_paused_bubble_height(
|
||||
reason_str: str,
|
||||
chars_per_line: int = 52,
|
||||
max_rows: int = 14,
|
||||
) -> tuple:
|
||||
"""Calcule la hauteur du Text (en lignes) + si une scrollbar est
|
||||
nécessaire pour le message d'une bulle paused.
|
||||
|
||||
Patch 22 mai 2026 — fix troncature : on prend en compte les \\n
|
||||
explicites (les `reason` serveur peuvent lister plusieurs
|
||||
candidats avec un saut de ligne par item) en plus de la longueur
|
||||
en caractères, et on active la scrollbar dès que le cap est
|
||||
atteint pour éviter que du contenu disparaisse silencieusement.
|
||||
|
||||
Retourne ``(height_lines, needs_scrollbar)``.
|
||||
"""
|
||||
if not reason_str:
|
||||
return 2, False
|
||||
text = str(reason_str)
|
||||
chars_per_line = max(24, int(chars_per_line or 52))
|
||||
estimated = 0
|
||||
for raw_line in text.splitlines() or [""]:
|
||||
estimated += max(1, math.ceil(len(raw_line) / chars_per_line))
|
||||
cap = max(2, int(max_rows or 14))
|
||||
height = max(2, min(cap, estimated))
|
||||
# Scrollbar dès que le cap est atteint OU contenu long (filet
|
||||
# textuel : ≥ 200 chars implique souvent un débordement visuel
|
||||
# même quand les lignes brutes sont peu nombreuses).
|
||||
needs_scroll = (estimated >= cap) or (len(text) > 200)
|
||||
return height, needs_scroll
|
||||
|
||||
def _paused_text_layout(self) -> tuple:
|
||||
"""Retourne ``(wrap_px, chars_per_line, max_rows)`` pour la bulle pause.
|
||||
|
||||
La fenêtre Léa est souvent redimensionnée à ~380px de large sur le
|
||||
poste Windows. Les anciennes estimations fixes calculaient trop peu
|
||||
de lignes et tronquaient le message. On part donc des dimensions
|
||||
réelles du canvas et de la métrique de la police Tk.
|
||||
"""
|
||||
canvas_w = 0
|
||||
canvas_h = 0
|
||||
try:
|
||||
canvas_w = int(self._canvas.winfo_width()) if self._canvas is not None else 0
|
||||
canvas_h = int(self._canvas.winfo_height()) if self._canvas is not None else 0
|
||||
except Exception:
|
||||
canvas_w = canvas_h = 0
|
||||
|
||||
# Marges: container + padding inner + petite marge droite. La bulle
|
||||
# de pause est une alerte critique, elle utilise donc presque toute
|
||||
# la largeur disponible sur les fenêtres étroites.
|
||||
wrap_px = max(220, canvas_w - (2 * MARGIN) - 52) if canvas_w else 360
|
||||
|
||||
avg_char = 8
|
||||
line_px = 22
|
||||
try:
|
||||
from tkinter import font as tkfont
|
||||
font = tkfont.Font(font=FONT_MSG)
|
||||
avg_char = max(6, font.measure("n"))
|
||||
line_px = max(18, font.metrics("linespace"))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
chars_per_line = max(24, int(wrap_px / avg_char))
|
||||
# Réserver titre, metadata, boutons, feedback et padding. Même sur
|
||||
# une petite fenêtre, on garde assez de lignes pour ne pas couper un
|
||||
# message d'erreur standard.
|
||||
max_rows = 14
|
||||
if canvas_h:
|
||||
max_rows = max(5, min(18, int((canvas_h - 145) / line_px)))
|
||||
return wrap_px, chars_per_line, max_rows
|
||||
|
||||
def _render_paused_bubble(self, payload: Dict[str, Any]) -> None:
|
||||
tk = self._tk
|
||||
if getattr(self, "_msg_frame", None) is None:
|
||||
return
|
||||
|
||||
replay_id = str(payload.get("replay_id", "") or "")
|
||||
workflow = payload.get("workflow", "?")
|
||||
reason = payload.get("reason") or "Action incertaine — j'ai besoin de votre validation."
|
||||
completed = payload.get("completed", 0)
|
||||
total = payload.get("total", "?")
|
||||
now = datetime.now().strftime("%H:%M")
|
||||
|
||||
container = tk.Frame(self._msg_frame, bg=BG_COLOR)
|
||||
container.pack(fill=tk.X, padx=MARGIN, pady=6)
|
||||
|
||||
inner = tk.Frame(
|
||||
container, bg=PAUSED_BG, padx=14, pady=12,
|
||||
highlightbackground=PAUSED_BORDER, highlightthickness=2,
|
||||
)
|
||||
inner.pack(anchor=tk.W, padx=(0, 12), fill=tk.X)
|
||||
|
||||
tk.Label(
|
||||
inner, text=f"⏸ Pause supervisée • {now}",
|
||||
bg=PAUSED_BG, fg=PAUSED_FG,
|
||||
font=("Segoe UI", 12, "bold"), anchor="w",
|
||||
).pack(fill=tk.X, anchor=tk.W)
|
||||
|
||||
# Message borné et scrollable : sur une fenêtre Léa étroite, une
|
||||
# bulle trop haute fait disparaître le début du diagnostic hors du
|
||||
# viewport. On garde donc la bulle compacte et on scrolle le texte.
|
||||
reason_str = str(reason)
|
||||
_wrap_px, chars_per_line, max_rows = self._paused_text_layout()
|
||||
text_rows, needs_text_scroll = self._compute_paused_bubble_height(
|
||||
reason_str,
|
||||
chars_per_line=chars_per_line,
|
||||
max_rows=max_rows,
|
||||
)
|
||||
text_frame = tk.Frame(inner, bg=PAUSED_BG)
|
||||
text_frame.pack(fill=tk.X, anchor=tk.W, pady=(6, 0))
|
||||
reason_msg = tk.Text(
|
||||
text_frame,
|
||||
height=text_rows,
|
||||
wrap=tk.WORD,
|
||||
bg=PAUSED_BG,
|
||||
fg=PAUSED_FG,
|
||||
font=FONT_MSG,
|
||||
bd=0,
|
||||
highlightthickness=0,
|
||||
relief=tk.FLAT,
|
||||
padx=0,
|
||||
pady=0,
|
||||
cursor="arrow",
|
||||
)
|
||||
reason_msg.insert("1.0", reason_str)
|
||||
reason_msg.configure(state="disabled")
|
||||
reason_msg.pack(side=tk.LEFT, fill=tk.X, expand=True)
|
||||
if needs_text_scroll:
|
||||
scrollbar = tk.Scrollbar(
|
||||
text_frame,
|
||||
orient=tk.VERTICAL,
|
||||
command=reason_msg.yview,
|
||||
width=12,
|
||||
)
|
||||
reason_msg.configure(yscrollcommand=scrollbar.set)
|
||||
scrollbar.pack(side=tk.RIGHT, fill=tk.Y, padx=(6, 0))
|
||||
|
||||
tk.Label(
|
||||
inner, text=f"{workflow} — étape {completed}/{total}",
|
||||
bg=PAUSED_BG, fg=TIMESTAMP_FG, font=FONT_TIMESTAMP, anchor="w",
|
||||
).pack(fill=tk.X, anchor=tk.W, pady=(4, 8))
|
||||
|
||||
btn_frame = tk.Frame(inner, bg=PAUSED_BG)
|
||||
btn_frame.pack(fill=tk.X, anchor=tk.W)
|
||||
|
||||
btn_resume = tk.Button(
|
||||
btn_frame, text="Continuer",
|
||||
bg=PAUSED_BTN_RESUME_BG, fg="white", font=FONT_QUICK_BTN,
|
||||
padx=14, pady=4, bd=0, cursor="hand2",
|
||||
activebackground=PAUSED_BTN_RESUME_HOVER, activeforeground="white",
|
||||
command=lambda: self._on_paused_resume(replay_id),
|
||||
)
|
||||
btn_resume.pack(side=tk.LEFT, padx=(0, 8))
|
||||
|
||||
btn_abort = tk.Button(
|
||||
btn_frame, text="Annuler",
|
||||
bg=PAUSED_BTN_ABORT_BG, fg="white", font=FONT_QUICK_BTN,
|
||||
padx=14, pady=4, bd=0, cursor="hand2",
|
||||
activebackground=PAUSED_BTN_ABORT_HOVER, activeforeground="white",
|
||||
command=lambda: self._on_paused_abort(replay_id),
|
||||
)
|
||||
btn_abort.pack(side=tk.LEFT)
|
||||
|
||||
# Zone de feedback (mise à jour après clic, avant l'ack du bus)
|
||||
feedback_label = tk.Label(
|
||||
inner, text="", bg=PAUSED_BG, fg=PAUSED_FG,
|
||||
font=FONT_TIMESTAMP, anchor="w",
|
||||
)
|
||||
feedback_label.pack(fill=tk.X, anchor=tk.W, pady=(6, 0))
|
||||
|
||||
self._active_paused_bubble = {
|
||||
"container": container, "inner": inner,
|
||||
"btn_resume": btn_resume, "btn_abort": btn_abort,
|
||||
"feedback_label": feedback_label,
|
||||
"replay_id": replay_id,
|
||||
}
|
||||
|
||||
# Scroll automatique vers la nouvelle bulle (visible immédiatement)
|
||||
self._scroll_to_bottom()
|
||||
|
||||
def _render_paused_fallback_bubble(self, payload: Dict[str, Any]) -> None:
|
||||
"""Rendu minimal de secours si la bulle riche echoue."""
|
||||
tk = self._tk
|
||||
if getattr(self, "_msg_frame", None) is None:
|
||||
return
|
||||
|
||||
replay_id = str(payload.get("replay_id", "") or "")
|
||||
workflow = payload.get("workflow", "?")
|
||||
reason = str(
|
||||
payload.get("reason")
|
||||
or "Action incertaine - j'ai besoin de votre validation."
|
||||
)
|
||||
completed = payload.get("completed", 0)
|
||||
total = payload.get("total", "?")
|
||||
now = datetime.now().strftime("%H:%M")
|
||||
|
||||
container = tk.Frame(self._msg_frame, bg=BG_COLOR)
|
||||
container.pack(fill=tk.X, padx=MARGIN, pady=6)
|
||||
|
||||
inner = tk.Frame(
|
||||
container, bg=PAUSED_BG, padx=14, pady=12,
|
||||
highlightbackground=PAUSED_BORDER, highlightthickness=2,
|
||||
)
|
||||
inner.pack(anchor=tk.W, padx=(0, 12), fill=tk.X)
|
||||
|
||||
tk.Label(
|
||||
inner, text=f"Pause supervisee - {now}",
|
||||
bg=PAUSED_BG, fg=PAUSED_FG,
|
||||
font=("Segoe UI", 12, "bold"), anchor="w",
|
||||
).pack(fill=tk.X, anchor=tk.W)
|
||||
|
||||
wrap_px = 360
|
||||
try:
|
||||
if self._canvas is not None:
|
||||
wrap_px = max(220, int(self._canvas.winfo_width()) - 80)
|
||||
except Exception:
|
||||
pass
|
||||
tk.Label(
|
||||
inner, text=reason, bg=PAUSED_BG, fg=PAUSED_FG,
|
||||
font=FONT_MSG, wraplength=wrap_px, justify=tk.LEFT,
|
||||
anchor=tk.W,
|
||||
).pack(fill=tk.X, anchor=tk.W, pady=(6, 0))
|
||||
|
||||
tk.Label(
|
||||
inner, text=f"{workflow} - etape {completed}/{total}",
|
||||
bg=PAUSED_BG, fg=TIMESTAMP_FG, font=FONT_TIMESTAMP, anchor="w",
|
||||
).pack(fill=tk.X, anchor=tk.W, pady=(4, 8))
|
||||
|
||||
btn_frame = tk.Frame(inner, bg=PAUSED_BG)
|
||||
btn_frame.pack(fill=tk.X, anchor=tk.W)
|
||||
|
||||
btn_resume = tk.Button(
|
||||
btn_frame, text="Continuer",
|
||||
bg=PAUSED_BTN_RESUME_BG, fg="white", font=FONT_QUICK_BTN,
|
||||
padx=14, pady=4, bd=0, cursor="hand2",
|
||||
activebackground=PAUSED_BTN_RESUME_HOVER, activeforeground="white",
|
||||
command=lambda: self._on_paused_resume(replay_id),
|
||||
)
|
||||
btn_resume.pack(side=tk.LEFT, padx=(0, 8))
|
||||
|
||||
btn_abort = tk.Button(
|
||||
btn_frame, text="Annuler",
|
||||
bg=PAUSED_BTN_ABORT_BG, fg="white", font=FONT_QUICK_BTN,
|
||||
padx=14, pady=4, bd=0, cursor="hand2",
|
||||
activebackground=PAUSED_BTN_ABORT_HOVER, activeforeground="white",
|
||||
command=lambda: self._on_paused_abort(replay_id),
|
||||
)
|
||||
btn_abort.pack(side=tk.LEFT)
|
||||
|
||||
feedback_label = tk.Label(
|
||||
inner, text="", bg=PAUSED_BG, fg=PAUSED_FG,
|
||||
font=FONT_TIMESTAMP, anchor="w",
|
||||
)
|
||||
feedback_label.pack(fill=tk.X, anchor=tk.W, pady=(6, 0))
|
||||
|
||||
self._active_paused_bubble = {
|
||||
"container": container, "inner": inner,
|
||||
"btn_resume": btn_resume, "btn_abort": btn_abort,
|
||||
"feedback_label": feedback_label,
|
||||
"replay_id": replay_id,
|
||||
}
|
||||
self._scroll_to_bottom()
|
||||
|
||||
def _close_active_paused_bubble(self, reason: str) -> None:
|
||||
if self._active_paused_bubble is None or self._root is None:
|
||||
return
|
||||
self._root.after(0, lambda: self._do_close_paused_bubble(reason))
|
||||
|
||||
def _do_close_paused_bubble(self, reason: str) -> None:
|
||||
bubble = self._active_paused_bubble
|
||||
if bubble is None:
|
||||
return
|
||||
try:
|
||||
bubble["btn_resume"].config(state="disabled")
|
||||
bubble["btn_abort"].config(state="disabled")
|
||||
label_text = {
|
||||
"lea:resumed": "→ Reprise",
|
||||
"lea:done": "→ Terminé",
|
||||
}.get(reason, f"→ {reason}")
|
||||
self._tk.Label(
|
||||
bubble["inner"], text=label_text,
|
||||
bg=PAUSED_BG, fg=PAUSED_FG, font=FONT_TIMESTAMP, anchor="w",
|
||||
).pack(fill="x", anchor="w", pady=(6, 0))
|
||||
except Exception:
|
||||
logger.debug("close paused bubble silenced", exc_info=True)
|
||||
self._active_paused_bubble = None
|
||||
|
||||
def _on_paused_resume(self, replay_id: str) -> None:
|
||||
"""Bouton Continuer : émettre lea:replay_resume + feedback immédiat UI.
|
||||
|
||||
UX fix 8 mai 2026 : on désactive les 2 boutons et on affiche un message
|
||||
de feedback dès le clic, sans attendre l'ack serveur. Le bus émet en
|
||||
arrière-plan ; si la connexion est tombée, on log un warning visible.
|
||||
|
||||
Fallback HTTP 22 mai 2026 : si le bus SocketIO est déconnecté, on
|
||||
retombe sur un POST direct ``/replay/{id}/resume`` via
|
||||
``server_client``. Si les deux échouent, on ré-active les boutons
|
||||
et on saute l'auto-hide pour permettre à l'utilisateur de
|
||||
réessayer manuellement (sinon le replay reste figé côté serveur).
|
||||
"""
|
||||
if not replay_id:
|
||||
self._update_paused_feedback("⚠ replay_id manquant — impossible de relancer")
|
||||
return
|
||||
emitted, channel = self._dispatch_paused_action(
|
||||
replay_id,
|
||||
bus_method="resume_replay",
|
||||
client_method="resume_replay",
|
||||
)
|
||||
self._disable_paused_buttons()
|
||||
if emitted:
|
||||
self._update_paused_feedback("→ Reprise demandée…")
|
||||
logger.info(
|
||||
"paused_bubble: replay_resume émis pour %s via %s",
|
||||
replay_id, channel,
|
||||
)
|
||||
try:
|
||||
self._root.after(500, self._do_hide)
|
||||
except Exception:
|
||||
logger.debug("auto-hide on resume silenced", exc_info=True)
|
||||
return
|
||||
# Échec sur les deux canaux : laisser l'utilisateur réessayer.
|
||||
self._update_paused_feedback("⚠ Serveur injoignable — réessayez")
|
||||
self._enable_paused_buttons()
|
||||
logger.warning(
|
||||
"paused_bubble: bus et HTTP indisponibles, resume non émis "
|
||||
"pour %s", replay_id,
|
||||
)
|
||||
|
||||
def _on_paused_abort(self, replay_id: str) -> None:
|
||||
"""Bouton Annuler : émettre lea:replay_abort + fermeture locale immédiate.
|
||||
|
||||
UX fix 8 mai 2026 : on ferme la bulle localement dès le clic (le serveur
|
||||
n'envoie pas de lea:resumed pour un abort, donc sans cette fermeture
|
||||
locale la bulle restait coincée — c'était la cause de "Annuler ne
|
||||
fonctionne pas" rapportée par Dom).
|
||||
|
||||
Fallback HTTP 22 mai 2026 : symétrique de ``_on_paused_resume`` —
|
||||
si le bus est déconnecté, POST direct ``/replay/{id}/cancel``.
|
||||
L'abort ferme la bulle localement quelle que soit l'issue (l'état
|
||||
serveur sera réconcilié au prochain poll /replay/next).
|
||||
"""
|
||||
emitted, channel = self._dispatch_paused_action(
|
||||
replay_id,
|
||||
bus_method="abort_replay",
|
||||
client_method="abort_replay",
|
||||
)
|
||||
self._disable_paused_buttons()
|
||||
if emitted:
|
||||
self._update_paused_feedback("✗ Annulé")
|
||||
logger.info(
|
||||
"paused_bubble: replay_abort émis pour %s via %s",
|
||||
replay_id, channel,
|
||||
)
|
||||
else:
|
||||
self._update_paused_feedback("✗ Annulé (serveur injoignable)")
|
||||
logger.warning(
|
||||
"paused_bubble: bus et HTTP indisponibles, abort non émis "
|
||||
"pour %s", replay_id,
|
||||
)
|
||||
# Fermer la bulle en local (l'abort n'a pas de lea:resumed associé)
|
||||
self._close_active_paused_bubble(reason="abort_local")
|
||||
# UX fix mai 2026 : minimiser la fenêtre après 500ms (cohérence
|
||||
# avec _on_paused_resume, demandé explicitement par Dom).
|
||||
try:
|
||||
self._root.after(500, self._do_hide)
|
||||
except Exception:
|
||||
logger.debug("auto-hide on abort silenced", exc_info=True)
|
||||
|
||||
def _dispatch_paused_action(
|
||||
self,
|
||||
replay_id: str,
|
||||
bus_method: str,
|
||||
client_method: str,
|
||||
) -> tuple:
|
||||
"""Envoyer une action de bulle paused via bus puis fallback HTTP.
|
||||
|
||||
Retourne ``(emitted, channel)`` où ``channel`` vaut ``"bus"``,
|
||||
``"http"`` ou ``""`` (aucun chemin n'a abouti).
|
||||
"""
|
||||
if self._bus is not None and getattr(self._bus, "connected", False):
|
||||
try:
|
||||
if getattr(self._bus, bus_method)(replay_id):
|
||||
return True, "bus"
|
||||
except Exception:
|
||||
logger.debug("paused_bubble: bus %s silenced", bus_method, exc_info=True)
|
||||
if self._server_client is not None and hasattr(self._server_client, client_method):
|
||||
try:
|
||||
if getattr(self._server_client, client_method)(replay_id):
|
||||
return True, "http"
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"paused_bubble: server_client %s silenced",
|
||||
client_method, exc_info=True,
|
||||
)
|
||||
return False, ""
|
||||
|
||||
def _disable_paused_buttons(self) -> None:
|
||||
if not self._active_paused_bubble:
|
||||
return
|
||||
try:
|
||||
self._active_paused_bubble["btn_resume"].config(state="disabled")
|
||||
self._active_paused_bubble["btn_abort"].config(state="disabled")
|
||||
except Exception:
|
||||
logger.debug("disable paused buttons silenced", exc_info=True)
|
||||
|
||||
def _enable_paused_buttons(self) -> None:
|
||||
"""Ré-activer les boutons Continuer/Annuler de la bulle paused
|
||||
active. Appelé quand l'envoi a échoué sur tous les canaux —
|
||||
l'utilisateur doit pouvoir réessayer manuellement.
|
||||
"""
|
||||
if not self._active_paused_bubble:
|
||||
return
|
||||
try:
|
||||
self._active_paused_bubble["btn_resume"].config(state="normal")
|
||||
self._active_paused_bubble["btn_abort"].config(state="normal")
|
||||
except Exception:
|
||||
logger.debug("enable paused buttons silenced", exc_info=True)
|
||||
|
||||
def _update_paused_feedback(self, text: str) -> None:
|
||||
if not self._active_paused_bubble:
|
||||
return
|
||||
label = self._active_paused_bubble.get("feedback_label")
|
||||
if label is None:
|
||||
return
|
||||
try:
|
||||
label.config(text=text)
|
||||
except Exception:
|
||||
logger.debug("update paused feedback silenced", exc_info=True)
|
||||
|
||||
# ======================================================================
|
||||
# Ajout de messages dans la zone de chat
|
||||
# ======================================================================
|
||||
@@ -957,8 +1757,19 @@ class ChatWindow:
|
||||
self._add_lea_message(
|
||||
f"C'est parti ! Montrez-moi comment faire \u00ab {name} \u00bb."
|
||||
)
|
||||
|
||||
# --- P1-LEA-SHADOW : d\u00e9clencher d'abord l'orchestrateur L\u00e9a Linux ---
|
||||
# On contacte agent-chat AVANT la capture locale : si la session
|
||||
# serveur d\u00e9marre, on r\u00e9cup\u00e8re un session_id + un message d'accueil
|
||||
# de L\u00e9a qu'on affiche dans le chat. Si \u00e9chec : mode d\u00e9grad\u00e9
|
||||
# (capture locale uniquement, sans assistance conversationnelle).
|
||||
self._start_lea_orchestrator_session(name)
|
||||
|
||||
# --- Comportement historique pr\u00e9serv\u00e9 : capture locale ---
|
||||
# Le pipeline streaming (frames/\u00e9v\u00e9nements) reste pilot\u00e9 par
|
||||
# agent_v1 local. L'orchestrateur Linux ne touche PAS \u00e0 la
|
||||
# capture, il pilote uniquement le dialogue de fin de session.
|
||||
try:
|
||||
# Utiliser l'etat partage si disponible (synchronise le systray)
|
||||
if self._shared_state is not None:
|
||||
self._shared_state.start_recording(name)
|
||||
elif self._on_start_callback is not None:
|
||||
@@ -966,6 +1777,60 @@ class ChatWindow:
|
||||
except Exception as e:
|
||||
self._add_lea_message(f"Oups, un probl\u00e8me : {e}")
|
||||
|
||||
def _start_lea_orchestrator_session(self, session_name: str) -> None:
|
||||
"""Appelle POST /api/learn/start c\u00f4t\u00e9 agent-chat Linux (P1-LEA-SHADOW).
|
||||
|
||||
Fail-safe : toute erreur (config absente, httpx manquant, timeout,
|
||||
500 serveur...) bascule en mode d\u00e9grad\u00e9 sans bloquer la capture
|
||||
locale. Un message clair est affich\u00e9 dans le chat.
|
||||
"""
|
||||
try:
|
||||
from ..config import AGENT_CHAT_URL, API_TOKEN, MACHINE_ID
|
||||
from ..network.lea_orchestrator_client import (
|
||||
LeaOrchestratorError,
|
||||
start_learning_session,
|
||||
)
|
||||
except Exception as exc: # pragma: no cover (import-time)
|
||||
logger.error("Impossible de charger le client orchestrateur L\u00e9a : %s", exc)
|
||||
self._add_lea_message(
|
||||
"\u26a0 Impossible de joindre L\u00e9a serveur. "
|
||||
"L'apprentissage continue localement, mais sans assistance "
|
||||
"conversationnelle."
|
||||
)
|
||||
return
|
||||
|
||||
try:
|
||||
resp = start_learning_session(
|
||||
AGENT_CHAT_URL,
|
||||
machine_id=MACHINE_ID,
|
||||
session_name=session_name,
|
||||
api_token=API_TOKEN,
|
||||
trigger_source="windows_button",
|
||||
)
|
||||
except LeaOrchestratorError as exc:
|
||||
logger.error("Orchestrateur L\u00e9a injoignable : %s", exc)
|
||||
self._add_lea_message(
|
||||
"\u26a0 Impossible de joindre L\u00e9a serveur. "
|
||||
"L'apprentissage continue localement, mais sans assistance "
|
||||
"conversationnelle."
|
||||
)
|
||||
return
|
||||
except Exception as exc: # noqa: BLE001 \u2014 d\u00e9fensif
|
||||
logger.exception("Erreur inattendue orchestrateur L\u00e9a")
|
||||
self._add_lea_message(
|
||||
f"\u26a0 Erreur orchestrateur L\u00e9a : {exc}. "
|
||||
"L'apprentissage continue localement."
|
||||
)
|
||||
return
|
||||
|
||||
# Affichage du message d'accueil renvoy\u00e9 par L\u00e9a (si pr\u00e9sent)
|
||||
if resp.message:
|
||||
self._add_lea_message(resp.message)
|
||||
logger.info(
|
||||
"Session orchestrateur L\u00e9a OK : id=%s state=%s",
|
||||
resp.session_id, resp.state,
|
||||
)
|
||||
|
||||
def _on_quick_tasks(self) -> None:
|
||||
"""Bouton Lancer — demande ce que L\u00e9a sait faire."""
|
||||
self._add_user_message("Qu'est-ce que vous savez faire ?")
|
||||
|
||||
484
agent_v0/agent_v1/ui/message_contract.py
Normal file
484
agent_v0/agent_v1/ui/message_contract.py
Normal file
@@ -0,0 +1,484 @@
|
||||
"""Contrat de lisibilite des messages visibles par l'humain.
|
||||
|
||||
Ce module ne branche encore aucun point runtime. Il fournit une brique pure et
|
||||
testable pour que les sorties UI de Lea puissent refuser les messages trop
|
||||
generiques ou trop techniques avant affichage.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
import unicodedata
|
||||
from dataclasses import dataclass
|
||||
from typing import Iterable, Mapping
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
SUPERVISED_PAUSE_LABELS = (
|
||||
"J'essaie de",
|
||||
"J'attendais",
|
||||
"Je vois",
|
||||
"Peux-tu",
|
||||
)
|
||||
|
||||
MAX_VISIBLE_MESSAGE_CHARS = 720
|
||||
MAX_FIELD_CHARS = 180
|
||||
MIN_FIELD_CHARS = 4
|
||||
|
||||
_GENERIC_PHRASES = (
|
||||
"un element",
|
||||
"un élément",
|
||||
"l'element",
|
||||
"l'élément",
|
||||
"element inconnu",
|
||||
"élément inconnu",
|
||||
"cette action",
|
||||
"cette cible",
|
||||
"cible inconnue",
|
||||
"validation requise",
|
||||
"action requise",
|
||||
)
|
||||
|
||||
_ACTIONABLE_FRENCH_HINTS = (
|
||||
"peux-tu",
|
||||
"cliquer",
|
||||
"ouvrir",
|
||||
"selectionner",
|
||||
"sélectionner",
|
||||
"choisir",
|
||||
"saisir",
|
||||
"corriger",
|
||||
"montrer",
|
||||
"indiquer",
|
||||
"valider",
|
||||
"fermer",
|
||||
"placer",
|
||||
"mettre",
|
||||
"reprendre",
|
||||
)
|
||||
|
||||
_TECHNICAL_ENGLISH_TERMS = (
|
||||
"target_not_found",
|
||||
"target not found",
|
||||
"no_screen_change",
|
||||
"no screen change",
|
||||
"wrong_window",
|
||||
"wrong window",
|
||||
"validation required",
|
||||
"retry",
|
||||
"fallback",
|
||||
"timeout",
|
||||
"screenshot",
|
||||
"validator",
|
||||
"failure",
|
||||
"failed",
|
||||
"resolve target",
|
||||
"postcondition",
|
||||
"please",
|
||||
"click",
|
||||
"button",
|
||||
"target",
|
||||
"expected",
|
||||
"actual",
|
||||
"observed",
|
||||
)
|
||||
|
||||
_TECHNICAL_FIELD_RE = re.compile(
|
||||
r"\b(?:"
|
||||
r"action_id|replay_id|session_id|workflow_id|machine_id|target_spec|"
|
||||
r"vlm_description|resolution_method|resolution_score|retry_count|"
|
||||
r"x_pct|y_pct|screenshot_b64|expected_window_title|current_action_index"
|
||||
r")\b",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
_TECHNICAL_IDENTIFIER_RE = re.compile(
|
||||
r"\b(?:action|replay|session|sess|workflow|node|edge|target|retry|"
|
||||
r"precheck|wait|trace|event|machine|run)_[A-Za-z0-9][A-Za-z0-9_.:-]{3,}\b"
|
||||
)
|
||||
_UUID_RE = re.compile(
|
||||
r"\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
_LONG_HEX_RE = re.compile(r"\b[0-9a-f]{16,}\b", re.IGNORECASE)
|
||||
_PIXEL_TUPLE_RE = re.compile(r"\(\s*\d{2,5}\s*,\s*\d{2,5}\s*\)")
|
||||
_PIXEL_FIELD_RE = re.compile(
|
||||
r"\b(?:x|y|left|top|width|height|w|h|x_pct|y_pct)\s*[=:]\s*-?\d+(?:[.,]\d+)?",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
_PX_RE = re.compile(r"\b\d{2,5}\s*px\b", re.IGNORECASE)
|
||||
_SCORE_RE = re.compile(
|
||||
r"\b(?:score|confidence|confiance|similarit[eé]|threshold|seuil|"
|
||||
r"probabilit[eé])\s*[:=]\s*\d+(?:[.,]\d+)?%?\b",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class MessageValidationIssue:
|
||||
"""Un probleme detecte dans un message visible par l'humain."""
|
||||
|
||||
code: str
|
||||
detail: str
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class MessageValidationResult:
|
||||
"""Resultat de validation d'un message utilisateur."""
|
||||
|
||||
issues: tuple[MessageValidationIssue, ...] = ()
|
||||
|
||||
@property
|
||||
def valid(self) -> bool:
|
||||
return not self.issues
|
||||
|
||||
def raise_for_errors(self) -> None:
|
||||
if not self.valid:
|
||||
raise MessageContractError(self)
|
||||
|
||||
|
||||
class MessageContractError(ValueError):
|
||||
"""Erreur levee quand un message ne respecte pas le contrat humain."""
|
||||
|
||||
def __init__(self, result: MessageValidationResult):
|
||||
self.result = result
|
||||
details = "; ".join(f"{issue.code}: {issue.detail}" for issue in result.issues)
|
||||
super().__init__(f"Message humain invalide: {details}")
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class SupervisedPauseFields:
|
||||
"""Champs obligatoires pour expliquer une pause supervisee."""
|
||||
|
||||
intention: str
|
||||
attendu: str
|
||||
vu: str
|
||||
demande: str
|
||||
|
||||
|
||||
DEFAULT_SUPERVISED_PAUSE_FIELDS = SupervisedPauseFields(
|
||||
intention="continuer une etape supervisee",
|
||||
attendu="un accord humain clair avant de continuer",
|
||||
vu="je suis sur une etape qui demande une verification humaine",
|
||||
demande="indiquer si je peux continuer ou corriger l'action attendue",
|
||||
)
|
||||
|
||||
|
||||
def format_supervised_pause_message(
|
||||
*,
|
||||
intention: str,
|
||||
attendu: str,
|
||||
vu: str,
|
||||
demande: str,
|
||||
) -> str:
|
||||
"""Formatter une pause supervisee claire et actionnable.
|
||||
|
||||
Le message retourne exactement quatre lignes. Si un champ reste vague ou
|
||||
technique, la fonction leve ``MessageContractError`` au lieu de produire un
|
||||
message degradant pour l'utilisateur.
|
||||
"""
|
||||
|
||||
fields = SupervisedPauseFields(
|
||||
intention=_one_line(intention),
|
||||
attendu=_one_line(attendu),
|
||||
vu=_one_line(vu),
|
||||
demande=_one_line(demande),
|
||||
)
|
||||
message = "\n".join(
|
||||
(
|
||||
f"J'essaie de : {fields.intention}",
|
||||
f"J'attendais : {fields.attendu}",
|
||||
f"Je vois : {fields.vu}",
|
||||
f"Peux-tu : {fields.demande}",
|
||||
)
|
||||
)
|
||||
validate_supervised_pause_message(message).raise_for_errors()
|
||||
return message
|
||||
|
||||
|
||||
def format_supervised_pause_from_mapping(payload: Mapping[str, object]) -> str:
|
||||
"""Formatter depuis un mapping runtime avec noms de champs explicites.
|
||||
|
||||
Alias acceptes pour faciliter l'integration progressive:
|
||||
``intention|trying_to``, ``attendu|expected``, ``vu|observed``,
|
||||
``demande|request``.
|
||||
"""
|
||||
|
||||
return format_supervised_pause_message(
|
||||
intention=_mapping_text(payload, "intention", "trying_to"),
|
||||
attendu=_mapping_text(payload, "attendu", "expected"),
|
||||
vu=_mapping_text(payload, "vu", "observed"),
|
||||
demande=_mapping_text(payload, "demande", "request"),
|
||||
)
|
||||
|
||||
|
||||
def coerce_supervised_pause_message(
|
||||
message: object = "",
|
||||
*,
|
||||
intention: object = "",
|
||||
attendu: object = "",
|
||||
vu: object = "",
|
||||
demande: object = "",
|
||||
) -> str:
|
||||
"""Retourner une pause supervisee valide, meme depuis un ancien message.
|
||||
|
||||
Si ``message`` respecte deja le contrat strict, il est conserve. Sinon on
|
||||
compose les quatre champs avec les valeurs explicites disponibles. Les
|
||||
valeurs trop vagues ou techniques sont remplacees par des fallbacks clairs.
|
||||
"""
|
||||
|
||||
raw_message = _one_line(message)
|
||||
if raw_message and validate_supervised_pause_message(raw_message).valid:
|
||||
return raw_message
|
||||
|
||||
defaults = DEFAULT_SUPERVISED_PAUSE_FIELDS
|
||||
candidates = SupervisedPauseFields(
|
||||
intention=_safe_field_text(intention, defaults.intention),
|
||||
attendu=_safe_field_text(attendu, defaults.attendu),
|
||||
vu=_safe_field_text(vu, defaults.vu),
|
||||
demande=_safe_field_text(demande or raw_message, defaults.demande),
|
||||
)
|
||||
|
||||
try:
|
||||
return format_supervised_pause_message(
|
||||
intention=candidates.intention,
|
||||
attendu=candidates.attendu,
|
||||
vu=candidates.vu,
|
||||
demande=candidates.demande,
|
||||
)
|
||||
except MessageContractError:
|
||||
return format_supervised_pause_message(
|
||||
intention=defaults.intention,
|
||||
attendu=defaults.attendu,
|
||||
vu=defaults.vu,
|
||||
demande=defaults.demande,
|
||||
)
|
||||
|
||||
|
||||
def warn_visible_message(
|
||||
message: object,
|
||||
*,
|
||||
source: str,
|
||||
supervised_pause: bool = False,
|
||||
) -> str:
|
||||
"""Log contract violations without modifying the visible message."""
|
||||
|
||||
text = str(message or "")
|
||||
validator = validate_supervised_pause_message if supervised_pause else validate_visible_message
|
||||
result = validator(text)
|
||||
if not result.valid:
|
||||
logger.warning(
|
||||
"[message_contract] invalid_message source=%s codes=%s",
|
||||
source,
|
||||
[issue.code for issue in result.issues],
|
||||
)
|
||||
return text
|
||||
|
||||
|
||||
def validate_supervised_pause_message(message: str) -> MessageValidationResult:
|
||||
"""Valider le contrat strict d'une pause supervisee."""
|
||||
|
||||
issues = list(validate_visible_message(message).issues)
|
||||
fields, structure_issues = _parse_supervised_pause(message)
|
||||
issues.extend(structure_issues)
|
||||
|
||||
if fields:
|
||||
for name, value in fields.items():
|
||||
if len(value) < MIN_FIELD_CHARS:
|
||||
issues.append(
|
||||
MessageValidationIssue(
|
||||
"field_too_short",
|
||||
f"{name} doit etre explicite",
|
||||
)
|
||||
)
|
||||
if len(value) > MAX_FIELD_CHARS:
|
||||
issues.append(
|
||||
MessageValidationIssue(
|
||||
"field_too_long",
|
||||
f"{name} depasse {MAX_FIELD_CHARS} caracteres",
|
||||
)
|
||||
)
|
||||
demande = fields.get("demande", "")
|
||||
if not _contains_actionable_french(demande) or len(demande.split()) < 4:
|
||||
issues.append(
|
||||
MessageValidationIssue(
|
||||
"not_actionable",
|
||||
"la demande doit contenir une action concrete en francais",
|
||||
)
|
||||
)
|
||||
|
||||
return _dedupe_issues(issues)
|
||||
|
||||
|
||||
def validate_visible_message(message: str) -> MessageValidationResult:
|
||||
"""Valider qu'un message visible n'est ni generique ni technique."""
|
||||
|
||||
text = str(message or "").strip()
|
||||
issues: list[MessageValidationIssue] = []
|
||||
|
||||
if not text:
|
||||
return MessageValidationResult(
|
||||
(MessageValidationIssue("empty_message", "message vide"),)
|
||||
)
|
||||
|
||||
if len(text) > MAX_VISIBLE_MESSAGE_CHARS:
|
||||
issues.append(
|
||||
MessageValidationIssue(
|
||||
"message_too_long",
|
||||
f"message au-dela de {MAX_VISIBLE_MESSAGE_CHARS} caracteres",
|
||||
)
|
||||
)
|
||||
|
||||
folded = _fold(text)
|
||||
seen_generic_phrases: set[str] = set()
|
||||
for phrase in _GENERIC_PHRASES:
|
||||
folded_phrase = _fold(phrase)
|
||||
if folded_phrase in seen_generic_phrases:
|
||||
continue
|
||||
seen_generic_phrases.add(folded_phrase)
|
||||
if folded_phrase in folded:
|
||||
issues.append(
|
||||
MessageValidationIssue(
|
||||
"generic_phrase",
|
||||
f"formulation trop generique: {phrase}",
|
||||
)
|
||||
)
|
||||
|
||||
for term in _TECHNICAL_ENGLISH_TERMS:
|
||||
if _fold(term) in folded:
|
||||
issues.append(
|
||||
MessageValidationIssue(
|
||||
"technical_english",
|
||||
f"anglais technique visible: {term}",
|
||||
)
|
||||
)
|
||||
|
||||
for code, pattern, detail in (
|
||||
("technical_field", _TECHNICAL_FIELD_RE, "champ technique brut"),
|
||||
("technical_identifier", _TECHNICAL_IDENTIFIER_RE, "identifiant technique brut"),
|
||||
("technical_identifier", _UUID_RE, "UUID brut"),
|
||||
("technical_identifier", _LONG_HEX_RE, "hash technique brut"),
|
||||
("raw_coordinates", _PIXEL_TUPLE_RE, "coordonnees pixel brutes"),
|
||||
("raw_coordinates", _PIXEL_FIELD_RE, "coordonnees techniques brutes"),
|
||||
("raw_coordinates", _PX_RE, "coordonnees pixel brutes"),
|
||||
("raw_score", _SCORE_RE, "score ou confiance brut"),
|
||||
):
|
||||
if pattern.search(text):
|
||||
issues.append(MessageValidationIssue(code, detail))
|
||||
|
||||
return _dedupe_issues(issues)
|
||||
|
||||
|
||||
def is_valid_visible_message(message: str) -> bool:
|
||||
"""Raccourci booleen pour les points d'integration UI."""
|
||||
|
||||
return validate_visible_message(message).valid
|
||||
|
||||
|
||||
def is_valid_supervised_pause_message(message: str) -> bool:
|
||||
"""Raccourci booleen pour les pauses supervisees."""
|
||||
|
||||
return validate_supervised_pause_message(message).valid
|
||||
|
||||
|
||||
def _parse_supervised_pause(
|
||||
message: str,
|
||||
) -> tuple[dict[str, str], list[MessageValidationIssue]]:
|
||||
lines = [line.rstrip() for line in str(message or "").splitlines() if line.strip()]
|
||||
issues: list[MessageValidationIssue] = []
|
||||
|
||||
if len(lines) != 4:
|
||||
issues.append(
|
||||
MessageValidationIssue(
|
||||
"invalid_structure",
|
||||
"une pause supervisee doit contenir exactement 4 lignes",
|
||||
)
|
||||
)
|
||||
return {}, issues
|
||||
|
||||
specs = (
|
||||
("intention", r"^J'essaie de\s*:\s*(.+)$"),
|
||||
("attendu", r"^J'attendais\s*:\s*(.+)$"),
|
||||
("vu", r"^Je vois\s*:\s*(.+)$"),
|
||||
("demande", r"^Peux-tu\s*:\s*(.+)$"),
|
||||
)
|
||||
fields: dict[str, str] = {}
|
||||
for line, (name, pattern) in zip(lines, specs):
|
||||
match = re.match(pattern, line)
|
||||
if not match:
|
||||
issues.append(
|
||||
MessageValidationIssue(
|
||||
"invalid_structure",
|
||||
f"ligne {len(fields) + 1} doit commencer par {SUPERVISED_PAUSE_LABELS[len(fields)]}",
|
||||
)
|
||||
)
|
||||
continue
|
||||
fields[name] = match.group(1).strip()
|
||||
|
||||
if len(fields) != 4:
|
||||
return {}, issues
|
||||
|
||||
return fields, issues
|
||||
|
||||
|
||||
def _contains_actionable_french(text: str) -> bool:
|
||||
folded = _fold(text)
|
||||
return any(_fold(hint) in folded for hint in _ACTIONABLE_FRENCH_HINTS)
|
||||
|
||||
|
||||
def _one_line(value: object) -> str:
|
||||
return re.sub(r"\s+", " ", str(value or "")).strip()
|
||||
|
||||
|
||||
def _mapping_text(payload: Mapping[str, object], *keys: str) -> str:
|
||||
for key in keys:
|
||||
value = payload.get(key)
|
||||
if value is not None:
|
||||
return str(value)
|
||||
return ""
|
||||
|
||||
|
||||
def _safe_field_text(value: object, fallback: str) -> str:
|
||||
text = _one_line(value)
|
||||
if len(text) < MIN_FIELD_CHARS or len(text) > MAX_FIELD_CHARS:
|
||||
return fallback
|
||||
if not validate_visible_message(text).valid:
|
||||
return fallback
|
||||
return text
|
||||
|
||||
|
||||
def _fold(text: str) -> str:
|
||||
normalized = unicodedata.normalize("NFKD", str(text or ""))
|
||||
ascii_text = "".join(ch for ch in normalized if not unicodedata.combining(ch))
|
||||
return ascii_text.casefold()
|
||||
|
||||
|
||||
def _dedupe_issues(issues: Iterable[MessageValidationIssue]) -> MessageValidationResult:
|
||||
seen: set[tuple[str, str]] = set()
|
||||
deduped: list[MessageValidationIssue] = []
|
||||
for issue in issues:
|
||||
key = (issue.code, issue.detail)
|
||||
if key in seen:
|
||||
continue
|
||||
seen.add(key)
|
||||
deduped.append(issue)
|
||||
return MessageValidationResult(tuple(deduped))
|
||||
|
||||
|
||||
__all__ = [
|
||||
"MAX_FIELD_CHARS",
|
||||
"MAX_VISIBLE_MESSAGE_CHARS",
|
||||
"MessageContractError",
|
||||
"MessageValidationIssue",
|
||||
"MessageValidationResult",
|
||||
"SUPERVISED_PAUSE_LABELS",
|
||||
"SupervisedPauseFields",
|
||||
"coerce_supervised_pause_message",
|
||||
"format_supervised_pause_from_mapping",
|
||||
"format_supervised_pause_message",
|
||||
"is_valid_supervised_pause_message",
|
||||
"is_valid_visible_message",
|
||||
"validate_supervised_pause_message",
|
||||
"validate_visible_message",
|
||||
"warn_visible_message",
|
||||
]
|
||||
661
agent_v0/agent_v1/ui/messages.py
Normal file
661
agent_v0/agent_v1/ui/messages.py
Normal file
@@ -0,0 +1,661 @@
|
||||
# agent_v1/ui/messages.py
|
||||
"""
|
||||
Formatage des messages utilisateur pour Léa.
|
||||
|
||||
Convertit les codes d'erreur techniques (`target_not_found`, `no_screen_change`...)
|
||||
en phrases en français naturel, orientées action, adaptées à un utilisateur non
|
||||
technique (secrétaire médicale, TIM).
|
||||
|
||||
Trois niveaux de sévérité sont définis :
|
||||
- INFO — Léa fait son travail normalement
|
||||
- ATTENTION — Quelque chose de léger (ralentissement, retry)
|
||||
- BLOCAGE — Léa a besoin d'aide, elle rend la main
|
||||
|
||||
Le module est 100% pur (pas d'I/O, pas d'UI) : testable sans mocks lourds.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import Any, Mapping, Optional
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Accès paresseux au DomainContext
|
||||
# ----------------------------------------------------------------------------
|
||||
#
|
||||
# On importe le module à l'appel pour éviter toute dépendance circulaire
|
||||
# avec `agent_v0.server_v1.domain_context` (qui ne doit pas importer l'UI).
|
||||
# Si l'import échoue (contexte client sans server_v1), on retombe sur None
|
||||
# et les formatters gardent leur comportement générique historique.
|
||||
|
||||
|
||||
def _get_domain_ctx(domain_id: Optional[str]):
|
||||
"""Récupérer un DomainContext si possible, sinon None (fallback)."""
|
||||
if not domain_id:
|
||||
return None
|
||||
try:
|
||||
from agent_v0.server_v1.domain_context import get_domain_context # lazy
|
||||
return get_domain_context(domain_id)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _friendly_target(description: str, domain_id: Optional[str] = None) -> str:
|
||||
"""Transformer une description technique en langage métier si possible.
|
||||
|
||||
Ex (tim_codage) : "DP" → "diagnostic principal"
|
||||
Ex (comptabilite) : "TVA" → "montant de TVA"
|
||||
Retombe sur la description nettoyée si aucun domaine ne matche.
|
||||
"""
|
||||
base = _nettoyer_description_cible(description)
|
||||
ctx = _get_domain_ctx(domain_id)
|
||||
if ctx is None or not base:
|
||||
return base
|
||||
try:
|
||||
return ctx._apply_synonyms(base)
|
||||
except Exception:
|
||||
return base
|
||||
|
||||
|
||||
class NiveauMessage(Enum):
|
||||
"""Niveaux hiérarchiques des messages affichés à l'utilisateur."""
|
||||
|
||||
INFO = "info" # Fond vert clair, disparaît tout seul, 3-5s
|
||||
ATTENTION = "attention" # Fond orange clair, disparaît tout seul, 7s
|
||||
BLOCAGE = "blocage" # Fond rouge clair, reste affiché, 15s+
|
||||
|
||||
|
||||
# Durée d'affichage par défaut (secondes), par niveau
|
||||
DUREE_PAR_NIVEAU: dict[NiveauMessage, int] = {
|
||||
NiveauMessage.INFO: 4,
|
||||
NiveauMessage.ATTENTION: 7,
|
||||
NiveauMessage.BLOCAGE: 15,
|
||||
}
|
||||
|
||||
# Icône textuelle par niveau (compatible plyer/Windows/Linux)
|
||||
ICONE_PAR_NIVEAU: dict[NiveauMessage, str] = {
|
||||
NiveauMessage.INFO: "i",
|
||||
NiveauMessage.ATTENTION: "!",
|
||||
NiveauMessage.BLOCAGE: "?",
|
||||
}
|
||||
|
||||
# Les pauses supervisees peuvent contenir une raison precise, parfois longue
|
||||
# (fenetre observee, fenetre attendue, action en cours). On garde l'information
|
||||
# utile et on laisse les widgets UI gerer le wrap/scroll.
|
||||
MAX_TARGET_DESCRIPTION_CHARS = 1024
|
||||
MAX_GENERIC_TECHNICAL_MESSAGE_CHARS = 1024
|
||||
|
||||
|
||||
@dataclass
|
||||
class MessageUtilisateur:
|
||||
"""Un message prêt à être affiché à l'utilisateur.
|
||||
|
||||
Attributes:
|
||||
niveau: Hiérarchie (info/attention/blocage)
|
||||
titre: Titre court de la notification (≤60 caractères)
|
||||
corps: Corps du message en français naturel
|
||||
duree_s: Durée d'affichage recommandée (secondes)
|
||||
persistent: Si True, l'utilisateur doit fermer manuellement
|
||||
"""
|
||||
|
||||
niveau: NiveauMessage
|
||||
titre: str
|
||||
corps: str
|
||||
duree_s: int
|
||||
persistent: bool = False
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
"""Sérialiser le message (utile pour les tests et le logging)."""
|
||||
return {
|
||||
"niveau": self.niveau.value,
|
||||
"titre": self.titre,
|
||||
"corps": self.corps,
|
||||
"duree_s": self.duree_s,
|
||||
"persistent": self.persistent,
|
||||
}
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Helpers d'extraction
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def _extraire_nom_application(titre_fenetre: str) -> str:
|
||||
"""Extraire le nom de l'application à partir d'un titre de fenêtre.
|
||||
|
||||
Les titres Windows suivent généralement le format :
|
||||
"Document.txt – Bloc-notes"
|
||||
"Ma Page - Google Chrome"
|
||||
"Sans titre — Paint"
|
||||
|
||||
On retourne la partie après le dernier séparateur, ou le titre entier.
|
||||
"""
|
||||
if not titre_fenetre:
|
||||
return ""
|
||||
titre = titre_fenetre.strip()
|
||||
# Chercher le dernier séparateur parmi " – ", " — ", " - "
|
||||
for sep in (" – ", " — ", " - "):
|
||||
if sep in titre:
|
||||
return titre.rsplit(sep, 1)[-1].strip()
|
||||
return titre
|
||||
|
||||
|
||||
def _nettoyer_description_cible(description: str) -> str:
|
||||
"""Nettoyer la description technique d'une cible pour l'afficher.
|
||||
|
||||
Supprime les caractères techniques (guillemets inutiles, ':').
|
||||
"""
|
||||
if not description:
|
||||
return ""
|
||||
desc = description.strip()
|
||||
# Retirer les guillemets encapsulants
|
||||
desc = desc.strip("'\"`")
|
||||
# Limiter la longueur sans perdre les details utiles a la supervision.
|
||||
if len(desc) > MAX_TARGET_DESCRIPTION_CHARS:
|
||||
desc = desc[: MAX_TARGET_DESCRIPTION_CHARS - 3] + "..."
|
||||
return desc
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Formattage des messages techniques → humains
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def formatter_cible_non_trouvee(
|
||||
description_cible: str,
|
||||
titre_fenetre: Optional[str] = None,
|
||||
domain_id: Optional[str] = None,
|
||||
params: Optional[Mapping[str, Any]] = None,
|
||||
) -> MessageUtilisateur:
|
||||
"""Message quand Léa ne trouve pas un élément à cliquer.
|
||||
|
||||
Si un domaine métier est fourni, la description de la cible est
|
||||
transformée en langage métier via le DomainContext :
|
||||
- tim_codage + "DP" → "diagnostic principal"
|
||||
- comptabilite + "TVA" → "montant de TVA"
|
||||
|
||||
Exemple avant :
|
||||
target_not_found: 'bonjour' dans *bonjour, – Bloc-notes
|
||||
Exemple après :
|
||||
Léa a besoin d'aide
|
||||
Je ne trouve pas "bonjour" dans le Bloc-notes. Peux-tu cliquer
|
||||
dessus toi-même ? Je reprends ensuite.
|
||||
|
||||
Args:
|
||||
description_cible: Description brute de la cible.
|
||||
titre_fenetre: Titre de la fenêtre active (pour extraire l'app).
|
||||
domain_id: Domaine métier pour enrichir la sortie (optionnel).
|
||||
params: Paramètres du workflow (nom_patient, num_facture...)
|
||||
utilisés par les templates de clarification métier.
|
||||
"""
|
||||
cible = _friendly_target(description_cible, domain_id) or "l'élément"
|
||||
app = _extraire_nom_application(titre_fenetre or "")
|
||||
|
||||
# Si un domaine et un template de clarification existent, préférer la
|
||||
# question métier (plus pertinente que le message générique).
|
||||
ctx = _get_domain_ctx(domain_id)
|
||||
if ctx is not None and ctx.clarification_templates:
|
||||
try:
|
||||
corps = ctx.pose_clarification_question(
|
||||
{
|
||||
"blocked_on": "target_not_found",
|
||||
"target": description_cible or "",
|
||||
"app": app,
|
||||
"params": dict(params or {}),
|
||||
}
|
||||
)
|
||||
except Exception:
|
||||
corps = ""
|
||||
if corps:
|
||||
return MessageUtilisateur(
|
||||
niveau=NiveauMessage.BLOCAGE,
|
||||
titre="Léa a besoin d'aide",
|
||||
corps=corps,
|
||||
duree_s=DUREE_PAR_NIVEAU[NiveauMessage.BLOCAGE],
|
||||
persistent=True,
|
||||
)
|
||||
|
||||
if app:
|
||||
corps = (
|
||||
f"Je ne trouve pas « {cible} » dans {app}. "
|
||||
f"Peux-tu cliquer dessus toi-même ? Je reprends ensuite."
|
||||
)
|
||||
else:
|
||||
corps = (
|
||||
f"Je ne trouve pas « {cible} » à l'écran. "
|
||||
f"Peux-tu le faire toi-même ? Je reprends ensuite."
|
||||
)
|
||||
|
||||
return MessageUtilisateur(
|
||||
niveau=NiveauMessage.BLOCAGE,
|
||||
titre="Léa a besoin d'aide",
|
||||
corps=corps,
|
||||
duree_s=DUREE_PAR_NIVEAU[NiveauMessage.BLOCAGE],
|
||||
persistent=True,
|
||||
)
|
||||
|
||||
|
||||
def formatter_fenetre_incorrecte(
|
||||
titre_actuel: str,
|
||||
titre_attendu: str,
|
||||
) -> MessageUtilisateur:
|
||||
"""Message quand la fenêtre active n'est pas celle attendue.
|
||||
|
||||
Exemple avant :
|
||||
Fenêtre incorrecte: 'Program Manager' (attendu: 'Lea : Explorateur de fichiers')
|
||||
Exemple après :
|
||||
Léa attend une fenêtre
|
||||
J'attends « Explorateur de fichiers » mais c'est « Program Manager »
|
||||
qui est affiché. Peux-tu ouvrir la bonne fenêtre ?
|
||||
"""
|
||||
app_actuelle = _extraire_nom_application(titre_actuel) or "une autre fenêtre"
|
||||
app_attendue = _extraire_nom_application(titre_attendu) or titre_attendu
|
||||
|
||||
corps = (
|
||||
f"J'attends « {app_attendue} » mais c'est « {app_actuelle} » "
|
||||
f"qui est affiché. Peux-tu ouvrir la bonne fenêtre ?"
|
||||
)
|
||||
|
||||
return MessageUtilisateur(
|
||||
niveau=NiveauMessage.BLOCAGE,
|
||||
titre="Léa attend une fenêtre",
|
||||
corps=corps,
|
||||
duree_s=DUREE_PAR_NIVEAU[NiveauMessage.BLOCAGE],
|
||||
persistent=True,
|
||||
)
|
||||
|
||||
|
||||
def formatter_ecran_inchange(action_type: str = "") -> MessageUtilisateur:
|
||||
"""Message quand l'action n'a pas eu d'effet visible.
|
||||
|
||||
Exemple avant :
|
||||
Ecran inchange apres l'action
|
||||
Exemple après :
|
||||
Léa vérifie
|
||||
Mon clic n'a pas eu l'air de marcher. Je vais réessayer ou te
|
||||
rendre la main si ça ne passe pas.
|
||||
"""
|
||||
actions_fr = {
|
||||
"click": "Mon clic",
|
||||
"type": "Ma saisie",
|
||||
"key_combo": "Mon raccourci clavier",
|
||||
"scroll": "Mon défilement",
|
||||
}
|
||||
quoi = actions_fr.get(action_type, "Mon action")
|
||||
|
||||
corps = (
|
||||
f"{quoi} n'a pas eu l'air de marcher. Je vais réessayer, "
|
||||
f"ou te rendre la main si ça ne passe pas."
|
||||
)
|
||||
|
||||
return MessageUtilisateur(
|
||||
niveau=NiveauMessage.ATTENTION,
|
||||
titre="Léa vérifie",
|
||||
corps=corps,
|
||||
duree_s=DUREE_PAR_NIVEAU[NiveauMessage.ATTENTION],
|
||||
)
|
||||
|
||||
|
||||
def formatter_mode_apprentissage(
|
||||
raison: str = "",
|
||||
description_cible: str = "",
|
||||
titre_fenetre: Optional[str] = None,
|
||||
) -> MessageUtilisateur:
|
||||
"""Message quand Léa passe en mode apprentissage (pause supervisée).
|
||||
|
||||
L'utilisateur doit comprendre :
|
||||
1. Léa est bloquée et a besoin d'aide
|
||||
2. L'utilisateur doit prendre la main et montrer comment faire
|
||||
3. Ctrl+Shift+L pour signaler qu'il a fini
|
||||
|
||||
Le ton est humble, clair, actionnable. Pas technique.
|
||||
|
||||
Exemple :
|
||||
Léa a besoin d'aide
|
||||
Je n'y arrive pas, montrez-moi comment faire.
|
||||
Quand vous avez fini, appuyez sur Ctrl+Shift+L.
|
||||
"""
|
||||
cible = _nettoyer_description_cible(description_cible) if description_cible else ""
|
||||
app = _extraire_nom_application(titre_fenetre or "") if titre_fenetre else ""
|
||||
|
||||
# Construire un contexte court si disponible
|
||||
contexte = ""
|
||||
if cible and app:
|
||||
contexte = f" (« {cible} » dans {app})"
|
||||
elif cible:
|
||||
contexte = f" (« {cible} »)"
|
||||
|
||||
corps = (
|
||||
f"Je n'y arrive pas{contexte}, montrez-moi comment faire. "
|
||||
f"Quand vous avez fini, appuyez sur Ctrl+Shift+L."
|
||||
)
|
||||
|
||||
return MessageUtilisateur(
|
||||
niveau=NiveauMessage.BLOCAGE,
|
||||
titre="Léa a besoin d'aide",
|
||||
corps=corps,
|
||||
duree_s=DUREE_PAR_NIVEAU[NiveauMessage.BLOCAGE],
|
||||
persistent=True,
|
||||
)
|
||||
|
||||
|
||||
def formatter_connexion_perdue(hote_serveur: str = "") -> MessageUtilisateur:
|
||||
"""Message quand la connexion avec le serveur est perdue.
|
||||
|
||||
Rassurant : on dit qu'on va réessayer automatiquement.
|
||||
"""
|
||||
corps = (
|
||||
"J'ai perdu le lien avec le serveur. Je retente automatiquement, "
|
||||
"pas besoin d'intervenir."
|
||||
)
|
||||
|
||||
return MessageUtilisateur(
|
||||
niveau=NiveauMessage.ATTENTION,
|
||||
titre="Léa est déconnectée",
|
||||
corps=corps,
|
||||
duree_s=DUREE_PAR_NIVEAU[NiveauMessage.ATTENTION],
|
||||
)
|
||||
|
||||
|
||||
def formatter_connexion_retablie() -> MessageUtilisateur:
|
||||
"""Message quand la connexion serveur est rétablie."""
|
||||
return MessageUtilisateur(
|
||||
niveau=NiveauMessage.INFO,
|
||||
titre="Léa",
|
||||
corps="C'est bon, la connexion est revenue. Je continue.",
|
||||
duree_s=DUREE_PAR_NIVEAU[NiveauMessage.INFO],
|
||||
)
|
||||
|
||||
|
||||
def formatter_debut_workflow(nom_workflow: str, nb_etapes: int = 0) -> MessageUtilisateur:
|
||||
"""Message au démarrage d'un workflow de replay."""
|
||||
if nb_etapes > 0:
|
||||
corps = (
|
||||
f"Je démarre « {nom_workflow} » ({nb_etapes} étapes). "
|
||||
f"Je t'indique mon avancement."
|
||||
)
|
||||
else:
|
||||
corps = f"Je démarre « {nom_workflow} ». Je t'indique mon avancement."
|
||||
|
||||
return MessageUtilisateur(
|
||||
niveau=NiveauMessage.INFO,
|
||||
titre="Léa démarre",
|
||||
corps=corps,
|
||||
duree_s=DUREE_PAR_NIVEAU[NiveauMessage.INFO],
|
||||
)
|
||||
|
||||
|
||||
def formatter_etape_workflow(
|
||||
etape_actuelle: int,
|
||||
nb_etapes: int,
|
||||
description: str = "",
|
||||
) -> MessageUtilisateur:
|
||||
"""Message pour la progression d'une étape."""
|
||||
if description:
|
||||
desc = _nettoyer_description_cible(description)
|
||||
corps = f"Étape {etape_actuelle}/{nb_etapes} — {desc}"
|
||||
else:
|
||||
corps = f"Étape {etape_actuelle}/{nb_etapes}"
|
||||
|
||||
return MessageUtilisateur(
|
||||
niveau=NiveauMessage.INFO,
|
||||
titre="Léa avance",
|
||||
corps=corps,
|
||||
duree_s=3,
|
||||
)
|
||||
|
||||
|
||||
def formatter_retry(action_type: str = "", tentative: int = 2) -> MessageUtilisateur:
|
||||
"""Message quand Léa retente une action."""
|
||||
corps = (
|
||||
f"Je retente (tentative {tentative}). Ça arrive parfois, "
|
||||
f"l'écran était peut-être en cours de chargement."
|
||||
)
|
||||
return MessageUtilisateur(
|
||||
niveau=NiveauMessage.ATTENTION,
|
||||
titre="Léa retente",
|
||||
corps=corps,
|
||||
duree_s=DUREE_PAR_NIVEAU[NiveauMessage.ATTENTION],
|
||||
)
|
||||
|
||||
|
||||
def formatter_ralentissement() -> MessageUtilisateur:
|
||||
"""Message quand Léa prend plus de temps que prévu."""
|
||||
return MessageUtilisateur(
|
||||
niveau=NiveauMessage.ATTENTION,
|
||||
titre="Léa prend son temps",
|
||||
corps="Je vais plus lentement que prévu. L'écran met du temps à répondre.",
|
||||
duree_s=DUREE_PAR_NIVEAU[NiveauMessage.ATTENTION],
|
||||
)
|
||||
|
||||
|
||||
def formatter_fin_workflow(
|
||||
succes: bool,
|
||||
nom_workflow: str = "",
|
||||
nb_etapes: int = 0,
|
||||
duree_s: float = 0.0,
|
||||
domain_id: Optional[str] = None,
|
||||
items_count: int = 0,
|
||||
failed_count: int = 0,
|
||||
params: Optional[Mapping[str, Any]] = None,
|
||||
) -> MessageUtilisateur:
|
||||
"""Message à la fin d'un workflow.
|
||||
|
||||
Si un domaine métier est fourni (et qu'il expose des summary_templates),
|
||||
on utilise `DomainContext.describe_workflow_outcome` pour formuler un
|
||||
rapport en langage métier (ex: "J'ai codé 14 dossiers sur 15").
|
||||
|
||||
Args:
|
||||
succes: True si l'ensemble du workflow a réussi.
|
||||
nom_workflow: Nom du workflow.
|
||||
nb_etapes: Nombre d'étapes techniques (pour fallback générique).
|
||||
duree_s: Durée totale en secondes.
|
||||
domain_id: Domaine métier (optionnel).
|
||||
items_count: Nombre d'items métier traités (ex: 15 dossiers).
|
||||
failed_count: Nombre d'items en échec.
|
||||
params: Infos supplémentaires passées aux templates.
|
||||
"""
|
||||
ctx = _get_domain_ctx(domain_id)
|
||||
if ctx is not None and ctx.summary_templates:
|
||||
try:
|
||||
corps = ctx.describe_workflow_outcome(
|
||||
workflow_name=nom_workflow,
|
||||
success=succes,
|
||||
items_count=items_count or max(1, nb_etapes),
|
||||
failed_count=failed_count,
|
||||
elapsed_s=duree_s,
|
||||
extra=dict(params or {}),
|
||||
)
|
||||
except Exception:
|
||||
corps = ""
|
||||
if corps:
|
||||
if succes and failed_count == 0:
|
||||
return MessageUtilisateur(
|
||||
niveau=NiveauMessage.INFO,
|
||||
titre="Léa a terminé",
|
||||
corps=corps,
|
||||
duree_s=6,
|
||||
)
|
||||
if succes and failed_count > 0:
|
||||
return MessageUtilisateur(
|
||||
niveau=NiveauMessage.ATTENTION,
|
||||
titre="Léa a terminé partiellement",
|
||||
corps=corps,
|
||||
duree_s=DUREE_PAR_NIVEAU[NiveauMessage.ATTENTION],
|
||||
)
|
||||
return MessageUtilisateur(
|
||||
niveau=NiveauMessage.BLOCAGE,
|
||||
titre="Léa s'arrête",
|
||||
corps=corps,
|
||||
duree_s=DUREE_PAR_NIVEAU[NiveauMessage.BLOCAGE],
|
||||
persistent=True,
|
||||
)
|
||||
|
||||
if succes:
|
||||
if nom_workflow and nb_etapes > 0:
|
||||
corps = (
|
||||
f"C'est fait ! « {nom_workflow} » est terminé "
|
||||
f"({nb_etapes} étapes en {int(duree_s)}s)."
|
||||
)
|
||||
else:
|
||||
corps = "C'est fait ! Tout s'est bien passé."
|
||||
return MessageUtilisateur(
|
||||
niveau=NiveauMessage.INFO,
|
||||
titre="Léa a terminé",
|
||||
corps=corps,
|
||||
duree_s=6,
|
||||
)
|
||||
else:
|
||||
corps = (
|
||||
"Je n'ai pas pu terminer. Je te rends la main, "
|
||||
"tu peux continuer à partir de là où je me suis arrêtée."
|
||||
)
|
||||
return MessageUtilisateur(
|
||||
niveau=NiveauMessage.BLOCAGE,
|
||||
titre="Léa s'arrête",
|
||||
corps=corps,
|
||||
duree_s=DUREE_PAR_NIVEAU[NiveauMessage.BLOCAGE],
|
||||
persistent=True,
|
||||
)
|
||||
|
||||
|
||||
def formatter_erreur_generique(
|
||||
message_technique: str,
|
||||
domain_id: Optional[str] = None,
|
||||
params: Optional[Mapping[str, Any]] = None,
|
||||
) -> MessageUtilisateur:
|
||||
"""Formater un message d'erreur technique non catégorisé.
|
||||
|
||||
On essaie de détecter les motifs connus dans le message technique pour
|
||||
le router vers le bon formatter spécialisé, sinon on emballe le message.
|
||||
Si `domain_id` est fourni, il est propagé aux formatters spécialisés
|
||||
pour produire un message en langage métier.
|
||||
"""
|
||||
if not message_technique:
|
||||
return MessageUtilisateur(
|
||||
niveau=NiveauMessage.ATTENTION,
|
||||
titre="Léa",
|
||||
corps="J'ai rencontré un petit souci. Je continue.",
|
||||
duree_s=DUREE_PAR_NIVEAU[NiveauMessage.ATTENTION],
|
||||
)
|
||||
|
||||
msg_lower = message_technique.lower()
|
||||
|
||||
# target_not_found[:...]
|
||||
if "target_not_found" in msg_lower:
|
||||
# Essayer d'extraire la description après le ':'
|
||||
match = re.match(r"target_not_found[:\s]*(.*)", message_technique, re.IGNORECASE)
|
||||
desc = match.group(1).strip() if match else ""
|
||||
return formatter_cible_non_trouvee(desc, domain_id=domain_id, params=params)
|
||||
|
||||
# Fenêtre incorrecte: 'X' (attendu: 'Y')
|
||||
if "fenêtre incorrecte" in msg_lower or "fenetre incorrecte" in msg_lower:
|
||||
# Extraire actuel et attendu
|
||||
m_actuel = re.search(r"[:,]\s*['\"]([^'\"]+)['\"]", message_technique)
|
||||
m_attendu = re.search(r"attendu[:\s]*['\"]([^'\"]+)['\"]", message_technique)
|
||||
actuel = m_actuel.group(1) if m_actuel else ""
|
||||
attendu = m_attendu.group(1) if m_attendu else ""
|
||||
return formatter_fenetre_incorrecte(actuel, attendu)
|
||||
|
||||
# Ecran inchangé
|
||||
if "inchang" in msg_lower or "no_screen_change" in msg_lower:
|
||||
return formatter_ecran_inchange()
|
||||
|
||||
# Policy abort / supervise
|
||||
if "policy_abort" in msg_lower or "visual_resolve_failed" in msg_lower:
|
||||
return formatter_cible_non_trouvee(
|
||||
message_technique, domain_id=domain_id, params=params
|
||||
)
|
||||
|
||||
# Fallback : message technique tronqué
|
||||
msg_tronque = message_technique.strip()
|
||||
if len(msg_tronque) > MAX_GENERIC_TECHNICAL_MESSAGE_CHARS:
|
||||
msg_tronque = msg_tronque[: MAX_GENERIC_TECHNICAL_MESSAGE_CHARS - 3] + "..."
|
||||
|
||||
return MessageUtilisateur(
|
||||
niveau=NiveauMessage.ATTENTION,
|
||||
titre="Léa",
|
||||
corps=f"J'ai rencontré un souci : {msg_tronque}",
|
||||
duree_s=DUREE_PAR_NIVEAU[NiveauMessage.ATTENTION],
|
||||
)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Détection fenêtre Léa (utilisé par l'executor pour ignorer sa propre UI)
|
||||
# ============================================================================
|
||||
|
||||
|
||||
# Motifs qui identifient une fenêtre appartenant à Léa (l'agent lui-même).
|
||||
# On utilise des regex avec \b pour éviter les faux positifs sur des noms
|
||||
# contenant "lea" (ex: "cléa.txt", "leapfrog", "replay").
|
||||
_MOTIFS_FENETRE_LEA_REGEX = (
|
||||
r"\bléa\b",
|
||||
r"\blea\b(?!p)", # "lea" mot entier, pas "leapfrog"
|
||||
r"lea\s*[—–\-:]", # "Lea —", "Lea -", "Lea :"
|
||||
r"léa\s*[—–\-:]",
|
||||
r"\bassistante ia\b",
|
||||
r"\bléa ia\b",
|
||||
r"\blea ia\b",
|
||||
)
|
||||
|
||||
|
||||
def est_fenetre_lea(titre_fenetre: str) -> bool:
|
||||
"""Détecter si un titre de fenêtre appartient à l'agent Léa lui-même.
|
||||
|
||||
Utilisé pour éviter que Léa ne se considère comme une fenêtre intrusive
|
||||
dans ses propres pré-vérifications.
|
||||
|
||||
Utilise des regex avec des word boundaries pour éviter les faux positifs
|
||||
sur des noms de fichiers contenant "lea" (ex: "cléa.txt", "replay.log").
|
||||
"""
|
||||
if not titre_fenetre:
|
||||
return False
|
||||
titre_lower = titre_fenetre.lower().strip()
|
||||
return any(re.search(motif, titre_lower) for motif in _MOTIFS_FENETRE_LEA_REGEX)
|
||||
|
||||
|
||||
# Fenêtres parasites Windows à ignorer dans les pré-vérifications.
|
||||
# Ce ne sont pas des fenêtres applicatives — c'est du bruit système
|
||||
# qui prend le focus de manière imprévisible.
|
||||
_FENETRES_BRUIT_SYSTEME = (
|
||||
"fenêtre de dépassement de capacité",
|
||||
"overflow", # version anglaise systray
|
||||
"program manager",
|
||||
"barre des tâches",
|
||||
"task bar",
|
||||
"cortana",
|
||||
"action center",
|
||||
"centre de notifications",
|
||||
)
|
||||
|
||||
|
||||
def est_fenetre_bruit(titre_fenetre: str) -> bool:
|
||||
"""Détecter si un titre de fenêtre est du bruit système Windows.
|
||||
|
||||
Ces fenêtres prennent le focus de manière imprévisible (systray overflow,
|
||||
taskbar, Program Manager) et ne sont jamais la cible d'une action utilisateur.
|
||||
"""
|
||||
if not titre_fenetre:
|
||||
return True # pas de titre = bruit
|
||||
titre_lower = titre_fenetre.lower().strip()
|
||||
if titre_lower == "unknown_window":
|
||||
return True
|
||||
return any(p in titre_lower for p in _FENETRES_BRUIT_SYSTEME)
|
||||
|
||||
|
||||
# Conservé pour rétro-compatibilité avec le code qui listait MOTIFS_FENETRE_LEA
|
||||
MOTIFS_FENETRE_LEA = (
|
||||
"léa",
|
||||
"lea —",
|
||||
"léa —",
|
||||
"lea -",
|
||||
"léa -",
|
||||
"lea assistante",
|
||||
"léa assistante",
|
||||
"lea : ",
|
||||
"léa : ",
|
||||
"assistante ia",
|
||||
)
|
||||
@@ -5,6 +5,14 @@ Utilise plyer pour les notifications système, sans dépendance PyQt5.
|
||||
|
||||
Remplace les dialogues Qt par des toasts non-bloquants.
|
||||
Thread-safe avec rate limiting (1 notification / 2 secondes max).
|
||||
|
||||
Les messages utilisateur sont formatés via `agent_v1.ui.messages` qui convertit
|
||||
les codes techniques (target_not_found, etc.) en français naturel.
|
||||
|
||||
Hiérarchie des notifications (cf. messages.NiveauMessage) :
|
||||
- INFO : auto-dismiss en ~4s, rate-limité classique
|
||||
- ATTENTION : auto-dismiss en ~7s, rate-limité classique
|
||||
- BLOCAGE : persistant (15s+), bypass du rate limit
|
||||
"""
|
||||
|
||||
import logging
|
||||
@@ -12,6 +20,23 @@ import threading
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
from .messages import (
|
||||
MessageUtilisateur,
|
||||
NiveauMessage,
|
||||
formatter_cible_non_trouvee,
|
||||
formatter_connexion_perdue,
|
||||
formatter_connexion_retablie,
|
||||
formatter_debut_workflow,
|
||||
formatter_ecran_inchange,
|
||||
formatter_erreur_generique,
|
||||
formatter_etape_workflow,
|
||||
formatter_fenetre_incorrecte,
|
||||
formatter_fin_workflow,
|
||||
formatter_mode_apprentissage,
|
||||
formatter_ralentissement,
|
||||
formatter_retry,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Import conditionnel de plyer — fallback silencieux si absent
|
||||
@@ -59,7 +84,13 @@ class NotificationManager:
|
||||
# Méthode générique
|
||||
# ------------------------------------------------------------------ #
|
||||
|
||||
def notify(self, title: str, message: str, timeout: int = 5) -> bool:
|
||||
def notify(
|
||||
self,
|
||||
title: str,
|
||||
message: str,
|
||||
timeout: int = 5,
|
||||
bypass_rate_limit: bool = False,
|
||||
) -> bool:
|
||||
"""
|
||||
Affiche une notification toast.
|
||||
|
||||
@@ -67,6 +98,8 @@ class NotificationManager:
|
||||
title: Titre de la notification.
|
||||
message: Corps du message.
|
||||
timeout: Durée d'affichage en secondes.
|
||||
bypass_rate_limit: Si True, ignore le rate limit (pour les blocages
|
||||
importants qui ne doivent pas être écrasés).
|
||||
|
||||
Returns:
|
||||
True si la notification a été envoyée, False sinon
|
||||
@@ -76,17 +109,21 @@ class NotificationManager:
|
||||
logger.debug("Notification ignorée (plyer absent) : %s", title)
|
||||
return False
|
||||
|
||||
with self._lock:
|
||||
now = time.monotonic()
|
||||
elapsed = now - self._last_notification_time
|
||||
if elapsed < RATE_LIMIT_SECONDS:
|
||||
logger.debug(
|
||||
"Notification ignorée (rate limit, %.1fs restantes) : %s",
|
||||
RATE_LIMIT_SECONDS - elapsed,
|
||||
title,
|
||||
)
|
||||
return False
|
||||
self._last_notification_time = now
|
||||
if not bypass_rate_limit:
|
||||
with self._lock:
|
||||
now = time.monotonic()
|
||||
elapsed = now - self._last_notification_time
|
||||
if elapsed < RATE_LIMIT_SECONDS:
|
||||
logger.debug(
|
||||
"Notification ignorée (rate limit, %.1fs restantes) : %s",
|
||||
RATE_LIMIT_SECONDS - elapsed,
|
||||
title,
|
||||
)
|
||||
return False
|
||||
self._last_notification_time = now
|
||||
else:
|
||||
with self._lock:
|
||||
self._last_notification_time = time.monotonic()
|
||||
|
||||
# Envoi dans un thread dédié pour ne jamais bloquer l'appelant
|
||||
thread = threading.Thread(
|
||||
@@ -97,6 +134,48 @@ class NotificationManager:
|
||||
thread.start()
|
||||
return True
|
||||
|
||||
def notify_message(self, msg: MessageUtilisateur) -> bool:
|
||||
"""Envoyer un MessageUtilisateur structuré (niveau, titre, corps).
|
||||
|
||||
Les messages BLOCAGE bypass le rate limit pour garantir que
|
||||
l'utilisateur voit qu'on a besoin de lui.
|
||||
|
||||
UX fix 8 mai 2026 (démo GHT) : la bulle ChatWindow Léa V1 (Tkinter
|
||||
topmost + bell + force-show) est désormais l'affichage canonique pour
|
||||
les BLOCAGE de pause supervisée. On NE déclenche PLUS show_paused_toast
|
||||
depuis ici — Dom rapportait 3 popups en parallèle (toast executor,
|
||||
toast bubble, toast notifications). Plyer reste actif comme
|
||||
notification système discrète. Le toast Tkinter custom est conservé
|
||||
pour les fallbacks sans ChatWindow (cf. executor.Plan B).
|
||||
"""
|
||||
bypass = msg.niveau == NiveauMessage.BLOCAGE
|
||||
# Log aussi pour tracer dans les logs fichiers
|
||||
self._log_message(msg)
|
||||
|
||||
return self.notify(
|
||||
title=msg.titre,
|
||||
message=msg.corps,
|
||||
timeout=msg.duree_s,
|
||||
bypass_rate_limit=bypass,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _log_message(msg: MessageUtilisateur) -> None:
|
||||
"""Logger un message utilisateur avec le niveau approprié.
|
||||
|
||||
Les logs agents sont plus lisibles quand on route info → INFO,
|
||||
attention → WARNING, blocage → ERROR, avec un préfixe [LEA].
|
||||
"""
|
||||
prefix = f"[LEA] {msg.titre}: {msg.corps}"
|
||||
if msg.niveau == NiveauMessage.INFO:
|
||||
logger.info(prefix)
|
||||
elif msg.niveau == NiveauMessage.ATTENTION:
|
||||
logger.warning(prefix)
|
||||
elif msg.niveau == NiveauMessage.BLOCAGE:
|
||||
logger.error(prefix)
|
||||
else:
|
||||
logger.info(prefix)
|
||||
|
||||
def _send(self, title: str, message: str, timeout: int) -> None:
|
||||
"""Envoi effectif de la notification (exécuté dans un thread dédié)."""
|
||||
try:
|
||||
@@ -180,40 +259,93 @@ class NotificationManager:
|
||||
timeout=3,
|
||||
)
|
||||
|
||||
def replay_finished(self, success: bool, workflow_name: str) -> bool:
|
||||
"""Notification de fin de replay (succès ou échec)."""
|
||||
if success:
|
||||
return self.notify(
|
||||
title=APP_NAME,
|
||||
message="C'est fait ! Tout s'est bien passé.",
|
||||
timeout=5,
|
||||
)
|
||||
else:
|
||||
return self.notify(
|
||||
title=APP_NAME,
|
||||
message="Hmm, j'ai eu un souci. Vous pouvez me remontrer ?",
|
||||
timeout=7,
|
||||
)
|
||||
def replay_target_not_found(
|
||||
self,
|
||||
target_description: str,
|
||||
window_title: Optional[str] = None,
|
||||
) -> bool:
|
||||
"""Notification quand un élément n'est pas trouvé pendant le replay.
|
||||
|
||||
def connection_changed(self, connected: bool, server_host: str) -> bool:
|
||||
Le replay est mis en pause et attend une intervention humaine.
|
||||
Utilise `messages.formatter_cible_non_trouvee` pour un message en
|
||||
français naturel.
|
||||
"""
|
||||
msg = formatter_cible_non_trouvee(target_description, window_title)
|
||||
return self.notify_message(msg)
|
||||
|
||||
def replay_wrong_window(self, current_title: str, expected_title: str) -> bool:
|
||||
"""Notification quand la fenêtre active n'est pas celle attendue."""
|
||||
msg = formatter_fenetre_incorrecte(current_title, expected_title)
|
||||
return self.notify_message(msg)
|
||||
|
||||
def replay_no_screen_change(self, action_type: str = "") -> bool:
|
||||
"""Notification quand une action n'a pas eu d'effet visible."""
|
||||
msg = formatter_ecran_inchange(action_type)
|
||||
return self.notify_message(msg)
|
||||
|
||||
def replay_learning_mode(
|
||||
self,
|
||||
raison: str = "",
|
||||
target_description: str = "",
|
||||
window_title: Optional[str] = None,
|
||||
) -> bool:
|
||||
"""Notification quand Léa passe en mode apprentissage.
|
||||
|
||||
Léa est bloquée et demande à l'utilisateur de montrer comment faire.
|
||||
Message humble et actionnable pour un utilisateur non technique.
|
||||
"""
|
||||
msg = formatter_mode_apprentissage(raison, target_description, window_title)
|
||||
return self.notify_message(msg)
|
||||
|
||||
def replay_retry(self, action_type: str = "", tentative: int = 2) -> bool:
|
||||
"""Notification quand Léa retente une action."""
|
||||
msg = formatter_retry(action_type, tentative)
|
||||
return self.notify_message(msg)
|
||||
|
||||
def replay_slow(self) -> bool:
|
||||
"""Notification quand Léa va plus lentement que prévu."""
|
||||
msg = formatter_ralentissement()
|
||||
return self.notify_message(msg)
|
||||
|
||||
def replay_finished(
|
||||
self,
|
||||
success: bool,
|
||||
workflow_name: str,
|
||||
step_count: int = 0,
|
||||
duration_s: float = 0.0,
|
||||
) -> bool:
|
||||
"""Notification de fin de replay (succès ou échec)."""
|
||||
msg = formatter_fin_workflow(success, workflow_name, step_count, duration_s)
|
||||
return self.notify_message(msg)
|
||||
|
||||
def replay_workflow_started(self, workflow_name: str, step_count: int = 0) -> bool:
|
||||
"""Notification de début de workflow (remplace `replay_started`)."""
|
||||
msg = formatter_debut_workflow(workflow_name, step_count)
|
||||
return self.notify_message(msg)
|
||||
|
||||
def replay_step_progress(
|
||||
self,
|
||||
current: int,
|
||||
total: int,
|
||||
description: str = "",
|
||||
) -> bool:
|
||||
"""Notification de progression d'une étape (niveau INFO)."""
|
||||
msg = formatter_etape_workflow(current, total, description)
|
||||
return self.notify_message(msg)
|
||||
|
||||
def connection_changed(self, connected: bool, server_host: str = "") -> bool:
|
||||
"""Notification de changement d'état de la connexion serveur."""
|
||||
if connected:
|
||||
return self.notify(
|
||||
title=APP_NAME,
|
||||
message="Connectée au serveur.",
|
||||
timeout=5,
|
||||
)
|
||||
msg = formatter_connexion_retablie()
|
||||
else:
|
||||
return self.notify(
|
||||
title=APP_NAME,
|
||||
message="J'ai perdu la connexion avec le serveur.",
|
||||
timeout=7,
|
||||
)
|
||||
msg = formatter_connexion_perdue(server_host)
|
||||
return self.notify_message(msg)
|
||||
|
||||
def error(self, message: str) -> bool:
|
||||
"""Notification d'erreur."""
|
||||
return self.notify(
|
||||
title=APP_NAME,
|
||||
message=f"Oups, un problème : {message}",
|
||||
timeout=10,
|
||||
)
|
||||
"""Notification d'erreur générique.
|
||||
|
||||
Essaie d'abord de détecter un motif technique connu et de formater
|
||||
correctement, sinon fallback sur un message générique aidant.
|
||||
"""
|
||||
msg = formatter_erreur_generique(message)
|
||||
return self.notify_message(msg)
|
||||
|
||||
290
agent_v0/agent_v1/ui/paused_toast.py
Normal file
290
agent_v0/agent_v1/ui/paused_toast.py
Normal file
@@ -0,0 +1,290 @@
|
||||
# agent_v1/ui/paused_toast.py
|
||||
"""
|
||||
Toast Tkinter custom pour la pause supervisée (« Léa a besoin de votre aide »).
|
||||
|
||||
Démo GHT 8 mai 2026 — Fallback robuste 100 % autonome quand :
|
||||
- plyer.notification est silencieux sous Windows 11 (Focus Assist, balloon tips
|
||||
bloqués par la stratégie système),
|
||||
- la ChatWindow Léa V1 est `withdraw()`-cachée par défaut (Dom ne la voit pas),
|
||||
- aucune autre UI ne peut garantir que Dom verra physiquement le message.
|
||||
|
||||
Stratégie :
|
||||
- Toplevel topmost overrideredirect en haut à droite de l'écran principal,
|
||||
- fond bleu Léa, titre + message, auto-close après TOAST_DURATION_S,
|
||||
- thread-safe : peut être appelé depuis n'importe quel thread (le polling
|
||||
replay tourne dans un daemon thread, pas le thread principal),
|
||||
- aucune dépendance externe (juste tkinter stdlib),
|
||||
- rate limit interne pour éviter le flood (1 toast / 3s minimum).
|
||||
|
||||
Si un Tk root existe déjà dans le process (ChatWindow), on attache le Toplevel
|
||||
à ce root via `root.after(0, ...)` — c'est l'idiome thread-safe officiel de
|
||||
tkinter. Sinon on crée un Tk() dédié dans un daemon thread.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
from typing import Any, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Couleurs cohérentes avec le thème Léa (cf. chat_window.py)
|
||||
TOAST_BG = "#2563EB" # Bleu Léa (HEADER_BG)
|
||||
TOAST_FG = "#FFFFFF"
|
||||
TOAST_TITLE_BG = "#1E40AF" # Bleu plus foncé pour le bandeau titre
|
||||
TOAST_BORDER = "#1E3A8A"
|
||||
|
||||
TOAST_WIDTH = 380
|
||||
TOAST_PAD_X = 18
|
||||
TOAST_PAD_Y = 14
|
||||
TOAST_DURATION_MS = 15000
|
||||
TOAST_RATE_LIMIT_S = 3.0
|
||||
|
||||
_lock = threading.Lock()
|
||||
_last_shown_at: float = 0.0
|
||||
_last_message: str = ""
|
||||
|
||||
|
||||
def _resolve_existing_root() -> Optional[Any]:
|
||||
"""Tente de récupérer le Tk root déjà créé par la ChatWindow.
|
||||
|
||||
On évite tk._default_root (deprecated) et on remonte plutôt via les
|
||||
threads existants : la ChatWindow garde une référence dans son instance
|
||||
mais n'expose rien de global. On se rabat donc sur la création d'un Tk
|
||||
indépendant si on n'a rien — c'est sûr, tkinter supporte plusieurs Tk()
|
||||
concurrents tant qu'ils sont chacun dans leur propre thread.
|
||||
"""
|
||||
try:
|
||||
import tkinter as tk
|
||||
# tk._default_root est interne mais c'est le moyen le plus simple
|
||||
# de partager un mainloop existant. Si ChatWindow tourne, ce sera
|
||||
# son root.
|
||||
root = getattr(tk, "_default_root", None)
|
||||
if root is not None:
|
||||
# Vérifier qu'il est encore vivant
|
||||
try:
|
||||
root.winfo_exists()
|
||||
return root
|
||||
except Exception:
|
||||
return None
|
||||
return None
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _build_toast(parent: Any, title: str, message: str) -> Any:
|
||||
"""Construit le Toplevel toast (appelé dans le thread tkinter)."""
|
||||
import tkinter as tk
|
||||
|
||||
top = tk.Toplevel(parent)
|
||||
top.withdraw() # éviter le flash pendant la construction
|
||||
top.overrideredirect(True) # pas de barre de titre
|
||||
top.attributes("-topmost", True)
|
||||
try:
|
||||
# Petit boost de visibilité Windows : alpha légèrement transparent
|
||||
top.attributes("-alpha", 0.97)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Bordure visuelle (cadre extérieur foncé)
|
||||
outer = tk.Frame(top, bg=TOAST_BORDER, padx=2, pady=2)
|
||||
outer.pack(fill="both", expand=True)
|
||||
|
||||
# Bandeau titre
|
||||
title_frame = tk.Frame(outer, bg=TOAST_TITLE_BG)
|
||||
title_frame.pack(fill="x")
|
||||
tk.Label(
|
||||
title_frame,
|
||||
text=f" ⏸ {title}",
|
||||
bg=TOAST_TITLE_BG,
|
||||
fg=TOAST_FG,
|
||||
font=("Segoe UI", 12, "bold"),
|
||||
anchor="w",
|
||||
padx=10,
|
||||
pady=8,
|
||||
).pack(fill="x")
|
||||
|
||||
# Corps du message
|
||||
body_frame = tk.Frame(outer, bg=TOAST_BG)
|
||||
body_frame.pack(fill="both", expand=True)
|
||||
tk.Label(
|
||||
body_frame,
|
||||
text=message,
|
||||
bg=TOAST_BG,
|
||||
fg=TOAST_FG,
|
||||
font=("Segoe UI", 11),
|
||||
wraplength=TOAST_WIDTH - 40,
|
||||
justify="left",
|
||||
anchor="w",
|
||||
padx=TOAST_PAD_X,
|
||||
pady=TOAST_PAD_Y,
|
||||
).pack(fill="both", expand=True)
|
||||
|
||||
# Pied de page : "Cliquez pour fermer"
|
||||
footer = tk.Label(
|
||||
outer,
|
||||
text="Cliquez pour fermer",
|
||||
bg=TOAST_BG,
|
||||
fg="#BFDBFE",
|
||||
font=("Segoe UI", 9, "italic"),
|
||||
anchor="e",
|
||||
padx=10,
|
||||
pady=4,
|
||||
)
|
||||
footer.pack(fill="x", side="bottom")
|
||||
|
||||
# Position : haut-droite de l'écran principal
|
||||
top.update_idletasks()
|
||||
height = top.winfo_reqheight()
|
||||
screen_w = top.winfo_screenwidth()
|
||||
x = screen_w - TOAST_WIDTH - 16
|
||||
y = 16
|
||||
top.geometry(f"{TOAST_WIDTH}x{height}+{x}+{y}")
|
||||
|
||||
# Click anywhere to close
|
||||
def _close(_=None):
|
||||
try:
|
||||
top.destroy()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
top.bind("<Button-1>", _close)
|
||||
for child in (outer, title_frame, body_frame, footer):
|
||||
try:
|
||||
child.bind("<Button-1>", _close)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Afficher + boost focus brut pour passer devant Focus Assist
|
||||
top.deiconify()
|
||||
top.lift()
|
||||
try:
|
||||
top.focus_force()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Re-pin topmost après 100 ms (Windows désactive parfois -topmost
|
||||
# quand le focus est pris par une autre app)
|
||||
def _repin():
|
||||
try:
|
||||
top.attributes("-topmost", True)
|
||||
top.lift()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
top.after(100, _repin)
|
||||
top.after(500, _repin)
|
||||
top.after(2000, _repin)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Auto-close
|
||||
try:
|
||||
top.after(TOAST_DURATION_MS, _close)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return top
|
||||
|
||||
|
||||
def _show_in_dedicated_thread(title: str, message: str) -> None:
|
||||
"""Crée un Tk() indépendant dans un daemon thread.
|
||||
|
||||
Utilisé en fallback quand aucun Tk root n'existe. Le thread vit le
|
||||
temps du toast (~15s) puis se termine proprement.
|
||||
"""
|
||||
def _run():
|
||||
try:
|
||||
# DPI awareness (Windows haute résolution)
|
||||
try:
|
||||
import ctypes
|
||||
ctypes.windll.shcore.SetProcessDpiAwareness(1)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
import tkinter as tk
|
||||
|
||||
root = tk.Tk()
|
||||
root.withdraw()
|
||||
try:
|
||||
dpi = root.winfo_fpixels("1i")
|
||||
root.tk.call("tk", "scaling", dpi / 72.0)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
top = _build_toast(root, title, message)
|
||||
|
||||
# Quitter mainloop quand le toast est détruit
|
||||
def _watch():
|
||||
try:
|
||||
if not top.winfo_exists():
|
||||
root.quit()
|
||||
return
|
||||
except Exception:
|
||||
root.quit()
|
||||
return
|
||||
root.after(200, _watch)
|
||||
|
||||
root.after(200, _watch)
|
||||
root.mainloop()
|
||||
try:
|
||||
root.destroy()
|
||||
except Exception:
|
||||
pass
|
||||
except Exception:
|
||||
logger.debug("paused_toast dedicated thread failed", exc_info=True)
|
||||
|
||||
t = threading.Thread(target=_run, daemon=True, name="paused-toast-tk")
|
||||
t.start()
|
||||
|
||||
|
||||
def show_paused_toast(
|
||||
title: str = "Léa a besoin de votre aide",
|
||||
message: str = "",
|
||||
) -> bool:
|
||||
"""Affiche un toast paused topmost.
|
||||
|
||||
Thread-safe, rate-limité, sans dépendance externe. Retourne True si le
|
||||
toast a été déclenché, False s'il a été ignoré (rate limit ou erreur).
|
||||
"""
|
||||
global _last_shown_at, _last_message
|
||||
|
||||
if not message:
|
||||
message = "Action en attente de votre validation."
|
||||
|
||||
# Rate limit basique : éviter qu'un poll en boucle ouvre 50 toasts
|
||||
now = time.monotonic()
|
||||
with _lock:
|
||||
same_message = (message == _last_message)
|
||||
elapsed = now - _last_shown_at
|
||||
if same_message and elapsed < TOAST_RATE_LIMIT_S:
|
||||
logger.debug(
|
||||
"paused_toast rate-limited (%.1fs since last identical)", elapsed
|
||||
)
|
||||
return False
|
||||
_last_shown_at = now
|
||||
_last_message = message
|
||||
|
||||
# Tentative 1 : utiliser le Tk root existant (ChatWindow) via after()
|
||||
root = _resolve_existing_root()
|
||||
if root is not None:
|
||||
try:
|
||||
root.after(0, lambda: _build_toast(root, title, message))
|
||||
logger.info("paused_toast scheduled on existing Tk root")
|
||||
return True
|
||||
except Exception:
|
||||
logger.debug("paused_toast existing-root path failed", exc_info=True)
|
||||
|
||||
# Tentative 2 : créer un Tk() dans un daemon thread
|
||||
try:
|
||||
_show_in_dedicated_thread(title, message)
|
||||
logger.info("paused_toast scheduled in dedicated thread")
|
||||
return True
|
||||
except Exception:
|
||||
logger.error("paused_toast dedicated-thread path failed", exc_info=True)
|
||||
return False
|
||||
|
||||
|
||||
__all__ = ["show_paused_toast"]
|
||||
@@ -371,7 +371,13 @@ class SmartTrayV1:
|
||||
)
|
||||
if name and name.strip():
|
||||
name = name.strip()
|
||||
# Utiliser l'etat partage si disponible
|
||||
|
||||
# --- P1-LEA-SHADOW : d\u00e9clencher d'abord l'orchestrateur L\u00e9a Linux ---
|
||||
# On contacte agent-chat AVANT la capture locale. Si \u00e9chec,
|
||||
# bascule en mode d\u00e9grad\u00e9 (capture locale sans assistance).
|
||||
self._start_lea_orchestrator_session(name)
|
||||
|
||||
# --- Comportement historique pr\u00e9serv\u00e9 : capture locale ---
|
||||
if self._shared_state is not None:
|
||||
try:
|
||||
self._shared_state.start_recording(name)
|
||||
@@ -393,6 +399,55 @@ class SmartTrayV1:
|
||||
|
||||
threading.Thread(target=_dialog, daemon=True).start()
|
||||
|
||||
def _start_lea_orchestrator_session(self, session_name: str) -> None:
|
||||
"""Appelle POST /api/learn/start côté agent-chat Linux (P1-LEA-SHADOW).
|
||||
|
||||
Fail-safe : toute erreur (config absente, httpx manquant, timeout,
|
||||
5xx serveur...) bascule en mode dégradé sans bloquer la capture
|
||||
locale. L'utilisateur est informé via le NotificationManager.
|
||||
"""
|
||||
try:
|
||||
from ..config import AGENT_CHAT_URL, API_TOKEN, MACHINE_ID
|
||||
from ..network.lea_orchestrator_client import (
|
||||
LeaOrchestratorError,
|
||||
start_learning_session,
|
||||
)
|
||||
except Exception as exc: # pragma: no cover (import-time)
|
||||
logger.error("Impossible de charger le client orchestrateur Léa : %s", exc)
|
||||
self._notifier.notify(
|
||||
"Léa",
|
||||
"Serveur injoignable — apprentissage local uniquement.",
|
||||
)
|
||||
return
|
||||
|
||||
try:
|
||||
resp = start_learning_session(
|
||||
AGENT_CHAT_URL,
|
||||
machine_id=MACHINE_ID,
|
||||
session_name=session_name,
|
||||
api_token=API_TOKEN,
|
||||
trigger_source="tray_button",
|
||||
)
|
||||
except LeaOrchestratorError as exc:
|
||||
logger.error("Orchestrateur Léa injoignable : %s", exc)
|
||||
self._notifier.notify(
|
||||
"Léa",
|
||||
"Serveur injoignable — apprentissage local uniquement.",
|
||||
)
|
||||
return
|
||||
except Exception: # noqa: BLE001 — défensif
|
||||
logger.exception("Erreur inattendue orchestrateur Léa")
|
||||
self._notifier.notify(
|
||||
"Léa",
|
||||
"Erreur orchestrateur — apprentissage local uniquement.",
|
||||
)
|
||||
return
|
||||
|
||||
logger.info(
|
||||
"Session orchestrateur Léa OK : id=%s state=%s",
|
||||
resp.session_id, resp.state,
|
||||
)
|
||||
|
||||
def _on_stop_session(self, _icon=None, _item=None) -> None:
|
||||
"""Termine la session en cours et envoie les donnees."""
|
||||
count = self.actions_count
|
||||
@@ -504,6 +559,100 @@ class SmartTrayV1:
|
||||
|
||||
threading.Thread(target=_replay, daemon=True).start()
|
||||
|
||||
def _launch_replay_request(
|
||||
self,
|
||||
replay_request: Dict[str, Any],
|
||||
replay_name: str,
|
||||
) -> None:
|
||||
"""Lance un replay direct depuis un payload `replay_request` serveur."""
|
||||
endpoint = (replay_request or {}).get("endpoint", "")
|
||||
session_id = (replay_request or {}).get("session_id", "")
|
||||
machine_id = (replay_request or {}).get("machine_id") or self.machine_id
|
||||
|
||||
if endpoint != "/api/v1/traces/stream/replay-session" or not session_id:
|
||||
logger.warning("Replay request non supporté: %s", replay_request)
|
||||
self._notifier.notify(
|
||||
"Léa",
|
||||
"Je ne peux pas lancer ce test automatique pour le moment.",
|
||||
)
|
||||
return
|
||||
|
||||
def _replay():
|
||||
if self.server_client is None:
|
||||
return
|
||||
|
||||
with self._state_lock:
|
||||
self._replay_active = True
|
||||
self._update_icon()
|
||||
self._notifier.notify(
|
||||
"Léa",
|
||||
f"Le système d'intelligence artificielle exécute la "
|
||||
f"tâche '{replay_name}' sur votre écran.",
|
||||
)
|
||||
|
||||
try:
|
||||
import requests
|
||||
auth_headers = {}
|
||||
if self.server_client is not None:
|
||||
auth_headers = self.server_client._auth_headers()
|
||||
resp = requests.post(
|
||||
f"{self.server_client._stream_base}{endpoint}",
|
||||
params={
|
||||
"session_id": session_id,
|
||||
"machine_id": machine_id,
|
||||
},
|
||||
headers=auth_headers,
|
||||
timeout=30,
|
||||
allow_redirects=False,
|
||||
)
|
||||
if resp.ok:
|
||||
logger.info(
|
||||
"Replay direct démarré pour session %s (machine=%s)",
|
||||
session_id,
|
||||
machine_id,
|
||||
)
|
||||
else:
|
||||
self._notifier.notify(
|
||||
"Léa",
|
||||
"Hmm, le serveur a refusé le test immédiat.",
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error("Erreur lancement replay direct : %s", e)
|
||||
self._notifier.notify(
|
||||
"Léa",
|
||||
f"Oups, un problème : {e}",
|
||||
)
|
||||
finally:
|
||||
with self._state_lock:
|
||||
self._replay_active = False
|
||||
self._update_icon()
|
||||
|
||||
threading.Thread(target=_replay, daemon=True).start()
|
||||
|
||||
def offer_finalize_replay(
|
||||
self,
|
||||
replay_request: Dict[str, Any],
|
||||
replay_name: str,
|
||||
) -> None:
|
||||
"""Proposer à l'utilisateur de tester immédiatement la tâche apprise."""
|
||||
if not replay_request or not replay_request.get("session_id"):
|
||||
return
|
||||
|
||||
def _offer():
|
||||
self._notifier.notify(
|
||||
"Léa",
|
||||
f"J'ai compris la tâche '{replay_name}'. Voulez-vous la tester ?",
|
||||
)
|
||||
if not _ask_consent(
|
||||
"Léa — Test immédiat",
|
||||
f"J'ai compris la tâche '{replay_name}'. "
|
||||
"Voulez-vous la tester maintenant ?",
|
||||
):
|
||||
return
|
||||
self._launch_replay_request(replay_request, replay_name)
|
||||
|
||||
threading.Thread(target=_offer, daemon=True).start()
|
||||
|
||||
def _on_emergency_stop(self, _icon=None, _item=None) -> None:
|
||||
"""Arret d'urgence — stoppe TOUTES les activites de l'agent immediatement.
|
||||
|
||||
|
||||
@@ -2,12 +2,20 @@
|
||||
"""
|
||||
Gestionnaire de vision avancé pour Agent V1.
|
||||
Optimisé pour le streaming fibre avec détection de changement.
|
||||
|
||||
Captures disponibles :
|
||||
- Plein écran (full) : contexte global 1920x1080+
|
||||
- Crop ciblé (crop) : 80x80 autour du clic (apprentissage VLM)
|
||||
- Fenêtre active (window) : image isolée de la fenêtre + métadonnées
|
||||
(titre, rect, coordonnées clic relatives) — cross-platform
|
||||
"""
|
||||
|
||||
import os
|
||||
import time
|
||||
import logging
|
||||
import hashlib
|
||||
import platform
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from PIL import Image, ImageFilter, ImageStat
|
||||
import mss
|
||||
from ..config import TARGETED_CROP_SIZE, SCREENSHOT_QUALITY, BLUR_SENSITIVE
|
||||
@@ -15,6 +23,400 @@ from .blur_sensitive import blur_sensitive_regions
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# OS courant (détecté une seule fois)
|
||||
_SYSTEM = platform.system()
|
||||
|
||||
# QW1 — détection multi-écrans (fallback gracieux si screeninfo absent)
|
||||
try:
|
||||
from screeninfo import get_monitors as _screeninfo_get_monitors
|
||||
_SCREENINFO_AVAILABLE = True
|
||||
except ImportError:
|
||||
_SCREENINFO_AVAILABLE = False
|
||||
|
||||
|
||||
def _get_monitors_geometry() -> List[Dict[str, Any]]:
|
||||
"""Retourne la liste des monitors physiques avec leurs offsets.
|
||||
|
||||
Returns:
|
||||
List[dict] : [{idx, x, y, w, h, primary}, ...]. Vide si screeninfo
|
||||
indisponible (le serveur tombera sur fallback composite).
|
||||
"""
|
||||
if not _SCREENINFO_AVAILABLE:
|
||||
return []
|
||||
try:
|
||||
monitors = _screeninfo_get_monitors()
|
||||
return [
|
||||
{
|
||||
"idx": i,
|
||||
"x": int(m.x),
|
||||
"y": int(m.y),
|
||||
"w": int(m.width),
|
||||
"h": int(m.height),
|
||||
"primary": bool(getattr(m, "is_primary", False)),
|
||||
}
|
||||
for i, m in enumerate(monitors)
|
||||
]
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
|
||||
def _get_active_monitor_index() -> Optional[int]:
|
||||
"""Retourne l'index logique du monitor où se trouve le curseur (focus actif).
|
||||
|
||||
Returns:
|
||||
int ou None si indéterminable.
|
||||
"""
|
||||
if not _SCREENINFO_AVAILABLE:
|
||||
return None
|
||||
try:
|
||||
import pyautogui # import paresseux : évite la dépendance dure
|
||||
cx, cy = pyautogui.position()
|
||||
for i, m in enumerate(_screeninfo_get_monitors()):
|
||||
if m.x <= cx < m.x + m.width and m.y <= cy < m.y + m.height:
|
||||
return i
|
||||
except Exception:
|
||||
return None
|
||||
return None
|
||||
|
||||
|
||||
def _enrich_with_monitor_info(payload: dict) -> dict:
|
||||
"""Ajoute monitor_index et monitors_geometry au payload (in-place + return)."""
|
||||
if isinstance(payload, dict):
|
||||
payload["monitor_index"] = _get_active_monitor_index()
|
||||
payload["monitors_geometry"] = _get_monitors_geometry()
|
||||
return payload
|
||||
|
||||
|
||||
# Garde dimensions monitor (démo GHT 19 mai 2026) : mss.monitors[1] peut
|
||||
# retourner intermittemment des dims tronquées (cas observé 2560×60). Utiliser
|
||||
# ces dims pour normaliser des coords empoisonne la mémoire (TargetMemoryStore).
|
||||
MIN_MONITOR_WIDTH = 200
|
||||
MIN_MONITOR_HEIGHT = 200
|
||||
MONITOR_MAX_ATTEMPTS = 2
|
||||
MONITOR_RETRY_DELAY_S = 0.05
|
||||
BLACK_FRAME_MEAN_MAX = 1.0
|
||||
BLACK_FRAME_STDDEV_MAX = 1.0
|
||||
BLACK_FRAME_MAX_LUMA = 3
|
||||
|
||||
|
||||
def _is_monitor_sane(monitor) -> bool:
|
||||
"""True si les dims du monitor sont au-dessus du seuil de plausibilité."""
|
||||
if not isinstance(monitor, dict):
|
||||
return False
|
||||
w = monitor.get("width", 0) or 0
|
||||
h = monitor.get("height", 0) or 0
|
||||
return w >= MIN_MONITOR_WIDTH and h >= MIN_MONITOR_HEIGHT
|
||||
|
||||
|
||||
def _dim_str(monitor) -> str:
|
||||
"""Représentation courte WxH pour les logs (gère monitor=None)."""
|
||||
if not isinstance(monitor, dict):
|
||||
return "?x?"
|
||||
return f"{monitor.get('width', '?')}x{monitor.get('height', '?')}"
|
||||
|
||||
|
||||
def _acquire_safe_grab(max_attempts: int = MONITOR_MAX_ATTEMPTS,
|
||||
retry_delay_s: float = MONITOR_RETRY_DELAY_S,
|
||||
allow_secondary_fallback: bool = True):
|
||||
"""Ouvre mss et capture un monitor avec dimensions plausibles.
|
||||
|
||||
Stratégie en cascade :
|
||||
1. À chaque tentative, ouvrir un nouveau `mss.mss()` (peut rafraîchir le
|
||||
cache interne) et examiner monitors[1..n].
|
||||
2. Préférer monitors[1] (écran principal physique). Si aberrant ET
|
||||
`allow_secondary_fallback=True`, prendre le premier monitors[2..n]
|
||||
sain avec un WARNING explicite.
|
||||
3. Si `allow_secondary_fallback=False`, on n'accepte QUE monitors[1].
|
||||
Utile pour les méthodes qui reçoivent des coordonnées (x, y) en
|
||||
système écran composite : capturer un monitor secondaire produirait
|
||||
une image saine mais décalée par rapport à ces coords.
|
||||
4. Si aucune dim plausible : attendre `retry_delay_s` et retenter.
|
||||
5. Après `max_attempts` infructueuses : log ERROR et retourner
|
||||
(None, None) pour que l'appelant tombe en sortie d'erreur explicite.
|
||||
|
||||
Args:
|
||||
max_attempts: nombre de tentatives mss avant abandon.
|
||||
retry_delay_s: délai entre tentatives.
|
||||
allow_secondary_fallback: si False, refuser monitors[2..n] (fail-closed
|
||||
pour les méthodes coord-bearing).
|
||||
|
||||
Returns:
|
||||
Tuple (monitor_dict, PIL.Image) si capture saine réussie,
|
||||
(None, None) sinon.
|
||||
"""
|
||||
last_aberrant = None
|
||||
secondary_seen = False # un monitor secondaire sain a été vu mais refusé
|
||||
for attempt in range(max_attempts):
|
||||
with mss.mss() as sct:
|
||||
monitors = list(sct.monitors) if sct.monitors else []
|
||||
chosen = None
|
||||
chosen_idx = None
|
||||
for idx in range(1, len(monitors)):
|
||||
candidate = monitors[idx]
|
||||
if not _is_monitor_sane(candidate):
|
||||
last_aberrant = candidate
|
||||
logger.warning(
|
||||
"Monitor[%d] dims aberrantes (%s, seuil %dx%d) "
|
||||
"— attempt %d/%d",
|
||||
idx, _dim_str(candidate),
|
||||
MIN_MONITOR_WIDTH, MIN_MONITOR_HEIGHT,
|
||||
attempt + 1, max_attempts,
|
||||
)
|
||||
continue
|
||||
# Monitor sain trouvé
|
||||
if idx == 1 or allow_secondary_fallback:
|
||||
chosen = candidate
|
||||
chosen_idx = idx
|
||||
break
|
||||
# Sinon : sain mais secondaire interdit pour cet appelant
|
||||
secondary_seen = True
|
||||
logger.warning(
|
||||
"Monitor[%d] sain (%s) mais fallback secondaire refusé "
|
||||
"(allow_secondary_fallback=False) — capture cohérente "
|
||||
"des coords impossible",
|
||||
idx, _dim_str(candidate),
|
||||
)
|
||||
if chosen is not None:
|
||||
if chosen_idx != 1 or attempt > 0:
|
||||
logger.warning(
|
||||
"Capture fallback : monitor[%d] dim=%s, attempt=%d",
|
||||
chosen_idx, _dim_str(chosen), attempt + 1,
|
||||
)
|
||||
sct_img = sct.grab(chosen)
|
||||
img = Image.frombytes(
|
||||
"RGB", sct_img.size, sct_img.bgra, "raw", "BGRX",
|
||||
)
|
||||
return chosen, img
|
||||
if attempt < max_attempts - 1:
|
||||
time.sleep(retry_delay_s)
|
||||
if secondary_seen and not allow_secondary_fallback:
|
||||
logger.error(
|
||||
"Capture abandonnée : monitor[1] aberrant après %d tentatives "
|
||||
"(dernier vu %s) et fallback secondaire désactivé "
|
||||
"pour préserver la cohérence des coordonnées",
|
||||
max_attempts, _dim_str(last_aberrant),
|
||||
)
|
||||
else:
|
||||
logger.error(
|
||||
"Aucun monitor avec dims plausibles trouvé après %d tentatives "
|
||||
"(dernier vu : %s, seuil %dx%d) — capture abandonnée",
|
||||
max_attempts, _dim_str(last_aberrant),
|
||||
MIN_MONITOR_WIDTH, MIN_MONITOR_HEIGHT,
|
||||
)
|
||||
return None, None
|
||||
|
||||
|
||||
def _compute_luma_stats(img: Image.Image) -> Dict[str, float | int]:
|
||||
"""Retourne des stats simples de luminance pour diagnostiquer un frame noir."""
|
||||
gray = img.convert("L")
|
||||
stat = ImageStat.Stat(gray)
|
||||
min_luma, max_luma = gray.getextrema()
|
||||
return {
|
||||
"mean": round(float(stat.mean[0]) if stat.mean else 0.0, 2),
|
||||
"stddev": round(float(stat.stddev[0]) if stat.stddev else 0.0, 2),
|
||||
"min": int(min_luma),
|
||||
"max": int(max_luma),
|
||||
}
|
||||
|
||||
|
||||
def _is_effectively_black(img: Image.Image) -> bool:
|
||||
"""Heuristique fail-closed pour refuser un screenshot pratiquement noir."""
|
||||
stats = _compute_luma_stats(img)
|
||||
return (
|
||||
stats["max"] <= BLACK_FRAME_MAX_LUMA
|
||||
and stats["mean"] <= BLACK_FRAME_MEAN_MAX
|
||||
and stats["stddev"] <= BLACK_FRAME_STDDEV_MAX
|
||||
)
|
||||
|
||||
|
||||
def _capture_via_imagegrab() -> Tuple[Optional[Dict[str, int]], Optional[Image.Image], Dict[str, Any]]:
|
||||
"""Fallback Windows via Pillow/ImageGrab.
|
||||
|
||||
Utile quand `mss` retourne un frame noir alors que la session graphique
|
||||
utilisateur reste visible.
|
||||
"""
|
||||
if _SYSTEM != "Windows":
|
||||
return None, None, {"backend": "imagegrab", "error": "unsupported_platform"}
|
||||
|
||||
try:
|
||||
from PIL import ImageGrab
|
||||
except ImportError as exc:
|
||||
return None, None, {"backend": "imagegrab", "error": str(exc)}
|
||||
|
||||
try:
|
||||
img = ImageGrab.grab(all_screens=True)
|
||||
except Exception as exc:
|
||||
logger.warning("ImageGrab indisponible pour le fallback capture : %s", exc)
|
||||
return None, None, {"backend": "imagegrab", "error": str(exc)}
|
||||
|
||||
monitor = {"left": 0, "top": 0, "width": img.width, "height": img.height}
|
||||
return monitor, img, {
|
||||
"backend": "imagegrab",
|
||||
"luma": _compute_luma_stats(img),
|
||||
}
|
||||
|
||||
|
||||
def capture_screen_image(
|
||||
allow_secondary_fallback: bool = True,
|
||||
) -> Tuple[Optional[Dict[str, int]], Optional[Image.Image], Dict[str, Any]]:
|
||||
"""Capture plein écran avec diagnostic noir + fallback Windows.
|
||||
|
||||
Returns:
|
||||
(monitor, image, meta) où image peut être None si aucun backend plein
|
||||
écran n'a produit une image exploitable.
|
||||
"""
|
||||
monitor, img = _acquire_safe_grab(
|
||||
allow_secondary_fallback=allow_secondary_fallback
|
||||
)
|
||||
meta: Dict[str, Any] = {"backend": "mss"}
|
||||
|
||||
if img is not None:
|
||||
meta["luma"] = _compute_luma_stats(img)
|
||||
if not _is_effectively_black(img):
|
||||
return monitor, img, meta
|
||||
logger.warning(
|
||||
"Capture mss quasi noire (%s) — tentative de fallback",
|
||||
meta["luma"],
|
||||
)
|
||||
meta["mss_black_frame"] = True
|
||||
else:
|
||||
meta["mss_unavailable"] = True
|
||||
|
||||
fallback_monitor, fallback_img, fallback_meta = _capture_via_imagegrab()
|
||||
if fallback_img is not None:
|
||||
if not _is_effectively_black(fallback_img):
|
||||
logger.warning(
|
||||
"Capture fallback via ImageGrab (%sx%s)",
|
||||
fallback_img.width,
|
||||
fallback_img.height,
|
||||
)
|
||||
return fallback_monitor, fallback_img, fallback_meta
|
||||
logger.warning(
|
||||
"Capture ImageGrab quasi noire (%s)",
|
||||
fallback_meta.get("luma"),
|
||||
)
|
||||
meta["imagegrab_black_frame"] = True
|
||||
|
||||
meta["imagegrab_error"] = fallback_meta.get("error")
|
||||
return None, None, meta
|
||||
|
||||
|
||||
def _capture_window_image_windows(
|
||||
hwnd: int,
|
||||
width: int,
|
||||
height: int,
|
||||
) -> Tuple[Optional[Image.Image], Dict[str, Any]]:
|
||||
"""Capture une fenêtre Windows via PrintWindow.
|
||||
|
||||
Fallback utile quand la capture plein écran est noire mais que la fenêtre
|
||||
active reste imprimable par l'API Win32.
|
||||
"""
|
||||
if _SYSTEM != "Windows":
|
||||
return None, {"backend": "printwindow", "error": "unsupported_platform"}
|
||||
|
||||
try:
|
||||
import ctypes
|
||||
import win32gui
|
||||
import win32ui
|
||||
except ImportError as exc:
|
||||
return None, {"backend": "printwindow", "error": str(exc)}
|
||||
|
||||
last_error = None
|
||||
for flag in (3, 2, 0):
|
||||
wnd_dc = None
|
||||
src_dc = None
|
||||
mem_dc = None
|
||||
bmp = None
|
||||
try:
|
||||
wnd_dc = win32gui.GetWindowDC(hwnd)
|
||||
if not wnd_dc:
|
||||
raise RuntimeError("GetWindowDC a retourné 0")
|
||||
src_dc = win32ui.CreateDCFromHandle(wnd_dc)
|
||||
mem_dc = src_dc.CreateCompatibleDC()
|
||||
bmp = win32ui.CreateBitmap()
|
||||
bmp.CreateCompatibleBitmap(src_dc, width, height)
|
||||
mem_dc.SelectObject(bmp)
|
||||
result = ctypes.windll.user32.PrintWindow(
|
||||
hwnd, mem_dc.GetSafeHdc(), flag
|
||||
)
|
||||
bits = bmp.GetBitmapBits(True)
|
||||
img = Image.frombuffer(
|
||||
"RGB", (width, height), bits, "raw", "BGRX", 0, 1
|
||||
)
|
||||
luma = _compute_luma_stats(img)
|
||||
if result or not _is_effectively_black(img):
|
||||
return img, {
|
||||
"backend": f"printwindow:{flag}",
|
||||
"printwindow_result": int(result),
|
||||
"luma": luma,
|
||||
}
|
||||
except Exception as exc:
|
||||
last_error = str(exc)
|
||||
finally:
|
||||
try:
|
||||
if bmp is not None:
|
||||
win32gui.DeleteObject(bmp.GetHandle())
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
if mem_dc is not None:
|
||||
mem_dc.DeleteDC()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
if src_dc is not None:
|
||||
src_dc.DeleteDC()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
if wnd_dc is not None:
|
||||
win32gui.ReleaseDC(hwnd, wnd_dc)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return None, {
|
||||
"backend": "printwindow",
|
||||
"error": last_error or "no_usable_frame",
|
||||
}
|
||||
|
||||
|
||||
def capture_foreground_window_image() -> Tuple[Optional[Image.Image], Dict[str, Any]]:
|
||||
"""Capture la fenêtre au focus via API native si disponible."""
|
||||
try:
|
||||
from ..window_info_crossplatform import get_active_window_rect
|
||||
|
||||
rect_info = get_active_window_rect()
|
||||
except Exception as exc:
|
||||
return None, {"backend": "printwindow", "error": str(exc)}
|
||||
|
||||
if not rect_info:
|
||||
return None, {"backend": "printwindow", "error": "active_window_unavailable"}
|
||||
|
||||
win_w, win_h = rect_info.get("size", [0, 0])
|
||||
hwnd = rect_info.get("hwnd")
|
||||
if not hwnd or win_w <= 0 or win_h <= 0:
|
||||
return None, {
|
||||
"backend": "printwindow",
|
||||
"error": "active_window_handle_unavailable",
|
||||
"title": rect_info.get("title", "unknown_window"),
|
||||
}
|
||||
|
||||
img, meta = _capture_window_image_windows(hwnd, win_w, win_h)
|
||||
if img is None:
|
||||
return None, meta
|
||||
|
||||
meta.update(
|
||||
{
|
||||
"title": rect_info.get("title", "unknown_window"),
|
||||
"app_name": rect_info.get("app_name", "unknown_app"),
|
||||
"rect": rect_info.get("rect"),
|
||||
"window_size": rect_info.get("size"),
|
||||
"hwnd": hwnd,
|
||||
}
|
||||
)
|
||||
return img, meta
|
||||
|
||||
|
||||
class VisionCapturer:
|
||||
def __init__(self, session_dir: str):
|
||||
self.session_dir = session_dir
|
||||
@@ -27,66 +429,257 @@ class VisionCapturer:
|
||||
"""
|
||||
Capture l'écran complet.
|
||||
Si force=False, vérifie d'abord si l'écran a changé.
|
||||
|
||||
Enrichit les métadonnées avec le titre de la fenêtre active
|
||||
(utile pour le contextualisation des heartbeats côté serveur).
|
||||
"""
|
||||
try:
|
||||
with mss.mss() as sct:
|
||||
monitor = sct.monitors[1]
|
||||
sct_img = sct.grab(monitor)
|
||||
img = Image.frombytes("RGB", sct_img.size, sct_img.bgra, "raw", "BGRX")
|
||||
|
||||
# Détection de changement (pour Heartbeat)
|
||||
if not force:
|
||||
current_hash = self._compute_quick_hash(img)
|
||||
if current_hash == self.last_img_hash:
|
||||
return "" # Pas de changement, on économise la fibre
|
||||
self.last_img_hash = current_hash
|
||||
_monitor, img, meta = capture_screen_image()
|
||||
if img is None:
|
||||
img, win_meta = capture_foreground_window_image()
|
||||
if img is None:
|
||||
logger.error(
|
||||
"Capture plein contexte indisponible (meta=%s, window=%s)",
|
||||
meta,
|
||||
win_meta,
|
||||
)
|
||||
return ""
|
||||
logger.warning(
|
||||
"Capture plein contexte dégradée via fenêtre active (%s)",
|
||||
win_meta.get("backend"),
|
||||
)
|
||||
|
||||
# Floutage des données sensibles (conformité AI Act)
|
||||
if BLUR_SENSITIVE:
|
||||
blur_sensitive_regions(img)
|
||||
# Détection de changement (pour Heartbeat)
|
||||
if not force:
|
||||
current_hash = self._compute_quick_hash(img)
|
||||
if current_hash == self.last_img_hash:
|
||||
return "" # Pas de changement, on économise la fibre
|
||||
self.last_img_hash = current_hash
|
||||
|
||||
path = os.path.join(self.shots_dir, f"context_{int(time.time())}_{name_suffix}.png")
|
||||
img.save(path, "PNG", quality=SCREENSHOT_QUALITY)
|
||||
return path
|
||||
# Floutage des données sensibles (conformité AI Act)
|
||||
if BLUR_SENSITIVE:
|
||||
blur_sensitive_regions(img)
|
||||
|
||||
path = os.path.join(self.shots_dir, f"context_{int(time.time())}_{name_suffix}.png")
|
||||
img.save(path, "PNG", quality=SCREENSHOT_QUALITY)
|
||||
return path
|
||||
except Exception as e:
|
||||
logger.error(f"Erreur Context Capture: {e}")
|
||||
return ""
|
||||
|
||||
def capture_dual(self, x: int, y: int, screenshot_id: str, anonymize=False) -> dict:
|
||||
"""Capture duale (Full + Crop) systématique (forcée car liée à une action)."""
|
||||
def get_active_window_title(self) -> str:
|
||||
"""Retourne le titre de la fenêtre active (pour enrichir les heartbeats).
|
||||
|
||||
Fallback gracieux : retourne une chaîne vide si indisponible.
|
||||
"""
|
||||
try:
|
||||
with mss.mss() as sct:
|
||||
full_path = os.path.join(self.shots_dir, f"{screenshot_id}_full.png")
|
||||
monitor = sct.monitors[1]
|
||||
sct_img = sct.grab(monitor)
|
||||
img = Image.frombytes("RGB", sct_img.size, sct_img.bgra, "raw", "BGRX")
|
||||
from ..window_info_crossplatform import get_active_window_info
|
||||
info = get_active_window_info()
|
||||
return info.get("title", "")
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
# Capture du Crop (Cœur de l'apprentissage qwen3-vl)
|
||||
crop_path = os.path.join(self.shots_dir, f"{screenshot_id}_crop.png")
|
||||
w, h = TARGETED_CROP_SIZE
|
||||
left = max(0, x - w // 2)
|
||||
top = max(0, y - h // 2)
|
||||
crop_img = img.crop((left, top, left + w, top + h))
|
||||
|
||||
if anonymize:
|
||||
crop_img = crop_img.filter(ImageFilter.GaussianBlur(radius=4))
|
||||
def capture_dual(self, x: int, y: int, screenshot_id: str, anonymize=False) -> dict:
|
||||
"""Capture triple (Full + Crop + Fenêtre active) systématique.
|
||||
|
||||
# Floutage des données sensibles (conformité AI Act)
|
||||
if BLUR_SENSITIVE:
|
||||
blur_sensitive_regions(img)
|
||||
blur_sensitive_regions(crop_img)
|
||||
La fenêtre active est un AJOUT — en cas d'échec, le full + crop
|
||||
sont toujours retournés (fallback gracieux).
|
||||
"""
|
||||
try:
|
||||
# Coords (x, y) sont en système écran composite ; cropper depuis
|
||||
# un monitor secondaire (offset ≠ 0) produirait une image saine
|
||||
# mais décalée → fail-closed sur fallback secondaire.
|
||||
_monitor, img, meta = capture_screen_image(
|
||||
allow_secondary_fallback=False
|
||||
)
|
||||
if img is None:
|
||||
window_info = self.capture_active_window(
|
||||
x, y, screenshot_id, full_img=None
|
||||
)
|
||||
if window_info:
|
||||
result = {"window_capture": window_info}
|
||||
_enrich_with_monitor_info(result)
|
||||
logger.warning(
|
||||
"capture_dual dégradée: fenêtre active seule (%s)",
|
||||
meta,
|
||||
)
|
||||
return result
|
||||
return {}
|
||||
|
||||
img.save(full_path, "PNG", quality=SCREENSHOT_QUALITY)
|
||||
crop_img.save(crop_path, "PNG", quality=SCREENSHOT_QUALITY)
|
||||
full_path = os.path.join(self.shots_dir, f"{screenshot_id}_full.png")
|
||||
|
||||
# Mise à jour du hash pour le prochain heartbeat
|
||||
self.last_img_hash = self._compute_quick_hash(img)
|
||||
# Capture du Crop (Cœur de l'apprentissage qwen3-vl)
|
||||
crop_path = os.path.join(self.shots_dir, f"{screenshot_id}_crop.png")
|
||||
w, h = TARGETED_CROP_SIZE
|
||||
left = max(0, x - w // 2)
|
||||
top = max(0, y - h // 2)
|
||||
crop_img = img.crop((left, top, left + w, top + h))
|
||||
|
||||
return {"full": full_path, "crop": crop_path}
|
||||
if anonymize:
|
||||
crop_img = crop_img.filter(ImageFilter.GaussianBlur(radius=4))
|
||||
|
||||
# Floutage des données sensibles (conformité AI Act)
|
||||
if BLUR_SENSITIVE:
|
||||
blur_sensitive_regions(img)
|
||||
blur_sensitive_regions(crop_img)
|
||||
|
||||
img.save(full_path, "PNG", quality=SCREENSHOT_QUALITY)
|
||||
crop_img.save(crop_path, "PNG", quality=SCREENSHOT_QUALITY)
|
||||
|
||||
# Mise à jour du hash pour le prochain heartbeat
|
||||
self.last_img_hash = self._compute_quick_hash(img)
|
||||
|
||||
result = {"full": full_path, "crop": crop_path}
|
||||
|
||||
# --- Capture de la fenêtre active ---
|
||||
# Ajout non-bloquant : enrichit le résultat avec l'image
|
||||
# de la fenêtre seule + métadonnées (titre, rect, clic relatif)
|
||||
window_info = self.capture_active_window(x, y, screenshot_id, full_img=img)
|
||||
if window_info:
|
||||
result["window_capture"] = window_info
|
||||
|
||||
# QW1 — enrichissement multi-écrans (additif, fallback gracieux)
|
||||
_enrich_with_monitor_info(result)
|
||||
|
||||
return result
|
||||
except Exception as e:
|
||||
logger.error(f"Erreur Dual Capture: {e}")
|
||||
return {}
|
||||
|
||||
def capture_active_window(
|
||||
self,
|
||||
x: int,
|
||||
y: int,
|
||||
screenshot_id: str,
|
||||
full_img: Optional[Image.Image] = None,
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""Capture l'image de la fenêtre active seule + métadonnées.
|
||||
|
||||
Stratégie :
|
||||
1. Obtenir le rectangle de la fenêtre via l'API OS (pywin32 / xdotool / Quartz)
|
||||
2. Cropper depuis le screenshot plein écran (plus fiable que PrintWindow)
|
||||
3. Calculer les coordonnées du clic relatives à la fenêtre
|
||||
|
||||
Args:
|
||||
x, y: coordonnées du clic en pixels écran
|
||||
screenshot_id: identifiant pour le nom de fichier
|
||||
full_img: screenshot plein écran déjà capturé (optionnel, évite une
|
||||
double capture si appelé depuis capture_dual)
|
||||
|
||||
Returns:
|
||||
Dict avec window_image, window_title, window_rect, click_in_window,
|
||||
window_size — ou None si la fenêtre est introuvable.
|
||||
"""
|
||||
try:
|
||||
from ..window_info_crossplatform import get_active_window_rect
|
||||
|
||||
rect_info = get_active_window_rect()
|
||||
if not rect_info:
|
||||
logger.debug("Fenêtre active introuvable — skip capture fenêtre")
|
||||
return None
|
||||
|
||||
win_rect = rect_info["rect"] # [left, top, right, bottom]
|
||||
win_left, win_top, win_right, win_bottom = win_rect
|
||||
win_w, win_h = rect_info["size"] # [width, height]
|
||||
title = rect_info.get("title", "unknown_window")
|
||||
app_name = rect_info.get("app_name", "unknown_app")
|
||||
|
||||
# Ignorer les fenêtres trop petites (barres de tâches, popups système)
|
||||
if win_w < 50 or win_h < 50:
|
||||
logger.debug(f"Fenêtre trop petite ({win_w}x{win_h}) — skip")
|
||||
return None
|
||||
|
||||
# Coordonnées du clic relatives à la fenêtre
|
||||
click_rel_x = x - win_left
|
||||
click_rel_y = y - win_top
|
||||
|
||||
# Si le clic est en dehors de la fenêtre, on le signale mais on continue
|
||||
click_inside = (0 <= click_rel_x <= win_w and 0 <= click_rel_y <= win_h)
|
||||
|
||||
window_img = None
|
||||
|
||||
# --- Crop de la fenêtre depuis le plein écran ---
|
||||
if full_img is None:
|
||||
# Pas de screenshot fourni — en capturer un (cas standalone).
|
||||
# win_rect est en coords globales ; cropper depuis un monitor
|
||||
# secondaire produirait une image décalée → fail-closed sur
|
||||
# fallback secondaire.
|
||||
try:
|
||||
_monitor, full_img, _meta = capture_screen_image(
|
||||
allow_secondary_fallback=False
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Erreur capture plein écran pour fenêtre : {e}")
|
||||
full_img = None
|
||||
|
||||
if full_img is not None and not _is_effectively_black(full_img):
|
||||
img_w, img_h = full_img.size
|
||||
crop_left = max(0, win_left)
|
||||
crop_top = max(0, win_top)
|
||||
crop_right = min(img_w, win_right)
|
||||
crop_bottom = min(img_h, win_bottom)
|
||||
|
||||
if crop_right > crop_left and crop_bottom > crop_top:
|
||||
window_img = full_img.crop(
|
||||
(crop_left, crop_top, crop_right, crop_bottom)
|
||||
)
|
||||
else:
|
||||
logger.debug("Fenêtre hors écran — fallback natif si possible")
|
||||
elif full_img is not None:
|
||||
logger.warning(
|
||||
"capture_active_window: screenshot plein écran noir, fallback natif"
|
||||
)
|
||||
|
||||
if window_img is None and rect_info.get("hwnd"):
|
||||
window_img, native_meta = _capture_window_image_windows(
|
||||
rect_info["hwnd"], win_w, win_h
|
||||
)
|
||||
if window_img is not None:
|
||||
logger.warning(
|
||||
"capture_active_window via fallback natif (%s)",
|
||||
native_meta.get("backend"),
|
||||
)
|
||||
|
||||
if window_img is None:
|
||||
logger.debug("Fenêtre hors écran ou capture native indisponible")
|
||||
return None
|
||||
|
||||
# Floutage conformité AI Act
|
||||
if BLUR_SENSITIVE:
|
||||
blur_sensitive_regions(window_img)
|
||||
|
||||
# Sauvegarde
|
||||
window_path = os.path.join(
|
||||
self.shots_dir, f"{screenshot_id}_window.png"
|
||||
)
|
||||
window_img.save(window_path, "PNG", quality=SCREENSHOT_QUALITY)
|
||||
|
||||
result = {
|
||||
"window_image": window_path,
|
||||
"window_title": title,
|
||||
"app_name": app_name,
|
||||
"window_rect": win_rect,
|
||||
"window_size": [win_w, win_h],
|
||||
"click_in_window": [click_rel_x, click_rel_y],
|
||||
"click_inside_window": click_inside,
|
||||
}
|
||||
|
||||
# QW1 — enrichissement multi-écrans (additif)
|
||||
_enrich_with_monitor_info(result)
|
||||
|
||||
logger.debug(
|
||||
f"Fenêtre capturée : {title} ({win_w}x{win_h}) — "
|
||||
f"clic relatif ({click_rel_x}, {click_rel_y})"
|
||||
)
|
||||
return result
|
||||
|
||||
except ImportError as e:
|
||||
logger.debug(f"Module fenêtre indisponible : {e}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"Erreur capture fenêtre active : {e}")
|
||||
return None
|
||||
|
||||
def _compute_quick_hash(self, img: Image) -> str:
|
||||
"""Calcule un hash rapide basé sur une vignette réduite pour détecter les changements."""
|
||||
# On réduit l'image à 64x64 pour comparer les masses de couleurs (très rapide)
|
||||
|
||||
@@ -17,7 +17,7 @@ from __future__ import annotations
|
||||
|
||||
import platform
|
||||
import subprocess
|
||||
from typing import Dict, Optional
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
|
||||
def _run_cmd(cmd: list[str]) -> Optional[str]:
|
||||
@@ -36,11 +36,11 @@ def get_active_window_info() -> Dict[str, str]:
|
||||
"title": "...",
|
||||
"app_name": "..."
|
||||
}
|
||||
|
||||
|
||||
Détecte automatiquement l'OS et utilise la méthode appropriée.
|
||||
"""
|
||||
system = platform.system()
|
||||
|
||||
|
||||
if system == "Linux":
|
||||
return _get_window_info_linux()
|
||||
elif system == "Windows":
|
||||
@@ -51,6 +51,32 @@ def get_active_window_info() -> Dict[str, str]:
|
||||
return {"title": "unknown_window", "app_name": "unknown_app"}
|
||||
|
||||
|
||||
def get_active_window_rect() -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Renvoie le rectangle de la fenêtre active :
|
||||
{
|
||||
"title": "...",
|
||||
"app_name": "...",
|
||||
"rect": [left, top, right, bottom],
|
||||
"position": [left, top],
|
||||
"size": [width, height],
|
||||
"hwnd": int # Windows uniquement
|
||||
}
|
||||
|
||||
Retourne None si la fenêtre est introuvable ou minimisée.
|
||||
Détecte automatiquement l'OS et utilise la méthode appropriée.
|
||||
"""
|
||||
system = platform.system()
|
||||
|
||||
if system == "Windows":
|
||||
return _get_window_rect_windows()
|
||||
elif system == "Linux":
|
||||
return _get_window_rect_linux()
|
||||
elif system == "Darwin":
|
||||
return _get_window_rect_macos()
|
||||
return None
|
||||
|
||||
|
||||
def _get_window_info_linux() -> Dict[str, str]:
|
||||
"""
|
||||
Linux: utilise xdotool (X11)
|
||||
@@ -178,6 +204,163 @@ def _get_window_info_macos() -> Dict[str, str]:
|
||||
}
|
||||
|
||||
|
||||
def _get_window_rect_windows() -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Windows : utilise pywin32 pour obtenir le rectangle de la fenêtre active.
|
||||
|
||||
Retourne None si la fenêtre est minimisée (icônifiée) ou si pywin32 manque.
|
||||
"""
|
||||
try:
|
||||
import win32gui
|
||||
import win32process
|
||||
import psutil
|
||||
|
||||
hwnd = win32gui.GetForegroundWindow()
|
||||
if not hwnd:
|
||||
return None
|
||||
|
||||
# Ignorer les fenêtres minimisées (pas de contenu visible)
|
||||
if win32gui.IsIconic(hwnd):
|
||||
return None
|
||||
|
||||
title = win32gui.GetWindowText(hwnd) or "unknown_window"
|
||||
|
||||
# Rectangle de la fenêtre (coordonnées écran absolues)
|
||||
left, top, right, bottom = win32gui.GetWindowRect(hwnd)
|
||||
width = right - left
|
||||
height = bottom - top
|
||||
|
||||
# Ignorer les fenêtres de taille nulle ou absurde
|
||||
if width <= 0 or height <= 0:
|
||||
return None
|
||||
|
||||
# Nom du processus
|
||||
_, pid = win32process.GetWindowThreadProcessId(hwnd)
|
||||
try:
|
||||
app_name = psutil.Process(pid).name()
|
||||
except Exception:
|
||||
app_name = "unknown_app"
|
||||
|
||||
return {
|
||||
"title": title,
|
||||
"app_name": app_name,
|
||||
"rect": [left, top, right, bottom],
|
||||
"position": [left, top],
|
||||
"size": [width, height],
|
||||
"hwnd": hwnd,
|
||||
}
|
||||
|
||||
except ImportError:
|
||||
return None
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _get_window_rect_linux() -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Linux (X11) : utilise xdotool + xwininfo pour obtenir le rectangle.
|
||||
|
||||
Nécessite : sudo apt-get install xdotool x11-utils
|
||||
"""
|
||||
try:
|
||||
# Identifiant de la fenêtre active
|
||||
wid = _run_cmd(["xdotool", "getactivewindow"])
|
||||
if not wid:
|
||||
return None
|
||||
|
||||
title = _run_cmd(["xdotool", "getactivewindow", "getwindowname"]) or "unknown_window"
|
||||
pid_str = _run_cmd(["xdotool", "getactivewindow", "getwindowpid"])
|
||||
app_name = "unknown_app"
|
||||
if pid_str:
|
||||
app_name = _run_cmd(["ps", "-p", pid_str.strip(), "-o", "comm="]) or "unknown_app"
|
||||
|
||||
# Géométrie via xdotool --shell (position + taille)
|
||||
geom_raw = _run_cmd(["xdotool", "getwindowgeometry", "--shell", wid])
|
||||
if not geom_raw:
|
||||
return None
|
||||
|
||||
vals: Dict[str, int] = {}
|
||||
for line in geom_raw.strip().splitlines():
|
||||
if "=" in line:
|
||||
k, v = line.split("=", 1)
|
||||
try:
|
||||
vals[k.strip()] = int(v.strip())
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
if not {"X", "Y", "WIDTH", "HEIGHT"} <= vals.keys():
|
||||
return None
|
||||
|
||||
x, y = vals["X"], vals["Y"]
|
||||
w, h = vals["WIDTH"], vals["HEIGHT"]
|
||||
|
||||
return {
|
||||
"title": title,
|
||||
"app_name": app_name,
|
||||
"rect": [x, y, x + w, y + h],
|
||||
"position": [x, y],
|
||||
"size": [w, h],
|
||||
}
|
||||
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _get_window_rect_macos() -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
macOS : utilise Quartz (CGWindowListCopyWindowInfo) pour obtenir le rectangle.
|
||||
|
||||
Nécessite : pip install pyobjc-framework-Quartz
|
||||
"""
|
||||
try:
|
||||
from AppKit import NSWorkspace
|
||||
from Quartz import (
|
||||
CGWindowListCopyWindowInfo,
|
||||
kCGWindowListOptionOnScreenOnly,
|
||||
kCGNullWindowID,
|
||||
)
|
||||
|
||||
active_app = NSWorkspace.sharedWorkspace().activeApplication()
|
||||
app_name = active_app.get("NSApplicationName", "unknown_app")
|
||||
|
||||
window_list = CGWindowListCopyWindowInfo(
|
||||
kCGWindowListOptionOnScreenOnly, kCGNullWindowID
|
||||
)
|
||||
|
||||
for window in window_list:
|
||||
owner_name = window.get("kCGWindowOwnerName", "")
|
||||
if owner_name != app_name:
|
||||
continue
|
||||
|
||||
bounds = window.get("kCGWindowBounds")
|
||||
if not bounds:
|
||||
continue
|
||||
|
||||
x = int(bounds.get("X", 0))
|
||||
y = int(bounds.get("Y", 0))
|
||||
w = int(bounds.get("Width", 0))
|
||||
h = int(bounds.get("Height", 0))
|
||||
if w <= 0 or h <= 0:
|
||||
continue
|
||||
|
||||
title = window.get("kCGWindowName", "unknown_window") or "unknown_window"
|
||||
|
||||
return {
|
||||
"title": title,
|
||||
"app_name": app_name,
|
||||
"rect": [x, y, x + w, y + h],
|
||||
"position": [x, y],
|
||||
"size": [w, h],
|
||||
}
|
||||
|
||||
except ImportError:
|
||||
return None
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# Test rapide
|
||||
if __name__ == "__main__":
|
||||
import time
|
||||
@@ -185,8 +368,13 @@ if __name__ == "__main__":
|
||||
print(f"OS détecté: {platform.system()}")
|
||||
print("\nTest de capture fenêtre active (5 secondes)...")
|
||||
print("Changez de fenêtre pour tester!\n")
|
||||
|
||||
|
||||
for i in range(5):
|
||||
info = get_active_window_info()
|
||||
rect = get_active_window_rect()
|
||||
print(f"[{i+1}] App: {info['app_name']:20s} | Title: {info['title']}")
|
||||
if rect:
|
||||
print(f" Rect: {rect['rect']} | Size: {rect['size']}")
|
||||
else:
|
||||
print(" Rect: non disponible")
|
||||
time.sleep(1)
|
||||
|
||||
@@ -43,6 +43,9 @@ class EventCaptorV1:
|
||||
|
||||
# État des touches modificatrices
|
||||
self.modifiers = set()
|
||||
self._pending_standalone_win = False
|
||||
self._suppress_release_only_win_combo = False
|
||||
self._raw_key_buffer: List[Dict[str, Any]] = []
|
||||
|
||||
# Tracking du focus fenêtre
|
||||
self.last_window = None
|
||||
@@ -91,6 +94,7 @@ class EventCaptorV1:
|
||||
# Flush du buffer texte restant avant arrêt
|
||||
self._flush_text_buffer()
|
||||
# Annuler le timer s'il est en cours
|
||||
emit_escape = False
|
||||
with self._text_lock:
|
||||
if self._text_flush_timer is not None:
|
||||
self._text_flush_timer.cancel()
|
||||
@@ -159,7 +163,80 @@ class EventCaptorV1:
|
||||
# Clavier
|
||||
# ----------------------------------------------------------------
|
||||
|
||||
@staticmethod
|
||||
def _get_key_name(key) -> Optional[str]:
|
||||
"""Convertit un objet pynput Key/KeyCode en nom lisible."""
|
||||
if isinstance(key, KeyCode):
|
||||
return key.char if key.char else None
|
||||
if isinstance(key, Key):
|
||||
return key.name
|
||||
return str(key)
|
||||
|
||||
@staticmethod
|
||||
def _encode_key(key) -> Dict[str, Any]:
|
||||
if isinstance(key, KeyCode):
|
||||
return {"kind": "vk", "vk": key.vk, "char": key.char}
|
||||
if isinstance(key, Key):
|
||||
return {"kind": "key", "name": key.name}
|
||||
return {"kind": "unknown", "str": str(key)}
|
||||
|
||||
@staticmethod
|
||||
def _raw_key_name(raw_key: Dict[str, Any]) -> Optional[str]:
|
||||
if raw_key.get("kind") == "vk":
|
||||
char = raw_key.get("char")
|
||||
if char and len(str(char)) == 1:
|
||||
return str(char).lower()
|
||||
if raw_key.get("kind") == "key":
|
||||
name = raw_key.get("name")
|
||||
return str(name).lower() if name else None
|
||||
return None
|
||||
|
||||
def _emit_release_only_windows_combo(self) -> bool:
|
||||
"""Infère Win+<touche> quand seuls les releases sont capturés."""
|
||||
with self._text_lock:
|
||||
raw_keys = list(getattr(self, "_raw_key_buffer", []))
|
||||
if len(raw_keys) < 2:
|
||||
return False
|
||||
cmd_names = {"cmd", "cmd_l", "cmd_r"}
|
||||
last = raw_keys[-1]
|
||||
if last.get("action") != "release" or self._raw_key_name(last) not in cmd_names:
|
||||
return False
|
||||
combo_key = None
|
||||
modifier_names = {
|
||||
"ctrl", "ctrl_l", "ctrl_r",
|
||||
"alt", "alt_l", "alt_r",
|
||||
"shift", "shift_l", "shift_r",
|
||||
"cmd", "cmd_l", "cmd_r",
|
||||
}
|
||||
for raw in reversed(raw_keys[:-1]):
|
||||
if raw.get("action") != "release":
|
||||
continue
|
||||
name = self._raw_key_name(raw)
|
||||
if name and name not in modifier_names:
|
||||
combo_key = name
|
||||
break
|
||||
if not combo_key:
|
||||
return False
|
||||
self._raw_key_buffer.clear()
|
||||
|
||||
event = {
|
||||
"type": "key_combo",
|
||||
"keys": ["win", combo_key],
|
||||
"raw_keys": raw_keys,
|
||||
"timestamp": time.time(),
|
||||
}
|
||||
self.on_event(event)
|
||||
return True
|
||||
|
||||
def _on_press(self, key):
|
||||
with self._text_lock:
|
||||
if not hasattr(self, "_raw_key_buffer"):
|
||||
self._raw_key_buffer = []
|
||||
self._raw_key_buffer.append({
|
||||
"action": "press",
|
||||
**self._encode_key(key),
|
||||
})
|
||||
|
||||
# Gestion des touches modificatrices
|
||||
if key in (Key.ctrl, Key.ctrl_l, Key.ctrl_r):
|
||||
self.modifiers.add("ctrl")
|
||||
@@ -167,15 +244,26 @@ class EventCaptorV1:
|
||||
self.modifiers.add("alt")
|
||||
elif key in (Key.shift, Key.shift_l, Key.shift_r):
|
||||
self.modifiers.add("shift")
|
||||
elif key in (Key.cmd, Key.cmd_l, Key.cmd_r):
|
||||
self.modifiers.add("win")
|
||||
self._pending_standalone_win = True
|
||||
|
||||
# --- Combos avec modificateur (sauf Shift seul) ---
|
||||
# Shift seul n'est pas un « vrai » modificateur pour les combos :
|
||||
# Shift+a = 'A' = saisie texte, pas un raccourci.
|
||||
# On considère un combo seulement si Ctrl ou Alt est enfoncé.
|
||||
has_real_modifier = self.modifiers & {"ctrl", "alt"}
|
||||
# On considère un combo seulement si Ctrl, Alt ou Win est enfoncé.
|
||||
has_real_modifier = self.modifiers & {"ctrl", "alt", "win"}
|
||||
if has_real_modifier:
|
||||
key_name = self._get_key_name(key)
|
||||
if key_name and key_name not in ("ctrl", "alt", "shift"):
|
||||
if key_name and key_name not in (
|
||||
"ctrl", "ctrl_l", "ctrl_r",
|
||||
"alt", "alt_l", "alt_r",
|
||||
"shift", "shift_l", "shift_r",
|
||||
"cmd", "cmd_l", "cmd_r",
|
||||
):
|
||||
self._pending_standalone_win = False
|
||||
if "win" in self.modifiers:
|
||||
self._suppress_release_only_win_combo = True
|
||||
# Un combo interrompt la saisie texte en cours
|
||||
self._flush_text_buffer()
|
||||
event = {
|
||||
@@ -205,14 +293,18 @@ class EventCaptorV1:
|
||||
self._reset_flush_timer()
|
||||
return
|
||||
|
||||
if key == Key.escape:
|
||||
escape_keys = [Key.esc]
|
||||
key_escape = getattr(Key, "escape", None)
|
||||
if key_escape is not None:
|
||||
escape_keys.append(key_escape)
|
||||
if key in escape_keys:
|
||||
# Annuler la saisie en cours
|
||||
self._text_buffer.clear()
|
||||
self._text_start_pos = None
|
||||
self._cancel_flush_timer()
|
||||
return
|
||||
emit_escape = True
|
||||
|
||||
if key in (Key.enter, Key.tab):
|
||||
elif key in (Key.enter, Key.tab):
|
||||
# Flush immédiat — on relâche le lock avant d'appeler
|
||||
# _flush_text_buffer (qui prend aussi le lock)
|
||||
pass # on sort du with et on flush après
|
||||
@@ -238,6 +330,15 @@ class EventCaptorV1:
|
||||
# Touche spéciale non gérée (F1, Insert, etc.) — on ignore
|
||||
return
|
||||
|
||||
if emit_escape:
|
||||
event = {
|
||||
"type": "key_combo",
|
||||
"keys": ["escape"],
|
||||
"timestamp": time.time(),
|
||||
}
|
||||
self.on_event(event)
|
||||
return
|
||||
|
||||
# Si on arrive ici, c'est Enter ou Tab → flush immédiat
|
||||
self._flush_text_buffer()
|
||||
|
||||
@@ -290,12 +391,46 @@ class EventCaptorV1:
|
||||
self.on_event(event)
|
||||
|
||||
def _on_release(self, key):
|
||||
with self._text_lock:
|
||||
self._raw_key_buffer.append({
|
||||
"action": "release",
|
||||
**self._encode_key(key),
|
||||
})
|
||||
|
||||
if key in (Key.cmd, Key.cmd_l, Key.cmd_r) and self._suppress_release_only_win_combo:
|
||||
with self._text_lock:
|
||||
self._raw_key_buffer.clear()
|
||||
self._pending_standalone_win = False
|
||||
self._suppress_release_only_win_combo = False
|
||||
self.modifiers.discard("win")
|
||||
return
|
||||
|
||||
if key in (Key.cmd, Key.cmd_l, Key.cmd_r) and self._emit_release_only_windows_combo():
|
||||
self._pending_standalone_win = False
|
||||
self._suppress_release_only_win_combo = False
|
||||
self.modifiers.discard("win")
|
||||
return
|
||||
|
||||
if key in (Key.cmd, Key.cmd_l, Key.cmd_r) and self._pending_standalone_win:
|
||||
event = {
|
||||
"type": "key_combo",
|
||||
"keys": ["win"],
|
||||
"timestamp": time.time(),
|
||||
}
|
||||
self.on_event(event)
|
||||
self._pending_standalone_win = False
|
||||
self._suppress_release_only_win_combo = False
|
||||
|
||||
if key in (Key.ctrl, Key.ctrl_l, Key.ctrl_r):
|
||||
self.modifiers.discard("ctrl")
|
||||
elif key in (Key.alt, Key.alt_l, Key.alt_r):
|
||||
self.modifiers.discard("alt")
|
||||
elif key in (Key.shift, Key.shift_l, Key.shift_r):
|
||||
self.modifiers.discard("shift")
|
||||
elif key in (Key.cmd, Key.cmd_l, Key.cmd_r):
|
||||
self.modifiers.discard("win")
|
||||
self._pending_standalone_win = False
|
||||
self._suppress_release_only_win_combo = False
|
||||
|
||||
def _watch_window_focus(self):
|
||||
"""Surveille proactivement le changement de fenêtre pour le stagiaire."""
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
214
agent_v0/deploy/windows_client/agent_v1/core/grounding.py
Normal file
214
agent_v0/deploy/windows_client/agent_v1/core/grounding.py
Normal file
@@ -0,0 +1,214 @@
|
||||
# agent_v1/core/grounding.py
|
||||
"""
|
||||
Module Grounding — localisation pure d'éléments UI sur l'écran.
|
||||
|
||||
Responsabilité unique : "Trouve l'élément X sur l'écran et retourne ses coordonnées."
|
||||
Ne prend AUCUNE décision. Si l'élément n'est pas trouvé → retourne NOT_FOUND.
|
||||
|
||||
Stratégies disponibles (cascade configurable) :
|
||||
1. Serveur SomEngine + VLM (GPU distant)
|
||||
2. Template matching local (CPU, ~10ms)
|
||||
3. VLM local direct (CPU/GPU local)
|
||||
|
||||
Séparé de Policy (qui décide quoi faire quand grounding échoue).
|
||||
Ref: docs/PLAN_ACTEUR_V1.md — Architecture MICRO (grounding + exécution)
|
||||
"""
|
||||
|
||||
import base64
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class GroundingResult:
|
||||
"""Résultat d'une tentative de localisation visuelle."""
|
||||
found: bool # L'élément a été trouvé
|
||||
x_pct: float = 0.0 # Position X en % (0.0-1.0)
|
||||
y_pct: float = 0.0 # Position Y en % (0.0-1.0)
|
||||
method: str = "" # Méthode utilisée (server_som, anchor_template, vlm_direct...)
|
||||
score: float = 0.0 # Confiance (0.0-1.0)
|
||||
elapsed_ms: float = 0.0 # Temps de résolution
|
||||
detail: str = "" # Info supplémentaire (label trouvé, raison échec)
|
||||
raw: Optional[Dict] = None # Données brutes du resolver (pour debug)
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"found": self.found,
|
||||
"x_pct": self.x_pct,
|
||||
"y_pct": self.y_pct,
|
||||
"method": self.method,
|
||||
"score": round(self.score, 3),
|
||||
"elapsed_ms": round(self.elapsed_ms, 1),
|
||||
"detail": self.detail,
|
||||
}
|
||||
|
||||
|
||||
# Résultat singleton pour "pas trouvé"
|
||||
NOT_FOUND = GroundingResult(found=False, detail="Aucune méthode n'a trouvé l'élément")
|
||||
|
||||
|
||||
class GroundingEngine:
|
||||
"""Moteur de localisation visuelle d'éléments UI.
|
||||
|
||||
Encapsule la cascade de résolution (serveur → template → VLM local)
|
||||
avec une interface unifiée. Ne prend aucune décision — c'est le rôle
|
||||
de PolicyEngine.
|
||||
|
||||
Usage :
|
||||
engine = GroundingEngine(executor)
|
||||
result = engine.locate(screenshot_b64, target_spec, screen_w, screen_h)
|
||||
if result.found:
|
||||
click(result.x_pct, result.y_pct)
|
||||
"""
|
||||
|
||||
def __init__(self, executor):
|
||||
"""
|
||||
Args:
|
||||
executor: ActionExecutorV1 — fournit les méthodes de résolution existantes.
|
||||
"""
|
||||
self._executor = executor
|
||||
|
||||
def locate(
|
||||
self,
|
||||
server_url: str,
|
||||
target_spec: Dict[str, Any],
|
||||
fallback_x: float,
|
||||
fallback_y: float,
|
||||
screen_width: int,
|
||||
screen_height: int,
|
||||
strategies: Optional[List[str]] = None,
|
||||
) -> GroundingResult:
|
||||
"""Localiser un élément UI sur l'écran.
|
||||
|
||||
Exécute la cascade de stratégies dans l'ordre et retourne
|
||||
dès qu'une stratégie trouve l'élément.
|
||||
|
||||
Args:
|
||||
server_url: URL du serveur (SomEngine + VLM GPU)
|
||||
target_spec: Spécification de la cible (by_text, anchor, vlm_description...)
|
||||
fallback_x, fallback_y: Coordonnées de fallback (enregistrement)
|
||||
screen_width, screen_height: Résolution écran
|
||||
strategies: Liste ordonnée de stratégies à essayer.
|
||||
Par défaut : ["server", "template", "vlm_local"]
|
||||
|
||||
Returns:
|
||||
GroundingResult avec found=True et coordonnées, ou NOT_FOUND
|
||||
"""
|
||||
if strategies is None:
|
||||
strategies = ["server", "template", "vlm_local"]
|
||||
|
||||
# ── Apprentissage : réordonner les stratégies selon l'historique ──
|
||||
# Si le Learning sait quelle méthode marche pour cette cible,
|
||||
# la mettre en premier. C'est la boucle d'apprentissage.
|
||||
learned = target_spec.get("_learned_strategy", "")
|
||||
if learned:
|
||||
strategy_map = {
|
||||
"som_text_match": "server",
|
||||
"grounding_vlm": "server",
|
||||
"server_som": "server",
|
||||
"anchor_template": "template",
|
||||
"template_matching": "template",
|
||||
"hybrid_text_direct": "vlm_local",
|
||||
"hybrid_vlm_text": "vlm_local",
|
||||
"vlm_direct": "vlm_local",
|
||||
}
|
||||
preferred = strategy_map.get(learned, "")
|
||||
if preferred and preferred in strategies:
|
||||
strategies = [preferred] + [s for s in strategies if s != preferred]
|
||||
logger.info(
|
||||
f"Grounding: stratégie réordonnée par l'apprentissage → "
|
||||
f"{strategies} (learned={learned})"
|
||||
)
|
||||
|
||||
t_start = time.time()
|
||||
screenshot_b64 = self._executor._capture_screenshot_b64(max_width=0, quality=75)
|
||||
if not screenshot_b64:
|
||||
return GroundingResult(
|
||||
found=False, detail="Capture screenshot échouée",
|
||||
elapsed_ms=(time.time() - t_start) * 1000,
|
||||
)
|
||||
|
||||
for strategy in strategies:
|
||||
result = self._try_strategy(
|
||||
strategy, server_url, screenshot_b64, target_spec,
|
||||
fallback_x, fallback_y, screen_width, screen_height,
|
||||
)
|
||||
if result.found:
|
||||
result.elapsed_ms = (time.time() - t_start) * 1000
|
||||
return result
|
||||
|
||||
return GroundingResult(
|
||||
found=False,
|
||||
detail=f"Toutes les stratégies ont échoué ({', '.join(strategies)})",
|
||||
elapsed_ms=(time.time() - t_start) * 1000,
|
||||
)
|
||||
|
||||
def _try_strategy(
|
||||
self,
|
||||
strategy: str,
|
||||
server_url: str,
|
||||
screenshot_b64: str,
|
||||
target_spec: Dict[str, Any],
|
||||
fallback_x: float,
|
||||
fallback_y: float,
|
||||
screen_width: int,
|
||||
screen_height: int,
|
||||
) -> GroundingResult:
|
||||
"""Essayer une stratégie de grounding unique."""
|
||||
|
||||
if strategy == "server" and server_url:
|
||||
raw = self._executor._server_resolve_target(
|
||||
server_url, screenshot_b64, target_spec,
|
||||
fallback_x, fallback_y, screen_width, screen_height,
|
||||
)
|
||||
if raw and raw.get("resolved"):
|
||||
return GroundingResult(
|
||||
found=True,
|
||||
x_pct=raw["x_pct"],
|
||||
y_pct=raw["y_pct"],
|
||||
method=raw.get("method", "server"),
|
||||
score=raw.get("score", 0.0),
|
||||
detail=raw.get("matched_element", {}).get("label", ""),
|
||||
raw=raw,
|
||||
)
|
||||
|
||||
elif strategy == "template":
|
||||
anchor_b64 = target_spec.get("anchor_image_base64", "")
|
||||
if anchor_b64:
|
||||
raw = self._executor._template_match_anchor(
|
||||
screenshot_b64, anchor_b64, screen_width, screen_height,
|
||||
)
|
||||
if raw and raw.get("resolved"):
|
||||
return GroundingResult(
|
||||
found=True,
|
||||
x_pct=raw["x_pct"],
|
||||
y_pct=raw["y_pct"],
|
||||
method="anchor_template",
|
||||
score=raw.get("score", 0.0),
|
||||
raw=raw,
|
||||
)
|
||||
|
||||
elif strategy == "vlm_local":
|
||||
by_text = target_spec.get("by_text", "")
|
||||
vlm_desc = target_spec.get("vlm_description", "")
|
||||
if vlm_desc or by_text:
|
||||
raw = self._executor._hybrid_vlm_resolve(
|
||||
screenshot_b64, target_spec, screen_width, screen_height,
|
||||
)
|
||||
if raw and raw.get("resolved"):
|
||||
return GroundingResult(
|
||||
found=True,
|
||||
x_pct=raw["x_pct"],
|
||||
y_pct=raw["y_pct"],
|
||||
method=raw.get("method", "vlm_local"),
|
||||
score=raw.get("score", 0.0),
|
||||
detail=raw.get("matched_element", {}).get("label", ""),
|
||||
raw=raw,
|
||||
)
|
||||
|
||||
return GroundingResult(found=False, method=strategy, detail=f"{strategy}: pas trouvé")
|
||||
152
agent_v0/deploy/windows_client/agent_v1/core/policy.py
Normal file
152
agent_v0/deploy/windows_client/agent_v1/core/policy.py
Normal file
@@ -0,0 +1,152 @@
|
||||
# agent_v1/core/policy.py
|
||||
"""
|
||||
Module Policy — décisions intelligentes quand le grounding échoue.
|
||||
|
||||
Responsabilité unique : "Le Grounding dit NOT_FOUND. Que fait-on ?"
|
||||
Ne localise AUCUN élément — c'est le rôle du Grounding.
|
||||
|
||||
Décisions possibles :
|
||||
- RETRY : re-tenter le grounding (après popup fermée, par exemple)
|
||||
- SKIP : l'action n'est plus nécessaire (état déjà atteint)
|
||||
- ABORT : arrêter le workflow (état incohérent)
|
||||
- SUPERVISE : rendre la main à l'utilisateur
|
||||
|
||||
Séparé de Grounding (qui localise les éléments).
|
||||
Ref: docs/PLAN_ACTEUR_V1.md — Architecture MÉSO (acteur intelligent)
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Decision(Enum):
|
||||
"""Décisions possibles quand le grounding échoue."""
|
||||
RETRY = "retry" # Re-tenter (après correction : popup fermée, navigation...)
|
||||
SKIP = "skip" # Action inutile (état déjà atteint)
|
||||
ABORT = "abort" # Arrêter le workflow (état incohérent)
|
||||
SUPERVISE = "supervise" # Rendre la main à l'utilisateur (Léa dit "je bloque")
|
||||
CONTINUE = "continue" # Continuer malgré l'échec (action non critique)
|
||||
|
||||
|
||||
@dataclass
|
||||
class PolicyDecision:
|
||||
"""Résultat d'une décision Policy."""
|
||||
decision: Decision
|
||||
reason: str # Explication de la décision
|
||||
action_taken: str = "" # Action corrective effectuée (ex: "popup fermée")
|
||||
elapsed_ms: float = 0.0
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"decision": self.decision.value,
|
||||
"reason": self.reason,
|
||||
"action_taken": self.action_taken,
|
||||
"elapsed_ms": round(self.elapsed_ms, 1),
|
||||
}
|
||||
|
||||
|
||||
class PolicyEngine:
|
||||
"""Moteur de décision quand le grounding échoue.
|
||||
|
||||
Cascade de décision :
|
||||
1. Popup détectée ? → fermer et RETRY
|
||||
2. Acteur gemma4 → SKIP / ABORT / SUPERVISE
|
||||
3. Fallback → SUPERVISE (rendre la main)
|
||||
|
||||
Usage :
|
||||
policy = PolicyEngine(executor)
|
||||
decision = policy.decide(action, target_spec, grounding_result)
|
||||
if decision.decision == Decision.RETRY:
|
||||
# re-tenter le grounding
|
||||
elif decision.decision == Decision.SKIP:
|
||||
# marquer comme réussi, passer à la suite
|
||||
"""
|
||||
|
||||
def __init__(self, executor):
|
||||
self._executor = executor
|
||||
|
||||
def decide(
|
||||
self,
|
||||
action: Dict[str, Any],
|
||||
target_spec: Dict[str, Any],
|
||||
retry_count: int = 0,
|
||||
max_retries: int = 1,
|
||||
) -> PolicyDecision:
|
||||
"""Décider quoi faire quand le grounding a échoué.
|
||||
|
||||
Cascade :
|
||||
1. Si c'est le premier essai → tenter de fermer une popup → RETRY
|
||||
2. Si retry déjà fait → demander à l'acteur gemma4
|
||||
3. Selon gemma4 : SKIP, ABORT, ou SUPERVISE
|
||||
|
||||
Args:
|
||||
action: L'action qui a échoué
|
||||
target_spec: La cible non trouvée
|
||||
retry_count: Nombre de retries déjà faits
|
||||
max_retries: Maximum de retries autorisés
|
||||
"""
|
||||
t_start = time.time()
|
||||
|
||||
# ── Étape 1 : Tentative de fermeture popup (premier essai) ──
|
||||
if retry_count == 0:
|
||||
popup_handled = self._try_close_popup()
|
||||
if popup_handled:
|
||||
return PolicyDecision(
|
||||
decision=Decision.RETRY,
|
||||
reason="Popup détectée et fermée, re-tentative",
|
||||
action_taken="popup_closed",
|
||||
elapsed_ms=(time.time() - t_start) * 1000,
|
||||
)
|
||||
|
||||
# ── Étape 2 : Max retries atteint → acteur gemma4 ──
|
||||
if retry_count >= max_retries:
|
||||
actor_decision = self._ask_actor(action, target_spec)
|
||||
|
||||
if actor_decision == "PASSER":
|
||||
return PolicyDecision(
|
||||
decision=Decision.SKIP,
|
||||
reason="Acteur gemma4 : l'état est déjà atteint",
|
||||
elapsed_ms=(time.time() - t_start) * 1000,
|
||||
)
|
||||
elif actor_decision == "STOPPER":
|
||||
return PolicyDecision(
|
||||
decision=Decision.ABORT,
|
||||
reason="Acteur gemma4 : état incohérent, arrêt",
|
||||
elapsed_ms=(time.time() - t_start) * 1000,
|
||||
)
|
||||
else:
|
||||
# EXECUTER ou inconnu → pause supervisée
|
||||
return PolicyDecision(
|
||||
decision=Decision.SUPERVISE,
|
||||
reason=f"Acteur gemma4 : {actor_decision}, pause supervisée",
|
||||
elapsed_ms=(time.time() - t_start) * 1000,
|
||||
)
|
||||
|
||||
# ── Étape 3 : Encore des retries disponibles → RETRY ──
|
||||
return PolicyDecision(
|
||||
decision=Decision.RETRY,
|
||||
reason=f"Retry {retry_count + 1}/{max_retries}",
|
||||
elapsed_ms=(time.time() - t_start) * 1000,
|
||||
)
|
||||
|
||||
def _try_close_popup(self) -> bool:
|
||||
"""Tenter de fermer une popup via le handler VLM existant."""
|
||||
try:
|
||||
return self._executor._handle_popup_vlm()
|
||||
except Exception as e:
|
||||
logger.debug(f"Policy: popup handler échoué : {e}")
|
||||
return False
|
||||
|
||||
def _ask_actor(self, action: Dict, target_spec: Dict) -> str:
|
||||
"""Demander à gemma4 de décider (PASSER/EXECUTER/STOPPER)."""
|
||||
try:
|
||||
return self._executor._actor_decide(action, target_spec)
|
||||
except Exception as e:
|
||||
logger.debug(f"Policy: acteur gemma4 échoué : {e}")
|
||||
return "EXECUTER" # Fallback → supervisé
|
||||
294
agent_v0/deploy/windows_client/agent_v1/core/uia_helper.py
Normal file
294
agent_v0/deploy/windows_client/agent_v1/core/uia_helper.py
Normal file
@@ -0,0 +1,294 @@
|
||||
# core/workflow/uia_helper.py
|
||||
"""
|
||||
UIAHelper — Wrapper Python pour lea_uia.exe (helper Rust UI Automation).
|
||||
|
||||
Expose une API Python simple pour interroger UIA via le binaire Rust.
|
||||
Communique via subprocess + stdin/stdout JSON.
|
||||
|
||||
Pourquoi un helper Rust ?
|
||||
- 5-10x plus rapide que pywinauto (10-20ms vs 50-200ms)
|
||||
- Binaire standalone ~500 Ko, aucune dépendance runtime
|
||||
- Pas de problèmes de threading COM en Python
|
||||
- Crash-safe (le crash du helper n'affecte pas l'agent Python)
|
||||
|
||||
Architecture :
|
||||
Python executor
|
||||
↓ subprocess.run
|
||||
lea_uia.exe query --x 812 --y 436
|
||||
↓ UIA API Windows
|
||||
JSON response
|
||||
↓ stdout
|
||||
Python executor parse JSON
|
||||
|
||||
Si lea_uia.exe n'est pas disponible (Linux, binaire absent, crash) :
|
||||
toutes les méthodes retournent None → fallback vision automatique.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import subprocess
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Timeout par défaut pour les appels UIA (en secondes)
|
||||
_DEFAULT_TIMEOUT = 5.0
|
||||
|
||||
# Masquer la fenêtre console lors du spawn de lea_uia.exe sur Windows.
|
||||
# Sans ce flag, chaque appel (à chaque clic utilisateur pendant
|
||||
# l'enregistrement) fait apparaître une fenêtre cmd noire brièvement
|
||||
# visible à l'écran → ralentit la souris et pollue les screenshots
|
||||
# capturés (le VLM peut "voir" le chemin lea_uia.exe comme texte cliqué).
|
||||
#
|
||||
# La valeur 0x08000000 correspond à CREATE_NO_WINDOW défini dans
|
||||
# l'API Windows. Sur Linux/Mac, la valeur est 0 et `creationflags`
|
||||
# est ignoré. getattr() gère le cas où Python expose déjà la constante
|
||||
# sur Windows.
|
||||
if platform.system() == "Windows":
|
||||
_SUBPROCESS_CREATION_FLAGS = getattr(subprocess, "CREATE_NO_WINDOW", 0x08000000)
|
||||
else:
|
||||
_SUBPROCESS_CREATION_FLAGS = 0
|
||||
|
||||
|
||||
@dataclass
|
||||
class UiaElement:
|
||||
"""Représentation Python d'un élément UIA."""
|
||||
name: str = ""
|
||||
control_type: str = ""
|
||||
class_name: str = ""
|
||||
automation_id: str = ""
|
||||
bounding_rect: Tuple[int, int, int, int] = (0, 0, 0, 0)
|
||||
is_enabled: bool = False
|
||||
is_offscreen: bool = True
|
||||
parent_path: List[Dict[str, str]] = field(default_factory=list)
|
||||
process_name: str = ""
|
||||
|
||||
def center(self) -> Tuple[int, int]:
|
||||
"""Retourner le centre du rectangle (pixels)."""
|
||||
x1, y1, x2, y2 = self.bounding_rect
|
||||
return ((x1 + x2) // 2, (y1 + y2) // 2)
|
||||
|
||||
def width(self) -> int:
|
||||
return self.bounding_rect[2] - self.bounding_rect[0]
|
||||
|
||||
def height(self) -> int:
|
||||
return self.bounding_rect[3] - self.bounding_rect[1]
|
||||
|
||||
def is_clickable(self) -> bool:
|
||||
"""Peut-on cliquer dessus ?"""
|
||||
return (
|
||||
self.is_enabled
|
||||
and not self.is_offscreen
|
||||
and self.width() > 0
|
||||
and self.height() > 0
|
||||
)
|
||||
|
||||
def path_signature(self) -> str:
|
||||
"""Signature du chemin parent (pour retrouver l'élément)."""
|
||||
parts = [f"{p['control_type']}[{p['name']}]" for p in self.parent_path if p.get("name")]
|
||||
parts.append(f"{self.control_type}[{self.name}]")
|
||||
return " > ".join(parts)
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"name": self.name,
|
||||
"control_type": self.control_type,
|
||||
"class_name": self.class_name,
|
||||
"automation_id": self.automation_id,
|
||||
"bounding_rect": list(self.bounding_rect),
|
||||
"is_enabled": self.is_enabled,
|
||||
"is_offscreen": self.is_offscreen,
|
||||
"parent_path": self.parent_path,
|
||||
"process_name": self.process_name,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, d: Dict[str, Any]) -> "UiaElement":
|
||||
rect = d.get("bounding_rect", [0, 0, 0, 0])
|
||||
if isinstance(rect, list) and len(rect) >= 4:
|
||||
rect = tuple(rect[:4])
|
||||
else:
|
||||
rect = (0, 0, 0, 0)
|
||||
return cls(
|
||||
name=d.get("name", ""),
|
||||
control_type=d.get("control_type", ""),
|
||||
class_name=d.get("class_name", ""),
|
||||
automation_id=d.get("automation_id", ""),
|
||||
bounding_rect=rect,
|
||||
is_enabled=d.get("is_enabled", False),
|
||||
is_offscreen=d.get("is_offscreen", True),
|
||||
parent_path=d.get("parent_path", []),
|
||||
process_name=d.get("process_name", ""),
|
||||
)
|
||||
|
||||
|
||||
class UIAHelper:
|
||||
"""Wrapper Python pour lea_uia.exe."""
|
||||
|
||||
def __init__(self, helper_path: str = "", timeout: float = _DEFAULT_TIMEOUT):
|
||||
self._helper_path = helper_path or self._find_helper()
|
||||
self._timeout = timeout
|
||||
self._available = self._check_available()
|
||||
|
||||
def _find_helper(self) -> str:
|
||||
"""Trouver lea_uia.exe dans les emplacements standards."""
|
||||
candidates = [
|
||||
r"C:\Lea\helpers\lea_uia.exe",
|
||||
os.path.join(os.path.dirname(__file__), "..", "..",
|
||||
"agent_rust", "lea_uia", "target",
|
||||
"x86_64-pc-windows-gnu", "release", "lea_uia.exe"),
|
||||
"./helpers/lea_uia.exe",
|
||||
"lea_uia.exe",
|
||||
]
|
||||
for path in candidates:
|
||||
if os.path.isfile(path):
|
||||
return os.path.abspath(path)
|
||||
return ""
|
||||
|
||||
def _check_available(self) -> bool:
|
||||
"""Vérifier que le helper est utilisable (Windows + binaire + health OK)."""
|
||||
if platform.system() != "Windows":
|
||||
logger.debug("UIAHelper: Linux/Mac — helper désactivé")
|
||||
return False
|
||||
if not self._helper_path:
|
||||
logger.debug("UIAHelper: lea_uia.exe introuvable")
|
||||
return False
|
||||
if not os.path.isfile(self._helper_path):
|
||||
logger.debug(f"UIAHelper: chemin invalide {self._helper_path}")
|
||||
return False
|
||||
return True
|
||||
|
||||
@property
|
||||
def available(self) -> bool:
|
||||
return self._available
|
||||
|
||||
@property
|
||||
def helper_path(self) -> str:
|
||||
return self._helper_path
|
||||
|
||||
def _run(self, args: List[str]) -> Optional[Dict[str, Any]]:
|
||||
"""Exécuter lea_uia.exe avec les arguments et parser le JSON."""
|
||||
if not self._available:
|
||||
return None
|
||||
try:
|
||||
result = subprocess.run(
|
||||
[self._helper_path] + args,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=self._timeout,
|
||||
encoding="utf-8",
|
||||
errors="replace",
|
||||
creationflags=_SUBPROCESS_CREATION_FLAGS,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
logger.debug(
|
||||
f"UIAHelper: exit code {result.returncode}, "
|
||||
f"stderr: {result.stderr[:200]}"
|
||||
)
|
||||
return None
|
||||
output = result.stdout.strip()
|
||||
if not output:
|
||||
return None
|
||||
return json.loads(output)
|
||||
except subprocess.TimeoutExpired:
|
||||
logger.debug(f"UIAHelper: timeout ({self._timeout}s) sur {args}")
|
||||
return None
|
||||
except json.JSONDecodeError as e:
|
||||
logger.debug(f"UIAHelper: JSON invalide — {e}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.debug(f"UIAHelper: erreur {e}")
|
||||
return None
|
||||
|
||||
def health(self) -> bool:
|
||||
"""Vérifier que UIA répond."""
|
||||
data = self._run(["health"])
|
||||
return data is not None and data.get("status") == "ok"
|
||||
|
||||
def query_at(
|
||||
self,
|
||||
x: int,
|
||||
y: int,
|
||||
with_parents: bool = True,
|
||||
) -> Optional[UiaElement]:
|
||||
"""Récupérer l'élément UIA à une position écran.
|
||||
|
||||
Args:
|
||||
x, y: Coordonnées pixel absolues
|
||||
with_parents: Inclure la hiérarchie des parents
|
||||
|
||||
Returns:
|
||||
UiaElement si trouvé, None sinon (pas d'élément ou UIA indispo)
|
||||
"""
|
||||
args = ["query", "--x", str(x), "--y", str(y)]
|
||||
if not with_parents:
|
||||
args.append("--with-parents=false")
|
||||
|
||||
data = self._run(args)
|
||||
if not data or data.get("status") != "ok":
|
||||
return None
|
||||
|
||||
elem_data = data.get("element")
|
||||
if not elem_data:
|
||||
return None
|
||||
return UiaElement.from_dict(elem_data)
|
||||
|
||||
def find_by_name(
|
||||
self,
|
||||
name: str,
|
||||
control_type: Optional[str] = None,
|
||||
automation_id: Optional[str] = None,
|
||||
window: Optional[str] = None,
|
||||
timeout_ms: int = 2000,
|
||||
) -> Optional[UiaElement]:
|
||||
"""Rechercher un élément par son nom (+ filtres optionnels).
|
||||
|
||||
Args:
|
||||
name: Nom exact de l'élément
|
||||
control_type: Type de contrôle (Button, Edit, MenuItem...)
|
||||
automation_id: ID d'automation
|
||||
window: Restreindre à une fenêtre spécifique
|
||||
timeout_ms: Timeout de recherche en millisecondes
|
||||
"""
|
||||
args = ["find", "--name", name, "--timeout-ms", str(timeout_ms)]
|
||||
if control_type:
|
||||
args.extend(["--control-type", control_type])
|
||||
if automation_id:
|
||||
args.extend(["--automation-id", automation_id])
|
||||
if window:
|
||||
args.extend(["--window", window])
|
||||
|
||||
data = self._run(args)
|
||||
if not data or data.get("status") != "ok":
|
||||
return None
|
||||
|
||||
elem_data = data.get("element")
|
||||
if not elem_data:
|
||||
return None
|
||||
return UiaElement.from_dict(elem_data)
|
||||
|
||||
def capture_focused(self, max_depth: int = 3) -> Optional[UiaElement]:
|
||||
"""Capturer l'élément ayant le focus + son contexte."""
|
||||
data = self._run(["capture", "--max-depth", str(max_depth)])
|
||||
if not data or data.get("status") != "ok":
|
||||
return None
|
||||
|
||||
elem_data = data.get("element")
|
||||
if not elem_data:
|
||||
return None
|
||||
return UiaElement.from_dict(elem_data)
|
||||
|
||||
|
||||
# Instance globale partagée (singleton léger)
|
||||
_SHARED_HELPER: Optional[UIAHelper] = None
|
||||
|
||||
|
||||
def get_shared_helper() -> UIAHelper:
|
||||
"""Retourner une instance partagée de UIAHelper."""
|
||||
global _SHARED_HELPER
|
||||
if _SHARED_HELPER is None:
|
||||
_SHARED_HELPER = UIAHelper()
|
||||
return _SHARED_HELPER
|
||||
@@ -319,7 +319,22 @@ class AgentV1:
|
||||
if img_hash != self._last_heartbeat_hash:
|
||||
self._last_heartbeat_hash = img_hash
|
||||
self.streamer.push_image(full_path, f"heartbeat_{int(time.time())}")
|
||||
self.streamer.push_event({"type": "heartbeat", "image": full_path, "timestamp": time.time(), "machine_id": self.machine_id})
|
||||
heartbeat_event = {
|
||||
"type": "heartbeat",
|
||||
"image": full_path,
|
||||
"timestamp": time.time(),
|
||||
"machine_id": self.machine_id,
|
||||
}
|
||||
# QW1 — enrichissement multi-écrans (monitor_index + monitors_geometry)
|
||||
# Additif, fallback gracieux : sans cet enrichissement, le serveur
|
||||
# ne reçoit l'info qu'au moment des clics, donc QW1 ne s'active
|
||||
# pas en continu sur poste Windows multi-écrans.
|
||||
try:
|
||||
from .vision.capturer import _enrich_with_monitor_info
|
||||
_enrich_with_monitor_info(heartbeat_event)
|
||||
except Exception as e:
|
||||
logger.debug("QW1 enrichissement heartbeat échoué: %s", e)
|
||||
self.streamer.push_event(heartbeat_event)
|
||||
except Exception as e:
|
||||
logger.error(f"Heartbeat error: {e}")
|
||||
time.sleep(5)
|
||||
|
||||
@@ -8,12 +8,73 @@ import os
|
||||
import time
|
||||
import logging
|
||||
import hashlib
|
||||
from typing import Any, Dict, List, Optional
|
||||
from PIL import Image, ImageFilter, ImageStat
|
||||
import mss
|
||||
from ..config import TARGETED_CROP_SIZE, SCREENSHOT_QUALITY
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# QW1 — détection multi-écrans (fallback gracieux si screeninfo absent)
|
||||
try:
|
||||
from screeninfo import get_monitors as _screeninfo_get_monitors
|
||||
_SCREENINFO_AVAILABLE = True
|
||||
except ImportError:
|
||||
_SCREENINFO_AVAILABLE = False
|
||||
|
||||
|
||||
def _get_monitors_geometry() -> List[Dict[str, Any]]:
|
||||
"""Retourne la liste des monitors physiques avec leurs offsets.
|
||||
|
||||
Returns:
|
||||
List[dict] : [{idx, x, y, w, h, primary}, ...]. Vide si screeninfo
|
||||
indisponible (le serveur tombera sur fallback composite).
|
||||
"""
|
||||
if not _SCREENINFO_AVAILABLE:
|
||||
return []
|
||||
try:
|
||||
monitors = _screeninfo_get_monitors()
|
||||
return [
|
||||
{
|
||||
"idx": i,
|
||||
"x": int(m.x),
|
||||
"y": int(m.y),
|
||||
"w": int(m.width),
|
||||
"h": int(m.height),
|
||||
"primary": bool(getattr(m, "is_primary", False)),
|
||||
}
|
||||
for i, m in enumerate(monitors)
|
||||
]
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
|
||||
def _get_active_monitor_index() -> Optional[int]:
|
||||
"""Retourne l'index logique du monitor où se trouve le curseur (focus actif).
|
||||
|
||||
Returns:
|
||||
int ou None si indéterminable.
|
||||
"""
|
||||
if not _SCREENINFO_AVAILABLE:
|
||||
return None
|
||||
try:
|
||||
import pyautogui # import paresseux : évite la dépendance dure
|
||||
cx, cy = pyautogui.position()
|
||||
for i, m in enumerate(_screeninfo_get_monitors()):
|
||||
if m.x <= cx < m.x + m.width and m.y <= cy < m.y + m.height:
|
||||
return i
|
||||
except Exception:
|
||||
return None
|
||||
return None
|
||||
|
||||
|
||||
def _enrich_with_monitor_info(payload: dict) -> dict:
|
||||
"""Ajoute monitor_index et monitors_geometry au payload (in-place + return)."""
|
||||
if isinstance(payload, dict):
|
||||
payload["monitor_index"] = _get_active_monitor_index()
|
||||
payload["monitors_geometry"] = _get_monitors_geometry()
|
||||
return payload
|
||||
|
||||
class VisionCapturer:
|
||||
def __init__(self, session_dir: str):
|
||||
self.session_dir = session_dir
|
||||
@@ -72,7 +133,12 @@ class VisionCapturer:
|
||||
# Mise à jour du hash pour le prochain heartbeat
|
||||
self.last_img_hash = self._compute_quick_hash(img)
|
||||
|
||||
return {"full": full_path, "crop": crop_path}
|
||||
result = {"full": full_path, "crop": crop_path}
|
||||
|
||||
# QW1 — enrichissement multi-écrans (additif, fallback gracieux)
|
||||
_enrich_with_monitor_info(result)
|
||||
|
||||
return result
|
||||
except Exception as e:
|
||||
logger.error(f"Erreur Dual Capture: {e}")
|
||||
return {}
|
||||
|
||||
@@ -3,7 +3,9 @@ mss>=9.0.1 # Capture d'écran haute performance
|
||||
pynput>=1.7.7 # Clavier/Souris Cross-plateforme
|
||||
Pillow>=10.0.0 # Crops et processing image
|
||||
requests>=2.31.0 # Streaming réseau
|
||||
python-socketio[client]>=5.10,<6.0 # Bus feedback Léa 'lea:*' (compat Flask-SocketIO 5.3.x serveur)
|
||||
psutil>=5.9.0 # Monitoring CPU/RAM
|
||||
screeninfo>=0.8 # QW1 — détection des monitors physiques + offsets
|
||||
pystray>=0.19.5 # Icône Tray UI
|
||||
plyer>=2.1.0 # Notifications toast natives (remplace PyQt5)
|
||||
|
||||
|
||||
@@ -2,6 +2,17 @@
|
||||
"""
|
||||
deploy_windows.py — Script de packaging du client Windows pour Agent V1.
|
||||
|
||||
⚠️ OBSOLÈTE (avril 2026)
|
||||
Le build officiel du package Windows passe par ``deploy/build_package.sh``
|
||||
(à la racine du repo) qui lit directement ``agent_v0/agent_v1/`` et évite
|
||||
les clones intermédiaires. Ce script est conservé pour référence mais son
|
||||
manifeste ``FILE_MANIFEST`` est incomplet : il n'inclut pas
|
||||
``system_dialog_guard.py``, ``persistent_buffer.py``, ``recovery.py``,
|
||||
``uia_helper.py``, ``grounding.py``, ``policy.py``,
|
||||
``vision/blur_sensitive.py``, ``vision/system_info.py``,
|
||||
``ui/chat_window.py``, ``ui/capture_server.py``, ``ui/shared_state.py``.
|
||||
Ne PAS l'utiliser pour un packaging réel.
|
||||
|
||||
Copie uniquement les fichiers nécessaires au fonctionnement de l'agent
|
||||
sur le PC cible (Windows), sans le serveur ni les dépendances lourdes.
|
||||
|
||||
|
||||
@@ -21,36 +21,33 @@ from typing import Any, Callable, Dict, List, Optional
|
||||
logger = logging.getLogger("lea_ui.server_client")
|
||||
|
||||
|
||||
def _get_server_host() -> str:
|
||||
"""Recuperer l'adresse du serveur Linux.
|
||||
def _get_server_url() -> str:
|
||||
"""Recuperer l'URL du serveur RPA (avec /api/v1).
|
||||
|
||||
Ordre de resolution :
|
||||
1. Variable d'environnement RPA_SERVER_HOST
|
||||
2. Fichier de config agent_config.json (cle "server_host")
|
||||
3. Fallback localhost
|
||||
1. Import depuis agent_v1.config (source de verite unique)
|
||||
2. Variable d'environnement RPA_SERVER_URL
|
||||
3. Fallback http://localhost:5005/api/v1
|
||||
"""
|
||||
# 1. Variable d'environnement
|
||||
host = os.environ.get("RPA_SERVER_HOST", "").strip()
|
||||
if host:
|
||||
return host
|
||||
# 1. Import depuis config.py (source de verite)
|
||||
try:
|
||||
from agent_v1.config import SERVER_URL
|
||||
return SERVER_URL
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
# 2. Fichier de config
|
||||
config_paths = [
|
||||
os.path.join(os.path.dirname(__file__), "..", "agent_config.json"),
|
||||
os.path.join(os.path.dirname(__file__), "..", "..", "agent_config.json"),
|
||||
]
|
||||
for config_path in config_paths:
|
||||
try:
|
||||
with open(config_path, "r", encoding="utf-8") as f:
|
||||
cfg = json.load(f)
|
||||
host = cfg.get("server_host", "").strip()
|
||||
if host:
|
||||
return host
|
||||
except (OSError, json.JSONDecodeError):
|
||||
continue
|
||||
# 2. Variable d'environnement directe
|
||||
url = os.environ.get("RPA_SERVER_URL", "").strip().rstrip("/")
|
||||
if url:
|
||||
return url
|
||||
|
||||
# 3. Fallback
|
||||
return "localhost"
|
||||
return "http://localhost:5005/api/v1"
|
||||
|
||||
|
||||
def _get_server_base(server_url: str) -> str:
|
||||
"""Extraire la base URL (sans /api/v1) pour les routes racine (/health)."""
|
||||
return server_url.rsplit("/api/v1", 1)[0]
|
||||
|
||||
|
||||
class LeaServerClient:
|
||||
@@ -67,12 +64,23 @@ class LeaServerClient:
|
||||
chat_port: int = 5004,
|
||||
stream_port: int = 5005,
|
||||
) -> None:
|
||||
self._host = server_host or _get_server_host()
|
||||
# URL unifiée : SERVER_URL contient TOUJOURS /api/v1 (convention INC-1).
|
||||
# _stream_url = URL avec /api/v1 (pour les routes API)
|
||||
# _stream_base = URL sans /api/v1 (pour /health uniquement)
|
||||
self._stream_url = _get_server_url()
|
||||
self._stream_base = _get_server_base(self._stream_url)
|
||||
|
||||
# Extraire le host depuis l'URL pour le chat et pour l'affichage
|
||||
try:
|
||||
from urllib.parse import urlparse
|
||||
parsed = urlparse(self._stream_base)
|
||||
self._host = parsed.hostname or "localhost"
|
||||
except Exception:
|
||||
self._host = server_host or "localhost"
|
||||
|
||||
self._chat_port = chat_port
|
||||
self._stream_port = stream_port
|
||||
|
||||
self._chat_base = f"http://{self._host}:{self._chat_port}"
|
||||
self._stream_base = f"http://{self._host}:{self._stream_port}"
|
||||
|
||||
# Etat de connexion
|
||||
self._connected = False
|
||||
@@ -95,8 +103,8 @@ class LeaServerClient:
|
||||
self._api_token = os.environ.get("RPA_API_TOKEN", "")
|
||||
|
||||
logger.info(
|
||||
"LeaServerClient initialise : chat=%s, stream=%s",
|
||||
self._chat_base, self._stream_base,
|
||||
"LeaServerClient initialise : chat=%s, stream_url=%s, stream_base=%s",
|
||||
self._chat_base, self._stream_url, self._stream_base,
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -146,7 +154,11 @@ class LeaServerClient:
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def check_connection(self) -> bool:
|
||||
"""Tester la connexion au serveur streaming (port 5005)."""
|
||||
"""Tester la connexion au serveur streaming (port 5005).
|
||||
|
||||
Le health check utilise _stream_base (sans /api/v1) car la route
|
||||
/health est a la racine du serveur FastAPI, pas sous /api/v1.
|
||||
"""
|
||||
try:
|
||||
import requests
|
||||
resp = requests.get(
|
||||
@@ -219,7 +231,7 @@ class LeaServerClient:
|
||||
import requests
|
||||
headers = self._auth_headers()
|
||||
resp = requests.get(
|
||||
f"{self._stream_base}/api/v1/traces/stream/workflows",
|
||||
f"{self._stream_url}/traces/stream/workflows",
|
||||
headers=headers,
|
||||
timeout=10,
|
||||
)
|
||||
@@ -276,7 +288,7 @@ class LeaServerClient:
|
||||
while self._polling:
|
||||
try:
|
||||
resp = req_lib.get(
|
||||
f"{self._stream_base}/api/v1/traces/stream/replay/next",
|
||||
f"{self._stream_url}/traces/stream/replay/next",
|
||||
params={"session_id": self._poll_session_id},
|
||||
headers=self._auth_headers(),
|
||||
timeout=5,
|
||||
@@ -310,7 +322,7 @@ class LeaServerClient:
|
||||
try:
|
||||
import requests
|
||||
resp = requests.get(
|
||||
f"{self._stream_base}/api/v1/traces/stream/replays",
|
||||
f"{self._stream_url}/traces/stream/replays",
|
||||
headers=self._auth_headers(),
|
||||
timeout=5,
|
||||
)
|
||||
@@ -326,6 +338,50 @@ class LeaServerClient:
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
def resume_replay(self, replay_id: str) -> bool:
|
||||
"""Reprendre un replay en pause supervisée via HTTP direct.
|
||||
|
||||
Fallback du chemin SocketIO (`lea:replay_resume` → agent_chat)
|
||||
utilisé quand le bus feedback est déconnecté au moment où
|
||||
l'utilisateur clique « Continuer » dans la bulle paused.
|
||||
|
||||
Retourne True si le serveur streaming a accepté la reprise.
|
||||
"""
|
||||
if not replay_id:
|
||||
return False
|
||||
try:
|
||||
import requests
|
||||
resp = requests.post(
|
||||
f"{self._stream_url}/traces/stream/replay/{replay_id}/resume",
|
||||
headers=self._auth_headers(),
|
||||
timeout=10,
|
||||
)
|
||||
return bool(resp.ok)
|
||||
except Exception:
|
||||
logger.debug("resume_replay HTTP silenced", exc_info=True)
|
||||
return False
|
||||
|
||||
def abort_replay(self, replay_id: str) -> bool:
|
||||
"""Annuler un replay en pause supervisée via HTTP direct.
|
||||
|
||||
Symétrique de ``resume_replay`` : fallback du chemin SocketIO
|
||||
(`lea:replay_abort`) quand le bus feedback est déconnecté.
|
||||
POSTe sur ``/replay/{id}/cancel`` côté serveur streaming.
|
||||
"""
|
||||
if not replay_id:
|
||||
return False
|
||||
try:
|
||||
import requests
|
||||
resp = requests.post(
|
||||
f"{self._stream_url}/traces/stream/replay/{replay_id}/cancel",
|
||||
headers=self._auth_headers(),
|
||||
timeout=10,
|
||||
)
|
||||
return bool(resp.ok)
|
||||
except Exception:
|
||||
logger.debug("abort_replay HTTP silenced", exc_info=True)
|
||||
return False
|
||||
|
||||
def report_action_result(
|
||||
self,
|
||||
session_id: str,
|
||||
@@ -338,7 +394,7 @@ class LeaServerClient:
|
||||
try:
|
||||
import requests
|
||||
requests.post(
|
||||
f"{self._stream_base}/api/v1/traces/stream/replay/result",
|
||||
f"{self._stream_url}/traces/stream/replay/result",
|
||||
json={
|
||||
"session_id": session_id,
|
||||
"action_id": action_id,
|
||||
|
||||
@@ -1,12 +1,97 @@
|
||||
# run_agent_v1.py
|
||||
import sys
|
||||
import os
|
||||
import atexit
|
||||
|
||||
# Ajout du répertoire courant au PYTHONPATH pour permettre les imports de modules
|
||||
current_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
if current_dir not in sys.path:
|
||||
sys.path.append(current_dir)
|
||||
|
||||
# ---------------------------------------------------------------
|
||||
# Verrou PID — empêche le lancement de plusieurs instances
|
||||
# Même si Lea.bat est double-cliqué ou lancé deux fois,
|
||||
# un seul agent tourne à la fois (defense-in-depth).
|
||||
# ---------------------------------------------------------------
|
||||
LOCK_FILE = os.path.join(current_dir, "lea_agent.lock")
|
||||
|
||||
|
||||
def _pid_is_alive(pid: int) -> bool:
|
||||
"""Vérifie si un processus avec ce PID existe encore (Windows + Unix)."""
|
||||
if sys.platform == "win32":
|
||||
try:
|
||||
import ctypes
|
||||
kernel32 = ctypes.windll.kernel32 # type: ignore[attr-defined]
|
||||
PROCESS_QUERY_LIMITED_INFORMATION = 0x1000
|
||||
handle = kernel32.OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, False, pid)
|
||||
if handle:
|
||||
kernel32.CloseHandle(handle)
|
||||
return True
|
||||
return False
|
||||
except Exception:
|
||||
# Fallback : tasklist
|
||||
try:
|
||||
import subprocess
|
||||
result = subprocess.run(
|
||||
["tasklist", "/FI", f"PID eq {pid}", "/NH"],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
)
|
||||
return str(pid) in result.stdout
|
||||
except Exception:
|
||||
return False
|
||||
else:
|
||||
# Unix/Linux — os.kill(pid, 0) ne tue pas le process
|
||||
try:
|
||||
os.kill(pid, 0)
|
||||
return True
|
||||
except (OSError, ProcessLookupError):
|
||||
return False
|
||||
|
||||
|
||||
def _acquire_lock() -> bool:
|
||||
"""Tente d'acquérir le verrou PID. Retourne False si une autre instance tourne."""
|
||||
my_pid = os.getpid()
|
||||
|
||||
# Lire le PID existant
|
||||
if os.path.isfile(LOCK_FILE):
|
||||
try:
|
||||
with open(LOCK_FILE, "r", encoding="utf-8") as f:
|
||||
old_pid = int(f.read().strip())
|
||||
# Le PID dans le lock est-il encore vivant ?
|
||||
if old_pid != my_pid and _pid_is_alive(old_pid):
|
||||
return False # Une autre instance tourne déjà
|
||||
except (ValueError, OSError):
|
||||
pass # Fichier corrompu — on l'écrase
|
||||
|
||||
# Écrire notre PID
|
||||
try:
|
||||
with open(LOCK_FILE, "w", encoding="utf-8") as f:
|
||||
f.write(str(my_pid))
|
||||
except OSError:
|
||||
pass # Pas bloquant — on continue sans lock
|
||||
return True
|
||||
|
||||
|
||||
def _release_lock():
|
||||
"""Supprime le fichier lock au shutdown."""
|
||||
try:
|
||||
if os.path.isfile(LOCK_FILE):
|
||||
with open(LOCK_FILE, "r", encoding="utf-8") as f:
|
||||
stored_pid = int(f.read().strip())
|
||||
# Ne supprimer que si c'est bien NOTRE lock
|
||||
if stored_pid == os.getpid():
|
||||
os.remove(LOCK_FILE)
|
||||
except (ValueError, OSError):
|
||||
pass
|
||||
|
||||
|
||||
# Vérification du lock AVANT toute initialisation lourde
|
||||
if not _acquire_lock():
|
||||
# Une autre instance de Léa tourne déjà — on quitte silencieusement
|
||||
sys.exit(0)
|
||||
|
||||
atexit.register(_release_lock)
|
||||
|
||||
# Charger config.txt et .env comme variables d'environnement
|
||||
# (équivalent du `set` dans Lea.bat, mais fonctionne aussi sans le .bat)
|
||||
for config_file in ("config.txt", ".env"):
|
||||
@@ -32,7 +117,7 @@ logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
|
||||
)
|
||||
logging.info("=== Agent V1 démarrage — config chargée ===")
|
||||
logging.info("=== Agent V1 démarrage — config chargée (PID %d) ===", os.getpid())
|
||||
logging.info("RPA_SERVER_URL=%s", os.environ.get("RPA_SERVER_URL", "(non défini)"))
|
||||
logging.info("RPA_SERVER_HOST=%s", os.environ.get("RPA_SERVER_HOST", "(non défini)"))
|
||||
logging.info("RPA_API_TOKEN=%s", os.environ.get("RPA_API_TOKEN", "(non défini)")[:8] + "...")
|
||||
|
||||
413
agent_v0/server_v1/agent_registry.py
Normal file
413
agent_v0/server_v1/agent_registry.py
Normal file
@@ -0,0 +1,413 @@
|
||||
# agent_v0/server_v1/agent_registry.py
|
||||
"""
|
||||
Registre des agents Lea enrolles sur le parc.
|
||||
|
||||
Alimente par les endpoints /api/v1/agents/enroll et /api/v1/agents/uninstall
|
||||
que l'installeur Inno Setup (`deploy/installer/Lea.iss`) appelle a
|
||||
l'installation et a la desinstallation sur chaque poste collaborateur.
|
||||
|
||||
Stockage : SQLite simple, cohabite avec rpa_data.db dans data/databases/.
|
||||
Aucune dependance GPU/LLM — ce module doit rester leger (juste sqlite3 +
|
||||
stdlib) pour pouvoir etre importe par le serveur HTTP.
|
||||
|
||||
Schema de la table `enrolled_agents` :
|
||||
id INTEGER PK AUTOINCREMENT
|
||||
machine_id TEXT UNIQUE NOT NULL — identifiant genere par l'installeur
|
||||
user_name TEXT — nom affichage collaborateur
|
||||
user_email TEXT
|
||||
user_id TEXT — identifiant metier (ex: AIVA-001)
|
||||
hostname TEXT
|
||||
os_info TEXT
|
||||
version TEXT — version du client Lea
|
||||
status TEXT DEFAULT 'active' — 'active' | 'uninstalled'
|
||||
enrolled_at TEXT NOT NULL — ISO 8601 UTC
|
||||
last_seen_at TEXT — ISO 8601 UTC (heartbeat / stream)
|
||||
uninstalled_at TEXT
|
||||
uninstall_reason TEXT
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import hmac
|
||||
import logging
|
||||
import os
|
||||
import secrets
|
||||
import sqlite3
|
||||
import threading
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Verrou global : SQLite tolere plusieurs threads mais on serialise
|
||||
# les ecritures pour eviter les races sur _init_db + upserts concurrents.
|
||||
_DB_LOCK = threading.Lock()
|
||||
|
||||
|
||||
def _utc_now_iso() -> str:
|
||||
"""Horodatage ISO 8601 UTC (compatible toutes les autres tables)."""
|
||||
return datetime.now(timezone.utc).isoformat()
|
||||
|
||||
|
||||
def _new_token() -> Tuple[str, str]:
|
||||
"""WP-C : genere un token poste (clair) et son empreinte SHA-256.
|
||||
|
||||
Le clair est retourne UNE seule fois a l'appelant (resultat de enroll) ; seul
|
||||
le hash est persiste dans `token_hash`. Le clair n'est jamais journalise ni
|
||||
stocke. L'auth runtime reste inchangee (aucun branchement ici sur la
|
||||
verification de token cote api_stream).
|
||||
"""
|
||||
clear = secrets.token_hex(32)
|
||||
token_hash = hashlib.sha256(clear.encode("utf-8")).hexdigest()
|
||||
return clear, token_hash
|
||||
|
||||
|
||||
def _fleet_enroll_locked() -> bool:
|
||||
"""WP-B : parc verrouille -> aucun NOUVEAU machine_id ne peut s'enroler.
|
||||
|
||||
Pilote par l'env `RPA_FLEET_ENROLL_LOCKED` (true/1/yes), reversible (relu a
|
||||
chaque appel). Ferme le contournement « poste revoque + nouveau machine_id +
|
||||
token global » : les machines deja connues gardent leur comportement, seul
|
||||
l'enrolement d'un machine_id inconnu est refuse quand le parc est verrouille.
|
||||
"""
|
||||
return os.getenv("RPA_FLEET_ENROLL_LOCKED", "").strip().lower() in ("1", "true", "yes")
|
||||
|
||||
|
||||
class AgentRegistry:
|
||||
"""Gestion CRUD des agents enrolles (SQLite)."""
|
||||
|
||||
def __init__(self, db_path: str | Path = "data/databases/rpa_data.db"):
|
||||
self.db_path = Path(db_path)
|
||||
self.db_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
self._init_db()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Infra SQLite
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _connect(self) -> sqlite3.Connection:
|
||||
# check_same_thread=False : on protege nous-memes via _DB_LOCK,
|
||||
# indispensable car FastAPI appelle les endpoints sur threads
|
||||
# differents (thread pool).
|
||||
conn = sqlite3.connect(str(self.db_path), check_same_thread=False)
|
||||
conn.row_factory = sqlite3.Row
|
||||
conn.execute("PRAGMA journal_mode=WAL")
|
||||
conn.execute("PRAGMA foreign_keys=ON")
|
||||
return conn
|
||||
|
||||
def _init_db(self) -> None:
|
||||
"""Cree la table et ses index si absents (idempotent)."""
|
||||
with _DB_LOCK, self._connect() as conn:
|
||||
conn.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS enrolled_agents (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
machine_id TEXT NOT NULL UNIQUE,
|
||||
user_name TEXT,
|
||||
user_email TEXT,
|
||||
user_id TEXT,
|
||||
hostname TEXT,
|
||||
os_info TEXT,
|
||||
version TEXT,
|
||||
status TEXT NOT NULL DEFAULT 'active',
|
||||
enrolled_at TEXT NOT NULL,
|
||||
last_seen_at TEXT,
|
||||
uninstalled_at TEXT,
|
||||
uninstall_reason TEXT
|
||||
)
|
||||
"""
|
||||
)
|
||||
conn.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_enrolled_agents_status "
|
||||
"ON enrolled_agents(status)"
|
||||
)
|
||||
conn.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_enrolled_agents_machine "
|
||||
"ON enrolled_agents(machine_id)"
|
||||
)
|
||||
# WP-C Patch 1 : colonnes « token par poste », migration additive
|
||||
# idempotente. Inertes tant que l'auth par poste n'est pas branchée
|
||||
# (patchs WP-C ultérieurs). Voir DETTE-015.
|
||||
existing_cols = {
|
||||
row[1]
|
||||
for row in conn.execute(
|
||||
"PRAGMA table_info(enrolled_agents)"
|
||||
).fetchall()
|
||||
}
|
||||
for col in ("token_hash", "token_issued_at"):
|
||||
if col not in existing_cols:
|
||||
conn.execute(
|
||||
f"ALTER TABLE enrolled_agents ADD COLUMN {col} TEXT"
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Lecture
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def get(self, machine_id: str) -> Optional[Dict[str, Any]]:
|
||||
"""Recupere un agent par machine_id (ou None)."""
|
||||
with _DB_LOCK, self._connect() as conn:
|
||||
row = conn.execute(
|
||||
"SELECT * FROM enrolled_agents WHERE machine_id = ?",
|
||||
(machine_id,),
|
||||
).fetchone()
|
||||
return dict(row) if row else None
|
||||
|
||||
def list_by_status(self, status: str) -> List[Dict[str, Any]]:
|
||||
"""Liste les agents par statut ('active' | 'uninstalled')."""
|
||||
with _DB_LOCK, self._connect() as conn:
|
||||
rows = conn.execute(
|
||||
"SELECT * FROM enrolled_agents WHERE status = ? "
|
||||
"ORDER BY enrolled_at DESC",
|
||||
(status,),
|
||||
).fetchall()
|
||||
return [dict(r) for r in rows]
|
||||
|
||||
def count_by_status(self, status: str) -> int:
|
||||
with _DB_LOCK, self._connect() as conn:
|
||||
row = conn.execute(
|
||||
"SELECT COUNT(*) AS n FROM enrolled_agents WHERE status = ?",
|
||||
(status,),
|
||||
).fetchone()
|
||||
return int(row["n"]) if row else 0
|
||||
|
||||
def verify_token(self, token: str | None) -> Optional[str]:
|
||||
"""WP-C : verifie un token poste, retourne le machine_id actif ou None.
|
||||
|
||||
Compare le SHA-256 du token presente aux `token_hash` des agents
|
||||
`status='active'` via `hmac.compare_digest` (comparaison a temps
|
||||
constant, evite les fuites par timing). Un agent desinstalle/revoque
|
||||
n'est pas 'active' donc refuse ; la rotation a l'enrolement invalide
|
||||
l'ancien token.
|
||||
|
||||
INERTE : non branchee sur l'auth runtime (le branchement derriere flag
|
||||
sera le Patch 4). Aucun appelant runtime a ce stade.
|
||||
"""
|
||||
if not token:
|
||||
return None
|
||||
token_hash = hashlib.sha256(token.encode("utf-8")).hexdigest()
|
||||
with _DB_LOCK, self._connect() as conn:
|
||||
rows = conn.execute(
|
||||
"SELECT machine_id, token_hash FROM enrolled_agents "
|
||||
"WHERE status = 'active' AND token_hash IS NOT NULL"
|
||||
).fetchall()
|
||||
for row in rows:
|
||||
if hmac.compare_digest(str(row["token_hash"]), token_hash):
|
||||
return str(row["machine_id"])
|
||||
return None
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Ecriture
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def enroll(
|
||||
self,
|
||||
*,
|
||||
machine_id: str,
|
||||
user_name: str | None = None,
|
||||
user_email: str | None = None,
|
||||
user_id: str | None = None,
|
||||
hostname: str | None = None,
|
||||
os_info: str | None = None,
|
||||
version: str | None = None,
|
||||
allow_reactivate: bool = True,
|
||||
) -> Dict[str, Any]:
|
||||
"""Enregistre un nouvel agent ou reactive un agent desinstalle.
|
||||
|
||||
Returns:
|
||||
dict avec clefs {"created": bool, "reactivated": bool, "agent": row}
|
||||
|
||||
Raises:
|
||||
ValueError: si machine_id est vide.
|
||||
AgentAlreadyEnrolledError: si deja actif (status=active).
|
||||
"""
|
||||
if not machine_id or not machine_id.strip():
|
||||
raise ValueError("machine_id est obligatoire")
|
||||
machine_id = machine_id.strip()
|
||||
|
||||
now = _utc_now_iso()
|
||||
|
||||
with _DB_LOCK, self._connect() as conn:
|
||||
existing = conn.execute(
|
||||
"SELECT * FROM enrolled_agents WHERE machine_id = ?",
|
||||
(machine_id,),
|
||||
).fetchone()
|
||||
|
||||
if existing is not None:
|
||||
if existing["status"] == "active":
|
||||
# Deja enrolle et actif -> conflit explicit
|
||||
raise AgentAlreadyEnrolledError(dict(existing))
|
||||
|
||||
if existing["uninstall_reason"] == "admin_revoke":
|
||||
raise AgentRevokedError(dict(existing))
|
||||
|
||||
# Agent desinstalle : reactivation si autorise (defaut)
|
||||
if not allow_reactivate:
|
||||
raise AgentAlreadyEnrolledError(dict(existing))
|
||||
|
||||
# WP-C : rotation du token a chaque (re)enrolement.
|
||||
token, token_hash = _new_token()
|
||||
conn.execute(
|
||||
"""
|
||||
UPDATE enrolled_agents
|
||||
SET user_name = COALESCE(?, user_name),
|
||||
user_email = COALESCE(?, user_email),
|
||||
user_id = COALESCE(?, user_id),
|
||||
hostname = COALESCE(?, hostname),
|
||||
os_info = COALESCE(?, os_info),
|
||||
version = COALESCE(?, version),
|
||||
status = 'active',
|
||||
enrolled_at = ?,
|
||||
last_seen_at = ?,
|
||||
uninstalled_at = NULL,
|
||||
uninstall_reason = NULL,
|
||||
token_hash = ?,
|
||||
token_issued_at = ?
|
||||
WHERE machine_id = ?
|
||||
""",
|
||||
(
|
||||
user_name, user_email, user_id,
|
||||
hostname, os_info, version,
|
||||
now, now,
|
||||
token_hash, now,
|
||||
machine_id,
|
||||
),
|
||||
)
|
||||
conn.commit()
|
||||
row = conn.execute(
|
||||
"SELECT * FROM enrolled_agents WHERE machine_id = ?",
|
||||
(machine_id,),
|
||||
).fetchone()
|
||||
return {
|
||||
"created": False,
|
||||
"reactivated": True,
|
||||
"agent": dict(row),
|
||||
"token": token,
|
||||
}
|
||||
|
||||
# Nouvelle inscription — WP-B : refusee si le parc est verrouille
|
||||
if _fleet_enroll_locked():
|
||||
raise FleetEnrollLockedError(machine_id)
|
||||
# WP-C : token poste genere a la creation.
|
||||
token, token_hash = _new_token()
|
||||
conn.execute(
|
||||
"""
|
||||
INSERT INTO enrolled_agents (
|
||||
machine_id, user_name, user_email, user_id,
|
||||
hostname, os_info, version,
|
||||
status, enrolled_at, last_seen_at,
|
||||
token_hash, token_issued_at
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, 'active', ?, ?, ?, ?)
|
||||
""",
|
||||
(
|
||||
machine_id, user_name, user_email, user_id,
|
||||
hostname, os_info, version,
|
||||
now, now,
|
||||
token_hash, now,
|
||||
),
|
||||
)
|
||||
conn.commit()
|
||||
row = conn.execute(
|
||||
"SELECT * FROM enrolled_agents WHERE machine_id = ?",
|
||||
(machine_id,),
|
||||
).fetchone()
|
||||
return {
|
||||
"created": True,
|
||||
"reactivated": False,
|
||||
"agent": dict(row),
|
||||
"token": token,
|
||||
}
|
||||
|
||||
def uninstall(
|
||||
self,
|
||||
*,
|
||||
machine_id: str,
|
||||
reason: str | None = None,
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""Marque un agent comme desinstalle (soft delete).
|
||||
|
||||
Returns:
|
||||
Le row mis a jour, ou None si l'agent n'existe pas.
|
||||
"""
|
||||
if not machine_id or not machine_id.strip():
|
||||
raise ValueError("machine_id est obligatoire")
|
||||
machine_id = machine_id.strip()
|
||||
|
||||
now = _utc_now_iso()
|
||||
with _DB_LOCK, self._connect() as conn:
|
||||
existing = conn.execute(
|
||||
"SELECT * FROM enrolled_agents WHERE machine_id = ?",
|
||||
(machine_id,),
|
||||
).fetchone()
|
||||
if existing is None:
|
||||
return None
|
||||
|
||||
conn.execute(
|
||||
"""
|
||||
UPDATE enrolled_agents
|
||||
SET status = 'uninstalled',
|
||||
uninstalled_at = ?,
|
||||
uninstall_reason = ?
|
||||
WHERE machine_id = ?
|
||||
""",
|
||||
(now, reason, machine_id),
|
||||
)
|
||||
conn.commit()
|
||||
row = conn.execute(
|
||||
"SELECT * FROM enrolled_agents WHERE machine_id = ?",
|
||||
(machine_id,),
|
||||
).fetchone()
|
||||
return dict(row)
|
||||
|
||||
def touch_last_seen(self, machine_id: str) -> None:
|
||||
"""Met a jour last_seen_at (appel depuis le stream / heartbeat).
|
||||
|
||||
Silencieux si l'agent est inconnu (evite les erreurs sur vieux clients).
|
||||
Ne reactive jamais un agent desinstalle/revoque.
|
||||
"""
|
||||
if not machine_id:
|
||||
return
|
||||
now = _utc_now_iso()
|
||||
with _DB_LOCK, self._connect() as conn:
|
||||
conn.execute(
|
||||
"UPDATE enrolled_agents SET last_seen_at = ? "
|
||||
"WHERE machine_id = ? AND status = 'active'",
|
||||
(now, machine_id),
|
||||
)
|
||||
conn.commit()
|
||||
|
||||
|
||||
class AgentAlreadyEnrolledError(Exception):
|
||||
"""Levee si on tente d'enrouler une machine deja active."""
|
||||
|
||||
def __init__(self, existing_row: Dict[str, Any]):
|
||||
self.existing = existing_row
|
||||
super().__init__(
|
||||
f"machine_id={existing_row.get('machine_id')} deja enrole "
|
||||
f"(status={existing_row.get('status')})"
|
||||
)
|
||||
|
||||
|
||||
class AgentRevokedError(Exception):
|
||||
"""Levee si un administrateur a revoque ce machine_id."""
|
||||
|
||||
def __init__(self, existing_row: Dict[str, Any]):
|
||||
self.existing = existing_row
|
||||
super().__init__(
|
||||
f"machine_id={existing_row.get('machine_id')} revoque "
|
||||
f"(reason={existing_row.get('uninstall_reason')})"
|
||||
)
|
||||
|
||||
|
||||
class FleetEnrollLockedError(Exception):
|
||||
"""Levee si le parc est verrouille (RPA_FLEET_ENROLL_LOCKED) et qu'on tente
|
||||
d'enroler un nouveau machine_id inconnu (WP-B)."""
|
||||
|
||||
def __init__(self, machine_id: str):
|
||||
self.machine_id = machine_id
|
||||
super().__init__(
|
||||
f"enrolement refuse : parc verrouille (RPA_FLEET_ENROLL_LOCKED), "
|
||||
f"machine_id={machine_id} inconnu"
|
||||
)
|
||||
File diff suppressed because it is too large
Load Diff
393
agent_v0/server_v1/audit_trail.py
Normal file
393
agent_v0/server_v1/audit_trail.py
Normal file
@@ -0,0 +1,393 @@
|
||||
# agent_v0/server_v1/audit_trail.py
|
||||
"""
|
||||
Module Audit Trail — traçabilité complète des actions RPA.
|
||||
|
||||
Responsabilité : "Chaque action exécutée par Léa est tracée, datée, attribuée."
|
||||
|
||||
En milieu hospitalier (codage CIM-10 via DPI), la traçabilité est une obligation
|
||||
légale. Ce module enregistre chaque action avec :
|
||||
- L'identité du TIM (Technicien d'Information Médicale) superviseur
|
||||
- Le mode d'exécution (autonome, assisté, shadow)
|
||||
- Le résultat détaillé (succès, échec, correction)
|
||||
- L'horodatage ISO 8601
|
||||
|
||||
Format de stockage : fichiers JSONL datés dans data/audit/ (un par jour).
|
||||
Aucune dépendance externe (stdlib + dataclasses uniquement).
|
||||
|
||||
Usage :
|
||||
audit = AuditTrail()
|
||||
audit.record(AuditEntry(
|
||||
session_id="sess_abc",
|
||||
action_id="act_001",
|
||||
user_id="tim_dupont",
|
||||
user_name="Marie Dupont",
|
||||
...
|
||||
))
|
||||
entries = audit.query(user_id="tim_dupont", date_from="2026-04-01")
|
||||
csv_data = audit.export_csv(date_from="2026-04-01", date_to="2026-04-06")
|
||||
summary = audit.get_summary("2026-04-05")
|
||||
"""
|
||||
|
||||
import csv
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
from dataclasses import dataclass, asdict, fields
|
||||
from datetime import datetime, date, timedelta
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Répertoire par défaut pour le stockage des fichiers d'audit
|
||||
_DEFAULT_AUDIT_DIR = os.environ.get("RPA_AUDIT_DIR", "data/audit")
|
||||
|
||||
|
||||
@dataclass
|
||||
class AuditEntry:
|
||||
"""Entrée d'audit — un événement tracé dans le système."""
|
||||
|
||||
# Horodatage ISO 8601 (ex: 2026-04-05T14:23:01.456789)
|
||||
timestamp: str = ""
|
||||
|
||||
# Identifiants de session et d'action
|
||||
session_id: str = ""
|
||||
action_id: str = ""
|
||||
|
||||
# Identité de l'utilisateur superviseur
|
||||
user_id: str = "" # Identifiant du TIM (login Windows ou configuré)
|
||||
user_name: str = "" # Nom affiché (ex: "Marie Dupont")
|
||||
machine_id: str = "" # ID du poste client (hostname ou configuré)
|
||||
|
||||
# Description de l'action
|
||||
action_type: str = "" # click, type, key_combo, wait, etc.
|
||||
action_detail: str = "" # Description humaine ("Clic sur 'Enregistrer' dans DxCare")
|
||||
target_app: str = "" # Application cible (DxCare, Orbis, etc.)
|
||||
|
||||
# Mode d'exécution
|
||||
execution_mode: str = "" # "autonomous", "assisted", "shadow"
|
||||
|
||||
# Résultat
|
||||
result: str = "" # "success", "failed", "skipped", "recovered"
|
||||
resolution_method: str = "" # Comment la cible a été trouvée (som_text_match, vlm_direct, etc.)
|
||||
critic_result: str = "" # Résultat de la vérification sémantique
|
||||
recovery_action: str = "" # Action corrective si échec (undo, escape, retry, none)
|
||||
|
||||
# Contexte métier
|
||||
domain: str = "" # Domaine métier (tim_codage, generic, etc.)
|
||||
workflow_id: str = "" # ID du workflow exécuté
|
||||
workflow_name: str = "" # Nom lisible du workflow
|
||||
|
||||
# Performance
|
||||
duration_ms: float = 0.0 # Durée de l'action en millisecondes
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convertir en dictionnaire sérialisable JSON."""
|
||||
return asdict(self)
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> "AuditEntry":
|
||||
"""Créer une entrée depuis un dictionnaire.
|
||||
|
||||
Ignore les clés inconnues pour la compatibilité future.
|
||||
"""
|
||||
known_fields = {f.name for f in fields(cls)}
|
||||
filtered = {k: v for k, v in data.items() if k in known_fields}
|
||||
return cls(**filtered)
|
||||
|
||||
|
||||
class AuditTrail:
|
||||
"""Gestionnaire de traçabilité — enregistrement et consultation des actions.
|
||||
|
||||
Stocke chaque événement dans un fichier JSONL daté (un fichier par jour).
|
||||
Thread-safe grâce à un verrou d'écriture.
|
||||
|
||||
Fichiers produits :
|
||||
data/audit/audit_2026-04-05.jsonl
|
||||
data/audit/audit_2026-04-06.jsonl
|
||||
...
|
||||
"""
|
||||
|
||||
def __init__(self, audit_dir: str = ""):
|
||||
self.audit_dir = Path(audit_dir or _DEFAULT_AUDIT_DIR)
|
||||
self.audit_dir.mkdir(parents=True, exist_ok=True)
|
||||
self._lock = threading.Lock()
|
||||
logger.info(f"Audit Trail initialisé : {self.audit_dir}")
|
||||
|
||||
def _file_for_date(self, d: date) -> Path:
|
||||
"""Chemin du fichier JSONL pour une date donnée."""
|
||||
return self.audit_dir / f"audit_{d.isoformat()}.jsonl"
|
||||
|
||||
def record(self, entry: AuditEntry) -> None:
|
||||
"""Enregistrer une entrée d'audit.
|
||||
|
||||
Ajoute un horodatage ISO 8601 si absent, puis écrit en append
|
||||
dans le fichier JSONL du jour.
|
||||
"""
|
||||
# Horodatage automatique si absent
|
||||
if not entry.timestamp:
|
||||
entry.timestamp = datetime.now().isoformat()
|
||||
|
||||
# Déterminer le fichier du jour à partir du timestamp
|
||||
try:
|
||||
entry_date = datetime.fromisoformat(entry.timestamp).date()
|
||||
except (ValueError, TypeError):
|
||||
entry_date = date.today()
|
||||
|
||||
audit_file = self._file_for_date(entry_date)
|
||||
|
||||
with self._lock:
|
||||
try:
|
||||
with open(audit_file, "a", encoding="utf-8") as f:
|
||||
f.write(json.dumps(entry.to_dict(), ensure_ascii=False) + "\n")
|
||||
except Exception as e:
|
||||
logger.error(f"Audit Trail: échec écriture {audit_file}: {e}")
|
||||
return
|
||||
|
||||
logger.debug(
|
||||
f"Audit: {entry.result} {entry.action_type} "
|
||||
f"'{entry.action_detail[:50]}' "
|
||||
f"[user={entry.user_id}] [session={entry.session_id}]"
|
||||
)
|
||||
|
||||
def _load_file(self, filepath: Path) -> List[AuditEntry]:
|
||||
"""Charger toutes les entrées d'un fichier JSONL."""
|
||||
if not filepath.is_file():
|
||||
return []
|
||||
|
||||
entries = []
|
||||
try:
|
||||
with open(filepath, "r", encoding="utf-8") as f:
|
||||
for line_num, line in enumerate(f, 1):
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
try:
|
||||
data = json.loads(line)
|
||||
entries.append(AuditEntry.from_dict(data))
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(
|
||||
f"Audit Trail: ligne {line_num} invalide dans "
|
||||
f"{filepath.name}: {e}"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Audit Trail: échec lecture {filepath}: {e}")
|
||||
|
||||
return entries
|
||||
|
||||
def _date_range(self, date_from: str = "", date_to: str = "") -> List[date]:
|
||||
"""Calculer la liste de dates entre date_from et date_to (inclus).
|
||||
|
||||
Si date_from est vide, utilise aujourd'hui.
|
||||
Si date_to est vide, utilise date_from.
|
||||
Format attendu : YYYY-MM-DD.
|
||||
"""
|
||||
if date_from:
|
||||
try:
|
||||
d_from = date.fromisoformat(date_from)
|
||||
except ValueError:
|
||||
d_from = date.today()
|
||||
else:
|
||||
d_from = date.today()
|
||||
|
||||
if date_to:
|
||||
try:
|
||||
d_to = date.fromisoformat(date_to)
|
||||
except ValueError:
|
||||
d_to = d_from
|
||||
else:
|
||||
d_to = d_from
|
||||
|
||||
# Assurer l'ordre chronologique
|
||||
if d_to < d_from:
|
||||
d_from, d_to = d_to, d_from
|
||||
|
||||
dates = []
|
||||
current = d_from
|
||||
while current <= d_to:
|
||||
dates.append(current)
|
||||
current += timedelta(days=1)
|
||||
|
||||
return dates
|
||||
|
||||
def query(
|
||||
self,
|
||||
date_from: str = "",
|
||||
date_to: str = "",
|
||||
user_id: str = "",
|
||||
session_id: str = "",
|
||||
result: str = "",
|
||||
action_type: str = "",
|
||||
workflow_id: str = "",
|
||||
domain: str = "",
|
||||
limit: int = 500,
|
||||
offset: int = 0,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Rechercher des entrées d'audit avec filtres.
|
||||
|
||||
Tous les filtres sont optionnels et combinés en AND.
|
||||
Retourne les entrées triées par timestamp décroissant (plus récentes d'abord).
|
||||
"""
|
||||
dates = self._date_range(date_from, date_to)
|
||||
all_entries: List[AuditEntry] = []
|
||||
|
||||
for d in dates:
|
||||
filepath = self._file_for_date(d)
|
||||
all_entries.extend(self._load_file(filepath))
|
||||
|
||||
# Appliquer les filtres
|
||||
filtered = []
|
||||
for entry in all_entries:
|
||||
if user_id and entry.user_id != user_id:
|
||||
continue
|
||||
if session_id and entry.session_id != session_id:
|
||||
continue
|
||||
if result and entry.result != result:
|
||||
continue
|
||||
if action_type and entry.action_type != action_type:
|
||||
continue
|
||||
if workflow_id and entry.workflow_id != workflow_id:
|
||||
continue
|
||||
if domain and entry.domain != domain:
|
||||
continue
|
||||
filtered.append(entry)
|
||||
|
||||
# Tri par timestamp décroissant (plus récent en premier)
|
||||
filtered.sort(key=lambda e: e.timestamp, reverse=True)
|
||||
|
||||
# Pagination
|
||||
paginated = filtered[offset:offset + limit]
|
||||
|
||||
return [e.to_dict() for e in paginated]
|
||||
|
||||
def get_summary(self, target_date: str = "") -> Dict[str, Any]:
|
||||
"""Résumé journalier d'une date donnée.
|
||||
|
||||
Retourne les statistiques agrégées :
|
||||
- Nombre total d'actions
|
||||
- Taux de succès
|
||||
- Répartition par utilisateur
|
||||
- Répartition par résultat
|
||||
- Répartition par type d'action
|
||||
- Répartition par workflow
|
||||
- Répartition par mode d'exécution
|
||||
"""
|
||||
if not target_date:
|
||||
target_date = date.today().isoformat()
|
||||
|
||||
try:
|
||||
d = date.fromisoformat(target_date)
|
||||
except ValueError:
|
||||
d = date.today()
|
||||
|
||||
entries = self._load_file(self._file_for_date(d))
|
||||
|
||||
if not entries:
|
||||
return {
|
||||
"date": d.isoformat(),
|
||||
"total_actions": 0,
|
||||
"success_rate": 0.0,
|
||||
"by_user": {},
|
||||
"by_result": {},
|
||||
"by_action_type": {},
|
||||
"by_workflow": {},
|
||||
"by_execution_mode": {},
|
||||
}
|
||||
|
||||
total = len(entries)
|
||||
successes = sum(1 for e in entries if e.result == "success")
|
||||
|
||||
# Agrégations
|
||||
by_user: Dict[str, Dict[str, Any]] = {}
|
||||
by_result: Dict[str, int] = {}
|
||||
by_action_type: Dict[str, int] = {}
|
||||
by_workflow: Dict[str, int] = {}
|
||||
by_execution_mode: Dict[str, int] = {}
|
||||
|
||||
for entry in entries:
|
||||
# Par utilisateur
|
||||
uid = entry.user_id or "inconnu"
|
||||
if uid not in by_user:
|
||||
by_user[uid] = {
|
||||
"user_name": entry.user_name,
|
||||
"total": 0,
|
||||
"success": 0,
|
||||
}
|
||||
by_user[uid]["total"] += 1
|
||||
if entry.result == "success":
|
||||
by_user[uid]["success"] += 1
|
||||
|
||||
# Par résultat
|
||||
r = entry.result or "inconnu"
|
||||
by_result[r] = by_result.get(r, 0) + 1
|
||||
|
||||
# Par type d'action
|
||||
at = entry.action_type or "inconnu"
|
||||
by_action_type[at] = by_action_type.get(at, 0) + 1
|
||||
|
||||
# Par workflow
|
||||
wf = entry.workflow_id or "inconnu"
|
||||
by_workflow[wf] = by_workflow.get(wf, 0) + 1
|
||||
|
||||
# Par mode d'exécution
|
||||
em = entry.execution_mode or "inconnu"
|
||||
by_execution_mode[em] = by_execution_mode.get(em, 0) + 1
|
||||
|
||||
# Calculer le taux de succès par utilisateur
|
||||
for uid, stats in by_user.items():
|
||||
stats["success_rate"] = round(
|
||||
stats["success"] / stats["total"], 3
|
||||
) if stats["total"] > 0 else 0.0
|
||||
|
||||
return {
|
||||
"date": d.isoformat(),
|
||||
"total_actions": total,
|
||||
"success_rate": round(successes / total, 3) if total > 0 else 0.0,
|
||||
"by_user": by_user,
|
||||
"by_result": by_result,
|
||||
"by_action_type": by_action_type,
|
||||
"by_workflow": by_workflow,
|
||||
"by_execution_mode": by_execution_mode,
|
||||
}
|
||||
|
||||
def export_csv(
|
||||
self,
|
||||
date_from: str = "",
|
||||
date_to: str = "",
|
||||
user_id: str = "",
|
||||
session_id: str = "",
|
||||
) -> str:
|
||||
"""Exporter les entrées d'audit en CSV.
|
||||
|
||||
Retourne une chaîne CSV complète (avec en-tête).
|
||||
Filtres optionnels par date, utilisateur, session.
|
||||
"""
|
||||
# Récupérer les entrées avec les mêmes filtres que query()
|
||||
entries = self.query(
|
||||
date_from=date_from,
|
||||
date_to=date_to,
|
||||
user_id=user_id,
|
||||
session_id=session_id,
|
||||
limit=100000, # Pas de pagination pour l'export
|
||||
)
|
||||
|
||||
if not entries:
|
||||
return ""
|
||||
|
||||
# En-têtes CSV — même ordre que le dataclass
|
||||
fieldnames = [f.name for f in fields(AuditEntry)]
|
||||
|
||||
output = io.StringIO()
|
||||
writer = csv.DictWriter(
|
||||
output,
|
||||
fieldnames=fieldnames,
|
||||
extrasaction="ignore",
|
||||
quoting=csv.QUOTE_MINIMAL,
|
||||
)
|
||||
writer.writeheader()
|
||||
for entry_dict in entries:
|
||||
writer.writerow(entry_dict)
|
||||
|
||||
return output.getvalue()
|
||||
622
agent_v0/server_v1/chat_interface.py
Normal file
622
agent_v0/server_v1/chat_interface.py
Normal file
@@ -0,0 +1,622 @@
|
||||
"""
|
||||
ChatInterface — Interface de chat conversationnelle pour Léa.
|
||||
|
||||
Permet au TIM (Technicien Information Médicale) de parler à Léa en langage
|
||||
naturel :
|
||||
- "Ouvre le Bloc-notes et écris bonjour"
|
||||
- Léa comprend (TaskPlanner) et propose un plan
|
||||
- Le TIM confirme (ou refuse)
|
||||
- Léa exécute (replay) et envoie des updates de progression
|
||||
- Historique conversationnel conservé par session
|
||||
|
||||
C'est une couche LÉGÈRE au-dessus du TaskPlanner. Toute la logique de
|
||||
compréhension reste dans TaskPlanner — ChatInterface gère uniquement
|
||||
l'état conversationnel, la confirmation et le suivi d'exécution.
|
||||
|
||||
États de la session :
|
||||
idle → en attente d'un message
|
||||
planning → TaskPlanner.understand() en cours
|
||||
awaiting_confirmation → plan prêt, attend la confirmation du TIM
|
||||
executing → replay en cours
|
||||
done → dernier tour terminé (retour à idle au prochain message)
|
||||
error → erreur interne (instruction non comprise, exception…)
|
||||
|
||||
Langue : 100% français (c'est l'interface utilisateur).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Callable, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# =============================================================================
|
||||
# États
|
||||
# =============================================================================
|
||||
|
||||
STATE_IDLE = "idle"
|
||||
STATE_PLANNING = "planning"
|
||||
STATE_AWAITING_CONFIRMATION = "awaiting_confirmation"
|
||||
STATE_EXECUTING = "executing"
|
||||
STATE_DONE = "done"
|
||||
STATE_ERROR = "error"
|
||||
|
||||
VALID_STATES = {
|
||||
STATE_IDLE,
|
||||
STATE_PLANNING,
|
||||
STATE_AWAITING_CONFIRMATION,
|
||||
STATE_EXECUTING,
|
||||
STATE_DONE,
|
||||
STATE_ERROR,
|
||||
}
|
||||
|
||||
# Rôles de messages
|
||||
ROLE_USER = "user"
|
||||
ROLE_LEA = "lea"
|
||||
ROLE_SYSTEM = "system"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Message
|
||||
# =============================================================================
|
||||
|
||||
@dataclass
|
||||
class ChatMessage:
|
||||
"""Un message dans l'historique d'une conversation."""
|
||||
role: str # "user", "lea", "system"
|
||||
content: str # Texte du message
|
||||
timestamp: float = field(default_factory=time.time)
|
||||
# Données contextuelles optionnelles (plan, résultat, progression…)
|
||||
meta: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"role": self.role,
|
||||
"content": self.content,
|
||||
"timestamp": self.timestamp,
|
||||
"meta": self.meta,
|
||||
}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ChatSession
|
||||
# =============================================================================
|
||||
|
||||
class ChatSession:
|
||||
"""Une conversation entre un utilisateur et Léa.
|
||||
|
||||
Maintient l'historique, l'état courant, et le dernier plan en attente
|
||||
de confirmation. Thread-safe (un lock par session).
|
||||
|
||||
Dépendances injectées (pour tester facilement) :
|
||||
- task_planner : instance de TaskPlanner (ou mock)
|
||||
- workflows_provider : callable () -> List[Dict] (liste des workflows)
|
||||
- replay_callback : callable (session_id, machine_id, params) -> replay_id
|
||||
- status_provider : callable (replay_id) -> Dict (pour suivre l'exécution)
|
||||
|
||||
Toutes ces dépendances sont optionnelles : ChatSession dégrade
|
||||
gracieusement (fallback) si gemma4 / replay indisponibles.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
session_id: str = "",
|
||||
task_planner: Any = None,
|
||||
workflows_provider: Optional[Callable[[], List[Dict[str, Any]]]] = None,
|
||||
replay_callback: Optional[Callable[..., str]] = None,
|
||||
status_provider: Optional[Callable[[str], Dict[str, Any]]] = None,
|
||||
machine_id: str = "default",
|
||||
):
|
||||
self.session_id = session_id or f"chat_{uuid.uuid4().hex[:12]}"
|
||||
self.machine_id = machine_id
|
||||
self.created_at = time.time()
|
||||
self.updated_at = self.created_at
|
||||
|
||||
self._task_planner = task_planner
|
||||
self._workflows_provider = workflows_provider
|
||||
self._replay_callback = replay_callback
|
||||
self._status_provider = status_provider
|
||||
|
||||
self._state: str = STATE_IDLE
|
||||
self._messages: List[ChatMessage] = []
|
||||
self._pending_plan: Any = None # TaskPlan en attente de confirmation
|
||||
self._active_replay_id: str = "" # Replay courant (si executing)
|
||||
self._last_progress: Dict[str, Any] = {}
|
||||
|
||||
self._lock = threading.RLock()
|
||||
|
||||
# Message d'accueil
|
||||
self._append(
|
||||
ROLE_LEA,
|
||||
"Bonjour ! Je suis Léa. Dites-moi ce que vous voulez que je fasse.",
|
||||
meta={"welcome": True},
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Accesseurs
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
@property
|
||||
def state(self) -> str:
|
||||
with self._lock:
|
||||
return self._state
|
||||
|
||||
def get_history(self) -> List[Dict[str, Any]]:
|
||||
"""Retourne l'historique complet des messages (sérialisé)."""
|
||||
with self._lock:
|
||||
return [m.to_dict() for m in self._messages]
|
||||
|
||||
def get_snapshot(self) -> Dict[str, Any]:
|
||||
"""État complet pour l'UI (historique + état + progression)."""
|
||||
with self._lock:
|
||||
return {
|
||||
"session_id": self.session_id,
|
||||
"state": self._state,
|
||||
"machine_id": self.machine_id,
|
||||
"created_at": self.created_at,
|
||||
"updated_at": self.updated_at,
|
||||
"messages": [m.to_dict() for m in self._messages],
|
||||
"pending_plan": (
|
||||
self._pending_plan.to_dict()
|
||||
if self._pending_plan is not None
|
||||
else None
|
||||
),
|
||||
"active_replay_id": self._active_replay_id,
|
||||
"progress": dict(self._last_progress),
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# API publique
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
def send_message(self, text: str) -> Dict[str, Any]:
|
||||
"""Envoyer un message utilisateur.
|
||||
|
||||
Trois cas possibles selon l'état courant :
|
||||
1. awaiting_confirmation → c'est une réponse OUI/NON
|
||||
2. executing → on rafraîchit la progression
|
||||
3. idle/done/error → nouvelle instruction, on appelle TaskPlanner
|
||||
"""
|
||||
text = (text or "").strip()
|
||||
if not text:
|
||||
return {
|
||||
"ok": False,
|
||||
"error": "Message vide",
|
||||
"state": self._state,
|
||||
}
|
||||
|
||||
with self._lock:
|
||||
# Cas 1 : on attend une confirmation
|
||||
if self._state == STATE_AWAITING_CONFIRMATION:
|
||||
return self._handle_confirmation_reply(text)
|
||||
|
||||
# Cas 2 : en pleine exécution → message ajouté mais pas d'action
|
||||
if self._state == STATE_EXECUTING:
|
||||
self._append(ROLE_USER, text)
|
||||
self._append(
|
||||
ROLE_LEA,
|
||||
"Je suis en train d'exécuter le workflow. Un instant…",
|
||||
)
|
||||
return {"ok": True, "state": self._state}
|
||||
|
||||
# Cas 3 : nouvelle instruction
|
||||
self._append(ROLE_USER, text)
|
||||
self._set_state(STATE_PLANNING)
|
||||
|
||||
# Appel TaskPlanner hors du lock (peut être lent : gemma4)
|
||||
return self._plan_and_reply(text)
|
||||
|
||||
def confirm(self, confirmed: bool = True) -> Dict[str, Any]:
|
||||
"""Confirmer (ou refuser) l'exécution du plan en attente."""
|
||||
with self._lock:
|
||||
if self._state != STATE_AWAITING_CONFIRMATION:
|
||||
return {
|
||||
"ok": False,
|
||||
"error": f"Pas de plan en attente (état={self._state})",
|
||||
"state": self._state,
|
||||
}
|
||||
|
||||
if not confirmed:
|
||||
self._append(
|
||||
ROLE_LEA,
|
||||
"D'accord, j'annule. Dites-moi autre chose quand vous voulez.",
|
||||
)
|
||||
self._pending_plan = None
|
||||
self._set_state(STATE_IDLE)
|
||||
return {"ok": True, "state": self._state, "confirmed": False}
|
||||
|
||||
plan = self._pending_plan
|
||||
if plan is None:
|
||||
self._set_state(STATE_IDLE)
|
||||
return {
|
||||
"ok": False,
|
||||
"error": "Aucun plan à confirmer",
|
||||
"state": self._state,
|
||||
}
|
||||
|
||||
self._set_state(STATE_EXECUTING)
|
||||
|
||||
# Exécution hors du lock
|
||||
return self._execute_plan(plan)
|
||||
|
||||
def refresh_progress(self) -> Dict[str, Any]:
|
||||
"""Rafraîchir la progression du replay en cours.
|
||||
|
||||
Appelé par le client (polling) pour obtenir les updates d'exécution.
|
||||
Si le replay est terminé, passe l'état à done.
|
||||
"""
|
||||
with self._lock:
|
||||
if self._state != STATE_EXECUTING or not self._active_replay_id:
|
||||
return {"ok": True, "state": self._state, "progress": self._last_progress}
|
||||
|
||||
replay_id = self._active_replay_id
|
||||
provider = self._status_provider
|
||||
|
||||
if provider is None:
|
||||
return {"ok": True, "state": self._state, "progress": {}}
|
||||
|
||||
try:
|
||||
status = provider(replay_id) or {}
|
||||
except Exception as e:
|
||||
logger.warning(f"ChatSession: status_provider erreur: {e}")
|
||||
status = {}
|
||||
|
||||
with self._lock:
|
||||
self._last_progress = status
|
||||
self.updated_at = time.time()
|
||||
|
||||
# Détection de fin
|
||||
replay_status = str(status.get("status", "")).lower()
|
||||
completed = status.get("completed_actions", 0)
|
||||
total = status.get("total_actions", 0)
|
||||
|
||||
if replay_status in ("done", "completed", "finished", "success"):
|
||||
summary = (
|
||||
f"Workflow terminé ! {completed}/{total} actions réussies."
|
||||
if total
|
||||
else "Workflow terminé."
|
||||
)
|
||||
self._append(ROLE_LEA, summary, meta={"progress": dict(status)})
|
||||
self._set_state(STATE_DONE)
|
||||
self._active_replay_id = ""
|
||||
elif replay_status in ("failed", "error", "aborted"):
|
||||
err = status.get("error") or status.get("message") or "Erreur inconnue"
|
||||
self._append(
|
||||
ROLE_LEA,
|
||||
f"Le workflow a échoué : {err}",
|
||||
meta={"progress": dict(status)},
|
||||
)
|
||||
self._set_state(STATE_ERROR)
|
||||
self._active_replay_id = ""
|
||||
elif replay_status == "paused_need_help":
|
||||
self._append(
|
||||
ROLE_LEA,
|
||||
"Je suis bloquée sur une action, j'ai besoin d'aide…",
|
||||
meta={"progress": dict(status)},
|
||||
)
|
||||
# on reste en executing pour que le TIM puisse reprendre
|
||||
# else : toujours en cours, pas de message
|
||||
|
||||
return {
|
||||
"ok": True,
|
||||
"state": self._state,
|
||||
"progress": dict(self._last_progress),
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Logique interne
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
def _plan_and_reply(self, instruction: str) -> Dict[str, Any]:
|
||||
"""Appeler TaskPlanner.understand() et produire une réponse."""
|
||||
plan = None
|
||||
error_msg = ""
|
||||
|
||||
if self._task_planner is None:
|
||||
error_msg = "Planificateur indisponible"
|
||||
else:
|
||||
try:
|
||||
workflows = []
|
||||
if self._workflows_provider is not None:
|
||||
try:
|
||||
workflows = self._workflows_provider() or []
|
||||
except Exception as e:
|
||||
logger.warning(f"ChatSession: workflows_provider erreur: {e}")
|
||||
workflows = []
|
||||
|
||||
plan = self._task_planner.understand(
|
||||
instruction=instruction,
|
||||
available_workflows=workflows,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"ChatSession: TaskPlanner.understand erreur: {e}")
|
||||
error_msg = f"Erreur de compréhension : {e}"
|
||||
|
||||
# Fallback gracieux si pas de plan / gemma4 indisponible
|
||||
if plan is None:
|
||||
with self._lock:
|
||||
self._append(
|
||||
ROLE_LEA,
|
||||
f"Désolée, je n'arrive pas à comprendre pour l'instant. {error_msg}".strip(),
|
||||
meta={"error": error_msg},
|
||||
)
|
||||
self._set_state(STATE_ERROR)
|
||||
return {
|
||||
"ok": False,
|
||||
"state": self._state,
|
||||
"error": error_msg,
|
||||
}
|
||||
|
||||
# Plan non compris
|
||||
if not plan.understood:
|
||||
reason = plan.error or "je n'ai pas compris votre demande"
|
||||
with self._lock:
|
||||
self._append(
|
||||
ROLE_LEA,
|
||||
(
|
||||
f"Désolée, {reason}. "
|
||||
"Pouvez-vous reformuler ? Je connais les workflows que vous m'avez appris."
|
||||
),
|
||||
meta={"plan": plan.to_dict()},
|
||||
)
|
||||
self._set_state(STATE_ERROR)
|
||||
return {
|
||||
"ok": False,
|
||||
"state": self._state,
|
||||
"plan": plan.to_dict(),
|
||||
"error": reason,
|
||||
}
|
||||
|
||||
# Plan compris → formuler la proposition
|
||||
proposal = self._format_proposal(plan)
|
||||
|
||||
with self._lock:
|
||||
self._pending_plan = plan
|
||||
self._append(ROLE_LEA, proposal, meta={"plan": plan.to_dict()})
|
||||
self._set_state(STATE_AWAITING_CONFIRMATION)
|
||||
return {
|
||||
"ok": True,
|
||||
"state": self._state,
|
||||
"plan": plan.to_dict(),
|
||||
"message": proposal,
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _format_proposal(plan: Any) -> str:
|
||||
"""Formuler une proposition en français à partir d'un TaskPlan."""
|
||||
lines = []
|
||||
lines.append(f"J'ai compris : « {plan.instruction} ».")
|
||||
|
||||
if plan.workflow_name:
|
||||
conf_pct = int(round((plan.match_confidence or 0.0) * 100))
|
||||
lines.append(
|
||||
f"Je vais utiliser le workflow « {plan.workflow_name} »"
|
||||
f" (confiance {conf_pct}%)."
|
||||
)
|
||||
elif plan.mode == "free" and plan.steps:
|
||||
lines.append(
|
||||
f"Je n'ai pas de workflow enregistré pour ça, "
|
||||
f"mais j'ai planifié {len(plan.steps)} étape(s) :"
|
||||
)
|
||||
for i, step in enumerate(plan.steps[:5], 1):
|
||||
desc = step.get("description", "") if isinstance(step, dict) else str(step)
|
||||
lines.append(f" {i}. {desc}")
|
||||
if len(plan.steps) > 5:
|
||||
lines.append(f" … et {len(plan.steps) - 5} autre(s) étape(s).")
|
||||
else:
|
||||
lines.append("Je n'ai pas de plan d'action clair pour cette demande.")
|
||||
|
||||
if plan.parameters:
|
||||
params_str = ", ".join(f"{k}={v}" for k, v in plan.parameters.items())
|
||||
lines.append(f"Paramètres détectés : {params_str}.")
|
||||
|
||||
if plan.is_loop:
|
||||
src = plan.loop_source or "éléments à traiter"
|
||||
lines.append(f"Traitement en boucle sur : {src}.")
|
||||
|
||||
lines.append("")
|
||||
lines.append("Est-ce que je peux y aller ? (oui / non)")
|
||||
return "\n".join(lines)
|
||||
|
||||
def _handle_confirmation_reply(self, text: str) -> Dict[str, Any]:
|
||||
"""Interpréter un message utilisateur comme OUI/NON."""
|
||||
self._append(ROLE_USER, text)
|
||||
yes_tokens = {"oui", "yes", "ok", "y", "go", "vas-y", "allez", "allez-y", "confirme", "confirmer", "continue"}
|
||||
no_tokens = {"non", "no", "annule", "annuler", "stop", "arrête", "arrete", "abandonne", "abandonner"}
|
||||
|
||||
t = text.strip().lower().rstrip("!.?")
|
||||
|
||||
if t in yes_tokens or any(t.startswith(tok + " ") for tok in yes_tokens):
|
||||
# Déverrouiller : sortir du lock avant d'exécuter (confirm re-prend le lock)
|
||||
pass
|
||||
elif t in no_tokens or any(t.startswith(tok + " ") for tok in no_tokens):
|
||||
self._append(
|
||||
ROLE_LEA,
|
||||
"D'accord, j'annule. Dites-moi autre chose quand vous voulez.",
|
||||
)
|
||||
self._pending_plan = None
|
||||
self._set_state(STATE_IDLE)
|
||||
return {"ok": True, "state": self._state, "confirmed": False}
|
||||
else:
|
||||
self._append(
|
||||
ROLE_LEA,
|
||||
"Je n'ai pas compris votre réponse. Répondez « oui » pour lancer ou « non » pour annuler.",
|
||||
)
|
||||
return {"ok": True, "state": self._state, "needs_clarification": True}
|
||||
|
||||
# Libérer le lock pour confirm() qui le re-prendra
|
||||
plan = self._pending_plan
|
||||
self._pending_plan = None
|
||||
self._set_state(STATE_EXECUTING)
|
||||
# Exécution hors du lock (sortie du with bloc appelant)
|
||||
# Note : _handle_confirmation_reply est appelé sous lock via send_message
|
||||
# On ne peut pas appeler _execute_plan ici sans risque de double-lock.
|
||||
# On relâche le lock via une astuce : on retourne un marqueur et send_message
|
||||
# orchestrera. Ici on appelle directement _execute_plan qui utilise RLock,
|
||||
# donc c'est safe (re-entrant).
|
||||
return self._execute_plan(plan)
|
||||
|
||||
def _execute_plan(self, plan: Any) -> Dict[str, Any]:
|
||||
"""Lancer le replay correspondant au plan."""
|
||||
if plan is None:
|
||||
with self._lock:
|
||||
self._append(ROLE_LEA, "Rien à exécuter.", meta={})
|
||||
self._set_state(STATE_IDLE)
|
||||
return {"ok": False, "state": self._state, "error": "Aucun plan"}
|
||||
|
||||
if self._replay_callback is None:
|
||||
with self._lock:
|
||||
self._append(
|
||||
ROLE_LEA,
|
||||
"Je ne peux pas exécuter : aucun moteur d'exécution n'est configuré.",
|
||||
)
|
||||
self._set_state(STATE_ERROR)
|
||||
return {
|
||||
"ok": False,
|
||||
"state": self._state,
|
||||
"error": "replay_callback non configuré",
|
||||
}
|
||||
|
||||
# Annoncer le démarrage
|
||||
with self._lock:
|
||||
self._append(
|
||||
ROLE_LEA,
|
||||
"C'est parti ! Je lance le workflow…",
|
||||
meta={"plan": plan.to_dict()},
|
||||
)
|
||||
|
||||
# Appeler le callback
|
||||
try:
|
||||
if plan.workflow_match:
|
||||
replay_id = self._replay_callback(
|
||||
session_id=plan.workflow_match,
|
||||
machine_id=self.machine_id,
|
||||
params=plan.parameters,
|
||||
)
|
||||
else:
|
||||
# Mode libre : pas encore branché côté chat (on refuse proprement)
|
||||
replay_id = ""
|
||||
raise RuntimeError(
|
||||
"Mode libre non supporté pour l'instant — "
|
||||
"entraînez un workflow pour cette tâche"
|
||||
)
|
||||
except Exception as e:
|
||||
with self._lock:
|
||||
self._append(
|
||||
ROLE_LEA,
|
||||
f"Je n'ai pas pu lancer le workflow : {e}",
|
||||
meta={"error": str(e)},
|
||||
)
|
||||
self._set_state(STATE_ERROR)
|
||||
return {"ok": False, "state": self._state, "error": str(e)}
|
||||
|
||||
with self._lock:
|
||||
self._active_replay_id = replay_id or ""
|
||||
return {
|
||||
"ok": True,
|
||||
"state": self._state,
|
||||
"replay_id": self._active_replay_id,
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
def _append(self, role: str, content: str, meta: Optional[Dict[str, Any]] = None) -> None:
|
||||
"""Ajouter un message à l'historique (doit être appelé sous lock)."""
|
||||
msg = ChatMessage(role=role, content=content, meta=meta or {})
|
||||
self._messages.append(msg)
|
||||
self.updated_at = msg.timestamp
|
||||
|
||||
def _set_state(self, new_state: str) -> None:
|
||||
"""Changer d'état (doit être appelé sous lock)."""
|
||||
if new_state not in VALID_STATES:
|
||||
raise ValueError(f"État invalide : {new_state}")
|
||||
old = self._state
|
||||
self._state = new_state
|
||||
self.updated_at = time.time()
|
||||
if old != new_state:
|
||||
logger.debug(
|
||||
f"ChatSession {self.session_id}: {old} -> {new_state}"
|
||||
)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ChatManager — registre en mémoire des sessions
|
||||
# =============================================================================
|
||||
|
||||
class ChatManager:
|
||||
"""Registre en mémoire des sessions de chat.
|
||||
|
||||
Thread-safe. Utilisé par l'API FastAPI pour gérer plusieurs
|
||||
conversations simultanées.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
task_planner: Any = None,
|
||||
workflows_provider: Optional[Callable[[], List[Dict[str, Any]]]] = None,
|
||||
replay_callback: Optional[Callable[..., str]] = None,
|
||||
status_provider: Optional[Callable[[str], Dict[str, Any]]] = None,
|
||||
):
|
||||
self._task_planner = task_planner
|
||||
self._workflows_provider = workflows_provider
|
||||
self._replay_callback = replay_callback
|
||||
self._status_provider = status_provider
|
||||
self._sessions: Dict[str, ChatSession] = {}
|
||||
self._lock = threading.RLock()
|
||||
|
||||
def create_session(self, machine_id: str = "default") -> ChatSession:
|
||||
"""Créer une nouvelle session de chat."""
|
||||
session = ChatSession(
|
||||
task_planner=self._task_planner,
|
||||
workflows_provider=self._workflows_provider,
|
||||
replay_callback=self._replay_callback,
|
||||
status_provider=self._status_provider,
|
||||
machine_id=machine_id,
|
||||
)
|
||||
with self._lock:
|
||||
self._sessions[session.session_id] = session
|
||||
logger.info(f"ChatManager: session créée {session.session_id}")
|
||||
return session
|
||||
|
||||
def get_session(self, session_id: str) -> Optional[ChatSession]:
|
||||
with self._lock:
|
||||
return self._sessions.get(session_id)
|
||||
|
||||
def list_sessions(self) -> List[Dict[str, Any]]:
|
||||
with self._lock:
|
||||
return [
|
||||
{
|
||||
"session_id": s.session_id,
|
||||
"state": s.state,
|
||||
"machine_id": s.machine_id,
|
||||
"created_at": s.created_at,
|
||||
"updated_at": s.updated_at,
|
||||
"message_count": len(s.get_history()),
|
||||
}
|
||||
for s in self._sessions.values()
|
||||
]
|
||||
|
||||
def delete_session(self, session_id: str) -> bool:
|
||||
with self._lock:
|
||||
return self._sessions.pop(session_id, None) is not None
|
||||
|
||||
def cleanup_old(self, max_age_s: float = 3600 * 24) -> int:
|
||||
"""Supprimer les sessions inactives depuis max_age_s secondes."""
|
||||
now = time.time()
|
||||
removed = 0
|
||||
with self._lock:
|
||||
to_delete = [
|
||||
sid for sid, s in self._sessions.items()
|
||||
if (now - s.updated_at) > max_age_s
|
||||
]
|
||||
for sid in to_delete:
|
||||
del self._sessions[sid]
|
||||
removed += 1
|
||||
return removed
|
||||
5
agent_v0/server_v1/core/__init__.py
Normal file
5
agent_v0/server_v1/core/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
"""Sous-package `core` du serveur (server_v1).
|
||||
|
||||
Sert de point de montage pour les composants serveur internes
|
||||
(par ex. `dialog/` — DialogResolver MVP R2).
|
||||
"""
|
||||
36
agent_v0/server_v1/core/dialog/__init__.py
Normal file
36
agent_v0/server_v1/core/dialog/__init__.py
Normal file
@@ -0,0 +1,36 @@
|
||||
"""DialogResolver — R2 MVP P0.
|
||||
|
||||
Centralise la résolution des modaux runtime côté serveur via un catalogue
|
||||
``KNOWN_DIALOGS`` (10 entrées P0) + un ``DialogResolver`` qui renvoie une
|
||||
politique stricte ``auto`` / ``pause`` / ``skip``.
|
||||
|
||||
Spec source : ``docs/recherche/SPEC_POPUPS_CATALOGUE.md``.
|
||||
|
||||
Périmètre P0 explicite :
|
||||
- Catalogue minimal 10 entrées (Easily save/overwrite/confirm/clinical-warning,
|
||||
Notepad unsaved, Windows save confirm, Windows file-explorer fallback, UAC,
|
||||
Hello CredUI, browser update).
|
||||
- Validateur déclaratif ``system_modals_cannot_be_overridden`` : refuse toute
|
||||
surcharge ``auto`` / ``skip`` sur un modal SYSTÈME (`windows-` / `defender-`).
|
||||
- Pas de modification d'``executor.py`` (rebranchement côté agent_v1 = P1).
|
||||
"""
|
||||
|
||||
from .catalog import KNOWN_DIALOGS, DialogPolicy, DialogSpec
|
||||
from .resolver import (
|
||||
DialogResolution,
|
||||
DialogResolver,
|
||||
DeclarativeOverride,
|
||||
SystemModalOverrideError,
|
||||
system_modals_cannot_be_overridden,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"KNOWN_DIALOGS",
|
||||
"DialogPolicy",
|
||||
"DialogSpec",
|
||||
"DialogResolver",
|
||||
"DialogResolution",
|
||||
"DeclarativeOverride",
|
||||
"SystemModalOverrideError",
|
||||
"system_modals_cannot_be_overridden",
|
||||
]
|
||||
262
agent_v0/server_v1/core/dialog/catalog.py
Normal file
262
agent_v0/server_v1/core/dialog/catalog.py
Normal file
@@ -0,0 +1,262 @@
|
||||
"""Catalogue des modaux runtime connus — R2 MVP P0.
|
||||
|
||||
Source de vérité unique (côté serveur) pour les 10 entrées P0.
|
||||
Réutilise les patterns présents dans ``agent_v1/core/executor.py``
|
||||
(``_KNOWN_RUNTIME_DIALOGS``, ``_CONTEXTUAL_RUNTIME_DIALOGS``) sans les
|
||||
dupliquer côté agent.
|
||||
|
||||
Format compact : un ``DialogSpec`` par modal, avec :
|
||||
- ``id`` — identifiant kebab-case stable (clé de ``KNOWN_DIALOGS``).
|
||||
- ``title_patterns`` — patterns à matcher dans le titre fenêtre
|
||||
(case/accent-insensitive, voir ``DialogResolver._normalize``).
|
||||
- ``evidence_texts`` — patterns secondaires requis dans l'OCR/UIA
|
||||
des textes visibles (utilisé quand le titre seul est ambigu, ex.
|
||||
Bloc-notes).
|
||||
- ``button_texts`` — labels cibles si ``policy=auto``.
|
||||
- ``policy`` — politique par défaut, trichotomie stricte
|
||||
(``auto`` / ``pause`` / ``skip``).
|
||||
- ``declarative_override`` — autorise un workflow VWB à surcharger
|
||||
``policy`` via ``expected_modal`` ? Toujours ``False`` pour SYSTÈME.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict, Literal, Tuple
|
||||
|
||||
# Trichotomie stricte des politiques. Tout autre valeur est interdite.
|
||||
DialogPolicy = Literal["auto", "pause", "skip"]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class DialogSpec:
|
||||
"""Description compacte d'un modal connu.
|
||||
|
||||
Frozen pour éviter les mutations accidentelles (le catalogue est
|
||||
une constante globale, partagée entre threads via ``DialogResolver``).
|
||||
"""
|
||||
|
||||
id: str
|
||||
title_patterns: Tuple[str, ...]
|
||||
evidence_texts: Tuple[str, ...] = field(default_factory=tuple)
|
||||
button_texts: Tuple[str, ...] = field(default_factory=tuple)
|
||||
policy: DialogPolicy = "pause"
|
||||
declarative_override: bool = False
|
||||
description: str = ""
|
||||
|
||||
|
||||
# Préfixes d'IDs catalogue qui désignent des modaux SYSTÈME — politique
|
||||
# ``pause`` STRICTE et non surchargeable par un workflow VWB
|
||||
# (cf. SPEC_POPUPS_CATALOGUE.md §3 + validateur).
|
||||
SYSTEM_DIALOG_ID_PREFIXES: Tuple[str, ...] = ("windows-", "defender-")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 10 entrées P0 — démo Urgence_aiva + démo Bloc-notes (replay 4c38dbb8)
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# Sémantique :
|
||||
# - les `title_patterns` sont matchés en substring après normalisation
|
||||
# case/accent-insensitive ; quand `evidence_texts` est non vide, AU MOINS
|
||||
# UN pattern doit aussi se retrouver dans les textes fournis (utile pour
|
||||
# Bloc-notes / Notepad dont le titre seul est trop générique).
|
||||
# - `button_texts` n'est utilisé qu'avec `policy="auto"` ; il liste les
|
||||
# labels acceptables (priorité = ordre dans le tuple).
|
||||
#
|
||||
# Important : `windows-file-explorer` est inclus comme *fallback transition*
|
||||
# (replay 4c38dbb8 — clic "Enregistrer" → fenêtre observée
|
||||
# "rpa_vision : Explorateur de fichiers" au lieu de Bloc-notes). On le marque
|
||||
# `pause` pour laisser un humain trancher tant que le contextual matching
|
||||
# côté agent n'a pas rebranché DialogResolver (P1).
|
||||
KNOWN_DIALOGS: Dict[str, DialogSpec] = {
|
||||
"confirm-save-overwrite": DialogSpec(
|
||||
id="confirm-save-overwrite",
|
||||
title_patterns=(
|
||||
"confirmer l'enregistrement",
|
||||
"confirm save as",
|
||||
),
|
||||
button_texts=("Oui", "Yes", "Remplacer", "Replace"),
|
||||
policy="auto",
|
||||
declarative_override=True,
|
||||
description=(
|
||||
"Windows/Easily — confirmation d'écrasement de fichier "
|
||||
"(`Voulez-vous le remplacer ?`)."
|
||||
),
|
||||
),
|
||||
"notepad-unsaved-changes": DialogSpec(
|
||||
id="notepad-unsaved-changes",
|
||||
title_patterns=("bloc-notes", "notepad"),
|
||||
evidence_texts=(
|
||||
"ne pas enregistrer",
|
||||
"don't save",
|
||||
"voulez-vous enregistrer",
|
||||
"do you want to save",
|
||||
),
|
||||
button_texts=("Enregistrer", "Save"),
|
||||
policy="auto",
|
||||
declarative_override=True,
|
||||
description=(
|
||||
"Bloc-notes / Notepad — `Voulez-vous enregistrer les modifications ?` "
|
||||
"Titre seul ambigu → exige une evidence visuelle."
|
||||
),
|
||||
),
|
||||
"windows-file-explorer": DialogSpec(
|
||||
id="windows-file-explorer",
|
||||
title_patterns=(
|
||||
"explorateur de fichiers",
|
||||
"file explorer",
|
||||
),
|
||||
# Pas de button_texts : aucune action auto en P0.
|
||||
policy="pause",
|
||||
declarative_override=True,
|
||||
description=(
|
||||
"Fenêtre Explorateur de fichiers détectée comme premier plan "
|
||||
"alors qu'on attendait Bloc-notes (cas replay 4c38dbb8). "
|
||||
"Fallback `pause` pour escalade humaine en attendant le "
|
||||
"contextual matching côté agent_v1 (P1)."
|
||||
),
|
||||
),
|
||||
"easily-save-unconfirmed": DialogSpec(
|
||||
id="easily-save-unconfirmed",
|
||||
title_patterns=(
|
||||
"easily assure",
|
||||
"easily assure - confirmation",
|
||||
),
|
||||
evidence_texts=(
|
||||
"voulez-vous enregistrer",
|
||||
"enregistrer les modifications",
|
||||
"do you want to save",
|
||||
"unsaved changes",
|
||||
),
|
||||
button_texts=("Enregistrer", "Save"),
|
||||
policy="auto",
|
||||
declarative_override=True,
|
||||
description=(
|
||||
"Easily Assure — Confirmation d'enregistrement avant fermeture "
|
||||
"(placeholder : signature OCR à affiner sur capture réelle)."
|
||||
),
|
||||
),
|
||||
"easily-overwrite-file": DialogSpec(
|
||||
id="easily-overwrite-file",
|
||||
title_patterns=(
|
||||
"confirmer l'enregistrement",
|
||||
"confirm save as",
|
||||
),
|
||||
evidence_texts=(
|
||||
"existe déjà",
|
||||
"voulez-vous le remplacer",
|
||||
"already exists",
|
||||
"overwrite",
|
||||
),
|
||||
button_texts=("Oui", "Yes"),
|
||||
policy="auto",
|
||||
declarative_override=True,
|
||||
description=(
|
||||
"Easily Assure — popup d'écrasement de fichier "
|
||||
"(placeholder : signature OCR à affiner)."
|
||||
),
|
||||
),
|
||||
"easily-confirm-action": DialogSpec(
|
||||
id="easily-confirm-action",
|
||||
title_patterns=("confirmer", "confirm"),
|
||||
evidence_texts=(
|
||||
"êtes-vous sûr",
|
||||
"are you sure",
|
||||
"confirmer l'enregistrement",
|
||||
),
|
||||
button_texts=("Oui", "Yes"),
|
||||
policy="auto",
|
||||
declarative_override=True,
|
||||
description=(
|
||||
"Easily Assure — confirmation générique d'une action métier "
|
||||
"(placeholder)."
|
||||
),
|
||||
),
|
||||
"easily-clinical-warning": DialogSpec(
|
||||
id="easily-clinical-warning",
|
||||
title_patterns=(
|
||||
"avertissement clinique",
|
||||
"easily assure - avertissement",
|
||||
"clinical alert",
|
||||
),
|
||||
evidence_texts=(
|
||||
"attention",
|
||||
"avertissement clinique",
|
||||
"allergie",
|
||||
"contre-indication",
|
||||
"warning",
|
||||
),
|
||||
# Pas de button_texts : la décision est clinique, humaine, par design.
|
||||
policy="pause",
|
||||
declarative_override=False,
|
||||
description=(
|
||||
"Easily Assure — avertissement clinique (allergie, contre-indication). "
|
||||
"Décision médicale OBLIGATOIRE — `pause` non surchargeable."
|
||||
),
|
||||
),
|
||||
"windows-uac": DialogSpec(
|
||||
id="windows-uac",
|
||||
title_patterns=(
|
||||
"contrôle de compte d'utilisateur",
|
||||
"user account control",
|
||||
),
|
||||
evidence_texts=(
|
||||
"voulez-vous autoriser cette application",
|
||||
"do you want to allow this app",
|
||||
),
|
||||
policy="pause",
|
||||
declarative_override=False,
|
||||
description=(
|
||||
"Windows UAC — élévation de privilèges. JAMAIS auto-accept en "
|
||||
"healthtech. `pause` STRICT, non surchargeable par déclaratif workflow."
|
||||
),
|
||||
),
|
||||
"windows-hello-credui": DialogSpec(
|
||||
id="windows-hello-credui",
|
||||
title_patterns=(
|
||||
"sécurité windows",
|
||||
"windows security",
|
||||
),
|
||||
evidence_texts=(
|
||||
"windows hello",
|
||||
"saisissez votre code pin",
|
||||
"enter your pin",
|
||||
"touchez le capteur",
|
||||
"fingerprint",
|
||||
"connectez-vous à votre compte",
|
||||
"sign in to your account",
|
||||
),
|
||||
policy="pause",
|
||||
declarative_override=False,
|
||||
description=(
|
||||
"Windows Hello / CredUI — identification physique requise par "
|
||||
"construction (PIN, empreinte, MFA). `pause` STRICT."
|
||||
),
|
||||
),
|
||||
"edge-update": DialogSpec(
|
||||
id="edge-update",
|
||||
title_patterns=(
|
||||
"microsoft edge",
|
||||
"microsoft edge a été mis à jour",
|
||||
"google chrome",
|
||||
),
|
||||
evidence_texts=(
|
||||
"a été mis à jour",
|
||||
"redémarrer",
|
||||
"relancer",
|
||||
"was updated",
|
||||
"relaunch",
|
||||
),
|
||||
policy="skip",
|
||||
declarative_override=True,
|
||||
description=(
|
||||
"Edge / Chrome — bulle de mise à jour non bloquante "
|
||||
"(ignore par défaut, ne casse pas le workflow)."
|
||||
),
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
def is_system_dialog(modal_id: str) -> bool:
|
||||
"""Vrai si le modal appartient à la catégorie SYSTÈME (Windows/Defender)."""
|
||||
return modal_id.startswith(SYSTEM_DIALOG_ID_PREFIXES)
|
||||
264
agent_v0/server_v1/core/dialog/resolver.py
Normal file
264
agent_v0/server_v1/core/dialog/resolver.py
Normal file
@@ -0,0 +1,264 @@
|
||||
"""DialogResolver — R2 MVP P0.
|
||||
|
||||
Match titre + evidence → ``DialogResolution`` (policy stricte + action).
|
||||
Réutilise la normalisation case/accent-insensitive développée pour
|
||||
``ActionExecutorV1._normalize_loose_text`` (executor.py).
|
||||
|
||||
Pas de dépendance Windows : pur Python, testable hors VM.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, Iterable, Mapping, Optional, Sequence
|
||||
|
||||
from .catalog import (
|
||||
KNOWN_DIALOGS,
|
||||
DialogPolicy,
|
||||
DialogSpec,
|
||||
SYSTEM_DIALOG_ID_PREFIXES,
|
||||
is_system_dialog,
|
||||
)
|
||||
|
||||
|
||||
_TRANSLATION_TABLE = str.maketrans(
|
||||
{
|
||||
"’": "'",
|
||||
"‘": "'",
|
||||
"`": "'",
|
||||
"´": "'",
|
||||
"–": "-",
|
||||
"—": "-",
|
||||
"−": "-",
|
||||
"\xa0": " ",
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def _normalize(value: Optional[str]) -> str:
|
||||
"""Casefold + dé-ambiguïse apostrophes/tirets/non-breaking-space.
|
||||
|
||||
Logique alignée sur ``ActionExecutorV1._normalize_loose_text``
|
||||
(agent_v1/core/executor.py) pour rester cohérent côté agent.
|
||||
"""
|
||||
if not value:
|
||||
return ""
|
||||
normalized = str(value).casefold().translate(_TRANSLATION_TABLE)
|
||||
return " ".join(normalized.split())
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class DialogResolution:
|
||||
"""Résultat d'une résolution. Sérialisable JSON via ``to_dict``.
|
||||
|
||||
- ``matched`` : True si un modal du catalogue a été identifié.
|
||||
- ``dialog_id`` : ID catalogue (``""`` si pas de match).
|
||||
- ``policy`` : politique stricte appliquée (``"auto" | "pause" | "skip"``).
|
||||
Quand aucun match : ``"pause"`` par défaut (politique conservative
|
||||
healthtech, cf. SPEC §1.1 règle d'or n°4).
|
||||
- ``action`` : dict décrivant le geste à effectuer si ``policy=="auto"``,
|
||||
``None`` sinon.
|
||||
- ``reason`` : message FR court pour audit / bulle Léa.
|
||||
"""
|
||||
|
||||
matched: bool
|
||||
dialog_id: str
|
||||
policy: DialogPolicy
|
||||
action: Optional[Dict[str, Any]] = None
|
||||
reason: str = ""
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"matched": self.matched,
|
||||
"dialog_id": self.dialog_id,
|
||||
"policy": self.policy,
|
||||
"action": self.action,
|
||||
"reason": self.reason,
|
||||
}
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class DeclarativeOverride:
|
||||
"""Surcharge déclarative remontée par un workflow VWB (``expected_modal``).
|
||||
|
||||
Le ``DialogResolver`` ne consomme cette structure que si la spec de base
|
||||
autorise ``declarative_override=True``. Les modaux SYSTÈME sont rejetés
|
||||
en amont par :func:`system_modals_cannot_be_overridden`.
|
||||
"""
|
||||
|
||||
dialog_id: str
|
||||
policy: DialogPolicy
|
||||
button_label: Optional[str] = None
|
||||
confirm: bool = False
|
||||
|
||||
|
||||
class SystemModalOverrideError(ValueError):
|
||||
"""Lève en cas de tentative de surcharger un modal SYSTÈME en auto/skip."""
|
||||
|
||||
|
||||
def system_modals_cannot_be_overridden(override: DeclarativeOverride) -> DeclarativeOverride:
|
||||
"""Validateur déclaratif (à brancher côté VWB schema + côté API).
|
||||
|
||||
Toute déclaration ``expected_modal`` qui cible un modal SYSTÈME
|
||||
(préfixes ``windows-`` / ``defender-``) ET tente une politique
|
||||
différente de ``"pause"`` est rejetée par construction.
|
||||
|
||||
Cf. SPEC_POPUPS_CATALOGUE.md §3 — règle d'or n°1.
|
||||
"""
|
||||
if is_system_dialog(override.dialog_id) and override.policy != "pause":
|
||||
raise SystemModalOverrideError(
|
||||
f"expected_modal.policy='{override.policy}' interdit pour "
|
||||
f"'{override.dialog_id}' (catégorie SYSTÈME — toujours 'pause' "
|
||||
f"en healthtech)."
|
||||
)
|
||||
return override
|
||||
|
||||
|
||||
class DialogResolver:
|
||||
"""Résolveur de modaux runtime — P0.
|
||||
|
||||
Stateless : peut être instancié une fois côté serveur et appelé en
|
||||
concurrence. La méthode :meth:`resolve` n'effectue aucun I/O.
|
||||
"""
|
||||
|
||||
def __init__(self, catalog: Optional[Mapping[str, DialogSpec]] = None) -> None:
|
||||
# Copie défensive — le caller peut injecter un sous-ensemble pour
|
||||
# les tests sans muter ``KNOWN_DIALOGS``.
|
||||
self._catalog: Dict[str, DialogSpec] = dict(catalog or KNOWN_DIALOGS)
|
||||
|
||||
@property
|
||||
def catalog(self) -> Mapping[str, DialogSpec]:
|
||||
return self._catalog
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# API publique
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def resolve(
|
||||
self,
|
||||
current_title: str,
|
||||
evidence_texts: Optional[Sequence[str]] = None,
|
||||
declarative_override: Optional[DeclarativeOverride] = None,
|
||||
) -> DialogResolution:
|
||||
"""Identifier un modal et calculer sa politique effective.
|
||||
|
||||
- ``current_title`` : titre fenêtre courante (Windows ``GetWindowText``
|
||||
/ Linux ``xdotool getactivewindow getwindowname``).
|
||||
- ``evidence_texts`` : tableau de textes secondaires (OCR/UIA) — sert
|
||||
à lever l'ambiguïté quand le titre seul ne suffit pas (Bloc-notes).
|
||||
- ``declarative_override`` : surcharge VWB. Doit avoir été validée
|
||||
en amont par :func:`system_modals_cannot_be_overridden` ; on
|
||||
le revalide ici par sécurité (défense en profondeur).
|
||||
|
||||
Retourne toujours une ``DialogResolution`` (jamais ``None``).
|
||||
Sans match, politique conservative ``pause``.
|
||||
"""
|
||||
norm_title = _normalize(current_title)
|
||||
norm_evidences = tuple(_normalize(t) for t in (evidence_texts or ()))
|
||||
|
||||
spec = self._find_matching_spec(norm_title, norm_evidences)
|
||||
if spec is None:
|
||||
return DialogResolution(
|
||||
matched=False,
|
||||
dialog_id="",
|
||||
policy="pause",
|
||||
action=None,
|
||||
reason=(
|
||||
"Aucun modal connu n'a matché ce titre/evidence — "
|
||||
"pause conservative (healthtech)."
|
||||
),
|
||||
)
|
||||
|
||||
effective_policy = spec.policy
|
||||
applied_override = False
|
||||
|
||||
if declarative_override and declarative_override.dialog_id == spec.id:
|
||||
# Garde-fou systémique : on rejette toute surcharge SYSTÈME même
|
||||
# si appelée directement sur ``resolve`` (défense en profondeur).
|
||||
system_modals_cannot_be_overridden(declarative_override)
|
||||
if spec.declarative_override:
|
||||
effective_policy = declarative_override.policy
|
||||
applied_override = True
|
||||
|
||||
action = self._build_action(spec, effective_policy, declarative_override if applied_override else None)
|
||||
reason = self._build_reason(spec, effective_policy, applied_override)
|
||||
|
||||
return DialogResolution(
|
||||
matched=True,
|
||||
dialog_id=spec.id,
|
||||
policy=effective_policy,
|
||||
action=action,
|
||||
reason=reason,
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Internes
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _find_matching_spec(
|
||||
self,
|
||||
norm_title: str,
|
||||
norm_evidences: Iterable[str],
|
||||
) -> Optional[DialogSpec]:
|
||||
if not norm_title:
|
||||
return None
|
||||
evidences = tuple(norm_evidences)
|
||||
for spec in self._catalog.values():
|
||||
if not self._title_matches(spec, norm_title):
|
||||
continue
|
||||
if spec.evidence_texts:
|
||||
if not self._evidence_matches(spec, evidences):
|
||||
continue
|
||||
return spec
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _title_matches(spec: DialogSpec, norm_title: str) -> bool:
|
||||
for pattern in spec.title_patterns:
|
||||
norm_pattern = _normalize(pattern)
|
||||
if norm_pattern and norm_pattern in norm_title:
|
||||
return True
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def _evidence_matches(spec: DialogSpec, norm_evidences: Sequence[str]) -> bool:
|
||||
for pattern in spec.evidence_texts:
|
||||
norm_pattern = _normalize(pattern)
|
||||
if not norm_pattern:
|
||||
continue
|
||||
for ev in norm_evidences:
|
||||
if norm_pattern in ev:
|
||||
return True
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def _build_action(
|
||||
spec: DialogSpec,
|
||||
policy: DialogPolicy,
|
||||
override: Optional[DeclarativeOverride],
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
if policy != "auto":
|
||||
return None
|
||||
# Bouton cible : surcharge déclarative > premier button_text catalogue.
|
||||
button_label = None
|
||||
if override and override.button_label:
|
||||
button_label = override.button_label
|
||||
elif spec.button_texts:
|
||||
button_label = spec.button_texts[0]
|
||||
|
||||
return {
|
||||
"type": "click_button",
|
||||
"button_label": button_label,
|
||||
"fallback_button_labels": list(spec.button_texts),
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _build_reason(
|
||||
spec: DialogSpec,
|
||||
policy: DialogPolicy,
|
||||
applied_override: bool,
|
||||
) -> str:
|
||||
base = f"Modal '{spec.id}' identifié — policy={policy}"
|
||||
if applied_override:
|
||||
base += " (surcharge workflow)"
|
||||
return base
|
||||
1025
agent_v0/server_v1/domain_context.py
Normal file
1025
agent_v0/server_v1/domain_context.py
Normal file
File diff suppressed because it is too large
Load Diff
373
agent_v0/server_v1/execution_plan_runner.py
Normal file
373
agent_v0/server_v1/execution_plan_runner.py
Normal file
@@ -0,0 +1,373 @@
|
||||
# agent_v0/server_v1/execution_plan_runner.py
|
||||
"""
|
||||
ExecutionPlanRunner — Adaptateur ExecutionPlan → actions replay.
|
||||
|
||||
Pièce d'intégration du pipeline V4 :
|
||||
RawTrace → IRBuilder → WorkflowIR → ExecutionCompiler → ExecutionPlan → Runtime
|
||||
|
||||
Ce module convertit un `ExecutionPlan` (plan pré-compilé, déterministe) en
|
||||
liste d'actions au format attendu par l'executor replay actuel (clé x_pct,
|
||||
y_pct, target_spec, etc.), puis les injecte dans `_replay_queues`.
|
||||
|
||||
L'ancien chemin `build_replay_from_raw_events()` dans stream_processor.py
|
||||
reste inchangé — les deux chemins coexistent pendant la transition.
|
||||
|
||||
Format d'action produit (compatible executor existant) :
|
||||
{
|
||||
"action_id": "act_...",
|
||||
"type": "click",
|
||||
"x_pct": 0.5,
|
||||
"y_pct": 0.3,
|
||||
"visual_mode": True,
|
||||
"target_spec": {
|
||||
"by_text": "...",
|
||||
"window_title": "...",
|
||||
"vlm_description": "...",
|
||||
"anchor_image_base64": "...",
|
||||
},
|
||||
"expected_window_title": "...",
|
||||
}
|
||||
|
||||
Auteur: Dom, Alice - Avril 2026
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
import threading
|
||||
import uuid
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from core.workflow.execution_plan import (
|
||||
ExecutionNode,
|
||||
ExecutionPlan,
|
||||
ResolutionStrategy,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Substitution de variables
|
||||
# =========================================================================
|
||||
# Le WorkflowIR utilise la syntaxe `{var}` dans les champs texte.
|
||||
# Ici on supporte les deux : `{var}` (IR natif) et `${var}` (replay legacy).
|
||||
_VARIABLE_RE_CURLY = re.compile(r"\{(\w+)\}")
|
||||
_VARIABLE_RE_DOLLAR = re.compile(r"\$\{(\w+)\}")
|
||||
|
||||
|
||||
def substitute_variables(text: str, variables: Dict[str, Any]) -> str:
|
||||
"""Remplacer `{var}` et `${var}` par leurs valeurs.
|
||||
|
||||
Priorité : variables fournies > placeholder brut (inchangé si inconnu).
|
||||
"""
|
||||
if not text or not variables:
|
||||
return text
|
||||
|
||||
def replacer(match: "re.Match[str]") -> str:
|
||||
var_name = match.group(1)
|
||||
if var_name in variables:
|
||||
return str(variables[var_name])
|
||||
return match.group(0)
|
||||
|
||||
text = _VARIABLE_RE_DOLLAR.sub(replacer, text)
|
||||
text = _VARIABLE_RE_CURLY.sub(replacer, text)
|
||||
return text
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Conversion ExecutionNode → action replay
|
||||
# =========================================================================
|
||||
|
||||
|
||||
def _strategy_to_target_spec(
|
||||
strategy: Optional[ResolutionStrategy],
|
||||
fallbacks: Optional[List[ResolutionStrategy]] = None,
|
||||
intent: str = "",
|
||||
) -> Dict[str, Any]:
|
||||
"""Construire un `target_spec` depuis les stratégies de résolution.
|
||||
|
||||
Fusionne la primaire et les fallbacks pour donner un maximum d'indices
|
||||
au resolve_engine :
|
||||
- OCR → by_text
|
||||
- template → anchor_image_base64 (depuis anchor_b64)
|
||||
- VLM → vlm_description
|
||||
|
||||
Règle V4 : la stratégie primaire dicte la méthode préférée.
|
||||
Le champ `resolve_order` liste les méthodes dans l'ordre à essayer.
|
||||
Le resolve_engine honore cet ordre au lieu de sa cascade par défaut.
|
||||
|
||||
resolve_order est la clé du "zéro VLM au runtime" :
|
||||
- ["ocr", "template", "vlm"] → V4 typique (OCR rapide)
|
||||
- ["template", "ocr", "vlm"] → apprentissage : template marche mieux
|
||||
- ["vlm"] → éléments sans texte (icônes)
|
||||
"""
|
||||
spec: Dict[str, Any] = {}
|
||||
|
||||
all_strategies: List[ResolutionStrategy] = []
|
||||
if strategy is not None:
|
||||
all_strategies.append(strategy)
|
||||
if fallbacks:
|
||||
all_strategies.extend(fallbacks)
|
||||
|
||||
by_text_candidate = ""
|
||||
anchor_candidate = ""
|
||||
vlm_candidate = ""
|
||||
uia_data: Dict[str, Any] = {}
|
||||
dom_data: Dict[str, Any] = {}
|
||||
resolve_order: List[str] = []
|
||||
seen_methods: set = set()
|
||||
|
||||
for strat in all_strategies:
|
||||
if not strat:
|
||||
continue
|
||||
if strat.method == "ocr" and strat.target_text and not by_text_candidate:
|
||||
by_text_candidate = strat.target_text
|
||||
elif strat.method == "template":
|
||||
if strat.anchor_b64 and not anchor_candidate:
|
||||
anchor_candidate = strat.anchor_b64
|
||||
if strat.target_text and not by_text_candidate:
|
||||
by_text_candidate = strat.target_text
|
||||
elif strat.method == "vlm" and strat.vlm_description and not vlm_candidate:
|
||||
vlm_candidate = strat.vlm_description
|
||||
elif strat.method == "uia" and strat.uia_name and not uia_data:
|
||||
uia_data = {
|
||||
"name": strat.uia_name,
|
||||
"control_type": strat.uia_control_type,
|
||||
"automation_id": strat.uia_automation_id,
|
||||
"parent_path": strat.uia_parent_path,
|
||||
}
|
||||
elif strat.method == "dom" and strat.dom_selector and not dom_data:
|
||||
dom_data = {
|
||||
"selector": strat.dom_selector,
|
||||
"xpath": strat.dom_xpath,
|
||||
"url_pattern": strat.dom_url_pattern,
|
||||
}
|
||||
|
||||
# Construire l'ordre des méthodes (dans l'ordre primaire → fallbacks)
|
||||
if strat.method and strat.method not in seen_methods:
|
||||
resolve_order.append(strat.method)
|
||||
seen_methods.add(strat.method)
|
||||
|
||||
if by_text_candidate:
|
||||
spec["by_text"] = by_text_candidate
|
||||
if anchor_candidate:
|
||||
spec["anchor_image_base64"] = anchor_candidate
|
||||
if vlm_candidate:
|
||||
spec["vlm_description"] = vlm_candidate
|
||||
elif intent and "vlm_description" not in spec:
|
||||
# L'intention métier devient le prompt VLM de dernier recours
|
||||
spec["vlm_description"] = intent
|
||||
|
||||
# Données UIA — consommées par l'agent Windows via lea_uia.exe
|
||||
if uia_data:
|
||||
spec["uia_target"] = uia_data
|
||||
|
||||
# Données DOM — consommées par l'agent Windows via CDP (futur)
|
||||
if dom_data:
|
||||
spec["dom_target"] = dom_data
|
||||
|
||||
# Ordre de résolution pré-compilé — c'est LA pièce centrale du V4
|
||||
if resolve_order:
|
||||
spec["resolve_order"] = resolve_order
|
||||
|
||||
return spec
|
||||
|
||||
|
||||
def execution_node_to_action(
|
||||
node: ExecutionNode,
|
||||
variables: Optional[Dict[str, Any]] = None,
|
||||
id_prefix: str = "act_plan",
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""Convertir un `ExecutionNode` en action replay.
|
||||
|
||||
Retourne `None` si le nœud n'est pas exécutable (type inconnu).
|
||||
|
||||
Args:
|
||||
node: Le nœud à convertir.
|
||||
variables: Dictionnaire de variables pour substituer les {var}.
|
||||
id_prefix: Préfixe pour l'action_id générée.
|
||||
"""
|
||||
variables = variables or {}
|
||||
|
||||
action: Dict[str, Any] = {
|
||||
"action_id": f"{id_prefix}_{uuid.uuid4().hex[:8]}",
|
||||
"plan_node_id": node.node_id,
|
||||
}
|
||||
|
||||
if node.intent:
|
||||
action["intention"] = node.intent
|
||||
if node.step_id:
|
||||
action["plan_step_id"] = node.step_id
|
||||
if node.is_optional:
|
||||
action["is_optional"] = True
|
||||
|
||||
# Métadonnées d'exécution utiles au runtime
|
||||
if node.timeout_ms:
|
||||
action["timeout_ms"] = node.timeout_ms
|
||||
if node.max_retries:
|
||||
action["max_retries"] = node.max_retries
|
||||
if node.recovery_action:
|
||||
action["recovery_action"] = node.recovery_action
|
||||
if node.success_condition:
|
||||
action["success_condition"] = node.success_condition.to_dict()
|
||||
|
||||
action_type = node.action_type
|
||||
|
||||
if action_type == "click":
|
||||
action["type"] = "click"
|
||||
|
||||
strategy = node.strategy_primary
|
||||
fallbacks = node.strategy_fallbacks or []
|
||||
|
||||
# ── Déduction des coordonnées depuis la stratégie primaire ──
|
||||
# - OCR : pas de coordonnées (le runtime trouve via OCR)
|
||||
# - template : l'anchor sera utilisé au runtime
|
||||
# - VLM : la description sera utilisée au runtime
|
||||
# Dans tous les cas le resolve_engine retrouve les pixels au replay.
|
||||
# On expose néanmoins un centre (0.5, 0.5) neutre pour rester
|
||||
# compatible avec les validations de queue existantes.
|
||||
action["x_pct"] = 0.5
|
||||
action["y_pct"] = 0.5
|
||||
action["visual_mode"] = True
|
||||
|
||||
target_spec = _strategy_to_target_spec(
|
||||
strategy=strategy,
|
||||
fallbacks=fallbacks,
|
||||
intent=node.intent,
|
||||
)
|
||||
|
||||
# Titre fenêtre attendu AVANT (pré-vérif stricte)
|
||||
# Si absent, aucune pré-vérif → l'action s'exécute quel que soit l'écran
|
||||
if node.expected_window_before:
|
||||
action["expected_window_before"] = node.expected_window_before
|
||||
target_spec["window_title"] = node.expected_window_before
|
||||
|
||||
# Titre fenêtre attendu APRÈS (post-vérif stricte)
|
||||
# C'est la garantie de passage à l'action suivante
|
||||
if node.success_condition and node.success_condition.expected_title:
|
||||
action["expected_window_title"] = node.success_condition.expected_title
|
||||
action["success_strict"] = (
|
||||
node.success_condition.method == "title_match"
|
||||
)
|
||||
if "window_title" not in target_spec:
|
||||
target_spec["window_title"] = node.success_condition.expected_title
|
||||
|
||||
if target_spec:
|
||||
action["target_spec"] = target_spec
|
||||
|
||||
elif action_type == "type":
|
||||
action["type"] = "type"
|
||||
text = node.text or ""
|
||||
# Substituer les variables avant d'envoyer (ex: {patient} → "DUPONT")
|
||||
action["text"] = substitute_variables(text, variables)
|
||||
if node.variable_name:
|
||||
action["variable_name"] = node.variable_name
|
||||
|
||||
elif action_type in ("key_combo", "key_press"):
|
||||
action["type"] = "key_combo"
|
||||
keys = list(node.keys or [])
|
||||
if not keys:
|
||||
return None
|
||||
action["keys"] = keys
|
||||
|
||||
elif action_type == "wait":
|
||||
action["type"] = "wait"
|
||||
duration = node.duration_ms or 1000
|
||||
action["duration_ms"] = int(duration)
|
||||
|
||||
elif action_type == "scroll":
|
||||
action["type"] = "scroll"
|
||||
# Les stratégies peuvent contenir une zone — pas exploitée ici,
|
||||
# le scroll est implicitement sur la fenêtre active.
|
||||
action["delta"] = -3
|
||||
|
||||
else:
|
||||
logger.debug("execution_node_to_action: type inconnu '%s' ignoré", action_type)
|
||||
return None
|
||||
|
||||
return action
|
||||
|
||||
|
||||
def execution_plan_to_actions(
|
||||
plan: ExecutionPlan,
|
||||
variables: Optional[Dict[str, Any]] = None,
|
||||
id_prefix: str = "act_plan",
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Convertir un `ExecutionPlan` complet en liste d'actions replay.
|
||||
|
||||
Les variables passées en argument écrasent celles du plan.
|
||||
"""
|
||||
merged_vars: Dict[str, Any] = dict(plan.variables or {})
|
||||
if variables:
|
||||
merged_vars.update(variables)
|
||||
|
||||
actions: List[Dict[str, Any]] = []
|
||||
for node in plan.nodes:
|
||||
action = execution_node_to_action(
|
||||
node=node,
|
||||
variables=merged_vars,
|
||||
id_prefix=id_prefix,
|
||||
)
|
||||
if action is not None:
|
||||
actions.append(action)
|
||||
|
||||
logger.info(
|
||||
"execution_plan_to_actions(%s) : %d nœuds → %d actions replay "
|
||||
"(vars=%d)",
|
||||
plan.plan_id, plan.total_nodes, len(actions), len(merged_vars),
|
||||
)
|
||||
return actions
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Injection dans la queue de replay
|
||||
# =========================================================================
|
||||
|
||||
|
||||
def inject_plan_into_queue(
|
||||
plan: ExecutionPlan,
|
||||
session_id: str,
|
||||
replay_queues: Dict[str, List[Dict[str, Any]]],
|
||||
variables: Optional[Dict[str, Any]] = None,
|
||||
lock: Optional[threading.Lock] = None,
|
||||
replace: bool = True,
|
||||
id_prefix: str = "act_plan",
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Injecter un `ExecutionPlan` dans la queue de replay d'une session.
|
||||
|
||||
Args:
|
||||
plan: Le plan à exécuter.
|
||||
session_id: La session Agent V1 cible.
|
||||
replay_queues: Le dict global `_replay_queues` partagé par le serveur.
|
||||
variables: Variables à substituer dans les actions.
|
||||
lock: Verrou optionnel à acquérir avant d'écrire (threadsafe).
|
||||
replace: Si True (défaut), remplace la queue existante. Sinon, append.
|
||||
id_prefix: Préfixe pour les action_id générés.
|
||||
|
||||
Returns:
|
||||
La liste des actions injectées (après substitution).
|
||||
"""
|
||||
actions = execution_plan_to_actions(
|
||||
plan=plan, variables=variables, id_prefix=id_prefix,
|
||||
)
|
||||
|
||||
def _write() -> None:
|
||||
if replace:
|
||||
replay_queues[session_id] = list(actions)
|
||||
else:
|
||||
replay_queues[session_id].extend(actions)
|
||||
|
||||
if lock is not None:
|
||||
with lock:
|
||||
_write()
|
||||
else:
|
||||
_write()
|
||||
|
||||
logger.info(
|
||||
"inject_plan_into_queue(%s) : %d actions injectées dans la queue "
|
||||
"de la session '%s' (replace=%s)",
|
||||
plan.plan_id, len(actions), session_id, replace,
|
||||
)
|
||||
return actions
|
||||
@@ -17,6 +17,20 @@ from typing import Any, Dict, List, Optional
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _infer_machine_id_from_session_id(session_id: str, fallback: str = "default") -> str:
|
||||
"""Déduire le machine_id depuis un session_id spécial si possible.
|
||||
|
||||
Les heartbeats de fond de Léa utilisent `bg_<machine_id>` comme
|
||||
identifiant de session. Lors d'un redémarrage serveur, ces sessions
|
||||
peuvent être restaurées depuis la persistance JSON avec `machine_id`
|
||||
resté à `default`. On rétablit ici l'information machine pour que les
|
||||
replays ciblés retrouvent bien la session de fond active.
|
||||
"""
|
||||
if session_id.startswith("bg_") and len(session_id) > 3:
|
||||
return session_id[3:]
|
||||
return fallback
|
||||
|
||||
|
||||
@dataclass
|
||||
class LiveSessionState:
|
||||
"""État d'une session active en mémoire."""
|
||||
@@ -65,7 +79,8 @@ class LiveSessionState:
|
||||
class LiveSessionManager:
|
||||
"""Gère les sessions live en mémoire côté serveur avec persistance disque."""
|
||||
|
||||
def __init__(self, persist_dir: str = "data/streaming_sessions"):
|
||||
def __init__(self, persist_dir: str = "data/streaming_sessions",
|
||||
live_sessions_dir: Optional[str] = None):
|
||||
self._sessions: Dict[str, LiveSessionState] = {}
|
||||
self._lock = threading.Lock()
|
||||
self._persist_dir = Path(persist_dir)
|
||||
@@ -74,17 +89,29 @@ class LiveSessionManager:
|
||||
self._persist_counter = 0 # Compteur pour limiter la fréquence de persistance
|
||||
self._persist_interval = 10 # Persister toutes les N modifications
|
||||
|
||||
# Dossier des sessions live (JSONL + screenshots)
|
||||
self._live_sessions_dir = Path(live_sessions_dir) if live_sessions_dir else None
|
||||
|
||||
# Charger les sessions persistées au démarrage
|
||||
self._load_persisted_sessions()
|
||||
# Reconstruire les sessions depuis les live_events.jsonl sur disque
|
||||
self._discover_sessions_from_disk()
|
||||
|
||||
def _load_persisted_sessions(self):
|
||||
"""Charger les sessions sauvegardées au démarrage."""
|
||||
"""Charger les sessions sauvegardées au démarrage (JSON state files)."""
|
||||
count = 0
|
||||
for session_file in sorted(self._persist_dir.glob("sess_*.json")):
|
||||
session_files = sorted(self._persist_dir.glob("sess_*.json"))
|
||||
session_files += sorted(self._persist_dir.glob("bg_*.json"))
|
||||
for session_file in session_files:
|
||||
try:
|
||||
with open(session_file, 'r', encoding='utf-8') as f:
|
||||
data = json.load(f)
|
||||
session = LiveSessionState.from_dict(data)
|
||||
if session.machine_id == "default":
|
||||
session.machine_id = _infer_machine_id_from_session_id(
|
||||
session.session_id,
|
||||
fallback=session.machine_id,
|
||||
)
|
||||
self._sessions[session.session_id] = session
|
||||
count += 1
|
||||
except Exception as e:
|
||||
@@ -92,6 +119,66 @@ class LiveSessionManager:
|
||||
if count:
|
||||
logger.info(f"{count} session(s) restaurée(s) depuis {self._persist_dir}")
|
||||
|
||||
def _discover_sessions_from_disk(self):
|
||||
"""Découvrir les sessions depuis les live_events.jsonl sur disque.
|
||||
|
||||
Reconstruit les sessions manquantes du session_manager en scannant :
|
||||
- live_sessions/sess_*/live_events.jsonl (sessions racine)
|
||||
- live_sessions/{machine_id}/sess_*/live_events.jsonl (multi-machine)
|
||||
|
||||
Ne touche pas aux sessions déjà chargées depuis le JSON persist.
|
||||
"""
|
||||
if self._live_sessions_dir is None:
|
||||
return
|
||||
live_dir = self._live_sessions_dir
|
||||
if not live_dir.exists():
|
||||
return
|
||||
|
||||
discovered = 0
|
||||
for jsonl_file in sorted(live_dir.glob("**/live_events.jsonl")):
|
||||
session_dir = jsonl_file.parent
|
||||
session_id = session_dir.name
|
||||
if not (session_id.startswith("sess_") or session_id.startswith("bg_")):
|
||||
continue
|
||||
if session_id in self._sessions:
|
||||
continue
|
||||
|
||||
# Déduire le machine_id depuis le chemin parent
|
||||
parent_name = session_dir.parent.name
|
||||
if parent_name == live_dir.name:
|
||||
machine_id = _infer_machine_id_from_session_id(session_id)
|
||||
else:
|
||||
machine_id = parent_name
|
||||
|
||||
# Compter events et screenshots
|
||||
events_count = 0
|
||||
try:
|
||||
with open(jsonl_file, 'r', encoding='utf-8') as f:
|
||||
for _ in f:
|
||||
events_count += 1
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
shots_dir = session_dir / "shots"
|
||||
shots_count = len(list(shots_dir.glob("shot_*_full.png"))) if shots_dir.exists() else 0
|
||||
|
||||
# Créer la session en mémoire
|
||||
session = LiveSessionState(
|
||||
session_id=session_id,
|
||||
machine_id=machine_id,
|
||||
finalized=False,
|
||||
)
|
||||
# Stocker le nombre d'events/shots dans les métadonnées
|
||||
session.shot_paths = {f"shot_{i:04d}": "" for i in range(shots_count)}
|
||||
self._sessions[session_id] = session
|
||||
discovered += 1
|
||||
|
||||
if discovered:
|
||||
logger.info(
|
||||
f"{discovered} session(s) découverte(s) depuis {live_dir} "
|
||||
f"(total: {len(self._sessions)} sessions en mémoire)"
|
||||
)
|
||||
|
||||
def _persist_session(self, session_id: str):
|
||||
"""Sauvegarder une session sur disque (appelé périodiquement)."""
|
||||
session = self._sessions.get(session_id)
|
||||
@@ -102,7 +189,7 @@ class LiveSessionManager:
|
||||
with open(filepath, 'w', encoding='utf-8') as f:
|
||||
json.dump(session.to_dict(), f, ensure_ascii=False)
|
||||
except Exception as e:
|
||||
logger.debug(f"Erreur persistance session {session_id}: {e}")
|
||||
logger.warning(f"Erreur persistance session {session_id}: {e}")
|
||||
|
||||
def _maybe_persist(self, session_id: str):
|
||||
"""Persister si le compteur atteint l'intervalle."""
|
||||
@@ -180,6 +267,31 @@ class LiveSessionManager:
|
||||
if meta_val is not None:
|
||||
info[meta_key] = meta_val
|
||||
session.last_window_info = info
|
||||
# Exploiter window_capture (envoyé par l'agent avec la capture fenêtre)
|
||||
# pour enrichir last_window_info avec le titre précis de la fenêtre cliquée
|
||||
window_capture = event_data.get("window_capture")
|
||||
if window_capture and isinstance(window_capture, dict):
|
||||
wc_title = window_capture.get("title", "").strip()
|
||||
wc_app = window_capture.get("app_name", "").strip()
|
||||
if wc_title:
|
||||
session.last_window_info["title"] = wc_title
|
||||
if wc_app:
|
||||
session.last_window_info["app_name"] = wc_app
|
||||
# QW1 — propager monitor_index et monitors_geometry depuis window_capture
|
||||
if "monitor_index" in window_capture:
|
||||
session.last_window_info["monitor_index"] = window_capture["monitor_index"]
|
||||
if "monitors_geometry" in window_capture:
|
||||
session.last_window_info["monitors_geometry"] = window_capture["monitors_geometry"]
|
||||
|
||||
# QW1 — propager monitor_index/monitors_geometry du payload event
|
||||
# (cas heartbeat enrichi sans window/window_title). Toujours
|
||||
# rafraîchir le focus actif (change souvent) et la géométrie
|
||||
# (l'utilisateur peut brancher/débrancher un écran).
|
||||
if "monitor_index" in event_data:
|
||||
session.last_window_info["monitor_index"] = event_data["monitor_index"]
|
||||
if "monitors_geometry" in event_data and event_data["monitors_geometry"]:
|
||||
session.last_window_info["monitors_geometry"] = event_data["monitors_geometry"]
|
||||
|
||||
# Accumuler les titres/apps pour le nommage automatique
|
||||
title = session.last_window_info.get("title", "").strip()
|
||||
app_name = session.last_window_info.get("app_name", "").strip()
|
||||
@@ -221,18 +333,41 @@ class LiveSessionManager:
|
||||
import socket
|
||||
|
||||
# Construire les événements au format RawSession
|
||||
# Important : copier TOUTES les données de l'événement (pos, text, keys, button...)
|
||||
# car Event.from_dict() met tout sauf t/type/window/screenshot_id dans event.data,
|
||||
# et le GraphBuilder utilise event.data pour construire les actions.
|
||||
events = []
|
||||
for evt in session.events:
|
||||
window_info = {
|
||||
"title": evt.get("window_title", session.last_window_info.get("title", "")),
|
||||
"app_name": evt.get("app_name", session.last_window_info.get("app_name", "unknown")),
|
||||
}
|
||||
events.append({
|
||||
# Extraire window info (plusieurs formats possibles)
|
||||
window_raw = evt.get("window")
|
||||
if isinstance(window_raw, dict):
|
||||
window_info = {
|
||||
"title": window_raw.get("title", session.last_window_info.get("title", "")),
|
||||
"app_name": window_raw.get("app_name", session.last_window_info.get("app_name", "unknown")),
|
||||
}
|
||||
else:
|
||||
window_info = {
|
||||
"title": evt.get("window_title", session.last_window_info.get("title", "")),
|
||||
"app_name": evt.get("app_name", session.last_window_info.get("app_name", "unknown")),
|
||||
}
|
||||
|
||||
raw_event = {
|
||||
"t": evt.get("timestamp", 0),
|
||||
"type": evt.get("type", "unknown"),
|
||||
"window": window_info,
|
||||
"screenshot_id": evt.get("screenshot_id"),
|
||||
})
|
||||
}
|
||||
|
||||
# Copier les données spécifiques au type d'événement
|
||||
# (pos, button, text, keys, etc.) — indispensable pour le replay
|
||||
_skip_keys = {"type", "timestamp", "window", "window_title",
|
||||
"app_name", "screenshot_id", "machine_id",
|
||||
"screen_metadata", "vision_info"}
|
||||
for key, value in evt.items():
|
||||
if key not in _skip_keys and key not in raw_event:
|
||||
raw_event[key] = value
|
||||
|
||||
events.append(raw_event)
|
||||
|
||||
# Construire les screenshots au format RawSession
|
||||
screenshots = []
|
||||
|
||||
154
agent_v0/server_v1/loop_detector.py
Normal file
154
agent_v0/server_v1/loop_detector.py
Normal file
@@ -0,0 +1,154 @@
|
||||
# agent_v0/server_v1/loop_detector.py
|
||||
"""LoopDetector composite — détection de stagnation de Léa pendant un replay (QW2).
|
||||
|
||||
Trois signaux indépendants :
|
||||
- screen_static : N captures consécutives avec CLIP similarity > seuil
|
||||
- action_repeat : N actions consécutives identiques (type + coords)
|
||||
- retry_threshold : nombre de retries cumulés >= seuil
|
||||
|
||||
Un seul signal positif → verdict.detected=True. Le serveur bascule alors le
|
||||
replay en paused_need_help avec pause_reason explicite.
|
||||
|
||||
Désactivable via env var RPA_LOOP_DETECTOR_ENABLED=0.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class LoopVerdict:
|
||||
detected: bool = False
|
||||
reason: str = ""
|
||||
signal: str = "" # "screen_static" | "action_repeat" | "retry_threshold" | ""
|
||||
evidence: Dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
def _env_int(name: str, default: int) -> int:
|
||||
try:
|
||||
return int(os.environ.get(name, default))
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
|
||||
|
||||
def _env_float(name: str, default: float) -> float:
|
||||
try:
|
||||
return float(os.environ.get(name, default))
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
|
||||
|
||||
def _env_bool_enabled(name: str) -> bool:
|
||||
val = os.environ.get(name, "1").strip().lower()
|
||||
return val not in ("0", "false", "no", "off", "")
|
||||
|
||||
|
||||
def _cosine_similarity(a, b) -> float:
|
||||
"""Similarité cosine entre deux vecteurs (listes ou np.array). Robuste vecteur nul."""
|
||||
import numpy as np
|
||||
av = np.asarray(a, dtype=np.float32).flatten()
|
||||
bv = np.asarray(b, dtype=np.float32).flatten()
|
||||
na, nb = float(np.linalg.norm(av)), float(np.linalg.norm(bv))
|
||||
if na < 1e-8 or nb < 1e-8:
|
||||
return 0.0
|
||||
return float(np.dot(av, bv) / (na * nb))
|
||||
|
||||
|
||||
class LoopDetector:
|
||||
def __init__(self, clip_embedder=None):
|
||||
self.clip_embedder = clip_embedder
|
||||
|
||||
def evaluate(
|
||||
self,
|
||||
state: Dict[str, Any],
|
||||
screenshots: List[Any],
|
||||
actions: List[Dict[str, Any]],
|
||||
) -> LoopVerdict:
|
||||
"""Évalue les 3 signaux. Retourne le premier déclenché.
|
||||
|
||||
Args:
|
||||
state: replay_state (utilisé pour retried_actions)
|
||||
screenshots: anneau d'embeddings CLIP (les N derniers)
|
||||
actions: anneau des N dernières actions exécutées
|
||||
"""
|
||||
if not _env_bool_enabled("RPA_LOOP_DETECTOR_ENABLED"):
|
||||
return LoopVerdict(detected=False)
|
||||
|
||||
# Signal A : screen_static
|
||||
verdict = self._check_screen_static(screenshots)
|
||||
if verdict.detected:
|
||||
return verdict
|
||||
|
||||
# Signal B : action_repeat
|
||||
verdict = self._check_action_repeat(actions)
|
||||
if verdict.detected:
|
||||
return verdict
|
||||
|
||||
# Signal C : retry_threshold
|
||||
verdict = self._check_retry_threshold(state)
|
||||
if verdict.detected:
|
||||
return verdict
|
||||
|
||||
return LoopVerdict(detected=False)
|
||||
|
||||
def _check_screen_static(self, screenshots: List[Any]) -> LoopVerdict:
|
||||
n_required = _env_int("RPA_LOOP_SCREEN_STATIC_N", 4)
|
||||
threshold = _env_float("RPA_LOOP_SCREEN_STATIC_THRESHOLD", 0.99)
|
||||
|
||||
if self.clip_embedder is None or len(screenshots) < n_required:
|
||||
return LoopVerdict()
|
||||
|
||||
try:
|
||||
recent = screenshots[-n_required:]
|
||||
# Embed chaque capture via le CLIP embedder (peut lever)
|
||||
embeddings = [self.clip_embedder.embed_image(img) for img in recent]
|
||||
sims = [_cosine_similarity(embeddings[i], embeddings[i + 1])
|
||||
for i in range(len(embeddings) - 1)]
|
||||
min_sim = min(sims)
|
||||
if min_sim > threshold:
|
||||
return LoopVerdict(
|
||||
detected=True,
|
||||
reason="loop_detected",
|
||||
signal="screen_static",
|
||||
evidence={"min_similarity": round(min_sim, 4),
|
||||
"n_captures": n_required,
|
||||
"threshold": threshold},
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("LoopDetector signal_A erreur (%s) — signal inerte ce tick", e)
|
||||
return LoopVerdict()
|
||||
|
||||
def _check_action_repeat(self, actions: List[Dict[str, Any]]) -> LoopVerdict:
|
||||
n_required = _env_int("RPA_LOOP_ACTION_REPEAT_N", 3)
|
||||
if len(actions) < n_required:
|
||||
return LoopVerdict()
|
||||
recent = actions[-n_required:]
|
||||
|
||||
def _signature(a: Dict[str, Any]) -> tuple:
|
||||
return (a.get("type"), a.get("x_pct"), a.get("y_pct"))
|
||||
|
||||
sigs = [_signature(a) for a in recent]
|
||||
if all(s == sigs[0] for s in sigs):
|
||||
return LoopVerdict(
|
||||
detected=True,
|
||||
reason="loop_detected",
|
||||
signal="action_repeat",
|
||||
evidence={"signature": sigs[0], "count": n_required},
|
||||
)
|
||||
return LoopVerdict()
|
||||
|
||||
def _check_retry_threshold(self, state: Dict[str, Any]) -> LoopVerdict:
|
||||
threshold = _env_int("RPA_LOOP_RETRY_THRESHOLD", 3)
|
||||
retried = int(state.get("retried_actions", 0))
|
||||
if retried >= threshold:
|
||||
return LoopVerdict(
|
||||
detected=True,
|
||||
reason="loop_detected",
|
||||
signal="retry_threshold",
|
||||
evidence={"retried_actions": retried, "threshold": threshold},
|
||||
)
|
||||
return LoopVerdict()
|
||||
99
agent_v0/server_v1/monitor_router.py
Normal file
99
agent_v0/server_v1/monitor_router.py
Normal file
@@ -0,0 +1,99 @@
|
||||
# agent_v0/server_v1/monitor_router.py
|
||||
"""MonitorRouter — résolution de l'écran cible pour le replay (QW1).
|
||||
|
||||
Stratégie en cascade :
|
||||
1. action.monitor_index (hérité de la session source) → cible cet écran
|
||||
2. session.last_focused_monitor (focus actif vu en dernier heartbeat) → fallback
|
||||
3. composite (offset 0, 0) → backward compat
|
||||
|
||||
Émet sur le bus lea:* l'event monitor_routed avec la source de la décision.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class MonitorTarget:
|
||||
"""Représente l'écran cible résolu pour une action de replay."""
|
||||
idx: int
|
||||
offset_x: int
|
||||
offset_y: int
|
||||
w: int
|
||||
h: int
|
||||
source: str # "action" | "focus" | "composite_fallback"
|
||||
|
||||
|
||||
_COMPOSITE_FALLBACK = MonitorTarget(
|
||||
idx=-1,
|
||||
offset_x=0,
|
||||
offset_y=0,
|
||||
w=0,
|
||||
h=0,
|
||||
source="composite_fallback",
|
||||
)
|
||||
|
||||
|
||||
def _find_monitor(geometry: List[Dict[str, Any]], idx: int) -> Optional[Dict[str, Any]]:
|
||||
"""Retourne le monitor d'index donné, ou None si absent."""
|
||||
for m in geometry:
|
||||
if m.get("idx") == idx:
|
||||
return m
|
||||
return None
|
||||
|
||||
|
||||
def _to_target(monitor: Dict[str, Any], source: str) -> MonitorTarget:
|
||||
return MonitorTarget(
|
||||
idx=int(monitor["idx"]),
|
||||
offset_x=int(monitor.get("x", 0)),
|
||||
offset_y=int(monitor.get("y", 0)),
|
||||
w=int(monitor.get("w", 0)),
|
||||
h=int(monitor.get("h", 0)),
|
||||
source=source,
|
||||
)
|
||||
|
||||
|
||||
def resolve_target_monitor(
|
||||
action: Dict[str, Any],
|
||||
session_state: Dict[str, Any],
|
||||
) -> MonitorTarget:
|
||||
"""Résout l'écran cible d'une action de replay.
|
||||
|
||||
Args:
|
||||
action: Dict de l'action (peut contenir `monitor_index`).
|
||||
session_state: État de la session (doit contenir `monitors_geometry`
|
||||
et `last_focused_monitor`).
|
||||
|
||||
Returns:
|
||||
MonitorTarget avec l'offset à appliquer aux coordonnées de grounding.
|
||||
"""
|
||||
geometry: List[Dict[str, Any]] = session_state.get("monitors_geometry") or []
|
||||
|
||||
# 1. Cible explicite via action
|
||||
explicit_idx = action.get("monitor_index")
|
||||
if explicit_idx is not None and geometry:
|
||||
m = _find_monitor(geometry, int(explicit_idx))
|
||||
if m is not None:
|
||||
return _to_target(m, source="action")
|
||||
# Index invalide → on tombe sur le fallback focus
|
||||
logger.warning(
|
||||
"[BUS] lea:monitor_invalid_index requested=%d available_idx=%s",
|
||||
int(explicit_idx), [g.get("idx") for g in geometry],
|
||||
)
|
||||
|
||||
# 2. Fallback focus actif
|
||||
focused_idx = session_state.get("last_focused_monitor")
|
||||
if focused_idx is not None and geometry:
|
||||
m = _find_monitor(geometry, int(focused_idx))
|
||||
if m is not None:
|
||||
return _to_target(m, source="focus")
|
||||
logger.warning(
|
||||
"[BUS] lea:monitor_unavailable focused_idx=%d available_idx=%s",
|
||||
int(focused_idx), [g.get("idx") for g in geometry],
|
||||
)
|
||||
|
||||
# 3. Fallback composite (backward compat — comportement actuel mss.monitors[0])
|
||||
return _COMPOSITE_FALLBACK
|
||||
2998
agent_v0/server_v1/replay_engine.py
Normal file
2998
agent_v0/server_v1/replay_engine.py
Normal file
File diff suppressed because it is too large
Load Diff
143
agent_v0/server_v1/replay_failure_logger.py
Normal file
143
agent_v0/server_v1/replay_failure_logger.py
Normal file
@@ -0,0 +1,143 @@
|
||||
# agent_v0/server_v1/replay_failure_logger.py
|
||||
"""
|
||||
Logger des echecs de replay pour l'apprentissage futur.
|
||||
|
||||
Chaque echec de resolution visuelle (target_not_found) est sauvegarde dans un
|
||||
fichier JSONL par session, avec le screenshot de ce que l'agent voit au moment
|
||||
de l'echec. Ces donnees alimentent le learning loop : re-entrainement des
|
||||
embeddings, ajustement des seuils, enrichissement des target_spec.
|
||||
|
||||
Structure :
|
||||
data/training/replay_failures/{replay_id}/failures.jsonl
|
||||
data/training/replay_failures/{replay_id}/screenshots/{action_id}.jpg
|
||||
"""
|
||||
|
||||
import base64
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger("replay_failure_logger")
|
||||
|
||||
# Repertoire racine des echecs de replay
|
||||
_FAILURES_BASE_DIR = Path("data/training/replay_failures")
|
||||
|
||||
# Lock pour les ecritures concurrentes
|
||||
_write_lock = threading.Lock()
|
||||
|
||||
|
||||
def log_replay_failure(
|
||||
replay_id: str,
|
||||
action_id: str,
|
||||
target_spec: Optional[Dict[str, Any]],
|
||||
screenshot_b64: Optional[str],
|
||||
resolution_attempts: Optional[List[Dict[str, Any]]] = None,
|
||||
error: str = "target_not_found",
|
||||
extra: Optional[Dict[str, Any]] = None,
|
||||
) -> Optional[str]:
|
||||
"""Sauvegarder un echec de replay pour l'apprentissage futur.
|
||||
|
||||
Args:
|
||||
replay_id: Identifiant du replay en cours
|
||||
action_id: Identifiant de l'action echouee
|
||||
target_spec: Specification de la cible recherchee
|
||||
screenshot_b64: Screenshot JPEG base64 de ce que l'agent voit
|
||||
resolution_attempts: Liste des tentatives de resolution (methode, score, etc.)
|
||||
error: Type d'erreur (defaut: "target_not_found")
|
||||
extra: Champs supplementaires a stocker
|
||||
|
||||
Returns:
|
||||
Chemin du fichier JSONL cree, ou None en cas d'erreur.
|
||||
"""
|
||||
try:
|
||||
# Creer le repertoire de la session
|
||||
session_dir = _FAILURES_BASE_DIR / replay_id
|
||||
session_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Sauvegarder le screenshot si fourni
|
||||
screenshot_path = None
|
||||
if screenshot_b64:
|
||||
screenshots_dir = session_dir / "screenshots"
|
||||
screenshots_dir.mkdir(exist_ok=True)
|
||||
screenshot_path = str(screenshots_dir / f"{action_id}.jpg")
|
||||
try:
|
||||
img_bytes = base64.b64decode(screenshot_b64)
|
||||
with open(screenshot_path, "wb") as f:
|
||||
f.write(img_bytes)
|
||||
except Exception as e:
|
||||
logger.warning(f"Impossible de sauvegarder le screenshot : {e}")
|
||||
screenshot_path = None
|
||||
|
||||
# Construire l'entree JSONL
|
||||
entry = {
|
||||
"timestamp": time.strftime("%Y-%m-%dT%H:%M:%S"),
|
||||
"replay_id": replay_id,
|
||||
"action_id": action_id,
|
||||
"target_spec": _sanitize_target_spec(target_spec) if target_spec else None,
|
||||
"screenshot_path": screenshot_path,
|
||||
"resolution_attempts": resolution_attempts or [],
|
||||
"error": error,
|
||||
}
|
||||
if extra:
|
||||
entry.update(extra)
|
||||
|
||||
# Ecrire dans le fichier JSONL (thread-safe)
|
||||
jsonl_path = session_dir / "failures.jsonl"
|
||||
with _write_lock:
|
||||
with open(jsonl_path, "a", encoding="utf-8") as f:
|
||||
f.write(json.dumps(entry, ensure_ascii=False) + "\n")
|
||||
|
||||
logger.info(
|
||||
f"Echec replay loggue : replay={replay_id} action={action_id} "
|
||||
f"error={error} -> {jsonl_path}"
|
||||
)
|
||||
return str(jsonl_path)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Impossible de logger l'echec replay : {e}")
|
||||
return None
|
||||
|
||||
|
||||
def _sanitize_target_spec(target_spec: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Nettoyer le target_spec pour le stockage (retirer les images base64 volumineuses)."""
|
||||
cleaned = {}
|
||||
for key, value in target_spec.items():
|
||||
# Ne pas stocker les images base64 (trop volumineux pour le JSONL)
|
||||
if key.endswith("_base64") or key.endswith("_b64"):
|
||||
cleaned[key] = f"<{len(str(value))} chars>" if value else None
|
||||
else:
|
||||
cleaned[key] = value
|
||||
return cleaned
|
||||
|
||||
|
||||
def get_failure_count(replay_id: str) -> int:
|
||||
"""Compter le nombre d'echecs pour un replay donne."""
|
||||
jsonl_path = _FAILURES_BASE_DIR / replay_id / "failures.jsonl"
|
||||
if not jsonl_path.exists():
|
||||
return 0
|
||||
try:
|
||||
with open(jsonl_path, "r", encoding="utf-8") as f:
|
||||
return sum(1 for _ in f)
|
||||
except Exception:
|
||||
return 0
|
||||
|
||||
|
||||
def get_failures(replay_id: str) -> List[Dict[str, Any]]:
|
||||
"""Lire tous les echecs pour un replay donne."""
|
||||
jsonl_path = _FAILURES_BASE_DIR / replay_id / "failures.jsonl"
|
||||
if not jsonl_path.exists():
|
||||
return []
|
||||
failures = []
|
||||
try:
|
||||
with open(jsonl_path, "r", encoding="utf-8") as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if line:
|
||||
failures.append(json.loads(line))
|
||||
except Exception as e:
|
||||
logger.warning(f"Erreur lecture echecs replay {replay_id} : {e}")
|
||||
return failures
|
||||
441
agent_v0/server_v1/replay_learner.py
Normal file
441
agent_v0/server_v1/replay_learner.py
Normal file
@@ -0,0 +1,441 @@
|
||||
# agent_v0/server_v1/replay_learner.py
|
||||
"""
|
||||
Module Learning — apprentissage à partir des résultats de replay.
|
||||
|
||||
Responsabilité : "Chaque replay qui échoue enrichit notre base de connaissances."
|
||||
|
||||
Stocke les résultats structurés de chaque action (succès/échec, méthode,
|
||||
screenshots, correction appliquée) pour :
|
||||
1. Améliorer les décisions futures (Policy)
|
||||
2. Affiner les stratégies de grounding (quel méthode marche pour quel écran)
|
||||
3. Détecter les patterns récurrents d'échec
|
||||
4. Alimenter le fine-tuning futur du VLM
|
||||
|
||||
Format inspiré du cahier des charges (docs/VISION_RPA_INTELLIGENT.md) :
|
||||
{
|
||||
"screenshot_before": "base64...",
|
||||
"action": {"type": "click", "target": "Bouton Valider", ...},
|
||||
"screenshot_after": "base64...",
|
||||
"success": true,
|
||||
"resolution_method": "som_text_match",
|
||||
"correction": null,
|
||||
"human_validated": false
|
||||
}
|
||||
|
||||
Ref: docs/VISION_RPA_INTELLIGENT.md — Boucle d'apprentissage (section 4)
|
||||
Ref: docs/PLAN_ACTEUR_V1.md — Phase 3 : apprentissage continu
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from dataclasses import dataclass, field, asdict
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Répertoire par défaut pour le stockage des résultats d'apprentissage
|
||||
_DEFAULT_LEARNING_DIR = os.environ.get(
|
||||
"RPA_LEARNING_DIR", "data/learning/replay_results"
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class ActionOutcome:
|
||||
"""Résultat structuré d'une action de replay."""
|
||||
# Identifiants
|
||||
session_id: str
|
||||
action_id: str
|
||||
action_type: str # click, type, key_combo
|
||||
timestamp: float = 0.0 # Epoch
|
||||
|
||||
# Contexte
|
||||
target_description: str = "" # "Clic sur 'Enregistrer' dans Bloc-notes"
|
||||
intention: str = "" # "Sauvegarder le fichier"
|
||||
window_title: str = ""
|
||||
|
||||
# Résolution
|
||||
resolution_method: str = "" # server_som, anchor_template, vlm_direct...
|
||||
resolution_score: float = 0.0
|
||||
resolution_elapsed_ms: float = 0.0
|
||||
|
||||
# Résultat
|
||||
success: bool = False
|
||||
error: str = ""
|
||||
warning: str = ""
|
||||
|
||||
# Vérification (Critic)
|
||||
pixel_verified: Optional[bool] = None
|
||||
semantic_verified: Optional[bool] = None
|
||||
critic_detail: str = ""
|
||||
|
||||
# Recovery
|
||||
recovery_action: str = "" # undo, escape, close, none
|
||||
recovery_success: bool = False
|
||||
|
||||
# Screenshots (chemins relatifs, pas base64 — trop lourd)
|
||||
screenshot_before_path: str = ""
|
||||
screenshot_after_path: str = ""
|
||||
|
||||
# Correction humaine (feedback loop)
|
||||
human_validated: bool = False
|
||||
human_correction: str = "" # Description de la correction
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return asdict(self)
|
||||
|
||||
|
||||
class ReplayLearner:
|
||||
"""Apprentissage à partir des résultats de replay.
|
||||
|
||||
Stocke chaque action dans un fichier JSONL par session.
|
||||
Fournit des requêtes pour améliorer les décisions futures.
|
||||
|
||||
Usage côté serveur (api_stream.py) :
|
||||
learner = ReplayLearner()
|
||||
learner.record(outcome)
|
||||
|
||||
Usage côté Policy :
|
||||
history = learner.query_similar(target_description, window_title)
|
||||
# → "La dernière fois, template matching a échoué mais SoM a trouvé"
|
||||
"""
|
||||
|
||||
def __init__(self, learning_dir: str = ""):
|
||||
self.learning_dir = Path(learning_dir or _DEFAULT_LEARNING_DIR)
|
||||
self.learning_dir.mkdir(parents=True, exist_ok=True)
|
||||
# Cache mémoire des derniers résultats (pour requêtes rapides)
|
||||
self._recent: List[ActionOutcome] = []
|
||||
self._max_recent = 500
|
||||
|
||||
def record(self, outcome: ActionOutcome) -> None:
|
||||
"""Enregistrer le résultat d'une action.
|
||||
|
||||
Écrit en append dans un fichier JSONL par session.
|
||||
Garde aussi en mémoire pour les requêtes rapides.
|
||||
"""
|
||||
if not outcome.timestamp:
|
||||
outcome.timestamp = time.time()
|
||||
|
||||
# Fichier JSONL par session
|
||||
session_file = self.learning_dir / f"{outcome.session_id}.jsonl"
|
||||
try:
|
||||
with open(session_file, "a") as f:
|
||||
f.write(json.dumps(outcome.to_dict(), ensure_ascii=False) + "\n")
|
||||
except Exception as e:
|
||||
logger.warning(f"Learning: échec écriture {session_file}: {e}")
|
||||
|
||||
# Cache mémoire
|
||||
self._recent.append(outcome)
|
||||
if len(self._recent) > self._max_recent:
|
||||
self._recent = self._recent[-self._max_recent:]
|
||||
|
||||
# Log résumé
|
||||
status = "OK" if outcome.success else "ÉCHEC"
|
||||
logger.info(
|
||||
f"Learning: {status} {outcome.action_type} "
|
||||
f"'{outcome.target_description[:40]}' "
|
||||
f"[{outcome.resolution_method}] "
|
||||
f"critic={'OK' if outcome.semantic_verified else 'NON' if outcome.semantic_verified is False else '?'}"
|
||||
)
|
||||
|
||||
def record_from_replay_result(
|
||||
self,
|
||||
session_id: str,
|
||||
action: Dict[str, Any],
|
||||
result: Dict[str, Any],
|
||||
verification: Optional[Dict] = None,
|
||||
) -> None:
|
||||
"""Enregistrer depuis les structures existantes du replay.
|
||||
|
||||
Convertit le format action/result du replay en ActionOutcome.
|
||||
Appelé depuis api_stream.py après chaque action de replay.
|
||||
"""
|
||||
target_spec = action.get("target_spec", {})
|
||||
outcome = ActionOutcome(
|
||||
session_id=session_id,
|
||||
action_id=action.get("action_id", ""),
|
||||
action_type=action.get("type", ""),
|
||||
target_description=target_spec.get("by_text", ""),
|
||||
intention=action.get("intention", ""),
|
||||
window_title=target_spec.get("window_title", ""),
|
||||
resolution_method=result.get("resolution_method", ""),
|
||||
resolution_score=result.get("resolution_score", 0.0),
|
||||
resolution_elapsed_ms=result.get("resolution_elapsed_ms", 0.0),
|
||||
success=result.get("success", False),
|
||||
error=result.get("error", ""),
|
||||
warning=result.get("warning", ""),
|
||||
)
|
||||
|
||||
if verification:
|
||||
outcome.pixel_verified = verification.get("verified")
|
||||
outcome.semantic_verified = verification.get("semantic_verified")
|
||||
outcome.critic_detail = verification.get("semantic_detail", "")
|
||||
|
||||
self.record(outcome)
|
||||
|
||||
def record_human_correction(
|
||||
self,
|
||||
session_id: str,
|
||||
action: Dict[str, Any],
|
||||
correction: Dict[str, Any],
|
||||
) -> None:
|
||||
"""Enregistrer une correction humaine (mode apprentissage supervisé).
|
||||
|
||||
L'humain a montré à Léa où cliquer. On stocke cette correction
|
||||
dans target_memory.db pour que la prochaine fois, Léa sache.
|
||||
"""
|
||||
target_spec = action.get("target_spec", {})
|
||||
by_text = target_spec.get("by_text", "")
|
||||
window_title = (
|
||||
target_spec.get("window_title", "")
|
||||
or action.get("window_title", "")
|
||||
or target_spec.get("expected_window_before", "")
|
||||
or (target_spec.get("context_hints") or {}).get("window_title", "")
|
||||
)
|
||||
x_pct = correction.get("x_pct")
|
||||
y_pct = correction.get("y_pct")
|
||||
last_click = correction.get("last_click")
|
||||
if (x_pct is None or y_pct is None) and isinstance(last_click, dict):
|
||||
x_pct = last_click.get("x_pct")
|
||||
y_pct = last_click.get("y_pct")
|
||||
|
||||
try:
|
||||
x_pct_f = float(x_pct)
|
||||
y_pct_f = float(y_pct)
|
||||
except (TypeError, ValueError):
|
||||
logger.warning(
|
||||
"[APPRENTISSAGE] Correction humaine non persistée : "
|
||||
"aucune coordonnée clic exploitable pour '%s'",
|
||||
by_text,
|
||||
)
|
||||
return
|
||||
|
||||
if not (0.0 < x_pct_f <= 1.0 and 0.0 < y_pct_f <= 1.0):
|
||||
logger.warning(
|
||||
"[APPRENTISSAGE] Correction humaine non persistée : "
|
||||
"coordonnées hors bornes pour '%s' (%.4f, %.4f)",
|
||||
by_text,
|
||||
x_pct_f,
|
||||
y_pct_f,
|
||||
)
|
||||
return
|
||||
|
||||
# Enregistrer dans le JSONL d'apprentissage
|
||||
outcome = ActionOutcome(
|
||||
session_id=session_id,
|
||||
action_id=action.get("action_id", ""),
|
||||
action_type="click",
|
||||
target_description=by_text,
|
||||
window_title=window_title,
|
||||
resolution_method="human_supervised",
|
||||
resolution_score=1.0, # Confiance maximale — l'humain a montré
|
||||
success=True,
|
||||
)
|
||||
self.record(outcome)
|
||||
|
||||
# Stocker dans target_memory.db pour le lookup futur
|
||||
try:
|
||||
from .replay_memory import memory_record_success
|
||||
stored = False
|
||||
if window_title:
|
||||
stored = memory_record_success(
|
||||
window_title=window_title,
|
||||
target_spec=target_spec,
|
||||
x_pct=x_pct_f,
|
||||
y_pct=y_pct_f,
|
||||
method="human_supervised",
|
||||
confidence=1.0,
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
"[APPRENTISSAGE] Correction humaine non persistée : "
|
||||
"window_title absent pour '%s'",
|
||||
by_text,
|
||||
)
|
||||
|
||||
if stored:
|
||||
logger.info(
|
||||
f"[APPRENTISSAGE] Correction stockée dans target_memory : "
|
||||
f"'{by_text}' → ({x_pct:.4f}, {y_pct:.4f})"
|
||||
)
|
||||
elif window_title:
|
||||
logger.warning(
|
||||
"[APPRENTISSAGE] Correction humaine non persistée : "
|
||||
"échec memory_record_success pour '%s' dans '%s'",
|
||||
by_text,
|
||||
window_title,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Learning: échec stockage target_memory: {e}")
|
||||
|
||||
def query_similar(
|
||||
self,
|
||||
target_description: str = "",
|
||||
window_title: str = "",
|
||||
limit: int = 10,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Chercher des résultats similaires dans l'historique.
|
||||
|
||||
Recherche par correspondance textuelle sur la description de cible
|
||||
et le titre de fenêtre. Retourne les plus récents en premier.
|
||||
|
||||
Utile pour le Policy : "qu'est-ce qui a marché avant pour cette cible ?"
|
||||
"""
|
||||
results = []
|
||||
target_lower = target_description.lower()
|
||||
window_lower = window_title.lower()
|
||||
|
||||
for outcome in reversed(self._recent):
|
||||
score = 0
|
||||
if target_lower and target_lower in outcome.target_description.lower():
|
||||
score += 2
|
||||
if window_lower and window_lower in outcome.window_title.lower():
|
||||
score += 1
|
||||
if score > 0:
|
||||
results.append({
|
||||
"outcome": outcome.to_dict(),
|
||||
"relevance": score,
|
||||
})
|
||||
if len(results) >= limit:
|
||||
break
|
||||
|
||||
return sorted(results, key=lambda x: x["relevance"], reverse=True)
|
||||
|
||||
def best_strategy_for(
|
||||
self,
|
||||
target_description: str = "",
|
||||
window_title: str = "",
|
||||
) -> Optional[str]:
|
||||
"""Quelle méthode de grounding a le mieux marché pour cette cible ?
|
||||
|
||||
Consulte l'historique et retourne la méthode qui a le plus haut
|
||||
taux de succès pour des cibles similaires. C'est la boucle
|
||||
d'apprentissage : les replays passés améliorent les suivants.
|
||||
|
||||
Returns:
|
||||
Nom de la meilleure méthode (ex: "som_text_match") ou None
|
||||
"""
|
||||
similar = self.query_similar(target_description, window_title, limit=20)
|
||||
if not similar:
|
||||
return None
|
||||
|
||||
# Compter les succès par méthode
|
||||
method_stats: Dict[str, List[int]] = {} # method → [successes, total]
|
||||
for entry in similar:
|
||||
outcome = entry["outcome"]
|
||||
method = outcome.get("resolution_method", "")
|
||||
if not method:
|
||||
continue
|
||||
if method not in method_stats:
|
||||
method_stats[method] = [0, 0]
|
||||
method_stats[method][1] += 1
|
||||
if outcome.get("success"):
|
||||
method_stats[method][0] += 1
|
||||
|
||||
if not method_stats:
|
||||
return None
|
||||
|
||||
# Retourner la méthode avec le meilleur taux de succès (minimum 2 occurrences)
|
||||
best = None
|
||||
best_rate = 0.0
|
||||
for method, (successes, total) in method_stats.items():
|
||||
if total >= 2: # Au moins 2 essais pour être significatif
|
||||
rate = successes / total
|
||||
if rate > best_rate:
|
||||
best_rate = rate
|
||||
best = method
|
||||
|
||||
if best:
|
||||
logger.info(
|
||||
f"Learning: meilleure stratégie pour '{target_description[:30]}' → "
|
||||
f"{best} ({best_rate:.0%} sur {method_stats[best][1]} essais)"
|
||||
)
|
||||
|
||||
return best
|
||||
|
||||
def consolidate_workflow(
|
||||
self,
|
||||
actions: list,
|
||||
session_id: str = "",
|
||||
) -> int:
|
||||
"""Consolider un workflow avec les apprentissages passés.
|
||||
|
||||
Pour chaque action du workflow, vérifie si l'historique suggère
|
||||
une meilleure stratégie de résolution. Si oui, l'ajoute en
|
||||
hint dans le target_spec de l'action.
|
||||
|
||||
Modifie les actions in-place. Retourne le nombre d'actions enrichies.
|
||||
|
||||
C'est la cross-pollination : un replay qui a réussi "Enregistrer"
|
||||
via som_text améliore tous les futurs workflows qui cliquent sur "Enregistrer".
|
||||
"""
|
||||
enriched = 0
|
||||
for action in actions:
|
||||
if action.get("type") != "click":
|
||||
continue
|
||||
target_spec = action.get("target_spec", {})
|
||||
by_text = target_spec.get("by_text", "")
|
||||
window = target_spec.get("window_title", "")
|
||||
if not by_text:
|
||||
continue
|
||||
|
||||
best = self.best_strategy_for(by_text, window)
|
||||
if best:
|
||||
target_spec["_learned_strategy"] = best
|
||||
enriched += 1
|
||||
|
||||
if enriched:
|
||||
logger.info(
|
||||
f"Consolidation : {enriched} actions enrichies par l'apprentissage "
|
||||
f"(session {session_id})"
|
||||
)
|
||||
return enriched
|
||||
|
||||
def get_stats(self) -> Dict[str, Any]:
|
||||
"""Statistiques globales des résultats de replay."""
|
||||
if not self._recent:
|
||||
return {"total": 0}
|
||||
|
||||
total = len(self._recent)
|
||||
successes = sum(1 for o in self._recent if o.success)
|
||||
methods = {}
|
||||
for o in self._recent:
|
||||
m = o.resolution_method or "unknown"
|
||||
if m not in methods:
|
||||
methods[m] = {"total": 0, "success": 0}
|
||||
methods[m]["total"] += 1
|
||||
if o.success:
|
||||
methods[m]["success"] += 1
|
||||
|
||||
return {
|
||||
"total": total,
|
||||
"success_rate": round(successes / total, 3) if total > 0 else 0,
|
||||
"methods": {
|
||||
m: {
|
||||
"total": v["total"],
|
||||
"success_rate": round(v["success"] / v["total"], 3) if v["total"] > 0 else 0,
|
||||
}
|
||||
for m, v in methods.items()
|
||||
},
|
||||
}
|
||||
|
||||
def load_session(self, session_id: str) -> List[ActionOutcome]:
|
||||
"""Charger tous les résultats d'une session depuis le fichier JSONL."""
|
||||
session_file = self.learning_dir / f"{session_id}.jsonl"
|
||||
if not session_file.is_file():
|
||||
return []
|
||||
|
||||
outcomes = []
|
||||
try:
|
||||
with open(session_file) as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if line:
|
||||
data = json.loads(line)
|
||||
outcomes.append(ActionOutcome(**data))
|
||||
except Exception as e:
|
||||
logger.warning(f"Learning: échec lecture {session_file}: {e}")
|
||||
|
||||
return outcomes
|
||||
487
agent_v0/server_v1/replay_memory.py
Normal file
487
agent_v0/server_v1/replay_memory.py
Normal file
@@ -0,0 +1,487 @@
|
||||
# agent_v0/server_v1/replay_memory.py
|
||||
"""
|
||||
replay_memory — Greffe de TargetMemoryStore (Fiche #18) sur le pipeline V4.
|
||||
|
||||
Phase 1 du plan apprentissage Léa (docs/PLAN_APPRENTISSAGE_LEA.md).
|
||||
|
||||
Le runtime V4 appelle :
|
||||
- `memory_lookup()` AVANT la cascade coûteuse (OCR/template/VLM)
|
||||
- `memory_record_success()` APRÈS validation post-condition (`title_match` strict)
|
||||
- `memory_record_failure()` sur les échecs
|
||||
|
||||
Fingerprint léger V4 : les coordonnées clic (x_pct, y_pct) sont stockées dans
|
||||
les deux premières valeurs de `TargetFingerprint.bbox`, et la méthode de
|
||||
résolution ayant réussi dans le champ `etype`.
|
||||
|
||||
Signature d'écran V4 : `sha256(normalize(window_title))[:16]`. Simple et
|
||||
robuste aux données variables car les titres de fenêtre restent stables.
|
||||
Les faux positifs (même titre, écrans différents) sont rattrapés par la
|
||||
post-condition qui décrémentera la fiabilité via `record_failure()`.
|
||||
|
||||
Critère de fiabilité : 2 succès minimum et < 30% d'échecs pour déclencher
|
||||
un hit (paramètres de `TargetMemoryStore.lookup`). C'est exactement la
|
||||
cristallisation par répétition que l'on veut — Léa est un stagiaire qui
|
||||
apprend de l'observation.
|
||||
|
||||
Auteur : Dom, Alice — avril 2026
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
import os
|
||||
import unicodedata
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# =========================================================================
|
||||
# Singleton du store persistant
|
||||
# =========================================================================
|
||||
|
||||
_MEMORY_SINGLETON: Optional[Any] = None
|
||||
_MEMORY_DISABLED = False
|
||||
|
||||
_GENERIC_BUTTON_TEXTS = {
|
||||
"annuler",
|
||||
"cancel",
|
||||
"enregistrer",
|
||||
"non",
|
||||
"no",
|
||||
"ok",
|
||||
"oui",
|
||||
"ouvrir",
|
||||
"open",
|
||||
"remplacer",
|
||||
"replace",
|
||||
"save",
|
||||
"yes",
|
||||
}
|
||||
|
||||
|
||||
def get_memory_store():
|
||||
"""Retourne le `TargetMemoryStore` partagé, ou None si indisponible.
|
||||
|
||||
Lazy-init : le store n'est créé qu'au premier appel, ce qui évite
|
||||
d'importer `core.learning.target_memory_store` à l'import du module
|
||||
(et donc d'éviter les effets de bord sur le démarrage du serveur).
|
||||
"""
|
||||
global _MEMORY_SINGLETON, _MEMORY_DISABLED
|
||||
|
||||
if _MEMORY_DISABLED:
|
||||
return None
|
||||
if _MEMORY_SINGLETON is not None:
|
||||
return _MEMORY_SINGLETON
|
||||
|
||||
try:
|
||||
from core.learning.target_memory_store import TargetMemoryStore
|
||||
|
||||
base_path = os.environ.get("RPA_LEARNING_DIR", "data/learning")
|
||||
_MEMORY_SINGLETON = TargetMemoryStore(base_path=base_path)
|
||||
logger.info(
|
||||
"replay_memory: TargetMemoryStore initialisé (base=%s)", base_path,
|
||||
)
|
||||
return _MEMORY_SINGLETON
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"replay_memory: TargetMemoryStore indisponible (%s) — "
|
||||
"l'apprentissage persistant est désactivé", exc,
|
||||
)
|
||||
_MEMORY_DISABLED = True
|
||||
return None
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Normalisation de texte et hash
|
||||
# =========================================================================
|
||||
|
||||
|
||||
def _norm_text(s: str) -> str:
|
||||
"""Normalise un texte pour un hash stable (accents, casse, NBSP, espaces)."""
|
||||
if not s:
|
||||
return ""
|
||||
s = s.replace("\u00A0", " ").strip().lower()
|
||||
s = unicodedata.normalize("NFKD", s)
|
||||
s = "".join(ch for ch in s if not unicodedata.combining(ch))
|
||||
return " ".join(s.split())
|
||||
|
||||
|
||||
def _memory_lookup_skip_reason(target_spec: Dict[str, Any]) -> str:
|
||||
"""Retourne la raison pour laquelle la mémoire ne doit pas court-circuiter.
|
||||
|
||||
Les clics qui changent de fenêtre doivent être résolus visuellement à
|
||||
l'instant T : une coordonnée apprise peut être une bonne piste, mais pas
|
||||
une décision finale. Pour les boutons très génériques, on exige au moins
|
||||
un contexte de fenêtre/interaction dans la clé mémoire afin d'éviter les
|
||||
collisions entre « Enregistrer », « OK », « Oui », etc.
|
||||
"""
|
||||
if not isinstance(target_spec, dict):
|
||||
return ""
|
||||
|
||||
hints = target_spec.get("context_hints") or {}
|
||||
if bool(hints.get("requires_window_transition")):
|
||||
return "window_transition_requires_visual_confirmation"
|
||||
|
||||
button_text = _norm_text(str(target_spec.get("by_text") or ""))
|
||||
if button_text not in _GENERIC_BUTTON_TEXTS:
|
||||
return ""
|
||||
|
||||
before = (
|
||||
hints.get("expected_window_before")
|
||||
or hints.get("button_expected_before_window")
|
||||
or hints.get("window_title")
|
||||
or target_spec.get("window_title")
|
||||
)
|
||||
after = (
|
||||
hints.get("expected_window_after")
|
||||
or hints.get("button_expected_after_window")
|
||||
or hints.get("expected_after_window")
|
||||
)
|
||||
interaction = hints.get("interaction") or hints.get("foreground_dialog_id")
|
||||
role = target_spec.get("by_role")
|
||||
if not (before and role and (after or interaction)):
|
||||
return "generic_button_missing_context"
|
||||
return ""
|
||||
|
||||
|
||||
def compute_screen_sig(window_title: str) -> str:
|
||||
"""Calcule la signature d'écran V4 à partir du titre de fenêtre.
|
||||
|
||||
Le `window_title` est strict depuis la phase "controle des étapes"
|
||||
(post-condition `title_match` obligatoire). C'est notre clé naturelle.
|
||||
"""
|
||||
norm = _norm_text(window_title)
|
||||
if not norm:
|
||||
return ""
|
||||
return hashlib.sha256(norm.encode("utf-8")).hexdigest()[:16]
|
||||
|
||||
|
||||
def _round_float_list(values: Any, precision: int = 4) -> Optional[tuple[float, ...]]:
|
||||
"""Normaliser une liste de coordonnées flottantes pour le hash mémoire."""
|
||||
if not isinstance(values, (list, tuple)):
|
||||
return None
|
||||
out = []
|
||||
for value in values:
|
||||
try:
|
||||
out.append(round(float(value), precision))
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
return tuple(out)
|
||||
|
||||
|
||||
def _int_pair(values: Any) -> Optional[tuple[int, int]]:
|
||||
"""Extraire une paire entière stable pour les hints spatiaux."""
|
||||
if not isinstance(values, (list, tuple)) or len(values) < 2:
|
||||
return None
|
||||
try:
|
||||
return int(values[0]), int(values[1])
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def _should_reuse_recorded_window_relative_coords(fp: Any) -> bool:
|
||||
"""Décider si on doit remplacer la mémoire apprise par la position source.
|
||||
|
||||
Cette réécriture n'est légitime que pour les entrées faibles de type
|
||||
`position_fallback`/`v4_unknown`, où la mémoire ne contient pas une vraie
|
||||
localisation visuelle robuste mais seulement un clic écran dépendant de la
|
||||
résolution. Pour les méthodes visuelles apprises (template, SoM, OCR...),
|
||||
réinjecter un vieux `click_relative` source crée des collisions et des
|
||||
dérives sur des boutons homonymes (`Enregistrer`, `OK`, etc.).
|
||||
"""
|
||||
method = str(getattr(fp, "etype", "") or "").strip().lower()
|
||||
return method in {"position_fallback", "v4_unknown"}
|
||||
|
||||
|
||||
class _TargetSpecLike:
|
||||
"""Adaptateur dict → objet pour `TargetMemoryStore._hash_target_spec()`.
|
||||
|
||||
Le hash interne de TargetMemoryStore utilise `getattr(spec, "by_role", ...)`
|
||||
qui ne fonctionne pas avec un dict brut. On expose les attributs nécessaires.
|
||||
|
||||
On intègre aussi `resolve_order`, `vlm_description` et des indices
|
||||
spatiaux (SoM, click_relative) dans `context_hints` pour qu'ils entrent
|
||||
dans le hash. Sinon, deux actions `Enregistrer` dans la même fenêtre
|
||||
mais à des emplacements différents collisionnent.
|
||||
"""
|
||||
|
||||
__slots__ = ("by_role", "by_text", "by_position", "context_hints")
|
||||
|
||||
def __init__(self, d: Dict[str, Any]):
|
||||
self.by_role = d.get("by_role", "") or ""
|
||||
self.by_text = d.get("by_text", "") or ""
|
||||
self.by_position = d.get("by_position")
|
||||
|
||||
hints = dict(d.get("context_hints") or {})
|
||||
resolve_order = d.get("resolve_order")
|
||||
if resolve_order:
|
||||
hints["_resolve_order"] = "|".join(resolve_order) if isinstance(
|
||||
resolve_order, list
|
||||
) else str(resolve_order)
|
||||
if d.get("vlm_description"):
|
||||
hints["_vlm_desc"] = str(d["vlm_description"])
|
||||
if d.get("anchor_hint"):
|
||||
hints["_anchor_hint"] = str(d["anchor_hint"])
|
||||
|
||||
som_element = d.get("som_element") or {}
|
||||
som_bbox = _round_float_list(som_element.get("bbox_norm"))
|
||||
if som_bbox:
|
||||
hints["_som_bbox"] = som_bbox
|
||||
som_center = _round_float_list(som_element.get("center_norm"), precision=5)
|
||||
if som_center:
|
||||
hints["_som_center"] = som_center
|
||||
|
||||
window_capture = d.get("window_capture") or {}
|
||||
click_relative = _int_pair(window_capture.get("click_relative"))
|
||||
window_size = _int_pair(window_capture.get("window_size"))
|
||||
if click_relative and window_size:
|
||||
hints["_window_rel"] = f"{click_relative[0]},{click_relative[1]}@{window_size[0]}x{window_size[1]}"
|
||||
|
||||
self.context_hints = hints
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Lookup — consulté AVANT la cascade coûteuse
|
||||
# =========================================================================
|
||||
|
||||
|
||||
def memory_lookup(
|
||||
window_title: str,
|
||||
target_spec: Dict[str, Any],
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""Cherche une résolution apprise pour cette cible sur cet écran.
|
||||
|
||||
Returns:
|
||||
Dict compatible avec le format de sortie de `_resolve_target_sync`
|
||||
(resolved, method, x_pct, y_pct, score, ...) si une entrée fiable
|
||||
est trouvée. None sinon.
|
||||
"""
|
||||
skip_reason = _memory_lookup_skip_reason(target_spec)
|
||||
if skip_reason:
|
||||
logger.info("memory_lookup SKIP : %s", skip_reason)
|
||||
return None
|
||||
|
||||
store = get_memory_store()
|
||||
if store is None:
|
||||
return None
|
||||
|
||||
screen_sig = compute_screen_sig(window_title)
|
||||
if not screen_sig:
|
||||
return None
|
||||
|
||||
try:
|
||||
spec_shim = _TargetSpecLike(target_spec)
|
||||
fp = store.lookup(screen_sig, spec_shim)
|
||||
except Exception as exc:
|
||||
logger.debug("memory_lookup: erreur lookup (%s)", exc)
|
||||
return None
|
||||
|
||||
if fp is None:
|
||||
return None
|
||||
|
||||
# Fingerprint léger : bbox = (x_pct, y_pct, 0, 0)
|
||||
try:
|
||||
x_pct = float(fp.bbox[0])
|
||||
y_pct = float(fp.bbox[1])
|
||||
except (TypeError, IndexError, ValueError):
|
||||
logger.debug("memory_lookup: fingerprint bbox invalide")
|
||||
return None
|
||||
|
||||
# Quand l'entrée mémoire provient d'un simple `position_fallback`, les
|
||||
# coordonnées stockées reflètent surtout la géométrie écran source. Dans
|
||||
# ce cas précis, réutiliser la position relative enregistrée dans la
|
||||
# fenêtre source reste préférable si elle existe.
|
||||
#
|
||||
# En revanche, pour une méthode visuelle réellement apprise
|
||||
# (`anchor_template`, `som_*`, `hybrid_text_direct`, ...), remplacer les
|
||||
# coords mémorisées par un vieux `click_relative` crée des dérives sur
|
||||
# des cibles textuelles homonymes. On garde donc les coords apprises.
|
||||
window_capture = target_spec.get("window_capture") or {}
|
||||
click_relative = window_capture.get("click_relative")
|
||||
window_size = window_capture.get("window_size")
|
||||
if (
|
||||
_should_reuse_recorded_window_relative_coords(fp)
|
||||
and (
|
||||
isinstance(click_relative, (list, tuple))
|
||||
and len(click_relative) >= 2
|
||||
and isinstance(window_size, (list, tuple))
|
||||
and len(window_size) >= 2
|
||||
)
|
||||
):
|
||||
try:
|
||||
rel_x = float(click_relative[0])
|
||||
rel_y = float(click_relative[1])
|
||||
win_w = float(window_size[0])
|
||||
win_h = float(window_size[1])
|
||||
if win_w > 1 and win_h > 1:
|
||||
x_pct = rel_x / win_w
|
||||
y_pct = rel_y / win_h
|
||||
logger.info(
|
||||
"memory_lookup: coords fenêtre source réutilisées "
|
||||
"(click_relative=%s, window_size=%s) -> (%.4f, %.4f)",
|
||||
click_relative,
|
||||
window_size,
|
||||
x_pct,
|
||||
y_pct,
|
||||
)
|
||||
except (TypeError, ValueError, ZeroDivisionError):
|
||||
logger.debug("memory_lookup: window_capture invalide, fallback bbox")
|
||||
|
||||
# Sanity check : les pourcentages doivent être dans [0, 1]
|
||||
if not (0.0 <= x_pct <= 1.0 and 0.0 <= y_pct <= 1.0):
|
||||
logger.warning(
|
||||
"memory_lookup: coords invalides (%.3f, %.3f) pour sig=%s — "
|
||||
"entrée ignorée",
|
||||
x_pct, y_pct, screen_sig,
|
||||
)
|
||||
return None
|
||||
|
||||
method = fp.etype or "memory"
|
||||
confidence = float(getattr(fp, "confidence", 0.9) or 0.9)
|
||||
|
||||
logger.info(
|
||||
"memory_lookup HIT : sig=%s method=%s coords=(%.4f, %.4f) conf=%.2f "
|
||||
"target='%s'",
|
||||
screen_sig, method, x_pct, y_pct, confidence,
|
||||
(target_spec.get("by_text") or "")[:60],
|
||||
)
|
||||
|
||||
return {
|
||||
"resolved": True,
|
||||
"method": f"memory_{method}",
|
||||
"x_pct": x_pct,
|
||||
"y_pct": y_pct,
|
||||
"score": confidence,
|
||||
"from_memory": True,
|
||||
"screen_sig": screen_sig,
|
||||
}
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Record — appelé APRÈS validation post-condition
|
||||
# =========================================================================
|
||||
|
||||
|
||||
def memory_record_success(
|
||||
window_title: str,
|
||||
target_spec: Dict[str, Any],
|
||||
x_pct: float,
|
||||
y_pct: float,
|
||||
method: str,
|
||||
confidence: float = 0.9,
|
||||
) -> bool:
|
||||
"""Enregistre une résolution réussie dans la mémoire persistante.
|
||||
|
||||
À appeler APRÈS validation de la post-condition (`title_match` strict).
|
||||
"""
|
||||
store = get_memory_store()
|
||||
if store is None:
|
||||
return False
|
||||
|
||||
screen_sig = compute_screen_sig(window_title)
|
||||
if not screen_sig:
|
||||
return False
|
||||
|
||||
# Sanity check : coordonnées dans [0, 1]
|
||||
try:
|
||||
x_pct = float(x_pct)
|
||||
y_pct = float(y_pct)
|
||||
except (TypeError, ValueError):
|
||||
logger.debug("memory_record_success: coords non numériques, skip")
|
||||
return False
|
||||
if not (0.0 <= x_pct <= 1.0 and 0.0 <= y_pct <= 1.0):
|
||||
logger.warning(
|
||||
"memory_record_success: coords hors [0,1] (%.3f, %.3f), skip — "
|
||||
"probable input parasite (target='%s' method=%s)",
|
||||
x_pct, y_pct,
|
||||
(target_spec.get("by_text") or "")[:60], method,
|
||||
)
|
||||
return False
|
||||
# Rejeter (0.0, 0.0) exact : coin haut-gauche = signature de bruit
|
||||
# (curseur NoMachine, événement OS parasite, listener pynput sans clic
|
||||
# humain réel). Cf. bug observé replay_sess_63a1313b 2026-05-24 18:31-18:32.
|
||||
if x_pct == 0.0 and y_pct == 0.0:
|
||||
logger.warning(
|
||||
"memory_record_success: coords (0.0, 0.0) rejetées — "
|
||||
"signature de bruit (target='%s' method=%s)",
|
||||
(target_spec.get("by_text") or "")[:60], method,
|
||||
)
|
||||
return False
|
||||
|
||||
try:
|
||||
from core.learning.target_memory_store import TargetFingerprint
|
||||
|
||||
# Stripper les préfixes "memory_" empilés pour ne garder que
|
||||
# la méthode de résolution originale (ex: template_matching).
|
||||
# Sans ça, le cycle lookup → record → lookup empile "memory_"
|
||||
# indéfiniment : memory_memory_memory_template_matching.
|
||||
method_clean = method or "v4_unknown"
|
||||
while method_clean.startswith("memory_"):
|
||||
method_clean = method_clean[len("memory_"):]
|
||||
method_clean = method_clean or "v4_unknown"
|
||||
fingerprint = TargetFingerprint(
|
||||
element_id=f"v4_{method_clean}",
|
||||
bbox=(x_pct, y_pct, 0.0, 0.0),
|
||||
role=target_spec.get("by_role", "") or None,
|
||||
etype=method_clean,
|
||||
label=(target_spec.get("by_text") or "")[:200] or None,
|
||||
confidence=float(confidence),
|
||||
)
|
||||
|
||||
spec_shim = _TargetSpecLike(target_spec)
|
||||
store.record_success(
|
||||
screen_signature=screen_sig,
|
||||
target_spec=spec_shim,
|
||||
fingerprint=fingerprint,
|
||||
strategy_used=method_clean,
|
||||
confidence=float(confidence),
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"memory_record_success: sig=%s method=%s coords=(%.4f, %.4f) "
|
||||
"target='%s'",
|
||||
screen_sig, method_clean, x_pct, y_pct,
|
||||
(target_spec.get("by_text") or "")[:60],
|
||||
)
|
||||
return True
|
||||
|
||||
except Exception as exc:
|
||||
logger.warning("memory_record_success: échec (%s)", exc)
|
||||
return False
|
||||
|
||||
|
||||
def memory_record_failure(
|
||||
window_title: str,
|
||||
target_spec: Dict[str, Any],
|
||||
error_message: str,
|
||||
) -> bool:
|
||||
"""Incrémente le `fail_count` pour cette (signature, target).
|
||||
|
||||
Appelé quand l'action échoue OU quand la post-condition n'est pas
|
||||
satisfaite. Le `TargetMemoryStore.lookup()` ignorera cette entrée
|
||||
si le ratio d'échecs dépasse 30%.
|
||||
"""
|
||||
store = get_memory_store()
|
||||
if store is None:
|
||||
return False
|
||||
|
||||
screen_sig = compute_screen_sig(window_title)
|
||||
if not screen_sig:
|
||||
return False
|
||||
|
||||
try:
|
||||
spec_shim = _TargetSpecLike(target_spec)
|
||||
store.record_failure(
|
||||
screen_signature=screen_sig,
|
||||
target_spec=spec_shim,
|
||||
error_message=(error_message or "unknown")[:200],
|
||||
)
|
||||
logger.debug(
|
||||
"memory_record_failure: sig=%s error='%s'",
|
||||
screen_sig, (error_message or "")[:80],
|
||||
)
|
||||
return True
|
||||
except Exception as exc:
|
||||
logger.debug("memory_record_failure: échec (%s)", exc)
|
||||
return False
|
||||
@@ -1,20 +1,26 @@
|
||||
# agent_v0/server_v1/replay_verifier.py
|
||||
"""
|
||||
ReplayVerifier — Vérification post-action pour le replay de workflows.
|
||||
ReplayVerifier — Vérification post-action (Critic) pour le replay de workflows.
|
||||
|
||||
Compare les screenshots avant/après une action pour détecter si elle a eu
|
||||
un effet visible. Utilisé par l'API de replay pour décider si une action
|
||||
a réussi ou si un retry est nécessaire.
|
||||
Deux niveaux de vérification :
|
||||
1. PIXEL : Différence d'image avant/après (rapide, ~10ms)
|
||||
- L'écran a-t-il changé ? Où ? De combien ?
|
||||
2. SÉMANTIQUE : VLM évalue si le résultat correspond à l'attendu (~2-5s)
|
||||
- L'action a-t-elle eu l'EFFET voulu ? (pas juste "des pixels ont bougé")
|
||||
|
||||
Stratégies de vérification :
|
||||
1. Différence d'image globale (avant == après → probablement rien ne s'est passé)
|
||||
2. Zone locale autour du clic (si l'action est un clic)
|
||||
3. Détection de texte apparu (si l'action est une frappe)
|
||||
Le niveau pixel existait déjà. Le niveau sémantique (Critic) est le chaînon
|
||||
manquant identifié par comparaison avec Claude Computer Use et OpenAdapt.
|
||||
|
||||
Ref: docs/VISION_RPA_INTELLIGENT.md — étape VERIFY du pipeline.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from core.detection import vlm_config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -35,9 +41,13 @@ class VerificationResult:
|
||||
suggestion: str # "retry", "skip", "abort", "continue"
|
||||
detail: str = "" # Description humaine du résultat
|
||||
local_change_pct: float = 0.0 # % de changement dans la zone locale (si applicable)
|
||||
# Critic sémantique (VLM)
|
||||
semantic_verified: Optional[bool] = None # None = pas de vérif sémantique
|
||||
semantic_detail: str = "" # Explication du VLM
|
||||
semantic_elapsed_ms: float = 0.0 # Temps de la vérif sémantique
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
d = {
|
||||
"verified": self.verified,
|
||||
"confidence": round(self.confidence, 3),
|
||||
"changes_detected": self.changes_detected,
|
||||
@@ -46,6 +56,11 @@ class VerificationResult:
|
||||
"detail": self.detail,
|
||||
"local_change_pct": round(self.local_change_pct, 3),
|
||||
}
|
||||
if self.semantic_verified is not None:
|
||||
d["semantic_verified"] = self.semantic_verified
|
||||
d["semantic_detail"] = self.semantic_detail
|
||||
d["semantic_elapsed_ms"] = round(self.semantic_elapsed_ms, 1)
|
||||
return d
|
||||
|
||||
|
||||
class ReplayVerifier:
|
||||
@@ -315,10 +330,11 @@ class ReplayVerifier:
|
||||
),
|
||||
)
|
||||
|
||||
# Cas 4 : Pas de changement (key_combo, wait)
|
||||
# Pour les raccourcis clavier et attentes, l'absence de changement
|
||||
# n'est pas forcément un problème (ex: Ctrl+C ne change pas l'écran)
|
||||
if action_type in ("key_combo", "wait"):
|
||||
# Cas 4 : Pas de changement (key_combo, wait, verify_screen)
|
||||
# `verify_screen` côté agent n'est qu'une temporisation de stabilisation.
|
||||
# Il ne doit pas exiger un NOUVEAU changement visuel sinon le setup
|
||||
# boucle inutilement une fois l'application déjà ouverte.
|
||||
if action_type in ("key_combo", "wait", "verify_screen"):
|
||||
return VerificationResult(
|
||||
verified=True,
|
||||
confidence=0.4,
|
||||
@@ -345,3 +361,278 @@ class ReplayVerifier:
|
||||
f"(global={global_change_pct:.3f}%, local={local_change_pct:.3f}%)"
|
||||
),
|
||||
)
|
||||
|
||||
# =========================================================================
|
||||
# Critic sémantique — VLM évalue si le résultat correspond à l'attendu
|
||||
# =========================================================================
|
||||
|
||||
def verify_with_critic(
|
||||
self,
|
||||
action: Dict[str, Any],
|
||||
result: Dict[str, Any],
|
||||
screenshot_before: Optional[str] = None,
|
||||
screenshot_after: Optional[str] = None,
|
||||
expected_result: str = "",
|
||||
action_intention: str = "",
|
||||
workflow_context: str = "",
|
||||
) -> VerificationResult:
|
||||
"""Vérification complète : pixel + sémantique (Critic).
|
||||
|
||||
Étape 1 : Vérification pixel (rapide, ~10ms) — l'écran a-t-il changé ?
|
||||
Étape 2 : Vérification sémantique (VLM, ~2-5s) — le changement est-il le bon ?
|
||||
|
||||
La vérification sémantique n'est lancée que si :
|
||||
- expected_result est fourni (description de l'état attendu après l'action)
|
||||
- La vérification pixel a détecté un changement (sinon, pas besoin du VLM)
|
||||
|
||||
Args:
|
||||
action: L'action exécutée
|
||||
result: Le résultat rapporté par l'agent
|
||||
screenshot_before: Screenshot avant l'action (base64)
|
||||
screenshot_after: Screenshot après l'action (base64)
|
||||
expected_result: Description de l'état attendu après l'action
|
||||
action_intention: Ce que l'action était censée faire
|
||||
workflow_context: Contexte global (progression, objectif)
|
||||
"""
|
||||
# Étape 1 : vérification pixel (existante)
|
||||
pixel_result = self.verify_action(
|
||||
action=action,
|
||||
result=result,
|
||||
screenshot_before=screenshot_before,
|
||||
screenshot_after=screenshot_after,
|
||||
)
|
||||
|
||||
# Pas de description attendue → retourner le résultat pixel seul
|
||||
if not expected_result:
|
||||
return pixel_result
|
||||
|
||||
# Si aucun changement pixel ET suggestion retry → pas besoin du VLM
|
||||
if not pixel_result.changes_detected and pixel_result.suggestion == "retry":
|
||||
return pixel_result
|
||||
|
||||
# Étape 2 : vérification sémantique via VLM
|
||||
semantic = self._verify_semantic(
|
||||
screenshot_before=screenshot_before,
|
||||
screenshot_after=screenshot_after,
|
||||
expected_result=expected_result,
|
||||
action_intention=action_intention,
|
||||
workflow_context=workflow_context,
|
||||
)
|
||||
|
||||
if semantic is None:
|
||||
# VLM indisponible → garder le résultat pixel seul
|
||||
return pixel_result
|
||||
|
||||
# Fusionner les résultats pixel + sémantique
|
||||
return self._merge_results(pixel_result, semantic)
|
||||
|
||||
def _verify_semantic(
|
||||
self,
|
||||
screenshot_before: Optional[str],
|
||||
screenshot_after: Optional[str],
|
||||
expected_result: str,
|
||||
action_intention: str = "",
|
||||
workflow_context: str = "",
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""Appeler le VLM pour évaluer sémantiquement le résultat de l'action.
|
||||
|
||||
Utilise le VLM (résolu via vlm_config) en mode texte+images pour analyser
|
||||
les screenshots avant/après et dire si le résultat attendu est atteint.
|
||||
|
||||
Sur Citrix (image plate), c'est la SEULE façon de vérifier intelligemment
|
||||
si une action a eu l'effet voulu.
|
||||
|
||||
Returns:
|
||||
Dict avec {"verified": bool, "detail": str, "elapsed_ms": float}
|
||||
ou None si le VLM est indisponible.
|
||||
"""
|
||||
import requests as _requests
|
||||
|
||||
if not screenshot_after:
|
||||
return None
|
||||
|
||||
# Endpoint VLM : piloté par config (Ollama local ou tunnel DGX = 11434).
|
||||
# GEMMA4_PORT conservé comme override legacy (ancien conteneur Docker 11435).
|
||||
_default_port = vlm_config.DEFAULT_OLLAMA_ENDPOINT.rsplit(":", 1)[-1]
|
||||
gemma4_port = os.environ.get("GEMMA4_PORT", _default_port)
|
||||
gemma4_url = f"http://localhost:{gemma4_port}/api/chat"
|
||||
|
||||
# Construire le prompt Critic
|
||||
context_parts = []
|
||||
if action_intention:
|
||||
context_parts.append(f"Action effectuée : {action_intention}")
|
||||
if workflow_context:
|
||||
context_parts.append(f"Contexte : {workflow_context}")
|
||||
context_str = "\n".join(context_parts)
|
||||
|
||||
# Deux images : avant et après
|
||||
images = []
|
||||
prompt_images = ""
|
||||
if screenshot_before and screenshot_after:
|
||||
images = [screenshot_before, screenshot_after]
|
||||
prompt_images = (
|
||||
"Image 1 = écran AVANT l'action.\n"
|
||||
"Image 2 = écran APRÈS l'action.\n"
|
||||
)
|
||||
elif screenshot_after:
|
||||
images = [screenshot_after]
|
||||
prompt_images = "Image = écran APRÈS l'action.\n"
|
||||
|
||||
prompt = (
|
||||
f"Tu es le VÉRIFICATEUR d'un robot RPA. Tu dois dire si l'action a réussi.\n\n"
|
||||
f"{prompt_images}"
|
||||
f"{context_str}\n\n"
|
||||
f"Résultat attendu : {expected_result}\n\n"
|
||||
f"Est-ce que le résultat attendu est visible à l'écran ?\n"
|
||||
f"Réponds EXACTEMENT dans ce format :\n"
|
||||
f"VERDICT: OUI ou NON\n"
|
||||
f"RAISON: explication courte (1 ligne)"
|
||||
)
|
||||
|
||||
# Injecter le contexte métier si disponible
|
||||
from .domain_context import get_domain_context
|
||||
domain = get_domain_context(os.environ.get("RPA_DOMAIN", "generic"))
|
||||
messages = []
|
||||
if domain.system_prompt:
|
||||
messages.append({"role": "system", "content": domain.system_prompt})
|
||||
messages.append({"role": "user", "content": prompt, "images": images})
|
||||
|
||||
try:
|
||||
t_start = time.time()
|
||||
resp = _requests.post(
|
||||
gemma4_url,
|
||||
json={
|
||||
"model": vlm_config.get_vlm_model(),
|
||||
"messages": messages,
|
||||
"stream": False,
|
||||
"think": True,
|
||||
"options": {"temperature": 0.1, "num_predict": 800},
|
||||
},
|
||||
timeout=30,
|
||||
)
|
||||
elapsed_ms = (time.time() - t_start) * 1000
|
||||
|
||||
if not resp.ok:
|
||||
logger.warning(f"Critic VLM HTTP {resp.status_code}")
|
||||
return None
|
||||
|
||||
content = resp.json().get("message", {}).get("content", "").strip()
|
||||
|
||||
# Parser le verdict
|
||||
verified = None
|
||||
detail = content
|
||||
for line in content.split("\n"):
|
||||
line_upper = line.strip().upper()
|
||||
if line_upper.startswith("VERDICT:"):
|
||||
verdict_text = line_upper.replace("VERDICT:", "").strip()
|
||||
if "OUI" in verdict_text or "YES" in verdict_text:
|
||||
verified = True
|
||||
elif "NON" in verdict_text or "NO" in verdict_text:
|
||||
verified = False
|
||||
elif line_upper.startswith("RAISON:"):
|
||||
detail = line.strip().replace("RAISON:", "").strip()
|
||||
|
||||
if verified is None:
|
||||
# Fallback : chercher OUI/NON dans le texte brut
|
||||
upper = content.upper()
|
||||
if "OUI" in upper and "NON" not in upper:
|
||||
verified = True
|
||||
elif "NON" in upper:
|
||||
verified = False
|
||||
else:
|
||||
logger.warning(f"Critic VLM réponse non parsable : {content[:100]}")
|
||||
return None
|
||||
|
||||
logger.info(
|
||||
f"Critic VLM : {'OUI' if verified else 'NON'} en {elapsed_ms:.0f}ms — {detail[:80]}"
|
||||
)
|
||||
return {
|
||||
"verified": verified,
|
||||
"detail": detail,
|
||||
"elapsed_ms": elapsed_ms,
|
||||
}
|
||||
|
||||
except _requests.Timeout:
|
||||
logger.warning("Critic VLM timeout (30s)")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.warning(f"Critic VLM erreur : {e}")
|
||||
return None
|
||||
|
||||
def _merge_results(
|
||||
self,
|
||||
pixel: VerificationResult,
|
||||
semantic: Dict[str, Any],
|
||||
) -> VerificationResult:
|
||||
"""Fusionner les résultats pixel et sémantique.
|
||||
|
||||
Matrice de décision :
|
||||
- Pixel OK + Semantic OK → vérifié (confiance haute)
|
||||
- Pixel OK + Semantic NON → INATTENDU (l'écran a changé mais pas comme prévu)
|
||||
- Pixel NON + Semantic OK → vérifié quand même (le VLM voit le résultat)
|
||||
- Pixel NON + Semantic NON → échec (retry)
|
||||
"""
|
||||
sem_ok = semantic["verified"]
|
||||
pix_ok = pixel.changes_detected
|
||||
|
||||
if pix_ok and sem_ok:
|
||||
# Tout concorde — confiance maximale
|
||||
return VerificationResult(
|
||||
verified=True,
|
||||
confidence=min(0.95, pixel.confidence + 0.2),
|
||||
changes_detected=True,
|
||||
change_area_pct=pixel.change_area_pct,
|
||||
local_change_pct=pixel.local_change_pct,
|
||||
suggestion="continue",
|
||||
detail=f"Pixel OK + Critic OK : {semantic['detail']}",
|
||||
semantic_verified=True,
|
||||
semantic_detail=semantic["detail"],
|
||||
semantic_elapsed_ms=semantic["elapsed_ms"],
|
||||
)
|
||||
|
||||
elif pix_ok and not sem_ok:
|
||||
# L'écran a changé mais pas dans le bon sens → INATTENDU
|
||||
# C'est le cas le plus important : popup, erreur, mauvaise fenêtre
|
||||
return VerificationResult(
|
||||
verified=False,
|
||||
confidence=0.7,
|
||||
changes_detected=True,
|
||||
change_area_pct=pixel.change_area_pct,
|
||||
local_change_pct=pixel.local_change_pct,
|
||||
suggestion="retry",
|
||||
detail=f"Pixel OK mais Critic NON : {semantic['detail']}",
|
||||
semantic_verified=False,
|
||||
semantic_detail=semantic["detail"],
|
||||
semantic_elapsed_ms=semantic["elapsed_ms"],
|
||||
)
|
||||
|
||||
elif not pix_ok and sem_ok:
|
||||
# Peu de pixels ont changé mais le VLM dit que le résultat est bon
|
||||
# Ex: focus sur un onglet déjà visible (changement subtil)
|
||||
return VerificationResult(
|
||||
verified=True,
|
||||
confidence=0.6,
|
||||
changes_detected=False,
|
||||
change_area_pct=pixel.change_area_pct,
|
||||
local_change_pct=pixel.local_change_pct,
|
||||
suggestion="continue",
|
||||
detail=f"Pixel inchangé mais Critic OK : {semantic['detail']}",
|
||||
semantic_verified=True,
|
||||
semantic_detail=semantic["detail"],
|
||||
semantic_elapsed_ms=semantic["elapsed_ms"],
|
||||
)
|
||||
|
||||
else:
|
||||
# Rien n'a changé et le VLM confirme → échec
|
||||
return VerificationResult(
|
||||
verified=False,
|
||||
confidence=0.8,
|
||||
changes_detected=False,
|
||||
change_area_pct=pixel.change_area_pct,
|
||||
local_change_pct=pixel.local_change_pct,
|
||||
suggestion="retry",
|
||||
detail=f"Pixel inchangé + Critic NON : {semantic['detail']}",
|
||||
semantic_verified=False,
|
||||
semantic_detail=semantic["detail"],
|
||||
semantic_elapsed_ms=semantic["elapsed_ms"],
|
||||
)
|
||||
|
||||
329
agent_v0/server_v1/replay_watchdog.py
Normal file
329
agent_v0/server_v1/replay_watchdog.py
Normal file
@@ -0,0 +1,329 @@
|
||||
"""Replay orphan watchdog for in-flight replay actions.
|
||||
|
||||
This module watches `_retry_pending` and re-pushes actions that were
|
||||
dispatched by the server but never acknowledged by the Windows agent.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import contextlib
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from typing import Any, Callable, Dict, List, Optional, Tuple
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _env_bool(name: str, default: str) -> bool:
|
||||
return os.environ.get(name, default).strip().lower() in {
|
||||
"1",
|
||||
"true",
|
||||
"yes",
|
||||
"on",
|
||||
}
|
||||
|
||||
|
||||
def _env_float(name: str, default: float) -> float:
|
||||
try:
|
||||
return float(os.environ.get(name, str(default)))
|
||||
except (TypeError, ValueError):
|
||||
logger.warning("Watchdog: invalid env %s, fallback=%s", name, default)
|
||||
return default
|
||||
|
||||
|
||||
def _env_int(name: str, default: int) -> int:
|
||||
try:
|
||||
return int(os.environ.get(name, str(default)))
|
||||
except (TypeError, ValueError):
|
||||
logger.warning("Watchdog: invalid env %s, fallback=%s", name, default)
|
||||
return default
|
||||
|
||||
|
||||
def _env_max_resends(default: int) -> int:
|
||||
raw = os.environ.get("RPA_WATCHDOG_MAX_RESENDS")
|
||||
if raw is None or not str(raw).strip():
|
||||
raw = os.environ.get("RPA_WATCHDOG_MAX_RETRIES")
|
||||
try:
|
||||
return int(raw) if raw is not None else default
|
||||
except (TypeError, ValueError):
|
||||
logger.warning("Watchdog: invalid max resend env, fallback=%s", default)
|
||||
return default
|
||||
|
||||
|
||||
WATCHDOG_ENABLED = _env_bool("RPA_WATCHDOG_ENABLED", "1")
|
||||
WATCHDOG_SCAN_INTERVAL_S = _env_float("RPA_WATCHDOG_SCAN_INTERVAL_S", 10.0)
|
||||
WATCHDOG_ORPHAN_TIMEOUT_S = _env_float("RPA_WATCHDOG_ORPHAN_TIMEOUT_S", 45.0)
|
||||
WATCHDOG_MAX_RESENDS = _env_max_resends(2)
|
||||
WATCHDOG_REPUSH_POSITION = (
|
||||
os.environ.get("RPA_WATCHDOG_REPUSH_POSITION", "head").strip().lower()
|
||||
)
|
||||
|
||||
|
||||
_metrics_lock = asyncio.Lock()
|
||||
_metrics: Dict[str, Any] = {
|
||||
"orphans_detected_total": 0,
|
||||
"orphans_resent_total": 0,
|
||||
"orphans_giveup_total": 0,
|
||||
"scans_total": 0,
|
||||
"scans_failed_total": 0,
|
||||
"last_scan_ts": 0.0,
|
||||
"last_scan_duration_ms": 0.0,
|
||||
"current_in_flight_count": 0,
|
||||
"current_orphan_count": 0,
|
||||
}
|
||||
|
||||
|
||||
async def _bump(key: str, delta: int = 1) -> None:
|
||||
async with _metrics_lock:
|
||||
_metrics[key] = _metrics.get(key, 0) + delta
|
||||
|
||||
|
||||
def get_metrics_snapshot() -> Dict[str, Any]:
|
||||
return dict(_metrics)
|
||||
|
||||
|
||||
SseNotifier = Callable[[str, str], None]
|
||||
|
||||
|
||||
class ReplayWatchdog:
|
||||
"""Background coroutine that re-pushes orphaned replay actions."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
retry_pending: Dict[str, Dict[str, Any]],
|
||||
replay_queues: Dict[str, List[Dict[str, Any]]],
|
||||
async_lock_factory: Callable[[], Any],
|
||||
sse_notifier: Optional[SseNotifier] = None,
|
||||
) -> None:
|
||||
self._retry_pending = retry_pending
|
||||
self._replay_queues = replay_queues
|
||||
self._async_lock = async_lock_factory
|
||||
self._sse_notifier = sse_notifier
|
||||
self._task: Optional[asyncio.Task] = None
|
||||
self._stopped = asyncio.Event()
|
||||
|
||||
async def start(self) -> None:
|
||||
if not WATCHDOG_ENABLED:
|
||||
logger.info("[WATCHDOG] disabled via RPA_WATCHDOG_ENABLED=0")
|
||||
return
|
||||
if self._task is not None and not self._task.done():
|
||||
logger.warning("[WATCHDOG] already started")
|
||||
return
|
||||
self._stopped.clear()
|
||||
self._task = asyncio.create_task(self._run(), name="replay_watchdog")
|
||||
logger.info(
|
||||
"[WATCHDOG] started scan=%.1fs orphan_timeout=%.1fs max_resends=%d repush=%s",
|
||||
WATCHDOG_SCAN_INTERVAL_S,
|
||||
WATCHDOG_ORPHAN_TIMEOUT_S,
|
||||
WATCHDOG_MAX_RESENDS,
|
||||
WATCHDOG_REPUSH_POSITION,
|
||||
)
|
||||
|
||||
async def stop(self, timeout_s: float = 5.0) -> None:
|
||||
if self._task is None:
|
||||
return
|
||||
self._stopped.set()
|
||||
self._task.cancel()
|
||||
try:
|
||||
await asyncio.wait_for(self._task, timeout=timeout_s)
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning("[WATCHDOG] stop timeout after %.1fs", timeout_s)
|
||||
except Exception:
|
||||
logger.exception("[WATCHDOG] unexpected stop error")
|
||||
self._task = None
|
||||
logger.info("[WATCHDOG] stopped")
|
||||
|
||||
async def _run(self) -> None:
|
||||
try:
|
||||
while not self._stopped.is_set():
|
||||
try:
|
||||
await asyncio.wait_for(
|
||||
self._stopped.wait(),
|
||||
timeout=WATCHDOG_SCAN_INTERVAL_S,
|
||||
)
|
||||
break
|
||||
except asyncio.TimeoutError:
|
||||
pass
|
||||
|
||||
try:
|
||||
await self._scan_once()
|
||||
except Exception:
|
||||
await _bump("scans_failed_total")
|
||||
logger.exception("[WATCHDOG] scan failed")
|
||||
except asyncio.CancelledError:
|
||||
logger.info("[WATCHDOG] cancelled")
|
||||
raise
|
||||
finally:
|
||||
logger.info("[WATCHDOG] loop terminated")
|
||||
|
||||
async def _scan_once(self) -> Dict[str, int]:
|
||||
t0 = time.time()
|
||||
await _bump("scans_total")
|
||||
|
||||
resent = 0
|
||||
gaveup = 0
|
||||
skipped = 0
|
||||
in_flight = 0
|
||||
orphans = 0
|
||||
|
||||
orphan_targets: List[Tuple[str, Dict[str, Any]]] = []
|
||||
async with self._async_lock():
|
||||
for action_id, info in list(self._retry_pending.items()):
|
||||
dispatched_at = info.get("dispatched_at", 0.0) or 0.0
|
||||
if dispatched_at <= 0:
|
||||
skipped += 1
|
||||
continue
|
||||
age = t0 - dispatched_at
|
||||
in_flight += 1
|
||||
if age < WATCHDOG_ORPHAN_TIMEOUT_S:
|
||||
continue
|
||||
orphans += 1
|
||||
orphan_targets.append((action_id, dict(info)))
|
||||
|
||||
for action_id, info in orphan_targets:
|
||||
await _bump("orphans_detected_total")
|
||||
resent_count = int(info.get("resent_count", 0) or 0)
|
||||
|
||||
if resent_count >= WATCHDOG_MAX_RESENDS:
|
||||
async with self._async_lock():
|
||||
self._retry_pending.pop(action_id, None)
|
||||
age_total = t0 - float(info.get("first_dispatched_at", t0) or t0)
|
||||
logger.error(
|
||||
"[BUS] lea:dispatch_orphan_giveup action_id=%s resent=%d age_total=%.1fs "
|
||||
"session=%s machine=%s replay=%s",
|
||||
action_id,
|
||||
resent_count,
|
||||
age_total,
|
||||
info.get("session_id", "?"),
|
||||
info.get("machine_id", "?"),
|
||||
info.get("replay_id", "?"),
|
||||
)
|
||||
gaveup += 1
|
||||
await _bump("orphans_giveup_total")
|
||||
continue
|
||||
|
||||
session_id = info.get("session_id")
|
||||
machine_id = info.get("machine_id", "default")
|
||||
action = info.get("dispatched_action") or info.get("action")
|
||||
if not session_id or not isinstance(action, dict):
|
||||
logger.warning(
|
||||
"[WATCHDOG] invalid schema for %s session_id=%r action_type=%s",
|
||||
action_id,
|
||||
session_id,
|
||||
type(action).__name__,
|
||||
)
|
||||
async with self._async_lock():
|
||||
self._retry_pending.pop(action_id, None)
|
||||
continue
|
||||
|
||||
async with self._async_lock():
|
||||
existing = self._retry_pending.get(action_id)
|
||||
if existing is None:
|
||||
logger.debug(
|
||||
"[WATCHDOG] %s acked between snapshot and resend; skip",
|
||||
action_id,
|
||||
)
|
||||
continue
|
||||
queue = self._replay_queues.setdefault(session_id, [])
|
||||
if WATCHDOG_REPUSH_POSITION == "tail":
|
||||
queue.append(dict(action))
|
||||
else:
|
||||
queue.insert(0, dict(action))
|
||||
existing["resent_count"] = resent_count + 1
|
||||
existing["last_resent_at"] = time.time()
|
||||
existing["dispatched_at"] = 0.0
|
||||
|
||||
age_total = t0 - float(info.get("first_dispatched_at", t0) or t0)
|
||||
logger.warning(
|
||||
"[BUS] lea:dispatch_orphan_resent action_id=%s resent=%d/%d age=%.1fs "
|
||||
"session=%s machine=%s replay=%s",
|
||||
action_id,
|
||||
resent_count + 1,
|
||||
WATCHDOG_MAX_RESENDS,
|
||||
age_total,
|
||||
session_id,
|
||||
machine_id,
|
||||
info.get("replay_id", "?"),
|
||||
)
|
||||
resent += 1
|
||||
await _bump("orphans_resent_total")
|
||||
|
||||
if self._sse_notifier is not None:
|
||||
try:
|
||||
self._sse_notifier(session_id, machine_id)
|
||||
except Exception as exc:
|
||||
logger.debug("[WATCHDOG] sse notifier failed: %s", exc)
|
||||
|
||||
elapsed_ms = (time.time() - t0) * 1000.0
|
||||
async with _metrics_lock:
|
||||
_metrics["last_scan_ts"] = t0
|
||||
_metrics["last_scan_duration_ms"] = elapsed_ms
|
||||
_metrics["current_in_flight_count"] = in_flight
|
||||
_metrics["current_orphan_count"] = orphans
|
||||
scans_total = _metrics["scans_total"]
|
||||
|
||||
if orphans or gaveup:
|
||||
logger.info(
|
||||
"[METRIC] watchdog scan=%d orphans=%d resent=%d gaveup=%d "
|
||||
"in_flight=%d skipped=%d elapsed_ms=%.1f",
|
||||
scans_total,
|
||||
orphans,
|
||||
resent,
|
||||
gaveup,
|
||||
in_flight,
|
||||
skipped,
|
||||
elapsed_ms,
|
||||
)
|
||||
|
||||
return {
|
||||
"orphans": orphans,
|
||||
"resent": resent,
|
||||
"gaveup": gaveup,
|
||||
"skipped": skipped,
|
||||
"in_flight": in_flight,
|
||||
}
|
||||
|
||||
|
||||
_singleton: Optional[ReplayWatchdog] = None
|
||||
|
||||
|
||||
def get_or_create_watchdog(
|
||||
retry_pending: Dict[str, Dict[str, Any]],
|
||||
replay_queues: Dict[str, List[Dict[str, Any]]],
|
||||
async_lock_factory: Callable[[], Any],
|
||||
sse_notifier: Optional[SseNotifier] = None,
|
||||
) -> ReplayWatchdog:
|
||||
global _singleton
|
||||
if _singleton is None:
|
||||
_singleton = ReplayWatchdog(
|
||||
retry_pending=retry_pending,
|
||||
replay_queues=replay_queues,
|
||||
async_lock_factory=async_lock_factory,
|
||||
sse_notifier=sse_notifier,
|
||||
)
|
||||
return _singleton
|
||||
|
||||
|
||||
@contextlib.asynccontextmanager
|
||||
async def watchdog_lifespan(
|
||||
retry_pending: Dict[str, Dict[str, Any]],
|
||||
replay_queues: Dict[str, List[Dict[str, Any]]],
|
||||
async_lock_factory: Callable[[], Any],
|
||||
sse_notifier: Optional[SseNotifier] = None,
|
||||
):
|
||||
watchdog = get_or_create_watchdog(
|
||||
retry_pending=retry_pending,
|
||||
replay_queues=replay_queues,
|
||||
async_lock_factory=async_lock_factory,
|
||||
sse_notifier=sse_notifier,
|
||||
)
|
||||
await watchdog.start()
|
||||
try:
|
||||
yield watchdog
|
||||
finally:
|
||||
await watchdog.stop()
|
||||
3172
agent_v0/server_v1/resolve_engine.py
Normal file
3172
agent_v0/server_v1/resolve_engine.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -25,6 +25,7 @@ Le worker :
|
||||
5. Se suspend quand un replay est actif (libère le GPU)
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import signal
|
||||
@@ -67,6 +68,7 @@ class VLMWorker:
|
||||
self._running = False
|
||||
self._processor = None # Initialisé au premier besoin (lazy loading GPU)
|
||||
self._current_session: Optional[str] = None
|
||||
self._started_at: str = datetime.now().isoformat()
|
||||
|
||||
# Stats
|
||||
self._stats: Dict[str, int] = {
|
||||
@@ -83,7 +85,10 @@ class VLMWorker:
|
||||
if self._processor is None:
|
||||
logger.info("Initialisation du StreamProcessor (chargement GPU)...")
|
||||
from .stream_processor import StreamProcessor
|
||||
self._processor = StreamProcessor(data_dir=str(LIVE_SESSIONS_DIR))
|
||||
self._processor = StreamProcessor(
|
||||
data_dir=str(DATA_DIR),
|
||||
enable_vlm=True,
|
||||
)
|
||||
logger.info("StreamProcessor initialisé.")
|
||||
return self._processor
|
||||
|
||||
@@ -98,6 +103,11 @@ class VLMWorker:
|
||||
logger.info(" Sessions dir : %s", LIVE_SESSIONS_DIR)
|
||||
logger.info(" Poll interval : %ds", POLL_INTERVAL)
|
||||
|
||||
# N2 + N3 : santé initiale + signal READY systemd dès le démarrage
|
||||
# (avant tout chargement GPU, pour ne pas dépasser le timeout de start).
|
||||
self._write_health("healthy")
|
||||
self._sd_notify("READY=1")
|
||||
|
||||
while self._running:
|
||||
try:
|
||||
# Vérifier si un replay est actif
|
||||
@@ -110,6 +120,7 @@ class VLMWorker:
|
||||
if session_id:
|
||||
self._process_session(session_id)
|
||||
else:
|
||||
self._write_health("healthy") # N2 : cycle idle
|
||||
time.sleep(POLL_INTERVAL)
|
||||
|
||||
except KeyboardInterrupt:
|
||||
@@ -119,6 +130,7 @@ class VLMWorker:
|
||||
logger.error("Erreur dans la boucle principale : %s", e, exc_info=True)
|
||||
time.sleep(5) # Éviter une boucle d'erreurs rapide
|
||||
|
||||
self._write_health("stopped") # N2 : santé finale
|
||||
logger.info("VLM Worker arrêté.")
|
||||
|
||||
def stop(self):
|
||||
@@ -126,6 +138,103 @@ class VLMWorker:
|
||||
self._running = False
|
||||
logger.info("Arrêt demandé.")
|
||||
|
||||
# =========================================================================
|
||||
# N2 — Health file (_worker_health.json)
|
||||
# =========================================================================
|
||||
#
|
||||
# Garde-fou anti-blocage silencieux : expose l'état de santé du worker sur
|
||||
# disque pour qu'un superviseur (humain, dashboard, watchdog) détecte un
|
||||
# worker dégradé sans avoir à fouiller les logs. Écriture atomique.
|
||||
#
|
||||
# CONFIDENTIALITÉ (HDS) : n'écrit AUCUNE donnée patient — uniquement des
|
||||
# identifiants techniques (session_id), des compteurs et des booléens de
|
||||
# composants. Jamais d'OCR, de noms de fichiers screenshots, ni de contenu
|
||||
# de session.
|
||||
|
||||
def _sd_notify(self, state: str) -> bool:
|
||||
"""Notifie systemd via $NOTIFY_SOCKET, sans dépendance `systemd.daemon`.
|
||||
|
||||
Implémentation pure socket (AF_UNIX SOCK_DGRAM) : fonctionne sous systemd
|
||||
`Type=notify` pour `READY=1` et le heartbeat `WATCHDOG=1`. No-op silencieux
|
||||
hors systemd (variable absente) ou en cas d'erreur — jamais bloquant.
|
||||
Retourne True si le message a été émis.
|
||||
"""
|
||||
addr = os.environ.get("NOTIFY_SOCKET")
|
||||
if not addr:
|
||||
return False
|
||||
try:
|
||||
import socket
|
||||
|
||||
# Namespace abstrait systemd : '@' → octet nul de préfixe
|
||||
connect_addr = "\0" + addr[1:] if addr.startswith("@") else addr
|
||||
with socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM) as sock:
|
||||
sock.connect(connect_addr)
|
||||
sock.sendall(state.encode("utf-8"))
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.debug("sd_notify(%s) échoué : %s", state, e)
|
||||
return False
|
||||
|
||||
def _health_components(self) -> Dict[str, bool]:
|
||||
"""Statut booléen de chaque composant lourd, dérivé du processor."""
|
||||
proc = self._processor
|
||||
return {
|
||||
"screen_analyzer": proc is not None and getattr(proc, "_screen_analyzer", None) is not None,
|
||||
"clip_embedder": proc is not None and getattr(proc, "_clip_embedder", None) is not None,
|
||||
"faiss_manager": proc is not None and getattr(proc, "_faiss_manager", None) is not None,
|
||||
"state_embedding_builder": proc is not None and getattr(proc, "_state_embedding_builder", None) is not None,
|
||||
}
|
||||
|
||||
def _write_health(self, status: str) -> None:
|
||||
"""Écrit data/training/_worker_health.json de façon atomique.
|
||||
|
||||
`status` attendu : healthy | busy | degraded | stopped. Si le worker
|
||||
tourne en mode VLM mais que ScreenAnalyzer est absent, le statut est
|
||||
forcé à 'degraded' quelle que soit la valeur demandée.
|
||||
"""
|
||||
try:
|
||||
components = self._health_components()
|
||||
|
||||
proc = self._processor
|
||||
vlm_mode = proc is not None and getattr(proc, "_enable_vlm", False)
|
||||
if vlm_mode and not components["screen_analyzer"]:
|
||||
status = "degraded"
|
||||
|
||||
queue_path = DATA_DIR / "_worker_queue.txt"
|
||||
try:
|
||||
queue_length = len(
|
||||
[ln for ln in queue_path.read_text(encoding="utf-8").splitlines() if ln.strip()]
|
||||
) if queue_path.exists() else 0
|
||||
except Exception:
|
||||
queue_length = 0
|
||||
|
||||
payload = {
|
||||
"pid": os.getpid(),
|
||||
"started_at": self._started_at,
|
||||
"last_cycle": datetime.now().isoformat(),
|
||||
"current_session": self._current_session,
|
||||
"queue_length": queue_length,
|
||||
"components": components,
|
||||
"stats": dict(self._stats),
|
||||
"status": status,
|
||||
}
|
||||
|
||||
health_path = DATA_DIR / "_worker_health.json"
|
||||
tmp_path = health_path.with_suffix(".json.tmp")
|
||||
tmp_path.write_text(
|
||||
json.dumps(payload, ensure_ascii=False, indent=2),
|
||||
encoding="utf-8",
|
||||
)
|
||||
tmp_path.rename(health_path)
|
||||
except Exception as e:
|
||||
# Le health file est un garde-fou, jamais un point de défaillance.
|
||||
logger.warning("Écriture health file échouée : %s", e)
|
||||
|
||||
# N3 : chaque écriture santé sert aussi de heartbeat watchdog systemd
|
||||
# (sauf à l'arrêt). No-op hors systemd.
|
||||
if status != "stopped":
|
||||
self._sd_notify("WATCHDOG=1")
|
||||
|
||||
# =========================================================================
|
||||
# Queue management (fichier _worker_queue.txt)
|
||||
# =========================================================================
|
||||
@@ -206,6 +315,9 @@ class VLMWorker:
|
||||
REPLAY_WAIT_TIMEOUT,
|
||||
)
|
||||
break
|
||||
# N3 : heartbeat pendant la pause replay (peut durer jusqu'à 120s,
|
||||
# sinon le watchdog tuerait un worker pourtant sain et en attente).
|
||||
self._sd_notify("WATCHDOG=1")
|
||||
time.sleep(REPLAY_CHECK_INTERVAL)
|
||||
|
||||
elapsed = time.time() - start
|
||||
@@ -220,6 +332,7 @@ class VLMWorker:
|
||||
"""Traite une session complète (analyse VLM + construction workflow)."""
|
||||
self._current_session = session_id
|
||||
logger.info("=== Début traitement session %s ===", session_id)
|
||||
self._write_health("busy") # N2 : début de session
|
||||
start_time = time.time()
|
||||
|
||||
try:
|
||||
@@ -331,6 +444,7 @@ class VLMWorker:
|
||||
|
||||
finally:
|
||||
self._current_session = None
|
||||
self._write_health("healthy") # N2 : fin de session (ou degraded auto)
|
||||
|
||||
logger.info("=== Fin traitement session %s ===", session_id)
|
||||
|
||||
@@ -347,6 +461,8 @@ class VLMWorker:
|
||||
f" ({shot_id})" if shot_id else "",
|
||||
)
|
||||
|
||||
self._write_health("busy") # N2 : heartbeat à chaque screenshot
|
||||
|
||||
# Vérifier si un replay est devenu actif pendant le traitement
|
||||
if self._is_replay_active():
|
||||
logger.info(
|
||||
|
||||
273
agent_v0/server_v1/safety_checks_provider.py
Normal file
273
agent_v0/server_v1/safety_checks_provider.py
Normal file
@@ -0,0 +1,273 @@
|
||||
# agent_v0/server_v1/safety_checks_provider.py
|
||||
"""SafetyChecksProvider — checks hybrides déclaratifs + LLM contextuels (QW4).
|
||||
|
||||
Pour une action pause_for_human :
|
||||
- les checks déclaratifs (workflow) sont toujours inclus
|
||||
- si safety_level == "medical_critical" et RPA_SAFETY_CHECKS_LLM_ENABLED=1,
|
||||
un appel LLM (medgemma:4b par défaut) ajoute jusqu'à N checks contextuels
|
||||
|
||||
Tout échec côté LLM (timeout, exception, parse) → additional_checks=[] :
|
||||
le replay continue avec uniquement les déclaratifs (fallback safe).
|
||||
"""
|
||||
|
||||
import base64
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from core.detection import vlm_config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
try:
|
||||
from agent_v0.agent_v1.ui.message_contract import (
|
||||
coerce_supervised_pause_message,
|
||||
warn_visible_message,
|
||||
)
|
||||
except Exception: # pragma: no cover - fallback for partial server deployments
|
||||
coerce_supervised_pause_message = None
|
||||
warn_visible_message = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class PausePayload:
|
||||
checks: List[Dict[str, Any]] = field(default_factory=list)
|
||||
pause_reason: str = ""
|
||||
message: str = ""
|
||||
|
||||
|
||||
def _env(name: str, default: str) -> str:
|
||||
return os.environ.get(name, default).strip()
|
||||
|
||||
|
||||
def _env_int(name: str, default: int) -> int:
|
||||
try:
|
||||
return int(os.environ.get(name, default))
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
|
||||
|
||||
def _env_bool_enabled(name: str) -> bool:
|
||||
val = os.environ.get(name, "1").strip().lower()
|
||||
return val not in ("0", "false", "no", "off", "")
|
||||
|
||||
|
||||
def build_pause_payload(
|
||||
action: Dict[str, Any],
|
||||
replay_state: Dict[str, Any],
|
||||
last_screenshot: Optional[str],
|
||||
) -> PausePayload:
|
||||
"""Construit le payload de pause enrichi pour une action pause_for_human."""
|
||||
params = dict(action.get("parameters") or {})
|
||||
for key in ("message", "safety_level", "safety_checks", "pause_reason"):
|
||||
if key not in params or params.get(key) in (None, "", []):
|
||||
if action.get(key) not in (None, "", []):
|
||||
params[key] = action.get(key)
|
||||
|
||||
raw_message = (
|
||||
params.get("message")
|
||||
or action.get("message")
|
||||
or action.get("intention")
|
||||
or ""
|
||||
)
|
||||
message = _coerce_pause_message(
|
||||
raw_message,
|
||||
intention=params.get("intention") or action.get("intention") or action.get("description"),
|
||||
attendu=params.get("attendu") or params.get("expected") or action.get("expected"),
|
||||
vu=params.get("vu") or params.get("observed") or action.get("observed"),
|
||||
demande=params.get("demande") or params.get("request"),
|
||||
)
|
||||
safety_level = params.get("safety_level")
|
||||
declarative = params.get("safety_checks") or []
|
||||
|
||||
# Normalisation des checks déclaratifs
|
||||
checks: List[Dict[str, Any]] = []
|
||||
for d in declarative:
|
||||
checks.append({
|
||||
"id": d.get("id") or f"decl_{uuid.uuid4().hex[:6]}",
|
||||
"label": d.get("label", "Validation"),
|
||||
"required": bool(d.get("required", True)),
|
||||
"source": "declarative",
|
||||
"evidence": None,
|
||||
})
|
||||
|
||||
# Ajout LLM contextual si applicable
|
||||
if safety_level == "medical_critical" and _env_bool_enabled("RPA_SAFETY_CHECKS_LLM_ENABLED"):
|
||||
try:
|
||||
additional = _call_llm_for_contextual_checks(
|
||||
action=action,
|
||||
replay_state=replay_state,
|
||||
last_screenshot=last_screenshot,
|
||||
existing_labels=[c["label"] for c in checks],
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("[BUS] lea:safety_checks_llm_failed reason=exception detail=%s", e)
|
||||
additional = []
|
||||
|
||||
for a in additional:
|
||||
checks.append({
|
||||
"id": f"llm_{uuid.uuid4().hex[:6]}",
|
||||
"label": a.get("label", ""),
|
||||
"required": False, # checks LLM = informationnels, pas obligatoires V1
|
||||
"source": "llm_contextual",
|
||||
"evidence": a.get("evidence", ""),
|
||||
})
|
||||
|
||||
return PausePayload(
|
||||
checks=checks,
|
||||
pause_reason=params.get("pause_reason", ""),
|
||||
message=message,
|
||||
)
|
||||
|
||||
|
||||
def _coerce_pause_message(
|
||||
message: Any = "",
|
||||
*,
|
||||
intention: Any = "",
|
||||
attendu: Any = "",
|
||||
vu: Any = "",
|
||||
demande: Any = "",
|
||||
) -> str:
|
||||
if warn_visible_message is not None:
|
||||
warn_visible_message(
|
||||
message,
|
||||
source="safety_checks_provider._coerce_pause_message.raw",
|
||||
supervised_pause=False,
|
||||
)
|
||||
|
||||
if coerce_supervised_pause_message is not None:
|
||||
result = coerce_supervised_pause_message(
|
||||
message,
|
||||
intention=intention,
|
||||
attendu=attendu,
|
||||
vu=vu,
|
||||
demande=demande,
|
||||
)
|
||||
if warn_visible_message is not None:
|
||||
warn_visible_message(
|
||||
result,
|
||||
source="safety_checks_provider._coerce_pause_message.final",
|
||||
supervised_pause=True,
|
||||
)
|
||||
return result
|
||||
|
||||
fallback_request = "indiquer si je peux continuer ou corriger l'action attendue"
|
||||
result = "\n".join(
|
||||
(
|
||||
f"J'essaie de : {intention or 'continuer une etape supervisee'}",
|
||||
f"J'attendais : {attendu or 'un accord humain clair avant de continuer'}",
|
||||
f"Je vois : {vu or 'je suis sur une etape qui demande une verification humaine'}",
|
||||
f"Peux-tu : {demande or message or fallback_request}",
|
||||
)
|
||||
)
|
||||
if warn_visible_message is not None:
|
||||
warn_visible_message(
|
||||
result,
|
||||
source="safety_checks_provider._coerce_pause_message.final_fallback",
|
||||
supervised_pause=True,
|
||||
)
|
||||
return result
|
||||
|
||||
|
||||
def _call_llm_for_contextual_checks(
|
||||
action: Dict[str, Any],
|
||||
replay_state: Dict[str, Any],
|
||||
last_screenshot: Optional[str],
|
||||
existing_labels: List[str],
|
||||
) -> List[Dict[str, str]]:
|
||||
"""Appelle Ollama en mode JSON strict pour générer 0-N checks contextuels.
|
||||
|
||||
Returns:
|
||||
List[{label, evidence}] (max RPA_SAFETY_CHECKS_LLM_MAX_CHECKS).
|
||||
[] sur tout échec (timeout, JSON invalide, exception).
|
||||
"""
|
||||
import requests
|
||||
|
||||
# Modèle : override explicite RPA_SAFETY_CHECKS_LLM_MODEL prioritaire ; sinon
|
||||
# résolution centralisée vlm_config (gemma4:latest si dispo — meilleur bench
|
||||
# 2026-05-06 cf. docs/BENCH_SAFETY_CHECKS_2026-05-06.md — sinon fallback DGX).
|
||||
# Pas de fallback silencieux vers un modèle absent : get_vlm_model vérifie /api/tags.
|
||||
model = _env("RPA_SAFETY_CHECKS_LLM_MODEL", "") or vlm_config.get_vlm_model()
|
||||
# Timeout 7s : warm avg gemma4 = 2.9s + marge 4s. Cold start ~10s couvert
|
||||
# si le modèle reste résident (OLLAMA_KEEP_ALIVE=24h recommandé prod).
|
||||
timeout_s = _env_int("RPA_SAFETY_CHECKS_LLM_TIMEOUT_S", 7)
|
||||
max_checks = _env_int("RPA_SAFETY_CHECKS_LLM_MAX_CHECKS", 3)
|
||||
ollama_url = _env("OLLAMA_URL", "http://localhost:11434")
|
||||
|
||||
params = action.get("parameters") or {}
|
||||
workflow_message = params.get("message", "")
|
||||
existing = ", ".join(existing_labels) if existing_labels else "aucun"
|
||||
|
||||
prompt = f"""Tu es Léa, assistante médicale supervisée.
|
||||
Avant de continuer le workflow, tu dois lister 0 à {max_checks} vérifications supplémentaires
|
||||
que l'humain doit acquitter, en regardant l'écran actuel.
|
||||
|
||||
Contexte workflow : {workflow_message}
|
||||
Checks déjà demandés : {existing}
|
||||
|
||||
NE répète PAS un check déjà demandé.
|
||||
Si rien d'inhabituel à signaler, retourne {{"additional_checks": []}}.
|
||||
|
||||
Réponds UNIQUEMENT en JSON :
|
||||
{{
|
||||
"additional_checks": [
|
||||
{{"label": "string court", "evidence": "ce que tu as vu d'inhabituel"}}
|
||||
]
|
||||
}}
|
||||
"""
|
||||
|
||||
payload = {
|
||||
"model": model,
|
||||
"prompt": prompt,
|
||||
"stream": False,
|
||||
"format": "json",
|
||||
"options": {"temperature": 0.1, "num_predict": 200},
|
||||
}
|
||||
|
||||
if last_screenshot and os.path.isfile(last_screenshot):
|
||||
try:
|
||||
with open(last_screenshot, "rb") as f:
|
||||
payload["images"] = [base64.b64encode(f.read()).decode("ascii")]
|
||||
except Exception as e:
|
||||
logger.debug("safety_checks: lecture screenshot échouée (%s) — appel sans image", e)
|
||||
|
||||
try:
|
||||
response = requests.post(
|
||||
f"{ollama_url}/api/generate",
|
||||
json=payload,
|
||||
timeout=timeout_s,
|
||||
)
|
||||
if response.status_code != 200:
|
||||
logger.warning("[BUS] lea:safety_checks_llm_failed reason=http_status detail=%s", response.status_code)
|
||||
return []
|
||||
text = response.json().get("response", "").strip()
|
||||
except requests.Timeout:
|
||||
logger.warning("[BUS] lea:safety_checks_llm_failed reason=timeout detail=%ss", timeout_s)
|
||||
return []
|
||||
except Exception as e:
|
||||
logger.warning("[BUS] lea:safety_checks_llm_failed reason=network detail=%s", e)
|
||||
return []
|
||||
|
||||
# format=json garantit normalement du JSON valide
|
||||
try:
|
||||
parsed = json.loads(text)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning("[BUS] lea:safety_checks_llm_failed reason=json_decode detail=%s", e)
|
||||
return []
|
||||
|
||||
additional = parsed.get("additional_checks") or []
|
||||
if not isinstance(additional, list):
|
||||
return []
|
||||
|
||||
# Filtre + tronc
|
||||
valid = []
|
||||
for item in additional[:max_checks]:
|
||||
if isinstance(item, dict) and item.get("label"):
|
||||
valid.append({
|
||||
"label": str(item["label"])[:200],
|
||||
"evidence": str(item.get("evidence", ""))[:300],
|
||||
})
|
||||
return valid
|
||||
File diff suppressed because it is too large
Load Diff
601
agent_v0/server_v1/task_planner.py
Normal file
601
agent_v0/server_v1/task_planner.py
Normal file
@@ -0,0 +1,601 @@
|
||||
# agent_v0/server_v1/task_planner.py
|
||||
"""
|
||||
TaskPlanner — Planificateur MACRO pour RPA Vision V3.
|
||||
|
||||
Responsabilité : comprendre un ordre en langage naturel et l'exécuter.
|
||||
|
||||
"Traite les dossiers de janvier" →
|
||||
1. Comprendre l'instruction (gemma4)
|
||||
2. Trouver le workflow appris correspondant
|
||||
3. Identifier les paramètres/variables
|
||||
4. Exécuter (replay avec substitution) ou planifier (actions libres)
|
||||
|
||||
C'est le niveau MACRO de l'architecture 3 niveaux :
|
||||
MACRO (TaskPlanner) → décompose et orchestre
|
||||
MÉSO (Policy/Observer/Critic) → décide et vérifie
|
||||
MICRO (Grounding/Executor) → localise et clique
|
||||
|
||||
Ref: docs/PLAN_ACTEUR_V1.md — Phase 3 : Planificateur
|
||||
Ref: docs/VISION_RPA_INTELLIGENT.md — "Il observe" → "Il devient autonome"
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from core.detection import vlm_config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class TaskPlan:
|
||||
"""Plan d'exécution généré par le planificateur."""
|
||||
instruction: str # Instruction originale de l'utilisateur
|
||||
understood: bool = False # L'instruction a été comprise
|
||||
workflow_match: str = "" # ID du workflow correspondant (si trouvé)
|
||||
workflow_name: str = "" # Nom du workflow correspondant
|
||||
match_confidence: float = 0.0 # Confiance du match (0-1)
|
||||
parameters: Dict[str, Any] = field(default_factory=dict) # Variables extraites
|
||||
is_loop: bool = False # Boucle sur une liste d'éléments
|
||||
loop_source: str = "" # Source des éléments (écran, fichier, requête)
|
||||
steps: List[Dict[str, Any]] = field(default_factory=list) # Actions planifiées
|
||||
mode: str = "" # "replay" (workflow connu) ou "free" (actions générées)
|
||||
error: str = ""
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"instruction": self.instruction,
|
||||
"understood": self.understood,
|
||||
"workflow_match": self.workflow_match,
|
||||
"workflow_name": self.workflow_name,
|
||||
"match_confidence": round(self.match_confidence, 3),
|
||||
"parameters": self.parameters,
|
||||
"is_loop": self.is_loop,
|
||||
"loop_source": self.loop_source,
|
||||
"steps_count": len(self.steps),
|
||||
"mode": self.mode,
|
||||
"error": self.error,
|
||||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class TaskResult:
|
||||
"""Résultat de l'exécution d'une tâche."""
|
||||
instruction: str
|
||||
success: bool
|
||||
total_items: int = 1 # Nombre d'éléments traités (1 si pas de boucle)
|
||||
completed_items: int = 0
|
||||
failed_items: int = 0
|
||||
results: List[Dict[str, Any]] = field(default_factory=list)
|
||||
elapsed_s: float = 0.0
|
||||
summary: str = ""
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"instruction": self.instruction,
|
||||
"success": self.success,
|
||||
"total_items": self.total_items,
|
||||
"completed_items": self.completed_items,
|
||||
"failed_items": self.failed_items,
|
||||
"elapsed_s": round(self.elapsed_s, 1),
|
||||
"summary": self.summary,
|
||||
}
|
||||
|
||||
|
||||
class TaskPlanner:
|
||||
"""Planificateur MACRO — comprend les instructions et orchestre l'exécution.
|
||||
|
||||
Usage :
|
||||
planner = TaskPlanner()
|
||||
plan = planner.understand("traite les dossiers de janvier")
|
||||
result = planner.execute(plan, replay_callback=launch_replay)
|
||||
"""
|
||||
|
||||
def __init__(self, gemma4_port: str = "", domain_id: str = ""):
|
||||
# Endpoint VLM : piloté par config (Ollama local ou tunnel DGX = 11434).
|
||||
# GEMMA4_PORT conservé comme override legacy (ancien conteneur Docker 11435).
|
||||
_default_port = vlm_config.DEFAULT_OLLAMA_ENDPOINT.rsplit(":", 1)[-1]
|
||||
self._gemma4_port = gemma4_port or os.environ.get("GEMMA4_PORT", _default_port)
|
||||
self._gemma4_url = f"http://localhost:{self._gemma4_port}/api/chat"
|
||||
self._domain_id = domain_id or os.environ.get("RPA_DOMAIN", "generic")
|
||||
|
||||
# Charger le contexte métier
|
||||
try:
|
||||
from .domain_context import get_domain_context
|
||||
self._domain = get_domain_context(self._domain_id)
|
||||
except Exception:
|
||||
self._domain = None
|
||||
|
||||
def understand(
|
||||
self,
|
||||
instruction: str,
|
||||
available_workflows: Optional[List[Dict[str, Any]]] = None,
|
||||
screen_context: str = "",
|
||||
) -> TaskPlan:
|
||||
"""Comprendre une instruction en langage naturel.
|
||||
|
||||
Étape 1 : gemma4 analyse l'instruction et identifie :
|
||||
- Le type de tâche (ouvrir, traiter, rechercher, etc.)
|
||||
- Le workflow correspondant (s'il en existe un)
|
||||
- Les paramètres/variables (nom, date, fichier, etc.)
|
||||
- Si c'est une boucle (traiter TOUS les dossiers)
|
||||
|
||||
Args:
|
||||
instruction: L'ordre de l'utilisateur ("traite les dossiers de janvier")
|
||||
available_workflows: Liste des workflows connus [{name, description, session_id}]
|
||||
screen_context: Description de l'écran actuel (pour le contexte)
|
||||
"""
|
||||
import requests as _requests
|
||||
|
||||
plan = TaskPlan(instruction=instruction)
|
||||
|
||||
# Construire la liste des workflows disponibles pour le prompt (top 10)
|
||||
workflows_desc = "Aucun workflow enregistré."
|
||||
if available_workflows:
|
||||
top_workflows = available_workflows[:10]
|
||||
lines = []
|
||||
for i, wf in enumerate(top_workflows):
|
||||
name = wf.get("name", wf.get("session_id", f"workflow_{i}"))
|
||||
desc = wf.get("description", "")
|
||||
sid = wf.get("session_id", "")
|
||||
# Montrer la description métier pour aider le matching sémantique
|
||||
label = f"{name}"
|
||||
if desc:
|
||||
label += f" — {desc}"
|
||||
lines.append(f" {i+1}. {label} (id={sid})")
|
||||
workflows_desc = "\n".join(lines)
|
||||
|
||||
# Contexte métier
|
||||
domain_prompt = ""
|
||||
if self._domain and self._domain.system_prompt:
|
||||
domain_prompt = f"\nCONTEXTE MÉTIER :\n{self._domain.system_prompt}\n"
|
||||
|
||||
prompt = (
|
||||
f"Tu es le PLANIFICATEUR d'un robot RPA (Léa). "
|
||||
f"Analyse l'ordre utilisateur et identifie le workflow correspondant.\n"
|
||||
f"{domain_prompt}\n"
|
||||
f"WORKFLOWS DISPONIBLES :\n{workflows_desc}\n\n"
|
||||
f"ORDRE : \"{instruction}\"\n\n"
|
||||
f"RÈGLE DE MATCHING :\n"
|
||||
f"- Compare l'INTENTION de l'ordre avec la DESCRIPTION de chaque workflow\n"
|
||||
f"- \"Ouvre le bloc-notes\" correspond à un workflow décrit \"Ouvrir Bloc-notes via recherche\"\n"
|
||||
f"- Un workflow qui utilise la même application EST un match même si les mots diffèrent\n"
|
||||
f"- Si aucun workflow ne correspond, réponds WORKFLOW: AUCUN\n\n"
|
||||
f"Réponds EXACTEMENT dans ce format (une ligne par champ) :\n"
|
||||
f"COMPRIS: OUI\n"
|
||||
f"WORKFLOW: <numéro> (ou AUCUN)\n"
|
||||
f"CONFIANCE: <0.0 à 1.0>\n"
|
||||
f"PARAMETRES: clé1=valeur1, clé2=valeur2 (ou AUCUN)\n"
|
||||
f"BOUCLE: OUI ou NON\n"
|
||||
f"SOURCE_BOUCLE: écran, fichier, ou aucun\n"
|
||||
f"PLAN:\n"
|
||||
f"1. première étape\n"
|
||||
f"2. deuxième étape\n"
|
||||
)
|
||||
|
||||
try:
|
||||
resp = _requests.post(
|
||||
self._gemma4_url,
|
||||
json={
|
||||
"model": vlm_config.get_vlm_model(),
|
||||
"messages": [{"role": "user", "content": prompt}],
|
||||
"stream": False,
|
||||
"think": True,
|
||||
"options": {"temperature": 0.2, "num_predict": 800},
|
||||
},
|
||||
timeout=120,
|
||||
)
|
||||
|
||||
if not resp.ok:
|
||||
plan.error = f"gemma4 HTTP {resp.status_code}"
|
||||
return plan
|
||||
|
||||
content = resp.json().get("message", {}).get("content", "").strip()
|
||||
logger.info(f"TaskPlanner: réponse gemma4 ({len(content)} chars)")
|
||||
|
||||
# Parser la réponse
|
||||
plan = self._parse_understanding(plan, content, available_workflows)
|
||||
|
||||
except Exception as e:
|
||||
plan.error = f"gemma4 erreur: {e}"
|
||||
logger.warning(f"TaskPlanner: {plan.error}")
|
||||
|
||||
return plan
|
||||
|
||||
def _parse_understanding(
|
||||
self,
|
||||
plan: TaskPlan,
|
||||
content: str,
|
||||
available_workflows: Optional[List[Dict]] = None,
|
||||
) -> TaskPlan:
|
||||
"""Parser la réponse de gemma4 pour construire le plan.
|
||||
|
||||
Tolérant aux variations de format :
|
||||
- "COMPRIS : OUI" ou "COMPRIS: oui" ou "**COMPRIS:** OUI"
|
||||
- Numéros de workflow : "1", "1.", "#1", "Workflow 1"
|
||||
- Paramètres : "clé=valeur" ou "clé: valeur" sur la même ligne ou les suivantes
|
||||
"""
|
||||
import re
|
||||
|
||||
# Nettoyer le markdown (gras, italique)
|
||||
content_clean = re.sub(r'\*{1,2}([^*]+)\*{1,2}', r'\1', content)
|
||||
|
||||
in_params_section = False
|
||||
in_plan_section = False
|
||||
|
||||
for line in content_clean.split("\n"):
|
||||
line_clean = line.strip()
|
||||
if not line_clean:
|
||||
continue
|
||||
upper = line_clean.upper()
|
||||
|
||||
# --- COMPRIS ---
|
||||
if re.match(r'^COMPRIS\s*[:=]', upper):
|
||||
val = re.split(r'[:=]', upper, 1)[1].strip()
|
||||
plan.understood = "OUI" in val or "YES" in val or "TRUE" in val
|
||||
in_params_section = False
|
||||
in_plan_section = False
|
||||
|
||||
# --- WORKFLOW ---
|
||||
elif re.match(r'^WORKFLOW\s*[:=]', upper):
|
||||
val = line_clean.split(":", 1)[1].strip() if ":" in line_clean else line_clean.split("=", 1)[1].strip()
|
||||
val_upper = val.upper().strip()
|
||||
in_params_section = False
|
||||
in_plan_section = False
|
||||
if val_upper in ("AUCUN", "NONE", "NON", "N/A", "-", ""):
|
||||
continue
|
||||
# Extraire le numéro : "1", "1.", "#1", "Workflow 1", "1 (Bloc-notes)"
|
||||
num_match = re.search(r'(\d+)', val)
|
||||
if num_match and available_workflows:
|
||||
idx = int(num_match.group(1)) - 1
|
||||
if 0 <= idx < len(available_workflows):
|
||||
wf = available_workflows[idx]
|
||||
plan.workflow_match = wf.get("session_id", "")
|
||||
plan.workflow_name = wf.get("name", "")
|
||||
plan.match_confidence = 0.8
|
||||
plan.mode = "replay"
|
||||
|
||||
# --- CONFIANCE ---
|
||||
elif re.match(r'^CONFIANCE\s*[:=]', upper):
|
||||
val = re.split(r'[:=]', line_clean, 1)[1].strip()
|
||||
in_params_section = False
|
||||
in_plan_section = False
|
||||
# Extraire un float : "0.9", "0,9", "90%"
|
||||
float_match = re.search(r'(\d+[.,]\d+)', val)
|
||||
if float_match:
|
||||
try:
|
||||
plan.match_confidence = float(float_match.group(1).replace(",", "."))
|
||||
except ValueError:
|
||||
pass
|
||||
elif "%" in val:
|
||||
pct_match = re.search(r'(\d+)', val)
|
||||
if pct_match:
|
||||
plan.match_confidence = int(pct_match.group(1)) / 100.0
|
||||
|
||||
# --- PARAMETRES ---
|
||||
elif re.match(r'^PARAM[EÈ]TRES?\s*[:=]', upper):
|
||||
val = re.split(r'[:=]', line_clean, 1)[1].strip()
|
||||
in_plan_section = False
|
||||
val_upper = val.upper().strip()
|
||||
if val_upper in ("AUCUN", "NONE", "NON", "N/A", "-"):
|
||||
in_params_section = False
|
||||
continue
|
||||
# Vide = paramètres sur les lignes suivantes
|
||||
in_params_section = True
|
||||
if val and val_upper not in ("", ):
|
||||
# Paramètres sur la même ligne : "clé1=val1, clé2=val2"
|
||||
self._extract_params_from_line(val, plan)
|
||||
|
||||
# --- BOUCLE ---
|
||||
elif re.match(r'^BOUCLE\s*[:=]', upper):
|
||||
val = re.split(r'[:=]', upper, 1)[1].strip()
|
||||
plan.is_loop = "OUI" in val or "YES" in val or "TRUE" in val
|
||||
in_params_section = False
|
||||
in_plan_section = False
|
||||
|
||||
# --- SOURCE_BOUCLE ---
|
||||
elif re.match(r'^SOURCE[_ ]BOUCLE\s*[:=]', upper):
|
||||
plan.loop_source = re.split(r'[:=]', line_clean, 1)[1].strip()
|
||||
in_params_section = False
|
||||
in_plan_section = False
|
||||
|
||||
# --- PLAN ---
|
||||
elif re.match(r'^PLAN\s*[:=]?\s*$', upper) or upper == "PLAN:":
|
||||
in_plan_section = True
|
||||
in_params_section = False
|
||||
|
||||
# --- Lignes de contenu (paramètres d'abord, puis étapes) ---
|
||||
elif in_params_section and ("=" in line_clean or ": " in line_clean):
|
||||
self._extract_params_from_line(line_clean, plan)
|
||||
|
||||
elif in_plan_section and re.match(r'^(\d+[.)]\s+|- )', line_clean):
|
||||
plan.steps.append({"description": line_clean})
|
||||
|
||||
elif re.match(r'^(\d+[.)]\s+|- )', line_clean) and not in_params_section:
|
||||
# Étape numérotée en dehors d'une section explicite
|
||||
plan.steps.append({"description": line_clean})
|
||||
|
||||
# Si pas de workflow trouvé mais compris → mode libre
|
||||
if plan.understood and not plan.workflow_match:
|
||||
plan.mode = "free"
|
||||
|
||||
return plan
|
||||
|
||||
@staticmethod
|
||||
def _extract_params_from_line(text: str, plan: TaskPlan) -> None:
|
||||
"""Extraire des paramètres clé=valeur ou clé: valeur d'une ligne."""
|
||||
import re
|
||||
text = text.strip().strip("- ")
|
||||
# Ignorer les labels de section
|
||||
if re.match(r'^(COMPRIS|WORKFLOW|BOUCLE|SOURCE|PLAN|CONFIANCE)', text.upper()):
|
||||
return
|
||||
# Essayer clé=valeur d'abord
|
||||
if "=" in text:
|
||||
for part in text.split(","):
|
||||
part = part.strip()
|
||||
if "=" in part:
|
||||
k, v = part.split("=", 1)
|
||||
k, v = k.strip().strip("- "), v.strip()
|
||||
if k and v and v.upper() not in ("AUCUN", "NONE"):
|
||||
plan.parameters[k] = v
|
||||
# Sinon clé: valeur (mais pas les labels de section)
|
||||
elif ": " in text:
|
||||
k, v = text.split(": ", 1)
|
||||
k, v = k.strip().strip("- "), v.strip()
|
||||
if k and v and len(k) < 30 and v.upper() not in ("AUCUN", "NONE"):
|
||||
plan.parameters[k] = v
|
||||
|
||||
def execute(
|
||||
self,
|
||||
plan: TaskPlan,
|
||||
replay_callback=None,
|
||||
machine_id: str = "default",
|
||||
) -> TaskResult:
|
||||
"""Exécuter un plan.
|
||||
|
||||
Deux modes :
|
||||
1. "replay" : relancer un workflow enregistré avec substitution de variables
|
||||
2. "free" : exécuter les actions planifiées par gemma4
|
||||
|
||||
Args:
|
||||
plan: Le plan généré par understand()
|
||||
replay_callback: Fonction qui lance un replay
|
||||
signature: (session_id, machine_id, params) → replay_id
|
||||
machine_id: Machine cible pour l'exécution
|
||||
"""
|
||||
t_start = time.time()
|
||||
result = TaskResult(instruction=plan.instruction, success=False)
|
||||
|
||||
if not plan.understood:
|
||||
result.summary = f"Instruction non comprise : {plan.error or 'réponse gemma4 invalide'}"
|
||||
return result
|
||||
|
||||
if plan.mode == "replay" and plan.workflow_match:
|
||||
# Mode replay : relancer un workflow connu
|
||||
result = self._execute_replay(plan, replay_callback, machine_id)
|
||||
|
||||
elif plan.mode == "free" and plan.steps:
|
||||
# Mode libre : actions planifiées par gemma4
|
||||
result = self._execute_free(plan, replay_callback, machine_id)
|
||||
|
||||
else:
|
||||
result.summary = "Pas de workflow correspondant et pas d'actions planifiées"
|
||||
|
||||
result.elapsed_s = time.time() - t_start
|
||||
return result
|
||||
|
||||
def _execute_replay(
|
||||
self,
|
||||
plan: TaskPlan,
|
||||
replay_callback,
|
||||
machine_id: str,
|
||||
) -> TaskResult:
|
||||
"""Exécuter en mode replay (workflow connu)."""
|
||||
result = TaskResult(instruction=plan.instruction, success=False)
|
||||
|
||||
if not replay_callback:
|
||||
result.summary = "Pas de callback replay configuré"
|
||||
return result
|
||||
|
||||
if plan.is_loop:
|
||||
# Boucle : TODO — lister les éléments puis itérer
|
||||
# Pour l'instant, exécution simple
|
||||
logger.info(
|
||||
f"TaskPlanner: boucle détectée mais pas encore implémentée, "
|
||||
f"exécution simple du workflow {plan.workflow_name}"
|
||||
)
|
||||
|
||||
try:
|
||||
replay_id = replay_callback(
|
||||
session_id=plan.workflow_match,
|
||||
machine_id=machine_id,
|
||||
params=plan.parameters,
|
||||
)
|
||||
result.success = True
|
||||
result.completed_items = 1
|
||||
result.total_items = 1
|
||||
result.summary = (
|
||||
f"Workflow '{plan.workflow_name}' lancé (replay={replay_id})"
|
||||
f" avec paramètres {plan.parameters}" if plan.parameters else ""
|
||||
)
|
||||
result.results.append({
|
||||
"replay_id": replay_id,
|
||||
"workflow": plan.workflow_name,
|
||||
"params": plan.parameters,
|
||||
})
|
||||
except Exception as e:
|
||||
result.summary = f"Erreur lancement replay : {e}"
|
||||
logger.error(f"TaskPlanner: {result.summary}")
|
||||
|
||||
return result
|
||||
|
||||
def _execute_free(
|
||||
self,
|
||||
plan: TaskPlan,
|
||||
replay_callback,
|
||||
machine_id: str,
|
||||
) -> TaskResult:
|
||||
"""Exécuter en mode libre (actions planifiées par gemma4)."""
|
||||
result = TaskResult(instruction=plan.instruction, success=False)
|
||||
|
||||
# Convertir les étapes en actions replay
|
||||
actions = self._steps_to_actions(plan.steps, plan.parameters)
|
||||
|
||||
if not actions:
|
||||
result.summary = "Impossible de convertir le plan en actions exécutables"
|
||||
return result
|
||||
|
||||
if replay_callback:
|
||||
try:
|
||||
replay_id = replay_callback(
|
||||
actions=actions,
|
||||
machine_id=machine_id,
|
||||
task_description=plan.instruction,
|
||||
)
|
||||
result.success = True
|
||||
result.completed_items = 1
|
||||
result.summary = f"Plan libre exécuté ({len(actions)} actions, replay={replay_id})"
|
||||
except Exception as e:
|
||||
result.summary = f"Erreur exécution plan libre : {e}"
|
||||
else:
|
||||
result.summary = f"Plan prêt ({len(actions)} actions) mais pas de callback"
|
||||
result.results = actions
|
||||
|
||||
return result
|
||||
|
||||
def _steps_to_actions(
|
||||
self,
|
||||
steps: List[Dict[str, Any]],
|
||||
parameters: Dict[str, Any],
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Convertir les étapes textuelles en actions replay.
|
||||
|
||||
Utilise gemma4 pour traduire chaque étape en action structurée.
|
||||
Les types d'actions supportés : click, type, key_combo, wait.
|
||||
"""
|
||||
import re
|
||||
import requests as _requests
|
||||
|
||||
steps_text = "\n".join(
|
||||
s.get("description", str(s)) for s in steps
|
||||
)
|
||||
|
||||
prompt = (
|
||||
"Convertis ces étapes RPA en actions JSON.\n\n"
|
||||
f"ÉTAPES :\n{steps_text}\n\n"
|
||||
f"PARAMÈTRES : {json.dumps(parameters, ensure_ascii=False)}\n\n"
|
||||
"TYPES D'ACTIONS DISPONIBLES :\n"
|
||||
'- Cliquer : {"type": "click", "target_spec": {"by_text": "texte du bouton"}}\n'
|
||||
'- Taper du texte : {"type": "type", "text": "texte à taper"}\n'
|
||||
'- Raccourci clavier : {"type": "key_combo", "keys": ["ctrl", "s"]}\n'
|
||||
'- Attendre : {"type": "wait", "duration_ms": 2000}\n\n'
|
||||
"RÈGLES :\n"
|
||||
"- UNE action JSON par ligne\n"
|
||||
"- Pas de commentaires, pas de texte autour, JUSTE le JSON\n"
|
||||
"- Utilise les paramètres fournis dans les valeurs\n\n"
|
||||
"ACTIONS :\n"
|
||||
)
|
||||
|
||||
try:
|
||||
resp = _requests.post(
|
||||
self._gemma4_url,
|
||||
json={
|
||||
"model": vlm_config.get_vlm_model(),
|
||||
"messages": [{"role": "user", "content": prompt}],
|
||||
"stream": False,
|
||||
"think": True,
|
||||
"options": {"temperature": 0.1, "num_predict": 1500},
|
||||
},
|
||||
timeout=120,
|
||||
)
|
||||
|
||||
if not resp.ok:
|
||||
return []
|
||||
|
||||
content = resp.json().get("message", {}).get("content", "")
|
||||
return self._parse_actions_json(content)
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"TaskPlanner: conversion étapes échouée : {e}")
|
||||
return []
|
||||
|
||||
@staticmethod
|
||||
def _parse_actions_json(content: str) -> List[Dict[str, Any]]:
|
||||
"""Parser des actions JSON depuis une réponse VLM.
|
||||
|
||||
Tolère :
|
||||
- Un JSON par ligne
|
||||
- Un tableau JSON [...]
|
||||
- Du texte autour des JSON (markdown, commentaires)
|
||||
- Des objets imbriqués (target_spec)
|
||||
"""
|
||||
import re
|
||||
|
||||
actions = []
|
||||
valid_types = {"click", "type", "key_combo", "wait"}
|
||||
|
||||
# Stratégie 1 : essayer de parser comme un tableau JSON
|
||||
array_match = re.search(r'\[[\s\S]*\]', content)
|
||||
if array_match:
|
||||
try:
|
||||
parsed = json.loads(array_match.group())
|
||||
if isinstance(parsed, list):
|
||||
for item in parsed:
|
||||
if isinstance(item, dict) and item.get("type") in valid_types:
|
||||
if item["type"] == "click":
|
||||
item["visual_mode"] = True
|
||||
actions.append(item)
|
||||
if actions:
|
||||
return actions
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
# Stratégie 2 : extraire les objets JSON individuels (supporte imbrication)
|
||||
# Trouver chaque { ... } en gérant les accolades imbriquées
|
||||
i = 0
|
||||
while i < len(content):
|
||||
if content[i] == '{':
|
||||
depth = 0
|
||||
start = i
|
||||
while i < len(content):
|
||||
if content[i] == '{':
|
||||
depth += 1
|
||||
elif content[i] == '}':
|
||||
depth -= 1
|
||||
if depth == 0:
|
||||
candidate = content[start:i+1]
|
||||
try:
|
||||
action = json.loads(candidate)
|
||||
if isinstance(action, dict) and action.get("type") in valid_types:
|
||||
if action["type"] == "click":
|
||||
action["visual_mode"] = True
|
||||
actions.append(action)
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
break
|
||||
i += 1
|
||||
i += 1
|
||||
|
||||
return actions
|
||||
|
||||
def list_capabilities(
|
||||
self,
|
||||
available_workflows: List[Dict[str, Any]],
|
||||
) -> str:
|
||||
"""Lister ce que Léa sait faire (pour l'interface utilisateur)."""
|
||||
if not available_workflows:
|
||||
return "Léa n'a pas encore appris de workflows. Enregistrez-en un d'abord."
|
||||
|
||||
lines = ["Léa sait faire :"]
|
||||
for wf in available_workflows:
|
||||
name = wf.get("name", "?")
|
||||
desc = wf.get("description", "")
|
||||
lines.append(f" - {name}" + (f" ({desc})" if desc else ""))
|
||||
|
||||
lines.append("")
|
||||
lines.append("Dites-lui ce que vous voulez faire en langage naturel.")
|
||||
return "\n".join(lines)
|
||||
@@ -34,8 +34,16 @@ class StreamWorker:
|
||||
self.running = False
|
||||
self.processed_files: Set[str] = set()
|
||||
|
||||
# StreamProcessor partagé (créé si non fourni)
|
||||
self.processor = processor or StreamProcessor(data_dir=str(self.live_dir))
|
||||
# StreamProcessor partagé (créé si non fourni). En mode standalone,
|
||||
# live_dir pointe normalement vers data/training/live_sessions ; le
|
||||
# processor doit garder data/training comme racine pour workflows/.
|
||||
processor_data_dir = (
|
||||
self.live_dir.parent if self.live_dir.name == "live_sessions" else self.live_dir
|
||||
)
|
||||
self.processor = processor or StreamProcessor(
|
||||
data_dir=str(processor_data_dir),
|
||||
enable_vlm=True,
|
||||
)
|
||||
|
||||
self._thread: threading.Thread = None
|
||||
|
||||
|
||||
256
agent_v0/server_v1/workflow_replay.py
Normal file
256
agent_v0/server_v1/workflow_replay.py
Normal file
@@ -0,0 +1,256 @@
|
||||
"""
|
||||
workflow_replay.py — Pont entre le WorkflowRunner et le replay Agent V1.
|
||||
|
||||
Convertit un Workflow enrichi (avec embeddings CLIP + FAISS) en actions
|
||||
de replay pour l'Agent V1, avec vérification FAISS à chaque étape.
|
||||
|
||||
Architecture :
|
||||
Workflow (nodes + edges + embeddings)
|
||||
→ pour chaque edge : action + embedding du node source
|
||||
→ FAISS vérifie que l'écran actuel correspond au node attendu
|
||||
→ si OK : exécuter l'action normalement
|
||||
→ si MISMATCH : stopper ou adapter
|
||||
|
||||
Auteur : Dom + Claude
|
||||
Date : 5 avril 2026
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import numpy as np
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def build_workflow_replay(
|
||||
workflow_path: str,
|
||||
session_dir: str,
|
||||
faiss_manager=None,
|
||||
clip_embedder=None,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Convertir un Workflow enrichi en actions de replay avec vérification FAISS.
|
||||
|
||||
Chaque action de clic est enrichie avec :
|
||||
- L'embedding CLIP du node source (pour vérification au replay)
|
||||
- Le titre de fenêtre attendu
|
||||
- Les textes OCR du node (pour le grounding)
|
||||
|
||||
Args:
|
||||
workflow_path: Chemin vers le workflow JSON
|
||||
session_dir: Répertoire de la session (pour les screenshots/crops)
|
||||
faiss_manager: FAISSManager pré-chargé (optionnel, créé si None)
|
||||
clip_embedder: CLIPEmbedder pré-chargé (optionnel, créé si None)
|
||||
|
||||
Returns:
|
||||
Liste d'actions prêtes pour la queue de replay Agent V1.
|
||||
"""
|
||||
import json
|
||||
import uuid
|
||||
|
||||
# Charger le workflow
|
||||
with open(workflow_path) as f:
|
||||
wf_data = json.load(f)
|
||||
|
||||
nodes = {n["node_id"]: n for n in wf_data.get("nodes", [])}
|
||||
edges = wf_data.get("edges", [])
|
||||
entry_nodes = wf_data.get("entry_nodes", [])
|
||||
|
||||
if not nodes or not edges:
|
||||
logger.warning("Workflow vide : %d nodes, %d edges", len(nodes), len(edges))
|
||||
return []
|
||||
|
||||
logger.info(
|
||||
"Workflow '%s' chargé : %d nodes, %d edges",
|
||||
wf_data.get("name", "?"), len(nodes), len(edges),
|
||||
)
|
||||
|
||||
# Construire la séquence d'actions depuis le graphe (BFS linéaire)
|
||||
actions = []
|
||||
visited = set()
|
||||
current_node_id = entry_nodes[0] if entry_nodes else list(nodes.keys())[0]
|
||||
|
||||
while current_node_id and current_node_id not in visited:
|
||||
visited.add(current_node_id)
|
||||
node = nodes.get(current_node_id)
|
||||
if not node:
|
||||
break
|
||||
|
||||
# Trouver l'edge sortant
|
||||
outgoing = [e for e in edges if e.get("from_node") == current_node_id]
|
||||
if not outgoing:
|
||||
break
|
||||
|
||||
edge = outgoing[0] # Premier edge (linéaire)
|
||||
action_data = edge.get("action", {})
|
||||
next_node_id = edge.get("to_node")
|
||||
next_node = nodes.get(next_node_id, {})
|
||||
|
||||
# Extraire les infos du node source pour la vérification
|
||||
node_metadata = node.get("metadata", {})
|
||||
node_title = node_metadata.get("window_title", "")
|
||||
|
||||
# Extraire les infos de l'action
|
||||
action_type = action_data.get("type", "unknown")
|
||||
target = action_data.get("target", {})
|
||||
params = action_data.get("parameters", {})
|
||||
|
||||
if action_type == "compound":
|
||||
# Actions compound : décomposer en étapes
|
||||
steps = params.get("steps", [])
|
||||
for step in steps:
|
||||
step_type = step.get("type", "unknown")
|
||||
step_action = {
|
||||
"action_id": f"wf_{uuid.uuid4().hex[:8]}",
|
||||
"type": _map_action_type(step_type),
|
||||
"workflow_node": current_node_id,
|
||||
"expected_window_title": node_title,
|
||||
}
|
||||
|
||||
if step_type == "mouse_click":
|
||||
step_action["x_pct"] = step.get("x_pct", 0)
|
||||
step_action["y_pct"] = step.get("y_pct", 0)
|
||||
step_action["button"] = step.get("button", "left")
|
||||
step_action["visual_mode"] = True
|
||||
# Target spec pour le grounding
|
||||
step_action["target_spec"] = {
|
||||
"by_text": target.get("by_text", ""),
|
||||
"by_role": target.get("by_role", ""),
|
||||
"by_text_source": "ocr" if target.get("by_text") else "",
|
||||
"window_title": node_title,
|
||||
"original_position": {
|
||||
"y_relative": "",
|
||||
"x_relative": "",
|
||||
},
|
||||
}
|
||||
_merge_semantic_target_fields(
|
||||
step_action["target_spec"],
|
||||
target,
|
||||
params,
|
||||
step,
|
||||
)
|
||||
target_label = _first_non_empty_text(
|
||||
step_action["target_spec"].get("by_text"),
|
||||
step_action["target_spec"].get("target_text"),
|
||||
step_action["target_spec"].get("description"),
|
||||
step_action["target_spec"].get("ocr_description"),
|
||||
step_action["target_spec"].get("vlm_description"),
|
||||
)
|
||||
if target_label:
|
||||
step_action.setdefault(
|
||||
"target_text",
|
||||
step_action["target_spec"].get("target_text") or target_label,
|
||||
)
|
||||
step_action.setdefault("target_description", target_label)
|
||||
# Ajouter le crop anchor si disponible
|
||||
_attach_anchor(step_action, step, session_dir)
|
||||
|
||||
elif step_type in ("text_input", "key_press"):
|
||||
if step_type == "text_input":
|
||||
step_action["type"] = "type"
|
||||
step_action["text"] = step.get("text", "")
|
||||
else:
|
||||
step_action["type"] = "key_combo"
|
||||
step_action["keys"] = step.get("keys", [])
|
||||
|
||||
elif step_type == "wait":
|
||||
step_action["type"] = "wait"
|
||||
step_action["duration_ms"] = step.get("duration_ms", 500)
|
||||
|
||||
actions.append(step_action)
|
||||
|
||||
# Passer au node suivant
|
||||
current_node_id = next_node_id
|
||||
|
||||
# Ajouter expected_window_title pour la post-vérification
|
||||
click_indices = [i for i, a in enumerate(actions) if a.get("type") == "click"]
|
||||
for j, ci in enumerate(click_indices):
|
||||
if j + 1 < len(click_indices):
|
||||
next_ci = click_indices[j + 1]
|
||||
next_title = actions[next_ci].get("expected_window_title", "")
|
||||
if next_title:
|
||||
actions[ci]["expected_window_title"] = next_title
|
||||
|
||||
logger.info("Workflow → %d actions de replay", len(actions))
|
||||
return actions
|
||||
|
||||
|
||||
def _map_action_type(step_type: str) -> str:
|
||||
"""Mapper les types d'action du workflow vers les types de replay."""
|
||||
mapping = {
|
||||
"mouse_click": "click",
|
||||
"text_input": "type",
|
||||
"key_press": "key_combo",
|
||||
"wait": "wait",
|
||||
"scroll": "scroll",
|
||||
}
|
||||
return mapping.get(step_type, step_type)
|
||||
|
||||
|
||||
_TARGET_SEMANTIC_KEYS = (
|
||||
"by_text",
|
||||
"by_role",
|
||||
"anchor_id",
|
||||
"target_text",
|
||||
"ocr_description",
|
||||
"description",
|
||||
"vlm_description",
|
||||
"by_text_source",
|
||||
"anchor_bbox",
|
||||
"original_size",
|
||||
)
|
||||
|
||||
|
||||
def _first_non_empty_text(*values: Any) -> str:
|
||||
for value in values:
|
||||
text = str(value or "").strip()
|
||||
if text and text.casefold() not in {"none", "null"}:
|
||||
return text
|
||||
return ""
|
||||
|
||||
|
||||
def _merge_semantic_target_fields(
|
||||
target_spec: Dict[str, Any],
|
||||
*sources: Dict[str, Any],
|
||||
) -> None:
|
||||
for source in sources:
|
||||
if not isinstance(source, dict):
|
||||
continue
|
||||
visual_anchor = source.get("visual_anchor") or {}
|
||||
if isinstance(visual_anchor, dict):
|
||||
_merge_semantic_target_fields(target_spec, visual_anchor)
|
||||
for key in _TARGET_SEMANTIC_KEYS:
|
||||
value = source.get(key)
|
||||
if value and not target_spec.get(key):
|
||||
target_spec[key] = value
|
||||
|
||||
if not target_spec.get("by_text"):
|
||||
target_text = _first_non_empty_text(target_spec.get("target_text"))
|
||||
if target_text:
|
||||
target_spec["by_text"] = target_text
|
||||
target_spec.setdefault("by_text_source", "visual_anchor")
|
||||
|
||||
if not target_spec.get("vlm_description"):
|
||||
description = _first_non_empty_text(
|
||||
target_spec.get("description"),
|
||||
target_spec.get("ocr_description"),
|
||||
)
|
||||
if description:
|
||||
target_spec["vlm_description"] = description
|
||||
|
||||
|
||||
def _attach_anchor(action: dict, step: dict, session_dir: str) -> None:
|
||||
"""Attacher le crop anchor au target_spec si disponible."""
|
||||
import base64
|
||||
|
||||
# Chercher le crop dans le session_dir
|
||||
screenshot_id = step.get("screenshot_id", "")
|
||||
if screenshot_id and session_dir:
|
||||
crop_path = Path(session_dir) / "shots" / f"{screenshot_id}_crop.png"
|
||||
if crop_path.is_file():
|
||||
action["target_spec"]["anchor_image_base64"] = base64.b64encode(
|
||||
crop_path.read_bytes()
|
||||
).decode()
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user