Compare commits
185 Commits
c2dc8f8fe4
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
16ff396dbf | ||
|
|
e44fd7b328 | ||
|
|
66815b7a1a | ||
|
|
c6b695eca8 | ||
|
|
99d2083dea | ||
|
|
a718086140 | ||
|
|
c82979e72b | ||
|
|
2185c41cc1 | ||
|
|
26804eb123 | ||
|
|
d71d5df4a8 | ||
|
|
6829ad8e79 | ||
|
|
8903f35433 | ||
|
|
4ab2c15e5c | ||
|
|
eba6fea779 | ||
|
|
f04398d5a7 | ||
|
|
4ce9c47f45 | ||
|
|
9dfcdb5fb0 | ||
|
|
3efe15d2c7 | ||
|
|
9d87ed64c5 | ||
|
|
00134963e5 | ||
|
|
0ec5e2a25b | ||
|
|
0c5fffe951 | ||
|
|
5027ed9a23 | ||
|
|
6caab2c600 | ||
|
|
552e66dbf6 | ||
|
|
de1026ee2e | ||
|
|
7b50725bf8 | ||
|
|
7feef3b6a9 | ||
|
|
0b06db222d | ||
|
|
74ee0dadee | ||
|
|
0b452f975a | ||
|
|
6ab385d671 | ||
|
|
b3eab83a0f | ||
|
|
27490849a8 | ||
|
|
cebbf0809a | ||
|
|
3e227d28ad | ||
|
|
8ce63fcba2 | ||
|
|
4202431421 | ||
|
|
4923623dd4 | ||
|
|
84181cc982 | ||
|
|
7355d315a3 | ||
|
|
c50adab3a1 | ||
|
|
2fbb305f65 | ||
|
|
ff581be397 | ||
|
|
203e5cc6c1 | ||
|
|
d1b556b6cd | ||
|
|
729cd67743 | ||
|
|
73ddcdb29d | ||
|
|
14a9442343 | ||
|
|
5da4581e76 | ||
|
|
cbe8dc95d2 | ||
|
|
04a14a56b2 | ||
|
|
2290f1846b | ||
|
|
c57b40ae1d | ||
|
|
bc21b27da7 | ||
|
|
6a2248ddcd | ||
|
|
82d7b38cff | ||
|
|
6c7f88c05d | ||
|
|
447fbb2c6e | ||
|
|
623be15bfe | ||
|
|
55d5aebbd2 | ||
|
|
73b731fef8 | ||
|
|
ffd97ae9a5 | ||
|
|
d168833609 | ||
|
|
23a06a744c | ||
|
|
af4eae28b9 | ||
|
|
c198c930a1 | ||
|
|
e3efef2fe7 | ||
|
|
95fddeebb3 | ||
|
|
71523cebd3 | ||
|
|
3aa806a630 | ||
|
|
588c8f22c1 | ||
|
|
3d243d731d | ||
|
|
2431a6c9e9 | ||
|
|
969236da03 | ||
|
|
f30461b88c | ||
|
|
f34eca20f9 | ||
|
|
309dfd5287 | ||
|
|
f5a672d7b9 | ||
|
|
1acea85fa6 | ||
|
|
4f61741420 | ||
|
|
2fa864b5c7 | ||
|
|
10739c33fa | ||
|
|
39bea1b042 | ||
|
|
26b4e6d8ce | ||
|
|
4fb84b1090 | ||
|
|
7f2bc6fe97 | ||
|
|
eded968c70 | ||
|
|
53d29d9b24 | ||
|
|
690053bd57 | ||
|
|
c7b0649716 | ||
|
|
2bfcfa4535 | ||
|
|
b808e48b1f | ||
|
|
78ee962918 | ||
|
|
c8a3618e27 | ||
|
|
9ca277a63f | ||
|
|
8c7b6e5696 | ||
|
|
af4ffa189a | ||
|
|
42f571d496 | ||
|
|
36737cfe9d | ||
|
|
93ef93e563 | ||
|
|
376e4a88b3 | ||
|
|
bb4ed2a75d | ||
|
|
f7b8cddd2b | ||
|
|
a9a99953dd | ||
|
|
aee64f54b1 | ||
|
|
c77844fa9a | ||
|
|
013fe071a2 | ||
|
|
203dc00d53 | ||
|
|
e9a028134a | ||
|
|
01bba7bc6c | ||
|
|
d5285de99c | ||
|
|
33c198b827 | ||
|
|
816b37af98 | ||
|
|
d82aad984f | ||
|
|
057c37131f | ||
|
|
9bcce3fc68 | ||
|
|
f96f6322ec | ||
|
|
02ee2d7b5b | ||
|
|
47993e2ee9 | ||
|
|
7cc03f6f10 | ||
|
|
a21f1ea9fa | ||
|
|
9188bd7df1 | ||
|
|
f82753debe | ||
|
|
b92cb9db03 | ||
|
|
e66629ce1a | ||
|
|
cecdf417b7 | ||
|
|
56e3cc052a | ||
|
|
332366b58c | ||
|
|
ac9c207474 | ||
|
|
f85d56ac05 | ||
|
|
172167f6c0 | ||
|
|
42d49dd8bd | ||
|
|
f541bb8ce4 | ||
|
|
a6eb4c168f | ||
|
|
f6ad5ff2b2 | ||
|
|
2ac781343a | ||
|
|
bffcfb2db3 | ||
|
|
cc673755f7 | ||
|
|
4509038bf0 | ||
|
|
99041f0117 | ||
|
|
72a9651b94 | ||
|
|
8589e87a13 | ||
|
|
8a1dfc6e8b | ||
|
|
3bcf59e16f | ||
|
|
46206d9396 | ||
|
|
d3e928bebe | ||
|
|
a679fbb62b | ||
|
|
f0b311306d | ||
|
|
1c5ff42006 | ||
|
|
b09a3df054 | ||
|
|
fceb76de1f | ||
|
|
6d4ff4f215 | ||
|
|
2486e43def | ||
|
|
20b74286f7 | ||
|
|
a1c97504ab | ||
|
|
d6c7346898 | ||
|
|
90ee8ca8f4 | ||
|
|
84a91630e9 | ||
|
|
91614fbff0 | ||
|
|
c1ce6a3964 | ||
|
|
0bd0fbb8c5 | ||
|
|
394342be7e | ||
|
|
6724f43950 | ||
|
|
d99b17394a | ||
|
|
875367dea9 | ||
|
|
a74056ca22 | ||
|
|
6937b94f2a | ||
|
|
4f5c518d3a | ||
|
|
7dec3ab63a | ||
|
|
68d5bb7dd1 | ||
|
|
ef5d595d98 | ||
|
|
5ceee9c393 | ||
|
|
5e0b53cfd1 | ||
|
|
e8a8a588c1 | ||
|
|
18792fd7b4 | ||
|
|
1e8e2dd9f3 | ||
|
|
1253a40051 | ||
|
|
a92d04621a | ||
|
|
13390a71e7 | ||
|
|
4c76dca992 | ||
|
|
2ddccff108 | ||
|
|
3417f09598 | ||
|
|
bbe506c63a | ||
|
|
647aa610fd |
@@ -30,7 +30,9 @@ DASHBOARD_PORT=5001
|
||||
CLIP_MODEL=ViT-B-32
|
||||
CLIP_PRETRAINED=openai
|
||||
CLIP_DEVICE=cpu # cpu or cuda
|
||||
VLM_MODEL=qwen3-vl:8b
|
||||
RPA_VLM_MODEL=gemma4:latest # gemma4:latest (défaut), qwen3-vl:8b, ui-tars (fallback)
|
||||
VLM_MODEL=gemma4:latest # alias de compatibilité
|
||||
# VLM_ALLOW_CLOUD=false # true pour activer les APIs cloud en fallback (OpenAI, Gemini, Anthropic)
|
||||
VLM_ENDPOINT=http://localhost:11434
|
||||
OWL_MODEL=google/owlv2-base-patch16-ensemble
|
||||
OWL_CONFIDENCE_THRESHOLD=0.1
|
||||
|
||||
207
.gitea/workflows/security-audit.yml
Normal file
207
.gitea/workflows/security-audit.yml
Normal file
@@ -0,0 +1,207 @@
|
||||
# ------------------------------------------------------------------
|
||||
# Audit sécurité — bandit + pip-audit + scan secrets
|
||||
# ------------------------------------------------------------------
|
||||
# Jamais bloquant : on reporte les warnings, on ne casse pas la CI.
|
||||
# Utile pour détecter les dérives progressives (nouveaux CVE, secrets
|
||||
# oubliés dans un commit, patterns risqués).
|
||||
#
|
||||
# Fréquence : à chaque push sur main + hebdo (cron).
|
||||
# ------------------------------------------------------------------
|
||||
name: security-audit
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
schedule:
|
||||
# Tous les lundis à 6h UTC (7h Paris hiver, 8h Paris été).
|
||||
- cron: "0 6 * * 1"
|
||||
workflow_dispatch: {}
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
# ----------------------------------------------------------------
|
||||
# Job 1 — bandit (bonnes pratiques sécu Python)
|
||||
# ----------------------------------------------------------------
|
||||
bandit:
|
||||
name: Bandit (scan statique)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
continue-on-error: true
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Python 3.12
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.12"
|
||||
cache: "pip"
|
||||
|
||||
- name: Installation bandit
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install "bandit[toml]==1.7.10"
|
||||
|
||||
- name: Scan bandit sur core/
|
||||
run: |
|
||||
# Pas de -l/-ll : toutes les sévérités sont remontées (-ll filtrerait à MEDIUM minimum)
|
||||
# Pas de -i/-ii : toutes les confiances sont remontées (-ii filtrerait à MEDIUM minimum)
|
||||
# --skip B101,B404,B603 : on ignore les asserts (B101, usuels en tests/validation), l'import de subprocess (B404) et les appels subprocess sans shell=True (B603)
|
||||
bandit -r core/ \
|
||||
--skip B101,B404,B603 \
|
||||
--format txt \
|
||||
--exit-zero \
|
||||
--output bandit-report.txt
|
||||
echo "=== RAPPORT BANDIT ==="
|
||||
cat bandit-report.txt
|
||||
|
||||
- name: Upload rapport bandit
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: bandit-report
|
||||
path: bandit-report.txt
|
||||
retention-days: 30
|
||||
if-no-files-found: ignore
|
||||
|
||||
# ----------------------------------------------------------------
|
||||
# Job 2 — pip-audit (CVE sur requirements)
|
||||
# ----------------------------------------------------------------
|
||||
pip-audit:
|
||||
name: pip-audit (CVE dépendances)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
continue-on-error: true
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Python 3.12
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.12"
|
||||
cache: "pip"
|
||||
|
||||
- name: Installation pip-audit
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install "pip-audit==2.7.3"
|
||||
|
||||
- name: Audit CVE sur requirements-ci.txt
|
||||
run: |
|
||||
if [ -f requirements-ci.txt ]; then
|
||||
pip-audit -r requirements-ci.txt \
|
||||
--format json \
|
||||
--output pip-audit-ci.json \
|
||||
--progress-spinner off \
|
||||
--disable-pip || echo "::warning::CVE détectées dans requirements-ci.txt"
|
||||
echo "=== RAPPORT pip-audit (CI) ==="
|
||||
cat pip-audit-ci.json || true
|
||||
else
|
||||
echo "::notice::requirements-ci.txt absent — skip"
|
||||
fi
|
||||
|
||||
- name: Audit CVE sur requirements.txt (best-effort)
|
||||
run: |
|
||||
# Timeout généreux car requirements.txt est massif (torch, CUDA).
|
||||
timeout 120 pip-audit -r requirements.txt \
|
||||
--format json \
|
||||
--output pip-audit-full.json \
|
||||
--progress-spinner off \
|
||||
--disable-pip 2>&1 | head -200 || \
|
||||
echo "::warning::pip-audit sur requirements.txt a timeout ou échoué (non bloquant)"
|
||||
|
||||
- name: Upload rapports pip-audit
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: pip-audit-reports
|
||||
path: |
|
||||
pip-audit-ci.json
|
||||
pip-audit-full.json
|
||||
retention-days: 30
|
||||
if-no-files-found: ignore
|
||||
|
||||
# ----------------------------------------------------------------
|
||||
# Job 3 — Scan secrets en clair (grep simple)
|
||||
# ----------------------------------------------------------------
|
||||
# Patterns recherchés : clés API Anthropic (sk-ant-), OpenAI (sk-),
|
||||
# Google (AIzaSy), AWS (AKIA), tokens Hugging Face (hf_).
|
||||
# Ne cherche QUE dans l'arbre de travail du checkout (donc les fichiers trackés en CI ; pas .env, pas .venv) — l'historique git n'est pas scanné.
|
||||
# ----------------------------------------------------------------
|
||||
secrets-scan:
|
||||
name: Scan secrets (grep)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 3
|
||||
continue-on-error: true
|
||||
|
||||
steps:
|
||||
- name: Checkout (historique complet)
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Scan patterns de secrets
|
||||
run: |
|
||||
# Chemins exclus : venvs, caches, data, htmlcov, models.
|
||||
EXCLUDES='--exclude-dir=.venv --exclude-dir=venv_v3 --exclude-dir=.git \
|
||||
--exclude-dir=node_modules --exclude-dir=htmlcov --exclude-dir=models \
|
||||
--exclude-dir=data --exclude-dir=__pycache__ --exclude-dir=.pytest_cache \
|
||||
--exclude=*.lock --exclude=*.log --exclude=*.md'
|
||||
|
||||
echo "=== Recherche de secrets potentiels ==="
|
||||
FOUND=0
|
||||
|
||||
# Anthropic
|
||||
if grep -rnI $EXCLUDES -E 'sk-ant-[a-zA-Z0-9_-]{20,}' . 2>/dev/null; then
|
||||
echo "::warning::Clé Anthropic potentielle détectée"
|
||||
FOUND=1
|
||||
fi
|
||||
|
||||
# OpenAI
|
||||
if grep -rnI $EXCLUDES -E 'sk-proj-[a-zA-Z0-9_-]{20,}|sk-[a-zA-Z0-9]{40,}' . 2>/dev/null; then
|
||||
echo "::warning::Clé OpenAI potentielle détectée"
|
||||
FOUND=1
|
||||
fi
|
||||
|
||||
# Google Cloud / API Keys
|
||||
if grep -rnI $EXCLUDES -E 'AIzaSy[a-zA-Z0-9_-]{33}' . 2>/dev/null; then
|
||||
echo "::warning::Clé Google API potentielle détectée"
|
||||
FOUND=1
|
||||
fi
|
||||
|
||||
# AWS
|
||||
if grep -rnI $EXCLUDES -E 'AKIA[0-9A-Z]{16}' . 2>/dev/null; then
|
||||
echo "::warning::Clé AWS potentielle détectée"
|
||||
FOUND=1
|
||||
fi
|
||||
|
||||
# Hugging Face
|
||||
if grep -rnI $EXCLUDES -E 'hf_[a-zA-Z0-9]{30,}' . 2>/dev/null; then
|
||||
echo "::warning::Token Hugging Face potentiel détecté"
|
||||
FOUND=1
|
||||
fi
|
||||
|
||||
# Mots-clés suspects à côté d'assignations
|
||||
if grep -rnI $EXCLUDES -E '(password|passwd|secret|api_key|apikey|token)\s*=\s*["\x27][a-zA-Z0-9_\-!@#\$%]{12,}["\x27]' . 2>/dev/null \
|
||||
| grep -viE '(example|dummy|placeholder|test|fake|xxx|changeme|\$\{)' 2>/dev/null; then
|
||||
echo "::warning::Assignation suspecte d'un secret détectée"
|
||||
FOUND=1
|
||||
fi
|
||||
|
||||
if [ "$FOUND" -eq 0 ]; then
|
||||
echo "Aucun secret détecté par les patterns de base."
|
||||
else
|
||||
echo ""
|
||||
echo "::notice::Vérifier manuellement les occurrences ci-dessus."
|
||||
echo "::notice::Si faux positif : ajouter le fichier aux exclusions ou reformater."
|
||||
fi
|
||||
|
||||
# Toujours succès (job non bloquant).
|
||||
exit 0
|
||||
214
.gitea/workflows/tests.yml
Normal file
214
.gitea/workflows/tests.yml
Normal file
@@ -0,0 +1,214 @@
|
||||
# ------------------------------------------------------------------
|
||||
# CI principale — Tests unitaires + lint léger
|
||||
# ------------------------------------------------------------------
|
||||
# Déclenchement : push / pull_request sur n'importe quelle branche.
|
||||
# Objectif : feedback rapide (< 3 min) sans GPU ni Ollama.
|
||||
# Runner : self-hosted (label "ubuntu-latest" ou équivalent).
|
||||
#
|
||||
# Les tests marqués `slow`, `gpu`, `integration`, `performance`,
|
||||
# `visual` et `smoke` sont exclus volontairement — ils nécessitent
|
||||
# CUDA, Ollama, ou des captures d'écran réelles.
|
||||
# ------------------------------------------------------------------
|
||||
name: tests
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- "**"
|
||||
pull_request:
|
||||
branches:
|
||||
- "**"
|
||||
|
||||
# Permet à une nouvelle exécution d'annuler les précédentes
|
||||
# sur la même branche (évite l'engorgement du runner local).
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
# Réglages Python/pip pour la CI : pas d'écriture de .pyc, pas d'avertissements pip superflus.
|
||||
PYTHONDONTWRITEBYTECODE: "1"
|
||||
PIP_DISABLE_PIP_VERSION_CHECK: "1"
|
||||
PIP_NO_PYTHON_VERSION_WARNING: "1"
|
||||
# Les modules d'exécution lisent parfois ces vars ; valeurs neutres en CI.
|
||||
RPA_VISION_CI: "1"
|
||||
RPA_AUTH_VAULT_PATH: "/tmp/ci_vault.enc"
|
||||
# api_stream.py a un fail-closed P0-C : si RPA_API_TOKEN absent, sys.exit(1)
|
||||
# au module load. On fournit un token bidon pour que les imports passent en CI.
|
||||
# (Le token n'est jamais utilisé réellement — les tests mockent les requêtes.)
|
||||
RPA_API_TOKEN: "ci_test_token_not_used_for_real_auth_just_to_pass_import_check_0123456789"
|
||||
|
||||
jobs:
|
||||
# ----------------------------------------------------------------
|
||||
# Job 1 — Lint (ruff + black --check)
|
||||
# ----------------------------------------------------------------
|
||||
# Non-bloquant : si ruff/black ne sont pas installables, on log
|
||||
# un warning et on continue. L'objectif ici est d'alerter, pas de
|
||||
# casser la CI pour des espaces en trop.
|
||||
# ----------------------------------------------------------------
|
||||
lint:
|
||||
name: Lint (ruff + black)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
continue-on-error: true
|
||||
|
||||
steps:
|
||||
- name: Checkout du code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Python 3.12
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.12"
|
||||
cache: "pip"
|
||||
|
||||
- name: Installation des linters
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install "ruff==0.6.9" "black==23.12.1" || {
|
||||
echo "::warning::Impossible d'installer ruff/black — job ignoré"
|
||||
exit 0
|
||||
}
|
||||
|
||||
- name: Ruff (lint rapide)
|
||||
run: |
|
||||
if command -v ruff >/dev/null 2>&1; then
|
||||
# Ruff : erreurs critiques uniquement (E9 syntax, F63 invalid print,
|
||||
# F7 syntax, F82 undefined in __all__).
|
||||
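# F821 (undefined name) censé être exclu le temps de nettoyer — attention : le préfixe F82 de --select couvre aussi F821/F823 (utiliser F822 pour l'exclure réellement)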
|
||||
# la dette technique préexistante (voir docs/STATUS.md).
|
||||
# Dossiers legacy exclus :
|
||||
# - agent_v0/deploy/windows_client/ : clone obsolète (marqué OBSOLÈTE)
|
||||
# - tests/property/ : tests cassés connus (cf. MEMORY.md)
|
||||
ruff check --select=E9,F63,F7,F82 --output-format=github \
|
||||
--exclude "agent_v0/deploy/windows_client" \
|
||||
--exclude "tests/property" \
|
||||
--exclude "tests/integration/test_visual_rpa_checkpoint.py" \
|
||||
core/ agent_v0/ tests/ || {
|
||||
echo "::warning::Ruff a trouvé des erreurs critiques"
|
||||
exit 1
|
||||
}
|
||||
else
|
||||
echo "::warning::ruff indisponible — skip"
|
||||
fi
|
||||
|
||||
- name: Black (format check)
|
||||
run: |
|
||||
if command -v black >/dev/null 2>&1; then
|
||||
# --check : ne modifie pas, signale juste.
|
||||
# Dossiers legacy exclus (cohérent avec ruff).
|
||||
black --check --diff \
|
||||
--exclude "agent_v0/deploy/windows_client|tests/property" \
|
||||
core/ agent_v0/ tests/ || {
|
||||
echo "::warning::Black suggère un reformatage — non bloquant"
|
||||
exit 0
|
||||
}
|
||||
else
|
||||
echo "::warning::black indisponible — skip"
|
||||
fi
|
||||
|
||||
# ----------------------------------------------------------------
|
||||
# Job 2 — Tests unitaires
|
||||
# ----------------------------------------------------------------
|
||||
# Exclut tous les marqueurs lourds. Utilise requirements-ci.txt
|
||||
# pour éviter torch/CUDA (économie ~3 Go + ~2 min).
|
||||
# ----------------------------------------------------------------
|
||||
unit-tests:
|
||||
name: Tests unitaires (sans GPU)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
|
||||
steps:
|
||||
- name: Checkout du code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Python 3.12
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.12"
|
||||
cache: "pip"
|
||||
cache-dependency-path: |
|
||||
requirements-ci.txt
|
||||
requirements.txt
|
||||
|
||||
- name: Installation des dépendances CI
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
if [ -f requirements-ci.txt ]; then
|
||||
echo "Utilisation de requirements-ci.txt (léger, sans torch)"
|
||||
pip install -r requirements-ci.txt
|
||||
else
|
||||
echo "::warning::requirements-ci.txt absent — fallback requirements.txt (lourd)"
|
||||
pip install -r requirements.txt
|
||||
fi
|
||||
|
||||
- name: Vérification imports critiques
|
||||
run: |
|
||||
python -c "import pytest; print(f'pytest {pytest.__version__}')"
|
||||
python -c "import sys; sys.path.insert(0, '.'); import core; print('core OK')" || {
|
||||
echo "::error::Impossible d'importer core.*"
|
||||
exit 1
|
||||
}
|
||||
|
||||
- name: Tests unitaires (hors slow/gpu/integration)
|
||||
run: |
|
||||
python -m pytest tests/unit/ \
|
||||
-m "not slow and not gpu and not integration and not performance and not visual" \
|
||||
--tb=short \
|
||||
--strict-markers \
|
||||
-q \
|
||||
--maxfail=10 \
|
||||
-o cache_dir=/tmp/.pytest_cache_ci
|
||||
|
||||
- name: Upload logs si échec
|
||||
if: failure()
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: pytest-logs
|
||||
path: |
|
||||
/tmp/.pytest_cache_ci
|
||||
logs/
|
||||
retention-days: 3
|
||||
if-no-files-found: ignore
|
||||
|
||||
# ----------------------------------------------------------------
|
||||
# Job 3 — Tests sécurité (bloquant)
|
||||
# ----------------------------------------------------------------
|
||||
# Les tests `test_security_*` valident des invariants critiques
|
||||
# (évaluation sûre, sérialisation signée). Aucune régression tolérée.
|
||||
# ----------------------------------------------------------------
|
||||
security-tests:
|
||||
name: Tests sécurité (critique)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
needs: [unit-tests]
|
||||
|
||||
steps:
|
||||
- name: Checkout du code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Python 3.12
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.12"
|
||||
cache: "pip"
|
||||
cache-dependency-path: |
|
||||
requirements-ci.txt
|
||||
requirements.txt
|
||||
|
||||
- name: Installation des dépendances CI
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
if [ -f requirements-ci.txt ]; then
|
||||
pip install -r requirements-ci.txt
|
||||
else
|
||||
pip install -r requirements.txt
|
||||
fi
|
||||
|
||||
- name: Tests sécurité (test_security_*)
|
||||
run: |
|
||||
python -m pytest tests/unit/test_security_*.py \
|
||||
--tb=long \
|
||||
--strict-markers \
|
||||
-v \
|
||||
-o cache_dir=/tmp/.pytest_cache_ci_sec
|
||||
36
.gitignore
vendored
36
.gitignore
vendored
@@ -75,3 +75,39 @@ htmlcov/
|
||||
# === Backups ===
|
||||
*_backup_*
|
||||
backups/
|
||||
*.bak
|
||||
*.bak_*
|
||||
*.orig
|
||||
*.old
|
||||
|
||||
# === Legacy / Triage ===
|
||||
_a_trier/
|
||||
archives/
|
||||
|
||||
# === Claude Code — worktrees et données locales ===
|
||||
# Worktrees générés par la CLI Claude Code lors d'exécutions d'agents
|
||||
# parallèles. Peuvent atteindre plusieurs centaines de Mo chacun.
|
||||
# Ne jamais committer — gérer via `git worktree list` / `git worktree remove`.
|
||||
.claude/
|
||||
.kiro/
|
||||
.mcp.json
|
||||
.snapshots/
|
||||
|
||||
# === Données runtime (sessions, learning, buffer, config local) ===
|
||||
data/
|
||||
**/capture_library.json
|
||||
.hypothesis/
|
||||
.deps_installed
|
||||
# Buffers SQLite locaux (streamer, cache)
|
||||
**/buffer/
|
||||
**/pending_events.db
|
||||
# Databases applicatives (instance Flask)
|
||||
**/instance/*.db
|
||||
**/instance/*.sqlite
|
||||
**/instance/*.sqlite3
|
||||
# Caches et index locaux
|
||||
*.sqlite
|
||||
*.sqlite3
|
||||
*.db-journal
|
||||
*.db-wal
|
||||
*.db-shm
|
||||
|
||||
@@ -21,7 +21,12 @@ ollama serve
|
||||
### 3. Télécharger le modèle VLM
|
||||
|
||||
```bash
|
||||
ollama pull qwen3-vl:8b
|
||||
# Modèle par défaut du projet (voir .env.example)
|
||||
ollama pull gemma4:latest
|
||||
|
||||
# Alternatives supportées
|
||||
# ollama pull qwen3-vl:8b
|
||||
# ollama pull 0000/ui-tars-1.5-7b-q8_0:7b # grounder visuel
|
||||
```
|
||||
|
||||
## Utilisation
|
||||
|
||||
339
README.md
339
README.md
@@ -1,207 +1,204 @@
|
||||
# RPA Vision V3 - 100% Vision-Based Workflow Automation
|
||||
# RPA Vision V3 — Automatisation basée sur la compréhension visuelle des interfaces
|
||||
|
||||
## 📊 Status
|
||||
> ⚠️ **Projet en phase POC** — voir [`docs/STATUS.md`](docs/STATUS.md) pour l'état
|
||||
> réel par module. Certaines briques sont opérationnelles bout en bout,
|
||||
> d'autres sont en cours de stabilisation. Ce dépôt n'est pas production-ready.
|
||||
|
||||
🚀 **PRODUCTION-READY** - Phase 12 Complete (77% System Completion) ✅
|
||||
*Dernière mise à jour : 14 avril 2026*
|
||||
|
||||
**Latest Update**: 14 Décembre 2024
|
||||
- ✅ **10/13 Phases Complétées** - Système mature et fonctionnel
|
||||
- ✅ **Performance Exceptionnelle** - 500-6250x plus rapide que requis
|
||||
- ✅ **Architecture Entreprise** - 148k+ lignes, 19 modules, 6 specs complètes
|
||||
- ✅ **Innovations Techniques** - Self-healing, Multi-modal, GPU management
|
||||
- 📊 **Audit Complet** - [Rapport détaillé](AUDIT_COMPLET_SYSTEME_RPA_VISION_V3.md)
|
||||
## Intention
|
||||
|
||||
**Quick Test**: `bash test_clip.sh`
|
||||
Automatiser des workflows métier par **compréhension sémantique de l'écran**
|
||||
plutôt que par coordonnées de clic fixes. Le système observe l'utilisateur,
|
||||
reconstruit un graphe d'états de l'interface, et cherche à rejouer la
|
||||
procédure en reconnaissant visuellement les éléments cibles — y compris
|
||||
quand l'UI change légèrement.
|
||||
|
||||
## 🎯 Vision
|
||||
Terrain cible principal : postes hospitaliers (Citrix, applications métier
|
||||
web et desktop). Contrainte forte : **100 % local**, pas d'appel à un LLM
|
||||
cloud dans le pipeline par défaut.
|
||||
|
||||
RPA basé sur la **compréhension sémantique** des interfaces, pas sur des coordonnées de clics.
|
||||
|
||||
Le système apprend des workflows en observant l'utilisateur et les automatise de manière robuste grâce à une architecture en 5 couches.
|
||||
|
||||
## 🏗️ Architecture en 5 Couches
|
||||
## Architecture en couches
|
||||
|
||||
```
|
||||
RawSession (Couche 0)
|
||||
↓
|
||||
ScreenState (Couche 1) - 4 niveaux d'abstraction
|
||||
↓
|
||||
UIElement Detection (Couche 2) - Types + Rôles sémantiques
|
||||
↓
|
||||
State Embedding (Couche 3) - Fusion multi-modale
|
||||
↓
|
||||
Workflow Graph (Couche 4) - Nodes + Edges + Learning States
|
||||
RawSession (couche 0) — capture événements + screenshots
|
||||
↓
|
||||
ScreenState (couche 1) — états d'écran à plusieurs niveaux d'abstraction
|
||||
↓
|
||||
UIElement (couche 2) — détection sémantique (cascade OCR + templates + VLM)
|
||||
↓
|
||||
State Embedding (couche 3) — fusion multi-modale + index FAISS
|
||||
↓
|
||||
Workflow Graph (couche 4) — nœuds, transitions, résolution de cibles
|
||||
```
|
||||
|
||||
## 📁 Structure
|
||||
## État des fonctionnalités (synthèse)
|
||||
|
||||
```
|
||||
rpa_vision_v3/
|
||||
├── core/
|
||||
│ ├── models/ # Couches 0-4 : Structures de données
|
||||
│ ├── capture/ # Couche 0 : Capture événements + screenshots
|
||||
│ ├── detection/ # Couche 2 : Détection UI sémantique
|
||||
│ ├── embedding/ # Couche 3 : Fusion multi-modale + FAISS
|
||||
│ ├── graph/ # Couche 4 : Construction + Matching + Exécution
|
||||
│ └── persistence/ # Sauvegarde/Chargement
|
||||
├── data/
|
||||
│ ├── sessions/ # RawSessions
|
||||
│ ├── screen_states/ # ScreenStates
|
||||
│ ├── embeddings/ # Vecteurs .npy
|
||||
│ ├── faiss_index/ # Index FAISS
|
||||
│ └── workflows/ # Workflow Graphs
|
||||
└── tests/ # Tests unitaires + intégration
|
||||
```
|
||||
Le détail par module est dans [`docs/STATUS.md`](docs/STATUS.md).
|
||||
|
||||
## 🚀 Démarrage Rapide
|
||||
**Opérationnel**
|
||||
- Capture Windows (Agent V1) + streaming vers serveur Linux
|
||||
- Stockage des sessions brutes (screenshots + événements)
|
||||
- Streaming server FastAPI, sessions en mémoire
|
||||
- Build du package Windows (`deploy/build_package.sh`)
|
||||
|
||||
**Alpha (fonctionnel sur un cas de référence, encore peu généralisé)**
|
||||
- Détection UI par cascade VLM + OCR + templates
|
||||
- Construction de workflow graph depuis une session
|
||||
- Replay E2E supervisé — premier succès sur Notepad le 13 avril 2026
|
||||
- Mode apprentissage : pause et demande d'aide humaine quand la résolution échoue
|
||||
- Embeddings CLIP + index FAISS
|
||||
- Module auth (Fernet + TOTP), federation (LearningPack)
|
||||
- Web Dashboard, Agent Chat
|
||||
|
||||
**En cours**
|
||||
- Visual Workflow Builder (VWB) — bugs DB runtime connus
|
||||
- Self-healing / recovery global
|
||||
- Analytics / reporting
|
||||
- Worker de compilation sessions → ExecutionPlan
|
||||
- Tests E2E multi-applications
|
||||
|
||||
## Limitations connues
|
||||
|
||||
- Le pipeline de replay est validé sur un nombre très restreint d'applications.
|
||||
- `TargetMemoryStore` (apprentissage Phase 1) est câblé mais sa base reste
|
||||
vide tant qu'un replay complet n'a pas été cristallisé.
|
||||
- Certaines asymétries entre chemins stricts et legacy dans le serveur de
|
||||
streaming peuvent provoquer des arrêts au lieu de pauses d'apprentissage.
|
||||
- VWB n'est pas encore stable en écriture ; un outil dédié plus simple est
|
||||
envisagé.
|
||||
|
||||
## Démarrage
|
||||
|
||||
### Prérequis
|
||||
|
||||
- Python 3.10 à 3.12
|
||||
- [Ollama](https://ollama.ai) installé et démarré localement
|
||||
- Recommandé : GPU NVIDIA pour l'inférence VLM
|
||||
- Windows 10/11 uniquement pour le client Agent V1
|
||||
|
||||
### Installation
|
||||
|
||||
```bash
|
||||
# 1. Installer Ollama
|
||||
curl -fsSL https://ollama.ai/install.sh | sh # Linux
|
||||
# ou
|
||||
brew install ollama # macOS
|
||||
|
||||
# 2. Démarrer Ollama
|
||||
ollama serve
|
||||
|
||||
# 3. Télécharger le modèle VLM
|
||||
ollama pull qwen3-vl:8b
|
||||
|
||||
# 4. Installer dépendances Python
|
||||
# 1) Cloner puis créer le venv
|
||||
python3 -m venv .venv
|
||||
source .venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
|
||||
# 2) Démarrer Ollama et récupérer le modèle VLM par défaut
|
||||
ollama serve &
|
||||
ollama pull gemma4:latest # défaut du projet
|
||||
# Alternatives supportées :
|
||||
# ollama pull qwen3-vl:8b
|
||||
# ollama pull 0000/ui-tars-1.5-7b-q8_0:7b # grounder visuel
|
||||
|
||||
# 3) Copier et ajuster la configuration
|
||||
cp .env.example .env
|
||||
# éditer .env pour vérifier RPA_VLM_MODEL, VLM_ENDPOINT, ports, etc.
|
||||
```
|
||||
|
||||
### Test Rapide
|
||||
### Lancer les services
|
||||
|
||||
Tous les services sont pilotés par `svc.sh` (source de vérité des ports :
|
||||
`services.conf`).
|
||||
|
||||
```bash
|
||||
# Diagnostic système
|
||||
python3 rpa_vision_v3/examples/diagnostic_vlm.py
|
||||
|
||||
# Test de détection
|
||||
./rpa_vision_v3/test_quick.sh
|
||||
./svc.sh status # État de tous les services
|
||||
./svc.sh start # Tout démarrer
|
||||
./svc.sh start streaming # Streaming server uniquement (port 5005)
|
||||
./svc.sh restart api # Redémarrer l'API (port 8000)
|
||||
./svc.sh stop # Tout arrêter
|
||||
```
|
||||
|
||||
### Utilisation - Détection UI
|
||||
| Port | Service |
|
||||
|---|---|
|
||||
| 8000 | API Server (upload / traitement core) |
|
||||
| 5001 | Web Dashboard |
|
||||
| 5002 | VWB Backend (Flask) |
|
||||
| 5003 | Monitoring |
|
||||
| 5004 | Agent Chat |
|
||||
| 5005 | Streaming Server (Agent V1 → pipeline core) |
|
||||
| 5006 | Session Cleaner |
|
||||
| 5099 | Worker de compilation (optionnel) |
|
||||
| 3002 | VWB Frontend (Vite/React) |
|
||||
|
||||
```python
|
||||
from rpa_vision_v3.core.detection import create_detector
|
||||
### Client Windows (Agent V1)
|
||||
|
||||
# Créer le détecteur
|
||||
detector = create_detector()
|
||||
|
||||
# Détecter les éléments UI
|
||||
elements = detector.detect("screenshot.png")
|
||||
|
||||
# Utiliser les résultats
|
||||
for elem in elements:
|
||||
print(f"{elem.type:15s} | {elem.role:20s} | {elem.label}")
|
||||
```
|
||||
|
||||
### Utilisation - Workflow (Phase 4 - À venir)
|
||||
|
||||
```python
|
||||
from rpa_vision_v3.core.models import RawSession, ScreenState, Workflow
|
||||
from rpa_vision_v3.core.graph import GraphBuilder, NodeMatcher
|
||||
|
||||
# 1. Capturer une session
|
||||
session = RawSession(...)
|
||||
# ... capturer événements et screenshots
|
||||
|
||||
# 2. Construire workflow automatiquement
|
||||
builder = GraphBuilder(...)
|
||||
workflow = builder.build_from_session(session)
|
||||
|
||||
# 3. Matcher état actuel
|
||||
matcher = NodeMatcher(...)
|
||||
current_state = ScreenState(...)
|
||||
match = matcher.match(current_state, workflow)
|
||||
|
||||
# 4. Exécuter action
|
||||
if match:
|
||||
edge = workflow.get_outgoing_edges(match.node.node_id)[0]
|
||||
executor.execute_edge(edge, current_state)
|
||||
```
|
||||
|
||||
## 📚 Documentation
|
||||
|
||||
### Guides Principaux
|
||||
- **Quick Start** : `QUICK_START.md` - Démarrage rapide
|
||||
- **Prochaines Étapes** : `NEXT_STEPS.md` - Roadmap et Phase 4
|
||||
- **Phase 3 Complète** : `PHASE3_COMPLETE.md` - Résumé Phase 3
|
||||
|
||||
### Documentation Technique
|
||||
- **Spec complète** : `.kiro/specs/workflow-graph-implementation/`
|
||||
- **Architecture** : `docs/reference/ARCHITECTURE_VISION_COMPLETE.md`
|
||||
- **Détection Hybride** : `HYBRID_DETECTION_SUMMARY.md`
|
||||
- **Intégration Ollama** : `docs/OLLAMA_INTEGRATION.md`
|
||||
|
||||
## 🎓 Concepts Clés
|
||||
|
||||
### RPA 100% Vision
|
||||
|
||||
- ❌ Pas de coordonnées (x, y) fixes
|
||||
- ✅ Rôles sémantiques (primary_action, form_input, etc.)
|
||||
- ✅ Matching par similarité visuelle et textuelle
|
||||
- ✅ Robuste aux changements d'UI
|
||||
|
||||
### Apprentissage Progressif
|
||||
|
||||
```
|
||||
OBSERVATION (5+ exécutions)
|
||||
↓
|
||||
COACHING (10+ assistances, succès >90%)
|
||||
↓
|
||||
AUTO_CANDIDATE (20+ exécutions, succès >95%)
|
||||
↓
|
||||
AUTO_CONFIRMÉ (validation utilisateur)
|
||||
```
|
||||
|
||||
### State Embedding
|
||||
|
||||
Fusion multi-modale :
|
||||
- 50% Image (screenshot complet)
|
||||
- 30% Texte (texte détecté)
|
||||
- 10% Titre (fenêtre)
|
||||
- 10% UI (éléments détectés)
|
||||
|
||||
## 🧪 Tests
|
||||
Le client capture souris, clavier et écran sur le poste Windows et envoie
|
||||
les données au streaming server Linux.
|
||||
|
||||
```bash
|
||||
# Tests unitaires
|
||||
pytest tests/unit/
|
||||
|
||||
# Tests d'intégration
|
||||
pytest tests/integration/
|
||||
|
||||
# Tests de performance
|
||||
pytest tests/performance/ --benchmark-only
|
||||
# Build du package Windows depuis le repo Linux
|
||||
./deploy/build_package.sh
|
||||
# produit deploy/Lea_v<version>.zip
|
||||
```
|
||||
|
||||
## 📈 Roadmap - 77% Complété (10/13 Phases)
|
||||
Voir [`docs/DEV_SETUP.md`](docs/DEV_SETUP.md) pour la maintenance du dépôt
|
||||
(worktrees, build, services).
|
||||
|
||||
### ✅ **Phases Complétées**
|
||||
- [x] **Phase 1-2** : Fondations + Embeddings FAISS ✅
|
||||
- [x] **Phase 4-6** : Détection UI + Workflow Graphs + Action Execution ✅
|
||||
- [x] **Phase 7-8** : Learning System + Training System ✅
|
||||
- [x] **Phase 10-12** : GPU Management + Performance + Monitoring ✅
|
||||
## Arborescence du dépôt
|
||||
|
||||
### 🎯 **Phases Restantes**
|
||||
- [ ] **Phase 3** : Checkpoint Final (tests storage)
|
||||
- [ ] **Phase 9** : Visual Workflow Builder (90% → 100%)
|
||||
- [ ] **Phase 13** : Tests End-to-End + Documentation finale
|
||||
```
|
||||
rpa_vision_v3/
|
||||
├── agent_v0/ # Agent V1 (client Windows) + serveur de streaming
|
||||
│ ├── agent_v1/ # Source de l'agent (capture, UI tray, exécution)
|
||||
│ └── server_v1/ # FastAPI streaming + processeurs
|
||||
├── core/ # Pipeline core
|
||||
│ ├── detection/ # Cascade VLM + OCR + templates
|
||||
│ ├── embedding/ # CLIP + FAISS
|
||||
│ ├── graph/ # Construction / matching de workflow graphs
|
||||
│ ├── execution/ # Résolution de cibles, actions LLM
|
||||
│ ├── learning/ # TargetMemoryStore (apprentissage)
|
||||
│ ├── auth/ # Vault Fernet + TOTP
|
||||
│ └── federation/ # Export/import de LearningPacks
|
||||
├── visual_workflow_builder/ # VWB (backend Flask + frontend React Vite)
|
||||
├── web_dashboard/ # Dashboard Flask + SocketIO
|
||||
├── agent_chat/ # Interface conversationnelle + planner
|
||||
├── deploy/ # Scripts de build et unités systemd
|
||||
├── data/ # Sessions, embeddings, index FAISS, apprentissage
|
||||
├── docs/ # Documentation technique
|
||||
├── tests/ # pytest (unit, integration, e2e)
|
||||
├── services.conf # Source de vérité des ports
|
||||
├── svc.sh # Orchestrateur des services
|
||||
└── run.sh # Démarrage tout-en-un (legacy, préférer svc.sh)
|
||||
```
|
||||
|
||||
### 🚀 **Composants Production-Ready**
|
||||
- **Agent V0** : Capture cross-platform + Encryption ✅
|
||||
- **Server API** : Processing pipeline + Web dashboard ✅
|
||||
- **Analytics System** : Monitoring + Insights + Reporting ✅
|
||||
- **Self-Healing** : Automatic adaptation + Recovery ✅
|
||||
## Tests
|
||||
|
||||
## 🤝 Contribution
|
||||
```bash
|
||||
source .venv/bin/activate
|
||||
|
||||
Voir `.kiro/specs/workflow-graph-implementation/tasks.md` pour les tâches en cours.
|
||||
# Tests rapides (hors marqueur slow)
|
||||
pytest -m "not slow" -q
|
||||
|
||||
## 📄 Licence
|
||||
# Tests d'intégration (streaming, pipeline)
|
||||
pytest tests/integration/ -q
|
||||
|
||||
Propriétaire - Tous droits réservés
|
||||
# Tests E2E
|
||||
pytest tests/test_pipeline_e2e.py -q
|
||||
```
|
||||
|
||||
Quelques tests legacy sont connus comme cassés — voir la mémoire projet et
|
||||
`docs/` pour la liste.
|
||||
|
||||
## Documentation
|
||||
|
||||
- [`docs/STATUS.md`](docs/STATUS.md) — état réel par module
|
||||
- [`docs/DEV_SETUP.md`](docs/DEV_SETUP.md) — tâches d'administration (worktrees, build)
|
||||
- [`docs/EXECUTION_LOOP_FLAGS.md`](docs/EXECUTION_LOOP_FLAGS.md) — flags C1 vision-aware (`enable_ui_detection`, `enable_ocr`, `analyze_timeout_ms`, `window_info_provider`)
|
||||
- [`docs/VISION_RPA_INTELLIGENT.md`](docs/VISION_RPA_INTELLIGENT.md) — cahier des charges
|
||||
- [`docs/PLAN_ACTEUR_V1.md`](docs/PLAN_ACTEUR_V1.md) — architecture 3 niveaux (Macro / Méso / Micro)
|
||||
- [`docs/CONFORMITE_AI_ACT.md`](docs/CONFORMITE_AI_ACT.md) — journalisation, floutage, rétention
|
||||
|
||||
## Concepts clés
|
||||
|
||||
- **RPA 100 % vision** : pas de coordonnées fixes ; l'agent localise un
|
||||
élément par ce qu'il voit (label + contexte visuel), pas par `x,y`.
|
||||
- **Apprentissage progressif** : mode shadow → assisté → autonome, validé
|
||||
par supervision humaine sur les échecs.
|
||||
- **LLM 100 % local** : Ollama sur la machine. Aucun appel cloud dans le
|
||||
pipeline par défaut (cf. feedback projet `feedback_local_only.md`).
|
||||
|
||||
## Licence
|
||||
|
||||
Propriétaire — tous droits réservés.
|
||||
|
||||
@@ -147,8 +147,10 @@ class AutonomousPlanner:
|
||||
"""Initialise le client VLM pour analyse intelligente."""
|
||||
if VLM_AVAILABLE and OllamaClient:
|
||||
try:
|
||||
self._vlm_client = OllamaClient(model="qwen2.5vl:7b")
|
||||
logger.info("VLM client initialized (qwen2.5vl:7b)")
|
||||
from core.detection.vlm_config import get_vlm_model
|
||||
_planner_vlm = get_vlm_model()
|
||||
self._vlm_client = OllamaClient(model=_planner_vlm)
|
||||
logger.info("VLM client initialized (%s)", _planner_vlm)
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not initialize VLM client: {e}")
|
||||
self._vlm_client = None
|
||||
|
||||
2
agent_rust/.gitignore
vendored
2
agent_rust/.gitignore
vendored
@@ -1,2 +0,0 @@
|
||||
/target
|
||||
Cargo.lock
|
||||
@@ -1,85 +0,0 @@
|
||||
[package]
|
||||
name = "rpa-agent"
|
||||
version = "0.2.0"
|
||||
edition = "2021"
|
||||
description = "Agent RPA Vision - Lea (Phases 1-5)"
|
||||
|
||||
[dependencies]
|
||||
# Capture d'ecran
|
||||
xcap = "0.7"
|
||||
|
||||
# Simulation souris/clavier (replay)
|
||||
enigo = { version = "0.3", features = ["serde"] }
|
||||
|
||||
# Capture evenements souris/clavier (recording) — Phase 5
|
||||
rdev = "0.5"
|
||||
|
||||
# Client HTTP (mode bloquant, pas de tokio)
|
||||
reqwest = { version = "0.12", features = ["blocking", "multipart", "json"] }
|
||||
|
||||
# Traitement d'images (JPEG encode, resize, crop)
|
||||
image = "0.25"
|
||||
|
||||
# Floutage zones sensibles — Phase 5
|
||||
imageproc = "0.25"
|
||||
|
||||
# Encodage base64
|
||||
base64 = "0.22"
|
||||
|
||||
# Serialisation JSON
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
|
||||
# Mini serveur HTTP synchrone (port 5006)
|
||||
tiny_http = "0.12"
|
||||
|
||||
# Hostname de la machine
|
||||
hostname = "0.4"
|
||||
|
||||
# Date/heure
|
||||
chrono = "0.4"
|
||||
|
||||
# Canaux inter-threads performants
|
||||
crossbeam-channel = "0.5"
|
||||
|
||||
# Logging
|
||||
log = "0.4"
|
||||
env_logger = "0.11"
|
||||
|
||||
# Signal handling Unix (Ctrl+C)
|
||||
[target.'cfg(unix)'.dependencies]
|
||||
libc = "0.2"
|
||||
|
||||
# Dependances Windows uniquement — Phases 3-5
|
||||
[target.'cfg(windows)'.dependencies]
|
||||
# Systray — Phase 3
|
||||
tray-icon = "0.19"
|
||||
muda = "0.15"
|
||||
|
||||
# Boucle d'evenements — Phase 3
|
||||
winit = { version = "0.30", features = ["rwh_06"] }
|
||||
|
||||
# Notifications toast — Phase 3
|
||||
winrt-notification = "0.5"
|
||||
|
||||
# Chat WebView2 — Phase 4
|
||||
wry = "0.48"
|
||||
|
||||
# Raw window handle pour wry + fenetre native
|
||||
raw-window-handle = "0.6"
|
||||
|
||||
# Win32 API (info fenetre, dialogues, etc.)
|
||||
windows-sys = { version = "0.59", features = [
|
||||
"Win32_UI_WindowsAndMessaging",
|
||||
"Win32_System_Threading",
|
||||
"Win32_System_LibraryLoader",
|
||||
"Win32_Foundation",
|
||||
"Win32_Graphics_Gdi",
|
||||
] }
|
||||
|
||||
[profile.release]
|
||||
opt-level = "z"
|
||||
lto = true
|
||||
strip = true
|
||||
codegen-units = 1
|
||||
panic = "abort"
|
||||
@@ -1,34 +0,0 @@
|
||||
╔══════════════════════════════════════════╗
|
||||
║ Léa — Assistante IA ║
|
||||
║ Automatisation de tâches ║
|
||||
╚══════════════════════════════════════════╝
|
||||
|
||||
INSTALLATION
|
||||
────────────
|
||||
1. Copiez le dossier "Lea" sur votre Bureau
|
||||
2. Double-cliquez sur "Lea.exe" pour démarrer
|
||||
|
||||
PREMIÈRE UTILISATION
|
||||
────────────────────
|
||||
• Léa s'ouvre automatiquement dans votre navigateur
|
||||
• Cliquez sur "Apprenez-moi une tâche" pour commencer
|
||||
• Effectuez votre tâche normalement
|
||||
• Cliquez sur "C'est terminé" quand vous avez fini
|
||||
• Léa a appris ! Demandez-lui de refaire la tâche
|
||||
|
||||
ARRÊTER LÉA
|
||||
────────────
|
||||
• Fermez la fenêtre Léa dans la barre des tâches
|
||||
• Ou appuyez sur Ctrl+C dans le terminal
|
||||
|
||||
BESOIN D'AIDE ?
|
||||
───────────────
|
||||
Contactez le support : [à compléter]
|
||||
|
||||
────────────────────────────────────────────
|
||||
⚠ Cet outil utilise l'intelligence artificielle.
|
||||
Article 50 du Règlement européen sur l'IA.
|
||||
Vos données restent sur votre ordinateur et notre
|
||||
serveur sécurisé. Aucune donnée n'est partagée
|
||||
avec des tiers.
|
||||
────────────────────────────────────────────
|
||||
@@ -1,101 +0,0 @@
|
||||
# RPA Vision Agent (Rust) — Phases 1-5
|
||||
|
||||
Agent complet pour RPA Vision V3, ecrit en Rust.
|
||||
Parite fonctionnelle avec l'agent Python (`agent_v0/agent_v1/`) en un seul executable de 2.4 Mo.
|
||||
|
||||
## Fonctionnalites
|
||||
|
||||
### Phase 1 — Agent minimal (headless)
|
||||
- **Heartbeat** : capture ecran toutes les 5s, JPEG, dedup par hash perceptuel
|
||||
- **Replay** : poll serveur, execute actions (click, type, key_combo, scroll, wait)
|
||||
- **Resolution visuelle** : resolution de cibles via le serveur (template matching)
|
||||
- **Serveur de capture** : port 5006 (GET /capture, GET /health, POST /file-action)
|
||||
|
||||
### Phase 3 — Systray + Notifications
|
||||
- **Systray** : icone avec cercle colore (gris=idle, rouge=enregistrement, vert=connecte, bleu=replay)
|
||||
- **Menu contextuel** : Machine ID, statut, Apprenez-moi, C'est termine, Mes taches, ARRET D'URGENCE, Chat, Fichiers, Quitter
|
||||
- **Notifications toast** : via winrt-notification (bienvenue, session, replay, connexion)
|
||||
- **Etat partage** : thread-safe via AtomicBool + Mutex
|
||||
|
||||
### Phase 4 — Chat WebView2
|
||||
- **WebView2** : fenetre 520x720, charge http://{server}:5004/chat
|
||||
- **Positionnement** : bas-droite pres du systray
|
||||
- **Fallback** : HTML embarque si le serveur est indisponible
|
||||
- **Toggle** : show/hide via menu systray
|
||||
|
||||
### Phase 5 — Parite complete
|
||||
- **Enregistrement** : capture evenements souris/clavier via rdev, envoi au serveur
|
||||
- **Floutage** : detection de champs de saisie + blur gaussien (protection donnees sensibles)
|
||||
- **Configuration** : BLUR_SENSITIVE, LOG_RETENTION_DAYS, CHAT_PORT
|
||||
- **Health check** : verification connexion serveur toutes les 30s
|
||||
|
||||
## Build
|
||||
|
||||
### Linux (pour tests)
|
||||
|
||||
```bash
|
||||
sudo apt install libpipewire-0.3-dev libclang-dev libgbm-dev libxdo-dev
|
||||
cargo build --release
|
||||
```
|
||||
|
||||
### Cross-compilation vers Windows
|
||||
|
||||
```bash
|
||||
rustup target add x86_64-pc-windows-gnu
|
||||
sudo apt install gcc-mingw-w64-x86-64
|
||||
cargo build --release --target x86_64-pc-windows-gnu
|
||||
```
|
||||
|
||||
### Deploiement sur le PC cible
|
||||
|
||||
```bash
|
||||
sshpass -p 'loli' scp -o StrictHostKeyChecking=no \
|
||||
target/x86_64-pc-windows-gnu/release/rpa-agent.exe \
|
||||
dom@192.168.1.11:"C:\\rpa_vision\\rpa-agent.exe"
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
| Variable | Defaut | Description |
|
||||
|---|---|---|
|
||||
| `RPA_SERVER_URL` | `http://localhost:5005/api/v1` | URL du serveur streaming |
|
||||
| `RPA_MACHINE_ID` | `{hostname}_{os}` | Identifiant de la machine |
|
||||
| `RPA_CAPTURE_PORT` | `5006` | Port du serveur de capture |
|
||||
| `RPA_HEARTBEAT_INTERVAL` | `5` | Intervalle heartbeat (secondes) |
|
||||
| `RPA_JPEG_QUALITY` | `85` | Qualite JPEG (1-100) |
|
||||
| `RPA_BLUR_SENSITIVE` | `true` | Flouter les zones sensibles |
|
||||
| `RPA_LOG_RETENTION_DAYS` | `180` | Retention des logs (jours) |
|
||||
| `RPA_CHAT_PORT` | `5004` | Port du serveur de chat |
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
src/
|
||||
├── main.rs — Orchestrateur, 7 threads (heartbeat, replay, serveur, health, recorder, chat, tray)
|
||||
├── config.rs — Configuration (env vars + defauts)
|
||||
├── state.rs — Etat partage thread-safe (AtomicBool, Mutex)
|
||||
├── capture.rs — Capture ecran (xcap), JPEG, hash perceptuel
|
||||
├── network.rs — Client HTTP (heartbeat, poll replay, rapport resultat)
|
||||
├── replay.rs — Boucle de polling replay avec notifications
|
||||
├── executor.rs — Execution actions (click, type, key_combo, scroll, wait)
|
||||
├── visual.rs — Resolution visuelle des cibles via le serveur
|
||||
├── server.rs — Mini serveur HTTP port 5006 (/capture, /health, /file-action)
|
||||
├── tray.rs — Icone systray + menu contextuel (tray-icon, winit)
|
||||
├── notifications.rs — Notifications toast Windows (winrt-notification)
|
||||
├── chat.rs — Fenetre de chat WebView2 (wry)
|
||||
├── recorder.rs — Capture evenements souris/clavier (rdev)
|
||||
└── blur.rs — Floutage zones sensibles (detection + box blur)
|
||||
```
|
||||
|
||||
## Taille du binaire
|
||||
|
||||
| Configuration | Taille |
|
||||
|---|---|
|
||||
| Release (LTO + strip + opt-level z) | **2.4 Mo** |
|
||||
| Python equivalent (venv + packages) | ~200 Mo |
|
||||
|
||||
## Compatibilite
|
||||
|
||||
- **OS** : Windows 10/11 (systray, notifications, chat WebView2)
|
||||
- **Fallback Linux** : mode console (heartbeat, replay, serveur)
|
||||
- **Serveur** : compatible api_stream.py (port 5005)
|
||||
@@ -1,22 +0,0 @@
|
||||
#!/bin/bash
|
||||
# Build du kit démo pour Windows
|
||||
set -e
|
||||
|
||||
echo "=== Build Léa pour Windows ==="
|
||||
cargo build --release --target x86_64-pc-windows-gnu
|
||||
|
||||
# Préparer le dossier de démo
|
||||
DEMO_DIR="demo_kit/Lea"
|
||||
rm -rf demo_kit
|
||||
mkdir -p "$DEMO_DIR"
|
||||
|
||||
# Copier les fichiers
|
||||
cp target/x86_64-pc-windows-gnu/release/rpa-agent.exe "$DEMO_DIR/Lea.exe"
|
||||
cp config.txt "$DEMO_DIR/config.txt"
|
||||
cp LISEZMOI.txt "$DEMO_DIR/LISEZMOI.txt"
|
||||
|
||||
echo ""
|
||||
echo "=== Kit démo prêt dans demo_kit/Lea/ ==="
|
||||
ls -lh "$DEMO_DIR/"
|
||||
echo ""
|
||||
echo "Copiez le dossier Lea/ sur le PC du docteur."
|
||||
@@ -1,12 +0,0 @@
|
||||
# === Configuration Léa ===
|
||||
# Adresse du serveur (ne pas modifier sauf instruction)
|
||||
RPA_SERVER_URL=https://lea.labs.laurinebazin.design/api/v1
|
||||
|
||||
# Clé d'accès (ne pas modifier)
|
||||
RPA_API_TOKEN=86031addb338e449fccdb1a983f61807aec15d42d482b9c7748ad607dc23caab
|
||||
|
||||
# Qualité des captures (1-100, défaut: 85)
|
||||
RPA_JPEG_QUALITY=85
|
||||
|
||||
# Floutage des données sensibles (true/false)
|
||||
RPA_BLUR_SENSITIVE=true
|
||||
3
agent_rust/lea_uia/.gitignore
vendored
Normal file
3
agent_rust/lea_uia/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
target/
|
||||
**/target/
|
||||
|
||||
384
agent_rust/lea_uia/Cargo.lock
generated
Normal file
384
agent_rust/lea_uia/Cargo.lock
generated
Normal file
@@ -0,0 +1,384 @@
|
||||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 4
|
||||
|
||||
[[package]]
|
||||
name = "anstream"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"anstyle-parse",
|
||||
"anstyle-query",
|
||||
"anstyle-wincon",
|
||||
"colorchoice",
|
||||
"is_terminal_polyfill",
|
||||
"utf8parse",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle"
|
||||
version = "1.0.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000"
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-parse"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e"
|
||||
dependencies = [
|
||||
"utf8parse",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-query"
|
||||
version = "1.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc"
|
||||
dependencies = [
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-wincon"
|
||||
version = "3.0.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"once_cell_polyfill",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "4.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b193af5b67834b676abd72466a96c1024e6a6ad978a1f484bd90b85c94041351"
|
||||
dependencies = [
|
||||
"clap_builder",
|
||||
"clap_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_builder"
|
||||
version = "4.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f"
|
||||
dependencies = [
|
||||
"anstream",
|
||||
"anstyle",
|
||||
"clap_lex",
|
||||
"strsim",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_derive"
|
||||
version = "4.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1110bd8a634a1ab8cb04345d8d878267d57c3cf1b38d91b71af6686408bbca6a"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_lex"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9"
|
||||
|
||||
[[package]]
|
||||
name = "colorchoice"
|
||||
version = "1.0.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570"
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
||||
|
||||
[[package]]
|
||||
name = "is_terminal_polyfill"
|
||||
version = "1.70.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "1.0.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"
|
||||
|
||||
[[package]]
|
||||
name = "lea_uia"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"clap",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"windows",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
|
||||
|
||||
[[package]]
|
||||
name = "once_cell_polyfill"
|
||||
version = "1.70.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.106"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.45"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.228"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
|
||||
dependencies = [
|
||||
"serde_core",
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_core"
|
||||
version = "1.0.228"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.228"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.149"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
|
||||
dependencies = [
|
||||
"itoa",
|
||||
"memchr",
|
||||
"serde",
|
||||
"serde_core",
|
||||
"zmij",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strsim"
|
||||
version = "0.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.117"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
|
||||
|
||||
[[package]]
|
||||
name = "utf8parse"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
|
||||
|
||||
[[package]]
|
||||
name = "windows"
|
||||
version = "0.59.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7f919aee0a93304be7f62e8e5027811bbba96bcb1de84d6618be56e43f8a32a1"
|
||||
dependencies = [
|
||||
"windows-core",
|
||||
"windows-targets",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-core"
|
||||
version = "0.59.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "810ce18ed2112484b0d4e15d022e5f598113e220c53e373fb31e67e21670c1ce"
|
||||
dependencies = [
|
||||
"windows-implement",
|
||||
"windows-interface",
|
||||
"windows-result",
|
||||
"windows-strings",
|
||||
"windows-targets",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-implement"
|
||||
version = "0.59.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "83577b051e2f49a058c308f17f273b570a6a758386fc291b5f6a934dd84e48c1"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-interface"
|
||||
version = "0.59.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-link"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a"
|
||||
|
||||
[[package]]
|
||||
name = "windows-link"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
|
||||
|
||||
[[package]]
|
||||
name = "windows-result"
|
||||
version = "0.3.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6"
|
||||
dependencies = [
|
||||
"windows-link 0.1.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-strings"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "87fa48cc5d406560701792be122a10132491cff9d0aeb23583cc2dcafc847319"
|
||||
dependencies = [
|
||||
"windows-link 0.1.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.61.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
|
||||
dependencies = [
|
||||
"windows-link 0.2.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-targets"
|
||||
version = "0.53.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3"
|
||||
dependencies = [
|
||||
"windows-link 0.2.1",
|
||||
"windows_aarch64_gnullvm",
|
||||
"windows_aarch64_msvc",
|
||||
"windows_i686_gnu",
|
||||
"windows_i686_gnullvm",
|
||||
"windows_i686_msvc",
|
||||
"windows_x86_64_gnu",
|
||||
"windows_x86_64_gnullvm",
|
||||
"windows_x86_64_msvc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_gnullvm"
|
||||
version = "0.53.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53"
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_msvc"
|
||||
version = "0.53.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnu"
|
||||
version = "0.53.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnullvm"
|
||||
version = "0.53.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_msvc"
|
||||
version = "0.53.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnu"
|
||||
version = "0.53.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnullvm"
|
||||
version = "0.53.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_msvc"
|
||||
version = "0.53.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650"
|
||||
|
||||
[[package]]
|
||||
name = "zmij"
|
||||
version = "1.0.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"
|
||||
34
agent_rust/lea_uia/Cargo.toml
Normal file
34
agent_rust/lea_uia/Cargo.toml
Normal file
@@ -0,0 +1,34 @@
|
||||
[package]
|
||||
name = "lea_uia"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
authors = ["Dom <dom@rpa-vision-v3>"]
|
||||
description = "Helper Windows UI Automation pour Léa (agent RPA V3)"
|
||||
license = "Proprietary"
|
||||
|
||||
[[bin]]
|
||||
name = "lea_uia"
|
||||
path = "src/main.rs"
|
||||
|
||||
[dependencies]
|
||||
clap = { version = "4.5", features = ["derive"] }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
|
||||
[target.'cfg(windows)'.dependencies]
|
||||
windows = { version = "0.59", features = [
|
||||
"Win32_Foundation",
|
||||
"Win32_System_Com",
|
||||
"Win32_System_Ole",
|
||||
"Win32_System_Variant",
|
||||
"Win32_UI_Accessibility",
|
||||
"Win32_UI_WindowsAndMessaging",
|
||||
"Win32_Graphics_Gdi",
|
||||
] }
|
||||
|
||||
[profile.release]
|
||||
opt-level = "z" # Taille minimale
|
||||
lto = true # Link-time optimization
|
||||
codegen-units = 1 # Meilleure optimisation
|
||||
strip = true # Retirer les symboles
|
||||
panic = "abort" # Pas d'unwinding → binaire plus petit
|
||||
564
agent_rust/lea_uia/src/main.rs
Normal file
564
agent_rust/lea_uia/src/main.rs
Normal file
@@ -0,0 +1,564 @@
|
||||
// lea_uia — Helper Windows UI Automation pour Léa
|
||||
//
|
||||
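// Binaire standalone qui expose 4 commandes UIA :
|
||||
// query → retourne l'élément UIA à une position (x, y)
|
||||
// find → retrouve un élément par nom, type de contrôle, AutomationId et/ou fenêtre
|
||||
// capture → liste les éléments visibles de la fenêtre active (debug)
|
||||
// health → vérifie que UIA est disponible et fonctionnel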
|
||||
//
|
||||
// Communication avec l'agent Python via stdin/stdout JSON.
|
||||
// Tous les appels sont non-bloquants et retournent du JSON structuré.
|
||||
//
|
||||
// Sur Linux (développement) : retourne des stubs d'erreur.
|
||||
// Sur Windows : utilise UIAutomationCore via `windows-rs`.
|
||||
|
||||
use clap::{Parser, Subcommand};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Parser)]
|
||||
#[command(name = "lea_uia")]
|
||||
#[command(about = "Helper UI Automation pour Léa", long_about = None)]
|
||||
#[command(version)]
|
||||
struct Cli {
|
||||
#[command(subcommand)]
|
||||
command: Commands,
|
||||
}
|
||||
|
||||
#[derive(Subcommand)]
|
||||
enum Commands {
|
||||
/// Retourner l'élément UIA à une position donnée (x, y en pixels écran)
|
||||
Query {
|
||||
/// Coordonnée X (pixels)
|
||||
#[arg(long)]
|
||||
x: i32,
|
||||
/// Coordonnée Y (pixels)
|
||||
#[arg(long)]
|
||||
y: i32,
|
||||
/// Inclure la hiérarchie des parents (peut être lent)
|
||||
#[arg(long, default_value_t = true)]
|
||||
with_parents: bool,
|
||||
},
|
||||
/// Rechercher un élément par son chemin logique ou son nom
|
||||
Find {
|
||||
/// Nom de l'élément (Name property)
|
||||
#[arg(long)]
|
||||
name: Option<String>,
|
||||
/// Type de contrôle (Button, Edit, MenuItem, etc.)
|
||||
#[arg(long)]
|
||||
control_type: Option<String>,
|
||||
/// AutomationId
|
||||
#[arg(long)]
|
||||
automation_id: Option<String>,
|
||||
/// Limite la recherche à cette fenêtre (titre exact)
|
||||
#[arg(long)]
|
||||
window: Option<String>,
|
||||
/// Timeout en millisecondes
|
||||
#[arg(long, default_value_t = 2000)]
|
||||
timeout_ms: u32,
|
||||
},
|
||||
/// Lister tous les éléments visibles de la fenêtre active (debug)
|
||||
Capture {
|
||||
/// Profondeur maximale de l'arbre
|
||||
#[arg(long, default_value_t = 3)]
|
||||
max_depth: u32,
|
||||
},
|
||||
/// Vérifier que UIA est disponible et fonctionnel
|
||||
Health,
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Modèles de sortie JSON
|
||||
// =========================================================================
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
struct UiaElement {
|
||||
/// Nom visible de l'élément
|
||||
name: String,
|
||||
/// Type de contrôle (Button, Edit, MenuItem, Window, ...)
|
||||
control_type: String,
|
||||
/// Classe Windows (Edit, Static, #32770, ...)
|
||||
class_name: String,
|
||||
/// AutomationId (ID interne, parfois vide)
|
||||
automation_id: String,
|
||||
/// Rectangle absolu [x1, y1, x2, y2] en pixels écran
|
||||
bounding_rect: [i32; 4],
|
||||
/// Est-ce que l'élément est activable
|
||||
is_enabled: bool,
|
||||
/// Est-ce que l'élément est visible
|
||||
is_offscreen: bool,
|
||||
/// Hiérarchie des parents (chemin logique)
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
parent_path: Vec<ParentHint>,
|
||||
/// Process owning this element
|
||||
#[serde(skip_serializing_if = "String::is_empty")]
|
||||
process_name: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
struct ParentHint {
|
||||
name: String,
|
||||
control_type: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
#[serde(tag = "status")]
|
||||
enum UiaResponse {
|
||||
#[serde(rename = "ok")]
|
||||
Ok {
|
||||
element: Option<UiaElement>,
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
elements: Vec<UiaElement>,
|
||||
elapsed_ms: u64,
|
||||
},
|
||||
#[serde(rename = "not_found")]
|
||||
NotFound {
|
||||
reason: String,
|
||||
elapsed_ms: u64,
|
||||
},
|
||||
#[serde(rename = "error")]
|
||||
Error {
|
||||
message: String,
|
||||
code: String,
|
||||
},
|
||||
#[serde(rename = "unavailable")]
|
||||
Unavailable {
|
||||
reason: String,
|
||||
},
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Implémentation Windows
|
||||
// =========================================================================
|
||||
|
||||
#[cfg(windows)]
|
||||
mod uia_impl {
|
||||
use super::*;
|
||||
use std::time::Instant;
|
||||
use windows::Win32::Foundation::POINT;
|
||||
use windows::Win32::System::Com::{
|
||||
CoCreateInstance, CoInitializeEx, CoUninitialize, CLSCTX_INPROC_SERVER,
|
||||
COINIT_APARTMENTTHREADED,
|
||||
};
|
||||
use windows::Win32::UI::Accessibility::{
|
||||
CUIAutomation, IUIAutomation, IUIAutomationElement, IUIAutomationTreeWalker,
|
||||
};
|
||||
|
||||
struct ComGuard;
|
||||
impl ComGuard {
|
||||
fn new() -> windows::core::Result<Self> {
|
||||
unsafe {
|
||||
let hr = CoInitializeEx(None, COINIT_APARTMENTTHREADED);
|
||||
if hr.is_err() {
|
||||
// RPC_E_CHANGED_MODE : le thread est déjà initialisé → OK
|
||||
let code = hr.0 as u32;
|
||||
if code != 0x80010106 {
|
||||
return Err(windows::core::Error::from(hr));
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(Self)
|
||||
}
|
||||
}
|
||||
impl Drop for ComGuard {
|
||||
fn drop(&mut self) {
|
||||
unsafe { CoUninitialize() };
|
||||
}
|
||||
}
|
||||
|
||||
fn get_automation() -> windows::core::Result<IUIAutomation> {
|
||||
unsafe { CoCreateInstance(&CUIAutomation, None, CLSCTX_INPROC_SERVER) }
|
||||
}
|
||||
|
||||
fn element_to_struct(
|
||||
element: &IUIAutomationElement,
|
||||
with_parents: bool,
|
||||
) -> windows::core::Result<UiaElement> {
|
||||
let mut result = UiaElement {
|
||||
name: String::new(),
|
||||
control_type: String::new(),
|
||||
class_name: String::new(),
|
||||
automation_id: String::new(),
|
||||
bounding_rect: [0, 0, 0, 0],
|
||||
is_enabled: false,
|
||||
is_offscreen: true,
|
||||
parent_path: Vec::new(),
|
||||
process_name: String::new(),
|
||||
};
|
||||
|
||||
unsafe {
|
||||
if let Ok(name) = element.CurrentName() {
|
||||
result.name = name.to_string();
|
||||
}
|
||||
if let Ok(ct) = element.CurrentLocalizedControlType() {
|
||||
result.control_type = ct.to_string();
|
||||
}
|
||||
if let Ok(cn) = element.CurrentClassName() {
|
||||
result.class_name = cn.to_string();
|
||||
}
|
||||
if let Ok(aid) = element.CurrentAutomationId() {
|
||||
result.automation_id = aid.to_string();
|
||||
}
|
||||
if let Ok(rect) = element.CurrentBoundingRectangle() {
|
||||
result.bounding_rect = [rect.left, rect.top, rect.right, rect.bottom];
|
||||
}
|
||||
if let Ok(enabled) = element.CurrentIsEnabled() {
|
||||
result.is_enabled = enabled.as_bool();
|
||||
}
|
||||
if let Ok(offscreen) = element.CurrentIsOffscreen() {
|
||||
result.is_offscreen = offscreen.as_bool();
|
||||
}
|
||||
if with_parents {
|
||||
// Remonter la hiérarchie jusqu'à la Window root
|
||||
if let Ok(automation) = get_automation() {
|
||||
let walker = automation.ControlViewWalker();
|
||||
if let Ok(walker) = walker {
|
||||
let mut current = element.clone();
|
||||
for _ in 0..10 {
|
||||
match walker.GetParentElement(¤t) {
|
||||
Ok(parent) => {
|
||||
let name = parent
|
||||
.CurrentName()
|
||||
.map(|n| n.to_string())
|
||||
.unwrap_or_default();
|
||||
let ct = parent
|
||||
.CurrentLocalizedControlType()
|
||||
.map(|c| c.to_string())
|
||||
.unwrap_or_default();
|
||||
if name.is_empty() && ct.is_empty() {
|
||||
break;
|
||||
}
|
||||
result.parent_path.insert(
|
||||
0,
|
||||
ParentHint {
|
||||
name,
|
||||
control_type: ct,
|
||||
},
|
||||
);
|
||||
current = parent;
|
||||
}
|
||||
Err(_) => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub fn query_at_point(x: i32, y: i32, with_parents: bool) -> UiaResponse {
|
||||
let start = Instant::now();
|
||||
let _com = match ComGuard::new() {
|
||||
Ok(g) => g,
|
||||
Err(e) => {
|
||||
return UiaResponse::Error {
|
||||
message: format!("CoInitializeEx: {}", e),
|
||||
code: "com_init_failed".into(),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let automation = match get_automation() {
|
||||
Ok(a) => a,
|
||||
Err(e) => {
|
||||
return UiaResponse::Error {
|
||||
message: format!("CUIAutomation: {}", e),
|
||||
code: "automation_failed".into(),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let point = POINT { x, y };
|
||||
let element = unsafe { automation.ElementFromPoint(point) };
|
||||
match element {
|
||||
Ok(el) => match element_to_struct(&el, with_parents) {
|
||||
Ok(e) => UiaResponse::Ok {
|
||||
element: Some(e),
|
||||
elements: Vec::new(),
|
||||
elapsed_ms: start.elapsed().as_millis() as u64,
|
||||
},
|
||||
Err(e) => UiaResponse::Error {
|
||||
message: format!("element_to_struct: {}", e),
|
||||
code: "extract_failed".into(),
|
||||
},
|
||||
},
|
||||
Err(_) => UiaResponse::NotFound {
|
||||
reason: format!("Aucun élément UIA à ({}, {})", x, y),
|
||||
elapsed_ms: start.elapsed().as_millis() as u64,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn find_element(
|
||||
name: Option<String>,
|
||||
_control_type: Option<String>,
|
||||
_automation_id: Option<String>,
|
||||
_window: Option<String>,
|
||||
_timeout_ms: u32,
|
||||
) -> UiaResponse {
|
||||
let start = Instant::now();
|
||||
let _com = match ComGuard::new() {
|
||||
Ok(g) => g,
|
||||
Err(e) => {
|
||||
return UiaResponse::Error {
|
||||
message: format!("CoInitializeEx: {}", e),
|
||||
code: "com_init_failed".into(),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let automation = match get_automation() {
|
||||
Ok(a) => a,
|
||||
Err(e) => {
|
||||
return UiaResponse::Error {
|
||||
message: format!("CUIAutomation: {}", e),
|
||||
code: "automation_failed".into(),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let root = match unsafe { automation.GetRootElement() } {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
return UiaResponse::Error {
|
||||
message: format!("GetRootElement: {}", e),
|
||||
code: "root_failed".into(),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Recherche simple par parcours d'arbre (MVP)
|
||||
// L'arbre UIA peut être énorme → on limite la profondeur
|
||||
if let Some(target_name) = name {
|
||||
let walker = unsafe { automation.ControlViewWalker() };
|
||||
if let Ok(walker) = walker {
|
||||
if let Some(found) =
|
||||
walk_and_find(&walker, &root, &target_name, 0, 6, &_control_type, &_automation_id)
|
||||
{
|
||||
match element_to_struct(&found, true) {
|
||||
Ok(e) => {
|
||||
return UiaResponse::Ok {
|
||||
element: Some(e),
|
||||
elements: Vec::new(),
|
||||
elapsed_ms: start.elapsed().as_millis() as u64,
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
return UiaResponse::Error {
|
||||
message: format!("element_to_struct: {}", e),
|
||||
code: "extract_failed".into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
UiaResponse::NotFound {
|
||||
reason: "Aucun élément trouvé".into(),
|
||||
elapsed_ms: start.elapsed().as_millis() as u64,
|
||||
}
|
||||
}
|
||||
|
||||
/// Parcours récursif de l'arbre UIA pour trouver un élément par nom
|
||||
fn walk_and_find(
|
||||
walker: &IUIAutomationTreeWalker,
|
||||
element: &IUIAutomationElement,
|
||||
target_name: &str,
|
||||
depth: u32,
|
||||
max_depth: u32,
|
||||
target_control_type: &Option<String>,
|
||||
target_automation_id: &Option<String>,
|
||||
) -> Option<IUIAutomationElement> {
|
||||
if depth > max_depth {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Tester l'élément courant
|
||||
unsafe {
|
||||
if let Ok(name) = element.CurrentName() {
|
||||
if name.to_string() == target_name {
|
||||
// Vérifier les filtres additionnels
|
||||
let mut matches = true;
|
||||
if let Some(ct) = target_control_type {
|
||||
if let Ok(local_ct) = element.CurrentLocalizedControlType() {
|
||||
if !local_ct.to_string().to_lowercase().contains(&ct.to_lowercase()) {
|
||||
matches = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
if matches {
|
||||
if let Some(aid) = target_automation_id {
|
||||
if let Ok(local_aid) = element.CurrentAutomationId() {
|
||||
if local_aid.to_string() != *aid {
|
||||
matches = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if matches {
|
||||
return Some(element.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Parcourir les enfants
|
||||
if let Ok(first_child) = walker.GetFirstChildElement(element) {
|
||||
let mut current = first_child;
|
||||
loop {
|
||||
if let Some(found) = walk_and_find(
|
||||
walker,
|
||||
¤t,
|
||||
target_name,
|
||||
depth + 1,
|
||||
max_depth,
|
||||
target_control_type,
|
||||
target_automation_id,
|
||||
) {
|
||||
return Some(found);
|
||||
}
|
||||
match walker.GetNextSiblingElement(¤t) {
|
||||
Ok(next) => current = next,
|
||||
Err(_) => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
pub fn capture_tree(_max_depth: u32) -> UiaResponse {
|
||||
let start = Instant::now();
|
||||
let _com = match ComGuard::new() {
|
||||
Ok(g) => g,
|
||||
Err(e) => {
|
||||
return UiaResponse::Error {
|
||||
message: format!("CoInitializeEx: {}", e),
|
||||
code: "com_init_failed".into(),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let automation = match get_automation() {
|
||||
Ok(a) => a,
|
||||
Err(e) => {
|
||||
return UiaResponse::Error {
|
||||
message: format!("CUIAutomation: {}", e),
|
||||
code: "automation_failed".into(),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let focused = unsafe { automation.GetFocusedElement() };
|
||||
match focused {
|
||||
Ok(el) => match element_to_struct(&el, true) {
|
||||
Ok(e) => UiaResponse::Ok {
|
||||
element: Some(e),
|
||||
elements: Vec::new(),
|
||||
elapsed_ms: start.elapsed().as_millis() as u64,
|
||||
},
|
||||
Err(e) => UiaResponse::Error {
|
||||
message: format!("element_to_struct: {}", e),
|
||||
code: "extract_failed".into(),
|
||||
},
|
||||
},
|
||||
Err(e) => UiaResponse::Error {
|
||||
message: format!("GetFocusedElement: {}", e),
|
||||
code: "focused_failed".into(),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn health_check() -> UiaResponse {
|
||||
let _com = match ComGuard::new() {
|
||||
Ok(g) => g,
|
||||
Err(e) => {
|
||||
return UiaResponse::Unavailable {
|
||||
reason: format!("COM init failed: {}", e),
|
||||
}
|
||||
}
|
||||
};
|
||||
match get_automation() {
|
||||
Ok(_) => UiaResponse::Ok {
|
||||
element: None,
|
||||
elements: Vec::new(),
|
||||
elapsed_ms: 0,
|
||||
},
|
||||
Err(e) => UiaResponse::Unavailable {
|
||||
reason: format!("UIA not available: {}", e),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Stub Linux (pour développement et tests)
|
||||
// =========================================================================
|
||||
|
||||
#[cfg(not(windows))]
|
||||
mod uia_impl {
|
||||
use super::*;
|
||||
|
||||
pub fn query_at_point(_x: i32, _y: i32, _with_parents: bool) -> UiaResponse {
|
||||
UiaResponse::Unavailable {
|
||||
reason: "UIA n'est disponible que sur Windows".into(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn find_element(
|
||||
_name: Option<String>,
|
||||
_control_type: Option<String>,
|
||||
_automation_id: Option<String>,
|
||||
_window: Option<String>,
|
||||
_timeout_ms: u32,
|
||||
) -> UiaResponse {
|
||||
UiaResponse::Unavailable {
|
||||
reason: "UIA n'est disponible que sur Windows".into(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn capture_tree(_max_depth: u32) -> UiaResponse {
|
||||
UiaResponse::Unavailable {
|
||||
reason: "UIA n'est disponible que sur Windows".into(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn health_check() -> UiaResponse {
|
||||
UiaResponse::Unavailable {
|
||||
reason: "UIA n'est disponible que sur Windows".into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Main
|
||||
// =========================================================================
|
||||
|
||||
fn main() {
|
||||
let cli = Cli::parse();
|
||||
|
||||
let response = match cli.command {
|
||||
Commands::Query {
|
||||
x,
|
||||
y,
|
||||
with_parents,
|
||||
} => uia_impl::query_at_point(x, y, with_parents),
|
||||
Commands::Find {
|
||||
name,
|
||||
control_type,
|
||||
automation_id,
|
||||
window,
|
||||
timeout_ms,
|
||||
} => uia_impl::find_element(name, control_type, automation_id, window, timeout_ms),
|
||||
Commands::Capture { max_depth } => uia_impl::capture_tree(max_depth),
|
||||
Commands::Health => uia_impl::health_check(),
|
||||
};
|
||||
|
||||
// Sortie JSON sur stdout
|
||||
match serde_json::to_string(&response) {
|
||||
Ok(json) => println!("{}", json),
|
||||
Err(e) => {
|
||||
eprintln!("{{\"status\":\"error\",\"message\":\"JSON serialization: {}\"}}", e);
|
||||
std::process::exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,340 +0,0 @@
|
||||
//! Floutage des zones sensibles dans les captures d'ecran.
|
||||
//!
|
||||
//! Detecte les champs de saisie (zones claires rectangulaires) et applique
|
||||
//! un flou gaussien pour proteger les donnees sensibles (mots de passe, etc.).
|
||||
//! Equivalent de agent_v1/vision/blur_sensitive.py.
|
||||
//!
|
||||
//! Algorithme :
|
||||
//! 1. Conversion en niveaux de gris
|
||||
//! 2. Seuillage binaire (detecter les zones claires = champs de saisie)
|
||||
//! 3. Detection de contours rectangulaires > 50px de large
|
||||
//! 4. Application d'un flou gaussien sur les zones detectees
|
||||
//!
|
||||
//! Utilise le crate image pour le traitement et imageproc pour le flou.
|
||||
|
||||
use image::{DynamicImage, GrayImage, Rgba, RgbaImage};
|
||||
|
||||
/// Brightness threshold (0-255) used to detect input fields.
/// Pixels at least this bright are treated as part of a field.
const BRIGHTNESS_THRESHOLD: u8 = 220;

/// Minimum width, in pixels, of a detected input field.
const MIN_FIELD_WIDTH: u32 = 50;

/// Minimum height, in pixels, of a detected input field.
const MIN_FIELD_HEIGHT: u32 = 15;

/// Maximum height of an input field (avoids blurring the whole screen).
const MAX_FIELD_HEIGHT: u32 = 80;

/// Maximum width of a field (avoids false positives on large white areas).
const MAX_FIELD_WIDTH: u32 = 800;

/// Blur strength (sigma).
const BLUR_SIGMA: f32 = 10.0;
|
||||
|
||||
/// Axis-aligned rectangle describing an area to blur, in pixels.
#[derive(Clone, Debug)]
pub struct BlurRegion {
    /// Left edge.
    pub x: u32,
    /// Top edge.
    pub y: u32,
    /// Horizontal extent.
    pub width: u32,
    /// Vertical extent.
    pub height: u32,
}
|
||||
|
||||
/// Detecte les champs de saisie dans une image et les floute.
|
||||
///
|
||||
/// Retourne l'image modifiee avec les zones sensibles floutees.
|
||||
/// Si aucun champ n'est detecte, retourne l'image inchangee.
|
||||
pub fn blur_sensitive_fields(img: &DynamicImage) -> DynamicImage {
|
||||
let regions = detect_input_fields(img);
|
||||
|
||||
if regions.is_empty() {
|
||||
return img.clone();
|
||||
}
|
||||
|
||||
println!(
|
||||
"[BLUR] {} zone(s) sensible(s) detectee(s) — floutage...",
|
||||
regions.len()
|
||||
);
|
||||
|
||||
let mut result = img.to_rgba8();
|
||||
|
||||
for region in ®ions {
|
||||
blur_region(&mut result, region);
|
||||
}
|
||||
|
||||
DynamicImage::ImageRgba8(result)
|
||||
}
|
||||
|
||||
/// Detecte les champs de saisie (zones claires rectangulaires).
|
||||
///
|
||||
/// Algorithme simplifie :
|
||||
/// 1. Convertir en niveaux de gris
|
||||
/// 2. Seuillage binaire
|
||||
/// 3. Scanner les lignes horizontales pour trouver les series de pixels clairs
|
||||
/// 4. Regrouper les series adjacentes en rectangles
|
||||
pub fn detect_input_fields(img: &DynamicImage) -> Vec<BlurRegion> {
|
||||
let gray = img.to_luma8();
|
||||
let (width, height) = gray.dimensions();
|
||||
let mut regions = Vec::new();
|
||||
|
||||
// Creer une image binaire (seuillage)
|
||||
let binary = threshold_image(&gray, BRIGHTNESS_THRESHOLD);
|
||||
|
||||
// Scanner par bandes horizontales pour detecter les champs
|
||||
// On cherche des sequences continues de pixels blancs sur plusieurs lignes
|
||||
let mut y = 0;
|
||||
while y < height {
|
||||
// Pour chaque ligne, trouver les segments horizontaux blancs
|
||||
let segments = find_white_segments(&binary, y, width);
|
||||
|
||||
for (seg_start, seg_end) in &segments {
|
||||
let seg_width = seg_end - seg_start;
|
||||
if seg_width < MIN_FIELD_WIDTH || seg_width > MAX_FIELD_WIDTH {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Verifier combien de lignes consecutives partagent ce segment
|
||||
let field_height = count_vertical_extent(
|
||||
&binary,
|
||||
*seg_start,
|
||||
*seg_end,
|
||||
y,
|
||||
height,
|
||||
);
|
||||
|
||||
if field_height >= MIN_FIELD_HEIGHT && field_height <= MAX_FIELD_HEIGHT {
|
||||
// Verifier que cette region ne chevauche pas une region existante
|
||||
let new_region = BlurRegion {
|
||||
x: *seg_start,
|
||||
y,
|
||||
width: seg_width,
|
||||
height: field_height,
|
||||
};
|
||||
|
||||
if !overlaps_existing(®ions, &new_region) {
|
||||
regions.push(new_region);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Avancer de la hauteur du dernier champ detecte, ou de 1 ligne
|
||||
y += 1;
|
||||
}
|
||||
|
||||
// Deduplication : fusionner les regions tres proches
|
||||
merge_close_regions(&mut regions);
|
||||
|
||||
regions
|
||||
}
|
||||
|
||||
/// Applique un seuillage binaire simple.
|
||||
fn threshold_image(gray: &GrayImage, threshold: u8) -> GrayImage {
|
||||
let (width, height) = gray.dimensions();
|
||||
let mut binary = GrayImage::new(width, height);
|
||||
|
||||
for y in 0..height {
|
||||
for x in 0..width {
|
||||
let pixel = gray.get_pixel(x, y).0[0];
|
||||
if pixel >= threshold {
|
||||
binary.put_pixel(x, y, image::Luma([255]));
|
||||
} else {
|
||||
binary.put_pixel(x, y, image::Luma([0]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
binary
|
||||
}
|
||||
|
||||
/// Trouve les segments horizontaux de pixels blancs sur une ligne.
|
||||
fn find_white_segments(binary: &GrayImage, y: u32, width: u32) -> Vec<(u32, u32)> {
|
||||
let mut segments = Vec::new();
|
||||
let mut in_segment = false;
|
||||
let mut seg_start = 0u32;
|
||||
|
||||
for x in 0..width {
|
||||
let is_white = binary.get_pixel(x, y).0[0] > 128;
|
||||
|
||||
if is_white && !in_segment {
|
||||
seg_start = x;
|
||||
in_segment = true;
|
||||
} else if !is_white && in_segment {
|
||||
segments.push((seg_start, x));
|
||||
in_segment = false;
|
||||
}
|
||||
}
|
||||
|
||||
if in_segment {
|
||||
segments.push((seg_start, width));
|
||||
}
|
||||
|
||||
segments
|
||||
}
|
||||
|
||||
/// Compte le nombre de lignes consecutives ou le segment est blanc.
|
||||
fn count_vertical_extent(
|
||||
binary: &GrayImage,
|
||||
seg_start: u32,
|
||||
seg_end: u32,
|
||||
start_y: u32,
|
||||
max_y: u32,
|
||||
) -> u32 {
|
||||
let mut count = 0u32;
|
||||
let check_width = seg_end - seg_start;
|
||||
let threshold = (check_width as f64 * 0.7) as u32; // 70% doivent etre blancs
|
||||
|
||||
for y in start_y..max_y.min(start_y + MAX_FIELD_HEIGHT + 5) {
|
||||
let mut white_count = 0u32;
|
||||
for x in seg_start..seg_end {
|
||||
if binary.get_pixel(x, y).0[0] > 128 {
|
||||
white_count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if white_count >= threshold {
|
||||
count += 1;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
count
|
||||
}
|
||||
|
||||
/// Verifie si une region chevauche une region existante.
|
||||
fn overlaps_existing(regions: &[BlurRegion], new_region: &BlurRegion) -> bool {
|
||||
for region in regions {
|
||||
let x_overlap = new_region.x < region.x + region.width
|
||||
&& new_region.x + new_region.width > region.x;
|
||||
let y_overlap = new_region.y < region.y + region.height
|
||||
&& new_region.y + new_region.height > region.y;
|
||||
|
||||
if x_overlap && y_overlap {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Fusionne les regions tres proches (< 10px de distance).
|
||||
fn merge_close_regions(regions: &mut Vec<BlurRegion>) {
|
||||
if regions.len() < 2 {
|
||||
return;
|
||||
}
|
||||
|
||||
// Tri par position (y, puis x)
|
||||
regions.sort_by(|a, b| a.y.cmp(&b.y).then(a.x.cmp(&b.x)));
|
||||
|
||||
let mut merged = Vec::new();
|
||||
let mut current = regions[0].clone();
|
||||
|
||||
for region in regions.iter().skip(1) {
|
||||
let x_close = (current.x + current.width + 10 >= region.x)
|
||||
&& (region.x + region.width + 10 >= current.x);
|
||||
let y_close = (current.y + current.height + 5 >= region.y)
|
||||
&& (region.y + region.height + 5 >= current.y);
|
||||
|
||||
if x_close && y_close {
|
||||
// Fusionner
|
||||
let min_x = current.x.min(region.x);
|
||||
let min_y = current.y.min(region.y);
|
||||
let max_x = (current.x + current.width).max(region.x + region.width);
|
||||
let max_y = (current.y + current.height).max(region.y + region.height);
|
||||
|
||||
current = BlurRegion {
|
||||
x: min_x,
|
||||
y: min_y,
|
||||
width: max_x - min_x,
|
||||
height: max_y - min_y,
|
||||
};
|
||||
} else {
|
||||
merged.push(current);
|
||||
current = region.clone();
|
||||
}
|
||||
}
|
||||
merged.push(current);
|
||||
|
||||
*regions = merged;
|
||||
}
|
||||
|
||||
/// Applique un flou gaussien sur une region de l'image.
|
||||
///
|
||||
/// Implementation simplifiee : box blur avec plusieurs passes
|
||||
/// (approximation du gaussien, plus rapide que le vrai gaussien).
|
||||
fn blur_region(img: &mut RgbaImage, region: &BlurRegion) {
|
||||
let (img_w, img_h) = img.dimensions();
|
||||
|
||||
// Borner la region aux dimensions de l'image
|
||||
let x_start = region.x.min(img_w);
|
||||
let y_start = region.y.min(img_h);
|
||||
let x_end = (region.x + region.width).min(img_w);
|
||||
let y_end = (region.y + region.height).min(img_h);
|
||||
|
||||
if x_start >= x_end || y_start >= y_end {
|
||||
return;
|
||||
}
|
||||
|
||||
let radius = BLUR_SIGMA as u32;
|
||||
let kernel_size = (radius * 2 + 1) as i32;
|
||||
let kernel_area = (kernel_size * kernel_size) as u32;
|
||||
|
||||
// Box blur : moyenne des pixels dans un carre de rayon `radius`
|
||||
// On fait 3 passes pour approximer un flou gaussien
|
||||
for _pass in 0..3 {
|
||||
// Copier les pixels de la region dans un buffer temporaire
|
||||
let reg_w = (x_end - x_start) as usize;
|
||||
let reg_h = (y_end - y_start) as usize;
|
||||
let mut buffer: Vec<[u8; 4]> = Vec::with_capacity(reg_w * reg_h);
|
||||
|
||||
for y in y_start..y_end {
|
||||
for x in x_start..x_end {
|
||||
buffer.push(img.get_pixel(x, y).0);
|
||||
}
|
||||
}
|
||||
|
||||
// Appliquer le box blur
|
||||
for y in y_start..y_end {
|
||||
for x in x_start..x_end {
|
||||
let mut sum_r = 0u32;
|
||||
let mut sum_g = 0u32;
|
||||
let mut sum_b = 0u32;
|
||||
let mut count = 0u32;
|
||||
|
||||
for ky in -(radius as i32)..=(radius as i32) {
|
||||
for kx in -(radius as i32)..=(radius as i32) {
|
||||
let sx = x as i32 + kx;
|
||||
let sy = y as i32 + ky;
|
||||
|
||||
if sx >= x_start as i32
|
||||
&& sx < x_end as i32
|
||||
&& sy >= y_start as i32
|
||||
&& sy < y_end as i32
|
||||
{
|
||||
let bx = (sx - x_start as i32) as usize;
|
||||
let by = (sy - y_start as i32) as usize;
|
||||
let pixel = buffer[by * reg_w + bx];
|
||||
sum_r += pixel[0] as u32;
|
||||
sum_g += pixel[1] as u32;
|
||||
sum_b += pixel[2] as u32;
|
||||
count += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if count > 0 {
|
||||
let pixel = Rgba([
|
||||
(sum_r / count) as u8,
|
||||
(sum_g / count) as u8,
|
||||
(sum_b / count) as u8,
|
||||
255,
|
||||
]);
|
||||
img.put_pixel(x, y, pixel);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let _ = kernel_area; // suppress unused warning
|
||||
}
|
||||
@@ -1,115 +0,0 @@
|
||||
//! Capture d'écran via xcap.
|
||||
//!
|
||||
//! Fournit la capture du moniteur principal, l'encodage JPEG en base64,
|
||||
//! et un hash perceptuel rapide pour la déduplication des heartbeats.
|
||||
|
||||
use base64::Engine;
|
||||
use image::codecs::jpeg::JpegEncoder;
|
||||
use image::DynamicImage;
|
||||
use std::io::Cursor;
|
||||
|
||||
/// Capture le moniteur principal et retourne un DynamicImage.
|
||||
///
|
||||
/// Utilise xcap pour la capture cross-platform (DXGI sur Windows, X11/Wayland sur Linux).
|
||||
pub fn capture_screenshot() -> Option<DynamicImage> {
|
||||
let monitors = match xcap::Monitor::all() {
|
||||
Ok(m) => m,
|
||||
Err(e) => {
|
||||
eprintln!("[CAPTURE] Erreur enumeration moniteurs : {}", e);
|
||||
return None;
|
||||
}
|
||||
};
|
||||
|
||||
let primary = monitors
|
||||
.into_iter()
|
||||
.find(|m| m.is_primary().unwrap_or(false));
|
||||
let monitor = match primary {
|
||||
Some(m) => m,
|
||||
None => {
|
||||
eprintln!("[CAPTURE] Aucun moniteur principal trouve");
|
||||
return None;
|
||||
}
|
||||
};
|
||||
|
||||
match monitor.capture_image() {
|
||||
Ok(rgba_image) => Some(DynamicImage::ImageRgba8(rgba_image)),
|
||||
Err(e) => {
|
||||
eprintln!("[CAPTURE] Erreur capture ecran : {}", e);
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Encode une image en JPEG et retourne le résultat en base64.
|
||||
///
|
||||
/// La qualité doit être entre 1 (mauvaise) et 100 (excellente).
|
||||
/// 85 est un bon compromis taille/qualité pour le streaming réseau.
|
||||
pub fn screenshot_to_jpeg_base64(img: &DynamicImage, quality: u8) -> String {
|
||||
let rgb = img.to_rgb8();
|
||||
let mut buffer = Cursor::new(Vec::new());
|
||||
|
||||
let mut encoder = JpegEncoder::new_with_quality(&mut buffer, quality);
|
||||
if let Err(e) = encoder.encode(
|
||||
rgb.as_raw(),
|
||||
rgb.width(),
|
||||
rgb.height(),
|
||||
image::ExtendedColorType::Rgb8,
|
||||
) {
|
||||
eprintln!("[CAPTURE] Erreur encodage JPEG : {}", e);
|
||||
return String::new();
|
||||
}
|
||||
|
||||
base64::engine::general_purpose::STANDARD.encode(buffer.into_inner())
|
||||
}
|
||||
|
||||
/// Encode une image en JPEG et retourne les bytes bruts.
|
||||
pub fn screenshot_to_jpeg_bytes(img: &DynamicImage, quality: u8) -> Vec<u8> {
|
||||
let rgb = img.to_rgb8();
|
||||
let mut buffer = Cursor::new(Vec::new());
|
||||
|
||||
let mut encoder = JpegEncoder::new_with_quality(&mut buffer, quality);
|
||||
if let Err(e) = encoder.encode(
|
||||
rgb.as_raw(),
|
||||
rgb.width(),
|
||||
rgb.height(),
|
||||
image::ExtendedColorType::Rgb8,
|
||||
) {
|
||||
eprintln!("[CAPTURE] Erreur encodage JPEG : {}", e);
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
buffer.into_inner()
|
||||
}
|
||||
|
||||
/// Calcule un hash perceptuel rapide pour la déduplication.
|
||||
///
|
||||
/// Réduit l'image à 16x16 en niveaux de gris, puis calcule
|
||||
/// un hash simple basé sur les pixels. Identique à la logique
|
||||
/// Python (_quick_hash) dans agent_v1.
|
||||
pub fn image_hash(img: &DynamicImage) -> u64 {
|
||||
let small = img.resize_exact(16, 16, image::imageops::FilterType::Nearest);
|
||||
let gray = small.to_luma8();
|
||||
|
||||
// Hash FNV-1a simple sur les pixels (rapide, pas besoin de crypto)
|
||||
let mut hash: u64 = 0xcbf29ce484222325;
|
||||
for pixel in gray.as_raw() {
|
||||
hash ^= *pixel as u64;
|
||||
hash = hash.wrapping_mul(0x100000001b3);
|
||||
}
|
||||
hash
|
||||
}
|
||||
|
||||
/// Retourne les dimensions du moniteur principal (largeur, hauteur).
|
||||
///
|
||||
/// xcap utilise DXGI sur Windows qui retourne toujours les pixels physiques,
|
||||
/// independamment du DPI awareness. Ceci est coherent avec les coordonnees
|
||||
/// physiques d'enigo quand le process est DPI-aware.
|
||||
pub fn screen_dimensions() -> Option<(u32, u32)> {
|
||||
let monitors = xcap::Monitor::all().ok()?;
|
||||
let primary = monitors
|
||||
.into_iter()
|
||||
.find(|m| m.is_primary().unwrap_or(false))?;
|
||||
let w = primary.width().ok()?;
|
||||
let h = primary.height().ok()?;
|
||||
Some((w, h))
|
||||
}
|
||||
@@ -1,123 +0,0 @@
|
||||
//! Chat Léa via Edge en mode app (--app=URL).
|
||||
//!
|
||||
//! Ouvre Edge sans barre d'adresse — rendu propre et professionnel.
|
||||
//! Equivalent de agent_v1/ui/chat_window.py (approche Edge mode app).
|
||||
|
||||
use crate::config::Config;
|
||||
use crate::state::AgentState;
|
||||
use std::sync::Arc;
|
||||
use std::process::Command;
|
||||
|
||||
/// URL du serveur de chat
|
||||
fn chat_url(config: &Config) -> String {
|
||||
config.chat_url()
|
||||
}
|
||||
|
||||
/// Locates the Microsoft Edge executable on Windows.
///
/// Checks the two usual install locations first; on Windows, it then falls
/// back to querying the `App Paths\msedge.exe` registry key through the `reg`
/// command-line tool. Returns `None` when no existing executable is found.
fn find_edge() -> Option<String> {
    const KNOWN_LOCATIONS: [&str; 2] = [
        r"C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe",
        r"C:\Program Files\Microsoft\Edge\Application\msedge.exe",
    ];

    let well_known = KNOWN_LOCATIONS
        .iter()
        .find(|location| std::path::Path::new(location).exists());
    if let Some(location) = well_known {
        return Some(location.to_string());
    }

    // Fall back to the registry (default value of the App Paths key).
    #[cfg(target_os = "windows")]
    {
        let query = std::process::Command::new("reg")
            .args(&["query", r"HKLM\SOFTWARE\Microsoft\Windows\CurrentVersion\App Paths\msedge.exe", "/ve"])
            .output();

        if let Ok(result) = query {
            let stdout = String::from_utf8_lossy(&result.stdout);
            // The value is whatever follows the last "REG_SZ" marker on a line.
            let from_registry = stdout
                .lines()
                .filter(|row| row.contains("REG_SZ"))
                .filter_map(|row| row.split("REG_SZ").last())
                .map(|value| value.trim())
                .find(|value| std::path::Path::new(value).exists());

            if let Some(location) = from_registry {
                return Some(location.to_string());
            }
        }
    }

    None
}
|
||||
|
||||
/// Lance le chat dans un thread.
|
||||
///
|
||||
/// Attend que `state.chat_visible` passe à true, puis ouvre Edge en mode app.
|
||||
/// Quand la fenêtre est fermée, remet `chat_visible` à false.
|
||||
pub fn run_chat_thread(config: &Config, state: Arc<AgentState>) {
|
||||
let url = chat_url(config);
|
||||
let edge_path = find_edge();
|
||||
|
||||
if let Some(ref path) = edge_path {
|
||||
println!("[CHAT] Edge trouvé : {}", path);
|
||||
} else {
|
||||
println!("[CHAT] Edge non trouvé — fallback navigateur par défaut");
|
||||
}
|
||||
|
||||
loop {
|
||||
// Attendre l'activation
|
||||
while !state.chat_visible.load(std::sync::atomic::Ordering::Relaxed) {
|
||||
if !state.is_running() {
|
||||
println!("[CHAT] Arrêt du thread chat");
|
||||
return;
|
||||
}
|
||||
std::thread::sleep(std::time::Duration::from_millis(200));
|
||||
}
|
||||
|
||||
println!("[CHAT] Ouverture du chat...");
|
||||
println!("[CHAT] URL : {}", url);
|
||||
|
||||
let result = if let Some(ref path) = edge_path {
|
||||
// Edge en mode app — fenêtre propre sans barre d'adresse
|
||||
Command::new(path)
|
||||
.args(&[
|
||||
&format!("--app={}", url),
|
||||
"--window-size=600,800",
|
||||
"--window-position=1300,200",
|
||||
"--disable-extensions",
|
||||
"--no-first-run",
|
||||
])
|
||||
.spawn()
|
||||
} else {
|
||||
// Fallback : ouvrir dans le navigateur par défaut
|
||||
#[cfg(target_os = "windows")]
|
||||
{
|
||||
Command::new("cmd")
|
||||
.args(&["/C", "start", &url])
|
||||
.spawn()
|
||||
}
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
{
|
||||
Command::new("xdg-open")
|
||||
.arg(&url)
|
||||
.spawn()
|
||||
}
|
||||
};
|
||||
|
||||
match result {
|
||||
Ok(mut child) => {
|
||||
println!("[CHAT] Fenêtre ouverte (PID: {:?})", child.id());
|
||||
// Attendre que la fenêtre se ferme
|
||||
let _ = child.wait();
|
||||
println!("[CHAT] Fenêtre fermée");
|
||||
}
|
||||
Err(e) => {
|
||||
println!("[CHAT] Erreur ouverture : {}", e);
|
||||
}
|
||||
}
|
||||
|
||||
// Marquer comme invisible
|
||||
state.chat_visible.store(false, std::sync::atomic::Ordering::Relaxed);
|
||||
|
||||
// Petit délai avant de pouvoir réouvrir
|
||||
std::thread::sleep(std::time::Duration::from_millis(500));
|
||||
}
|
||||
}
|
||||
@@ -1,246 +0,0 @@
|
||||
//! Configuration de l'agent RPA.
|
||||
//!
|
||||
//! Parametres charges depuis les variables d'environnement ou valeurs par defaut.
|
||||
//! Un fichier `config.txt` (clé=valeur) peut être placé à côté de l'exécutable.
|
||||
//! Les variables d'environnement ont priorité sur le fichier.
|
||||
//! Compatible avec la configuration Python (agent_v1/config.py).
|
||||
|
||||
use std::env;
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// Version string of the Rust agent.
pub const AGENT_VERSION: &str = "0.2.0-rust";
|
||||
|
||||
/// Complete agent configuration.
///
/// Built by `Config::from_env`, which documents the environment variable and
/// default value behind each field.
#[derive(Debug, Clone)]
pub struct Config {
    /// Base URL of the streaming server (e.g. http://192.168.1.10:5005/api/v1)
    pub server_url: String,

    /// Unique machine identifier (`<hostname>_<os>` by default)
    pub machine_id: String,

    /// Port of the embedded capture HTTP server (default: 5006)
    pub capture_port: u16,

    /// Heartbeat interval, in seconds
    pub heartbeat_interval_s: u64,

    /// Replay polling interval, in seconds
    pub replay_poll_interval_s: f64,

    /// JPEG quality for uploaded screenshots (1-100)
    pub jpeg_quality: u8,

    /// Blur sensitive areas in captures (default: true)
    pub blur_sensitive: bool,

    /// Log retention in days (Article 12, AI Act; default: 180)
    pub log_retention_days: u32,

    /// Port of the chat server (default: 5004)
    pub chat_port: u16,

    /// Bearer token for API authentication (default: empty = no auth)
    pub api_token: String,
}
|
||||
|
||||
impl Config {
|
||||
/// Charge le fichier `config.txt` situé à côté de l'exécutable (ou dans le dossier courant).
|
||||
///
|
||||
/// Format : une ligne par clé, `CLÉ=VALEUR`. Les lignes vides et celles commençant
|
||||
/// par `#` sont ignorées. Seules les clés **absentes** de l'environnement sont injectées
|
||||
/// (les variables d'environnement ont toujours priorité).
|
||||
fn load_config_file() {
|
||||
// 1. Chercher config.txt à côté de l'exécutable
|
||||
let mut config_path: Option<PathBuf> = None;
|
||||
|
||||
if let Ok(exe) = env::current_exe() {
|
||||
let candidate = exe.parent().map(|p| p.join("config.txt"));
|
||||
if let Some(ref p) = candidate {
|
||||
if p.is_file() {
|
||||
config_path = candidate;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Fallback : dossier courant
|
||||
if config_path.is_none() {
|
||||
let cwd_candidate = PathBuf::from("config.txt");
|
||||
if cwd_candidate.is_file() {
|
||||
config_path = Some(cwd_candidate);
|
||||
}
|
||||
}
|
||||
|
||||
let path = match config_path {
|
||||
Some(p) => p,
|
||||
None => return, // Pas de fichier config — ce n'est pas une erreur
|
||||
};
|
||||
|
||||
let content = match fs::read_to_string(&path) {
|
||||
Ok(c) => c,
|
||||
Err(e) => {
|
||||
eprintln!("[config] Impossible de lire {} : {}", path.display(), e);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
eprintln!("[config] Chargement de {}", path.display());
|
||||
|
||||
for line in content.lines() {
|
||||
let trimmed = line.trim();
|
||||
|
||||
// Ignorer les lignes vides et les commentaires
|
||||
if trimmed.is_empty() || trimmed.starts_with('#') {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Séparer au premier '='
|
||||
if let Some(eq_pos) = trimmed.find('=') {
|
||||
let key = trimmed[..eq_pos].trim();
|
||||
let value = trimmed[eq_pos + 1..].trim();
|
||||
|
||||
if key.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Ne positionner que si la variable n'existe pas déjà
|
||||
if env::var(key).is_err() {
|
||||
// SAFETY: appelé une seule fois au démarrage, avant tout thread
|
||||
unsafe {
|
||||
env::set_var(key, value);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Charge la configuration depuis les variables d'environnement.
|
||||
///
|
||||
/// Le fichier `config.txt` est lu en premier (voir [`load_config_file`]) ;
|
||||
/// les variables d'environnement déjà définies ne sont pas écrasées.
|
||||
///
|
||||
/// Variables supportees :
|
||||
/// - `RPA_SERVER_URL` : URL du serveur (defaut: http://localhost:5005/api/v1)
|
||||
/// - `RPA_MACHINE_ID` : Identifiant machine (defaut: hostname_os)
|
||||
/// - `RPA_CAPTURE_PORT` : Port du serveur de capture (defaut: 5006)
|
||||
/// - `RPA_HEARTBEAT_INTERVAL` : Intervalle heartbeat en secondes (defaut: 5)
|
||||
/// - `RPA_JPEG_QUALITY` : Qualite JPEG (defaut: 85)
|
||||
/// - `RPA_BLUR_SENSITIVE` : Flouter les zones sensibles (defaut: true)
|
||||
/// - `RPA_LOG_RETENTION_DAYS` : Retention des logs en jours (defaut: 180)
|
||||
/// - `RPA_CHAT_PORT` : Port du serveur de chat (defaut: 5004)
|
||||
/// - `RPA_API_TOKEN` : Token Bearer pour l'authentification (defaut: vide)
|
||||
pub fn from_env() -> Self {
|
||||
// Charger config.txt AVANT de lire les variables d'environnement
|
||||
Self::load_config_file();
|
||||
let machine_id = env::var("RPA_MACHINE_ID").unwrap_or_else(|_| {
|
||||
let host = hostname::get()
|
||||
.map(|h| h.to_string_lossy().to_string())
|
||||
.unwrap_or_else(|_| "unknown".to_string());
|
||||
let os_name = if cfg!(target_os = "windows") {
|
||||
"windows"
|
||||
} else if cfg!(target_os = "linux") {
|
||||
"linux"
|
||||
} else {
|
||||
"unknown"
|
||||
};
|
||||
format!("{}_{}", host, os_name)
|
||||
});
|
||||
|
||||
let server_url = env::var("RPA_SERVER_URL")
|
||||
.unwrap_or_else(|_| "http://localhost:5005/api/v1".to_string());
|
||||
|
||||
let capture_port = env::var("RPA_CAPTURE_PORT")
|
||||
.ok()
|
||||
.and_then(|v| v.parse().ok())
|
||||
.unwrap_or(5006);
|
||||
|
||||
let heartbeat_interval_s = env::var("RPA_HEARTBEAT_INTERVAL")
|
||||
.ok()
|
||||
.and_then(|v| v.parse().ok())
|
||||
.unwrap_or(5);
|
||||
|
||||
let jpeg_quality = env::var("RPA_JPEG_QUALITY")
|
||||
.ok()
|
||||
.and_then(|v| v.parse().ok())
|
||||
.unwrap_or(85);
|
||||
|
||||
let blur_sensitive = env::var("RPA_BLUR_SENSITIVE")
|
||||
.map(|v| v != "0" && v.to_lowercase() != "false")
|
||||
.unwrap_or(true);
|
||||
|
||||
let log_retention_days = env::var("RPA_LOG_RETENTION_DAYS")
|
||||
.ok()
|
||||
.and_then(|v| v.parse().ok())
|
||||
.unwrap_or(180);
|
||||
|
||||
let chat_port = env::var("RPA_CHAT_PORT")
|
||||
.ok()
|
||||
.and_then(|v| v.parse().ok())
|
||||
.unwrap_or(5004);
|
||||
|
||||
let api_token = env::var("RPA_API_TOKEN").unwrap_or_default();
|
||||
|
||||
Config {
|
||||
server_url,
|
||||
machine_id,
|
||||
capture_port,
|
||||
heartbeat_interval_s,
|
||||
replay_poll_interval_s: 1.0,
|
||||
jpeg_quality,
|
||||
blur_sensitive,
|
||||
log_retention_days,
|
||||
chat_port,
|
||||
api_token,
|
||||
}
|
||||
}
|
||||
|
||||
/// URL de base pour le streaming (ex: http://...:5005/api/v1/traces/stream)
|
||||
pub fn streaming_url(&self) -> String {
|
||||
format!("{}/traces/stream", self.server_url)
|
||||
}
|
||||
|
||||
/// Session ID pour le heartbeat permanent (sans session active)
|
||||
pub fn bg_session_id(&self) -> String {
|
||||
format!("bg_{}", self.machine_id)
|
||||
}
|
||||
|
||||
/// Session ID pour le polling replay (sans session active)
|
||||
pub fn agent_session_id(&self) -> String {
|
||||
format!("agent_{}", self.machine_id)
|
||||
}
|
||||
|
||||
/// URL du serveur de chat.
|
||||
pub fn chat_url(&self) -> String {
|
||||
// Extraire le host du server_url
|
||||
let base = &self.server_url;
|
||||
if let Some(host_start) = base.find("://") {
|
||||
let after_scheme = &base[host_start + 3..];
|
||||
if let Some(colon_pos) = after_scheme.find(':') {
|
||||
let host = &after_scheme[..colon_pos];
|
||||
return format!(
|
||||
"http://{}:{}/?machine_id={}",
|
||||
host, self.chat_port, self.machine_id
|
||||
);
|
||||
}
|
||||
}
|
||||
format!(
|
||||
"http://localhost:{}/?machine_id={}",
|
||||
self.chat_port, self.machine_id
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Config {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"Config {{ server: {}, machine: {}, capture_port: {}, heartbeat: {}s, jpeg_q: {}, blur: {}, log_retention: {}j, chat_port: {}, auth: {} }}",
|
||||
self.server_url, self.machine_id, self.capture_port,
|
||||
self.heartbeat_interval_s, self.jpeg_quality,
|
||||
self.blur_sensitive, self.log_retention_days, self.chat_port,
|
||||
if self.api_token.is_empty() { "none" } else { "Bearer" },
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -1,384 +0,0 @@
|
||||
//! Exécuteur d'actions pour le replay.
|
||||
//!
|
||||
//! Simule les clics souris, la saisie de texte, les combos clavier et les attentes.
|
||||
//! Utilise enigo pour la simulation, compatible Windows et Linux.
|
||||
//! Reproduit le comportement de agent_v1/core/executor.py.
|
||||
|
||||
use crate::config::Config;
|
||||
use crate::network::{Action, ActionResult};
|
||||
use crate::visual;
|
||||
use enigo::{
|
||||
Coordinate, Direction, Enigo, Key, Keyboard, Mouse, Settings,
|
||||
};
|
||||
use std::thread;
|
||||
use std::time::Duration;
|
||||
|
||||
/// Exécute une action de replay et retourne le résultat.
|
||||
///
|
||||
/// Dispatche vers le bon handler selon le type d'action.
|
||||
/// Les coordonnées x_pct/y_pct (0.0-1.0) sont converties en pixels
|
||||
/// à partir des dimensions de l'écran.
|
||||
/// Si visual_mode est activé, résout d'abord la cible via le serveur.
|
||||
pub fn execute_action(
|
||||
action: &Action,
|
||||
screen_width: u32,
|
||||
screen_height: u32,
|
||||
config: &Config,
|
||||
) -> ActionResult {
|
||||
match action.action_type.as_str() {
|
||||
"click" => execute_click(action, screen_width, screen_height, config),
|
||||
"type" => execute_type(action, screen_width, screen_height, config),
|
||||
"key_combo" => execute_key_combo(action),
|
||||
"scroll" => execute_scroll(action, screen_width, screen_height),
|
||||
"wait" => execute_wait(action),
|
||||
_ => ActionResult::error(
|
||||
&action.action_id,
|
||||
&format!("Type d'action inconnu : {}", action.action_type),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
/// Résout les coordonnées visuellement si visual_mode est activé.
|
||||
///
|
||||
/// Si la résolution échoue, retourne les coordonnées de fallback (blind).
|
||||
/// Si visual_mode est désactivé ou target_spec absent, retourne les coordonnées originales.
|
||||
fn resolve_coordinates(
|
||||
action: &Action,
|
||||
screen_width: u32,
|
||||
screen_height: u32,
|
||||
config: &Config,
|
||||
) -> (f64, f64) {
|
||||
let mut x_pct = action.x_pct;
|
||||
let mut y_pct = action.y_pct;
|
||||
|
||||
if action.visual_mode && !action.target_spec.is_null() {
|
||||
println!(
|
||||
" [VISUAL] Mode visuel active — resolution de la cible..."
|
||||
);
|
||||
match visual::resolve_target_visual(
|
||||
config,
|
||||
&action.target_spec,
|
||||
x_pct,
|
||||
y_pct,
|
||||
screen_width,
|
||||
screen_height,
|
||||
) {
|
||||
Some((rx, ry)) => {
|
||||
println!(" [VISUAL] Resolu : ({:.4}, {:.4})", rx, ry);
|
||||
x_pct = rx;
|
||||
y_pct = ry;
|
||||
}
|
||||
None => {
|
||||
println!(
|
||||
" [VISUAL] Echec — fallback coordonnees aveugles ({:.4}, {:.4})",
|
||||
x_pct, y_pct
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(x_pct, y_pct)
|
||||
}
|
||||
|
||||
/// Exécute un clic souris aux coordonnées normalisées.
|
||||
/// Résout visuellement la cible si visual_mode est activé.
|
||||
fn execute_click(action: &Action, screen_width: u32, screen_height: u32, config: &Config) -> ActionResult {
|
||||
let (x_pct, y_pct) = resolve_coordinates(action, screen_width, screen_height, config);
|
||||
let real_x = (x_pct * screen_width as f64) as i32;
|
||||
let real_y = (y_pct * screen_height as f64) as i32;
|
||||
|
||||
println!(
|
||||
" [CLICK] ({:.4}, {:.4}) -> ({}, {}) sur ({}x{}), bouton={}{}",
|
||||
x_pct, y_pct, real_x, real_y, screen_width, screen_height, action.button,
|
||||
if action.visual_mode { " [VISUAL]" } else { "" }
|
||||
);
|
||||
|
||||
let mut enigo = match Enigo::new(&Settings::default()) {
|
||||
Ok(e) => e,
|
||||
Err(e) => {
|
||||
return ActionResult::error(
|
||||
&action.action_id,
|
||||
&format!("Impossible d'initialiser enigo : {}", e),
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
// Déplacer la souris
|
||||
if let Err(e) = enigo.move_mouse(real_x, real_y, Coordinate::Abs) {
|
||||
return ActionResult::error(
|
||||
&action.action_id,
|
||||
&format!("Erreur deplacement souris : {}", e),
|
||||
);
|
||||
}
|
||||
|
||||
// Petit délai pour simuler le temps de réaction humain
|
||||
thread::sleep(Duration::from_millis(100));
|
||||
|
||||
// Cliquer selon le bouton demandé
|
||||
let button = match action.button.as_str() {
|
||||
"right" => enigo::Button::Right,
|
||||
"middle" => enigo::Button::Middle,
|
||||
_ => enigo::Button::Left,
|
||||
};
|
||||
|
||||
if action.button == "double" {
|
||||
// Double-clic gauche
|
||||
if let Err(e) = enigo.button(enigo::Button::Left, Direction::Click) {
|
||||
return ActionResult::error(&action.action_id, &format!("Erreur clic : {}", e));
|
||||
}
|
||||
thread::sleep(Duration::from_millis(50));
|
||||
if let Err(e) = enigo.button(enigo::Button::Left, Direction::Click) {
|
||||
return ActionResult::error(&action.action_id, &format!("Erreur double-clic : {}", e));
|
||||
}
|
||||
} else if let Err(e) = enigo.button(button, Direction::Click) {
|
||||
return ActionResult::error(&action.action_id, &format!("Erreur clic : {}", e));
|
||||
}
|
||||
|
||||
println!(" [CLICK] Termine.");
|
||||
ActionResult::ok(&action.action_id)
|
||||
}
|
||||
|
||||
/// Exécute une saisie de texte.
|
||||
///
|
||||
/// Si des coordonnées sont fournies (x_pct > 0), clique d'abord
|
||||
/// sur le champ avant de taper (comme en Python).
|
||||
fn execute_type(action: &Action, screen_width: u32, screen_height: u32, config: &Config) -> ActionResult {
|
||||
let text = &action.text;
|
||||
println!(
|
||||
" [TYPE] Texte: '{}' ({} chars)",
|
||||
if text.len() > 50 { &text[..50] } else { text },
|
||||
text.len()
|
||||
);
|
||||
|
||||
// Résoudre visuellement les coordonnées si visual_mode est activé
|
||||
let (x_pct, y_pct) = resolve_coordinates(action, screen_width, screen_height, config);
|
||||
|
||||
let mut enigo = match Enigo::new(&Settings::default()) {
|
||||
Ok(e) => e,
|
||||
Err(e) => {
|
||||
return ActionResult::error(
|
||||
&action.action_id,
|
||||
&format!("Impossible d'initialiser enigo : {}", e),
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
// Clic préalable sur le champ si coordonnées disponibles
|
||||
if x_pct > 0.0 && y_pct > 0.0 {
|
||||
let real_x = (x_pct * screen_width as f64) as i32;
|
||||
let real_y = (y_pct * screen_height as f64) as i32;
|
||||
println!(" [TYPE] Clic prealable sur ({}, {}){}", real_x, real_y,
|
||||
if action.visual_mode { " [VISUAL]" } else { "" });
|
||||
|
||||
if let Err(e) = enigo.move_mouse(real_x, real_y, Coordinate::Abs) {
|
||||
eprintln!(" [TYPE] Erreur deplacement souris : {}", e);
|
||||
}
|
||||
thread::sleep(Duration::from_millis(100));
|
||||
if let Err(e) = enigo.button(enigo::Button::Left, Direction::Click) {
|
||||
eprintln!(" [TYPE] Erreur clic : {}", e);
|
||||
}
|
||||
thread::sleep(Duration::from_millis(300));
|
||||
}
|
||||
|
||||
// Saisir le texte
|
||||
if let Err(e) = enigo.text(text) {
|
||||
return ActionResult::error(
|
||||
&action.action_id,
|
||||
&format!("Erreur saisie texte : {}", e),
|
||||
);
|
||||
}
|
||||
|
||||
println!(" [TYPE] Termine.");
|
||||
ActionResult::ok(&action.action_id)
|
||||
}
|
||||
|
||||
/// Exécute une combinaison de touches.
|
||||
///
|
||||
/// Ex: ["ctrl", "a"] -> maintenir Ctrl, appuyer A, relâcher Ctrl
|
||||
/// Ex: ["enter"] -> appuyer Enter
|
||||
fn execute_key_combo(action: &Action) -> ActionResult {
|
||||
let keys = &action.keys;
|
||||
println!(" [KEY_COMBO] Touches: {:?}", keys);
|
||||
|
||||
if keys.is_empty() {
|
||||
return ActionResult::error(&action.action_id, "Aucune touche specifiee");
|
||||
}
|
||||
|
||||
let mut enigo = match Enigo::new(&Settings::default()) {
|
||||
Ok(e) => e,
|
||||
Err(e) => {
|
||||
return ActionResult::error(
|
||||
&action.action_id,
|
||||
&format!("Impossible d'initialiser enigo : {}", e),
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
// Résoudre les noms de touches
|
||||
let resolved: Vec<Key> = keys
|
||||
.iter()
|
||||
.filter_map(|name| resolve_key(name))
|
||||
.collect();
|
||||
|
||||
if resolved.is_empty() {
|
||||
return ActionResult::error(
|
||||
&action.action_id,
|
||||
&format!("Aucune touche reconnue dans {:?}", keys),
|
||||
);
|
||||
}
|
||||
|
||||
if resolved.len() == 1 {
|
||||
// Une seule touche : simple press/release
|
||||
if let Err(e) = enigo.key(resolved[0], Direction::Click) {
|
||||
return ActionResult::error(&action.action_id, &format!("Erreur touche : {}", e));
|
||||
}
|
||||
} else {
|
||||
// Combo : maintenir les modifieurs, taper la dernière touche, relâcher
|
||||
let (modifiers, last) = resolved.split_at(resolved.len() - 1);
|
||||
|
||||
for modifier in modifiers {
|
||||
if let Err(e) = enigo.key(*modifier, Direction::Press) {
|
||||
return ActionResult::error(
|
||||
&action.action_id,
|
||||
&format!("Erreur modifier press : {}", e),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
thread::sleep(Duration::from_millis(50));
|
||||
|
||||
if let Err(e) = enigo.key(last[0], Direction::Click) {
|
||||
// Toujours relâcher les modifieurs même en cas d'erreur
|
||||
for modifier in modifiers.iter().rev() {
|
||||
let _ = enigo.key(*modifier, Direction::Release);
|
||||
}
|
||||
return ActionResult::error(
|
||||
&action.action_id,
|
||||
&format!("Erreur touche finale : {}", e),
|
||||
);
|
||||
}
|
||||
|
||||
for modifier in modifiers.iter().rev() {
|
||||
if let Err(e) = enigo.key(*modifier, Direction::Release) {
|
||||
eprintln!(" [KEY_COMBO] Erreur release modifier : {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
println!(" [KEY_COMBO] Termine.");
|
||||
ActionResult::ok(&action.action_id)
|
||||
}
|
||||
|
||||
/// Exécute un scroll de souris.
|
||||
fn execute_scroll(action: &Action, screen_width: u32, screen_height: u32) -> ActionResult {
|
||||
let real_x = if action.x_pct > 0.0 {
|
||||
(action.x_pct * screen_width as f64) as i32
|
||||
} else {
|
||||
(0.5 * screen_width as f64) as i32
|
||||
};
|
||||
let real_y = if action.y_pct > 0.0 {
|
||||
(action.y_pct * screen_height as f64) as i32
|
||||
} else {
|
||||
(0.5 * screen_height as f64) as i32
|
||||
};
|
||||
|
||||
let delta = action.delta;
|
||||
println!(" [SCROLL] delta={} a ({}, {})", delta, real_x, real_y);
|
||||
|
||||
let mut enigo = match Enigo::new(&Settings::default()) {
|
||||
Ok(e) => e,
|
||||
Err(e) => {
|
||||
return ActionResult::error(
|
||||
&action.action_id,
|
||||
&format!("Impossible d'initialiser enigo : {}", e),
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
if let Err(e) = enigo.move_mouse(real_x, real_y, Coordinate::Abs) {
|
||||
return ActionResult::error(
|
||||
&action.action_id,
|
||||
&format!("Erreur deplacement souris : {}", e),
|
||||
);
|
||||
}
|
||||
thread::sleep(Duration::from_millis(50));
|
||||
|
||||
if let Err(e) = enigo.scroll(delta, enigo::Axis::Vertical) {
|
||||
return ActionResult::error(
|
||||
&action.action_id,
|
||||
&format!("Erreur scroll : {}", e),
|
||||
);
|
||||
}
|
||||
|
||||
println!(" [SCROLL] Termine.");
|
||||
ActionResult::ok(&action.action_id)
|
||||
}
|
||||
|
||||
/// Exécute une attente (pause).
|
||||
fn execute_wait(action: &Action) -> ActionResult {
|
||||
let duration_ms = action.duration_ms;
|
||||
println!(" [WAIT] {}ms...", duration_ms);
|
||||
thread::sleep(Duration::from_millis(duration_ms));
|
||||
println!(" [WAIT] Termine.");
|
||||
ActionResult::ok(&action.action_id)
|
||||
}
|
||||
|
||||
/// Résout un nom de touche (string) vers un enigo::Key.
|
||||
///
|
||||
/// Mapping compatible avec le Python executor (_SPECIAL_KEYS).
|
||||
fn resolve_key(name: &str) -> Option<Key> {
|
||||
match name.to_lowercase().as_str() {
|
||||
// Touches de contrôle
|
||||
"enter" | "return" => Some(Key::Return),
|
||||
"tab" => Some(Key::Tab),
|
||||
"escape" | "esc" => Some(Key::Escape),
|
||||
"backspace" => Some(Key::Backspace),
|
||||
"delete" => Some(Key::Delete),
|
||||
"space" => Some(Key::Space),
|
||||
|
||||
// Touches de navigation
|
||||
"up" => Some(Key::UpArrow),
|
||||
"down" => Some(Key::DownArrow),
|
||||
"left" => Some(Key::LeftArrow),
|
||||
"right" => Some(Key::RightArrow),
|
||||
"home" => Some(Key::Home),
|
||||
"end" => Some(Key::End),
|
||||
"page_up" | "pageup" => Some(Key::PageUp),
|
||||
"page_down" | "pagedown" => Some(Key::PageDown),
|
||||
|
||||
// Touches de fonction
|
||||
"f1" => Some(Key::F1),
|
||||
"f2" => Some(Key::F2),
|
||||
"f3" => Some(Key::F3),
|
||||
"f4" => Some(Key::F4),
|
||||
"f5" => Some(Key::F5),
|
||||
"f6" => Some(Key::F6),
|
||||
"f7" => Some(Key::F7),
|
||||
"f8" => Some(Key::F8),
|
||||
"f9" => Some(Key::F9),
|
||||
"f10" => Some(Key::F10),
|
||||
"f11" => Some(Key::F11),
|
||||
"f12" => Some(Key::F12),
|
||||
|
||||
// Modifieurs
|
||||
"ctrl" | "ctrl_l" | "ctrl_r" | "control" => Some(Key::Control),
|
||||
"alt" | "alt_l" | "alt_r" => Some(Key::Alt),
|
||||
"shift" | "shift_l" | "shift_r" => Some(Key::Shift),
|
||||
"cmd" | "win" | "super" | "super_l" | "super_r" | "windows" | "meta" => Some(Key::Meta),
|
||||
|
||||
// Touches spéciales
|
||||
"insert" => Some(Key::Other(0x2D)), // VK_INSERT
|
||||
"caps_lock" | "capslock" => Some(Key::CapsLock),
|
||||
|
||||
// Caractère unique -> Unicode
|
||||
s if s.len() == 1 => {
|
||||
let c = s.chars().next().unwrap();
|
||||
Some(Key::Unicode(c))
|
||||
}
|
||||
|
||||
_ => {
|
||||
eprintln!(" [KEY_COMBO] Touche inconnue : '{}', ignoree", name);
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,430 +0,0 @@
|
||||
//! Agent RPA Vision — Phases 1-5 (parite complete)
|
||||
//!
|
||||
//! Point d'entree principal. Architecture multi-threads :
|
||||
//!
|
||||
//! - Thread principal : boucle d'evenements systray (Windows) ou attente console (Linux)
|
||||
//! - Thread heartbeat : capture + envoi toutes les 5s (avec dedup par hash)
|
||||
//! - Thread replay : poll toutes les 1s, execute les actions
|
||||
//! - Thread serveur : HTTP port 5006 pour les captures a la demande
|
||||
//! - Thread recorder : capture evenements souris/clavier (quand enregistrement actif)
|
||||
//! - Thread chat : fenetre WebView2 (Windows, a la demande)
|
||||
//! - Thread health : verification connexion serveur (toutes les 30s)
|
||||
//!
|
||||
//! Le thread principal gere le systray sur Windows via winit.
|
||||
//! Sur Linux, le thread principal attend Ctrl+C (mode console).
|
||||
//!
|
||||
//! Configuration via variables d'environnement ou valeurs par defaut.
|
||||
//! Compatible avec le serveur streaming existant (api_stream.py, port 5005).
|
||||
|
||||
#[allow(dead_code)]
|
||||
mod blur;
|
||||
mod capture;
|
||||
mod chat;
|
||||
mod config;
|
||||
mod executor;
|
||||
mod network;
|
||||
#[allow(dead_code)]
|
||||
mod notifications;
|
||||
mod recorder;
|
||||
mod replay;
|
||||
mod server;
|
||||
#[allow(dead_code)]
|
||||
mod state;
|
||||
mod sysinfo;
|
||||
mod tray;
|
||||
mod visual;
|
||||
|
||||
use config::Config;
|
||||
use reqwest::blocking::Client;
|
||||
use state::AgentState;
|
||||
use std::sync::Arc;
|
||||
use std::thread;
|
||||
use std::time::Duration;
|
||||
|
||||
/// Finds a supported browser on Windows (Edge, Chrome, Brave, Firefox).
///
/// Probes a fixed list of install locations in priority order and returns
/// the first path that exists, or `None` when none of them does.
#[cfg(target_os = "windows")]
fn find_browser() -> Option<String> {
    const CANDIDATES: [&str; 6] = [
        // Edge
        r"C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe",
        r"C:\Program Files\Microsoft\Edge\Application\msedge.exe",
        // Chrome
        r"C:\Program Files\Google\Chrome\Application\chrome.exe",
        r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe",
        // Brave
        r"C:\Program Files\BraveSoftware\Brave-Browser\Application\brave.exe",
        // Firefox (per the original note: supports --kiosk but not --app)
        r"C:\Program Files\Mozilla Firefox\firefox.exe",
    ];

    CANDIDATES
        .iter()
        .find(|location| std::path::Path::new(location).exists())
        .map(|location| location.to_string())
}
|
||||
|
||||
fn main() {
|
||||
// --- DPI awareness (DOIT etre appele avant toute operation graphique) ---
|
||||
// Rend le process DPI-aware sur Windows pour que les API (enigo, xcap,
|
||||
// GetSystemMetrics, etc.) travaillent en coordonnees physiques (pixels reels)
|
||||
// au lieu de coordonnees logiques (virtualisees par le DPI scaling).
|
||||
// Sans cet appel, un ecran 2560x1600 a 150% DPI apparait comme 1707x1067
|
||||
// pour enigo et GetSystemMetrics, ce qui cause des erreurs de positionnement
|
||||
// pendant le replay.
|
||||
// PROCESS_PER_MONITOR_DPI_AWARE = 2 : le niveau le plus precis.
|
||||
#[cfg(target_os = "windows")]
|
||||
{
|
||||
// SetProcessDpiAwareness (shcore.dll) et SetProcessDPIAware (user32.dll)
|
||||
// ne sont pas toujours exposes par windows-sys selon les features.
|
||||
// On utilise des appels FFI raw pour eviter d'ajouter des features.
|
||||
#[link(name = "shcore")]
|
||||
extern "system" {
|
||||
fn SetProcessDpiAwareness(value: i32) -> i32;
|
||||
}
|
||||
#[link(name = "user32")]
|
||||
extern "system" {
|
||||
fn SetProcessDPIAware() -> i32;
|
||||
}
|
||||
unsafe {
|
||||
// Tenter SetProcessDpiAwareness(2) = PROCESS_PER_MONITOR_DPI_AWARE
|
||||
let hr = SetProcessDpiAwareness(2);
|
||||
if hr != 0 {
|
||||
// Fallback pour Windows < 8.1 : SetProcessDPIAware()
|
||||
SetProcessDPIAware();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Initialiser le logging
|
||||
env_logger::Builder::from_env(
|
||||
env_logger::Env::default().default_filter_or("info"),
|
||||
)
|
||||
.format_timestamp_secs()
|
||||
.init();
|
||||
|
||||
let config = Config::from_env();
|
||||
let config = Arc::new(config);
|
||||
|
||||
// Etat partage thread-safe
|
||||
let state = AgentState::new();
|
||||
|
||||
// Banniere de demarrage
|
||||
print_banner(&config);
|
||||
|
||||
// Handler Ctrl+C pour arret propre
|
||||
install_ctrlc_handler(state.clone());
|
||||
|
||||
// Verifier que la capture d'ecran fonctionne
|
||||
print!("[MAIN] Test de capture d'ecran... ");
|
||||
match capture::screen_dimensions() {
|
||||
Some((w, h)) => println!("OK ({}x{})", w, h),
|
||||
None => {
|
||||
println!("ECHEC");
|
||||
eprintln!("[MAIN] ATTENTION : Capture d'ecran non disponible.");
|
||||
eprintln!("[MAIN] Sur Linux sans display, les heartbeats seront desactives.");
|
||||
}
|
||||
}
|
||||
|
||||
// Thread 1 : Heartbeat loop
|
||||
let hb_config = config.clone();
|
||||
let hb_state = state.clone();
|
||||
let _heartbeat_thread = thread::Builder::new()
|
||||
.name("heartbeat".to_string())
|
||||
.spawn(move || {
|
||||
heartbeat_loop(&hb_config, &hb_state);
|
||||
})
|
||||
.expect("Impossible de demarrer le thread heartbeat");
|
||||
|
||||
// Thread 2 : Replay poll loop
|
||||
let rp_config = config.clone();
|
||||
let rp_state = state.clone();
|
||||
let _replay_thread = thread::Builder::new()
|
||||
.name("replay".to_string())
|
||||
.spawn(move || {
|
||||
replay::replay_poll_loop(&rp_config, &rp_state);
|
||||
})
|
||||
.expect("Impossible de demarrer le thread replay");
|
||||
|
||||
// Thread 3 : Capture HTTP server
|
||||
let srv_port = config.capture_port;
|
||||
let _server_thread = thread::Builder::new()
|
||||
.name("capture-server".to_string())
|
||||
.spawn(move || {
|
||||
server::start_capture_server(srv_port);
|
||||
})
|
||||
.expect("Impossible de demarrer le thread serveur");
|
||||
|
||||
// Thread 4 : Health check (verification connexion serveur)
|
||||
let hc_config = config.clone();
|
||||
let hc_state = state.clone();
|
||||
let _health_thread = thread::Builder::new()
|
||||
.name("health-check".to_string())
|
||||
.spawn(move || {
|
||||
health_check_loop(&hc_config, &hc_state);
|
||||
})
|
||||
.expect("Impossible de demarrer le thread health check");
|
||||
|
||||
// Thread 5 : Recorder (capture evenements — inactif jusqu'a enregistrement)
|
||||
let rec_config = config.clone();
|
||||
let rec_state = state.clone();
|
||||
let _recorder_rx = recorder::start_recorder(rec_config, rec_state);
|
||||
|
||||
// Thread 6 : Chat window (WebView2, a la demande)
|
||||
let chat_config = config.clone();
|
||||
let chat_state = state.clone();
|
||||
chat::run_chat_thread(&chat_config, chat_state);
|
||||
|
||||
// Synchroniser les workflows disponibles depuis le serveur
|
||||
let sync_config = config.clone();
|
||||
let workflows = {
|
||||
let client = Client::new();
|
||||
network::fetch_workflows(&client, &sync_config)
|
||||
};
|
||||
if workflows.is_empty() {
|
||||
println!("[MAIN] Aucun workflow disponible pour cette machine.");
|
||||
} else {
|
||||
println!(
|
||||
"[MAIN] {} workflow(s) disponible(s) :",
|
||||
workflows.len()
|
||||
);
|
||||
for wf in &workflows {
|
||||
println!(
|
||||
" - {} ({} noeuds, {} transitions)",
|
||||
wf.name, wf.nodes, wf.edges
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
println!("\n[MAIN] Agent operationnel — tous les threads demarres.\n");
|
||||
|
||||
// Ouvrir Léa dans le navigateur disponible (mode app) au démarrage
|
||||
#[cfg(target_os = "windows")]
|
||||
{
|
||||
let chat_url = config.chat_url();
|
||||
if let Some(browser) = find_browser() {
|
||||
let browser_name = if browser.contains("chrome") { "Chrome" }
|
||||
else if browser.contains("edge") || browser.contains("Edge") { "Edge" }
|
||||
else if browser.contains("brave") || browser.contains("Brave") { "Brave" }
|
||||
else if browser.contains("firefox") || browser.contains("Firefox") { "Firefox" }
|
||||
else { "navigateur" };
|
||||
println!("[MAIN] Ouverture de Léa dans {}...", browser_name);
|
||||
let _ = std::process::Command::new(&browser)
|
||||
.args(&[
|
||||
&format!("--app={}", chat_url),
|
||||
"--window-size=600,800",
|
||||
"--disable-extensions",
|
||||
"--no-first-run",
|
||||
])
|
||||
.spawn();
|
||||
} else {
|
||||
println!("[MAIN] Aucun navigateur trouvé — ouvrez manuellement : {}", chat_url);
|
||||
}
|
||||
}
|
||||
|
||||
// Attente principale : Ctrl+C pour arrêter
|
||||
println!("[MAIN] Appuyez sur Ctrl+C pour quitter.\n");
|
||||
loop {
|
||||
if !state.is_running() {
|
||||
break;
|
||||
}
|
||||
thread::sleep(Duration::from_millis(500));
|
||||
}
|
||||
|
||||
// Si on arrive ici, l'agent doit s'arreter
|
||||
println!("\n[MAIN] Arret en cours...");
|
||||
state.request_shutdown();
|
||||
|
||||
// Laisser le temps aux threads de se terminer
|
||||
thread::sleep(Duration::from_millis(500));
|
||||
|
||||
println!("[MAIN] Agent arrete.");
|
||||
}
|
||||
|
||||
/// Installe un handler Ctrl+C qui met l'etat a "arret demande".
|
||||
fn install_ctrlc_handler(state: Arc<AgentState>) {
|
||||
#[cfg(unix)]
|
||||
{
|
||||
let mut fds = [0i32; 2];
|
||||
unsafe {
|
||||
if libc::pipe(fds.as_mut_ptr()) != 0 {
|
||||
eprintln!("[MAIN] Impossible de creer le pipe pour Ctrl+C");
|
||||
return;
|
||||
}
|
||||
|
||||
static mut WRITE_FD: i32 = -1;
|
||||
WRITE_FD = fds[1];
|
||||
|
||||
// Sauvegarder un pointeur vers l'etat dans une static
|
||||
// pour pouvoir y acceder depuis le handler
|
||||
static mut STATE_PTR: *const AgentState = std::ptr::null();
|
||||
STATE_PTR = Arc::as_ptr(&state);
|
||||
|
||||
extern "C" fn sigint_handler(_sig: i32) {
|
||||
unsafe {
|
||||
if !STATE_PTR.is_null() {
|
||||
(*STATE_PTR)
|
||||
.running
|
||||
.store(false, std::sync::atomic::Ordering::SeqCst);
|
||||
}
|
||||
let buf = [1u8];
|
||||
let _ = libc::write(WRITE_FD, buf.as_ptr() as *const _, 1);
|
||||
}
|
||||
}
|
||||
|
||||
libc::signal(libc::SIGINT, sigint_handler as *const () as libc::sighandler_t);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(unix))]
|
||||
{
|
||||
// Sur Windows, le systray gere l'arret via le menu "Quitter"
|
||||
// Le handler console est un bonus pour le mode headless
|
||||
let _ = state;
|
||||
}
|
||||
}
|
||||
|
||||
/// Boucle de heartbeat : capture un screenshot toutes les N secondes
|
||||
/// et l'envoie au serveur si l'ecran a change.
|
||||
/// Applique le floutage des zones sensibles si active dans la config.
|
||||
fn heartbeat_loop(config: &Config, state: &AgentState) {
|
||||
let client = Client::new();
|
||||
let session_id = config.bg_session_id();
|
||||
let mut last_hash: u64 = 0;
|
||||
let mut consecutive_errors: u32 = 0;
|
||||
|
||||
println!(
|
||||
"[HEARTBEAT] Boucle permanente demarree (session={}, intervalle={}s)",
|
||||
session_id, config.heartbeat_interval_s
|
||||
);
|
||||
|
||||
while state.is_running() {
|
||||
// Verifier l'arret d'urgence
|
||||
if state
|
||||
.emergency_stop
|
||||
.load(std::sync::atomic::Ordering::SeqCst)
|
||||
{
|
||||
thread::sleep(Duration::from_secs(1));
|
||||
continue;
|
||||
}
|
||||
|
||||
// Capturer l'ecran
|
||||
match capture::capture_screenshot() {
|
||||
Some(img) => {
|
||||
// Deduplication par hash perceptuel
|
||||
let current_hash = capture::image_hash(&img);
|
||||
if current_hash == last_hash {
|
||||
thread::sleep(Duration::from_secs(config.heartbeat_interval_s));
|
||||
continue;
|
||||
}
|
||||
last_hash = current_hash;
|
||||
|
||||
// Appliquer le floutage des zones sensibles si active
|
||||
let final_img = if config.blur_sensitive {
|
||||
blur::blur_sensitive_fields(&img)
|
||||
} else {
|
||||
img
|
||||
};
|
||||
|
||||
// Encoder en JPEG
|
||||
let jpeg_bytes =
|
||||
capture::screenshot_to_jpeg_bytes(&final_img, config.jpeg_quality);
|
||||
if jpeg_bytes.is_empty() {
|
||||
thread::sleep(Duration::from_secs(config.heartbeat_interval_s));
|
||||
continue;
|
||||
}
|
||||
|
||||
// Envoyer au serveur
|
||||
let success =
|
||||
network::send_heartbeat(&client, config, &jpeg_bytes, &session_id);
|
||||
if success {
|
||||
consecutive_errors = 0;
|
||||
} else {
|
||||
consecutive_errors += 1;
|
||||
if consecutive_errors == 1 || consecutive_errors % 12 == 0 {
|
||||
eprintln!(
|
||||
"[HEARTBEAT] {} erreur(s) consecutives",
|
||||
consecutive_errors
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
None => {
|
||||
thread::sleep(Duration::from_secs(config.heartbeat_interval_s * 2));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
thread::sleep(Duration::from_secs(config.heartbeat_interval_s));
|
||||
}
|
||||
|
||||
println!("[HEARTBEAT] Boucle arretee.");
|
||||
}
|
||||
|
||||
/// Boucle de health check : verifie la connexion au serveur toutes les 30s.
|
||||
/// Met a jour l'etat de connexion dans AgentState.
|
||||
fn health_check_loop(config: &Config, state: &AgentState) {
|
||||
let client = Client::new();
|
||||
let check_interval = Duration::from_secs(30);
|
||||
let timeout = Duration::from_secs(5);
|
||||
|
||||
println!("[HEALTH] Boucle health check demarree (intervalle=30s)");
|
||||
|
||||
while state.is_running() {
|
||||
let url = format!("{}/stats", config.server_url);
|
||||
let request = client.get(&url).timeout(timeout);
|
||||
let connected = network::with_auth(request, config)
|
||||
.send()
|
||||
.map(|r| r.status().is_success())
|
||||
.unwrap_or(false);
|
||||
|
||||
let was_connected = state.connected.load(std::sync::atomic::Ordering::SeqCst);
|
||||
state.set_connected(connected);
|
||||
|
||||
// Notifier si le statut a change
|
||||
if connected != was_connected {
|
||||
notifications::connection_changed(connected);
|
||||
}
|
||||
|
||||
thread::sleep(check_interval);
|
||||
}
|
||||
|
||||
println!("[HEALTH] Boucle arretee.");
|
||||
}
|
||||
|
||||
/// Affiche la banniere de demarrage.
|
||||
fn print_banner(config: &Config) {
|
||||
let meta = sysinfo::get_screen_metadata();
|
||||
|
||||
println!("======================================================");
|
||||
println!(
|
||||
" RPA Vision Agent v{} (Rust)",
|
||||
config::AGENT_VERSION
|
||||
);
|
||||
println!(" Phases 1-5 — Parite complete");
|
||||
println!("------------------------------------------------------");
|
||||
println!(" Machine : {}", config.machine_id);
|
||||
println!(" Serveur : {}", config.server_url);
|
||||
println!(" Capture : port {}", config.capture_port);
|
||||
println!(" Chat : port {}", config.chat_port);
|
||||
println!(" Heartbeat : toutes les {}s", config.heartbeat_interval_s);
|
||||
println!(" JPEG : qualite {}", config.jpeg_quality);
|
||||
println!(" Floutage : {}", if config.blur_sensitive { "actif" } else { "inactif" });
|
||||
println!(" Logs : retention {} jours", config.log_retention_days);
|
||||
println!(" Auth : {}", if config.api_token.is_empty() { "aucune" } else { "Bearer token" });
|
||||
println!(" Workflows : synchronisation au demarrage");
|
||||
println!(
|
||||
" Ecran : {}x{} @ {}% DPI",
|
||||
meta.screen_resolution[0], meta.screen_resolution[1], meta.dpi_scale
|
||||
);
|
||||
println!(
|
||||
" Moniteur : #{} ({})",
|
||||
meta.monitor_index,
|
||||
if meta.monitor_index == 0 { "principal" } else { "secondaire" }
|
||||
);
|
||||
println!("======================================================");
|
||||
println!();
|
||||
println!(" [IA] Cet agent utilise l'intelligence artificielle.");
|
||||
println!(" Article 50 du Reglement europeen sur l'IA.");
|
||||
println!();
|
||||
}
|
||||
@@ -1,391 +0,0 @@
|
||||
//! Client HTTP pour la communication avec le serveur streaming.
|
||||
//!
|
||||
//! Gère l'envoi des heartbeats (screenshots périodiques),
|
||||
//! le polling des actions replay, et le rapport des résultats.
|
||||
//! Compatible avec l'API de agent_v0/server_v1/api_stream.py (port 5005).
|
||||
|
||||
use crate::config::Config;
|
||||
use crate::sysinfo;
|
||||
use reqwest::blocking::{Client, RequestBuilder};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Ajoute le header Authorization Bearer si un token est configure.
|
||||
///
|
||||
/// Si `config.api_token` est vide, la requete est retournee telle quelle.
|
||||
pub fn with_auth(request: RequestBuilder, config: &Config) -> RequestBuilder {
|
||||
if config.api_token.is_empty() {
|
||||
request
|
||||
} else {
|
||||
request.header("Authorization", format!("Bearer {}", config.api_token))
|
||||
}
|
||||
}
|
||||
|
||||
/// Action de replay reçue du serveur.
|
||||
///
|
||||
/// Format identique à celui du Python executor (agent_v1/core/executor.py).
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Action {
|
||||
/// Identifiant unique de l'action
|
||||
#[serde(default)]
|
||||
pub action_id: String,
|
||||
|
||||
/// Type d'action : "click", "type", "key_combo", "scroll", "wait"
|
||||
#[serde(rename = "type")]
|
||||
pub action_type: String,
|
||||
|
||||
/// Coordonnée X normalisée (0.0 à 1.0)
|
||||
#[serde(default)]
|
||||
pub x_pct: f64,
|
||||
|
||||
/// Coordonnée Y normalisée (0.0 à 1.0)
|
||||
#[serde(default)]
|
||||
pub y_pct: f64,
|
||||
|
||||
/// Texte à taper (pour action "type")
|
||||
#[serde(default)]
|
||||
pub text: String,
|
||||
|
||||
/// Liste de touches (pour action "key_combo")
|
||||
#[serde(default)]
|
||||
pub keys: Vec<String>,
|
||||
|
||||
/// Bouton de souris : "left", "right", "double"
|
||||
#[serde(default = "default_button")]
|
||||
pub button: String,
|
||||
|
||||
/// Durée d'attente en ms (pour action "wait")
|
||||
#[serde(default = "default_duration")]
|
||||
pub duration_ms: u64,
|
||||
|
||||
/// Delta de scroll (pour action "scroll")
|
||||
#[serde(default)]
|
||||
pub delta: i32,
|
||||
|
||||
/// Mode visuel (résolution par le serveur)
|
||||
#[serde(default)]
|
||||
pub visual_mode: bool,
|
||||
|
||||
/// Spécification de la cible visuelle
|
||||
#[serde(default)]
|
||||
pub target_spec: serde_json::Value,
|
||||
}
|
||||
|
||||
/// Serde default for `Action::button`: the left mouse button.
fn default_button() -> String {
    String::from("left")
}
|
||||
|
||||
/// Serde default for `Action::duration_ms`, in milliseconds.
fn default_duration() -> u64 {
    const DEFAULT_WAIT_MS: u64 = 500;
    DEFAULT_WAIT_MS
}
|
||||
|
||||
/// Résultat d'exécution d'une action.
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct ActionResult {
|
||||
pub action_id: String,
|
||||
pub success: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub error: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub screenshot: Option<String>,
|
||||
}
|
||||
|
||||
impl ActionResult {
|
||||
/// Crée un résultat d'erreur.
|
||||
pub fn error(action_id: &str, msg: &str) -> Self {
|
||||
ActionResult {
|
||||
action_id: action_id.to_string(),
|
||||
success: false,
|
||||
error: Some(msg.to_string()),
|
||||
screenshot: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Crée un résultat de succès.
|
||||
pub fn ok(action_id: &str) -> Self {
|
||||
ActionResult {
|
||||
action_id: action_id.to_string(),
|
||||
success: true,
|
||||
error: None,
|
||||
screenshot: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Envoie un heartbeat (screenshot) au serveur streaming.
|
||||
///
|
||||
/// POST /traces/stream/image avec le screenshot en multipart.
|
||||
/// Inclut les métadonnées système (DPI, résolution, fenêtre, moniteur)
|
||||
/// dans les query params pour que le serveur puisse les exploiter.
|
||||
/// Retourne true si l'envoi a réussi.
|
||||
pub fn send_heartbeat(
|
||||
client: &Client,
|
||||
config: &Config,
|
||||
jpeg_bytes: &[u8],
|
||||
session_id: &str,
|
||||
) -> bool {
|
||||
let url = format!("{}/image", config.streaming_url());
|
||||
let shot_id = format!("heartbeat_{}", chrono::Utc::now().timestamp());
|
||||
|
||||
// Collecter les métadonnées système
|
||||
let meta = sysinfo::get_screen_metadata();
|
||||
let dpi_str = meta.dpi_scale.to_string();
|
||||
let screen_w_str = meta.screen_resolution[0].to_string();
|
||||
let screen_h_str = meta.screen_resolution[1].to_string();
|
||||
let monitor_str = meta.monitor_index.to_string();
|
||||
|
||||
// Sérialiser window_bounds en JSON compact (ou "null")
|
||||
let wb_str = match meta.window_bounds {
|
||||
Some(wb) => format!("[{},{},{},{}]", wb[0], wb[1], wb[2], wb[3]),
|
||||
None => "null".to_string(),
|
||||
};
|
||||
|
||||
let part = reqwest::blocking::multipart::Part::bytes(jpeg_bytes.to_vec())
|
||||
.file_name("screenshot.jpg")
|
||||
.mime_str("image/jpeg")
|
||||
.unwrap_or_else(|_| {
|
||||
reqwest::blocking::multipart::Part::bytes(jpeg_bytes.to_vec())
|
||||
.file_name("screenshot.jpg")
|
||||
});
|
||||
|
||||
let form = reqwest::blocking::multipart::Form::new().part("file", part);
|
||||
|
||||
let request = client
|
||||
.post(&url)
|
||||
.query(&[
|
||||
("session_id", session_id),
|
||||
("shot_id", &shot_id),
|
||||
("machine_id", &config.machine_id),
|
||||
("dpi_scale", &dpi_str),
|
||||
("screen_w", &screen_w_str),
|
||||
("screen_h", &screen_h_str),
|
||||
("monitor_index", &monitor_str),
|
||||
("window_bounds", &wb_str),
|
||||
])
|
||||
.multipart(form)
|
||||
.timeout(std::time::Duration::from_secs(10));
|
||||
|
||||
match with_auth(request, config).send() {
|
||||
Ok(resp) => {
|
||||
if resp.status().is_success() {
|
||||
true
|
||||
} else {
|
||||
eprintln!(
|
||||
"[HEARTBEAT] Envoi echoue : HTTP {}",
|
||||
resp.status()
|
||||
);
|
||||
false
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
// Log discret pour ne pas spammer la console
|
||||
eprintln!("[HEARTBEAT] Erreur reseau : {}", e);
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Réponse du serveur pour GET /replay/next
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct ReplayNextResponse {
|
||||
action: Option<Action>,
|
||||
}
|
||||
|
||||
/// Poll le serveur pour récupérer la prochaine action de replay.
|
||||
///
|
||||
/// GET /traces/stream/replay/next?session_id=...&machine_id=...
|
||||
/// Retourne None si pas d'action en attente ou si le serveur est indisponible.
|
||||
pub fn poll_next_action(client: &Client, config: &Config) -> Option<Action> {
|
||||
let url = format!("{}/replay/next", config.streaming_url());
|
||||
let session_id = config.agent_session_id();
|
||||
|
||||
let request = client
|
||||
.get(&url)
|
||||
.query(&[
|
||||
("session_id", session_id.as_str()),
|
||||
("machine_id", config.machine_id.as_str()),
|
||||
])
|
||||
.timeout(std::time::Duration::from_secs(5));
|
||||
|
||||
let resp = with_auth(request, config).send().ok()?;
|
||||
|
||||
if !resp.status().is_success() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let data: ReplayNextResponse = resp.json().ok()?;
|
||||
data.action
|
||||
}
|
||||
|
||||
/// Informations résumées d'un workflow disponible.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct WorkflowInfo {
|
||||
/// Identifiant unique du workflow
|
||||
pub workflow_id: String,
|
||||
|
||||
/// Nom lisible du workflow
|
||||
#[serde(default)]
|
||||
pub name: String,
|
||||
|
||||
/// Identifiant machine associé
|
||||
#[serde(default)]
|
||||
pub machine_id: String,
|
||||
|
||||
/// Nombre de nœuds
|
||||
#[serde(default)]
|
||||
pub nodes: u32,
|
||||
|
||||
/// Nombre de transitions
|
||||
#[serde(default)]
|
||||
pub edges: u32,
|
||||
}
|
||||
|
||||
/// Réponse du serveur pour GET /traces/stream/workflows
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct WorkflowsResponse {
|
||||
#[serde(default)]
|
||||
workflows: Vec<WorkflowInfo>,
|
||||
}
|
||||
|
||||
/// Récupère la liste des workflows disponibles pour cette machine.
|
||||
///
|
||||
/// GET /traces/stream/workflows?machine_id=<machine_id>
|
||||
/// Sauvegarde le résultat dans workflows.json à côté de l'exécutable.
|
||||
/// Retourne la liste (éventuellement depuis le cache local si le serveur est indisponible).
|
||||
pub fn fetch_workflows(client: &Client, config: &Config) -> Vec<WorkflowInfo> {
|
||||
let url = format!("{}/workflows", config.streaming_url());
|
||||
|
||||
let request = client
|
||||
.get(&url)
|
||||
.query(&[("machine_id", config.machine_id.as_str())])
|
||||
.timeout(std::time::Duration::from_secs(5));
|
||||
|
||||
let workflows = match with_auth(request, config).send() {
|
||||
Ok(resp) if resp.status().is_success() => {
|
||||
match resp.json::<WorkflowsResponse>() {
|
||||
Ok(data) => data.workflows,
|
||||
Err(e) => {
|
||||
eprintln!("[WORKFLOWS] Erreur parsing reponse : {}", e);
|
||||
Vec::new()
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(resp) => {
|
||||
eprintln!("[WORKFLOWS] Serveur HTTP {} — chargement cache local", resp.status());
|
||||
return load_workflows_cache();
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("[WORKFLOWS] Serveur injoignable ({}) — chargement cache local", e);
|
||||
return load_workflows_cache();
|
||||
}
|
||||
};
|
||||
|
||||
// Sauvegarder dans le cache local
|
||||
save_workflows_cache(&workflows);
|
||||
|
||||
workflows
|
||||
}
|
||||
|
||||
/// Location of the `workflows.json` cache file.
///
/// The file sits next to the running executable; when the executable path
/// or its parent directory cannot be determined, a path relative to the
/// current directory is returned.
fn workflows_cache_path() -> std::path::PathBuf {
    const CACHE_FILE_NAME: &str = "workflows.json";

    std::env::current_exe()
        .ok()
        .and_then(|exe| exe.parent().map(|dir| dir.join(CACHE_FILE_NAME)))
        .unwrap_or_else(|| std::path::PathBuf::from(CACHE_FILE_NAME))
}
|
||||
|
||||
/// Sauvegarde les workflows dans le cache local.
|
||||
fn save_workflows_cache(workflows: &[WorkflowInfo]) {
|
||||
let path = workflows_cache_path();
|
||||
match serde_json::to_string_pretty(workflows) {
|
||||
Ok(json) => {
|
||||
if let Err(e) = std::fs::write(&path, json) {
|
||||
eprintln!("[WORKFLOWS] Erreur ecriture cache {} : {}", path.display(), e);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("[WORKFLOWS] Erreur serialisation cache : {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Charge les workflows depuis le cache local.
|
||||
fn load_workflows_cache() -> Vec<WorkflowInfo> {
|
||||
let path = workflows_cache_path();
|
||||
match std::fs::read_to_string(&path) {
|
||||
Ok(content) => {
|
||||
match serde_json::from_str::<Vec<WorkflowInfo>>(&content) {
|
||||
Ok(workflows) => {
|
||||
println!("[WORKFLOWS] {} workflow(s) charges depuis le cache local", workflows.len());
|
||||
workflows
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("[WORKFLOWS] Erreur parsing cache : {}", e);
|
||||
Vec::new()
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(_) => Vec::new(), // Pas de cache, pas d'erreur
|
||||
}
|
||||
}
|
||||
|
||||
/// Rapporte le résultat d'une action au serveur.
|
||||
///
|
||||
/// POST /traces/stream/replay/result avec le résultat en JSON.
|
||||
pub fn report_result(client: &Client, config: &Config, result: &ActionResult) -> bool {
|
||||
let url = format!("{}/replay/result", config.streaming_url());
|
||||
let session_id = config.agent_session_id();
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct Report<'a> {
|
||||
session_id: &'a str,
|
||||
action_id: &'a str,
|
||||
success: bool,
|
||||
error: &'a Option<String>,
|
||||
screenshot: &'a Option<String>,
|
||||
}
|
||||
|
||||
let report = Report {
|
||||
session_id: &session_id,
|
||||
action_id: &result.action_id,
|
||||
success: result.success,
|
||||
error: &result.error,
|
||||
screenshot: &result.screenshot,
|
||||
};
|
||||
|
||||
let request = client
|
||||
.post(&url)
|
||||
.json(&report)
|
||||
.timeout(std::time::Duration::from_secs(10));
|
||||
|
||||
match with_auth(request, config).send() {
|
||||
Ok(resp) => {
|
||||
if resp.status().is_success() {
|
||||
if let Ok(data) = resp.json::<serde_json::Value>() {
|
||||
let status = data.get("replay_status")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("?");
|
||||
let remaining = data.get("remaining_actions")
|
||||
.and_then(|v| v.as_i64())
|
||||
.unwrap_or(-1);
|
||||
println!(
|
||||
" [RESULT] Rapporte : status={}, restant={}",
|
||||
status, remaining
|
||||
);
|
||||
}
|
||||
true
|
||||
} else {
|
||||
eprintln!(
|
||||
" [RESULT] Rapport echoue : HTTP {}",
|
||||
resp.status()
|
||||
);
|
||||
false
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!(" [RESULT] Erreur reseau : {}", e);
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,135 +0,0 @@
|
||||
//! Notifications toast Windows.
|
||||
//!
|
||||
//! Affiche des notifications natives Windows via l'API WinRT (winrt-notification).
|
||||
//! Equivalent de agent_v1/ui/notifications.py.
|
||||
//!
|
||||
//! Sur Linux/macOS : les notifications sont simplement affichees en console (log).
|
||||
//! Le crate winrt-notification n'est disponible que sur Windows.
|
||||
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
||||
/// Minimum delay, in seconds, between two notifications.
///
/// The limit is global: it applies to any two consecutive notifications,
/// whatever their title or message.
const MIN_INTERVAL_SECS: u64 = 5;

/// Unix timestamp (seconds) of the last notification that was shown.
static LAST_NOTIFY_TIME: AtomicU64 = AtomicU64::new(0);

/// Shows a notification.
///
/// The notification is always logged to stdout; on Windows a native toast
/// is shown as well. Calls arriving less than `MIN_INTERVAL_SECS` seconds
/// after the previous shown notification are silently dropped.
///
/// # Parameters
/// * `title` - notification title.
/// * `message` - notification body.
pub fn notify(title: &str, message: &str) {
    let now = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs();
    let last = LAST_NOTIFY_TIME.load(Ordering::Relaxed);

    // `checked_sub` instead of `now - last`: if the wall clock stepped
    // backwards (`now < last`) a plain subtraction underflows and panics in
    // debug builds. A backwards step is treated as "interval elapsed".
    let too_soon = now
        .checked_sub(last)
        .map_or(false, |elapsed| elapsed < MIN_INTERVAL_SECS);
    if too_soon {
        return;
    }

    // Claim the slot atomically: if another thread published a newer
    // timestamp in the meantime, it is the one showing a notification.
    if LAST_NOTIFY_TIME
        .compare_exchange(last, now, Ordering::Relaxed, Ordering::Relaxed)
        .is_err()
    {
        return;
    }

    // Console log in every case.
    println!("[NOTIFICATION] {} : {}", title, message);

    // Native Windows toast.
    #[cfg(windows)]
    {
        notify_windows(title, message);
    }
}
|
||||
|
||||
/// Windows implementation, built on the `winrt_notification` crate.
///
/// The toast is registered under `Toast::POWERSHELL_APP_ID` and plays the
/// default sound. A failure to show it is logged to stderr.
#[cfg(windows)]
fn notify_windows(title: &str, message: &str) {
    use winrt_notification::{Sound, Toast};

    let toast = Toast::new(Toast::POWERSHELL_APP_ID)
        .title(title)
        .text1(message)
        .sound(Some(Sound::Default));

    if let Err(e) = toast.show() {
        eprintln!("[NOTIFICATION] Erreur toast Windows : {:?}", e);
    }
}
|
||||
|
||||
// --- Notifications predefinies (equivalent Python) ---
|
||||
|
||||
/// Notification de bienvenue au demarrage.
|
||||
pub fn greet() {
|
||||
notify(
|
||||
"Lea - Assistant IA",
|
||||
"Bonjour ! Lea est prete. (IA)\nJe peux observer et automatiser vos taches.",
|
||||
);
|
||||
}
|
||||
|
||||
/// Notification de debut de session d'enregistrement.
|
||||
pub fn session_started(name: &str) {
|
||||
notify(
|
||||
"Enregistrement demarre",
|
||||
&format!(
|
||||
"C'est parti ! Je regarde et je memorise.\nSession : {}",
|
||||
name
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
/// Notification de fin de session d'enregistrement.
|
||||
pub fn session_ended(actions_count: u32) {
|
||||
notify(
|
||||
"Enregistrement termine",
|
||||
&format!(
|
||||
"C'est note ! J'ai compris les {} etapes.",
|
||||
actions_count
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
/// Notification de debut de replay.
|
||||
pub fn replay_started(name: &str) {
|
||||
notify(
|
||||
"Replay en cours",
|
||||
&format!(
|
||||
"Le systeme d'IA execute la tache...\nWorkflow : {}",
|
||||
name
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
/// Notification de fin de replay.
|
||||
pub fn replay_finished(success: bool) {
|
||||
if success {
|
||||
notify("Replay termine", "C'est fait ! La tache a ete executee avec succes.");
|
||||
} else {
|
||||
notify(
|
||||
"Replay echoue",
|
||||
"Hmm, j'ai eu un souci. Verifiez le resultat.",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Notification de changement de connexion.
|
||||
pub fn connection_changed(connected: bool) {
|
||||
if connected {
|
||||
notify("Connexion etablie", "Connectee au serveur RPA Vision.");
|
||||
} else {
|
||||
notify(
|
||||
"Connexion perdue",
|
||||
"Connexion au serveur perdue. Tentative de reconnexion...",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Notification d'arret d'urgence.
|
||||
pub fn emergency_stop_activated() {
|
||||
notify(
|
||||
"ARRET D'URGENCE",
|
||||
"Toutes les operations ont ete arretees immediatement.",
|
||||
);
|
||||
}
|
||||
@@ -1,713 +0,0 @@
|
||||
//! Capture d'evenements souris/clavier pour l'enregistrement de sessions.
|
||||
//!
|
||||
//! Utilise rdev pour intercepter les evenements globaux (sans focus).
|
||||
//! Les evenements sont envoyes au serveur streaming via network.rs.
|
||||
//! Equivalent de agent_v1/core/captor.py.
|
||||
//!
|
||||
//! Le recorder est actif uniquement quand state.recording == true.
|
||||
//! Il capture :
|
||||
//! - Clics souris (gauche, droit, double-clic)
|
||||
//! - Saisie clavier (buffer de texte avec flush apres 500ms d'inactivite)
|
||||
//! - Combos clavier (Ctrl+C, Alt+Tab, etc.)
|
||||
//!
|
||||
//! Sur les OS non-Windows, rdev fonctionne aussi (Linux via X11/evdev)
|
||||
//! mais les tests doivent etre faits manuellement.
|
||||
|
||||
use crate::capture;
|
||||
use crate::config::Config;
|
||||
use crate::state::AgentState;
|
||||
use crossbeam_channel::{bounded, Receiver, Sender};
|
||||
use std::sync::Arc;
|
||||
use std::thread;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
/// Event captured by the recorder, ready to be sent to the server.
///
/// Coordinates named `*_pct` are screen positions normalized by the
/// screen dimensions.
#[derive(Debug, Clone)]
pub enum CapturedEvent {
    /// Single mouse click.
    Click {
        /// Normalized X position of the click.
        x_pct: f64,
        /// Normalized Y position of the click.
        y_pct: f64,
        /// Button name ("left", "right" or "middle").
        button: String,
        /// Title of the active window at click time.
        window_title: String,
    },
    /// Double click.
    DoubleClick {
        /// Normalized X position of the click.
        x_pct: f64,
        /// Normalized Y position of the click.
        y_pct: f64,
        /// Title of the active window at click time.
        window_title: String,
    },
    /// Typed text, accumulated in the keystroke buffer.
    Text {
        /// The accumulated text.
        text: String,
        /// X position recorded for the start of the input.
        x_pct: f64,
        /// Y position recorded for the start of the input.
        y_pct: f64,
    },
    /// Keyboard combination, e.g. `["ctrl", "c"]`.
    KeyCombo {
        /// Key names making up the combination.
        keys: Vec<String>,
    },
    /// Scroll event.
    Scroll {
        /// Scroll delta.
        delta: i32,
        /// Normalized X position.
        x_pct: f64,
        /// Normalized Y position.
        y_pct: f64,
    },
}
|
||||
|
||||
/// Internal recorder state: keystroke buffer, last-click tracking,
/// modifier keys and screen dimensions.
struct RecorderState {
    /// Text typed so far and not yet emitted (flushed after 500 ms of inactivity).
    text_buffer: String,
    /// Time of the last keystroke (used for the flush timeout).
    last_keystroke: Instant,
    /// Cursor position at the start of the current text input.
    text_start_x: f64,
    text_start_y: f64,
    /// Time and position of the last click (used for double-click detection).
    last_click_time: Instant,
    last_click_x: f64,
    last_click_y: f64,
    /// Modifier keys currently held down.
    ctrl_held: bool,
    alt_held: bool,
    shift_held: bool,
    meta_held: bool,
    /// Screen dimensions, used to normalize coordinates.
    screen_width: u32,
    screen_height: u32,
}

impl RecorderState {
    /// Creates an empty state for a screen of the given size (in pixels).
    fn new(screen_width: u32, screen_height: u32) -> Self {
        let now = Instant::now();
        // Backdate the "last click" by 10 s so the very first click cannot
        // be taken for the second half of a double click. `Instant - Duration`
        // panics when the result is not representable by the platform clock,
        // so use `checked_sub` and fall back to `now` in that case.
        let last_click_time = now.checked_sub(Duration::from_secs(10)).unwrap_or(now);

        Self {
            text_buffer: String::new(),
            last_keystroke: now,
            text_start_x: 0.0,
            text_start_y: 0.0,
            last_click_time,
            last_click_x: 0.0,
            last_click_y: 0.0,
            ctrl_held: false,
            alt_held: false,
            shift_held: false,
            meta_held: false,
            screen_width,
            screen_height,
        }
    }

    /// Converts absolute coordinates into fractions of the screen size.
    ///
    /// Returns `(0.0, 0.0)` when either screen dimension is zero, which
    /// avoids a division by zero.
    fn normalize(&self, x: f64, y: f64) -> (f64, f64) {
        if self.screen_width == 0 || self.screen_height == 0 {
            return (0.0, 0.0);
        }
        (
            x / self.screen_width as f64,
            y / self.screen_height as f64,
        )
    }

    /// Returns `true` when Ctrl, Alt or Meta is held.
    ///
    /// Shift is not taken into account by this check.
    fn any_modifier_held(&self) -> bool {
        self.ctrl_held || self.alt_held || self.meta_held
    }
}
|
||||
|
||||
/// Inactivity delay after which the text buffer is flushed, in milliseconds.
const TEXT_FLUSH_DELAY_MS: u64 = 500;

/// Maximum distance between two clicks for a double click, in pixels.
const DOUBLE_CLICK_DIST_THRESHOLD: f64 = 10.0;

/// Maximum time between two clicks for a double click, in milliseconds.
const DOUBLE_CLICK_TIME_MS: u64 = 400;
|
||||
|
||||
/// Demarre le thread de capture d'evenements.
|
||||
///
|
||||
/// Cree un canal crossbeam pour envoyer les evenements captures
|
||||
/// vers le thread d'envoi reseau. Le listener rdev tourne dans
|
||||
/// un thread dedie car il bloque (callback-based).
|
||||
pub fn start_recorder(
|
||||
config: Arc<Config>,
|
||||
state: Arc<AgentState>,
|
||||
) -> Receiver<CapturedEvent> {
|
||||
let (tx, rx) = bounded::<CapturedEvent>(100);
|
||||
|
||||
// Thread du listener rdev
|
||||
let listener_state = state.clone();
|
||||
let listener_tx = tx.clone();
|
||||
thread::Builder::new()
|
||||
.name("event-listener".to_string())
|
||||
.spawn(move || {
|
||||
event_listener_loop(listener_tx, listener_state);
|
||||
})
|
||||
.expect("Impossible de demarrer le thread listener");
|
||||
|
||||
// Thread de flush du buffer de texte
|
||||
let flush_tx = tx;
|
||||
let flush_state = state.clone();
|
||||
thread::Builder::new()
|
||||
.name("text-flush".to_string())
|
||||
.spawn(move || {
|
||||
text_flush_loop(flush_tx, flush_state);
|
||||
})
|
||||
.expect("Impossible de demarrer le thread flush");
|
||||
|
||||
// Thread d'envoi des evenements captures vers le serveur
|
||||
let send_state = state;
|
||||
let send_rx = rx.clone();
|
||||
let send_config = config;
|
||||
thread::Builder::new()
|
||||
.name("event-sender".to_string())
|
||||
.spawn(move || {
|
||||
event_sender_loop(send_rx, send_config, send_state);
|
||||
})
|
||||
.expect("Impossible de demarrer le thread sender");
|
||||
|
||||
rx
|
||||
}
|
||||
|
||||
/// Boucle du listener rdev — capture les evenements souris/clavier globaux.
|
||||
///
|
||||
/// rdev::listen est bloquant et appelle le callback pour chaque evenement.
|
||||
/// On filtre et transforme les evenements pertinents, puis on les envoie
|
||||
/// via le canal crossbeam.
|
||||
fn event_listener_loop(tx: Sender<CapturedEvent>, state: Arc<AgentState>) {
|
||||
let (screen_w, screen_h) = capture::screen_dimensions().unwrap_or((1920, 1080));
|
||||
let rec_state = std::sync::Mutex::new(RecorderState::new(screen_w, screen_h));
|
||||
|
||||
println!(
|
||||
"[RECORDER] Listener demarre (ecran {}x{})",
|
||||
screen_w, screen_h
|
||||
);
|
||||
|
||||
// rdev::listen prend un callback FnMut
|
||||
let callback = move |event: rdev::Event| {
|
||||
// Ne capturer que si l'enregistrement est actif
|
||||
if !state.recording.load(std::sync::atomic::Ordering::SeqCst) {
|
||||
return;
|
||||
}
|
||||
|
||||
let mut rs = match rec_state.lock() {
|
||||
Ok(s) => s,
|
||||
Err(_) => return,
|
||||
};
|
||||
|
||||
match event.event_type {
|
||||
rdev::EventType::ButtonPress(button) => {
|
||||
let btn_name = match button {
|
||||
rdev::Button::Left => "left",
|
||||
rdev::Button::Right => "right",
|
||||
rdev::Button::Middle => "middle",
|
||||
_ => return,
|
||||
};
|
||||
|
||||
// Obtenir la position de la souris depuis l'evenement
|
||||
// rdev ne fournit pas toujours les coordonnees dans ButtonPress,
|
||||
// on utilise la derniere position connue via MouseMove.
|
||||
// Pour simplifier, on capture la position courante du curseur.
|
||||
let (mx, my) = get_cursor_position();
|
||||
let (x_pct, y_pct) = rs.normalize(mx, my);
|
||||
|
||||
// Flush le buffer de texte avant le clic
|
||||
if !rs.text_buffer.is_empty() {
|
||||
let text_event = CapturedEvent::Text {
|
||||
text: rs.text_buffer.clone(),
|
||||
x_pct: rs.text_start_x,
|
||||
y_pct: rs.text_start_y,
|
||||
};
|
||||
let _ = tx.try_send(text_event);
|
||||
rs.text_buffer.clear();
|
||||
}
|
||||
|
||||
// Detection double-clic
|
||||
let now = Instant::now();
|
||||
let dt = now.duration_since(rs.last_click_time);
|
||||
let dx = (mx - rs.last_click_x).abs();
|
||||
let dy = (my - rs.last_click_y).abs();
|
||||
let dist = (dx * dx + dy * dy).sqrt();
|
||||
|
||||
if btn_name == "left"
|
||||
&& dt < Duration::from_millis(DOUBLE_CLICK_TIME_MS)
|
||||
&& dist < DOUBLE_CLICK_DIST_THRESHOLD
|
||||
{
|
||||
// Double-clic detecte
|
||||
let event = CapturedEvent::DoubleClick {
|
||||
x_pct,
|
||||
y_pct,
|
||||
window_title: get_active_window_title(),
|
||||
};
|
||||
let _ = tx.try_send(event);
|
||||
} else {
|
||||
// Clic simple
|
||||
let event = CapturedEvent::Click {
|
||||
x_pct,
|
||||
y_pct,
|
||||
button: btn_name.to_string(),
|
||||
window_title: get_active_window_title(),
|
||||
};
|
||||
let _ = tx.try_send(event);
|
||||
|
||||
// Incrementer le compteur d'actions
|
||||
state.increment_actions();
|
||||
}
|
||||
|
||||
rs.last_click_time = now;
|
||||
rs.last_click_x = mx;
|
||||
rs.last_click_y = my;
|
||||
}
|
||||
|
||||
rdev::EventType::KeyPress(key) => {
|
||||
// Mettre a jour les modifieurs
|
||||
match key {
|
||||
rdev::Key::ControlLeft | rdev::Key::ControlRight => {
|
||||
rs.ctrl_held = true;
|
||||
return;
|
||||
}
|
||||
rdev::Key::Alt | rdev::Key::AltGr => {
|
||||
rs.alt_held = true;
|
||||
return;
|
||||
}
|
||||
rdev::Key::ShiftLeft | rdev::Key::ShiftRight => {
|
||||
rs.shift_held = true;
|
||||
return;
|
||||
}
|
||||
rdev::Key::MetaLeft | rdev::Key::MetaRight => {
|
||||
rs.meta_held = true;
|
||||
return;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// Si un modifieur non-shift est enfonce, c'est un combo
|
||||
if rs.any_modifier_held() {
|
||||
let mut keys = Vec::new();
|
||||
if rs.ctrl_held {
|
||||
keys.push("ctrl".to_string());
|
||||
}
|
||||
if rs.alt_held {
|
||||
keys.push("alt".to_string());
|
||||
}
|
||||
if rs.meta_held {
|
||||
keys.push("win".to_string());
|
||||
}
|
||||
if rs.shift_held {
|
||||
keys.push("shift".to_string());
|
||||
}
|
||||
keys.push(rdev_key_to_string(key));
|
||||
|
||||
// Flush le buffer avant le combo
|
||||
if !rs.text_buffer.is_empty() {
|
||||
let text_event = CapturedEvent::Text {
|
||||
text: rs.text_buffer.clone(),
|
||||
x_pct: rs.text_start_x,
|
||||
y_pct: rs.text_start_y,
|
||||
};
|
||||
let _ = tx.try_send(text_event);
|
||||
rs.text_buffer.clear();
|
||||
}
|
||||
|
||||
let event = CapturedEvent::KeyCombo { keys };
|
||||
let _ = tx.try_send(event);
|
||||
state.increment_actions();
|
||||
} else {
|
||||
// Touche de saisie normale — ajouter au buffer
|
||||
if let Some(c) = rdev_key_to_char(key) {
|
||||
if rs.text_buffer.is_empty() {
|
||||
let (mx, my) = get_cursor_position();
|
||||
let (x, y) = rs.normalize(mx, my);
|
||||
rs.text_start_x = x;
|
||||
rs.text_start_y = y;
|
||||
}
|
||||
rs.text_buffer.push(c);
|
||||
rs.last_keystroke = Instant::now();
|
||||
} else {
|
||||
// Touche speciale non-texte (Enter, Tab, etc.)
|
||||
// Flush le buffer et envoyer comme combo simple
|
||||
if !rs.text_buffer.is_empty() {
|
||||
let text_event = CapturedEvent::Text {
|
||||
text: rs.text_buffer.clone(),
|
||||
x_pct: rs.text_start_x,
|
||||
y_pct: rs.text_start_y,
|
||||
};
|
||||
let _ = tx.try_send(text_event);
|
||||
rs.text_buffer.clear();
|
||||
}
|
||||
|
||||
let key_name = rdev_key_to_string(key);
|
||||
let event = CapturedEvent::KeyCombo {
|
||||
keys: vec![key_name],
|
||||
};
|
||||
let _ = tx.try_send(event);
|
||||
state.increment_actions();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rdev::EventType::KeyRelease(key) => {
|
||||
// Mettre a jour les modifieurs
|
||||
match key {
|
||||
rdev::Key::ControlLeft | rdev::Key::ControlRight => rs.ctrl_held = false,
|
||||
rdev::Key::Alt | rdev::Key::AltGr => rs.alt_held = false,
|
||||
rdev::Key::ShiftLeft | rdev::Key::ShiftRight => rs.shift_held = false,
|
||||
rdev::Key::MetaLeft | rdev::Key::MetaRight => rs.meta_held = false,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
rdev::EventType::Wheel { delta_x: _, delta_y } => {
|
||||
let (mx, my) = get_cursor_position();
|
||||
let (x_pct, y_pct) = rs.normalize(mx, my);
|
||||
let delta = if delta_y > 0 { 3 } else { -3 };
|
||||
|
||||
let event = CapturedEvent::Scroll {
|
||||
delta,
|
||||
x_pct,
|
||||
y_pct,
|
||||
};
|
||||
let _ = tx.try_send(event);
|
||||
state.increment_actions();
|
||||
}
|
||||
|
||||
_ => {
|
||||
// MouseMove et autres evenements ignores
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// rdev::listen est bloquant — il ne retourne qu'en cas d'erreur
|
||||
if let Err(e) = rdev::listen(callback) {
|
||||
eprintln!("[RECORDER] Erreur fatale du listener rdev : {:?}", e);
|
||||
}
|
||||
}
|
||||
|
||||
/// Boucle de flush periodique du buffer de texte.
|
||||
///
|
||||
/// Toutes les 100ms, verifie si le buffer de texte est non-vide
|
||||
/// et si le delai de flush (500ms) est depasse. Si oui, flush le buffer
|
||||
/// en envoyant un evenement Text.
|
||||
fn text_flush_loop(_tx: Sender<CapturedEvent>, state: Arc<AgentState>) {
|
||||
// Note: le flush est gere dans le callback rdev via le Mutex.
|
||||
// Cette boucle est un filet de securite pour les cas ou le buffer
|
||||
// resterait non-flush (timeout sans nouveau evenement).
|
||||
// L'implementation complete necessiterait un acces partage au RecorderState.
|
||||
// Pour l'instant, le flush est declenche par le prochain evenement (clic, combo).
|
||||
|
||||
while state.is_running() {
|
||||
thread::sleep(Duration::from_millis(TEXT_FLUSH_DELAY_MS));
|
||||
}
|
||||
}
|
||||
|
||||
/// Boucle d'envoi des evenements captures vers le serveur streaming.
|
||||
///
|
||||
/// Lit les evenements du canal crossbeam et les envoie au serveur
|
||||
/// via HTTP POST (format compatible avec le Python streamer).
|
||||
fn event_sender_loop(
|
||||
rx: Receiver<CapturedEvent>,
|
||||
config: Arc<Config>,
|
||||
state: Arc<AgentState>,
|
||||
) {
|
||||
let client = reqwest::blocking::Client::new();
|
||||
|
||||
println!("[RECORDER] Thread d'envoi d'evenements demarre");
|
||||
|
||||
loop {
|
||||
// Bloquer jusqu'au prochain evenement (ou timeout)
|
||||
match rx.recv_timeout(Duration::from_secs(1)) {
|
||||
Ok(event) => {
|
||||
if !state.recording.load(std::sync::atomic::Ordering::SeqCst) {
|
||||
continue; // Enregistrement arrete entre-temps
|
||||
}
|
||||
|
||||
let session_name = state.current_recording_name();
|
||||
send_event_to_server(&client, &config, &event, &session_name);
|
||||
}
|
||||
Err(crossbeam_channel::RecvTimeoutError::Timeout) => {
|
||||
if !state.is_running() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Err(crossbeam_channel::RecvTimeoutError::Disconnected) => {
|
||||
println!("[RECORDER] Canal deconnecte — arret du sender");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Envoie un evenement capture au serveur streaming.
|
||||
///
|
||||
/// Inclut la resolution de l'ecran dans chaque event pour que le serveur
|
||||
/// puisse construire des ScreenStates avec la bonne resolution d'apprentissage
|
||||
/// (au lieu du fallback 1920x1080).
|
||||
fn send_event_to_server(
|
||||
client: &reqwest::blocking::Client,
|
||||
config: &Config,
|
||||
event: &CapturedEvent,
|
||||
session_name: &str,
|
||||
) {
|
||||
let url = format!("{}/traces/stream/event", config.server_url);
|
||||
let timestamp = chrono::Utc::now().to_rfc3339();
|
||||
let (screen_w, screen_h) = capture::screen_dimensions().unwrap_or((1920, 1080));
|
||||
|
||||
let payload = match event {
|
||||
CapturedEvent::Click {
|
||||
x_pct,
|
||||
y_pct,
|
||||
button,
|
||||
window_title,
|
||||
} => {
|
||||
serde_json::json!({
|
||||
"type": "click",
|
||||
"x_pct": x_pct,
|
||||
"y_pct": y_pct,
|
||||
"button": button,
|
||||
"window_title": window_title,
|
||||
"session_name": session_name,
|
||||
"machine_id": config.machine_id,
|
||||
"timestamp": timestamp,
|
||||
"screen_resolution": [screen_w, screen_h],
|
||||
})
|
||||
}
|
||||
CapturedEvent::DoubleClick {
|
||||
x_pct,
|
||||
y_pct,
|
||||
window_title,
|
||||
} => {
|
||||
serde_json::json!({
|
||||
"type": "click",
|
||||
"x_pct": x_pct,
|
||||
"y_pct": y_pct,
|
||||
"button": "double",
|
||||
"window_title": window_title,
|
||||
"session_name": session_name,
|
||||
"machine_id": config.machine_id,
|
||||
"timestamp": timestamp,
|
||||
"screen_resolution": [screen_w, screen_h],
|
||||
})
|
||||
}
|
||||
CapturedEvent::Text {
|
||||
text,
|
||||
x_pct,
|
||||
y_pct,
|
||||
} => {
|
||||
serde_json::json!({
|
||||
"type": "type",
|
||||
"text": text,
|
||||
"x_pct": x_pct,
|
||||
"y_pct": y_pct,
|
||||
"session_name": session_name,
|
||||
"machine_id": config.machine_id,
|
||||
"timestamp": timestamp,
|
||||
"screen_resolution": [screen_w, screen_h],
|
||||
})
|
||||
}
|
||||
CapturedEvent::KeyCombo { keys } => {
|
||||
serde_json::json!({
|
||||
"type": "key_combo",
|
||||
"keys": keys,
|
||||
"session_name": session_name,
|
||||
"machine_id": config.machine_id,
|
||||
"timestamp": timestamp,
|
||||
"screen_resolution": [screen_w, screen_h],
|
||||
})
|
||||
}
|
||||
CapturedEvent::Scroll {
|
||||
delta,
|
||||
x_pct,
|
||||
y_pct,
|
||||
} => {
|
||||
serde_json::json!({
|
||||
"type": "scroll",
|
||||
"delta": delta,
|
||||
"x_pct": x_pct,
|
||||
"y_pct": y_pct,
|
||||
"session_name": session_name,
|
||||
"machine_id": config.machine_id,
|
||||
"timestamp": timestamp,
|
||||
"screen_resolution": [screen_w, screen_h],
|
||||
})
|
||||
}
|
||||
};
|
||||
|
||||
// Envoi non-bloquant (on ne veut pas ralentir la capture)
|
||||
match client
|
||||
.post(&url)
|
||||
.json(&payload)
|
||||
.timeout(Duration::from_secs(5))
|
||||
.send()
|
||||
{
|
||||
Ok(resp) => {
|
||||
if !resp.status().is_success() {
|
||||
eprintln!(
|
||||
"[RECORDER] Envoi evenement echoue : HTTP {}",
|
||||
resp.status()
|
||||
);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("[RECORDER] Erreur reseau : {}", e);
|
||||
}
|
||||
}
|
||||
|
||||
// Capturer un screenshot pour les clics (dual: full + crop)
|
||||
if matches!(
|
||||
event,
|
||||
CapturedEvent::Click { .. } | CapturedEvent::DoubleClick { .. }
|
||||
) {
|
||||
if let Some(img) = capture::capture_screenshot() {
|
||||
let jpeg = capture::screenshot_to_jpeg_bytes(&img, 80);
|
||||
if !jpeg.is_empty() {
|
||||
let shot_id = format!("rec_{}", chrono::Utc::now().timestamp_millis());
|
||||
let _ = crate::network::send_heartbeat(
|
||||
&reqwest::blocking::Client::new(),
|
||||
&crate::config::Config::from_env(),
|
||||
&jpeg,
|
||||
session_name,
|
||||
);
|
||||
let _ = shot_id; // utilise implicitement via send_heartbeat
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// --- Fonctions utilitaires ---
|
||||
|
||||
/// Returns the current mouse cursor position as `(x, y)`.
///
/// On Windows the position comes from `GetCursorPos`. On other platforms, or
/// when the call fails, `(0.0, 0.0)` is returned.
fn get_cursor_position() -> (f64, f64) {
    #[cfg(windows)]
    {
        use windows_sys::Win32::Foundation::POINT;
        use windows_sys::Win32::UI::WindowsAndMessaging::GetCursorPos;

        let mut pt = POINT { x: 0, y: 0 };
        // SAFETY: `pt` is a valid, writable POINT for the duration of the call.
        let succeeded = unsafe { GetCursorPos(&mut pt) } != 0;
        if succeeded {
            return (pt.x as f64, pt.y as f64);
        }
    }

    // Unknown position.
    (0.0, 0.0)
}
|
||||
|
||||
/// Returns the title of the foreground window.
///
/// On Windows the title is read with `GetWindowTextW` into a 256-element
/// UTF-16 buffer. When there is no foreground window, the title is empty, or
/// the platform is not Windows, the string `"Inconnu"` is returned.
fn get_active_window_title() -> String {
    #[cfg(windows)]
    {
        use windows_sys::Win32::UI::WindowsAndMessaging::{
            GetForegroundWindow, GetWindowTextW,
        };

        let mut title = [0u16; 256];
        // SAFETY: the pointer and length describe `title`, which outlives the call.
        let copied = unsafe {
            let hwnd = GetForegroundWindow();
            if hwnd.is_null() {
                0
            } else {
                GetWindowTextW(hwnd, title.as_mut_ptr(), title.len() as i32)
            }
        };
        if copied > 0 {
            return String::from_utf16_lossy(&title[..copied as usize]);
        }
    }

    "Inconnu".to_string()
}
|
||||
|
||||
/// Convertit une touche rdev en caractere texte (pour le buffer de saisie).
|
||||
/// Retourne None pour les touches speciales (Enter, Tab, etc.).
|
||||
fn rdev_key_to_char(key: rdev::Key) -> Option<char> {
|
||||
match key {
|
||||
rdev::Key::KeyA => Some('a'),
|
||||
rdev::Key::KeyB => Some('b'),
|
||||
rdev::Key::KeyC => Some('c'),
|
||||
rdev::Key::KeyD => Some('d'),
|
||||
rdev::Key::KeyE => Some('e'),
|
||||
rdev::Key::KeyF => Some('f'),
|
||||
rdev::Key::KeyG => Some('g'),
|
||||
rdev::Key::KeyH => Some('h'),
|
||||
rdev::Key::KeyI => Some('i'),
|
||||
rdev::Key::KeyJ => Some('j'),
|
||||
rdev::Key::KeyK => Some('k'),
|
||||
rdev::Key::KeyL => Some('l'),
|
||||
rdev::Key::KeyM => Some('m'),
|
||||
rdev::Key::KeyN => Some('n'),
|
||||
rdev::Key::KeyO => Some('o'),
|
||||
rdev::Key::KeyP => Some('p'),
|
||||
rdev::Key::KeyQ => Some('q'),
|
||||
rdev::Key::KeyR => Some('r'),
|
||||
rdev::Key::KeyS => Some('s'),
|
||||
rdev::Key::KeyT => Some('t'),
|
||||
rdev::Key::KeyU => Some('u'),
|
||||
rdev::Key::KeyV => Some('v'),
|
||||
rdev::Key::KeyW => Some('w'),
|
||||
rdev::Key::KeyX => Some('x'),
|
||||
rdev::Key::KeyY => Some('y'),
|
||||
rdev::Key::KeyZ => Some('z'),
|
||||
rdev::Key::Num0 => Some('0'),
|
||||
rdev::Key::Num1 => Some('1'),
|
||||
rdev::Key::Num2 => Some('2'),
|
||||
rdev::Key::Num3 => Some('3'),
|
||||
rdev::Key::Num4 => Some('4'),
|
||||
rdev::Key::Num5 => Some('5'),
|
||||
rdev::Key::Num6 => Some('6'),
|
||||
rdev::Key::Num7 => Some('7'),
|
||||
rdev::Key::Num8 => Some('8'),
|
||||
rdev::Key::Num9 => Some('9'),
|
||||
rdev::Key::Space => Some(' '),
|
||||
rdev::Key::Minus => Some('-'),
|
||||
rdev::Key::Equal => Some('='),
|
||||
rdev::Key::LeftBracket => Some('['),
|
||||
rdev::Key::RightBracket => Some(']'),
|
||||
rdev::Key::SemiColon => Some(';'),
|
||||
rdev::Key::Quote => Some('\''),
|
||||
rdev::Key::Comma => Some(','),
|
||||
rdev::Key::Dot => Some('.'),
|
||||
rdev::Key::Slash => Some('/'),
|
||||
rdev::Key::BackSlash => Some('\\'),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Convertit une touche rdev en nom de touche (pour les combos).
|
||||
fn rdev_key_to_string(key: rdev::Key) -> String {
|
||||
match key {
|
||||
rdev::Key::Return => "enter".to_string(),
|
||||
rdev::Key::Tab => "tab".to_string(),
|
||||
rdev::Key::Escape => "escape".to_string(),
|
||||
rdev::Key::Backspace => "backspace".to_string(),
|
||||
rdev::Key::Delete => "delete".to_string(),
|
||||
rdev::Key::Space => "space".to_string(),
|
||||
rdev::Key::UpArrow => "up".to_string(),
|
||||
rdev::Key::DownArrow => "down".to_string(),
|
||||
rdev::Key::LeftArrow => "left".to_string(),
|
||||
rdev::Key::RightArrow => "right".to_string(),
|
||||
rdev::Key::Home => "home".to_string(),
|
||||
rdev::Key::End => "end".to_string(),
|
||||
rdev::Key::PageUp => "page_up".to_string(),
|
||||
rdev::Key::PageDown => "page_down".to_string(),
|
||||
rdev::Key::F1 => "f1".to_string(),
|
||||
rdev::Key::F2 => "f2".to_string(),
|
||||
rdev::Key::F3 => "f3".to_string(),
|
||||
rdev::Key::F4 => "f4".to_string(),
|
||||
rdev::Key::F5 => "f5".to_string(),
|
||||
rdev::Key::F6 => "f6".to_string(),
|
||||
rdev::Key::F7 => "f7".to_string(),
|
||||
rdev::Key::F8 => "f8".to_string(),
|
||||
rdev::Key::F9 => "f9".to_string(),
|
||||
rdev::Key::F10 => "f10".to_string(),
|
||||
rdev::Key::F11 => "f11".to_string(),
|
||||
rdev::Key::F12 => "f12".to_string(),
|
||||
rdev::Key::CapsLock => "caps_lock".to_string(),
|
||||
rdev::Key::Insert => "insert".to_string(),
|
||||
rdev::Key::PrintScreen => "print_screen".to_string(),
|
||||
// Pour les lettres et chiffres, reutiliser rdev_key_to_char
|
||||
other => {
|
||||
if let Some(c) = rdev_key_to_char(other) {
|
||||
c.to_string()
|
||||
} else {
|
||||
format!("{:?}", other).to_lowercase()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,125 +0,0 @@
|
||||
//! Boucle de polling replay.
|
||||
//!
|
||||
//! Poll le serveur toutes les secondes pour recuperer les actions a executer.
|
||||
//! Quand une action est recue, l'execute via executor et rapporte le resultat.
|
||||
//! Le backoff exponentiel en cas d'indisponibilite du serveur est prevu mais pas encore implemente : l'intervalle de poll reste constant.
|
||||
//!
|
||||
//! Reproduit le comportement de _replay_poll_loop dans agent_v1/main.py.
|
||||
|
||||
use crate::capture;
|
||||
use crate::config::Config;
|
||||
use crate::executor;
|
||||
use crate::network;
|
||||
use crate::notifications;
|
||||
use crate::state::AgentState;
|
||||
use reqwest::blocking::Client;
|
||||
use std::thread;
|
||||
use std::time::Duration;
|
||||
|
||||
/// Boucle de polling replay (tourne dans un thread dedie).
|
||||
///
|
||||
/// - Poll GET /replay/next toutes les secondes
|
||||
/// - Execute l'action via executor
|
||||
/// - Capture un screenshot post-action
|
||||
/// - Rapporte le resultat via POST /replay/result
|
||||
/// - Backoff exponentiel si le serveur est indisponible
|
||||
pub fn replay_poll_loop(config: &Config, state: &AgentState) {
|
||||
let client = Client::new();
|
||||
let mut poll_count: u64 = 0;
|
||||
let backoff = config.replay_poll_interval_s;
|
||||
let _backoff_max = 30.0_f64;
|
||||
let _backoff_factor = 1.5_f64;
|
||||
let mut replay_active = false;
|
||||
|
||||
println!(
|
||||
"[REPLAY] Boucle replay demarree — poll toutes les {:.0}s sur {}",
|
||||
config.replay_poll_interval_s, config.server_url
|
||||
);
|
||||
|
||||
while state.is_running() {
|
||||
// Verifier l'arret d'urgence
|
||||
if state
|
||||
.emergency_stop
|
||||
.load(std::sync::atomic::Ordering::SeqCst)
|
||||
{
|
||||
if replay_active {
|
||||
println!("[REPLAY] ARRET D'URGENCE — replay interrompu");
|
||||
replay_active = false;
|
||||
state.set_replay_active(false);
|
||||
}
|
||||
thread::sleep(Duration::from_secs(1));
|
||||
continue;
|
||||
}
|
||||
|
||||
poll_count += 1;
|
||||
|
||||
// Log periodique toutes les 60s pour confirmer que la boucle tourne
|
||||
let polls_per_minute = (60.0 / backoff).ceil() as u64;
|
||||
if polls_per_minute > 0 && poll_count % polls_per_minute == 0 {
|
||||
println!(
|
||||
"[REPLAY] Poll #{} — session={} — serveur={}",
|
||||
poll_count,
|
||||
config.agent_session_id(),
|
||||
config.server_url,
|
||||
);
|
||||
}
|
||||
|
||||
match network::poll_next_action(&client, config) {
|
||||
Some(action) => {
|
||||
if !replay_active {
|
||||
replay_active = true;
|
||||
state.set_replay_active(true);
|
||||
notifications::replay_started("workflow");
|
||||
println!("[REPLAY] Replay demarre");
|
||||
}
|
||||
|
||||
let action_type = action.action_type.clone();
|
||||
let action_id = action.action_id.clone();
|
||||
println!(
|
||||
"\n>>> REPLAY ACTION RECUE : {} (id={})",
|
||||
action_type, action_id
|
||||
);
|
||||
|
||||
// Obtenir les dimensions de l'ecran
|
||||
let (sw, sh) = capture::screen_dimensions().unwrap_or((1920, 1080));
|
||||
|
||||
// Executer l'action (avec config pour la resolution visuelle)
|
||||
println!(">>> Execution de l'action {}...", action_type);
|
||||
let mut result = executor::execute_action(&action, sw, sh, config);
|
||||
println!(
|
||||
">>> Resultat execution : success={}, error={:?}",
|
||||
result.success, result.error
|
||||
);
|
||||
|
||||
// Capture screenshot post-action (apres 500ms)
|
||||
thread::sleep(Duration::from_millis(500));
|
||||
if let Some(img) = capture::capture_screenshot() {
|
||||
let b64 = capture::screenshot_to_jpeg_base64(&img, 60);
|
||||
if !b64.is_empty() {
|
||||
result.screenshot = Some(b64);
|
||||
}
|
||||
}
|
||||
|
||||
// Rapporter le resultat au serveur (TOUJOURS, meme en erreur)
|
||||
network::report_result(&client, config, &result);
|
||||
|
||||
// Poll plus rapidement pour enchainer les actions
|
||||
thread::sleep(Duration::from_millis(200));
|
||||
continue;
|
||||
}
|
||||
None => {
|
||||
if replay_active {
|
||||
println!("[REPLAY] Replay termine — retour en mode capture");
|
||||
replay_active = false;
|
||||
state.set_replay_active(false);
|
||||
notifications::replay_finished(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let sleep_duration = Duration::from_secs_f64(backoff);
|
||||
thread::sleep(sleep_duration);
|
||||
}
|
||||
|
||||
println!("[REPLAY] Boucle arretee.");
|
||||
}
|
||||
@@ -1,402 +0,0 @@
|
||||
//! Mini serveur HTTP pour les captures d'écran à la demande.
|
||||
//!
|
||||
//! Écoute sur le port 5006 (configurable via RPA_CAPTURE_PORT).
|
||||
//! Endpoints :
|
||||
//! GET /capture -> screenshot frais en JSON {image, width, height, format}
|
||||
//! GET /health -> {"status": "ok"}
|
||||
//! POST /file-action -> opérations fichiers (list, create, move, copy, sort)
|
||||
//!
|
||||
//! Reproduit le comportement de agent_v1/ui/capture_server.py.
|
||||
|
||||
use crate::capture;
|
||||
use serde_json::json;
|
||||
use tiny_http::{Header, Method, Response, Server};
|
||||
|
||||
/// Démarre le serveur de capture sur le port donné (bloquant).
|
||||
///
|
||||
/// Cette fonction tourne dans un thread dédié et ne retourne jamais.
|
||||
pub fn start_capture_server(port: u16) {
|
||||
let addr = format!("0.0.0.0:{}", port);
|
||||
let server = match Server::http(&addr) {
|
||||
Ok(s) => s,
|
||||
Err(e) => {
|
||||
eprintln!("[CAPTURE] Impossible de demarrer le serveur sur {} : {}", addr, e);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
println!("[CAPTURE] Serveur de capture demarre sur le port {}", port);
|
||||
|
||||
for request in server.incoming_requests() {
|
||||
let url = request.url().to_string();
|
||||
let method = request.method().clone();
|
||||
|
||||
match (method, url.as_str()) {
|
||||
(Method::Get, "/capture") => handle_capture(request),
|
||||
(Method::Get, "/health") => handle_health(request),
|
||||
(Method::Post, "/file-action") => handle_file_action(request),
|
||||
(Method::Options, _) => handle_options(request),
|
||||
_ => {
|
||||
let body = json!({"error": "not found"}).to_string();
|
||||
let _ = send_json_response(request, 404, &body);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// GET /capture — Capture un screenshot frais et le renvoie en JSON base64.
|
||||
fn handle_capture(request: tiny_http::Request) {
|
||||
let start = std::time::Instant::now();
|
||||
|
||||
match capture::capture_screenshot() {
|
||||
Some(img) => {
|
||||
let width = img.width();
|
||||
let height = img.height();
|
||||
let b64 = capture::screenshot_to_jpeg_base64(&img, 80);
|
||||
let elapsed_ms = start.elapsed().as_millis();
|
||||
|
||||
let body = json!({
|
||||
"image": b64,
|
||||
"width": width,
|
||||
"height": height,
|
||||
"format": "jpeg",
|
||||
"source": "rust_agent",
|
||||
"capture_ms": elapsed_ms,
|
||||
})
|
||||
.to_string();
|
||||
|
||||
let _ = send_json_response(request, 200, &body);
|
||||
}
|
||||
None => {
|
||||
let body = json!({"error": "Capture echouee"}).to_string();
|
||||
let _ = send_json_response(request, 500, &body);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// GET /health — Vérification de santé.
|
||||
fn handle_health(request: tiny_http::Request) {
|
||||
let body = json!({
|
||||
"status": "ok",
|
||||
"agent": "rust",
|
||||
"version": crate::config::AGENT_VERSION,
|
||||
})
|
||||
.to_string();
|
||||
let _ = send_json_response(request, 200, &body);
|
||||
}
|
||||
|
||||
/// POST /file-action — Opérations fichiers sur la machine locale.
|
||||
///
|
||||
/// Body JSON attendu : {"action": "file_list_dir", "params": {"path": "C:\\..."}}
|
||||
/// Actions supportées : file_list_dir, file_create_dir, file_move, file_copy, file_sort_by_ext
|
||||
fn handle_file_action(mut request: tiny_http::Request) {
|
||||
// Lire le body
|
||||
let mut body_str = String::new();
|
||||
if let Err(e) = request.as_reader().read_to_string(&mut body_str) {
|
||||
let resp = json!({"error": format!("Erreur lecture body : {}", e)}).to_string();
|
||||
let _ = send_json_response(request, 400, &resp);
|
||||
return;
|
||||
}
|
||||
|
||||
// Parser le JSON
|
||||
let data: serde_json::Value = match serde_json::from_str(&body_str) {
|
||||
Ok(v) => v,
|
||||
Err(_) => {
|
||||
let resp = json!({"error": "JSON invalide"}).to_string();
|
||||
let _ = send_json_response(request, 400, &resp);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let action = data.get("action").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let params = data.get("params").cloned().unwrap_or(json!({}));
|
||||
|
||||
if action.is_empty() {
|
||||
let resp = json!({"error": "Parametre 'action' requis"}).to_string();
|
||||
let _ = send_json_response(request, 400, &resp);
|
||||
return;
|
||||
}
|
||||
|
||||
let result = execute_file_action(action, ¶ms);
|
||||
let code = if result.get("error").is_some() { 500 } else { 200 };
|
||||
let _ = send_json_response(request, code, &result.to_string());
|
||||
}
|
||||
|
||||
/// OPTIONS — Réponse CORS preflight.
|
||||
fn handle_options(request: tiny_http::Request) {
|
||||
let response = Response::empty(200)
|
||||
.with_header(cors_origin())
|
||||
.with_header(cors_methods())
|
||||
.with_header(cors_headers());
|
||||
let _ = request.respond(response);
|
||||
}
|
||||
|
||||
/// Exécute une action fichier.
|
||||
fn execute_file_action(action: &str, params: &serde_json::Value) -> serde_json::Value {
|
||||
match action {
|
||||
"file_list_dir" => {
|
||||
let path = params.get("path").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let pattern = params
|
||||
.get("pattern")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("*");
|
||||
|
||||
if path.is_empty() {
|
||||
return json!({"error": "Parametre 'path' requis"});
|
||||
}
|
||||
if !is_safe_path(path) {
|
||||
return json!({"error": format!("Chemin non autorise : {}", path)});
|
||||
}
|
||||
|
||||
match std::fs::read_dir(path) {
|
||||
Ok(entries) => {
|
||||
let mut files = Vec::new();
|
||||
let mut extensions: std::collections::HashMap<String, u32> =
|
||||
std::collections::HashMap::new();
|
||||
|
||||
for entry in entries.flatten() {
|
||||
if let Ok(metadata) = entry.metadata() {
|
||||
if metadata.is_file() {
|
||||
let name = entry.file_name().to_string_lossy().to_string();
|
||||
|
||||
// Filtrage par pattern (simple glob avec *)
|
||||
if pattern != "*" && !simple_glob_match(pattern, &name) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let ext = std::path::Path::new(&name)
|
||||
.extension()
|
||||
.map(|e| e.to_string_lossy().to_lowercase())
|
||||
.unwrap_or_else(|| "sans_extension".to_string());
|
||||
|
||||
files.push(json!({
|
||||
"name": name,
|
||||
"extension": ext,
|
||||
"size": metadata.len(),
|
||||
"path": entry.path().to_string_lossy(),
|
||||
}));
|
||||
|
||||
*extensions.entry(ext).or_insert(0) += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
json!({
|
||||
"files": files,
|
||||
"count": files.len(),
|
||||
"extensions": extensions,
|
||||
"path": path,
|
||||
})
|
||||
}
|
||||
Err(e) => json!({"error": format!("Erreur lecture dossier : {}", e)}),
|
||||
}
|
||||
}
|
||||
|
||||
"file_create_dir" => {
|
||||
let path = params.get("path").and_then(|v| v.as_str()).unwrap_or("");
|
||||
if path.is_empty() {
|
||||
return json!({"error": "Parametre 'path' requis"});
|
||||
}
|
||||
if !is_safe_path(path) {
|
||||
return json!({"error": format!("Chemin non autorise : {}", path)});
|
||||
}
|
||||
|
||||
let existed = std::path::Path::new(path).exists();
|
||||
match std::fs::create_dir_all(path) {
|
||||
Ok(_) => json!({
|
||||
"created": !existed,
|
||||
"path": path,
|
||||
"already_existed": existed,
|
||||
}),
|
||||
Err(e) => json!({"error": format!("Erreur creation dossier : {}", e)}),
|
||||
}
|
||||
}
|
||||
|
||||
"file_move" => {
|
||||
let src = params.get("source").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let dst = params
|
||||
.get("destination")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("");
|
||||
|
||||
if src.is_empty() || dst.is_empty() {
|
||||
return json!({"error": "Parametres 'source' et 'destination' requis"});
|
||||
}
|
||||
if !is_safe_path(src) || !is_safe_path(dst) {
|
||||
return json!({"error": "Chemin non autorise"});
|
||||
}
|
||||
|
||||
// Créer le dossier parent de destination
|
||||
if let Some(parent) = std::path::Path::new(dst).parent() {
|
||||
let _ = std::fs::create_dir_all(parent);
|
||||
}
|
||||
|
||||
match std::fs::rename(src, dst) {
|
||||
Ok(_) => json!({"moved": true, "source": src, "destination": dst}),
|
||||
Err(e) => json!({"error": format!("Erreur deplacement : {}", e)}),
|
||||
}
|
||||
}
|
||||
|
||||
"file_copy" => {
|
||||
let src = params.get("source").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let dst = params
|
||||
.get("destination")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("");
|
||||
|
||||
if src.is_empty() || dst.is_empty() {
|
||||
return json!({"error": "Parametres 'source' et 'destination' requis"});
|
||||
}
|
||||
if !is_safe_path(src) || !is_safe_path(dst) {
|
||||
return json!({"error": "Chemin non autorise"});
|
||||
}
|
||||
|
||||
if let Some(parent) = std::path::Path::new(dst).parent() {
|
||||
let _ = std::fs::create_dir_all(parent);
|
||||
}
|
||||
|
||||
match std::fs::copy(src, dst) {
|
||||
Ok(_) => json!({"copied": true, "source": src, "destination": dst}),
|
||||
Err(e) => json!({"error": format!("Erreur copie : {}", e)}),
|
||||
}
|
||||
}
|
||||
|
||||
"file_sort_by_ext" => {
|
||||
let source_dir = params
|
||||
.get("source_dir")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("");
|
||||
let create_subdirs = params
|
||||
.get("create_subdirs")
|
||||
.and_then(|v| v.as_bool())
|
||||
.unwrap_or(true);
|
||||
|
||||
if source_dir.is_empty() {
|
||||
return json!({"error": "Parametre 'source_dir' requis"});
|
||||
}
|
||||
if !is_safe_path(source_dir) {
|
||||
return json!({"error": format!("Chemin non autorise : {}", source_dir)});
|
||||
}
|
||||
|
||||
let mut moved = Vec::new();
|
||||
let mut extensions: std::collections::HashMap<String, u32> =
|
||||
std::collections::HashMap::new();
|
||||
|
||||
if let Ok(entries) = std::fs::read_dir(source_dir) {
|
||||
for entry in entries.flatten() {
|
||||
if let Ok(metadata) = entry.metadata() {
|
||||
if metadata.is_file() {
|
||||
let name = entry.file_name().to_string_lossy().to_string();
|
||||
let ext = std::path::Path::new(&name)
|
||||
.extension()
|
||||
.map(|e| e.to_string_lossy().to_lowercase())
|
||||
.unwrap_or_else(|| "sans_extension".to_string());
|
||||
|
||||
let target_dir =
|
||||
std::path::Path::new(source_dir).join(&ext);
|
||||
|
||||
if create_subdirs {
|
||||
let _ = std::fs::create_dir_all(&target_dir);
|
||||
} else if !target_dir.exists() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let dest = target_dir.join(&name);
|
||||
if let Err(e) = std::fs::rename(entry.path(), &dest) {
|
||||
eprintln!("[FILE] Erreur deplacement {} : {}", name, e);
|
||||
continue;
|
||||
}
|
||||
|
||||
moved.push(json!({
|
||||
"file": name,
|
||||
"to": ext,
|
||||
"destination": dest.to_string_lossy(),
|
||||
}));
|
||||
*extensions.entry(ext).or_insert(0) += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
json!({
|
||||
"moved": moved,
|
||||
"count": moved.len(),
|
||||
"extensions": extensions,
|
||||
"source_dir": source_dir,
|
||||
})
|
||||
}
|
||||
|
||||
_ => json!({"error": format!("Action fichier inconnue : {}", action)}),
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks that a path lies inside an allowed zone (anti-traversal safeguard).
///
/// Allowed roots, compared case-insensitively against the path as written:
/// - Windows: `C:\Users`, `D:\`, `E:\`
/// - other OSes (used for tests): `/home`, `/tmp`
///
/// Returns `false` when:
/// - the path is empty;
/// - the path contains a `..` component (e.g. `/tmp/../etc/passwd`);
/// - the path only shares a textual prefix with an allowed root without
///   being inside it (e.g. `/homework`, `C:\UsersEvil`).
///
/// The check is purely lexical: the filesystem is not consulted and
/// symbolic links are not resolved.
fn is_safe_path(path_str: &str) -> bool {
    use std::path::{is_separator, Component, Path};

    if path_str.is_empty() {
        return false;
    }

    let path = Path::new(path_str);

    // A ".." component could climb out of an allowed root after the prefix
    // check has passed, so such paths are rejected outright.
    if path
        .components()
        .any(|part| matches!(part, Component::ParentDir))
    {
        return false;
    }

    // Upper-case the path so the comparison with the roots ignores case.
    let normalized = path.to_string_lossy().to_uppercase();

    let allowed: &[&str] = if cfg!(target_os = "windows") {
        &["C:\\USERS", "D:\\", "E:\\"]
    } else {
        // Non-Windows roots (used for tests)
        &["/HOME", "/TMP"]
    };

    allowed.iter().any(|root| match normalized.strip_prefix(*root) {
        // The match must end on a component boundary: either the path is
        // exactly the root, the root itself ends with a separator (drive
        // roots), or the remainder starts with a separator.
        Some(rest) => {
            rest.is_empty() || root.ends_with(is_separator) || rest.starts_with(is_separator)
        }
        None => false,
    })
}
|
||||
|
||||
/// Simple case-insensitive glob matching where `*` is the only wildcard.
///
/// `*` matches any run of characters (including none) and may appear any
/// number of times anywhere in the pattern (`*`, `*.ext`, `report*`,
/// `a*b*c`, `*.*`). A pattern without `*` must equal the name exactly
/// (ignoring case). No other glob syntax (`?`, `[...]`) is interpreted.
fn simple_glob_match(pattern: &str, name: &str) -> bool {
    let pattern = pattern.to_lowercase();
    let name = name.to_lowercase();

    // No wildcard: plain case-insensitive comparison.
    if !pattern.contains('*') {
        return name == pattern;
    }

    let mut pieces = pattern.split('*');

    // Text before the first `*` must be a prefix of the name.
    let head = pieces.next().unwrap_or("");
    let mut remaining = match name.strip_prefix(head) {
        Some(rest) => rest,
        None => return false,
    };

    // The pattern contains at least one `*`, so at least one piece follows.
    let tail_pieces: Vec<&str> = pieces.collect();
    let (last, middle) = match tail_pieces.split_last() {
        Some(parts) => parts,
        None => return false,
    };

    // Each middle piece is consumed at its leftmost occurrence, which leaves
    // the longest possible remainder for the pieces that follow.
    for piece in middle {
        if piece.is_empty() {
            continue; // consecutive `**` behave like a single `*`
        }
        match remaining.find(piece) {
            Some(at) => remaining = &remaining[at + piece.len()..],
            None => return false,
        }
    }

    // Text after the final `*` must close the name.
    remaining.ends_with(last)
}
|
||||
|
||||
// --- Headers CORS ---
|
||||
|
||||
fn cors_origin() -> Header {
|
||||
Header::from_bytes("Access-Control-Allow-Origin", "*").unwrap()
|
||||
}
|
||||
|
||||
fn cors_methods() -> Header {
|
||||
Header::from_bytes("Access-Control-Allow-Methods", "GET, POST, OPTIONS").unwrap()
|
||||
}
|
||||
|
||||
fn cors_headers() -> Header {
|
||||
Header::from_bytes("Access-Control-Allow-Headers", "Content-Type").unwrap()
|
||||
}
|
||||
|
||||
/// Envoie une réponse JSON avec les headers CORS.
|
||||
fn send_json_response(
|
||||
request: tiny_http::Request,
|
||||
status_code: u16,
|
||||
body: &str,
|
||||
) -> Result<(), Box<dyn std::error::Error>> {
|
||||
let status = tiny_http::StatusCode(status_code);
|
||||
let content_type = Header::from_bytes("Content-Type", "application/json").unwrap();
|
||||
|
||||
let response = Response::from_string(body)
|
||||
.with_status_code(status)
|
||||
.with_header(content_type)
|
||||
.with_header(cors_origin())
|
||||
.with_header(cors_methods())
|
||||
.with_header(cors_headers());
|
||||
|
||||
request.respond(response)?;
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,175 +0,0 @@
|
||||
//! Etat partage thread-safe de l'agent.
|
||||
//!
|
||||
//! Centralise l'etat courant (enregistrement, replay, connexion, etc.)
|
||||
//! accessible depuis tous les threads (systray, heartbeat, replay, recorder).
|
||||
//! Equivalent de agent_v1/ui/shared_state.py.
|
||||
|
||||
use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
/// Possible states of the systray icon.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TrayState {
    /// Grey: waiting, no active session.
    Idle,
    /// Red: recording in progress.
    Recording,
    /// Green: connected to the server, ready.
    Connected,
    /// Blue: replay in progress.
    Replay,
}
|
||||
|
||||
/// Shared agent state, designed to be shared across threads behind an `Arc`.
///
/// Boolean flags and the counter are atomics, so they can be read and
/// written without taking a lock. The string fields sit behind a `Mutex`
/// because a `String` cannot be stored in an atomic.
#[derive(Debug)]
pub struct AgentState {
    /// A recording (capture session) is in progress.
    pub recording: AtomicBool,

    /// Name of the current recording session.
    pub recording_name: Mutex<String>,

    /// A replay (execution of actions) is in progress.
    pub replay_active: AtomicBool,

    /// Connected to the streaming server.
    pub connected: AtomicBool,

    /// Number of actions captured in the current session.
    pub actions_count: AtomicU32,

    /// The agent is running (`false` = shutdown requested).
    pub running: AtomicBool,

    /// The chat window is visible.
    pub chat_visible: AtomicBool,

    /// The emergency stop has been triggered.
    pub emergency_stop: AtomicBool,

    /// Last notification message (kept to avoid duplicates).
    #[allow(dead_code)]
    pub last_notification: Mutex<String>,
}
|
||||
|
||||
impl AgentState {
|
||||
/// Cree un nouvel etat avec les valeurs par defaut.
|
||||
pub fn new() -> Arc<Self> {
|
||||
Arc::new(Self {
|
||||
recording: AtomicBool::new(false),
|
||||
recording_name: Mutex::new(String::new()),
|
||||
replay_active: AtomicBool::new(false),
|
||||
connected: AtomicBool::new(false),
|
||||
actions_count: AtomicU32::new(0),
|
||||
running: AtomicBool::new(true),
|
||||
chat_visible: AtomicBool::new(false),
|
||||
emergency_stop: AtomicBool::new(false),
|
||||
last_notification: Mutex::new(String::new()),
|
||||
})
|
||||
}
|
||||
|
||||
/// Demarre un enregistrement avec le nom donne.
|
||||
pub fn start_recording(&self, name: &str) {
|
||||
self.recording.store(true, Ordering::SeqCst);
|
||||
self.actions_count.store(0, Ordering::SeqCst);
|
||||
if let Ok(mut n) = self.recording_name.lock() {
|
||||
*n = name.to_string();
|
||||
}
|
||||
println!("[STATE] Enregistrement demarre : '{}'", name);
|
||||
}
|
||||
|
||||
/// Arrete l'enregistrement en cours.
|
||||
pub fn stop_recording(&self) -> (String, u32) {
|
||||
self.recording.store(false, Ordering::SeqCst);
|
||||
let count = self.actions_count.load(Ordering::SeqCst);
|
||||
let name = self
|
||||
.recording_name
|
||||
.lock()
|
||||
.map(|n| n.clone())
|
||||
.unwrap_or_default();
|
||||
println!("[STATE] Enregistrement arrete : '{}' ({} actions)", name, count);
|
||||
(name, count)
|
||||
}
|
||||
|
||||
/// Incremente le compteur d'actions capturees.
|
||||
pub fn increment_actions(&self) -> u32 {
|
||||
self.actions_count.fetch_add(1, Ordering::SeqCst) + 1
|
||||
}
|
||||
|
||||
/// Verifie si l'agent est en cours d'execution.
|
||||
pub fn is_running(&self) -> bool {
|
||||
self.running.load(Ordering::SeqCst)
|
||||
}
|
||||
|
||||
/// Demande l'arret de l'agent.
|
||||
pub fn request_shutdown(&self) {
|
||||
self.running.store(false, Ordering::SeqCst);
|
||||
println!("[STATE] Arret demande");
|
||||
}
|
||||
|
||||
/// Active/desactive le replay.
|
||||
pub fn set_replay_active(&self, active: bool) {
|
||||
self.replay_active.store(active, Ordering::SeqCst);
|
||||
}
|
||||
|
||||
/// Met a jour le statut de connexion au serveur.
|
||||
pub fn set_connected(&self, connected: bool) {
|
||||
let was_connected = self.connected.swap(connected, Ordering::SeqCst);
|
||||
if was_connected != connected {
|
||||
println!(
|
||||
"[STATE] Connexion serveur : {}",
|
||||
if connected { "CONNECTE" } else { "DECONNECTE" }
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Active l'arret d'urgence — stoppe tout immediatement.
|
||||
pub fn emergency_stop(&self) {
|
||||
self.emergency_stop.store(true, Ordering::SeqCst);
|
||||
self.recording.store(false, Ordering::SeqCst);
|
||||
self.replay_active.store(false, Ordering::SeqCst);
|
||||
println!("[STATE] === ARRET D'URGENCE ACTIVE ===");
|
||||
}
|
||||
|
||||
/// Retourne l'etat courant du systray.
|
||||
pub fn tray_state(&self) -> TrayState {
|
||||
if self.recording.load(Ordering::SeqCst) {
|
||||
TrayState::Recording
|
||||
} else if self.replay_active.load(Ordering::SeqCst) {
|
||||
TrayState::Replay
|
||||
} else if self.connected.load(Ordering::SeqCst) {
|
||||
TrayState::Connected
|
||||
} else {
|
||||
TrayState::Idle
|
||||
}
|
||||
}
|
||||
|
||||
/// Retourne le nom de la session d'enregistrement courante.
|
||||
pub fn current_recording_name(&self) -> String {
|
||||
self.recording_name
|
||||
.lock()
|
||||
.map(|n| n.clone())
|
||||
.unwrap_or_default()
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for AgentState {
|
||||
fn default() -> Self {
|
||||
// Note: on ne peut pas retourner Arc<Self> depuis Default,
|
||||
// donc on fournit les valeurs brutes. Utiliser new() de preference.
|
||||
Self {
|
||||
recording: AtomicBool::new(false),
|
||||
recording_name: Mutex::new(String::new()),
|
||||
replay_active: AtomicBool::new(false),
|
||||
connected: AtomicBool::new(false),
|
||||
actions_count: AtomicU32::new(0),
|
||||
running: AtomicBool::new(true),
|
||||
chat_visible: AtomicBool::new(false),
|
||||
emergency_stop: AtomicBool::new(false),
|
||||
last_notification: Mutex::new(String::new()),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,274 +0,0 @@
|
||||
//! Métadonnées système : DPI, résolution, fenêtre active, moniteur.
|
||||
//!
|
||||
//! Expose des fonctions pour capturer les informations d'affichage
|
||||
//! critiques qui seront envoyées au serveur avec chaque heartbeat.
|
||||
//! Sur Windows, utilise les API Win32 (user32.dll).
|
||||
//! Sur Linux, retourne des valeurs par défaut ou utilise xcap.
|
||||
|
||||
use serde::Serialize;
|
||||
|
||||
/// Métadonnées complètes de l'écran.
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct ScreenMetadata {
|
||||
/// Facteur DPI en pourcentage (100 = normal, 150 = haute résolution)
|
||||
pub dpi_scale: u32,
|
||||
/// Résolution de l'écran principal [largeur, hauteur]
|
||||
pub screen_resolution: [u32; 2],
|
||||
/// Bounds de la fenêtre active [x, y, largeur, hauteur], None si pas de fenêtre
|
||||
pub window_bounds: Option<[i32; 4]>,
|
||||
/// Index du moniteur sur lequel se trouve la fenêtre active (0 = principal)
|
||||
pub monitor_index: u32,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for ScreenMetadata {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"{}x{} @ {}% DPI, monitor #{}",
|
||||
self.screen_resolution[0],
|
||||
self.screen_resolution[1],
|
||||
self.dpi_scale,
|
||||
self.monitor_index,
|
||||
)?;
|
||||
if let Some(wb) = &self.window_bounds {
|
||||
write!(f, ", fenetre [{}x{} @ ({},{})]", wb[2], wb[3], wb[0], wb[1])?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Windows : API Win32 via FFI
|
||||
// =============================================================================
|
||||
|
||||
#[cfg(target_os = "windows")]
mod win {
    //! Windows implementation backed by Win32 calls (via `windows-sys` and
    //! one raw FFI declaration).

    use windows_sys::Win32::Foundation::{BOOL, LPARAM, RECT};
    use windows_sys::Win32::Graphics::Gdi::{
        EnumDisplayMonitors, GetMonitorInfoW, MonitorFromWindow, HMONITOR, MONITORINFO,
        MONITOR_DEFAULTTOPRIMARY,
    };
    use windows_sys::Win32::UI::WindowsAndMessaging::{
        GetForegroundWindow, GetSystemMetrics, GetWindowRect, SM_CXSCREEN, SM_CYSCREEN,
    };

    // GetDpiForSystem lives in the Win32_UI_HiDpi feature (not enabled).
    // A raw FFI declaration is used to avoid enabling one more feature.
    extern "system" {
        fn GetDpiForSystem() -> u32;
    }

    /// Returns the DPI factor as a percentage (100 = normal, 125, 150, 200...).
    ///
    /// The system DPI is converted with 96 DPI taken as 100%. A reported
    /// DPI of 0 is treated as "unknown" and mapped to 100.
    pub fn get_dpi_scale() -> u32 {
        unsafe {
            let dpi = GetDpiForSystem();
            if dpi == 0 {
                // Defensive fallback when no usable DPI value is reported.
                100
            } else {
                (dpi * 100) / 96
            }
        }
    }

    /// Returns `(width, height)` of the primary monitor via `GetSystemMetrics`.
    ///
    /// IMPORTANT: the physical resolution is returned only when the process
    /// is DPI-aware (the original note says `SetProcessDpiAwareness(2)` is
    /// called in main.rs); otherwise the logical resolution, virtualised by
    /// DPI scaling, is returned. Yields `(0, 0)` when either metric is not
    /// strictly positive.
    pub fn get_screen_resolution() -> (u32, u32) {
        unsafe {
            let w = GetSystemMetrics(SM_CXSCREEN);
            let h = GetSystemMetrics(SM_CYSCREEN);
            if w > 0 && h > 0 {
                (w as u32, h as u32)
            } else {
                (0, 0)
            }
        }
    }

    /// Returns `(x, y, width, height)` of the foreground window, or `None`
    /// when there is no foreground window or its rectangle cannot be read.
    pub fn get_window_bounds() -> Option<(i32, i32, i32, i32)> {
        unsafe {
            let hwnd = GetForegroundWindow();
            if hwnd.is_null() {
                return None;
            }
            let mut rect: RECT = std::mem::zeroed();
            if GetWindowRect(hwnd, &mut rect) != 0 {
                let w = rect.right - rect.left;
                let h = rect.bottom - rect.top;
                Some((rect.left, rect.top, w, h))
            } else {
                None
            }
        }
    }

    /// Flag marking the primary monitor in `MONITORINFO.dwFlags`.
    const MONITORINFOF_PRIMARY: u32 = 1;

    /// Returns the index of the monitor hosting the foreground window.
    ///
    /// 0 is reserved for the primary monitor. Secondary monitors are
    /// numbered 1, 2, ... in enumeration order, so a secondary monitor can
    /// never be reported as 0 even when it is enumerated before the primary.
    /// Returns 0 when there is no foreground window or no monitor handle.
    pub fn get_monitor_index() -> u32 {
        unsafe {
            let hwnd = GetForegroundWindow();
            if hwnd.is_null() {
                return 0;
            }

            let target_hmon = MonitorFromWindow(hwnd, MONITOR_DEFAULTTOPRIMARY);
            if target_hmon.is_null() {
                return 0;
            }

            // State shared with the enumeration callback through LPARAM.
            struct CallbackData {
                /// Monitor handle being looked for.
                target: HMONITOR,
                /// Index to assign to the next non-primary monitor (starts at 1).
                next_secondary_index: u32,
                /// Result: index of `target` (0 until found or if primary).
                found_index: u32,
            }

            unsafe extern "system" fn enum_callback(
                hmonitor: HMONITOR,
                _hdc: windows_sys::Win32::Graphics::Gdi::HDC,
                _lprect: *mut RECT,
                lparam: LPARAM,
            ) -> BOOL {
                let data = &mut *(lparam as *mut CallbackData);

                let mut info: MONITORINFO = std::mem::zeroed();
                info.cbSize = std::mem::size_of::<MONITORINFO>() as u32;
                // If the query fails the monitor is treated as non-primary.
                let info_ok = GetMonitorInfoW(hmonitor, &mut info) != 0;
                let is_primary = info_ok && (info.dwFlags & MONITORINFOF_PRIMARY) != 0;

                if is_primary {
                    // The primary monitor is always #0 and does not consume
                    // a secondary index.
                    if hmonitor == data.target {
                        data.found_index = 0;
                    }
                } else {
                    if hmonitor == data.target {
                        data.found_index = data.next_secondary_index;
                    }
                    data.next_secondary_index += 1;
                }

                1 // TRUE: keep enumerating
            }

            let mut data = CallbackData {
                target: target_hmon,
                next_secondary_index: 1,
                found_index: 0,
            };

            EnumDisplayMonitors(
                std::ptr::null_mut(), // null HDC = all monitors
                std::ptr::null(),
                Some(enum_callback),
                &mut data as *mut CallbackData as LPARAM,
            );

            data.found_index
        }
    }
}
|
||||
|
||||
// =============================================================================
|
||||
// Linux / fallback : valeurs par défaut ou xcap
|
||||
// =============================================================================
|
||||
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
mod fallback {
|
||||
/// Sur Linux, pas de DPI système accessible simplement. Retourne 100%.
|
||||
pub fn get_dpi_scale() -> u32 {
|
||||
100
|
||||
}
|
||||
|
||||
/// Résolution via xcap (mêmes moniteurs que la capture).
|
||||
pub fn get_screen_resolution() -> (u32, u32) {
|
||||
if let Ok(monitors) = xcap::Monitor::all() {
|
||||
if let Some(primary) = monitors.into_iter().find(|m| m.is_primary().unwrap_or(false)) {
|
||||
let w = primary.width().unwrap_or(0);
|
||||
let h = primary.height().unwrap_or(0);
|
||||
return (w, h);
|
||||
}
|
||||
}
|
||||
(0, 0)
|
||||
}
|
||||
|
||||
/// Pas d'API window bounds sur Linux en mode headless. Retourne None.
|
||||
pub fn get_window_bounds() -> Option<(i32, i32, i32, i32)> {
|
||||
None
|
||||
}
|
||||
|
||||
/// Moniteur principal = index 0 (fallback).
|
||||
pub fn get_monitor_index() -> u32 {
|
||||
0
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// API publique
|
||||
// =============================================================================
|
||||
|
||||
/// Retourne le facteur DPI en % (100 = normal, 150 = haute résolution).
|
||||
pub fn get_dpi_scale() -> u32 {
|
||||
#[cfg(target_os = "windows")]
|
||||
{
|
||||
win::get_dpi_scale()
|
||||
}
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
{
|
||||
fallback::get_dpi_scale()
|
||||
}
|
||||
}
|
||||
|
||||
/// Retourne (largeur, hauteur) du moniteur principal.
|
||||
pub fn get_screen_resolution() -> (u32, u32) {
|
||||
#[cfg(target_os = "windows")]
|
||||
{
|
||||
win::get_screen_resolution()
|
||||
}
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
{
|
||||
fallback::get_screen_resolution()
|
||||
}
|
||||
}
|
||||
|
||||
/// Retourne (x, y, largeur, hauteur) de la fenêtre active, ou None.
|
||||
pub fn get_window_bounds() -> Option<(i32, i32, i32, i32)> {
|
||||
#[cfg(target_os = "windows")]
|
||||
{
|
||||
win::get_window_bounds()
|
||||
}
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
{
|
||||
fallback::get_window_bounds()
|
||||
}
|
||||
}
|
||||
|
||||
/// Retourne l'index du moniteur de la fenêtre active (0 = principal).
|
||||
pub fn get_monitor_index() -> u32 {
|
||||
#[cfg(target_os = "windows")]
|
||||
{
|
||||
win::get_monitor_index()
|
||||
}
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
{
|
||||
fallback::get_monitor_index()
|
||||
}
|
||||
}
|
||||
|
||||
/// Collecte toutes les métadonnées système en une seule structure.
|
||||
pub fn get_screen_metadata() -> ScreenMetadata {
|
||||
let (sw, sh) = get_screen_resolution();
|
||||
let wb = get_window_bounds().map(|(x, y, w, h)| [x, y, w, h]);
|
||||
|
||||
ScreenMetadata {
|
||||
dpi_scale: get_dpi_scale(),
|
||||
screen_resolution: [sw, sh],
|
||||
window_bounds: wb,
|
||||
monitor_index: get_monitor_index(),
|
||||
}
|
||||
}
|
||||
@@ -1,336 +0,0 @@
|
||||
//! Icone systray avec menu contextuel.
|
||||
//!
|
||||
//! Affiche une icone dans la barre des taches Windows avec un menu contextuel
|
||||
//! permettant de controler l'agent (enregistrement, replay, chat, etc.).
|
||||
//! Equivalent de agent_v1/ui/smart_tray.py.
|
||||
//!
|
||||
//! Utilise tray-icon (crate Tauri) pour l'icone et le menu.
|
||||
//! Necessite une boucle d'evenements Windows (winit ou Win32 message pump).
|
||||
//!
|
||||
//! Sur Linux : le systray n'est pas disponible, l'agent tourne en mode console.
|
||||
|
||||
#[allow(unused_imports)]
|
||||
use crate::config::Config;
|
||||
#[allow(unused_imports)]
|
||||
use crate::notifications;
|
||||
#[allow(unused_imports)]
|
||||
use crate::state::{AgentState, TrayState};
|
||||
use std::sync::Arc;
|
||||
|
||||
/// Handles to the tray menu entries, kept so that menu events can be
/// dispatched by comparing their ids.
#[cfg(windows)]
pub struct TrayMenuIds {
    /// Informational row showing the machine id.
    pub machine_info: tray_icon::menu::MenuItem,
    /// Informational row showing the connection status.
    pub status_item: tray_icon::menu::MenuItem,
    /// Entry that starts a recording.
    pub start_recording: tray_icon::menu::MenuItem,
    /// Entry that stops the current recording.
    pub stop_recording: tray_icon::menu::MenuItem,
    /// Submenu holding the workflows.
    pub workflows_submenu: tray_icon::menu::Submenu,
    /// Entry that triggers the emergency stop.
    pub emergency_stop: tray_icon::menu::MenuItem,
    /// Entry that opens the chat.
    pub open_chat: tray_icon::menu::MenuItem,
    /// Entry that opens the files folder.
    pub open_files: tray_icon::menu::MenuItem,
    /// Entry that quits the agent.
    pub quit: tray_icon::menu::MenuItem,
}
|
||||
|
||||
/// Creates the systray icon and runs the associated event loop.
///
/// This function blocks the calling thread (which must be the main thread
/// on Windows) until the agent is asked to stop. If the tray icon or the
/// event loop cannot be created, it falls back to the console loop.
///
/// On every wake-up, all pending menu events are drained (not just one),
/// and the shutdown flag is checked again afterwards so that a "quit" click
/// ends the loop in the same cycle.
#[cfg(windows)]
pub fn run_tray_loop(config: Arc<Config>, state: Arc<AgentState>) {
    use tray_icon::{
        menu::MenuEvent,
        TrayIconBuilder,
    };
    use winit::application::ApplicationHandler;
    use winit::event::WindowEvent;
    use winit::event_loop::{ActiveEventLoop, ControlFlow, EventLoop};
    use winit::window::WindowId;

    // Build the menu
    let menu_ids = create_menu(&config);
    let menu = build_tray_menu(&menu_ids);

    // Initial icon (grey = idle)
    let icon = generate_tray_icon(TrayState::Idle);

    // Create the systray icon
    let tray = match TrayIconBuilder::new()
        .with_menu(Box::new(menu))
        .with_tooltip("Lea - Agent RPA Vision (IA)")
        .with_icon(icon)
        .build()
    {
        Ok(t) => t,
        Err(e) => {
            eprintln!("[TRAY] Impossible de creer l'icone systray : {}", e);
            // Fall back to console mode
            fallback_console_loop(&state);
            return;
        }
    };

    println!("[TRAY] Icone systray creee — menu contextuel disponible");
    notifications::greet();

    // State carried by winit's ApplicationHandler
    struct TrayApp {
        config: Arc<Config>,
        state: Arc<AgentState>,
        tray: tray_icon::TrayIcon,
        menu_ids: TrayMenuIds,
        current_tray_state: TrayState,
    }

    impl ApplicationHandler for TrayApp {
        fn resumed(&mut self, _event_loop: &ActiveEventLoop) {
            // Nothing to do: no winit window is created
        }

        fn window_event(
            &mut self,
            _event_loop: &ActiveEventLoop,
            _window_id: WindowId,
            _event: WindowEvent,
        ) {
            // No winit window: ignore
        }

        fn about_to_wait(&mut self, event_loop: &ActiveEventLoop) {
            // Stop right away if a shutdown was requested elsewhere
            if !self.state.is_running() {
                event_loop.exit();
                return;
            }

            // Drain every pending menu event; handling only one per cycle
            // would delay queued clicks by 100 ms each.
            let menu_receiver = MenuEvent::receiver();
            while let Ok(event) = menu_receiver.try_recv() {
                handle_menu_event(&event, &self.menu_ids, &self.config, &self.state);
            }

            // A menu event (quit) may just have requested the shutdown
            if !self.state.is_running() {
                event_loop.exit();
                return;
            }

            // Refresh the icon and tooltip when the state changed
            let new_state = self.state.tray_state();
            if new_state != self.current_tray_state {
                self.current_tray_state = new_state;
                let tooltip = match new_state {
                    TrayState::Idle => "Lea - En attente",
                    TrayState::Recording => "Lea - ENREGISTREMENT EN COURS",
                    TrayState::Connected => "Lea - Connectee au serveur",
                    TrayState::Replay => "Lea - REPLAY EN COURS",
                };
                let _ = self.tray.set_tooltip(Some(tooltip));
                let icon = generate_tray_icon(new_state);
                let _ = self.tray.set_icon(Some(icon));
            }

            // Wake up again in 100 ms for the next polling cycle
            event_loop.set_control_flow(ControlFlow::WaitUntil(
                std::time::Instant::now() + std::time::Duration::from_millis(100),
            ));
        }
    }

    // Create and start the winit event loop
    let event_loop = match EventLoop::new() {
        Ok(el) => el,
        Err(e) => {
            eprintln!("[TRAY] Impossible de creer la boucle d'evenements : {}", e);
            fallback_console_loop(&state);
            return;
        }
    };

    let mut app = TrayApp {
        config,
        state,
        tray,
        menu_ids,
        current_tray_state: TrayState::Idle,
    };

    let _ = event_loop.run_app(&mut app);
}
|
||||
|
||||
/// Creates the menu entries with their labels.
///
/// Informational rows are created disabled; action rows are enabled. The
/// workflows submenu starts with a single disabled placeholder row.
#[cfg(windows)]
fn create_menu(config: &Config) -> TrayMenuIds {
    use tray_icon::menu::{MenuItem, Submenu};

    // Disabled row: information only
    let info_row = |label: String| MenuItem::new(label, false, None);
    // Enabled, clickable row
    let action_row = |label: &str| MenuItem::new(label, true, None);

    let machine_info = info_row(format!("Machine : {}", config.machine_id));
    let status_item = info_row("Deconnectee".to_string());

    let start_recording = action_row("Apprenez-moi une tache");
    let stop_recording = action_row("C'est termine");

    let workflows_submenu = Submenu::new("Mes taches", true);
    let placeholder = info_row("(chargement...)".to_string());
    let _ = workflows_submenu.append(&placeholder);

    let emergency_stop = action_row("ARRET D'URGENCE");
    let open_chat = action_row("Discuter avec Lea");
    let open_files = action_row("Mes fichiers");
    let quit = action_row("Quitter Lea");

    TrayMenuIds {
        machine_info,
        status_item,
        start_recording,
        stop_recording,
        workflows_submenu,
        emergency_stop,
        open_chat,
        open_files,
        quit,
    }
}
|
||||
|
||||
/// Assembles the systray menu from the prepared entries.
///
/// Layout: info rows, separator, recording controls, separator, workflows
/// submenu, separator, emergency stop / chat / files, separator, quit.
/// A failure to append one entry is ignored and the remaining entries are
/// still appended.
#[cfg(windows)]
fn build_tray_menu(ids: &TrayMenuIds) -> tray_icon::menu::Menu {
    use tray_icon::menu::{IsMenuItem, Menu, PredefinedMenuItem};

    let menu = Menu::new();

    // One distinct separator per section boundary
    let after_info = PredefinedMenuItem::separator();
    let after_recording = PredefinedMenuItem::separator();
    let after_workflows = PredefinedMenuItem::separator();
    let before_quit = PredefinedMenuItem::separator();

    let layout: [&dyn IsMenuItem; 13] = [
        &ids.machine_info,
        &ids.status_item,
        &after_info,
        &ids.start_recording,
        &ids.stop_recording,
        &after_recording,
        &ids.workflows_submenu,
        &after_workflows,
        &ids.emergency_stop,
        &ids.open_chat,
        &ids.open_files,
        &before_quit,
        &ids.quit,
    ];

    for entry in layout.iter() {
        let _ = menu.append(*entry);
    }

    menu
}
|
||||
|
||||
/// Handles a click on one of the tray menu entries.
///
/// The event id is compared with each known entry in turn (start, stop,
/// emergency stop, chat, files, quit); an id matching none of them is
/// ignored.
#[cfg(windows)]
fn handle_menu_event(
    event: &tray_icon::menu::MenuEvent,
    ids: &TrayMenuIds,
    _config: &Config,
    state: &AgentState,
) {
    use std::sync::atomic::Ordering;

    let clicked = event.id();

    // Start a recording, unless one is already running
    if clicked == ids.start_recording.id() {
        if state.recording.load(Ordering::SeqCst) {
            return;
        }
        let stamp = chrono::Utc::now().format("%Y%m%d_%H%M%S");
        let name = format!("session_{}", stamp);
        state.start_recording(&name);
        notifications::session_started(&name);
        println!("[TRAY] Enregistrement demarre : {}", name);
        return;
    }

    // Stop the recording, if one is running
    if clicked == ids.stop_recording.id() {
        if !state.recording.load(Ordering::SeqCst) {
            return;
        }
        let (name, count) = state.stop_recording();
        notifications::session_ended(count);
        println!(
            "[TRAY] Enregistrement arrete : {} ({} actions)",
            name, count
        );
        return;
    }

    if clicked == ids.emergency_stop.id() {
        state.emergency_stop();
        notifications::emergency_stop_activated();
        println!("[TRAY] ARRET D'URGENCE ACTIVE");
        return;
    }

    if clicked == ids.open_chat.id() {
        state.chat_visible.store(true, Ordering::SeqCst);
        println!("[TRAY] Ouverture du chat demandee");
        return;
    }

    if clicked == ids.open_files.id() {
        let sessions_dir = String::from(if cfg!(target_os = "windows") {
            "C:\\rpa_vision\\sessions"
        } else {
            "/tmp/rpa_vision/sessions"
        });
        println!("[TRAY] Ouverture du dossier : {}", sessions_dir);
        // Open the folder in the file explorer; a spawn failure is ignored
        #[cfg(windows)]
        {
            let _ = std::process::Command::new("explorer")
                .arg(&sessions_dir)
                .spawn();
        }
        return;
    }

    if clicked == ids.quit.id() {
        println!("[TRAY] Fermeture demandee par l'utilisateur");
        state.request_shutdown();
    }
}
|
||||
|
||||
/// Generates a systray icon coloured according to the state.
///
/// Produces a 32x32 RGBA image containing a filled disc:
/// - grey (#808080): idle
/// - red (#FF0000): recording
/// - green (#00CC00): connected
/// - blue (#0066FF): replay
///
/// Pixels within one unit outside the radius get a proportional alpha to
/// soften the edge; everything else stays fully transparent.
#[cfg(windows)]
fn generate_tray_icon(tray_state: TrayState) -> tray_icon::Icon {
    let side = 32u32;
    let mut pixels = vec![0u8; (side * side * 4) as usize];

    let (red, green, blue) = match tray_state {
        TrayState::Idle => (128u8, 128u8, 128u8),
        TrayState::Recording => (255u8, 0u8, 0u8),
        TrayState::Connected => (0u8, 204u8, 0u8),
        TrayState::Replay => (0u8, 102u8, 255u8),
    };

    let center = (side / 2) as f64;
    let radius = (side / 2 - 2) as f64;

    // Walk the buffer one RGBA pixel at a time, in row-major order
    for (index, pixel) in pixels.chunks_exact_mut(4).enumerate() {
        let col = (index as u32) % side;
        let row = (index as u32) / side;

        let dx = col as f64 - center;
        let dy = row as f64 - center;
        let dist = (dx * dx + dy * dy).sqrt();

        let alpha = if dist <= radius {
            255
        } else if dist <= radius + 1.0 {
            // Edge ring: alpha fades with the distance beyond the radius
            ((radius + 1.0 - dist) * 255.0) as u8
        } else {
            // Outside the disc: leave the pixel transparent
            continue;
        };

        pixel.copy_from_slice(&[red, green, blue, alpha]);
    }

    tray_icon::Icon::from_rgba(pixels, side, side).expect("Erreur creation icone systray")
}
|
||||
|
||||
/// Mode console (Linux ou fallback si le systray echoue).
|
||||
fn fallback_console_loop(state: &AgentState) {
|
||||
println!("[TRAY] Mode console — Appuyez sur Ctrl+C pour quitter");
|
||||
while state.is_running() {
|
||||
std::thread::sleep(std::time::Duration::from_millis(500));
|
||||
}
|
||||
}
|
||||
|
||||
/// Version non-Windows : pas de systray, l'agent tourne en mode console.
|
||||
#[cfg(not(windows))]
|
||||
pub fn run_tray_loop(_config: Arc<Config>, state: Arc<AgentState>) {
|
||||
println!("[TRAY] Systray non disponible sur cet OS — mode console");
|
||||
fallback_console_loop(&state);
|
||||
}
|
||||
@@ -1,110 +0,0 @@
|
||||
//! Résolution visuelle des cibles via le serveur.
|
||||
//!
|
||||
//! Envoie un screenshot + target_spec au serveur qui effectue le template
|
||||
//! matching OpenCV et retourne les coordonnées résolues (x_pct, y_pct).
|
||||
//! Approche server-side : pas de dépendance OpenCV dans le binaire Rust.
|
||||
|
||||
use crate::capture;
|
||||
use crate::config::Config;
|
||||
use reqwest::blocking::Client;
|
||||
|
||||
/// Résout visuellement une cible en envoyant le screenshot courant au serveur.
|
||||
///
|
||||
/// Capture l'écran, l'encode en JPEG base64, envoie au endpoint
|
||||
/// `/traces/stream/replay/resolve_target` qui fait le template matching.
|
||||
///
|
||||
/// Retourne Some((x_pct, y_pct)) si la cible est trouvée, None sinon.
|
||||
pub fn resolve_target_visual(
|
||||
config: &Config,
|
||||
target_spec: &serde_json::Value,
|
||||
fallback_x: f64,
|
||||
fallback_y: f64,
|
||||
screen_width: u32,
|
||||
screen_height: u32,
|
||||
) -> Option<(f64, f64)> {
|
||||
// 1. Capturer le screenshot actuel
|
||||
let screenshot = match capture::capture_screenshot() {
|
||||
Some(img) => img,
|
||||
None => {
|
||||
eprintln!(" [VISUAL] Echec capture screenshot pour résolution visuelle");
|
||||
return None;
|
||||
}
|
||||
};
|
||||
|
||||
// Encoder en JPEG base64 (qualité 75 — bon compromis taille/précision)
|
||||
let screenshot_b64 = capture::screenshot_to_jpeg_base64(&screenshot, 75);
|
||||
if screenshot_b64.is_empty() {
|
||||
eprintln!(" [VISUAL] Echec encodage JPEG");
|
||||
return None;
|
||||
}
|
||||
|
||||
println!(
|
||||
" [VISUAL] Screenshot capture ({}x{}), envoi au serveur...",
|
||||
screen_width, screen_height
|
||||
);
|
||||
|
||||
// 2. Envoyer au serveur /replay/resolve_target
|
||||
let client = Client::new();
|
||||
let payload = serde_json::json!({
|
||||
"session_id": config.agent_session_id(),
|
||||
"screenshot_b64": screenshot_b64,
|
||||
"target_spec": target_spec,
|
||||
"fallback_x_pct": fallback_x,
|
||||
"fallback_y_pct": fallback_y,
|
||||
"screen_width": screen_width,
|
||||
"screen_height": screen_height,
|
||||
});
|
||||
|
||||
let url = format!("{}/traces/stream/replay/resolve_target", config.server_url);
|
||||
|
||||
let resp = match client
|
||||
.post(&url)
|
||||
.json(&payload)
|
||||
.timeout(std::time::Duration::from_secs(30))
|
||||
.send()
|
||||
{
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
eprintln!(" [VISUAL] Erreur reseau vers {} : {}", url, e);
|
||||
return None;
|
||||
}
|
||||
};
|
||||
|
||||
if !resp.status().is_success() {
|
||||
eprintln!(
|
||||
" [VISUAL] Serveur a repondu HTTP {}",
|
||||
resp.status()
|
||||
);
|
||||
return None;
|
||||
}
|
||||
|
||||
// 3. Parser la réponse
|
||||
let data: serde_json::Value = match resp.json() {
|
||||
Ok(d) => d,
|
||||
Err(e) => {
|
||||
eprintln!(" [VISUAL] Erreur parsing reponse JSON : {}", e);
|
||||
return None;
|
||||
}
|
||||
};
|
||||
|
||||
let resolved = data["resolved"].as_bool().unwrap_or(false);
|
||||
if resolved {
|
||||
let x = data["x_pct"].as_f64()?;
|
||||
let y = data["y_pct"].as_f64()?;
|
||||
let method = data["method"].as_str().unwrap_or("?");
|
||||
let score = data["score"].as_f64().unwrap_or(0.0);
|
||||
println!(
|
||||
" [VISUAL] Resolu par {} (score={:.3}) : ({:.4}, {:.4})",
|
||||
method, score, x, y
|
||||
);
|
||||
Some((x, y))
|
||||
} else {
|
||||
let reason = data["reason"].as_str().unwrap_or("inconnu");
|
||||
let method = data["method"].as_str().unwrap_or("?");
|
||||
println!(
|
||||
" [VISUAL] Non resolu (methode={}, raison={})",
|
||||
method, reason
|
||||
);
|
||||
None
|
||||
}
|
||||
}
|
||||
@@ -40,10 +40,18 @@ MACHINE_ID = os.environ.get(
|
||||
BASE_DIR = Path(__file__).resolve().parent
|
||||
|
||||
# Endpoint du serveur Streaming (port 5005)
|
||||
# SERVER_URL contient TOUJOURS /api/v1 à la fin (convention unifiée).
|
||||
SERVER_URL = os.getenv("RPA_SERVER_URL", "http://localhost:5005/api/v1")
|
||||
# Base sans /api/v1 — pour les routes à la racine (/health)
|
||||
SERVER_BASE = SERVER_URL.rsplit("/api/v1", 1)[0]
|
||||
UPLOAD_ENDPOINT = f"{SERVER_URL}/traces/upload"
|
||||
STREAMING_ENDPOINT = f"{SERVER_URL}/traces/stream"
|
||||
|
||||
# Host Ollama — SÉPARÉ du serveur RPA.
|
||||
# Ollama tourne en local sur la machine serveur, jamais exposé via le reverse proxy.
|
||||
# Défaut : localhost (exécution locale ou accès LAN direct).
|
||||
OLLAMA_HOST = os.getenv("RPA_OLLAMA_HOST", "localhost")
|
||||
|
||||
# Token d'authentification API (doit correspondre au token du serveur)
|
||||
# Configurable via variable d'environnement RPA_API_TOKEN
|
||||
API_TOKEN = os.environ.get("RPA_API_TOKEN", "")
|
||||
@@ -52,8 +60,9 @@ API_TOKEN = os.environ.get("RPA_API_TOKEN", "")
|
||||
MAX_SESSION_DURATION_S = 60 * 60 # 1 heure
|
||||
SESSIONS_ROOT = BASE_DIR / "sessions"
|
||||
|
||||
# Paramètres Vision (Crops pour qwen3-vl)
|
||||
TARGETED_CROP_SIZE = (150, 150)
|
||||
# Paramètres Vision (Crops pour la résolution visuelle)
|
||||
# 80x80 : assez petit pour être discriminant (icônes), assez grand pour le contexte
|
||||
TARGETED_CROP_SIZE = (80, 80)
|
||||
SCREENSHOT_QUALITY = 85
|
||||
|
||||
# Floutage des données sensibles (conformité AI Act)
|
||||
|
||||
@@ -178,8 +178,41 @@ class EventCaptorV1:
|
||||
"timestamp": now,
|
||||
}
|
||||
self._inject_screen_metadata(event)
|
||||
# Capturer le snapshot UIA à la position du clic (si helper dispo)
|
||||
# Non-bloquant : si UIA échoue, l'event est enrichi uniquement
|
||||
# des données vision comme aujourd'hui.
|
||||
self._inject_uia_snapshot(event, x, y)
|
||||
self.on_event(event)
|
||||
|
||||
def _inject_uia_snapshot(self, event: dict, x: int, y: int) -> None:
|
||||
"""Ajouter un uia_snapshot à l'événement si le helper UIA est dispo.
|
||||
|
||||
Appelle lea_uia.exe query --x N --y N en ~10-20ms.
|
||||
Fallback silencieux si le helper n'est pas dispo ou échoue.
|
||||
"""
|
||||
try:
|
||||
from .uia_helper import get_shared_helper
|
||||
helper = get_shared_helper()
|
||||
if not helper.available:
|
||||
return
|
||||
element = helper.query_at(int(x), int(y), with_parents=True)
|
||||
if element is None:
|
||||
return
|
||||
event["uia_snapshot"] = {
|
||||
"name": element.name,
|
||||
"control_type": element.control_type,
|
||||
"class_name": element.class_name,
|
||||
"automation_id": element.automation_id,
|
||||
"bounding_rect": list(element.bounding_rect),
|
||||
"is_enabled": element.is_enabled,
|
||||
"is_offscreen": element.is_offscreen,
|
||||
"parent_path": element.parent_path,
|
||||
}
|
||||
except Exception as e:
|
||||
# Non bloquant — on continue sans UIA
|
||||
import logging
|
||||
logging.getLogger(__name__).debug(f"UIA snapshot skip: {e}")
|
||||
|
||||
def _on_scroll(self, x, y, dx, dy):
|
||||
event = {
|
||||
"type": "mouse_scroll",
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
291
agent_v0/agent_v1/core/grounding.py
Normal file
291
agent_v0/agent_v1/core/grounding.py
Normal file
@@ -0,0 +1,291 @@
|
||||
# agent_v1/core/grounding.py
|
||||
"""
|
||||
Module Grounding — localisation pure d'éléments UI sur l'écran.
|
||||
|
||||
Responsabilité unique : "Trouve l'élément X sur l'écran et retourne ses coordonnées."
|
||||
Ne prend AUCUNE décision. Si l'élément n'est pas trouvé → retourne NOT_FOUND.
|
||||
|
||||
Stratégies disponibles (cascade configurable) :
|
||||
1. Serveur SomEngine + VLM (GPU distant)
|
||||
2. Template matching local (CPU, ~10ms)
|
||||
3. VLM local direct (CPU/GPU local)
|
||||
|
||||
Séparé de Policy (qui décide quoi faire quand grounding échoue).
|
||||
Ref: docs/PLAN_ACTEUR_V1.md — Architecture MICRO (grounding + exécution)
|
||||
"""
|
||||
|
||||
import base64
|
||||
import io
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)


@dataclass
class GroundingResult:
    """Outcome of one visual localisation attempt."""

    found: bool  # True when the element was located
    x_pct: float = 0.0  # X position as a fraction (0.0-1.0)
    y_pct: float = 0.0  # Y position as a fraction (0.0-1.0)
    method: str = ""  # Resolver that answered (server_som, anchor_template, vlm_direct...)
    score: float = 0.0  # Confidence (0.0-1.0)
    elapsed_ms: float = 0.0  # Time spent resolving, in milliseconds
    detail: str = ""  # Extra info (matched label, failure reason)
    raw: Optional[Dict] = None  # Raw resolver payload, kept for debugging

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-friendly summary; the ``raw`` payload is left out."""
        return {
            "found": self.found,
            "x_pct": self.x_pct,
            "y_pct": self.y_pct,
            "method": self.method,
            "score": round(self.score, 3),
            "elapsed_ms": round(self.elapsed_ms, 1),
            "detail": self.detail,
        }


# Shared "not found" result.
NOT_FOUND = GroundingResult(found=False, detail="Aucune méthode n'a trouvé l'élément")


class GroundingEngine:
    """Visual localisation engine for UI elements.

    Wraps the resolution cascade (server -> template -> local VLM) behind one
    interface. It takes no decision: that is PolicyEngine's role.

    Usage:
        engine = GroundingEngine(executor)
        result = engine.locate(server_url, target_spec, fallback_x, fallback_y,
                               screen_w, screen_h)
        if result.found:
            click(result.x_pct, result.y_pct)
    """

    # Resolver method names (as reported in results) -> cascade strategy.
    _LEARNED_STRATEGY_MAP = {
        "som_text_match": "server",
        "grounding_vlm": "server",
        "server_som": "server",
        "anchor_template": "template",
        "template_matching": "template",
        "hybrid_text_direct": "vlm_local",
        "hybrid_vlm_text": "vlm_local",
        "vlm_direct": "vlm_local",
    }

    def __init__(self, executor):
        """
        Args:
            executor: ActionExecutorV1 — provides the existing resolver methods.
        """
        self._executor = executor

    def locate(
        self,
        server_url: str,
        target_spec: Dict[str, Any],
        fallback_x: float,
        fallback_y: float,
        screen_width: int,
        screen_height: int,
        strategies: Optional[List[str]] = None,
    ) -> GroundingResult:
        """Locate a UI element on screen.

        Runs the strategies in order and returns as soon as one of them finds
        the element.

        Args:
            server_url: Server URL; the "server" strategy is skipped when empty.
            target_spec: Target description (by_text, anchor_image_base64,
                vlm_description, optional _learned_strategy...).
            fallback_x, fallback_y: Fallback coordinates, forwarded unchanged
                to the server resolver.
            screen_width, screen_height: Screen resolution in pixels.
            strategies: Ordered strategy names to try.
                Defaults to ["server", "template", "vlm_local"].

        Returns:
            A GroundingResult with found=True and screen-relative coordinates,
            or a result with found=False.
        """
        if strategies is None:
            strategies = ["server", "template", "vlm_local"]
        strategies = self._apply_learned_order(strategies, target_spec)

        t_start = time.perf_counter()

        def elapsed_ms() -> float:
            return (time.perf_counter() - t_start) * 1000

        # Restrict the capture to the active window when its rectangle is
        # known, so the resolvers do not see the taskbar or other applications.
        window_rect = self._active_window_rect(screen_width, screen_height)
        if window_rect:
            logger.info(
                "Grounding contraint à la fenêtre : %sx%s à (%s, %s)",
                window_rect["width"], window_rect["height"],
                window_rect["left"], window_rect["top"],
            )

        screenshot_b64 = ""
        if window_rect:
            try:
                screenshot_b64 = self._grab_jpeg_b64(window_rect)
            except Exception as e:
                logger.warning(
                    "Capture fenêtre échouée, repli sur l'écran entier : %s", e
                )
            if not screenshot_b64:
                # The fallback image is the whole screen, so coordinates must
                # NOT be converted as if they were window-relative.
                window_rect = None
        if not screenshot_b64:
            screenshot_b64 = self._capture_window_or_screen(None)
        if not screenshot_b64:
            return GroundingResult(
                found=False, detail="Capture screenshot échouée",
                elapsed_ms=elapsed_ms(),
            )

        # Size of the captured area (window crop or whole screen).
        cap_w = window_rect["width"] if window_rect else screen_width
        cap_h = window_rect["height"] if window_rect else screen_height

        # NOTE(review): fallback_x/fallback_y are forwarded as received even
        # when the screenshot is a window crop — confirm which frame the
        # server resolver expects.
        for strategy in strategies:
            result = self._try_strategy(
                strategy, server_url, screenshot_b64, target_spec,
                fallback_x, fallback_y, cap_w, cap_h,
            )
            if not result.found:
                continue
            if window_rect:
                # Window-relative fractions -> absolute pixels -> fractions of
                # the full screen.
                abs_x = window_rect["left"] + result.x_pct * cap_w
                abs_y = window_rect["top"] + result.y_pct * cap_h
                result.x_pct = abs_x / screen_width
                result.y_pct = abs_y / screen_height
                result.detail = f"{result.detail} [fenêtre {cap_w}x{cap_h}]"
            result.elapsed_ms = elapsed_ms()
            return result

        return GroundingResult(
            found=False,
            detail=f"Toutes les stratégies ont échoué ({', '.join(strategies)})",
            elapsed_ms=elapsed_ms(),
        )

    def _apply_learned_order(
        self, strategies: List[str], target_spec: Dict[str, Any]
    ) -> List[str]:
        """Move the strategy named by target_spec["_learned_strategy"] to the front."""
        strategies = list(strategies)
        learned = target_spec.get("_learned_strategy", "")
        if not learned:
            return strategies
        preferred = self._LEARNED_STRATEGY_MAP.get(learned, "")
        if preferred and preferred in strategies:
            strategies = [preferred] + [s for s in strategies if s != preferred]
            logger.info(
                "Grounding: stratégie réordonnée par l'apprentissage → %s (learned=%s)",
                strategies, learned,
            )
        return strategies

    def _active_window_rect(
        self, screen_width: int, screen_height: int
    ) -> Optional[Dict[str, int]]:
        """Return the visible rectangle of the active window, or None if unknown."""
        try:
            from ..window_info_crossplatform import get_active_window_rect

            win_info = get_active_window_rect()
            if win_info and win_info.get("rect"):
                return self._clamp_window_rect(
                    win_info["rect"], screen_width, screen_height
                )
        except Exception as e:
            logger.debug("Pas de window rect disponible : %s", e)
        return None

    @staticmethod
    def _clamp_window_rect(
        rect, screen_width: int, screen_height: int
    ) -> Optional[Dict[str, int]]:
        """Clip a [left, top, right, bottom] rect to the screen; None if too small.

        All four edges are clamped so the region never extends past the screen
        or past the window's real right/bottom edge, even when the window is
        partly off-screen.
        """
        left = max(0, int(rect[0]))
        top = max(0, int(rect[1]))
        right = min(int(rect[2]), int(screen_width))
        bottom = min(int(rect[3]), int(screen_height))
        width = right - left
        height = bottom - top
        # Ignore windows whose visible part is tiny (or entirely off-screen).
        if width <= 50 or height <= 50:
            return None
        return {"left": left, "top": top, "width": width, "height": height}

    def _grab_jpeg_b64(self, window_rect: Optional[Dict]) -> str:
        """Grab *window_rect* (or monitor 1 when None) as a base64 JPEG.

        Raises whatever PIL/mss raise; callers decide how to fall back.
        """
        from PIL import Image
        import mss as mss_lib

        with mss_lib.mss() as local_sct:
            if window_rect:
                region = {
                    "left": window_rect["left"],
                    "top": window_rect["top"],
                    "width": window_rect["width"],
                    "height": window_rect["height"],
                }
            else:
                region = local_sct.monitors[1]
            raw = local_sct.grab(region)
            img = Image.frombytes("RGB", raw.size, raw.bgra, "raw", "BGRX")
            buffer = io.BytesIO()
            img.save(buffer, format="JPEG", quality=75)
            return base64.b64encode(buffer.getvalue()).decode("utf-8")

    def _capture_window_or_screen(self, window_rect: Optional[Dict]) -> str:
        """Capture the given window region, or the whole screen when None.

        On any capture error this falls back to the executor's own screenshot
        method. That fallback is called without a region, so callers that pass
        a window_rect cannot tell a crop from the fallback; ``locate`` therefore
        grabs window crops through ``_grab_jpeg_b64`` directly.
        """
        try:
            return self._grab_jpeg_b64(window_rect)
        except Exception as e:
            logger.warning("Capture échouée : %s", e)
            # Fall back on the executor's existing capture method.
            return self._executor._capture_screenshot_b64(max_width=0, quality=75)

    @staticmethod
    def _result_from_raw(
        raw: Optional[Dict], method: str, with_label: bool = True
    ) -> Optional[GroundingResult]:
        """Turn a resolver payload into a found GroundingResult, or None."""
        if not raw or not raw.get("resolved"):
            return None
        x_pct, y_pct = raw.get("x_pct"), raw.get("y_pct")
        if x_pct is None or y_pct is None:
            # A "resolved" answer without coordinates cannot be clicked.
            logger.warning("Grounding: réponse 'resolved' sans coordonnées (%s)", method)
            return None
        label = ""
        if with_label:
            matched = raw.get("matched_element")
            # The payload may carry "matched_element": null.
            if isinstance(matched, dict):
                label = matched.get("label") or ""
        return GroundingResult(
            found=True,
            x_pct=x_pct,
            y_pct=y_pct,
            method=method,
            score=raw.get("score") or 0.0,
            detail=label,
            raw=raw,
        )

    def _try_strategy(
        self,
        strategy: str,
        server_url: str,
        screenshot_b64: str,
        target_spec: Dict[str, Any],
        fallback_x: float,
        fallback_y: float,
        screen_width: int,
        screen_height: int,
    ) -> GroundingResult:
        """Try a single grounding strategy and report whether it found the target."""
        hit = None

        if strategy == "server" and server_url:
            raw = self._executor._server_resolve_target(
                server_url, screenshot_b64, target_spec,
                fallback_x, fallback_y, screen_width, screen_height,
            )
            hit = self._result_from_raw(raw, (raw or {}).get("method", "server"))

        elif strategy == "template":
            anchor_b64 = target_spec.get("anchor_image_base64", "")
            if anchor_b64:
                raw = self._executor._template_match_anchor(
                    screenshot_b64, anchor_b64, screen_width, screen_height,
                )
                hit = self._result_from_raw(raw, "anchor_template", with_label=False)

        elif strategy == "vlm_local":
            by_text = target_spec.get("by_text", "")
            vlm_desc = target_spec.get("vlm_description", "")
            if vlm_desc or by_text:
                raw = self._executor._hybrid_vlm_resolve(
                    screenshot_b64, target_spec, screen_width, screen_height,
                )
                hit = self._result_from_raw(raw, (raw or {}).get("method", "vlm_local"))

        if hit is not None:
            return hit
        return GroundingResult(found=False, method=strategy, detail=f"{strategy}: pas trouvé")
|
||||
172
agent_v0/agent_v1/core/policy.py
Normal file
172
agent_v0/agent_v1/core/policy.py
Normal file
@@ -0,0 +1,172 @@
|
||||
# agent_v1/core/policy.py
|
||||
"""
|
||||
Module Policy — décisions intelligentes quand le grounding échoue.
|
||||
|
||||
Responsabilité unique : "Le Grounding dit NOT_FOUND. Que fait-on ?"
|
||||
Ne localise AUCUN élément — c'est le rôle du Grounding.
|
||||
|
||||
Décisions possibles :
|
||||
- RETRY : re-tenter le grounding (après popup fermée, par exemple)
|
||||
- SKIP : l'action n'est plus nécessaire (état déjà atteint)
|
||||
- ABORT : arrêter le workflow (état incohérent)
|
||||
- SUPERVISE : rendre la main à l'utilisateur
|
||||
|
||||
Séparé de Grounding (qui localise les éléments).
|
||||
Ref: docs/PLAN_ACTEUR_V1.md — Architecture MÉSO (acteur intelligent)
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Decision(Enum):
    """Outcomes the policy can choose when grounding has failed."""

    # Try again (after a corrective step such as closing a popup or navigating).
    RETRY = "retry"
    # The action is no longer needed (target state already reached).
    SKIP = "skip"
    # Stop the workflow (inconsistent state).
    ABORT = "abort"
    # Hand control back to the user.
    SUPERVISE = "supervise"
    # Carry on despite the failure (non-critical action).
    CONTINUE = "continue"
|
||||
|
||||
|
||||
@dataclass
class PolicyDecision:
    """Result of one policy decision."""

    decision: Decision
    reason: str  # Human-readable explanation of the decision
    action_taken: str = ""  # Corrective action already performed (e.g. "popup fermée")
    elapsed_ms: float = 0.0

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-friendly view: enum as its value, elapsed time rounded to 0.1 ms."""
        return dict(
            decision=self.decision.value,
            reason=self.reason,
            action_taken=self.action_taken,
            elapsed_ms=round(self.elapsed_ms, 1),
        )
|
||||
|
||||
|
||||
class PolicyEngine:
    """Decides what to do once grounding has failed.

    Decision cascade:
        1. Popup detected?  -> close it and RETRY
        2. gemma4 actor     -> SKIP / ABORT / SUPERVISE
        3. Fallback         -> SUPERVISE (hand control back)

    Usage:
        policy = PolicyEngine(executor)
        decision = policy.decide(action, target_spec, retry_count)
        if decision.decision == Decision.RETRY:
            # run grounding again
        elif decision.decision == Decision.SKIP:
            # mark as done and move on
    """

    def __init__(self, executor):
        self._executor = executor

    def decide(
        self,
        action: Dict[str, Any],
        target_spec: Dict[str, Any],
        retry_count: int = 0,
        max_retries: int = 1,
    ) -> PolicyDecision:
        """Choose the next move after a grounding failure.

        Order of evaluation:
            1. First attempt (retry_count == 0): run the popup handler. If the
               executor then exposes a truthy ``_system_dialog_pause``, return
               SUPERVISE immediately; if the handler reported success, RETRY.
            2. Retries still available: RETRY.
            3. Otherwise ask the actor: "PASSER" -> SKIP, "STOPPER" -> ABORT,
               anything else -> SUPERVISE.

        Args:
            action: The action that failed.
            target_spec: The target that was not found.
            retry_count: Number of retries already performed.
            max_retries: Maximum number of retries allowed.
        """
        started = time.time()

        def verdict(decision, reason, action_taken=""):
            # Elapsed time is measured at the moment the decision is built.
            return PolicyDecision(
                decision=decision,
                reason=reason,
                action_taken=action_taken,
                elapsed_ms=(time.time() - started) * 1000,
            )

        if retry_count == 0:
            popup_closed = self._try_close_popup()

            # A system dialog flagged during popup handling is never retried
            # nor sent to the actor: control goes back to the human.
            system_dialog = getattr(self._executor, "_system_dialog_pause", None)
            if system_dialog:
                return verdict(
                    Decision.SUPERVISE,
                    f"Dialogue système détecté ({system_dialog.get('category', '?')}) — "
                    f"refus d'interaction automatique",
                    "system_dialog_blocked",
                )

            if popup_closed:
                return verdict(
                    Decision.RETRY,
                    "Popup détectée et fermée, re-tentative",
                    "popup_closed",
                )

        if not (retry_count >= max_retries):
            # Retries are still available.
            return verdict(Decision.RETRY, f"Retry {retry_count + 1}/{max_retries}")

        # Retry budget exhausted: let the actor decide.
        answer = self._ask_actor(action, target_spec)
        if answer == "PASSER":
            return verdict(Decision.SKIP, "Acteur gemma4 : l'état est déjà atteint")
        if answer == "STOPPER":
            return verdict(Decision.ABORT, "Acteur gemma4 : état incohérent, arrêt")
        # "EXECUTER" or any unknown answer -> supervised pause.
        return verdict(
            Decision.SUPERVISE,
            f"Acteur gemma4 : {answer}, pause supervisée",
        )

    def _try_close_popup(self) -> bool:
        """Run the executor's VLM popup handler; any error counts as "nothing closed"."""
        try:
            handled = self._executor._handle_popup_vlm()
        except Exception as exc:
            logger.debug(f"Policy: popup handler échoué : {exc}")
            handled = False
        return handled

    def _ask_actor(self, action: Dict, target_spec: Dict) -> str:
        """Ask the executor's actor for a verdict (PASSER / EXECUTER / STOPPER)."""
        try:
            answer = self._executor._actor_decide(action, target_spec)
        except Exception as exc:
            logger.debug(f"Policy: acteur gemma4 échoué : {exc}")
            answer = "EXECUTER"  # Fallback -> supervised pause in decide()
        return answer
|
||||
215
agent_v0/agent_v1/core/recovery.py
Normal file
215
agent_v0/agent_v1/core/recovery.py
Normal file
@@ -0,0 +1,215 @@
|
||||
# agent_v1/core/recovery.py
|
||||
"""
|
||||
Module Recovery — mécanisme de rollback quand une action échoue.
|
||||
|
||||
Responsabilité : "L'action a échoué ou produit un résultat inattendu.
|
||||
Comment revenir en arrière ?"
|
||||
|
||||
Stratégies de recovery :
|
||||
1. Ctrl+Z (undo natif) — pour les frappes et modifications
|
||||
2. Escape (fermer dialogue) — pour les popups/menus
|
||||
3. Alt+F4 (fermer fenêtre) — si mauvaise application ouverte
|
||||
4. Clic hors zone — fermer un menu déroulant
|
||||
5. Navigation retour — retourner à l'écran précédent
|
||||
|
||||
Le Recovery est appelé par le Policy quand le Critic détecte un
|
||||
résultat inattendu (pixel OK + sémantique NON = changement inattendu).
|
||||
|
||||
Ref: docs/VISION_RPA_INTELLIGENT.md — "Il se trompe" → correction
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)


class RecoveryAction(Enum):
    """Recovery actions the engine knows about."""

    UNDO = "undo"  # Ctrl+Z
    ESCAPE = "escape"  # Escape (close a dialog/menu)
    CLOSE_WINDOW = "close"  # Alt+F4
    CLICK_AWAY = "click_away"  # Click outside the area (close a menu)
    NONE = "none"  # No recovery possible


@dataclass
class RecoveryResult:
    """Outcome of one recovery attempt."""

    action_taken: RecoveryAction
    success: bool
    detail: str = ""

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-friendly view (the enum is replaced by its value)."""
        return {
            "action_taken": self.action_taken.value,
            "success": self.success,
            "detail": self.detail,
        }


class RecoveryEngine:
    """Recovery engine — tries to step back after a failed action.

    The strategy is picked from the type of the failed action and from
    keywords found in the Critic's detail text.

    Usage:
        recovery = RecoveryEngine(executor)
        result = recovery.attempt(failed_action, critic_detail)
        if result.success:
            # retry the action
    """

    # Keywords looked up (as substrings) in the lower-cased critic detail.
    _DIALOG_HINTS = ("popup", "dialog", "erreur", "error", "modal")
    _MENU_HINTS = ("menu", "dropdown", "déroulant")
    _WRONG_WINDOW_HINTS = ("mauvaise fenêtre", "wrong window")

    def __init__(self, executor):
        self._executor = executor

    def attempt(
        self,
        failed_action: Dict[str, Any],
        critic_detail: str = "",
    ) -> RecoveryResult:
        """Attempt a recovery after a failure.

        Args:
            failed_action: The action that failed; only its "type" is read.
            critic_detail: Critic explanation of the failure. ``None`` is
                treated as an empty string.

        Returns:
            A RecoveryResult. ``action_taken`` is RecoveryAction.NONE when no
            strategy applies.
        """
        action_type = (failed_action or {}).get("type", "")
        detail_lower = str(critic_detail or "").lower()

        strategy = self._select_strategy(action_type, detail_lower)
        if strategy == RecoveryAction.NONE:
            return RecoveryResult(
                action_taken=RecoveryAction.NONE,
                success=False,
                detail="Pas de stratégie de recovery applicable",
            )
        return self._execute_recovery(strategy)

    def _select_strategy(self, action_type: str, critic_detail: str) -> RecoveryAction:
        """Pick the recovery strategy.

        The action type is checked first (typing -> undo), then the context
        keywords. *critic_detail* is expected to be lower-cased already.
        """
        critic_detail = critic_detail or ""

        # Wrong keystrokes or edits -> always Ctrl+Z.
        if action_type in ("type", "key_combo"):
            return RecoveryAction.UNDO

        # Popup/dialog detected, or a menu opened by mistake.
        if any(w in critic_detail for w in self._DIALOG_HINTS):
            return RecoveryAction.ESCAPE
        if any(w in critic_detail for w in self._MENU_HINTS):
            return RecoveryAction.ESCAPE

        # Wrong window opened.
        if any(w in critic_detail for w in self._WRONG_WINDOW_HINTS):
            return RecoveryAction.CLOSE_WINDOW

        # A click that produced an unexpected result.
        if action_type == "click":
            return RecoveryAction.ESCAPE

        return RecoveryAction.NONE

    @staticmethod
    def _tap_with_modifier(keyboard, modifier, key) -> None:
        """Press modifier+key, always releasing the modifier.

        Without the ``finally`` a failure while pressing *key* would leave
        Ctrl/Alt held down for every later keystroke.
        """
        keyboard.press(modifier)
        try:
            keyboard.press(key)
            keyboard.release(key)
        finally:
            keyboard.release(modifier)

    def _execute_recovery(self, strategy: RecoveryAction) -> RecoveryResult:
        """Run the chosen recovery strategy.

        Any error — including pynput being unavailable or the executor having
        no keyboard — is reported as a failed RecoveryResult, not raised.
        """
        try:
            if strategy == RecoveryAction.CLICK_AWAY:
                return self._click_away()

            if strategy in (
                RecoveryAction.UNDO,
                RecoveryAction.ESCAPE,
                RecoveryAction.CLOSE_WINDOW,
            ):
                # Imported lazily and inside the try so a missing or unusable
                # pynput yields a failed result.
                from pynput.keyboard import Key

                keyboard = self._executor.keyboard

                if strategy == RecoveryAction.UNDO:
                    logger.info("Recovery : Ctrl+Z (undo)")
                    print(" [RECOVERY] Ctrl+Z — annulation de la dernière action")
                    self._tap_with_modifier(keyboard, Key.ctrl, 'z')
                    time.sleep(0.5)
                    return RecoveryResult(
                        action_taken=RecoveryAction.UNDO,
                        success=True,
                        detail="Ctrl+Z exécuté",
                    )

                if strategy == RecoveryAction.ESCAPE:
                    logger.info("Recovery : Escape (fermer dialogue)")
                    print(" [RECOVERY] Escape — fermeture dialogue/menu")
                    keyboard.press(Key.esc)
                    keyboard.release(Key.esc)
                    time.sleep(0.5)
                    return RecoveryResult(
                        action_taken=RecoveryAction.ESCAPE,
                        success=True,
                        detail="Escape exécuté",
                    )

                # CLOSE_WINDOW: Alt+F4. The active window title is only
                # LOGGED here; nothing checks it before the keystroke.
                # NOTE(review): on a hospital workstation a blind Alt+F4 can
                # close the patient record application — consider an
                # allow/deny check on the title before sending the keys.
                active_title = None
                try:
                    from ..window_info_crossplatform import get_active_window_info

                    active = get_active_window_info()
                    active_title = active.get("title", "")
                    logger.info(f"Recovery : Alt+F4 sur '{active_title}'")
                    print(f" [RECOVERY] Alt+F4 — fermeture de '{active_title}'")
                except Exception:
                    logger.info("Recovery : Alt+F4 (fenêtre active inconnue)")
                    print(" [RECOVERY] Alt+F4 — fermeture fenêtre indésirable")

                self._tap_with_modifier(keyboard, Key.alt, Key.f4)
                time.sleep(1.0)
                shown_title = active_title if active_title is not None else "?"
                return RecoveryResult(
                    action_taken=RecoveryAction.CLOSE_WINDOW,
                    success=True,
                    detail=f"Alt+F4 exécuté sur '{shown_title}'",
                )

        except Exception as e:
            logger.warning(f"Recovery échoué ({strategy.value}) : {e}")
            return RecoveryResult(
                action_taken=strategy,
                success=False,
                detail=f"Erreur : {e}",
            )

        return RecoveryResult(
            action_taken=RecoveryAction.NONE,
            success=False,
            detail="Stratégie non implémentée",
        )

    def _click_away(self) -> RecoveryResult:
        """Left-click a neutral spot 10% in from the monitor's top-left corner."""
        logger.info("Recovery : clic hors zone")
        print(" [RECOVERY] Clic hors zone — fermeture menu")
        monitor = self._executor.sct.monitors[1]
        w, h = monitor["width"], monitor["height"]
        # Add the monitor origin so the point stays on that monitor even when
        # it does not start at (0, 0).
        x = monitor.get("left", 0) + int(w * 0.1)
        y = monitor.get("top", 0) + int(h * 0.1)
        self._executor._click((x, y), "left")
        time.sleep(0.5)
        return RecoveryResult(
            action_taken=RecoveryAction.CLICK_AWAY,
            success=True,
            detail="Clic hors zone exécuté",
        )
|
||||
448
agent_v0/agent_v1/core/system_dialog_guard.py
Normal file
448
agent_v0/agent_v1/core/system_dialog_guard.py
Normal file
@@ -0,0 +1,448 @@
|
||||
# agent_v1/core/system_dialog_guard.py
|
||||
"""
|
||||
Garde-fou sécurité : détection des dialogues système Windows critiques.
|
||||
|
||||
==============================================================================
|
||||
POURQUOI ?
|
||||
==============================================================================
|
||||
|
||||
Pendant un replay, si un dialogue UAC, CredUI (mot de passe Windows),
|
||||
SmartScreen ou une notification de sécurité Windows apparaît, Léa pourrait
|
||||
demander au VLM "quel bouton cliquer" et recevoir "Oui" en réponse.
|
||||
|
||||
→ **Léa cliquerait OUI sur une élévation UAC** → vecteur d'attaque ransomware.
|
||||
|
||||
Ce module fournit la détection de ces dialogues pour que l'exécuteur
|
||||
**ne clique JAMAIS dessus automatiquement**. La décision est renvoyée à
|
||||
l'humain (pause supervisée).
|
||||
|
||||
==============================================================================
|
||||
PRINCIPE
|
||||
==============================================================================
|
||||
|
||||
- **Faux positif tolérable** : on préfère pauser pour rien plutôt que cliquer
|
||||
sur un UAC.
|
||||
- **Faux négatif catastrophique** : mieux vaut être trop prudent.
|
||||
- **Multi-signal** : titre, ClassName UIA, nom de processus, parent_path.
|
||||
Un seul signal suffit à bloquer.
|
||||
- **Compatible Citrix** : les dialogues UAC d'un client Citrix apparaissent
|
||||
aussi dans la VM distante — la détection par classe UIA fonctionne.
|
||||
|
||||
==============================================================================
|
||||
PATTERNS DE DÉTECTION (ordre de criticité décroissant)
|
||||
==============================================================================
|
||||
|
||||
1. UAC Consent (élévation de privilèges)
|
||||
- ClassName : `$$$Secure UAP Dummy Window Class$$$`
|
||||
- Process : `consent.exe`
|
||||
- Titre : "Contrôle de compte d'utilisateur", "User Account Control"
|
||||
|
||||
2. CredUI (prompt mot de passe Windows)
|
||||
- ClassName : `Credential Dialog Xaml Host`
|
||||
- Process : `credentialuibroker.exe`, `credui.exe`
|
||||
- Titre : "Sécurité Windows", "Windows Security"
|
||||
|
||||
3. SmartScreen (protection contre applications inconnues)
|
||||
- Process : `smartscreen.exe`
|
||||
- Titre : "Windows a protégé votre ordinateur", "Windows protected your PC"
|
||||
|
||||
4. Windows Defender / Security Center
|
||||
- Process : `securityhealthhost.exe`, `msmpeng.exe`
|
||||
- Titre : "Sécurité Windows", "Windows Defender"
|
||||
|
||||
5. Signatures pilotes / driver install
|
||||
- Titre : "Installer ce pilote", "Driver signature"
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Catégories de dialogues système (pour logging + messages)
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class SystemDialogCategory:
    """String identifiers for the kinds of system dialogs that must be blocked.

    The values are plain strings so they can be logged and serialized as-is.
    """

    # Privilege-elevation prompt (UAC consent).
    UAC = "uac_consent"
    # Windows password / credential prompt.
    CREDUI = "windows_credential_prompt"
    # SmartScreen protection screen.
    SMARTSCREEN = "smartscreen"
    # Windows Defender alert.
    DEFENDER = "windows_defender"
    # Signed-driver installation prompt.
    DRIVER = "driver_install"
    # Windows security toast notification.
    SECURITY_TOAST = "security_toast"
    # "#32770" dialog that does not belong to a known application.
    UNKNOWN_DIALOG = "unknown_system_dialog"
|
||||
|
||||
|
||||
@dataclass
class SystemDialogDetection:
    """Outcome of a system-dialog analysis.

    Only ``is_system_dialog`` is mandatory; the remaining fields default to
    empty strings and are meant to be filled in when a dialog is detected.
    """

    is_system_dialog: bool
    # One of the SystemDialogCategory values.
    category: str = ""
    # Label of the signal that produced the match (e.g. "class_name").
    matched_signal: str = ""
    # The value that matched.
    matched_value: str = ""
    # Human-readable explanation.
    reason: str = ""

    def to_dict(self) -> Dict[str, Any]:
        """Return the detection as a plain dict keyed by field name."""
        exported = (
            "is_system_dialog",
            "category",
            "matched_signal",
            "matched_value",
            "reason",
        )
        return {key: getattr(self, key) for key in exported}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Signatures de détection
|
||||
# =============================================================================
|
||||
|
||||
|
||||
# UIA ClassName values, stored with the casing UIA exposes them in.
# They are meant to be compared as-is first, then lowercased for a more
# lenient match.
_CLASS_NAMES_SYSTEM = {
    # UAC consent
    **dict.fromkeys(
        ("$$$Secure UAP Dummy Window Class$$$",),
        SystemDialogCategory.UAC,
    ),
    # Windows credential UI (the second entry is noted as the older name)
    **dict.fromkeys(
        (
            "Credential Dialog Xaml Host",
            "CredentialDialogXamlHost",
        ),
        SystemDialogCategory.CREDUI,
    ),
}
|
||||
|
||||
# Process names, keyed in lowercase with an explicit ".exe" suffix
# (lookups are expected to normalize the name the same way).
_PROCESS_NAMES_SYSTEM = {
    **dict.fromkeys(
        ("consent.exe",),
        SystemDialogCategory.UAC,
    ),
    **dict.fromkeys(
        (
            "credentialuibroker.exe",
            "credui.exe",
            "credwiz.exe",
        ),
        SystemDialogCategory.CREDUI,
    ),
    **dict.fromkeys(
        ("smartscreen.exe",),
        SystemDialogCategory.SMARTSCREEN,
    ),
    **dict.fromkeys(
        (
            "securityhealthhost.exe",
            "securityhealthui.exe",
            "securityhealthsystray.exe",
            "msmpeng.exe",
            "windowsdefender.exe",
        ),
        SystemDialogCategory.DEFENDER,
    ),
    **dict.fromkeys(
        (
            "msiexec.exe",  # signed-driver prompts
            "drvinst.exe",
        ),
        SystemDialogCategory.DRIVER,
    ),
}
|
||||
|
||||
# Window-title patterns, all compiled case-insensitively; several use word
# boundaries. Overly generic titles are deliberately left out to limit false
# positives on the business applications (OSIRIS / OBSIUS / MEDSPHERE).
#
# French patterns accept both the ASCII apostrophe (') and the typographic
# one (U+2019): a title using the curly form must not slip through, since a
# missed system dialog is the failure mode this guard exists to prevent.
# Order matters: the first matching pattern decides the category.
_TITLE_PATTERNS_SYSTEM: Tuple[Tuple[re.Pattern, str], ...] = tuple(
    (re.compile(regex, re.IGNORECASE), category)
    for regex, category in (
        # UAC
        (r"contr[oô]le\s+de\s+compte\s+d['’]?utilisateur", SystemDialogCategory.UAC),
        (r"\buser\s+account\s+control\b", SystemDialogCategory.UAC),
        (r"voulez-vous\s+autoriser\s+cette\s+application", SystemDialogCategory.UAC),
        (r"do\s+you\s+want\s+to\s+allow\s+this\s+app", SystemDialogCategory.UAC),

        # CredUI / Windows Security
        (r"\bs[eé]curit[eé]\s+windows\b", SystemDialogCategory.CREDUI),
        (r"\bwindows\s+security\b", SystemDialogCategory.CREDUI),
        (r"entrer\s+les\s+informations\s+d['’]?identification", SystemDialogCategory.CREDUI),
        (r"enter\s+(?:your\s+)?credentials?", SystemDialogCategory.CREDUI),
        (r"connectez-vous\s+[aà]\s+votre\s+compte", SystemDialogCategory.CREDUI),
        (r"\bsign\s+in\s+to\s+your\s+account\b", SystemDialogCategory.CREDUI),

        # SmartScreen
        (r"windows\s+a\s+prot[eé]g[eé]", SystemDialogCategory.SMARTSCREEN),
        (r"windows\s+protected\s+your\s+pc", SystemDialogCategory.SMARTSCREEN),
        (r"\bsmartscreen\b", SystemDialogCategory.SMARTSCREEN),
        (r"\b[eé]diteur\s+inconnu\b", SystemDialogCategory.SMARTSCREEN),
        (r"\bunknown\s+publisher\b", SystemDialogCategory.SMARTSCREEN),

        # Windows Defender
        (r"windows\s+defender", SystemDialogCategory.DEFENDER),
        (r"menace\s+d[eé]tect[eé]e", SystemDialogCategory.DEFENDER),
        (r"threat\s+detected", SystemDialogCategory.DEFENDER),

        # Driver installation
        (r"installer\s+ce\s+pilote", SystemDialogCategory.DRIVER),
        (r"install\s+this\s+driver", SystemDialogCategory.DRIVER),
        (r"signature\s+num[eé]rique\s+du\s+pilote", SystemDialogCategory.DRIVER),
    )
)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Fonctions de détection
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def _normalize_process(name: str) -> str:
    """Return a process name in the form used as key of the process table.

    The name is stripped, lowercased and reduced to its last path component
    (both ``\\`` and ``/`` are treated as separators). When the result lacks
    the ``.exe`` suffix, the suffix is appended only if that yields a known
    entry of ``_PROCESS_NAMES_SYSTEM``; otherwise the bare name is returned.

    Args:
        name: Process name or path; may be empty.

    Returns:
        The normalized name, or ``""`` when nothing usable remains.
    """
    if not name:
        return ""

    # Unify separators, then keep whatever follows the last one.
    base = name.strip().lower().replace("\\", "/").rsplit("/", 1)[-1]
    if not base or base.endswith(".exe"):
        return base

    # Some sources report the name without its extension; add it back only
    # when that produces a key we actually know about.
    candidate = base + ".exe"
    return candidate if candidate in _PROCESS_NAMES_SYSTEM else base
|
||||
|
||||
|
||||
def _check_class_name(class_name: str) -> Optional[Tuple[str, str, str]]:
    """Tell whether a UIA ClassName belongs to a system dialog.

    Checks run from strictest to loosest: exact table lookup, then a
    case-insensitive comparison on the stripped name, then substring
    heuristics for UAC and credential-XAML class names.

    Args:
        class_name: ClassName reported by UIA; may be empty.

    Returns:
        ``(category, matched_class, reason)`` on a match, ``None`` otherwise.
    """
    if not class_name:
        return None

    # 1) Exact match.
    exact = _CLASS_NAMES_SYSTEM.get(class_name)
    if exact is not None:
        return (exact, class_name, f"ClassName UIA '{class_name}' = dialogue système {exact}")

    # 2) Same name, ignoring case and surrounding whitespace.
    folded = class_name.strip().lower()
    for known, category in _CLASS_NAMES_SYSTEM.items():
        if known.lower() == folded:
            return (category, class_name, f"ClassName UIA ~= '{known}' ({category})")

    lowered = class_name.lower()

    # 3) Loose UAC detection: a few variants of the secure class exist.
    if any(marker in lowered for marker in ("secure uap", "uap dummy")):
        return (
            SystemDialogCategory.UAC,
            class_name,
            f"ClassName '{class_name}' contient 'Secure UAP' → UAC",
        )

    # 4) Credential XAML host.
    if "credential" in lowered and "xaml" in lowered:
        return (
            SystemDialogCategory.CREDUI,
            class_name,
            f"ClassName '{class_name}' contient Credential+Xaml → CredUI",
        )

    return None
|
||||
|
||||
|
||||
def _check_process_name(process_name: str) -> Optional[Tuple[str, str, str]]:
    """Tell whether a process name belongs to a system dialog.

    The name is normalized with ``_normalize_process`` and looked up in
    ``_PROCESS_NAMES_SYSTEM``.

    Args:
        process_name: Process name or path; may be empty.

    Returns:
        ``(category, matched_process, reason)`` on a match, where
        ``matched_process`` is the name exactly as received; ``None``
        otherwise.
    """
    if not process_name:
        return None

    key = _normalize_process(process_name)
    category = _PROCESS_NAMES_SYSTEM.get(key)
    if category is None:
        return None
    return (category, process_name, f"Processus '{key}' = {category}")
|
||||
|
||||
|
||||
def _check_title(title: str) -> Optional[Tuple[str, str, str]]:
    """Tell whether a window title matches a known system dialog.

    Patterns of ``_TITLE_PATTERNS_SYSTEM`` are tried in order and the first
    one found anywhere in the title wins.

    Args:
        title: Window title; may be empty.

    Returns:
        ``(category, matched_text, reason)`` on a match, where
        ``matched_text`` is the portion of the title that matched; ``None``
        otherwise.
    """
    if not title:
        return None

    for regex, category in _TITLE_PATTERNS_SYSTEM:
        found = regex.search(title)
        if found is None:
            continue
        # Only the first 60 characters of the title go into the reason.
        return (
            category,
            found.group(0),
            f"Titre '{title[:60]}' matche '{regex.pattern}' → {category}",
        )
    return None
|
||||
|
||||
|
||||
def _positive_detection(
    signal: str,
    hit: Tuple[str, str, str],
    reason_prefix: str = "",
) -> SystemDialogDetection:
    """Wrap a ``(category, matched_value, reason)`` hit into a blocking result."""
    category, matched_value, reason = hit
    return SystemDialogDetection(
        is_system_dialog=True,
        category=category,
        matched_signal=signal,
        matched_value=matched_value,
        reason=f"{reason_prefix}{reason}",
    )


def is_system_dialog(
    uia_snapshot: Optional[Dict[str, Any]] = None,
    window_info: Optional[Dict[str, Any]] = None,
) -> SystemDialogDetection:
    """Decide whether the active window is a critical system dialog.

    Several signals are combined and **a single one is enough to block**: a
    false positive (needless pause) is preferred over a false negative (a
    click on a UAC prompt). Signals are evaluated in this order and the first
    hit is returned:

    1. ``class_name`` of the UIA element, then of each ``parent_path`` entry
       (the clicked element may be an inner control whose own class is
       generic while an ancestor carries the system class);
    2. ``process_name`` of the UIA element, then ``app_name`` of the window;
    3. ``title`` of the window, then ``name`` of the UIA element (some
       system dialogs surface their title there).

    Args:
        uia_snapshot: Dict with keys ``class_name``, ``process_name``,
            ``parent_path`` and ``name``. May be ``None`` or empty when UIA
            is unavailable. ``parent_path`` entries that are not dicts are
            skipped instead of aborting the analysis.
        window_info: Dict with keys ``title`` and ``app_name``. May be
            ``None``.

    Returns:
        A ``SystemDialogDetection`` whose ``is_system_dialog`` is True when a
        system dialog is detected, with ``matched_signal`` naming the signal
        that matched. Otherwise a detection with ``is_system_dialog=False``.

    Examples::

        det = is_system_dialog(window_info={"title": "User Account Control"})
        assert det.is_system_dialog          # UAC detected

        det = is_system_dialog(uia_snapshot={"class_name": "$$$Secure UAP Dummy Window Class$$$"})
        assert det.is_system_dialog          # UAC through ClassName

        det = is_system_dialog(window_info={"title": "OSIRIS - Patient Dupont"})
        assert not det.is_system_dialog      # business application -> OK
    """
    # A missing source is treated as an empty dict: every lookup then yields
    # "" and the matching checker returns None.
    uia = uia_snapshot or {}
    win = window_info or {}

    # ── Signal 1: UIA ClassName (element, then ancestors) ──
    hit = _check_class_name(uia.get("class_name", "") or "")
    if hit:
        return _positive_detection("class_name", hit)

    for parent in uia.get("parent_path", []) or []:
        if not isinstance(parent, dict):
            # Malformed entry: ignore it rather than crash the whole guard.
            continue
        hit = _check_class_name(parent.get("class_name", "") or "")
        if hit:
            return _positive_detection("parent_class_name", hit, "Parent : ")

    # ── Signal 2: process name ──
    hit = _check_process_name(uia.get("process_name", "") or "")
    if hit:
        return _positive_detection("process_name", hit)

    hit = _check_process_name(win.get("app_name", "") or "")
    if hit:
        return _positive_detection("app_name", hit)

    # ── Signal 3: window title ──
    hit = _check_title(win.get("title", "") or "")
    if hit:
        return _positive_detection("window_title", hit)

    hit = _check_title(uia.get("name", "") or "")
    if hit:
        return _positive_detection("uia_name", hit)

    return SystemDialogDetection(is_system_dialog=False)
|
||||
|
||||
|
||||
def detect_current_system_dialog() -> SystemDialogDetection:
    """Inspect the current screen and report a system dialog, if any.

    Standalone helper that gathers both sources used by ``is_system_dialog``:
    the active-window info from ``get_active_window_info()`` and, when the
    shared UIA helper reports itself available, a snapshot of the focused
    element. Each source is best-effort: any exception while collecting it is
    logged at debug level and that source is left as ``None``.

    A positive detection is logged as a warning before being returned.

    Returns:
        The ``SystemDialogDetection`` produced by ``is_system_dialog``. When
        no source could be collected, ``is_system_dialog`` is False.
    """
    window_info: Optional[Dict[str, Any]] = None
    uia_snapshot: Optional[Dict[str, Any]] = None

    # Active window (cross-platform helper).
    try:
        from ..window_info_crossplatform import get_active_window_info

        window_info = get_active_window_info()
    except Exception as e:  # pragma: no cover — best-effort
        logger.debug(f"[SYS-DIALOG] window_info indisponible : {e}")

    # Local UIA snapshot through the shared helper.
    try:
        from .uia_helper import get_shared_helper

        helper = get_shared_helper()
        # Capture the focused element only when the helper is usable.
        focused = helper.capture_focused(max_depth=2) if helper.available else None
        if focused is not None:
            uia_snapshot = focused.to_dict()
    except Exception as e:  # pragma: no cover
        logger.debug(f"[SYS-DIALOG] UIA indisponible : {e}")

    detection = is_system_dialog(uia_snapshot=uia_snapshot, window_info=window_info)
    if not detection.is_system_dialog:
        return detection

    logger.warning(
        f"[SYS-DIALOG] BLOCAGE — dialogue système détecté "
        f"[{detection.category}] via {detection.matched_signal}='{detection.matched_value}' "
        f"— {detection.reason}"
    )
    return detection
|
||||
|
||||
|
||||
__all__ = [
|
||||
"SystemDialogCategory",
|
||||
"SystemDialogDetection",
|
||||
"is_system_dialog",
|
||||
"detect_current_system_dialog",
|
||||
]
|
||||
294
agent_v0/agent_v1/core/uia_helper.py
Normal file
294
agent_v0/agent_v1/core/uia_helper.py
Normal file
@@ -0,0 +1,294 @@
|
||||
# agent_v0/agent_v1/core/uia_helper.py
|
||||
"""
|
||||
UIAHelper — Wrapper Python pour lea_uia.exe (helper Rust UI Automation).
|
||||
|
||||
Expose une API Python simple pour interroger UIA via le binaire Rust.
|
||||
Communique via subprocess : arguments en ligne de commande, réponse JSON lue sur stdout.
|
||||
|
||||
Pourquoi un helper Rust ?
|
||||
- 5-10x plus rapide que pywinauto (10-20ms vs 50-200ms)
|
||||
- Binaire standalone ~500 Ko, aucune dépendance runtime
|
||||
- Pas de problèmes de threading COM en Python
|
||||
- Crash-safe (le crash du helper n'affecte pas l'agent Python)
|
||||
|
||||
Architecture :
|
||||
Python executor
|
||||
↓ subprocess.run
|
||||
lea_uia.exe query --x 812 --y 436
|
||||
↓ UIA API Windows
|
||||
JSON response
|
||||
↓ stdout
|
||||
Python executor parse JSON
|
||||
|
||||
Si lea_uia.exe n'est pas disponible (Linux, binaire absent, crash) :
|
||||
toutes les méthodes retournent None → fallback vision automatique.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import subprocess
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Timeout par défaut pour les appels UIA (en secondes)
|
||||
_DEFAULT_TIMEOUT = 5.0
|
||||
|
||||
# Hide the console window when spawning lea_uia.exe on Windows.
# Without this flag, every call (one per user click while recording) makes a
# black cmd window flash on screen, which slows the mouse down and pollutes
# the captured screenshots (the VLM may "see" the lea_uia.exe path as the
# clicked text).
#
# 0x08000000 is the value of CREATE_NO_WINDOW in the Windows API; getattr()
# picks up the constant when Python already exposes it. Everywhere else the
# flag value is 0.
_SUBPROCESS_CREATION_FLAGS = (
    getattr(subprocess, "CREATE_NO_WINDOW", 0x08000000)
    if platform.system() == "Windows"
    else 0
)
|
||||
|
||||
|
||||
@dataclass
class UiaElement:
    """Python-side representation of a UI Automation element.

    Instances are usually built from a decoded JSON object with
    :meth:`from_dict` and turned back into a plain dict with :meth:`to_dict`.
    """

    name: str = ""
    control_type: str = ""
    class_name: str = ""
    automation_id: str = ""
    # Read by the geometry helpers below as (x1, y1, x2, y2).
    bounding_rect: Tuple[int, int, int, int] = (0, 0, 0, 0)
    is_enabled: bool = False
    is_offscreen: bool = True
    parent_path: List[Dict[str, str]] = field(default_factory=list)
    process_name: str = ""

    def center(self) -> Tuple[int, int]:
        """Return the center of the bounding rectangle (integer pixels)."""
        x1, y1, x2, y2 = self.bounding_rect
        return ((x1 + x2) // 2, (y1 + y2) // 2)

    def width(self) -> int:
        """Return the horizontal extent of the bounding rectangle."""
        return self.bounding_rect[2] - self.bounding_rect[0]

    def height(self) -> int:
        """Return the vertical extent of the bounding rectangle."""
        return self.bounding_rect[3] - self.bounding_rect[1]

    def is_clickable(self) -> bool:
        """Return True when the element is enabled, on screen and non-empty."""
        return (
            self.is_enabled
            and not self.is_offscreen
            and self.width() > 0
            and self.height() > 0
        )

    def path_signature(self) -> str:
        """Return a ``Type[Name] > Type[Name]`` signature of the parent chain.

        Parents without a name are left out. A parent lacking
        ``control_type`` contributes an empty type instead of raising, and
        entries that are not dicts are ignored.
        """
        parts = [
            f"{p.get('control_type', '')}[{p['name']}]"
            for p in self.parent_path
            if isinstance(p, dict) and p.get("name")
        ]
        parts.append(f"{self.control_type}[{self.name}]")
        return " > ".join(parts)

    def to_dict(self) -> Dict[str, Any]:
        """Return the element as a JSON-friendly dict (rect as a list)."""
        return {
            "name": self.name,
            "control_type": self.control_type,
            "class_name": self.class_name,
            "automation_id": self.automation_id,
            "bounding_rect": list(self.bounding_rect),
            "is_enabled": self.is_enabled,
            "is_offscreen": self.is_offscreen,
            "parent_path": self.parent_path,
            "process_name": self.process_name,
        }

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "UiaElement":
        """Build an element from a dict such as the one ``to_dict`` produces.

        Missing keys and explicit ``None`` values (JSON ``null``) both fall
        back to the field defaults, so the resulting element never carries
        ``None`` where a string, list or bool is expected.

        Args:
            d: Source mapping. ``bounding_rect`` may be a list or a tuple
                with at least four items; anything else becomes
                ``(0, 0, 0, 0)``.

        Returns:
            The populated ``UiaElement``.
        """
        rect = d.get("bounding_rect")
        if isinstance(rect, (list, tuple)) and len(rect) >= 4:
            rect = tuple(rect[:4])
        else:
            rect = (0, 0, 0, 0)

        def text(key: str) -> str:
            # Collapse both "absent" and null to the empty string.
            return d.get(key) or ""

        enabled = d.get("is_enabled")
        offscreen = d.get("is_offscreen")
        return cls(
            name=text("name"),
            control_type=text("control_type"),
            class_name=text("class_name"),
            automation_id=text("automation_id"),
            bounding_rect=rect,
            # Unknown state keeps the conservative defaults (not clickable).
            is_enabled=False if enabled is None else enabled,
            is_offscreen=True if offscreen is None else offscreen,
            parent_path=d.get("parent_path") or [],
            process_name=text("process_name"),
        )
|
||||
|
||||
|
||||
class UIAHelper:
    """Python wrapper around the lea_uia.exe helper binary.

    Each query spawns the binary with command-line arguments and parses the
    JSON document it prints on stdout. Query methods return ``None`` whenever
    the helper is unavailable, fails, times out or answers with something
    other than a successful JSON object.
    """

    def __init__(self, helper_path: str = "", timeout: float = _DEFAULT_TIMEOUT):
        """Locate the helper and record whether it can be used.

        Args:
            helper_path: Explicit path to the binary; when empty, the
                standard locations are searched.
            timeout: Default subprocess timeout, in seconds.
        """
        self._helper_path = helper_path or self._find_helper()
        self._timeout = timeout
        self._available = self._check_available()

    def _find_helper(self) -> str:
        """Return the absolute path of the first existing candidate, or ""."""
        candidates = [
            r"C:\Lea\helpers\lea_uia.exe",
            os.path.join(os.path.dirname(__file__), "..", "..",
                         "agent_rust", "lea_uia", "target",
                         "x86_64-pc-windows-gnu", "release", "lea_uia.exe"),
            "./helpers/lea_uia.exe",
            "lea_uia.exe",
        ]
        for path in candidates:
            if os.path.isfile(path):
                return os.path.abspath(path)
        return ""

    def _check_available(self) -> bool:
        """Return True on Windows when the helper binary exists on disk.

        No health probe is run here; use :meth:`health` to check that the
        binary actually answers.
        """
        if platform.system() != "Windows":
            logger.debug("UIAHelper: Linux/Mac — helper désactivé")
            return False
        if not self._helper_path:
            logger.debug("UIAHelper: lea_uia.exe introuvable")
            return False
        if not os.path.isfile(self._helper_path):
            logger.debug(f"UIAHelper: chemin invalide {self._helper_path}")
            return False
        return True

    @property
    def available(self) -> bool:
        """Whether the helper was found usable at construction time."""
        return self._available

    @property
    def helper_path(self) -> str:
        """Path of the helper binary ("" when none was found)."""
        return self._helper_path

    def _run(
        self,
        args: List[str],
        timeout: Optional[float] = None,
    ) -> Optional[Dict[str, Any]]:
        """Run the helper with ``args`` and return its JSON object.

        Args:
            args: Command-line arguments passed after the binary path.
            timeout: Subprocess timeout in seconds; defaults to the timeout
                given at construction.

        Returns:
            The decoded JSON object, or ``None`` when the helper is
            unavailable, exits with a non-zero code, prints nothing, times
            out, or prints JSON that is not an object.
        """
        if not self._available:
            return None
        effective_timeout = self._timeout if timeout is None else timeout
        try:
            result = subprocess.run(
                [self._helper_path] + args,
                capture_output=True,
                text=True,
                timeout=effective_timeout,
                encoding="utf-8",
                errors="replace",
                creationflags=_SUBPROCESS_CREATION_FLAGS,
            )
            if result.returncode != 0:
                logger.debug(
                    f"UIAHelper: exit code {result.returncode}, "
                    f"stderr: {result.stderr[:200]}"
                )
                return None
            output = result.stdout.strip()
            if not output:
                return None
            payload = json.loads(output)
        except subprocess.TimeoutExpired:
            logger.debug(f"UIAHelper: timeout ({effective_timeout}s) sur {args}")
            return None
        except json.JSONDecodeError as e:
            logger.debug(f"UIAHelper: JSON invalide — {e}")
            return None
        except Exception as e:
            logger.debug(f"UIAHelper: erreur {e}")
            return None

        # Callers use dict access on the payload; a JSON list or scalar would
        # make them raise, so it is reported as "no answer" instead.
        if not isinstance(payload, dict):
            logger.debug(
                f"UIAHelper: réponse JSON inattendue ({type(payload).__name__})"
            )
            return None
        return payload

    @staticmethod
    def _parse_element(data: Optional[Dict[str, Any]]) -> Optional[UiaElement]:
        """Extract the ``element`` of a successful response, if any."""
        if not data or data.get("status") != "ok":
            return None
        elem_data = data.get("element")
        if not elem_data or not isinstance(elem_data, dict):
            return None
        return UiaElement.from_dict(elem_data)

    def health(self) -> bool:
        """Return True when the helper answers ``health`` with status "ok"."""
        data = self._run(["health"])
        return data is not None and data.get("status") == "ok"

    def query_at(
        self,
        x: int,
        y: int,
        with_parents: bool = True,
    ) -> Optional[UiaElement]:
        """Return the UIA element located at a screen position.

        Args:
            x, y: Absolute pixel coordinates.
            with_parents: Include the parent hierarchy; when False,
                ``--with-parents=false`` is passed to the helper.

        Returns:
            The element, or ``None`` when nothing was found or UIA is
            unavailable.
        """
        args = ["query", "--x", str(x), "--y", str(y)]
        if not with_parents:
            args.append("--with-parents=false")
        return self._parse_element(self._run(args))

    def find_by_name(
        self,
        name: str,
        control_type: Optional[str] = None,
        automation_id: Optional[str] = None,
        window: Optional[str] = None,
        timeout_ms: int = 2000,
    ) -> Optional[UiaElement]:
        """Search for an element by name, with optional filters.

        Args:
            name: Exact name of the element.
            control_type: Control type (Button, Edit, MenuItem...).
            automation_id: Automation ID.
            window: Restrict the search to a specific window.
            timeout_ms: Search timeout in milliseconds, forwarded to the
                helper as ``--timeout-ms``.

        Returns:
            The element, or ``None`` when not found or UIA is unavailable.
        """
        args = ["find", "--name", name, "--timeout-ms", str(timeout_ms)]
        if control_type:
            args.extend(["--control-type", control_type])
        if automation_id:
            args.extend(["--automation-id", automation_id])
        if window:
            args.extend(["--window", window])

        # Assumes the helper may search for up to timeout_ms: the subprocess
        # must then be allowed to live at least that long (plus a one-second
        # margin for start-up), otherwise a long search is killed early.
        run_timeout = max(self._timeout, timeout_ms / 1000.0 + 1.0)
        return self._parse_element(self._run(args, timeout=run_timeout))

    def capture_focused(self, max_depth: int = 3) -> Optional[UiaElement]:
        """Return the focused element and its context.

        Args:
            max_depth: Value forwarded to the helper as ``--max-depth``.

        Returns:
            The element, or ``None`` when nothing was captured or UIA is
            unavailable.
        """
        return self._parse_element(
            self._run(["capture", "--max-depth", str(max_depth)])
        )
|
||||
|
||||
|
||||
# Module-wide shared instance (lightweight singleton), created on first use.
_SHARED_HELPER: Optional[UIAHelper] = None


def get_shared_helper() -> UIAHelper:
    """Return the shared UIAHelper, creating it on the first call."""
    global _SHARED_HELPER
    helper = _SHARED_HELPER
    if helper is None:
        helper = _SHARED_HELPER = UIAHelper()
    return helper
|
||||
@@ -1,55 +0,0 @@
|
||||
# window_info.py
|
||||
"""
|
||||
Récupération des informations sur la fenêtre active (X11).
|
||||
|
||||
v0 :
|
||||
- utilise xdotool pour obtenir :
|
||||
- le titre de la fenêtre active
|
||||
- le PID de la fenêtre active, puis le nom du process via ps
|
||||
|
||||
Si quelque chose ne fonctionne pas, on renvoie des valeurs "unknown".
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
from typing import Dict, Optional
|
||||
|
||||
|
||||
def _run_cmd(cmd: list[str]) -> Optional[str]:
    """Run a command and return its stripped text output.

    Args:
        cmd: Program and arguments, as a list.

    Returns:
        The standard output decoded as UTF-8 (undecodable bytes dropped) and
        stripped, or ``None`` when the command cannot be run or fails.
    """
    try:
        raw = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
    except Exception:
        return None
    return raw.decode("utf-8", errors="ignore").strip()
|
||||
|
||||
|
||||
def get_active_window_info() -> Dict[str, str]:
    """Return the title and owning application of the active window.

    Relies on ``xdotool`` for the window title and PID, then on ``ps`` to
    turn the PID into a binary name.

    Returns:
        ``{"title": ..., "app_name": ...}``; a value that cannot be obtained
        is replaced by ``"unknown_window"`` / ``"unknown_app"``.
    """
    window_title = _run_cmd(["xdotool", "getactivewindow", "getwindowname"])
    pid = _run_cmd(["xdotool", "getactivewindow", "getwindowpid"])

    # Resolve the binary name through ps, only when a PID was obtained.
    binary = _run_cmd(["ps", "-p", pid.strip(), "-o", "comm="]) if pid else None

    return {
        "title": window_title or "unknown_window",
        "app_name": binary or "unknown_app",
    }
|
||||
@@ -1,192 +0,0 @@
|
||||
# window_info_crossplatform.py
|
||||
"""
|
||||
Récupération des informations sur la fenêtre active - CROSS-PLATFORM
|
||||
|
||||
Supporte:
|
||||
- Linux (X11 via xdotool)
|
||||
- Windows (via pywin32)
|
||||
- macOS (via pyobjc)
|
||||
|
||||
Installation des dépendances:
|
||||
pip install pywin32 # Windows
|
||||
pip install pyobjc-framework-Cocoa # macOS
|
||||
pip install psutil # Tous OS
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import platform
|
||||
import subprocess
|
||||
from typing import Dict, Optional
|
||||
|
||||
|
||||
def _run_cmd(cmd: list[str]) -> Optional[str]:
    """Run a command and return its stripped text output.

    Args:
        cmd: Program and arguments, as a list.

    Returns:
        The standard output decoded as UTF-8 (undecodable bytes dropped) and
        stripped, or ``None`` when the command cannot be run or fails.
    """
    try:
        captured = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
    except Exception:
        return None
    else:
        text = captured.decode("utf-8", errors="ignore")
        return text.strip()
|
||||
|
||||
|
||||
def get_active_window_info() -> Dict[str, str]:
    """Return the title and owning application of the active window.

    The implementation is chosen from ``platform.system()``: Linux, Windows
    or Darwin (macOS).

    Returns:
        ``{"title": ..., "app_name": ...}``; on any other system both values
        are the ``"unknown_window"`` / ``"unknown_app"`` placeholders.
    """
    backends = {
        "Linux": _get_window_info_linux,
        "Windows": _get_window_info_windows,
        "Darwin": _get_window_info_macos,  # macOS
    }
    backend = backends.get(platform.system())
    if backend is None:
        return {"title": "unknown_window", "app_name": "unknown_app"}
    return backend()
|
||||
|
||||
|
||||
def _get_window_info_linux() -> Dict[str, str]:
    """Linux backend: query the active window through xdotool (X11).

    Requires xdotool (``sudo apt-get install xdotool``). The window PID is
    turned into a binary name with ``ps``.

    Returns:
        ``{"title": ..., "app_name": ...}``; a value that cannot be obtained
        is replaced by ``"unknown_window"`` / ``"unknown_app"``.
    """
    window_title = _run_cmd(["xdotool", "getactivewindow", "getwindowname"])
    pid = _run_cmd(["xdotool", "getactivewindow", "getwindowpid"])

    # Resolve the binary name through ps, only when a PID was obtained.
    binary = _run_cmd(["ps", "-p", pid.strip(), "-o", "comm="]) if pid else None

    return {
        "title": window_title or "unknown_window",
        "app_name": binary or "unknown_app",
    }
|
||||
|
||||
|
||||
def _get_window_info_windows() -> Dict[str, str]:
    """Windows backend: query the foreground window with pywin32 + psutil.

    Requires ``pip install pywin32 psutil``.

    Returns:
        ``{"title": ..., "app_name": ...}``. When an import fails, both
        values carry a "(pywin32 missing)" placeholder; on any other error
        the title is ``"error: <exception>"`` and the app is
        ``"unknown_app"``.
    """
    try:
        import win32gui
        import win32process
        import psutil

        # Foreground window and its title (empty title -> placeholder).
        foreground = win32gui.GetForegroundWindow()
        window_title = win32gui.GetWindowText(foreground) or "unknown_window"

        # Owning process: PID first, then its name through psutil.
        _, owner_pid = win32process.GetWindowThreadProcessId(foreground)
        try:
            owner_name = psutil.Process(owner_pid).name()
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            owner_name = "unknown_app"

        return {"title": window_title, "app_name": owner_name}

    except ImportError:
        # pywin32 or psutil is not installed.
        return {
            "title": "unknown_window (pywin32 missing)",
            "app_name": "unknown_app (pywin32 missing)",
        }
    except Exception as e:
        return {"title": f"error: {e}", "app_name": "unknown_app"}
|
||||
|
||||
|
||||
def _get_window_info_macos() -> Dict[str, str]:
    """macOS backend: query the active application with pyobjc.

    Requires ``pip install pyobjc-framework-Cocoa``. The original note states
    that the "Accessibility" permission must be granted in System
    Preferences.

    Returns:
        ``{"title": ..., "app_name": ...}``. The title is that of the first
        on-screen window owned by the active application that has a
        non-empty name, else ``"unknown_window"``. When an import fails,
        both values carry a "(pyobjc missing)" placeholder; on any other
        error the title is ``"error: <exception>"`` and the app is
        ``"unknown_app"``.
    """
    try:
        from AppKit import NSWorkspace
        from Quartz import (
            CGWindowListCopyWindowInfo,
            kCGWindowListOptionOnScreenOnly,
            kCGNullWindowID
        )

        # Active application.
        frontmost = NSWorkspace.sharedWorkspace().activeApplication()
        app_name = frontmost.get('NSApplicationName', 'unknown_app')

        # Window title, looked up through Quartz among on-screen windows.
        on_screen = CGWindowListCopyWindowInfo(
            kCGWindowListOptionOnScreenOnly,
            kCGNullWindowID
        )

        title = "unknown_window"
        for entry in on_screen:
            if entry.get('kCGWindowOwnerName', '') != app_name:
                continue
            candidate = entry.get('kCGWindowName', '')
            if candidate:
                title = candidate
                break

        return {"title": title, "app_name": app_name}

    except ImportError:
        # pyobjc is not installed.
        return {
            "title": "unknown_window (pyobjc missing)",
            "app_name": "unknown_app (pyobjc missing)",
        }
    except Exception as e:
        return {"title": f"error: {e}", "app_name": "unknown_app"}
|
||||
|
||||
|
||||
# Quick manual check: print the active window once per second, five times.
if __name__ == "__main__":
    import time

    print(f"OS détecté: {platform.system()}")
    print("\nTest de capture fenêtre active (5 secondes)...")
    print("Changez de fenêtre pour tester!\n")

    for attempt in range(1, 6):
        current = get_active_window_info()
        print(f"[{attempt}] App: {current['app_name']:20s} | Title: {current['title']}")
        time.sleep(1)
|
||||
@@ -16,7 +16,8 @@ import logging
|
||||
import threading
|
||||
from .config import (
|
||||
SESSIONS_ROOT, AGENT_VERSION, SERVER_URL, MACHINE_ID, LOG_RETENTION_DAYS,
|
||||
SCREEN_RESOLUTION, DPI_SCALE, OS_THEME,
|
||||
SCREEN_RESOLUTION, DPI_SCALE, OS_THEME, API_TOKEN, MAX_SESSION_DURATION_S,
|
||||
STREAMING_ENDPOINT,
|
||||
)
|
||||
from .core.captor import EventCaptorV1
|
||||
from .core.executor import ActionExecutorV1
|
||||
@@ -38,8 +39,19 @@ except (ImportError, ValueError):
|
||||
except ImportError:
|
||||
LeaServerClient = None
|
||||
|
||||
# Configuration du logging
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
|
||||
# Configuration du logging — format structuré et lisible pour un TIM
|
||||
# Niveau de détail : INFO par défaut, DEBUG si RPA_AGENT_DEBUG=1
|
||||
_log_level = logging.DEBUG if os.environ.get("RPA_AGENT_DEBUG") == "1" else logging.INFO
|
||||
logging.basicConfig(
|
||||
level=_log_level,
|
||||
format="%(asctime)s %(levelname)-7s %(name)-25s %(message)s",
|
||||
datefmt="%H:%M:%S",
|
||||
)
|
||||
|
||||
# Réduire le bruit de certaines libs
|
||||
for _noisy in ("urllib3", "requests.packages.urllib3", "PIL", "mss"):
|
||||
logging.getLogger(_noisy).setLevel(logging.WARNING)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Intervalle de polling replay (secondes)
|
||||
@@ -75,15 +87,23 @@ class AgentV1:
|
||||
self._state.set_on_stop(self.stop_session)
|
||||
|
||||
# Client serveur pour le chat et les workflows
|
||||
# Plus de RPA_SERVER_HOST : le LeaServerClient derive tout de SERVER_URL
|
||||
self._server_client = None
|
||||
if LeaServerClient is not None:
|
||||
# Forcer le token API pour éviter les 401
|
||||
# (le token est set par start.bat dans l'environnement)
|
||||
from .config import API_TOKEN as _token
|
||||
self._server_client = LeaServerClient()
|
||||
if _token and not self._server_client._api_token:
|
||||
self._server_client._api_token = _token
|
||||
logger.info("Token API forcé dans LeaServerClient")
|
||||
|
||||
# Fenetre de chat Lea (tkinter natif)
|
||||
# Le host est derive de SERVER_URL (plus de RPA_SERVER_HOST)
|
||||
server_host = (
|
||||
self._server_client.server_host
|
||||
if self._server_client is not None
|
||||
else os.getenv("RPA_SERVER_HOST", "localhost")
|
||||
else "localhost"
|
||||
)
|
||||
self._chat_window = ChatWindow(
|
||||
server_client=self._server_client,
|
||||
@@ -129,6 +149,58 @@ class AgentV1:
|
||||
time.sleep(30)
|
||||
self.storage.run_auto_cleanup()
|
||||
|
||||
def _auto_stop_loop(self):
    """Stop the recording automatically after MAX_SESSION_DURATION_S.

    The user may forget to stop a recording. A notification is sent 10
    minutes before the deadline, then the session is stopped automatically
    once the configured maximum duration has elapsed. The loop wakes up
    every 30 seconds.

    The loop is bound to the session that was active when it started: if
    that session ends (or is replaced by a new one) the loop exits, so a
    thread left over from a previous session can never warn about or stop
    a newer session.
    """
    warn_before = 600  # Warn 10 minutes before the end
    warned = False

    # Snapshot the session this loop is responsible for.
    own_session = self.session_id
    started_at = self._session_start_time

    def notify(message):
        # Notifications are best effort: a UI failure must never prevent
        # the auto-stop itself, but it is logged instead of being hidden.
        try:
            from .ui.notifications import NotificationManager
            NotificationManager().notify("Léa", message)
        except Exception as exc:
            logger.debug(f"Auto-stop : notification impossible ({exc})")

    while self.running and own_session and self.session_id == own_session:
        elapsed = time.time() - started_at
        remaining = MAX_SESSION_DURATION_S - elapsed

        # Warning shortly before the end (skipped when the deadline has
        # already passed: the stop below happens right away).
        if not warned and 0 < remaining <= warn_before:
            warned = True
            mins = int(remaining / 60)
            logger.info(f"Auto-stop dans {mins} min")
            notify(
                f"L'enregistrement s'arrêtera automatiquement dans {mins} minutes."
            )

        # Auto-stop
        if remaining <= 0:
            logger.info(
                f"Auto-stop : session {own_session} après "
                f"{int(elapsed)}s ({int(elapsed/60)} min)"
            )
            notify(
                f"Enregistrement terminé automatiquement après "
                f"{int(elapsed/60)} minutes. Merci !"
            )

            # Stop through the shared state when available (keeps the
            # systray and the chat window in sync).
            if self._state is not None:
                self._state.stop_recording()
            else:
                self.stop_session()
            break

        time.sleep(30)  # Check every 30 s
|
||||
|
||||
def start_session(self, workflow_name):
|
||||
self.session_id = f"sess_{time.strftime('%Y%m%dT%H%M%S')}_{uuid.uuid4().hex[:6]}"
|
||||
self.session_dir = self.storage.get_session_dir(self.session_id)
|
||||
@@ -150,6 +222,11 @@ class AgentV1:
|
||||
# Heartbeat Contextuel (Toutes les 5s par defaut)
|
||||
threading.Thread(target=self._heartbeat_loop, daemon=True).start()
|
||||
|
||||
# Auto-stop : arrêter l'enregistrement après MAX_SESSION_DURATION_S
|
||||
# L'utilisateur peut oublier d'arrêter — on le fait automatiquement
|
||||
self._session_start_time = time.time()
|
||||
threading.Thread(target=self._auto_stop_loop, daemon=True).start()
|
||||
|
||||
# Watchdog de Commandes (GHOST Replay — legacy fichier)
|
||||
threading.Thread(target=self._command_watchdog_loop, daemon=True).start()
|
||||
|
||||
@@ -288,29 +365,42 @@ class AgentV1:
|
||||
continue
|
||||
self._last_bg_hash = img_hash
|
||||
|
||||
# Envoyer au streaming server
|
||||
# Envoyer au streaming server (via STREAMING_ENDPOINT unifié)
|
||||
headers = {"Authorization": f"Bearer {API_TOKEN}"} if API_TOKEN else {}
|
||||
with open(full_path, 'rb') as f:
|
||||
req.post(
|
||||
f"{SERVER_URL}/traces/stream/image",
|
||||
f"{STREAMING_ENDPOINT}/image",
|
||||
params={
|
||||
"session_id": bg_session,
|
||||
"shot_id": f"heartbeat_{int(time.time())}",
|
||||
"machine_id": self.machine_id,
|
||||
},
|
||||
headers=headers,
|
||||
files={"file": ("screenshot.png", f, "image/png")},
|
||||
timeout=10,
|
||||
allow_redirects=False,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug(f"[HEARTBEAT] Erreur: {e}")
|
||||
time.sleep(5)
|
||||
|
||||
def stop_session(self):
|
||||
# Arrêter la capture et le streaming de la session d'enregistrement
|
||||
if self.captor: self.captor.stop()
|
||||
if self.streamer: self.streamer.stop()
|
||||
logger.info(f"Session {self.session_id} terminée.")
|
||||
# Sauvegarder le session_id avant de l'annuler (pour les logs)
|
||||
ended_session_id = self.session_id
|
||||
|
||||
# Reset le session_id pour que le poll replay utilise l'ID stable
|
||||
# Arrêter la capture d'abord (plus d'events entrants)
|
||||
if self.captor: self.captor.stop()
|
||||
|
||||
# Attendre que les events en cours de traitement dans _on_event_bridge
|
||||
# aient le temps d'être envoyés au streamer (capture duale + push)
|
||||
import time
|
||||
time.sleep(1.5)
|
||||
|
||||
# Maintenant arrêter le streamer (drain queue + finalize)
|
||||
if self.streamer: self.streamer.stop()
|
||||
logger.info(f"Session {ended_session_id} terminée.")
|
||||
|
||||
# Reset le session_id APRÈS le stop complet du streamer
|
||||
self.session_id = None
|
||||
|
||||
# Reset le backoff de l'executor pour reprendre le polling immédiatement
|
||||
@@ -337,6 +427,7 @@ class AgentV1:
|
||||
"""Capture périodique pour donner du contexte au stagiaire.
|
||||
Déduplication : n'envoie que si l'écran a changé.
|
||||
Tourne tant que session_id est défini (= enregistrement actif).
|
||||
Enrichi avec le titre de la fenêtre active pour contextualisation.
|
||||
"""
|
||||
while self.running and self.session_id:
|
||||
try:
|
||||
@@ -347,7 +438,17 @@ class AgentV1:
|
||||
if img_hash != self._last_heartbeat_hash:
|
||||
self._last_heartbeat_hash = img_hash
|
||||
self.streamer.push_image(full_path, f"heartbeat_{int(time.time())}")
|
||||
self.streamer.push_event({"type": "heartbeat", "image": full_path, "timestamp": time.time(), "machine_id": self.machine_id})
|
||||
heartbeat_event = {
|
||||
"type": "heartbeat",
|
||||
"image": full_path,
|
||||
"timestamp": time.time(),
|
||||
"machine_id": self.machine_id,
|
||||
}
|
||||
# Ajouter le titre de la fenêtre active (léger, pas de crop)
|
||||
window_title = self.vision.get_active_window_title()
|
||||
if window_title:
|
||||
heartbeat_event["active_window_title"] = window_title
|
||||
self.streamer.push_event(heartbeat_event)
|
||||
except Exception as e:
|
||||
logger.error(f"Heartbeat error: {e}")
|
||||
time.sleep(5)
|
||||
@@ -382,20 +483,33 @@ class AgentV1:
|
||||
event["screenshot_context"] = full_path
|
||||
self.streamer.push_image(full_path, f"focus_{int(time.time())}")
|
||||
|
||||
# 🔴 Capture Interactive (Dual)
|
||||
# Capture Interactive (Dual + Fenêtre active)
|
||||
if event["type"] in ["mouse_click", "key_combo"]:
|
||||
self.shot_counter += 1
|
||||
shot_id = f"shot_{self.shot_counter:04d}"
|
||||
|
||||
|
||||
pos = event.get("pos", (0, 0))
|
||||
capture_info = self.vision.capture_dual(pos[0], pos[1], shot_id)
|
||||
|
||||
|
||||
event["screenshot_id"] = shot_id
|
||||
event["vision_info"] = capture_info
|
||||
|
||||
|
||||
# Enrichir l'event avec les métadonnées de la fenêtre active
|
||||
# (titre, rect, coordonnées clic relatives, taille fenêtre)
|
||||
window_capture = capture_info.get("window_capture")
|
||||
if window_capture:
|
||||
event["window_capture"] = {
|
||||
"title": window_capture.get("window_title", ""),
|
||||
"app_name": window_capture.get("app_name", ""),
|
||||
"rect": window_capture.get("window_rect"),
|
||||
"click_relative": window_capture.get("click_in_window"),
|
||||
"window_size": window_capture.get("window_size"),
|
||||
"click_inside_window": window_capture.get("click_inside_window", True),
|
||||
}
|
||||
|
||||
self._stream_capture_info(capture_info, shot_id)
|
||||
|
||||
# 🕒 POST-ACTION : Capture du résultat après 1s (pour voir le résultat du clic)
|
||||
|
||||
# POST-ACTION : Capture du résultat après 1s (pour voir le résultat du clic)
|
||||
threading.Timer(1.0, self._capture_result, args=(shot_id,)).start()
|
||||
|
||||
self.ui.update_stats(self.shot_counter)
|
||||
@@ -415,6 +529,12 @@ class AgentV1:
|
||||
self.streamer.push_image(capture_info["full"], f"{shot_id}_full")
|
||||
if "crop" in capture_info:
|
||||
self.streamer.push_image(capture_info["crop"], f"{shot_id}_crop")
|
||||
# Streamer l'image de la fenêtre active si disponible
|
||||
window_capture = capture_info.get("window_capture")
|
||||
if window_capture and "window_image" in window_capture:
|
||||
self.streamer.push_image(
|
||||
window_capture["window_image"], f"{shot_id}_window"
|
||||
)
|
||||
|
||||
def run(self):
|
||||
self.ui.run()
|
||||
|
||||
380
agent_v0/agent_v1/network/persistent_buffer.py
Normal file
380
agent_v0/agent_v1/network/persistent_buffer.py
Normal file
@@ -0,0 +1,380 @@
|
||||
# agent_v1/network/persistent_buffer.py
|
||||
"""
|
||||
Buffer persistant SQLite pour les événements/images qui n'ont pas pu être envoyés.
|
||||
|
||||
Résout le bloquant AI Act Article 12 : en cas de coupure serveur ou de queue pleine,
|
||||
les événements prioritaires (click, key, action, screenshot) sont persistés sur disque
|
||||
au lieu d'être silencieusement perdus. Ils sont rejoués à la reconnexion.
|
||||
|
||||
Caractéristiques :
|
||||
- SQLite fichier unique (agent_v1/buffer/pending_events.db), thread-safe
|
||||
- Async : les écritures se font depuis un thread daemon, jamais bloquant
|
||||
- Quota : compteur d'attempts par item, abandon après MAX_ATTEMPTS
|
||||
- Robustesse : un fichier corrompu est renommé et recréé vide
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sqlite3
|
||||
import threading
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)

# Maximum number of delivery attempts before an item is abandoned for good.
MAX_ATTEMPTS = 10

# Maximum number of rows per table, to keep disk usage bounded
# (author's estimate: 1000 events + 1000 images = a few MB of SQLite).
MAX_BUFFER_ITEMS = 2000


class PersistentBuffer:
    """SQLite buffer for events/images that are waiting to be sent.

    Two tables:
      - pending_events (id, session_id, payload, attempts, created_at)
      - pending_images (id, session_id, shot_id, image_path, attempts, created_at)

    Every public method takes the instance lock and opens a short-lived
    connection that is closed before the method returns.

    Usage:
        buf = PersistentBuffer(base_dir / "buffer")
        buf.add_event(session_id, event_dict)            # persist an event
        buf.add_image(session_id, image_path, shot_id)   # persist an image
        for row in buf.drain_events():                   # pending events
            if send(row):
                buf.delete_event(row["id"])
            else:
                buf.increment_attempts(row["id"], "event")
    """

    # Schema statements shared by normal initialisation and by the recovery
    # path, so a recreated database gets the same tables AND indexes.
    _SCHEMA = (
        "CREATE TABLE IF NOT EXISTS pending_events ("
        "id INTEGER PRIMARY KEY AUTOINCREMENT, "
        "session_id TEXT NOT NULL, "
        "payload TEXT NOT NULL, "
        "attempts INTEGER NOT NULL DEFAULT 0, "
        "created_at REAL NOT NULL)",
        "CREATE TABLE IF NOT EXISTS pending_images ("
        "id INTEGER PRIMARY KEY AUTOINCREMENT, "
        "session_id TEXT NOT NULL, "
        "shot_id TEXT NOT NULL, "
        "image_path TEXT NOT NULL, "
        "attempts INTEGER NOT NULL DEFAULT 0, "
        "created_at REAL NOT NULL)",
        "CREATE INDEX IF NOT EXISTS idx_events_created "
        "ON pending_events(created_at)",
        "CREATE INDEX IF NOT EXISTS idx_images_created "
        "ON pending_images(created_at)",
    )

    def __init__(self, buffer_dir: Path):
        """Create the buffer directory (if needed) and initialise the DB.

        Args:
            buffer_dir: directory that holds ``pending_events.db``.
        """
        self.buffer_dir = Path(buffer_dir)
        self.buffer_dir.mkdir(parents=True, exist_ok=True)
        self.db_path = self.buffer_dir / "pending_events.db"
        self._lock = threading.Lock()
        self._init_db()

    # ---------------------------------------------------------------
    # Initialisation / corruption handling
    # ---------------------------------------------------------------

    def _init_db(self):
        """Create the tables and indexes if they do not exist yet.

        If SQLite reports a database error, the file is renamed to
        ``.corrupted.<timestamp>`` (or deleted when the rename fails) and an
        empty buffer is recreated: losing an unreadable buffer is preferred
        over crashing the agent at start-up. A failure of the second attempt
        propagates to the caller.
        """
        try:
            self._create_schema()
        except sqlite3.DatabaseError as e:
            # NOTE(review): sqlite3.OperationalError (e.g. a locked database)
            # is also a DatabaseError and therefore takes this path as well.
            logger.warning(
                f"Buffer SQLite corrompu ({e}) — renommage en .corrupted "
                f"et recréation d'un buffer vide"
            )
            self._quarantine_db_file()
            # Second attempt, on a fresh (empty) file.
            self._create_schema()

    def _create_schema(self):
        """Run every schema statement on a short-lived connection."""
        with self._session() as conn:
            for statement in self._SCHEMA:
                conn.execute(statement)

    def _quarantine_db_file(self):
        """Move the unreadable database out of the way (best effort)."""
        try:
            corrupted = self.db_path.with_suffix(
                f".corrupted.{int(time.time())}"
            )
            os.rename(self.db_path, corrupted)
        except OSError:
            # If the rename fails, fall back to deleting the file.
            try:
                os.remove(self.db_path)
            except OSError:
                pass
        # Also drop WAL side files left next to the old database so the
        # fresh file does not start with leftovers from the unreadable one.
        for suffix in ("-wal", "-shm"):
            try:
                os.remove(f"{self.db_path}{suffix}")
            except OSError:
                pass

    def _connect(self) -> sqlite3.Connection:
        """Open a SQLite connection in WAL mode (better concurrency).

        The caller owns the returned connection and must close it.
        """
        conn = sqlite3.connect(
            str(self.db_path),
            timeout=5.0,
            check_same_thread=False,
            isolation_level=None,  # autocommit: each statement is committed
        )
        try:
            conn.execute("PRAGMA journal_mode=WAL")
            conn.execute("PRAGMA synchronous=NORMAL")
        except sqlite3.DatabaseError:
            # The PRAGMAs are an optimisation; real errors surface on the
            # first actual statement.
            pass
        conn.row_factory = sqlite3.Row
        return conn

    def _session(self):
        """Return a context manager yielding a connection closed on exit.

        ``with connection:`` on a sqlite3 connection only commits or rolls
        back and leaves the connection open, so ``contextlib.closing`` is
        used to release the handle deterministically. Connections run in
        autocommit mode, so no explicit commit is required.
        """
        from contextlib import closing

        return closing(self._connect())

    @staticmethod
    def _count(conn, table: str) -> int:
        """Return the number of rows in ``table`` (an internal table name)."""
        return conn.execute(f"SELECT COUNT(*) FROM {table}").fetchone()[0]

    # ---------------------------------------------------------------
    # Writes — persist one item
    # ---------------------------------------------------------------

    def add_event(self, session_id: str, event: dict) -> bool:
        """Persist an event. Return True if written, False otherwise.

        When the events table already holds MAX_BUFFER_ITEMS rows the insert
        is dropped (rather than filling the disk) and a warning is logged.
        An event that cannot be serialised to JSON is rejected with an error
        log and False.
        """
        with self._lock:
            try:
                with self._session() as conn:
                    count = self._count(conn, "pending_events")
                    if count >= MAX_BUFFER_ITEMS:
                        logger.warning(
                            f"Buffer persistant saturé ({count} events) "
                            f"— event droppé"
                        )
                        return False
                    conn.execute(
                        "INSERT INTO pending_events "
                        "(session_id, payload, attempts, created_at) "
                        "VALUES (?, ?, 0, ?)",
                        (session_id, json.dumps(event), time.time()),
                    )
                    return True
            except (sqlite3.DatabaseError, TypeError, ValueError) as e:
                logger.error(f"Buffer add_event échoué : {e}")
                return False

    def add_image(
        self, session_id: str, image_path: str, shot_id: str
    ) -> bool:
        """Persist an image reference (file path + shot_id).

        The image bytes are NOT stored (that would bloat the database), only
        the path. The image file must therefore stay on disk until it has
        been sent successfully. Returns True if written, False when the
        images table is full or the write failed.
        """
        with self._lock:
            try:
                with self._session() as conn:
                    count = self._count(conn, "pending_images")
                    if count >= MAX_BUFFER_ITEMS:
                        logger.warning(
                            f"Buffer persistant saturé ({count} images) "
                            f"— image droppée"
                        )
                        return False
                    conn.execute(
                        "INSERT INTO pending_images "
                        "(session_id, shot_id, image_path, attempts, created_at) "
                        "VALUES (?, ?, ?, 0, ?)",
                        (session_id, shot_id, image_path, time.time()),
                    )
                    return True
            except sqlite3.DatabaseError as e:
                logger.error(f"Buffer add_image échoué : {e}")
                return False

    # ---------------------------------------------------------------
    # Reads — pending items in chronological order
    # ---------------------------------------------------------------

    def drain_events(self, limit: int = 100) -> list:
        """Return up to ``limit`` pending events, oldest first.

        Rows are returned as dicts (id, session_id, payload, attempts) and
        are NOT removed; call ``delete_event`` after a successful send.
        Returns an empty list on database error.
        """
        with self._lock:
            try:
                with self._session() as conn:
                    rows = conn.execute(
                        "SELECT id, session_id, payload, attempts "
                        "FROM pending_events "
                        "ORDER BY created_at ASC LIMIT ?",
                        (limit,),
                    ).fetchall()
                    return [dict(r) for r in rows]
            except sqlite3.DatabaseError as e:
                logger.error(f"Buffer drain_events échoué : {e}")
                return []

    def drain_images(self, limit: int = 50) -> list:
        """Return up to ``limit`` pending images, oldest first.

        Rows are returned as dicts (id, session_id, shot_id, image_path,
        attempts) and are NOT removed; call ``delete_image`` after a
        successful send. Returns an empty list on database error.
        """
        with self._lock:
            try:
                with self._session() as conn:
                    rows = conn.execute(
                        "SELECT id, session_id, shot_id, image_path, attempts "
                        "FROM pending_images "
                        "ORDER BY created_at ASC LIMIT ?",
                        (limit,),
                    ).fetchall()
                    return [dict(r) for r in rows]
            except sqlite3.DatabaseError as e:
                logger.error(f"Buffer drain_images échoué : {e}")
                return []

    # ---------------------------------------------------------------
    # Bookkeeping — success, failure, abandonment
    # ---------------------------------------------------------------

    def delete_event(self, row_id: int):
        """Delete an event after it has been sent successfully."""
        with self._lock:
            try:
                with self._session() as conn:
                    conn.execute(
                        "DELETE FROM pending_events WHERE id = ?", (row_id,)
                    )
            except sqlite3.DatabaseError as e:
                logger.error(f"Buffer delete_event échoué : {e}")

    def delete_image(self, row_id: int):
        """Delete an image reference after it has been sent successfully."""
        with self._lock:
            try:
                with self._session() as conn:
                    conn.execute(
                        "DELETE FROM pending_images WHERE id = ?", (row_id,)
                    )
            except sqlite3.DatabaseError as e:
                logger.error(f"Buffer delete_image échoué : {e}")

    def increment_attempts(self, row_id: int, kind: str) -> int:
        """Increment the attempt counter of a row and return the new value.

        Args:
            row_id: primary key of the row.
            kind: ``"event"`` selects pending_events; any other value
                selects pending_images.

        Returns:
            The updated counter, or MAX_ATTEMPTS when the row does not exist
            or the database operation failed.
        """
        # The table name comes from this fixed two-value choice, never from
        # caller-supplied text, so interpolating it into the SQL is safe.
        table = "pending_events" if kind == "event" else "pending_images"
        with self._lock:
            try:
                with self._session() as conn:
                    conn.execute(
                        f"UPDATE {table} SET attempts = attempts + 1 "
                        "WHERE id = ?",
                        (row_id,),
                    )
                    row = conn.execute(
                        f"SELECT attempts FROM {table} WHERE id = ?", (row_id,)
                    ).fetchone()
                    return int(row["attempts"]) if row else MAX_ATTEMPTS
            except sqlite3.DatabaseError as e:
                logger.error(f"Buffer increment_attempts échoué : {e}")
                return MAX_ATTEMPTS

    def abandon_exceeded(self) -> int:
        """Delete the items whose attempt counter reached MAX_ATTEMPTS.

        Each abandoned item is logged as an error (audit trail) before the
        rows are deleted. Returns the number of abandoned items.
        """
        abandoned = 0
        with self._lock:
            try:
                with self._session() as conn:
                    # Abandoned events
                    rows = conn.execute(
                        "SELECT id, session_id, payload FROM pending_events "
                        "WHERE attempts >= ?",
                        (MAX_ATTEMPTS,),
                    ).fetchall()
                    for r in rows:
                        try:
                            event_type = json.loads(r["payload"]).get(
                                "type", "?"
                            )
                        except (ValueError, TypeError):
                            event_type = "?"
                        logger.error(
                            f"Buffer : event abandonné après {MAX_ATTEMPTS} "
                            f"tentatives — session={r['session_id']} "
                            f"type={event_type}"
                        )
                        abandoned += 1
                    conn.execute(
                        "DELETE FROM pending_events WHERE attempts >= ?",
                        (MAX_ATTEMPTS,),
                    )

                    # Abandoned images
                    rows = conn.execute(
                        "SELECT id, session_id, shot_id FROM pending_images "
                        "WHERE attempts >= ?",
                        (MAX_ATTEMPTS,),
                    ).fetchall()
                    for r in rows:
                        logger.error(
                            f"Buffer : image abandonnée après {MAX_ATTEMPTS} "
                            f"tentatives — session={r['session_id']} "
                            f"shot_id={r['shot_id']}"
                        )
                        abandoned += 1
                    conn.execute(
                        "DELETE FROM pending_images WHERE attempts >= ?",
                        (MAX_ATTEMPTS,),
                    )
            except sqlite3.DatabaseError as e:
                logger.error(f"Buffer abandon_exceeded échoué : {e}")
        return abandoned

    # ---------------------------------------------------------------
    # Introspection
    # ---------------------------------------------------------------

    def counts(self) -> dict:
        """Return ``{"events": n, "images": m}`` for diagnostics.

        Both counts are 0 when the database cannot be read.
        """
        with self._lock:
            try:
                with self._session() as conn:
                    ev = self._count(conn, "pending_events")
                    im = self._count(conn, "pending_images")
                    return {"events": ev, "images": im}
            except sqlite3.DatabaseError:
                return {"events": 0, "images": 0}

    def is_empty(self) -> bool:
        """Return True when neither events nor images are pending."""
        c = self.counts()
        return c["events"] == 0 and c["images"] == 0
|
||||
@@ -14,10 +14,19 @@ Robustesse (P0-2) :
|
||||
- Health-check périodique (30s) pour recovery du flag _server_available
|
||||
- Compression JPEG qualité 85 pour les images (réduction ~5-10x)
|
||||
- Backpressure : queue bornée (maxsize=100), drop des heartbeat si pleine
|
||||
|
||||
Conformité AI Act (Article 12 — journalisation automatique) :
|
||||
- Purge après ACK : les screenshots locaux sont supprimés après HTTP 200
|
||||
du serveur (par défaut). Le serveur devient la source de vérité.
|
||||
- Buffer persistant : les events/images prioritaires non envoyés sont
|
||||
persistés dans un SQLite local (agent_v1/buffer/pending_events.db)
|
||||
et rejoués au démarrage et à la reconnexion.
|
||||
"""
|
||||
|
||||
import enum
|
||||
import io
|
||||
import logging
|
||||
import os
|
||||
import queue
|
||||
import threading
|
||||
import time
|
||||
@@ -25,7 +34,18 @@ import time
|
||||
import requests
|
||||
from PIL import Image
|
||||
|
||||
from ..config import API_TOKEN, STREAMING_ENDPOINT
|
||||
from ..config import API_TOKEN, BASE_DIR, STREAMING_ENDPOINT
|
||||
from .persistent_buffer import MAX_ATTEMPTS, PersistentBuffer
|
||||
|
||||
|
||||
class ImageSendResult(enum.Enum):
    """Three-valued outcome of an image upload (fix P0-E).

    A missing local file must NOT be treated like an HTTP 200 success:
    otherwise the SQLite buffer entry would be deleted although the server
    never received the image (silent loss).
    """

    # HTTP 200: the server acknowledged receipt.
    OK = "ok"
    # Recoverable network/server error (a retry is fine).
    FAILED = "failed"
    # Local file not found (give up, no retry).
    FILE_GONE = "file_gone"
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -45,6 +65,20 @@ QUEUE_MAX_SIZE = 100
|
||||
# Types d'événements à ne jamais dropper
|
||||
PRIORITY_EVENT_TYPES = {"click", "key", "scroll", "action", "screenshot"}
|
||||
|
||||
# Purge locale après ACK serveur (Partie A de l'audit)
|
||||
# Activé par défaut : le serveur conserve déjà les screenshots 180 jours
|
||||
# (conformité AI Act Article 12). Désactivable via RPA_PURGE_AFTER_ACK=0
|
||||
# pour debugging local.
|
||||
PURGE_AFTER_ACK = os.environ.get("RPA_PURGE_AFTER_ACK", "1").lower() in (
|
||||
"1", "true", "yes",
|
||||
)
|
||||
|
||||
# Chemin du buffer persistant (Partie B de l'audit)
|
||||
BUFFER_DIR = BASE_DIR / "buffer"
|
||||
|
||||
# Intervalle entre deux tentatives de drain du buffer (secondes)
|
||||
BUFFER_DRAIN_INTERVAL_S = 15
|
||||
|
||||
|
||||
class TraceStreamer:
|
||||
def __init__(self, session_id: str, machine_id: str = "default"):
|
||||
@@ -54,8 +88,20 @@ class TraceStreamer:
|
||||
self.running = False
|
||||
self._thread = None
|
||||
self._health_thread = None
|
||||
self._drain_thread = None
|
||||
self._server_available = True # Désactivé après trop d'échecs
|
||||
|
||||
# Buffer persistant — partagé entre sessions (survit au redémarrage)
|
||||
# Initialisé paresseusement pour ne pas payer le coût SQLite en dehors
|
||||
# d'un streaming actif.
|
||||
self._buffer: PersistentBuffer | None = None
|
||||
|
||||
def _get_buffer(self) -> PersistentBuffer:
    """Return the persistent buffer, creating it on first use."""
    buffer = self._buffer
    if buffer is None:
        # Lazy creation: the SQLite store is only opened when first needed.
        buffer = self._buffer = PersistentBuffer(BUFFER_DIR)
    return buffer
|
||||
|
||||
@staticmethod
|
||||
def _auth_headers() -> dict:
|
||||
"""Headers d'authentification Bearer pour les requêtes API."""
|
||||
@@ -75,6 +121,11 @@ class TraceStreamer:
|
||||
target=self._health_check_loop, daemon=True
|
||||
)
|
||||
self._health_thread.start()
|
||||
# Thread de drain du buffer persistant (rejoue les items en attente)
|
||||
self._drain_thread = threading.Thread(
|
||||
target=self._buffer_drain_loop, daemon=True
|
||||
)
|
||||
self._drain_thread.start()
|
||||
logger.info(f"Streamer pour {self.session_id} démarré")
|
||||
|
||||
def stop(self):
|
||||
@@ -99,6 +150,9 @@ class TraceStreamer:
|
||||
if self._health_thread:
|
||||
self._health_thread.join(timeout=2.0)
|
||||
|
||||
if self._drain_thread:
|
||||
self._drain_thread.join(timeout=2.0)
|
||||
|
||||
self._finalize_session()
|
||||
logger.info(f"Streamer pour {self.session_id} arrêté")
|
||||
|
||||
@@ -126,11 +180,21 @@ class TraceStreamer:
|
||||
|
||||
Quand la queue est pleine :
|
||||
- Les événements prioritaires (click, key, action, screenshot) sont
|
||||
ajoutés en bloquant brièvement (0.5s)
|
||||
- Les heartbeat sont silencieusement droppés
|
||||
ajoutés en bloquant brièvement (0.5s). Si toujours pleine → persistés
|
||||
dans le buffer SQLite pour rejeu ultérieur.
|
||||
- Les heartbeat sont silencieusement droppés.
|
||||
- Si le serveur est marqué indisponible, on persiste immédiatement les
|
||||
items prioritaires (évite de remplir la queue inutilement).
|
||||
"""
|
||||
is_priority = self._is_priority_item(item_type, data)
|
||||
|
||||
# Serveur indisponible + item prioritaire → on persiste directement
|
||||
# sans polluer la queue RAM (qui ne sera jamais vidée tant que le
|
||||
# serveur est down).
|
||||
if is_priority and not self._server_available:
|
||||
self._persist_to_buffer(item_type, data)
|
||||
return
|
||||
|
||||
try:
|
||||
self.queue.put_nowait((item_type, data))
|
||||
except queue.Full:
|
||||
@@ -139,10 +203,18 @@ class TraceStreamer:
|
||||
try:
|
||||
self.queue.put((item_type, data), timeout=0.5)
|
||||
except queue.Full:
|
||||
logger.warning(
|
||||
f"Queue pleine — événement prioritaire droppé "
|
||||
f"(type={item_type})"
|
||||
)
|
||||
# Persistance disque (ne JAMAIS dropper un prioritaire)
|
||||
persisted = self._persist_to_buffer(item_type, data)
|
||||
if persisted:
|
||||
logger.warning(
|
||||
f"Queue pleine — événement prioritaire persisté "
|
||||
f"sur disque (type={item_type})"
|
||||
)
|
||||
else:
|
||||
logger.error(
|
||||
f"Queue pleine ET buffer saturé — événement "
|
||||
f"prioritaire perdu (type={item_type})"
|
||||
)
|
||||
else:
|
||||
# Heartbeat ou événement non-critique : on drop silencieusement
|
||||
logger.debug(
|
||||
@@ -163,6 +235,23 @@ class TraceStreamer:
|
||||
return event_type in PRIORITY_EVENT_TYPES
|
||||
return False
|
||||
|
||||
def _persist_to_buffer(self, item_type: str, data) -> bool:
    """Store an item in the SQLite buffer; return True on success.

    Used when the queue is full or the server is unavailable. Items of any
    other type (or events that are not dicts) are not stored and yield
    False.
    """
    try:
        store = self._get_buffer()
        if item_type == "image":
            image_path, shot = data
            return store.add_image(self.session_id, image_path, shot)
        if item_type == "event" and isinstance(data, dict):
            return store.add_event(self.session_id, data)
    except Exception as e:
        # A buffer failure must never stop the agent.
        logger.error(f"Persistance buffer échouée : {e}")
    return False
|
||||
|
||||
# =========================================================================
|
||||
# Boucle d'envoi
|
||||
# =========================================================================
|
||||
@@ -174,16 +263,36 @@ class TraceStreamer:
|
||||
try:
|
||||
item_type, data = self.queue.get(timeout=0.5)
|
||||
success = False
|
||||
is_file_gone = False
|
||||
if item_type == "event":
|
||||
success = self._send_with_retry(self._send_event, data)
|
||||
elif item_type == "image":
|
||||
success = self._send_with_retry(self._send_image, *data)
|
||||
result = self._send_with_retry(self._send_image, *data)
|
||||
# Fix P0-E : distinguer FILE_GONE du vrai succès HTTP.
|
||||
if result is ImageSendResult.OK:
|
||||
success = True
|
||||
elif result is ImageSendResult.FILE_GONE:
|
||||
# Fichier disparu : pas de retry, pas de persistance
|
||||
# (on ne peut plus le renvoyer). On considère l'item
|
||||
# comme traité sans comptabiliser un succès réseau.
|
||||
is_file_gone = True
|
||||
success = False
|
||||
else:
|
||||
success = False
|
||||
self.queue.task_done()
|
||||
|
||||
if success:
|
||||
consecutive_failures = 0
|
||||
elif is_file_gone:
|
||||
# Fichier introuvable — déjà logué ERROR dans _send_image.
|
||||
# On ne persiste PAS dans le buffer (retry voué à échouer).
|
||||
consecutive_failures = 0
|
||||
else:
|
||||
consecutive_failures += 1
|
||||
# Après 3 retries infructueux, si l'item est prioritaire,
|
||||
# on le persiste pour ne pas le perdre définitivement.
|
||||
if self._is_priority_item(item_type, data):
|
||||
self._persist_to_buffer(item_type, data)
|
||||
if consecutive_failures >= 10:
|
||||
logger.warning(
|
||||
"10 échecs consécutifs — serveur marqué indisponible"
|
||||
@@ -200,15 +309,22 @@ class TraceStreamer:
|
||||
# Retry avec backoff exponentiel
|
||||
# =========================================================================
|
||||
|
||||
def _send_with_retry(self, send_fn, *args) -> bool:
|
||||
def _send_with_retry(self, send_fn, *args):
|
||||
"""Tente l'envoi avec retry et backoff exponentiel.
|
||||
|
||||
3 tentatives max avec délais de 1s, 2s, 4s entre chaque.
|
||||
Retourne True si l'envoi a réussi, False sinon.
|
||||
Retourne :
|
||||
- True / ImageSendResult.OK si l'envoi a réussi
|
||||
- ImageSendResult.FILE_GONE (images uniquement) — pas de retry
|
||||
- False / ImageSendResult.FAILED sinon
|
||||
"""
|
||||
# Première tentative (sans délai)
|
||||
if send_fn(*args):
|
||||
return True
|
||||
first = send_fn(*args)
|
||||
if first is ImageSendResult.OK or first is True:
|
||||
return first
|
||||
# Fix P0-E : FILE_GONE → pas de retry, l'erreur est permanente.
|
||||
if first is ImageSendResult.FILE_GONE:
|
||||
return first
|
||||
|
||||
# Retries avec backoff
|
||||
for attempt, delay in enumerate(RETRY_DELAYS, start=1):
|
||||
@@ -219,9 +335,13 @@ class TraceStreamer:
|
||||
f"Retry {attempt}/{MAX_RETRIES} dans {delay}s..."
|
||||
)
|
||||
time.sleep(delay)
|
||||
if send_fn(*args):
|
||||
result = send_fn(*args)
|
||||
if result is ImageSendResult.OK or result is True:
|
||||
logger.debug(f"Retry {attempt} réussi")
|
||||
return True
|
||||
return result
|
||||
# FILE_GONE pendant un retry — idem, on arrête
|
||||
if result is ImageSendResult.FILE_GONE:
|
||||
return result
|
||||
|
||||
logger.debug(f"Envoi échoué après {MAX_RETRIES} retries")
|
||||
return False
|
||||
@@ -260,6 +380,115 @@ class TraceStreamer:
|
||||
except Exception:
|
||||
logger.debug("Health-check échoué — serveur toujours indisponible")
|
||||
|
||||
# =========================================================================
|
||||
# Drain du buffer persistant (Partie B)
|
||||
# =========================================================================
|
||||
|
||||
def _buffer_drain_loop(self):
    """Replay persisted items in the background.

    Runs for as long as ``self.running`` is true. A drain pass is attempted
    every ``BUFFER_DRAIN_INTERVAL_S`` seconds, and only when the server is
    flagged available and the buffer reports pending events or images.

    The first iteration does not wait, so whatever is already stored in the
    buffer when the loop starts is replayed immediately.
    """
    wait_before_pass = False  # the very first pass runs without delay
    while self.running:
        if wait_before_pass:
            time.sleep(BUFFER_DRAIN_INTERVAL_S)
            # The streamer may have been stopped while we were sleeping.
            if not self.running:
                break
        wait_before_pass = True

        if not self._server_available:
            continue

        try:
            store = self._get_buffer()

            # Give up on entries that exhausted their attempts first, so
            # this pass does not retry them once more.
            nb_dropped = store.abandon_exceeded()
            if nb_dropped:
                logger.warning(
                    f"Buffer : {nb_dropped} items abandonnés "
                    f"après {MAX_ATTEMPTS} tentatives"
                )

            pending = store.counts()
            if pending["events"] != 0 or pending["images"] != 0:
                logger.info(
                    f"Buffer drain : {pending['events']} events, "
                    f"{pending['images']} images en attente — rejeu"
                )
                self._drain_buffer_once(store)
        except Exception as exc:
            # A buffer failure must not end the background loop.
            logger.error(f"Buffer drain loop échoué : {exc}")
|
||||
|
||||
def _drain_buffer_once(self, buf: PersistentBuffer):
    """Run one drain pass over the persistent buffer.

    Events are replayed first (up to 50 rows), then images (up to 20 rows).
    A delivered row is deleted from the buffer. The first delivery failure
    increments that row's attempt counter and ends the pass, on the
    assumption that the server is not answering properly. The pass also
    stops as soon as ``self._server_available`` turns false.

    NOTE(review): rows carry their own ``session_id`` but ``_send_event`` /
    ``_send_image`` are called without it — confirm which session replayed
    items end up attributed to.
    """
    import json as _json

    # --- Events first ---
    for row in buf.drain_events(limit=50):
        if not self._server_available:
            return
        try:
            event = _json.loads(row["payload"])
        except (ValueError, TypeError):
            # Unparseable payload: it can never be sent, drop the row.
            logger.error(
                f"Buffer : payload event #{row['id']} corrompu, suppression"
            )
            buf.delete_event(row["id"])
            continue

        if not self._send_event(event):
            buf.increment_attempts(row["id"], "event")
            # The server answered badly: stop the whole pass here.
            return
        buf.delete_event(row["id"])

    # --- Then images ---
    for row in buf.drain_images(limit=20):
        if not self._server_available:
            return
        image_path = row["image_path"]
        shot_id = row["shot_id"]

        if not os.path.exists(image_path):
            # The local file is gone, so the row can never be replayed.
            # Logged as an error because the image is lost for good.
            logger.error(
                f"Buffer : image #{row['id']} introuvable sur disque "
                f"({image_path}) — entrée abandonnée (le serveur n'a "
                f"jamais reçu cette image, session={row['session_id']}, "
                f"shot={shot_id})"
            )
            buf.delete_image(row["id"])
            continue

        outcome = self._send_image(image_path, shot_id)
        if outcome is ImageSendResult.OK or outcome is True:
            buf.delete_image(row["id"])
            continue

        if outcome is not ImageSendResult.FILE_GONE:
            # Plain delivery failure: count the attempt and end the pass.
            buf.increment_attempts(row["id"], "image")
            return

        # The file vanished between the existence check and the upload.
        # This is not a delivery; the row is dropped because a retry
        # cannot succeed, and the loss is logged as an error.
        logger.error(
            f"Buffer : image #{row['id']} disparue pendant l'envoi "
            f"({image_path}) — entrée abandonnée, pas de retry "
            f"(session={row['session_id']}, shot={shot_id})"
        )
        buf.delete_image(row["id"])
|
||||
|
||||
# =========================================================================
|
||||
# Compression JPEG
|
||||
# =========================================================================
|
||||
@@ -287,6 +516,56 @@ class TraceStreamer:
|
||||
logger.warning(f"Compression JPEG échouée, envoi PNG brut: {e}")
|
||||
return None, None, None
|
||||
|
||||
# =========================================================================
|
||||
# Purge locale après ACK (Partie A)
|
||||
# =========================================================================
|
||||
|
||||
@staticmethod
def _purge_local_image(path: str):
    """Delete a local screenshot once the server acknowledged the upload.

    Does nothing when ``PURGE_AFTER_ACK`` is falsy. OS-level failures never
    propagate: a missing file is ignored silently, while a locked file
    (``PermissionError``) or any other ``OSError`` is only logged at debug
    level. The original note says SessionStorage's auto-cleanup is expected
    to pick the file up later in that case.
    """
    if PURGE_AFTER_ACK:
        try:
            os.remove(path)
        except FileNotFoundError:
            # Already removed, or the path never existed: nothing to do.
            return
        except PermissionError as exc:
            # The original note attributes this to Windows keeping the file
            # locked (antivirus, indexing, ...).
            logger.debug(
                f"Purge différée (fichier verrouillé) : {path} — {exc}"
            )
        except OSError as exc:
            logger.debug(f"Purge échouée : {path} — {exc}")
        else:
            logger.debug(f"Screenshot local purgé après ACK : {path}")
|
||||
|
||||
# =========================================================================
|
||||
# Protection redirect POST→GET (INC-7)
|
||||
# =========================================================================
|
||||
|
||||
@staticmethod
def _check_redirect(resp, url: str) -> bool:
    """Detect and log a redirect answered to a POST.

    The callers in this file post with ``allow_redirects=False``, so a
    redirect status reaches them unfollowed. They call this helper first
    and treat a ``True`` result as a failed send; otherwise they go on to
    test ``resp.ok``.

    303 is part of the list on purpose: ``resp.ok`` is true for every
    status below 400, so an unreported 303 would be counted as a
    successful upload although the server did not accept the POST at the
    requested URL.

    Args:
        resp: Response object exposing ``status_code`` and ``headers``.
        url: URL that was posted to; only used in the log message.

    Returns:
        ``True`` when the response is a redirect (a WARNING is logged so
        the administrator can fix the configured URL), ``False`` otherwise.
    """
    if resp.status_code in (301, 302, 303, 307, 308):
        location = resp.headers.get("Location", "?")
        logger.warning(
            f"Redirection {resp.status_code} detectee sur POST {url} "
            f"→ {location}. Verifiez que RPA_SERVER_URL utilise "
            f"https:// si le serveur redirige."
        )
        return True
    return False
|
||||
|
||||
# =========================================================================
|
||||
# Envois HTTP
|
||||
# =========================================================================
|
||||
@@ -294,15 +573,20 @@ class TraceStreamer:
|
||||
def _register_session(self):
|
||||
"""Enregistrer la session auprès du serveur (avec identifiant machine)."""
|
||||
try:
|
||||
url = f"{STREAMING_ENDPOINT}/register"
|
||||
resp = requests.post(
|
||||
f"{STREAMING_ENDPOINT}/register",
|
||||
url,
|
||||
params={
|
||||
"session_id": self.session_id,
|
||||
"machine_id": self.machine_id,
|
||||
},
|
||||
headers=self._auth_headers(),
|
||||
timeout=3,
|
||||
allow_redirects=False,
|
||||
)
|
||||
if self._check_redirect(resp, url):
|
||||
logger.warning("Enregistrement session échoué (redirect)")
|
||||
return
|
||||
if resp.ok:
|
||||
logger.info(
|
||||
f"Session {self.session_id} enregistrée sur le serveur "
|
||||
@@ -322,28 +606,32 @@ class TraceStreamer:
|
||||
C'est la dernière chance de sauver les données de la session.
|
||||
"""
|
||||
try:
|
||||
url = f"{STREAMING_ENDPOINT}/finalize"
|
||||
resp = requests.post(
|
||||
f"{STREAMING_ENDPOINT}/finalize",
|
||||
url,
|
||||
params={
|
||||
"session_id": self.session_id,
|
||||
"machine_id": self.machine_id,
|
||||
},
|
||||
headers=self._auth_headers(),
|
||||
timeout=30, # Le build workflow peut prendre du temps
|
||||
allow_redirects=False,
|
||||
)
|
||||
self._check_redirect(resp, url)
|
||||
if resp.ok:
|
||||
result = resp.json()
|
||||
logger.info(f"Session finalisée: {result}")
|
||||
else:
|
||||
logger.warning(f"Finalisation échouée: {resp.status_code}")
|
||||
except Exception as e:
|
||||
logger.debug(f"Finalisation échouée: {e}")
|
||||
logger.warning(f"Finalisation échouée: {e}")
|
||||
|
||||
def _send_event(self, event: dict) -> bool:
|
||||
"""Envoyer un événement au serveur (avec identifiant machine)."""
|
||||
if not self._server_available:
|
||||
return False
|
||||
try:
|
||||
url = f"{STREAMING_ENDPOINT}/event"
|
||||
payload = {
|
||||
"session_id": self.session_id,
|
||||
"timestamp": time.time(),
|
||||
@@ -351,24 +639,36 @@ class TraceStreamer:
|
||||
"machine_id": self.machine_id,
|
||||
}
|
||||
resp = requests.post(
|
||||
f"{STREAMING_ENDPOINT}/event",
|
||||
url,
|
||||
json=payload,
|
||||
headers=self._auth_headers(),
|
||||
timeout=2,
|
||||
allow_redirects=False,
|
||||
)
|
||||
if self._check_redirect(resp, url):
|
||||
return False
|
||||
return resp.ok
|
||||
except Exception as e:
|
||||
logger.debug(f"Streaming Event échoué: {e}")
|
||||
return False
|
||||
|
||||
def _send_image(self, path: str, shot_id: str) -> bool:
|
||||
def _send_image(self, path: str, shot_id: str):
|
||||
"""Envoyer un screenshot au serveur, compressé en JPEG.
|
||||
|
||||
Utilise un context manager pour le fallback PNG afin d'éviter
|
||||
les fuites de descripteurs de fichier.
|
||||
|
||||
Partie A (purge après ACK) : en cas de HTTP 200 confirmé, le fichier
|
||||
local est supprimé (le serveur devient la source de vérité).
|
||||
|
||||
Fix P0-E : retourne `ImageSendResult` (OK / FAILED / FILE_GONE).
|
||||
Les appelants historiques qui attendaient un bool continuent de
|
||||
fonctionner grâce à la truthiness du enum (OK → True, reste → False),
|
||||
MAIS le drain du buffer doit désormais discriminer FILE_GONE pour
|
||||
ne pas confondre "fichier disparu" avec "envoyé avec succès".
|
||||
"""
|
||||
if not self._server_available:
|
||||
return False
|
||||
return ImageSendResult.FAILED
|
||||
try:
|
||||
# Tenter la compression JPEG (réduction ~5-10x vs PNG)
|
||||
jpeg_buf, content_type, suffix = self._compress_image_to_jpeg(path)
|
||||
@@ -379,19 +679,26 @@ class TraceStreamer:
|
||||
"machine_id": self.machine_id,
|
||||
}
|
||||
|
||||
url = f"{STREAMING_ENDPOINT}/image"
|
||||
if jpeg_buf is not None:
|
||||
# Envoi du JPEG compressé (BytesIO, pas de fuite possible)
|
||||
files = {
|
||||
"file": (f"{shot_id}{suffix}", jpeg_buf, content_type)
|
||||
}
|
||||
resp = requests.post(
|
||||
f"{STREAMING_ENDPOINT}/image",
|
||||
url,
|
||||
files=files,
|
||||
params=params,
|
||||
headers=self._auth_headers(),
|
||||
timeout=5,
|
||||
allow_redirects=False,
|
||||
)
|
||||
return resp.ok
|
||||
if self._check_redirect(resp, url):
|
||||
return ImageSendResult.FAILED
|
||||
if resp.ok:
|
||||
self._purge_local_image(path)
|
||||
return ImageSendResult.OK
|
||||
return ImageSendResult.FAILED
|
||||
else:
|
||||
# Fallback : envoi PNG original avec context manager
|
||||
with open(path, "rb") as f:
|
||||
@@ -399,13 +706,29 @@ class TraceStreamer:
|
||||
"file": (f"{shot_id}.png", f, "image/png")
|
||||
}
|
||||
resp = requests.post(
|
||||
f"{STREAMING_ENDPOINT}/image",
|
||||
url,
|
||||
files=files,
|
||||
params=params,
|
||||
headers=self._auth_headers(),
|
||||
timeout=5,
|
||||
allow_redirects=False,
|
||||
)
|
||||
return resp.ok
|
||||
if self._check_redirect(resp, url):
|
||||
return ImageSendResult.FAILED
|
||||
if resp.ok:
|
||||
self._purge_local_image(path)
|
||||
return ImageSendResult.OK
|
||||
return ImageSendResult.FAILED
|
||||
except FileNotFoundError:
|
||||
# Fix P0-E : fichier local disparu. On NE doit PAS considérer ça
|
||||
# comme un succès HTTP 200. Le serveur n'a rien reçu. On signale
|
||||
# `FILE_GONE` pour que le drain du buffer supprime l'entrée
|
||||
# (pas de retry possible) tout en loguant ERROR (pas debug).
|
||||
logger.error(
|
||||
f"Image {shot_id} introuvable sur disque ({path}) — "
|
||||
f"abandon (serveur n'a rien reçu)"
|
||||
)
|
||||
return ImageSendResult.FILE_GONE
|
||||
except Exception as e:
|
||||
logger.debug(f"Streaming Image échoué: {e}")
|
||||
return False
|
||||
return ImageSendResult.FAILED
|
||||
|
||||
418
agent_v0/agent_v1/ui/activity_panel.py
Normal file
418
agent_v0/agent_v1/ui/activity_panel.py
Normal file
@@ -0,0 +1,418 @@
|
||||
# agent_v1/ui/activity_panel.py
|
||||
"""
|
||||
Panel d'activité temps réel de Léa.
|
||||
|
||||
Affiche à l'utilisateur ce que Léa fait *maintenant* :
|
||||
- État courant (Observe / Cherche / Agit / Vérifie / Bloquée)
|
||||
- Action en cours (ex: "Clic sur Rechercher")
|
||||
- Progression (ex: "3/15")
|
||||
- Temps écoulé depuis le début du workflow
|
||||
|
||||
Contraintes :
|
||||
- Fallback silencieux si tkinter absent (ne crash jamais)
|
||||
- Thread-safe (mises à jour depuis les threads de replay)
|
||||
- Pas de dépendance à PyQt5 (seulement tkinter, déjà utilisé par chat_window)
|
||||
|
||||
Utilisation :
|
||||
panel = ActivityPanel()
|
||||
panel.definir_workflow("Saisie patient", nb_etapes=15)
|
||||
panel.mettre_a_jour(etat=EtatLea.AGIT, action="Clic sur Valider", etape=3)
|
||||
panel.masquer()
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class EtatLea(Enum):
    """Coarse-grained states Léa goes through during a replay.

    Each member's value is a ``(code, libelle, couleur)`` triple; the three
    parts are also exposed as attributes of the same names (machine code,
    label shown to the user, hex colour used for that label).
    """

    INACTIVE = ("inactive", "Prête", "#808080")    # grey
    OBSERVE = ("observe", "Observe", "#4A90E2")    # blue
    CHERCHE = ("cherche", "Cherche", "#F5A623")    # orange
    AGIT = ("agit", "Agit", "#7ED321")             # green
    VERIFIE = ("verifie", "Vérifie", "#9013FE")    # purple
    BLOQUEE = ("bloquee", "Bloquée", "#D0021B")    # red
    TERMINE = ("termine", "Terminé", "#50E3C2")    # turquoise

    def __init__(self, code: str, libelle: str, couleur: str) -> None:
        # Enum unpacks the member's tuple value into these three arguments.
        self.code, self.libelle, self.couleur = code, libelle, couleur
|
||||
|
||||
|
||||
@dataclass
class EtatActivite:
    """Snapshot of what Léa is doing right now.

    Held by the activity panel and handed out by
    ``ActivityPanel.snapshot()``, so the state can be inspected without
    touching tkinter.
    """

    etat: EtatLea = EtatLea.INACTIVE
    action_courante: str = ""
    nom_workflow: str = ""
    etape: int = 0
    nb_etapes: int = 0
    debut_timestamp: float = 0.0  # 0 (or less) means "no workflow started"
    dernier_message: str = ""

    def temps_ecoule_s(self) -> float:
        """Return the seconds elapsed since the workflow started (never negative)."""
        if self.debut_timestamp > 0:
            return max(0.0, time.time() - self.debut_timestamp)
        return 0.0

    def progression_texte(self) -> str:
        """Return the progress as ``'step/total'``, or ``''`` when no total is known."""
        return f"{self.etape}/{self.nb_etapes}" if self.nb_etapes > 0 else ""

    def temps_ecoule_texte(self) -> str:
        """Return the elapsed time in a compact form such as ``'12s'`` or ``'1m24s'``."""
        minutes, secondes = divmod(int(self.temps_ecoule_s()), 60)
        if minutes == 0:
            return f"{secondes}s"
        return f"{minutes}m{secondes:02d}s"

    def to_dict(self) -> dict:
        """Return a plain-dict view of the snapshot (for logging and tests)."""
        etat = self.etat
        return {
            "etat": etat.code,
            "etat_libelle": etat.libelle,
            "action_courante": self.action_courante,
            "nom_workflow": self.nom_workflow,
            "etape": self.etape,
            "nb_etapes": self.nb_etapes,
            "progression": self.progression_texte(),
            "temps_ecoule_s": round(self.temps_ecoule_s(), 1),
            "dernier_message": self.dernier_message,
        }
|
||||
|
||||
|
||||
class ActivityPanel:
    """Real-time activity panel for Léa.

    The tracked state lives in an ``EtatActivite`` guarded by an ``RLock``.
    The tkinter window is built lazily, only when ``activer_ui`` is true and
    tkinter can be imported and used; when it cannot, every method simply
    skips the UI part and keeps updating the state and notifying listeners.
    """

    def __init__(self, activer_ui: bool = True) -> None:
        self._lock = threading.RLock()
        self._etat = EtatActivite()
        self._activer_ui = activer_ui
        # tkinter objects, created on first use.
        self._tk_root = None
        self._tk_labels: dict = {}
        # None = not probed yet; True / False = outcome of the first attempt.
        self._ui_disponible = None
        self._listeners = []  # callbacks invoked after every state change

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def definir_workflow(self, nom: str, nb_etapes: int = 0) -> None:
        """Start tracking a new workflow (state OBSERVE, timer started now)."""
        with self._lock:
            self._etat = EtatActivite(
                etat=EtatLea.OBSERVE,
                nom_workflow=nom,
                nb_etapes=nb_etapes,
                debut_timestamp=time.time(),
            )
        # Listeners and Tk calls run outside the state lock so that slow or
        # blocking callbacks never hold it.
        self._notifier_changement()
        self._rafraichir_ui()
        logger.info(f"[ACTIVITY] Workflow démarré : {nom} ({nb_etapes} étapes)")

    def mettre_a_jour(
        self,
        etat: Optional[EtatLea] = None,
        action: Optional[str] = None,
        etape: Optional[int] = None,
        message: Optional[str] = None,
    ) -> None:
        """Update the displayed state.

        Every argument is optional: only the ones that are not ``None`` are
        written, the other fields keep their current value.
        """
        with self._lock:
            if etat is not None:
                self._etat.etat = etat
            if action is not None:
                self._etat.action_courante = action
            if etape is not None:
                self._etat.etape = etape
            if message is not None:
                self._etat.dernier_message = message

        self._notifier_changement()
        self._rafraichir_ui()

    def terminer(self, succes: bool = True) -> None:
        """Mark the workflow as finished (TERMINE) or, on failure, BLOQUEE.

        On failure a default message is set when no message is present yet.
        """
        with self._lock:
            self._etat.etat = EtatLea.TERMINE if succes else EtatLea.BLOQUEE
            if not succes:
                self._etat.dernier_message = (
                    self._etat.dernier_message or "Léa a rendu la main"
                )
        self._notifier_changement()
        self._rafraichir_ui()

    def reinitialiser(self) -> None:
        """Put the panel back into its inactive default state."""
        with self._lock:
            self._etat = EtatActivite()
        self._notifier_changement()
        self._rafraichir_ui()

    def snapshot(self) -> EtatActivite:
        """Return an independent copy of the current state.

        The copy is a regular (mutable) ``EtatActivite``; modifying it does
        not affect the panel.
        """
        with self._lock:
            courant = self._etat
            return EtatActivite(
                etat=courant.etat,
                action_courante=courant.action_courante,
                nom_workflow=courant.nom_workflow,
                etape=courant.etape,
                nb_etapes=courant.nb_etapes,
                debut_timestamp=courant.debut_timestamp,
                dernier_message=courant.dernier_message,
            )

    def masquer(self) -> None:
        """Hide the panel window if one exists."""
        root = self._tk_root
        if root is None:
            return
        try:
            root.withdraw()
        except Exception as e:
            # Best effort: hiding must never break the caller.
            logger.debug(f"[ACTIVITY] Impossible de masquer le panel : {e}")

    def afficher(self) -> None:
        """Show the panel window, creating it first when possible."""
        self._creer_ui_si_besoin()
        root = self._tk_root
        if root is None:
            return
        try:
            root.deiconify()
        except Exception as e:
            logger.debug(f"[ACTIVITY] Impossible d'afficher le panel : {e}")

    def on_change(self, callback) -> None:
        """Register a listener called with a state snapshot on every change."""
        with self._lock:
            self._listeners.append(callback)

    # ------------------------------------------------------------------
    # tkinter handling (lazy, silent fallback)
    # ------------------------------------------------------------------

    def _creer_ui_si_besoin(self) -> None:
        """Build the tkinter window on first use.

        Does nothing when the UI is disabled, already built, or already
        known to be unavailable. The window is assembled in local variables
        and only published on ``self`` once complete; if anything fails, the
        partially built window is destroyed and the UI is marked
        unavailable for good.
        """
        if not self._activer_ui:
            return
        if self._tk_root is not None:
            return
        if self._ui_disponible is False:
            return  # already probed, not available

        try:
            import tkinter as tk
        except Exception as e:
            logger.debug(f"[ACTIVITY] tkinter indisponible : {e}")
            self._ui_disponible = False
            return

        fond = "#1E1E1E"
        root = None
        try:
            root = tk.Toplevel() if _tk_root_existe() else tk.Tk()
            root.title("Léa — Activité")
            root.geometry("340x180+40+40")
            root.attributes("-topmost", True)
            root.resizable(False, False)
            root.configure(bg=fond)

            def _label(text, font, fg, pady=None, **extra):
                # Create one label on the dark background and pack it.
                widget = tk.Label(
                    root, text=text, font=font, fg=fg, bg=fond, **extra
                )
                if pady is None:
                    widget.pack()
                else:
                    widget.pack(pady=pady)
                return widget

            # Widgets are created and packed top to bottom, in this order.
            _label("Léa", ("Segoe UI", 14, "bold"), "#FFFFFF", pady=(10, 2))
            labels = {
                "etat": _label("Prête", ("Segoe UI", 11), "#808080"),
                "action": _label(
                    "", ("Segoe UI", 10), "#FFFFFF",
                    pady=(8, 2), wraplength=300,
                ),
                "progression": _label("", ("Segoe UI", 9), "#B0B0B0"),
                "temps": _label("", ("Segoe UI", 9), "#808080", pady=(4, 0)),
                "message": _label(
                    "", ("Segoe UI", 9, "italic"), "#B0B0B0",
                    pady=(6, 10), wraplength=300,
                ),
            }

            # Hidden by default: the panel only shows during a workflow.
            root.withdraw()
        except Exception as e:
            logger.debug(f"[ACTIVITY] Impossible de créer l'UI : {e}")
            self._ui_disponible = False
            self._tk_root = None
            self._tk_labels.clear()
            if root is not None:
                # Do not leave a half-built window behind.
                try:
                    root.destroy()
                except Exception as destroy_err:
                    logger.debug(
                        f"[ACTIVITY] Destruction de l'UI partielle échouée : "
                        f"{destroy_err}"
                    )
            return

        self._tk_labels.clear()
        self._tk_labels.update(labels)
        self._tk_root = root
        self._ui_disponible = True

    def _rafraichir_ui(self) -> None:
        """Push the current state to the tkinter labels, if a window exists.

        The label updates are scheduled through ``root.after(0, ...)``.
        NOTE(review): ``after`` is invoked from whichever thread triggered
        the change; whether that is safe depends on how the Tk main loop is
        run in this application — confirm.
        """
        if not self._activer_ui or self._ui_disponible is False:
            return
        self._creer_ui_si_besoin()
        root = self._tk_root
        if root is None:
            return

        try:
            snap = self.snapshot()
            labels = self._tk_labels

            def _update():
                try:
                    labels["etat"].config(
                        text=snap.etat.libelle,
                        fg=snap.etat.couleur,
                    )
                    labels["action"].config(text=snap.action_courante or "")

                    prog = snap.progression_texte()
                    if prog and snap.nom_workflow:
                        texte_prog = f"« {snap.nom_workflow} » — {prog}"
                    elif snap.nom_workflow:
                        texte_prog = f"« {snap.nom_workflow} »"
                    else:
                        texte_prog = ""
                    labels["progression"].config(text=texte_prog)

                    if snap.debut_timestamp > 0:
                        texte_temps = f"⏱ {snap.temps_ecoule_texte()}"
                    else:
                        texte_temps = ""
                    labels["temps"].config(text=texte_temps)

                    labels["message"].config(text=snap.dernier_message)

                    # Show the window automatically while Léa is active.
                    if snap.etat != EtatLea.INACTIVE:
                        root.deiconify()
                except Exception as e:
                    # Typically the window was closed in the meantime.
                    logger.debug(f"[ACTIVITY] Mise à jour des labels échouée : {e}")

            try:
                root.after(0, _update)
            except Exception:
                # The root was destroyed: stop trying to use the UI.
                self._tk_root = None
                self._ui_disponible = False
        except Exception as e:
            logger.debug(f"[ACTIVITY] Erreur rafraîchissement UI : {e}")

    def _notifier_changement(self) -> None:
        """Call every registered listener with a snapshot of the state.

        The listener list and the snapshot are taken under the lock; the
        callbacks themselves run outside it, and an exception raised by one
        listener is logged without preventing the others from running.
        """
        with self._lock:
            listeners = list(self._listeners)
            snap = self.snapshot()

        for cb in listeners:
            try:
                cb(snap)
            except Exception as e:
                logger.debug(f"[ACTIVITY] Listener erreur : {e}")
|
||||
|
||||
|
||||
def _tk_root_existe() -> bool:
    """Tell whether tkinter already has a default root window.

    Used to decide between creating a ``Toplevel`` and a new ``Tk`` root.
    Returns ``False`` when tkinter cannot be imported or inspected.
    """
    try:
        import tkinter

        return getattr(tkinter, "_default_root", None) is not None
    except Exception:
        return False
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Singleton global (optionnel)
|
||||
# ============================================================================
|
||||
|
||||
|
||||
# Process-wide panel instance, created on first request.
_INSTANCE_GLOBALE: Optional[ActivityPanel] = None
# Serialises creation and reset of the instance above.
_LOCK_SINGLETON = threading.Lock()


def get_activity_panel(activer_ui: bool = True) -> ActivityPanel:
    """Return the global activity panel, creating it on first call.

    ``activer_ui`` is only used when the instance is created; later calls
    return the existing panel whatever value is passed.
    """
    global _INSTANCE_GLOBALE
    with _LOCK_SINGLETON:
        panel = _INSTANCE_GLOBALE
        if panel is None:
            panel = ActivityPanel(activer_ui=activer_ui)
            _INSTANCE_GLOBALE = panel
        return panel
|
||||
|
||||
|
||||
def reset_activity_panel() -> None:
    """Drop the global panel instance (useful in tests).

    The current panel, if any, is hidden first; a failure while hiding is
    ignored so the reset always completes.
    """
    global _INSTANCE_GLOBALE
    with _LOCK_SINGLETON:
        panel = _INSTANCE_GLOBALE
        if panel is None:
            return
        try:
            panel.masquer()
        except Exception:
            pass
        _INSTANCE_GLOBALE = None
|
||||
@@ -3,15 +3,25 @@ Mini serveur HTTP sur l'agent Windows pour les captures d'ecran a la demande
|
||||
et les operations fichiers.
|
||||
|
||||
Ecoute sur le port 5006 (configurable via RPA_CAPTURE_PORT).
|
||||
Bind par defaut sur 127.0.0.1 (configurable via RPA_CAPTURE_BIND).
|
||||
Endpoints :
|
||||
GET /capture -> screenshot frais en base64 (JPEG)
|
||||
GET /health -> {"status": "ok"}
|
||||
GET /health -> {"status": "ok"} (pas d'auth — sonde liveness)
|
||||
POST /file-action -> operations fichiers (list, create, move, copy, sort)
|
||||
|
||||
Securite :
|
||||
- Authentification Bearer obligatoire (RPA_API_TOKEN) pour /capture et
|
||||
/file-action. Sans token configure, ces endpoints sont desactives.
|
||||
- Les tentatives non authentifiees sont loguees (WARNING) avec l'IP source.
|
||||
- Bind defaut localhost. Pour exposer sur le LAN (cas VWB backend qui
|
||||
appelle l'agent a distance), definir explicitement
|
||||
RPA_CAPTURE_BIND=0.0.0.0. L'auth reste alors la seule protection.
|
||||
"""
|
||||
import threading
|
||||
import logging
|
||||
import json
|
||||
import base64
|
||||
import hmac
|
||||
import io
|
||||
import os
|
||||
import time
|
||||
@@ -20,6 +30,17 @@ from http.server import HTTPServer, BaseHTTPRequestHandler
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
CAPTURE_PORT = int(os.environ.get("RPA_CAPTURE_PORT", "5006"))
|
||||
# Bind par defaut sur localhost — defense en profondeur.
|
||||
# Pour le deploiement VWB (backend Linux -> agent Windows), definir
|
||||
# RPA_CAPTURE_BIND=0.0.0.0 explicitement. L'auth par token reste requise.
|
||||
CAPTURE_BIND = os.environ.get("RPA_CAPTURE_BIND", "127.0.0.1")
|
||||
|
||||
# Token d'authentification (partage avec le streaming). Doit etre defini pour
|
||||
# que /capture et /file-action soient accessibles.
|
||||
CAPTURE_TOKEN = os.environ.get("RPA_API_TOKEN", "")
|
||||
|
||||
# Endpoints ouverts (pas d'auth requise — sondes techniques uniquement)
|
||||
_PUBLIC_PATHS = {"/health"}
|
||||
|
||||
# Floutage des données sensibles (conformité AI Act)
|
||||
BLUR_SENSITIVE = os.environ.get("RPA_BLUR_SENSITIVE", "true").lower() in ("true", "1", "yes")
|
||||
@@ -33,6 +54,8 @@ class CaptureHandler(BaseHTTPRequestHandler):
|
||||
|
||||
def do_GET(self):
|
||||
if self.path == "/capture":
|
||||
if not self._check_auth():
|
||||
return
|
||||
self._handle_capture()
|
||||
elif self.path == "/health":
|
||||
self._send_json(200, {"status": "ok"})
|
||||
@@ -41,10 +64,56 @@ class CaptureHandler(BaseHTTPRequestHandler):
|
||||
|
||||
def do_POST(self):
    """Route POST requests.

    Only ``/file-action`` is served, and only after ``_check_auth``
    accepted the request (``_check_auth`` sends the error response itself
    when it refuses). Any other path gets a 404 JSON answer.
    """
    if self.path != "/file-action":
        self._send_json(404, {"error": "not found"})
        return
    if self._check_auth():
        self._handle_file_action()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _check_auth(self) -> bool:
    """Validate the request's Bearer token.

    Behaviour:
      - Paths listed in ``_PUBLIC_PATHS`` are always accepted.
      - When no token is configured (``CAPTURE_TOKEN`` empty) every other
        request is refused with 503 (fail-closed) and an ERROR is logged.
      - Otherwise the token taken from ``Authorization: Bearer <token>``
        is compared with ``CAPTURE_TOKEN`` through ``hmac.compare_digest``;
        a missing or wrong token is refused with 401 and a WARNING that
        includes the source IP is logged.

    Returns:
        ``True`` when the request may proceed. ``False`` when it was
        refused; the error response has then already been sent.
    """
    # Public endpoints need no credentials.
    if self.path in _PUBLIC_PATHS:
        return True

    peer = self.client_address[0] if self.client_address else "?"

    if not CAPTURE_TOKEN:
        logger.error(
            "Refus %s depuis %s : RPA_API_TOKEN non configure "
            "(capture server en mode fail-closed)",
            self.path, peer,
        )
        self._send_json(503, {
            "error": "capture server non configure (token manquant)",
        })
        return False

    auth_header = self.headers.get("Authorization", "")
    token = ""
    if auth_header.startswith("Bearer "):
        token = auth_header[len("Bearer "):].strip()

    # compare_digest() only accepts str arguments that are pure ASCII and
    # raises TypeError otherwise. Comparing the UTF-8 bytes keeps the
    # comparison constant-time while turning a non-ASCII header into a
    # plain 401 instead of an unhandled exception.
    token_ok = bool(token) and hmac.compare_digest(
        token.encode("utf-8"), CAPTURE_TOKEN.encode("utf-8")
    )
    if not token_ok:
        logger.warning(
            "Tentative d'acces non autorisee a %s depuis %s "
            "(token %s)",
            self.path, peer,
            "absent" if not token else "invalide",
        )
        self._send_json(401, {"error": "unauthorized"})
        return False

    return True
|
||||
|
||||
def do_OPTIONS(self):
|
||||
"""Gestion CORS preflight."""
|
||||
self.send_response(200)
|
||||
@@ -351,21 +420,46 @@ class _FileActionHandlerLocal:
|
||||
class CaptureServer:
|
||||
"""Serveur de capture d'ecran en temps reel (thread daemon)."""
|
||||
|
||||
def __init__(self, port: int = CAPTURE_PORT):
|
||||
def __init__(self, port: int = CAPTURE_PORT, bind: str = CAPTURE_BIND):
|
||||
self._port = port
|
||||
self._bind = bind
|
||||
self._server: HTTPServer | None = None
|
||||
self._thread: threading.Thread | None = None
|
||||
|
||||
def start(self):
|
||||
"""Demarre le serveur dans un thread daemon."""
|
||||
"""Demarre le serveur dans un thread daemon.
|
||||
|
||||
Avertit si le serveur est expose sur le LAN sans token configure.
|
||||
"""
|
||||
# Defense en profondeur : refus de demarrer si expose LAN sans auth
|
||||
exposed_lan = self._bind not in ("127.0.0.1", "localhost", "::1")
|
||||
if exposed_lan and not CAPTURE_TOKEN:
|
||||
logger.error(
|
||||
"REFUS demarrage capture server : bind=%s (LAN) sans "
|
||||
"RPA_API_TOKEN. Definir le token ou RPA_CAPTURE_BIND=127.0.0.1.",
|
||||
self._bind,
|
||||
)
|
||||
print(
|
||||
f"[CAPTURE] REFUS demarrage : bind={self._bind} sans token. "
|
||||
f"Definir RPA_API_TOKEN ou RPA_CAPTURE_BIND=127.0.0.1."
|
||||
)
|
||||
return
|
||||
|
||||
try:
|
||||
self._server = HTTPServer(("0.0.0.0", self._port), CaptureHandler)
|
||||
self._server = HTTPServer((self._bind, self._port), CaptureHandler)
|
||||
self._thread = threading.Thread(
|
||||
target=self._server.serve_forever, daemon=True
|
||||
)
|
||||
self._thread.start()
|
||||
logger.info(f"Capture server demarre sur le port {self._port}")
|
||||
print(f"[CAPTURE] Serveur de capture demarre sur le port {self._port}")
|
||||
auth_mode = "token requis" if CAPTURE_TOKEN else "token absent (fail-closed)"
|
||||
logger.info(
|
||||
"Capture server demarre sur %s:%s (%s)",
|
||||
self._bind, self._port, auth_mode,
|
||||
)
|
||||
print(
|
||||
f"[CAPTURE] Serveur de capture demarre sur "
|
||||
f"{self._bind}:{self._port} ({auth_mode})"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Impossible de demarrer le capture server : {e}")
|
||||
print(f"[CAPTURE] ERREUR demarrage : {e}")
|
||||
|
||||
655
agent_v0/agent_v1/ui/messages.py
Normal file
655
agent_v0/agent_v1/ui/messages.py
Normal file
@@ -0,0 +1,655 @@
|
||||
# agent_v1/ui/messages.py
|
||||
"""
|
||||
Formatage des messages utilisateur pour Léa.
|
||||
|
||||
Convertit les codes d'erreur techniques (`target_not_found`, `no_screen_change`...)
|
||||
en phrases en français naturel, orientées action, adaptées à un utilisateur non
|
||||
technique (secrétaire médicale, TIM).
|
||||
|
||||
Trois niveaux de sévérité sont définis :
|
||||
- INFO — Léa fait son travail normalement
|
||||
- ATTENTION — Quelque chose de léger (ralentissement, retry)
|
||||
- BLOCAGE — Léa a besoin d'aide, elle rend la main
|
||||
|
||||
Le module est 100% pur (pas d'I/O, pas d'UI) : testable sans mocks lourds.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import Any, Mapping, Optional
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Accès paresseux au DomainContext
|
||||
# ----------------------------------------------------------------------------
|
||||
#
|
||||
# On importe le module à l'appel pour éviter toute dépendance circulaire
|
||||
# avec `agent_v0.server_v1.domain_context` (qui ne doit pas importer l'UI).
|
||||
# Si l'import échoue (contexte client sans server_v1), on retombe sur None
|
||||
# et les formatters gardent leur comportement générique historique.
|
||||
|
||||
|
||||
def _get_domain_ctx(domain_id: Optional[str]):
|
||||
"""Récupérer un DomainContext si possible, sinon None (fallback)."""
|
||||
if not domain_id:
|
||||
return None
|
||||
try:
|
||||
from agent_v0.server_v1.domain_context import get_domain_context # lazy
|
||||
return get_domain_context(domain_id)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _friendly_target(description: str, domain_id: Optional[str] = None) -> str:
    """Turn a technical target description into business wording when possible.

    The description is first cleaned with ``_nettoyer_description_cible``.
    If a domain context is available for *domain_id*, its synonym mapping
    is applied to the cleaned text. If there is no context, the cleaned
    text is empty, or the mapping raises, the cleaned text is returned
    unchanged.
    """
    cleaned = _nettoyer_description_cible(description)
    domain_ctx = _get_domain_ctx(domain_id)
    if not cleaned or domain_ctx is None:
        return cleaned
    try:
        friendly = domain_ctx._apply_synonyms(cleaned)
    except Exception:
        # Best effort: never let a domain lookup break message formatting.
        return cleaned
    return friendly
|
||||
|
||||
|
||||
class NiveauMessage(Enum):
    """Severity levels of the messages shown to the user."""

    # Light green background, dismisses itself after 3-5 s.
    INFO = "info"
    # Light orange background, dismisses itself after 7 s.
    ATTENTION = "attention"
    # Light red background, stays on screen (15 s or more).
    BLOCAGE = "blocage"
|
||||
|
||||
|
||||
# Default display duration, in seconds, for each severity level.
DUREE_PAR_NIVEAU: dict[NiveauMessage, int] = {
    NiveauMessage.INFO: 4,
    NiveauMessage.ATTENTION: 7,
    NiveauMessage.BLOCAGE: 15,
}

# Plain-text icon for each severity level (compatible with plyer/Windows/Linux).
ICONE_PAR_NIVEAU: dict[NiveauMessage, str] = {
    NiveauMessage.INFO: "i",
    NiveauMessage.ATTENTION: "!",
    NiveauMessage.BLOCAGE: "?",
}
|
||||
|
||||
|
||||
@dataclass
class MessageUtilisateur:
    """A message ready to be displayed to the user.

    Attributes:
        niveau: Severity level (info/attention/blocage).
        titre: Short notification title (at most 60 characters).
        corps: Message body, in natural French.
        duree_s: Recommended display duration, in seconds.
        persistent: When True, the user has to dismiss the message manually.
    """

    niveau: NiveauMessage
    titre: str
    corps: str
    duree_s: int
    persistent: bool = False

    def to_dict(self) -> dict:
        """Serialize the message (handy for tests and logging).

        The level is exported through its enum ``value``; the other
        fields are copied as-is.
        """
        payload = {"niveau": self.niveau.value}
        for champ in ("titre", "corps", "duree_s", "persistent"):
            payload[champ] = getattr(self, champ)
        return payload
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Helpers d'extraction
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def _extraire_nom_application(titre_fenetre: str) -> str:
|
||||
"""Extraire le nom de l'application à partir d'un titre de fenêtre.
|
||||
|
||||
Les titres Windows suivent généralement le format :
|
||||
"Document.txt – Bloc-notes"
|
||||
"Ma Page - Google Chrome"
|
||||
"Sans titre — Paint"
|
||||
|
||||
On retourne la partie après le dernier séparateur, ou le titre entier.
|
||||
"""
|
||||
if not titre_fenetre:
|
||||
return ""
|
||||
titre = titre_fenetre.strip()
|
||||
# Chercher le dernier séparateur parmi " – ", " — ", " - "
|
||||
for sep in (" – ", " — ", " - "):
|
||||
if sep in titre:
|
||||
return titre.rsplit(sep, 1)[-1].strip()
|
||||
return titre
|
||||
|
||||
|
||||
def _nettoyer_description_cible(description: str) -> str:
|
||||
"""Nettoyer la description technique d'une cible pour l'afficher.
|
||||
|
||||
Supprime les caractères techniques (guillemets inutiles, ':').
|
||||
"""
|
||||
if not description:
|
||||
return ""
|
||||
desc = description.strip()
|
||||
# Retirer les guillemets encapsulants
|
||||
desc = desc.strip("'\"`")
|
||||
# Limiter la longueur
|
||||
if len(desc) > 80:
|
||||
desc = desc[:77] + "..."
|
||||
return desc
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Formattage des messages techniques → humains
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def formatter_cible_non_trouvee(
    description_cible: str,
    titre_fenetre: Optional[str] = None,
    domain_id: Optional[str] = None,
    params: Optional[Mapping[str, Any]] = None,
) -> MessageUtilisateur:
    """Build the message shown when Léa cannot find an element to click.

    When a business domain is supplied, the target description goes
    through ``_friendly_target`` to get business wording. If the domain
    context exposes clarification templates, the question returned by
    ``pose_clarification_question`` is preferred over the generic text.

    Example of the generic output:
        Léa a besoin d'aide
        Je ne trouve pas « bonjour » dans Bloc-notes. Peux-tu cliquer
        dessus toi-même ? Je reprends ensuite.

    Args:
        description_cible: Raw description of the target.
        titre_fenetre: Title of the active window (used to extract the app).
        domain_id: Business domain used to enrich the output (optional).
        params: Workflow parameters forwarded to the clarification
            templates.

    Returns:
        A persistent BLOCAGE-level message.
    """
    cible = _friendly_target(description_cible, domain_id) or "l'élément"
    app = _extraire_nom_application(titre_fenetre or "")

    # Prefer the domain-specific question when the domain provides one.
    corps = ""
    ctx = _get_domain_ctx(domain_id)
    if ctx is not None and ctx.clarification_templates:
        try:
            corps = ctx.pose_clarification_question(
                {
                    "blocked_on": "target_not_found",
                    "target": description_cible or "",
                    "app": app,
                    "params": dict(params or {}),
                }
            )
        except Exception:
            corps = ""

    # Generic wording, with or without the application name.
    if not corps:
        if app:
            corps = (
                f"Je ne trouve pas « {cible} » dans {app}. "
                f"Peux-tu cliquer dessus toi-même ? Je reprends ensuite."
            )
        else:
            corps = (
                f"Je ne trouve pas « {cible} » à l'écran. "
                f"Peux-tu le faire toi-même ? Je reprends ensuite."
            )

    return MessageUtilisateur(
        niveau=NiveauMessage.BLOCAGE,
        titre="Léa a besoin d'aide",
        corps=corps,
        duree_s=DUREE_PAR_NIVEAU[NiveauMessage.BLOCAGE],
        persistent=True,
    )
|
||||
|
||||
|
||||
def formatter_fenetre_incorrecte(
    titre_actuel: str,
    titre_attendu: str,
) -> MessageUtilisateur:
    """Build the message shown when the active window is not the expected one.

    Both titles are reduced to their application name with
    ``_extraire_nom_application``. An empty current name becomes
    "une autre fenêtre"; an empty expected name falls back to the raw
    expected title.

    Returns:
        A persistent BLOCAGE-level message asking the user to open the
        right window.
    """
    nom_attendu = _extraire_nom_application(titre_attendu) or titre_attendu
    nom_actuel = _extraire_nom_application(titre_actuel) or "une autre fenêtre"

    return MessageUtilisateur(
        niveau=NiveauMessage.BLOCAGE,
        titre="Léa attend une fenêtre",
        corps=(
            f"J'attends « {nom_attendu} » mais c'est « {nom_actuel} » "
            f"qui est affiché. Peux-tu ouvrir la bonne fenêtre ?"
        ),
        duree_s=DUREE_PAR_NIVEAU[NiveauMessage.BLOCAGE],
        persistent=True,
    )
|
||||
|
||||
|
||||
def formatter_ecran_inchange(action_type: str = "") -> MessageUtilisateur:
    """Build the message shown when an action had no visible effect.

    The sentence subject depends on *action_type* ("click", "type",
    "key_combo", "scroll"); any other value uses the generic
    "Mon action".

    Returns:
        An ATTENTION-level message announcing a retry.
    """
    sujet = {
        "click": "Mon clic",
        "type": "Ma saisie",
        "key_combo": "Mon raccourci clavier",
        "scroll": "Mon défilement",
    }.get(action_type, "Mon action")

    return MessageUtilisateur(
        niveau=NiveauMessage.ATTENTION,
        titre="Léa vérifie",
        corps=(
            f"{sujet} n'a pas eu l'air de marcher. Je vais réessayer, "
            f"ou te rendre la main si ça ne passe pas."
        ),
        duree_s=DUREE_PAR_NIVEAU[NiveauMessage.ATTENTION],
    )
|
||||
|
||||
|
||||
def formatter_mode_apprentissage(
    raison: str = "",
    description_cible: str = "",
    titre_fenetre: Optional[str] = None,
) -> MessageUtilisateur:
    """Build the message shown when Léa enters learning mode (supervised pause).

    The text tells the user that Léa is stuck, asks them to demonstrate
    the step, and reminds them to press Ctrl+Shift+L when done. When a
    target description is given it is added in parentheses, together
    with the application name if a window title is available.

    Args:
        raison: Accepted for interface compatibility; not used in the text.
        description_cible: Raw description of the target (optional).
        titre_fenetre: Title of the active window (optional).

    Returns:
        A persistent BLOCAGE-level message.
    """
    cible = ""
    if description_cible:
        cible = _nettoyer_description_cible(description_cible)
    app = ""
    if titre_fenetre:
        app = _extraire_nom_application(titre_fenetre or "")

    # Short context suffix; nothing is added without a target.
    if not cible:
        contexte = ""
    elif app:
        contexte = f" (« {cible} » dans {app})"
    else:
        contexte = f" (« {cible} »)"

    return MessageUtilisateur(
        niveau=NiveauMessage.BLOCAGE,
        titre="Léa a besoin d'aide",
        corps=(
            f"Je n'y arrive pas{contexte}, montrez-moi comment faire. "
            f"Quand vous avez fini, appuyez sur Ctrl+Shift+L."
        ),
        duree_s=DUREE_PAR_NIVEAU[NiveauMessage.BLOCAGE],
        persistent=True,
    )
|
||||
|
||||
|
||||
def formatter_connexion_perdue(hote_serveur: str = "") -> MessageUtilisateur:
    """Build the message shown when the server connection is lost.

    The wording is reassuring: it says the retry is automatic.
    *hote_serveur* is accepted but does not appear in the text.

    Returns:
        An ATTENTION-level message.
    """
    return MessageUtilisateur(
        niveau=NiveauMessage.ATTENTION,
        titre="Léa est déconnectée",
        corps=(
            "J'ai perdu le lien avec le serveur. Je retente automatiquement, "
            "pas besoin d'intervenir."
        ),
        duree_s=DUREE_PAR_NIVEAU[NiveauMessage.ATTENTION],
    )
|
||||
|
||||
|
||||
def formatter_connexion_retablie() -> MessageUtilisateur:
    """Build the INFO-level message shown when the server connection is back."""
    niveau = NiveauMessage.INFO
    return MessageUtilisateur(
        niveau=niveau,
        titre="Léa",
        corps="C'est bon, la connexion est revenue. Je continue.",
        duree_s=DUREE_PAR_NIVEAU[niveau],
    )
|
||||
|
||||
|
||||
def formatter_debut_workflow(nom_workflow: str, nb_etapes: int = 0) -> MessageUtilisateur:
    """Build the INFO-level message shown when a replay workflow starts.

    The number of steps is mentioned only when *nb_etapes* is positive.
    """
    detail = f" ({nb_etapes} étapes)" if nb_etapes > 0 else ""
    return MessageUtilisateur(
        niveau=NiveauMessage.INFO,
        titre="Léa démarre",
        corps=f"Je démarre « {nom_workflow} »{detail}. Je t'indique mon avancement.",
        duree_s=DUREE_PAR_NIVEAU[NiveauMessage.INFO],
    )
|
||||
|
||||
|
||||
def formatter_etape_workflow(
    etape_actuelle: int,
    nb_etapes: int,
    description: str = "",
) -> MessageUtilisateur:
    """Build the INFO-level progress message for one workflow step.

    The body is "Étape i/n", followed by the cleaned description when
    one is given. The display duration is fixed at 3 seconds.
    """
    corps = f"Étape {etape_actuelle}/{nb_etapes}"
    if description:
        corps += f" — {_nettoyer_description_cible(description)}"

    return MessageUtilisateur(
        niveau=NiveauMessage.INFO,
        titre="Léa avance",
        corps=corps,
        duree_s=3,
    )
|
||||
|
||||
|
||||
def formatter_retry(action_type: str = "", tentative: int = 2) -> MessageUtilisateur:
    """Build the ATTENTION-level message shown when Léa retries an action.

    Only the attempt number appears in the text; *action_type* is
    accepted but not used.
    """
    niveau = NiveauMessage.ATTENTION
    return MessageUtilisateur(
        niveau=niveau,
        titre="Léa retente",
        corps=(
            f"Je retente (tentative {tentative}). Ça arrive parfois, "
            f"l'écran était peut-être en cours de chargement."
        ),
        duree_s=DUREE_PAR_NIVEAU[niveau],
    )
|
||||
|
||||
|
||||
def formatter_ralentissement() -> MessageUtilisateur:
    """Build the ATTENTION-level message shown when Léa is slower than expected."""
    niveau = NiveauMessage.ATTENTION
    return MessageUtilisateur(
        niveau=niveau,
        titre="Léa prend son temps",
        corps="Je vais plus lentement que prévu. L'écran met du temps à répondre.",
        duree_s=DUREE_PAR_NIVEAU[niveau],
    )
|
||||
|
||||
|
||||
def formatter_fin_workflow(
    succes: bool,
    nom_workflow: str = "",
    nb_etapes: int = 0,
    duree_s: float = 0.0,
    domain_id: Optional[str] = None,
    items_count: int = 0,
    failed_count: int = 0,
    params: Optional[Mapping[str, Any]] = None,
) -> MessageUtilisateur:
    """Build the message shown at the end of a workflow.

    When a business domain is supplied and its context exposes
    ``summary_templates``, the body comes from
    ``DomainContext.describe_workflow_outcome``. Otherwise, or when that
    call fails or returns nothing, a generic sentence is used.

    Args:
        succes: True if the whole workflow succeeded.
        nom_workflow: Name of the workflow.
        nb_etapes: Number of technical steps (used by the generic text and
            as a fallback item count).
        duree_s: Total duration, in seconds.
        domain_id: Business domain (optional).
        items_count: Number of business items processed.
        failed_count: Number of items that failed.
        params: Extra information forwarded to the templates.

    Returns:
        INFO on full success, ATTENTION on success with failed items
        (domain summary only), persistent BLOCAGE otherwise.
    """
    domaine = _get_domain_ctx(domain_id)
    resume = ""
    if domaine is not None and domaine.summary_templates:
        try:
            resume = domaine.describe_workflow_outcome(
                workflow_name=nom_workflow,
                success=succes,
                items_count=items_count or max(1, nb_etapes),
                failed_count=failed_count,
                elapsed_s=duree_s,
                extra=dict(params or {}),
            )
        except Exception:
            resume = ""

    # Domain-specific summary available: only the severity still varies.
    if resume:
        if succes and failed_count == 0:
            return MessageUtilisateur(
                niveau=NiveauMessage.INFO,
                titre="Léa a terminé",
                corps=resume,
                duree_s=6,
            )
        if succes and failed_count > 0:
            return MessageUtilisateur(
                niveau=NiveauMessage.ATTENTION,
                titre="Léa a terminé partiellement",
                corps=resume,
                duree_s=DUREE_PAR_NIVEAU[NiveauMessage.ATTENTION],
            )
        return MessageUtilisateur(
            niveau=NiveauMessage.BLOCAGE,
            titre="Léa s'arrête",
            corps=resume,
            duree_s=DUREE_PAR_NIVEAU[NiveauMessage.BLOCAGE],
            persistent=True,
        )

    # Generic wording.
    if not succes:
        return MessageUtilisateur(
            niveau=NiveauMessage.BLOCAGE,
            titre="Léa s'arrête",
            corps=(
                "Je n'ai pas pu terminer. Je te rends la main, "
                "tu peux continuer à partir de là où je me suis arrêtée."
            ),
            duree_s=DUREE_PAR_NIVEAU[NiveauMessage.BLOCAGE],
            persistent=True,
        )

    if nom_workflow and nb_etapes > 0:
        corps = (
            f"C'est fait ! « {nom_workflow} » est terminé "
            f"({nb_etapes} étapes en {int(duree_s)}s)."
        )
    else:
        corps = "C'est fait ! Tout s'est bien passé."
    return MessageUtilisateur(
        niveau=NiveauMessage.INFO,
        titre="Léa a terminé",
        corps=corps,
        duree_s=6,
    )
|
||||
|
||||
|
||||
def formatter_erreur_generique(
    message_technique: str,
    domain_id: Optional[str] = None,
    params: Optional[Mapping[str, Any]] = None,
) -> MessageUtilisateur:
    """Format an uncategorized technical error message.

    Known patterns in the technical message are routed to the matching
    specialised formatter; anything else is wrapped in a generic
    ATTENTION-level message, truncated to 120 characters.

    Args:
        message_technique: Raw technical message (may be empty).
        domain_id: Business domain, forwarded to the specialised
            formatters that accept one.
        params: Workflow parameters, forwarded likewise.

    Returns:
        The user-facing message.
    """
    if not message_technique:
        return MessageUtilisateur(
            niveau=NiveauMessage.ATTENTION,
            titre="Léa",
            corps="J'ai rencontré un petit souci. Je continue.",
            duree_s=DUREE_PAR_NIVEAU[NiveauMessage.ATTENTION],
        )

    msg_lower = message_technique.lower()

    # target_not_found[:...]
    if "target_not_found" in msg_lower:
        # The code may appear anywhere in the message (e.g. behind a
        # prefix such as "replay failed: "), so use re.search. re.match
        # only matches at position 0 and would drop the description.
        match = re.search(r"target_not_found[:\s]*(.*)", message_technique, re.IGNORECASE)
        desc = match.group(1).strip() if match else ""
        return formatter_cible_non_trouvee(desc, domain_id=domain_id, params=params)

    # Fenêtre incorrecte: 'X' (attendu: 'Y')
    if "fenêtre incorrecte" in msg_lower or "fenetre incorrecte" in msg_lower:
        # First quoted string after ':' or ',' is the current title; the
        # quoted string after "attendu" is the expected one.
        m_actuel = re.search(r"[:,]\s*['\"]([^'\"]+)['\"]", message_technique)
        m_attendu = re.search(r"attendu[:\s]*['\"]([^'\"]+)['\"]", message_technique)
        actuel = m_actuel.group(1) if m_actuel else ""
        attendu = m_attendu.group(1) if m_attendu else ""
        return formatter_fenetre_incorrecte(actuel, attendu)

    # Screen unchanged
    if "inchang" in msg_lower or "no_screen_change" in msg_lower:
        return formatter_ecran_inchange()

    # Policy abort / failed visual resolution
    if "policy_abort" in msg_lower or "visual_resolve_failed" in msg_lower:
        return formatter_cible_non_trouvee(
            message_technique, domain_id=domain_id, params=params
        )

    # Fallback: truncated technical message
    msg_tronque = message_technique.strip()
    if len(msg_tronque) > 120:
        msg_tronque = msg_tronque[:117] + "..."

    return MessageUtilisateur(
        niveau=NiveauMessage.ATTENTION,
        titre="Léa",
        corps=f"J'ai rencontré un souci : {msg_tronque}",
        duree_s=DUREE_PAR_NIVEAU[NiveauMessage.ATTENTION],
    )
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Détection fenêtre Léa (utilisé par l'executor pour ignorer sa propre UI)
|
||||
# ============================================================================
|
||||
|
||||
|
||||
# Motifs qui identifient une fenêtre appartenant à Léa (l'agent lui-même).
|
||||
# On utilise des regex avec \b pour éviter les faux positifs sur des noms
|
||||
# contenant "lea" (ex: "cléa.txt", "leapfrog", "replay").
|
||||
_MOTIFS_FENETRE_LEA_REGEX = (
|
||||
r"\bléa\b",
|
||||
r"\blea\b(?!p)", # "lea" mot entier, pas "leapfrog"
|
||||
r"lea\s*[—–\-:]", # "Lea —", "Lea -", "Lea :"
|
||||
r"léa\s*[—–\-:]",
|
||||
r"\bassistante ia\b",
|
||||
r"\bléa ia\b",
|
||||
r"\blea ia\b",
|
||||
)
|
||||
|
||||
|
||||
def est_fenetre_lea(titre_fenetre: str) -> bool:
    """Tell whether a window title belongs to the Léa agent itself.

    Used so that Léa does not treat its own UI as an intrusive window
    during its pre-checks. The title is lower-cased and stripped, then
    tested against each pattern of ``_MOTIFS_FENETRE_LEA_REGEX`` with
    ``re.search``. An empty title is never a Léa window.
    """
    if not titre_fenetre:
        return False
    normalise = titre_fenetre.lower().strip()
    for motif in _MOTIFS_FENETRE_LEA_REGEX:
        if re.search(motif, normalise):
            return True
    return False
|
||||
|
||||
|
||||
# Fenêtres parasites Windows à ignorer dans les pré-vérifications.
|
||||
# Ce ne sont pas des fenêtres applicatives — c'est du bruit système
|
||||
# qui prend le focus de manière imprévisible.
|
||||
_FENETRES_BRUIT_SYSTEME = (
|
||||
"fenêtre de dépassement de capacité",
|
||||
"overflow", # version anglaise systray
|
||||
"program manager",
|
||||
"barre des tâches",
|
||||
"task bar",
|
||||
"cortana",
|
||||
"action center",
|
||||
"centre de notifications",
|
||||
)
|
||||
|
||||
|
||||
def est_fenetre_bruit(titre_fenetre: str) -> bool:
    """Tell whether a window title is Windows system noise.

    A missing title and the literal "unknown_window" both count as
    noise. Otherwise the lower-cased, stripped title is noise when it
    contains any fragment listed in ``_FENETRES_BRUIT_SYSTEME``.
    """
    if not titre_fenetre:
        return True  # no title = noise
    normalise = titre_fenetre.lower().strip()
    return normalise == "unknown_window" or any(
        fragment in normalise for fragment in _FENETRES_BRUIT_SYSTEME
    )
|
||||
|
||||
|
||||
# Kept for backward compatibility with code that listed MOTIFS_FENETRE_LEA.
MOTIFS_FENETRE_LEA = (
    "léa",
    "lea —",
    "léa —",
    "lea -",
    "léa -",
    "lea assistante",
    "léa assistante",
    "lea : ",
    "léa : ",
    "assistante ia",
)
|
||||
@@ -5,6 +5,14 @@ Utilise plyer pour les notifications système, sans dépendance PyQt5.
|
||||
|
||||
Remplace les dialogues Qt par des toasts non-bloquants.
|
||||
Thread-safe avec rate limiting (1 notification / 2 secondes max).
|
||||
|
||||
Les messages utilisateur sont formatés via `agent_v1.ui.messages` qui convertit
|
||||
les codes techniques (target_not_found, etc.) en français naturel.
|
||||
|
||||
Hiérarchie des notifications (cf. messages.NiveauMessage) :
|
||||
- INFO : auto-dismiss en ~4s, rate-limité classique
|
||||
- ATTENTION : auto-dismiss en ~7s, rate-limité classique
|
||||
- BLOCAGE : persistant (15s+), bypass du rate limit
|
||||
"""
|
||||
|
||||
import logging
|
||||
@@ -12,6 +20,23 @@ import threading
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
from .messages import (
|
||||
MessageUtilisateur,
|
||||
NiveauMessage,
|
||||
formatter_cible_non_trouvee,
|
||||
formatter_connexion_perdue,
|
||||
formatter_connexion_retablie,
|
||||
formatter_debut_workflow,
|
||||
formatter_ecran_inchange,
|
||||
formatter_erreur_generique,
|
||||
formatter_etape_workflow,
|
||||
formatter_fenetre_incorrecte,
|
||||
formatter_fin_workflow,
|
||||
formatter_mode_apprentissage,
|
||||
formatter_ralentissement,
|
||||
formatter_retry,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Import conditionnel de plyer — fallback silencieux si absent
|
||||
@@ -59,7 +84,13 @@ class NotificationManager:
|
||||
# Méthode générique
|
||||
# ------------------------------------------------------------------ #
|
||||
|
||||
def notify(self, title: str, message: str, timeout: int = 5) -> bool:
|
||||
def notify(
|
||||
self,
|
||||
title: str,
|
||||
message: str,
|
||||
timeout: int = 5,
|
||||
bypass_rate_limit: bool = False,
|
||||
) -> bool:
|
||||
"""
|
||||
Affiche une notification toast.
|
||||
|
||||
@@ -67,6 +98,8 @@ class NotificationManager:
|
||||
title: Titre de la notification.
|
||||
message: Corps du message.
|
||||
timeout: Durée d'affichage en secondes.
|
||||
bypass_rate_limit: Si True, ignore le rate limit (pour les blocages
|
||||
importants qui ne doivent pas être écrasés).
|
||||
|
||||
Returns:
|
||||
True si la notification a été envoyée, False sinon
|
||||
@@ -76,17 +109,21 @@ class NotificationManager:
|
||||
logger.debug("Notification ignorée (plyer absent) : %s", title)
|
||||
return False
|
||||
|
||||
with self._lock:
|
||||
now = time.monotonic()
|
||||
elapsed = now - self._last_notification_time
|
||||
if elapsed < RATE_LIMIT_SECONDS:
|
||||
logger.debug(
|
||||
"Notification ignorée (rate limit, %.1fs restantes) : %s",
|
||||
RATE_LIMIT_SECONDS - elapsed,
|
||||
title,
|
||||
)
|
||||
return False
|
||||
self._last_notification_time = now
|
||||
if not bypass_rate_limit:
|
||||
with self._lock:
|
||||
now = time.monotonic()
|
||||
elapsed = now - self._last_notification_time
|
||||
if elapsed < RATE_LIMIT_SECONDS:
|
||||
logger.debug(
|
||||
"Notification ignorée (rate limit, %.1fs restantes) : %s",
|
||||
RATE_LIMIT_SECONDS - elapsed,
|
||||
title,
|
||||
)
|
||||
return False
|
||||
self._last_notification_time = now
|
||||
else:
|
||||
with self._lock:
|
||||
self._last_notification_time = time.monotonic()
|
||||
|
||||
# Envoi dans un thread dédié pour ne jamais bloquer l'appelant
|
||||
thread = threading.Thread(
|
||||
@@ -97,6 +134,39 @@ class NotificationManager:
|
||||
thread.start()
|
||||
return True
|
||||
|
||||
def notify_message(self, msg: MessageUtilisateur) -> bool:
    """Send a structured MessageUtilisateur (level, title, body).

    The message is logged first, then handed to ``notify``. BLOCAGE
    messages bypass the rate limit so the user is sure to see that help
    is needed.

    Returns:
        Whatever ``notify`` returns.
    """
    est_blocage = msg.niveau == NiveauMessage.BLOCAGE
    # Also trace the message in the file logs.
    self._log_message(msg)
    return self.notify(
        title=msg.titre,
        message=msg.corps,
        timeout=msg.duree_s,
        bypass_rate_limit=est_blocage,
    )
|
||||
|
||||
@staticmethod
def _log_message(msg: MessageUtilisateur) -> None:
    """Log a user message at the level matching its severity.

    ATTENTION maps to WARNING and BLOCAGE to ERROR; INFO and any other
    level are logged at INFO. Every line carries a "[LEA]" prefix.
    """
    texte = f"[LEA] {msg.titre}: {msg.corps}"
    if msg.niveau == NiveauMessage.ATTENTION:
        logger.warning(texte)
    elif msg.niveau == NiveauMessage.BLOCAGE:
        logger.error(texte)
    else:
        logger.info(texte)
|
||||
|
||||
def _send(self, title: str, message: str, timeout: int) -> None:
|
||||
"""Envoi effectif de la notification (exécuté dans un thread dédié)."""
|
||||
try:
|
||||
@@ -180,40 +250,93 @@ class NotificationManager:
|
||||
timeout=3,
|
||||
)
|
||||
|
||||
def replay_finished(self, success: bool, workflow_name: str) -> bool:
|
||||
"""Notification de fin de replay (succès ou échec)."""
|
||||
if success:
|
||||
return self.notify(
|
||||
title=APP_NAME,
|
||||
message="C'est fait ! Tout s'est bien passé.",
|
||||
timeout=5,
|
||||
)
|
||||
else:
|
||||
return self.notify(
|
||||
title=APP_NAME,
|
||||
message="Hmm, j'ai eu un souci. Vous pouvez me remontrer ?",
|
||||
timeout=7,
|
||||
)
|
||||
def replay_target_not_found(
|
||||
self,
|
||||
target_description: str,
|
||||
window_title: Optional[str] = None,
|
||||
) -> bool:
|
||||
"""Notification quand un élément n'est pas trouvé pendant le replay.
|
||||
|
||||
def connection_changed(self, connected: bool, server_host: str) -> bool:
|
||||
Le replay est mis en pause et attend une intervention humaine.
|
||||
Utilise `messages.formatter_cible_non_trouvee` pour un message en
|
||||
français naturel.
|
||||
"""
|
||||
msg = formatter_cible_non_trouvee(target_description, window_title)
|
||||
return self.notify_message(msg)
|
||||
|
||||
def replay_wrong_window(self, current_title: str, expected_title: str) -> bool:
    """Notify that the active window is not the expected one."""
    return self.notify_message(
        formatter_fenetre_incorrecte(current_title, expected_title)
    )
|
||||
|
||||
def replay_no_screen_change(self, action_type: str = "") -> bool:
    """Notify that an action had no visible effect."""
    return self.notify_message(formatter_ecran_inchange(action_type))
|
||||
|
||||
def replay_learning_mode(
    self,
    raison: str = "",
    target_description: str = "",
    window_title: Optional[str] = None,
) -> bool:
    """Notify that Léa is switching to learning mode.

    Per the original documentation, Léa is stuck and asks the user to show
    how to proceed; the wording is meant to be humble and actionable for a
    non-technical user. The text comes from ``formatter_mode_apprentissage``.

    Returns:
        Whatever ``self.notify_message`` returns.
    """
    return self.notify_message(
        formatter_mode_apprentissage(raison, target_description, window_title)
    )
|
||||
|
||||
def replay_retry(self, action_type: str = "", tentative: int = 2) -> bool:
    """Notify that Léa is retrying an action.

    Builds the message with ``formatter_retry`` and forwards it to
    ``self.notify_message``, whose result is returned.
    """
    return self.notify_message(formatter_retry(action_type, tentative))
|
||||
|
||||
def replay_slow(self) -> bool:
    """Notify that Léa is running slower than expected.

    Builds the message with ``formatter_ralentissement`` and forwards it to
    ``self.notify_message``, whose result is returned.
    """
    return self.notify_message(formatter_ralentissement())
|
||||
|
||||
def replay_finished(
    self,
    success: bool,
    workflow_name: str,
    step_count: int = 0,
    duration_s: float = 0.0,
) -> bool:
    """Send the end-of-replay notification (success or failure).

    All four arguments are handed to ``formatter_fin_workflow``; the message
    it returns is forwarded to ``self.notify_message``.

    Returns:
        Whatever ``self.notify_message`` returns.
    """
    return self.notify_message(
        formatter_fin_workflow(success, workflow_name, step_count, duration_s)
    )
|
||||
|
||||
def replay_workflow_started(self, workflow_name: str, step_count: int = 0) -> bool:
    """Notify that a workflow is starting (stated replacement for `replay_started`).

    Builds the message with ``formatter_debut_workflow`` and forwards it to
    ``self.notify_message``, whose result is returned.
    """
    return self.notify_message(formatter_debut_workflow(workflow_name, step_count))
|
||||
|
||||
def replay_step_progress(
    self,
    current: int,
    total: int,
    description: str = "",
) -> bool:
    """Notify progress on a workflow step (documented as INFO level).

    Builds the message with ``formatter_etape_workflow`` and forwards it to
    ``self.notify_message``, whose result is returned.
    """
    return self.notify_message(
        formatter_etape_workflow(current, total, description)
    )
|
||||
|
||||
def connection_changed(self, connected: bool, server_host: str = "") -> bool:
    """Notify a change in the server connection state.

    The listing mixed two implementations: hard-coded ``self.notify(...)``
    returns came first, which left the formatter-based message and the final
    ``notify_message`` call unreachable. Only the formatter-based path is
    kept, in line with the sibling ``replay_*`` methods.

    Args:
        connected: True when the connection is (re)established, False when lost.
        server_host: Host passed to the "connection lost" formatter.

    Returns:
        Whatever ``self.notify_message`` returns.
    """
    if connected:
        msg = formatter_connexion_retablie()
    else:
        msg = formatter_connexion_perdue(server_host)
    return self.notify_message(msg)
|
||||
|
||||
def error(self, message: str) -> bool:
    """Send a generic error notification.

    Per the original documentation, ``formatter_erreur_generique`` first
    tries to recognise a known technical pattern in ``message`` and format it
    properly, and otherwise falls back to a helpful generic message.

    The listing also carried an earlier implementation that returned
    ``self.notify(...)`` before this logic, making it dead code; only the
    formatter-based path is kept.

    Returns:
        Whatever ``self.notify_message`` returns.
    """
    msg = formatter_erreur_generique(message)
    return self.notify_message(msg)
|
||||
|
||||
@@ -474,9 +474,14 @@ class SmartTrayV1:
|
||||
|
||||
try:
|
||||
import requests
|
||||
# Auth headers pour le streaming server (port 5005)
|
||||
auth_headers = {}
|
||||
if self.server_client is not None:
|
||||
auth_headers = self.server_client._auth_headers()
|
||||
resp = requests.post(
|
||||
f"{self.server_client._stream_base}/api/v1/traces/stream/replay/start",
|
||||
json={"workflow_id": workflow_id},
|
||||
headers=auth_headers,
|
||||
timeout=10,
|
||||
)
|
||||
if resp.ok:
|
||||
|
||||
@@ -2,12 +2,20 @@
|
||||
"""
|
||||
Gestionnaire de vision avancé pour Agent V1.
|
||||
Optimisé pour le streaming fibre avec détection de changement.
|
||||
|
||||
Captures disponibles :
|
||||
- Plein écran (full) : contexte global 1920x1080+
|
||||
- Crop ciblé (crop) : 80x80 autour du clic (apprentissage VLM)
|
||||
- Fenêtre active (window) : image isolée de la fenêtre + métadonnées
|
||||
(titre, rect, coordonnées clic relatives) — cross-platform
|
||||
"""
|
||||
|
||||
import os
|
||||
import time
|
||||
import logging
|
||||
import hashlib
|
||||
import platform
|
||||
from typing import Any, Dict, Optional
|
||||
from PIL import Image, ImageFilter, ImageStat
|
||||
import mss
|
||||
from ..config import TARGETED_CROP_SIZE, SCREENSHOT_QUALITY, BLUR_SENSITIVE
|
||||
@@ -15,6 +23,9 @@ from .blur_sensitive import blur_sensitive_regions
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# OS courant (détecté une seule fois)
|
||||
_SYSTEM = platform.system()
|
||||
|
||||
class VisionCapturer:
|
||||
def __init__(self, session_dir: str):
|
||||
self.session_dir = session_dir
|
||||
@@ -27,13 +38,16 @@ class VisionCapturer:
|
||||
"""
|
||||
Capture l'écran complet.
|
||||
Si force=False, vérifie d'abord si l'écran a changé.
|
||||
|
||||
Enrichit les métadonnées avec le titre de la fenêtre active
|
||||
(utile pour la contextualisation des heartbeats côté serveur).
|
||||
"""
|
||||
try:
|
||||
with mss.mss() as sct:
|
||||
monitor = sct.monitors[1]
|
||||
sct_img = sct.grab(monitor)
|
||||
img = Image.frombytes("RGB", sct_img.size, sct_img.bgra, "raw", "BGRX")
|
||||
|
||||
|
||||
# Détection de changement (pour Heartbeat)
|
||||
if not force:
|
||||
current_hash = self._compute_quick_hash(img)
|
||||
@@ -52,8 +66,24 @@ class VisionCapturer:
|
||||
logger.error(f"Erreur Context Capture: {e}")
|
||||
return ""
|
||||
|
||||
def get_active_window_title(self) -> str:
    """Return the title of the active window (used to enrich heartbeats).

    Graceful fallback: returns an empty string when the title cannot be
    obtained (helper module missing, lookup failure, or a title of ``None``).
    """
    try:
        from ..window_info_crossplatform import get_active_window_info

        title = get_active_window_info().get("title", "")
    except Exception:
        # Deliberate best-effort: a missing title must never break a capture.
        return ""
    # Honour the ``-> str`` contract even if the helper reports ``None``.
    return title or ""
|
||||
|
||||
def capture_dual(self, x: int, y: int, screenshot_id: str, anonymize=False) -> dict:
|
||||
"""Capture duale (Full + Crop) systématique (forcée car liée à une action)."""
|
||||
"""Capture triple (Full + Crop + Fenêtre active) systématique.
|
||||
|
||||
La fenêtre active est un AJOUT — en cas d'échec, le full + crop
|
||||
sont toujours retournés (fallback gracieux).
|
||||
"""
|
||||
try:
|
||||
with mss.mss() as sct:
|
||||
full_path = os.path.join(self.shots_dir, f"{screenshot_id}_full.png")
|
||||
@@ -67,7 +97,7 @@ class VisionCapturer:
|
||||
left = max(0, x - w // 2)
|
||||
top = max(0, y - h // 2)
|
||||
crop_img = img.crop((left, top, left + w, top + h))
|
||||
|
||||
|
||||
if anonymize:
|
||||
crop_img = crop_img.filter(ImageFilter.GaussianBlur(radius=4))
|
||||
|
||||
@@ -82,11 +112,130 @@ class VisionCapturer:
|
||||
# Mise à jour du hash pour le prochain heartbeat
|
||||
self.last_img_hash = self._compute_quick_hash(img)
|
||||
|
||||
return {"full": full_path, "crop": crop_path}
|
||||
result = {"full": full_path, "crop": crop_path}
|
||||
|
||||
# --- Capture de la fenêtre active ---
|
||||
# Ajout non-bloquant : enrichit le résultat avec l'image
|
||||
# de la fenêtre seule + métadonnées (titre, rect, clic relatif)
|
||||
window_info = self.capture_active_window(x, y, screenshot_id, full_img=img)
|
||||
if window_info:
|
||||
result["window_capture"] = window_info
|
||||
|
||||
return result
|
||||
except Exception as e:
|
||||
logger.error(f"Erreur Dual Capture: {e}")
|
||||
return {}
|
||||
|
||||
def capture_active_window(
    self,
    x: int,
    y: int,
    screenshot_id: str,
    full_img: Optional[Image.Image] = None,
) -> Optional[Dict[str, Any]]:
    """Capture the active window alone, plus metadata about it.

    Strategy:
        1. Get the window rectangle from ``get_active_window_rect``.
        2. Crop it out of the full-screen screenshot.
        3. Compute the click coordinates relative to the window.

    Args:
        x, y: Click coordinates in screen pixels.
        screenshot_id: Identifier used to build the output file name.
        full_img: Full-screen screenshot already taken (optional; avoids a
            second grab when called from ``capture_dual``).

    Returns:
        Dict with ``window_image`` (file path), ``window_title``,
        ``app_name``, ``window_rect``, ``window_size``, ``click_in_window``
        and ``click_inside_window``; or None when the window is missing, too
        small, off-screen, or any step fails.
    """
    try:
        from ..window_info_crossplatform import get_active_window_rect

        rect_info = get_active_window_rect()
        if not rect_info:
            logger.debug("Fenêtre active introuvable — skip capture fenêtre")
            return None

        win_rect = rect_info["rect"]  # [left, top, right, bottom]
        win_left, win_top, win_right, win_bottom = win_rect
        win_w, win_h = rect_info["size"]  # [width, height]
        title = rect_info.get("title", "unknown_window")
        app_name = rect_info.get("app_name", "unknown_app")

        # Skip windows that are too small (taskbars, system popups).
        if win_w < 50 or win_h < 50:
            logger.debug(f"Fenêtre trop petite ({win_w}x{win_h}) — skip")
            return None

        # Click coordinates relative to the window's top-left corner.
        click_rel_x = x - win_left
        click_rel_y = y - win_top

        # A click outside the window is flagged but does not stop the capture.
        # The right/bottom edges are exclusive: an offset equal to the width
        # or height is already one pixel past the window.
        click_inside = 0 <= click_rel_x < win_w and 0 <= click_rel_y < win_h

        # --- Crop the window out of the full-screen image ---
        if full_img is None:
            # No screenshot supplied: grab one (standalone use).
            try:
                with mss.mss() as sct:
                    monitor = sct.monitors[1]
                    sct_img = sct.grab(monitor)
                    full_img = Image.frombytes(
                        "RGB", sct_img.size, sct_img.bgra, "raw", "BGRX"
                    )
            except Exception as e:
                logger.error(f"Erreur capture plein écran pour fenêtre : {e}")
                return None

        # Clamp the crop box to the bounds of the full-screen image.
        # NOTE(review): when the window is partly off-screen the saved image
        # starts at the clamped origin while click_in_window stays relative
        # to the real window origin — confirm consumers expect that.
        img_w, img_h = full_img.size
        crop_left = max(0, win_left)
        crop_top = max(0, win_top)
        crop_right = min(img_w, win_right)
        crop_bottom = min(img_h, win_bottom)

        if crop_right <= crop_left or crop_bottom <= crop_top:
            logger.debug("Fenêtre hors écran — skip capture fenêtre")
            return None

        window_img = full_img.crop((crop_left, crop_top, crop_right, crop_bottom))

        # Blur sensitive regions (original comment: AI Act compliance).
        # The return value is ignored — presumably the helper edits the image
        # in place; TODO confirm.
        if BLUR_SENSITIVE:
            blur_sensitive_regions(window_img)

        # Save to disk.
        window_path = os.path.join(
            self.shots_dir, f"{screenshot_id}_window.png"
        )
        window_img.save(window_path, "PNG", quality=SCREENSHOT_QUALITY)

        result = {
            "window_image": window_path,
            "window_title": title,
            "app_name": app_name,
            "window_rect": win_rect,
            "window_size": [win_w, win_h],
            "click_in_window": [click_rel_x, click_rel_y],
            "click_inside_window": click_inside,
        }

        logger.debug(
            f"Fenêtre capturée : {title} ({win_w}x{win_h}) — "
            f"clic relatif ({click_rel_x}, {click_rel_y})"
        )
        return result

    except ImportError as e:
        logger.debug(f"Module fenêtre indisponible : {e}")
        return None
    except Exception as e:
        logger.error(f"Erreur capture fenêtre active : {e}")
        return None
|
||||
|
||||
def _compute_quick_hash(self, img: Image) -> str:
|
||||
"""Calcule un hash rapide basé sur une vignette réduite pour détecter les changements."""
|
||||
# On réduit l'image à 64x64 pour comparer les masses de couleurs (très rapide)
|
||||
|
||||
@@ -17,7 +17,7 @@ from __future__ import annotations
|
||||
|
||||
import platform
|
||||
import subprocess
|
||||
from typing import Dict, Optional
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
|
||||
def _run_cmd(cmd: list[str]) -> Optional[str]:
|
||||
@@ -36,11 +36,11 @@ def get_active_window_info() -> Dict[str, str]:
|
||||
"title": "...",
|
||||
"app_name": "..."
|
||||
}
|
||||
|
||||
|
||||
Détecte automatiquement l'OS et utilise la méthode appropriée.
|
||||
"""
|
||||
system = platform.system()
|
||||
|
||||
|
||||
if system == "Linux":
|
||||
return _get_window_info_linux()
|
||||
elif system == "Windows":
|
||||
@@ -51,6 +51,32 @@ def get_active_window_info() -> Dict[str, str]:
|
||||
return {"title": "unknown_window", "app_name": "unknown_app"}
|
||||
|
||||
|
||||
def get_active_window_rect() -> Optional[Dict[str, Any]]:
    """Return the rectangle of the active window.

    Result shape (as documented by the platform helpers)::

        {
            "title": "...",
            "app_name": "...",
            "rect": [left, top, right, bottom],
            "position": [left, top],
            "size": [width, height],
            "hwnd": int   # Windows only
        }

    Dispatches on ``platform.system()`` to the Windows, Linux or macOS
    helper. Returns None on any other OS, or when the helper returns None
    (documented as: window not found or minimised).
    """
    os_name = platform.system()

    if os_name == "Windows":
        return _get_window_rect_windows()
    if os_name == "Linux":
        return _get_window_rect_linux()
    if os_name == "Darwin":
        return _get_window_rect_macos()
    return None
|
||||
|
||||
|
||||
def _get_window_info_linux() -> Dict[str, str]:
|
||||
"""
|
||||
Linux: utilise xdotool (X11)
|
||||
@@ -178,6 +204,163 @@ def _get_window_info_macos() -> Dict[str, str]:
|
||||
}
|
||||
|
||||
|
||||
def _get_window_rect_windows() -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Windows : utilise pywin32 pour obtenir le rectangle de la fenêtre active.
|
||||
|
||||
Retourne None si la fenêtre est minimisée (icônifiée) ou si pywin32 manque.
|
||||
"""
|
||||
try:
|
||||
import win32gui
|
||||
import win32process
|
||||
import psutil
|
||||
|
||||
hwnd = win32gui.GetForegroundWindow()
|
||||
if not hwnd:
|
||||
return None
|
||||
|
||||
# Ignorer les fenêtres minimisées (pas de contenu visible)
|
||||
if win32gui.IsIconic(hwnd):
|
||||
return None
|
||||
|
||||
title = win32gui.GetWindowText(hwnd) or "unknown_window"
|
||||
|
||||
# Rectangle de la fenêtre (coordonnées écran absolues)
|
||||
left, top, right, bottom = win32gui.GetWindowRect(hwnd)
|
||||
width = right - left
|
||||
height = bottom - top
|
||||
|
||||
# Ignorer les fenêtres de taille nulle ou absurde
|
||||
if width <= 0 or height <= 0:
|
||||
return None
|
||||
|
||||
# Nom du processus
|
||||
_, pid = win32process.GetWindowThreadProcessId(hwnd)
|
||||
try:
|
||||
app_name = psutil.Process(pid).name()
|
||||
except Exception:
|
||||
app_name = "unknown_app"
|
||||
|
||||
return {
|
||||
"title": title,
|
||||
"app_name": app_name,
|
||||
"rect": [left, top, right, bottom],
|
||||
"position": [left, top],
|
||||
"size": [width, height],
|
||||
"hwnd": hwnd,
|
||||
}
|
||||
|
||||
except ImportError:
|
||||
return None
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _get_window_rect_linux() -> Optional[Dict[str, Any]]:
    """Linux (X11): get the active window rectangle through xdotool.

    Requires (per the original note): sudo apt-get install xdotool x11-utils

    The window id is read once and reused for the title, PID and geometry
    queries, so all three describe the same window even if focus changes
    between the subprocess calls.

    Returns:
        Dict with ``title``, ``app_name``, ``rect``, ``position`` and
        ``size``; or None when the id or geometry cannot be obtained, the
        size is not positive, or any step raises.
    """
    try:
        # Identifier of the active window.
        wid = _run_cmd(["xdotool", "getactivewindow"])
        wid = wid.strip() if wid else ""
        if not wid:
            return None

        title = _run_cmd(["xdotool", "getwindowname", wid]) or "unknown_window"
        pid_str = _run_cmd(["xdotool", "getwindowpid", wid])
        app_name = "unknown_app"
        if pid_str:
            app_name = _run_cmd(["ps", "-p", pid_str.strip(), "-o", "comm="]) or "unknown_app"

        # Geometry through xdotool --shell (KEY=VALUE lines: position + size).
        geom_raw = _run_cmd(["xdotool", "getwindowgeometry", "--shell", wid])
        if not geom_raw:
            return None

        vals: Dict[str, int] = {}
        for line in geom_raw.strip().splitlines():
            key, sep, value = line.partition("=")
            if not sep:
                continue
            try:
                vals[key.strip()] = int(value.strip())
            except ValueError:
                # Non-numeric entries are irrelevant to the rectangle.
                continue

        if not {"X", "Y", "WIDTH", "HEIGHT"} <= vals.keys():
            return None

        x, y = vals["X"], vals["Y"]
        w, h = vals["WIDTH"], vals["HEIGHT"]

        # Same guard as the Windows and macOS helpers.
        if w <= 0 or h <= 0:
            return None

        return {
            "title": title,
            "app_name": app_name,
            "rect": [x, y, x + w, y + h],
            "position": [x, y],
            "size": [w, h],
        }

    except Exception:
        # Best-effort helper: any failure means "no rectangle available".
        return None
|
||||
|
||||
|
||||
def _get_window_rect_macos() -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
macOS : utilise Quartz (CGWindowListCopyWindowInfo) pour obtenir le rectangle.
|
||||
|
||||
Nécessite : pip install pyobjc-framework-Quartz
|
||||
"""
|
||||
try:
|
||||
from AppKit import NSWorkspace
|
||||
from Quartz import (
|
||||
CGWindowListCopyWindowInfo,
|
||||
kCGWindowListOptionOnScreenOnly,
|
||||
kCGNullWindowID,
|
||||
)
|
||||
|
||||
active_app = NSWorkspace.sharedWorkspace().activeApplication()
|
||||
app_name = active_app.get("NSApplicationName", "unknown_app")
|
||||
|
||||
window_list = CGWindowListCopyWindowInfo(
|
||||
kCGWindowListOptionOnScreenOnly, kCGNullWindowID
|
||||
)
|
||||
|
||||
for window in window_list:
|
||||
owner_name = window.get("kCGWindowOwnerName", "")
|
||||
if owner_name != app_name:
|
||||
continue
|
||||
|
||||
bounds = window.get("kCGWindowBounds")
|
||||
if not bounds:
|
||||
continue
|
||||
|
||||
x = int(bounds.get("X", 0))
|
||||
y = int(bounds.get("Y", 0))
|
||||
w = int(bounds.get("Width", 0))
|
||||
h = int(bounds.get("Height", 0))
|
||||
if w <= 0 or h <= 0:
|
||||
continue
|
||||
|
||||
title = window.get("kCGWindowName", "unknown_window") or "unknown_window"
|
||||
|
||||
return {
|
||||
"title": title,
|
||||
"app_name": app_name,
|
||||
"rect": [x, y, x + w, y + h],
|
||||
"position": [x, y],
|
||||
"size": [w, h],
|
||||
}
|
||||
|
||||
except ImportError:
|
||||
return None
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# Test rapide
|
||||
if __name__ == "__main__":
|
||||
import time
|
||||
@@ -185,8 +368,13 @@ if __name__ == "__main__":
|
||||
print(f"OS détecté: {platform.system()}")
|
||||
print("\nTest de capture fenêtre active (5 secondes)...")
|
||||
print("Changez de fenêtre pour tester!\n")
|
||||
|
||||
|
||||
for i in range(5):
|
||||
info = get_active_window_info()
|
||||
rect = get_active_window_rect()
|
||||
print(f"[{i+1}] App: {info['app_name']:20s} | Title: {info['title']}")
|
||||
if rect:
|
||||
print(f" Rect: {rect['rect']} | Size: {rect['size']}")
|
||||
else:
|
||||
print(" Rect: non disponible")
|
||||
time.sleep(1)
|
||||
|
||||
@@ -42,6 +42,10 @@ SERVER_URL = os.getenv("RPA_SERVER_URL", "http://localhost:5005/api/v1")
|
||||
UPLOAD_ENDPOINT = f"{SERVER_URL}/traces/upload"
|
||||
STREAMING_ENDPOINT = f"{SERVER_URL}/traces/stream"
|
||||
|
||||
# Token d'authentification API (doit correspondre au token du serveur)
|
||||
# Configurable via variable d'environnement RPA_API_TOKEN
|
||||
API_TOKEN = os.environ.get("RPA_API_TOKEN", "")
|
||||
|
||||
# Paramètres de session
|
||||
MAX_SESSION_DURATION_S = 60 * 60 # 1 heure
|
||||
SESSIONS_ROOT = BASE_DIR / "sessions"
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
214
agent_v0/deploy/windows_client/agent_v1/core/grounding.py
Normal file
214
agent_v0/deploy/windows_client/agent_v1/core/grounding.py
Normal file
@@ -0,0 +1,214 @@
|
||||
# agent_v1/core/grounding.py
|
||||
"""
|
||||
Module Grounding — localisation pure d'éléments UI sur l'écran.
|
||||
|
||||
Responsabilité unique : "Trouve l'élément X sur l'écran et retourne ses coordonnées."
|
||||
Ne prend AUCUNE décision. Si l'élément n'est pas trouvé → retourne NOT_FOUND.
|
||||
|
||||
Stratégies disponibles (cascade configurable) :
|
||||
1. Serveur SomEngine + VLM (GPU distant)
|
||||
2. Template matching local (CPU, ~10ms)
|
||||
3. VLM local direct (CPU/GPU local)
|
||||
|
||||
Séparé de Policy (qui décide quoi faire quand grounding échoue).
|
||||
Ref: docs/PLAN_ACTEUR_V1.md — Architecture MICRO (grounding + exécution)
|
||||
"""
|
||||
|
||||
import base64
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)


@dataclass
class GroundingResult:
    """Outcome of one visual localisation attempt."""

    found: bool  # whether the element was found
    x_pct: float = 0.0  # X position as a fraction (documented range 0.0-1.0)
    y_pct: float = 0.0  # Y position as a fraction (documented range 0.0-1.0)
    method: str = ""  # method used (server_som, anchor_template, vlm_direct...)
    score: float = 0.0  # confidence (documented range 0.0-1.0)
    elapsed_ms: float = 0.0  # resolution time in milliseconds
    detail: str = ""  # extra info (matched label, failure reason)
    raw: Optional[Dict] = None  # raw resolver payload (for debugging)

    def to_dict(self) -> Dict[str, Any]:
        """Return a dict view of the result; ``raw`` is left out."""
        return {
            "found": self.found,
            "x_pct": self.x_pct,
            "y_pct": self.y_pct,
            "method": self.method,
            "score": round(self.score, 3),
            "elapsed_ms": round(self.elapsed_ms, 1),
            "detail": self.detail,
        }


# Shared "not found" result.
NOT_FOUND = GroundingResult(found=False, detail="Aucune méthode n'a trouvé l'élément")


class GroundingEngine:
    """Visual localisation engine for UI elements.

    Wraps the resolution cascade (server -> template -> local VLM) behind a
    single interface. It takes no decision; that is PolicyEngine's role.

    Usage:
        engine = GroundingEngine(executor)
        result = engine.locate(
            server_url, target_spec, fallback_x, fallback_y,
            screen_width, screen_height,
        )
        if result.found:
            click(result.x_pct, result.y_pct)
    """

    # Maps a learned method name to the cascade strategy that produces it.
    _LEARNED_TO_STRATEGY = {
        "som_text_match": "server",
        "grounding_vlm": "server",
        "server_som": "server",
        "anchor_template": "template",
        "template_matching": "template",
        "hybrid_text_direct": "vlm_local",
        "hybrid_vlm_text": "vlm_local",
        "vlm_direct": "vlm_local",
    }

    def __init__(self, executor):
        """
        Args:
            executor: ActionExecutorV1 — provides the existing resolver
                methods (``_capture_screenshot_b64``,
                ``_server_resolve_target``, ``_template_match_anchor``,
                ``_hybrid_vlm_resolve``).
        """
        self._executor = executor

    def locate(
        self,
        server_url: str,
        target_spec: Dict[str, Any],
        fallback_x: float,
        fallback_y: float,
        screen_width: int,
        screen_height: int,
        strategies: Optional[List[str]] = None,
    ) -> GroundingResult:
        """Locate a UI element on screen.

        Runs the strategies in order and returns as soon as one finds the
        element.

        Args:
            server_url: Server URL (SomEngine + GPU VLM).
            target_spec: Target specification (by_text, anchor,
                vlm_description...). An optional ``_learned_strategy`` entry
                moves the matching strategy to the front of the cascade.
            fallback_x, fallback_y: Fallback coordinates from the recording.
            screen_width, screen_height: Screen resolution.
            strategies: Ordered list of strategies to try.
                Default: ["server", "template", "vlm_local"].

        Returns:
            GroundingResult with ``found=True`` and coordinates, or a
            ``found=False`` result whose ``detail`` explains the failure.
        """
        if strategies is None:
            strategies = ["server", "template", "vlm_local"]

        # Learning loop: if a method is known to work for this target, try
        # the corresponding strategy first.
        learned = target_spec.get("_learned_strategy", "")
        if learned:
            preferred = self._LEARNED_TO_STRATEGY.get(learned, "")
            if preferred and preferred in strategies:
                strategies = [preferred] + [s for s in strategies if s != preferred]
                logger.info(
                    f"Grounding: stratégie réordonnée par l'apprentissage → "
                    f"{strategies} (learned={learned})"
                )

        # perf_counter is monotonic, so durations cannot come out negative
        # when the wall clock is adjusted.
        t_start = time.perf_counter()
        screenshot_b64 = self._executor._capture_screenshot_b64(max_width=0, quality=75)
        if not screenshot_b64:
            return GroundingResult(
                found=False, detail="Capture screenshot échouée",
                elapsed_ms=(time.perf_counter() - t_start) * 1000,
            )

        for strategy in strategies:
            result = self._try_strategy(
                strategy, server_url, screenshot_b64, target_spec,
                fallback_x, fallback_y, screen_width, screen_height,
            )
            if result.found:
                result.elapsed_ms = (time.perf_counter() - t_start) * 1000
                return result

        return GroundingResult(
            found=False,
            detail=f"Toutes les stratégies ont échoué ({', '.join(strategies)})",
            elapsed_ms=(time.perf_counter() - t_start) * 1000,
        )

    @staticmethod
    def _result_from_raw(
        raw: Optional[Dict[str, Any]],
        method: str,
        with_label: bool = True,
    ) -> Optional[GroundingResult]:
        """Turn a resolver payload into a found result, or None if unusable."""
        if not raw or not raw.get("resolved"):
            return None
        x_pct, y_pct = raw.get("x_pct"), raw.get("y_pct")
        if x_pct is None or y_pct is None:
            # "resolved" without coordinates cannot be acted upon.
            return None
        detail = ""
        if with_label:
            # matched_element may be absent or None.
            detail = (raw.get("matched_element") or {}).get("label", "")
        return GroundingResult(
            found=True,
            x_pct=x_pct,
            y_pct=y_pct,
            method=method,
            score=raw.get("score") or 0.0,
            detail=detail,
            raw=raw,
        )

    def _try_strategy(
        self,
        strategy: str,
        server_url: str,
        screenshot_b64: str,
        target_spec: Dict[str, Any],
        fallback_x: float,
        fallback_y: float,
        screen_width: int,
        screen_height: int,
    ) -> GroundingResult:
        """Try a single grounding strategy.

        A strategy whose precondition is not met (no server URL, no anchor
        image, no text/description), or an unknown strategy name, yields a
        ``found=False`` result without calling the executor.
        """
        result: Optional[GroundingResult] = None

        if strategy == "server":
            if server_url:
                raw = self._executor._server_resolve_target(
                    server_url, screenshot_b64, target_spec,
                    fallback_x, fallback_y, screen_width, screen_height,
                )
                method = raw.get("method", "server") if raw else "server"
                result = self._result_from_raw(raw, method)

        elif strategy == "template":
            anchor_b64 = target_spec.get("anchor_image_base64", "")
            if anchor_b64:
                raw = self._executor._template_match_anchor(
                    screenshot_b64, anchor_b64, screen_width, screen_height,
                )
                result = self._result_from_raw(raw, "anchor_template", with_label=False)

        elif strategy == "vlm_local":
            by_text = target_spec.get("by_text", "")
            vlm_desc = target_spec.get("vlm_description", "")
            if vlm_desc or by_text:
                raw = self._executor._hybrid_vlm_resolve(
                    screenshot_b64, target_spec, screen_width, screen_height,
                )
                method = raw.get("method", "vlm_local") if raw else "vlm_local"
                result = self._result_from_raw(raw, method)

        if result is not None:
            return result
        return GroundingResult(found=False, method=strategy, detail=f"{strategy}: pas trouvé")
|
||||
152
agent_v0/deploy/windows_client/agent_v1/core/policy.py
Normal file
152
agent_v0/deploy/windows_client/agent_v1/core/policy.py
Normal file
@@ -0,0 +1,152 @@
|
||||
# agent_v1/core/policy.py
|
||||
"""
|
||||
Module Policy — décisions intelligentes quand le grounding échoue.
|
||||
|
||||
Responsabilité unique : "Le Grounding dit NOT_FOUND. Que fait-on ?"
|
||||
Ne localise AUCUN élément — c'est le rôle du Grounding.
|
||||
|
||||
Décisions possibles :
|
||||
- RETRY : re-tenter le grounding (après popup fermée, par exemple)
|
||||
- SKIP : l'action n'est plus nécessaire (état déjà atteint)
|
||||
- ABORT : arrêter le workflow (état incohérent)
|
||||
- SUPERVISE : rendre la main à l'utilisateur
|
||||
|
||||
Séparé de Grounding (qui localise les éléments).
|
||||
Ref: docs/PLAN_ACTEUR_V1.md — Architecture MÉSO (acteur intelligent)
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Decision(Enum):
|
||||
"""Décisions possibles quand le grounding échoue."""
|
||||
RETRY = "retry" # Re-tenter (après correction : popup fermée, navigation...)
|
||||
SKIP = "skip" # Action inutile (état déjà atteint)
|
||||
ABORT = "abort" # Arrêter le workflow (état incohérent)
|
||||
SUPERVISE = "supervise" # Rendre la main à l'utilisateur (Léa dit "je bloque")
|
||||
CONTINUE = "continue" # Continuer malgré l'échec (action non critique)
|
||||
|
||||
|
||||
@dataclass
|
||||
class PolicyDecision:
|
||||
"""Résultat d'une décision Policy."""
|
||||
decision: Decision
|
||||
reason: str # Explication de la décision
|
||||
action_taken: str = "" # Action corrective effectuée (ex: "popup fermée")
|
||||
elapsed_ms: float = 0.0
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"decision": self.decision.value,
|
||||
"reason": self.reason,
|
||||
"action_taken": self.action_taken,
|
||||
"elapsed_ms": round(self.elapsed_ms, 1),
|
||||
}
|
||||
|
||||
|
||||
class PolicyEngine:
|
||||
"""Moteur de décision quand le grounding échoue.
|
||||
|
||||
Cascade de décision :
|
||||
1. Popup détectée ? → fermer et RETRY
|
||||
2. Acteur gemma4 → SKIP / ABORT / SUPERVISE
|
||||
3. Fallback → SUPERVISE (rendre la main)
|
||||
|
||||
Usage :
|
||||
policy = PolicyEngine(executor)
|
||||
decision = policy.decide(action, target_spec, grounding_result)
|
||||
if decision.decision == Decision.RETRY:
|
||||
# re-tenter le grounding
|
||||
elif decision.decision == Decision.SKIP:
|
||||
# marquer comme réussi, passer à la suite
|
||||
"""
|
||||
|
||||
def __init__(self, executor):
|
||||
self._executor = executor
|
||||
|
||||
def decide(
|
||||
self,
|
||||
action: Dict[str, Any],
|
||||
target_spec: Dict[str, Any],
|
||||
retry_count: int = 0,
|
||||
max_retries: int = 1,
|
||||
) -> PolicyDecision:
|
||||
"""Décider quoi faire quand le grounding a échoué.
|
||||
|
||||
Cascade :
|
||||
1. Si c'est le premier essai → tenter de fermer une popup → RETRY
|
||||
2. Si retry déjà fait → demander à l'acteur gemma4
|
||||
3. Selon gemma4 : SKIP, ABORT, ou SUPERVISE
|
||||
|
||||
Args:
|
||||
action: L'action qui a échoué
|
||||
target_spec: La cible non trouvée
|
||||
retry_count: Nombre de retries déjà faits
|
||||
max_retries: Maximum de retries autorisés
|
||||
"""
|
||||
t_start = time.time()
|
||||
|
||||
# ── Étape 1 : Tentative de fermeture popup (premier essai) ──
|
||||
if retry_count == 0:
|
||||
popup_handled = self._try_close_popup()
|
||||
if popup_handled:
|
||||
return PolicyDecision(
|
||||
decision=Decision.RETRY,
|
||||
reason="Popup détectée et fermée, re-tentative",
|
||||
action_taken="popup_closed",
|
||||
elapsed_ms=(time.time() - t_start) * 1000,
|
||||
)
|
||||
|
||||
# ── Étape 2 : Max retries atteint → acteur gemma4 ──
|
||||
if retry_count >= max_retries:
|
||||
actor_decision = self._ask_actor(action, target_spec)
|
||||
|
||||
if actor_decision == "PASSER":
|
||||
return PolicyDecision(
|
||||
decision=Decision.SKIP,
|
||||
reason="Acteur gemma4 : l'état est déjà atteint",
|
||||
elapsed_ms=(time.time() - t_start) * 1000,
|
||||
)
|
||||
elif actor_decision == "STOPPER":
|
||||
return PolicyDecision(
|
||||
decision=Decision.ABORT,
|
||||
reason="Acteur gemma4 : état incohérent, arrêt",
|
||||
elapsed_ms=(time.time() - t_start) * 1000,
|
||||
)
|
||||
else:
|
||||
# EXECUTER ou inconnu → pause supervisée
|
||||
return PolicyDecision(
|
||||
decision=Decision.SUPERVISE,
|
||||
reason=f"Acteur gemma4 : {actor_decision}, pause supervisée",
|
||||
elapsed_ms=(time.time() - t_start) * 1000,
|
||||
)
|
||||
|
||||
# ── Étape 3 : Encore des retries disponibles → RETRY ──
|
||||
return PolicyDecision(
|
||||
decision=Decision.RETRY,
|
||||
reason=f"Retry {retry_count + 1}/{max_retries}",
|
||||
elapsed_ms=(time.time() - t_start) * 1000,
|
||||
)
|
||||
|
||||
def _try_close_popup(self) -> bool:
|
||||
"""Tenter de fermer une popup via le handler VLM existant."""
|
||||
try:
|
||||
return self._executor._handle_popup_vlm()
|
||||
except Exception as e:
|
||||
logger.debug(f"Policy: popup handler échoué : {e}")
|
||||
return False
|
||||
|
||||
def _ask_actor(self, action: Dict, target_spec: Dict) -> str:
    """Ask the gemma4 actor for a verdict (PASSER/EXECUTER/STOPPER).

    Args:
        action: The action that failed.
        target_spec: The target that could not be found.

    Returns:
        The value returned by the executor's ``_actor_decide()``, or
        ``"EXECUTER"`` when that call raises.
    """
    try:
        verdict = self._executor._actor_decide(action, target_spec)
    except Exception as exc:
        logger.debug(f"Policy: acteur gemma4 échoué : {exc}")
        # Fallback → supervised
        verdict = "EXECUTER"
    return verdict
|
||||
294
agent_v0/deploy/windows_client/agent_v1/core/uia_helper.py
Normal file
294
agent_v0/deploy/windows_client/agent_v1/core/uia_helper.py
Normal file
@@ -0,0 +1,294 @@
|
||||
# core/workflow/uia_helper.py
|
||||
"""
|
||||
UIAHelper — Wrapper Python pour lea_uia.exe (helper Rust UI Automation).
|
||||
|
||||
Expose une API Python simple pour interroger UIA via le binaire Rust.
|
||||
Communique via subprocess + stdin/stdout JSON.
|
||||
|
||||
Pourquoi un helper Rust ?
|
||||
- 5-10x plus rapide que pywinauto (10-20ms vs 50-200ms)
|
||||
- Binaire standalone ~500 Ko, aucune dépendance runtime
|
||||
- Pas de problèmes de threading COM en Python
|
||||
- Crash-safe (le crash du helper n'affecte pas l'agent Python)
|
||||
|
||||
Architecture :
|
||||
Python executor
|
||||
↓ subprocess.run
|
||||
lea_uia.exe query --x 812 --y 436
|
||||
↓ UIA API Windows
|
||||
JSON response
|
||||
↓ stdout
|
||||
Python executor parse JSON
|
||||
|
||||
Si lea_uia.exe n'est pas disponible (Linux, binaire absent, crash) :
|
||||
toutes les méthodes retournent None → fallback vision automatique.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import subprocess
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Timeout par défaut pour les appels UIA (en secondes)
|
||||
_DEFAULT_TIMEOUT = 5.0
|
||||
|
||||
# Hide the console window when spawning lea_uia.exe on Windows.
# Without this flag every call (one per user click while recording) briefly
# pops up a black cmd window: it slows the mouse down and pollutes captured
# screenshots (the VLM may "see" the lea_uia.exe path as clicked text).
#
# 0x08000000 is the Windows API value of CREATE_NO_WINDOW; getattr() prefers
# the constant when the running Python exposes it. On Linux/Mac the flag
# value is 0.
_SUBPROCESS_CREATION_FLAGS = (
    getattr(subprocess, "CREATE_NO_WINDOW", 0x08000000)
    if platform.system() == "Windows"
    else 0
)
|
||||
|
||||
|
||||
@dataclass
class UiaElement:
    """Python representation of a UIA element.

    The defaults describe an unusable element (disabled, off-screen, empty
    rectangle), so a partially filled instance is never reported as clickable.
    """
    name: str = ""
    control_type: str = ""
    class_name: str = ""
    automation_id: str = ""
    # Unpacked as (x1, y1, x2, y2) by center(), width() and height().
    bounding_rect: Tuple[int, int, int, int] = (0, 0, 0, 0)
    is_enabled: bool = False
    is_offscreen: bool = True
    # Ancestor descriptors; path_signature() reads their "control_type" and "name".
    parent_path: List[Dict[str, str]] = field(default_factory=list)
    process_name: str = ""

    def center(self) -> Tuple[int, int]:
        """Return the centre of the bounding rectangle (integer pixels)."""
        x1, y1, x2, y2 = self.bounding_rect
        return ((x1 + x2) // 2, (y1 + y2) // 2)

    def width(self) -> int:
        """Return the rectangle width (x2 - x1)."""
        return self.bounding_rect[2] - self.bounding_rect[0]

    def height(self) -> int:
        """Return the rectangle height (y2 - y1)."""
        return self.bounding_rect[3] - self.bounding_rect[1]

    def is_clickable(self) -> bool:
        """Return True when the element is enabled, on-screen and has a non-empty area."""
        return (
            self.is_enabled
            and not self.is_offscreen
            and self.width() > 0
            and self.height() > 0
        )

    def path_signature(self) -> str:
        """Return a ``Type[Name] > Type[Name]`` signature used to find the element again.

        Unnamed parents are skipped; a parent without a ``control_type`` key
        contributes an empty type instead of raising ``KeyError``.
        """
        parts = [
            f"{p.get('control_type', '')}[{p['name']}]"
            for p in self.parent_path
            if p.get("name")
        ]
        parts.append(f"{self.control_type}[{self.name}]")
        return " > ".join(parts)

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict form of the element (the rectangle becomes a list)."""
        return {
            "name": self.name,
            "control_type": self.control_type,
            "class_name": self.class_name,
            "automation_id": self.automation_id,
            "bounding_rect": list(self.bounding_rect),
            "is_enabled": self.is_enabled,
            "is_offscreen": self.is_offscreen,
            "parent_path": self.parent_path,
            "process_name": self.process_name,
        }

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "UiaElement":
        """Build an element from a dict such as the one produced by ``to_dict()``.

        Missing keys and explicit ``None`` values both fall back to the field
        default. ``bounding_rect`` may be a list or a tuple with at least four
        items (extra items are ignored); anything else yields ``(0, 0, 0, 0)``.
        """
        def value(key: str, default: Any) -> Any:
            found = d.get(key)
            return default if found is None else found

        rect = d.get("bounding_rect")
        if isinstance(rect, (list, tuple)) and len(rect) >= 4:
            rect = tuple(rect[:4])
        else:
            rect = (0, 0, 0, 0)
        return cls(
            name=value("name", ""),
            control_type=value("control_type", ""),
            class_name=value("class_name", ""),
            automation_id=value("automation_id", ""),
            bounding_rect=rect,
            is_enabled=value("is_enabled", False),
            is_offscreen=value("is_offscreen", True),
            parent_path=value("parent_path", []),
            process_name=value("process_name", ""),
        )
|
||||
|
||||
|
||||
class UIAHelper:
    """Python wrapper around the lea_uia.exe helper binary.

    Each query spawns the helper with ``subprocess.run`` and parses the JSON
    it prints on stdout. When the helper is unavailable or a call fails, the
    query methods return ``None`` (``health()`` returns ``False``) so that
    callers can fall back to another strategy.
    """

    # Extra seconds granted to the helper process on top of a search timeout,
    # so it can answer by itself instead of being killed mid-search.
    _SEARCH_GRACE_S = 1.0

    def __init__(self, helper_path: str = "", timeout: float = _DEFAULT_TIMEOUT):
        """Create a wrapper.

        Args:
            helper_path: Path to lea_uia.exe; when empty, standard locations
                are searched.
            timeout: Default per-call subprocess timeout, in seconds.
        """
        self._helper_path = helper_path or self._find_helper()
        self._timeout = timeout
        self._available = self._check_available()

    def _find_helper(self) -> str:
        """Return the absolute path of the first existing candidate, or ""."""
        candidates = [
            r"C:\Lea\helpers\lea_uia.exe",
            os.path.join(os.path.dirname(__file__), "..", "..",
                         "agent_rust", "lea_uia", "target",
                         "x86_64-pc-windows-gnu", "release", "lea_uia.exe"),
            "./helpers/lea_uia.exe",
            "lea_uia.exe",
        ]
        for path in candidates:
            if os.path.isfile(path):
                return os.path.abspath(path)
        return ""

    def _check_available(self) -> bool:
        """Return True when running on Windows and the helper binary exists.

        This is a static check only: the helper is not executed here. Use
        ``health()`` to verify that UIA actually answers.
        """
        if platform.system() != "Windows":
            logger.debug("UIAHelper: Linux/Mac — helper désactivé")
            return False
        if not self._helper_path:
            logger.debug("UIAHelper: lea_uia.exe introuvable")
            return False
        if not os.path.isfile(self._helper_path):
            logger.debug(f"UIAHelper: chemin invalide {self._helper_path}")
            return False
        return True

    @property
    def available(self) -> bool:
        """Whether the helper passed the availability check at construction."""
        return self._available

    @property
    def helper_path(self) -> str:
        """Resolved path of the helper binary ("" when none was found)."""
        return self._helper_path

    def _run(
        self,
        args: List[str],
        timeout: Optional[float] = None,
    ) -> Optional[Dict[str, Any]]:
        """Run lea_uia.exe with *args* and return its JSON object.

        Args:
            args: Command-line arguments passed after the helper path.
            timeout: Subprocess timeout in seconds; defaults to the
                instance-wide timeout.

        Returns:
            The decoded JSON object, or ``None`` when the helper is
            unavailable, exits with a non-zero code, times out, prints nothing,
            prints invalid JSON, or prints JSON that is not an object.
        """
        if not self._available:
            return None
        effective_timeout = self._timeout if timeout is None else timeout
        try:
            result = subprocess.run(
                [self._helper_path] + args,
                capture_output=True,
                text=True,
                timeout=effective_timeout,
                encoding="utf-8",
                errors="replace",
                creationflags=_SUBPROCESS_CREATION_FLAGS,
            )
            if result.returncode != 0:
                logger.debug(
                    f"UIAHelper: exit code {result.returncode}, "
                    f"stderr: {result.stderr[:200]}"
                )
                return None
            output = result.stdout.strip()
            if not output:
                return None
            data = json.loads(output)
            # Callers use dict access; reject arrays, strings, numbers, null.
            if not isinstance(data, dict):
                logger.debug(
                    f"UIAHelper: JSON inattendu (type {type(data).__name__})"
                )
                return None
            return data
        except subprocess.TimeoutExpired:
            logger.debug(f"UIAHelper: timeout ({effective_timeout}s) sur {args}")
            return None
        except json.JSONDecodeError as e:
            logger.debug(f"UIAHelper: JSON invalide — {e}")
            return None
        except Exception as e:
            # Deliberate catch-all: a helper failure must never crash the agent.
            logger.debug(f"UIAHelper: erreur {e}")
            return None

    @staticmethod
    def _element_from(data: Optional[Dict[str, Any]]) -> Optional[UiaElement]:
        """Extract the ``element`` of an ``"ok"`` response, or return None."""
        if not data or data.get("status") != "ok":
            return None
        elem_data = data.get("element")
        if not elem_data or not isinstance(elem_data, dict):
            return None
        return UiaElement.from_dict(elem_data)

    def health(self) -> bool:
        """Return True when the helper answers ``health`` with status "ok"."""
        data = self._run(["health"])
        return data is not None and data.get("status") == "ok"

    def query_at(
        self,
        x: int,
        y: int,
        with_parents: bool = True,
    ) -> Optional[UiaElement]:
        """Return the UIA element at a screen position.

        Args:
            x, y: Absolute pixel coordinates.
            with_parents: Include the parent hierarchy.

        Returns:
            A UiaElement when found, None otherwise (no element or UIA
            unavailable).
        """
        args = ["query", "--x", str(x), "--y", str(y)]
        if not with_parents:
            args.append("--with-parents=false")
        return self._element_from(self._run(args))

    def find_by_name(
        self,
        name: str,
        control_type: Optional[str] = None,
        automation_id: Optional[str] = None,
        window: Optional[str] = None,
        timeout_ms: int = 2000,
    ) -> Optional[UiaElement]:
        """Search for an element by name, with optional filters.

        Args:
            name: Exact element name.
            control_type: Control type (Button, Edit, MenuItem...).
            automation_id: Automation ID.
            window: Restrict the search to a specific window.
            timeout_ms: Search timeout in milliseconds, passed to the helper.

        Returns:
            A UiaElement when found, None otherwise.
        """
        args = ["find", "--name", name, "--timeout-ms", str(timeout_ms)]
        if control_type:
            args.extend(["--control-type", control_type])
        if automation_id:
            args.extend(["--automation-id", automation_id])
        if window:
            args.extend(["--window", window])

        # Never kill the helper before its own search timeout has elapsed.
        search_budget = max(timeout_ms, 0) / 1000.0 + self._SEARCH_GRACE_S
        data = self._run(args, timeout=max(self._timeout, search_budget))
        return self._element_from(data)

    def capture_focused(self, max_depth: int = 3) -> Optional[UiaElement]:
        """Capture the focused element and its context.

        Args:
            max_depth: Value passed to the helper's ``--max-depth`` option.

        Returns:
            A UiaElement when the helper reports one, None otherwise.
        """
        data = self._run(["capture", "--max-depth", str(max_depth)])
        return self._element_from(data)
|
||||
|
||||
|
||||
# Instance globale partagée (singleton léger)
|
||||
_SHARED_HELPER: Optional[UIAHelper] = None
|
||||
|
||||
|
||||
def get_shared_helper() -> UIAHelper:
    """Return the module-wide UIAHelper, creating it on first use."""
    global _SHARED_HELPER
    helper = _SHARED_HELPER
    if helper is None:
        helper = UIAHelper()
        _SHARED_HELPER = helper
    return helper
|
||||
@@ -1,55 +0,0 @@
|
||||
# window_info.py
|
||||
"""
|
||||
Récupération des informations sur la fenêtre active (X11).
|
||||
|
||||
v0 :
|
||||
- utilise xdotool pour obtenir :
|
||||
- le titre de la fenêtre active
|
||||
- le PID de la fenêtre active, puis le nom du process via ps
|
||||
|
||||
Si quelque chose ne fonctionne pas, on renvoie des valeurs "unknown".
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
from typing import Dict, Optional
|
||||
|
||||
|
||||
def _run_cmd(cmd: list[str]) -> Optional[str]:
    """Run *cmd* and return its stripped stdout as text, or None on any error."""
    try:
        raw = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
        text = raw.decode("utf-8", errors="ignore")
    except Exception:
        # Best effort: a missing tool or non-zero exit simply yields None.
        return None
    return text.strip()
|
||||
|
||||
|
||||
def get_active_window_info() -> Dict[str, str]:
    """Return ``{"title": ..., "app_name": ...}`` for the active window.

    Both values come from xdotool (the process name is then resolved with
    ``ps``); anything that cannot be obtained is replaced by
    ``"unknown_window"`` / ``"unknown_app"``.
    """
    window_title = _run_cmd(["xdotool", "getactivewindow", "getwindowname"])
    raw_pid = _run_cmd(["xdotool", "getactivewindow", "getwindowpid"])

    # Resolve the binary name through ps, only when a PID was reported.
    binary_name: Optional[str] = (
        _run_cmd(["ps", "-p", raw_pid.strip(), "-o", "comm="]) if raw_pid else None
    )

    return {
        "title": window_title or "unknown_window",
        "app_name": binary_name or "unknown_app",
    }
|
||||
@@ -1,192 +0,0 @@
|
||||
# window_info_crossplatform.py
|
||||
"""
|
||||
Récupération des informations sur la fenêtre active - CROSS-PLATFORM
|
||||
|
||||
Supporte:
|
||||
- Linux (X11 via xdotool)
|
||||
- Windows (via pywin32)
|
||||
- macOS (via pyobjc)
|
||||
|
||||
Installation des dépendances:
|
||||
pip install pywin32 # Windows
|
||||
pip install pyobjc-framework-Cocoa # macOS
|
||||
pip install psutil # Tous OS
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import platform
|
||||
import subprocess
|
||||
from typing import Dict, Optional
|
||||
|
||||
|
||||
def _run_cmd(cmd: list[str]) -> Optional[str]:
    """Run *cmd* and return its stripped stdout as text, or None on any error."""
    try:
        raw = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
        text = raw.decode("utf-8", errors="ignore")
    except Exception:
        # Best effort: a missing tool or non-zero exit simply yields None.
        return None
    return text.strip()
|
||||
|
||||
|
||||
def get_active_window_info() -> Dict[str, str]:
    """Return ``{"title": ..., "app_name": ...}`` for the active window.

    The OS is detected with ``platform.system()`` and the matching
    platform-specific implementation is called; unsupported systems get the
    ``"unknown_window"`` / ``"unknown_app"`` placeholders.
    """
    handlers = {
        "Linux": _get_window_info_linux,
        "Windows": _get_window_info_windows,
        "Darwin": _get_window_info_macos,  # macOS
    }
    handler = handlers.get(platform.system())
    if handler is None:
        return {"title": "unknown_window", "app_name": "unknown_app"}
    return handler()
|
||||
|
||||
|
||||
def _get_window_info_linux() -> Dict[str, str]:
    """Linux implementation based on xdotool (X11).

    Requires: sudo apt-get install xdotool
    """
    window_title = _run_cmd(["xdotool", "getactivewindow", "getwindowname"])
    raw_pid = _run_cmd(["xdotool", "getactivewindow", "getwindowpid"])

    # Resolve the binary name through ps, only when a PID was reported.
    binary_name: Optional[str] = (
        _run_cmd(["ps", "-p", raw_pid.strip(), "-o", "comm="]) if raw_pid else None
    )

    return {
        "title": window_title or "unknown_window",
        "app_name": binary_name or "unknown_app",
    }
|
||||
|
||||
|
||||
def _get_window_info_windows() -> Dict[str, str]:
    """Windows implementation based on pywin32 + psutil.

    Requires: pip install pywin32 psutil

    Returns placeholder values when the modules cannot be imported, and an
    ``"error: ..."`` title when any other exception occurs.
    """
    try:
        import win32gui
        import win32process
        import psutil

        # Foreground window and its title
        foreground = win32gui.GetForegroundWindow()
        window_title = win32gui.GetWindowText(foreground) or "unknown_window"

        # Owning process id
        _thread_id, process_id = win32process.GetWindowThreadProcessId(foreground)

        # Process name
        try:
            executable = psutil.Process(process_id).name()
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            executable = "unknown_app"

        return {
            "title": window_title,
            "app_name": executable,
        }

    except ImportError:
        # pywin32 or psutil is not installed
        return {
            "title": "unknown_window (pywin32 missing)",
            "app_name": "unknown_app (pywin32 missing)",
        }
    except Exception as e:
        return {
            "title": f"error: {e}",
            "app_name": "unknown_app",
        }
|
||||
|
||||
|
||||
def _get_window_info_macos() -> Dict[str, str]:
    """macOS implementation based on pyobjc (AppKit + Quartz).

    Requires: pip install pyobjc-framework-Cocoa

    Note: requires the "Accessibility" permission in System Preferences.

    Returns placeholder values when the modules cannot be imported, and an
    ``"error: ..."`` title when any other exception occurs.
    """
    try:
        from AppKit import NSWorkspace
        from Quartz import (
            CGWindowListCopyWindowInfo,
            kCGWindowListOptionOnScreenOnly,
            kCGNullWindowID
        )

        # Active application
        frontmost = NSWorkspace.sharedWorkspace().activeApplication()
        app_name = frontmost.get('NSApplicationName', 'unknown_app')

        # Window title (via Quartz): first titled on-screen window that is
        # owned by the active application.
        on_screen = CGWindowListCopyWindowInfo(
            kCGWindowListOptionOnScreenOnly,
            kCGNullWindowID
        )

        title = "unknown_window"
        for entry in on_screen:
            if entry.get('kCGWindowOwnerName', '') != app_name:
                continue
            candidate = entry.get('kCGWindowName', '')
            if not candidate:
                continue
            title = candidate
            break

        return {
            "title": title,
            "app_name": app_name,
        }

    except ImportError:
        # pyobjc is not installed
        return {
            "title": "unknown_window (pyobjc missing)",
            "app_name": "unknown_app (pyobjc missing)",
        }
    except Exception as e:
        return {
            "title": f"error: {e}",
            "app_name": "unknown_app",
        }
|
||||
|
||||
|
||||
# Quick manual check: print the active window once per second, five times.
if __name__ == "__main__":
    import time

    print(f"OS détecté: {platform.system()}")
    print("\nTest de capture fenêtre active (5 secondes)...")
    print("Changez de fenêtre pour tester!\n")

    sample_count = 5
    for i in range(sample_count):
        info = get_active_window_info()
        print(f"[{i+1}] App: {info['app_name']:20s} | Title: {info['title']}")
        time.sleep(1)
|
||||
@@ -14,7 +14,7 @@ import uuid
|
||||
import time
|
||||
import logging
|
||||
import threading
|
||||
from .config import SESSIONS_ROOT, AGENT_VERSION, SERVER_URL, MACHINE_ID
|
||||
from .config import SESSIONS_ROOT, AGENT_VERSION, SERVER_URL, MACHINE_ID, API_TOKEN
|
||||
from .core.captor import EventCaptorV1
|
||||
from .core.executor import ActionExecutorV1
|
||||
from .network.streamer import TraceStreamer
|
||||
@@ -84,9 +84,11 @@ class AgentV1:
|
||||
# Executeur pour le replay (doit exister avant le poll)
|
||||
self._executor = ActionExecutorV1()
|
||||
|
||||
# Boucle de polling replay PERMANENTE (pas besoin de session active)
|
||||
# Boucles permanentes (pas besoin de session active)
|
||||
self.running = True
|
||||
self._bg_vision = VisionCapturer(str(SESSIONS_ROOT / "_background"))
|
||||
threading.Thread(target=self._replay_poll_loop, daemon=True).start()
|
||||
threading.Thread(target=self._background_heartbeat_loop, daemon=True).start()
|
||||
|
||||
# UI Tray intelligent (remplace TrayAppV1, plus de PyQt5)
|
||||
self.ui = SmartTrayV1(
|
||||
@@ -126,11 +128,59 @@ class AgentV1:
|
||||
# Watchdog de Commandes (GHOST Replay — legacy fichier)
|
||||
threading.Thread(target=self._command_watchdog_loop, daemon=True).start()
|
||||
|
||||
# Boucle de polling replay (P0-5 — pull depuis le serveur)
|
||||
threading.Thread(target=self._replay_poll_loop, daemon=True).start()
|
||||
# Note: la boucle de polling replay est déjà lancée dans __init__
|
||||
# Ne PAS en relancer une ici — deux threads poll simultanés causent
|
||||
# une race condition où les actions sont consommées mais pas exécutées.
|
||||
|
||||
logger.info(f"Session {self.session_id} ({workflow_name}) sur machine {self.machine_id} en cours...")
|
||||
|
||||
_last_bg_hash: str = ""
|
||||
|
||||
def _background_heartbeat_loop(self):
    """Permanent heartbeat: upload a screenshot to the server every 5 s.

    Runs even without an active session, so that the VWB can capture Windows.
    An iteration uploads nothing when a recording session is active, when no
    screenshot could be captured, or when the screenshot hash is non-empty and
    equal to the previous one. Errors are logged at debug level and the loop
    continues.
    """
    import requests as req
    bg_session = f"bg_{self.machine_id}"
    logger.info(f"[HEARTBEAT] Boucle permanente démarrée (session={bg_session})")

    while self.running:
        try:
            # Do not send while recording (the session heartbeat handles it)
            if not self.session_id:
                shot_path = self._bg_vision.capture_full_context("heartbeat")
                if shot_path:
                    # Dedup: skip when the screen is unchanged
                    digest = self._quick_hash(shot_path)
                    unchanged = bool(digest) and digest == self._last_bg_hash
                    if not unchanged:
                        self._last_bg_hash = digest

                        # Send to the streaming server (with token auth)
                        headers = {"Authorization": f"Bearer {API_TOKEN}"} if API_TOKEN else {}
                        with open(shot_path, 'rb') as f:
                            req.post(
                                f"{SERVER_URL}/traces/stream/image",
                                params={
                                    "session_id": bg_session,
                                    "shot_id": f"heartbeat_{int(time.time())}",
                                    "machine_id": self.machine_id,
                                },
                                headers=headers,
                                files={"file": ("screenshot.png", f, "image/png")},
                                timeout=10,
                            )
        except Exception as e:
            logger.debug(f"[HEARTBEAT] Erreur: {e}")
        # Every path waits 5 s before the next iteration.
        time.sleep(5)
|
||||
|
||||
def _command_watchdog_loop(self):
|
||||
"""Surveille un fichier de commande pour executer des ordres visuels (legacy)."""
|
||||
import json
|
||||
@@ -143,7 +193,7 @@ class AgentV1:
|
||||
else:
|
||||
cmd_path = str(BASE_DIR / "command.json")
|
||||
|
||||
while self.running:
|
||||
while self.running and self.session_id:
|
||||
# Ne pas traiter les commandes fichier pendant un replay serveur
|
||||
if self._replay_active:
|
||||
time.sleep(1)
|
||||
@@ -181,8 +231,11 @@ class AgentV1:
|
||||
time.sleep(REPLAY_POLL_INTERVAL)
|
||||
continue
|
||||
|
||||
# Utiliser la session active ou un ID par défaut pour le replay
|
||||
poll_session = self.session_id or f"agent_{self.user_id}"
|
||||
# TOUJOURS utiliser un session_id stable pour le replay.
|
||||
# L'enregistrement et le replay sont indépendants : le serveur
|
||||
# envoie les actions sur agent_{user_id}, pas sur la session
|
||||
# d'enregistrement (sess_xxx).
|
||||
poll_session = f"agent_{self.user_id}"
|
||||
|
||||
# Log periodique pour confirmer que la boucle tourne (toutes les 60s)
|
||||
poll_count += 1
|
||||
@@ -226,18 +279,38 @@ class AgentV1:
|
||||
time.sleep(max(poll_delay, REPLAY_POLL_INTERVAL))
|
||||
|
||||
def stop_session(self):
|
||||
self.running = False
|
||||
# Arrêter la capture et le streaming de la session d'enregistrement
|
||||
if self.captor: self.captor.stop()
|
||||
if self.streamer: self.streamer.stop()
|
||||
logger.info(f"Session {self.session_id} terminée.")
|
||||
|
||||
# Reset le session_id pour que le poll replay utilise l'ID stable
|
||||
self.session_id = None
|
||||
|
||||
# Reset le backoff de l'executor pour reprendre le polling immédiatement
|
||||
if self._executor:
|
||||
self._executor._poll_backoff = self._executor._poll_backoff_min
|
||||
self._executor._server_available = True
|
||||
if hasattr(self._executor, '_last_conn_error_logged'):
|
||||
self._executor._last_conn_error_logged = False
|
||||
|
||||
# NE PAS mettre self.running = False ici !
|
||||
# self.running contrôle la boucle _replay_poll_loop (permanente).
|
||||
# Seule la sortie du programme doit le mettre à False.
|
||||
|
||||
logger.info(
|
||||
f"Session arrêtée — replay poll actif avec session="
|
||||
f"agent_{self.user_id}"
|
||||
)
|
||||
|
||||
_last_heartbeat_hash: str = ""
|
||||
|
||||
def _heartbeat_loop(self):
|
||||
"""Capture périodique pour donner du contexte au stagiaire.
|
||||
Déduplication : n'envoie que si l'écran a changé.
|
||||
Tourne tant que session_id est défini (= enregistrement actif).
|
||||
"""
|
||||
while self.running:
|
||||
while self.running and self.session_id:
|
||||
try:
|
||||
full_path = self.vision.capture_full_context("heartbeat")
|
||||
if full_path:
|
||||
|
||||
@@ -25,7 +25,7 @@ import time
|
||||
import requests
|
||||
from PIL import Image
|
||||
|
||||
from ..config import STREAMING_ENDPOINT
|
||||
from ..config import API_TOKEN, STREAMING_ENDPOINT
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -56,6 +56,13 @@ class TraceStreamer:
|
||||
self._health_thread = None
|
||||
self._server_available = True # Désactivé après trop d'échecs
|
||||
|
||||
@staticmethod
def _auth_headers() -> dict:
    """Return the Bearer authentication headers for API requests.

    The result is empty when ``API_TOKEN`` is falsy.
    """
    return {"Authorization": f"Bearer {API_TOKEN}"} if API_TOKEN else {}
|
||||
|
||||
def start(self):
|
||||
"""Démarrer le streaming et enregistrer la session côté serveur."""
|
||||
self.running = True
|
||||
@@ -240,6 +247,7 @@ class TraceStreamer:
|
||||
try:
|
||||
resp = requests.get(
|
||||
f"{STREAMING_ENDPOINT}/stats",
|
||||
headers=self._auth_headers(),
|
||||
timeout=3,
|
||||
)
|
||||
if resp.ok:
|
||||
@@ -292,6 +300,7 @@ class TraceStreamer:
|
||||
"session_id": self.session_id,
|
||||
"machine_id": self.machine_id,
|
||||
},
|
||||
headers=self._auth_headers(),
|
||||
timeout=3,
|
||||
)
|
||||
if resp.ok:
|
||||
@@ -319,6 +328,7 @@ class TraceStreamer:
|
||||
"session_id": self.session_id,
|
||||
"machine_id": self.machine_id,
|
||||
},
|
||||
headers=self._auth_headers(),
|
||||
timeout=30, # Le build workflow peut prendre du temps
|
||||
)
|
||||
if resp.ok:
|
||||
@@ -343,6 +353,7 @@ class TraceStreamer:
|
||||
resp = requests.post(
|
||||
f"{STREAMING_ENDPOINT}/event",
|
||||
json=payload,
|
||||
headers=self._auth_headers(),
|
||||
timeout=2,
|
||||
)
|
||||
return resp.ok
|
||||
@@ -377,6 +388,7 @@ class TraceStreamer:
|
||||
f"{STREAMING_ENDPOINT}/image",
|
||||
files=files,
|
||||
params=params,
|
||||
headers=self._auth_headers(),
|
||||
timeout=5,
|
||||
)
|
||||
return resp.ok
|
||||
@@ -390,6 +402,7 @@ class TraceStreamer:
|
||||
f"{STREAMING_ENDPOINT}/image",
|
||||
files=files,
|
||||
params=params,
|
||||
headers=self._auth_headers(),
|
||||
timeout=5,
|
||||
)
|
||||
return resp.ok
|
||||
|
||||
@@ -367,9 +367,14 @@ class SmartTrayV1:
|
||||
|
||||
try:
|
||||
import requests
|
||||
# Auth headers pour le streaming server (port 5005)
|
||||
auth_headers = {}
|
||||
if self.server_client is not None:
|
||||
auth_headers = self.server_client._auth_headers()
|
||||
resp = requests.post(
|
||||
f"{self.server_client._stream_base}/api/v1/traces/stream/replay/start",
|
||||
json={"workflow_id": workflow_id},
|
||||
headers=auth_headers,
|
||||
timeout=10,
|
||||
)
|
||||
if resp.ok:
|
||||
|
||||
@@ -1,55 +0,0 @@
|
||||
# window_info.py
|
||||
"""
|
||||
Récupération des informations sur la fenêtre active (X11).
|
||||
|
||||
v0 :
|
||||
- utilise xdotool pour obtenir :
|
||||
- le titre de la fenêtre active
|
||||
- le PID de la fenêtre active, puis le nom du process via ps
|
||||
|
||||
Si quelque chose ne fonctionne pas, on renvoie des valeurs "unknown".
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
from typing import Dict, Optional
|
||||
|
||||
|
||||
def _run_cmd(cmd: list[str]) -> Optional[str]:
    """Run *cmd* and return its stripped stdout as text, or None on any error."""
    try:
        raw = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
        text = raw.decode("utf-8", errors="ignore")
    except Exception:
        # Best effort: a missing tool or non-zero exit simply yields None.
        return None
    return text.strip()
|
||||
|
||||
|
||||
def get_active_window_info() -> Dict[str, str]:
    """Return ``{"title": ..., "app_name": ...}`` for the active window.

    Both values come from xdotool (the process name is then resolved with
    ``ps``); anything that cannot be obtained is replaced by
    ``"unknown_window"`` / ``"unknown_app"``.
    """
    window_title = _run_cmd(["xdotool", "getactivewindow", "getwindowname"])
    raw_pid = _run_cmd(["xdotool", "getactivewindow", "getwindowpid"])

    # Resolve the binary name through ps, only when a PID was reported.
    binary_name: Optional[str] = (
        _run_cmd(["ps", "-p", raw_pid.strip(), "-o", "comm="]) if raw_pid else None
    )

    return {
        "title": window_title or "unknown_window",
        "app_name": binary_name or "unknown_app",
    }
|
||||
@@ -1,192 +0,0 @@
|
||||
# window_info_crossplatform.py
|
||||
"""
|
||||
Récupération des informations sur la fenêtre active - CROSS-PLATFORM
|
||||
|
||||
Supporte:
|
||||
- Linux (X11 via xdotool)
|
||||
- Windows (via pywin32)
|
||||
- macOS (via pyobjc)
|
||||
|
||||
Installation des dépendances:
|
||||
pip install pywin32 # Windows
|
||||
pip install pyobjc-framework-Cocoa # macOS
|
||||
pip install psutil # Tous OS
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import platform
|
||||
import subprocess
|
||||
from typing import Dict, Optional
|
||||
|
||||
|
||||
def _run_cmd(cmd: list[str]) -> Optional[str]:
    """Run *cmd* and return its stripped stdout as text, or None on any error."""
    try:
        raw = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
        text = raw.decode("utf-8", errors="ignore")
    except Exception:
        # Best effort: a missing tool or non-zero exit simply yields None.
        return None
    return text.strip()
|
||||
|
||||
|
||||
def get_active_window_info() -> Dict[str, str]:
    """Return ``{"title": ..., "app_name": ...}`` for the active window.

    The OS is detected with ``platform.system()`` and the matching
    platform-specific implementation is called; unsupported systems get the
    ``"unknown_window"`` / ``"unknown_app"`` placeholders.
    """
    handlers = {
        "Linux": _get_window_info_linux,
        "Windows": _get_window_info_windows,
        "Darwin": _get_window_info_macos,  # macOS
    }
    handler = handlers.get(platform.system())
    if handler is None:
        return {"title": "unknown_window", "app_name": "unknown_app"}
    return handler()
|
||||
|
||||
|
||||
def _get_window_info_linux() -> Dict[str, str]:
    """Linux implementation based on xdotool (X11).

    Requires: sudo apt-get install xdotool
    """
    window_title = _run_cmd(["xdotool", "getactivewindow", "getwindowname"])
    raw_pid = _run_cmd(["xdotool", "getactivewindow", "getwindowpid"])

    # Resolve the binary name through ps, only when a PID was reported.
    binary_name: Optional[str] = (
        _run_cmd(["ps", "-p", raw_pid.strip(), "-o", "comm="]) if raw_pid else None
    )

    return {
        "title": window_title or "unknown_window",
        "app_name": binary_name or "unknown_app",
    }
|
||||
|
||||
|
||||
def _get_window_info_windows() -> Dict[str, str]:
    """Windows implementation based on pywin32 + psutil.

    Requires: pip install pywin32 psutil

    Returns placeholder values when the modules cannot be imported, and an
    ``"error: ..."`` title when any other exception occurs.
    """
    try:
        import win32gui
        import win32process
        import psutil

        # Foreground window and its title
        foreground = win32gui.GetForegroundWindow()
        window_title = win32gui.GetWindowText(foreground) or "unknown_window"

        # Owning process id
        _thread_id, process_id = win32process.GetWindowThreadProcessId(foreground)

        # Process name
        try:
            executable = psutil.Process(process_id).name()
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            executable = "unknown_app"

        return {
            "title": window_title,
            "app_name": executable,
        }

    except ImportError:
        # pywin32 or psutil is not installed
        return {
            "title": "unknown_window (pywin32 missing)",
            "app_name": "unknown_app (pywin32 missing)",
        }
    except Exception as e:
        return {
            "title": f"error: {e}",
            "app_name": "unknown_app",
        }
|
||||
|
||||
|
||||
def _get_window_info_macos() -> Dict[str, str]:
    """macOS implementation based on pyobjc (AppKit + Quartz).

    Requires: pip install pyobjc-framework-Cocoa

    Note: requires the "Accessibility" permission in System Preferences.

    Returns placeholder values when the modules cannot be imported, and an
    ``"error: ..."`` title when any other exception occurs.
    """
    try:
        from AppKit import NSWorkspace
        from Quartz import (
            CGWindowListCopyWindowInfo,
            kCGWindowListOptionOnScreenOnly,
            kCGNullWindowID
        )

        # Active application
        frontmost = NSWorkspace.sharedWorkspace().activeApplication()
        app_name = frontmost.get('NSApplicationName', 'unknown_app')

        # Window title (via Quartz): first titled on-screen window that is
        # owned by the active application.
        on_screen = CGWindowListCopyWindowInfo(
            kCGWindowListOptionOnScreenOnly,
            kCGNullWindowID
        )

        title = "unknown_window"
        for entry in on_screen:
            if entry.get('kCGWindowOwnerName', '') != app_name:
                continue
            candidate = entry.get('kCGWindowName', '')
            if not candidate:
                continue
            title = candidate
            break

        return {
            "title": title,
            "app_name": app_name,
        }

    except ImportError:
        # pyobjc is not installed
        return {
            "title": "unknown_window (pyobjc missing)",
            "app_name": "unknown_app (pyobjc missing)",
        }
    except Exception as e:
        return {
            "title": f"error: {e}",
            "app_name": "unknown_app",
        }
|
||||
|
||||
|
||||
# Test rapide
|
||||
if __name__ == "__main__":
    # Manual smoke test: print the active window once per second, five times.
    import time

    print(f"OS détecté: {platform.system()}")
    print("\nTest de capture fenêtre active (5 secondes)...")
    print("Changez de fenêtre pour tester!\n")

    for sample_no in range(1, 6):
        snapshot = get_active_window_info()
        print(f"[{sample_no}] App: {snapshot['app_name']:20s} | Title: {snapshot['title']}")
        time.sleep(1)
|
||||
@@ -91,11 +91,24 @@ class LeaServerClient:
|
||||
# Session de chat
|
||||
self._chat_session_id: Optional[str] = None
|
||||
|
||||
# Token API pour le serveur streaming (auth Bearer)
|
||||
self._api_token = os.environ.get("RPA_API_TOKEN", "")
|
||||
|
||||
logger.info(
|
||||
"LeaServerClient initialise : chat=%s, stream=%s",
|
||||
self._chat_base, self._stream_base,
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Auth
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _auth_headers(self) -> Dict[str, str]:
|
||||
"""Headers d'authentification pour le serveur streaming."""
|
||||
if self._api_token:
|
||||
return {"Authorization": f"Bearer {self._api_token}"}
|
||||
return {}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Proprietes
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -133,11 +146,12 @@ class LeaServerClient:
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def check_connection(self) -> bool:
|
||||
"""Tester la connexion au serveur chat."""
|
||||
"""Tester la connexion au serveur streaming (port 5005)."""
|
||||
try:
|
||||
import requests
|
||||
resp = requests.get(
|
||||
f"{self._chat_base}/api/status",
|
||||
f"{self._stream_base}/health",
|
||||
headers=self._auth_headers(),
|
||||
timeout=5,
|
||||
)
|
||||
was_connected = self._connected
|
||||
@@ -200,16 +214,21 @@ class LeaServerClient:
|
||||
return None
|
||||
|
||||
def list_workflows(self) -> List[Dict[str, Any]]:
|
||||
"""Recuperer la liste des workflows depuis le serveur chat."""
|
||||
"""Recuperer la liste des workflows depuis le serveur streaming."""
|
||||
try:
|
||||
import requests
|
||||
headers = self._auth_headers()
|
||||
resp = requests.get(
|
||||
f"{self._chat_base}/api/workflows",
|
||||
f"{self._stream_base}/api/v1/traces/stream/workflows",
|
||||
headers=headers,
|
||||
timeout=10,
|
||||
)
|
||||
if resp.ok:
|
||||
data = resp.json()
|
||||
self._connected = True
|
||||
# L'API renvoie directement une liste ou un dict avec clé "workflows"
|
||||
if isinstance(data, list):
|
||||
return data
|
||||
return data.get("workflows", [])
|
||||
return []
|
||||
except Exception as e:
|
||||
@@ -218,20 +237,10 @@ class LeaServerClient:
|
||||
return []
|
||||
|
||||
def list_gestures(self) -> List[Dict[str, Any]]:
|
||||
"""Recuperer la liste des gestes depuis le serveur chat."""
|
||||
try:
|
||||
import requests
|
||||
resp = requests.get(
|
||||
f"{self._chat_base}/api/workflows",
|
||||
timeout=10,
|
||||
)
|
||||
if resp.ok:
|
||||
data = resp.json()
|
||||
return data.get("workflows", [])
|
||||
return []
|
||||
except Exception as e:
|
||||
logger.error("List gestures erreur : %s", e)
|
||||
return []
|
||||
"""Recuperer la liste des gestes (non disponible sur streaming server)."""
|
||||
# Les gestes etaient sur le chat server (5004) qui n'est plus utilise.
|
||||
# Retourner une liste vide silencieusement.
|
||||
return []
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Replay Polling (port 5005)
|
||||
@@ -269,6 +278,7 @@ class LeaServerClient:
|
||||
resp = req_lib.get(
|
||||
f"{self._stream_base}/api/v1/traces/stream/replay/next",
|
||||
params={"session_id": self._poll_session_id},
|
||||
headers=self._auth_headers(),
|
||||
timeout=5,
|
||||
)
|
||||
|
||||
@@ -301,6 +311,7 @@ class LeaServerClient:
|
||||
import requests
|
||||
resp = requests.get(
|
||||
f"{self._stream_base}/api/v1/traces/stream/replays",
|
||||
headers=self._auth_headers(),
|
||||
timeout=5,
|
||||
)
|
||||
if resp.ok:
|
||||
@@ -335,6 +346,7 @@ class LeaServerClient:
|
||||
"error": error,
|
||||
"screenshot": screenshot,
|
||||
},
|
||||
headers=self._auth_headers(),
|
||||
timeout=5,
|
||||
)
|
||||
except Exception as e:
|
||||
|
||||
@@ -2,6 +2,17 @@
|
||||
"""
|
||||
deploy_windows.py — Script de packaging du client Windows pour Agent V1.
|
||||
|
||||
⚠️ OBSOLÈTE (avril 2026)
|
||||
Le build officiel du package Windows passe par ``deploy/build_package.sh``
|
||||
(à la racine du repo) qui lit directement ``agent_v0/agent_v1/`` et évite
|
||||
les clones intermédiaires. Ce script est conservé pour référence mais son
|
||||
manifeste ``FILE_MANIFEST`` est incomplet : il n'inclut pas
|
||||
``system_dialog_guard.py``, ``persistent_buffer.py``, ``recovery.py``,
|
||||
``uia_helper.py``, ``grounding.py``, ``policy.py``,
|
||||
``vision/blur_sensitive.py``, ``vision/system_info.py``,
|
||||
``ui/chat_window.py``, ``ui/capture_server.py``, ``ui/shared_state.py``.
|
||||
Ne PAS l'utiliser pour un packaging réel.
|
||||
|
||||
Copie uniquement les fichiers nécessaires au fonctionnement de l'agent
|
||||
sur le PC cible (Windows), sans le serveur ni les dépendances lourdes.
|
||||
|
||||
@@ -41,8 +52,6 @@ FILE_MANIFEST: list[tuple[str, str]] = [
|
||||
("agent_v1/core/__init__.py", "agent_v1/core/__init__.py"),
|
||||
("agent_v1/core/captor.py", "agent_v1/core/captor.py"),
|
||||
("agent_v1/core/executor.py", "agent_v1/core/executor.py"),
|
||||
("agent_v1/core/window_info.py", "agent_v1/core/window_info.py"),
|
||||
("agent_v1/core/window_info_crossplatform.py", "agent_v1/core/window_info_crossplatform.py"),
|
||||
|
||||
# agent_v1/network
|
||||
("agent_v1/network/__init__.py", "agent_v1/network/__init__.py"),
|
||||
|
||||
@@ -1,13 +1,6 @@
|
||||
# agent_v0.lea_ui — Interface utilisateur "Lea"
|
||||
# agent_v0.lea_ui — Communication serveur pour l'agent Léa
|
||||
#
|
||||
# Panneau PyQt5 integre qui remplace le system tray + navigateur web
|
||||
# par une interface unifiee pour piloter l'Agent RPA Vision V3.
|
||||
#
|
||||
# Composants :
|
||||
# - LeaMainWindow : fenetre principale ancree a droite
|
||||
# - ChatWidget : zone de conversation avec le serveur
|
||||
# - OverlayWidget : feedback visuel pendant le replay
|
||||
# Composant :
|
||||
# - LeaServerClient : client API vers le serveur Linux
|
||||
# - styles : theme et couleurs
|
||||
|
||||
__version__ = "0.1.0"
|
||||
|
||||
@@ -1,6 +0,0 @@
|
||||
# agent_v0/lea_ui/__main__.py
|
||||
"""Permet le lancement via: python -m agent_v0.lea_ui"""
|
||||
|
||||
from .launcher import main
|
||||
|
||||
main()
|
||||
@@ -1,250 +0,0 @@
|
||||
# agent_v0/lea_ui/chat_widget.py
|
||||
"""
|
||||
Widget de chat pour l'interface Lea.
|
||||
|
||||
Affiche les messages avec des bulles :
|
||||
- Utilisateur a droite (fond indigo)
|
||||
- Lea a gauche (fond blanc)
|
||||
|
||||
Communique avec le serveur Linux via LeaServerClient.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import List, Optional
|
||||
|
||||
from PyQt5.QtCore import (
|
||||
QPropertyAnimation,
|
||||
QSize,
|
||||
Qt,
|
||||
QTimer,
|
||||
pyqtSignal,
|
||||
pyqtSlot,
|
||||
)
|
||||
from PyQt5.QtGui import QColor, QFont, QPainter, QPainterPath, QPen
|
||||
from PyQt5.QtWidgets import (
|
||||
QFrame,
|
||||
QHBoxLayout,
|
||||
QLabel,
|
||||
QLineEdit,
|
||||
QPushButton,
|
||||
QScrollArea,
|
||||
QSizePolicy,
|
||||
QVBoxLayout,
|
||||
QWidget,
|
||||
)
|
||||
|
||||
from . import styles
|
||||
|
||||
logger = logging.getLogger("lea_ui.chat")
|
||||
|
||||
|
||||
class ChatBubble(QFrame):
    """Single chat message rendered as a rounded bubble.

    User bubbles use the user colours without a border; Lea bubbles use the
    Lea colours with a 1px border. The text is displayed as rich text with
    word wrap, and external links are opened when clicked.
    """

    def __init__(
        self,
        text: str,
        is_user: bool = False,
        parent: Optional[QWidget] = None,
    ) -> None:
        """Create the bubble.

        Args:
            text: message content, interpreted as rich text
            is_user: True for a user message, False for a Lea message
            parent: optional parent widget
        """
        super().__init__(parent)
        self._is_user = is_user

        # Bubble style. Horizontal placement is decided by the container
        # that hosts the bubble, so no alignment is computed here.
        if is_user:
            bg_color = styles.COLOR_BUBBLE_USER
            text_color = styles.COLOR_TEXT_ON_ACCENT
            border = "none"
        else:
            bg_color = styles.COLOR_BUBBLE_LEA
            text_color = styles.COLOR_TEXT
            border = f"1px solid {styles.COLOR_BORDER}"

        self.setStyleSheet(f"""
            QFrame {{
                background-color: {bg_color};
                border-radius: {styles.BUBBLE_RADIUS}px;
                padding: {styles.PADDING}px;
                border: {border};
            }}
        """)

        layout = QVBoxLayout(self)
        layout.setContentsMargins(
            styles.PADDING, styles.PADDING // 2,
            styles.PADDING, styles.PADDING // 2,
        )

        label = QLabel(text)
        label.setWordWrap(True)
        label.setFont(QFont(styles.FONT_FAMILY, styles.FONT_SIZE_NORMAL))
        label.setStyleSheet(f"color: {text_color}; background: transparent; border: none;")
        label.setTextFormat(Qt.RichText)
        label.setOpenExternalLinks(True)
        layout.addWidget(label)

        self.setSizePolicy(QSizePolicy.Preferred, QSizePolicy.Minimum)
        self.setMaximumWidth(280)
|
||||
|
||||
|
||||
class ChatWidget(QWidget):
    """Chat panel made of a scrollable message list and an input row.

    Signals:
        message_sent(str): emitted when the user submits a non-empty message
    """

    message_sent = pyqtSignal(str)

    def __init__(self, parent: Optional[QWidget] = None) -> None:
        super().__init__(parent)
        self._messages: List[dict] = []
        self._setup_ui()

    def _setup_ui(self) -> None:
        """Assemble the message area, the separator line and the input row."""
        root = QVBoxLayout(self)
        root.setContentsMargins(0, 0, 0, 0)
        root.setSpacing(0)

        root.addWidget(self._build_message_area(), stretch=1)
        root.addWidget(self._build_separator())
        root.addLayout(self._build_input_row())

    def _build_message_area(self) -> QScrollArea:
        """Create the scroll area and the container that holds the messages."""
        self._scroll_area = QScrollArea()
        self._scroll_area.setWidgetResizable(True)
        self._scroll_area.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self._scroll_area.setStyleSheet(styles.CHAT_AREA_STYLE)

        self._messages_container = QWidget()
        self._messages_container.setObjectName("ChatContainer")
        self._messages_layout = QVBoxLayout(self._messages_container)
        self._messages_layout.setContentsMargins(
            styles.PADDING, styles.PADDING,
            styles.PADDING, styles.PADDING,
        )
        self._messages_layout.setSpacing(styles.SPACING)
        # Trailing stretch: new entries are always inserted just before it
        self._messages_layout.addStretch()

        self._scroll_area.setWidget(self._messages_container)
        return self._scroll_area

    def _build_separator(self) -> QFrame:
        """Create the thin horizontal line between messages and input."""
        line = QFrame()
        line.setFrameShape(QFrame.HLine)
        line.setStyleSheet(f"background-color: {styles.COLOR_BORDER}; max-height: 1px;")
        return line

    def _build_input_row(self) -> QHBoxLayout:
        """Create the text field and the send button."""
        row = QHBoxLayout()
        row.setContentsMargins(
            styles.PADDING, styles.SPACING,
            styles.PADDING, styles.SPACING,
        )
        row.setSpacing(styles.SPACING)

        self._input = QLineEdit()
        self._input.setObjectName("ChatInput")
        self._input.setPlaceholderText("Ecrivez un message...")
        self._input.setStyleSheet(styles.INPUT_STYLE)
        self._input.returnPressed.connect(self._on_send)
        row.addWidget(self._input, stretch=1)

        self._send_btn = QPushButton("Envoyer")
        self._send_btn.setObjectName("SendButton")
        self._send_btn.setStyleSheet(styles.SEND_BUTTON_STYLE)
        self._send_btn.setCursor(Qt.PointingHandCursor)
        self._send_btn.clicked.connect(self._on_send)
        row.addWidget(self._send_btn)

        return row

    def _on_send(self) -> None:
        """Submit the typed message; blank input is ignored."""
        typed = self._input.text().strip()
        if typed:
            self._input.clear()
            self.add_user_message(typed)
            self.message_sent.emit(typed)

    # ---------------------------------------------------------------------------
    # Public API
    # ---------------------------------------------------------------------------

    def add_user_message(self, text: str) -> None:
        """Append a user message (bubble on the right)."""
        self._add_bubble(text, is_user=True)

    def add_lea_message(self, text: str) -> None:
        """Append a message from Lea (bubble on the left)."""
        self._add_bubble(text, is_user=False)

    def add_system_message(self, text: str) -> None:
        """Append a centered, discreet system message."""
        notice = QLabel(text)
        notice.setFont(QFont(styles.FONT_FAMILY, styles.FONT_SIZE_SMALL))
        notice.setStyleSheet(
            f"color: {styles.COLOR_TEXT_SECONDARY}; background: transparent; padding: 4px;"
        )
        notice.setAlignment(Qt.AlignCenter)
        notice.setWordWrap(True)

        # Insert just before the trailing stretch
        self._messages_layout.insertWidget(self._messages_layout.count() - 1, notice)
        self._scroll_to_bottom()

    def set_input_enabled(self, enabled: bool) -> None:
        """Enable or disable typing and sending, updating the placeholder."""
        self._input.setEnabled(enabled)
        self._send_btn.setEnabled(enabled)
        placeholder = "Ecrivez un message..." if enabled else "Lea reflechit..."
        self._input.setPlaceholderText(placeholder)

    def clear_messages(self) -> None:
        """Remove every message, keeping only the trailing stretch."""
        while self._messages_layout.count() > 1:
            removed = self._messages_layout.takeAt(0).widget()
            if removed:
                removed.deleteLater()
        self._messages = []

    # ---------------------------------------------------------------------------
    # Internals
    # ---------------------------------------------------------------------------

    def _add_bubble(self, text: str, is_user: bool) -> None:
        """Wrap a new bubble in an alignment row and append it."""
        bubble = ChatBubble(text, is_user=is_user)

        # The stretch pushes user bubbles right and Lea bubbles left
        aligned = QHBoxLayout()
        aligned.setContentsMargins(0, 0, 0, 0)
        if is_user:
            aligned.addStretch()
            aligned.addWidget(bubble)
        else:
            aligned.addWidget(bubble)
            aligned.addStretch()

        # Insert just before the trailing stretch
        position = self._messages_layout.count() - 1
        holder = QWidget()
        holder.setLayout(aligned)
        holder.setStyleSheet("background: transparent;")
        self._messages_layout.insertWidget(position, holder)

        self._messages.append({"text": text, "is_user": is_user})
        self._scroll_to_bottom()

    def _scroll_to_bottom(self) -> None:
        """Scroll to the newest message shortly after it has been added."""
        def _jump_to_end() -> None:
            bar = self._scroll_area.verticalScrollBar()
            bar.setValue(bar.maximum())

        # 50 ms delay before reading the scroll bar maximum
        QTimer.singleShot(50, _jump_to_end)
|
||||
@@ -1,218 +0,0 @@
|
||||
# agent_v0/lea_ui/launcher.py
|
||||
"""
|
||||
Point d'entree pour le panneau Lea.
|
||||
|
||||
Lancement autonome :
|
||||
python -m agent_v0.lea_ui.launcher
|
||||
|
||||
Ou integre dans agent_v0/agent_v1/main.py avec flag --ui lea.
|
||||
|
||||
Ce module :
|
||||
1. Cree l'application Qt
|
||||
2. Instancie LeaServerClient
|
||||
3. Instancie LeaMainWindow
|
||||
4. Enregistre un raccourci global (Ctrl+Shift+L) via keyboard hook
|
||||
5. Lance la boucle Qt
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from typing import Optional
|
||||
|
||||
logger = logging.getLogger("lea_ui.launcher")
|
||||
|
||||
|
||||
def _setup_logging(verbose: bool = False) -> None:
|
||||
"""Configurer le logging pour le panneau Lea."""
|
||||
level = logging.DEBUG if verbose else logging.INFO
|
||||
logging.basicConfig(
|
||||
level=level,
|
||||
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
|
||||
datefmt="%H:%M:%S",
|
||||
)
|
||||
|
||||
|
||||
def _setup_global_hotkey(window) -> Optional[object]:
|
||||
"""Enregistrer le raccourci global Ctrl+Shift+L pour afficher/cacher le panneau.
|
||||
|
||||
Utilise la librairie keyboard si disponible (Windows/Linux).
|
||||
Retourne le hook pour pouvoir le desinscrire a l'arret.
|
||||
"""
|
||||
try:
|
||||
import keyboard
|
||||
|
||||
def on_hotkey():
|
||||
# Appeler toggle_visibility dans le thread Qt
|
||||
from PyQt5.QtCore import QTimer
|
||||
QTimer.singleShot(0, window.toggle_visibility)
|
||||
|
||||
keyboard.add_hotkey("ctrl+shift+l", on_hotkey)
|
||||
logger.info("Raccourci global Ctrl+Shift+L enregistre")
|
||||
return True
|
||||
except ImportError:
|
||||
logger.info(
|
||||
"Librairie 'keyboard' non disponible — "
|
||||
"raccourci global Ctrl+Shift+L non enregistre. "
|
||||
"Installez-la avec: pip install keyboard"
|
||||
)
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.warning("Impossible d'enregistrer le raccourci global : %s", e)
|
||||
return None
|
||||
|
||||
|
||||
def _load_environment() -> None:
|
||||
"""Charger les variables d'environnement depuis .env.local."""
|
||||
env_paths = [
|
||||
os.path.join(os.path.dirname(__file__), "..", "..", ".env.local"),
|
||||
os.path.join(os.path.dirname(__file__), "..", ".env.local"),
|
||||
]
|
||||
for env_path in env_paths:
|
||||
env_path = os.path.abspath(env_path)
|
||||
if os.path.exists(env_path):
|
||||
try:
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(env_path)
|
||||
logger.info("Variables d'environnement chargees depuis %s", env_path)
|
||||
return
|
||||
except ImportError:
|
||||
# Fallback : chargement manuel
|
||||
with open(env_path, "r", encoding="utf-8") as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if line and not line.startswith("#") and "=" in line:
|
||||
key, value = line.split("=", 1)
|
||||
value = value.strip("\"'")
|
||||
os.environ[key.strip()] = value
|
||||
logger.info("Variables chargees manuellement depuis %s", env_path)
|
||||
return
|
||||
|
||||
|
||||
def launch_lea(
    server_host: Optional[str] = None,
    chat_port: int = 5004,
    stream_port: int = 5005,
    verbose: bool = False,
    session_id: Optional[str] = None,
) -> None:
    """Start the Lea panel and run the Qt event loop until it exits.

    The process is terminated with ``sys.exit``: status 1 when PyQt5 cannot
    be imported, otherwise the exit code of the Qt event loop.

    Args:
        server_host: address of the Linux server (None = auto-detection)
        chat_port: chat server port
        stream_port: streaming server port
        verbose: debug mode
        session_id: session identifier for replay polling
    """
    _setup_logging(verbose)
    _load_environment()

    # PyQt5 is imported here so a missing install gives a clear error message
    try:
        from PyQt5.QtWidgets import QApplication
        from PyQt5.QtCore import Qt
    except ImportError:
        logger.error(
            "PyQt5 n'est pas installe. Installez-le avec :\n"
            "  pip install PyQt5"
        )
        sys.exit(1)

    from .server_client import LeaServerClient
    from .main_window import LeaMainWindow

    # Reuse the running Qt application, or create one
    qt_app = QApplication.instance()
    if qt_app is None:
        qt_app = QApplication(sys.argv)
    qt_app.setQuitOnLastWindowClosed(False)

    # Server client
    client = LeaServerClient(
        server_host=server_host,
        chat_port=chat_port,
        stream_port=stream_port,
    )

    # Main window
    panel = LeaMainWindow(server_client=client)
    panel.show()

    # Global shortcut
    hotkey_registered = _setup_global_hotkey(panel)

    # Replay polling (only when a session id was provided)
    if session_id:
        client.start_polling(session_id)

    logger.info(
        "Panneau Lea demarre — serveur=%s, chat_port=%d, stream_port=%d",
        client.server_host, chat_port, stream_port,
    )

    # Qt event loop; the window is shut down and hooks released on the way out
    try:
        exit_code = qt_app.exec_()
    finally:
        panel.shutdown()
        if hotkey_registered:
            try:
                import keyboard
                keyboard.unhook_all()
            except Exception:
                pass

    sys.exit(exit_code)
|
||||
|
||||
|
||||
def main() -> None:
    """CLI entry point: parse the command line and launch the Lea panel."""
    parser = argparse.ArgumentParser(
        description="Panneau Lea — Interface utilisateur RPA Vision V3",
    )

    # (flags, keyword arguments) for each option, in declaration order
    option_specs = (
        (("--server", "-s"), {
            "dest": "server_host",
            "default": None,
            "help": "Adresse du serveur Linux (defaut: RPA_SERVER_HOST ou localhost)",
        }),
        (("--chat-port",), {
            "type": int,
            "default": 5004,
            "help": "Port du serveur chat (defaut: 5004)",
        }),
        (("--stream-port",), {
            "type": int,
            "default": 5005,
            "help": "Port du serveur streaming (defaut: 5005)",
        }),
        (("--session-id",), {
            "default": None,
            "help": "Identifiant de session pour le polling replay",
        }),
        (("--verbose", "-v"), {
            "action": "store_true",
            "help": "Mode debug (logs verbeux)",
        }),
    )
    for flags, options in option_specs:
        parser.add_argument(*flags, **options)

    args = parser.parse_args()

    launch_lea(
        server_host=args.server_host,
        chat_port=args.chat_port,
        stream_port=args.stream_port,
        verbose=args.verbose,
        session_id=args.session_id,
    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,772 +0,0 @@
|
||||
# agent_v0/lea_ui/main_window.py
|
||||
"""
|
||||
Fenetre principale du panneau Lea.
|
||||
|
||||
Panneau semi-transparent, ancre a droite de l'ecran, toujours visible.
|
||||
Peut etre reduit en mini-barre flottante (avatar + indicateur status).
|
||||
|
||||
Sections :
|
||||
- Header : avatar "L" + status connexion
|
||||
- Zone de chat : messages entrants/sortants (natif PyQt5)
|
||||
- Zone de status : progression du replay
|
||||
- Boutons rapides : "Apprends-moi", "Que sais-tu faire ?"
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Dict, Any, Optional
|
||||
|
||||
from PyQt5.QtCore import (
|
||||
QPoint,
|
||||
QPropertyAnimation,
|
||||
QRect,
|
||||
QSize,
|
||||
Qt,
|
||||
QTimer,
|
||||
pyqtSignal,
|
||||
pyqtSlot,
|
||||
)
|
||||
from PyQt5.QtGui import (
|
||||
QColor,
|
||||
QFont,
|
||||
QIcon,
|
||||
QKeySequence,
|
||||
QPainter,
|
||||
QPainterPath,
|
||||
QPen,
|
||||
)
|
||||
from PyQt5.QtWidgets import (
|
||||
QAction,
|
||||
QApplication,
|
||||
QDesktopWidget,
|
||||
QFrame,
|
||||
QGraphicsDropShadowEffect,
|
||||
QHBoxLayout,
|
||||
QLabel,
|
||||
QProgressBar,
|
||||
QPushButton,
|
||||
QShortcut,
|
||||
QSizePolicy,
|
||||
QVBoxLayout,
|
||||
QWidget,
|
||||
)
|
||||
|
||||
from . import styles
|
||||
from .chat_widget import ChatWidget
|
||||
from .overlay import OverlayWidget
|
||||
from .server_client import LeaServerClient
|
||||
|
||||
logger = logging.getLogger("lea_ui.main_window")
|
||||
|
||||
|
||||
class LeaAvatar(QWidget):
    """Round avatar showing the initial 'L' and a connection indicator."""

    def __init__(self, size: int = 40, parent: Optional[QWidget] = None) -> None:
        """Create a fixed-size square avatar of ``size`` pixels."""
        super().__init__(parent)
        self._size = size
        self._connected = False
        self.setFixedSize(size, size)

    def set_connected(self, connected: bool) -> None:
        """Store the connection state and request a repaint."""
        self._connected = connected
        self.update()

    def paintEvent(self, event) -> None:  # noqa: N802
        """Paint the disc, the letter and the status dot."""
        side = self._size
        painter = QPainter(self)
        painter.setRenderHint(QPainter.Antialiasing, True)

        # Background disc
        painter.setBrush(QColor(styles.COLOR_ACCENT))
        painter.setPen(Qt.NoPen)
        painter.drawEllipse(2, 2, side - 4, side - 4)

        # Initial "L"
        painter.setPen(QColor(styles.COLOR_TEXT_ON_ACCENT))
        painter.setFont(QFont(styles.FONT_FAMILY, side // 3, QFont.Bold))
        painter.drawText(QRect(0, 0, side, side), Qt.AlignCenter, "L")

        # Connection indicator (small dot in the bottom-right corner)
        dot = 12
        corner = side - dot - 1
        if self._connected:
            dot_color = QColor(styles.COLOR_SUCCESS)
        else:
            dot_color = QColor(styles.COLOR_ERROR)
        painter.setBrush(dot_color)
        painter.setPen(QPen(QColor(styles.COLOR_BG), 2))
        painter.drawEllipse(corner, corner, dot, dot)

        painter.end()
|
||||
|
||||
|
||||
class LeaMainWindow(QWidget):
|
||||
"""Panneau principal de l'interface Lea.
|
||||
|
||||
Fenetre semi-transparente, ancree a droite de l'ecran.
|
||||
Peut basculer en mode mini-barre.
|
||||
"""
|
||||
|
||||
# Signal pour les actions de replay a afficher sur l'overlay
|
||||
replay_action_received = pyqtSignal(dict)
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
server_client: Optional[LeaServerClient] = None,
|
||||
parent: Optional[QWidget] = None,
|
||||
) -> None:
|
||||
super().__init__(parent)
|
||||
|
||||
# Client serveur
|
||||
self._client = server_client or LeaServerClient()
|
||||
|
||||
# Overlay de feedback
|
||||
self._overlay = OverlayWidget()
|
||||
|
||||
# Mode courant
|
||||
self._minimized = False
|
||||
|
||||
# Setup
|
||||
self._setup_window()
|
||||
self._setup_ui()
|
||||
self._setup_shortcuts()
|
||||
self._connect_signals()
|
||||
self._start_connection_check()
|
||||
|
||||
# Message d'accueil
|
||||
QTimer.singleShot(500, self._show_welcome)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Setup
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _setup_window(self) -> None:
|
||||
"""Configurer les proprietes de la fenetre."""
|
||||
self.setWindowFlags(
|
||||
Qt.WindowStaysOnTopHint
|
||||
| Qt.FramelessWindowHint
|
||||
| Qt.Tool
|
||||
)
|
||||
self.setAttribute(Qt.WA_TranslucentBackground, True)
|
||||
self.setObjectName("LeaMainWindow")
|
||||
|
||||
# Dimensions et position (ancre a droite)
|
||||
self.setFixedWidth(styles.PANEL_WIDTH)
|
||||
self.setMinimumHeight(styles.PANEL_MIN_HEIGHT)
|
||||
self._anchor_to_right()
|
||||
|
||||
# Ombre portee
|
||||
shadow = QGraphicsDropShadowEffect()
|
||||
shadow.setBlurRadius(20)
|
||||
shadow.setColor(QColor(0, 0, 0, 60))
|
||||
shadow.setOffset(0, 4)
|
||||
self.setGraphicsEffect(shadow)
|
||||
|
||||
def _anchor_to_right(self) -> None:
|
||||
"""Positionner le panneau ancre a droite de l'ecran."""
|
||||
desktop = QApplication.desktop()
|
||||
if desktop:
|
||||
screen_rect = desktop.availableGeometry(desktop.primaryScreen())
|
||||
x = screen_rect.right() - styles.PANEL_WIDTH - 10
|
||||
y = screen_rect.top() + 40
|
||||
height = screen_rect.height() - 80
|
||||
self.setGeometry(x, y, styles.PANEL_WIDTH, height)
|
||||
|
||||
def _setup_ui(self) -> None:
|
||||
"""Construire l'interface du panneau."""
|
||||
# Conteneur principal avec fond et coins arrondis
|
||||
self._main_layout = QVBoxLayout(self)
|
||||
self._main_layout.setContentsMargins(0, 0, 0, 0)
|
||||
self._main_layout.setSpacing(0)
|
||||
|
||||
# Widget de fond (pour appliquer le style)
|
||||
self._bg_widget = QWidget()
|
||||
self._bg_widget.setObjectName("LeaPanelBg")
|
||||
self._bg_widget.setStyleSheet(f"""
|
||||
QWidget#LeaPanelBg {{
|
||||
background-color: {styles.COLOR_BG};
|
||||
border-radius: {styles.BORDER_RADIUS}px;
|
||||
border: 1px solid {styles.COLOR_BORDER};
|
||||
}}
|
||||
""")
|
||||
|
||||
bg_layout = QVBoxLayout(self._bg_widget)
|
||||
bg_layout.setContentsMargins(0, 0, 0, 0)
|
||||
bg_layout.setSpacing(0)
|
||||
|
||||
# --- Header ---
|
||||
self._header = self._create_header()
|
||||
bg_layout.addWidget(self._header)
|
||||
|
||||
# --- Chat ---
|
||||
self._chat = ChatWidget()
|
||||
bg_layout.addWidget(self._chat, stretch=1)
|
||||
|
||||
# --- Zone de status replay ---
|
||||
self._status_bar = self._create_status_bar()
|
||||
bg_layout.addWidget(self._status_bar)
|
||||
|
||||
# --- Boutons rapides ---
|
||||
self._quick_buttons = self._create_quick_buttons()
|
||||
bg_layout.addWidget(self._quick_buttons)
|
||||
|
||||
self._main_layout.addWidget(self._bg_widget)
|
||||
|
||||
# --- Mini-barre (cachee par defaut) ---
|
||||
self._mini_bar = self._create_mini_bar()
|
||||
self._mini_bar.hide()
|
||||
self._main_layout.addWidget(self._mini_bar)
|
||||
|
||||
def _create_header(self) -> QWidget:
|
||||
"""Creer le header avec avatar et status."""
|
||||
header = QWidget()
|
||||
header.setObjectName("LeaHeader")
|
||||
header.setStyleSheet(styles.HEADER_STYLE)
|
||||
header.setFixedHeight(60)
|
||||
|
||||
layout = QHBoxLayout(header)
|
||||
layout.setContentsMargins(
|
||||
styles.PADDING, styles.SPACING,
|
||||
styles.PADDING, styles.SPACING,
|
||||
)
|
||||
|
||||
# Avatar
|
||||
self._avatar = LeaAvatar(styles.AVATAR_SIZE)
|
||||
layout.addWidget(self._avatar)
|
||||
|
||||
# Titre + status
|
||||
text_layout = QVBoxLayout()
|
||||
text_layout.setSpacing(2)
|
||||
|
||||
title = QLabel("Lea")
|
||||
title.setObjectName("LeaTitle")
|
||||
title.setStyleSheet(styles.HEADER_STYLE)
|
||||
text_layout.addWidget(title)
|
||||
|
||||
self._status_label = QLabel("Connexion...")
|
||||
self._status_label.setObjectName("LeaStatus")
|
||||
self._status_label.setStyleSheet(styles.HEADER_STYLE)
|
||||
text_layout.addWidget(self._status_label)
|
||||
|
||||
layout.addLayout(text_layout, stretch=1)
|
||||
|
||||
# Bouton reduire
|
||||
minimize_btn = QPushButton("_")
|
||||
minimize_btn.setFixedSize(30, 30)
|
||||
minimize_btn.setCursor(Qt.PointingHandCursor)
|
||||
minimize_btn.setStyleSheet(f"""
|
||||
QPushButton {{
|
||||
background: transparent;
|
||||
color: {styles.COLOR_TEXT_SECONDARY};
|
||||
border: none;
|
||||
border-radius: 15px;
|
||||
font-size: 16px;
|
||||
font-weight: bold;
|
||||
}}
|
||||
QPushButton:hover {{
|
||||
background-color: {styles.COLOR_BORDER};
|
||||
}}
|
||||
""")
|
||||
minimize_btn.clicked.connect(self.toggle_minimize)
|
||||
layout.addWidget(minimize_btn)
|
||||
|
||||
return header
|
||||
|
||||
def _create_status_bar(self) -> QWidget:
    """Build the replay status area (text label above a progress bar).

    The container, the label and the progress bar all start hidden. They are
    kept in ``self._status_container``, ``self._replay_label`` and
    ``self._progress_bar``.

    Returns:
        The container widget (fixed height of 50 px).
    """
    status_box = QWidget()
    status_box.setFixedHeight(50)

    column = QVBoxLayout(status_box)
    column.setContentsMargins(
        styles.PADDING, styles.SPACING, styles.PADDING, styles.SPACING
    )
    column.setSpacing(4)

    replay_label = QLabel("")
    replay_label.setObjectName("StatusLabel")
    replay_label.setStyleSheet(styles.STATUS_LABEL_STYLE)
    replay_label.hide()
    column.addWidget(replay_label)
    self._replay_label = replay_label

    progress = QProgressBar()
    progress.setStyleSheet(styles.PROGRESS_STYLE)
    progress.setTextVisible(False)
    progress.hide()
    column.addWidget(progress)
    self._progress_bar = progress

    status_box.hide()
    self._status_container = status_box
    return status_box
|
||||
|
||||
def _create_quick_buttons(self) -> QWidget:
    """Build the row of quick-action buttons.

    Returns:
        A widget holding the "Apprends-moi" and "Que sais-tu faire ?" buttons,
        wired to ``_on_learn_clicked`` and ``_on_list_clicked``.
    """
    button_row = QWidget()
    row_layout = QHBoxLayout(button_row)
    row_layout.setContentsMargins(
        styles.PADDING, styles.SPACING, styles.PADDING, styles.PADDING
    )
    row_layout.setSpacing(styles.SPACING)

    # Both buttons share the same look; only caption and handler differ.
    for caption, handler in (
        ("Apprends-moi", self._on_learn_clicked),
        ("Que sais-tu faire ?", self._on_list_clicked),
    ):
        button = QPushButton(caption)
        button.setObjectName("QuickButton")
        button.setStyleSheet(styles.QUICK_BUTTON_STYLE)
        button.setCursor(Qt.PointingHandCursor)
        button.clicked.connect(handler)
        row_layout.addWidget(button)

    return button_row
|
||||
|
||||
def _create_mini_bar(self) -> QWidget:
    """Build the small floating bar shown while the panel is minimized.

    The bar contains a 32 px avatar (kept in ``self._mini_avatar``) and a
    ">" button that calls ``toggle_minimize``.

    Returns:
        The mini-bar widget (fixed size 80x50).
    """
    mini_bar = QWidget()
    mini_bar.setObjectName("MiniBar")
    mini_bar.setStyleSheet(styles.MINI_BAR_STYLE)
    mini_bar.setFixedSize(80, 50)

    row = QHBoxLayout(mini_bar)
    row.setContentsMargins(8, 4, 8, 4)

    self._mini_avatar = LeaAvatar(32)
    row.addWidget(self._mini_avatar)

    restore_button = QPushButton(">")
    restore_button.setFixedSize(24, 24)
    restore_button.setCursor(Qt.PointingHandCursor)
    restore_button.setStyleSheet(f"""
        QPushButton {{
            background: transparent;
            color: {styles.COLOR_TEXT_SECONDARY};
            border: none;
            font-size: 14px;
            font-weight: bold;
        }}
        QPushButton:hover {{
            color: {styles.COLOR_ACCENT};
        }}
    """)
    restore_button.clicked.connect(self.toggle_minimize)
    row.addWidget(restore_button)

    return mini_bar
|
||||
|
||||
def _setup_shortcuts(self) -> None:
    """Register the Ctrl+Shift+L shortcut that shows/hides the panel.

    Per the original author's note: on Windows a truly global shortcut needs
    an extra mechanism (keyboard hook). This is a local shortcut that works
    while the panel has focus; a global hook is planned in the launcher.
    """
    toggle_shortcut = QShortcut(QKeySequence("Ctrl+Shift+L"), self)
    toggle_shortcut.activated.connect(self.toggle_visibility)
|
||||
|
||||
def _connect_signals(self) -> None:
    """Wire chat, server client, overlay and replay notifications to handlers."""
    # Chat widget -> message handler.
    self._chat.message_sent.connect(self._on_message_sent)

    # Server client callbacks (plain callables, not Qt signals).
    client = self._client
    client.set_on_connection_change(self._on_connection_changed)
    client.set_on_replay_action(self._on_replay_action)

    # Overlay tells us when it has finished displaying an action.
    self._overlay.action_display_finished.connect(self._on_overlay_finished)

    # Replay actions are relayed through a signal (described as thread-safe
    # by the original author) before being handled.
    self.replay_action_received.connect(self._handle_replay_action)
|
||||
|
||||
def _start_connection_check(self) -> None:
    """Start the periodic connection check (every 10 s) plus an early first check."""
    poll_timer = QTimer(self)
    poll_timer.timeout.connect(self._check_connection)
    poll_timer.start(10000)  # every 10 seconds
    self._conn_timer = poll_timer

    # First check one second after start, without waiting for the first tick.
    QTimer.singleShot(1000, self._check_connection)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Actions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _show_welcome(self) -> None:
    """Post the greeting message in the chat."""
    greeting = (
        "Bonjour ! Je suis <b>Lea</b>, votre assistante RPA.<br>"
        "Je peux apprendre vos taches, les rejouer, "
        "et vous montrer ce que je fais.<br><br>"
        "Que souhaitez-vous faire ?"
    )
    self._chat.add_lea_message(greeting)
|
||||
|
||||
@pyqtSlot(str)
def _on_message_sent(self, message: str) -> None:
    """Handle a message typed by the user.

    Disables the chat input, then schedules ``_send_to_server`` 100 ms later
    through ``QTimer.singleShot`` so this slot returns immediately.
    """
    self._chat.set_input_enabled(False)

    def _dispatch() -> None:
        self._send_to_server(message)

    QTimer.singleShot(100, _dispatch)
|
||||
|
||||
def _send_to_server(self, message: str) -> None:
    """Send a chat message to the server and display the reply.

    Args:
        message: Text typed by the user.

    The chat input is re-enabled in a ``finally`` clause: previously an
    exception raised by the client or while formatting the reply left the
    input disabled for good. Exceptions still propagate to the caller.
    """
    try:
        response = self._client.send_chat_message(message)

        if response is None:
            self._chat.add_lea_message(
                "Je n'arrive pas a joindre le serveur. "
                "Verifiez que le serveur Linux est demarre."
            )
        elif "error" in response:
            self._chat.add_lea_message(
                f"Erreur : {response['error']}"
            )
        else:
            # Prefer the plain textual reply; otherwise build one from the
            # structured fields of the response.
            reply_text = response.get("response", "")
            if not reply_text:
                reply_text = self._format_response(response)

            self._chat.add_lea_message(reply_text)

            # A workflow was started: reflect it in the status bar.
            if response.get("success") and response.get("workflow"):
                self._show_replay_status(
                    f"Execution : {response['workflow']}",
                    0, 1,
                )
    finally:
        self._chat.set_input_enabled(True)
|
||||
|
||||
def _format_response(self, data: Dict[str, Any]) -> str:
    """Turn a structured server response into rich-text for the chat.

    Branches are checked in this order: confirmation request, workflow list
    (at most 10 entries shown), workflow not found, successful launch,
    confirmed, denied; anything else falls back to ``str(data)``.

    Server-provided values are escaped (``&``, ``<``, ``>``) before being
    placed inside the markup, so a workflow name such as ``<b>x`` cannot
    alter the rendering of the message.

    Args:
        data: Response dictionary returned by the server.

    Returns:
        The message to display, using ``<b>``/``<br>``/``<i>`` markup.
    """
    from html import escape

    def _esc(value: Any) -> str:
        # quote=False: values only ever land in text content, never attributes.
        return escape(str(value), quote=False)

    # Confirmation request.
    if data.get("needs_confirmation"):
        conf = data.get("confirmation") or {}
        return (
            f"Voulez-vous que j'execute <b>{_esc(conf.get('workflow_name', '?'))}</b> ?<br>"
            f"Risque : {_esc(conf.get('risk_level', 'normal'))}<br>"
            "Repondez <b>oui</b> ou <b>non</b>."
        )

    # Workflow list.
    if "workflows" in data:
        workflows = data["workflows"]
        if not workflows:
            return "Je ne connais aucun workflow pour le moment."
        items = []
        for wf in workflows[:10]:
            name = _esc(wf.get("name", wf.get("id", "?")))
            desc = wf.get("description", "")
            items.append(f"- <b>{name}</b>{': ' + _esc(desc) if desc else ''}")
        result = "Voici ce que je sais faire :<br>" + "<br>".join(items)
        if len(workflows) > 10:
            result += f"<br><i>... et {len(workflows) - 10} autres</i>"
        return result

    # Workflow not found.
    if data.get("not_found"):
        return (
            f"Je ne trouve pas de workflow correspondant a "
            f"'{_esc(data.get('query', '?'))}'.<br>"
            "Essayez 'Que sais-tu faire ?' pour voir la liste."
        )

    # Successful launch.
    if data.get("success"):
        return (
            f"C'est parti ! J'execute <b>{_esc(data.get('workflow', '?'))}</b>.<br>"
            "Regardez l'ecran, je vais vous montrer ce que je fais."
        )

    # Confirmation accepted / refused.
    if data.get("confirmed"):
        return f"D'accord, je lance <b>{_esc(data.get('workflow', '?'))}</b> !"
    if data.get("denied"):
        return "Pas de probleme, j'annule."

    # Fallback: show the raw payload.
    return _esc(data)
|
||||
|
||||
def _on_learn_clicked(self) -> None:
    """Handle the 'Apprends-moi' button: echo the request and show the how-to."""
    chat = self._chat
    chat.add_user_message("Apprends-moi une nouvelle tache")

    instructions = (
        "D'accord ! Pour m'apprendre une tache :<br>"
        "1. Cliquez sur <b>Demarrer</b> dans le tray Agent V1<br>"
        "2. Effectuez votre tache normalement<br>"
        "3. Cliquez sur <b>Terminer</b> quand c'est fini<br><br>"
        "Je vais observer et apprendre automatiquement."
    )
    chat.add_lea_message(instructions)
|
||||
|
||||
def _on_list_clicked(self) -> None:
    """Handle the 'Que sais-tu faire ?' button.

    Echoes the question as a user message, disables the input and schedules
    ``_fetch_workflows`` 100 ms later.
    """
    chat = self._chat
    chat.add_user_message("Que sais-tu faire ?")
    chat.set_input_enabled(False)
    QTimer.singleShot(100, self._fetch_workflows)
|
||||
|
||||
def _fetch_workflows(self) -> None:
    """Fetch the workflow list from the client and display it in the chat.

    At most 15 workflows are listed; the remainder is summarised as a count.
    Names and descriptions are escaped (``&``, ``<``, ``>``) before being
    embedded in the markup. The chat input is re-enabled in a ``finally``
    clause so that a failing client call cannot leave it disabled; the
    exception itself still propagates.
    """
    from html import escape

    try:
        workflows = self._client.list_workflows()
        if workflows:
            items = []
            for wf in workflows[:15]:
                name = escape(str(wf.get("name", wf.get("id", "?"))), quote=False)
                desc = wf.get("description", "")
                suffix = ": " + escape(str(desc), quote=False) if desc else ""
                items.append(f"- <b>{name}</b>{suffix}")
            text = "Voici les workflows que je connais :<br>" + "<br>".join(items)
            if len(workflows) > 15:
                text += f"<br><i>... et {len(workflows) - 15} autres</i>"
        else:
            text = (
                "Je ne connais aucun workflow pour le moment.<br>"
                "Apprenez-moi une tache avec le bouton 'Apprends-moi' !"
            )
        self._chat.add_lea_message(text)
    finally:
        self._chat.set_input_enabled(True)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Connexion
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _check_connection(self) -> None:
    """Ask the client whether the server is reachable and refresh the UI (timer callback)."""
    self._update_connection_ui(self._client.check_connection())
|
||||
|
||||
def _on_connection_changed(self, connected: bool) -> None:
    """Client callback fired when the connection state changes.

    The UI update is deferred with a zero-delay ``QTimer.singleShot``; the
    original author's intent is to run it on the main thread.
    """
    def _apply() -> None:
        self._update_connection_ui(connected)

    QTimer.singleShot(0, _apply)
|
||||
|
||||
def _update_connection_ui(self, connected: bool) -> None:
    """Reflect the connection state on the avatars and the status label.

    Args:
        connected: True when the server is reachable.
    """
    self._avatar.set_connected(connected)
    # The mini avatar only exists once the mini bar has been built.
    if hasattr(self, '_mini_avatar'):
        self._mini_avatar.set_connected(connected)

    if connected:
        status_text = f"Connecte a {self._client.server_host}"
        status_color = styles.COLOR_SUCCESS
    else:
        reason = self._client.last_error or "Serveur injoignable"
        # Keep the label short: only the first 30 characters of the error.
        status_text = f"Deconnecte ({reason[:30]})"
        status_color = styles.COLOR_ERROR

    self._status_label.setText(status_text)
    self._status_label.setStyleSheet(
        f"color: {status_color}; "
        f"font-family: '{styles.FONT_FAMILY}'; "
        f"font-size: {styles.FONT_SIZE_SMALL}px; "
        f"background: transparent; border: none;"
    )
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Replay & Overlay
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _on_replay_action(self, action: Dict[str, Any]) -> None:
    """Client callback for a replay action.

    According to the original author this is invoked from the polling thread,
    so it does no UI work itself: it only emits ``replay_action_received``
    and lets the connected slot handle the action.
    """
    relay = self.replay_action_received
    relay.emit(action)
|
||||
|
||||
@pyqtSlot(dict)
def _handle_replay_action(self, action: Dict[str, Any]) -> None:
    """Display a replay action: status bar, overlay and chat line.

    The overlay is shown before the action is executed so the user can see
    what is about to happen (per the original author's note).

    Args:
        action: Replay action; ``x_pct``/``y_pct`` are fractions of the
            primary screen size and default to the screen centre when
            missing or ``None``.
    """
    action_text = self._describe_action(action)

    # Primary screen size, with a 1920x1080 fallback.
    desktop = QApplication.desktop()
    screen = desktop.screenGeometry(desktop.primaryScreen()) if desktop else None
    if screen:
        sw, sh = screen.width(), screen.height()
    else:
        sw, sh = 1920, 1080

    # A key that is present but None previously raised TypeError on the
    # multiplication; treat it like a missing key.
    x_pct = action.get("x_pct")
    y_pct = action.get("y_pct")
    target_x = int((0.5 if x_pct is None else x_pct) * sw)
    target_y = int((0.5 if y_pct is None else y_pct) * sh)

    # Progress comes from the client's replay status; the step being shown is
    # the one after the last completed action.
    replay = self._client.get_replay_status()
    step_current = 0
    step_total = 0
    if replay:
        step_total = replay.get("total_actions", 0)
        step_current = replay.get("completed_actions", 0) + 1

    self._show_replay_status(action_text, step_current, step_total)

    self._overlay.show_action(
        target_x, target_y,
        action_text,
        step_current, step_total,
        duration_ms=1500,
    )

    self._chat.add_system_message(
        f"Etape {step_current}/{step_total} : {action_text}"
    )
|
||||
|
||||
def _describe_action(self, action: Dict[str, Any]) -> str:
    """Return a short human-readable description of a replay action.

    Handles the types ``click``, ``type``, ``key_combo``, ``scroll`` and
    ``wait``; any other type yields ``"Action : <type>"``.

    Fields that are present but ``None`` are treated as missing, and
    non-string ``text``/``keys`` values are converted with ``str`` instead
    of raising ``TypeError``.

    Args:
        action: Replay action dictionary.

    Returns:
        The description text (typed text is truncated to 30 characters).
    """
    action_type = action.get("type", "?")

    if action_type == "click":
        target = (
            action.get("target_text")
            or action.get("target_role")
            or "cet element"
        )
        return f"Je clique sur [{target}]"

    if action_type == "type":
        raw_text = action.get("text")
        text = "" if raw_text is None else str(raw_text)
        preview = text[:30] + "..." if len(text) > 30 else text
        return f"Je tape : {preview}"

    if action_type == "key_combo":
        keys = action.get("keys") or []
        if isinstance(keys, str):
            # A single key given as a bare string must not be split per character.
            keys = [keys]
        return f"Je tape : {'+'.join(str(key) for key in keys)}"

    if action_type == "scroll":
        return "Je fais defiler la page"

    if action_type == "wait":
        ms = action.get("duration_ms", 500)
        return f"J'attends {ms}ms"

    return f"Action : {action_type}"
|
||||
|
||||
def _on_overlay_finished(self) -> None:
    """Slot called when the overlay has finished displaying an action.

    Intentionally a no-op: per the original author's note, the executor
    carries on independently.
    """
    return None
|
||||
|
||||
def _show_replay_status(
    self, text: str, current: int, total: int,
) -> None:
    """Show the replay status area with the given text and progress.

    Args:
        text: Text for the status label.
        current: Value for the progress bar.
        total: Progress bar maximum; the bar is hidden unless this is > 0.
    """
    self._status_container.show()

    label = self._replay_label
    label.show()
    label.setText(text)

    bar = self._progress_bar
    if not total > 0:
        # Nothing meaningful to display without a positive total.
        bar.hide()
        return

    bar.show()
    bar.setMaximum(total)
    bar.setValue(current)
|
||||
|
||||
def hide_replay_status(self) -> None:
    """Hide the replay status area (label and progress bar container)."""
    status_box = self._status_container
    status_box.hide()
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Visibilite
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def toggle_visibility(self) -> None:
    """Show or hide the panel (bound to the Ctrl+Shift+L shortcut).

    When showing, the window is also raised and activated.
    """
    if self.isVisible():
        self.hide()
        return

    self.show()
    self.raise_()
    self.activateWindow()
|
||||
|
||||
def toggle_minimize(self) -> None:
    """Switch between the full panel and the mini bar."""
    if self._minimized:
        # Restore the full panel and re-anchor it.
        self._mini_bar.hide()
        self._bg_widget.show()
        self._minimized = False
        self._anchor_to_right()
        return

    # Collapse to the mini bar.
    self._bg_widget.hide()
    self._mini_bar.show()
    self._minimized = True

    # Place the 80x50 mini bar near the top-right corner of the primary
    # screen's available area.
    desktop = QApplication.desktop()
    if desktop:
        available = desktop.availableGeometry(desktop.primaryScreen())
        left = available.right() - 90
        top = available.top() + 10
        self.setGeometry(left, top, 80, 50)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Drag (deplacer la fenetre sans barre de titre)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def mousePressEvent(self, event) -> None:  # noqa: N802
    """Remember where the window was grabbed when the left button is pressed."""
    if event.button() != Qt.LeftButton:
        return
    # Offset between the cursor and the window's top-left corner.
    grab_offset = event.globalPos() - self.frameGeometry().topLeft()
    self._drag_pos = grab_offset
    event.accept()
|
||||
|
||||
def mouseMoveEvent(self, event) -> None:  # noqa: N802
    """Move the window while the left button is held down.

    ``event.buttons()`` is a bitmask of all pressed buttons, so it is tested
    with ``&``: the previous equality test stopped the drag whenever another
    button was held together with the left one.
    """
    if event.buttons() & Qt.LeftButton and hasattr(self, '_drag_pos'):
        self.move(event.globalPos() - self._drag_pos)
        event.accept()
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Painting (fond arrondi semi-transparent)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def paintEvent(self, event) -> None:  # noqa: N802
    """Paint the rounded, slightly translucent background and its border."""
    canvas = QPainter(self)
    canvas.setRenderHint(QPainter.Antialiasing, True)

    # Rounded rectangle covering the whole widget.
    outline = QPainterPath()
    radius = styles.BORDER_RADIUS
    outline.addRoundedRect(0, 0, self.width(), self.height(), radius, radius)

    # Background fill: alpha 245/255, i.e. slightly transparent.
    fill_color = QColor(styles.COLOR_BG)
    fill_color.setAlpha(245)
    canvas.fillPath(outline, fill_color)

    # 1 px border.
    border_pen = QPen(QColor(styles.COLOR_BORDER), 1)
    canvas.setPen(border_pen)
    canvas.drawPath(outline)

    canvas.end()
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Lifecycle
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def closeEvent(self, event) -> None:  # noqa: N802
    """Refuse the close request and hide the window instead."""
    event.ignore()
    self.hide()
|
||||
|
||||
def shutdown(self) -> None:
    """Clean shutdown: stop the connection timer, hide the overlay, stop the client."""
    # Order kept as in the original: timer first, then overlay, then client.
    for stop_step in (
        self._conn_timer.stop,
        self._overlay.hide_overlay,
        self._client.shutdown,
    ):
        stop_step()
    logger.info("LeaMainWindow arretee")
|
||||
@@ -1,354 +0,0 @@
|
||||
# agent_v0/lea_ui/overlay.py
|
||||
"""
|
||||
Overlay de feedback visuel pour le replay.
|
||||
|
||||
Fenetre transparente plein ecran, click-through, qui affiche :
|
||||
- Cercle rouge pulsant autour de la cible du clic
|
||||
- Texte descriptif de l'action en cours
|
||||
- Fleche pointant vers la cible
|
||||
- Barre de progression etape X/Y
|
||||
|
||||
Le overlay ne capture JAMAIS les clics (Qt.WA_TransparentForMouseEvents).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import math
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from PyQt5.QtCore import (
|
||||
QPoint,
|
||||
QPropertyAnimation,
|
||||
QRect,
|
||||
QRectF,
|
||||
QSize,
|
||||
Qt,
|
||||
QTimer,
|
||||
pyqtProperty,
|
||||
pyqtSignal,
|
||||
)
|
||||
from PyQt5.QtGui import (
|
||||
QBrush,
|
||||
QColor,
|
||||
QFont,
|
||||
QFontMetrics,
|
||||
QPainter,
|
||||
QPainterPath,
|
||||
QPen,
|
||||
QPolygonF,
|
||||
)
|
||||
from PyQt5.QtWidgets import QApplication, QDesktopWidget, QWidget
|
||||
|
||||
from . import styles
|
||||
|
||||
logger = logging.getLogger("lea_ui.overlay")
|
||||
|
||||
|
||||
class OverlayWidget(QWidget):
    """Transparent, always-on-top overlay that visualises replay actions.

    Window setup performed in ``__init__``:
    - ``Qt.WindowStaysOnTopHint``: keep the overlay above other windows
    - ``Qt.FramelessWindowHint``: no window decoration
    - ``Qt.Tool``: tool window (intended to stay out of the taskbar)
    - ``Qt.WA_TranslucentBackground``: transparent background
    - ``Qt.WA_TransparentForMouseEvents``: the widget does not take mouse events
    """

    # Emitted after the fade timer has hidden the overlay.
    action_display_finished = pyqtSignal()

    def __init__(self, parent: Optional[QWidget] = None) -> None:
        """Configure the window flags, the display state and both timers."""
        super().__init__(parent)

        # Window flags/attributes for a click-through overlay.
        self.setWindowFlags(
            Qt.WindowStaysOnTopHint
            | Qt.FramelessWindowHint
            | Qt.Tool
        )
        self.setAttribute(Qt.WA_TranslucentBackground, True)
        self.setAttribute(Qt.WA_TransparentForMouseEvents, True)

        # Display state.
        self._target_pos: Optional[Tuple[int, int]] = None
        self._action_text: str = ""
        self._progress_current: int = 0
        self._progress_total: int = 0
        self._action_done: bool = False
        self._visible = False

        # Pulsing circle state (radius oscillates between 25 and 45).
        self._pulse_radius: float = 30.0
        self._pulse_growing = True
        self._pulse_opacity: float = 0.8

        # Animation timer: one tick every 30 ms (~33 FPS).
        self._anim_timer = QTimer(self)
        self._anim_timer.timeout.connect(self._animate_pulse)
        self._anim_timer.setInterval(30)

        # Single-shot timer that hides the overlay automatically.
        self._fade_timer = QTimer(self)
        self._fade_timer.setSingleShot(True)
        self._fade_timer.timeout.connect(self._on_fade)

        # Cover the whole primary screen.
        self._update_geometry()

    def _update_geometry(self) -> None:
        """Resize the overlay to the geometry of the primary screen."""
        desktop = QApplication.desktop()
        if desktop:
            screen_rect = desktop.screenGeometry(desktop.primaryScreen())
            self.setGeometry(screen_rect)

    # -----------------------------------------------------------------------
    # Public API
    # -----------------------------------------------------------------------

    def show_action(
        self,
        target_x: int,
        target_y: int,
        text: str,
        step_current: int = 0,
        step_total: int = 0,
        duration_ms: int = 1500,
    ) -> None:
        """Show the feedback for one replay action.

        Args:
            target_x: X position of the target (screen pixels).
            target_y: Y position of the target (screen pixels).
            text: Description of the action (e.g. "Je clique sur [Valider]").
            step_current: Current step (1-indexed).
            step_total: Total number of steps; no progress bar when <= 0.
            duration_ms: How long the overlay stays visible, in milliseconds.
        """
        self._target_pos = (target_x, target_y)
        self._action_text = text
        self._progress_current = step_current
        self._progress_total = step_total
        self._action_done = False
        # Restart the pulse from a known state so every action animates alike.
        self._pulse_radius = 30.0
        self._pulse_growing = True
        self._pulse_opacity = 0.8
        self._visible = True

        self._update_geometry()
        self.show()
        self.raise_()
        self._anim_timer.start()

        # Schedule the automatic hide.
        self._fade_timer.start(duration_ms)
        self.update()

    def show_done(self, text: Optional[str] = None) -> None:
        """Mark the current action as done (green check) and hide after 800 ms.

        Args:
            text: Optional replacement for the displayed text.
        """
        self._action_done = True
        if text:
            self._action_text = text
        self.update()
        self._fade_timer.start(800)

    def hide_overlay(self) -> None:
        """Hide the overlay immediately and stop both timers."""
        self._anim_timer.stop()
        self._fade_timer.stop()
        self._visible = False
        self._target_pos = None
        self.hide()

    # -----------------------------------------------------------------------
    # Animation
    # -----------------------------------------------------------------------

    def _animate_pulse(self) -> None:
        """Advance the pulsing circle by one animation tick."""
        if self._action_done:
            # No animation while the "done" indicator is shown.
            return

        pulse_speed = 0.8
        if self._pulse_growing:
            self._pulse_radius += pulse_speed
            if self._pulse_radius >= 45.0:
                self._pulse_growing = False
        else:
            self._pulse_radius -= pulse_speed
            if self._pulse_radius <= 25.0:
                self._pulse_growing = True

        # Opacity follows the radius: 0.5 at radius 25, 0.8 at radius 45.
        self._pulse_opacity = 0.5 + 0.3 * (
            (self._pulse_radius - 25.0) / 20.0
        )

        self.update()

    def _on_fade(self) -> None:
        """Fade-timer callback: hide the overlay, then notify listeners."""
        self.hide_overlay()
        self.action_display_finished.emit()

    # -----------------------------------------------------------------------
    # Rendering
    # -----------------------------------------------------------------------

    @staticmethod
    def _label_center_y(target_y: int) -> int:
        """Return the vertical centre of the text label for a target.

        The label goes above the target when there is room, below otherwise.
        Both the label and the arrow use this value so they always agree.
        """
        return target_y - 90 if target_y > 140 else target_y + 70

    def paintEvent(self, event) -> None:  # noqa: N802
        """Draw the overlay for the current action, if any."""
        if not self._visible or not self._target_pos:
            return

        painter = QPainter(self)
        painter.setRenderHint(QPainter.Antialiasing, True)

        tx, ty = self._target_pos

        if self._action_done:
            self._draw_done_indicator(painter, tx, ty)
        else:
            self._draw_pulse_circle(painter, tx, ty)
            self._draw_arrow(painter, tx, ty)

        self._draw_action_text(painter, tx, ty)
        self._draw_progress_bar(painter)

        painter.end()

    def _draw_pulse_circle(self, painter: QPainter, cx: int, cy: int) -> None:
        """Draw the pulsing circle, the fixed ring and the centre dot."""
        # Outer disc: pulsing, semi-transparent.
        color = QColor(styles.COLOR_OVERLAY_PULSE)
        color.setAlphaF(self._pulse_opacity * 0.4)
        painter.setBrush(QBrush(color))
        painter.setPen(Qt.NoPen)
        painter.drawEllipse(
            QPoint(cx, cy),
            int(self._pulse_radius),
            int(self._pulse_radius),
        )

        # Inner ring: fixed radius, more opaque.
        color_inner = QColor(styles.COLOR_OVERLAY_PULSE)
        color_inner.setAlphaF(0.7)
        painter.setPen(QPen(color_inner, 3))
        painter.setBrush(Qt.NoBrush)
        painter.drawEllipse(QPoint(cx, cy), 20, 20)

        # Centre dot.
        painter.setPen(Qt.NoPen)
        painter.setBrush(QBrush(QColor(styles.COLOR_OVERLAY_PULSE)))
        painter.drawEllipse(QPoint(cx, cy), 4, 4)

    def _draw_done_indicator(self, painter: QPainter, cx: int, cy: int) -> None:
        """Draw the success indicator (filled disc with a check mark)."""
        color = QColor(styles.COLOR_SUCCESS)
        color.setAlphaF(0.8)
        painter.setBrush(QBrush(color))
        painter.setPen(Qt.NoPen)
        painter.drawEllipse(QPoint(cx, cy), 25, 25)

        # Check mark.
        pen = QPen(QColor(styles.COLOR_TEXT_ON_ACCENT), 3)
        pen.setCapStyle(Qt.RoundCap)
        pen.setJoinStyle(Qt.RoundJoin)
        painter.setPen(pen)
        painter.setBrush(Qt.NoBrush)

        path = QPainterPath()
        path.moveTo(cx - 10, cy)
        path.lineTo(cx - 3, cy + 8)
        path.lineTo(cx + 12, cy - 8)
        painter.drawPath(path)

    def _draw_arrow(self, painter: QPainter, tx: int, ty: int) -> None:
        """Draw a dashed line from the text label towards the target."""
        # Same vertical anchor as the label; the previous separate thresholds
        # made the line point away from the label for targets near the top.
        text_y = self._label_center_y(ty)
        text_x = max(100, min(tx, self.width() - 200))

        color = QColor(styles.COLOR_OVERLAY_PULSE)
        color.setAlphaF(0.6)
        painter.setPen(QPen(color, 2, Qt.DashLine))
        # Start 15 px from the label centre, on the side facing the target.
        painter.drawLine(text_x, text_y + (15 if text_y < ty else -15), tx, ty)

    def _draw_action_text(self, painter: QPainter, tx: int, ty: int) -> None:
        """Draw the action description in a rounded box near the target."""
        if not self._action_text:
            return

        text_y = self._label_center_y(ty)

        font = QFont(styles.FONT_FAMILY, styles.FONT_SIZE_LARGE, QFont.Bold)
        painter.setFont(font)
        metrics = QFontMetrics(font)

        # Measure the text and add padding.
        text_rect = metrics.boundingRect(self._action_text)
        text_width = text_rect.width() + 30
        text_height = text_rect.height() + 16

        # Centre horizontally on the target, kept 10 px inside the widget.
        box_x = max(10, min(tx - text_width // 2, self.width() - text_width - 10))
        box_y = text_y - text_height // 2

        # Rounded dark semi-transparent background.
        bg_color = QColor(31, 41, 55, 200)
        painter.setBrush(QBrush(bg_color))
        painter.setPen(Qt.NoPen)
        painter.drawRoundedRect(box_x, box_y, text_width, text_height, 8, 8)

        painter.setPen(QPen(QColor(styles.COLOR_OVERLAY_TEXT)))
        painter.drawText(
            QRect(box_x, box_y, text_width, text_height),
            Qt.AlignCenter,
            self._action_text,
        )

    def _draw_progress_bar(self, painter: QPainter) -> None:
        """Draw the step progress bar and its label near the bottom edge."""
        if self._progress_total <= 0:
            return

        bar_width = 300
        bar_height = 6
        bar_x = (self.width() - bar_width) // 2
        bar_y = self.height() - 50

        # Track.
        bg_color = QColor(255, 255, 255, 80)
        painter.setBrush(QBrush(bg_color))
        painter.setPen(Qt.NoPen)
        painter.drawRoundedRect(bar_x, bar_y, bar_width, bar_height, 3, 3)

        # Fill, clamped to the track: the current step can exceed the total
        # (or be negative) and must not draw outside the bar.
        progress_pct = self._progress_current / self._progress_total
        progress_pct = min(1.0, max(0.0, progress_pct))
        fill_width = int(bar_width * progress_pct)
        accent_color = QColor(styles.COLOR_ACCENT)
        accent_color.setAlphaF(0.9)
        painter.setBrush(QBrush(accent_color))
        painter.drawRoundedRect(bar_x, bar_y, fill_width, bar_height, 3, 3)

        # "Etape X/Y" label under the bar.
        label_font = QFont(styles.FONT_FAMILY, styles.FONT_SIZE_SMALL)
        painter.setFont(label_font)
        painter.setPen(QPen(QColor(255, 255, 255, 200)))
        painter.drawText(
            QRect(bar_x, bar_y + bar_height + 4, bar_width, 20),
            Qt.AlignCenter,
            f"Etape {self._progress_current}/{self._progress_total}",
        )
|
||||
@@ -1,191 +0,0 @@
|
||||
# agent_v0/lea_ui/replay_integration.py
|
||||
"""
|
||||
Integration du feedback visuel (overlay) dans la boucle de replay de l'Agent V1.
|
||||
|
||||
Ce module fournit un wrapper autour de ActionExecutorV1.execute_replay_action
|
||||
qui affiche l'overlay AVANT chaque action et la marque comme terminee APRES.
|
||||
|
||||
Sequence pour chaque action :
|
||||
1. Afficher l'overlay avec la description de l'action (1.5s)
|
||||
2. Attendre que l'overlay ait ete vu par l'utilisateur
|
||||
3. Executer l'action
|
||||
4. Mettre a jour l'overlay (coche verte)
|
||||
5. Passer a l'action suivante
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import time
|
||||
from typing import Any, Callable, Dict, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger("lea_ui.replay_integration")
|
||||
|
||||
# Overlay display delay before the action is executed (seconds).
PRE_ACTION_DELAY: float = 1.5
# Delay after the green check mark (seconds).
POST_ACTION_DELAY: float = 0.5
|
||||
|
||||
|
||||
class ReplayOverlayBridge:
|
||||
"""Pont entre la boucle de replay et l'overlay.
|
||||
|
||||
Fonctionne de maniere thread-safe : la boucle de replay tourne dans
|
||||
un thread daemon, et l'overlay est controle via des signaux Qt.
|
||||
|
||||
L'overlay est optionnel — si non connecte, l'execution continue normalement.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
    """Create a bridge with no overlay connected and a zeroed step counter."""
    self._overlay = None

    # Overlay callbacks; all None until connect_overlay() is called.
    self._show_callback: Optional[Callable] = None
    self._done_callback: Optional[Callable] = None
    self._hide_callback: Optional[Callable] = None
    self._enabled = False

    # Progress counters.
    self._step_current = 0
    self._step_total = 0
|
||||
|
||||
def connect_overlay(
|
||||
self,
|
||||
show_fn: Callable[[int, int, str, int, int, int], None],
|
||||
done_fn: Callable[[Optional[str]], None],
|
||||
hide_fn: Callable[[], None],
|
||||
) -> None:
|
||||
"""Connecter les callbacks de l'overlay.
|
||||
|
||||
Args:
|
||||
show_fn: overlay.show_action(target_x, target_y, text, step, total, duration_ms)
|
||||
done_fn: overlay.show_done(text)
|
||||
hide_fn: overlay.hide_overlay()
|
||||
"""
|
||||
self._show_callback = show_fn
|
||||
self._done_callback = done_fn
|
||||
self._hide_callback = hide_fn
|
||||
self._enabled = True
|
||||
logger.info("Overlay connecte au bridge de replay")
|
||||
|
||||
def disconnect_overlay(self) -> None:
|
||||
"""Deconnecter l'overlay."""
|
||||
self._show_callback = None
|
||||
self._done_callback = None
|
||||
self._hide_callback = None
|
||||
self._enabled = False
|
||||
|
||||
def set_total_steps(self, total: int) -> None:
|
||||
"""Definir le nombre total d'etapes du replay."""
|
||||
self._step_total = total
|
||||
self._step_current = 0
|
||||
|
||||
def wrap_execute(
|
||||
self,
|
||||
action: Dict[str, Any],
|
||||
executor_fn: Callable[[Dict[str, Any]], Dict[str, Any]],
|
||||
screen_width: int = 1920,
|
||||
screen_height: int = 1080,
|
||||
) -> Dict[str, Any]:
|
||||
"""Wrapper autour de l'execution d'une action avec feedback overlay.
|
||||
|
||||
Args:
|
||||
action: action normalisee (type, x_pct, y_pct, text, keys, ...)
|
||||
executor_fn: fonction d'execution (ex: ActionExecutorV1.execute_replay_action)
|
||||
screen_width: largeur de l'ecran en pixels
|
||||
screen_height: hauteur de l'ecran en pixels
|
||||
|
||||
Returns:
|
||||
Resultat de l'execution (dict avec success, error, screenshot, ...)
|
||||
"""
|
||||
self._step_current += 1
|
||||
|
||||
if not self._enabled or not self._show_callback:
|
||||
# Pas d'overlay — execution directe
|
||||
return executor_fn(action)
|
||||
|
||||
# --- 1. Afficher l'overlay ---
|
||||
action_text = self._describe_action(action)
|
||||
target_x, target_y = self._get_target_coords(action, screen_width, screen_height)
|
||||
|
||||
try:
|
||||
self._show_callback(
|
||||
target_x, target_y,
|
||||
action_text,
|
||||
self._step_current,
|
||||
self._step_total,
|
||||
int(PRE_ACTION_DELAY * 1000),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning("Erreur affichage overlay : %s", e)
|
||||
|
||||
# --- 2. Attendre que l'utilisateur ait vu ---
|
||||
time.sleep(PRE_ACTION_DELAY)
|
||||
|
||||
# --- 3. Executer l'action ---
|
||||
result = executor_fn(action)
|
||||
|
||||
# --- 4. Marquer comme terminee ---
|
||||
if result.get("success"):
|
||||
done_text = f"{action_text} OK"
|
||||
else:
|
||||
done_text = f"{action_text} ECHEC"
|
||||
|
||||
try:
|
||||
if self._done_callback:
|
||||
self._done_callback(done_text)
|
||||
except Exception as e:
|
||||
logger.warning("Erreur overlay done : %s", e)
|
||||
|
||||
time.sleep(POST_ACTION_DELAY)
|
||||
|
||||
# --- 5. Cacher si c'etait la derniere etape ---
|
||||
if self._step_current >= self._step_total and self._hide_callback:
|
||||
try:
|
||||
self._hide_callback()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return result
|
||||
|
||||
def _describe_action(self, action: Dict[str, Any]) -> str:
|
||||
"""Generer une description lisible d'une action."""
|
||||
action_type = action.get("type", "?")
|
||||
target_text = action.get("target_text", "")
|
||||
target_role = action.get("target_role", "")
|
||||
|
||||
if action_type == "click":
|
||||
target = target_text or target_role or "cet element"
|
||||
return f"Je clique sur [{target}]"
|
||||
elif action_type == "type":
|
||||
text = action.get("text", "")
|
||||
preview = text[:25] + "..." if len(text) > 25 else text
|
||||
return f"Je tape : {preview}"
|
||||
elif action_type == "key_combo":
|
||||
keys = action.get("keys", [])
|
||||
return f"Combinaison : {'+'.join(keys)}"
|
||||
elif action_type == "scroll":
|
||||
return "Defilement"
|
||||
elif action_type == "wait":
|
||||
ms = action.get("duration_ms", 500)
|
||||
return f"Attente {ms}ms"
|
||||
else:
|
||||
return f"Action : {action_type}"
|
||||
|
||||
def _get_target_coords(
|
||||
self, action: Dict[str, Any], sw: int, sh: int,
|
||||
) -> Tuple[int, int]:
|
||||
"""Calculer les coordonnees cible en pixels."""
|
||||
x_pct = action.get("x_pct", 0.5)
|
||||
y_pct = action.get("y_pct", 0.5)
|
||||
return int(x_pct * sw), int(y_pct * sh)
|
||||
|
||||
|
||||
# Instance globale (singleton) pour l'integration
|
||||
_bridge: Optional[ReplayOverlayBridge] = None
|
||||
|
||||
|
||||
def get_replay_bridge() -> ReplayOverlayBridge:
    """Return the module-wide overlay/replay bridge, creating it on first use."""
    global _bridge
    bridge = _bridge
    if bridge is None:
        # First call: build the shared instance and remember it
        bridge = ReplayOverlayBridge()
        _bridge = bridge
    return bridge
|
||||
@@ -21,36 +21,33 @@ from typing import Any, Callable, Dict, List, Optional
|
||||
logger = logging.getLogger("lea_ui.server_client")
|
||||
|
||||
|
||||
def _get_server_host() -> str:
|
||||
"""Recuperer l'adresse du serveur Linux.
|
||||
def _get_server_url() -> str:
|
||||
"""Recuperer l'URL du serveur RPA (avec /api/v1).
|
||||
|
||||
Ordre de resolution :
|
||||
1. Variable d'environnement RPA_SERVER_HOST
|
||||
2. Fichier de config agent_config.json (cle "server_host")
|
||||
3. Fallback localhost
|
||||
1. Import depuis agent_v1.config (source de verite unique)
|
||||
2. Variable d'environnement RPA_SERVER_URL
|
||||
3. Fallback http://localhost:5005/api/v1
|
||||
"""
|
||||
# 1. Variable d'environnement
|
||||
host = os.environ.get("RPA_SERVER_HOST", "").strip()
|
||||
if host:
|
||||
return host
|
||||
# 1. Import depuis config.py (source de verite)
|
||||
try:
|
||||
from agent_v1.config import SERVER_URL
|
||||
return SERVER_URL
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
# 2. Fichier de config
|
||||
config_paths = [
|
||||
os.path.join(os.path.dirname(__file__), "..", "agent_config.json"),
|
||||
os.path.join(os.path.dirname(__file__), "..", "..", "agent_config.json"),
|
||||
]
|
||||
for config_path in config_paths:
|
||||
try:
|
||||
with open(config_path, "r", encoding="utf-8") as f:
|
||||
cfg = json.load(f)
|
||||
host = cfg.get("server_host", "").strip()
|
||||
if host:
|
||||
return host
|
||||
except (OSError, json.JSONDecodeError):
|
||||
continue
|
||||
# 2. Variable d'environnement directe
|
||||
url = os.environ.get("RPA_SERVER_URL", "").strip().rstrip("/")
|
||||
if url:
|
||||
return url
|
||||
|
||||
# 3. Fallback
|
||||
return "localhost"
|
||||
return "http://localhost:5005/api/v1"
|
||||
|
||||
|
||||
def _get_server_base(server_url: str) -> str:
|
||||
"""Extraire la base URL (sans /api/v1) pour les routes racine (/health)."""
|
||||
return server_url.rsplit("/api/v1", 1)[0]
|
||||
|
||||
|
||||
class LeaServerClient:
|
||||
@@ -67,12 +64,23 @@ class LeaServerClient:
|
||||
chat_port: int = 5004,
|
||||
stream_port: int = 5005,
|
||||
) -> None:
|
||||
self._host = server_host or _get_server_host()
|
||||
# URL unifiée : SERVER_URL contient TOUJOURS /api/v1 (convention INC-1).
|
||||
# _stream_url = URL avec /api/v1 (pour les routes API)
|
||||
# _stream_base = URL sans /api/v1 (pour /health uniquement)
|
||||
self._stream_url = _get_server_url()
|
||||
self._stream_base = _get_server_base(self._stream_url)
|
||||
|
||||
# Extraire le host depuis l'URL pour le chat et pour l'affichage
|
||||
try:
|
||||
from urllib.parse import urlparse
|
||||
parsed = urlparse(self._stream_base)
|
||||
self._host = parsed.hostname or "localhost"
|
||||
except Exception:
|
||||
self._host = server_host or "localhost"
|
||||
|
||||
self._chat_port = chat_port
|
||||
self._stream_port = stream_port
|
||||
|
||||
self._chat_base = f"http://{self._host}:{self._chat_port}"
|
||||
self._stream_base = f"http://{self._host}:{self._stream_port}"
|
||||
|
||||
# Etat de connexion
|
||||
self._connected = False
|
||||
@@ -91,11 +99,24 @@ class LeaServerClient:
|
||||
# Session de chat
|
||||
self._chat_session_id: Optional[str] = None
|
||||
|
||||
# Token API pour le serveur streaming (auth Bearer)
|
||||
self._api_token = os.environ.get("RPA_API_TOKEN", "")
|
||||
|
||||
logger.info(
|
||||
"LeaServerClient initialise : chat=%s, stream=%s",
|
||||
self._chat_base, self._stream_base,
|
||||
"LeaServerClient initialise : chat=%s, stream_url=%s, stream_base=%s",
|
||||
self._chat_base, self._stream_url, self._stream_base,
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Auth
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _auth_headers(self) -> Dict[str, str]:
|
||||
"""Headers d'authentification pour le serveur streaming."""
|
||||
if self._api_token:
|
||||
return {"Authorization": f"Bearer {self._api_token}"}
|
||||
return {}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Proprietes
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -133,11 +154,16 @@ class LeaServerClient:
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def check_connection(self) -> bool:
|
||||
"""Tester la connexion au serveur chat."""
|
||||
"""Tester la connexion au serveur streaming (port 5005).
|
||||
|
||||
Le health check utilise _stream_base (sans /api/v1) car la route
|
||||
/health est a la racine du serveur FastAPI, pas sous /api/v1.
|
||||
"""
|
||||
try:
|
||||
import requests
|
||||
resp = requests.get(
|
||||
f"{self._chat_base}/api/workflows",
|
||||
f"{self._stream_base}/health",
|
||||
headers=self._auth_headers(),
|
||||
timeout=5,
|
||||
)
|
||||
was_connected = self._connected
|
||||
@@ -200,11 +226,13 @@ class LeaServerClient:
|
||||
return None
|
||||
|
||||
def list_workflows(self) -> List[Dict[str, Any]]:
|
||||
"""Recuperer la liste des workflows depuis le serveur chat."""
|
||||
"""Recuperer la liste des workflows depuis le serveur streaming."""
|
||||
try:
|
||||
import requests
|
||||
headers = self._auth_headers()
|
||||
resp = requests.get(
|
||||
f"{self._chat_base}/api/workflows",
|
||||
f"{self._stream_url}/traces/stream/workflows",
|
||||
headers=headers,
|
||||
timeout=10,
|
||||
)
|
||||
if resp.ok:
|
||||
@@ -221,22 +249,10 @@ class LeaServerClient:
|
||||
return []
|
||||
|
||||
def list_gestures(self) -> List[Dict[str, Any]]:
|
||||
"""Recuperer la liste des gestes depuis le serveur chat."""
|
||||
try:
|
||||
import requests
|
||||
resp = requests.get(
|
||||
f"{self._chat_base}/api/gestures",
|
||||
timeout=10,
|
||||
)
|
||||
if resp.ok:
|
||||
data = resp.json()
|
||||
if isinstance(data, list):
|
||||
return data
|
||||
return data.get("gestures", [])
|
||||
return []
|
||||
except Exception as e:
|
||||
logger.error("List gestures erreur : %s", e)
|
||||
return []
|
||||
"""Recuperer la liste des gestes (non disponible sur streaming server)."""
|
||||
# Les gestes etaient sur le chat server (5004) qui n'est plus utilise.
|
||||
# Retourner une liste vide silencieusement.
|
||||
return []
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Replay Polling (port 5005)
|
||||
@@ -272,8 +288,9 @@ class LeaServerClient:
|
||||
while self._polling:
|
||||
try:
|
||||
resp = req_lib.get(
|
||||
f"{self._stream_base}/api/v1/traces/stream/replay/next",
|
||||
f"{self._stream_url}/traces/stream/replay/next",
|
||||
params={"session_id": self._poll_session_id},
|
||||
headers=self._auth_headers(),
|
||||
timeout=5,
|
||||
)
|
||||
|
||||
@@ -305,7 +322,8 @@ class LeaServerClient:
|
||||
try:
|
||||
import requests
|
||||
resp = requests.get(
|
||||
f"{self._stream_base}/api/v1/traces/stream/replays",
|
||||
f"{self._stream_url}/traces/stream/replays",
|
||||
headers=self._auth_headers(),
|
||||
timeout=5,
|
||||
)
|
||||
if resp.ok:
|
||||
@@ -332,7 +350,7 @@ class LeaServerClient:
|
||||
try:
|
||||
import requests
|
||||
requests.post(
|
||||
f"{self._stream_base}/api/v1/traces/stream/replay/result",
|
||||
f"{self._stream_url}/traces/stream/replay/result",
|
||||
json={
|
||||
"session_id": session_id,
|
||||
"action_id": action_id,
|
||||
@@ -340,6 +358,7 @@ class LeaServerClient:
|
||||
"error": error,
|
||||
"screenshot": screenshot,
|
||||
},
|
||||
headers=self._auth_headers(),
|
||||
timeout=5,
|
||||
)
|
||||
except Exception as e:
|
||||
|
||||
@@ -1,200 +0,0 @@
|
||||
# agent_v0/lea_ui/styles.py
|
||||
"""
|
||||
Theme et couleurs pour l'interface Lea.
|
||||
|
||||
Palette douce et moderne, pensee pour ne pas fatiguer les yeux
|
||||
lors d'une utilisation prolongee sur un poste de travail Windows.
|
||||
"""
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Palette de couleurs
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Fond principal
|
||||
COLOR_BG = "#F5F7FA"
|
||||
# Fond secondaire (sidebar, header)
|
||||
COLOR_BG_SECONDARY = "#EEF1F6"
|
||||
# Fond des bulles utilisateur
|
||||
COLOR_BUBBLE_USER = "#6366F1"
|
||||
# Fond des bulles Lea
|
||||
COLOR_BUBBLE_LEA = "#FFFFFF"
|
||||
# Accent principal (indigo)
|
||||
COLOR_ACCENT = "#6366F1"
|
||||
# Accent hover
|
||||
COLOR_ACCENT_HOVER = "#4F46E5"
|
||||
# Texte principal
|
||||
COLOR_TEXT = "#1F2937"
|
||||
# Texte secondaire
|
||||
COLOR_TEXT_SECONDARY = "#6B7280"
|
||||
# Texte sur accent (blanc)
|
||||
COLOR_TEXT_ON_ACCENT = "#FFFFFF"
|
||||
# Bordure legere
|
||||
COLOR_BORDER = "#E5E7EB"
|
||||
# Succes (vert)
|
||||
COLOR_SUCCESS = "#10B981"
|
||||
# Erreur (rouge)
|
||||
COLOR_ERROR = "#EF4444"
|
||||
# Avertissement (orange)
|
||||
COLOR_WARNING = "#F59E0B"
|
||||
# Overlay rouge pulsant
|
||||
COLOR_OVERLAY_PULSE = "#EF4444"
|
||||
# Overlay texte
|
||||
COLOR_OVERLAY_TEXT = "#FFFFFF"
|
||||
# Overlay fond info
|
||||
COLOR_OVERLAY_INFO_BG = "rgba(31, 41, 55, 200)"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Typographie
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
FONT_FAMILY = "Segoe UI"
|
||||
FONT_SIZE_SMALL = 11
|
||||
FONT_SIZE_NORMAL = 13
|
||||
FONT_SIZE_LARGE = 15
|
||||
FONT_SIZE_TITLE = 18
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dimensions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Largeur du panneau Lea
|
||||
PANEL_WIDTH = 380
|
||||
# Hauteur minimale
|
||||
PANEL_MIN_HEIGHT = 500
|
||||
# Rayon des coins arrondis
|
||||
BORDER_RADIUS = 12
|
||||
# Rayon des bulles de chat
|
||||
BUBBLE_RADIUS = 16
|
||||
# Padding interne
|
||||
PADDING = 12
|
||||
# Taille de l'avatar
|
||||
AVATAR_SIZE = 40
|
||||
# Marge entre les elements
|
||||
SPACING = 8
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stylesheet global du panneau Lea
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
MAIN_WINDOW_STYLE = f"""
|
||||
QWidget#LeaMainWindow {{
|
||||
background-color: {COLOR_BG};
|
||||
border-radius: {BORDER_RADIUS}px;
|
||||
border: 1px solid {COLOR_BORDER};
|
||||
}}
|
||||
"""
|
||||
|
||||
HEADER_STYLE = f"""
|
||||
QWidget#LeaHeader {{
|
||||
background-color: {COLOR_BG_SECONDARY};
|
||||
border-top-left-radius: {BORDER_RADIUS}px;
|
||||
border-top-right-radius: {BORDER_RADIUS}px;
|
||||
border-bottom: 1px solid {COLOR_BORDER};
|
||||
}}
|
||||
QLabel#LeaTitle {{
|
||||
color: {COLOR_TEXT};
|
||||
font-family: "{FONT_FAMILY}";
|
||||
font-size: {FONT_SIZE_TITLE}px;
|
||||
font-weight: bold;
|
||||
}}
|
||||
QLabel#LeaStatus {{
|
||||
color: {COLOR_TEXT_SECONDARY};
|
||||
font-family: "{FONT_FAMILY}";
|
||||
font-size: {FONT_SIZE_SMALL}px;
|
||||
}}
|
||||
"""
|
||||
|
||||
CHAT_AREA_STYLE = f"""
|
||||
QScrollArea {{
|
||||
border: none;
|
||||
background-color: {COLOR_BG};
|
||||
}}
|
||||
QWidget#ChatContainer {{
|
||||
background-color: {COLOR_BG};
|
||||
}}
|
||||
"""
|
||||
|
||||
INPUT_STYLE = f"""
|
||||
QLineEdit#ChatInput {{
|
||||
background-color: {COLOR_BUBBLE_LEA};
|
||||
border: 1px solid {COLOR_BORDER};
|
||||
border-radius: 20px;
|
||||
padding: 8px 16px;
|
||||
font-family: "{FONT_FAMILY}";
|
||||
font-size: {FONT_SIZE_NORMAL}px;
|
||||
color: {COLOR_TEXT};
|
||||
}}
|
||||
QLineEdit#ChatInput:focus {{
|
||||
border-color: {COLOR_ACCENT};
|
||||
}}
|
||||
"""
|
||||
|
||||
SEND_BUTTON_STYLE = f"""
|
||||
QPushButton#SendButton {{
|
||||
background-color: {COLOR_ACCENT};
|
||||
color: {COLOR_TEXT_ON_ACCENT};
|
||||
border: none;
|
||||
border-radius: 20px;
|
||||
padding: 8px 16px;
|
||||
font-family: "{FONT_FAMILY}";
|
||||
font-size: {FONT_SIZE_NORMAL}px;
|
||||
font-weight: bold;
|
||||
min-width: 50px;
|
||||
}}
|
||||
QPushButton#SendButton:hover {{
|
||||
background-color: {COLOR_ACCENT_HOVER};
|
||||
}}
|
||||
QPushButton#SendButton:pressed {{
|
||||
background-color: #3730A3;
|
||||
}}
|
||||
"""
|
||||
|
||||
QUICK_BUTTON_STYLE = f"""
|
||||
QPushButton#QuickButton {{
|
||||
background-color: {COLOR_BUBBLE_LEA};
|
||||
color: {COLOR_ACCENT};
|
||||
border: 1px solid {COLOR_ACCENT};
|
||||
border-radius: 18px;
|
||||
padding: 6px 14px;
|
||||
font-family: "{FONT_FAMILY}";
|
||||
font-size: {FONT_SIZE_SMALL}px;
|
||||
}}
|
||||
QPushButton#QuickButton:hover {{
|
||||
background-color: {COLOR_ACCENT};
|
||||
color: {COLOR_TEXT_ON_ACCENT};
|
||||
}}
|
||||
"""
|
||||
|
||||
PROGRESS_STYLE = f"""
|
||||
QProgressBar {{
|
||||
border: none;
|
||||
border-radius: 4px;
|
||||
background-color: {COLOR_BORDER};
|
||||
text-align: center;
|
||||
font-family: "{FONT_FAMILY}";
|
||||
font-size: {FONT_SIZE_SMALL}px;
|
||||
color: {COLOR_TEXT};
|
||||
max-height: 8px;
|
||||
}}
|
||||
QProgressBar::chunk {{
|
||||
background-color: {COLOR_ACCENT};
|
||||
border-radius: 4px;
|
||||
}}
|
||||
"""
|
||||
|
||||
STATUS_LABEL_STYLE = f"""
|
||||
QLabel#StatusLabel {{
|
||||
color: {COLOR_TEXT_SECONDARY};
|
||||
font-family: "{FONT_FAMILY}";
|
||||
font-size: {FONT_SIZE_SMALL}px;
|
||||
padding: 4px 8px;
|
||||
}}
|
||||
"""
|
||||
|
||||
MINI_BAR_STYLE = f"""
|
||||
QWidget#MiniBar {{
|
||||
background-color: {COLOR_BG_SECONDARY};
|
||||
border-radius: 20px;
|
||||
border: 1px solid {COLOR_BORDER};
|
||||
}}
|
||||
"""
|
||||
@@ -1,16 +1,134 @@
|
||||
# run_agent_v1.py
|
||||
import sys
|
||||
import os
|
||||
import atexit
|
||||
|
||||
# Make the directory containing this script importable so that sibling
# packages can be resolved regardless of the working directory.
_script_path = os.path.abspath(__file__)
current_dir = os.path.split(_script_path)[0]
if sys.path.count(current_dir) == 0:
    sys.path.append(current_dir)

# ---------------------------------------------------------------
# PID lock -- prevents several instances from running at once.
# Even if Lea.bat is double-clicked or launched twice, only one
# agent runs at a time (defense in depth).
# ---------------------------------------------------------------
LOCK_FILE = os.path.join(current_dir, "lea_agent.lock")
|
||||
|
||||
|
||||
def _pid_is_alive(pid: int) -> bool:
|
||||
"""Vérifie si un processus avec ce PID existe encore (Windows + Unix)."""
|
||||
if sys.platform == "win32":
|
||||
try:
|
||||
import ctypes
|
||||
kernel32 = ctypes.windll.kernel32 # type: ignore[attr-defined]
|
||||
PROCESS_QUERY_LIMITED_INFORMATION = 0x1000
|
||||
handle = kernel32.OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, False, pid)
|
||||
if handle:
|
||||
kernel32.CloseHandle(handle)
|
||||
return True
|
||||
return False
|
||||
except Exception:
|
||||
# Fallback : tasklist
|
||||
try:
|
||||
import subprocess
|
||||
result = subprocess.run(
|
||||
["tasklist", "/FI", f"PID eq {pid}", "/NH"],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
)
|
||||
return str(pid) in result.stdout
|
||||
except Exception:
|
||||
return False
|
||||
else:
|
||||
# Unix/Linux — os.kill(pid, 0) ne tue pas le process
|
||||
try:
|
||||
os.kill(pid, 0)
|
||||
return True
|
||||
except (OSError, ProcessLookupError):
|
||||
return False
|
||||
|
||||
|
||||
def _acquire_lock() -> bool:
    """Try to take the PID lock.

    Returns:
        False when the lock file names another PID that is still alive,
        True otherwise (including when the lock file cannot be written).
    """
    own_pid = os.getpid()

    # Inspect any PID left by a previous run
    if os.path.isfile(LOCK_FILE):
        try:
            with open(LOCK_FILE, "r", encoding="utf-8") as lock_fh:
                recorded_pid = int(lock_fh.read().strip())
            held_by_other = recorded_pid != own_pid and _pid_is_alive(recorded_pid)
            if held_by_other:
                return False  # another instance is already running
        except (ValueError, OSError):
            pass  # unreadable/corrupt lock file -- it gets overwritten below

    # Record our own PID
    try:
        with open(LOCK_FILE, "w", encoding="utf-8") as lock_fh:
            lock_fh.write(str(own_pid))
    except OSError:
        pass  # not fatal -- keep running without a lock
    return True
|
||||
|
||||
|
||||
def _release_lock():
    """Delete the lock file at shutdown, but only if it holds our own PID."""
    try:
        if not os.path.isfile(LOCK_FILE):
            return
        with open(LOCK_FILE, "r", encoding="utf-8") as lock_fh:
            owner_pid = int(lock_fh.read().strip())
        # Only remove the file when it is really OUR lock; the handle is
        # already closed at this point.
        if owner_pid == os.getpid():
            os.remove(LOCK_FILE)
    except (ValueError, OSError):
        pass
|
||||
|
||||
|
||||
# Check the lock BEFORE any heavy initialisation
_lock_acquired = _acquire_lock()
if not _lock_acquired:
    # Another Lea instance is already running -- leave quietly
    raise SystemExit(0)

atexit.register(_release_lock)
|
||||
|
||||
# Load config.txt and .env as environment variables
# (equivalent of the `set` lines in Lea.bat, but also works without the .bat)
for config_file in ("config.txt", ".env"):
    config_path = os.path.join(current_dir, config_file)
    if not os.path.isfile(config_path):
        continue
    with open(config_path, encoding="utf-8", errors="ignore") as f:
        for line in f:
            line = line.strip()
            # Skip blanks, comments and lines that are not KEY=VALUE
            if not line or line.startswith("#") or "=" not in line:
                continue
            key, _, value = line.partition("=")
            key = key.strip()
            value = value.strip()
            if key and value:
                # Variables already present in the environment win
                os.environ.setdefault(key, value)
|
||||
|
||||
# Configure logging to a file (works even under pythonw.exe)
import logging

log_path = os.path.join(current_dir, "agent_debug.log")
logging.basicConfig(
    filename=log_path,
    level=logging.INFO,
    format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
)
logging.info("=== Agent V1 démarrage — config chargée (PID %d) ===", os.getpid())
logging.info("RPA_SERVER_URL=%s", os.environ.get("RPA_SERVER_URL", "(non défini)"))
logging.info("RPA_SERVER_HOST=%s", os.environ.get("RPA_SERVER_HOST", "(non défini)"))
# Never write any part of the bearer token to the log file: only record
# whether it is configured.
logging.info(
    "RPA_API_TOKEN=%s",
    "(défini)" if os.environ.get("RPA_API_TOKEN") else "(non défini)",
)
logging.info("RPA_BLUR_SENSITIVE=%s", os.environ.get("RPA_BLUR_SENSITIVE", "(non défini)"))
|
||||
|
||||
# Import and start the agent; failures are written to the log file.
try:
    from agent_v1.main import main

    if __name__ == "__main__":
        main()
except ImportError as exc:
    logging.error("Erreur d'importation : %s", exc)
    print(f"Erreur d'importation : {exc}")
    print("Assurez-vous d'être dans le répertoire racine du projet et que agent_v1 est bien un package Python.")
except Exception as exc:
    # logging.exception logs at ERROR level with the traceback attached
    logging.exception("Erreur fatale : %s", exc)
|
||||
|
||||
296
agent_v0/server_v1/agent_registry.py
Normal file
296
agent_v0/server_v1/agent_registry.py
Normal file
@@ -0,0 +1,296 @@
|
||||
# agent_v0/server_v1/agent_registry.py
|
||||
"""
|
||||
Registre des agents Lea enrolles sur le parc.
|
||||
|
||||
Alimente par les endpoints /api/v1/agents/enroll et /api/v1/agents/uninstall
|
||||
que l'installeur Inno Setup (`deploy/installer/Lea.iss`) appelle a
|
||||
l'installation et a la desinstallation sur chaque poste collaborateur.
|
||||
|
||||
Stockage : SQLite simple, cohabite avec rpa_data.db dans data/databases/.
|
||||
Aucune dependance GPU/LLM — ce module doit rester leger (juste sqlite3 +
|
||||
stdlib) pour pouvoir etre importe par le serveur HTTP.
|
||||
|
||||
Schema de la table `enrolled_agents` :
|
||||
id INTEGER PK AUTOINCREMENT
|
||||
machine_id TEXT UNIQUE NOT NULL — identifiant genere par l'installeur
|
||||
user_name TEXT — nom affichage collaborateur
|
||||
user_email TEXT
|
||||
user_id TEXT — identifiant metier (ex: AIVA-001)
|
||||
hostname TEXT
|
||||
os_info TEXT
|
||||
version TEXT — version du client Lea
|
||||
status TEXT DEFAULT 'active' — 'active' | 'uninstalled'
|
||||
enrolled_at TEXT NOT NULL — ISO 8601 UTC
|
||||
last_seen_at TEXT — ISO 8601 UTC (heartbeat / stream)
|
||||
uninstalled_at TEXT
|
||||
uninstall_reason TEXT
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import sqlite3
|
||||
import threading
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Verrou global : SQLite tolere plusieurs threads mais on serialise
|
||||
# les ecritures pour eviter les races sur _init_db + upserts concurrents.
|
||||
_DB_LOCK = threading.Lock()
|
||||
|
||||
|
||||
def _utc_now_iso() -> str:
|
||||
"""Horodatage ISO 8601 UTC (compatible toutes les autres tables)."""
|
||||
return datetime.now(timezone.utc).isoformat()
|
||||
|
||||
|
||||
class AgentRegistry:
|
||||
"""Gestion CRUD des agents enrolles (SQLite)."""
|
||||
|
||||
def __init__(self, db_path: str | Path = "data/databases/rpa_data.db"):
|
||||
self.db_path = Path(db_path)
|
||||
self.db_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
self._init_db()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Infra SQLite
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _connect(self) -> sqlite3.Connection:
|
||||
# check_same_thread=False : on protege nous-memes via _DB_LOCK,
|
||||
# indispensable car FastAPI appelle les endpoints sur threads
|
||||
# differents (thread pool).
|
||||
conn = sqlite3.connect(str(self.db_path), check_same_thread=False)
|
||||
conn.row_factory = sqlite3.Row
|
||||
conn.execute("PRAGMA journal_mode=WAL")
|
||||
conn.execute("PRAGMA foreign_keys=ON")
|
||||
return conn
|
||||
|
||||
def _init_db(self) -> None:
|
||||
"""Cree la table et ses index si absents (idempotent)."""
|
||||
with _DB_LOCK, self._connect() as conn:
|
||||
conn.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS enrolled_agents (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
machine_id TEXT NOT NULL UNIQUE,
|
||||
user_name TEXT,
|
||||
user_email TEXT,
|
||||
user_id TEXT,
|
||||
hostname TEXT,
|
||||
os_info TEXT,
|
||||
version TEXT,
|
||||
status TEXT NOT NULL DEFAULT 'active',
|
||||
enrolled_at TEXT NOT NULL,
|
||||
last_seen_at TEXT,
|
||||
uninstalled_at TEXT,
|
||||
uninstall_reason TEXT
|
||||
)
|
||||
"""
|
||||
)
|
||||
conn.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_enrolled_agents_status "
|
||||
"ON enrolled_agents(status)"
|
||||
)
|
||||
conn.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_enrolled_agents_machine "
|
||||
"ON enrolled_agents(machine_id)"
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Lecture
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def get(self, machine_id: str) -> Optional[Dict[str, Any]]:
|
||||
"""Recupere un agent par machine_id (ou None)."""
|
||||
with _DB_LOCK, self._connect() as conn:
|
||||
row = conn.execute(
|
||||
"SELECT * FROM enrolled_agents WHERE machine_id = ?",
|
||||
(machine_id,),
|
||||
).fetchone()
|
||||
return dict(row) if row else None
|
||||
|
||||
def list_by_status(self, status: str) -> List[Dict[str, Any]]:
|
||||
"""Liste les agents par statut ('active' | 'uninstalled')."""
|
||||
with _DB_LOCK, self._connect() as conn:
|
||||
rows = conn.execute(
|
||||
"SELECT * FROM enrolled_agents WHERE status = ? "
|
||||
"ORDER BY enrolled_at DESC",
|
||||
(status,),
|
||||
).fetchall()
|
||||
return [dict(r) for r in rows]
|
||||
|
||||
def count_by_status(self, status: str) -> int:
|
||||
with _DB_LOCK, self._connect() as conn:
|
||||
row = conn.execute(
|
||||
"SELECT COUNT(*) AS n FROM enrolled_agents WHERE status = ?",
|
||||
(status,),
|
||||
).fetchone()
|
||||
return int(row["n"]) if row else 0
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Ecriture
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def enroll(
|
||||
self,
|
||||
*,
|
||||
machine_id: str,
|
||||
user_name: str | None = None,
|
||||
user_email: str | None = None,
|
||||
user_id: str | None = None,
|
||||
hostname: str | None = None,
|
||||
os_info: str | None = None,
|
||||
version: str | None = None,
|
||||
allow_reactivate: bool = True,
|
||||
) -> Dict[str, Any]:
|
||||
"""Enregistre un nouvel agent ou reactive un agent desinstalle.
|
||||
|
||||
Returns:
|
||||
dict avec clefs {"created": bool, "reactivated": bool, "agent": row}
|
||||
|
||||
Raises:
|
||||
ValueError: si machine_id est vide.
|
||||
AgentAlreadyEnrolledError: si deja actif (status=active).
|
||||
"""
|
||||
if not machine_id or not machine_id.strip():
|
||||
raise ValueError("machine_id est obligatoire")
|
||||
machine_id = machine_id.strip()
|
||||
|
||||
now = _utc_now_iso()
|
||||
|
||||
with _DB_LOCK, self._connect() as conn:
|
||||
existing = conn.execute(
|
||||
"SELECT * FROM enrolled_agents WHERE machine_id = ?",
|
||||
(machine_id,),
|
||||
).fetchone()
|
||||
|
||||
if existing is not None:
|
||||
if existing["status"] == "active":
|
||||
# Deja enrolle et actif -> conflit explicit
|
||||
raise AgentAlreadyEnrolledError(dict(existing))
|
||||
|
||||
# Agent desinstalle : reactivation si autorise (defaut)
|
||||
if not allow_reactivate:
|
||||
raise AgentAlreadyEnrolledError(dict(existing))
|
||||
|
||||
conn.execute(
|
||||
"""
|
||||
UPDATE enrolled_agents
|
||||
SET user_name = COALESCE(?, user_name),
|
||||
user_email = COALESCE(?, user_email),
|
||||
user_id = COALESCE(?, user_id),
|
||||
hostname = COALESCE(?, hostname),
|
||||
os_info = COALESCE(?, os_info),
|
||||
version = COALESCE(?, version),
|
||||
status = 'active',
|
||||
enrolled_at = ?,
|
||||
last_seen_at = ?,
|
||||
uninstalled_at = NULL,
|
||||
uninstall_reason = NULL
|
||||
WHERE machine_id = ?
|
||||
""",
|
||||
(
|
||||
user_name, user_email, user_id,
|
||||
hostname, os_info, version,
|
||||
now, now, machine_id,
|
||||
),
|
||||
)
|
||||
conn.commit()
|
||||
row = conn.execute(
|
||||
"SELECT * FROM enrolled_agents WHERE machine_id = ?",
|
||||
(machine_id,),
|
||||
).fetchone()
|
||||
return {"created": False, "reactivated": True, "agent": dict(row)}
|
||||
|
||||
# Nouvelle inscription
|
||||
conn.execute(
|
||||
"""
|
||||
INSERT INTO enrolled_agents (
|
||||
machine_id, user_name, user_email, user_id,
|
||||
hostname, os_info, version,
|
||||
status, enrolled_at, last_seen_at
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, 'active', ?, ?)
|
||||
""",
|
||||
(
|
||||
machine_id, user_name, user_email, user_id,
|
||||
hostname, os_info, version,
|
||||
now, now,
|
||||
),
|
||||
)
|
||||
conn.commit()
|
||||
row = conn.execute(
|
||||
"SELECT * FROM enrolled_agents WHERE machine_id = ?",
|
||||
(machine_id,),
|
||||
).fetchone()
|
||||
return {"created": True, "reactivated": False, "agent": dict(row)}
|
||||
|
||||
def uninstall(
|
||||
self,
|
||||
*,
|
||||
machine_id: str,
|
||||
reason: str | None = None,
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""Marque un agent comme desinstalle (soft delete).
|
||||
|
||||
Returns:
|
||||
Le row mis a jour, ou None si l'agent n'existe pas.
|
||||
"""
|
||||
if not machine_id or not machine_id.strip():
|
||||
raise ValueError("machine_id est obligatoire")
|
||||
machine_id = machine_id.strip()
|
||||
|
||||
now = _utc_now_iso()
|
||||
with _DB_LOCK, self._connect() as conn:
|
||||
existing = conn.execute(
|
||||
"SELECT * FROM enrolled_agents WHERE machine_id = ?",
|
||||
(machine_id,),
|
||||
).fetchone()
|
||||
if existing is None:
|
||||
return None
|
||||
|
||||
conn.execute(
|
||||
"""
|
||||
UPDATE enrolled_agents
|
||||
SET status = 'uninstalled',
|
||||
uninstalled_at = ?,
|
||||
uninstall_reason = ?
|
||||
WHERE machine_id = ?
|
||||
""",
|
||||
(now, reason, machine_id),
|
||||
)
|
||||
conn.commit()
|
||||
row = conn.execute(
|
||||
"SELECT * FROM enrolled_agents WHERE machine_id = ?",
|
||||
(machine_id,),
|
||||
).fetchone()
|
||||
return dict(row)
|
||||
|
||||
def touch_last_seen(self, machine_id: str) -> None:
    """Refresh ``last_seen_at`` for an agent (stream / heartbeat hook).

    The call is deliberately silent: an empty identifier returns at once,
    and an unknown agent simply matches no row in the UPDATE, so old
    clients never trigger an error.

    Args:
        machine_id: Identifier of the agent. Surrounding whitespace is
            stripped, as ``uninstall`` does, so a padded identifier still
            matches the stored one and a whitespace-only identifier is
            treated as empty.
    """
    # Normalise the identifier the same way uninstall() does before
    # touching the database.
    if isinstance(machine_id, str):
        machine_id = machine_id.strip()
    if not machine_id:
        return

    now = _utc_now_iso()
    with _DB_LOCK, self._connect() as conn:
        conn.execute(
            "UPDATE enrolled_agents SET last_seen_at = ? WHERE machine_id = ?",
            (now, machine_id),
        )
        conn.commit()
|
||||
|
||||
|
||||
class AgentAlreadyEnrolledError(Exception):
    """Raised when enrollment is refused because the machine already has a row.

    The existing database row is kept on ``existing`` so callers can inspect
    it; the exception message embeds its ``machine_id`` and ``status``.
    """

    def __init__(self, existing_row: Dict[str, Any]):
        machine = existing_row.get("machine_id")
        status = existing_row.get("status")
        super().__init__(f"machine_id={machine} deja enrole (status={status})")
        self.existing = existing_row
|
||||
File diff suppressed because it is too large
Load Diff
393
agent_v0/server_v1/audit_trail.py
Normal file
393
agent_v0/server_v1/audit_trail.py
Normal file
@@ -0,0 +1,393 @@
|
||||
# agent_v0/server_v1/audit_trail.py
|
||||
"""
|
||||
Module Audit Trail — traçabilité complète des actions RPA.
|
||||
|
||||
Responsabilité : "Chaque action exécutée par Léa est tracée, datée, attribuée."
|
||||
|
||||
En milieu hospitalier (codage CIM-10 via DPI), la traçabilité est une obligation
|
||||
légale. Ce module enregistre chaque action avec :
|
||||
- L'identité du TIM (Technicien d'Information Médicale) superviseur
|
||||
- Le mode d'exécution (autonome, assisté, shadow)
|
||||
- Le résultat détaillé (succès, échec, correction)
|
||||
- L'horodatage ISO 8601
|
||||
|
||||
Format de stockage : fichiers JSONL datés dans data/audit/ (un par jour).
|
||||
Aucune dépendance externe (stdlib + dataclasses uniquement).
|
||||
|
||||
Usage :
|
||||
audit = AuditTrail()
|
||||
audit.record(AuditEntry(
|
||||
session_id="sess_abc",
|
||||
action_id="act_001",
|
||||
user_id="tim_dupont",
|
||||
user_name="Marie Dupont",
|
||||
...
|
||||
))
|
||||
entries = audit.query(user_id="tim_dupont", date_from="2026-04-01")
|
||||
csv_data = audit.export_csv(date_from="2026-04-01", date_to="2026-04-06")
|
||||
summary = audit.get_summary("2026-04-05")
|
||||
"""
|
||||
|
||||
import csv
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
from dataclasses import dataclass, asdict, fields
|
||||
from datetime import datetime, date, timedelta
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Default directory for the audit files. Read once at import time from the
# RPA_AUDIT_DIR environment variable, falling back to "data/audit".
_DEFAULT_AUDIT_DIR = os.environ.get("RPA_AUDIT_DIR", "data/audit")
|
||||
|
||||
|
||||
@dataclass
class AuditEntry:
    """One traced event of the audit trail.

    Every field has a default, so an entry can be built incrementally.
    Field order is significant: it is the column order of the CSV export.
    """

    # ISO 8601 timestamp (e.g. 2026-04-05T14:23:01.456789)
    timestamp: str = ""

    # Session and action identifiers
    session_id: str = ""
    action_id: str = ""

    # Identity of the supervising user
    user_id: str = ""  # TIM identifier (Windows login or configured value)
    user_name: str = ""  # Display name (e.g. "Marie Dupont")
    machine_id: str = ""  # Client workstation id (hostname or configured value)

    # What was done
    action_type: str = ""  # click, type, key_combo, wait, etc.
    action_detail: str = ""  # Human-readable description of the action
    target_app: str = ""  # Target application (DxCare, Orbis, etc.)

    # Execution mode
    execution_mode: str = ""  # "autonomous", "assisted", "shadow"

    # Outcome
    result: str = ""  # "success", "failed", "skipped", "recovered"
    resolution_method: str = ""  # How the target was located (som_text_match, vlm_direct, etc.)
    critic_result: str = ""  # Outcome of the semantic verification
    recovery_action: str = ""  # Corrective action on failure (undo, escape, retry, none)

    # Business context
    domain: str = ""  # Business domain (tim_codage, generic, etc.)
    workflow_id: str = ""  # Id of the executed workflow
    workflow_name: str = ""  # Readable workflow name

    # Performance
    duration_ms: float = 0.0  # Duration of the action in milliseconds

    def to_dict(self) -> Dict[str, Any]:
        """Return the entry as a plain dict, ready for JSON serialization."""
        payload = asdict(self)
        return payload

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "AuditEntry":
        """Build an entry from a dict, dropping keys that are not fields.

        Unknown keys are ignored so that files written by a newer version
        can still be read.
        """
        allowed = {spec.name for spec in fields(cls)}
        kwargs: Dict[str, Any] = {}
        for key, value in data.items():
            if key in allowed:
                kwargs[key] = value
        return cls(**kwargs)
|
||||
|
||||
|
||||
class AuditTrail:
    """Audit log manager: records and queries traced actions.

    Each event is appended to a dated JSONL file (one file per day), e.g.::

        data/audit/audit_2026-04-05.jsonl
        data/audit/audit_2026-04-06.jsonl

    Writes are serialized by an in-process lock; reads take no lock.
    """

    def __init__(self, audit_dir: str = ""):
        """Create the audit directory if it does not exist yet.

        Args:
            audit_dir: Target directory; when empty, the module default
                ``_DEFAULT_AUDIT_DIR`` is used.
        """
        self.audit_dir = Path(audit_dir or _DEFAULT_AUDIT_DIR)
        self.audit_dir.mkdir(parents=True, exist_ok=True)
        self._lock = threading.Lock()
        logger.info(f"Audit Trail initialisé : {self.audit_dir}")

    def _file_for_date(self, d: date) -> Path:
        """Return the path of the JSONL file holding the entries of day ``d``."""
        return self.audit_dir / f"audit_{d.isoformat()}.jsonl"

    @staticmethod
    def _parse_date(value: str, fallback: date) -> date:
        """Parse a ``YYYY-MM-DD`` string, returning ``fallback`` when empty or invalid."""
        if not value:
            return fallback
        try:
            return date.fromisoformat(value)
        except (ValueError, TypeError):
            return fallback

    def record(self, entry: "AuditEntry") -> None:
        """Append one entry to the JSONL file of its day.

        A timestamp is filled in (local ``datetime.now()``) when the entry
        has none. The target file is chosen from the entry's timestamp, or
        from today's date when that timestamp cannot be parsed.

        Write failures are logged and swallowed: recording is best-effort
        and never raises to the caller.
        """
        if not entry.timestamp:
            entry.timestamp = datetime.now().isoformat()

        try:
            entry_date = datetime.fromisoformat(entry.timestamp).date()
        except (ValueError, TypeError):
            entry_date = date.today()

        audit_file = self._file_for_date(entry_date)

        with self._lock:
            try:
                with open(audit_file, "a", encoding="utf-8") as f:
                    f.write(json.dumps(entry.to_dict(), ensure_ascii=False) + "\n")
            except Exception as e:
                logger.error(f"Audit Trail: échec écriture {audit_file}: {e}")
                return

        logger.debug(
            f"Audit: {entry.result} {entry.action_type} "
            f"'{entry.action_detail[:50]}' "
            f"[user={entry.user_id}] [session={entry.session_id}]"
        )

    def _load_file(self, filepath: Path) -> List["AuditEntry"]:
        """Load every valid entry of one JSONL file.

        Blank lines are skipped. A line that is not valid JSON, or that is
        valid JSON but not an object, is logged and skipped without
        discarding the rest of the file. A missing file yields an empty
        list; an I/O error is logged and the entries read so far are
        returned.
        """
        if not filepath.is_file():
            return []

        entries: List["AuditEntry"] = []
        try:
            with open(filepath, "r", encoding="utf-8") as f:
                for line_num, line in enumerate(f, 1):
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        data = json.loads(line)
                    except json.JSONDecodeError as e:
                        logger.warning(
                            f"Audit Trail: ligne {line_num} invalide dans "
                            f"{filepath.name}: {e}"
                        )
                        continue
                    # A scalar or array is valid JSON but cannot be an entry;
                    # skip it here so it does not abort the whole file.
                    if not isinstance(data, dict):
                        logger.warning(
                            f"Audit Trail: ligne {line_num} invalide dans "
                            f"{filepath.name}: objet JSON attendu, "
                            f"reçu {type(data).__name__}"
                        )
                        continue
                    entries.append(AuditEntry.from_dict(data))
        except Exception as e:
            logger.error(f"Audit Trail: échec lecture {filepath}: {e}")

        return entries

    def _date_range(self, date_from: str = "", date_to: str = "") -> List[date]:
        """Return every date from ``date_from`` to ``date_to``, both included.

        An empty or invalid ``date_from`` means today; an empty or invalid
        ``date_to`` means ``date_from``. Bounds given in reverse order are
        swapped. Expected format: ``YYYY-MM-DD``.
        """
        d_from = self._parse_date(date_from, date.today())
        d_to = self._parse_date(date_to, d_from)

        # Ensure chronological order.
        if d_to < d_from:
            d_from, d_to = d_to, d_from

        # Offsets from d_from never step past d_to, so a range ending on
        # date.max does not overflow (incrementing past it would).
        span = (d_to - d_from).days
        return [d_from + timedelta(days=offset) for offset in range(span + 1)]

    def _collect_entries(
        self,
        date_from: str,
        date_to: str,
        criteria: Dict[str, str],
    ) -> List["AuditEntry"]:
        """Load, filter and sort (newest first) the entries of a date range.

        ``criteria`` maps field names to required values; empty values are
        ignored and the remaining ones are combined with AND.
        """
        wanted = {name: value for name, value in criteria.items() if value}

        matches: List["AuditEntry"] = []
        for day in self._date_range(date_from, date_to):
            for entry in self._load_file(self._file_for_date(day)):
                if all(getattr(entry, name) == value for name, value in wanted.items()):
                    matches.append(entry)

        # str() keeps the sort usable even if a stored timestamp is not a string.
        matches.sort(key=lambda e: str(e.timestamp), reverse=True)
        return matches

    def query(
        self,
        date_from: str = "",
        date_to: str = "",
        user_id: str = "",
        session_id: str = "",
        result: str = "",
        action_type: str = "",
        workflow_id: str = "",
        domain: str = "",
        limit: int = 500,
        offset: int = 0,
    ) -> List[Dict[str, Any]]:
        """Search audit entries with optional filters.

        All filters are optional and combined with AND. Entries are
        returned as dicts, sorted by timestamp descending (newest first),
        then paginated with ``offset`` / ``limit``. Negative ``offset`` or
        ``limit`` values are treated as 0.
        """
        matches = self._collect_entries(
            date_from,
            date_to,
            {
                "user_id": user_id,
                "session_id": session_id,
                "result": result,
                "action_type": action_type,
                "workflow_id": workflow_id,
                "domain": domain,
            },
        )

        start = max(offset, 0)
        count = max(limit, 0)
        return [e.to_dict() for e in matches[start:start + count]]

    def get_summary(self, target_date: str = "") -> Dict[str, Any]:
        """Return aggregated statistics for one day.

        The result holds the total number of actions, the overall success
        rate, and breakdowns by user, result, action type, workflow and
        execution mode. Empty values are counted under ``"inconnu"``. An
        empty or invalid ``target_date`` means today.
        """
        d = self._parse_date(target_date, date.today())
        entries = self._load_file(self._file_for_date(d))

        if not entries:
            return {
                "date": d.isoformat(),
                "total_actions": 0,
                "success_rate": 0.0,
                "by_user": {},
                "by_result": {},
                "by_action_type": {},
                "by_workflow": {},
                "by_execution_mode": {},
            }

        total = len(entries)
        successes = sum(1 for e in entries if e.result == "success")

        by_user: Dict[str, Dict[str, Any]] = {}
        by_result: Dict[str, int] = {}
        by_action_type: Dict[str, int] = {}
        by_workflow: Dict[str, int] = {}
        by_execution_mode: Dict[str, int] = {}

        for entry in entries:
            uid = entry.user_id or "inconnu"
            # The display name of a user's first entry of the day is kept.
            stats = by_user.setdefault(
                uid,
                {"user_name": entry.user_name, "total": 0, "success": 0},
            )
            stats["total"] += 1
            if entry.result == "success":
                stats["success"] += 1

            for bucket, value in (
                (by_result, entry.result),
                (by_action_type, entry.action_type),
                (by_workflow, entry.workflow_id),
                (by_execution_mode, entry.execution_mode),
            ):
                label = value or "inconnu"
                bucket[label] = bucket.get(label, 0) + 1

        # Per-user success rate; "total" is at least 1 for every user here.
        for stats in by_user.values():
            stats["success_rate"] = round(stats["success"] / stats["total"], 3)

        return {
            "date": d.isoformat(),
            "total_actions": total,
            "success_rate": round(successes / total, 3),
            "by_user": by_user,
            "by_result": by_result,
            "by_action_type": by_action_type,
            "by_workflow": by_workflow,
            "by_execution_mode": by_execution_mode,
        }

    def export_csv(
        self,
        date_from: str = "",
        date_to: str = "",
        user_id: str = "",
        session_id: str = "",
    ) -> str:
        """Export audit entries as CSV text (header included).

        Optional filters: date range, user, session. Every matching entry
        is exported, with no row cap. Returns an empty string when nothing
        matches.
        """
        entries = self._collect_entries(
            date_from,
            date_to,
            {"user_id": user_id, "session_id": session_id},
        )
        if not entries:
            return ""

        # CSV columns follow the dataclass field order.
        fieldnames = [f.name for f in fields(AuditEntry)]

        output = io.StringIO()
        writer = csv.DictWriter(
            output,
            fieldnames=fieldnames,
            extrasaction="ignore",
            quoting=csv.QUOTE_MINIMAL,
        )
        writer.writeheader()
        writer.writerows(entry.to_dict() for entry in entries)

        return output.getvalue()
|
||||
622
agent_v0/server_v1/chat_interface.py
Normal file
622
agent_v0/server_v1/chat_interface.py
Normal file
@@ -0,0 +1,622 @@
|
||||
"""
|
||||
ChatInterface — Interface de chat conversationnelle pour Léa.
|
||||
|
||||
Permet au TIM (Technicien Information Médicale) de parler à Léa en langage
|
||||
naturel :
|
||||
- "Ouvre le Bloc-notes et écris bonjour"
|
||||
- Léa comprend (TaskPlanner) et propose un plan
|
||||
- Le TIM confirme (ou refuse)
|
||||
- Léa exécute (replay) et envoie des updates de progression
|
||||
- Historique conversationnel conservé par session
|
||||
|
||||
C'est une couche LÉGÈRE au-dessus du TaskPlanner. Toute la logique de
|
||||
compréhension reste dans TaskPlanner — ChatInterface gère uniquement
|
||||
l'état conversationnel, la confirmation et le suivi d'exécution.
|
||||
|
||||
États de la session :
|
||||
idle → en attente d'un message
|
||||
planning → TaskPlanner.understand() en cours
|
||||
awaiting_confirmation → plan prêt, attend la confirmation du TIM
|
||||
executing → replay en cours
|
||||
done → dernier tour terminé (retour à idle au prochain message)
|
||||
error → erreur interne (instruction non comprise, exception…)
|
||||
|
||||
Langue : 100% français (c'est l'interface utilisateur).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Callable, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# =============================================================================
# States
# =============================================================================

# Session states, as described in the module docstring:
#   idle                  -> waiting for a message
#   planning              -> TaskPlanner.understand() in progress
#   awaiting_confirmation -> plan ready, waiting for the user's confirmation
#   executing             -> replay in progress
#   done                  -> last turn finished
#   error                 -> internal error (instruction not understood, exception...)
STATE_IDLE = "idle"
STATE_PLANNING = "planning"
STATE_AWAITING_CONFIRMATION = "awaiting_confirmation"
STATE_EXECUTING = "executing"
STATE_DONE = "done"
STATE_ERROR = "error"

# Set of every state name defined above.
VALID_STATES = {
    STATE_IDLE,
    STATE_PLANNING,
    STATE_AWAITING_CONFIRMATION,
    STATE_EXECUTING,
    STATE_DONE,
    STATE_ERROR,
}

# Message roles
ROLE_USER = "user"
ROLE_LEA = "lea"
ROLE_SYSTEM = "system"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Message
|
||||
# =============================================================================
|
||||
|
||||
@dataclass
class ChatMessage:
    """A single message of a conversation history."""

    role: str  # "user", "lea", "system"
    content: str  # Text of the message
    timestamp: float = field(default_factory=time.time)
    # Optional contextual data (plan, result, progress...)
    meta: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the message as a plain dict (``meta`` is not copied)."""
        return dict(
            role=self.role,
            content=self.content,
            timestamp=self.timestamp,
            meta=self.meta,
        )
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ChatSession
|
||||
# =============================================================================
|
||||
|
||||
class ChatSession:
|
||||
"""Une conversation entre un utilisateur et Léa.
|
||||
|
||||
Maintient l'historique, l'état courant, et le dernier plan en attente
|
||||
de confirmation. Thread-safe (un lock par session).
|
||||
|
||||
Dépendances injectées (pour tester facilement) :
|
||||
- task_planner : instance de TaskPlanner (ou mock)
|
||||
- workflows_provider : callable () -> List[Dict] (liste des workflows)
|
||||
- replay_callback : callable (session_id, machine_id, params) -> replay_id
|
||||
- status_provider : callable (replay_id) -> Dict (pour suivre l'exécution)
|
||||
|
||||
Toutes ces dépendances sont optionnelles : ChatSession dégrade
|
||||
gracieusement (fallback) si gemma4 / replay indisponibles.
|
||||
"""
|
||||
|
||||
def __init__(
    self,
    session_id: str = "",
    task_planner: Any = None,
    workflows_provider: Optional[Callable[[], List[Dict[str, Any]]]] = None,
    replay_callback: Optional[Callable[..., str]] = None,
    status_provider: Optional[Callable[[str], Dict[str, Any]]] = None,
    machine_id: str = "default",
):
    """Create a session in the idle state and post the welcome message.

    Args:
        session_id: Identifier to use; when empty, a ``chat_`` prefix
            followed by 12 hex characters of a random UUID is generated.
        task_planner: Object whose ``understand()`` turns an instruction
            into a plan (may be None).
        workflows_provider: Callable returning the known workflows.
        replay_callback: Callable that starts a replay and returns its id.
        status_provider: Callable returning the status dict of a replay id.
        machine_id: Target machine passed to ``replay_callback``.
    """
    # Injected collaborators; all of them are optional.
    self._task_planner = task_planner
    self._workflows_provider = workflows_provider
    self._replay_callback = replay_callback
    self._status_provider = status_provider

    # Identity and timestamps.
    if not session_id:
        session_id = f"chat_{uuid.uuid4().hex[:12]}"
    self.session_id = session_id
    self.machine_id = machine_id
    started = time.time()
    self.created_at = started
    self.updated_at = started

    # Conversation state.
    self._state: str = STATE_IDLE
    self._messages: List[ChatMessage] = []
    self._pending_plan: Any = None  # Plan waiting for confirmation
    self._active_replay_id: str = ""  # Current replay (while executing)
    self._last_progress: Dict[str, Any] = {}

    # Re-entrant, so a method holding the lock can call another that takes it.
    self._lock = threading.RLock()

    # Welcome message, posted last so every attribute above exists.
    self._append(
        ROLE_LEA,
        "Bonjour ! Je suis Léa. Dites-moi ce que vous voulez que je fasse.",
        meta={"welcome": True},
    )
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Accesseurs
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
@property
|
||||
def state(self) -> str:
|
||||
with self._lock:
|
||||
return self._state
|
||||
|
||||
def get_history(self) -> List[Dict[str, Any]]:
|
||||
"""Retourne l'historique complet des messages (sérialisé)."""
|
||||
with self._lock:
|
||||
return [m.to_dict() for m in self._messages]
|
||||
|
||||
def get_snapshot(self) -> Dict[str, Any]:
|
||||
"""État complet pour l'UI (historique + état + progression)."""
|
||||
with self._lock:
|
||||
return {
|
||||
"session_id": self.session_id,
|
||||
"state": self._state,
|
||||
"machine_id": self.machine_id,
|
||||
"created_at": self.created_at,
|
||||
"updated_at": self.updated_at,
|
||||
"messages": [m.to_dict() for m in self._messages],
|
||||
"pending_plan": (
|
||||
self._pending_plan.to_dict()
|
||||
if self._pending_plan is not None
|
||||
else None
|
||||
),
|
||||
"active_replay_id": self._active_replay_id,
|
||||
"progress": dict(self._last_progress),
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# API publique
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
def send_message(self, text: str) -> Dict[str, Any]:
|
||||
"""Envoyer un message utilisateur.
|
||||
|
||||
Trois cas possibles selon l'état courant :
|
||||
1. awaiting_confirmation → c'est une réponse OUI/NON
|
||||
2. executing → on rafraîchit la progression
|
||||
3. idle/done/error → nouvelle instruction, on appelle TaskPlanner
|
||||
"""
|
||||
text = (text or "").strip()
|
||||
if not text:
|
||||
return {
|
||||
"ok": False,
|
||||
"error": "Message vide",
|
||||
"state": self._state,
|
||||
}
|
||||
|
||||
with self._lock:
|
||||
# Cas 1 : on attend une confirmation
|
||||
if self._state == STATE_AWAITING_CONFIRMATION:
|
||||
return self._handle_confirmation_reply(text)
|
||||
|
||||
# Cas 2 : en pleine exécution → message ajouté mais pas d'action
|
||||
if self._state == STATE_EXECUTING:
|
||||
self._append(ROLE_USER, text)
|
||||
self._append(
|
||||
ROLE_LEA,
|
||||
"Je suis en train d'exécuter le workflow. Un instant…",
|
||||
)
|
||||
return {"ok": True, "state": self._state}
|
||||
|
||||
# Cas 3 : nouvelle instruction
|
||||
self._append(ROLE_USER, text)
|
||||
self._set_state(STATE_PLANNING)
|
||||
|
||||
# Appel TaskPlanner hors du lock (peut être lent : gemma4)
|
||||
return self._plan_and_reply(text)
|
||||
|
||||
def confirm(self, confirmed: bool = True) -> Dict[str, Any]:
    """Confirm (or refuse) the execution of the pending plan.

    Args:
        confirmed: True to launch the pending plan, False to cancel it.

    Returns:
        A result dict. ``ok`` is False when the session is not awaiting
        a confirmation or when no plan is stored. On refusal the session
        goes back to idle. On confirmation the result of
        ``_execute_plan`` is returned.
    """
    with self._lock:
        if self._state != STATE_AWAITING_CONFIRMATION:
            return {
                "ok": False,
                "error": f"Pas de plan en attente (état={self._state})",
                "state": self._state,
            }

        if not confirmed:
            self._append(
                ROLE_LEA,
                "D'accord, j'annule. Dites-moi autre chose quand vous voulez.",
            )
            self._pending_plan = None
            self._set_state(STATE_IDLE)
            return {"ok": True, "state": self._state, "confirmed": False}

        plan = self._pending_plan
        if plan is None:
            self._set_state(STATE_IDLE)
            return {
                "ok": False,
                "error": "Aucun plan à confirmer",
                "state": self._state,
            }

        # The plan is consumed here, exactly as when the user answers
        # "oui" in the chat: once executing, there is no pending plan left
        # for snapshots to report.
        self._pending_plan = None
        self._set_state(STATE_EXECUTING)

    # Launch after the lock is released.
    return self._execute_plan(plan)
|
||||
|
||||
def refresh_progress(self) -> Dict[str, Any]:
    """Refresh the progress of the running replay.

    Called by the client (polling) to obtain execution updates. The status
    provider is queried without holding the lock. When the reported status
    is terminal, the session moves to done or error and a closing message
    is appended. When it is ``paused_need_help``, a help request is
    appended once per pause and the session stays in executing.

    Returns:
        A dict with ``ok``, the current ``state`` and a copy of the last
        known ``progress``.
    """
    with self._lock:
        if self._state != STATE_EXECUTING or not self._active_replay_id:
            return {
                "ok": True,
                "state": self._state,
                "progress": dict(self._last_progress),
            }

        replay_id = self._active_replay_id
        provider = self._status_provider

    if provider is None:
        return {"ok": True, "state": self._state, "progress": {}}

    try:
        status = provider(replay_id) or {}
    except Exception as e:
        logger.warning(f"ChatSession: status_provider erreur: {e}")
        status = {}

    with self._lock:
        # The provider ran without the lock: another poll may already have
        # closed this replay (or a new one may have started). In that case
        # this result is stale and must not append a second closing message.
        if self._state != STATE_EXECUTING or self._active_replay_id != replay_id:
            return {
                "ok": True,
                "state": self._state,
                "progress": dict(self._last_progress),
            }

        previous_status = str(self._last_progress.get("status", "")).lower()
        self._last_progress = status
        self.updated_at = time.time()

        # End-of-run detection
        replay_status = str(status.get("status", "")).lower()
        completed = status.get("completed_actions", 0)
        total = status.get("total_actions", 0)

        if replay_status in ("done", "completed", "finished", "success"):
            summary = (
                f"Workflow terminé ! {completed}/{total} actions réussies."
                if total
                else "Workflow terminé."
            )
            self._append(ROLE_LEA, summary, meta={"progress": dict(status)})
            self._set_state(STATE_DONE)
            self._active_replay_id = ""
        elif replay_status in ("failed", "error", "aborted"):
            err = status.get("error") or status.get("message") or "Erreur inconnue"
            self._append(
                ROLE_LEA,
                f"Le workflow a échoué : {err}",
                meta={"progress": dict(status)},
            )
            self._set_state(STATE_ERROR)
            self._active_replay_id = ""
        elif replay_status == "paused_need_help":
            # Announce the pause only when entering it; otherwise every
            # poll would add the same message to the history. The session
            # stays in executing so the run can be resumed.
            if previous_status != "paused_need_help":
                self._append(
                    ROLE_LEA,
                    "Je suis bloquée sur une action, j'ai besoin d'aide…",
                    meta={"progress": dict(status)},
                )
        # Otherwise the replay is still running: no message.

        return {
            "ok": True,
            "state": self._state,
            "progress": dict(self._last_progress),
        }
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Logique interne
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
def _plan_and_reply(self, instruction: str) -> Dict[str, Any]:
    """Ask the planner to understand ``instruction`` and post the reply.

    Three outcomes:

    - no plan (planner missing or raising): apology, state error;
    - plan not understood: request to rephrase, state error;
    - plan understood: proposal stored as pending plan, state
      awaiting_confirmation.

    The planner and the workflows provider are called without holding the
    lock; only the history and state updates take it.
    """
    plan = None
    error_msg = ""

    planner = self._task_planner
    if planner is None:
        error_msg = "Planificateur indisponible"
    else:
        try:
            workflows = []
            provider = self._workflows_provider
            if provider is not None:
                # A failing provider only means "no known workflow".
                try:
                    workflows = provider() or []
                except Exception as e:
                    logger.warning(f"ChatSession: workflows_provider erreur: {e}")
                    workflows = []

            plan = planner.understand(
                instruction=instruction,
                available_workflows=workflows,
            )
        except Exception as e:
            logger.warning(f"ChatSession: TaskPlanner.understand erreur: {e}")
            error_msg = f"Erreur de compréhension : {e}"

    # Graceful fallback: no plan at all.
    if plan is None:
        apology = f"Désolée, je n'arrive pas à comprendre pour l'instant. {error_msg}".strip()
        with self._lock:
            self._append(ROLE_LEA, apology, meta={"error": error_msg})
            self._set_state(STATE_ERROR)
            return {
                "ok": False,
                "state": self._state,
                "error": error_msg,
            }

    # The planner answered but did not understand the request.
    if not plan.understood:
        reason = plan.error or "je n'ai pas compris votre demande"
        rephrase = (
            f"Désolée, {reason}. "
            "Pouvez-vous reformuler ? Je connais les workflows que vous m'avez appris."
        )
        with self._lock:
            self._append(ROLE_LEA, rephrase, meta={"plan": plan.to_dict()})
            self._set_state(STATE_ERROR)
            return {
                "ok": False,
                "state": self._state,
                "plan": plan.to_dict(),
                "error": reason,
            }

    # Understood: word the proposal and wait for the user's answer.
    proposal = self._format_proposal(plan)
    with self._lock:
        self._pending_plan = plan
        self._append(ROLE_LEA, proposal, meta={"plan": plan.to_dict()})
        self._set_state(STATE_AWAITING_CONFIRMATION)
        return {
            "ok": True,
            "state": self._state,
            "plan": plan.to_dict(),
            "message": proposal,
        }
|
||||
|
||||
@staticmethod
|
||||
def _format_proposal(plan: Any) -> str:
|
||||
"""Formuler une proposition en français à partir d'un TaskPlan."""
|
||||
lines = []
|
||||
lines.append(f"J'ai compris : « {plan.instruction} ».")
|
||||
|
||||
if plan.workflow_name:
|
||||
conf_pct = int(round((plan.match_confidence or 0.0) * 100))
|
||||
lines.append(
|
||||
f"Je vais utiliser le workflow « {plan.workflow_name} »"
|
||||
f" (confiance {conf_pct}%)."
|
||||
)
|
||||
elif plan.mode == "free" and plan.steps:
|
||||
lines.append(
|
||||
f"Je n'ai pas de workflow enregistré pour ça, "
|
||||
f"mais j'ai planifié {len(plan.steps)} étape(s) :"
|
||||
)
|
||||
for i, step in enumerate(plan.steps[:5], 1):
|
||||
desc = step.get("description", "") if isinstance(step, dict) else str(step)
|
||||
lines.append(f" {i}. {desc}")
|
||||
if len(plan.steps) > 5:
|
||||
lines.append(f" … et {len(plan.steps) - 5} autre(s) étape(s).")
|
||||
else:
|
||||
lines.append("Je n'ai pas de plan d'action clair pour cette demande.")
|
||||
|
||||
if plan.parameters:
|
||||
params_str = ", ".join(f"{k}={v}" for k, v in plan.parameters.items())
|
||||
lines.append(f"Paramètres détectés : {params_str}.")
|
||||
|
||||
if plan.is_loop:
|
||||
src = plan.loop_source or "éléments à traiter"
|
||||
lines.append(f"Traitement en boucle sur : {src}.")
|
||||
|
||||
lines.append("")
|
||||
lines.append("Est-ce que je peux y aller ? (oui / non)")
|
||||
return "\n".join(lines)
|
||||
|
||||
def _handle_confirmation_reply(self, text: str) -> Dict[str, Any]:
    """Interpret a user message as a yes/no answer to the pending plan.

    The answer is lower-cased and stripped of trailing ``!``, ``.`` and
    ``?``. It counts as yes (or no) when it equals a known token or starts
    with one followed by a space; yes is tested first. Anything else gets
    a request for clarification and leaves the state unchanged.

    ``send_message`` calls this while holding the session lock. The lock
    is re-entrant, so the nested acquisitions made by ``_execute_plan``
    are safe, but the launch therefore runs with the lock held.
    """
    self._append(ROLE_USER, text)

    yes_tokens = {"oui", "yes", "ok", "y", "go", "vas-y", "allez", "allez-y", "confirme", "confirmer", "continue"}
    no_tokens = {"non", "no", "annule", "annuler", "stop", "arrête", "arrete", "abandonne", "abandonner"}

    answer = text.strip().lower().rstrip("!.?")

    def _matches(tokens) -> bool:
        # Exact token, or a token followed by a space and more words.
        # NOTE(review): a sentence starting with "y " is accepted as yes.
        return answer in tokens or answer.startswith(tuple(f"{tok} " for tok in tokens))

    if _matches(yes_tokens):
        # Consume the plan and launch it.
        plan = self._pending_plan
        self._pending_plan = None
        self._set_state(STATE_EXECUTING)
        return self._execute_plan(plan)

    if _matches(no_tokens):
        self._append(
            ROLE_LEA,
            "D'accord, j'annule. Dites-moi autre chose quand vous voulez.",
        )
        self._pending_plan = None
        self._set_state(STATE_IDLE)
        return {"ok": True, "state": self._state, "confirmed": False}

    self._append(
        ROLE_LEA,
        "Je n'ai pas compris votre réponse. Répondez « oui » pour lancer ou « non » pour annuler.",
    )
    return {"ok": True, "state": self._state, "needs_clarification": True}
|
||||
|
||||
def _execute_plan(self, plan: Any) -> Dict[str, Any]:
    """Start the replay that corresponds to ``plan``.

    Every outcome is announced to the user with a ``ROLE_LEA`` message and is
    also returned as a dict:

    - ``plan`` is ``None``: the state becomes ``STATE_IDLE`` and the result is
      ``{"ok": False, "state": ..., "error": "Aucun plan"}``;
    - no replay callback is configured: the state becomes ``STATE_ERROR``;
    - the plan has no ``workflow_match`` (free mode is refused with a
      ``RuntimeError``) or the callback raises: the state becomes
      ``STATE_ERROR`` and ``error`` carries the exception text;
    - success: the id returned by the callback is stored in
      ``self._active_replay_id`` and returned under ``replay_id``.

    ``self._lock`` is taken here only around history/state updates, not
    around the callback call itself.
    """
    if plan is None:
        with self._lock:
            self._append(ROLE_LEA, "Rien à exécuter.", meta={})
            self._set_state(STATE_IDLE)
            return {"ok": False, "state": self._state, "error": "Aucun plan"}

    callback = self._replay_callback
    if callback is None:
        with self._lock:
            self._append(
                ROLE_LEA,
                "Je ne peux pas exécuter : aucun moteur d'exécution n'est configuré.",
            )
            self._set_state(STATE_ERROR)
            return {
                "ok": False,
                "state": self._state,
                "error": "replay_callback non configuré",
            }

    # Announce the start before calling into the replay engine.
    with self._lock:
        self._append(
            ROLE_LEA,
            "C'est parti ! Je lance le workflow…",
            meta={"plan": plan.to_dict()},
        )

    try:
        if not plan.workflow_match:
            # Free mode is not wired to the chat yet: refuse cleanly through
            # the same error path as a failing callback.
            raise RuntimeError(
                "Mode libre non supporté pour l'instant — "
                "entraînez un workflow pour cette tâche"
            )
        replay_id = callback(
            session_id=plan.workflow_match,
            machine_id=self.machine_id,
            params=plan.parameters,
        )
    except Exception as e:
        with self._lock:
            self._append(
                ROLE_LEA,
                f"Je n'ai pas pu lancer le workflow : {e}",
                meta={"error": str(e)},
            )
            self._set_state(STATE_ERROR)
            return {"ok": False, "state": self._state, "error": str(e)}

    with self._lock:
        # A falsy id from the callback is normalised to an empty string.
        self._active_replay_id = replay_id or ""
        return {
            "ok": True,
            "state": self._state,
            "replay_id": self._active_replay_id,
        }
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
def _append(self, role: str, content: str, meta: Optional[Dict[str, Any]] = None) -> None:
    """Append a message to the history (callers are expected to hold the lock).

    A missing or empty ``meta`` is stored as an empty dict. The session's
    ``updated_at`` is set to the timestamp of the new message.
    """
    entry = ChatMessage(role=role, content=content, meta=meta if meta else {})
    self._messages.append(entry)
    self.updated_at = entry.timestamp
|
||||
|
||||
def _set_state(self, new_state: str) -> None:
    """Switch the session to ``new_state`` (callers are expected to hold the lock).

    ``updated_at`` is refreshed on every call; a debug line is logged only
    when the state actually changes.

    Raises:
        ValueError: if ``new_state`` is not one of ``VALID_STATES``.
    """
    if new_state not in VALID_STATES:
        raise ValueError(f"État invalide : {new_state}")

    old, self._state = self._state, new_state
    self.updated_at = time.time()

    if old == new_state:
        return
    logger.debug(
        f"ChatSession {self.session_id}: {old} -> {new_state}"
    )
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ChatManager — registre en mémoire des sessions
|
||||
# =============================================================================
|
||||
|
||||
class ChatManager:
    """In-memory registry of chat sessions.

    Every access to the registry is serialised by an internal
    ``threading.RLock``. Per the original author's note, it is used by the
    FastAPI API to handle several simultaneous conversations.
    """

    def __init__(
        self,
        task_planner: Any = None,
        workflows_provider: Optional[Callable[[], List[Dict[str, Any]]]] = None,
        replay_callback: Optional[Callable[..., str]] = None,
        status_provider: Optional[Callable[[str], Dict[str, Any]]] = None,
    ):
        # Collaborators are stored untouched and forwarded to every session
        # created by create_session().
        self._task_planner = task_planner
        self._workflows_provider = workflows_provider
        self._replay_callback = replay_callback
        self._status_provider = status_provider
        # session_id -> ChatSession
        self._sessions: Dict[str, ChatSession] = {}
        self._lock = threading.RLock()

    def create_session(self, machine_id: str = "default") -> ChatSession:
        """Create a new chat session, register it and return it."""
        new_session = ChatSession(
            task_planner=self._task_planner,
            workflows_provider=self._workflows_provider,
            replay_callback=self._replay_callback,
            status_provider=self._status_provider,
            machine_id=machine_id,
        )
        with self._lock:
            self._sessions[new_session.session_id] = new_session
        logger.info(f"ChatManager: session créée {new_session.session_id}")
        return new_session

    def get_session(self, session_id: str) -> Optional[ChatSession]:
        """Return the session registered under ``session_id``, or ``None``."""
        with self._lock:
            return self._sessions.get(session_id)

    def list_sessions(self) -> List[Dict[str, Any]]:
        """Return one summary dict per registered session."""
        summaries: List[Dict[str, Any]] = []
        with self._lock:
            for chat in self._sessions.values():
                summaries.append(
                    {
                        "session_id": chat.session_id,
                        "state": chat.state,
                        "machine_id": chat.machine_id,
                        "created_at": chat.created_at,
                        "updated_at": chat.updated_at,
                        "message_count": len(chat.get_history()),
                    }
                )
        return summaries

    def delete_session(self, session_id: str) -> bool:
        """Remove a session; return ``True`` if it was registered."""
        with self._lock:
            removed_session = self._sessions.pop(session_id, None)
        return removed_session is not None

    def cleanup_old(self, max_age_s: float = 3600 * 24) -> int:
        """Delete sessions whose ``updated_at`` is older than ``max_age_s`` seconds.

        Returns the number of sessions removed.
        """
        now = time.time()
        with self._lock:
            stale_ids = [
                sid
                for sid, chat in self._sessions.items()
                if (now - chat.updated_at) > max_age_s
            ]
            for sid in stale_ids:
                del self._sessions[sid]
        return len(stale_ids)
|
||||
1020
agent_v0/server_v1/domain_context.py
Normal file
1020
agent_v0/server_v1/domain_context.py
Normal file
File diff suppressed because it is too large
Load Diff
373
agent_v0/server_v1/execution_plan_runner.py
Normal file
373
agent_v0/server_v1/execution_plan_runner.py
Normal file
@@ -0,0 +1,373 @@
|
||||
# agent_v0/server_v1/execution_plan_runner.py
|
||||
"""
|
||||
ExecutionPlanRunner — Adaptateur ExecutionPlan → actions replay.
|
||||
|
||||
Pièce d'intégration du pipeline V4 :
|
||||
RawTrace → IRBuilder → WorkflowIR → ExecutionCompiler → ExecutionPlan → Runtime
|
||||
|
||||
Ce module convertit un `ExecutionPlan` (plan pré-compilé, déterministe) en
|
||||
liste d'actions au format attendu par l'executor replay actuel (clé x_pct,
|
||||
y_pct, target_spec, etc.), puis les injecte dans `_replay_queues`.
|
||||
|
||||
L'ancien chemin `build_replay_from_raw_events()` dans stream_processor.py
|
||||
reste inchangé — les deux chemins coexistent pendant la transition.
|
||||
|
||||
Format d'action produit (compatible executor existant) :
|
||||
{
|
||||
"action_id": "act_...",
|
||||
"type": "click",
|
||||
"x_pct": 0.5,
|
||||
"y_pct": 0.3,
|
||||
"visual_mode": True,
|
||||
"target_spec": {
|
||||
"by_text": "...",
|
||||
"window_title": "...",
|
||||
"vlm_description": "...",
|
||||
"anchor_image_base64": "...",
|
||||
},
|
||||
"expected_window_title": "...",
|
||||
}
|
||||
|
||||
Auteur: Dom, Alice - Avril 2026
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
import threading
|
||||
import uuid
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from core.workflow.execution_plan import (
|
||||
ExecutionNode,
|
||||
ExecutionPlan,
|
||||
ResolutionStrategy,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Substitution de variables
|
||||
# =========================================================================
|
||||
# Le WorkflowIR utilise la syntaxe `{var}` dans les champs texte.
|
||||
# Ici on supporte les deux : `{var}` (IR natif) et `${var}` (replay legacy).
|
||||
_VARIABLE_RE_CURLY = re.compile(r"\{(\w+)\}")
_VARIABLE_RE_DOLLAR = re.compile(r"\$\{(\w+)\}")
# Both syntaxes in one pattern, so the text is scanned exactly once.
_VARIABLE_RE_ANY = re.compile(r"\$?\{(\w+)\}")


def substitute_variables(text: str, variables: Dict[str, Any]) -> str:
    """Replace ``{var}`` and ``${var}`` placeholders with their values.

    Known variables are replaced by ``str(value)``; unknown placeholders are
    left untouched. The text is scanned in a single pass, so a substituted
    value is treated as plain data: a value that itself looks like a
    placeholder (e.g. ``"{other}"``) is not expanded a second time.

    Args:
        text: Text that may contain placeholders. Returned unchanged when
            empty.
        variables: Mapping of variable name to value. When empty, ``text`` is
            returned unchanged.

    Returns:
        The text with every known placeholder substituted.
    """
    if not text or not variables:
        return text

    def replacer(match: "re.Match[str]") -> str:
        var_name = match.group(1)
        if var_name in variables:
            return str(variables[var_name])
        # Unknown variable: keep the raw placeholder, including any "$".
        return match.group(0)

    return _VARIABLE_RE_ANY.sub(replacer, text)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Conversion ExecutionNode → action replay
|
||||
# =========================================================================
|
||||
|
||||
|
||||
def _strategy_to_target_spec(
|
||||
strategy: Optional[ResolutionStrategy],
|
||||
fallbacks: Optional[List[ResolutionStrategy]] = None,
|
||||
intent: str = "",
|
||||
) -> Dict[str, Any]:
|
||||
"""Construire un `target_spec` depuis les stratégies de résolution.
|
||||
|
||||
Fusionne la primaire et les fallbacks pour donner un maximum d'indices
|
||||
au resolve_engine :
|
||||
- OCR → by_text
|
||||
- template → anchor_image_base64 (depuis anchor_b64)
|
||||
- VLM → vlm_description
|
||||
|
||||
Règle V4 : la stratégie primaire dicte la méthode préférée.
|
||||
Le champ `resolve_order` liste les méthodes dans l'ordre à essayer.
|
||||
Le resolve_engine honore cet ordre au lieu de sa cascade par défaut.
|
||||
|
||||
resolve_order est la clé du "zéro VLM au runtime" :
|
||||
- ["ocr", "template", "vlm"] → V4 typique (OCR rapide)
|
||||
- ["template", "ocr", "vlm"] → apprentissage : template marche mieux
|
||||
- ["vlm"] → éléments sans texte (icônes)
|
||||
"""
|
||||
spec: Dict[str, Any] = {}
|
||||
|
||||
all_strategies: List[ResolutionStrategy] = []
|
||||
if strategy is not None:
|
||||
all_strategies.append(strategy)
|
||||
if fallbacks:
|
||||
all_strategies.extend(fallbacks)
|
||||
|
||||
by_text_candidate = ""
|
||||
anchor_candidate = ""
|
||||
vlm_candidate = ""
|
||||
uia_data: Dict[str, Any] = {}
|
||||
dom_data: Dict[str, Any] = {}
|
||||
resolve_order: List[str] = []
|
||||
seen_methods: set = set()
|
||||
|
||||
for strat in all_strategies:
|
||||
if not strat:
|
||||
continue
|
||||
if strat.method == "ocr" and strat.target_text and not by_text_candidate:
|
||||
by_text_candidate = strat.target_text
|
||||
elif strat.method == "template":
|
||||
if strat.anchor_b64 and not anchor_candidate:
|
||||
anchor_candidate = strat.anchor_b64
|
||||
if strat.target_text and not by_text_candidate:
|
||||
by_text_candidate = strat.target_text
|
||||
elif strat.method == "vlm" and strat.vlm_description and not vlm_candidate:
|
||||
vlm_candidate = strat.vlm_description
|
||||
elif strat.method == "uia" and strat.uia_name and not uia_data:
|
||||
uia_data = {
|
||||
"name": strat.uia_name,
|
||||
"control_type": strat.uia_control_type,
|
||||
"automation_id": strat.uia_automation_id,
|
||||
"parent_path": strat.uia_parent_path,
|
||||
}
|
||||
elif strat.method == "dom" and strat.dom_selector and not dom_data:
|
||||
dom_data = {
|
||||
"selector": strat.dom_selector,
|
||||
"xpath": strat.dom_xpath,
|
||||
"url_pattern": strat.dom_url_pattern,
|
||||
}
|
||||
|
||||
# Construire l'ordre des méthodes (dans l'ordre primaire → fallbacks)
|
||||
if strat.method and strat.method not in seen_methods:
|
||||
resolve_order.append(strat.method)
|
||||
seen_methods.add(strat.method)
|
||||
|
||||
if by_text_candidate:
|
||||
spec["by_text"] = by_text_candidate
|
||||
if anchor_candidate:
|
||||
spec["anchor_image_base64"] = anchor_candidate
|
||||
if vlm_candidate:
|
||||
spec["vlm_description"] = vlm_candidate
|
||||
elif intent and "vlm_description" not in spec:
|
||||
# L'intention métier devient le prompt VLM de dernier recours
|
||||
spec["vlm_description"] = intent
|
||||
|
||||
# Données UIA — consommées par l'agent Windows via lea_uia.exe
|
||||
if uia_data:
|
||||
spec["uia_target"] = uia_data
|
||||
|
||||
# Données DOM — consommées par l'agent Windows via CDP (futur)
|
||||
if dom_data:
|
||||
spec["dom_target"] = dom_data
|
||||
|
||||
# Ordre de résolution pré-compilé — c'est LA pièce centrale du V4
|
||||
if resolve_order:
|
||||
spec["resolve_order"] = resolve_order
|
||||
|
||||
return spec
|
||||
|
||||
|
||||
def execution_node_to_action(
    node: ExecutionNode,
    variables: Optional[Dict[str, Any]] = None,
    id_prefix: str = "act_plan",
) -> Optional[Dict[str, Any]]:
    """Convert an ``ExecutionNode`` into a replay action dict.

    Args:
        node: The node to convert.
        variables: Values used to substitute placeholders in typed text.
        id_prefix: Prefix of the generated ``action_id`` (a random 8-hex
            suffix is appended).

    Returns:
        The action dict, or ``None`` when the node cannot be executed: an
        unknown action type, or a key action without any key.
    """
    variables = variables or {}

    action: Dict[str, Any] = {
        "action_id": f"{id_prefix}_{uuid.uuid4().hex[:8]}",
        "plan_node_id": node.node_id,
    }

    # Descriptive fields, copied only when they are set on the node.
    for attr, key in (("intent", "intention"), ("step_id", "plan_step_id")):
        value = getattr(node, attr)
        if value:
            action[key] = value
    if node.is_optional:
        action["is_optional"] = True

    # Execution metadata, copied under the same name.
    for attr in ("timeout_ms", "max_retries", "recovery_action"):
        value = getattr(node, attr)
        if value:
            action[attr] = value
    condition = node.success_condition
    if condition:
        action["success_condition"] = condition.to_dict()

    kind = node.action_type

    if kind == "click":
        # The real coordinates are resolved at replay time from target_spec;
        # a neutral centre (0.5, 0.5) is still exposed so the action has the
        # x_pct / y_pct keys of the existing replay format.
        action.update(
            {"type": "click", "x_pct": 0.5, "y_pct": 0.5, "visual_mode": True}
        )

        target_spec = _strategy_to_target_spec(
            strategy=node.strategy_primary,
            fallbacks=node.strategy_fallbacks or [],
            intent=node.intent,
        )

        # Window title expected BEFORE the click; absent means no pre-check.
        window_before = node.expected_window_before
        if window_before:
            action["expected_window_before"] = window_before
            target_spec["window_title"] = window_before

        # Window title expected AFTER the click.
        if condition and condition.expected_title:
            action["expected_window_title"] = condition.expected_title
            action["success_strict"] = condition.method == "title_match"
            # Only used as the target window when no "before" title was set.
            target_spec.setdefault("window_title", condition.expected_title)

        if target_spec:
            action["target_spec"] = target_spec
        return action

    if kind == "type":
        action["type"] = "type"
        # Placeholders are substituted before the text is sent.
        action["text"] = substitute_variables(node.text or "", variables)
        if node.variable_name:
            action["variable_name"] = node.variable_name
        return action

    if kind in ("key_combo", "key_press"):
        pressed = list(node.keys or [])
        if not pressed:
            return None
        action["type"] = "key_combo"
        action["keys"] = pressed
        return action

    if kind == "wait":
        action["type"] = "wait"
        # A missing or zero duration falls back to 1000 ms.
        action["duration_ms"] = int(node.duration_ms or 1000)
        return action

    if kind == "scroll":
        action["type"] = "scroll"
        # Fixed delta; any zone carried by the strategies is not used here.
        action["delta"] = -3
        return action

    logger.debug("execution_node_to_action: type inconnu '%s' ignoré", kind)
    return None
|
||||
|
||||
|
||||
def execution_plan_to_actions(
    plan: ExecutionPlan,
    variables: Optional[Dict[str, Any]] = None,
    id_prefix: str = "act_plan",
) -> List[Dict[str, Any]]:
    """Convert a whole ``ExecutionPlan`` into a list of replay actions.

    The plan's own variables are used as a base and overridden by the
    ``variables`` argument. Nodes that cannot be converted (for which
    ``execution_node_to_action`` returns ``None``) are dropped.
    """
    merged_vars: Dict[str, Any] = dict(plan.variables or {})
    merged_vars.update(variables or {})

    converted = (
        execution_node_to_action(
            node=node,
            variables=merged_vars,
            id_prefix=id_prefix,
        )
        for node in plan.nodes
    )
    actions: List[Dict[str, Any]] = [act for act in converted if act is not None]

    logger.info(
        "execution_plan_to_actions(%s) : %d nœuds → %d actions replay "
        "(vars=%d)",
        plan.plan_id, plan.total_nodes, len(actions), len(merged_vars),
    )
    return actions
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Injection dans la queue de replay
|
||||
# =========================================================================
|
||||
|
||||
|
||||
def inject_plan_into_queue(
    plan: ExecutionPlan,
    session_id: str,
    replay_queues: Dict[str, List[Dict[str, Any]]],
    variables: Optional[Dict[str, Any]] = None,
    lock: Optional[threading.Lock] = None,
    replace: bool = True,
    id_prefix: str = "act_plan",
) -> List[Dict[str, Any]]:
    """Inject an ``ExecutionPlan`` into the replay queue of a session.

    Args:
        plan: The plan to execute.
        session_id: Key of the target queue in ``replay_queues``.
        replay_queues: Mapping of session id to its list of pending actions;
            it is modified in place.
        variables: Variables to substitute in the actions.
        lock: Optional lock acquired around the write to ``replay_queues``.
        replace: If True (default), the session's queue is replaced by a copy
            of the new actions. Otherwise the actions are appended; a queue
            is created when the session does not have one yet.
        id_prefix: Prefix of the generated action ids.

    Returns:
        The list of injected actions (after variable substitution).
    """
    actions = execution_plan_to_actions(
        plan=plan, variables=variables, id_prefix=id_prefix,
    )

    def _write() -> None:
        if replace:
            replay_queues[session_id] = list(actions)
        else:
            # setdefault: appending to a session without a queue must create
            # it instead of raising KeyError.
            replay_queues.setdefault(session_id, []).extend(actions)

    if lock is not None:
        with lock:
            _write()
    else:
        _write()

    logger.info(
        "inject_plan_into_queue(%s) : %d actions injectées dans la queue "
        "de la session '%s' (replace=%s)",
        plan.plan_id, len(actions), session_id, replace,
    )
    return actions
|
||||
@@ -65,7 +65,8 @@ class LiveSessionState:
|
||||
class LiveSessionManager:
|
||||
"""Gère les sessions live en mémoire côté serveur avec persistance disque."""
|
||||
|
||||
def __init__(self, persist_dir: str = "data/streaming_sessions"):
|
||||
def __init__(self, persist_dir: str = "data/streaming_sessions",
|
||||
live_sessions_dir: Optional[str] = None):
|
||||
self._sessions: Dict[str, LiveSessionState] = {}
|
||||
self._lock = threading.Lock()
|
||||
self._persist_dir = Path(persist_dir)
|
||||
@@ -74,11 +75,16 @@ class LiveSessionManager:
|
||||
self._persist_counter = 0 # Compteur pour limiter la fréquence de persistance
|
||||
self._persist_interval = 10 # Persister toutes les N modifications
|
||||
|
||||
# Dossier des sessions live (JSONL + screenshots)
|
||||
self._live_sessions_dir = Path(live_sessions_dir) if live_sessions_dir else None
|
||||
|
||||
# Charger les sessions persistées au démarrage
|
||||
self._load_persisted_sessions()
|
||||
# Reconstruire les sessions depuis les live_events.jsonl sur disque
|
||||
self._discover_sessions_from_disk()
|
||||
|
||||
def _load_persisted_sessions(self):
|
||||
"""Charger les sessions sauvegardées au démarrage."""
|
||||
"""Charger les sessions sauvegardées au démarrage (JSON state files)."""
|
||||
count = 0
|
||||
for session_file in sorted(self._persist_dir.glob("sess_*.json")):
|
||||
try:
|
||||
@@ -92,6 +98,66 @@ class LiveSessionManager:
|
||||
if count:
|
||||
logger.info(f"{count} session(s) restaurée(s) depuis {self._persist_dir}")
|
||||
|
||||
def _discover_sessions_from_disk(self):
    """Rebuild missing in-memory sessions from ``live_events.jsonl`` files.

    Scans ``self._live_sessions_dir`` recursively for ``live_events.jsonl``
    files whose parent directory is named ``sess_*``:

    - ``<live_dir>/sess_*/live_events.jsonl``: machine id ``"default"``
    - ``<live_dir>/<machine_id>/sess_*/live_events.jsonl``: machine id taken
      from the parent directory name

    Sessions already present in ``self._sessions`` are left untouched. Does
    nothing when no live sessions directory is configured or when it does
    not exist.
    """
    live_dir = self._live_sessions_dir
    if live_dir is None or not live_dir.exists():
        return

    discovered = 0
    for jsonl_file in sorted(live_dir.glob("**/live_events.jsonl")):
        session_dir = jsonl_file.parent
        session_id = session_dir.name
        if not session_id.startswith("sess_"):
            continue
        if session_id in self._sessions:
            continue

        # Compare full paths rather than directory names: a machine folder
        # that happens to be named like the root directory must not be
        # mistaken for the root.
        machine_dir = session_dir.parent
        machine_id = "default" if machine_dir == live_dir else machine_dir.name

        # Only the screenshots are counted. The event lines are not read:
        # their count was never used by this method.
        shots_dir = session_dir / "shots"
        if shots_dir.exists():
            shots_count = sum(1 for _ in shots_dir.glob("shot_*_full.png"))
        else:
            shots_count = 0

        session = LiveSessionState(
            session_id=session_id,
            machine_id=machine_id,
            finalized=False,
        )
        # Only the number of screenshots is recorded: one placeholder entry
        # (empty path) per screenshot found on disk.
        session.shot_paths = {f"shot_{i:04d}": "" for i in range(shots_count)}
        self._sessions[session_id] = session
        discovered += 1

    if discovered:
        logger.info(
            f"{discovered} session(s) découverte(s) depuis {live_dir} "
            f"(total: {len(self._sessions)} sessions en mémoire)"
        )
|
||||
|
||||
def _persist_session(self, session_id: str):
|
||||
"""Sauvegarder une session sur disque (appelé périodiquement)."""
|
||||
session = self._sessions.get(session_id)
|
||||
@@ -102,7 +168,7 @@ class LiveSessionManager:
|
||||
with open(filepath, 'w', encoding='utf-8') as f:
|
||||
json.dump(session.to_dict(), f, ensure_ascii=False)
|
||||
except Exception as e:
|
||||
logger.debug(f"Erreur persistance session {session_id}: {e}")
|
||||
logger.warning(f"Erreur persistance session {session_id}: {e}")
|
||||
|
||||
def _maybe_persist(self, session_id: str):
|
||||
"""Persister si le compteur atteint l'intervalle."""
|
||||
@@ -180,6 +246,17 @@ class LiveSessionManager:
|
||||
if meta_val is not None:
|
||||
info[meta_key] = meta_val
|
||||
session.last_window_info = info
|
||||
# Exploiter window_capture (envoyé par l'agent avec la capture fenêtre)
|
||||
# pour enrichir last_window_info avec le titre précis de la fenêtre cliquée
|
||||
window_capture = event_data.get("window_capture")
|
||||
if window_capture and isinstance(window_capture, dict):
|
||||
wc_title = window_capture.get("title", "").strip()
|
||||
wc_app = window_capture.get("app_name", "").strip()
|
||||
if wc_title:
|
||||
session.last_window_info["title"] = wc_title
|
||||
if wc_app:
|
||||
session.last_window_info["app_name"] = wc_app
|
||||
|
||||
# Accumuler les titres/apps pour le nommage automatique
|
||||
title = session.last_window_info.get("title", "").strip()
|
||||
app_name = session.last_window_info.get("app_name", "").strip()
|
||||
@@ -221,18 +298,41 @@ class LiveSessionManager:
|
||||
import socket
|
||||
|
||||
# Construire les événements au format RawSession
|
||||
# Important : copier TOUTES les données de l'événement (pos, text, keys, button...)
|
||||
# car Event.from_dict() met tout sauf t/type/window/screenshot_id dans event.data,
|
||||
# et le GraphBuilder utilise event.data pour construire les actions.
|
||||
events = []
|
||||
for evt in session.events:
|
||||
window_info = {
|
||||
"title": evt.get("window_title", session.last_window_info.get("title", "")),
|
||||
"app_name": evt.get("app_name", session.last_window_info.get("app_name", "unknown")),
|
||||
}
|
||||
events.append({
|
||||
# Extraire window info (plusieurs formats possibles)
|
||||
window_raw = evt.get("window")
|
||||
if isinstance(window_raw, dict):
|
||||
window_info = {
|
||||
"title": window_raw.get("title", session.last_window_info.get("title", "")),
|
||||
"app_name": window_raw.get("app_name", session.last_window_info.get("app_name", "unknown")),
|
||||
}
|
||||
else:
|
||||
window_info = {
|
||||
"title": evt.get("window_title", session.last_window_info.get("title", "")),
|
||||
"app_name": evt.get("app_name", session.last_window_info.get("app_name", "unknown")),
|
||||
}
|
||||
|
||||
raw_event = {
|
||||
"t": evt.get("timestamp", 0),
|
||||
"type": evt.get("type", "unknown"),
|
||||
"window": window_info,
|
||||
"screenshot_id": evt.get("screenshot_id"),
|
||||
})
|
||||
}
|
||||
|
||||
# Copier les données spécifiques au type d'événement
|
||||
# (pos, button, text, keys, etc.) — indispensable pour le replay
|
||||
_skip_keys = {"type", "timestamp", "window", "window_title",
|
||||
"app_name", "screenshot_id", "machine_id",
|
||||
"screen_metadata", "vision_info"}
|
||||
for key, value in evt.items():
|
||||
if key not in _skip_keys and key not in raw_event:
|
||||
raw_event[key] = value
|
||||
|
||||
events.append(raw_event)
|
||||
|
||||
# Construire les screenshots au format RawSession
|
||||
screenshots = []
|
||||
|
||||
1322
agent_v0/server_v1/replay_engine.py
Normal file
1322
agent_v0/server_v1/replay_engine.py
Normal file
File diff suppressed because it is too large
Load Diff
143
agent_v0/server_v1/replay_failure_logger.py
Normal file
143
agent_v0/server_v1/replay_failure_logger.py
Normal file
@@ -0,0 +1,143 @@
|
||||
# agent_v0/server_v1/replay_failure_logger.py
|
||||
"""
|
||||
Logger des echecs de replay pour l'apprentissage futur.
|
||||
|
||||
Chaque echec de resolution visuelle (target_not_found) est sauvegarde dans un
|
||||
fichier JSONL par session, avec le screenshot de ce que l'agent voit au moment
|
||||
de l'echec. Ces donnees alimentent le learning loop : re-entrainement des
|
||||
embeddings, ajustement des seuils, enrichissement des target_spec.
|
||||
|
||||
Structure :
|
||||
data/training/replay_failures/{replay_id}/failures.jsonl
|
||||
data/training/replay_failures/{replay_id}/screenshots/{action_id}.jpg
|
||||
"""
|
||||
|
||||
import base64
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger("replay_failure_logger")
|
||||
|
||||
# Repertoire racine des echecs de replay
|
||||
_FAILURES_BASE_DIR = Path("data/training/replay_failures")
|
||||
|
||||
# Lock pour les ecritures concurrentes
|
||||
_write_lock = threading.Lock()
|
||||
|
||||
|
||||
def log_replay_failure(
    replay_id: str,
    action_id: str,
    target_spec: Optional[Dict[str, Any]],
    screenshot_b64: Optional[str],
    resolution_attempts: Optional[List[Dict[str, Any]]] = None,
    error: str = "target_not_found",
    extra: Optional[Dict[str, Any]] = None,
) -> Optional[str]:
    """Record a replay failure for later learning.

    One JSON line is appended to ``<base>/<replay_id>/failures.jsonl``. When
    a screenshot is provided it is decoded and written to
    ``<base>/<replay_id>/screenshots/<action_id>.jpg``; if that fails, a
    warning is logged and the entry is still written with a null
    ``screenshot_path``.

    Args:
        replay_id: Identifier of the running replay.
        action_id: Identifier of the failed action.
        target_spec: Spec of the target that was searched; stored with its
            base64 image fields replaced by a size placeholder.
        screenshot_b64: Base64 screenshot of what the agent sees.
        resolution_attempts: Resolution attempts (method, score, ...).
        error: Error type (default ``"target_not_found"``).
        extra: Additional fields merged into the entry; they override the
            standard fields on key collision.

    Returns:
        The path of the JSONL file as a string, or ``None`` when the failure
        could not be logged (the error is logged, never raised).
    """
    try:
        session_dir = _FAILURES_BASE_DIR / replay_id
        session_dir.mkdir(parents=True, exist_ok=True)

        screenshot_path = None
        if screenshot_b64:
            screenshots_dir = session_dir / "screenshots"
            screenshots_dir.mkdir(exist_ok=True)
            shot_file = screenshots_dir / f"{action_id}.jpg"
            try:
                # Decoding happens before the file is opened, so an invalid
                # payload does not leave an empty file behind.
                shot_file.write_bytes(base64.b64decode(screenshot_b64))
                screenshot_path = str(shot_file)
            except Exception as e:
                logger.warning(f"Impossible de sauvegarder le screenshot : {e}")
                screenshot_path = None

        entry = {
            "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S"),
            "replay_id": replay_id,
            "action_id": action_id,
            "target_spec": _sanitize_target_spec(target_spec) if target_spec else None,
            "screenshot_path": screenshot_path,
            "resolution_attempts": resolution_attempts or [],
            "error": error,
        }
        if extra:
            entry.update(extra)

        jsonl_path = session_dir / "failures.jsonl"
        serialized = json.dumps(entry, ensure_ascii=False) + "\n"
        # The module-level lock serialises appends from concurrent threads.
        with _write_lock:
            with open(jsonl_path, "a", encoding="utf-8") as f:
                f.write(serialized)

        logger.info(
            f"Echec replay loggue : replay={replay_id} action={action_id} "
            f"error={error} -> {jsonl_path}"
        )
        return str(jsonl_path)

    except Exception as e:
        logger.error(f"Impossible de logger l'echec replay : {e}")
        return None
|
||||
|
||||
|
||||
def _sanitize_target_spec(target_spec: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Nettoyer le target_spec pour le stockage (retirer les images base64 volumineuses)."""
|
||||
cleaned = {}
|
||||
for key, value in target_spec.items():
|
||||
# Ne pas stocker les images base64 (trop volumineux pour le JSONL)
|
||||
if key.endswith("_base64") or key.endswith("_b64"):
|
||||
cleaned[key] = f"<{len(str(value))} chars>" if value else None
|
||||
else:
|
||||
cleaned[key] = value
|
||||
return cleaned
|
||||
|
||||
|
||||
def get_failure_count(replay_id: str) -> int:
    """Count the failures logged for a given replay.

    The count is the number of lines of the replay's ``failures.jsonl``;
    0 is returned when the file does not exist or cannot be read.
    """
    jsonl_path = _FAILURES_BASE_DIR / replay_id / "failures.jsonl"
    if not jsonl_path.exists():
        return 0

    count = 0
    try:
        with open(jsonl_path, "r", encoding="utf-8") as handle:
            for _ in handle:
                count += 1
    except Exception:
        return 0
    return count
|
||||
|
||||
|
||||
def get_failures(replay_id: str) -> List[Dict[str, Any]]:
    """Read every failure logged for a given replay.

    Returns the decoded entries of the replay's ``failures.jsonl`` in file
    order, or an empty list when the file does not exist. Blank lines are
    ignored. A line that is not valid JSON is skipped with a warning instead
    of aborting the read, so one corrupt record (e.g. a truncated write) does
    not hide the valid records that follow it. Any other error while reading
    the file is logged and the entries read so far are returned.
    """
    jsonl_path = _FAILURES_BASE_DIR / replay_id / "failures.jsonl"
    if not jsonl_path.exists():
        return []

    failures: List[Dict[str, Any]] = []
    try:
        with open(jsonl_path, "r", encoding="utf-8") as f:
            for line_no, line in enumerate(f, start=1):
                line = line.strip()
                if not line:
                    continue
                try:
                    failures.append(json.loads(line))
                except json.JSONDecodeError as e:
                    logger.warning(
                        "Ligne %d invalide ignoree dans %s : %s",
                        line_no, jsonl_path, e,
                    )
    except Exception as e:
        logger.warning(f"Erreur lecture echecs replay {replay_id} : {e}")
    return failures
|
||||
395
agent_v0/server_v1/replay_learner.py
Normal file
395
agent_v0/server_v1/replay_learner.py
Normal file
@@ -0,0 +1,395 @@
|
||||
# agent_v0/server_v1/replay_learner.py
|
||||
"""
|
||||
Module Learning — apprentissage à partir des résultats de replay.
|
||||
|
||||
Responsabilité : "Chaque replay qui échoue enrichit notre base de connaissances."
|
||||
|
||||
Stocke les résultats structurés de chaque action (succès/échec, méthode,
|
||||
screenshots, correction appliquée) pour :
|
||||
1. Améliorer les décisions futures (Policy)
|
||||
2. Affiner les stratégies de grounding (quelle méthode marche pour quel écran)
|
||||
3. Détecter les patterns récurrents d'échec
|
||||
4. Alimenter le fine-tuning futur du VLM
|
||||
|
||||
Format inspiré du cahier des charges (docs/VISION_RPA_INTELLIGENT.md) :
|
||||
{
|
||||
"screenshot_before": "base64...",
|
||||
"action": {"type": "click", "target": "Bouton Valider", ...},
|
||||
"screenshot_after": "base64...",
|
||||
"success": true,
|
||||
"resolution_method": "som_text_match",
|
||||
"correction": null,
|
||||
"human_validated": false
|
||||
}
|
||||
|
||||
Ref: docs/VISION_RPA_INTELLIGENT.md — Boucle d'apprentissage (section 4)
|
||||
Ref: docs/PLAN_ACTEUR_V1.md — Phase 3 : apprentissage continu
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from dataclasses import dataclass, field, asdict
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)

# Default directory for the stored learning results. It is read once, at
# import time, from the RPA_LEARNING_DIR environment variable and falls back
# to a relative path when the variable is not set.
_DEFAULT_LEARNING_DIR = os.environ.get(
    "RPA_LEARNING_DIR", "data/learning/replay_results"
)
|
||||
|
||||
|
||||
@dataclass
class ActionOutcome:
    """Structured result of a single replay action.

    The first three fields are required; every other field has a default so
    that an outcome can be built from partial information.
    """
    # Identifiers
    session_id: str
    action_id: str
    action_type: str  # click, type, key_combo
    timestamp: float = 0.0  # Epoch

    # Context
    target_description: str = ""  # e.g. "Clic sur 'Enregistrer' dans Bloc-notes"
    intention: str = ""  # e.g. "Sauvegarder le fichier"
    window_title: str = ""

    # Resolution
    resolution_method: str = ""  # server_som, anchor_template, vlm_direct...
    resolution_score: float = 0.0
    resolution_elapsed_ms: float = 0.0

    # Result
    success: bool = False
    error: str = ""
    warning: str = ""

    # Verification (Critic); None means the check was not performed
    pixel_verified: Optional[bool] = None
    semantic_verified: Optional[bool] = None
    critic_detail: str = ""

    # Recovery
    recovery_action: str = ""  # undo, escape, close, none
    recovery_success: bool = False

    # Screenshots (relative paths, not base64 — too heavy)
    screenshot_before_path: str = ""
    screenshot_after_path: str = ""

    # Human correction (feedback loop)
    human_validated: bool = False
    human_correction: str = ""  # Description of the correction

    def to_dict(self) -> Dict[str, Any]:
        """Return all fields as a plain dict (via ``dataclasses.asdict``)."""
        return asdict(self)
|
||||
|
||||
|
||||
class ReplayLearner:
    """Learn from replay results.

    Each recorded action is appended to a per-session JSONL file under
    ``learning_dir`` and mirrored in a bounded in-memory list. The query
    helpers (``query_similar``, ``best_strategy_for``, ``get_stats``) only
    look at that in-memory list, not at the files on disk.

    Server-side usage (api_stream.py):
        learner = ReplayLearner()
        learner.record(outcome)

    Policy-side usage:
        history = learner.query_similar(target_description, window_title)
    """

    def __init__(self, learning_dir: str = ""):
        """Create the learner and make sure its storage directory exists.

        Args:
            learning_dir: Directory for the per-session JSONL files. Falls
                back to ``_DEFAULT_LEARNING_DIR`` when empty.
        """
        self.learning_dir = Path(learning_dir or _DEFAULT_LEARNING_DIR)
        self.learning_dir.mkdir(parents=True, exist_ok=True)
        # In-memory cache of the latest outcomes (for fast queries)
        self._recent: List[ActionOutcome] = []
        self._max_recent = 500

    def record(self, outcome: ActionOutcome) -> None:
        """Record the result of one action.

        Appends the outcome to the session's JSONL file and keeps it in the
        in-memory cache. A failed file write is logged and does not prevent
        the outcome from being cached. ``outcome.timestamp`` is filled with
        the current time when it is unset.
        """
        if not outcome.timestamp:
            outcome.timestamp = time.time()

        # One JSONL file per session
        session_file = self.learning_dir / f"{outcome.session_id}.jsonl"
        try:
            # Explicit UTF-8: entries are dumped with ensure_ascii=False, so
            # the platform default encoding could reject or corrupt accents.
            with open(session_file, "a", encoding="utf-8") as f:
                f.write(json.dumps(outcome.to_dict(), ensure_ascii=False) + "\n")
        except Exception as e:
            logger.warning(f"Learning: échec écriture {session_file}: {e}")

        # In-memory cache, trimmed to the most recent entries
        self._recent.append(outcome)
        if len(self._recent) > self._max_recent:
            self._recent = self._recent[-self._max_recent:]

        # Summary log line
        status = "OK" if outcome.success else "ÉCHEC"
        if outcome.semantic_verified:
            critic = "OK"
        elif outcome.semantic_verified is False:
            critic = "NON"
        else:
            critic = "?"
        logger.info(
            f"Learning: {status} {outcome.action_type} "
            f"'{outcome.target_description[:40]}' "
            f"[{outcome.resolution_method}] "
            f"critic={critic}"
        )

    def record_from_replay_result(
        self,
        session_id: str,
        action: Dict[str, Any],
        result: Dict[str, Any],
        verification: Optional[Dict] = None,
    ) -> None:
        """Record an outcome built from the replay's own action/result dicts.

        Args:
            session_id: Session the action belongs to.
            action: Replay action; ``action_id``, ``type``, ``intention`` and
                ``target_spec`` (``by_text``, ``window_title``) are read.
            result: Execution result; resolution and success fields are read.
            verification: Optional verification dict; ``verified``,
                ``semantic_verified`` and ``semantic_detail`` are read.
        """
        # Tolerate a missing or explicitly-null target_spec.
        target_spec = action.get("target_spec") or {}
        outcome = ActionOutcome(
            session_id=session_id,
            action_id=action.get("action_id", ""),
            action_type=action.get("type", ""),
            target_description=target_spec.get("by_text", ""),
            intention=action.get("intention", ""),
            window_title=target_spec.get("window_title", ""),
            resolution_method=result.get("resolution_method", ""),
            resolution_score=result.get("resolution_score", 0.0),
            resolution_elapsed_ms=result.get("resolution_elapsed_ms", 0.0),
            success=result.get("success", False),
            error=result.get("error", ""),
            warning=result.get("warning", ""),
        )

        if verification:
            outcome.pixel_verified = verification.get("verified")
            outcome.semantic_verified = verification.get("semantic_verified")
            outcome.critic_detail = verification.get("semantic_detail", "")

        self.record(outcome)

    def record_human_correction(
        self,
        session_id: str,
        action: Dict[str, Any],
        correction: Dict[str, Any],
    ) -> None:
        """Record a human correction (supervised learning mode).

        The correction is written to the learning JSONL as a successful
        ``human_supervised`` click, then handed to the persistent target
        memory through ``replay_memory.memory_record_success`` so that it is
        stored under the same window-title signature that ``memory_lookup``
        queries.

        Args:
            session_id: Session the corrected action belongs to.
            action: The replay action that was corrected.
            correction: Dict holding the corrected ``x_pct`` / ``y_pct``.
        """
        target_spec = action.get("target_spec") or {}
        by_text = target_spec.get("by_text", "")
        window_title = target_spec.get("window_title", "")
        x_pct = correction.get("x_pct", 0.0)
        y_pct = correction.get("y_pct", 0.0)

        # Record in the learning JSONL
        outcome = ActionOutcome(
            session_id=session_id,
            action_id=action.get("action_id", ""),
            action_type="click",
            target_description=by_text,
            window_title=window_title,
            resolution_method="human_supervised",
            resolution_score=1.0,  # Maximum confidence: a human showed the target
            success=True,
        )
        self.record(outcome)

        # Store in the persistent target memory for future lookups.
        try:
            # Imported lazily, like the rest of the memory integration.
            from .replay_memory import memory_record_success

            stored = memory_record_success(
                window_title=window_title,
                target_spec=target_spec,
                x_pct=x_pct,
                y_pct=y_pct,
                method="human_supervised",
                confidence=1.0,
            )
            if stored:
                logger.info(
                    f"[APPRENTISSAGE] Correction stockée dans target_memory : "
                    f"'{by_text}' → ({x_pct:.4f}, {y_pct:.4f})"
                )
            else:
                logger.warning(
                    "Learning: correction humaine non stockée dans target_memory "
                    "(store indisponible, titre de fenêtre vide ou coordonnées invalides)"
                )
        except Exception as e:
            logger.warning(f"Learning: échec stockage target_memory: {e}")

    def query_similar(
        self,
        target_description: str = "",
        window_title: str = "",
        limit: int = 10,
    ) -> List[Dict[str, Any]]:
        """Search the in-memory history for similar outcomes.

        Matching is a case-insensitive substring test: a target-description
        match scores 2, a window-title match scores 1. The cache is scanned
        from the most recent entry backwards and the scan stops once
        ``limit`` matches are collected.

        Returns:
            Up to ``limit`` dicts ``{"outcome": <dict>, "relevance": <int>}``
            sorted by decreasing relevance; among equal relevance the most
            recent comes first. Empty when ``limit`` is not positive.
        """
        if limit <= 0:
            return []

        results = []
        target_lower = target_description.lower()
        window_lower = window_title.lower()

        for outcome in reversed(self._recent):
            score = 0
            if target_lower and target_lower in outcome.target_description.lower():
                score += 2
            if window_lower and window_lower in outcome.window_title.lower():
                score += 1
            if score > 0:
                results.append({
                    "outcome": outcome.to_dict(),
                    "relevance": score,
                })
                if len(results) >= limit:
                    break

        # sorted() is stable, so recency order is kept within a relevance tier.
        return sorted(results, key=lambda x: x["relevance"], reverse=True)

    def best_strategy_for(
        self,
        target_description: str = "",
        window_title: str = "",
    ) -> Optional[str]:
        """Return the resolution method that worked best for similar targets.

        Looks at up to 20 similar outcomes and computes a success rate per
        resolution method. Only methods seen at least twice and with a
        non-zero success rate are eligible.

        Returns:
            Name of the best method (e.g. "som_text_match"), or None when no
            method qualifies.
        """
        similar = self.query_similar(target_description, window_title, limit=20)
        if not similar:
            return None

        # Count successes per method
        method_stats: Dict[str, List[int]] = {}  # method -> [successes, total]
        for entry in similar:
            outcome = entry["outcome"]
            method = outcome.get("resolution_method", "")
            if not method:
                continue
            stats = method_stats.setdefault(method, [0, 0])
            stats[1] += 1
            if outcome.get("success"):
                stats[0] += 1

        if not method_stats:
            return None

        # Pick the method with the best success rate (minimum 2 occurrences)
        best = None
        best_rate = 0.0
        for method, (successes, total) in method_stats.items():
            if total >= 2:  # At least 2 attempts to be meaningful
                rate = successes / total
                if rate > best_rate:
                    best_rate = rate
                    best = method

        if best:
            logger.info(
                f"Learning: meilleure stratégie pour '{target_description[:30]}' → "
                f"{best} ({best_rate:.0%} sur {method_stats[best][1]} essais)"
            )

        return best

    def consolidate_workflow(
        self,
        actions: list,
        session_id: str = "",
    ) -> int:
        """Enrich a workflow with what past replays have learned.

        For every ``click`` action that has a ``by_text`` target, asks
        ``best_strategy_for`` for a recommended method and, when there is
        one, stores it under ``target_spec["_learned_strategy"]``.

        The actions are modified in place.

        Args:
            actions: Workflow actions (dicts).
            session_id: Only used in the summary log line.

        Returns:
            Number of actions that received a hint.
        """
        enriched = 0
        for action in actions:
            if action.get("type") != "click":
                continue
            # A missing/None target_spec yields an empty by_text and is skipped.
            target_spec = action.get("target_spec") or {}
            by_text = target_spec.get("by_text", "")
            window = target_spec.get("window_title", "")
            if not by_text:
                continue

            best = self.best_strategy_for(by_text, window)
            if best:
                target_spec["_learned_strategy"] = best
                enriched += 1

        if enriched:
            logger.info(
                f"Consolidation : {enriched} actions enrichies par l'apprentissage "
                f"(session {session_id})"
            )
        return enriched

    def get_stats(self) -> Dict[str, Any]:
        """Return global statistics computed over the in-memory cache.

        Returns:
            ``{"total": 0}`` when the cache is empty; otherwise the total, the
            overall success rate and, per resolution method (``"unknown"``
            when unset), its total and success rate, rounded to 3 decimals.
        """
        if not self._recent:
            return {"total": 0}

        total = len(self._recent)
        successes = sum(1 for o in self._recent if o.success)
        methods = {}
        for o in self._recent:
            m = o.resolution_method or "unknown"
            if m not in methods:
                methods[m] = {"total": 0, "success": 0}
            methods[m]["total"] += 1
            if o.success:
                methods[m]["success"] += 1

        return {
            "total": total,
            "success_rate": round(successes / total, 3) if total > 0 else 0,
            "methods": {
                m: {
                    "total": v["total"],
                    "success_rate": round(v["success"] / v["total"], 3) if v["total"] > 0 else 0,
                }
                for m, v in methods.items()
            },
        }

    def load_session(self, session_id: str) -> List[ActionOutcome]:
        """Load every outcome of a session from its JSONL file.

        Blank lines are skipped. A line that is not valid JSON, or that does
        not match the ``ActionOutcome`` fields, is logged and skipped on its
        own so that it does not hide the entries that follow it.

        Returns:
            The outcomes in file order; empty when the file does not exist.
        """
        session_file = self.learning_dir / f"{session_id}.jsonl"
        if not session_file.is_file():
            return []

        outcomes = []
        try:
            # Same explicit encoding as the writer in record().
            with open(session_file, encoding="utf-8") as f:
                for line_no, line in enumerate(f, start=1):
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        outcomes.append(ActionOutcome(**json.loads(line)))
                    except (ValueError, TypeError) as e:
                        logger.warning(
                            f"Learning: ligne {line_no} ignorée dans {session_file}: {e}"
                        )
        except Exception as e:
            logger.warning(f"Learning: échec lecture {session_file}: {e}")

        return outcomes
|
||||
323
agent_v0/server_v1/replay_memory.py
Normal file
323
agent_v0/server_v1/replay_memory.py
Normal file
@@ -0,0 +1,323 @@
|
||||
# agent_v0/server_v1/replay_memory.py
|
||||
"""
|
||||
replay_memory — Greffe de TargetMemoryStore (Fiche #18) sur le pipeline V4.
|
||||
|
||||
Phase 1 du plan apprentissage Léa (docs/PLAN_APPRENTISSAGE_LEA.md).
|
||||
|
||||
Le runtime V4 appelle :
|
||||
- `memory_lookup()` AVANT la cascade coûteuse (OCR/template/VLM)
|
||||
- `memory_record_success()` APRÈS validation post-condition (`title_match` strict)
|
||||
- `memory_record_failure()` sur les échecs
|
||||
|
||||
Fingerprint léger V4 : les coordonnées clic (x_pct, y_pct) sont stockées dans
|
||||
les deux premières valeurs de `TargetFingerprint.bbox`, et la méthode de
|
||||
résolution ayant réussi dans le champ `etype`.
|
||||
|
||||
Signature d'écran V4 : `sha256(normalize(window_title))[:16]`. Simple et
|
||||
robuste aux données variables car les titres de fenêtre restent stables.
|
||||
Les faux positifs (même titre, écrans différents) sont rattrapés par la
|
||||
post-condition qui décrémentera la fiabilité via `record_failure()`.
|
||||
|
||||
Critère de fiabilité : 2 succès minimum et < 30% d'échecs pour déclencher
|
||||
un hit (paramètres de `TargetMemoryStore.lookup`). C'est exactement la
|
||||
cristallisation par répétition que l'on veut — Léa est un stagiaire qui
|
||||
apprend de l'observation.
|
||||
|
||||
Auteur : Dom, Alice — avril 2026
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
import os
|
||||
import unicodedata
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)

# =========================================================================
# Persistent store singleton
# =========================================================================

# Shared store instance; stays None until it is assigned.
_MEMORY_SINGLETON: Optional[Any] = None
# Flag starting at False; NOTE(review): presumably set once the store proves
# unavailable — confirm against get_memory_store.
_MEMORY_DISABLED = False
|
||||
|
||||
|
||||
def get_memory_store():
    """Return the shared ``TargetMemoryStore``, or None when it is unavailable.

    The store is created lazily on the first call, so that
    ``core.learning.target_memory_store`` is not imported when this module is
    imported. Its base path comes from the ``RPA_LEARNING_DIR`` environment
    variable (default ``data/learning``). If the import or the construction
    fails, the failure is logged, the store is marked as disabled and every
    later call returns None without retrying.
    """
    global _MEMORY_SINGLETON, _MEMORY_DISABLED

    if _MEMORY_DISABLED:
        return None

    if _MEMORY_SINGLETON is None:
        try:
            from core.learning.target_memory_store import TargetMemoryStore

            storage_root = os.environ.get("RPA_LEARNING_DIR", "data/learning")
            _MEMORY_SINGLETON = TargetMemoryStore(base_path=storage_root)
            logger.info(
                "replay_memory: TargetMemoryStore initialisé (base=%s)", storage_root,
            )
        except Exception as exc:
            logger.warning(
                "replay_memory: TargetMemoryStore indisponible (%s) — "
                "l'apprentissage persistant est désactivé", exc,
            )
            # Remember the failure so the expensive init is not retried.
            _MEMORY_DISABLED = True
            return None

    return _MEMORY_SINGLETON
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Normalisation de texte et hash
|
||||
# =========================================================================
|
||||
|
||||
|
||||
def _norm_text(s: str) -> str:
|
||||
"""Normalise un texte pour un hash stable (accents, casse, NBSP, espaces)."""
|
||||
if not s:
|
||||
return ""
|
||||
s = s.replace("\u00A0", " ").strip().lower()
|
||||
s = unicodedata.normalize("NFKD", s)
|
||||
s = "".join(ch for ch in s if not unicodedata.combining(ch))
|
||||
return " ".join(s.split())
|
||||
|
||||
|
||||
def compute_screen_sig(window_title: str) -> str:
    """Compute the screen signature from the window title.

    The title is normalized with ``_norm_text`` and the signature is the
    first 16 hex characters of the SHA-256 of its UTF-8 encoding.

    Returns:
        The 16-character signature, or an empty string when the normalized
        title is empty.
    """
    normalized_title = _norm_text(window_title)
    if normalized_title:
        digest = hashlib.sha256(normalized_title.encode("utf-8")).hexdigest()
        return digest[:16]
    return ""
|
||||
|
||||
|
||||
class _TargetSpecLike:
|
||||
"""Adaptateur dict → objet pour `TargetMemoryStore._hash_target_spec()`.
|
||||
|
||||
Le hash interne de TargetMemoryStore utilise `getattr(spec, "by_role", ...)`
|
||||
qui ne fonctionne pas avec un dict brut. On expose les attributs nécessaires.
|
||||
|
||||
On intègre aussi `resolve_order` et `vlm_description` dans `context_hints`
|
||||
pour qu'ils entrent dans le hash — deux actions avec le même `by_text`
|
||||
mais un `resolve_order` différent doivent avoir des hashes distincts.
|
||||
"""
|
||||
|
||||
__slots__ = ("by_role", "by_text", "by_position", "context_hints")
|
||||
|
||||
def __init__(self, d: Dict[str, Any]):
|
||||
self.by_role = d.get("by_role", "") or ""
|
||||
self.by_text = d.get("by_text", "") or ""
|
||||
self.by_position = d.get("by_position")
|
||||
|
||||
hints = dict(d.get("context_hints") or {})
|
||||
resolve_order = d.get("resolve_order")
|
||||
if resolve_order:
|
||||
hints["_resolve_order"] = "|".join(resolve_order) if isinstance(
|
||||
resolve_order, list
|
||||
) else str(resolve_order)
|
||||
if d.get("vlm_description"):
|
||||
hints["_vlm_desc"] = str(d["vlm_description"])
|
||||
if d.get("anchor_hint"):
|
||||
hints["_anchor_hint"] = str(d["anchor_hint"])
|
||||
self.context_hints = hints
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Lookup — consulté AVANT la cascade coûteuse
|
||||
# =========================================================================
|
||||
|
||||
|
||||
def memory_lookup(
    window_title: str,
    target_spec: Dict[str, Any],
) -> Optional[Dict[str, Any]]:
    """Look up a learned resolution for this target on this screen.

    Args:
        window_title: Title of the current window; hashed into the screen
            signature used as lookup key.
        target_spec: Target specification dict of the action.

    Returns:
        A dict with ``resolved``, ``method`` (prefixed with ``memory_``),
        ``x_pct``, ``y_pct``, ``score``, ``from_memory`` and ``screen_sig``
        when the store returns a usable fingerprint. None when the store is
        unavailable, the title is empty, the lookup fails or misses, or the
        stored coordinates are not numbers within [0, 1].
    """
    store = get_memory_store()
    if store is None:
        return None

    screen_sig = compute_screen_sig(window_title)
    if not screen_sig:
        return None

    try:
        fingerprint = store.lookup(screen_sig, _TargetSpecLike(target_spec))
    except Exception as exc:
        logger.debug("memory_lookup: erreur lookup (%s)", exc)
        return None

    if fingerprint is None:
        return None

    # Lightweight fingerprint: the first two bbox values hold (x_pct, y_pct).
    try:
        x_pct, y_pct = float(fingerprint.bbox[0]), float(fingerprint.bbox[1])
    except (TypeError, IndexError, ValueError):
        logger.debug("memory_lookup: fingerprint bbox invalide")
        return None

    # Sanity check: percentages must lie within [0, 1].
    within_unit_square = all(0.0 <= value <= 1.0 for value in (x_pct, y_pct))
    if not within_unit_square:
        logger.warning(
            "memory_lookup: coords invalides (%.3f, %.3f) pour sig=%s — "
            "entrée ignorée",
            x_pct, y_pct, screen_sig,
        )
        return None

    method = fingerprint.etype or "memory"
    # A missing or falsy confidence falls back to 0.9.
    confidence = float(getattr(fingerprint, "confidence", 0.9) or 0.9)

    logger.info(
        "memory_lookup HIT : sig=%s method=%s coords=(%.4f, %.4f) conf=%.2f "
        "target='%s'",
        screen_sig, method, x_pct, y_pct, confidence,
        (target_spec.get("by_text") or "")[:60],
    )

    hit = {
        "resolved": True,
        "method": f"memory_{method}",
        "x_pct": x_pct,
        "y_pct": y_pct,
        "score": confidence,
        "from_memory": True,
        "screen_sig": screen_sig,
    }
    return hit
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Record — appelé APRÈS validation post-condition
|
||||
# =========================================================================
|
||||
|
||||
|
||||
def memory_record_success(
    window_title: str,
    target_spec: Dict[str, Any],
    x_pct: float,
    y_pct: float,
    method: str,
    confidence: float = 0.9,
) -> bool:
    """Record a successful resolution in the persistent memory.

    Args:
        window_title: Window title; hashed into the screen signature.
        target_spec: Target specification dict of the action.
        x_pct: Horizontal click position, expected within [0, 1].
        y_pct: Vertical click position, expected within [0, 1].
        method: Resolution method that succeeded. Leading ``memory_``
            prefixes are stripped so that a lookup → record cycle does not
            pile them up.
        confidence: Confidence stored with the fingerprint.

    Returns:
        True when the store accepted the entry. False when the store is
        unavailable, the title is empty, the coordinates are invalid or the
        store raised.
    """
    store = get_memory_store()
    if store is None:
        return False

    screen_sig = compute_screen_sig(window_title)
    if not screen_sig:
        return False

    # Sanity check: coordinates must be numbers within [0, 1].
    try:
        x_pct = float(x_pct)
        y_pct = float(y_pct)
    except (TypeError, ValueError):
        logger.debug("memory_record_success: coords non numériques, skip")
        return False

    coords_ok = 0.0 <= x_pct <= 1.0 and 0.0 <= y_pct <= 1.0
    if not coords_ok:
        logger.debug(
            "memory_record_success: coords hors [0,1] (%.3f, %.3f), skip",
            x_pct, y_pct,
        )
        return False

    try:
        from core.learning.target_memory_store import TargetFingerprint

        # Keep only the original resolution method (e.g. template_matching):
        # drop every stacked "memory_" prefix.
        prefix = "memory_"
        method_clean = method or "v4_unknown"
        while method_clean.startswith(prefix):
            method_clean = method_clean[len(prefix):]
        if not method_clean:
            method_clean = "v4_unknown"

        target_label = (target_spec.get("by_text") or "")[:200]
        fingerprint = TargetFingerprint(
            element_id=f"v4_{method_clean}",
            bbox=(x_pct, y_pct, 0.0, 0.0),
            role=target_spec.get("by_role", "") or None,
            etype=method_clean,
            label=target_label or None,
            confidence=float(confidence),
        )

        store.record_success(
            screen_signature=screen_sig,
            target_spec=_TargetSpecLike(target_spec),
            fingerprint=fingerprint,
            strategy_used=method_clean,
            confidence=float(confidence),
        )

        logger.info(
            "memory_record_success: sig=%s method=%s coords=(%.4f, %.4f) "
            "target='%s'",
            screen_sig, method_clean, x_pct, y_pct,
            (target_spec.get("by_text") or "")[:60],
        )
        return True

    except Exception as exc:
        logger.warning("memory_record_success: échec (%s)", exc)
        return False
|
||||
|
||||
|
||||
def memory_record_failure(
    window_title: str,
    target_spec: Dict[str, Any],
    error_message: str,
) -> bool:
    """Report a failure for this (screen signature, target) pair to the store.

    Args:
        window_title: Window title; hashed into the screen signature.
        target_spec: Target specification dict of the action.
        error_message: Failure description; truncated to 200 characters
            (``"unknown"`` when empty) before being handed to the store.

    Returns:
        True when the store recorded the failure. False when the store is
        unavailable, the title is empty or the store raised.
    """
    store = get_memory_store()
    if store is None:
        return False

    screen_sig = compute_screen_sig(window_title)
    if not screen_sig:
        return False

    stored_message = (error_message or "unknown")[:200]
    try:
        store.record_failure(
            screen_signature=screen_sig,
            target_spec=_TargetSpecLike(target_spec),
            error_message=stored_message,
        )
        logger.debug(
            "memory_record_failure: sig=%s error='%s'",
            screen_sig, (error_message or "")[:80],
        )
    except Exception as exc:
        logger.debug("memory_record_failure: échec (%s)", exc)
        return False
    return True
|
||||
@@ -1,20 +1,24 @@
|
||||
# agent_v0/server_v1/replay_verifier.py
|
||||
"""
|
||||
ReplayVerifier — Vérification post-action pour le replay de workflows.
|
||||
ReplayVerifier — Vérification post-action (Critic) pour le replay de workflows.
|
||||
|
||||
Compare les screenshots avant/après une action pour détecter si elle a eu
|
||||
un effet visible. Utilisé par l'API de replay pour décider si une action
|
||||
a réussi ou si un retry est nécessaire.
|
||||
Deux niveaux de vérification :
|
||||
1. PIXEL : Différence d'image avant/après (rapide, ~10ms)
|
||||
- L'écran a-t-il changé ? Où ? De combien ?
|
||||
2. SÉMANTIQUE : VLM évalue si le résultat correspond à l'attendu (~2-5s)
|
||||
- L'action a-t-elle eu l'EFFET voulu ? (pas juste "des pixels ont bougé")
|
||||
|
||||
Stratégies de vérification :
|
||||
1. Différence d'image globale (avant == après → probablement rien ne s'est passé)
|
||||
2. Zone locale autour du clic (si l'action est un clic)
|
||||
3. Détection de texte apparu (si l'action est une frappe)
|
||||
Le niveau pixel existait déjà. Le niveau sémantique (Critic) est le chaînon
|
||||
manquant identifié par comparaison avec Claude Computer Use et OpenAdapt.
|
||||
|
||||
Ref: docs/VISION_RPA_INTELLIGENT.md — étape VERIFY du pipeline.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -35,9 +39,13 @@ class VerificationResult:
|
||||
suggestion: str # "retry", "skip", "abort", "continue"
|
||||
detail: str = "" # Description humaine du résultat
|
||||
local_change_pct: float = 0.0 # % de changement dans la zone locale (si applicable)
|
||||
# Critic sémantique (VLM)
|
||||
semantic_verified: Optional[bool] = None # None = pas de vérif sémantique
|
||||
semantic_detail: str = "" # Explication du VLM
|
||||
semantic_elapsed_ms: float = 0.0 # Temps de la vérif sémantique
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
d = {
|
||||
"verified": self.verified,
|
||||
"confidence": round(self.confidence, 3),
|
||||
"changes_detected": self.changes_detected,
|
||||
@@ -46,6 +54,11 @@ class VerificationResult:
|
||||
"detail": self.detail,
|
||||
"local_change_pct": round(self.local_change_pct, 3),
|
||||
}
|
||||
if self.semantic_verified is not None:
|
||||
d["semantic_verified"] = self.semantic_verified
|
||||
d["semantic_detail"] = self.semantic_detail
|
||||
d["semantic_elapsed_ms"] = round(self.semantic_elapsed_ms, 1)
|
||||
return d
|
||||
|
||||
|
||||
class ReplayVerifier:
|
||||
@@ -345,3 +358,275 @@ class ReplayVerifier:
|
||||
f"(global={global_change_pct:.3f}%, local={local_change_pct:.3f}%)"
|
||||
),
|
||||
)
|
||||
|
||||
# =========================================================================
|
||||
# Critic sémantique — VLM évalue si le résultat correspond à l'attendu
|
||||
# =========================================================================
|
||||
|
||||
def verify_with_critic(
    self,
    action: Dict[str, Any],
    result: Dict[str, Any],
    screenshot_before: Optional[str] = None,
    screenshot_after: Optional[str] = None,
    expected_result: str = "",
    action_intention: str = "",
    workflow_context: str = "",
) -> VerificationResult:
    """Run the full verification: pixel check, then semantic check (Critic).

    The pixel check (``verify_action``) always runs. Its result is returned
    unchanged when ``expected_result`` is empty, when no change was detected
    and the pixel check already suggests ``"retry"``, or when the semantic
    check returns None. Otherwise both results are combined by
    ``_merge_results``.

    Args:
        action: The executed action.
        result: The result reported by the agent.
        screenshot_before: Screenshot taken before the action (base64).
        screenshot_after: Screenshot taken after the action (base64).
        expected_result: Description of the state expected after the action.
        action_intention: What the action was meant to do.
        workflow_context: Global context (progress, goal).
    """
    # Step 1: pixel-level verification.
    pixel_result = self.verify_action(
        action=action,
        result=result,
        screenshot_before=screenshot_before,
        screenshot_after=screenshot_after,
    )

    # Nothing to compare against, or nothing changed and a retry is already
    # suggested: the semantic check would add nothing.
    if not expected_result or (
        not pixel_result.changes_detected and pixel_result.suggestion == "retry"
    ):
        return pixel_result

    # Step 2: semantic verification.
    semantic = self._verify_semantic(
        screenshot_before=screenshot_before,
        screenshot_after=screenshot_after,
        expected_result=expected_result,
        action_intention=action_intention,
        workflow_context=workflow_context,
    )

    # No semantic verdict available: keep the pixel result alone.
    return pixel_result if semantic is None else self._merge_results(pixel_result, semantic)
|
||||
|
||||
def _verify_semantic(
|
||||
self,
|
||||
screenshot_before: Optional[str],
|
||||
screenshot_after: Optional[str],
|
||||
expected_result: str,
|
||||
action_intention: str = "",
|
||||
workflow_context: str = "",
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""Appeler le VLM pour évaluer sémantiquement le résultat de l'action.
|
||||
|
||||
Utilise gemma4 en mode texte+images (Docker port 11435) pour analyser
|
||||
les screenshots avant/après et dire si le résultat attendu est atteint.
|
||||
|
||||
Sur Citrix (image plate), c'est la SEULE façon de vérifier intelligemment
|
||||
si une action a eu l'effet voulu.
|
||||
|
||||
Returns:
|
||||
Dict avec {"verified": bool, "detail": str, "elapsed_ms": float}
|
||||
ou None si le VLM est indisponible.
|
||||
"""
|
||||
import requests as _requests
|
||||
|
||||
if not screenshot_after:
|
||||
return None
|
||||
|
||||
gemma4_port = os.environ.get("GEMMA4_PORT", "11435")
|
||||
gemma4_url = f"http://localhost:{gemma4_port}/api/chat"
|
||||
|
||||
# Construire le prompt Critic
|
||||
context_parts = []
|
||||
if action_intention:
|
||||
context_parts.append(f"Action effectuée : {action_intention}")
|
||||
if workflow_context:
|
||||
context_parts.append(f"Contexte : {workflow_context}")
|
||||
context_str = "\n".join(context_parts)
|
||||
|
||||
# Deux images : avant et après
|
||||
images = []
|
||||
prompt_images = ""
|
||||
if screenshot_before and screenshot_after:
|
||||
images = [screenshot_before, screenshot_after]
|
||||
prompt_images = (
|
||||
"Image 1 = écran AVANT l'action.\n"
|
||||
"Image 2 = écran APRÈS l'action.\n"
|
||||
)
|
||||
elif screenshot_after:
|
||||
images = [screenshot_after]
|
||||
prompt_images = "Image = écran APRÈS l'action.\n"
|
||||
|
||||
prompt = (
|
||||
f"Tu es le VÉRIFICATEUR d'un robot RPA. Tu dois dire si l'action a réussi.\n\n"
|
||||
f"{prompt_images}"
|
||||
f"{context_str}\n\n"
|
||||
f"Résultat attendu : {expected_result}\n\n"
|
||||
f"Est-ce que le résultat attendu est visible à l'écran ?\n"
|
||||
f"Réponds EXACTEMENT dans ce format :\n"
|
||||
f"VERDICT: OUI ou NON\n"
|
||||
f"RAISON: explication courte (1 ligne)"
|
||||
)
|
||||
|
||||
# Injecter le contexte métier si disponible
|
||||
from .domain_context import get_domain_context
|
||||
domain = get_domain_context(os.environ.get("RPA_DOMAIN", "generic"))
|
||||
messages = []
|
||||
if domain.system_prompt:
|
||||
messages.append({"role": "system", "content": domain.system_prompt})
|
||||
messages.append({"role": "user", "content": prompt, "images": images})
|
||||
|
||||
try:
|
||||
t_start = time.time()
|
||||
resp = _requests.post(
|
||||
gemma4_url,
|
||||
json={
|
||||
"model": "gemma4:e4b",
|
||||
"messages": messages,
|
||||
"stream": False,
|
||||
"think": True,
|
||||
"options": {"temperature": 0.1, "num_predict": 800},
|
||||
},
|
||||
timeout=30,
|
||||
)
|
||||
elapsed_ms = (time.time() - t_start) * 1000
|
||||
|
||||
if not resp.ok:
|
||||
logger.warning(f"Critic VLM HTTP {resp.status_code}")
|
||||
return None
|
||||
|
||||
content = resp.json().get("message", {}).get("content", "").strip()
|
||||
|
||||
# Parser le verdict
|
||||
verified = None
|
||||
detail = content
|
||||
for line in content.split("\n"):
|
||||
line_upper = line.strip().upper()
|
||||
if line_upper.startswith("VERDICT:"):
|
||||
verdict_text = line_upper.replace("VERDICT:", "").strip()
|
||||
if "OUI" in verdict_text or "YES" in verdict_text:
|
||||
verified = True
|
||||
elif "NON" in verdict_text or "NO" in verdict_text:
|
||||
verified = False
|
||||
elif line_upper.startswith("RAISON:"):
|
||||
detail = line.strip().replace("RAISON:", "").strip()
|
||||
|
||||
if verified is None:
|
||||
# Fallback : chercher OUI/NON dans le texte brut
|
||||
upper = content.upper()
|
||||
if "OUI" in upper and "NON" not in upper:
|
||||
verified = True
|
||||
elif "NON" in upper:
|
||||
verified = False
|
||||
else:
|
||||
logger.warning(f"Critic VLM réponse non parsable : {content[:100]}")
|
||||
return None
|
||||
|
||||
logger.info(
|
||||
f"Critic VLM : {'OUI' if verified else 'NON'} en {elapsed_ms:.0f}ms — {detail[:80]}"
|
||||
)
|
||||
return {
|
||||
"verified": verified,
|
||||
"detail": detail,
|
||||
"elapsed_ms": elapsed_ms,
|
||||
}
|
||||
|
||||
except _requests.Timeout:
|
||||
logger.warning("Critic VLM timeout (30s)")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.warning(f"Critic VLM erreur : {e}")
|
||||
return None
|
||||
|
||||
def _merge_results(
    self,
    pixel: VerificationResult,
    semantic: Dict[str, Any],
) -> VerificationResult:
    """Combine the pixel-diff result with the semantic (critic) verdict.

    Decision matrix:
        - pixel changed   + critic OK  -> verified, high confidence
        - pixel changed   + critic NO  -> UNEXPECTED (the screen changed,
          but not as planned: popup, error, wrong window) -> retry
        - pixel unchanged + critic OK  -> still verified (the critic sees
          the result, e.g. a subtle change such as focusing an already
          visible tab)
        - pixel unchanged + critic NO  -> failure -> retry

    Args:
        pixel: Result of the pixel-level verification.
        semantic: Dict with the keys ``verified``, ``detail`` and
            ``elapsed_ms``.

    Returns:
        A new ``VerificationResult`` carrying the merged verdict; the
        change-area figures are copied from ``pixel``.
    """
    critic_ok = semantic["verified"]
    screen_changed = pixel.changes_detected
    reason = semantic["detail"]

    # Only the confidence and the wording of the detail differ per case;
    # every other field follows directly from the two booleans.
    if screen_changed:
        if critic_ok:
            # Everything agrees: boost the pixel confidence, capped at 0.95.
            confidence = min(0.95, pixel.confidence + 0.2)
            detail = f"Pixel OK + Critic OK : {reason}"
        else:
            confidence = 0.7
            detail = f"Pixel OK mais Critic NON : {reason}"
    elif critic_ok:
        confidence = 0.6
        detail = f"Pixel inchangé mais Critic OK : {reason}"
    else:
        confidence = 0.8
        detail = f"Pixel inchangé + Critic NON : {reason}"

    accepted = bool(critic_ok)
    return VerificationResult(
        verified=accepted,
        confidence=confidence,
        changes_detected=bool(screen_changed),
        change_area_pct=pixel.change_area_pct,
        local_change_pct=pixel.local_change_pct,
        suggestion="continue" if accepted else "retry",
        detail=detail,
        semantic_verified=accepted,
        semantic_detail=reason,
        semantic_elapsed_ms=semantic["elapsed_ms"],
    )
|
||||
|
||||
2385
agent_v0/server_v1/resolve_engine.py
Normal file
2385
agent_v0/server_v1/resolve_engine.py
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
596
agent_v0/server_v1/task_planner.py
Normal file
596
agent_v0/server_v1/task_planner.py
Normal file
@@ -0,0 +1,596 @@
|
||||
# agent_v0/server_v1/task_planner.py
|
||||
"""
|
||||
TaskPlanner — Planificateur MACRO pour RPA Vision V3.
|
||||
|
||||
Responsabilité : comprendre un ordre en langage naturel et l'exécuter.
|
||||
|
||||
"Traite les dossiers de janvier" →
|
||||
1. Comprendre l'instruction (gemma4)
|
||||
2. Trouver le workflow appris correspondant
|
||||
3. Identifier les paramètres/variables
|
||||
4. Exécuter (replay avec substitution) ou planifier (actions libres)
|
||||
|
||||
C'est le niveau MACRO de l'architecture 3 niveaux :
|
||||
MACRO (TaskPlanner) → décompose et orchestre
|
||||
MÉSO (Policy/Observer/Critic) → décide et vérifie
|
||||
MICRO (Grounding/Executor) → localise et clique
|
||||
|
||||
Ref: docs/PLAN_ACTEUR_V1.md — Phase 3 : Planificateur
|
||||
Ref: docs/VISION_RPA_INTELLIGENT.md — "Il observe" → "Il devient autonome"
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class TaskPlan:
    """Execution plan produced by the planner for one user instruction."""

    # Original instruction given by the user.
    instruction: str
    # True once the instruction has been understood.
    understood: bool = False
    # ID of the matching workflow (empty when none was found).
    workflow_match: str = ""
    # Name of the matching workflow.
    workflow_name: str = ""
    # Confidence of the match (0-1).
    match_confidence: float = 0.0
    # Variables extracted from the instruction.
    parameters: Dict[str, Any] = field(default_factory=dict)
    # True when the task loops over a list of items.
    is_loop: bool = False
    # Where the loop items come from (screen, file, query).
    loop_source: str = ""
    # Planned actions.
    steps: List[Dict[str, Any]] = field(default_factory=list)
    # "replay" (known workflow) or "free" (generated actions).
    mode: str = ""
    error: str = ""

    def to_dict(self) -> Dict[str, Any]:
        """Return a summary dict of the plan.

        The confidence is rounded to 3 decimals and the steps are reported
        only as a count (``steps_count``), not in full.
        """
        summary: Dict[str, Any] = {
            name: getattr(self, name)
            for name in ("instruction", "understood", "workflow_match", "workflow_name")
        }
        summary["match_confidence"] = round(self.match_confidence, 3)
        summary.update(
            parameters=self.parameters,
            is_loop=self.is_loop,
            loop_source=self.loop_source,
            steps_count=len(self.steps),
            mode=self.mode,
            error=self.error,
        )
        return summary
|
||||
|
||||
|
||||
@dataclass
class TaskResult:
    """Outcome of executing one task."""

    instruction: str
    success: bool
    # Number of items processed (1 when the task is not a loop).
    total_items: int = 1
    completed_items: int = 0
    failed_items: int = 0
    results: List[Dict[str, Any]] = field(default_factory=list)
    elapsed_s: float = 0.0
    summary: str = ""

    def to_dict(self) -> Dict[str, Any]:
        """Return a summary dict of the result.

        The elapsed time is rounded to one decimal; the per-item
        ``results`` list is not included.
        """
        report: Dict[str, Any] = {}
        for name in (
            "instruction",
            "success",
            "total_items",
            "completed_items",
            "failed_items",
        ):
            report[name] = getattr(self, name)
        report["elapsed_s"] = round(self.elapsed_s, 1)
        report["summary"] = self.summary
        return report
|
||||
|
||||
|
||||
class TaskPlanner:
    """MACRO planner: understands instructions and orchestrates execution.

    Usage:
        planner = TaskPlanner()
        plan = planner.understand("traite les dossiers de janvier")
        result = planner.execute(plan, replay_callback=launch_replay)
    """

    # Only the first N known workflows are listed in the prompt, so only
    # those numbers are accepted back from the model.
    _MAX_PROMPT_WORKFLOWS = 10

    # Answers meaning "no value" for the PARAMETRES field.
    _NO_VALUE = ("AUCUN", "NONE", "NON", "N/A", "-")

    def __init__(self, gemma4_port: str = "", domain_id: str = ""):
        """Configure the gemma4 endpoint and load the business-domain context.

        Args:
            gemma4_port: Port of the local gemma4 chat endpoint. Falls back
                to the ``GEMMA4_PORT`` environment variable, then "11435".
            domain_id: Business-domain identifier. Falls back to the
                ``RPA_DOMAIN`` environment variable, then "generic".
        """
        self._gemma4_port = gemma4_port or os.environ.get("GEMMA4_PORT", "11435")
        self._gemma4_url = f"http://localhost:{self._gemma4_port}/api/chat"
        self._domain_id = domain_id or os.environ.get("RPA_DOMAIN", "generic")

        # The domain context is optional: the planner works without it, but
        # the failure is logged instead of being silently swallowed.
        try:
            from .domain_context import get_domain_context

            self._domain = get_domain_context(self._domain_id)
        except Exception as e:
            logger.warning(
                f"TaskPlanner: contexte métier '{self._domain_id}' indisponible : {e}"
            )
            self._domain = None

    def understand(
        self,
        instruction: str,
        available_workflows: Optional[List[Dict[str, Any]]] = None,
        screen_context: str = "",
    ) -> "TaskPlan":
        """Understand a natural-language instruction.

        gemma4 analyses the instruction and identifies:
            - the matching workflow (if one exists),
            - the parameters/variables (name, date, file, ...),
            - whether the task is a loop (process ALL the files),
            - a numbered plan of steps.

        Args:
            instruction: The user's order ("traite les dossiers de janvier").
            available_workflows: Known workflows, as dicts with the keys
                ``name``, ``description`` and ``session_id``. Only the first
                ``_MAX_PROMPT_WORKFLOWS`` entries are shown to the model.
            screen_context: Description of the current screen; added to the
                prompt when non-empty.

        Returns:
            A ``TaskPlan``. On HTTP or network failure the plan is returned
            with ``error`` set and ``understood`` left False.
        """
        import requests as _requests

        plan = TaskPlan(instruction=instruction)

        # Numbered list of the known workflows for the prompt.
        workflows_desc = "Aucun workflow enregistré."
        if available_workflows:
            lines = []
            shown = available_workflows[: self._MAX_PROMPT_WORKFLOWS]
            for i, wf in enumerate(shown):
                name = wf.get("name", wf.get("session_id", f"workflow_{i}"))
                desc = wf.get("description", "")
                sid = wf.get("session_id", "")
                # Show the business description to help semantic matching.
                label = f"{name}"
                if desc:
                    label += f" — {desc}"
                lines.append(f"  {i+1}. {label} (id={sid})")
            workflows_desc = "\n".join(lines)

        # Business-domain context.
        domain_prompt = ""
        if self._domain and self._domain.system_prompt:
            domain_prompt = f"\nCONTEXTE MÉTIER :\n{self._domain.system_prompt}\n"

        # Current-screen context (previously accepted but never used).
        screen_prompt = f"ÉCRAN ACTUEL : {screen_context}\n\n" if screen_context else ""

        prompt = (
            f"Tu es le PLANIFICATEUR d'un robot RPA (Léa). "
            f"Analyse l'ordre utilisateur et identifie le workflow correspondant.\n"
            f"{domain_prompt}\n"
            f"WORKFLOWS DISPONIBLES :\n{workflows_desc}\n\n"
            f"{screen_prompt}"
            f"ORDRE : \"{instruction}\"\n\n"
            f"RÈGLE DE MATCHING :\n"
            f"- Compare l'INTENTION de l'ordre avec la DESCRIPTION de chaque workflow\n"
            f"- \"Ouvre le bloc-notes\" correspond à un workflow décrit \"Ouvrir Bloc-notes via recherche\"\n"
            f"- Un workflow qui utilise la même application EST un match même si les mots diffèrent\n"
            f"- Si aucun workflow ne correspond, réponds WORKFLOW: AUCUN\n\n"
            f"Réponds EXACTEMENT dans ce format (une ligne par champ) :\n"
            f"COMPRIS: OUI\n"
            f"WORKFLOW: <numéro> (ou AUCUN)\n"
            f"CONFIANCE: <0.0 à 1.0>\n"
            f"PARAMETRES: clé1=valeur1, clé2=valeur2 (ou AUCUN)\n"
            f"BOUCLE: OUI ou NON\n"
            f"SOURCE_BOUCLE: écran, fichier, ou aucun\n"
            f"PLAN:\n"
            f"1. première étape\n"
            f"2. deuxième étape\n"
        )

        try:
            resp = _requests.post(
                self._gemma4_url,
                json={
                    "model": "gemma4:e4b",
                    "messages": [{"role": "user", "content": prompt}],
                    "stream": False,
                    "think": True,
                    "options": {"temperature": 0.2, "num_predict": 800},
                },
                timeout=120,
            )

            if not resp.ok:
                plan.error = f"gemma4 HTTP {resp.status_code}"
                return plan

            content = (resp.json().get("message", {}).get("content") or "").strip()
            logger.info(f"TaskPlanner: réponse gemma4 ({len(content)} chars)")

            plan = self._parse_understanding(plan, content, available_workflows)

        except Exception as e:
            plan.error = f"gemma4 erreur: {e}"
            logger.warning(f"TaskPlanner: {plan.error}")

        return plan

    def _parse_understanding(
        self,
        plan: "TaskPlan",
        content: str,
        available_workflows: Optional[List[Dict]] = None,
    ) -> "TaskPlan":
        """Parse gemma4's answer and fill in *plan*.

        Tolerant to format variations:
            - "COMPRIS : OUI", "COMPRIS: oui" or "**COMPRIS:** OUI"
            - workflow numbers: "1", "1.", "#1", "Workflow 1", "1 (name)"
            - confidence: "0.9", "0,9", "1", "90%", "90.5 %"
            - parameters: "key=value" or "key: value", on the same line or
              on the following lines

        A CONFIANCE line wins over the default confidence (0.8) assigned
        when a workflow is matched, whatever the order of the two lines.

        Args:
            plan: Plan to update in place.
            content: Raw text answer of the model.
            available_workflows: The list given to ``understand``; only the
                entries that were shown in the prompt can be selected.

        Returns:
            The same *plan* object, updated.
        """
        import re

        def value_of(text):
            # Text after the first ':' or '='. Callers have already checked
            # that such a separator is present.
            return re.split(r'[:=]', text, maxsplit=1)[1].strip()

        def is_yes(text):
            return "OUI" in text or "YES" in text or "TRUE" in text

        step_re = re.compile(r'^(\d+[.)]\s+|- )')
        none_re = re.compile(r'^(AUCUNE?|NONE|NON|N/A)\b')
        shown = (available_workflows or [])[: self._MAX_PROMPT_WORKFLOWS]

        # Strip markdown emphasis (bold, italic).
        content_clean = re.sub(r'\*{1,2}([^*]+)\*{1,2}', r'\1', content)

        in_params_section = False
        confidence_given = False

        for line in content_clean.split("\n"):
            line_clean = line.strip()
            if not line_clean:
                continue
            upper = line_clean.upper()

            # --- COMPRIS ---
            if re.match(r'^COMPRIS\s*[:=]', upper):
                plan.understood = is_yes(value_of(upper))
                in_params_section = False

            # --- WORKFLOW ---
            elif re.match(r'^WORKFLOW\s*[:=]', upper):
                in_params_section = False
                val = value_of(line_clean)
                val_upper = val.upper()
                # "AUCUN (aucun des 3 ne correspond)" must not select
                # workflow 3: a leading "none" marker settles the question.
                if val_upper in ("", "-") or none_re.match(val_upper):
                    continue
                num_match = re.search(r'(\d+)', val)
                if num_match and shown:
                    idx = int(num_match.group(1)) - 1
                    if 0 <= idx < len(shown):
                        wf = shown[idx]
                        plan.workflow_match = wf.get("session_id", "")
                        plan.workflow_name = wf.get("name", "")
                        if not confidence_given:
                            plan.match_confidence = 0.8
                        plan.mode = "replay"

            # --- CONFIANCE ---
            elif re.match(r'^CONFIANCE\s*[:=]', upper):
                in_params_section = False
                confidence = self._parse_confidence(value_of(line_clean))
                if confidence is not None:
                    plan.match_confidence = confidence
                    confidence_given = True

            # --- PARAMETRES ---
            elif re.match(r'^PARAM[EÈ]TRES?\s*[:=]', upper):
                val = value_of(line_clean)
                if val.upper() in self._NO_VALUE:
                    in_params_section = False
                    continue
                # An empty value means the parameters are on the next lines.
                in_params_section = True
                if val:
                    self._extract_params_from_line(val, plan)

            # --- BOUCLE ---
            elif re.match(r'^BOUCLE\s*[:=]', upper):
                plan.is_loop = is_yes(value_of(upper))
                in_params_section = False

            # --- SOURCE_BOUCLE ---
            elif re.match(r'^SOURCE[_ ]BOUCLE\s*[:=]', upper):
                plan.loop_source = value_of(line_clean)
                in_params_section = False

            # --- PLAN ---
            elif re.match(r'^PLAN\s*[:=]?\s*$', upper):
                in_params_section = False

            # --- Content lines: parameters first, then steps ---
            elif in_params_section and ("=" in line_clean or ": " in line_clean):
                self._extract_params_from_line(line_clean, plan)

            elif not in_params_section and step_re.match(line_clean):
                # Numbered or bulleted step, inside or outside a PLAN section.
                plan.steps.append({"description": line_clean})

        # Understood but no known workflow -> free mode.
        if plan.understood and not plan.workflow_match:
            plan.mode = "free"

        return plan

    @staticmethod
    def _parse_confidence(text: str) -> Optional[float]:
        """Parse a confidence such as "0.9", "0,9", "1", "90%" or "90.5 %".

        Returns:
            The confidence clamped to [0, 1], or None when *text* contains
            no number or a number above 1 that is not a percentage.
        """
        import re

        match = re.search(r'(\d+(?:[.,]\d+)?)\s*(%)?', text)
        if not match:
            return None
        number = float(match.group(1).replace(",", "."))
        if match.group(2):
            number /= 100.0
        elif number > 1.0:
            # Ambiguous (e.g. "8/10"): leave the current confidence alone.
            return None
        return max(0.0, min(1.0, number))

    @staticmethod
    def _extract_params_from_line(text: str, plan: "TaskPlan") -> None:
        """Extract ``key=value`` or ``key: value`` parameters from one line.

        ``key=value`` pairs are comma-separated; a ``key: value`` line holds
        a single pair whose key must be shorter than 30 characters. Values
        equal to AUCUN/NONE are ignored. Results go to ``plan.parameters``.
        """
        import re

        text = text.strip().strip("- ")
        # Ignore section labels. Whole-word match only, so that keys such
        # as "plan_id" or "source" are still accepted as parameters.
        if re.match(
            r'^(COMPRIS|WORKFLOW|BOUCLE|SOURCE[_ ]BOUCLE|PLAN|CONFIANCE)\b',
            text.upper(),
        ):
            return
        # Try key=value first.
        if "=" in text:
            for part in text.split(","):
                part = part.strip()
                if "=" in part:
                    k, v = part.split("=", 1)
                    k, v = k.strip().strip("- "), v.strip()
                    if k and v and v.upper() not in ("AUCUN", "NONE"):
                        plan.parameters[k] = v
        # Otherwise key: value.
        elif ": " in text:
            k, v = text.split(": ", 1)
            k, v = k.strip().strip("- "), v.strip()
            if k and v and len(k) < 30 and v.upper() not in ("AUCUN", "NONE"):
                plan.parameters[k] = v

    def execute(
        self,
        plan: "TaskPlan",
        replay_callback=None,
        machine_id: str = "default",
    ) -> "TaskResult":
        """Execute a plan.

        Two modes:
            1. "replay": relaunch a recorded workflow with variable
               substitution.
            2. "free": execute the actions planned by gemma4.

        Args:
            plan: The plan produced by ``understand()``.
            replay_callback: Function that launches a replay. Called with
                the keywords ``session_id, machine_id, params`` in replay
                mode and ``actions, machine_id, task_description`` in free
                mode; its return value is used as the replay id.
            machine_id: Target machine for the execution.

        Returns:
            A ``TaskResult``; ``elapsed_s`` is filled in when the plan was
            understood.
        """
        t_start = time.time()
        result = TaskResult(instruction=plan.instruction, success=False)

        if not plan.understood:
            result.summary = f"Instruction non comprise : {plan.error or 'réponse gemma4 invalide'}"
            return result

        if plan.mode == "replay" and plan.workflow_match:
            # Replay mode: relaunch a known workflow.
            result = self._execute_replay(plan, replay_callback, machine_id)

        elif plan.mode == "free" and plan.steps:
            # Free mode: actions planned by gemma4.
            result = self._execute_free(plan, replay_callback, machine_id)

        else:
            result.summary = "Pas de workflow correspondant et pas d'actions planifiées"

        result.elapsed_s = time.time() - t_start
        return result

    @staticmethod
    def _replay_summary(workflow_name: str, replay_id: Any, parameters: Dict[str, Any]) -> str:
        """Build the summary of a launched replay, with parameters if any."""
        summary = f"Workflow '{workflow_name}' lancé (replay={replay_id})"
        if parameters:
            summary += f" avec paramètres {parameters}"
        return summary

    def _execute_replay(
        self,
        plan: "TaskPlan",
        replay_callback,
        machine_id: str,
    ) -> "TaskResult":
        """Execute in replay mode (known workflow)."""
        result = TaskResult(instruction=plan.instruction, success=False)

        if not replay_callback:
            result.summary = "Pas de callback replay configuré"
            return result

        if plan.is_loop:
            # Loop: TODO — list the items, then iterate.
            # For now the workflow is simply run once.
            logger.info(
                f"TaskPlanner: boucle détectée mais pas encore implémentée, "
                f"exécution simple du workflow {plan.workflow_name}"
            )

        try:
            replay_id = replay_callback(
                session_id=plan.workflow_match,
                machine_id=machine_id,
                params=plan.parameters,
            )
        except Exception as e:
            result.summary = f"Erreur lancement replay : {e}"
            logger.error(f"TaskPlanner: {result.summary}")
            return result

        result.success = True
        result.completed_items = 1
        result.total_items = 1
        # The summary is always filled in; the parameters are appended only
        # when there are some.
        result.summary = self._replay_summary(
            plan.workflow_name, replay_id, plan.parameters
        )
        result.results.append({
            "replay_id": replay_id,
            "workflow": plan.workflow_name,
            "params": plan.parameters,
        })
        return result

    def _execute_free(
        self,
        plan: "TaskPlan",
        replay_callback,
        machine_id: str,
    ) -> "TaskResult":
        """Execute in free mode (actions planned by gemma4).

        Without a callback the converted actions are returned in
        ``result.results`` and ``success`` stays False.
        """
        result = TaskResult(instruction=plan.instruction, success=False)

        # Convert the textual steps into replay actions.
        actions = self._steps_to_actions(plan.steps, plan.parameters)

        if not actions:
            result.summary = "Impossible de convertir le plan en actions exécutables"
            return result

        if replay_callback:
            try:
                replay_id = replay_callback(
                    actions=actions,
                    machine_id=machine_id,
                    task_description=plan.instruction,
                )
                result.success = True
                result.completed_items = 1
                result.summary = f"Plan libre exécuté ({len(actions)} actions, replay={replay_id})"
            except Exception as e:
                result.summary = f"Erreur exécution plan libre : {e}"
                logger.error(f"TaskPlanner: {result.summary}")
        else:
            result.summary = f"Plan prêt ({len(actions)} actions) mais pas de callback"
            result.results = actions

        return result

    def _steps_to_actions(
        self,
        steps: List[Dict[str, Any]],
        parameters: Dict[str, Any],
    ) -> List[Dict[str, Any]]:
        """Convert textual steps into replay actions.

        Asks gemma4 to translate the steps into structured actions of type
        click, type, key_combo or wait.

        Returns:
            The parsed actions, or an empty list on HTTP or network failure.
        """
        import requests as _requests

        steps_text = "\n".join(
            s.get("description", str(s)) for s in steps
        )

        prompt = (
            "Convertis ces étapes RPA en actions JSON.\n\n"
            f"ÉTAPES :\n{steps_text}\n\n"
            f"PARAMÈTRES : {json.dumps(parameters, ensure_ascii=False)}\n\n"
            "TYPES D'ACTIONS DISPONIBLES :\n"
            '- Cliquer : {"type": "click", "target_spec": {"by_text": "texte du bouton"}}\n'
            '- Taper du texte : {"type": "type", "text": "texte à taper"}\n'
            '- Raccourci clavier : {"type": "key_combo", "keys": ["ctrl", "s"]}\n'
            '- Attendre : {"type": "wait", "duration_ms": 2000}\n\n'
            "RÈGLES :\n"
            "- UNE action JSON par ligne\n"
            "- Pas de commentaires, pas de texte autour, JUSTE le JSON\n"
            "- Utilise les paramètres fournis dans les valeurs\n\n"
            "ACTIONS :\n"
        )

        try:
            resp = _requests.post(
                self._gemma4_url,
                json={
                    "model": "gemma4:e4b",
                    "messages": [{"role": "user", "content": prompt}],
                    "stream": False,
                    "think": True,
                    "options": {"temperature": 0.1, "num_predict": 1500},
                },
                timeout=120,
            )

            if not resp.ok:
                return []

            content = resp.json().get("message", {}).get("content") or ""
            return self._parse_actions_json(content)

        except Exception as e:
            logger.warning(f"TaskPlanner: conversion étapes échouée : {e}")
            return []

    @staticmethod
    def _parse_actions_json(content: str) -> List[Dict[str, Any]]:
        """Parse JSON actions out of a model answer.

        Tolerates:
            - one JSON object per line,
            - a JSON array [...],
            - text around the JSON (markdown fences, comments),
            - nested objects (target_spec), including braces inside strings,
            - actions wrapped in a container object.

        Only dicts whose "type" is click, type, key_combo or wait are kept,
        in document order; click actions get ``visual_mode = True``.
        """
        valid_types = {"click", "type", "key_combo", "wait"}
        decoder = json.JSONDecoder()
        actions: List[Dict[str, Any]] = []

        pos = 0
        length = len(content)
        while pos < length:
            if content[pos] not in "{[":
                pos += 1
                continue
            try:
                # Let the JSON parser find the end of the value: unlike
                # brace counting, it understands strings and escapes.
                value, end = decoder.raw_decode(content, pos)
            except json.JSONDecodeError:
                pos += 1
                continue

            candidates = value if isinstance(value, list) else [value]
            found = [
                item
                for item in candidates
                if isinstance(item, dict)
                and isinstance(item.get("type"), str)
                and item["type"] in valid_types
            ]
            if not found:
                # Not an action (or a container): keep scanning inside it.
                pos += 1
                continue

            for action in found:
                if action["type"] == "click":
                    action["visual_mode"] = True
            actions.extend(found)
            pos = end

        return actions

    def list_capabilities(
        self,
        available_workflows: List[Dict[str, Any]],
    ) -> str:
        """List what Léa can do (for the user interface).

        Returns:
            A multi-line text with one line per workflow (name and optional
            description), or a hint to record a workflow when none exist.
        """
        if not available_workflows:
            return "Léa n'a pas encore appris de workflows. Enregistrez-en un d'abord."

        lines = ["Léa sait faire :"]
        for wf in available_workflows:
            name = wf.get("name", "?")
            desc = wf.get("description", "")
            lines.append(f"  - {name}" + (f" ({desc})" if desc else ""))

        lines.append("")
        lines.append("Dites-lui ce que vous voulez faire en langage naturel.")
        return "\n".join(lines)
|
||||
185
agent_v0/server_v1/workflow_replay.py
Normal file
185
agent_v0/server_v1/workflow_replay.py
Normal file
@@ -0,0 +1,185 @@
|
||||
"""
|
||||
workflow_replay.py — Pont entre le WorkflowRunner et le replay Agent V1.
|
||||
|
||||
Convertit un Workflow enrichi (avec embeddings CLIP + FAISS) en actions
|
||||
de replay pour l'Agent V1, avec vérification FAISS à chaque étape.
|
||||
|
||||
Architecture :
|
||||
Workflow (nodes + edges + embeddings)
|
||||
→ pour chaque edge : action + embedding du node source
|
||||
→ FAISS vérifie que l'écran actuel correspond au node attendu
|
||||
→ si OK : exécuter l'action normalement
|
||||
→ si MISMATCH : stopper ou adapter
|
||||
|
||||
Auteur : Dom + Claude
|
||||
Date : 5 avril 2026
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import numpy as np
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def build_workflow_replay(
    workflow_path: str,
    session_dir: str,
    faiss_manager=None,
    clip_embedder=None,
) -> List[Dict[str, Any]]:
    """Convert an enriched workflow JSON file into Agent V1 replay actions.

    The graph is followed as a single linear path (not a real BFS): starting
    from the first entry node (or the first declared node when
    ``entry_nodes`` is empty), the first outgoing edge of each node is taken
    until a node is unknown, has no outgoing edge, or was already visited.

    Only edges whose action type is ``"compound"`` emit replay actions, one
    per entry of ``parameters.steps``. Edges of any other type are traversed
    without emitting anything.

    Every emitted action carries the source node id (``workflow_node``) and
    the source node's window title (``expected_window_title``). Click actions
    additionally receive percentage coordinates, a ``target_spec`` used for
    grounding and, when the crop file exists, a base64 anchor image.

    After the traversal, the ``expected_window_title`` of each click is
    overwritten with the title of the following click (when non-empty), so it
    describes the window expected *after* the click.

    Args:
        workflow_path: Path to the workflow JSON file (read as UTF-8).
        session_dir: Session directory, forwarded to ``_attach_anchor`` to
            locate screenshot crops.
        faiss_manager: Currently unused by this function.
        clip_embedder: Currently unused by this function.

    Returns:
        The ordered list of replay action dicts; empty when the workflow has
        no nodes or no edges.

    Raises:
        OSError: If the workflow file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If a node entry has no ``node_id``.
    """
    import json
    import uuid

    # JSON is UTF-8 by specification: do not rely on the platform default
    # encoding, which breaks non-ASCII window titles on some systems.
    with open(workflow_path, encoding="utf-8") as f:
        wf_data = json.load(f)

    nodes = {n["node_id"]: n for n in wf_data.get("nodes", [])}
    edges = wf_data.get("edges", [])
    entry_nodes = wf_data.get("entry_nodes", [])

    if not nodes or not edges:
        logger.warning("Workflow vide : %d nodes, %d edges", len(nodes), len(edges))
        return []

    logger.info(
        "Workflow '%s' chargé : %d nodes, %d edges",
        wf_data.get("name", "?"), len(nodes), len(edges),
    )

    # Build the action sequence by walking one linear path through the graph.
    actions: List[Dict[str, Any]] = []
    visited = set()
    current_node_id = entry_nodes[0] if entry_nodes else next(iter(nodes))

    while current_node_id and current_node_id not in visited:
        visited.add(current_node_id)  # guards against cycles
        node = nodes.get(current_node_id)
        if not node:
            break

        # Only the first outgoing edge is followed (linear workflow).
        edge = next(
            (e for e in edges if e.get("from_node") == current_node_id),
            None,
        )
        if edge is None:
            break

        action_data = edge.get("action", {})
        next_node_id = edge.get("to_node")

        # Source-node information used for verification at replay time.
        node_title = node.get("metadata", {}).get("window_title", "")

        action_type = action_data.get("type", "unknown")
        target = action_data.get("target", {})
        params = action_data.get("parameters", {})

        if action_type == "compound":
            # Compound actions are flattened into one replay action per step.
            for step in params.get("steps", []):
                step_type = step.get("type", "unknown")
                step_action = {
                    "action_id": f"wf_{uuid.uuid4().hex[:8]}",
                    "type": _map_action_type(step_type),
                    "workflow_node": current_node_id,
                    "expected_window_title": node_title,
                }

                if step_type == "mouse_click":
                    step_action["x_pct"] = step.get("x_pct", 0)
                    step_action["y_pct"] = step.get("y_pct", 0)
                    step_action["button"] = step.get("button", "left")
                    step_action["visual_mode"] = True
                    # Target spec consumed by the grounding step.
                    step_action["target_spec"] = {
                        "by_text": target.get("by_text", ""),
                        "by_role": target.get("by_role", ""),
                        "by_text_source": "ocr" if target.get("by_text") else "",
                        "window_title": node_title,
                        "original_position": {
                            "y_relative": "",
                            "x_relative": "",
                        },
                    }
                    # Attach the crop anchor image when one is available.
                    _attach_anchor(step_action, step, session_dir)

                elif step_type in ("text_input", "key_press"):
                    if step_type == "text_input":
                        step_action["type"] = "type"
                        step_action["text"] = step.get("text", "")
                    else:
                        step_action["type"] = "key_combo"
                        step_action["keys"] = step.get("keys", [])

                elif step_type == "wait":
                    step_action["type"] = "wait"
                    step_action["duration_ms"] = step.get("duration_ms", 500)

                actions.append(step_action)

        # Move on to the next node of the path.
        current_node_id = next_node_id

    # Post-verification: each click expects the window title recorded for the
    # following click. Processing in ascending order means every title is read
    # before it is overwritten.
    click_indices = [i for i, a in enumerate(actions) if a.get("type") == "click"]
    for ci, next_ci in zip(click_indices, click_indices[1:]):
        next_title = actions[next_ci].get("expected_window_title", "")
        if next_title:
            actions[ci]["expected_window_title"] = next_title

    logger.info("Workflow → %d actions de replay", len(actions))
    return actions
|
||||
|
||||
|
||||
def _map_action_type(step_type: str) -> str:
|
||||
"""Mapper les types d'action du workflow vers les types de replay."""
|
||||
mapping = {
|
||||
"mouse_click": "click",
|
||||
"text_input": "type",
|
||||
"key_press": "key_combo",
|
||||
"wait": "wait",
|
||||
"scroll": "scroll",
|
||||
}
|
||||
return mapping.get(step_type, step_type)
|
||||
|
||||
|
||||
def _attach_anchor(action: dict, step: dict, session_dir: str) -> None:
|
||||
"""Attacher le crop anchor au target_spec si disponible."""
|
||||
import base64
|
||||
|
||||
# Chercher le crop dans le session_dir
|
||||
screenshot_id = step.get("screenshot_id", "")
|
||||
if screenshot_id and session_dir:
|
||||
crop_path = Path(session_dir) / "shots" / f"{screenshot_id}_crop.png"
|
||||
if crop_path.is_file():
|
||||
action["target_spec"]["anchor_image_base64"] = base64.b64encode(
|
||||
crop_path.read_bytes()
|
||||
).decode()
|
||||
@@ -1,55 +0,0 @@
|
||||
# window_info.py
|
||||
"""
|
||||
Récupération des informations sur la fenêtre active (X11).
|
||||
|
||||
v0 :
|
||||
- utilise xdotool pour obtenir :
|
||||
- le titre de la fenêtre active
|
||||
- le PID de la fenêtre active, puis le nom du process via ps
|
||||
|
||||
Si quelque chose ne fonctionne pas, on renvoie des valeurs "unknown".
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
from typing import Dict, Optional
|
||||
|
||||
|
||||
def _run_cmd(cmd: list[str]) -> Optional[str]:
|
||||
"""Exécute une commande et renvoie la sortie texte (strippée), ou None en cas d'erreur."""
|
||||
try:
|
||||
out = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
|
||||
return out.decode("utf-8", errors="ignore").strip()
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def get_active_window_info() -> Dict[str, str]:
    """Return the active window's title and owning process name.

    Queries ``xdotool`` for the active window's title and PID, then ``ps``
    for the command name of that PID.

    Returns:
        A dict ``{"title": ..., "app_name": ...}``. Values that could not be
        obtained are replaced with ``"unknown_window"`` / ``"unknown_app"``.
    """
    window_title = _run_cmd(["xdotool", "getactivewindow", "getwindowname"])
    window_pid = _run_cmd(["xdotool", "getactivewindow", "getwindowpid"])

    # Resolve the binary name through ps only when a PID was obtained.
    process_name: Optional[str] = (
        _run_cmd(["ps", "-p", window_pid.strip(), "-o", "comm="])
        if window_pid
        else None
    )

    return {
        "title": window_title or "unknown_window",
        "app_name": process_name or "unknown_app",
    }
|
||||
@@ -1,192 +0,0 @@
|
||||
# window_info_crossplatform.py
|
||||
"""
|
||||
Récupération des informations sur la fenêtre active - CROSS-PLATFORM
|
||||
|
||||
Supporte:
|
||||
- Linux (X11 via xdotool)
|
||||
- Windows (via pywin32)
|
||||
- macOS (via pyobjc)
|
||||
|
||||
Installation des dépendances:
|
||||
pip install pywin32 # Windows
|
||||
pip install pyobjc-framework-Cocoa # macOS
|
||||
pip install psutil # Tous OS
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import platform
|
||||
import subprocess
|
||||
from typing import Dict, Optional
|
||||
|
||||
|
||||
def _run_cmd(cmd: list[str]) -> Optional[str]:
|
||||
"""Exécute une commande et renvoie la sortie texte (strippée), ou None en cas d'erreur."""
|
||||
try:
|
||||
out = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
|
||||
return out.decode("utf-8", errors="ignore").strip()
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def get_active_window_info() -> Dict[str, str]:
    """Return ``{"title": ..., "app_name": ...}`` for the active window.

    Dispatches on ``platform.system()`` to the Linux, Windows or macOS
    implementation. Any other system yields the "unknown" placeholders.
    """
    backends = {
        "Linux": _get_window_info_linux,
        "Windows": _get_window_info_windows,
        "Darwin": _get_window_info_macos,  # macOS
    }
    backend = backends.get(platform.system())
    if backend is None:
        return {"title": "unknown_window", "app_name": "unknown_app"}
    return backend()
|
||||
|
||||
|
||||
def _get_window_info_linux() -> Dict[str, str]:
    """Linux implementation based on ``xdotool`` (X11) and ``ps``.

    Requires xdotool to be installed (``sudo apt-get install xdotool``).
    Values that cannot be obtained are replaced with ``"unknown_window"`` /
    ``"unknown_app"``.
    """
    window_title = _run_cmd(["xdotool", "getactivewindow", "getwindowname"])
    window_pid = _run_cmd(["xdotool", "getactivewindow", "getwindowpid"])

    # The binary name is looked up through ps, only when a PID is known.
    process_name: Optional[str] = (
        _run_cmd(["ps", "-p", window_pid.strip(), "-o", "comm="])
        if window_pid
        else None
    )

    return {
        "title": window_title or "unknown_window",
        "app_name": process_name or "unknown_app",
    }
|
||||
|
||||
|
||||
def _get_window_info_windows() -> Dict[str, str]:
|
||||
"""
|
||||
Windows: utilise pywin32 + psutil
|
||||
|
||||
Nécessite: pip install pywin32 psutil
|
||||
"""
|
||||
try:
|
||||
import win32gui
|
||||
import win32process
|
||||
import psutil
|
||||
|
||||
# Fenêtre au premier plan
|
||||
hwnd = win32gui.GetForegroundWindow()
|
||||
|
||||
# Titre de la fenêtre
|
||||
title = win32gui.GetWindowText(hwnd)
|
||||
if not title:
|
||||
title = "unknown_window"
|
||||
|
||||
# PID du processus
|
||||
_, pid = win32process.GetWindowThreadProcessId(hwnd)
|
||||
|
||||
# Nom du processus
|
||||
try:
|
||||
process = psutil.Process(pid)
|
||||
app_name = process.name()
|
||||
except (psutil.NoSuchProcess, psutil.AccessDenied):
|
||||
app_name = "unknown_app"
|
||||
|
||||
return {
|
||||
"title": title,
|
||||
"app_name": app_name,
|
||||
}
|
||||
|
||||
except ImportError:
|
||||
# pywin32 ou psutil non installé
|
||||
return {
|
||||
"title": "unknown_window (pywin32 missing)",
|
||||
"app_name": "unknown_app (pywin32 missing)",
|
||||
}
|
||||
except Exception as e:
|
||||
return {
|
||||
"title": f"error: {e}",
|
||||
"app_name": "unknown_app",
|
||||
}
|
||||
|
||||
|
||||
def _get_window_info_macos() -> Dict[str, str]:
|
||||
"""
|
||||
macOS: utilise pyobjc (AppKit)
|
||||
|
||||
Nécessite: pip install pyobjc-framework-Cocoa
|
||||
|
||||
Note: Nécessite les permissions "Accessibility" dans System Preferences
|
||||
"""
|
||||
try:
|
||||
from AppKit import NSWorkspace
|
||||
from Quartz import (
|
||||
CGWindowListCopyWindowInfo,
|
||||
kCGWindowListOptionOnScreenOnly,
|
||||
kCGNullWindowID
|
||||
)
|
||||
|
||||
# Application active
|
||||
active_app = NSWorkspace.sharedWorkspace().activeApplication()
|
||||
app_name = active_app.get('NSApplicationName', 'unknown_app')
|
||||
|
||||
# Titre de la fenêtre (via Quartz)
|
||||
# On cherche la fenêtre de l'app active qui est au premier plan
|
||||
window_list = CGWindowListCopyWindowInfo(
|
||||
kCGWindowListOptionOnScreenOnly,
|
||||
kCGNullWindowID
|
||||
)
|
||||
|
||||
title = "unknown_window"
|
||||
for window in window_list:
|
||||
owner_name = window.get('kCGWindowOwnerName', '')
|
||||
if owner_name == app_name:
|
||||
window_title = window.get('kCGWindowName', '')
|
||||
if window_title:
|
||||
title = window_title
|
||||
break
|
||||
|
||||
return {
|
||||
"title": title,
|
||||
"app_name": app_name,
|
||||
}
|
||||
|
||||
except ImportError:
|
||||
# pyobjc non installé
|
||||
return {
|
||||
"title": "unknown_window (pyobjc missing)",
|
||||
"app_name": "unknown_app (pyobjc missing)",
|
||||
}
|
||||
except Exception as e:
|
||||
return {
|
||||
"title": f"error: {e}",
|
||||
"app_name": "unknown_app",
|
||||
}
|
||||
|
||||
|
||||
# Quick manual smoke test: print the active window once per second.
if __name__ == "__main__":
    import time

    print(f"OS détecté: {platform.system()}")
    print("\nTest de capture fenêtre active (5 secondes)...")
    print("Changez de fenêtre pour tester!\n")

    sample_count = 5
    for i in range(sample_count):
        info = get_active_window_info()
        print(f"[{i+1}] App: {info['app_name']:20s} | Title: {info['title']}")
        time.sleep(1)
|
||||
@@ -76,7 +76,16 @@ class StepMetrics:
|
||||
confidence_score: float
|
||||
retry_count: int = 0
|
||||
error_details: Optional[str] = None
|
||||
|
||||
# C1 — Instrumentation vision-aware (ExecutionLoop)
|
||||
# Ces champs proviennent de `StepResult` (core/execution/execution_loop.py).
|
||||
# Tous optionnels avec valeurs par défaut pour rétrocompatibilité.
|
||||
ocr_ms: float = 0.0 # Temps OCR sur ce step
|
||||
ui_ms: float = 0.0 # Temps détection UI sur ce step
|
||||
analyze_ms: float = 0.0 # Temps analyse ScreenState (OCR + UI + reste)
|
||||
total_ms: float = 0.0 # Temps total du step (alias duration_ms)
|
||||
cache_hit: bool = False # True si ScreenState vient du cache perceptuel
|
||||
degraded: bool = False # True si mode dégradé (timeout analyse)
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert to dictionary for storage."""
|
||||
return {
|
||||
@@ -92,9 +101,15 @@ class StepMetrics:
|
||||
'status': self.status,
|
||||
'confidence_score': self.confidence_score,
|
||||
'retry_count': self.retry_count,
|
||||
'error_details': self.error_details
|
||||
'error_details': self.error_details,
|
||||
'ocr_ms': self.ocr_ms,
|
||||
'ui_ms': self.ui_ms,
|
||||
'analyze_ms': self.analyze_ms,
|
||||
'total_ms': self.total_ms,
|
||||
'cache_hit': self.cache_hit,
|
||||
'degraded': self.degraded,
|
||||
}
|
||||
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> 'StepMetrics':
|
||||
"""Create from dictionary."""
|
||||
@@ -111,7 +126,13 @@ class StepMetrics:
|
||||
status=data['status'],
|
||||
confidence_score=data['confidence_score'],
|
||||
retry_count=data.get('retry_count', 0),
|
||||
error_details=data.get('error_details')
|
||||
error_details=data.get('error_details'),
|
||||
ocr_ms=float(data.get('ocr_ms') or 0.0),
|
||||
ui_ms=float(data.get('ui_ms') or 0.0),
|
||||
analyze_ms=float(data.get('analyze_ms') or 0.0),
|
||||
total_ms=float(data.get('total_ms') or 0.0),
|
||||
cache_hit=bool(data.get('cache_hit') or False),
|
||||
degraded=bool(data.get('degraded') or False),
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
"""Integration of analytics with ExecutionLoop."""
|
||||
|
||||
import logging
|
||||
from typing import Optional
|
||||
from datetime import datetime
|
||||
from typing import Any, Optional
|
||||
from datetime import datetime, timedelta
|
||||
import uuid
|
||||
|
||||
from ..analytics_system import get_analytics_system
|
||||
@@ -14,17 +14,35 @@ logger = logging.getLogger(__name__)
|
||||
class AnalyticsExecutionIntegration:
|
||||
"""Integrate analytics collection with workflow execution."""
|
||||
|
||||
def __init__(self, enabled: bool = True):
|
||||
def __init__(self, analytics_system: Any = True, enabled: Optional[bool] = None):
|
||||
"""
|
||||
Initialize analytics integration.
|
||||
|
||||
|
||||
Accepte deux formes d'appel pour la rétrocompatibilité :
|
||||
- ``AnalyticsExecutionIntegration(enabled=True)`` → auto-load du système
|
||||
- ``AnalyticsExecutionIntegration(analytics_system_instance)`` →
|
||||
utilise l'instance fournie (utilisé par ExecutionLoop)
|
||||
|
||||
Args:
|
||||
enabled: Whether analytics collection is enabled
|
||||
analytics_system: Instance d'AnalyticsSystem pré-construite, ou
|
||||
True/False pour activer/désactiver (legacy).
|
||||
enabled: Legacy — si défini, prime sur analytics_system.
|
||||
"""
|
||||
self.enabled = enabled
|
||||
self.analytics = None
|
||||
|
||||
if enabled:
|
||||
# Détection de la forme d'appel
|
||||
if enabled is not None:
|
||||
# Appel legacy explicite: AnalyticsExecutionIntegration(enabled=...)
|
||||
self.enabled = bool(enabled)
|
||||
self.analytics = None
|
||||
elif isinstance(analytics_system, bool):
|
||||
# Appel legacy: AnalyticsExecutionIntegration(True/False)
|
||||
self.enabled = analytics_system
|
||||
self.analytics = None
|
||||
else:
|
||||
# Nouvelle forme: instance injectée
|
||||
self.enabled = analytics_system is not None
|
||||
self.analytics = analytics_system
|
||||
|
||||
if self.enabled and self.analytics is None:
|
||||
try:
|
||||
self.analytics = get_analytics_system()
|
||||
logger.info("Analytics integration enabled")
|
||||
@@ -36,37 +54,50 @@ class AnalyticsExecutionIntegration:
|
||||
self,
|
||||
workflow_id: str,
|
||||
execution_id: Optional[str] = None,
|
||||
total_steps: int = 0
|
||||
total_steps: int = 0,
|
||||
mode: Optional[str] = None,
|
||||
) -> str:
|
||||
"""
|
||||
Called when workflow execution starts.
|
||||
|
||||
Appelé au démarrage d'une exécution de workflow.
|
||||
|
||||
Args:
|
||||
workflow_id: Workflow identifier
|
||||
execution_id: Execution identifier (generated if None)
|
||||
total_steps: Total number of steps
|
||||
|
||||
workflow_id: Identifiant du workflow
|
||||
execution_id: Identifiant d'exécution (généré si None)
|
||||
total_steps: Nombre total d'étapes prévues
|
||||
mode: Mode d'exécution (OBSERVATION / COACHING / SUPERVISED /
|
||||
AUTOMATIC). Propagé en contexte pour MetricsCollector.
|
||||
|
||||
Returns:
|
||||
Execution ID
|
||||
Identifiant d'exécution (celui fourni ou nouvellement généré).
|
||||
"""
|
||||
if not self.enabled or not self.analytics:
|
||||
return execution_id or str(uuid.uuid4())
|
||||
|
||||
|
||||
if execution_id is None:
|
||||
execution_id = str(uuid.uuid4())
|
||||
|
||||
|
||||
try:
|
||||
# Start real-time tracking
|
||||
# Démarrage du tracking temps réel
|
||||
self.analytics.realtime_analytics.track_execution(
|
||||
execution_id=execution_id,
|
||||
workflow_id=workflow_id,
|
||||
total_steps=total_steps
|
||||
total_steps=total_steps,
|
||||
)
|
||||
|
||||
|
||||
# Ouverture de l'ExecutionMetrics côté collector (état "running").
|
||||
# Cela permet à `on_execution_complete` d'appeler
|
||||
# `record_execution_complete` qui clôture proprement.
|
||||
context = {"mode": mode} if mode else {}
|
||||
self.analytics.metrics_collector.record_execution_start(
|
||||
execution_id=execution_id,
|
||||
workflow_id=workflow_id,
|
||||
context=context,
|
||||
)
|
||||
|
||||
logger.debug(f"Started tracking execution: {execution_id}")
|
||||
except Exception as e:
|
||||
logger.error(f"Error starting execution tracking: {e}")
|
||||
|
||||
|
||||
return execution_id
|
||||
|
||||
def on_step_start(
|
||||
@@ -101,110 +132,249 @@ class AnalyticsExecutionIntegration:
|
||||
execution_id: str,
|
||||
workflow_id: str,
|
||||
node_id: str,
|
||||
action_type: str,
|
||||
started_at: datetime,
|
||||
completed_at: datetime,
|
||||
duration: float,
|
||||
*,
|
||||
duration_ms: float,
|
||||
success: bool,
|
||||
error_message: Optional[str] = None
|
||||
action_type: str = "",
|
||||
started_at: Optional[datetime] = None,
|
||||
completed_at: Optional[datetime] = None,
|
||||
error_message: Optional[str] = None,
|
||||
confidence: float = 0.0,
|
||||
target_element: str = "",
|
||||
retry_count: int = 0,
|
||||
ocr_ms: float = 0.0,
|
||||
ui_ms: float = 0.0,
|
||||
analyze_ms: float = 0.0,
|
||||
total_ms: float = 0.0,
|
||||
cache_hit: bool = False,
|
||||
degraded: bool = False,
|
||||
step_id: Optional[str] = None,
|
||||
) -> None:
|
||||
"""
|
||||
Called when a step completes.
|
||||
|
||||
Appelé à la fin d'un step.
|
||||
|
||||
Contrat normalisé (Lot A — avril 2026) : ``duration_ms`` est
|
||||
obligatoire et en millisecondes. Plus de rétrocompat silencieuse
|
||||
sur ``duration`` en secondes.
|
||||
|
||||
Args:
|
||||
execution_id: Execution identifier
|
||||
workflow_id: Workflow identifier
|
||||
node_id: Node identifier
|
||||
action_type: Type of action
|
||||
started_at: Start timestamp
|
||||
completed_at: Completion timestamp
|
||||
duration: Duration in seconds
|
||||
success: Whether step succeeded
|
||||
error_message: Error message if failed
|
||||
execution_id: Identifiant d'exécution
|
||||
workflow_id: Identifiant du workflow
|
||||
node_id: Identifiant du node
|
||||
duration_ms: Durée du step en millisecondes (obligatoire)
|
||||
success: Vrai si le step a réussi
|
||||
action_type: Type d'action (``click``, ``type``, …)
|
||||
started_at: Timestamp de début (déduit de duration_ms si None)
|
||||
completed_at: Timestamp de fin (``now()`` si None)
|
||||
error_message: Message d'erreur si ``success=False``
|
||||
confidence: Score de matching [0, 1]
|
||||
target_element: Élément ciblé (optionnel)
|
||||
retry_count: Nombre de retries
|
||||
ocr_ms: Temps OCR (C1)
|
||||
ui_ms: Temps détection UI (C1)
|
||||
analyze_ms: Temps analyse ScreenState (C1)
|
||||
total_ms: Temps total du step (C1, alias duration_ms)
|
||||
cache_hit: ScreenState depuis cache perceptuel (C1)
|
||||
degraded: Mode dégradé activé (C1)
|
||||
step_id: ID unique du step (généré si None)
|
||||
"""
|
||||
if not self.enabled or not self.analytics:
|
||||
return
|
||||
|
||||
|
||||
try:
|
||||
# Record step metrics
|
||||
duration_ms_final = float(duration_ms)
|
||||
|
||||
# Normaliser les timestamps
|
||||
if completed_at is None:
|
||||
completed_at = datetime.now()
|
||||
if started_at is None:
|
||||
started_at = completed_at - timedelta(milliseconds=duration_ms_final)
|
||||
|
||||
step_metrics = StepMetrics(
|
||||
step_id=step_id or f"{execution_id}:{node_id}:{completed_at.isoformat()}",
|
||||
execution_id=execution_id,
|
||||
workflow_id=workflow_id,
|
||||
node_id=node_id,
|
||||
action_type=action_type,
|
||||
action_type=action_type or "unknown",
|
||||
target_element=target_element,
|
||||
started_at=started_at,
|
||||
completed_at=completed_at,
|
||||
duration=duration,
|
||||
success=success,
|
||||
error_message=error_message
|
||||
duration_ms=duration_ms_final,
|
||||
status="completed" if success else "failed",
|
||||
confidence_score=float(confidence),
|
||||
retry_count=retry_count,
|
||||
error_details=error_message,
|
||||
# C1 — vision-aware
|
||||
ocr_ms=float(ocr_ms or 0.0),
|
||||
ui_ms=float(ui_ms or 0.0),
|
||||
analyze_ms=float(analyze_ms or 0.0),
|
||||
total_ms=float(total_ms or duration_ms_final),
|
||||
cache_hit=bool(cache_hit),
|
||||
degraded=bool(degraded),
|
||||
)
|
||||
|
||||
|
||||
self.analytics.metrics_collector.record_step(step_metrics)
|
||||
|
||||
# Update real-time tracking
|
||||
self.analytics.realtime_analytics.record_step_complete(
|
||||
execution_id=execution_id,
|
||||
success=success
|
||||
|
||||
# Tracking temps réel
|
||||
try:
|
||||
self.analytics.realtime_analytics.record_step_complete(
|
||||
execution_id=execution_id,
|
||||
success=success,
|
||||
)
|
||||
except Exception as rt_err:
|
||||
logger.debug(f"Realtime tracking skipped: {rt_err}")
|
||||
|
||||
logger.debug(
|
||||
f"Recorded step: {node_id} "
|
||||
f"({'success' if success else 'failed'}, "
|
||||
f"analyze_ms={analyze_ms:.0f}, cache_hit={cache_hit}, "
|
||||
f"degraded={degraded})"
|
||||
)
|
||||
|
||||
logger.debug(f"Recorded step: {node_id} ({'success' if success else 'failed'})")
|
||||
except Exception as e:
|
||||
logger.error(f"Error recording step completion: {e}")
|
||||
|
||||
def on_step_result(
    self,
    execution_id: str,
    workflow_id: str,
    step_result: Any,
) -> None:
    """Record a complete step result (C1 shortcut).

    Reads the fields of ``step_result`` with ``getattr`` (falling back to
    neutral defaults when an attribute is missing or falsy) and forwards
    them to ``on_step_complete``, so callers do not have to extract the
    vision-aware fields themselves. Does nothing when analytics is disabled
    or no analytics system is attached.

    Args:
        execution_id: Execution identifier.
        workflow_id: Workflow identifier.
        step_result: Object exposing the step fields; per the original
            docstring, an instance of
            ``core.execution.execution_loop.StepResult``.
    """
    if not (self.enabled and self.analytics):
        return

    def _metric(field: str) -> float:
        # Missing, None or zero-valued attributes all collapse to 0.0.
        return float(getattr(step_result, field, 0.0) or 0.0)

    def _flag(field: str) -> bool:
        return bool(getattr(step_result, field, False))

    # The action type lives on the optional action_result, under either
    # ``action_type`` or ``action`` depending on the code path.
    try:
        outcome = getattr(step_result, "action_result", None)
        if outcome is None:
            kind = "unknown"
        else:
            kind = (
                getattr(outcome, "action_type", None)
                or getattr(outcome, "action", None)
                or "unknown"
            )
    except Exception:
        kind = "unknown"

    node_id = getattr(step_result, "node_id", "unknown")
    succeeded = _flag("success")
    # The message is only reported as an error for failed steps.
    failure_message = None if succeeded else getattr(step_result, "message", None)

    self.on_step_complete(
        execution_id=execution_id,
        workflow_id=workflow_id,
        node_id=node_id,
        action_type=str(kind),
        success=succeeded,
        error_message=failure_message,
        duration_ms=_metric("duration_ms"),
        confidence=_metric("match_confidence"),
        ocr_ms=_metric("ocr_ms"),
        ui_ms=_metric("ui_ms"),
        analyze_ms=_metric("analyze_ms"),
        total_ms=_metric("total_ms"),
        cache_hit=_flag("cache_hit"),
        degraded=_flag("degraded"),
    )
|
||||
|
||||
def on_execution_complete(
|
||||
self,
|
||||
execution_id: str,
|
||||
workflow_id: str,
|
||||
started_at: datetime,
|
||||
completed_at: datetime,
|
||||
duration: float,
|
||||
*,
|
||||
duration_ms: float,
|
||||
status: str,
|
||||
error_message: Optional[str] = None,
|
||||
steps_total: Optional[int] = None,
|
||||
steps_completed: int = 0,
|
||||
steps_failed: int = 0
|
||||
steps_failed: int = 0,
|
||||
error_message: Optional[str] = None,
|
||||
) -> None:
|
||||
"""
|
||||
Called when workflow execution completes.
|
||||
|
||||
Appelé à la fin d'une exécution de workflow.
|
||||
|
||||
Contrat normalisé (Lot A — avril 2026) :
|
||||
- ``duration_ms`` en millisecondes, toujours. Plus de rétrocompat
|
||||
silencieuse sur ``duration`` en secondes.
|
||||
- ``status`` est une chaîne libre (``"completed"``, ``"failed"``,
|
||||
``"stopped"``, ``"timeout"``, …). L'appelant décide.
|
||||
- ``steps_total`` / ``steps_completed`` / ``steps_failed`` : noms
|
||||
alignés sur le dataclass ``ExecutionMetrics``. Si ``steps_total``
|
||||
n'est pas fourni, on le déduit par somme.
|
||||
|
||||
Args:
|
||||
execution_id: Execution identifier
|
||||
workflow_id: Workflow identifier
|
||||
started_at: Start timestamp
|
||||
completed_at: Completion timestamp
|
||||
duration: Duration in seconds
|
||||
status: Final status (success, failed, timeout)
|
||||
error_message: Error message if failed
|
||||
steps_completed: Number of steps completed
|
||||
steps_failed: Number of steps failed
|
||||
execution_id: Identifiant d'exécution
|
||||
workflow_id: Identifiant du workflow
|
||||
duration_ms: Durée totale en millisecondes
|
||||
status: Statut final (``"completed"`` / ``"failed"`` / ``"stopped"``)
|
||||
steps_total: Nombre total de steps exécutés (tous statuts confondus)
|
||||
steps_completed: Nombre de steps réussis
|
||||
steps_failed: Nombre de steps en échec
|
||||
error_message: Message d'erreur si ``status != "completed"``
|
||||
"""
|
||||
if not self.enabled or not self.analytics:
|
||||
return
|
||||
|
||||
|
||||
# steps_total dérivé si non fourni explicitement
|
||||
if steps_total is None:
|
||||
steps_total = int(steps_completed) + int(steps_failed)
|
||||
|
||||
try:
|
||||
# Record execution metrics
|
||||
execution_metrics = ExecutionMetrics(
|
||||
execution_id=execution_id,
|
||||
workflow_id=workflow_id,
|
||||
started_at=started_at,
|
||||
completed_at=completed_at,
|
||||
duration=duration,
|
||||
status=status,
|
||||
error_message=error_message,
|
||||
steps_completed=steps_completed,
|
||||
steps_failed=steps_failed
|
||||
)
|
||||
|
||||
self.analytics.metrics_collector.record_execution(execution_metrics)
|
||||
|
||||
# Flush to ensure persistence
|
||||
self.analytics.metrics_collector.flush()
|
||||
|
||||
# Complete real-time tracking
|
||||
collector = self.analytics.metrics_collector
|
||||
|
||||
# record_execution_complete clôture proprement un ExecutionMetrics
|
||||
# ouvert par record_execution_start (chemin nominal via
|
||||
# on_execution_start). Si l'état n'est pas présent (tests, legacy),
|
||||
# on pousse un ExecutionMetrics synthétique directement.
|
||||
completed_at = datetime.now()
|
||||
started_at = completed_at - timedelta(milliseconds=float(duration_ms))
|
||||
|
||||
active = getattr(collector, "_active_executions", None)
|
||||
if active is not None and execution_id in active:
|
||||
collector.record_execution_complete(
|
||||
execution_id=execution_id,
|
||||
status=status,
|
||||
steps_total=int(steps_total),
|
||||
steps_completed=int(steps_completed),
|
||||
steps_failed=int(steps_failed),
|
||||
error_message=error_message,
|
||||
)
|
||||
else:
|
||||
# Fallback explicite : on construit directement un ExecutionMetrics
|
||||
# aligné sur le dataclass (duration_ms, status, steps_*).
|
||||
execution_metrics = ExecutionMetrics(
|
||||
execution_id=execution_id,
|
||||
workflow_id=workflow_id,
|
||||
started_at=started_at,
|
||||
completed_at=completed_at,
|
||||
duration_ms=float(duration_ms),
|
||||
status=status,
|
||||
steps_total=int(steps_total),
|
||||
steps_completed=int(steps_completed),
|
||||
steps_failed=int(steps_failed),
|
||||
error_message=error_message,
|
||||
)
|
||||
# Le collector n'expose pas record_execution(...) : on pousse
|
||||
# dans le buffer protégé par lock pour rester cohérent.
|
||||
with collector._lock:
|
||||
collector._buffer.append(execution_metrics)
|
||||
|
||||
# Flush pour garantir la persistance immédiate
|
||||
collector.flush()
|
||||
|
||||
# Clôture du tracking temps réel
|
||||
self.analytics.realtime_analytics.complete_execution(
|
||||
execution_id=execution_id,
|
||||
status=status
|
||||
status=status,
|
||||
)
|
||||
|
||||
|
||||
logger.info(f"Recorded execution: {execution_id} ({status})")
|
||||
except Exception as e:
|
||||
logger.error(f"Error recording execution completion: {e}")
|
||||
@@ -216,39 +386,54 @@ class AnalyticsExecutionIntegration:
|
||||
node_id: str,
|
||||
strategy: str,
|
||||
success: bool,
|
||||
duration: float
|
||||
duration_ms: float,
|
||||
) -> None:
|
||||
"""
|
||||
Called when self-healing attempts recovery.
|
||||
|
||||
Appelé quand le self-healing tente une récupération.
|
||||
|
||||
Contrat normalisé (Lot A — avril 2026) : ``duration_ms`` en
|
||||
millisecondes, cohérent avec ``on_execution_complete`` et
|
||||
``on_step_complete``. Le StepMetrics construit respecte strictement
|
||||
le dataclass (``status``, ``duration_ms``, ``error_details``,
|
||||
``confidence_score``, ``target_element``, ``step_id``).
|
||||
|
||||
Args:
|
||||
execution_id: Execution identifier
|
||||
workflow_id: Workflow identifier
|
||||
node_id: Node identifier
|
||||
strategy: Recovery strategy used
|
||||
success: Whether recovery succeeded
|
||||
duration: Recovery duration
|
||||
execution_id: Identifiant d'exécution
|
||||
workflow_id: Identifiant du workflow
|
||||
node_id: Node où la récupération est tentée
|
||||
strategy: Stratégie de récupération employée
|
||||
success: Vrai si la récupération a réussi
|
||||
duration_ms: Durée de la tentative en millisecondes
|
||||
"""
|
||||
if not self.enabled or not self.analytics:
|
||||
return
|
||||
|
||||
|
||||
try:
|
||||
# Record as a special step metric
|
||||
now = datetime.now()
|
||||
started_at = now - timedelta(milliseconds=float(duration_ms))
|
||||
|
||||
recovery_metrics = StepMetrics(
|
||||
step_id=f"{execution_id}:{node_id}:recovery:{now.isoformat()}",
|
||||
execution_id=execution_id,
|
||||
workflow_id=workflow_id,
|
||||
node_id=f"{node_id}_recovery",
|
||||
action_type=f"recovery_{strategy}",
|
||||
started_at=datetime.now(),
|
||||
completed_at=datetime.now(),
|
||||
duration=duration,
|
||||
success=success,
|
||||
error_message=None if success else f"Recovery failed: {strategy}"
|
||||
target_element="",
|
||||
started_at=started_at,
|
||||
completed_at=now,
|
||||
duration_ms=float(duration_ms),
|
||||
status="completed" if success else "failed",
|
||||
confidence_score=0.0,
|
||||
retry_count=0,
|
||||
error_details=None if success else f"Recovery failed: {strategy}",
|
||||
)
|
||||
|
||||
|
||||
self.analytics.metrics_collector.record_step(recovery_metrics)
|
||||
|
||||
logger.debug(f"Recorded recovery: {strategy} ({'success' if success else 'failed'})")
|
||||
|
||||
logger.debug(
|
||||
f"Recorded recovery: {strategy} "
|
||||
f"({'success' if success else 'failed'})"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Error recording recovery attempt: {e}")
|
||||
|
||||
|
||||
643
core/analytics/process_mining_bridge.py
Normal file
643
core/analytics/process_mining_bridge.py
Normal file
@@ -0,0 +1,643 @@
|
||||
"""
|
||||
Bridge entre les workflows Lea (core) et PM4Py pour le process mining.
|
||||
Genere des diagrammes BPMN et KPIs depuis les traces Shadow.
|
||||
|
||||
Usage:
|
||||
from core.analytics.process_mining_bridge import (
|
||||
sessions_to_event_log,
|
||||
workflow_to_event_log,
|
||||
discover_bpmn,
|
||||
compute_kpis,
|
||||
)
|
||||
|
||||
# Depuis des sessions JSONL brutes
|
||||
df = sessions_to_event_log(sessions_data)
|
||||
result = discover_bpmn(df, output_dir="data/analytics/bpmn")
|
||||
kpis = compute_kpis(df)
|
||||
|
||||
# Depuis un workflow core (dict JSON)
|
||||
df = workflow_to_event_log(workflow_dict)
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import pandas as pd
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---- Import conditionnel PM4Py -----------------------------------------
|
||||
|
||||
try:
|
||||
import pm4py
|
||||
PM4PY_AVAILABLE = True
|
||||
except ImportError:
|
||||
PM4PY_AVAILABLE = False
|
||||
logger.warning("pm4py non installe -- le process mining est desactive")
|
||||
|
||||
|
||||
def _sanitize_label(label: str) -> str:
|
||||
"""
|
||||
Supprime les caracteres de controle (0x00-0x1F sauf tab/newline)
|
||||
qui sont invalides en XML et font planter PM4Py.
|
||||
"""
|
||||
return "".join(
|
||||
c if c in ("\t", "\n", "\r") or ord(c) >= 0x20 else f"<0x{ord(c):02x}>"
|
||||
for c in label
|
||||
)
|
||||
|
||||
|
||||
# ---- Types d'evenements a ignorer (bruit) --------------------------------
|
||||
|
||||
_NOISE_EVENT_TYPES = frozenset({
|
||||
"heartbeat",
|
||||
"action_result",
|
||||
"screenshot",
|
||||
})
|
||||
|
||||
# Types d'evenements significatifs pour le process mining
|
||||
_RELEVANT_EVENT_TYPES = frozenset({
|
||||
"mouse_click",
|
||||
"text_input",
|
||||
"key_press",
|
||||
"key_combo",
|
||||
"window_focus_change",
|
||||
})
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# Conversion sessions JSONL -> event log PM4Py
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
def _build_activity_label(event: dict) -> Optional[str]:
    """
    Build a human-readable activity label from a raw JSONL event.

    The event payload is read from ``event["event"]`` when that key exists,
    otherwise from ``event`` itself.

    Label formats:
        - mouse_click         -> "Clic - <app_name> (<window title, max 40 chars>)"
        - text_input          -> "Saisie '<text, max 20 chars>' - <app_name>"
        - key_press           -> "Touche <key> - <app_name>"
        - key_combo           -> "Raccourci <k1+k2+...> - <app_name>"
        - window_focus_change -> "Fenetre <to.title> (<to.app_name>)"
        - any other type      -> "<type> - <app_name>"

    Every label goes through ``_sanitize_label`` so that control characters
    (e.g. the 0x13 character recorded for Ctrl+S) cannot break the XML/BPMN
    export.

    Args:
        event: one parsed JSONL line.

    Returns:
        The label, or ``None`` when the event is noise (its type is in
        ``_NOISE_EVENT_TYPES``), when a window_focus_change has no ``to``
        information, or when the resulting label is empty.
    """
    evt = event.get("event", event)
    etype = evt.get("type", "")

    if etype in _NOISE_EVENT_TYPES:
        return None

    # JSON ``null`` is decoded as None: fall back to empty containers/strings
    # instead of crashing on ``None.get`` / ``len(None)``.
    window = evt.get("window") or {}
    app_name = window.get("app_name", "inconnu")
    win_title = window.get("title") or ""
    # Keep the window title readable: at most 40 characters.
    short_title = (win_title[:40] + "...") if len(win_title) > 40 else win_title

    label: Optional[str] = None

    if etype == "mouse_click":
        label = f"Clic - {app_name} ({short_title})"

    elif etype == "text_input":
        text = evt.get("text") or ""
        # Keep the typed text readable: at most 20 characters.
        short_text = (text[:20] + "...") if len(text) > 20 else text
        label = f"Saisie '{short_text}' - {app_name}"

    elif etype == "key_press":
        key = evt.get("key", "?")
        label = f"Touche {key} - {app_name}"

    elif etype == "key_combo":
        keys = evt.get("keys") or []
        combo = "+".join(str(k) for k in keys)
        label = f"Raccourci {combo} - {app_name}"

    elif etype == "window_focus_change":
        to_info = evt.get("to")
        if not to_info:
            return None
        to_title = to_info.get("title", "?")
        to_app = to_info.get("app_name", "?")
        label = f"Fenetre {to_title} ({to_app})"

    else:
        # Unknown event type: generic label.
        label = f"{etype} - {app_name}"

    return _sanitize_label(label) if label else None
|
||||
|
||||
|
||||
def _extract_timestamp(event: dict) -> Optional[float]:
|
||||
"""Extrait le timestamp unix depuis un event JSONL."""
|
||||
# Le timestamp peut etre au niveau racine ou dans event.timestamp
|
||||
evt = event.get("event", event)
|
||||
ts = evt.get("timestamp") or event.get("timestamp")
|
||||
if ts is not None:
|
||||
return float(ts)
|
||||
# Fallback sur le champ 't' (format simplifie)
|
||||
t = evt.get("t") or event.get("t")
|
||||
if t is not None:
|
||||
return float(t)
|
||||
return None
|
||||
|
||||
|
||||
def sessions_to_event_log(
    sessions_data: List[dict],
    deduplicate_windows: bool = True,
) -> pd.DataFrame:
    """
    Turn raw session traces (parsed JSONL events) into a PM4Py event log.

    Each event that yields both an activity label and a timestamp becomes
    one row:
        - case:concept:name = session_id ("unknown" when absent)
        - concept:name      = activity label
        - time:timestamp    = UTC timestamp
        - event_type        = raw event type
        - app_name          = application name of the event window

    Args:
        sessions_data: list of dicts, one per parsed JSONL line.
        deduplicate_windows: when True, a window_focus_change whose label is
            identical to the immediately preceding kept window_focus_change
            is dropped. Any other kept event resets that memory.

    Returns:
        A DataFrame sorted by case and timestamp. It is empty, but carries
        the five columns above, when no usable event was found.
    """
    column_names = [
        "case:concept:name",
        "concept:name",
        "time:timestamp",
        "event_type",
        "app_name",
    ]

    # Bucket the events per session so that ordering and deduplication are
    # performed session by session.
    by_session: Dict[str, List[dict]] = {}
    for raw_event in sessions_data:
        session_key = raw_event.get("session_id", "unknown")
        if session_key not in by_session:
            by_session[session_key] = []
        by_session[session_key].append(raw_event)

    records: List[Dict[str, Any]] = []
    for session_key, session_events in by_session.items():
        # Chronological order; events without timestamp sort first.
        session_events.sort(key=lambda item: _extract_timestamp(item) or 0.0)
        previous_focus: Optional[str] = None

        for raw_event in session_events:
            activity = _build_activity_label(raw_event)
            if activity is None:
                continue

            unix_ts = _extract_timestamp(raw_event)
            if unix_ts is None:
                continue

            payload = raw_event.get("event", raw_event)
            is_focus_change = payload.get("type") == "window_focus_change"
            if deduplicate_windows and is_focus_change:
                if activity == previous_focus:
                    # Same window focused again: skip the duplicate.
                    continue
                previous_focus = activity
            else:
                previous_focus = None

            records.append({
                "case:concept:name": session_key,
                "concept:name": activity,
                "time:timestamp": pd.Timestamp(
                    datetime.fromtimestamp(unix_ts, tz=timezone.utc)
                ),
                "event_type": payload.get("type", ""),
                "app_name": payload.get("window", {}).get("app_name", ""),
            })

    if not records:
        logger.warning("Aucun evenement pertinent trouve dans les sessions")
        return pd.DataFrame(columns=column_names)

    log_df = pd.DataFrame(records)
    log_df = log_df.sort_values(
        ["case:concept:name", "time:timestamp"]
    ).reset_index(drop=True)
    logger.info(
        "Event log cree : %d evenements, %d sessions, %d activites distinctes",
        len(log_df),
        log_df["case:concept:name"].nunique(),
        log_df["concept:name"].nunique(),
    )
    return log_df
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# Conversion workflow core (dict JSON) -> event log PM4Py
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
def workflow_to_event_log(workflow_dict: dict) -> pd.DataFrame:
    """
    Convert a core workflow (JSON dict) into a PM4Py event log.

    The workflow graph (nodes + edges) is explored depth-first from every
    entry node; each path that reaches a node without outgoing edges, or
    that runs into a node already on the current path (loop), becomes one
    case. When the entry nodes yield no path, up to three nodes without
    incoming edges are used as starting points instead. At most 100 paths
    are collected.

    Mapping:
        - case:concept:name = "<workflow_id>_path_<index>"
        - concept:name      = node["name"], falling back to the node id
        - time:timestamp    = synthetic: ``created_at`` plus one second per
          step (current UTC time when ``created_at`` is missing or not a
          valid ISO string)

    Args:
        workflow_dict: workflow description with the keys ``workflow_id``,
            ``nodes``, ``edges``, ``entry_nodes`` and ``created_at``. Edges
            use ``from_node``/``to_node`` or ``source_node``/``target_node``.

    Returns:
        A DataFrame with the three columns above, sorted by case and
        timestamp. The columns are present even when the result is empty.
    """
    columns = ["case:concept:name", "concept:name", "time:timestamp"]

    wf_id = workflow_dict.get("workflow_id", "wf_unknown")
    # ``or []`` also covers keys explicitly set to JSON null.
    nodes = {n["node_id"]: n for n in (workflow_dict.get("nodes") or [])}
    edges = workflow_dict.get("edges") or []
    entry_nodes = workflow_dict.get("entry_nodes") or []
    created_at = workflow_dict.get("created_at", datetime.now(timezone.utc).isoformat())

    if not nodes or not edges:
        logger.warning("Workflow vide ou sans edges : %s", wf_id)
        return pd.DataFrame(columns=columns)

    # Adjacency list: source node id -> outgoing edges.
    adjacency: Dict[str, List[dict]] = {}
    for edge in edges:
        from_node = edge.get("from_node") or edge.get("source_node", "")
        adjacency.setdefault(from_node, []).append(edge)

    # Depth-first enumeration of paths, capped to avoid a combinatorial
    # explosion on heavily branching workflows.
    MAX_PATHS = 100
    paths: List[List[str]] = []

    def _dfs(current: str, path: List[str], visited: set) -> None:
        if len(paths) >= MAX_PATHS:
            return
        if current in visited:
            # Loop detected: keep the path walked so far.
            paths.append(path[:])
            return
        visited.add(current)
        path.append(current)

        outgoing = adjacency.get(current, [])
        if not outgoing:
            # End node.
            paths.append(path[:])
        else:
            for edge in outgoing:
                to_node = edge.get("to_node") or edge.get("target_node", "")
                if to_node:
                    _dfs(to_node, path, visited)

        # Backtrack so sibling branches can visit this node again.
        path.pop()
        visited.discard(current)

    for entry in entry_nodes:
        if entry in nodes:
            _dfs(entry, [], set())

    # No usable entry node: start from nodes that have no incoming edge.
    if not paths:
        target_nodes = set()
        for edge in edges:
            to_node = edge.get("to_node") or edge.get("target_node", "")
            target_nodes.add(to_node)
        root_nodes = [nid for nid in nodes if nid not in target_nodes]
        for root in root_nodes[:3]:
            _dfs(root, [], set())

    try:
        base_time = pd.Timestamp(datetime.fromisoformat(created_at))
    except (ValueError, TypeError):
        base_time = pd.Timestamp(datetime.now(timezone.utc))

    rows: List[Dict[str, Any]] = []
    for i, path in enumerate(paths):
        case_id = f"{wf_id}_path_{i}"
        for step_idx, node_id in enumerate(path):
            node = nodes.get(node_id, {})
            rows.append({
                "case:concept:name": case_id,
                "concept:name": node.get("name", node_id),
                "time:timestamp": base_time + pd.Timedelta(seconds=step_idx),
            })

    # Passing ``columns`` keeps the schema identical to the early-return
    # case when no path could be built (e.g. a pure cycle without entry).
    df = pd.DataFrame(rows, columns=columns)
    if not df.empty:
        df = df.sort_values(["case:concept:name", "time:timestamp"]).reset_index(drop=True)
    logger.info(
        "Event log depuis workflow : %d evenements, %d chemins",
        len(df), len(paths),
    )
    return df
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# Decouverte BPMN
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
def discover_bpmn(
    event_log_df: pd.DataFrame,
    output_dir: str = "data/analytics/bpmn",
    name: str = "process",
) -> dict:
    """
    Discover a BPMN model from an event log with the Inductive Miner.

    Besides the BPMN XML file, PNG renderings of the BPMN model, of the
    directly-follows graph (DFG) and of the Petri net are attempted. Every
    rendering is best-effort: when it fails, the failure is logged and the
    corresponding path in the result is ``None``.

    Args:
        event_log_df: event log DataFrame in PM4Py format.
        output_dir: directory receiving the generated files (created when
            missing).
        name: file-name prefix of the generated files.

    Returns:
        {
            'bpmn_xml_path': str,
            'bpmn_image_path': str or None,
            'petri_net_image_path': str or None,
            'dfg_image_path': str or None,
            'stats': {'activities': int, 'variants': int, 'cases': int},
        }

    Raises:
        ImportError: pm4py is not installed.
        ValueError: the event log is empty.
    """
    if not PM4PY_AVAILABLE:
        raise ImportError("pm4py n'est pas installe. Installez-le : pip install pm4py")

    if event_log_df.empty:
        raise ValueError("Event log vide, impossible de decouvrir un BPMN")

    out = Path(output_dir)
    out.mkdir(parents=True, exist_ok=True)

    png_suffix = ".png"

    bpmn_model = pm4py.discover_bpmn_inductive(event_log_df)

    # ---- BPMN XML ----
    bpmn_xml_path = str(out / f"{name}.bpmn")
    try:
        pm4py.write_bpmn(bpmn_model, bpmn_xml_path)
    except Exception as e:
        # The layout step can fail on labels with special characters
        # (accents, quotes, ...): fall back to the internal exporter.
        logger.warning("Layout BPMN echoue (%s), export sans layout", e)
        from pm4py.objects.bpmn.exporter import exporter as bpmn_exporter
        bpmn_exporter.apply(bpmn_model, bpmn_xml_path)
    logger.info("BPMN XML exporte : %s", bpmn_xml_path)

    # ---- BPMN image (PNG), large canvas for readability ----
    bpmn_image_path: Optional[str] = str(out / f"{name}_bpmn{png_suffix}")
    try:
        from pm4py.visualization.bpmn import visualizer as bpmn_vis
        gviz = bpmn_vis.apply(bpmn_model, parameters={
            "rankdir": "TB",
            "font_size": "12",
        })
        gviz.graph_attr["dpi"] = "150"
        gviz.graph_attr["size"] = "40,20!"
        gviz.graph_attr["rankdir"] = "TB"
        # render() appends the format extension itself, so only the trailing
        # ".png" is stripped (a ".png" elsewhere in the path must survive).
        gviz.render(
            filename=bpmn_image_path[:-len(png_suffix)],
            format="png",
            cleanup=True,
        )
        logger.info("BPMN PNG exporte : %s", bpmn_image_path)
    except Exception as e:
        logger.warning("BPMN image fallback : %s", e)
        try:
            pm4py.save_vis_bpmn(bpmn_model, bpmn_image_path)
        except Exception as fallback_error:
            logger.warning("Impossible de generer l'image BPMN : %s", fallback_error)
            bpmn_image_path = None

    # ---- DFG (Directly-Follows Graph) image, large canvas ----
    dfg_image_path: Optional[str] = str(out / f"{name}_dfg{png_suffix}")
    try:
        from pm4py.visualization.dfg import visualizer as dfg_vis
        dfg, sa, ea = pm4py.discover_dfg(event_log_df)
        # Occurrence count of every activity (not only the start activities).
        activity_counts = event_log_df["concept:name"].value_counts().to_dict()
        gviz = dfg_vis.apply(dfg, activities_count=activity_counts, parameters={
            "start_activities": sa,
            "end_activities": ea,
            "rankdir": "TB",
            "font_size": "11",
        })
        gviz.graph_attr["dpi"] = "150"
        gviz.graph_attr["size"] = "40,20!"
        gviz.graph_attr["rankdir"] = "TB"
        gviz.render(
            filename=dfg_image_path[:-len(png_suffix)],
            format="png",
            cleanup=True,
        )
        logger.info("DFG PNG exporte : %s", dfg_image_path)
    except Exception as e:
        logger.warning("DFG image fallback : %s", e)
        try:
            pm4py.save_vis_dfg(*pm4py.discover_dfg(event_log_df), file_path=dfg_image_path)
        except Exception as fallback_error:
            logger.warning("Impossible de generer l'image DFG : %s", fallback_error)
            dfg_image_path = None

    # ---- Petri net (alternative visualisation) ----
    petri_image_path: Optional[str] = str(out / f"{name}_petri{png_suffix}")
    try:
        net, im, fm = pm4py.discover_petri_net_inductive(event_log_df)
        pm4py.save_vis_petri_net(net, im, fm, file_path=petri_image_path)
        logger.info("Petri net PNG exporte : %s", petri_image_path)
    except Exception as e:
        logger.warning("Impossible de generer le Petri net : %s", e)
        petri_image_path = None

    # ---- Basic statistics ----
    variants = pm4py.get_variants(event_log_df)
    n_cases = event_log_df["case:concept:name"].nunique()
    n_activities = event_log_df["concept:name"].nunique()

    result = {
        "bpmn_xml_path": bpmn_xml_path,
        "bpmn_image_path": bpmn_image_path,
        "petri_net_image_path": petri_image_path,
        "dfg_image_path": dfg_image_path,
        "stats": {
            "activities": n_activities,
            "variants": len(variants),
            "cases": n_cases,
        },
    }
    logger.info("Decouverte BPMN terminee : %s", result["stats"])
    return result
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# KPIs de process mining
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
def compute_kpis(event_log_df: pd.DataFrame) -> dict:
    """
    Compute process-mining KPIs for an event log.

    Args:
        event_log_df: event log with the columns ``case:concept:name``,
            ``concept:name`` and ``time:timestamp``; ``app_name`` is optional.

    Returns:
        {
            'total_cases': int,
            'total_events': int,
            'unique_activities': int,
            'variants_count': int,          # 0 when pm4py is unavailable
            'variants_top5': list,          # [{'variant': str, 'count': int}]
            'avg_case_duration_seconds': float,
            'median_case_duration_seconds': float,
            'avg_events_per_case': float,
            'activity_stats': {
                '<activity_name>': {
                    'count': int,
                    'avg_duration_seconds': float,
                    'min_duration_seconds': float,
                    'max_duration_seconds': float,
                }
            },
            'bottlenecks': [...],           # the 3 slowest activities
            'app_distribution': {'<app_name>': int},
        }
        For an empty log every counter is 0 and every collection is empty.
    """
    case_col = "case:concept:name"
    activity_col = "concept:name"
    time_col = "time:timestamp"

    if event_log_df.empty:
        return {
            "total_cases": 0,
            "total_events": 0,
            "unique_activities": 0,
            "variants_count": 0,
            "variants_top5": [],
            "avg_case_duration_seconds": 0.0,
            "median_case_duration_seconds": 0.0,
            "avg_events_per_case": 0.0,
            "activity_stats": {},
            "bottlenecks": [],
            "app_distribution": {},
        }

    log = event_log_df.copy()

    # ---- Global metrics ----
    n_cases = log[case_col].nunique()
    n_events = len(log)
    n_activities = log[activity_col].nunique()

    # ---- Variants (needs PM4Py) ----
    variants_count = 0
    variants_top5 = []
    if PM4PY_AVAILABLE:
        variant_map = pm4py.get_variants(log)
        variants_count = len(variant_map)
        ranked = sorted(variant_map.items(), key=lambda kv: kv[1], reverse=True)
        for trace, frequency in ranked[:5]:
            variants_top5.append({"variant": " -> ".join(trace), "count": frequency})

    # ---- Duration per case (cases with a single event are ignored) ----
    case_spans: List[float] = []
    for _, case_rows in log.groupby(case_col):
        stamps = case_rows[time_col]
        if len(stamps) < 2:
            continue
        case_spans.append((stamps.max() - stamps.min()).total_seconds())

    if case_spans:
        avg_case_dur = float(pd.Series(case_spans).mean())
        median_case_dur = float(pd.Series(case_spans).median())
    else:
        avg_case_dur = 0.0
        median_case_dur = 0.0
    avg_events_per_case = n_events / n_cases if n_cases > 0 else 0.0

    # ---- Per-activity statistics ----
    # The duration of an event is the gap to the next event of the same case.
    ordered = log.sort_values([case_col, time_col])
    ordered["next_timestamp"] = ordered.groupby(case_col)[time_col].shift(-1)
    gap = ordered["next_timestamp"] - ordered[time_col]
    ordered["duration_to_next"] = gap.dt.total_seconds()

    activity_stats: Dict[str, Dict[str, Any]] = {}
    for activity, occurrences in ordered.groupby(activity_col):
        gaps = occurrences["duration_to_next"].dropna()
        # Gaps above 5 minutes are treated as pauses and discarded.
        gaps = gaps[gaps <= 300]
        if len(gaps) > 0:
            avg_gap = round(float(gaps.mean()), 2)
            min_gap = round(float(gaps.min()), 2)
            max_gap = round(float(gaps.max()), 2)
        else:
            avg_gap = 0.0
            min_gap = 0.0
            max_gap = 0.0
        activity_stats[activity] = {
            "count": len(occurrences),
            "avg_duration_seconds": avg_gap,
            "min_duration_seconds": min_gap,
            "max_duration_seconds": max_gap,
        }

    # ---- Bottlenecks: the 3 activities with the highest average gap ----
    slow_activities = []
    for activity_name, figures in activity_stats.items():
        if figures["avg_duration_seconds"] > 0:
            slow_activities.append({
                "activity": activity_name,
                "avg_duration_seconds": figures["avg_duration_seconds"],
            })
    slow_activities.sort(key=lambda item: item["avg_duration_seconds"], reverse=True)
    bottlenecks = slow_activities[:3]

    # ---- Distribution per application ----
    app_distribution: Dict[str, int] = {}
    if "app_name" in log.columns:
        app_distribution = log["app_name"].value_counts().to_dict()

    return {
        "total_cases": n_cases,
        "total_events": n_events,
        "unique_activities": n_activities,
        "variants_count": variants_count,
        "variants_top5": variants_top5,
        "avg_case_duration_seconds": round(avg_case_dur, 2),
        "median_case_duration_seconds": round(median_case_dur, 2),
        "avg_events_per_case": round(avg_events_per_case, 1),
        "activity_stats": activity_stats,
        "bottlenecks": bottlenecks,
        "app_distribution": app_distribution,
    }
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# Helpers : chargement sessions JSONL
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
def load_jsonl_session(jsonl_path: str) -> List[dict]:
    """
    Load a ``live_events.jsonl`` file as a list of dicts.

    Blank lines are skipped. Lines that are not valid JSON, or whose JSON
    value is not an object, are skipped with a warning so that callers can
    rely on every returned item being a dict.

    Args:
        jsonl_path: path of the JSONL file.

    Returns:
        The parsed events, in file order.

    Raises:
        FileNotFoundError: ``jsonl_path`` does not exist.
    """
    events: List[dict] = []
    path = Path(jsonl_path)
    if not path.exists():
        raise FileNotFoundError(f"Fichier JSONL introuvable : {jsonl_path}")

    # "utf-8-sig" reads plain UTF-8 unchanged and drops a leading BOM, which
    # would otherwise make the first line unparseable.
    with open(path, "r", encoding="utf-8-sig") as f:
        for line_num, line in enumerate(f, 1):
            line = line.strip()
            if not line:
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError as e:
                logger.warning("Ligne %d invalide dans %s : %s", line_num, jsonl_path, e)
                continue
            if not isinstance(record, dict):
                logger.warning(
                    "Ligne %d ignoree dans %s : objet JSON attendu, %s recu",
                    line_num, jsonl_path, type(record).__name__,
                )
                continue
            events.append(record)

    logger.info("Charge %d evenements depuis %s", len(events), jsonl_path)
    return events
|
||||
|
||||
|
||||
def load_multiple_sessions(session_dirs: List[str]) -> List[dict]:
    """
    Load the events of several sessions from their directories.

    Each directory is expected to contain a ``live_events.jsonl`` file;
    directories without it are reported with a warning and skipped.

    Args:
        session_dirs: session directory paths.

    Returns:
        The events of all sessions, concatenated in directory order.
    """
    collected: List[dict] = []
    for directory in session_dirs:
        events_file = Path(directory) / "live_events.jsonl"
        if not events_file.exists():
            logger.warning("Pas de live_events.jsonl dans %s", directory)
            continue
        collected += load_jsonl_session(str(events_file))
    return collected
|
||||
60
core/analytics/screen_change_detector.py
Normal file
60
core/analytics/screen_change_detector.py
Normal file
@@ -0,0 +1,60 @@
|
||||
"""
|
||||
Détection rapide de changement d'écran via perceptual hash (pHash).
|
||||
|
||||
Utilise imagehash pour calculer un hash perceptuel par screenshot.
|
||||
La distance de Hamming entre deux hashes indique le degré de changement :
|
||||
- < 5 : même écran (bruit, curseur déplacé)
|
||||
- 5-15 : changement mineur (scroll, popup, champ rempli)
|
||||
- > 15 : nouvel écran (nouvelle fenêtre, navigation)
|
||||
|
||||
Performance : ~15ms par hash sur CPU pour des screenshots 2560x1600.
|
||||
"""
|
||||
|
||||
from PIL import Image
|
||||
import imagehash
|
||||
from typing import Tuple, Optional
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class ScreenChangeLevel(Enum):
    """Amount of change between two screenshots, by pHash Hamming distance."""

    # distance < 5
    SAME = "same"
    # 5 <= distance < 15
    MINOR = "minor"
    # distance >= 15
    MAJOR = "major"
|
||||
|
||||
|
||||
def compute_phash(image: Image.Image, hash_size: int = 8) -> imagehash.ImageHash:
    """
    Return the perceptual hash (pHash) of a PIL image.

    Args:
        image: the PIL image to hash.
        hash_size: forwarded to ``imagehash.phash`` as ``hash_size``.
    """
    perceptual_hash = imagehash.phash(image, hash_size=hash_size)
    return perceptual_hash
|
||||
|
||||
|
||||
def compare_screenshots(img1: Image.Image, img2: Image.Image, hash_size: int = 8) -> Tuple[int, ScreenChangeLevel]:
    """
    Compare two screenshots through their perceptual hashes.

    Both images are hashed with ``compute_phash`` and the classification is
    delegated to ``compare_hashes`` so that the distance thresholds are
    defined in a single place.

    Args:
        img1: first screenshot.
        img2: second screenshot.
        hash_size: hash size used for both images.

    Returns:
        (distance, level): Hamming distance between the two hashes and the
        corresponding change level.
    """
    return compare_hashes(
        compute_phash(img1, hash_size),
        compute_phash(img2, hash_size),
    )
|
||||
|
||||
|
||||
def compare_hashes(
    hash1: imagehash.ImageHash,
    hash2: imagehash.ImageHash,
    *,
    same_threshold: int = 5,
    major_threshold: int = 15,
) -> Tuple[int, ScreenChangeLevel]:
    """
    Compare two pre-computed hashes.

    Args:
        hash1: first hash.
        hash2: second hash.
        same_threshold: distances strictly below this value are classified
            as ``SAME``.
        major_threshold: distances greater than or equal to this value are
            classified as ``MAJOR``; anything in between is ``MINOR``.
            NOTE(review): larger hash sizes presumably produce larger
            distances, so callers using them may need higher thresholds.

    Returns:
        (distance, level): the result of ``hash1 - hash2`` (Hamming distance
        for ``imagehash`` hashes) and the corresponding change level.
    """
    distance = hash1 - hash2
    if distance < same_threshold:
        level = ScreenChangeLevel.SAME
    elif distance < major_threshold:
        level = ScreenChangeLevel.MINOR
    else:
        level = ScreenChangeLevel.MAJOR
    return distance, level
|
||||
@@ -42,6 +42,8 @@ class TimeSeriesStore:
|
||||
ON execution_metrics(started_at);
|
||||
|
||||
-- Step metrics table
|
||||
-- Les colonnes ocr_ms, ui_ms, analyze_ms, total_ms, cache_hit, degraded
|
||||
-- proviennent de l'instrumentation vision-aware (C1) de ExecutionLoop.
|
||||
CREATE TABLE IF NOT EXISTS step_metrics (
|
||||
step_id TEXT PRIMARY KEY,
|
||||
execution_id TEXT NOT NULL,
|
||||
@@ -56,6 +58,12 @@ class TimeSeriesStore:
|
||||
confidence_score REAL,
|
||||
retry_count INTEGER DEFAULT 0,
|
||||
error_details TEXT,
|
||||
ocr_ms REAL DEFAULT 0.0,
|
||||
ui_ms REAL DEFAULT 0.0,
|
||||
analyze_ms REAL DEFAULT 0.0,
|
||||
total_ms REAL DEFAULT 0.0,
|
||||
cache_hit INTEGER DEFAULT 0,
|
||||
degraded INTEGER DEFAULT 0,
|
||||
FOREIGN KEY (execution_id) REFERENCES execution_metrics(execution_id)
|
||||
);
|
||||
|
||||
@@ -101,11 +109,40 @@ class TimeSeriesStore:
|
||||
|
||||
logger.info(f"TimeSeriesStore initialized at {self.db_path}")
|
||||
|
||||
# Colonnes ajoutées ultérieurement — appliquées via ALTER TABLE si absentes.
|
||||
# (C1 — instrumentation vision-aware, avril 2026)
|
||||
_STEP_METRICS_MIGRATIONS = [
|
||||
("ocr_ms", "REAL DEFAULT 0.0"),
|
||||
("ui_ms", "REAL DEFAULT 0.0"),
|
||||
("analyze_ms", "REAL DEFAULT 0.0"),
|
||||
("total_ms", "REAL DEFAULT 0.0"),
|
||||
("cache_hit", "INTEGER DEFAULT 0"),
|
||||
("degraded", "INTEGER DEFAULT 0"),
|
||||
]
|
||||
|
||||
def _init_database(self) -> None:
|
||||
"""Initialize database schema."""
|
||||
"""Initialize database schema and apply lightweight migrations."""
|
||||
with self._get_connection() as conn:
|
||||
conn.executescript(self.SCHEMA)
|
||||
self._migrate_step_metrics(conn)
|
||||
conn.commit()
|
||||
|
||||
def _migrate_step_metrics(self, conn: sqlite3.Connection) -> None:
|
||||
"""Ajoute les colonnes C1 sur une base `step_metrics` pré-existante."""
|
||||
cursor = conn.execute("PRAGMA table_info(step_metrics)")
|
||||
existing = {row[1] for row in cursor.fetchall()}
|
||||
for column, ddl in self._STEP_METRICS_MIGRATIONS:
|
||||
if column not in existing:
|
||||
try:
|
||||
conn.execute(
|
||||
f"ALTER TABLE step_metrics ADD COLUMN {column} {ddl}"
|
||||
)
|
||||
logger.info(
|
||||
f"Migration step_metrics: ajout colonne {column}"
|
||||
)
|
||||
except sqlite3.OperationalError as e:
|
||||
# Collision bénigne (colonne déjà ajoutée par un autre process)
|
||||
logger.debug(f"Migration colonne {column} ignorée: {e}")
|
||||
|
||||
@contextmanager
|
||||
def _get_connection(self):
|
||||
@@ -164,13 +201,14 @@ class TimeSeriesStore:
|
||||
))
|
||||
|
||||
def _write_step_metric(self, conn: sqlite3.Connection, metric: StepMetrics) -> None:
|
||||
"""Write step metric."""
|
||||
"""Write step metric (inclut les champs vision-aware C1)."""
|
||||
conn.execute("""
|
||||
INSERT OR REPLACE INTO step_metrics
|
||||
(step_id, execution_id, workflow_id, node_id, action_type, target_element,
|
||||
started_at, completed_at, duration_ms, status, confidence_score,
|
||||
retry_count, error_details)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
retry_count, error_details,
|
||||
ocr_ms, ui_ms, analyze_ms, total_ms, cache_hit, degraded)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""", (
|
||||
metric.step_id,
|
||||
metric.execution_id,
|
||||
@@ -184,7 +222,13 @@ class TimeSeriesStore:
|
||||
metric.status,
|
||||
metric.confidence_score,
|
||||
metric.retry_count,
|
||||
metric.error_details
|
||||
metric.error_details,
|
||||
getattr(metric, 'ocr_ms', 0.0),
|
||||
getattr(metric, 'ui_ms', 0.0),
|
||||
getattr(metric, 'analyze_ms', 0.0),
|
||||
getattr(metric, 'total_ms', 0.0),
|
||||
1 if getattr(metric, 'cache_hit', False) else 0,
|
||||
1 if getattr(metric, 'degraded', False) else 0,
|
||||
))
|
||||
|
||||
def _write_resource_metric(self, conn: sqlite3.Connection, metric: ResourceMetrics) -> None:
|
||||
|
||||
31
core/anonymisation/__init__.py
Normal file
31
core/anonymisation/__init__.py
Normal file
@@ -0,0 +1,31 @@
|
||||
# core/anonymisation/__init__.py
|
||||
"""Module de floutage ciblé des PII côté serveur.
|
||||
|
||||
Remplace l'ancien blur client-side (`agent_v0/agent_v1/vision/blur_sensitive.py`)
|
||||
qui floutait toutes les zones de texte claires, cassant les codes CIM, les
|
||||
montants PMSI et les boutons.
|
||||
|
||||
Stratégie :
|
||||
1. OCR (docTR) sur le screenshot → texte + bounding boxes
|
||||
2. NER (EDS-NLP si disponible, sinon regex) → détection des PII
|
||||
3. Filtrage : ne conserver que PERSON / LOCATION / PHONE / NIR / EMAIL
|
||||
4. Blur gaussien uniquement sur les bbox des PII filtrées
|
||||
|
||||
Usage :
|
||||
from core.anonymisation import blur_pii_on_image
|
||||
blurred_path = blur_pii_on_image("shot_0001_full.png")
|
||||
"""
|
||||
|
||||
from .pii_blur import (
|
||||
PIIBlurResult,
|
||||
PIIEntity,
|
||||
PIIBlurrer,
|
||||
blur_pii_on_image,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"PIIBlurResult",
|
||||
"PIIEntity",
|
||||
"PIIBlurrer",
|
||||
"blur_pii_on_image",
|
||||
]
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user