Compare commits
140 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
16ff396dbf | ||
|
|
e44fd7b328 | ||
|
|
66815b7a1a | ||
|
|
c6b695eca8 | ||
|
|
99d2083dea | ||
|
|
a718086140 | ||
|
|
c82979e72b | ||
|
|
2185c41cc1 | ||
|
|
26804eb123 | ||
|
|
d71d5df4a8 | ||
|
|
6829ad8e79 | ||
|
|
8903f35433 | ||
|
|
4ab2c15e5c | ||
|
|
eba6fea779 | ||
|
|
f04398d5a7 | ||
|
|
4ce9c47f45 | ||
|
|
9dfcdb5fb0 | ||
|
|
3efe15d2c7 | ||
|
|
9d87ed64c5 | ||
|
|
00134963e5 | ||
|
|
0ec5e2a25b | ||
|
|
0c5fffe951 | ||
|
|
5027ed9a23 | ||
|
|
6caab2c600 | ||
|
|
552e66dbf6 | ||
|
|
de1026ee2e | ||
|
|
7b50725bf8 | ||
|
|
7feef3b6a9 | ||
|
|
0b06db222d | ||
|
|
74ee0dadee | ||
|
|
0b452f975a | ||
|
|
6ab385d671 | ||
|
|
b3eab83a0f | ||
|
|
27490849a8 | ||
|
|
cebbf0809a | ||
|
|
3e227d28ad | ||
|
|
8ce63fcba2 | ||
|
|
4202431421 | ||
|
|
4923623dd4 | ||
|
|
84181cc982 | ||
|
|
7355d315a3 | ||
|
|
c50adab3a1 | ||
|
|
2fbb305f65 | ||
|
|
ff581be397 | ||
|
|
203e5cc6c1 | ||
|
|
d1b556b6cd | ||
|
|
729cd67743 | ||
|
|
73ddcdb29d | ||
|
|
14a9442343 | ||
|
|
5da4581e76 | ||
|
|
cbe8dc95d2 | ||
|
|
04a14a56b2 | ||
|
|
2290f1846b | ||
|
|
c57b40ae1d | ||
|
|
bc21b27da7 | ||
|
|
6a2248ddcd | ||
|
|
82d7b38cff | ||
|
|
6c7f88c05d | ||
|
|
447fbb2c6e | ||
|
|
623be15bfe | ||
|
|
55d5aebbd2 | ||
|
|
73b731fef8 | ||
|
|
ffd97ae9a5 | ||
|
|
d168833609 | ||
|
|
23a06a744c | ||
|
|
af4eae28b9 | ||
|
|
c198c930a1 | ||
|
|
e3efef2fe7 | ||
|
|
95fddeebb3 | ||
|
|
71523cebd3 | ||
|
|
3aa806a630 | ||
|
|
588c8f22c1 | ||
|
|
3d243d731d | ||
|
|
2431a6c9e9 | ||
|
|
969236da03 | ||
|
|
f30461b88c | ||
|
|
f34eca20f9 | ||
|
|
309dfd5287 | ||
|
|
f5a672d7b9 | ||
|
|
1acea85fa6 | ||
|
|
4f61741420 | ||
|
|
2fa864b5c7 | ||
|
|
10739c33fa | ||
|
|
39bea1b042 | ||
|
|
26b4e6d8ce | ||
|
|
4fb84b1090 | ||
|
|
7f2bc6fe97 | ||
|
|
eded968c70 | ||
|
|
53d29d9b24 | ||
|
|
690053bd57 | ||
|
|
c7b0649716 | ||
|
|
2bfcfa4535 | ||
|
|
b808e48b1f | ||
|
|
78ee962918 | ||
|
|
c8a3618e27 | ||
|
|
9ca277a63f | ||
|
|
8c7b6e5696 | ||
|
|
af4ffa189a | ||
|
|
42f571d496 | ||
|
|
36737cfe9d | ||
|
|
93ef93e563 | ||
|
|
376e4a88b3 | ||
|
|
bb4ed2a75d | ||
|
|
f7b8cddd2b | ||
|
|
a9a99953dd | ||
|
|
aee64f54b1 | ||
|
|
c77844fa9a | ||
|
|
013fe071a2 | ||
|
|
203dc00d53 | ||
|
|
e9a028134a | ||
|
|
01bba7bc6c | ||
|
|
d5285de99c | ||
|
|
33c198b827 | ||
|
|
816b37af98 | ||
|
|
d82aad984f | ||
|
|
057c37131f | ||
|
|
9bcce3fc68 | ||
|
|
f96f6322ec | ||
|
|
02ee2d7b5b | ||
|
|
47993e2ee9 | ||
|
|
7cc03f6f10 | ||
|
|
a21f1ea9fa | ||
|
|
9188bd7df1 | ||
|
|
f82753debe | ||
|
|
b92cb9db03 | ||
|
|
e66629ce1a | ||
|
|
cecdf417b7 | ||
|
|
56e3cc052a | ||
|
|
332366b58c | ||
|
|
ac9c207474 | ||
|
|
f85d56ac05 | ||
|
|
172167f6c0 | ||
|
|
42d49dd8bd | ||
|
|
f541bb8ce4 | ||
|
|
a6eb4c168f | ||
|
|
f6ad5ff2b2 | ||
|
|
2ac781343a | ||
|
|
bffcfb2db3 | ||
|
|
cc673755f7 | ||
|
|
4509038bf0 |
@@ -30,7 +30,9 @@ DASHBOARD_PORT=5001
|
|||||||
CLIP_MODEL=ViT-B-32
|
CLIP_MODEL=ViT-B-32
|
||||||
CLIP_PRETRAINED=openai
|
CLIP_PRETRAINED=openai
|
||||||
CLIP_DEVICE=cpu # cpu or cuda
|
CLIP_DEVICE=cpu # cpu or cuda
|
||||||
VLM_MODEL=qwen3-vl:8b
|
RPA_VLM_MODEL=gemma4:latest # gemma4:latest (défaut), qwen3-vl:8b, ui-tars (fallback)
|
||||||
|
VLM_MODEL=gemma4:latest # alias de compatibilité
|
||||||
|
# VLM_ALLOW_CLOUD=false # true pour activer les APIs cloud en fallback (OpenAI, Gemini, Anthropic)
|
||||||
VLM_ENDPOINT=http://localhost:11434
|
VLM_ENDPOINT=http://localhost:11434
|
||||||
OWL_MODEL=google/owlv2-base-patch16-ensemble
|
OWL_MODEL=google/owlv2-base-patch16-ensemble
|
||||||
OWL_CONFIDENCE_THRESHOLD=0.1
|
OWL_CONFIDENCE_THRESHOLD=0.1
|
||||||
|
|||||||
207
.gitea/workflows/security-audit.yml
Normal file
207
.gitea/workflows/security-audit.yml
Normal file
@@ -0,0 +1,207 @@
|
|||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Audit sécurité — bandit + pip-audit + scan secrets
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Jamais bloquant : on reporte les warnings, on ne casse pas la CI.
|
||||||
|
# Utile pour détecter les dérives progressives (nouveaux CVE, secrets
|
||||||
|
# oubliés dans un commit, patterns risqués).
|
||||||
|
#
|
||||||
|
# Fréquence : à chaque push sur main + hebdo (cron).
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
name: security-audit
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- main
|
||||||
|
schedule:
|
||||||
|
# Tous les lundis à 6h UTC (8h Paris hiver, 7h Paris été).
|
||||||
|
- cron: "0 6 * * 1"
|
||||||
|
workflow_dispatch: {}
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: ${{ github.workflow }}-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
# Job 1 — bandit (bonnes pratiques sécu Python)
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
bandit:
|
||||||
|
name: Bandit (scan statique)
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
timeout-minutes: 5
|
||||||
|
continue-on-error: true
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Setup Python 3.12
|
||||||
|
uses: actions/setup-python@v5
|
||||||
|
with:
|
||||||
|
python-version: "3.12"
|
||||||
|
cache: "pip"
|
||||||
|
|
||||||
|
- name: Installation bandit
|
||||||
|
run: |
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
pip install "bandit[toml]==1.7.10"
|
||||||
|
|
||||||
|
- name: Scan bandit sur core/
|
||||||
|
run: |
|
||||||
|
# -ll : niveau LOW minimum (remonte tout)
|
||||||
|
# -ii : confiance LOW minimum
|
||||||
|
# --skip B101 : on ignore les asserts (usuels en tests/validation)
|
||||||
|
bandit -r core/ \
|
||||||
|
--skip B101,B404,B603 \
|
||||||
|
--format txt \
|
||||||
|
--exit-zero \
|
||||||
|
--output bandit-report.txt
|
||||||
|
echo "=== RAPPORT BANDIT ==="
|
||||||
|
cat bandit-report.txt
|
||||||
|
|
||||||
|
- name: Upload rapport bandit
|
||||||
|
if: always()
|
||||||
|
uses: actions/upload-artifact@v3
|
||||||
|
with:
|
||||||
|
name: bandit-report
|
||||||
|
path: bandit-report.txt
|
||||||
|
retention-days: 30
|
||||||
|
if-no-files-found: ignore
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
# Job 2 — pip-audit (CVE sur requirements)
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
pip-audit:
|
||||||
|
name: pip-audit (CVE dépendances)
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
timeout-minutes: 5
|
||||||
|
continue-on-error: true
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Setup Python 3.12
|
||||||
|
uses: actions/setup-python@v5
|
||||||
|
with:
|
||||||
|
python-version: "3.12"
|
||||||
|
cache: "pip"
|
||||||
|
|
||||||
|
- name: Installation pip-audit
|
||||||
|
run: |
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
pip install "pip-audit==2.7.3"
|
||||||
|
|
||||||
|
- name: Audit CVE sur requirements-ci.txt
|
||||||
|
run: |
|
||||||
|
if [ -f requirements-ci.txt ]; then
|
||||||
|
pip-audit -r requirements-ci.txt \
|
||||||
|
--format json \
|
||||||
|
--output pip-audit-ci.json \
|
||||||
|
--progress-spinner off \
|
||||||
|
--disable-pip || echo "::warning::CVE détectées dans requirements-ci.txt"
|
||||||
|
echo "=== RAPPORT pip-audit (CI) ==="
|
||||||
|
cat pip-audit-ci.json || true
|
||||||
|
else
|
||||||
|
echo "::notice::requirements-ci.txt absent — skip"
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Audit CVE sur requirements.txt (best-effort)
|
||||||
|
run: |
|
||||||
|
# Timeout généreux car requirements.txt est massif (torch, CUDA).
|
||||||
|
timeout 120 pip-audit -r requirements.txt \
|
||||||
|
--format json \
|
||||||
|
--output pip-audit-full.json \
|
||||||
|
--progress-spinner off \
|
||||||
|
--disable-pip 2>&1 | head -200 || \
|
||||||
|
echo "::warning::pip-audit sur requirements.txt a timeout ou échoué (non bloquant)"
|
||||||
|
|
||||||
|
- name: Upload rapports pip-audit
|
||||||
|
if: always()
|
||||||
|
uses: actions/upload-artifact@v3
|
||||||
|
with:
|
||||||
|
name: pip-audit-reports
|
||||||
|
path: |
|
||||||
|
pip-audit-ci.json
|
||||||
|
pip-audit-full.json
|
||||||
|
retention-days: 30
|
||||||
|
if-no-files-found: ignore
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
# Job 3 — Scan secrets en clair (grep simple)
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
# Patterns recherchés : clés API Anthropic (sk-ant-), OpenAI (sk-),
|
||||||
|
# Google (AIzaSy), AWS (AKIA), tokens Hugging Face (hf_).
|
||||||
|
# Ne cherche QUE dans les fichiers trackés (pas .env, pas .venv).
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
secrets-scan:
|
||||||
|
name: Scan secrets (grep)
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
timeout-minutes: 3
|
||||||
|
continue-on-error: true
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Checkout (historique complet)
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
|
||||||
|
- name: Scan patterns de secrets
|
||||||
|
run: |
|
||||||
|
# Chemins exclus : venvs, caches, data, htmlcov, models.
|
||||||
|
EXCLUDES='--exclude-dir=.venv --exclude-dir=venv_v3 --exclude-dir=.git \
|
||||||
|
--exclude-dir=node_modules --exclude-dir=htmlcov --exclude-dir=models \
|
||||||
|
--exclude-dir=data --exclude-dir=__pycache__ --exclude-dir=.pytest_cache \
|
||||||
|
--exclude=*.lock --exclude=*.log --exclude=*.md'
|
||||||
|
|
||||||
|
echo "=== Recherche de secrets potentiels ==="
|
||||||
|
FOUND=0
|
||||||
|
|
||||||
|
# Anthropic
|
||||||
|
if grep -rnI $EXCLUDES -E 'sk-ant-[a-zA-Z0-9_-]{20,}' . 2>/dev/null; then
|
||||||
|
echo "::warning::Clé Anthropic potentielle détectée"
|
||||||
|
FOUND=1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# OpenAI
|
||||||
|
if grep -rnI $EXCLUDES -E 'sk-proj-[a-zA-Z0-9_-]{20,}|sk-[a-zA-Z0-9]{40,}' . 2>/dev/null; then
|
||||||
|
echo "::warning::Clé OpenAI potentielle détectée"
|
||||||
|
FOUND=1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Google Cloud / API Keys
|
||||||
|
if grep -rnI $EXCLUDES -E 'AIzaSy[a-zA-Z0-9_-]{33}' . 2>/dev/null; then
|
||||||
|
echo "::warning::Clé Google API potentielle détectée"
|
||||||
|
FOUND=1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# AWS
|
||||||
|
if grep -rnI $EXCLUDES -E 'AKIA[0-9A-Z]{16}' . 2>/dev/null; then
|
||||||
|
echo "::warning::Clé AWS potentielle détectée"
|
||||||
|
FOUND=1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Hugging Face
|
||||||
|
if grep -rnI $EXCLUDES -E 'hf_[a-zA-Z0-9]{30,}' . 2>/dev/null; then
|
||||||
|
echo "::warning::Token Hugging Face potentiel détecté"
|
||||||
|
FOUND=1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Mots-clés suspects à côté d'assignations
|
||||||
|
if grep -rnI $EXCLUDES -E '(password|passwd|secret|api_key|apikey|token)\s*=\s*["\x27][a-zA-Z0-9_\-!@#\$%]{12,}["\x27]' . 2>/dev/null \
|
||||||
|
| grep -viE '(example|dummy|placeholder|test|fake|xxx|changeme|\$\{)' 2>/dev/null; then
|
||||||
|
echo "::warning::Assignation suspecte d'un secret détectée"
|
||||||
|
FOUND=1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ "$FOUND" -eq 0 ]; then
|
||||||
|
echo "Aucun secret détecté par les patterns de base."
|
||||||
|
else
|
||||||
|
echo ""
|
||||||
|
echo "::notice::Vérifier manuellement les occurrences ci-dessus."
|
||||||
|
echo "::notice::Si faux positif : ajouter le fichier aux exclusions ou reformater."
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Toujours succès (job non bloquant).
|
||||||
|
exit 0
|
||||||
214
.gitea/workflows/tests.yml
Normal file
214
.gitea/workflows/tests.yml
Normal file
@@ -0,0 +1,214 @@
|
|||||||
|
# ------------------------------------------------------------------
|
||||||
|
# CI principale — Tests unitaires + lint léger
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Déclenchement : push / pull_request sur n'importe quelle branche.
|
||||||
|
# Objectif : feedback rapide (< 3 min) sans GPU ni Ollama.
|
||||||
|
# Runner : self-hosted (label "ubuntu-latest" ou équivalent).
|
||||||
|
#
|
||||||
|
# Les tests marqués `slow`, `gpu`, `integration`, `performance`,
|
||||||
|
# `visual` et `smoke` sont exclus volontairement — ils nécessitent
|
||||||
|
# CUDA, Ollama, ou des captures d'écran réelles.
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
name: tests
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- "**"
|
||||||
|
pull_request:
|
||||||
|
branches:
|
||||||
|
- "**"
|
||||||
|
|
||||||
|
# Permet à une nouvelle exécution d'annuler les précédentes
|
||||||
|
# sur la même branche (évite l'engorgement du runner local).
|
||||||
|
concurrency:
|
||||||
|
group: ${{ github.workflow }}-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
env:
|
||||||
|
# Empêche l'import accidentel de torch/CUDA pendant la CI.
|
||||||
|
PYTHONDONTWRITEBYTECODE: "1"
|
||||||
|
PIP_DISABLE_PIP_VERSION_CHECK: "1"
|
||||||
|
PIP_NO_PYTHON_VERSION_WARNING: "1"
|
||||||
|
# Les modules d'exécution lisent parfois ces vars ; valeurs neutres en CI.
|
||||||
|
RPA_VISION_CI: "1"
|
||||||
|
RPA_AUTH_VAULT_PATH: "/tmp/ci_vault.enc"
|
||||||
|
# api_stream.py a un fail-closed P0-C : si RPA_API_TOKEN absent, sys.exit(1)
|
||||||
|
# au module load. On fournit un token bidon pour que les imports passent en CI.
|
||||||
|
# (Le token n'est jamais utilisé réellement — les tests mockent les requêtes.)
|
||||||
|
RPA_API_TOKEN: "ci_test_token_not_used_for_real_auth_just_to_pass_import_check_0123456789"
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
# Job 1 — Lint (ruff + black --check)
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
# Non-bloquant : si ruff/black ne sont pas installables, on log
|
||||||
|
# un warning et on continue. L'objectif ici est d'alerter, pas de
|
||||||
|
# casser la CI pour des espaces en trop.
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
lint:
|
||||||
|
name: Lint (ruff + black)
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
timeout-minutes: 5
|
||||||
|
continue-on-error: true
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Checkout du code
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Setup Python 3.12
|
||||||
|
uses: actions/setup-python@v5
|
||||||
|
with:
|
||||||
|
python-version: "3.12"
|
||||||
|
cache: "pip"
|
||||||
|
|
||||||
|
- name: Installation des linters
|
||||||
|
run: |
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
pip install "ruff==0.6.9" "black==23.12.1" || {
|
||||||
|
echo "::warning::Impossible d'installer ruff/black — job ignoré"
|
||||||
|
exit 0
|
||||||
|
}
|
||||||
|
|
||||||
|
- name: Ruff (lint rapide)
|
||||||
|
run: |
|
||||||
|
if command -v ruff >/dev/null 2>&1; then
|
||||||
|
# Ruff : erreurs critiques uniquement (E9 syntax, F63 invalid print,
|
||||||
|
# F7 syntax, F82 undefined in __all__).
|
||||||
|
# F821 (undefined name) volontairement exclu le temps de nettoyer
|
||||||
|
# la dette technique préexistante (voir docs/STATUS.md).
|
||||||
|
# Dossiers legacy exclus :
|
||||||
|
# - agent_v0/deploy/windows_client/ : clone obsolète (marqué OBSOLÈTE)
|
||||||
|
# - tests/property/ : tests cassés connus (cf. MEMORY.md)
|
||||||
|
ruff check --select=E9,F63,F7,F82 --output-format=github \
|
||||||
|
--exclude "agent_v0/deploy/windows_client" \
|
||||||
|
--exclude "tests/property" \
|
||||||
|
--exclude "tests/integration/test_visual_rpa_checkpoint.py" \
|
||||||
|
core/ agent_v0/ tests/ || {
|
||||||
|
echo "::warning::Ruff a trouvé des erreurs critiques"
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
else
|
||||||
|
echo "::warning::ruff indisponible — skip"
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Black (format check)
|
||||||
|
run: |
|
||||||
|
if command -v black >/dev/null 2>&1; then
|
||||||
|
# --check : ne modifie pas, signale juste.
|
||||||
|
# Dossiers legacy exclus (cohérent avec ruff).
|
||||||
|
black --check --diff \
|
||||||
|
--exclude "agent_v0/deploy/windows_client|tests/property" \
|
||||||
|
core/ agent_v0/ tests/ || {
|
||||||
|
echo "::warning::Black suggère un reformatage — non bloquant"
|
||||||
|
exit 0
|
||||||
|
}
|
||||||
|
else
|
||||||
|
echo "::warning::black indisponible — skip"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
# Job 2 — Tests unitaires
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
# Exclut tous les marqueurs lourds. Utilise requirements-ci.txt
|
||||||
|
# pour éviter torch/CUDA (économie ~3 Go + ~2 min).
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
unit-tests:
|
||||||
|
name: Tests unitaires (sans GPU)
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
timeout-minutes: 10
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Checkout du code
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Setup Python 3.12
|
||||||
|
uses: actions/setup-python@v5
|
||||||
|
with:
|
||||||
|
python-version: "3.12"
|
||||||
|
cache: "pip"
|
||||||
|
cache-dependency-path: |
|
||||||
|
requirements-ci.txt
|
||||||
|
requirements.txt
|
||||||
|
|
||||||
|
- name: Installation des dépendances CI
|
||||||
|
run: |
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
if [ -f requirements-ci.txt ]; then
|
||||||
|
echo "Utilisation de requirements-ci.txt (léger, sans torch)"
|
||||||
|
pip install -r requirements-ci.txt
|
||||||
|
else
|
||||||
|
echo "::warning::requirements-ci.txt absent — fallback requirements.txt (lourd)"
|
||||||
|
pip install -r requirements.txt
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Vérification imports critiques
|
||||||
|
run: |
|
||||||
|
python -c "import pytest; print(f'pytest {pytest.__version__}')"
|
||||||
|
python -c "import sys; sys.path.insert(0, '.'); import core; print('core OK')" || {
|
||||||
|
echo "::error::Impossible d'importer core.*"
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
|
- name: Tests unitaires (hors slow/gpu/integration)
|
||||||
|
run: |
|
||||||
|
python -m pytest tests/unit/ \
|
||||||
|
-m "not slow and not gpu and not integration and not performance and not visual" \
|
||||||
|
--tb=short \
|
||||||
|
--strict-markers \
|
||||||
|
-q \
|
||||||
|
--maxfail=10 \
|
||||||
|
-o cache_dir=/tmp/.pytest_cache_ci
|
||||||
|
|
||||||
|
- name: Upload logs si échec
|
||||||
|
if: failure()
|
||||||
|
uses: actions/upload-artifact@v3
|
||||||
|
with:
|
||||||
|
name: pytest-logs
|
||||||
|
path: |
|
||||||
|
/tmp/.pytest_cache_ci
|
||||||
|
logs/
|
||||||
|
retention-days: 3
|
||||||
|
if-no-files-found: ignore
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
# Job 3 — Tests sécurité (bloquant)
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
# Les tests `test_security_*` valident des invariants critiques
|
||||||
|
# (évaluation sûre, sérialisation signée). Aucune régression tolérée.
|
||||||
|
# ----------------------------------------------------------------
|
||||||
|
security-tests:
|
||||||
|
name: Tests sécurité (critique)
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
timeout-minutes: 5
|
||||||
|
needs: [unit-tests]
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Checkout du code
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Setup Python 3.12
|
||||||
|
uses: actions/setup-python@v5
|
||||||
|
with:
|
||||||
|
python-version: "3.12"
|
||||||
|
cache: "pip"
|
||||||
|
cache-dependency-path: |
|
||||||
|
requirements-ci.txt
|
||||||
|
requirements.txt
|
||||||
|
|
||||||
|
- name: Installation des dépendances CI
|
||||||
|
run: |
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
if [ -f requirements-ci.txt ]; then
|
||||||
|
pip install -r requirements-ci.txt
|
||||||
|
else
|
||||||
|
pip install -r requirements.txt
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Tests sécurité (test_security_*)
|
||||||
|
run: |
|
||||||
|
python -m pytest tests/unit/test_security_*.py \
|
||||||
|
--tb=long \
|
||||||
|
--strict-markers \
|
||||||
|
-v \
|
||||||
|
-o cache_dir=/tmp/.pytest_cache_ci_sec
|
||||||
36
.gitignore
vendored
36
.gitignore
vendored
@@ -75,3 +75,39 @@ htmlcov/
|
|||||||
# === Backups ===
|
# === Backups ===
|
||||||
*_backup_*
|
*_backup_*
|
||||||
backups/
|
backups/
|
||||||
|
*.bak
|
||||||
|
*.bak_*
|
||||||
|
*.orig
|
||||||
|
*.old
|
||||||
|
|
||||||
|
# === Legacy / Triage ===
|
||||||
|
_a_trier/
|
||||||
|
archives/
|
||||||
|
|
||||||
|
# === Claude Code — worktrees et données locales ===
|
||||||
|
# Worktrees générés par la CLI Claude Code lors d'exécutions d'agents
|
||||||
|
# parallèles. Peuvent atteindre plusieurs centaines de Mo chacun.
|
||||||
|
# Ne jamais committer — gérer via `git worktree list` / `git worktree remove`.
|
||||||
|
.claude/
|
||||||
|
.kiro/
|
||||||
|
.mcp.json
|
||||||
|
.snapshots/
|
||||||
|
|
||||||
|
# === Données runtime (sessions, learning, buffer, config local) ===
|
||||||
|
data/
|
||||||
|
**/capture_library.json
|
||||||
|
.hypothesis/
|
||||||
|
.deps_installed
|
||||||
|
# Buffers SQLite locaux (streamer, cache)
|
||||||
|
**/buffer/
|
||||||
|
**/pending_events.db
|
||||||
|
# Databases applicatives (instance Flask)
|
||||||
|
**/instance/*.db
|
||||||
|
**/instance/*.sqlite
|
||||||
|
**/instance/*.sqlite3
|
||||||
|
# Caches et index locaux
|
||||||
|
*.sqlite
|
||||||
|
*.sqlite3
|
||||||
|
*.db-journal
|
||||||
|
*.db-wal
|
||||||
|
*.db-shm
|
||||||
|
|||||||
@@ -21,7 +21,12 @@ ollama serve
|
|||||||
### 3. Télécharger le modèle VLM
|
### 3. Télécharger le modèle VLM
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
ollama pull qwen3-vl:8b
|
# Modèle par défaut du projet (voir .env.example)
|
||||||
|
ollama pull gemma4:latest
|
||||||
|
|
||||||
|
# Alternatives supportées
|
||||||
|
# ollama pull qwen3-vl:8b
|
||||||
|
# ollama pull 0000/ui-tars-1.5-7b-q8_0:7b # grounder visuel
|
||||||
```
|
```
|
||||||
|
|
||||||
## Utilisation
|
## Utilisation
|
||||||
|
|||||||
331
README.md
331
README.md
@@ -1,207 +1,204 @@
|
|||||||
# RPA Vision V3 - 100% Vision-Based Workflow Automation
|
# RPA Vision V3 — Automatisation basée sur la compréhension visuelle des interfaces
|
||||||
|
|
||||||
## 📊 Status
|
> ⚠️ **Projet en phase POC** — voir [`docs/STATUS.md`](docs/STATUS.md) pour l'état
|
||||||
|
> réel par module. Certaines briques sont opérationnelles bout en bout,
|
||||||
|
> d'autres sont en cours de stabilisation. Ce dépôt n'est pas production-ready.
|
||||||
|
|
||||||
🚀 **PRODUCTION-READY** - Phase 12 Complete (77% System Completion) ✅
|
*Dernière mise à jour : 14 avril 2026*
|
||||||
|
|
||||||
**Latest Update**: 14 Décembre 2024
|
## Intention
|
||||||
- ✅ **10/13 Phases Complétées** - Système mature et fonctionnel
|
|
||||||
- ✅ **Performance Exceptionnelle** - 500-6250x plus rapide que requis
|
|
||||||
- ✅ **Architecture Entreprise** - 148k+ lignes, 19 modules, 6 specs complètes
|
|
||||||
- ✅ **Innovations Techniques** - Self-healing, Multi-modal, GPU management
|
|
||||||
- 📊 **Audit Complet** - [Rapport détaillé](AUDIT_COMPLET_SYSTEME_RPA_VISION_V3.md)
|
|
||||||
|
|
||||||
**Quick Test**: `bash test_clip.sh`
|
Automatiser des workflows métier par **compréhension sémantique de l'écran**
|
||||||
|
plutôt que par coordonnées de clic fixes. Le système observe l'utilisateur,
|
||||||
|
reconstruit un graphe d'états de l'interface, et cherche à rejouer la
|
||||||
|
procédure en reconnaissant visuellement les éléments cibles — y compris
|
||||||
|
quand l'UI change légèrement.
|
||||||
|
|
||||||
## 🎯 Vision
|
Terrain cible principal : postes hospitaliers (Citrix, applications métier
|
||||||
|
web et desktop). Contrainte forte : **100 % local**, pas d'appel à un LLM
|
||||||
|
cloud dans le pipeline par défaut.
|
||||||
|
|
||||||
RPA basé sur la **compréhension sémantique** des interfaces, pas sur des coordonnées de clics.
|
## Architecture en couches
|
||||||
|
|
||||||
Le système apprend des workflows en observant l'utilisateur et les automatise de manière robuste grâce à une architecture en 5 couches.
|
|
||||||
|
|
||||||
## 🏗️ Architecture en 5 Couches
|
|
||||||
|
|
||||||
```
|
```
|
||||||
RawSession (Couche 0)
|
RawSession (couche 0) — capture événements + screenshots
|
||||||
↓
|
↓
|
||||||
ScreenState (Couche 1) - 4 niveaux d'abstraction
|
ScreenState (couche 1) — états d'écran à plusieurs niveaux d'abstraction
|
||||||
↓
|
↓
|
||||||
UIElement Detection (Couche 2) - Types + Rôles sémantiques
|
UIElement (couche 2) — détection sémantique (cascade OCR + templates + VLM)
|
||||||
↓
|
↓
|
||||||
State Embedding (Couche 3) - Fusion multi-modale
|
State Embedding (couche 3) — fusion multi-modale + index FAISS
|
||||||
↓
|
↓
|
||||||
Workflow Graph (Couche 4) - Nodes + Edges + Learning States
|
Workflow Graph (couche 4) — nœuds, transitions, résolution de cibles
|
||||||
```
|
```
|
||||||
|
|
||||||
## 📁 Structure
|
## État des fonctionnalités (synthèse)
|
||||||
|
|
||||||
```
|
Le détail par module est dans [`docs/STATUS.md`](docs/STATUS.md).
|
||||||
rpa_vision_v3/
|
|
||||||
├── core/
|
|
||||||
│ ├── models/ # Couches 0-4 : Structures de données
|
|
||||||
│ ├── capture/ # Couche 0 : Capture événements + screenshots
|
|
||||||
│ ├── detection/ # Couche 2 : Détection UI sémantique
|
|
||||||
│ ├── embedding/ # Couche 3 : Fusion multi-modale + FAISS
|
|
||||||
│ ├── graph/ # Couche 4 : Construction + Matching + Exécution
|
|
||||||
│ └── persistence/ # Sauvegarde/Chargement
|
|
||||||
├── data/
|
|
||||||
│ ├── sessions/ # RawSessions
|
|
||||||
│ ├── screen_states/ # ScreenStates
|
|
||||||
│ ├── embeddings/ # Vecteurs .npy
|
|
||||||
│ ├── faiss_index/ # Index FAISS
|
|
||||||
│ └── workflows/ # Workflow Graphs
|
|
||||||
└── tests/ # Tests unitaires + intégration
|
|
||||||
```
|
|
||||||
|
|
||||||
## 🚀 Démarrage Rapide
|
**Opérationnel**
|
||||||
|
- Capture Windows (Agent V1) + streaming vers serveur Linux
|
||||||
|
- Stockage des sessions brutes (screenshots + événements)
|
||||||
|
- Streaming server FastAPI, sessions en mémoire
|
||||||
|
- Build du package Windows (`deploy/build_package.sh`)
|
||||||
|
|
||||||
|
**Alpha (fonctionnel sur un cas de référence, encore peu généralisé)**
|
||||||
|
- Détection UI par cascade VLM + OCR + templates
|
||||||
|
- Construction de workflow graph depuis une session
|
||||||
|
- Replay E2E supervisé — premier succès sur Notepad le 13 avril 2026
|
||||||
|
- Mode apprentissage : pause et demande d'aide humaine quand la résolution échoue
|
||||||
|
- Embeddings CLIP + index FAISS
|
||||||
|
- Module auth (Fernet + TOTP), federation (LearningPack)
|
||||||
|
- Web Dashboard, Agent Chat
|
||||||
|
|
||||||
|
**En cours**
|
||||||
|
- Visual Workflow Builder (VWB) — bugs DB runtime connus
|
||||||
|
- Self-healing / recovery global
|
||||||
|
- Analytics / reporting
|
||||||
|
- Worker de compilation sessions → ExecutionPlan
|
||||||
|
- Tests E2E multi-applications
|
||||||
|
|
||||||
|
## Limitations connues
|
||||||
|
|
||||||
|
- Le pipeline de replay est validé sur un nombre très restreint d'applications.
|
||||||
|
- `TargetMemoryStore` (apprentissage Phase 1) est câblé mais sa base reste
|
||||||
|
vide tant qu'un replay complet n'a pas été cristallisé.
|
||||||
|
- Certaines asymétries entre chemins stricts et legacy dans le serveur de
|
||||||
|
streaming peuvent provoquer des arrêts au lieu de pauses d'apprentissage.
|
||||||
|
- VWB n'est pas encore stable en écriture ; un outil dédié plus simple est
|
||||||
|
envisagé.
|
||||||
|
|
||||||
|
## Démarrage
|
||||||
|
|
||||||
|
### Prérequis
|
||||||
|
|
||||||
|
- Python 3.10 à 3.12
|
||||||
|
- [Ollama](https://ollama.ai) installé et démarré localement
|
||||||
|
- Recommandé : GPU NVIDIA pour l'inférence VLM
|
||||||
|
- Windows 10/11 uniquement pour le client Agent V1
|
||||||
|
|
||||||
### Installation
|
### Installation
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# 1. Installer Ollama
|
# 1) Cloner puis créer le venv
|
||||||
curl -fsSL https://ollama.ai/install.sh | sh # Linux
|
python3 -m venv .venv
|
||||||
# ou
|
source .venv/bin/activate
|
||||||
brew install ollama # macOS
|
|
||||||
|
|
||||||
# 2. Démarrer Ollama
|
|
||||||
ollama serve
|
|
||||||
|
|
||||||
# 3. Télécharger le modèle VLM
|
|
||||||
ollama pull qwen3-vl:8b
|
|
||||||
|
|
||||||
# 4. Installer dépendances Python
|
|
||||||
pip install -r requirements.txt
|
pip install -r requirements.txt
|
||||||
|
|
||||||
|
# 2) Démarrer Ollama et récupérer le modèle VLM par défaut
|
||||||
|
ollama serve &
|
||||||
|
ollama pull gemma4:latest # défaut du projet
|
||||||
|
# Alternatives supportées :
|
||||||
|
# ollama pull qwen3-vl:8b
|
||||||
|
# ollama pull 0000/ui-tars-1.5-7b-q8_0:7b # grounder visuel
|
||||||
|
|
||||||
|
# 3) Copier et ajuster la configuration
|
||||||
|
cp .env.example .env
|
||||||
|
# éditer .env pour vérifier RPA_VLM_MODEL, VLM_ENDPOINT, ports, etc.
|
||||||
```
|
```
|
||||||
|
|
||||||
### Test Rapide
|
### Lancer les services
|
||||||
|
|
||||||
|
Tous les services sont pilotés par `svc.sh` (source de vérité des ports :
|
||||||
|
`services.conf`).
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Diagnostic système
|
./svc.sh status # État de tous les services
|
||||||
python3 rpa_vision_v3/examples/diagnostic_vlm.py
|
./svc.sh start # Tout démarrer
|
||||||
|
./svc.sh start streaming # Streaming server uniquement (port 5005)
|
||||||
# Test de détection
|
./svc.sh restart api # Redémarrer l'API (port 8000)
|
||||||
./rpa_vision_v3/test_quick.sh
|
./svc.sh stop # Tout arrêter
|
||||||
```
|
```
|
||||||
|
|
||||||
### Utilisation - Détection UI
|
| Port | Service |
|
||||||
|
|---|---|
|
||||||
|
| 8000 | API Server (upload / traitement core) |
|
||||||
|
| 5001 | Web Dashboard |
|
||||||
|
| 5002 | VWB Backend (Flask) |
|
||||||
|
| 5003 | Monitoring |
|
||||||
|
| 5004 | Agent Chat |
|
||||||
|
| 5005 | Streaming Server (Agent V1 → pipeline core) |
|
||||||
|
| 5006 | Session Cleaner |
|
||||||
|
| 5099 | Worker de compilation (optionnel) |
|
||||||
|
| 3002 | VWB Frontend (Vite/React) |
|
||||||
|
|
||||||
```python
|
### Client Windows (Agent V1)
|
||||||
from rpa_vision_v3.core.detection import create_detector
|
|
||||||
|
|
||||||
# Créer le détecteur
|
Le client capture souris, clavier et écran sur le poste Windows et envoie
|
||||||
detector = create_detector()
|
les données au streaming server Linux.
|
||||||
|
|
||||||
# Détecter les éléments UI
|
|
||||||
elements = detector.detect("screenshot.png")
|
|
||||||
|
|
||||||
# Utiliser les résultats
|
|
||||||
for elem in elements:
|
|
||||||
print(f"{elem.type:15s} | {elem.role:20s} | {elem.label}")
|
|
||||||
```
|
|
||||||
|
|
||||||
### Utilisation - Workflow (Phase 4 - À venir)
|
|
||||||
|
|
||||||
```python
|
|
||||||
from rpa_vision_v3.core.models import RawSession, ScreenState, Workflow
|
|
||||||
from rpa_vision_v3.core.graph import GraphBuilder, NodeMatcher
|
|
||||||
|
|
||||||
# 1. Capturer une session
|
|
||||||
session = RawSession(...)
|
|
||||||
# ... capturer événements et screenshots
|
|
||||||
|
|
||||||
# 2. Construire workflow automatiquement
|
|
||||||
builder = GraphBuilder(...)
|
|
||||||
workflow = builder.build_from_session(session)
|
|
||||||
|
|
||||||
# 3. Matcher état actuel
|
|
||||||
matcher = NodeMatcher(...)
|
|
||||||
current_state = ScreenState(...)
|
|
||||||
match = matcher.match(current_state, workflow)
|
|
||||||
|
|
||||||
# 4. Exécuter action
|
|
||||||
if match:
|
|
||||||
edge = workflow.get_outgoing_edges(match.node.node_id)[0]
|
|
||||||
executor.execute_edge(edge, current_state)
|
|
||||||
```
|
|
||||||
|
|
||||||
## 📚 Documentation
|
|
||||||
|
|
||||||
### Guides Principaux
|
|
||||||
- **Quick Start** : `QUICK_START.md` - Démarrage rapide
|
|
||||||
- **Prochaines Étapes** : `NEXT_STEPS.md` - Roadmap et Phase 4
|
|
||||||
- **Phase 3 Complète** : `PHASE3_COMPLETE.md` - Résumé Phase 3
|
|
||||||
|
|
||||||
### Documentation Technique
|
|
||||||
- **Spec complète** : `.kiro/specs/workflow-graph-implementation/`
|
|
||||||
- **Architecture** : `docs/reference/ARCHITECTURE_VISION_COMPLETE.md`
|
|
||||||
- **Détection Hybride** : `HYBRID_DETECTION_SUMMARY.md`
|
|
||||||
- **Intégration Ollama** : `docs/OLLAMA_INTEGRATION.md`
|
|
||||||
|
|
||||||
## 🎓 Concepts Clés
|
|
||||||
|
|
||||||
### RPA 100% Vision
|
|
||||||
|
|
||||||
- ❌ Pas de coordonnées (x, y) fixes
|
|
||||||
- ✅ Rôles sémantiques (primary_action, form_input, etc.)
|
|
||||||
- ✅ Matching par similarité visuelle et textuelle
|
|
||||||
- ✅ Robuste aux changements d'UI
|
|
||||||
|
|
||||||
### Apprentissage Progressif
|
|
||||||
|
|
||||||
```
|
|
||||||
OBSERVATION (5+ exécutions)
|
|
||||||
↓
|
|
||||||
COACHING (10+ assistances, succès >90%)
|
|
||||||
↓
|
|
||||||
AUTO_CANDIDATE (20+ exécutions, succès >95%)
|
|
||||||
↓
|
|
||||||
AUTO_CONFIRMÉ (validation utilisateur)
|
|
||||||
```
|
|
||||||
|
|
||||||
### State Embedding
|
|
||||||
|
|
||||||
Fusion multi-modale :
|
|
||||||
- 50% Image (screenshot complet)
|
|
||||||
- 30% Texte (texte détecté)
|
|
||||||
- 10% Titre (fenêtre)
|
|
||||||
- 10% UI (éléments détectés)
|
|
||||||
|
|
||||||
## 🧪 Tests
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Tests unitaires
|
# Build du package Windows depuis le repo Linux
|
||||||
pytest tests/unit/
|
./deploy/build_package.sh
|
||||||
|
# produit deploy/Lea_v<version>.zip
|
||||||
# Tests d'intégration
|
|
||||||
pytest tests/integration/
|
|
||||||
|
|
||||||
# Tests de performance
|
|
||||||
pytest tests/performance/ --benchmark-only
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## 📈 Roadmap - 77% Complété (10/13 Phases)
|
Voir [`docs/DEV_SETUP.md`](docs/DEV_SETUP.md) pour la maintenance du dépôt
|
||||||
|
(worktrees, build, services).
|
||||||
|
|
||||||
### ✅ **Phases Complétées**
|
## Arborescence du dépôt
|
||||||
- [x] **Phase 1-2** : Fondations + Embeddings FAISS ✅
|
|
||||||
- [x] **Phase 4-6** : Détection UI + Workflow Graphs + Action Execution ✅
|
|
||||||
- [x] **Phase 7-8** : Learning System + Training System ✅
|
|
||||||
- [x] **Phase 10-12** : GPU Management + Performance + Monitoring ✅
|
|
||||||
|
|
||||||
### 🎯 **Phases Restantes**
|
```
|
||||||
- [ ] **Phase 3** : Checkpoint Final (tests storage)
|
rpa_vision_v3/
|
||||||
- [ ] **Phase 9** : Visual Workflow Builder (90% → 100%)
|
├── agent_v0/ # Agent V1 (client Windows) + serveur de streaming
|
||||||
- [ ] **Phase 13** : Tests End-to-End + Documentation finale
|
│ ├── agent_v1/ # Source de l'agent (capture, UI tray, exécution)
|
||||||
|
│ └── server_v1/ # FastAPI streaming + processeurs
|
||||||
|
├── core/ # Pipeline core
|
||||||
|
│ ├── detection/ # Cascade VLM + OCR + templates
|
||||||
|
│ ├── embedding/ # CLIP + FAISS
|
||||||
|
│ ├── graph/ # Construction / matching de workflow graphs
|
||||||
|
│ ├── execution/ # Résolution de cibles, actions LLM
|
||||||
|
│ ├── learning/ # TargetMemoryStore (apprentissage)
|
||||||
|
│ ├── auth/ # Vault Fernet + TOTP
|
||||||
|
│ └── federation/ # Export/import de LearningPacks
|
||||||
|
├── visual_workflow_builder/ # VWB (backend Flask + frontend React Vite)
|
||||||
|
├── web_dashboard/ # Dashboard Flask + SocketIO
|
||||||
|
├── agent_chat/ # Interface conversationnelle + planner
|
||||||
|
├── deploy/ # Scripts de build et unités systemd
|
||||||
|
├── data/ # Sessions, embeddings, index FAISS, apprentissage
|
||||||
|
├── docs/ # Documentation technique
|
||||||
|
├── tests/ # pytest (unit, integration, e2e)
|
||||||
|
├── services.conf # Source de vérité des ports
|
||||||
|
├── svc.sh # Orchestrateur des services
|
||||||
|
└── run.sh # Démarrage tout-en-un (legacy, préférer svc.sh)
|
||||||
|
```
|
||||||
|
|
||||||
### 🚀 **Composants Production-Ready**
|
## Tests
|
||||||
- **Agent V0** : Capture cross-platform + Encryption ✅
|
|
||||||
- **Server API** : Processing pipeline + Web dashboard ✅
|
|
||||||
- **Analytics System** : Monitoring + Insights + Reporting ✅
|
|
||||||
- **Self-Healing** : Automatic adaptation + Recovery ✅
|
|
||||||
|
|
||||||
## 🤝 Contribution
|
```bash
|
||||||
|
source .venv/bin/activate
|
||||||
|
|
||||||
Voir `.kiro/specs/workflow-graph-implementation/tasks.md` pour les tâches en cours.
|
# Tests rapides (hors marqueur slow)
|
||||||
|
pytest -m "not slow" -q
|
||||||
|
|
||||||
## 📄 Licence
|
# Tests d'intégration (streaming, pipeline)
|
||||||
|
pytest tests/integration/ -q
|
||||||
|
|
||||||
Propriétaire - Tous droits réservés
|
# Tests E2E
|
||||||
|
pytest tests/test_pipeline_e2e.py -q
|
||||||
|
```
|
||||||
|
|
||||||
|
Quelques tests legacy sont connus comme cassés — voir la mémoire projet et
|
||||||
|
`docs/` pour la liste.
|
||||||
|
|
||||||
|
## Documentation
|
||||||
|
|
||||||
|
- [`docs/STATUS.md`](docs/STATUS.md) — état réel par module
|
||||||
|
- [`docs/DEV_SETUP.md`](docs/DEV_SETUP.md) — tâches d'administration (worktrees, build)
|
||||||
|
- [`docs/EXECUTION_LOOP_FLAGS.md`](docs/EXECUTION_LOOP_FLAGS.md) — flags C1 vision-aware (`enable_ui_detection`, `enable_ocr`, `analyze_timeout_ms`, `window_info_provider`)
|
||||||
|
- [`docs/VISION_RPA_INTELLIGENT.md`](docs/VISION_RPA_INTELLIGENT.md) — cahier des charges
|
||||||
|
- [`docs/PLAN_ACTEUR_V1.md`](docs/PLAN_ACTEUR_V1.md) — architecture 3 niveaux (Macro / Méso / Micro)
|
||||||
|
- [`docs/CONFORMITE_AI_ACT.md`](docs/CONFORMITE_AI_ACT.md) — journalisation, floutage, rétention
|
||||||
|
|
||||||
|
## Concepts clés
|
||||||
|
|
||||||
|
- **RPA 100 % vision** : pas de coordonnées fixes ; l'agent localise un
|
||||||
|
élément par ce qu'il voit (label + contexte visuel), pas par `x,y`.
|
||||||
|
- **Apprentissage progressif** : mode shadow → assisté → autonome, validé
|
||||||
|
par supervision humaine sur les échecs.
|
||||||
|
- **LLM 100 % local** : Ollama sur la machine. Aucun appel cloud dans le
|
||||||
|
pipeline par défaut (cf. feedback projet `feedback_local_only.md`).
|
||||||
|
|
||||||
|
## Licence
|
||||||
|
|
||||||
|
Propriétaire — tous droits réservés.
|
||||||
|
|||||||
@@ -147,8 +147,10 @@ class AutonomousPlanner:
|
|||||||
"""Initialise le client VLM pour analyse intelligente."""
|
"""Initialise le client VLM pour analyse intelligente."""
|
||||||
if VLM_AVAILABLE and OllamaClient:
|
if VLM_AVAILABLE and OllamaClient:
|
||||||
try:
|
try:
|
||||||
self._vlm_client = OllamaClient(model="qwen2.5vl:7b")
|
from core.detection.vlm_config import get_vlm_model
|
||||||
logger.info("VLM client initialized (qwen2.5vl:7b)")
|
_planner_vlm = get_vlm_model()
|
||||||
|
self._vlm_client = OllamaClient(model=_planner_vlm)
|
||||||
|
logger.info("VLM client initialized (%s)", _planner_vlm)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Could not initialize VLM client: {e}")
|
logger.warning(f"Could not initialize VLM client: {e}")
|
||||||
self._vlm_client = None
|
self._vlm_client = None
|
||||||
|
|||||||
3
agent_rust/lea_uia/.gitignore
vendored
Normal file
3
agent_rust/lea_uia/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
target/
|
||||||
|
**/target/
|
||||||
|
|
||||||
384
agent_rust/lea_uia/Cargo.lock
generated
Normal file
384
agent_rust/lea_uia/Cargo.lock
generated
Normal file
@@ -0,0 +1,384 @@
|
|||||||
|
# This file is automatically @generated by Cargo.
|
||||||
|
# It is not intended for manual editing.
|
||||||
|
version = 4
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstream"
|
||||||
|
version = "1.0.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d"
|
||||||
|
dependencies = [
|
||||||
|
"anstyle",
|
||||||
|
"anstyle-parse",
|
||||||
|
"anstyle-query",
|
||||||
|
"anstyle-wincon",
|
||||||
|
"colorchoice",
|
||||||
|
"is_terminal_polyfill",
|
||||||
|
"utf8parse",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstyle"
|
||||||
|
version = "1.0.14"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstyle-parse"
|
||||||
|
version = "1.0.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e"
|
||||||
|
dependencies = [
|
||||||
|
"utf8parse",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstyle-query"
|
||||||
|
version = "1.1.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc"
|
||||||
|
dependencies = [
|
||||||
|
"windows-sys",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstyle-wincon"
|
||||||
|
version = "3.0.11"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d"
|
||||||
|
dependencies = [
|
||||||
|
"anstyle",
|
||||||
|
"once_cell_polyfill",
|
||||||
|
"windows-sys",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clap"
|
||||||
|
version = "4.6.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b193af5b67834b676abd72466a96c1024e6a6ad978a1f484bd90b85c94041351"
|
||||||
|
dependencies = [
|
||||||
|
"clap_builder",
|
||||||
|
"clap_derive",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clap_builder"
|
||||||
|
version = "4.6.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f"
|
||||||
|
dependencies = [
|
||||||
|
"anstream",
|
||||||
|
"anstyle",
|
||||||
|
"clap_lex",
|
||||||
|
"strsim",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clap_derive"
|
||||||
|
version = "4.6.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1110bd8a634a1ab8cb04345d8d878267d57c3cf1b38d91b71af6686408bbca6a"
|
||||||
|
dependencies = [
|
||||||
|
"heck",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clap_lex"
|
||||||
|
version = "1.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "colorchoice"
|
||||||
|
version = "1.0.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "heck"
|
||||||
|
version = "0.5.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "is_terminal_polyfill"
|
||||||
|
version = "1.70.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "itoa"
|
||||||
|
version = "1.0.18"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "lea_uia"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"clap",
|
||||||
|
"serde",
|
||||||
|
"serde_json",
|
||||||
|
"windows",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "memchr"
|
||||||
|
version = "2.8.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "once_cell_polyfill"
|
||||||
|
version = "1.70.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "proc-macro2"
|
||||||
|
version = "1.0.106"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
|
||||||
|
dependencies = [
|
||||||
|
"unicode-ident",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "quote"
|
||||||
|
version = "1.0.45"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "serde"
|
||||||
|
version = "1.0.228"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
|
||||||
|
dependencies = [
|
||||||
|
"serde_core",
|
||||||
|
"serde_derive",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "serde_core"
|
||||||
|
version = "1.0.228"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
|
||||||
|
dependencies = [
|
||||||
|
"serde_derive",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "serde_derive"
|
||||||
|
version = "1.0.228"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "serde_json"
|
||||||
|
version = "1.0.149"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
|
||||||
|
dependencies = [
|
||||||
|
"itoa",
|
||||||
|
"memchr",
|
||||||
|
"serde",
|
||||||
|
"serde_core",
|
||||||
|
"zmij",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "strsim"
|
||||||
|
version = "0.11.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "syn"
|
||||||
|
version = "2.0.117"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"unicode-ident",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicode-ident"
|
||||||
|
version = "1.0.24"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "utf8parse"
|
||||||
|
version = "0.2.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows"
|
||||||
|
version = "0.59.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7f919aee0a93304be7f62e8e5027811bbba96bcb1de84d6618be56e43f8a32a1"
|
||||||
|
dependencies = [
|
||||||
|
"windows-core",
|
||||||
|
"windows-targets",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-core"
|
||||||
|
version = "0.59.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "810ce18ed2112484b0d4e15d022e5f598113e220c53e373fb31e67e21670c1ce"
|
||||||
|
dependencies = [
|
||||||
|
"windows-implement",
|
||||||
|
"windows-interface",
|
||||||
|
"windows-result",
|
||||||
|
"windows-strings",
|
||||||
|
"windows-targets",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-implement"
|
||||||
|
version = "0.59.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "83577b051e2f49a058c308f17f273b570a6a758386fc291b5f6a934dd84e48c1"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-interface"
|
||||||
|
version = "0.59.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-link"
|
||||||
|
version = "0.1.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-link"
|
||||||
|
version = "0.2.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-result"
|
||||||
|
version = "0.3.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6"
|
||||||
|
dependencies = [
|
||||||
|
"windows-link 0.1.3",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-strings"
|
||||||
|
version = "0.3.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "87fa48cc5d406560701792be122a10132491cff9d0aeb23583cc2dcafc847319"
|
||||||
|
dependencies = [
|
||||||
|
"windows-link 0.1.3",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-sys"
|
||||||
|
version = "0.61.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
|
||||||
|
dependencies = [
|
||||||
|
"windows-link 0.2.1",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-targets"
|
||||||
|
version = "0.53.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3"
|
||||||
|
dependencies = [
|
||||||
|
"windows-link 0.2.1",
|
||||||
|
"windows_aarch64_gnullvm",
|
||||||
|
"windows_aarch64_msvc",
|
||||||
|
"windows_i686_gnu",
|
||||||
|
"windows_i686_gnullvm",
|
||||||
|
"windows_i686_msvc",
|
||||||
|
"windows_x86_64_gnu",
|
||||||
|
"windows_x86_64_gnullvm",
|
||||||
|
"windows_x86_64_msvc",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_aarch64_gnullvm"
|
||||||
|
version = "0.53.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_aarch64_msvc"
|
||||||
|
version = "0.53.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_i686_gnu"
|
||||||
|
version = "0.53.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_i686_gnullvm"
|
||||||
|
version = "0.53.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_i686_msvc"
|
||||||
|
version = "0.53.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_x86_64_gnu"
|
||||||
|
version = "0.53.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_x86_64_gnullvm"
|
||||||
|
version = "0.53.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_x86_64_msvc"
|
||||||
|
version = "0.53.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "zmij"
|
||||||
|
version = "1.0.21"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"
|
||||||
34
agent_rust/lea_uia/Cargo.toml
Normal file
34
agent_rust/lea_uia/Cargo.toml
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
[package]
|
||||||
|
name = "lea_uia"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
authors = ["Dom <dom@rpa-vision-v3>"]
|
||||||
|
description = "Helper Windows UI Automation pour Léa (agent RPA V3)"
|
||||||
|
license = "Proprietary"
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "lea_uia"
|
||||||
|
path = "src/main.rs"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
clap = { version = "4.5", features = ["derive"] }
|
||||||
|
serde = { version = "1.0", features = ["derive"] }
|
||||||
|
serde_json = "1.0"
|
||||||
|
|
||||||
|
[target.'cfg(windows)'.dependencies]
|
||||||
|
windows = { version = "0.59", features = [
|
||||||
|
"Win32_Foundation",
|
||||||
|
"Win32_System_Com",
|
||||||
|
"Win32_System_Ole",
|
||||||
|
"Win32_System_Variant",
|
||||||
|
"Win32_UI_Accessibility",
|
||||||
|
"Win32_UI_WindowsAndMessaging",
|
||||||
|
"Win32_Graphics_Gdi",
|
||||||
|
] }
|
||||||
|
|
||||||
|
[profile.release]
|
||||||
|
opt-level = "z" # Taille minimale
|
||||||
|
lto = true # Link-time optimization
|
||||||
|
codegen-units = 1 # Meilleure optimisation
|
||||||
|
strip = true # Retirer les symboles
|
||||||
|
panic = "abort" # Pas d'unwinding → binaire plus petit
|
||||||
564
agent_rust/lea_uia/src/main.rs
Normal file
564
agent_rust/lea_uia/src/main.rs
Normal file
@@ -0,0 +1,564 @@
|
|||||||
|
// lea_uia — Helper Windows UI Automation pour Léa
|
||||||
|
//
|
||||||
|
// Binaire standalone qui expose 3 commandes UIA :
|
||||||
|
// query → retourne l'élément UIA à une position (x, y)
|
||||||
|
// find → retrouve un élément par son chemin logique
|
||||||
|
// capture → liste les éléments visibles (debug)
|
||||||
|
//
|
||||||
|
// Communication avec l'agent Python via stdin/stdout JSON.
|
||||||
|
// Tous les appels sont non-bloquants et retournent du JSON structuré.
|
||||||
|
//
|
||||||
|
// Sur Linux (développement) : retourne des stubs d'erreur.
|
||||||
|
// Sur Windows : utilise UIAutomationCore via `windows-rs`.
|
||||||
|
|
||||||
|
use clap::{Parser, Subcommand};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
#[derive(Parser)]
|
||||||
|
#[command(name = "lea_uia")]
|
||||||
|
#[command(about = "Helper UI Automation pour Léa", long_about = None)]
|
||||||
|
#[command(version)]
|
||||||
|
struct Cli {
|
||||||
|
#[command(subcommand)]
|
||||||
|
command: Commands,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Subcommand)]
|
||||||
|
enum Commands {
|
||||||
|
/// Retourner l'élément UIA à une position donnée (x, y en pixels écran)
|
||||||
|
Query {
|
||||||
|
/// Coordonnée X (pixels)
|
||||||
|
#[arg(long)]
|
||||||
|
x: i32,
|
||||||
|
/// Coordonnée Y (pixels)
|
||||||
|
#[arg(long)]
|
||||||
|
y: i32,
|
||||||
|
/// Inclure la hiérarchie des parents (peut être lent)
|
||||||
|
#[arg(long, default_value_t = true)]
|
||||||
|
with_parents: bool,
|
||||||
|
},
|
||||||
|
/// Rechercher un élément par son chemin logique ou son nom
|
||||||
|
Find {
|
||||||
|
/// Nom de l'élément (Name property)
|
||||||
|
#[arg(long)]
|
||||||
|
name: Option<String>,
|
||||||
|
/// Type de contrôle (Button, Edit, MenuItem, etc.)
|
||||||
|
#[arg(long)]
|
||||||
|
control_type: Option<String>,
|
||||||
|
/// AutomationId
|
||||||
|
#[arg(long)]
|
||||||
|
automation_id: Option<String>,
|
||||||
|
/// Limite la recherche à cette fenêtre (titre exact)
|
||||||
|
#[arg(long)]
|
||||||
|
window: Option<String>,
|
||||||
|
/// Timeout en millisecondes
|
||||||
|
#[arg(long, default_value_t = 2000)]
|
||||||
|
timeout_ms: u32,
|
||||||
|
},
|
||||||
|
/// Lister tous les éléments visibles de la fenêtre active (debug)
|
||||||
|
Capture {
|
||||||
|
/// Profondeur maximale de l'arbre
|
||||||
|
#[arg(long, default_value_t = 3)]
|
||||||
|
max_depth: u32,
|
||||||
|
},
|
||||||
|
/// Vérifier que UIA est disponible et fonctionnel
|
||||||
|
Health,
|
||||||
|
}
|
||||||
|
|
||||||
|
// =========================================================================
|
||||||
|
// Modèles de sortie JSON
|
||||||
|
// =========================================================================
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||||
|
struct UiaElement {
|
||||||
|
/// Nom visible de l'élément
|
||||||
|
name: String,
|
||||||
|
/// Type de contrôle (Button, Edit, MenuItem, Window, ...)
|
||||||
|
control_type: String,
|
||||||
|
/// Classe Windows (Edit, Static, #32770, ...)
|
||||||
|
class_name: String,
|
||||||
|
/// AutomationId (ID interne, parfois vide)
|
||||||
|
automation_id: String,
|
||||||
|
/// Rectangle absolu [x1, y1, x2, y2] en pixels écran
|
||||||
|
bounding_rect: [i32; 4],
|
||||||
|
/// Est-ce que l'élément est activable
|
||||||
|
is_enabled: bool,
|
||||||
|
/// Est-ce que l'élément est visible
|
||||||
|
is_offscreen: bool,
|
||||||
|
/// Hiérarchie des parents (chemin logique)
|
||||||
|
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||||
|
parent_path: Vec<ParentHint>,
|
||||||
|
/// Process owning this element
|
||||||
|
#[serde(skip_serializing_if = "String::is_empty")]
|
||||||
|
process_name: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||||
|
struct ParentHint {
|
||||||
|
name: String,
|
||||||
|
control_type: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize, Debug)]
|
||||||
|
#[serde(tag = "status")]
|
||||||
|
enum UiaResponse {
|
||||||
|
#[serde(rename = "ok")]
|
||||||
|
Ok {
|
||||||
|
element: Option<UiaElement>,
|
||||||
|
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||||
|
elements: Vec<UiaElement>,
|
||||||
|
elapsed_ms: u64,
|
||||||
|
},
|
||||||
|
#[serde(rename = "not_found")]
|
||||||
|
NotFound {
|
||||||
|
reason: String,
|
||||||
|
elapsed_ms: u64,
|
||||||
|
},
|
||||||
|
#[serde(rename = "error")]
|
||||||
|
Error {
|
||||||
|
message: String,
|
||||||
|
code: String,
|
||||||
|
},
|
||||||
|
#[serde(rename = "unavailable")]
|
||||||
|
Unavailable {
|
||||||
|
reason: String,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
// =========================================================================
|
||||||
|
// Implémentation Windows
|
||||||
|
// =========================================================================
|
||||||
|
|
||||||
|
#[cfg(windows)]
|
||||||
|
mod uia_impl {
|
||||||
|
use super::*;
|
||||||
|
use std::time::Instant;
|
||||||
|
use windows::Win32::Foundation::POINT;
|
||||||
|
use windows::Win32::System::Com::{
|
||||||
|
CoCreateInstance, CoInitializeEx, CoUninitialize, CLSCTX_INPROC_SERVER,
|
||||||
|
COINIT_APARTMENTTHREADED,
|
||||||
|
};
|
||||||
|
use windows::Win32::UI::Accessibility::{
|
||||||
|
CUIAutomation, IUIAutomation, IUIAutomationElement, IUIAutomationTreeWalker,
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ComGuard;
|
||||||
|
impl ComGuard {
|
||||||
|
fn new() -> windows::core::Result<Self> {
|
||||||
|
unsafe {
|
||||||
|
let hr = CoInitializeEx(None, COINIT_APARTMENTTHREADED);
|
||||||
|
if hr.is_err() {
|
||||||
|
// RPC_E_CHANGED_MODE : le thread est déjà initialisé → OK
|
||||||
|
let code = hr.0 as u32;
|
||||||
|
if code != 0x80010106 {
|
||||||
|
return Err(windows::core::Error::from(hr));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(Self)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl Drop for ComGuard {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
unsafe { CoUninitialize() };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_automation() -> windows::core::Result<IUIAutomation> {
|
||||||
|
unsafe { CoCreateInstance(&CUIAutomation, None, CLSCTX_INPROC_SERVER) }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn element_to_struct(
|
||||||
|
element: &IUIAutomationElement,
|
||||||
|
with_parents: bool,
|
||||||
|
) -> windows::core::Result<UiaElement> {
|
||||||
|
let mut result = UiaElement {
|
||||||
|
name: String::new(),
|
||||||
|
control_type: String::new(),
|
||||||
|
class_name: String::new(),
|
||||||
|
automation_id: String::new(),
|
||||||
|
bounding_rect: [0, 0, 0, 0],
|
||||||
|
is_enabled: false,
|
||||||
|
is_offscreen: true,
|
||||||
|
parent_path: Vec::new(),
|
||||||
|
process_name: String::new(),
|
||||||
|
};
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
if let Ok(name) = element.CurrentName() {
|
||||||
|
result.name = name.to_string();
|
||||||
|
}
|
||||||
|
if let Ok(ct) = element.CurrentLocalizedControlType() {
|
||||||
|
result.control_type = ct.to_string();
|
||||||
|
}
|
||||||
|
if let Ok(cn) = element.CurrentClassName() {
|
||||||
|
result.class_name = cn.to_string();
|
||||||
|
}
|
||||||
|
if let Ok(aid) = element.CurrentAutomationId() {
|
||||||
|
result.automation_id = aid.to_string();
|
||||||
|
}
|
||||||
|
if let Ok(rect) = element.CurrentBoundingRectangle() {
|
||||||
|
result.bounding_rect = [rect.left, rect.top, rect.right, rect.bottom];
|
||||||
|
}
|
||||||
|
if let Ok(enabled) = element.CurrentIsEnabled() {
|
||||||
|
result.is_enabled = enabled.as_bool();
|
||||||
|
}
|
||||||
|
if let Ok(offscreen) = element.CurrentIsOffscreen() {
|
||||||
|
result.is_offscreen = offscreen.as_bool();
|
||||||
|
}
|
||||||
|
if with_parents {
|
||||||
|
// Remonter la hiérarchie jusqu'à la Window root
|
||||||
|
if let Ok(automation) = get_automation() {
|
||||||
|
let walker = automation.ControlViewWalker();
|
||||||
|
if let Ok(walker) = walker {
|
||||||
|
let mut current = element.clone();
|
||||||
|
for _ in 0..10 {
|
||||||
|
match walker.GetParentElement(¤t) {
|
||||||
|
Ok(parent) => {
|
||||||
|
let name = parent
|
||||||
|
.CurrentName()
|
||||||
|
.map(|n| n.to_string())
|
||||||
|
.unwrap_or_default();
|
||||||
|
let ct = parent
|
||||||
|
.CurrentLocalizedControlType()
|
||||||
|
.map(|c| c.to_string())
|
||||||
|
.unwrap_or_default();
|
||||||
|
if name.is_empty() && ct.is_empty() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
result.parent_path.insert(
|
||||||
|
0,
|
||||||
|
ParentHint {
|
||||||
|
name,
|
||||||
|
control_type: ct,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
current = parent;
|
||||||
|
}
|
||||||
|
Err(_) => break,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(result)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn query_at_point(x: i32, y: i32, with_parents: bool) -> UiaResponse {
|
||||||
|
let start = Instant::now();
|
||||||
|
let _com = match ComGuard::new() {
|
||||||
|
Ok(g) => g,
|
||||||
|
Err(e) => {
|
||||||
|
return UiaResponse::Error {
|
||||||
|
message: format!("CoInitializeEx: {}", e),
|
||||||
|
code: "com_init_failed".into(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let automation = match get_automation() {
|
||||||
|
Ok(a) => a,
|
||||||
|
Err(e) => {
|
||||||
|
return UiaResponse::Error {
|
||||||
|
message: format!("CUIAutomation: {}", e),
|
||||||
|
code: "automation_failed".into(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let point = POINT { x, y };
|
||||||
|
let element = unsafe { automation.ElementFromPoint(point) };
|
||||||
|
match element {
|
||||||
|
Ok(el) => match element_to_struct(&el, with_parents) {
|
||||||
|
Ok(e) => UiaResponse::Ok {
|
||||||
|
element: Some(e),
|
||||||
|
elements: Vec::new(),
|
||||||
|
elapsed_ms: start.elapsed().as_millis() as u64,
|
||||||
|
},
|
||||||
|
Err(e) => UiaResponse::Error {
|
||||||
|
message: format!("element_to_struct: {}", e),
|
||||||
|
code: "extract_failed".into(),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Err(_) => UiaResponse::NotFound {
|
||||||
|
reason: format!("Aucun élément UIA à ({}, {})", x, y),
|
||||||
|
elapsed_ms: start.elapsed().as_millis() as u64,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn find_element(
|
||||||
|
name: Option<String>,
|
||||||
|
_control_type: Option<String>,
|
||||||
|
_automation_id: Option<String>,
|
||||||
|
_window: Option<String>,
|
||||||
|
_timeout_ms: u32,
|
||||||
|
) -> UiaResponse {
|
||||||
|
let start = Instant::now();
|
||||||
|
let _com = match ComGuard::new() {
|
||||||
|
Ok(g) => g,
|
||||||
|
Err(e) => {
|
||||||
|
return UiaResponse::Error {
|
||||||
|
message: format!("CoInitializeEx: {}", e),
|
||||||
|
code: "com_init_failed".into(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let automation = match get_automation() {
|
||||||
|
Ok(a) => a,
|
||||||
|
Err(e) => {
|
||||||
|
return UiaResponse::Error {
|
||||||
|
message: format!("CUIAutomation: {}", e),
|
||||||
|
code: "automation_failed".into(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let root = match unsafe { automation.GetRootElement() } {
|
||||||
|
Ok(r) => r,
|
||||||
|
Err(e) => {
|
||||||
|
return UiaResponse::Error {
|
||||||
|
message: format!("GetRootElement: {}", e),
|
||||||
|
code: "root_failed".into(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Recherche simple par parcours d'arbre (MVP)
|
||||||
|
// L'arbre UIA peut être énorme → on limite la profondeur
|
||||||
|
if let Some(target_name) = name {
|
||||||
|
let walker = unsafe { automation.ControlViewWalker() };
|
||||||
|
if let Ok(walker) = walker {
|
||||||
|
if let Some(found) =
|
||||||
|
walk_and_find(&walker, &root, &target_name, 0, 6, &_control_type, &_automation_id)
|
||||||
|
{
|
||||||
|
match element_to_struct(&found, true) {
|
||||||
|
Ok(e) => {
|
||||||
|
return UiaResponse::Ok {
|
||||||
|
element: Some(e),
|
||||||
|
elements: Vec::new(),
|
||||||
|
elapsed_ms: start.elapsed().as_millis() as u64,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
return UiaResponse::Error {
|
||||||
|
message: format!("element_to_struct: {}", e),
|
||||||
|
code: "extract_failed".into(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
UiaResponse::NotFound {
|
||||||
|
reason: "Aucun élément trouvé".into(),
|
||||||
|
elapsed_ms: start.elapsed().as_millis() as u64,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parcours récursif de l'arbre UIA pour trouver un élément par nom
|
||||||
|
fn walk_and_find(
|
||||||
|
walker: &IUIAutomationTreeWalker,
|
||||||
|
element: &IUIAutomationElement,
|
||||||
|
target_name: &str,
|
||||||
|
depth: u32,
|
||||||
|
max_depth: u32,
|
||||||
|
target_control_type: &Option<String>,
|
||||||
|
target_automation_id: &Option<String>,
|
||||||
|
) -> Option<IUIAutomationElement> {
|
||||||
|
if depth > max_depth {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Tester l'élément courant
|
||||||
|
unsafe {
|
||||||
|
if let Ok(name) = element.CurrentName() {
|
||||||
|
if name.to_string() == target_name {
|
||||||
|
// Vérifier les filtres additionnels
|
||||||
|
let mut matches = true;
|
||||||
|
if let Some(ct) = target_control_type {
|
||||||
|
if let Ok(local_ct) = element.CurrentLocalizedControlType() {
|
||||||
|
if !local_ct.to_string().to_lowercase().contains(&ct.to_lowercase()) {
|
||||||
|
matches = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if matches {
|
||||||
|
if let Some(aid) = target_automation_id {
|
||||||
|
if let Ok(local_aid) = element.CurrentAutomationId() {
|
||||||
|
if local_aid.to_string() != *aid {
|
||||||
|
matches = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if matches {
|
||||||
|
return Some(element.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parcourir les enfants
|
||||||
|
if let Ok(first_child) = walker.GetFirstChildElement(element) {
|
||||||
|
let mut current = first_child;
|
||||||
|
loop {
|
||||||
|
if let Some(found) = walk_and_find(
|
||||||
|
walker,
|
||||||
|
¤t,
|
||||||
|
target_name,
|
||||||
|
depth + 1,
|
||||||
|
max_depth,
|
||||||
|
target_control_type,
|
||||||
|
target_automation_id,
|
||||||
|
) {
|
||||||
|
return Some(found);
|
||||||
|
}
|
||||||
|
match walker.GetNextSiblingElement(¤t) {
|
||||||
|
Ok(next) => current = next,
|
||||||
|
Err(_) => break,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn capture_tree(_max_depth: u32) -> UiaResponse {
|
||||||
|
let start = Instant::now();
|
||||||
|
let _com = match ComGuard::new() {
|
||||||
|
Ok(g) => g,
|
||||||
|
Err(e) => {
|
||||||
|
return UiaResponse::Error {
|
||||||
|
message: format!("CoInitializeEx: {}", e),
|
||||||
|
code: "com_init_failed".into(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let automation = match get_automation() {
|
||||||
|
Ok(a) => a,
|
||||||
|
Err(e) => {
|
||||||
|
return UiaResponse::Error {
|
||||||
|
message: format!("CUIAutomation: {}", e),
|
||||||
|
code: "automation_failed".into(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let focused = unsafe { automation.GetFocusedElement() };
|
||||||
|
match focused {
|
||||||
|
Ok(el) => match element_to_struct(&el, true) {
|
||||||
|
Ok(e) => UiaResponse::Ok {
|
||||||
|
element: Some(e),
|
||||||
|
elements: Vec::new(),
|
||||||
|
elapsed_ms: start.elapsed().as_millis() as u64,
|
||||||
|
},
|
||||||
|
Err(e) => UiaResponse::Error {
|
||||||
|
message: format!("element_to_struct: {}", e),
|
||||||
|
code: "extract_failed".into(),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Err(e) => UiaResponse::Error {
|
||||||
|
message: format!("GetFocusedElement: {}", e),
|
||||||
|
code: "focused_failed".into(),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn health_check() -> UiaResponse {
|
||||||
|
let _com = match ComGuard::new() {
|
||||||
|
Ok(g) => g,
|
||||||
|
Err(e) => {
|
||||||
|
return UiaResponse::Unavailable {
|
||||||
|
reason: format!("COM init failed: {}", e),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
match get_automation() {
|
||||||
|
Ok(_) => UiaResponse::Ok {
|
||||||
|
element: None,
|
||||||
|
elements: Vec::new(),
|
||||||
|
elapsed_ms: 0,
|
||||||
|
},
|
||||||
|
Err(e) => UiaResponse::Unavailable {
|
||||||
|
reason: format!("UIA not available: {}", e),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// =========================================================================
|
||||||
|
// Stub Linux (pour développement et tests)
|
||||||
|
// =========================================================================
|
||||||
|
|
||||||
|
#[cfg(not(windows))]
|
||||||
|
mod uia_impl {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
pub fn query_at_point(_x: i32, _y: i32, _with_parents: bool) -> UiaResponse {
|
||||||
|
UiaResponse::Unavailable {
|
||||||
|
reason: "UIA n'est disponible que sur Windows".into(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn find_element(
|
||||||
|
_name: Option<String>,
|
||||||
|
_control_type: Option<String>,
|
||||||
|
_automation_id: Option<String>,
|
||||||
|
_window: Option<String>,
|
||||||
|
_timeout_ms: u32,
|
||||||
|
) -> UiaResponse {
|
||||||
|
UiaResponse::Unavailable {
|
||||||
|
reason: "UIA n'est disponible que sur Windows".into(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn capture_tree(_max_depth: u32) -> UiaResponse {
|
||||||
|
UiaResponse::Unavailable {
|
||||||
|
reason: "UIA n'est disponible que sur Windows".into(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn health_check() -> UiaResponse {
|
||||||
|
UiaResponse::Unavailable {
|
||||||
|
reason: "UIA n'est disponible que sur Windows".into(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// =========================================================================
|
||||||
|
// Main
|
||||||
|
// =========================================================================
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
let cli = Cli::parse();
|
||||||
|
|
||||||
|
let response = match cli.command {
|
||||||
|
Commands::Query {
|
||||||
|
x,
|
||||||
|
y,
|
||||||
|
with_parents,
|
||||||
|
} => uia_impl::query_at_point(x, y, with_parents),
|
||||||
|
Commands::Find {
|
||||||
|
name,
|
||||||
|
control_type,
|
||||||
|
automation_id,
|
||||||
|
window,
|
||||||
|
timeout_ms,
|
||||||
|
} => uia_impl::find_element(name, control_type, automation_id, window, timeout_ms),
|
||||||
|
Commands::Capture { max_depth } => uia_impl::capture_tree(max_depth),
|
||||||
|
Commands::Health => uia_impl::health_check(),
|
||||||
|
};
|
||||||
|
|
||||||
|
// Sortie JSON sur stdout
|
||||||
|
match serde_json::to_string(&response) {
|
||||||
|
Ok(json) => println!("{}", json),
|
||||||
|
Err(e) => {
|
||||||
|
eprintln!("{{\"status\":\"error\",\"message\":\"JSON serialization: {}\"}}", e);
|
||||||
|
std::process::exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -40,10 +40,18 @@ MACHINE_ID = os.environ.get(
|
|||||||
BASE_DIR = Path(__file__).resolve().parent
|
BASE_DIR = Path(__file__).resolve().parent
|
||||||
|
|
||||||
# Endpoint du serveur Streaming (port 5005)
|
# Endpoint du serveur Streaming (port 5005)
|
||||||
|
# SERVER_URL contient TOUJOURS /api/v1 à la fin (convention unifiée).
|
||||||
SERVER_URL = os.getenv("RPA_SERVER_URL", "http://localhost:5005/api/v1")
|
SERVER_URL = os.getenv("RPA_SERVER_URL", "http://localhost:5005/api/v1")
|
||||||
|
# Base sans /api/v1 — pour les routes à la racine (/health)
|
||||||
|
SERVER_BASE = SERVER_URL.rsplit("/api/v1", 1)[0]
|
||||||
UPLOAD_ENDPOINT = f"{SERVER_URL}/traces/upload"
|
UPLOAD_ENDPOINT = f"{SERVER_URL}/traces/upload"
|
||||||
STREAMING_ENDPOINT = f"{SERVER_URL}/traces/stream"
|
STREAMING_ENDPOINT = f"{SERVER_URL}/traces/stream"
|
||||||
|
|
||||||
|
# Host Ollama — SÉPARÉ du serveur RPA.
|
||||||
|
# Ollama tourne en local sur la machine serveur, jamais exposé via le reverse proxy.
|
||||||
|
# Défaut : localhost (exécution locale ou accès LAN direct).
|
||||||
|
OLLAMA_HOST = os.getenv("RPA_OLLAMA_HOST", "localhost")
|
||||||
|
|
||||||
# Token d'authentification API (doit correspondre au token du serveur)
|
# Token d'authentification API (doit correspondre au token du serveur)
|
||||||
# Configurable via variable d'environnement RPA_API_TOKEN
|
# Configurable via variable d'environnement RPA_API_TOKEN
|
||||||
API_TOKEN = os.environ.get("RPA_API_TOKEN", "")
|
API_TOKEN = os.environ.get("RPA_API_TOKEN", "")
|
||||||
|
|||||||
@@ -178,8 +178,41 @@ class EventCaptorV1:
|
|||||||
"timestamp": now,
|
"timestamp": now,
|
||||||
}
|
}
|
||||||
self._inject_screen_metadata(event)
|
self._inject_screen_metadata(event)
|
||||||
|
# Capturer le snapshot UIA à la position du clic (si helper dispo)
|
||||||
|
# Non-bloquant : si UIA échoue, l'event est enrichi uniquement
|
||||||
|
# des données vision comme aujourd'hui.
|
||||||
|
self._inject_uia_snapshot(event, x, y)
|
||||||
self.on_event(event)
|
self.on_event(event)
|
||||||
|
|
||||||
|
def _inject_uia_snapshot(self, event: dict, x: int, y: int) -> None:
|
||||||
|
"""Ajouter un uia_snapshot à l'événement si le helper UIA est dispo.
|
||||||
|
|
||||||
|
Appelle lea_uia.exe query --x N --y N en ~10-20ms.
|
||||||
|
Fallback silencieux si le helper n'est pas dispo ou échoue.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
from .uia_helper import get_shared_helper
|
||||||
|
helper = get_shared_helper()
|
||||||
|
if not helper.available:
|
||||||
|
return
|
||||||
|
element = helper.query_at(int(x), int(y), with_parents=True)
|
||||||
|
if element is None:
|
||||||
|
return
|
||||||
|
event["uia_snapshot"] = {
|
||||||
|
"name": element.name,
|
||||||
|
"control_type": element.control_type,
|
||||||
|
"class_name": element.class_name,
|
||||||
|
"automation_id": element.automation_id,
|
||||||
|
"bounding_rect": list(element.bounding_rect),
|
||||||
|
"is_enabled": element.is_enabled,
|
||||||
|
"is_offscreen": element.is_offscreen,
|
||||||
|
"parent_path": element.parent_path,
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
# Non bloquant — on continue sans UIA
|
||||||
|
import logging
|
||||||
|
logging.getLogger(__name__).debug(f"UIA snapshot skip: {e}")
|
||||||
|
|
||||||
def _on_scroll(self, x, y, dx, dy):
|
def _on_scroll(self, x, y, dx, dy):
|
||||||
event = {
|
event = {
|
||||||
"type": "mouse_scroll",
|
"type": "mouse_scroll",
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -15,6 +15,7 @@ Ref: docs/PLAN_ACTEUR_V1.md — Architecture MICRO (grounding + exécution)
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
|
import io
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
@@ -126,19 +127,62 @@ class GroundingEngine:
|
|||||||
)
|
)
|
||||||
|
|
||||||
t_start = time.time()
|
t_start = time.time()
|
||||||
screenshot_b64 = self._executor._capture_screenshot_b64(max_width=0, quality=75)
|
|
||||||
|
# ── Capture contrainte à la fenêtre active ──
|
||||||
|
# Le grounding ne voit QUE la fenêtre attendue — pas la taskbar,
|
||||||
|
# pas le systray, pas les autres apps. Comme un humain qui regarde
|
||||||
|
# l'application sur laquelle il travaille.
|
||||||
|
window_rect = None
|
||||||
|
try:
|
||||||
|
from ..window_info_crossplatform import get_active_window_rect
|
||||||
|
win_info = get_active_window_rect()
|
||||||
|
if win_info and win_info.get("rect"):
|
||||||
|
r = win_info["rect"] # [left, top, right, bottom]
|
||||||
|
# Validation : fenêtre visible et pas minuscule
|
||||||
|
w = r[2] - r[0]
|
||||||
|
h = r[3] - r[1]
|
||||||
|
if w > 50 and h > 50:
|
||||||
|
window_rect = {
|
||||||
|
"left": max(0, r[0]),
|
||||||
|
"top": max(0, r[1]),
|
||||||
|
"width": min(w, screen_width),
|
||||||
|
"height": min(h, screen_height),
|
||||||
|
}
|
||||||
|
logger.info(
|
||||||
|
f"Grounding contraint à la fenêtre : "
|
||||||
|
f"{window_rect['width']}x{window_rect['height']} "
|
||||||
|
f"à ({window_rect['left']}, {window_rect['top']})"
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"Pas de window rect disponible : {e}")
|
||||||
|
|
||||||
|
screenshot_b64 = self._capture_window_or_screen(window_rect)
|
||||||
if not screenshot_b64:
|
if not screenshot_b64:
|
||||||
return GroundingResult(
|
return GroundingResult(
|
||||||
found=False, detail="Capture screenshot échouée",
|
found=False, detail="Capture screenshot échouée",
|
||||||
elapsed_ms=(time.time() - t_start) * 1000,
|
elapsed_ms=(time.time() - t_start) * 1000,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Dimensions de la zone capturée (fenêtre ou écran entier)
|
||||||
|
cap_w = window_rect["width"] if window_rect else screen_width
|
||||||
|
cap_h = window_rect["height"] if window_rect else screen_height
|
||||||
|
|
||||||
for strategy in strategies:
|
for strategy in strategies:
|
||||||
result = self._try_strategy(
|
result = self._try_strategy(
|
||||||
strategy, server_url, screenshot_b64, target_spec,
|
strategy, server_url, screenshot_b64, target_spec,
|
||||||
fallback_x, fallback_y, screen_width, screen_height,
|
fallback_x, fallback_y, cap_w, cap_h,
|
||||||
)
|
)
|
||||||
if result.found:
|
if result.found:
|
||||||
|
# ── Conversion coords fenêtre → coords écran ──
|
||||||
|
if window_rect:
|
||||||
|
# Le grounding a retourné des coords relatives à la fenêtre
|
||||||
|
# On les convertit en coords relatives à l'écran entier
|
||||||
|
abs_x = window_rect["left"] + result.x_pct * cap_w
|
||||||
|
abs_y = window_rect["top"] + result.y_pct * cap_h
|
||||||
|
result.x_pct = abs_x / screen_width
|
||||||
|
result.y_pct = abs_y / screen_height
|
||||||
|
result.detail = f"{result.detail} [fenêtre {cap_w}x{cap_h}]"
|
||||||
|
|
||||||
result.elapsed_ms = (time.time() - t_start) * 1000
|
result.elapsed_ms = (time.time() - t_start) * 1000
|
||||||
return result
|
return result
|
||||||
|
|
||||||
@@ -148,6 +192,39 @@ class GroundingEngine:
|
|||||||
elapsed_ms=(time.time() - t_start) * 1000,
|
elapsed_ms=(time.time() - t_start) * 1000,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _capture_window_or_screen(self, window_rect: Optional[Dict]) -> str:
|
||||||
|
"""Capturer soit la fenêtre active (croppée), soit l'écran entier.
|
||||||
|
|
||||||
|
Si window_rect est fourni, capture uniquement cette zone.
|
||||||
|
Sinon, capture l'écran entier (fallback).
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
from PIL import Image
|
||||||
|
import mss as mss_lib
|
||||||
|
|
||||||
|
with mss_lib.mss() as local_sct:
|
||||||
|
if window_rect:
|
||||||
|
# Capture de la zone fenêtre uniquement
|
||||||
|
region = {
|
||||||
|
"left": window_rect["left"],
|
||||||
|
"top": window_rect["top"],
|
||||||
|
"width": window_rect["width"],
|
||||||
|
"height": window_rect["height"],
|
||||||
|
}
|
||||||
|
raw = local_sct.grab(region)
|
||||||
|
else:
|
||||||
|
# Fallback écran entier
|
||||||
|
raw = local_sct.grab(local_sct.monitors[1])
|
||||||
|
|
||||||
|
img = Image.frombytes("RGB", raw.size, raw.bgra, "raw", "BGRX")
|
||||||
|
buffer = io.BytesIO()
|
||||||
|
img.save(buffer, format="JPEG", quality=75)
|
||||||
|
return base64.b64encode(buffer.getvalue()).decode("utf-8")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Capture échouée : {e}")
|
||||||
|
# Fallback sur la méthode existante de l'executor
|
||||||
|
return self._executor._capture_screenshot_b64(max_width=0, quality=75)
|
||||||
|
|
||||||
def _try_strategy(
|
def _try_strategy(
|
||||||
self,
|
self,
|
||||||
strategy: str,
|
strategy: str,
|
||||||
|
|||||||
@@ -85,6 +85,10 @@ class PolicyEngine:
|
|||||||
2. Si retry déjà fait → demander à l'acteur gemma4
|
2. Si retry déjà fait → demander à l'acteur gemma4
|
||||||
3. Selon gemma4 : SKIP, ABORT, ou SUPERVISE
|
3. Selon gemma4 : SKIP, ABORT, ou SUPERVISE
|
||||||
|
|
||||||
|
**SÉCURITÉ** : si, pendant l'étape 1, le handler popup détecte un
|
||||||
|
dialogue système Windows (UAC, CredUI, SmartScreen…), on bascule
|
||||||
|
immédiatement en SUPERVISE. Cf. system_dialog_guard.py.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
action: L'action qui a échoué
|
action: L'action qui a échoué
|
||||||
target_spec: La cible non trouvée
|
target_spec: La cible non trouvée
|
||||||
@@ -96,6 +100,22 @@ class PolicyEngine:
|
|||||||
# ── Étape 1 : Tentative de fermeture popup (premier essai) ──
|
# ── Étape 1 : Tentative de fermeture popup (premier essai) ──
|
||||||
if retry_count == 0:
|
if retry_count == 0:
|
||||||
popup_handled = self._try_close_popup()
|
popup_handled = self._try_close_popup()
|
||||||
|
|
||||||
|
# Si le popup handler a détecté un dialogue système, on
|
||||||
|
# bascule immédiatement en SUPERVISE — pas de retry, pas de
|
||||||
|
# gemma4 : on rend la main à l'humain.
|
||||||
|
if getattr(self._executor, "_system_dialog_pause", None):
|
||||||
|
sd = self._executor._system_dialog_pause
|
||||||
|
return PolicyDecision(
|
||||||
|
decision=Decision.SUPERVISE,
|
||||||
|
reason=(
|
||||||
|
f"Dialogue système détecté ({sd.get('category', '?')}) — "
|
||||||
|
f"refus d'interaction automatique"
|
||||||
|
),
|
||||||
|
action_taken="system_dialog_blocked",
|
||||||
|
elapsed_ms=(time.time() - t_start) * 1000,
|
||||||
|
)
|
||||||
|
|
||||||
if popup_handled:
|
if popup_handled:
|
||||||
return PolicyDecision(
|
return PolicyDecision(
|
||||||
decision=Decision.RETRY,
|
decision=Decision.RETRY,
|
||||||
|
|||||||
448
agent_v0/agent_v1/core/system_dialog_guard.py
Normal file
448
agent_v0/agent_v1/core/system_dialog_guard.py
Normal file
@@ -0,0 +1,448 @@
|
|||||||
|
# agent_v1/core/system_dialog_guard.py
|
||||||
|
"""
|
||||||
|
Garde-fou sécurité : détection des dialogues système Windows critiques.
|
||||||
|
|
||||||
|
==============================================================================
|
||||||
|
POURQUOI ?
|
||||||
|
==============================================================================
|
||||||
|
|
||||||
|
Pendant un replay, si un dialogue UAC, CredUI (mot de passe Windows),
|
||||||
|
SmartScreen ou une notification de sécurité Windows apparaît, Léa pourrait
|
||||||
|
demander au VLM "quel bouton cliquer" et recevoir "Oui" en réponse.
|
||||||
|
|
||||||
|
→ **Léa cliquerait OUI sur une élévation UAC** → vecteur d'attaque ransomware.
|
||||||
|
|
||||||
|
Ce module fournit la détection de ces dialogues pour que l'exécuteur
|
||||||
|
**ne clique JAMAIS dessus automatiquement**. La décision est renvoyée à
|
||||||
|
l'humain (pause supervisée).
|
||||||
|
|
||||||
|
==============================================================================
|
||||||
|
PRINCIPE
|
||||||
|
==============================================================================
|
||||||
|
|
||||||
|
- **Faux positif tolérable** : on préfère pauser pour rien plutôt que cliquer
|
||||||
|
sur un UAC.
|
||||||
|
- **Faux négatif catastrophique** : mieux vaut être trop prudent.
|
||||||
|
- **Multi-signal** : titre, ClassName UIA, nom de processus, parent_path.
|
||||||
|
Un seul signal suffit à bloquer.
|
||||||
|
- **Compatible Citrix** : les dialogues UAC d'un client Citrix apparaissent
|
||||||
|
aussi dans la VM distante — la détection par classe UIA fonctionne.
|
||||||
|
|
||||||
|
==============================================================================
|
||||||
|
PATTERNS DE DÉTECTION (ordre de criticité décroissant)
|
||||||
|
==============================================================================
|
||||||
|
|
||||||
|
1. UAC Consent (élévation de privilèges)
|
||||||
|
- ClassName : `$$$Secure UAP Dummy Window Class$$$`
|
||||||
|
- Process : `consent.exe`
|
||||||
|
- Titre : "Contrôle de compte d'utilisateur", "User Account Control"
|
||||||
|
|
||||||
|
2. CredUI (prompt mot de passe Windows)
|
||||||
|
- ClassName : `Credential Dialog Xaml Host`
|
||||||
|
- Process : `credentialuibroker.exe`, `credui.exe`
|
||||||
|
- Titre : "Sécurité Windows", "Windows Security"
|
||||||
|
|
||||||
|
3. SmartScreen (protection contre applications inconnues)
|
||||||
|
- Process : `smartscreen.exe`
|
||||||
|
- Titre : "Windows a protégé votre ordinateur", "Windows protected your PC"
|
||||||
|
|
||||||
|
4. Windows Defender / Security Center
|
||||||
|
- Process : `securityhealthhost.exe`, `msmpeng.exe`
|
||||||
|
- Titre : "Sécurité Windows", "Windows Defender"
|
||||||
|
|
||||||
|
5. Signatures pilotes / driver install
|
||||||
|
- Titre : "Installer ce pilote", "Driver signature"
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Any, Dict, Optional, Tuple
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Catégories de dialogues système (pour logging + messages)
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
class SystemDialogCategory:
|
||||||
|
"""Catégories de dialogues système à bloquer absolument."""
|
||||||
|
UAC = "uac_consent" # Élévation de privilèges
|
||||||
|
CREDUI = "windows_credential_prompt" # Prompt de mot de passe
|
||||||
|
SMARTSCREEN = "smartscreen" # Protection SmartScreen
|
||||||
|
DEFENDER = "windows_defender" # Alerte Windows Defender
|
||||||
|
DRIVER = "driver_install" # Installation pilote signé
|
||||||
|
SECURITY_TOAST = "security_toast" # Toast de sécurité Windows
|
||||||
|
UNKNOWN_DIALOG = "unknown_system_dialog" # Dialogue #32770 sans app connue
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class SystemDialogDetection:
|
||||||
|
"""Résultat d'une analyse de dialogue système."""
|
||||||
|
is_system_dialog: bool
|
||||||
|
category: str = "" # Valeur de SystemDialogCategory
|
||||||
|
matched_signal: str = "" # Ex: "class_name=Consent.exe"
|
||||||
|
matched_value: str = "" # La valeur qui a matché
|
||||||
|
reason: str = "" # Explication lisible
|
||||||
|
|
||||||
|
def to_dict(self) -> Dict[str, Any]:
|
||||||
|
return {
|
||||||
|
"is_system_dialog": self.is_system_dialog,
|
||||||
|
"category": self.category,
|
||||||
|
"matched_signal": self.matched_signal,
|
||||||
|
"matched_value": self.matched_value,
|
||||||
|
"reason": self.reason,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Signatures de détection
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
# ClassName UIA (casse préservée — Windows exposées telle quelle par UIA).
|
||||||
|
# Utilisées telles quelles puis en minuscules pour matcher avec souplesse.
|
||||||
|
_CLASS_NAMES_SYSTEM = {
|
||||||
|
# UAC Consent
|
||||||
|
"$$$Secure UAP Dummy Window Class$$$": SystemDialogCategory.UAC,
|
||||||
|
"Credential Dialog Xaml Host": SystemDialogCategory.CREDUI,
|
||||||
|
# Windows Credential UI ancien nom
|
||||||
|
"CredentialDialogXamlHost": SystemDialogCategory.CREDUI,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Nom de processus (comparaison insensible à la casse, .exe normalisé)
|
||||||
|
_PROCESS_NAMES_SYSTEM = {
|
||||||
|
"consent.exe": SystemDialogCategory.UAC,
|
||||||
|
"credentialuibroker.exe": SystemDialogCategory.CREDUI,
|
||||||
|
"credui.exe": SystemDialogCategory.CREDUI,
|
||||||
|
"credwiz.exe": SystemDialogCategory.CREDUI,
|
||||||
|
"smartscreen.exe": SystemDialogCategory.SMARTSCREEN,
|
||||||
|
"securityhealthhost.exe": SystemDialogCategory.DEFENDER,
|
||||||
|
"securityhealthui.exe": SystemDialogCategory.DEFENDER,
|
||||||
|
"securityhealthsystray.exe": SystemDialogCategory.DEFENDER,
|
||||||
|
"msmpeng.exe": SystemDialogCategory.DEFENDER,
|
||||||
|
"windowsdefender.exe": SystemDialogCategory.DEFENDER,
|
||||||
|
"msiexec.exe": SystemDialogCategory.DRIVER, # prompts pilotes signés
|
||||||
|
"drvinst.exe": SystemDialogCategory.DRIVER,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Motifs titre (insensibles à la casse, regex avec word boundaries)
|
||||||
|
# On ne matche pas les titres génériques trop larges pour limiter les faux
|
||||||
|
# positifs sur OSIRIS/OBSIUS/MEDSPHERE.
|
||||||
|
_TITLE_PATTERNS_SYSTEM: Tuple[Tuple[re.Pattern, str], ...] = (
|
||||||
|
# UAC
|
||||||
|
(re.compile(r"contr[oô]le\s+de\s+compte\s+d'?utilisateur", re.IGNORECASE),
|
||||||
|
SystemDialogCategory.UAC),
|
||||||
|
(re.compile(r"\buser\s+account\s+control\b", re.IGNORECASE),
|
||||||
|
SystemDialogCategory.UAC),
|
||||||
|
(re.compile(r"voulez-vous\s+autoriser\s+cette\s+application", re.IGNORECASE),
|
||||||
|
SystemDialogCategory.UAC),
|
||||||
|
(re.compile(r"do\s+you\s+want\s+to\s+allow\s+this\s+app", re.IGNORECASE),
|
||||||
|
SystemDialogCategory.UAC),
|
||||||
|
|
||||||
|
# CredUI / Sécurité Windows
|
||||||
|
(re.compile(r"\bs[eé]curit[eé]\s+windows\b", re.IGNORECASE),
|
||||||
|
SystemDialogCategory.CREDUI),
|
||||||
|
(re.compile(r"\bwindows\s+security\b", re.IGNORECASE),
|
||||||
|
SystemDialogCategory.CREDUI),
|
||||||
|
(re.compile(r"entrer\s+les\s+informations\s+d'?identification", re.IGNORECASE),
|
||||||
|
SystemDialogCategory.CREDUI),
|
||||||
|
(re.compile(r"enter\s+(?:your\s+)?credentials?", re.IGNORECASE),
|
||||||
|
SystemDialogCategory.CREDUI),
|
||||||
|
(re.compile(r"connectez-vous\s+[aà]\s+votre\s+compte", re.IGNORECASE),
|
||||||
|
SystemDialogCategory.CREDUI),
|
||||||
|
(re.compile(r"\bsign\s+in\s+to\s+your\s+account\b", re.IGNORECASE),
|
||||||
|
SystemDialogCategory.CREDUI),
|
||||||
|
|
||||||
|
# SmartScreen
|
||||||
|
(re.compile(r"windows\s+a\s+prot[eé]g[eé]", re.IGNORECASE),
|
||||||
|
SystemDialogCategory.SMARTSCREEN),
|
||||||
|
(re.compile(r"windows\s+protected\s+your\s+pc", re.IGNORECASE),
|
||||||
|
SystemDialogCategory.SMARTSCREEN),
|
||||||
|
(re.compile(r"\bsmartscreen\b", re.IGNORECASE),
|
||||||
|
SystemDialogCategory.SMARTSCREEN),
|
||||||
|
(re.compile(r"\b[eé]diteur\s+inconnu\b", re.IGNORECASE),
|
||||||
|
SystemDialogCategory.SMARTSCREEN),
|
||||||
|
(re.compile(r"\bunknown\s+publisher\b", re.IGNORECASE),
|
||||||
|
SystemDialogCategory.SMARTSCREEN),
|
||||||
|
|
||||||
|
# Windows Defender
|
||||||
|
(re.compile(r"windows\s+defender", re.IGNORECASE),
|
||||||
|
SystemDialogCategory.DEFENDER),
|
||||||
|
(re.compile(r"menace\s+d[eé]tect[eé]e", re.IGNORECASE),
|
||||||
|
SystemDialogCategory.DEFENDER),
|
||||||
|
(re.compile(r"threat\s+detected", re.IGNORECASE),
|
||||||
|
SystemDialogCategory.DEFENDER),
|
||||||
|
|
||||||
|
# Driver
|
||||||
|
(re.compile(r"installer\s+ce\s+pilote", re.IGNORECASE),
|
||||||
|
SystemDialogCategory.DRIVER),
|
||||||
|
(re.compile(r"install\s+this\s+driver", re.IGNORECASE),
|
||||||
|
SystemDialogCategory.DRIVER),
|
||||||
|
(re.compile(r"signature\s+num[eé]rique\s+du\s+pilote", re.IGNORECASE),
|
||||||
|
SystemDialogCategory.DRIVER),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Fonctions de détection
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_process(name: str) -> str:
|
||||||
|
"""Normaliser un nom de processus pour comparaison."""
|
||||||
|
if not name:
|
||||||
|
return ""
|
||||||
|
name = name.strip().lower()
|
||||||
|
# Enlever le chemin éventuel
|
||||||
|
if "\\" in name or "/" in name:
|
||||||
|
name = name.replace("\\", "/").split("/")[-1]
|
||||||
|
# Assurer suffixe .exe pour matcher le dictionnaire
|
||||||
|
if not name.endswith(".exe") and name:
|
||||||
|
# Les process_name peuvent venir sans .exe (psutil) — on ajoute
|
||||||
|
# pour avoir une clé uniforme
|
||||||
|
name_with_exe = name + ".exe"
|
||||||
|
if name_with_exe in _PROCESS_NAMES_SYSTEM:
|
||||||
|
return name_with_exe
|
||||||
|
return name
|
||||||
|
|
||||||
|
|
||||||
|
def _check_class_name(class_name: str) -> Optional[Tuple[str, str, str]]:
|
||||||
|
"""Vérifier si un ClassName UIA matche un dialogue système.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
(category, matched_class, reason) si match, None sinon.
|
||||||
|
"""
|
||||||
|
if not class_name:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Match exact
|
||||||
|
if class_name in _CLASS_NAMES_SYSTEM:
|
||||||
|
cat = _CLASS_NAMES_SYSTEM[class_name]
|
||||||
|
return (cat, class_name, f"ClassName UIA '{class_name}' = dialogue système {cat}")
|
||||||
|
|
||||||
|
# Match insensible à la casse + normalisation espaces
|
||||||
|
cn_norm = class_name.strip()
|
||||||
|
for known, cat in _CLASS_NAMES_SYSTEM.items():
|
||||||
|
if cn_norm.lower() == known.lower():
|
||||||
|
return (cat, class_name, f"ClassName UIA ~= '{known}' ({cat})")
|
||||||
|
|
||||||
|
# Détection souple UAC (il existe quelques variantes de la classe secure)
|
||||||
|
if "secure uap" in class_name.lower() or "uap dummy" in class_name.lower():
|
||||||
|
return (SystemDialogCategory.UAC, class_name,
|
||||||
|
f"ClassName '{class_name}' contient 'Secure UAP' → UAC")
|
||||||
|
|
||||||
|
# Credential XAML Host
|
||||||
|
if "credential" in class_name.lower() and "xaml" in class_name.lower():
|
||||||
|
return (SystemDialogCategory.CREDUI, class_name,
|
||||||
|
f"ClassName '{class_name}' contient Credential+Xaml → CredUI")
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _check_process_name(process_name: str) -> Optional[Tuple[str, str, str]]:
|
||||||
|
"""Vérifier si un nom de processus est un dialogue système.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
(category, matched_process, reason) si match, None sinon.
|
||||||
|
"""
|
||||||
|
if not process_name:
|
||||||
|
return None
|
||||||
|
|
||||||
|
norm = _normalize_process(process_name)
|
||||||
|
if norm in _PROCESS_NAMES_SYSTEM:
|
||||||
|
cat = _PROCESS_NAMES_SYSTEM[norm]
|
||||||
|
return (cat, process_name, f"Processus '{norm}' = {cat}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _check_title(title: str) -> Optional[Tuple[str, str, str]]:
|
||||||
|
"""Vérifier si un titre de fenêtre matche un dialogue système.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
(category, matched_pattern, reason) si match, None sinon.
|
||||||
|
"""
|
||||||
|
if not title:
|
||||||
|
return None
|
||||||
|
|
||||||
|
for pattern, cat in _TITLE_PATTERNS_SYSTEM:
|
||||||
|
m = pattern.search(title)
|
||||||
|
if m:
|
||||||
|
return (cat, m.group(0),
|
||||||
|
f"Titre '{title[:60]}' matche '{pattern.pattern}' → {cat}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _blocked(signal: str, match: Tuple[str, str, str],
             reason_prefix: str = "") -> "SystemDialogDetection":
    """Build a positive SystemDialogDetection from a (category, value, reason) match.

    Factored out of is_system_dialog(), which previously repeated this
    six-field constructor call at six call sites.
    """
    category, matched_value, reason = match
    return SystemDialogDetection(
        is_system_dialog=True,
        category=category,
        matched_signal=signal,
        matched_value=matched_value,
        reason=f"{reason_prefix}{reason}",
    )


def is_system_dialog(
    uia_snapshot: Optional[Dict[str, Any]] = None,
    window_info: Optional[Dict[str, Any]] = None,
) -> SystemDialogDetection:
    """Decide whether the active window is a critical system dialog.

    Detection combines several signals — **any single one is enough to
    block**. A false positive (needless pause) is preferred over a false
    negative (clicking through a UAC prompt).

    Args:
        uia_snapshot: Dict with fields `class_name`, `process_name`,
            `parent_path`, `name`. May be None when UIA is unavailable.
        window_info: Dict with fields `title`, `app_name`. May be None.

    Returns:
        SystemDialogDetection with is_system_dialog=True when a system
        dialog is detected.

    Examples::

        det = is_system_dialog(window_info={"title": "User Account Control"})
        assert det.is_system_dialog  # UAC detected

        det = is_system_dialog(uia_snapshot={"class_name": "$$$Secure UAP Dummy Window Class$$$"})
        assert det.is_system_dialog  # UAC via ClassName

        det = is_system_dialog(window_info={"title": "OSIRIS - Patient Dupont"})
        assert not det.is_system_dialog  # Business application → OK
    """
    # ── Signal 1: UIA ClassName ──
    if uia_snapshot:
        r = _check_class_name(uia_snapshot.get("class_name", "") or "")
        if r:
            return _blocked("class_name", r)

        # Also inspect the parents: the clicked element may be an inner
        # "Button" whose owning window (e.g. consent.exe) carries the
        # telling class name.
        for parent in uia_snapshot.get("parent_path", []) or []:
            r = _check_class_name(parent.get("class_name", "") or "")
            if r:
                return _blocked("parent_class_name", r, reason_prefix="Parent : ")

    # ── Signal 2: process name ──
    if uia_snapshot:
        r = _check_process_name(uia_snapshot.get("process_name", "") or "")
        if r:
            return _blocked("process_name", r)

    if window_info:
        r = _check_process_name(window_info.get("app_name", "") or "")
        if r:
            return _blocked("app_name", r)

    # ── Signal 3: window title ──
    if window_info:
        r = _check_title(window_info.get("title", "") or "")
        if r:
            return _blocked("window_title", r)

    if uia_snapshot:
        # Some system dialogs surface their title through the UIA name.
        r = _check_title(uia_snapshot.get("name", "") or "")
        if r:
            return _blocked("uia_name", r)

    return SystemDialogDetection(is_system_dialog=False)
|
||||||
|
|
||||||
|
|
||||||
|
def detect_current_system_dialog() -> SystemDialogDetection:
    """Analyze the current screen and detect a system dialog.

    Standalone helper that queries both `get_active_window_info()` and the
    UIA helper (when available) to obtain the most reliable detection.

    Returns:
        SystemDialogDetection. is_system_dialog=True when any signal
        matches. When nothing is available (Linux, UIA missing) the result
        is is_system_dialog=False, and the caller may still fall back to a
        title-based analysis.
    """
    window_info: Optional[Dict[str, Any]] = None
    uia_snapshot: Optional[Dict[str, Any]] = None

    # Active window (cross-platform, best effort).
    try:
        from ..window_info_crossplatform import get_active_window_info
        window_info = get_active_window_info()
    except Exception as exc:  # pragma: no cover — best-effort
        logger.debug(f"[SYS-DIALOG] window_info indisponible : {exc}")

    # Local UIA snapshot (Windows only, via lea_uia.exe).
    try:
        from .uia_helper import get_shared_helper
        uia = get_shared_helper()
        if uia.available:
            # Capture the focused element (root = active window).
            focused = uia.capture_focused(max_depth=2)
            if focused is not None:
                uia_snapshot = focused.to_dict()
    except Exception as exc:  # pragma: no cover
        logger.debug(f"[SYS-DIALOG] UIA indisponible : {exc}")

    detection = is_system_dialog(
        uia_snapshot=uia_snapshot, window_info=window_info,
    )

    if detection.is_system_dialog:
        logger.warning(
            f"[SYS-DIALOG] BLOCAGE — dialogue système détecté "
            f"[{detection.category}] via {detection.matched_signal}='{detection.matched_value}' "
            f"— {detection.reason}"
        )
    return detection
|
||||||
|
|
||||||
|
|
||||||
|
# Public API of this module.
__all__ = [
    "SystemDialogCategory",
    "SystemDialogDetection",
    "is_system_dialog",
    "detect_current_system_dialog",
]
|
||||||
294
agent_v0/agent_v1/core/uia_helper.py
Normal file
294
agent_v0/agent_v1/core/uia_helper.py
Normal file
@@ -0,0 +1,294 @@
|
|||||||
|
# core/workflow/uia_helper.py
|
||||||
|
"""
|
||||||
|
UIAHelper — Wrapper Python pour lea_uia.exe (helper Rust UI Automation).
|
||||||
|
|
||||||
|
Expose une API Python simple pour interroger UIA via le binaire Rust.
|
||||||
|
Communique via subprocess + stdin/stdout JSON.
|
||||||
|
|
||||||
|
Pourquoi un helper Rust ?
|
||||||
|
- 5-10x plus rapide que pywinauto (10-20ms vs 50-200ms)
|
||||||
|
- Binaire standalone ~500 Ko, aucune dépendance runtime
|
||||||
|
- Pas de problèmes de threading COM en Python
|
||||||
|
- Crash-safe (le crash du helper n'affecte pas l'agent Python)
|
||||||
|
|
||||||
|
Architecture :
|
||||||
|
Python executor
|
||||||
|
↓ subprocess.run
|
||||||
|
lea_uia.exe query --x 812 --y 436
|
||||||
|
↓ UIA API Windows
|
||||||
|
JSON response
|
||||||
|
↓ stdout
|
||||||
|
Python executor parse JSON
|
||||||
|
|
||||||
|
Si lea_uia.exe n'est pas disponible (Linux, binaire absent, crash) :
|
||||||
|
toutes les méthodes retournent None → fallback vision automatique.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import platform
|
||||||
|
import subprocess
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Timeout par défaut pour les appels UIA (en secondes)
|
||||||
|
_DEFAULT_TIMEOUT = 5.0
|
||||||
|
|
||||||
|
# Masquer la fenêtre console lors du spawn de lea_uia.exe sur Windows.
|
||||||
|
# Sans ce flag, chaque appel (à chaque clic utilisateur pendant
|
||||||
|
# l'enregistrement) fait apparaître une fenêtre cmd noire brièvement
|
||||||
|
# visible à l'écran → ralentit la souris et pollue les screenshots
|
||||||
|
# capturés (le VLM peut "voir" le chemin lea_uia.exe comme texte cliqué).
|
||||||
|
#
|
||||||
|
# La valeur 0x08000000 correspond à CREATE_NO_WINDOW défini dans
|
||||||
|
# l'API Windows. Sur Linux/Mac, la valeur est 0 et `creationflags`
|
||||||
|
# est ignoré. getattr() gère le cas où Python expose déjà la constante
|
||||||
|
# sur Windows.
|
||||||
|
if platform.system() == "Windows":
|
||||||
|
_SUBPROCESS_CREATION_FLAGS = getattr(subprocess, "CREATE_NO_WINDOW", 0x08000000)
|
||||||
|
else:
|
||||||
|
_SUBPROCESS_CREATION_FLAGS = 0
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class UiaElement:
    """Python-side representation of a UIA element.

    Mirrors the JSON emitted by lea_uia.exe; see from_dict()/to_dict().
    """
    name: str = ""
    control_type: str = ""
    class_name: str = ""
    automation_id: str = ""
    # (left, top, right, bottom) in absolute screen pixels.
    bounding_rect: Tuple[int, int, int, int] = (0, 0, 0, 0)
    is_enabled: bool = False
    is_offscreen: bool = True
    # Ancestor chain; each entry presumably holds at least "control_type"
    # and "name" keys, but entries may be partial — TODO confirm producer.
    parent_path: List[Dict[str, str]] = field(default_factory=list)
    process_name: str = ""

    def center(self) -> Tuple[int, int]:
        """Return the center of the bounding rectangle (pixels)."""
        x1, y1, x2, y2 = self.bounding_rect
        return ((x1 + x2) // 2, (y1 + y2) // 2)

    def width(self) -> int:
        """Rectangle width in pixels."""
        return self.bounding_rect[2] - self.bounding_rect[0]

    def height(self) -> int:
        """Rectangle height in pixels."""
        return self.bounding_rect[3] - self.bounding_rect[1]

    def is_clickable(self) -> bool:
        """Can the element be clicked (enabled, on-screen, non-degenerate rect)?"""
        return (
            self.is_enabled
            and not self.is_offscreen
            and self.width() > 0
            and self.height() > 0
        )

    def path_signature(self) -> str:
        """Signature of the parent chain (used to re-locate the element).

        Fix: use .get() for 'control_type' so a partial parent entry
        cannot raise KeyError — the 'name' access was already defensive,
        making the old direct ['control_type'] subscript inconsistent.
        """
        parts = [
            f"{p.get('control_type', '')}[{p['name']}]"
            for p in self.parent_path
            if p.get("name")
        ]
        parts.append(f"{self.control_type}[{self.name}]")
        return " > ".join(parts)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict (rect becomes a JSON-friendly list)."""
        return {
            "name": self.name,
            "control_type": self.control_type,
            "class_name": self.class_name,
            "automation_id": self.automation_id,
            "bounding_rect": list(self.bounding_rect),
            "is_enabled": self.is_enabled,
            "is_offscreen": self.is_offscreen,
            "parent_path": self.parent_path,
            "process_name": self.process_name,
        }

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "UiaElement":
        """Build an element from helper JSON; malformed rects collapse to zeros."""
        rect = d.get("bounding_rect", [0, 0, 0, 0])
        if isinstance(rect, list) and len(rect) >= 4:
            rect = tuple(rect[:4])
        else:
            rect = (0, 0, 0, 0)
        return cls(
            name=d.get("name", ""),
            control_type=d.get("control_type", ""),
            class_name=d.get("class_name", ""),
            automation_id=d.get("automation_id", ""),
            bounding_rect=rect,
            is_enabled=d.get("is_enabled", False),
            is_offscreen=d.get("is_offscreen", True),
            parent_path=d.get("parent_path", []),
            process_name=d.get("process_name", ""),
        )
|
||||||
|
|
||||||
|
|
||||||
|
class UIAHelper:
    """Python wrapper around lea_uia.exe (the Rust UI Automation helper).

    Every query shells out to the helper binary and parses its JSON stdout.
    When the helper is unavailable (non-Windows host, missing binary), all
    methods return None/False so callers can fall back to vision.
    """

    def __init__(self, helper_path: str = "", timeout: float = _DEFAULT_TIMEOUT):
        # An explicitly supplied path wins; otherwise probe standard locations.
        self._helper_path = helper_path or self._find_helper()
        self._timeout = timeout
        # Computed once at construction time; exposed via the `available` property.
        self._available = self._check_available()

    def _find_helper(self) -> str:
        """Locate lea_uia.exe in the standard install/build locations.

        Returns the absolute path of the first existing candidate, or ""
        when none is found.
        """
        candidates = [
            r"C:\Lea\helpers\lea_uia.exe",
            # Local Rust build output (cross-compiled GNU target).
            os.path.join(os.path.dirname(__file__), "..", "..",
                         "agent_rust", "lea_uia", "target",
                         "x86_64-pc-windows-gnu", "release", "lea_uia.exe"),
            "./helpers/lea_uia.exe",
            "lea_uia.exe",
        ]
        for path in candidates:
            if os.path.isfile(path):
                return os.path.abspath(path)
        return ""

    def _check_available(self) -> bool:
        """Check that the helper is usable (Windows host + binary present).

        NOTE(review): the original comment mentioned a "health OK" check,
        but no `health` command is issued here — availability only means
        the binary file exists on a Windows machine. Confirm whether a
        startup health probe was intended.
        """
        if platform.system() != "Windows":
            logger.debug("UIAHelper: Linux/Mac — helper désactivé")
            return False
        if not self._helper_path:
            logger.debug("UIAHelper: lea_uia.exe introuvable")
            return False
        if not os.path.isfile(self._helper_path):
            logger.debug(f"UIAHelper: chemin invalide {self._helper_path}")
            return False
        return True

    @property
    def available(self) -> bool:
        # True when the helper binary can be invoked on this host.
        return self._available

    @property
    def helper_path(self) -> str:
        # Absolute path to lea_uia.exe, or "" when not found.
        return self._helper_path

    def _run(self, args: List[str]) -> Optional[Dict[str, Any]]:
        """Run lea_uia.exe with `args` and parse its JSON stdout.

        Returns the decoded dict, or None on any failure: helper
        unavailable, non-zero exit code, timeout, empty output, or invalid
        JSON. Failures are logged at DEBUG level only — this path is
        best-effort by design (callers fall back to vision).
        """
        if not self._available:
            return None
        try:
            result = subprocess.run(
                [self._helper_path] + args,
                capture_output=True,
                text=True,
                timeout=self._timeout,
                encoding="utf-8",
                errors="replace",
                # Suppress the console window flash on Windows (see module top).
                creationflags=_SUBPROCESS_CREATION_FLAGS,
            )
            if result.returncode != 0:
                logger.debug(
                    f"UIAHelper: exit code {result.returncode}, "
                    f"stderr: {result.stderr[:200]}"
                )
                return None
            output = result.stdout.strip()
            if not output:
                return None
            return json.loads(output)
        except subprocess.TimeoutExpired:
            logger.debug(f"UIAHelper: timeout ({self._timeout}s) sur {args}")
            return None
        except json.JSONDecodeError as e:
            logger.debug(f"UIAHelper: JSON invalide — {e}")
            return None
        except Exception as e:
            logger.debug(f"UIAHelper: erreur {e}")
            return None

    def health(self) -> bool:
        """Return True when the helper answers its `health` command with status ok."""
        data = self._run(["health"])
        return data is not None and data.get("status") == "ok"

    def query_at(
        self,
        x: int,
        y: int,
        with_parents: bool = True,
    ) -> Optional[UiaElement]:
        """Return the UIA element at an absolute screen position.

        Args:
            x, y: Absolute pixel coordinates.
            with_parents: Include the parent hierarchy in the result.

        Returns:
            UiaElement when found, None otherwise (no element or UIA down).
        """
        args = ["query", "--x", str(x), "--y", str(y)]
        if not with_parents:
            args.append("--with-parents=false")

        data = self._run(args)
        if not data or data.get("status") != "ok":
            return None

        elem_data = data.get("element")
        if not elem_data:
            return None
        return UiaElement.from_dict(elem_data)

    def find_by_name(
        self,
        name: str,
        control_type: Optional[str] = None,
        automation_id: Optional[str] = None,
        window: Optional[str] = None,
        timeout_ms: int = 2000,
    ) -> Optional[UiaElement]:
        """Find an element by its exact name (plus optional filters).

        Args:
            name: Exact element name.
            control_type: Control-type filter (Button, Edit, MenuItem...).
            automation_id: Automation-ID filter.
            window: Restrict the search to a specific window.
            timeout_ms: Search timeout in milliseconds.

        Returns:
            UiaElement when found, None otherwise.
        """
        args = ["find", "--name", name, "--timeout-ms", str(timeout_ms)]
        if control_type:
            args.extend(["--control-type", control_type])
        if automation_id:
            args.extend(["--automation-id", automation_id])
        if window:
            args.extend(["--window", window])

        data = self._run(args)
        if not data or data.get("status") != "ok":
            return None

        elem_data = data.get("element")
        if not elem_data:
            return None
        return UiaElement.from_dict(elem_data)

    def capture_focused(self, max_depth: int = 3) -> Optional[UiaElement]:
        """Capture the focused element plus up to `max_depth` levels of context."""
        data = self._run(["capture", "--max-depth", str(max_depth)])
        if not data or data.get("status") != "ok":
            return None

        elem_data = data.get("element")
        if not elem_data:
            return None
        return UiaElement.from_dict(elem_data)
|
||||||
|
|
||||||
|
|
||||||
|
# Shared global instance (lightweight singleton).
_SHARED_HELPER: Optional[UIAHelper] = None


def get_shared_helper() -> UIAHelper:
    """Return the shared UIAHelper instance, creating it lazily on first call.

    NOTE(review): not guarded by a lock — concurrent first calls could
    construct two instances; verify whether callers may race here.
    """
    global _SHARED_HELPER
    if _SHARED_HELPER is None:
        _SHARED_HELPER = UIAHelper()
    return _SHARED_HELPER
|
||||||
@@ -17,6 +17,7 @@ import threading
|
|||||||
from .config import (
|
from .config import (
|
||||||
SESSIONS_ROOT, AGENT_VERSION, SERVER_URL, MACHINE_ID, LOG_RETENTION_DAYS,
|
SESSIONS_ROOT, AGENT_VERSION, SERVER_URL, MACHINE_ID, LOG_RETENTION_DAYS,
|
||||||
SCREEN_RESOLUTION, DPI_SCALE, OS_THEME, API_TOKEN, MAX_SESSION_DURATION_S,
|
SCREEN_RESOLUTION, DPI_SCALE, OS_THEME, API_TOKEN, MAX_SESSION_DURATION_S,
|
||||||
|
STREAMING_ENDPOINT,
|
||||||
)
|
)
|
||||||
from .core.captor import EventCaptorV1
|
from .core.captor import EventCaptorV1
|
||||||
from .core.executor import ActionExecutorV1
|
from .core.executor import ActionExecutorV1
|
||||||
@@ -38,8 +39,19 @@ except (ImportError, ValueError):
|
|||||||
except ImportError:
|
except ImportError:
|
||||||
LeaServerClient = None
|
LeaServerClient = None
|
||||||
|
|
||||||
# Configuration du logging
|
# Configuration du logging — format structuré et lisible pour un TIM
|
||||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
|
# Niveau de détail : INFO par défaut, DEBUG si RPA_AGENT_DEBUG=1
|
||||||
|
_log_level = logging.DEBUG if os.environ.get("RPA_AGENT_DEBUG") == "1" else logging.INFO
|
||||||
|
logging.basicConfig(
|
||||||
|
level=_log_level,
|
||||||
|
format="%(asctime)s %(levelname)-7s %(name)-25s %(message)s",
|
||||||
|
datefmt="%H:%M:%S",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Réduire le bruit de certaines libs
|
||||||
|
for _noisy in ("urllib3", "requests.packages.urllib3", "PIL", "mss"):
|
||||||
|
logging.getLogger(_noisy).setLevel(logging.WARNING)
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# Intervalle de polling replay (secondes)
|
# Intervalle de polling replay (secondes)
|
||||||
@@ -75,22 +87,23 @@ class AgentV1:
|
|||||||
self._state.set_on_stop(self.stop_session)
|
self._state.set_on_stop(self.stop_session)
|
||||||
|
|
||||||
# Client serveur pour le chat et les workflows
|
# Client serveur pour le chat et les workflows
|
||||||
|
# Plus de RPA_SERVER_HOST : le LeaServerClient derive tout de SERVER_URL
|
||||||
self._server_client = None
|
self._server_client = None
|
||||||
if LeaServerClient is not None:
|
if LeaServerClient is not None:
|
||||||
# Forcer le token API pour éviter les 401
|
# Forcer le token API pour éviter les 401
|
||||||
# (le token est set par start.bat dans l'environnement)
|
# (le token est set par start.bat dans l'environnement)
|
||||||
from .config import API_TOKEN as _token
|
from .config import API_TOKEN as _token
|
||||||
server_host = os.getenv("RPA_SERVER_HOST", "localhost")
|
self._server_client = LeaServerClient()
|
||||||
self._server_client = LeaServerClient(server_host=server_host)
|
|
||||||
if _token and not self._server_client._api_token:
|
if _token and not self._server_client._api_token:
|
||||||
self._server_client._api_token = _token
|
self._server_client._api_token = _token
|
||||||
logger.info("Token API forcé dans LeaServerClient")
|
logger.info("Token API forcé dans LeaServerClient")
|
||||||
|
|
||||||
# Fenetre de chat Lea (tkinter natif)
|
# Fenetre de chat Lea (tkinter natif)
|
||||||
|
# Le host est derive de SERVER_URL (plus de RPA_SERVER_HOST)
|
||||||
server_host = (
|
server_host = (
|
||||||
self._server_client.server_host
|
self._server_client.server_host
|
||||||
if self._server_client is not None
|
if self._server_client is not None
|
||||||
else os.getenv("RPA_SERVER_HOST", "localhost")
|
else "localhost"
|
||||||
)
|
)
|
||||||
self._chat_window = ChatWindow(
|
self._chat_window = ChatWindow(
|
||||||
server_client=self._server_client,
|
server_client=self._server_client,
|
||||||
@@ -352,11 +365,11 @@ class AgentV1:
|
|||||||
continue
|
continue
|
||||||
self._last_bg_hash = img_hash
|
self._last_bg_hash = img_hash
|
||||||
|
|
||||||
# Envoyer au streaming server (avec token auth)
|
# Envoyer au streaming server (via STREAMING_ENDPOINT unifié)
|
||||||
headers = {"Authorization": f"Bearer {API_TOKEN}"} if API_TOKEN else {}
|
headers = {"Authorization": f"Bearer {API_TOKEN}"} if API_TOKEN else {}
|
||||||
with open(full_path, 'rb') as f:
|
with open(full_path, 'rb') as f:
|
||||||
req.post(
|
req.post(
|
||||||
f"{SERVER_URL}/traces/stream/image",
|
f"{STREAMING_ENDPOINT}/image",
|
||||||
params={
|
params={
|
||||||
"session_id": bg_session,
|
"session_id": bg_session,
|
||||||
"shot_id": f"heartbeat_{int(time.time())}",
|
"shot_id": f"heartbeat_{int(time.time())}",
|
||||||
@@ -365,18 +378,29 @@ class AgentV1:
|
|||||||
headers=headers,
|
headers=headers,
|
||||||
files={"file": ("screenshot.png", f, "image/png")},
|
files={"file": ("screenshot.png", f, "image/png")},
|
||||||
timeout=10,
|
timeout=10,
|
||||||
|
allow_redirects=False,
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug(f"[HEARTBEAT] Erreur: {e}")
|
logger.debug(f"[HEARTBEAT] Erreur: {e}")
|
||||||
time.sleep(5)
|
time.sleep(5)
|
||||||
|
|
||||||
def stop_session(self):
|
def stop_session(self):
|
||||||
# Arrêter la capture et le streaming de la session d'enregistrement
|
# Sauvegarder le session_id avant de l'annuler (pour les logs)
|
||||||
if self.captor: self.captor.stop()
|
ended_session_id = self.session_id
|
||||||
if self.streamer: self.streamer.stop()
|
|
||||||
logger.info(f"Session {self.session_id} terminée.")
|
|
||||||
|
|
||||||
# Reset le session_id pour que le poll replay utilise l'ID stable
|
# Arrêter la capture d'abord (plus d'events entrants)
|
||||||
|
if self.captor: self.captor.stop()
|
||||||
|
|
||||||
|
# Attendre que les events en cours de traitement dans _on_event_bridge
|
||||||
|
# aient le temps d'être envoyés au streamer (capture duale + push)
|
||||||
|
import time
|
||||||
|
time.sleep(1.5)
|
||||||
|
|
||||||
|
# Maintenant arrêter le streamer (drain queue + finalize)
|
||||||
|
if self.streamer: self.streamer.stop()
|
||||||
|
logger.info(f"Session {ended_session_id} terminée.")
|
||||||
|
|
||||||
|
# Reset le session_id APRÈS le stop complet du streamer
|
||||||
self.session_id = None
|
self.session_id = None
|
||||||
|
|
||||||
# Reset le backoff de l'executor pour reprendre le polling immédiatement
|
# Reset le backoff de l'executor pour reprendre le polling immédiatement
|
||||||
@@ -403,6 +427,7 @@ class AgentV1:
|
|||||||
"""Capture périodique pour donner du contexte au stagiaire.
|
"""Capture périodique pour donner du contexte au stagiaire.
|
||||||
Déduplication : n'envoie que si l'écran a changé.
|
Déduplication : n'envoie que si l'écran a changé.
|
||||||
Tourne tant que session_id est défini (= enregistrement actif).
|
Tourne tant que session_id est défini (= enregistrement actif).
|
||||||
|
Enrichi avec le titre de la fenêtre active pour contextualisation.
|
||||||
"""
|
"""
|
||||||
while self.running and self.session_id:
|
while self.running and self.session_id:
|
||||||
try:
|
try:
|
||||||
@@ -413,7 +438,17 @@ class AgentV1:
|
|||||||
if img_hash != self._last_heartbeat_hash:
|
if img_hash != self._last_heartbeat_hash:
|
||||||
self._last_heartbeat_hash = img_hash
|
self._last_heartbeat_hash = img_hash
|
||||||
self.streamer.push_image(full_path, f"heartbeat_{int(time.time())}")
|
self.streamer.push_image(full_path, f"heartbeat_{int(time.time())}")
|
||||||
self.streamer.push_event({"type": "heartbeat", "image": full_path, "timestamp": time.time(), "machine_id": self.machine_id})
|
heartbeat_event = {
|
||||||
|
"type": "heartbeat",
|
||||||
|
"image": full_path,
|
||||||
|
"timestamp": time.time(),
|
||||||
|
"machine_id": self.machine_id,
|
||||||
|
}
|
||||||
|
# Ajouter le titre de la fenêtre active (léger, pas de crop)
|
||||||
|
window_title = self.vision.get_active_window_title()
|
||||||
|
if window_title:
|
||||||
|
heartbeat_event["active_window_title"] = window_title
|
||||||
|
self.streamer.push_event(heartbeat_event)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Heartbeat error: {e}")
|
logger.error(f"Heartbeat error: {e}")
|
||||||
time.sleep(5)
|
time.sleep(5)
|
||||||
@@ -448,7 +483,7 @@ class AgentV1:
|
|||||||
event["screenshot_context"] = full_path
|
event["screenshot_context"] = full_path
|
||||||
self.streamer.push_image(full_path, f"focus_{int(time.time())}")
|
self.streamer.push_image(full_path, f"focus_{int(time.time())}")
|
||||||
|
|
||||||
# 🔴 Capture Interactive (Dual)
|
# Capture Interactive (Dual + Fenêtre active)
|
||||||
if event["type"] in ["mouse_click", "key_combo"]:
|
if event["type"] in ["mouse_click", "key_combo"]:
|
||||||
self.shot_counter += 1
|
self.shot_counter += 1
|
||||||
shot_id = f"shot_{self.shot_counter:04d}"
|
shot_id = f"shot_{self.shot_counter:04d}"
|
||||||
@@ -459,9 +494,22 @@ class AgentV1:
|
|||||||
event["screenshot_id"] = shot_id
|
event["screenshot_id"] = shot_id
|
||||||
event["vision_info"] = capture_info
|
event["vision_info"] = capture_info
|
||||||
|
|
||||||
|
# Enrichir l'event avec les métadonnées de la fenêtre active
|
||||||
|
# (titre, rect, coordonnées clic relatives, taille fenêtre)
|
||||||
|
window_capture = capture_info.get("window_capture")
|
||||||
|
if window_capture:
|
||||||
|
event["window_capture"] = {
|
||||||
|
"title": window_capture.get("window_title", ""),
|
||||||
|
"app_name": window_capture.get("app_name", ""),
|
||||||
|
"rect": window_capture.get("window_rect"),
|
||||||
|
"click_relative": window_capture.get("click_in_window"),
|
||||||
|
"window_size": window_capture.get("window_size"),
|
||||||
|
"click_inside_window": window_capture.get("click_inside_window", True),
|
||||||
|
}
|
||||||
|
|
||||||
self._stream_capture_info(capture_info, shot_id)
|
self._stream_capture_info(capture_info, shot_id)
|
||||||
|
|
||||||
# 🕒 POST-ACTION : Capture du résultat après 1s (pour voir le résultat du clic)
|
# POST-ACTION : Capture du résultat après 1s (pour voir le résultat du clic)
|
||||||
threading.Timer(1.0, self._capture_result, args=(shot_id,)).start()
|
threading.Timer(1.0, self._capture_result, args=(shot_id,)).start()
|
||||||
|
|
||||||
self.ui.update_stats(self.shot_counter)
|
self.ui.update_stats(self.shot_counter)
|
||||||
@@ -481,6 +529,12 @@ class AgentV1:
|
|||||||
self.streamer.push_image(capture_info["full"], f"{shot_id}_full")
|
self.streamer.push_image(capture_info["full"], f"{shot_id}_full")
|
||||||
if "crop" in capture_info:
|
if "crop" in capture_info:
|
||||||
self.streamer.push_image(capture_info["crop"], f"{shot_id}_crop")
|
self.streamer.push_image(capture_info["crop"], f"{shot_id}_crop")
|
||||||
|
# Streamer l'image de la fenêtre active si disponible
|
||||||
|
window_capture = capture_info.get("window_capture")
|
||||||
|
if window_capture and "window_image" in window_capture:
|
||||||
|
self.streamer.push_image(
|
||||||
|
window_capture["window_image"], f"{shot_id}_window"
|
||||||
|
)
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
self.ui.run()
|
self.ui.run()
|
||||||
|
|||||||
380
agent_v0/agent_v1/network/persistent_buffer.py
Normal file
380
agent_v0/agent_v1/network/persistent_buffer.py
Normal file
@@ -0,0 +1,380 @@
|
|||||||
|
# agent_v1/network/persistent_buffer.py
|
||||||
|
"""
|
||||||
|
Buffer persistant SQLite pour les événements/images qui n'ont pas pu être envoyés.
|
||||||
|
|
||||||
|
Résout le bloquant AI Act Article 12 : en cas de coupure serveur ou de queue pleine,
|
||||||
|
les événements prioritaires (click, key, action, screenshot) sont persistés sur disque
|
||||||
|
au lieu d'être silencieusement perdus. Ils sont rejoués à la reconnexion.
|
||||||
|
|
||||||
|
Caractéristiques :
|
||||||
|
- SQLite fichier unique (agent_v1/buffer/pending_events.db), thread-safe
|
||||||
|
- Async : les écritures se font depuis un thread daemon, jamais bloquant
|
||||||
|
- Quota : compteur d'attempts par item, abandon après MAX_ATTEMPTS
|
||||||
|
- Robustesse : un fichier corrompu est renommé et recréé vide
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations

import contextlib
import json
import logging
import os
import sqlite3
import threading
import time
from pathlib import Path
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Nombre max de tentatives avant abandon définitif d'un item
|
||||||
|
MAX_ATTEMPTS = 10
|
||||||
|
|
||||||
|
# Taille max du buffer en items pour éviter une explosion disque
|
||||||
|
# (typiquement : 1000 events + 1000 images = quelques Mo de SQLite)
|
||||||
|
MAX_BUFFER_ITEMS = 2000
|
||||||
|
|
||||||
|
|
||||||
|
class PersistentBuffer:
    """SQLite-backed buffer for events/images that could not be sent.

    Addresses the AI Act Article 12 blocker: when the server is down or the
    in-memory queue is full, priority events (click, key, action, screenshot)
    are persisted to disk instead of being silently lost, and replayed on
    reconnection.

    Two tables:
    - pending_events (id, session_id, payload, attempts, created_at)
    - pending_images (id, session_id, shot_id, image_path, attempts, created_at)

    Usage:
        buf = PersistentBuffer(base_dir / "buffer")
        buf.add_event(session_id, event_dict)        # persist an event
        buf.add_image(session_id, image_path, shot)  # persist an image ref
        for row in buf.drain_events():               # iterate pending events
            if send(row): buf.delete_event(row["id"])
            else: buf.increment_attempts(row["id"], "event")

    Thread-safety: every public method serializes on a single lock and uses a
    short-lived connection (opened and closed per call), so it can be called
    from any thread.
    """

    # Single-line DDL shared by normal init and corruption recovery, so both
    # paths create exactly the same schema.
    _EVENTS_DDL = (
        "CREATE TABLE IF NOT EXISTS pending_events ("
        "id INTEGER PRIMARY KEY AUTOINCREMENT, "
        "session_id TEXT NOT NULL, "
        "payload TEXT NOT NULL, "
        "attempts INTEGER NOT NULL DEFAULT 0, "
        "created_at REAL NOT NULL)"
    )
    _IMAGES_DDL = (
        "CREATE TABLE IF NOT EXISTS pending_images ("
        "id INTEGER PRIMARY KEY AUTOINCREMENT, "
        "session_id TEXT NOT NULL, "
        "shot_id TEXT NOT NULL, "
        "image_path TEXT NOT NULL, "
        "attempts INTEGER NOT NULL DEFAULT 0, "
        "created_at REAL NOT NULL)"
    )

    def __init__(self, buffer_dir: Path):
        self.buffer_dir = Path(buffer_dir)
        self.buffer_dir.mkdir(parents=True, exist_ok=True)
        self.db_path = self.buffer_dir / "pending_events.db"
        self._lock = threading.Lock()
        self._init_db()

    # ---------------------------------------------------------------
    # Initialisation / corruption handling
    # ---------------------------------------------------------------

    def _create_schema(self, conn: sqlite3.Connection) -> None:
        """Create both tables and their created_at indexes (idempotent)."""
        conn.execute(self._EVENTS_DDL)
        conn.execute(self._IMAGES_DDL)
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_events_created "
            "ON pending_events(created_at)"
        )
        conn.execute(
            "CREATE INDEX IF NOT EXISTS idx_images_created "
            "ON pending_images(created_at)"
        )

    def _init_db(self):
        """Create the tables if they do not exist.

        If the file is corrupted it is renamed to *.corrupted.<timestamp>
        and an empty buffer is recreated: we prefer losing an unreadable
        buffer over crashing the agent at startup.
        """
        try:
            with contextlib.closing(self._connect()) as conn:
                self._create_schema(conn)
        except sqlite3.DatabaseError as e:
            logger.warning(
                f"Buffer SQLite corrompu ({e}) — renommage en .corrupted "
                f"et recréation d'un buffer vide"
            )
            try:
                corrupted = self.db_path.with_suffix(
                    f".corrupted.{int(time.time())}"
                )
                os.rename(self.db_path, corrupted)
            except OSError:
                # If the rename fails, fall back to direct deletion.
                try:
                    os.remove(self.db_path)
                except OSError:
                    pass
            # Second attempt on a fresh (empty) file. Unlike the original
            # recovery path, the indexes are recreated too, so both paths
            # yield an identical schema.
            with contextlib.closing(self._connect()) as conn:
                self._create_schema(conn)

    def _connect(self) -> sqlite3.Connection:
        """Open a SQLite connection in WAL mode (better concurrency).

        isolation_level=None puts the connection in autocommit mode, so no
        explicit commit is needed. NOTE: ``with conn:`` only scopes a
        transaction and does NOT close the handle — callers must close the
        connection (we use ``contextlib.closing`` everywhere) or file
        handles leak on every call.
        """
        conn = sqlite3.connect(
            str(self.db_path),
            timeout=5.0,
            check_same_thread=False,
            isolation_level=None,  # autocommit — we manage transactions
        )
        try:
            conn.execute("PRAGMA journal_mode=WAL")
            conn.execute("PRAGMA synchronous=NORMAL")
        except sqlite3.DatabaseError:
            # PRAGMA failure is non-fatal; fall back to default journaling.
            pass
        conn.row_factory = sqlite3.Row
        return conn

    # ---------------------------------------------------------------
    # Writes — persist one item
    # ---------------------------------------------------------------

    def add_event(self, session_id: str, event: dict) -> bool:
        """Persist an event. Returns True if written, False otherwise.

        If the buffer holds MAX_BUFFER_ITEMS or more, the insert is dropped
        (rather than saturating the disk) and a warning is logged.
        """
        with self._lock:
            try:
                with contextlib.closing(self._connect()) as conn:
                    count = conn.execute(
                        "SELECT COUNT(*) FROM pending_events"
                    ).fetchone()[0]
                    if count >= MAX_BUFFER_ITEMS:
                        logger.warning(
                            f"Buffer persistant saturé ({count} events) "
                            f"— event droppé"
                        )
                        return False
                    conn.execute(
                        "INSERT INTO pending_events "
                        "(session_id, payload, attempts, created_at) "
                        "VALUES (?, ?, 0, ?)",
                        (session_id, json.dumps(event), time.time()),
                    )
                    return True
            except (sqlite3.DatabaseError, TypeError, ValueError) as e:
                # TypeError/ValueError: event not JSON-serializable.
                logger.error(f"Buffer add_event échoué : {e}")
                return False

    def add_image(
        self, session_id: str, image_path: str, shot_id: str
    ) -> bool:
        """Persist an image reference (file path + shot_id).

        The image bytes are NOT stored (that would bloat the DB): only the
        path. The file must therefore remain on disk until it has been
        successfully sent to the server.
        """
        with self._lock:
            try:
                with contextlib.closing(self._connect()) as conn:
                    count = conn.execute(
                        "SELECT COUNT(*) FROM pending_images"
                    ).fetchone()[0]
                    if count >= MAX_BUFFER_ITEMS:
                        logger.warning(
                            f"Buffer persistant saturé ({count} images) "
                            f"— image droppée"
                        )
                        return False
                    conn.execute(
                        "INSERT INTO pending_images "
                        "(session_id, shot_id, image_path, attempts, created_at) "
                        "VALUES (?, ?, ?, 0, ?)",
                        (session_id, shot_id, image_path, time.time()),
                    )
                    return True
            except sqlite3.DatabaseError as e:
                logger.error(f"Buffer add_image échoué : {e}")
                return False

    # ---------------------------------------------------------------
    # Reads — drain in chronological order
    # ---------------------------------------------------------------

    def drain_events(self, limit: int = 100) -> list:
        """Return pending events as dicts, oldest first."""
        with self._lock:
            try:
                with contextlib.closing(self._connect()) as conn:
                    rows = conn.execute(
                        "SELECT id, session_id, payload, attempts "
                        "FROM pending_events "
                        "ORDER BY created_at ASC LIMIT ?",
                        (limit,),
                    ).fetchall()
                    return [dict(r) for r in rows]
            except sqlite3.DatabaseError as e:
                logger.error(f"Buffer drain_events échoué : {e}")
                return []

    def drain_images(self, limit: int = 50) -> list:
        """Return pending images as dicts, oldest first."""
        with self._lock:
            try:
                with contextlib.closing(self._connect()) as conn:
                    rows = conn.execute(
                        "SELECT id, session_id, shot_id, image_path, attempts "
                        "FROM pending_images "
                        "ORDER BY created_at ASC LIMIT ?",
                        (limit,),
                    ).fetchall()
                    return [dict(r) for r in rows]
            except sqlite3.DatabaseError as e:
                logger.error(f"Buffer drain_images échoué : {e}")
                return []

    # ---------------------------------------------------------------
    # Marking — success, failure, abandonment
    # ---------------------------------------------------------------

    def delete_event(self, row_id: int):
        """Delete an event after a successful send."""
        with self._lock:
            try:
                with contextlib.closing(self._connect()) as conn:
                    conn.execute(
                        "DELETE FROM pending_events WHERE id = ?", (row_id,)
                    )
            except sqlite3.DatabaseError as e:
                logger.error(f"Buffer delete_event échoué : {e}")

    def delete_image(self, row_id: int):
        """Delete an image after a successful send."""
        with self._lock:
            try:
                with contextlib.closing(self._connect()) as conn:
                    conn.execute(
                        "DELETE FROM pending_images WHERE id = ?", (row_id,)
                    )
            except sqlite3.DatabaseError as e:
                logger.error(f"Buffer delete_image échoué : {e}")

    def increment_attempts(self, row_id: int, kind: str) -> int:
        """Increment the attempt counter. Returns the new value.

        kind: "event" or "image". On DB error (or if the row vanished),
        MAX_ATTEMPTS is returned so callers stop retrying.
        """
        # Table name comes from a closed two-value choice, never from user
        # input, so the f-string interpolation below is injection-safe.
        table = "pending_events" if kind == "event" else "pending_images"
        with self._lock:
            try:
                with contextlib.closing(self._connect()) as conn:
                    conn.execute(
                        f"UPDATE {table} SET attempts = attempts + 1 "
                        "WHERE id = ?",
                        (row_id,),
                    )
                    row = conn.execute(
                        f"SELECT attempts FROM {table} WHERE id = ?", (row_id,)
                    ).fetchone()
                    return int(row["attempts"]) if row else MAX_ATTEMPTS
            except sqlite3.DatabaseError as e:
                logger.error(f"Buffer increment_attempts échoué : {e}")
                return MAX_ATTEMPTS

    def abandon_exceeded(self) -> int:
        """Delete items whose attempts reached MAX_ATTEMPTS.

        Each abandoned item is logged at ERROR level (AI Act audit trail)
        before deletion. Returns the number of abandoned items.
        """
        abandoned = 0
        with self._lock:
            try:
                with contextlib.closing(self._connect()) as conn:
                    # Abandoned events
                    rows = conn.execute(
                        "SELECT id, session_id, payload FROM pending_events "
                        "WHERE attempts >= ?",
                        (MAX_ATTEMPTS,),
                    ).fetchall()
                    for r in rows:
                        try:
                            event_type = json.loads(r["payload"]).get(
                                "type", "?"
                            )
                        except (ValueError, TypeError):
                            event_type = "?"
                        logger.error(
                            f"Buffer : event abandonné après {MAX_ATTEMPTS} "
                            f"tentatives — session={r['session_id']} "
                            f"type={event_type}"
                        )
                        abandoned += 1
                    conn.execute(
                        "DELETE FROM pending_events WHERE attempts >= ?",
                        (MAX_ATTEMPTS,),
                    )

                    # Abandoned images
                    rows = conn.execute(
                        "SELECT id, session_id, shot_id FROM pending_images "
                        "WHERE attempts >= ?",
                        (MAX_ATTEMPTS,),
                    ).fetchall()
                    for r in rows:
                        logger.error(
                            f"Buffer : image abandonnée après {MAX_ATTEMPTS} "
                            f"tentatives — session={r['session_id']} "
                            f"shot_id={r['shot_id']}"
                        )
                        abandoned += 1
                    conn.execute(
                        "DELETE FROM pending_images WHERE attempts >= ?",
                        (MAX_ATTEMPTS,),
                    )
            except sqlite3.DatabaseError as e:
                logger.error(f"Buffer abandon_exceeded échoué : {e}")
        return abandoned

    # ---------------------------------------------------------------
    # Introspection
    # ---------------------------------------------------------------

    def counts(self) -> dict:
        """Return {"events": n, "images": n} for diagnostics.

        On DB error both counts are reported as 0 (best effort).
        """
        with self._lock:
            try:
                with contextlib.closing(self._connect()) as conn:
                    ev = conn.execute(
                        "SELECT COUNT(*) FROM pending_events"
                    ).fetchone()[0]
                    im = conn.execute(
                        "SELECT COUNT(*) FROM pending_images"
                    ).fetchone()[0]
                    return {"events": ev, "images": im}
            except sqlite3.DatabaseError:
                return {"events": 0, "images": 0}

    def is_empty(self) -> bool:
        """True when neither table holds a pending item."""
        c = self.counts()
        return c["events"] == 0 and c["images"] == 0
|
||||||
@@ -14,10 +14,19 @@ Robustesse (P0-2) :
|
|||||||
- Health-check périodique (30s) pour recovery du flag _server_available
|
- Health-check périodique (30s) pour recovery du flag _server_available
|
||||||
- Compression JPEG qualité 85 pour les images (réduction ~5-10x)
|
- Compression JPEG qualité 85 pour les images (réduction ~5-10x)
|
||||||
- Backpressure : queue bornée (maxsize=100), drop des heartbeat si pleine
|
- Backpressure : queue bornée (maxsize=100), drop des heartbeat si pleine
|
||||||
|
|
||||||
|
Conformité AI Act (Article 12 — journalisation automatique) :
|
||||||
|
- Purge après ACK : les screenshots locaux sont supprimés après HTTP 200
|
||||||
|
du serveur (par défaut). Le serveur devient la source de vérité.
|
||||||
|
- Buffer persistant : les events/images prioritaires non envoyés sont
|
||||||
|
persistés dans un SQLite local (agent_v1/buffer/pending_events.db)
|
||||||
|
et rejoués au démarrage et à la reconnexion.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import enum
|
||||||
import io
|
import io
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
import queue
|
import queue
|
||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
@@ -25,7 +34,18 @@ import time
|
|||||||
import requests
|
import requests
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
from ..config import API_TOKEN, STREAMING_ENDPOINT
|
from ..config import API_TOKEN, BASE_DIR, STREAMING_ENDPOINT
|
||||||
|
from .persistent_buffer import MAX_ATTEMPTS, PersistentBuffer
|
||||||
|
|
||||||
|
|
||||||
|
# Fix P0-E : résultat d'envoi d'image trivaleur (succès / échec réseau / fichier
|
||||||
|
# disparu). On ne doit PAS considérer un FileNotFoundError comme un succès
|
||||||
|
# HTTP 200 — sinon le buffer SQLite supprime l'entrée alors que le serveur n'a
|
||||||
|
# jamais reçu l'image (perte silencieuse).
|
||||||
|
class ImageSendResult(enum.Enum):
    """Tri-state outcome of an image upload (fix P0-E).

    A missing local file must never be confused with an HTTP 200: treating
    FileNotFoundError as success would make the SQLite buffer delete an
    entry the server never received (silent data loss).
    """

    # HTTP 200 — the server acknowledged receipt.
    OK = "ok"
    # Recoverable network/server error — retrying makes sense.
    FAILED = "failed"
    # Local file is gone — permanent failure, do not retry.
    FILE_GONE = "file_gone"
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -45,6 +65,20 @@ QUEUE_MAX_SIZE = 100
|
|||||||
# Types d'événements à ne jamais dropper
|
# Types d'événements à ne jamais dropper
|
||||||
PRIORITY_EVENT_TYPES = {"click", "key", "scroll", "action", "screenshot"}
|
PRIORITY_EVENT_TYPES = {"click", "key", "scroll", "action", "screenshot"}
|
||||||
|
|
||||||
|
# Purge locale après ACK serveur (Partie A de l'audit)
|
||||||
|
# Activé par défaut : le serveur conserve déjà les screenshots 180 jours
|
||||||
|
# (conformité AI Act Article 12). Désactivable via RPA_PURGE_AFTER_ACK=0
|
||||||
|
# pour debugging local.
|
||||||
|
PURGE_AFTER_ACK = os.environ.get("RPA_PURGE_AFTER_ACK", "1").lower() in (
|
||||||
|
"1", "true", "yes",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Chemin du buffer persistant (Partie B de l'audit)
|
||||||
|
BUFFER_DIR = BASE_DIR / "buffer"
|
||||||
|
|
||||||
|
# Intervalle entre deux tentatives de drain du buffer (secondes)
|
||||||
|
BUFFER_DRAIN_INTERVAL_S = 15
|
||||||
|
|
||||||
|
|
||||||
class TraceStreamer:
|
class TraceStreamer:
|
||||||
def __init__(self, session_id: str, machine_id: str = "default"):
|
def __init__(self, session_id: str, machine_id: str = "default"):
|
||||||
@@ -54,8 +88,20 @@ class TraceStreamer:
|
|||||||
self.running = False
|
self.running = False
|
||||||
self._thread = None
|
self._thread = None
|
||||||
self._health_thread = None
|
self._health_thread = None
|
||||||
|
self._drain_thread = None
|
||||||
self._server_available = True # Désactivé après trop d'échecs
|
self._server_available = True # Désactivé après trop d'échecs
|
||||||
|
|
||||||
|
# Buffer persistant — partagé entre sessions (survit au redémarrage)
|
||||||
|
# Initialisé paresseusement pour ne pas payer le coût SQLite en dehors
|
||||||
|
# d'un streaming actif.
|
||||||
|
self._buffer: PersistentBuffer | None = None
|
||||||
|
|
||||||
|
def _get_buffer(self) -> PersistentBuffer:
    """Return the persistent buffer, creating it on first use."""
    buf = self._buffer
    if buf is None:
        # Lazy init: the SQLite cost is only paid once streaming is active.
        buf = PersistentBuffer(BUFFER_DIR)
        self._buffer = buf
    return buf
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _auth_headers() -> dict:
|
def _auth_headers() -> dict:
|
||||||
"""Headers d'authentification Bearer pour les requêtes API."""
|
"""Headers d'authentification Bearer pour les requêtes API."""
|
||||||
@@ -75,6 +121,11 @@ class TraceStreamer:
|
|||||||
target=self._health_check_loop, daemon=True
|
target=self._health_check_loop, daemon=True
|
||||||
)
|
)
|
||||||
self._health_thread.start()
|
self._health_thread.start()
|
||||||
|
# Thread de drain du buffer persistant (rejoue les items en attente)
|
||||||
|
self._drain_thread = threading.Thread(
|
||||||
|
target=self._buffer_drain_loop, daemon=True
|
||||||
|
)
|
||||||
|
self._drain_thread.start()
|
||||||
logger.info(f"Streamer pour {self.session_id} démarré")
|
logger.info(f"Streamer pour {self.session_id} démarré")
|
||||||
|
|
||||||
def stop(self):
|
def stop(self):
|
||||||
@@ -99,6 +150,9 @@ class TraceStreamer:
|
|||||||
if self._health_thread:
|
if self._health_thread:
|
||||||
self._health_thread.join(timeout=2.0)
|
self._health_thread.join(timeout=2.0)
|
||||||
|
|
||||||
|
if self._drain_thread:
|
||||||
|
self._drain_thread.join(timeout=2.0)
|
||||||
|
|
||||||
self._finalize_session()
|
self._finalize_session()
|
||||||
logger.info(f"Streamer pour {self.session_id} arrêté")
|
logger.info(f"Streamer pour {self.session_id} arrêté")
|
||||||
|
|
||||||
@@ -126,11 +180,21 @@ class TraceStreamer:
|
|||||||
|
|
||||||
Quand la queue est pleine :
|
Quand la queue est pleine :
|
||||||
- Les événements prioritaires (click, key, action, screenshot) sont
|
- Les événements prioritaires (click, key, action, screenshot) sont
|
||||||
ajoutés en bloquant brièvement (0.5s)
|
ajoutés en bloquant brièvement (0.5s). Si toujours pleine → persistés
|
||||||
- Les heartbeat sont silencieusement droppés
|
dans le buffer SQLite pour rejeu ultérieur.
|
||||||
|
- Les heartbeat sont silencieusement droppés.
|
||||||
|
- Si le serveur est marqué indisponible, on persiste immédiatement les
|
||||||
|
items prioritaires (évite de remplir la queue inutilement).
|
||||||
"""
|
"""
|
||||||
is_priority = self._is_priority_item(item_type, data)
|
is_priority = self._is_priority_item(item_type, data)
|
||||||
|
|
||||||
|
# Serveur indisponible + item prioritaire → on persiste directement
|
||||||
|
# sans polluer la queue RAM (qui ne sera jamais vidée tant que le
|
||||||
|
# serveur est down).
|
||||||
|
if is_priority and not self._server_available:
|
||||||
|
self._persist_to_buffer(item_type, data)
|
||||||
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.queue.put_nowait((item_type, data))
|
self.queue.put_nowait((item_type, data))
|
||||||
except queue.Full:
|
except queue.Full:
|
||||||
@@ -139,9 +203,17 @@ class TraceStreamer:
|
|||||||
try:
|
try:
|
||||||
self.queue.put((item_type, data), timeout=0.5)
|
self.queue.put((item_type, data), timeout=0.5)
|
||||||
except queue.Full:
|
except queue.Full:
|
||||||
|
# Persistance disque (ne JAMAIS dropper un prioritaire)
|
||||||
|
persisted = self._persist_to_buffer(item_type, data)
|
||||||
|
if persisted:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"Queue pleine — événement prioritaire droppé "
|
f"Queue pleine — événement prioritaire persisté "
|
||||||
f"(type={item_type})"
|
f"sur disque (type={item_type})"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
logger.error(
|
||||||
|
f"Queue pleine ET buffer saturé — événement "
|
||||||
|
f"prioritaire perdu (type={item_type})"
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
# Heartbeat ou événement non-critique : on drop silencieusement
|
# Heartbeat ou événement non-critique : on drop silencieusement
|
||||||
@@ -163,6 +235,23 @@ class TraceStreamer:
|
|||||||
return event_type in PRIORITY_EVENT_TYPES
|
return event_type in PRIORITY_EVENT_TYPES
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def _persist_to_buffer(self, item_type: str, data) -> bool:
    """Write one item to the SQLite buffer for later replay. True on success.

    Invoked when the RAM queue is full or the server is unavailable.
    Any failure is swallowed and logged: the buffer must never take the
    agent down.
    """
    try:
        store = self._get_buffer()
        if item_type == "event":
            if isinstance(data, dict):
                return store.add_event(self.session_id, data)
        elif item_type == "image":
            image_path, shot_id = data
            return store.add_image(self.session_id, image_path, shot_id)
    except Exception as e:
        logger.error(f"Persistance buffer échouée : {e}")
    return False
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# Boucle d'envoi
|
# Boucle d'envoi
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
@@ -174,16 +263,36 @@ class TraceStreamer:
|
|||||||
try:
|
try:
|
||||||
item_type, data = self.queue.get(timeout=0.5)
|
item_type, data = self.queue.get(timeout=0.5)
|
||||||
success = False
|
success = False
|
||||||
|
is_file_gone = False
|
||||||
if item_type == "event":
|
if item_type == "event":
|
||||||
success = self._send_with_retry(self._send_event, data)
|
success = self._send_with_retry(self._send_event, data)
|
||||||
elif item_type == "image":
|
elif item_type == "image":
|
||||||
success = self._send_with_retry(self._send_image, *data)
|
result = self._send_with_retry(self._send_image, *data)
|
||||||
|
# Fix P0-E : distinguer FILE_GONE du vrai succès HTTP.
|
||||||
|
if result is ImageSendResult.OK:
|
||||||
|
success = True
|
||||||
|
elif result is ImageSendResult.FILE_GONE:
|
||||||
|
# Fichier disparu : pas de retry, pas de persistance
|
||||||
|
# (on ne peut plus le renvoyer). On considère l'item
|
||||||
|
# comme traité sans comptabiliser un succès réseau.
|
||||||
|
is_file_gone = True
|
||||||
|
success = False
|
||||||
|
else:
|
||||||
|
success = False
|
||||||
self.queue.task_done()
|
self.queue.task_done()
|
||||||
|
|
||||||
if success:
|
if success:
|
||||||
consecutive_failures = 0
|
consecutive_failures = 0
|
||||||
|
elif is_file_gone:
|
||||||
|
# Fichier introuvable — déjà logué ERROR dans _send_image.
|
||||||
|
# On ne persiste PAS dans le buffer (retry voué à échouer).
|
||||||
|
consecutive_failures = 0
|
||||||
else:
|
else:
|
||||||
consecutive_failures += 1
|
consecutive_failures += 1
|
||||||
|
# Après 3 retries infructueux, si l'item est prioritaire,
|
||||||
|
# on le persiste pour ne pas le perdre définitivement.
|
||||||
|
if self._is_priority_item(item_type, data):
|
||||||
|
self._persist_to_buffer(item_type, data)
|
||||||
if consecutive_failures >= 10:
|
if consecutive_failures >= 10:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"10 échecs consécutifs — serveur marqué indisponible"
|
"10 échecs consécutifs — serveur marqué indisponible"
|
||||||
@@ -200,15 +309,22 @@ class TraceStreamer:
|
|||||||
# Retry avec backoff exponentiel
|
# Retry avec backoff exponentiel
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
|
|
||||||
def _send_with_retry(self, send_fn, *args) -> bool:
|
def _send_with_retry(self, send_fn, *args):
|
||||||
"""Tente l'envoi avec retry et backoff exponentiel.
|
"""Tente l'envoi avec retry et backoff exponentiel.
|
||||||
|
|
||||||
3 tentatives max avec délais de 1s, 2s, 4s entre chaque.
|
3 tentatives max avec délais de 1s, 2s, 4s entre chaque.
|
||||||
Retourne True si l'envoi a réussi, False sinon.
|
Retourne :
|
||||||
|
- True / ImageSendResult.OK si l'envoi a réussi
|
||||||
|
- ImageSendResult.FILE_GONE (images uniquement) — pas de retry
|
||||||
|
- False / ImageSendResult.FAILED sinon
|
||||||
"""
|
"""
|
||||||
# Première tentative (sans délai)
|
# Première tentative (sans délai)
|
||||||
if send_fn(*args):
|
first = send_fn(*args)
|
||||||
return True
|
if first is ImageSendResult.OK or first is True:
|
||||||
|
return first
|
||||||
|
# Fix P0-E : FILE_GONE → pas de retry, l'erreur est permanente.
|
||||||
|
if first is ImageSendResult.FILE_GONE:
|
||||||
|
return first
|
||||||
|
|
||||||
# Retries avec backoff
|
# Retries avec backoff
|
||||||
for attempt, delay in enumerate(RETRY_DELAYS, start=1):
|
for attempt, delay in enumerate(RETRY_DELAYS, start=1):
|
||||||
@@ -219,9 +335,13 @@ class TraceStreamer:
|
|||||||
f"Retry {attempt}/{MAX_RETRIES} dans {delay}s..."
|
f"Retry {attempt}/{MAX_RETRIES} dans {delay}s..."
|
||||||
)
|
)
|
||||||
time.sleep(delay)
|
time.sleep(delay)
|
||||||
if send_fn(*args):
|
result = send_fn(*args)
|
||||||
|
if result is ImageSendResult.OK or result is True:
|
||||||
logger.debug(f"Retry {attempt} réussi")
|
logger.debug(f"Retry {attempt} réussi")
|
||||||
return True
|
return result
|
||||||
|
# FILE_GONE pendant un retry — idem, on arrête
|
||||||
|
if result is ImageSendResult.FILE_GONE:
|
||||||
|
return result
|
||||||
|
|
||||||
logger.debug(f"Envoi échoué après {MAX_RETRIES} retries")
|
logger.debug(f"Envoi échoué après {MAX_RETRIES} retries")
|
||||||
return False
|
return False
|
||||||
@@ -260,6 +380,115 @@ class TraceStreamer:
|
|||||||
except Exception:
|
except Exception:
|
||||||
logger.debug("Health-check échoué — serveur toujours indisponible")
|
logger.debug("Health-check échoué — serveur toujours indisponible")
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Drain du buffer persistant (Partie B)
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
def _buffer_drain_loop(self):
    """Background replay of persisted items.

    Runs while self.running. The very first pass (agent startup) drains
    immediately to replay anything persisted by a previous session; each
    later pass waits BUFFER_DRAIN_INTERVAL_S seconds and only drains when
    the server is available and items are actually pending.
    """
    wait_first = False  # no wait before the startup pass
    while self.running:
        if wait_first:
            time.sleep(BUFFER_DRAIN_INTERVAL_S)
            # Re-check after sleeping: stop() may have been called.
            if not self.running:
                break
        wait_first = True

        if not self._server_available:
            continue

        try:
            buf = self._get_buffer()
            # Drop quota-exceeded items first so they are never retried
            # below.
            abandoned = buf.abandon_exceeded()
            if abandoned:
                logger.warning(
                    f"Buffer : {abandoned} items abandonnés "
                    f"après {MAX_ATTEMPTS} tentatives"
                )

            counts = buf.counts()
            if not (counts["events"] or counts["images"]):
                continue

            logger.info(
                f"Buffer drain : {counts['events']} events, "
                f"{counts['images']} images en attente — rejeu"
            )
            self._drain_buffer_once(buf)
        except Exception as e:
            logger.error(f"Buffer drain loop échoué : {e}")
|
||||||
|
|
||||||
|
def _drain_buffer_once(self, buf: PersistentBuffer):
|
||||||
|
"""Une passe de drain : envoie ce qui peut l'être, incrémente le reste.
|
||||||
|
|
||||||
|
On arrête dès qu'un envoi échoue (serveur probablement down).
|
||||||
|
"""
|
||||||
|
# Events d'abord (plus légers, priorité métier AI Act)
|
||||||
|
for row in buf.drain_events(limit=50):
|
||||||
|
if not self._server_available:
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
import json as _json
|
||||||
|
event = _json.loads(row["payload"])
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
logger.error(
|
||||||
|
f"Buffer : payload event #{row['id']} corrompu, suppression"
|
||||||
|
)
|
||||||
|
buf.delete_event(row["id"])
|
||||||
|
continue
|
||||||
|
if self._send_event(event):
|
||||||
|
buf.delete_event(row["id"])
|
||||||
|
else:
|
||||||
|
buf.increment_attempts(row["id"], "event")
|
||||||
|
# Serveur répond mal — on arrête la passe
|
||||||
|
return
|
||||||
|
|
||||||
|
# Puis images
|
||||||
|
for row in buf.drain_images(limit=20):
|
||||||
|
if not self._server_available:
|
||||||
|
return
|
||||||
|
image_path = row["image_path"]
|
||||||
|
shot_id = row["shot_id"]
|
||||||
|
if not os.path.exists(image_path):
|
||||||
|
# Fichier local disparu (purge, clean-up) — on abandonne.
|
||||||
|
# Fix P0-E : log ERROR (pas warning) — c'est une perte de donnée.
|
||||||
|
logger.error(
|
||||||
|
f"Buffer : image #{row['id']} introuvable sur disque "
|
||||||
|
f"({image_path}) — entrée abandonnée (le serveur n'a "
|
||||||
|
f"jamais reçu cette image, session={row['session_id']}, "
|
||||||
|
f"shot={shot_id})"
|
||||||
|
)
|
||||||
|
buf.delete_image(row["id"])
|
||||||
|
continue
|
||||||
|
result = self._send_image(image_path, shot_id)
|
||||||
|
if result is ImageSendResult.OK or result is True:
|
||||||
|
buf.delete_image(row["id"])
|
||||||
|
elif result is ImageSendResult.FILE_GONE:
|
||||||
|
# Fix P0-E : fichier disparu pendant l'envoi.
|
||||||
|
# Ce n'est PAS un succès HTTP — ne pas considérer comme tel.
|
||||||
|
# On supprime néanmoins l'entrée (retry voué à échouer)
|
||||||
|
# mais avec un log ERROR explicite.
|
||||||
|
logger.error(
|
||||||
|
f"Buffer : image #{row['id']} disparue pendant l'envoi "
|
||||||
|
f"({image_path}) — entrée abandonnée, pas de retry "
|
||||||
|
f"(session={row['session_id']}, shot={shot_id})"
|
||||||
|
)
|
||||||
|
buf.delete_image(row["id"])
|
||||||
|
else:
|
||||||
|
buf.increment_attempts(row["id"], "image")
|
||||||
|
return
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# Compression JPEG
|
# Compression JPEG
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
@@ -287,6 +516,56 @@ class TraceStreamer:
|
|||||||
logger.warning(f"Compression JPEG échouée, envoi PNG brut: {e}")
|
logger.warning(f"Compression JPEG échouée, envoi PNG brut: {e}")
|
||||||
return None, None, None
|
return None, None, None
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Purge locale après ACK (Partie A)
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _purge_local_image(path: str):
|
||||||
|
"""Supprime un screenshot local après ACK 200 du serveur.
|
||||||
|
|
||||||
|
Ne crashe JAMAIS si le fichier est verrouillé (cas Windows) ou
|
||||||
|
déjà supprimé : on log en debug et on continue. L'auto-cleanup
|
||||||
|
de SessionStorage repassera plus tard.
|
||||||
|
"""
|
||||||
|
if not PURGE_AFTER_ACK:
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
os.remove(path)
|
||||||
|
logger.debug(f"Screenshot local purgé après ACK : {path}")
|
||||||
|
except FileNotFoundError:
|
||||||
|
# Déjà supprimé ou chemin invalide — silencieux
|
||||||
|
pass
|
||||||
|
except PermissionError as e:
|
||||||
|
# Windows verrouille parfois les fichiers (antivirus, indexation...)
|
||||||
|
logger.debug(
|
||||||
|
f"Purge différée (fichier verrouillé) : {path} — {e}"
|
||||||
|
)
|
||||||
|
except OSError as e:
|
||||||
|
logger.debug(f"Purge échouée : {path} — {e}")
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Protection redirect POST→GET (INC-7)
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _check_redirect(resp, url: str):
|
||||||
|
"""Detecter et logger une redirection sur un POST.
|
||||||
|
|
||||||
|
La lib requests transforme un POST en GET sur 301/302 (RFC 7231).
|
||||||
|
Avec allow_redirects=False, on recoit le 301/302 directement.
|
||||||
|
On log un WARNING explicite pour que l'admin corrige l'URL.
|
||||||
|
"""
|
||||||
|
if resp.status_code in (301, 302, 307, 308):
|
||||||
|
location = resp.headers.get("Location", "?")
|
||||||
|
logger.warning(
|
||||||
|
f"Redirection {resp.status_code} detectee sur POST {url} "
|
||||||
|
f"→ {location}. Verifiez que RPA_SERVER_URL utilise "
|
||||||
|
f"https:// si le serveur redirige."
|
||||||
|
)
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# Envois HTTP
|
# Envois HTTP
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
@@ -294,15 +573,20 @@ class TraceStreamer:
|
|||||||
def _register_session(self):
|
def _register_session(self):
|
||||||
"""Enregistrer la session auprès du serveur (avec identifiant machine)."""
|
"""Enregistrer la session auprès du serveur (avec identifiant machine)."""
|
||||||
try:
|
try:
|
||||||
|
url = f"{STREAMING_ENDPOINT}/register"
|
||||||
resp = requests.post(
|
resp = requests.post(
|
||||||
f"{STREAMING_ENDPOINT}/register",
|
url,
|
||||||
params={
|
params={
|
||||||
"session_id": self.session_id,
|
"session_id": self.session_id,
|
||||||
"machine_id": self.machine_id,
|
"machine_id": self.machine_id,
|
||||||
},
|
},
|
||||||
headers=self._auth_headers(),
|
headers=self._auth_headers(),
|
||||||
timeout=3,
|
timeout=3,
|
||||||
|
allow_redirects=False,
|
||||||
)
|
)
|
||||||
|
if self._check_redirect(resp, url):
|
||||||
|
logger.warning("Enregistrement session échoué (redirect)")
|
||||||
|
return
|
||||||
if resp.ok:
|
if resp.ok:
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Session {self.session_id} enregistrée sur le serveur "
|
f"Session {self.session_id} enregistrée sur le serveur "
|
||||||
@@ -322,28 +606,32 @@ class TraceStreamer:
|
|||||||
C'est la dernière chance de sauver les données de la session.
|
C'est la dernière chance de sauver les données de la session.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
|
url = f"{STREAMING_ENDPOINT}/finalize"
|
||||||
resp = requests.post(
|
resp = requests.post(
|
||||||
f"{STREAMING_ENDPOINT}/finalize",
|
url,
|
||||||
params={
|
params={
|
||||||
"session_id": self.session_id,
|
"session_id": self.session_id,
|
||||||
"machine_id": self.machine_id,
|
"machine_id": self.machine_id,
|
||||||
},
|
},
|
||||||
headers=self._auth_headers(),
|
headers=self._auth_headers(),
|
||||||
timeout=30, # Le build workflow peut prendre du temps
|
timeout=30, # Le build workflow peut prendre du temps
|
||||||
|
allow_redirects=False,
|
||||||
)
|
)
|
||||||
|
self._check_redirect(resp, url)
|
||||||
if resp.ok:
|
if resp.ok:
|
||||||
result = resp.json()
|
result = resp.json()
|
||||||
logger.info(f"Session finalisée: {result}")
|
logger.info(f"Session finalisée: {result}")
|
||||||
else:
|
else:
|
||||||
logger.warning(f"Finalisation échouée: {resp.status_code}")
|
logger.warning(f"Finalisation échouée: {resp.status_code}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug(f"Finalisation échouée: {e}")
|
logger.warning(f"Finalisation échouée: {e}")
|
||||||
|
|
||||||
def _send_event(self, event: dict) -> bool:
|
def _send_event(self, event: dict) -> bool:
|
||||||
"""Envoyer un événement au serveur (avec identifiant machine)."""
|
"""Envoyer un événement au serveur (avec identifiant machine)."""
|
||||||
if not self._server_available:
|
if not self._server_available:
|
||||||
return False
|
return False
|
||||||
try:
|
try:
|
||||||
|
url = f"{STREAMING_ENDPOINT}/event"
|
||||||
payload = {
|
payload = {
|
||||||
"session_id": self.session_id,
|
"session_id": self.session_id,
|
||||||
"timestamp": time.time(),
|
"timestamp": time.time(),
|
||||||
@@ -351,24 +639,36 @@ class TraceStreamer:
|
|||||||
"machine_id": self.machine_id,
|
"machine_id": self.machine_id,
|
||||||
}
|
}
|
||||||
resp = requests.post(
|
resp = requests.post(
|
||||||
f"{STREAMING_ENDPOINT}/event",
|
url,
|
||||||
json=payload,
|
json=payload,
|
||||||
headers=self._auth_headers(),
|
headers=self._auth_headers(),
|
||||||
timeout=2,
|
timeout=2,
|
||||||
|
allow_redirects=False,
|
||||||
)
|
)
|
||||||
|
if self._check_redirect(resp, url):
|
||||||
|
return False
|
||||||
return resp.ok
|
return resp.ok
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug(f"Streaming Event échoué: {e}")
|
logger.debug(f"Streaming Event échoué: {e}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def _send_image(self, path: str, shot_id: str) -> bool:
|
def _send_image(self, path: str, shot_id: str):
|
||||||
"""Envoyer un screenshot au serveur, compressé en JPEG.
|
"""Envoyer un screenshot au serveur, compressé en JPEG.
|
||||||
|
|
||||||
Utilise un context manager pour le fallback PNG afin d'éviter
|
Utilise un context manager pour le fallback PNG afin d'éviter
|
||||||
les fuites de descripteurs de fichier.
|
les fuites de descripteurs de fichier.
|
||||||
|
|
||||||
|
Partie A (purge après ACK) : en cas de HTTP 200 confirmé, le fichier
|
||||||
|
local est supprimé (le serveur devient la source de vérité).
|
||||||
|
|
||||||
|
Fix P0-E : retourne `ImageSendResult` (OK / FAILED / FILE_GONE).
|
||||||
|
Les appelants historiques qui attendaient un bool continuent de
|
||||||
|
fonctionner grâce à la truthiness du enum (OK → True, reste → False),
|
||||||
|
MAIS le drain du buffer doit désormais discriminer FILE_GONE pour
|
||||||
|
ne pas confondre "fichier disparu" avec "envoyé avec succès".
|
||||||
"""
|
"""
|
||||||
if not self._server_available:
|
if not self._server_available:
|
||||||
return False
|
return ImageSendResult.FAILED
|
||||||
try:
|
try:
|
||||||
# Tenter la compression JPEG (réduction ~5-10x vs PNG)
|
# Tenter la compression JPEG (réduction ~5-10x vs PNG)
|
||||||
jpeg_buf, content_type, suffix = self._compress_image_to_jpeg(path)
|
jpeg_buf, content_type, suffix = self._compress_image_to_jpeg(path)
|
||||||
@@ -379,19 +679,26 @@ class TraceStreamer:
|
|||||||
"machine_id": self.machine_id,
|
"machine_id": self.machine_id,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
url = f"{STREAMING_ENDPOINT}/image"
|
||||||
if jpeg_buf is not None:
|
if jpeg_buf is not None:
|
||||||
# Envoi du JPEG compressé (BytesIO, pas de fuite possible)
|
# Envoi du JPEG compressé (BytesIO, pas de fuite possible)
|
||||||
files = {
|
files = {
|
||||||
"file": (f"{shot_id}{suffix}", jpeg_buf, content_type)
|
"file": (f"{shot_id}{suffix}", jpeg_buf, content_type)
|
||||||
}
|
}
|
||||||
resp = requests.post(
|
resp = requests.post(
|
||||||
f"{STREAMING_ENDPOINT}/image",
|
url,
|
||||||
files=files,
|
files=files,
|
||||||
params=params,
|
params=params,
|
||||||
headers=self._auth_headers(),
|
headers=self._auth_headers(),
|
||||||
timeout=5,
|
timeout=5,
|
||||||
|
allow_redirects=False,
|
||||||
)
|
)
|
||||||
return resp.ok
|
if self._check_redirect(resp, url):
|
||||||
|
return ImageSendResult.FAILED
|
||||||
|
if resp.ok:
|
||||||
|
self._purge_local_image(path)
|
||||||
|
return ImageSendResult.OK
|
||||||
|
return ImageSendResult.FAILED
|
||||||
else:
|
else:
|
||||||
# Fallback : envoi PNG original avec context manager
|
# Fallback : envoi PNG original avec context manager
|
||||||
with open(path, "rb") as f:
|
with open(path, "rb") as f:
|
||||||
@@ -399,13 +706,29 @@ class TraceStreamer:
|
|||||||
"file": (f"{shot_id}.png", f, "image/png")
|
"file": (f"{shot_id}.png", f, "image/png")
|
||||||
}
|
}
|
||||||
resp = requests.post(
|
resp = requests.post(
|
||||||
f"{STREAMING_ENDPOINT}/image",
|
url,
|
||||||
files=files,
|
files=files,
|
||||||
params=params,
|
params=params,
|
||||||
headers=self._auth_headers(),
|
headers=self._auth_headers(),
|
||||||
timeout=5,
|
timeout=5,
|
||||||
|
allow_redirects=False,
|
||||||
)
|
)
|
||||||
return resp.ok
|
if self._check_redirect(resp, url):
|
||||||
|
return ImageSendResult.FAILED
|
||||||
|
if resp.ok:
|
||||||
|
self._purge_local_image(path)
|
||||||
|
return ImageSendResult.OK
|
||||||
|
return ImageSendResult.FAILED
|
||||||
|
except FileNotFoundError:
|
||||||
|
# Fix P0-E : fichier local disparu. On NE doit PAS considérer ça
|
||||||
|
# comme un succès HTTP 200. Le serveur n'a rien reçu. On signale
|
||||||
|
# `FILE_GONE` pour que le drain du buffer supprime l'entrée
|
||||||
|
# (pas de retry possible) tout en loguant ERROR (pas debug).
|
||||||
|
logger.error(
|
||||||
|
f"Image {shot_id} introuvable sur disque ({path}) — "
|
||||||
|
f"abandon (serveur n'a rien reçu)"
|
||||||
|
)
|
||||||
|
return ImageSendResult.FILE_GONE
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug(f"Streaming Image échoué: {e}")
|
logger.debug(f"Streaming Image échoué: {e}")
|
||||||
return False
|
return ImageSendResult.FAILED
|
||||||
|
|||||||
418
agent_v0/agent_v1/ui/activity_panel.py
Normal file
418
agent_v0/agent_v1/ui/activity_panel.py
Normal file
@@ -0,0 +1,418 @@
|
|||||||
|
# agent_v1/ui/activity_panel.py
|
||||||
|
"""
|
||||||
|
Panel d'activité temps réel de Léa.
|
||||||
|
|
||||||
|
Affiche à l'utilisateur ce que Léa fait *maintenant* :
|
||||||
|
- État courant (Observe / Cherche / Agit / Vérifie / Bloquée)
|
||||||
|
- Action en cours (ex: "Clic sur Rechercher")
|
||||||
|
- Progression (ex: "3/15")
|
||||||
|
- Temps écoulé depuis le début du workflow
|
||||||
|
|
||||||
|
Contraintes :
|
||||||
|
- Fallback silencieux si tkinter absent (ne crash jamais)
|
||||||
|
- Thread-safe (mises à jour depuis les threads de replay)
|
||||||
|
- Pas de dépendance à PyQt5 (seulement tkinter, déjà utilisé par chat_window)
|
||||||
|
|
||||||
|
Utilisation :
|
||||||
|
panel = ActivityPanel()
|
||||||
|
panel.definir_workflow("Saisie patient", nb_etapes=15)
|
||||||
|
panel.mettre_a_jour(etat=EtatLea.AGIT, action="Clic sur Valider", etape=3)
|
||||||
|
panel.masquer()
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from enum import Enum
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class EtatLea(Enum):
|
||||||
|
"""États macroscopiques de Léa pendant un replay."""
|
||||||
|
|
||||||
|
INACTIVE = ("inactive", "Prête", "#808080") # Gris
|
||||||
|
OBSERVE = ("observe", "Observe", "#4A90E2") # Bleu
|
||||||
|
CHERCHE = ("cherche", "Cherche", "#F5A623") # Orange
|
||||||
|
AGIT = ("agit", "Agit", "#7ED321") # Vert
|
||||||
|
VERIFIE = ("verifie", "Vérifie", "#9013FE") # Violet
|
||||||
|
BLOQUEE = ("bloquee", "Bloquée", "#D0021B") # Rouge
|
||||||
|
TERMINE = ("termine", "Terminé", "#50E3C2") # Turquoise
|
||||||
|
|
||||||
|
def __init__(self, code: str, libelle: str, couleur: str) -> None:
|
||||||
|
self.code = code
|
||||||
|
self.libelle = libelle
|
||||||
|
self.couleur = couleur
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class EtatActivite:
|
||||||
|
"""Instantané de l'activité courante de Léa.
|
||||||
|
|
||||||
|
Utilisé par le panel et exposé par `ActivityPanel.snapshot()` pour les
|
||||||
|
tests (sans dépendre de tkinter).
|
||||||
|
"""
|
||||||
|
|
||||||
|
etat: EtatLea = EtatLea.INACTIVE
|
||||||
|
action_courante: str = ""
|
||||||
|
nom_workflow: str = ""
|
||||||
|
etape: int = 0
|
||||||
|
nb_etapes: int = 0
|
||||||
|
debut_timestamp: float = 0.0
|
||||||
|
dernier_message: str = ""
|
||||||
|
|
||||||
|
def temps_ecoule_s(self) -> float:
|
||||||
|
"""Temps écoulé depuis le début du workflow (secondes)."""
|
||||||
|
if self.debut_timestamp <= 0:
|
||||||
|
return 0.0
|
||||||
|
return max(0.0, time.time() - self.debut_timestamp)
|
||||||
|
|
||||||
|
def progression_texte(self) -> str:
|
||||||
|
"""Représentation textuelle de la progression (ex: '3/15')."""
|
||||||
|
if self.nb_etapes <= 0:
|
||||||
|
return ""
|
||||||
|
return f"{self.etape}/{self.nb_etapes}"
|
||||||
|
|
||||||
|
def temps_ecoule_texte(self) -> str:
|
||||||
|
"""Représentation humaine du temps écoulé (ex: '12s', '1m24s')."""
|
||||||
|
s = int(self.temps_ecoule_s())
|
||||||
|
if s < 60:
|
||||||
|
return f"{s}s"
|
||||||
|
return f"{s // 60}m{s % 60:02d}s"
|
||||||
|
|
||||||
|
def to_dict(self) -> dict:
|
||||||
|
"""Sérialiser pour le logging et les tests."""
|
||||||
|
return {
|
||||||
|
"etat": self.etat.code,
|
||||||
|
"etat_libelle": self.etat.libelle,
|
||||||
|
"action_courante": self.action_courante,
|
||||||
|
"nom_workflow": self.nom_workflow,
|
||||||
|
"etape": self.etape,
|
||||||
|
"nb_etapes": self.nb_etapes,
|
||||||
|
"progression": self.progression_texte(),
|
||||||
|
"temps_ecoule_s": round(self.temps_ecoule_s(), 1),
|
||||||
|
"dernier_message": self.dernier_message,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class ActivityPanel:
|
||||||
|
"""Panel d'activité de Léa.
|
||||||
|
|
||||||
|
Thread-safe. Le panel tkinter est créé à la demande (lazy) et uniquement
|
||||||
|
si tkinter est disponible. Toutes les méthodes sont safe à appeler même
|
||||||
|
si l'UI n'est pas dispo (fallback silencieux).
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, activer_ui: bool = True) -> None:
|
||||||
|
self._lock = threading.RLock()
|
||||||
|
self._etat = EtatActivite()
|
||||||
|
self._activer_ui = activer_ui
|
||||||
|
# UI tkinter (créée à la demande dans le thread UI)
|
||||||
|
self._tk_root = None
|
||||||
|
self._tk_labels: dict = {}
|
||||||
|
self._ui_disponible = None # Lazy : résolu au premier usage
|
||||||
|
self._listeners = [] # Callbacks pour les changements d'état
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# API publique (thread-safe)
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def definir_workflow(self, nom: str, nb_etapes: int = 0) -> None:
|
||||||
|
"""Démarrer le suivi d'un nouveau workflow."""
|
||||||
|
with self._lock:
|
||||||
|
self._etat = EtatActivite(
|
||||||
|
etat=EtatLea.OBSERVE,
|
||||||
|
nom_workflow=nom,
|
||||||
|
nb_etapes=nb_etapes,
|
||||||
|
debut_timestamp=time.time(),
|
||||||
|
)
|
||||||
|
self._notifier_changement()
|
||||||
|
self._rafraichir_ui()
|
||||||
|
logger.info(f"[ACTIVITY] Workflow démarré : {nom} ({nb_etapes} étapes)")
|
||||||
|
|
||||||
|
def mettre_a_jour(
|
||||||
|
self,
|
||||||
|
etat: Optional[EtatLea] = None,
|
||||||
|
action: Optional[str] = None,
|
||||||
|
etape: Optional[int] = None,
|
||||||
|
message: Optional[str] = None,
|
||||||
|
) -> None:
|
||||||
|
"""Mettre à jour l'état affiché.
|
||||||
|
|
||||||
|
Tous les paramètres sont optionnels — on ne met à jour que ce qui est
|
||||||
|
fourni. Les autres champs conservent leur valeur actuelle.
|
||||||
|
"""
|
||||||
|
with self._lock:
|
||||||
|
if etat is not None:
|
||||||
|
self._etat.etat = etat
|
||||||
|
if action is not None:
|
||||||
|
self._etat.action_courante = action
|
||||||
|
if etape is not None:
|
||||||
|
self._etat.etape = etape
|
||||||
|
if message is not None:
|
||||||
|
self._etat.dernier_message = message
|
||||||
|
|
||||||
|
self._notifier_changement()
|
||||||
|
self._rafraichir_ui()
|
||||||
|
|
||||||
|
def terminer(self, succes: bool = True) -> None:
|
||||||
|
"""Marquer le workflow comme terminé."""
|
||||||
|
with self._lock:
|
||||||
|
self._etat.etat = EtatLea.TERMINE if succes else EtatLea.BLOQUEE
|
||||||
|
if not succes:
|
||||||
|
self._etat.dernier_message = (
|
||||||
|
self._etat.dernier_message or "Léa a rendu la main"
|
||||||
|
)
|
||||||
|
self._notifier_changement()
|
||||||
|
self._rafraichir_ui()
|
||||||
|
|
||||||
|
def reinitialiser(self) -> None:
|
||||||
|
"""Remettre le panel en état inactif."""
|
||||||
|
with self._lock:
|
||||||
|
self._etat = EtatActivite()
|
||||||
|
self._notifier_changement()
|
||||||
|
self._rafraichir_ui()
|
||||||
|
|
||||||
|
def snapshot(self) -> EtatActivite:
|
||||||
|
"""Obtenir un instantané immuable de l'état courant (pour les tests)."""
|
||||||
|
with self._lock:
|
||||||
|
return EtatActivite(
|
||||||
|
etat=self._etat.etat,
|
||||||
|
action_courante=self._etat.action_courante,
|
||||||
|
nom_workflow=self._etat.nom_workflow,
|
||||||
|
etape=self._etat.etape,
|
||||||
|
nb_etapes=self._etat.nb_etapes,
|
||||||
|
debut_timestamp=self._etat.debut_timestamp,
|
||||||
|
dernier_message=self._etat.dernier_message,
|
||||||
|
)
|
||||||
|
|
||||||
|
def masquer(self) -> None:
|
||||||
|
"""Masquer le panel UI si affiché."""
|
||||||
|
if self._tk_root is not None:
|
||||||
|
try:
|
||||||
|
self._tk_root.withdraw()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def afficher(self) -> None:
|
||||||
|
"""Afficher le panel UI si disponible."""
|
||||||
|
self._creer_ui_si_besoin()
|
||||||
|
if self._tk_root is not None:
|
||||||
|
try:
|
||||||
|
self._tk_root.deiconify()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
def on_change(self, callback) -> None:
|
||||||
|
"""Enregistrer un listener appelé à chaque changement d'état."""
|
||||||
|
with self._lock:
|
||||||
|
self._listeners.append(callback)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Gestion UI tkinter (lazy, fallback silencieux)
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _creer_ui_si_besoin(self) -> None:
|
||||||
|
"""Créer la fenêtre tkinter au premier usage (lazy)."""
|
||||||
|
if not self._activer_ui:
|
||||||
|
return
|
||||||
|
if self._tk_root is not None:
|
||||||
|
return
|
||||||
|
if self._ui_disponible is False:
|
||||||
|
return # Déjà testé et indisponible
|
||||||
|
|
||||||
|
try:
|
||||||
|
import tkinter as tk
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"[ACTIVITY] tkinter indisponible : {e}")
|
||||||
|
self._ui_disponible = False
|
||||||
|
return
|
||||||
|
|
||||||
|
try:
|
||||||
|
self._tk_root = tk.Toplevel() if _tk_root_existe() else tk.Tk()
|
||||||
|
self._tk_root.title("Léa — Activité")
|
||||||
|
self._tk_root.geometry("340x180+40+40")
|
||||||
|
self._tk_root.attributes("-topmost", True)
|
||||||
|
self._tk_root.resizable(False, False)
|
||||||
|
self._tk_root.configure(bg="#1E1E1E")
|
||||||
|
|
||||||
|
titre = tk.Label(
|
||||||
|
self._tk_root,
|
||||||
|
text="Léa",
|
||||||
|
font=("Segoe UI", 14, "bold"),
|
||||||
|
fg="#FFFFFF",
|
||||||
|
bg="#1E1E1E",
|
||||||
|
)
|
||||||
|
titre.pack(pady=(10, 2))
|
||||||
|
|
||||||
|
self._tk_labels["etat"] = tk.Label(
|
||||||
|
self._tk_root,
|
||||||
|
text="Prête",
|
||||||
|
font=("Segoe UI", 11),
|
||||||
|
fg="#808080",
|
||||||
|
bg="#1E1E1E",
|
||||||
|
)
|
||||||
|
self._tk_labels["etat"].pack()
|
||||||
|
|
||||||
|
self._tk_labels["action"] = tk.Label(
|
||||||
|
self._tk_root,
|
||||||
|
text="",
|
||||||
|
font=("Segoe UI", 10),
|
||||||
|
fg="#FFFFFF",
|
||||||
|
bg="#1E1E1E",
|
||||||
|
wraplength=300,
|
||||||
|
)
|
||||||
|
self._tk_labels["action"].pack(pady=(8, 2))
|
||||||
|
|
||||||
|
self._tk_labels["progression"] = tk.Label(
|
||||||
|
self._tk_root,
|
||||||
|
text="",
|
||||||
|
font=("Segoe UI", 9),
|
||||||
|
fg="#B0B0B0",
|
||||||
|
bg="#1E1E1E",
|
||||||
|
)
|
||||||
|
self._tk_labels["progression"].pack()
|
||||||
|
|
||||||
|
self._tk_labels["temps"] = tk.Label(
|
||||||
|
self._tk_root,
|
||||||
|
text="",
|
||||||
|
font=("Segoe UI", 9),
|
||||||
|
fg="#808080",
|
||||||
|
bg="#1E1E1E",
|
||||||
|
)
|
||||||
|
self._tk_labels["temps"].pack(pady=(4, 0))
|
||||||
|
|
||||||
|
self._tk_labels["message"] = tk.Label(
|
||||||
|
self._tk_root,
|
||||||
|
text="",
|
||||||
|
font=("Segoe UI", 9, "italic"),
|
||||||
|
fg="#B0B0B0",
|
||||||
|
bg="#1E1E1E",
|
||||||
|
wraplength=300,
|
||||||
|
)
|
||||||
|
self._tk_labels["message"].pack(pady=(6, 10))
|
||||||
|
|
||||||
|
# Masquer par défaut : on affiche seulement pendant un workflow
|
||||||
|
self._tk_root.withdraw()
|
||||||
|
self._ui_disponible = True
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"[ACTIVITY] Impossible de créer l'UI : {e}")
|
||||||
|
self._ui_disponible = False
|
||||||
|
self._tk_root = None
|
||||||
|
|
||||||
|
def _rafraichir_ui(self) -> None:
|
||||||
|
"""Mettre à jour les labels tkinter (safe si l'UI n'existe pas)."""
|
||||||
|
if not self._activer_ui or self._ui_disponible is False:
|
||||||
|
return
|
||||||
|
self._creer_ui_si_besoin()
|
||||||
|
if self._tk_root is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
try:
|
||||||
|
with self._lock:
|
||||||
|
snap = self.snapshot()
|
||||||
|
|
||||||
|
# Utiliser after(0) pour rester dans le thread UI tkinter
|
||||||
|
def _update():
|
||||||
|
try:
|
||||||
|
self._tk_labels["etat"].config(
|
||||||
|
text=snap.etat.libelle,
|
||||||
|
fg=snap.etat.couleur,
|
||||||
|
)
|
||||||
|
if snap.action_courante:
|
||||||
|
self._tk_labels["action"].config(text=snap.action_courante)
|
||||||
|
else:
|
||||||
|
self._tk_labels["action"].config(text="")
|
||||||
|
|
||||||
|
prog = snap.progression_texte()
|
||||||
|
if prog and snap.nom_workflow:
|
||||||
|
self._tk_labels["progression"].config(
|
||||||
|
text=f"« {snap.nom_workflow} » — {prog}"
|
||||||
|
)
|
||||||
|
elif snap.nom_workflow:
|
||||||
|
self._tk_labels["progression"].config(
|
||||||
|
text=f"« {snap.nom_workflow} »"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
self._tk_labels["progression"].config(text="")
|
||||||
|
|
||||||
|
if snap.debut_timestamp > 0:
|
||||||
|
self._tk_labels["temps"].config(
|
||||||
|
text=f"⏱ {snap.temps_ecoule_texte()}"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
self._tk_labels["temps"].config(text="")
|
||||||
|
|
||||||
|
self._tk_labels["message"].config(text=snap.dernier_message)
|
||||||
|
|
||||||
|
# Afficher automatiquement si actif
|
||||||
|
if snap.etat != EtatLea.INACTIVE:
|
||||||
|
self._tk_root.deiconify()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
try:
|
||||||
|
self._tk_root.after(0, _update)
|
||||||
|
except Exception:
|
||||||
|
# Si le root a été détruit
|
||||||
|
self._tk_root = None
|
||||||
|
self._ui_disponible = False
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"[ACTIVITY] Erreur rafraîchissement UI : {e}")
|
||||||
|
|
||||||
|
def _notifier_changement(self) -> None:
|
||||||
|
"""Notifier tous les listeners du changement d'état."""
|
||||||
|
with self._lock:
|
||||||
|
listeners = list(self._listeners)
|
||||||
|
snap = self.snapshot()
|
||||||
|
|
||||||
|
for cb in listeners:
|
||||||
|
try:
|
||||||
|
cb(snap)
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"[ACTIVITY] Listener erreur : {e}")
|
||||||
|
|
||||||
|
|
||||||
|
def _tk_root_existe() -> bool:
|
||||||
|
"""Vérifier si un root tkinter existe déjà (pour créer un Toplevel)."""
|
||||||
|
try:
|
||||||
|
import tkinter as tk
|
||||||
|
|
||||||
|
default_root = getattr(tk, "_default_root", None)
|
||||||
|
return default_root is not None
|
||||||
|
except Exception:
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Singleton global (optionnel)
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
_INSTANCE_GLOBALE: Optional[ActivityPanel] = None
|
||||||
|
_LOCK_SINGLETON = threading.Lock()
|
||||||
|
|
||||||
|
|
||||||
|
def get_activity_panel(activer_ui: bool = True) -> ActivityPanel:
|
||||||
|
"""Obtenir l'instance globale du panel d'activité (lazy)."""
|
||||||
|
global _INSTANCE_GLOBALE
|
||||||
|
with _LOCK_SINGLETON:
|
||||||
|
if _INSTANCE_GLOBALE is None:
|
||||||
|
_INSTANCE_GLOBALE = ActivityPanel(activer_ui=activer_ui)
|
||||||
|
return _INSTANCE_GLOBALE
|
||||||
|
|
||||||
|
|
||||||
|
def reset_activity_panel() -> None:
|
||||||
|
"""Réinitialiser le singleton (utile pour les tests)."""
|
||||||
|
global _INSTANCE_GLOBALE
|
||||||
|
with _LOCK_SINGLETON:
|
||||||
|
if _INSTANCE_GLOBALE is not None:
|
||||||
|
try:
|
||||||
|
_INSTANCE_GLOBALE.masquer()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
_INSTANCE_GLOBALE = None
|
||||||
@@ -3,15 +3,25 @@ Mini serveur HTTP sur l'agent Windows pour les captures d'ecran a la demande
|
|||||||
et les operations fichiers.
|
et les operations fichiers.
|
||||||
|
|
||||||
Ecoute sur le port 5006 (configurable via RPA_CAPTURE_PORT).
|
Ecoute sur le port 5006 (configurable via RPA_CAPTURE_PORT).
|
||||||
|
Bind par defaut sur 127.0.0.1 (configurable via RPA_CAPTURE_BIND).
|
||||||
Endpoints :
|
Endpoints :
|
||||||
GET /capture -> screenshot frais en base64 (JPEG)
|
GET /capture -> screenshot frais en base64 (JPEG)
|
||||||
GET /health -> {"status": "ok"}
|
GET /health -> {"status": "ok"} (pas d'auth — sonde liveness)
|
||||||
POST /file-action -> operations fichiers (list, create, move, copy, sort)
|
POST /file-action -> operations fichiers (list, create, move, copy, sort)
|
||||||
|
|
||||||
|
Securite :
|
||||||
|
- Authentification Bearer obligatoire (RPA_API_TOKEN) pour /capture et
|
||||||
|
/file-action. Sans token configure, ces endpoints sont desactives.
|
||||||
|
- Les tentatives non authentifiees sont loguees (WARNING) avec l'IP source.
|
||||||
|
- Bind defaut localhost. Pour exposer sur le LAN (cas VWB backend qui
|
||||||
|
appelle l'agent a distance), definir explicitement
|
||||||
|
RPA_CAPTURE_BIND=0.0.0.0. L'auth reste alors la seule protection.
|
||||||
"""
|
"""
|
||||||
import threading
|
import threading
|
||||||
import logging
|
import logging
|
||||||
import json
|
import json
|
||||||
import base64
|
import base64
|
||||||
|
import hmac
|
||||||
import io
|
import io
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
@@ -20,6 +30,17 @@ from http.server import HTTPServer, BaseHTTPRequestHandler
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
CAPTURE_PORT = int(os.environ.get("RPA_CAPTURE_PORT", "5006"))
|
CAPTURE_PORT = int(os.environ.get("RPA_CAPTURE_PORT", "5006"))
|
||||||
|
# Bind par defaut sur localhost — defense en profondeur.
|
||||||
|
# Pour le deploiement VWB (backend Linux -> agent Windows), definir
|
||||||
|
# RPA_CAPTURE_BIND=0.0.0.0 explicitement. L'auth par token reste requise.
|
||||||
|
CAPTURE_BIND = os.environ.get("RPA_CAPTURE_BIND", "127.0.0.1")
|
||||||
|
|
||||||
|
# Token d'authentification (partage avec le streaming). Doit etre defini pour
|
||||||
|
# que /capture et /file-action soient accessibles.
|
||||||
|
CAPTURE_TOKEN = os.environ.get("RPA_API_TOKEN", "")
|
||||||
|
|
||||||
|
# Endpoints ouverts (pas d'auth requise — sondes techniques uniquement)
|
||||||
|
_PUBLIC_PATHS = {"/health"}
|
||||||
|
|
||||||
# Floutage des données sensibles (conformité AI Act)
|
# Floutage des données sensibles (conformité AI Act)
|
||||||
BLUR_SENSITIVE = os.environ.get("RPA_BLUR_SENSITIVE", "true").lower() in ("true", "1", "yes")
|
BLUR_SENSITIVE = os.environ.get("RPA_BLUR_SENSITIVE", "true").lower() in ("true", "1", "yes")
|
||||||
@@ -33,6 +54,8 @@ class CaptureHandler(BaseHTTPRequestHandler):
|
|||||||
|
|
||||||
def do_GET(self):
|
def do_GET(self):
|
||||||
if self.path == "/capture":
|
if self.path == "/capture":
|
||||||
|
if not self._check_auth():
|
||||||
|
return
|
||||||
self._handle_capture()
|
self._handle_capture()
|
||||||
elif self.path == "/health":
|
elif self.path == "/health":
|
||||||
self._send_json(200, {"status": "ok"})
|
self._send_json(200, {"status": "ok"})
|
||||||
@@ -41,10 +64,56 @@ class CaptureHandler(BaseHTTPRequestHandler):
|
|||||||
|
|
||||||
def do_POST(self):
|
def do_POST(self):
|
||||||
if self.path == "/file-action":
|
if self.path == "/file-action":
|
||||||
|
if not self._check_auth():
|
||||||
|
return
|
||||||
self._handle_file_action()
|
self._handle_file_action()
|
||||||
else:
|
else:
|
||||||
self._send_json(404, {"error": "not found"})
|
self._send_json(404, {"error": "not found"})
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _check_auth(self) -> bool:
|
||||||
|
"""Valide le Bearer token. Renvoie 401/503 si invalide.
|
||||||
|
|
||||||
|
- Si aucun token n'est configure cote serveur (RPA_API_TOKEN vide),
|
||||||
|
on refuse toutes les requetes sensibles (503) — fail-closed.
|
||||||
|
- Sinon, on compare en temps constant via hmac.compare_digest.
|
||||||
|
- Les tentatives echouees sont loguees avec l'IP source.
|
||||||
|
"""
|
||||||
|
# Autoriser les endpoints publics
|
||||||
|
if self.path in _PUBLIC_PATHS:
|
||||||
|
return True
|
||||||
|
|
||||||
|
peer = self.client_address[0] if self.client_address else "?"
|
||||||
|
|
||||||
|
if not CAPTURE_TOKEN:
|
||||||
|
logger.error(
|
||||||
|
"Refus %s depuis %s : RPA_API_TOKEN non configure "
|
||||||
|
"(capture server en mode fail-closed)",
|
||||||
|
self.path, peer,
|
||||||
|
)
|
||||||
|
self._send_json(503, {
|
||||||
|
"error": "capture server non configure (token manquant)",
|
||||||
|
})
|
||||||
|
return False
|
||||||
|
|
||||||
|
auth_header = self.headers.get("Authorization", "")
|
||||||
|
token = ""
|
||||||
|
if auth_header.startswith("Bearer "):
|
||||||
|
token = auth_header[len("Bearer "):].strip()
|
||||||
|
|
||||||
|
if not token or not hmac.compare_digest(token, CAPTURE_TOKEN):
|
||||||
|
logger.warning(
|
||||||
|
"Tentative d'acces non autorisee a %s depuis %s "
|
||||||
|
"(token %s)",
|
||||||
|
self.path, peer,
|
||||||
|
"absent" if not token else "invalide",
|
||||||
|
)
|
||||||
|
self._send_json(401, {"error": "unauthorized"})
|
||||||
|
return False
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
def do_OPTIONS(self):
|
def do_OPTIONS(self):
|
||||||
"""Gestion CORS preflight."""
|
"""Gestion CORS preflight."""
|
||||||
self.send_response(200)
|
self.send_response(200)
|
||||||
@@ -351,21 +420,46 @@ class _FileActionHandlerLocal:
|
|||||||
class CaptureServer:
|
class CaptureServer:
|
||||||
"""Serveur de capture d'ecran en temps reel (thread daemon)."""
|
"""Serveur de capture d'ecran en temps reel (thread daemon)."""
|
||||||
|
|
||||||
def __init__(self, port: int = CAPTURE_PORT):
|
def __init__(self, port: int = CAPTURE_PORT, bind: str = CAPTURE_BIND):
|
||||||
self._port = port
|
self._port = port
|
||||||
|
self._bind = bind
|
||||||
self._server: HTTPServer | None = None
|
self._server: HTTPServer | None = None
|
||||||
self._thread: threading.Thread | None = None
|
self._thread: threading.Thread | None = None
|
||||||
|
|
||||||
def start(self):
|
def start(self):
|
||||||
"""Demarre le serveur dans un thread daemon."""
|
"""Demarre le serveur dans un thread daemon.
|
||||||
|
|
||||||
|
Avertit si le serveur est expose sur le LAN sans token configure.
|
||||||
|
"""
|
||||||
|
# Defense en profondeur : refus de demarrer si expose LAN sans auth
|
||||||
|
exposed_lan = self._bind not in ("127.0.0.1", "localhost", "::1")
|
||||||
|
if exposed_lan and not CAPTURE_TOKEN:
|
||||||
|
logger.error(
|
||||||
|
"REFUS demarrage capture server : bind=%s (LAN) sans "
|
||||||
|
"RPA_API_TOKEN. Definir le token ou RPA_CAPTURE_BIND=127.0.0.1.",
|
||||||
|
self._bind,
|
||||||
|
)
|
||||||
|
print(
|
||||||
|
f"[CAPTURE] REFUS demarrage : bind={self._bind} sans token. "
|
||||||
|
f"Definir RPA_API_TOKEN ou RPA_CAPTURE_BIND=127.0.0.1."
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self._server = HTTPServer(("0.0.0.0", self._port), CaptureHandler)
|
self._server = HTTPServer((self._bind, self._port), CaptureHandler)
|
||||||
self._thread = threading.Thread(
|
self._thread = threading.Thread(
|
||||||
target=self._server.serve_forever, daemon=True
|
target=self._server.serve_forever, daemon=True
|
||||||
)
|
)
|
||||||
self._thread.start()
|
self._thread.start()
|
||||||
logger.info(f"Capture server demarre sur le port {self._port}")
|
auth_mode = "token requis" if CAPTURE_TOKEN else "token absent (fail-closed)"
|
||||||
print(f"[CAPTURE] Serveur de capture demarre sur le port {self._port}")
|
logger.info(
|
||||||
|
"Capture server demarre sur %s:%s (%s)",
|
||||||
|
self._bind, self._port, auth_mode,
|
||||||
|
)
|
||||||
|
print(
|
||||||
|
f"[CAPTURE] Serveur de capture demarre sur "
|
||||||
|
f"{self._bind}:{self._port} ({auth_mode})"
|
||||||
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Impossible de demarrer le capture server : {e}")
|
logger.error(f"Impossible de demarrer le capture server : {e}")
|
||||||
print(f"[CAPTURE] ERREUR demarrage : {e}")
|
print(f"[CAPTURE] ERREUR demarrage : {e}")
|
||||||
|
|||||||
655
agent_v0/agent_v1/ui/messages.py
Normal file
655
agent_v0/agent_v1/ui/messages.py
Normal file
@@ -0,0 +1,655 @@
|
|||||||
|
# agent_v1/ui/messages.py
|
||||||
|
"""
|
||||||
|
Formatage des messages utilisateur pour Léa.
|
||||||
|
|
||||||
|
Convertit les codes d'erreur techniques (`target_not_found`, `no_screen_change`...)
|
||||||
|
en phrases en français naturel, orientées action, adaptées à un utilisateur non
|
||||||
|
technique (secrétaire médicale, TIM).
|
||||||
|
|
||||||
|
Trois niveaux de sévérité sont définis :
|
||||||
|
- INFO — Léa fait son travail normalement
|
||||||
|
- ATTENTION — Quelque chose de léger (ralentissement, retry)
|
||||||
|
- BLOCAGE — Léa a besoin d'aide, elle rend la main
|
||||||
|
|
||||||
|
Le module est 100% pur (pas d'I/O, pas d'UI) : testable sans mocks lourds.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from enum import Enum
|
||||||
|
from typing import Any, Mapping, Optional
|
||||||
|
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------------
|
||||||
|
# Accès paresseux au DomainContext
|
||||||
|
# ----------------------------------------------------------------------------
|
||||||
|
#
|
||||||
|
# On importe le module à l'appel pour éviter toute dépendance circulaire
|
||||||
|
# avec `agent_v0.server_v1.domain_context` (qui ne doit pas importer l'UI).
|
||||||
|
# Si l'import échoue (contexte client sans server_v1), on retombe sur None
|
||||||
|
# et les formatters gardent leur comportement générique historique.
|
||||||
|
|
||||||
|
|
||||||
|
def _get_domain_ctx(domain_id: Optional[str]):
|
||||||
|
"""Récupérer un DomainContext si possible, sinon None (fallback)."""
|
||||||
|
if not domain_id:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
from agent_v0.server_v1.domain_context import get_domain_context # lazy
|
||||||
|
return get_domain_context(domain_id)
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _friendly_target(description: str, domain_id: Optional[str] = None) -> str:
|
||||||
|
"""Transformer une description technique en langage métier si possible.
|
||||||
|
|
||||||
|
Ex (tim_codage) : "DP" → "diagnostic principal"
|
||||||
|
Ex (comptabilite) : "TVA" → "montant de TVA"
|
||||||
|
Retombe sur la description nettoyée si aucun domaine ne matche.
|
||||||
|
"""
|
||||||
|
base = _nettoyer_description_cible(description)
|
||||||
|
ctx = _get_domain_ctx(domain_id)
|
||||||
|
if ctx is None or not base:
|
||||||
|
return base
|
||||||
|
try:
|
||||||
|
return ctx._apply_synonyms(base)
|
||||||
|
except Exception:
|
||||||
|
return base
|
||||||
|
|
||||||
|
|
||||||
|
class NiveauMessage(Enum):
|
||||||
|
"""Niveaux hiérarchiques des messages affichés à l'utilisateur."""
|
||||||
|
|
||||||
|
INFO = "info" # Fond vert clair, disparaît tout seul, 3-5s
|
||||||
|
ATTENTION = "attention" # Fond orange clair, disparaît tout seul, 7s
|
||||||
|
BLOCAGE = "blocage" # Fond rouge clair, reste affiché, 15s+
|
||||||
|
|
||||||
|
|
||||||
|
# Durée d'affichage par défaut (secondes), par niveau
|
||||||
|
DUREE_PAR_NIVEAU: dict[NiveauMessage, int] = {
|
||||||
|
NiveauMessage.INFO: 4,
|
||||||
|
NiveauMessage.ATTENTION: 7,
|
||||||
|
NiveauMessage.BLOCAGE: 15,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Icône textuelle par niveau (compatible plyer/Windows/Linux)
|
||||||
|
ICONE_PAR_NIVEAU: dict[NiveauMessage, str] = {
|
||||||
|
NiveauMessage.INFO: "i",
|
||||||
|
NiveauMessage.ATTENTION: "!",
|
||||||
|
NiveauMessage.BLOCAGE: "?",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class MessageUtilisateur:
|
||||||
|
"""Un message prêt à être affiché à l'utilisateur.
|
||||||
|
|
||||||
|
Attributes:
|
||||||
|
niveau: Hiérarchie (info/attention/blocage)
|
||||||
|
titre: Titre court de la notification (≤60 caractères)
|
||||||
|
corps: Corps du message en français naturel
|
||||||
|
duree_s: Durée d'affichage recommandée (secondes)
|
||||||
|
persistent: Si True, l'utilisateur doit fermer manuellement
|
||||||
|
"""
|
||||||
|
|
||||||
|
niveau: NiveauMessage
|
||||||
|
titre: str
|
||||||
|
corps: str
|
||||||
|
duree_s: int
|
||||||
|
persistent: bool = False
|
||||||
|
|
||||||
|
def to_dict(self) -> dict:
|
||||||
|
"""Sérialiser le message (utile pour les tests et le logging)."""
|
||||||
|
return {
|
||||||
|
"niveau": self.niveau.value,
|
||||||
|
"titre": self.titre,
|
||||||
|
"corps": self.corps,
|
||||||
|
"duree_s": self.duree_s,
|
||||||
|
"persistent": self.persistent,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Helpers d'extraction
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def _extraire_nom_application(titre_fenetre: str) -> str:
|
||||||
|
"""Extraire le nom de l'application à partir d'un titre de fenêtre.
|
||||||
|
|
||||||
|
Les titres Windows suivent généralement le format :
|
||||||
|
"Document.txt – Bloc-notes"
|
||||||
|
"Ma Page - Google Chrome"
|
||||||
|
"Sans titre — Paint"
|
||||||
|
|
||||||
|
On retourne la partie après le dernier séparateur, ou le titre entier.
|
||||||
|
"""
|
||||||
|
if not titre_fenetre:
|
||||||
|
return ""
|
||||||
|
titre = titre_fenetre.strip()
|
||||||
|
# Chercher le dernier séparateur parmi " – ", " — ", " - "
|
||||||
|
for sep in (" – ", " — ", " - "):
|
||||||
|
if sep in titre:
|
||||||
|
return titre.rsplit(sep, 1)[-1].strip()
|
||||||
|
return titre
|
||||||
|
|
||||||
|
|
||||||
|
def _nettoyer_description_cible(description: str) -> str:
|
||||||
|
"""Nettoyer la description technique d'une cible pour l'afficher.
|
||||||
|
|
||||||
|
Supprime les caractères techniques (guillemets inutiles, ':').
|
||||||
|
"""
|
||||||
|
if not description:
|
||||||
|
return ""
|
||||||
|
desc = description.strip()
|
||||||
|
# Retirer les guillemets encapsulants
|
||||||
|
desc = desc.strip("'\"`")
|
||||||
|
# Limiter la longueur
|
||||||
|
if len(desc) > 80:
|
||||||
|
desc = desc[:77] + "..."
|
||||||
|
return desc
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Formattage des messages techniques → humains
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def formatter_cible_non_trouvee(
|
||||||
|
description_cible: str,
|
||||||
|
titre_fenetre: Optional[str] = None,
|
||||||
|
domain_id: Optional[str] = None,
|
||||||
|
params: Optional[Mapping[str, Any]] = None,
|
||||||
|
) -> MessageUtilisateur:
|
||||||
|
"""Message quand Léa ne trouve pas un élément à cliquer.
|
||||||
|
|
||||||
|
Si un domaine métier est fourni, la description de la cible est
|
||||||
|
transformée en langage métier via le DomainContext :
|
||||||
|
- tim_codage + "DP" → "diagnostic principal"
|
||||||
|
- comptabilite + "TVA" → "montant de TVA"
|
||||||
|
|
||||||
|
Exemple avant :
|
||||||
|
target_not_found: 'bonjour' dans *bonjour, – Bloc-notes
|
||||||
|
Exemple après :
|
||||||
|
Léa a besoin d'aide
|
||||||
|
Je ne trouve pas "bonjour" dans le Bloc-notes. Peux-tu cliquer
|
||||||
|
dessus toi-même ? Je reprends ensuite.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
description_cible: Description brute de la cible.
|
||||||
|
titre_fenetre: Titre de la fenêtre active (pour extraire l'app).
|
||||||
|
domain_id: Domaine métier pour enrichir la sortie (optionnel).
|
||||||
|
params: Paramètres du workflow (nom_patient, num_facture...)
|
||||||
|
utilisés par les templates de clarification métier.
|
||||||
|
"""
|
||||||
|
cible = _friendly_target(description_cible, domain_id) or "l'élément"
|
||||||
|
app = _extraire_nom_application(titre_fenetre or "")
|
||||||
|
|
||||||
|
# Si un domaine et un template de clarification existent, préférer la
|
||||||
|
# question métier (plus pertinente que le message générique).
|
||||||
|
ctx = _get_domain_ctx(domain_id)
|
||||||
|
if ctx is not None and ctx.clarification_templates:
|
||||||
|
try:
|
||||||
|
corps = ctx.pose_clarification_question(
|
||||||
|
{
|
||||||
|
"blocked_on": "target_not_found",
|
||||||
|
"target": description_cible or "",
|
||||||
|
"app": app,
|
||||||
|
"params": dict(params or {}),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
corps = ""
|
||||||
|
if corps:
|
||||||
|
return MessageUtilisateur(
|
||||||
|
niveau=NiveauMessage.BLOCAGE,
|
||||||
|
titre="Léa a besoin d'aide",
|
||||||
|
corps=corps,
|
||||||
|
duree_s=DUREE_PAR_NIVEAU[NiveauMessage.BLOCAGE],
|
||||||
|
persistent=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
if app:
|
||||||
|
corps = (
|
||||||
|
f"Je ne trouve pas « {cible} » dans {app}. "
|
||||||
|
f"Peux-tu cliquer dessus toi-même ? Je reprends ensuite."
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
corps = (
|
||||||
|
f"Je ne trouve pas « {cible} » à l'écran. "
|
||||||
|
f"Peux-tu le faire toi-même ? Je reprends ensuite."
|
||||||
|
)
|
||||||
|
|
||||||
|
return MessageUtilisateur(
|
||||||
|
niveau=NiveauMessage.BLOCAGE,
|
||||||
|
titre="Léa a besoin d'aide",
|
||||||
|
corps=corps,
|
||||||
|
duree_s=DUREE_PAR_NIVEAU[NiveauMessage.BLOCAGE],
|
||||||
|
persistent=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def formatter_fenetre_incorrecte(
|
||||||
|
titre_actuel: str,
|
||||||
|
titre_attendu: str,
|
||||||
|
) -> MessageUtilisateur:
|
||||||
|
"""Message quand la fenêtre active n'est pas celle attendue.
|
||||||
|
|
||||||
|
Exemple avant :
|
||||||
|
Fenêtre incorrecte: 'Program Manager' (attendu: 'Lea : Explorateur de fichiers')
|
||||||
|
Exemple après :
|
||||||
|
Léa attend une fenêtre
|
||||||
|
J'attends « Explorateur de fichiers » mais c'est « Program Manager »
|
||||||
|
qui est affiché. Peux-tu ouvrir la bonne fenêtre ?
|
||||||
|
"""
|
||||||
|
app_actuelle = _extraire_nom_application(titre_actuel) or "une autre fenêtre"
|
||||||
|
app_attendue = _extraire_nom_application(titre_attendu) or titre_attendu
|
||||||
|
|
||||||
|
corps = (
|
||||||
|
f"J'attends « {app_attendue} » mais c'est « {app_actuelle} » "
|
||||||
|
f"qui est affiché. Peux-tu ouvrir la bonne fenêtre ?"
|
||||||
|
)
|
||||||
|
|
||||||
|
return MessageUtilisateur(
|
||||||
|
niveau=NiveauMessage.BLOCAGE,
|
||||||
|
titre="Léa attend une fenêtre",
|
||||||
|
corps=corps,
|
||||||
|
duree_s=DUREE_PAR_NIVEAU[NiveauMessage.BLOCAGE],
|
||||||
|
persistent=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def formatter_ecran_inchange(action_type: str = "") -> MessageUtilisateur:
|
||||||
|
"""Message quand l'action n'a pas eu d'effet visible.
|
||||||
|
|
||||||
|
Exemple avant :
|
||||||
|
Ecran inchange apres l'action
|
||||||
|
Exemple après :
|
||||||
|
Léa vérifie
|
||||||
|
Mon clic n'a pas eu l'air de marcher. Je vais réessayer ou te
|
||||||
|
rendre la main si ça ne passe pas.
|
||||||
|
"""
|
||||||
|
actions_fr = {
|
||||||
|
"click": "Mon clic",
|
||||||
|
"type": "Ma saisie",
|
||||||
|
"key_combo": "Mon raccourci clavier",
|
||||||
|
"scroll": "Mon défilement",
|
||||||
|
}
|
||||||
|
quoi = actions_fr.get(action_type, "Mon action")
|
||||||
|
|
||||||
|
corps = (
|
||||||
|
f"{quoi} n'a pas eu l'air de marcher. Je vais réessayer, "
|
||||||
|
f"ou te rendre la main si ça ne passe pas."
|
||||||
|
)
|
||||||
|
|
||||||
|
return MessageUtilisateur(
|
||||||
|
niveau=NiveauMessage.ATTENTION,
|
||||||
|
titre="Léa vérifie",
|
||||||
|
corps=corps,
|
||||||
|
duree_s=DUREE_PAR_NIVEAU[NiveauMessage.ATTENTION],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def formatter_mode_apprentissage(
|
||||||
|
raison: str = "",
|
||||||
|
description_cible: str = "",
|
||||||
|
titre_fenetre: Optional[str] = None,
|
||||||
|
) -> MessageUtilisateur:
|
||||||
|
"""Message quand Léa passe en mode apprentissage (pause supervisée).
|
||||||
|
|
||||||
|
L'utilisateur doit comprendre :
|
||||||
|
1. Léa est bloquée et a besoin d'aide
|
||||||
|
2. L'utilisateur doit prendre la main et montrer comment faire
|
||||||
|
3. Ctrl+Shift+L pour signaler qu'il a fini
|
||||||
|
|
||||||
|
Le ton est humble, clair, actionnable. Pas technique.
|
||||||
|
|
||||||
|
Exemple :
|
||||||
|
Léa a besoin d'aide
|
||||||
|
Je n'y arrive pas, montrez-moi comment faire.
|
||||||
|
Quand vous avez fini, appuyez sur Ctrl+Shift+L.
|
||||||
|
"""
|
||||||
|
cible = _nettoyer_description_cible(description_cible) if description_cible else ""
|
||||||
|
app = _extraire_nom_application(titre_fenetre or "") if titre_fenetre else ""
|
||||||
|
|
||||||
|
# Construire un contexte court si disponible
|
||||||
|
contexte = ""
|
||||||
|
if cible and app:
|
||||||
|
contexte = f" (« {cible} » dans {app})"
|
||||||
|
elif cible:
|
||||||
|
contexte = f" (« {cible} »)"
|
||||||
|
|
||||||
|
corps = (
|
||||||
|
f"Je n'y arrive pas{contexte}, montrez-moi comment faire. "
|
||||||
|
f"Quand vous avez fini, appuyez sur Ctrl+Shift+L."
|
||||||
|
)
|
||||||
|
|
||||||
|
return MessageUtilisateur(
|
||||||
|
niveau=NiveauMessage.BLOCAGE,
|
||||||
|
titre="Léa a besoin d'aide",
|
||||||
|
corps=corps,
|
||||||
|
duree_s=DUREE_PAR_NIVEAU[NiveauMessage.BLOCAGE],
|
||||||
|
persistent=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def formatter_connexion_perdue(hote_serveur: str = "") -> MessageUtilisateur:
|
||||||
|
"""Message quand la connexion avec le serveur est perdue.
|
||||||
|
|
||||||
|
Rassurant : on dit qu'on va réessayer automatiquement.
|
||||||
|
"""
|
||||||
|
corps = (
|
||||||
|
"J'ai perdu le lien avec le serveur. Je retente automatiquement, "
|
||||||
|
"pas besoin d'intervenir."
|
||||||
|
)
|
||||||
|
|
||||||
|
return MessageUtilisateur(
|
||||||
|
niveau=NiveauMessage.ATTENTION,
|
||||||
|
titre="Léa est déconnectée",
|
||||||
|
corps=corps,
|
||||||
|
duree_s=DUREE_PAR_NIVEAU[NiveauMessage.ATTENTION],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def formatter_connexion_retablie() -> MessageUtilisateur:
|
||||||
|
"""Message quand la connexion serveur est rétablie."""
|
||||||
|
return MessageUtilisateur(
|
||||||
|
niveau=NiveauMessage.INFO,
|
||||||
|
titre="Léa",
|
||||||
|
corps="C'est bon, la connexion est revenue. Je continue.",
|
||||||
|
duree_s=DUREE_PAR_NIVEAU[NiveauMessage.INFO],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def formatter_debut_workflow(nom_workflow: str, nb_etapes: int = 0) -> MessageUtilisateur:
|
||||||
|
"""Message au démarrage d'un workflow de replay."""
|
||||||
|
if nb_etapes > 0:
|
||||||
|
corps = (
|
||||||
|
f"Je démarre « {nom_workflow} » ({nb_etapes} étapes). "
|
||||||
|
f"Je t'indique mon avancement."
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
corps = f"Je démarre « {nom_workflow} ». Je t'indique mon avancement."
|
||||||
|
|
||||||
|
return MessageUtilisateur(
|
||||||
|
niveau=NiveauMessage.INFO,
|
||||||
|
titre="Léa démarre",
|
||||||
|
corps=corps,
|
||||||
|
duree_s=DUREE_PAR_NIVEAU[NiveauMessage.INFO],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def formatter_etape_workflow(
|
||||||
|
etape_actuelle: int,
|
||||||
|
nb_etapes: int,
|
||||||
|
description: str = "",
|
||||||
|
) -> MessageUtilisateur:
|
||||||
|
"""Message pour la progression d'une étape."""
|
||||||
|
if description:
|
||||||
|
desc = _nettoyer_description_cible(description)
|
||||||
|
corps = f"Étape {etape_actuelle}/{nb_etapes} — {desc}"
|
||||||
|
else:
|
||||||
|
corps = f"Étape {etape_actuelle}/{nb_etapes}"
|
||||||
|
|
||||||
|
return MessageUtilisateur(
|
||||||
|
niveau=NiveauMessage.INFO,
|
||||||
|
titre="Léa avance",
|
||||||
|
corps=corps,
|
||||||
|
duree_s=3,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def formatter_retry(action_type: str = "", tentative: int = 2) -> MessageUtilisateur:
|
||||||
|
"""Message quand Léa retente une action."""
|
||||||
|
corps = (
|
||||||
|
f"Je retente (tentative {tentative}). Ça arrive parfois, "
|
||||||
|
f"l'écran était peut-être en cours de chargement."
|
||||||
|
)
|
||||||
|
return MessageUtilisateur(
|
||||||
|
niveau=NiveauMessage.ATTENTION,
|
||||||
|
titre="Léa retente",
|
||||||
|
corps=corps,
|
||||||
|
duree_s=DUREE_PAR_NIVEAU[NiveauMessage.ATTENTION],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def formatter_ralentissement() -> MessageUtilisateur:
|
||||||
|
"""Message quand Léa prend plus de temps que prévu."""
|
||||||
|
return MessageUtilisateur(
|
||||||
|
niveau=NiveauMessage.ATTENTION,
|
||||||
|
titre="Léa prend son temps",
|
||||||
|
corps="Je vais plus lentement que prévu. L'écran met du temps à répondre.",
|
||||||
|
duree_s=DUREE_PAR_NIVEAU[NiveauMessage.ATTENTION],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def formatter_fin_workflow(
|
||||||
|
succes: bool,
|
||||||
|
nom_workflow: str = "",
|
||||||
|
nb_etapes: int = 0,
|
||||||
|
duree_s: float = 0.0,
|
||||||
|
domain_id: Optional[str] = None,
|
||||||
|
items_count: int = 0,
|
||||||
|
failed_count: int = 0,
|
||||||
|
params: Optional[Mapping[str, Any]] = None,
|
||||||
|
) -> MessageUtilisateur:
|
||||||
|
"""Message à la fin d'un workflow.
|
||||||
|
|
||||||
|
Si un domaine métier est fourni (et qu'il expose des summary_templates),
|
||||||
|
on utilise `DomainContext.describe_workflow_outcome` pour formuler un
|
||||||
|
rapport en langage métier (ex: "J'ai codé 14 dossiers sur 15").
|
||||||
|
|
||||||
|
Args:
|
||||||
|
succes: True si l'ensemble du workflow a réussi.
|
||||||
|
nom_workflow: Nom du workflow.
|
||||||
|
nb_etapes: Nombre d'étapes techniques (pour fallback générique).
|
||||||
|
duree_s: Durée totale en secondes.
|
||||||
|
domain_id: Domaine métier (optionnel).
|
||||||
|
items_count: Nombre d'items métier traités (ex: 15 dossiers).
|
||||||
|
failed_count: Nombre d'items en échec.
|
||||||
|
params: Infos supplémentaires passées aux templates.
|
||||||
|
"""
|
||||||
|
ctx = _get_domain_ctx(domain_id)
|
||||||
|
if ctx is not None and ctx.summary_templates:
|
||||||
|
try:
|
||||||
|
corps = ctx.describe_workflow_outcome(
|
||||||
|
workflow_name=nom_workflow,
|
||||||
|
success=succes,
|
||||||
|
items_count=items_count or max(1, nb_etapes),
|
||||||
|
failed_count=failed_count,
|
||||||
|
elapsed_s=duree_s,
|
||||||
|
extra=dict(params or {}),
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
corps = ""
|
||||||
|
if corps:
|
||||||
|
if succes and failed_count == 0:
|
||||||
|
return MessageUtilisateur(
|
||||||
|
niveau=NiveauMessage.INFO,
|
||||||
|
titre="Léa a terminé",
|
||||||
|
corps=corps,
|
||||||
|
duree_s=6,
|
||||||
|
)
|
||||||
|
if succes and failed_count > 0:
|
||||||
|
return MessageUtilisateur(
|
||||||
|
niveau=NiveauMessage.ATTENTION,
|
||||||
|
titre="Léa a terminé partiellement",
|
||||||
|
corps=corps,
|
||||||
|
duree_s=DUREE_PAR_NIVEAU[NiveauMessage.ATTENTION],
|
||||||
|
)
|
||||||
|
return MessageUtilisateur(
|
||||||
|
niveau=NiveauMessage.BLOCAGE,
|
||||||
|
titre="Léa s'arrête",
|
||||||
|
corps=corps,
|
||||||
|
duree_s=DUREE_PAR_NIVEAU[NiveauMessage.BLOCAGE],
|
||||||
|
persistent=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
if succes:
|
||||||
|
if nom_workflow and nb_etapes > 0:
|
||||||
|
corps = (
|
||||||
|
f"C'est fait ! « {nom_workflow} » est terminé "
|
||||||
|
f"({nb_etapes} étapes en {int(duree_s)}s)."
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
corps = "C'est fait ! Tout s'est bien passé."
|
||||||
|
return MessageUtilisateur(
|
||||||
|
niveau=NiveauMessage.INFO,
|
||||||
|
titre="Léa a terminé",
|
||||||
|
corps=corps,
|
||||||
|
duree_s=6,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
corps = (
|
||||||
|
"Je n'ai pas pu terminer. Je te rends la main, "
|
||||||
|
"tu peux continuer à partir de là où je me suis arrêtée."
|
||||||
|
)
|
||||||
|
return MessageUtilisateur(
|
||||||
|
niveau=NiveauMessage.BLOCAGE,
|
||||||
|
titre="Léa s'arrête",
|
||||||
|
corps=corps,
|
||||||
|
duree_s=DUREE_PAR_NIVEAU[NiveauMessage.BLOCAGE],
|
||||||
|
persistent=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def formatter_erreur_generique(
|
||||||
|
message_technique: str,
|
||||||
|
domain_id: Optional[str] = None,
|
||||||
|
params: Optional[Mapping[str, Any]] = None,
|
||||||
|
) -> MessageUtilisateur:
|
||||||
|
"""Formater un message d'erreur technique non catégorisé.
|
||||||
|
|
||||||
|
On essaie de détecter les motifs connus dans le message technique pour
|
||||||
|
le router vers le bon formatter spécialisé, sinon on emballe le message.
|
||||||
|
Si `domain_id` est fourni, il est propagé aux formatters spécialisés
|
||||||
|
pour produire un message en langage métier.
|
||||||
|
"""
|
||||||
|
if not message_technique:
|
||||||
|
return MessageUtilisateur(
|
||||||
|
niveau=NiveauMessage.ATTENTION,
|
||||||
|
titre="Léa",
|
||||||
|
corps="J'ai rencontré un petit souci. Je continue.",
|
||||||
|
duree_s=DUREE_PAR_NIVEAU[NiveauMessage.ATTENTION],
|
||||||
|
)
|
||||||
|
|
||||||
|
msg_lower = message_technique.lower()
|
||||||
|
|
||||||
|
# target_not_found[:...]
|
||||||
|
if "target_not_found" in msg_lower:
|
||||||
|
# Essayer d'extraire la description après le ':'
|
||||||
|
match = re.match(r"target_not_found[:\s]*(.*)", message_technique, re.IGNORECASE)
|
||||||
|
desc = match.group(1).strip() if match else ""
|
||||||
|
return formatter_cible_non_trouvee(desc, domain_id=domain_id, params=params)
|
||||||
|
|
||||||
|
# Fenêtre incorrecte: 'X' (attendu: 'Y')
|
||||||
|
if "fenêtre incorrecte" in msg_lower or "fenetre incorrecte" in msg_lower:
|
||||||
|
# Extraire actuel et attendu
|
||||||
|
m_actuel = re.search(r"[:,]\s*['\"]([^'\"]+)['\"]", message_technique)
|
||||||
|
m_attendu = re.search(r"attendu[:\s]*['\"]([^'\"]+)['\"]", message_technique)
|
||||||
|
actuel = m_actuel.group(1) if m_actuel else ""
|
||||||
|
attendu = m_attendu.group(1) if m_attendu else ""
|
||||||
|
return formatter_fenetre_incorrecte(actuel, attendu)
|
||||||
|
|
||||||
|
# Ecran inchangé
|
||||||
|
if "inchang" in msg_lower or "no_screen_change" in msg_lower:
|
||||||
|
return formatter_ecran_inchange()
|
||||||
|
|
||||||
|
# Policy abort / supervise
|
||||||
|
if "policy_abort" in msg_lower or "visual_resolve_failed" in msg_lower:
|
||||||
|
return formatter_cible_non_trouvee(
|
||||||
|
message_technique, domain_id=domain_id, params=params
|
||||||
|
)
|
||||||
|
|
||||||
|
# Fallback : message technique tronqué
|
||||||
|
msg_tronque = message_technique.strip()
|
||||||
|
if len(msg_tronque) > 120:
|
||||||
|
msg_tronque = msg_tronque[:117] + "..."
|
||||||
|
|
||||||
|
return MessageUtilisateur(
|
||||||
|
niveau=NiveauMessage.ATTENTION,
|
||||||
|
titre="Léa",
|
||||||
|
corps=f"J'ai rencontré un souci : {msg_tronque}",
|
||||||
|
duree_s=DUREE_PAR_NIVEAU[NiveauMessage.ATTENTION],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Détection fenêtre Léa (utilisé par l'executor pour ignorer sa propre UI)
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
# Motifs qui identifient une fenêtre appartenant à Léa (l'agent lui-même).
|
||||||
|
# On utilise des regex avec \b pour éviter les faux positifs sur des noms
|
||||||
|
# contenant "lea" (ex: "cléa.txt", "leapfrog", "replay").
|
||||||
|
_MOTIFS_FENETRE_LEA_REGEX = (
|
||||||
|
r"\bléa\b",
|
||||||
|
r"\blea\b(?!p)", # "lea" mot entier, pas "leapfrog"
|
||||||
|
r"lea\s*[—–\-:]", # "Lea —", "Lea -", "Lea :"
|
||||||
|
r"léa\s*[—–\-:]",
|
||||||
|
r"\bassistante ia\b",
|
||||||
|
r"\bléa ia\b",
|
||||||
|
r"\blea ia\b",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def est_fenetre_lea(titre_fenetre: str) -> bool:
|
||||||
|
"""Détecter si un titre de fenêtre appartient à l'agent Léa lui-même.
|
||||||
|
|
||||||
|
Utilisé pour éviter que Léa ne se considère comme une fenêtre intrusive
|
||||||
|
dans ses propres pré-vérifications.
|
||||||
|
|
||||||
|
Utilise des regex avec des word boundaries pour éviter les faux positifs
|
||||||
|
sur des noms de fichiers contenant "lea" (ex: "cléa.txt", "replay.log").
|
||||||
|
"""
|
||||||
|
if not titre_fenetre:
|
||||||
|
return False
|
||||||
|
titre_lower = titre_fenetre.lower().strip()
|
||||||
|
return any(re.search(motif, titre_lower) for motif in _MOTIFS_FENETRE_LEA_REGEX)
|
||||||
|
|
||||||
|
|
||||||
|
# Fenêtres parasites Windows à ignorer dans les pré-vérifications.
|
||||||
|
# Ce ne sont pas des fenêtres applicatives — c'est du bruit système
|
||||||
|
# qui prend le focus de manière imprévisible.
|
||||||
|
_FENETRES_BRUIT_SYSTEME = (
|
||||||
|
"fenêtre de dépassement de capacité",
|
||||||
|
"overflow", # version anglaise systray
|
||||||
|
"program manager",
|
||||||
|
"barre des tâches",
|
||||||
|
"task bar",
|
||||||
|
"cortana",
|
||||||
|
"action center",
|
||||||
|
"centre de notifications",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def est_fenetre_bruit(titre_fenetre: str) -> bool:
|
||||||
|
"""Détecter si un titre de fenêtre est du bruit système Windows.
|
||||||
|
|
||||||
|
Ces fenêtres prennent le focus de manière imprévisible (systray overflow,
|
||||||
|
taskbar, Program Manager) et ne sont jamais la cible d'une action utilisateur.
|
||||||
|
"""
|
||||||
|
if not titre_fenetre:
|
||||||
|
return True # pas de titre = bruit
|
||||||
|
titre_lower = titre_fenetre.lower().strip()
|
||||||
|
if titre_lower == "unknown_window":
|
||||||
|
return True
|
||||||
|
return any(p in titre_lower for p in _FENETRES_BRUIT_SYSTEME)
|
||||||
|
|
||||||
|
|
||||||
|
# Conservé pour rétro-compatibilité avec le code qui listait MOTIFS_FENETRE_LEA
|
||||||
|
MOTIFS_FENETRE_LEA = (
|
||||||
|
"léa",
|
||||||
|
"lea —",
|
||||||
|
"léa —",
|
||||||
|
"lea -",
|
||||||
|
"léa -",
|
||||||
|
"lea assistante",
|
||||||
|
"léa assistante",
|
||||||
|
"lea : ",
|
||||||
|
"léa : ",
|
||||||
|
"assistante ia",
|
||||||
|
)
|
||||||
@@ -5,6 +5,14 @@ Utilise plyer pour les notifications système, sans dépendance PyQt5.
|
|||||||
|
|
||||||
Remplace les dialogues Qt par des toasts non-bloquants.
|
Remplace les dialogues Qt par des toasts non-bloquants.
|
||||||
Thread-safe avec rate limiting (1 notification / 2 secondes max).
|
Thread-safe avec rate limiting (1 notification / 2 secondes max).
|
||||||
|
|
||||||
|
Les messages utilisateur sont formatés via `agent_v1.ui.messages` qui convertit
|
||||||
|
les codes techniques (target_not_found, etc.) en français naturel.
|
||||||
|
|
||||||
|
Hiérarchie des notifications (cf. messages.NiveauMessage) :
|
||||||
|
- INFO : auto-dismiss en ~4s, rate-limité classique
|
||||||
|
- ATTENTION : auto-dismiss en ~7s, rate-limité classique
|
||||||
|
- BLOCAGE : persistant (15s+), bypass du rate limit
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
@@ -12,6 +20,23 @@ import threading
|
|||||||
import time
|
import time
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
|
from .messages import (
|
||||||
|
MessageUtilisateur,
|
||||||
|
NiveauMessage,
|
||||||
|
formatter_cible_non_trouvee,
|
||||||
|
formatter_connexion_perdue,
|
||||||
|
formatter_connexion_retablie,
|
||||||
|
formatter_debut_workflow,
|
||||||
|
formatter_ecran_inchange,
|
||||||
|
formatter_erreur_generique,
|
||||||
|
formatter_etape_workflow,
|
||||||
|
formatter_fenetre_incorrecte,
|
||||||
|
formatter_fin_workflow,
|
||||||
|
formatter_mode_apprentissage,
|
||||||
|
formatter_ralentissement,
|
||||||
|
formatter_retry,
|
||||||
|
)
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# Import conditionnel de plyer — fallback silencieux si absent
|
# Import conditionnel de plyer — fallback silencieux si absent
|
||||||
@@ -59,7 +84,13 @@ class NotificationManager:
|
|||||||
# Méthode générique
|
# Méthode générique
|
||||||
# ------------------------------------------------------------------ #
|
# ------------------------------------------------------------------ #
|
||||||
|
|
||||||
def notify(self, title: str, message: str, timeout: int = 5) -> bool:
|
def notify(
|
||||||
|
self,
|
||||||
|
title: str,
|
||||||
|
message: str,
|
||||||
|
timeout: int = 5,
|
||||||
|
bypass_rate_limit: bool = False,
|
||||||
|
) -> bool:
|
||||||
"""
|
"""
|
||||||
Affiche une notification toast.
|
Affiche une notification toast.
|
||||||
|
|
||||||
@@ -67,6 +98,8 @@ class NotificationManager:
|
|||||||
title: Titre de la notification.
|
title: Titre de la notification.
|
||||||
message: Corps du message.
|
message: Corps du message.
|
||||||
timeout: Durée d'affichage en secondes.
|
timeout: Durée d'affichage en secondes.
|
||||||
|
bypass_rate_limit: Si True, ignore le rate limit (pour les blocages
|
||||||
|
importants qui ne doivent pas être écrasés).
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
True si la notification a été envoyée, False sinon
|
True si la notification a été envoyée, False sinon
|
||||||
@@ -76,6 +109,7 @@ class NotificationManager:
|
|||||||
logger.debug("Notification ignorée (plyer absent) : %s", title)
|
logger.debug("Notification ignorée (plyer absent) : %s", title)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
if not bypass_rate_limit:
|
||||||
with self._lock:
|
with self._lock:
|
||||||
now = time.monotonic()
|
now = time.monotonic()
|
||||||
elapsed = now - self._last_notification_time
|
elapsed = now - self._last_notification_time
|
||||||
@@ -87,6 +121,9 @@ class NotificationManager:
|
|||||||
)
|
)
|
||||||
return False
|
return False
|
||||||
self._last_notification_time = now
|
self._last_notification_time = now
|
||||||
|
else:
|
||||||
|
with self._lock:
|
||||||
|
self._last_notification_time = time.monotonic()
|
||||||
|
|
||||||
# Envoi dans un thread dédié pour ne jamais bloquer l'appelant
|
# Envoi dans un thread dédié pour ne jamais bloquer l'appelant
|
||||||
thread = threading.Thread(
|
thread = threading.Thread(
|
||||||
@@ -97,6 +134,39 @@ class NotificationManager:
|
|||||||
thread.start()
|
thread.start()
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
    def notify_message(self, msg: MessageUtilisateur) -> bool:
        """Send a structured MessageUtilisateur (level, title, body) as a toast.

        BLOCAGE-level messages bypass the rate limit to guarantee the
        user sees that their intervention is needed.

        Args:
            msg: Structured user-facing message carrying niveau, titre,
                corps and duree_s (display duration in seconds).

        Returns:
            True if the notification was sent, False otherwise
            (same contract as `notify`).
        """
        bypass = msg.niveau == NiveauMessage.BLOCAGE
        # Also log the message so it is traceable in the file logs.
        self._log_message(msg)
        return self.notify(
            title=msg.titre,
            message=msg.corps,
            timeout=msg.duree_s,
            bypass_rate_limit=bypass,
        )
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _log_message(msg: MessageUtilisateur) -> None:
|
||||||
|
"""Logger un message utilisateur avec le niveau approprié.
|
||||||
|
|
||||||
|
Les logs agents sont plus lisibles quand on route info → INFO,
|
||||||
|
attention → WARNING, blocage → ERROR, avec un préfixe [LEA].
|
||||||
|
"""
|
||||||
|
prefix = f"[LEA] {msg.titre}: {msg.corps}"
|
||||||
|
if msg.niveau == NiveauMessage.INFO:
|
||||||
|
logger.info(prefix)
|
||||||
|
elif msg.niveau == NiveauMessage.ATTENTION:
|
||||||
|
logger.warning(prefix)
|
||||||
|
elif msg.niveau == NiveauMessage.BLOCAGE:
|
||||||
|
logger.error(prefix)
|
||||||
|
else:
|
||||||
|
logger.info(prefix)
|
||||||
|
|
||||||
def _send(self, title: str, message: str, timeout: int) -> None:
|
def _send(self, title: str, message: str, timeout: int) -> None:
|
||||||
"""Envoi effectif de la notification (exécuté dans un thread dédié)."""
|
"""Envoi effectif de la notification (exécuté dans un thread dédié)."""
|
||||||
try:
|
try:
|
||||||
@@ -180,40 +250,93 @@ class NotificationManager:
|
|||||||
timeout=3,
|
timeout=3,
|
||||||
)
|
)
|
||||||
|
|
||||||
def replay_finished(self, success: bool, workflow_name: str) -> bool:
|
def replay_target_not_found(
|
||||||
"""Notification de fin de replay (succès ou échec)."""
|
self,
|
||||||
if success:
|
target_description: str,
|
||||||
return self.notify(
|
window_title: Optional[str] = None,
|
||||||
title=APP_NAME,
|
) -> bool:
|
||||||
message="C'est fait ! Tout s'est bien passé.",
|
"""Notification quand un élément n'est pas trouvé pendant le replay.
|
||||||
timeout=5,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
return self.notify(
|
|
||||||
title=APP_NAME,
|
|
||||||
message="Hmm, j'ai eu un souci. Vous pouvez me remontrer ?",
|
|
||||||
timeout=7,
|
|
||||||
)
|
|
||||||
|
|
||||||
def connection_changed(self, connected: bool, server_host: str) -> bool:
|
Le replay est mis en pause et attend une intervention humaine.
|
||||||
|
Utilise `messages.formatter_cible_non_trouvee` pour un message en
|
||||||
|
français naturel.
|
||||||
|
"""
|
||||||
|
msg = formatter_cible_non_trouvee(target_description, window_title)
|
||||||
|
return self.notify_message(msg)
|
||||||
|
|
||||||
|
def replay_wrong_window(self, current_title: str, expected_title: str) -> bool:
|
||||||
|
"""Notification quand la fenêtre active n'est pas celle attendue."""
|
||||||
|
msg = formatter_fenetre_incorrecte(current_title, expected_title)
|
||||||
|
return self.notify_message(msg)
|
||||||
|
|
||||||
|
def replay_no_screen_change(self, action_type: str = "") -> bool:
|
||||||
|
"""Notification quand une action n'a pas eu d'effet visible."""
|
||||||
|
msg = formatter_ecran_inchange(action_type)
|
||||||
|
return self.notify_message(msg)
|
||||||
|
|
||||||
|
def replay_learning_mode(
|
||||||
|
self,
|
||||||
|
raison: str = "",
|
||||||
|
target_description: str = "",
|
||||||
|
window_title: Optional[str] = None,
|
||||||
|
) -> bool:
|
||||||
|
"""Notification quand Léa passe en mode apprentissage.
|
||||||
|
|
||||||
|
Léa est bloquée et demande à l'utilisateur de montrer comment faire.
|
||||||
|
Message humble et actionnable pour un utilisateur non technique.
|
||||||
|
"""
|
||||||
|
msg = formatter_mode_apprentissage(raison, target_description, window_title)
|
||||||
|
return self.notify_message(msg)
|
||||||
|
|
||||||
|
def replay_retry(self, action_type: str = "", tentative: int = 2) -> bool:
|
||||||
|
"""Notification quand Léa retente une action."""
|
||||||
|
msg = formatter_retry(action_type, tentative)
|
||||||
|
return self.notify_message(msg)
|
||||||
|
|
||||||
|
def replay_slow(self) -> bool:
|
||||||
|
"""Notification quand Léa va plus lentement que prévu."""
|
||||||
|
msg = formatter_ralentissement()
|
||||||
|
return self.notify_message(msg)
|
||||||
|
|
||||||
|
def replay_finished(
|
||||||
|
self,
|
||||||
|
success: bool,
|
||||||
|
workflow_name: str,
|
||||||
|
step_count: int = 0,
|
||||||
|
duration_s: float = 0.0,
|
||||||
|
) -> bool:
|
||||||
|
"""Notification de fin de replay (succès ou échec)."""
|
||||||
|
msg = formatter_fin_workflow(success, workflow_name, step_count, duration_s)
|
||||||
|
return self.notify_message(msg)
|
||||||
|
|
||||||
|
def replay_workflow_started(self, workflow_name: str, step_count: int = 0) -> bool:
|
||||||
|
"""Notification de début de workflow (remplace `replay_started`)."""
|
||||||
|
msg = formatter_debut_workflow(workflow_name, step_count)
|
||||||
|
return self.notify_message(msg)
|
||||||
|
|
||||||
|
def replay_step_progress(
|
||||||
|
self,
|
||||||
|
current: int,
|
||||||
|
total: int,
|
||||||
|
description: str = "",
|
||||||
|
) -> bool:
|
||||||
|
"""Notification de progression d'une étape (niveau INFO)."""
|
||||||
|
msg = formatter_etape_workflow(current, total, description)
|
||||||
|
return self.notify_message(msg)
|
||||||
|
|
||||||
|
def connection_changed(self, connected: bool, server_host: str = "") -> bool:
|
||||||
"""Notification de changement d'état de la connexion serveur."""
|
"""Notification de changement d'état de la connexion serveur."""
|
||||||
if connected:
|
if connected:
|
||||||
return self.notify(
|
msg = formatter_connexion_retablie()
|
||||||
title=APP_NAME,
|
|
||||||
message="Connectée au serveur.",
|
|
||||||
timeout=5,
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
return self.notify(
|
msg = formatter_connexion_perdue(server_host)
|
||||||
title=APP_NAME,
|
return self.notify_message(msg)
|
||||||
message="J'ai perdu la connexion avec le serveur.",
|
|
||||||
timeout=7,
|
|
||||||
)
|
|
||||||
|
|
||||||
def error(self, message: str) -> bool:
|
def error(self, message: str) -> bool:
|
||||||
"""Notification d'erreur."""
|
"""Notification d'erreur générique.
|
||||||
return self.notify(
|
|
||||||
title=APP_NAME,
|
Essaie d'abord de détecter un motif technique connu et de formater
|
||||||
message=f"Oups, un problème : {message}",
|
correctement, sinon fallback sur un message générique aidant.
|
||||||
timeout=10,
|
"""
|
||||||
)
|
msg = formatter_erreur_generique(message)
|
||||||
|
return self.notify_message(msg)
|
||||||
|
|||||||
@@ -2,12 +2,20 @@
|
|||||||
"""
|
"""
|
||||||
Gestionnaire de vision avancé pour Agent V1.
|
Gestionnaire de vision avancé pour Agent V1.
|
||||||
Optimisé pour le streaming fibre avec détection de changement.
|
Optimisé pour le streaming fibre avec détection de changement.
|
||||||
|
|
||||||
|
Captures disponibles :
|
||||||
|
- Plein écran (full) : contexte global 1920x1080+
|
||||||
|
- Crop ciblé (crop) : 80x80 autour du clic (apprentissage VLM)
|
||||||
|
- Fenêtre active (window) : image isolée de la fenêtre + métadonnées
|
||||||
|
(titre, rect, coordonnées clic relatives) — cross-platform
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
import logging
|
import logging
|
||||||
import hashlib
|
import hashlib
|
||||||
|
import platform
|
||||||
|
from typing import Any, Dict, Optional
|
||||||
from PIL import Image, ImageFilter, ImageStat
|
from PIL import Image, ImageFilter, ImageStat
|
||||||
import mss
|
import mss
|
||||||
from ..config import TARGETED_CROP_SIZE, SCREENSHOT_QUALITY, BLUR_SENSITIVE
|
from ..config import TARGETED_CROP_SIZE, SCREENSHOT_QUALITY, BLUR_SENSITIVE
|
||||||
@@ -15,6 +23,9 @@ from .blur_sensitive import blur_sensitive_regions
|
|||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# OS courant (détecté une seule fois)
|
||||||
|
_SYSTEM = platform.system()
|
||||||
|
|
||||||
class VisionCapturer:
|
class VisionCapturer:
|
||||||
def __init__(self, session_dir: str):
|
def __init__(self, session_dir: str):
|
||||||
self.session_dir = session_dir
|
self.session_dir = session_dir
|
||||||
@@ -27,6 +38,9 @@ class VisionCapturer:
|
|||||||
"""
|
"""
|
||||||
Capture l'écran complet.
|
Capture l'écran complet.
|
||||||
Si force=False, vérifie d'abord si l'écran a changé.
|
Si force=False, vérifie d'abord si l'écran a changé.
|
||||||
|
|
||||||
|
Enrichit les métadonnées avec le titre de la fenêtre active
|
||||||
|
(utile pour le contextualisation des heartbeats côté serveur).
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
with mss.mss() as sct:
|
with mss.mss() as sct:
|
||||||
@@ -52,8 +66,24 @@ class VisionCapturer:
|
|||||||
logger.error(f"Erreur Context Capture: {e}")
|
logger.error(f"Erreur Context Capture: {e}")
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
|
def get_active_window_title(self) -> str:
|
||||||
|
"""Retourne le titre de la fenêtre active (pour enrichir les heartbeats).
|
||||||
|
|
||||||
|
Fallback gracieux : retourne une chaîne vide si indisponible.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
from ..window_info_crossplatform import get_active_window_info
|
||||||
|
info = get_active_window_info()
|
||||||
|
return info.get("title", "")
|
||||||
|
except Exception:
|
||||||
|
return ""
|
||||||
|
|
||||||
def capture_dual(self, x: int, y: int, screenshot_id: str, anonymize=False) -> dict:
|
def capture_dual(self, x: int, y: int, screenshot_id: str, anonymize=False) -> dict:
|
||||||
"""Capture duale (Full + Crop) systématique (forcée car liée à une action)."""
|
"""Capture triple (Full + Crop + Fenêtre active) systématique.
|
||||||
|
|
||||||
|
La fenêtre active est un AJOUT — en cas d'échec, le full + crop
|
||||||
|
sont toujours retournés (fallback gracieux).
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
with mss.mss() as sct:
|
with mss.mss() as sct:
|
||||||
full_path = os.path.join(self.shots_dir, f"{screenshot_id}_full.png")
|
full_path = os.path.join(self.shots_dir, f"{screenshot_id}_full.png")
|
||||||
@@ -82,11 +112,130 @@ class VisionCapturer:
|
|||||||
# Mise à jour du hash pour le prochain heartbeat
|
# Mise à jour du hash pour le prochain heartbeat
|
||||||
self.last_img_hash = self._compute_quick_hash(img)
|
self.last_img_hash = self._compute_quick_hash(img)
|
||||||
|
|
||||||
return {"full": full_path, "crop": crop_path}
|
result = {"full": full_path, "crop": crop_path}
|
||||||
|
|
||||||
|
# --- Capture de la fenêtre active ---
|
||||||
|
# Ajout non-bloquant : enrichit le résultat avec l'image
|
||||||
|
# de la fenêtre seule + métadonnées (titre, rect, clic relatif)
|
||||||
|
window_info = self.capture_active_window(x, y, screenshot_id, full_img=img)
|
||||||
|
if window_info:
|
||||||
|
result["window_capture"] = window_info
|
||||||
|
|
||||||
|
return result
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Erreur Dual Capture: {e}")
|
logger.error(f"Erreur Dual Capture: {e}")
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
    def capture_active_window(
        self,
        x: int,
        y: int,
        screenshot_id: str,
        full_img: Optional[Image.Image] = None,
    ) -> Optional[Dict[str, Any]]:
        """Capture the active window's image alone, plus its metadata.

        Strategy:
        1. Get the window rectangle via the OS API (pywin32 / xdotool / Quartz)
        2. Crop it out of the full-screen screenshot (more reliable than PrintWindow)
        3. Compute the click coordinates relative to the window

        Args:
            x, y: click coordinates in screen pixels
            screenshot_id: identifier used to build the output file name
            full_img: full-screen screenshot already captured (optional —
                avoids a second capture when called from capture_dual)

        Returns:
            Dict with window_image, window_title, window_rect, click_in_window,
            window_size — or None if the window cannot be found.
        """
        try:
            from ..window_info_crossplatform import get_active_window_rect

            rect_info = get_active_window_rect()
            if not rect_info:
                logger.debug("Fenêtre active introuvable — skip capture fenêtre")
                return None

            win_rect = rect_info["rect"]  # [left, top, right, bottom]
            win_left, win_top, win_right, win_bottom = win_rect
            win_w, win_h = rect_info["size"]  # [width, height]
            title = rect_info.get("title", "unknown_window")
            app_name = rect_info.get("app_name", "unknown_app")

            # Ignore windows that are too small (taskbars, system popups).
            if win_w < 50 or win_h < 50:
                logger.debug(f"Fenêtre trop petite ({win_w}x{win_h}) — skip")
                return None

            # Click coordinates relative to the window's top-left corner.
            click_rel_x = x - win_left
            click_rel_y = y - win_top

            # If the click lands outside the window, flag it but keep going.
            click_inside = (0 <= click_rel_x <= win_w and 0 <= click_rel_y <= win_h)

            # --- Crop the window out of the full screenshot ---
            if full_img is None:
                # No screenshot supplied — capture one (standalone call path).
                try:
                    with mss.mss() as sct:
                        monitor = sct.monitors[1]
                        sct_img = sct.grab(monitor)
                        full_img = Image.frombytes(
                            "RGB", sct_img.size, sct_img.bgra, "raw", "BGRX"
                        )
                except Exception as e:
                    logger.error(f"Erreur capture plein écran pour fenêtre : {e}")
                    return None

            # Clamp the crop to the bounds of the full-screen image.
            img_w, img_h = full_img.size
            crop_left = max(0, win_left)
            crop_top = max(0, win_top)
            crop_right = min(img_w, win_right)
            crop_bottom = min(img_h, win_bottom)

            if crop_right <= crop_left or crop_bottom <= crop_top:
                logger.debug("Fenêtre hors écran — skip capture fenêtre")
                return None

            window_img = full_img.crop((crop_left, crop_top, crop_right, crop_bottom))

            # AI Act compliance blurring of sensitive regions.
            if BLUR_SENSITIVE:
                blur_sensitive_regions(window_img)

            # Save to disk.
            # NOTE(review): PIL's PNG writer ignores a JPEG-style "quality"
            # keyword — confirm whether JPEG output was intended here.
            window_path = os.path.join(
                self.shots_dir, f"{screenshot_id}_window.png"
            )
            window_img.save(window_path, "PNG", quality=SCREENSHOT_QUALITY)

            result = {
                "window_image": window_path,
                "window_title": title,
                "app_name": app_name,
                "window_rect": win_rect,
                "window_size": [win_w, win_h],
                "click_in_window": [click_rel_x, click_rel_y],
                "click_inside_window": click_inside,
            }

            logger.debug(
                f"Fenêtre capturée : {title} ({win_w}x{win_h}) — "
                f"clic relatif ({click_rel_x}, {click_rel_y})"
            )
            return result

        except ImportError as e:
            logger.debug(f"Module fenêtre indisponible : {e}")
            return None
        except Exception as e:
            logger.error(f"Erreur capture fenêtre active : {e}")
            return None
|
||||||
|
|
||||||
def _compute_quick_hash(self, img: Image) -> str:
|
def _compute_quick_hash(self, img: Image) -> str:
|
||||||
"""Calcule un hash rapide basé sur une vignette réduite pour détecter les changements."""
|
"""Calcule un hash rapide basé sur une vignette réduite pour détecter les changements."""
|
||||||
# On réduit l'image à 64x64 pour comparer les masses de couleurs (très rapide)
|
# On réduit l'image à 64x64 pour comparer les masses de couleurs (très rapide)
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import platform
|
import platform
|
||||||
import subprocess
|
import subprocess
|
||||||
from typing import Dict, Optional
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
|
|
||||||
def _run_cmd(cmd: list[str]) -> Optional[str]:
|
def _run_cmd(cmd: list[str]) -> Optional[str]:
|
||||||
@@ -51,6 +51,32 @@ def get_active_window_info() -> Dict[str, str]:
|
|||||||
return {"title": "unknown_window", "app_name": "unknown_app"}
|
return {"title": "unknown_window", "app_name": "unknown_app"}
|
||||||
|
|
||||||
|
|
||||||
|
def get_active_window_rect() -> Optional[Dict[str, Any]]:
    """Return the active window's rectangle, or None if unavailable.

    The returned mapping has the shape::

        {
            "title": "...",
            "app_name": "...",
            "rect": [left, top, right, bottom],
            "position": [left, top],
            "size": [width, height],
            "hwnd": int  # Windows only
        }

    None is returned when the window cannot be found or is minimized.
    The OS is detected automatically and the matching backend is used.
    """
    backends = {
        "Windows": _get_window_rect_windows,
        "Linux": _get_window_rect_linux,
        "Darwin": _get_window_rect_macos,
    }
    backend = backends.get(platform.system())
    return backend() if backend is not None else None
|
||||||
|
|
||||||
|
|
||||||
def _get_window_info_linux() -> Dict[str, str]:
|
def _get_window_info_linux() -> Dict[str, str]:
|
||||||
"""
|
"""
|
||||||
Linux: utilise xdotool (X11)
|
Linux: utilise xdotool (X11)
|
||||||
@@ -178,6 +204,163 @@ def _get_window_info_macos() -> Dict[str, str]:
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _get_window_rect_windows() -> Optional[Dict[str, Any]]:
|
||||||
|
"""
|
||||||
|
Windows : utilise pywin32 pour obtenir le rectangle de la fenêtre active.
|
||||||
|
|
||||||
|
Retourne None si la fenêtre est minimisée (icônifiée) ou si pywin32 manque.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
import win32gui
|
||||||
|
import win32process
|
||||||
|
import psutil
|
||||||
|
|
||||||
|
hwnd = win32gui.GetForegroundWindow()
|
||||||
|
if not hwnd:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Ignorer les fenêtres minimisées (pas de contenu visible)
|
||||||
|
if win32gui.IsIconic(hwnd):
|
||||||
|
return None
|
||||||
|
|
||||||
|
title = win32gui.GetWindowText(hwnd) or "unknown_window"
|
||||||
|
|
||||||
|
# Rectangle de la fenêtre (coordonnées écran absolues)
|
||||||
|
left, top, right, bottom = win32gui.GetWindowRect(hwnd)
|
||||||
|
width = right - left
|
||||||
|
height = bottom - top
|
||||||
|
|
||||||
|
# Ignorer les fenêtres de taille nulle ou absurde
|
||||||
|
if width <= 0 or height <= 0:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Nom du processus
|
||||||
|
_, pid = win32process.GetWindowThreadProcessId(hwnd)
|
||||||
|
try:
|
||||||
|
app_name = psutil.Process(pid).name()
|
||||||
|
except Exception:
|
||||||
|
app_name = "unknown_app"
|
||||||
|
|
||||||
|
return {
|
||||||
|
"title": title,
|
||||||
|
"app_name": app_name,
|
||||||
|
"rect": [left, top, right, bottom],
|
||||||
|
"position": [left, top],
|
||||||
|
"size": [width, height],
|
||||||
|
"hwnd": hwnd,
|
||||||
|
}
|
||||||
|
|
||||||
|
except ImportError:
|
||||||
|
return None
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _get_window_rect_linux() -> Optional[Dict[str, Any]]:
    """Linux (X11) backend: active-window rectangle via xdotool.

    Requires: sudo apt-get install xdotool x11-utils
    Returns None when xdotool is unavailable or the geometry output
    cannot be parsed.
    """
    try:
        # Identifier of the currently focused window.
        wid = _run_cmd(["xdotool", "getactivewindow"])
        if not wid:
            return None

        title = _run_cmd(["xdotool", "getactivewindow", "getwindowname"]) or "unknown_window"
        pid_str = _run_cmd(["xdotool", "getactivewindow", "getwindowpid"])
        app_name = "unknown_app"
        if pid_str:
            app_name = _run_cmd(["ps", "-p", pid_str.strip(), "-o", "comm="]) or "unknown_app"

        # Geometry (position + size) in `VAR=value` shell format.
        geom_raw = _run_cmd(["xdotool", "getwindowgeometry", "--shell", wid])
        if not geom_raw:
            return None

        geometrie: Dict[str, int] = {}
        for ligne in geom_raw.strip().splitlines():
            cle, sep, valeur = ligne.partition("=")
            if not sep:
                continue
            try:
                geometrie[cle.strip()] = int(valeur.strip())
            except ValueError:
                continue

        # All four fields are required to build a rectangle.
        if any(champ not in geometrie for champ in ("X", "Y", "WIDTH", "HEIGHT")):
            return None

        x, y = geometrie["X"], geometrie["Y"]
        w, h = geometrie["WIDTH"], geometrie["HEIGHT"]

        return {
            "title": title,
            "app_name": app_name,
            "rect": [x, y, x + w, y + h],
            "position": [x, y],
            "size": [w, h],
        }

    except Exception:
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def _get_window_rect_macos() -> Optional[Dict[str, Any]]:
|
||||||
|
"""
|
||||||
|
macOS : utilise Quartz (CGWindowListCopyWindowInfo) pour obtenir le rectangle.
|
||||||
|
|
||||||
|
Nécessite : pip install pyobjc-framework-Quartz
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
from AppKit import NSWorkspace
|
||||||
|
from Quartz import (
|
||||||
|
CGWindowListCopyWindowInfo,
|
||||||
|
kCGWindowListOptionOnScreenOnly,
|
||||||
|
kCGNullWindowID,
|
||||||
|
)
|
||||||
|
|
||||||
|
active_app = NSWorkspace.sharedWorkspace().activeApplication()
|
||||||
|
app_name = active_app.get("NSApplicationName", "unknown_app")
|
||||||
|
|
||||||
|
window_list = CGWindowListCopyWindowInfo(
|
||||||
|
kCGWindowListOptionOnScreenOnly, kCGNullWindowID
|
||||||
|
)
|
||||||
|
|
||||||
|
for window in window_list:
|
||||||
|
owner_name = window.get("kCGWindowOwnerName", "")
|
||||||
|
if owner_name != app_name:
|
||||||
|
continue
|
||||||
|
|
||||||
|
bounds = window.get("kCGWindowBounds")
|
||||||
|
if not bounds:
|
||||||
|
continue
|
||||||
|
|
||||||
|
x = int(bounds.get("X", 0))
|
||||||
|
y = int(bounds.get("Y", 0))
|
||||||
|
w = int(bounds.get("Width", 0))
|
||||||
|
h = int(bounds.get("Height", 0))
|
||||||
|
if w <= 0 or h <= 0:
|
||||||
|
continue
|
||||||
|
|
||||||
|
title = window.get("kCGWindowName", "unknown_window") or "unknown_window"
|
||||||
|
|
||||||
|
return {
|
||||||
|
"title": title,
|
||||||
|
"app_name": app_name,
|
||||||
|
"rect": [x, y, x + w, y + h],
|
||||||
|
"position": [x, y],
|
||||||
|
"size": [w, h],
|
||||||
|
}
|
||||||
|
|
||||||
|
except ImportError:
|
||||||
|
return None
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
# Test rapide
|
# Test rapide
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import time
|
import time
|
||||||
@@ -188,5 +371,10 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
for i in range(5):
|
for i in range(5):
|
||||||
info = get_active_window_info()
|
info = get_active_window_info()
|
||||||
|
rect = get_active_window_rect()
|
||||||
print(f"[{i+1}] App: {info['app_name']:20s} | Title: {info['title']}")
|
print(f"[{i+1}] App: {info['app_name']:20s} | Title: {info['title']}")
|
||||||
|
if rect:
|
||||||
|
print(f" Rect: {rect['rect']} | Size: {rect['size']}")
|
||||||
|
else:
|
||||||
|
print(" Rect: non disponible")
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
214
agent_v0/deploy/windows_client/agent_v1/core/grounding.py
Normal file
214
agent_v0/deploy/windows_client/agent_v1/core/grounding.py
Normal file
@@ -0,0 +1,214 @@
|
|||||||
|
# agent_v1/core/grounding.py
|
||||||
|
"""
|
||||||
|
Module Grounding — localisation pure d'éléments UI sur l'écran.
|
||||||
|
|
||||||
|
Responsabilité unique : "Trouve l'élément X sur l'écran et retourne ses coordonnées."
|
||||||
|
Ne prend AUCUNE décision. Si l'élément n'est pas trouvé → retourne NOT_FOUND.
|
||||||
|
|
||||||
|
Stratégies disponibles (cascade configurable) :
|
||||||
|
1. Serveur SomEngine + VLM (GPU distant)
|
||||||
|
2. Template matching local (CPU, ~10ms)
|
||||||
|
3. VLM local direct (CPU/GPU local)
|
||||||
|
|
||||||
|
Séparé de Policy (qui décide quoi faire quand grounding échoue).
|
||||||
|
Ref: docs/PLAN_ACTEUR_V1.md — Architecture MICRO (grounding + exécution)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class GroundingResult:
    """Outcome of a single visual-localization attempt."""

    found: bool                 # the element was located
    x_pct: float = 0.0          # X position as a fraction (0.0-1.0)
    y_pct: float = 0.0          # Y position as a fraction (0.0-1.0)
    method: str = ""            # resolver used (server_som, anchor_template, vlm_direct...)
    score: float = 0.0          # confidence (0.0-1.0)
    elapsed_ms: float = 0.0     # resolution time
    detail: str = ""            # extra info (matched label, failure reason)
    raw: Optional[Dict] = None  # raw resolver payload (for debugging)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict (raw payload excluded, floats rounded)."""
        resume = {
            "found": self.found,
            "x_pct": self.x_pct,
            "y_pct": self.y_pct,
            "method": self.method,
        }
        resume["score"] = round(self.score, 3)
        resume["elapsed_ms"] = round(self.elapsed_ms, 1)
        resume["detail"] = self.detail
        return resume
|
||||||
|
|
||||||
|
|
||||||
|
# Singleton result for "not found".
# NOTE(review): GroundingResult is not frozen — callers must not mutate this
# shared instance (e.g. by assigning elapsed_ms on it); return a fresh
# GroundingResult instead when per-call fields are needed.
NOT_FOUND = GroundingResult(found=False, detail="Aucune méthode n'a trouvé l'élément")
|
||||||
|
|
||||||
|
|
||||||
|
class GroundingEngine:
|
||||||
|
"""Moteur de localisation visuelle d'éléments UI.
|
||||||
|
|
||||||
|
Encapsule la cascade de résolution (serveur → template → VLM local)
|
||||||
|
avec une interface unifiée. Ne prend aucune décision — c'est le rôle
|
||||||
|
de PolicyEngine.
|
||||||
|
|
||||||
|
Usage :
|
||||||
|
engine = GroundingEngine(executor)
|
||||||
|
result = engine.locate(screenshot_b64, target_spec, screen_w, screen_h)
|
||||||
|
if result.found:
|
||||||
|
click(result.x_pct, result.y_pct)
|
||||||
|
"""
|
||||||
|
|
||||||
|
    def __init__(self, executor):
        """
        Args:
            executor: ActionExecutorV1 — provides the existing resolution
                methods (server resolve, template matching, local VLM).
        """
        self._executor = executor
|
||||||
|
|
||||||
|
    def locate(
        self,
        server_url: str,
        target_spec: Dict[str, Any],
        fallback_x: float,
        fallback_y: float,
        screen_width: int,
        screen_height: int,
        strategies: Optional[List[str]] = None,
    ) -> GroundingResult:
        """Locate a UI element on screen.

        Runs the strategy cascade in order and returns as soon as one
        strategy finds the element.

        Args:
            server_url: Server URL (SomEngine + GPU VLM).
            target_spec: Target specification (by_text, anchor, vlm_description...).
            fallback_x, fallback_y: Fallback coordinates (from recording).
            screen_width, screen_height: Screen resolution.
            strategies: Ordered list of strategies to try.
                Defaults to ["server", "template", "vlm_local"].

        Returns:
            GroundingResult with found=True and coordinates, or a
            found=False result when every strategy failed.
        """
        if strategies is None:
            strategies = ["server", "template", "vlm_local"]

        # ── Learning: reorder strategies according to history ──
        # If the learning layer knows which method works for this target,
        # put it first. This is the learning feedback loop.
        learned = target_spec.get("_learned_strategy", "")
        if learned:
            # Map fine-grained resolver names onto the coarse strategy names.
            strategy_map = {
                "som_text_match": "server",
                "grounding_vlm": "server",
                "server_som": "server",
                "anchor_template": "template",
                "template_matching": "template",
                "hybrid_text_direct": "vlm_local",
                "hybrid_vlm_text": "vlm_local",
                "vlm_direct": "vlm_local",
            }
            preferred = strategy_map.get(learned, "")
            if preferred and preferred in strategies:
                strategies = [preferred] + [s for s in strategies if s != preferred]
                logger.info(
                    f"Grounding: stratégie réordonnée par l'apprentissage → "
                    f"{strategies} (learned={learned})"
                )

        t_start = time.time()
        # One screenshot shared by every strategy in the cascade.
        screenshot_b64 = self._executor._capture_screenshot_b64(max_width=0, quality=75)
        if not screenshot_b64:
            return GroundingResult(
                found=False, detail="Capture screenshot échouée",
                elapsed_ms=(time.time() - t_start) * 1000,
            )

        for strategy in strategies:
            result = self._try_strategy(
                strategy, server_url, screenshot_b64, target_spec,
                fallback_x, fallback_y, screen_width, screen_height,
            )
            if result.found:
                # Report total cascade time, not just the winning strategy's.
                result.elapsed_ms = (time.time() - t_start) * 1000
                return result

        return GroundingResult(
            found=False,
            detail=f"Toutes les stratégies ont échoué ({', '.join(strategies)})",
            elapsed_ms=(time.time() - t_start) * 1000,
        )
|
||||||
|
|
||||||
|
def _try_strategy(
    self,
    strategy: str,
    server_url: str,
    screenshot_b64: str,
    target_spec: Dict[str, Any],
    fallback_x: float,
    fallback_y: float,
    screen_width: int,
    screen_height: int,
) -> GroundingResult:
    """Try a single grounding strategy.

    Args:
        strategy: Strategy name — one of "server", "template", "vlm_local".
        server_url: RPA server URL; the "server" strategy is skipped when empty.
        screenshot_b64: Current screen capture, base64-encoded.
        target_spec: Target description (text, anchor image, VLM description...).
        fallback_x: Recorded x coordinate passed through to the server resolver.
        fallback_y: Recorded y coordinate passed through to the server resolver.
        screen_width: Screen width in pixels.
        screen_height: Screen height in pixels.

    Returns:
        GroundingResult with found=True and the resolved x_pct/y_pct
        coordinates on success; otherwise found=False with method=strategy.
    """

    # ── "server": delegate resolution to the remote RPA server ──
    if strategy == "server" and server_url:
        raw = self._executor._server_resolve_target(
            server_url, screenshot_b64, target_spec,
            fallback_x, fallback_y, screen_width, screen_height,
        )
        if raw and raw.get("resolved"):
            return GroundingResult(
                found=True,
                x_pct=raw["x_pct"],
                y_pct=raw["y_pct"],
                method=raw.get("method", "server"),
                score=raw.get("score", 0.0),
                detail=raw.get("matched_element", {}).get("label", ""),
                raw=raw,
            )

    # ── "template": local anchor-image matching via the executor ──
    elif strategy == "template":
        anchor_b64 = target_spec.get("anchor_image_base64", "")
        # Without a recorded anchor image this strategy has nothing to match.
        if anchor_b64:
            raw = self._executor._template_match_anchor(
                screenshot_b64, anchor_b64, screen_width, screen_height,
            )
            if raw and raw.get("resolved"):
                return GroundingResult(
                    found=True,
                    x_pct=raw["x_pct"],
                    y_pct=raw["y_pct"],
                    method="anchor_template",
                    score=raw.get("score", 0.0),
                    raw=raw,
                )

    # ── "vlm_local": hybrid local VLM/text resolution ──
    elif strategy == "vlm_local":
        by_text = target_spec.get("by_text", "")
        vlm_desc = target_spec.get("vlm_description", "")
        # Only call the VLM when the spec gives it something to match on.
        if vlm_desc or by_text:
            raw = self._executor._hybrid_vlm_resolve(
                screenshot_b64, target_spec, screen_width, screen_height,
            )
            if raw and raw.get("resolved"):
                return GroundingResult(
                    found=True,
                    x_pct=raw["x_pct"],
                    y_pct=raw["y_pct"],
                    method=raw.get("method", "vlm_local"),
                    score=raw.get("score", 0.0),
                    detail=raw.get("matched_element", {}).get("label", ""),
                    raw=raw,
                )

    # No strategy resolved the target (or its preconditions were missing).
    return GroundingResult(found=False, method=strategy, detail=f"{strategy}: pas trouvé")
|
||||||
152
agent_v0/deploy/windows_client/agent_v1/core/policy.py
Normal file
152
agent_v0/deploy/windows_client/agent_v1/core/policy.py
Normal file
@@ -0,0 +1,152 @@
|
|||||||
|
# agent_v1/core/policy.py
|
||||||
|
"""
|
||||||
|
Module Policy — décisions intelligentes quand le grounding échoue.
|
||||||
|
|
||||||
|
Responsabilité unique : "Le Grounding dit NOT_FOUND. Que fait-on ?"
|
||||||
|
Ne localise AUCUN élément — c'est le rôle du Grounding.
|
||||||
|
|
||||||
|
Décisions possibles :
|
||||||
|
- RETRY : re-tenter le grounding (après popup fermée, par exemple)
|
||||||
|
- SKIP : l'action n'est plus nécessaire (état déjà atteint)
|
||||||
|
- ABORT : arrêter le workflow (état incohérent)
|
||||||
|
- SUPERVISE : rendre la main à l'utilisateur
|
||||||
|
|
||||||
|
Séparé de Grounding (qui localise les éléments).
|
||||||
|
Ref: docs/PLAN_ACTEUR_V1.md — Architecture MÉSO (acteur intelligent)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from enum import Enum
|
||||||
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class Decision(Enum):
    """Possible verdicts when grounding fails to locate a target."""

    # Try the grounding again (e.g. after a blocking popup was dismissed).
    RETRY = "retry"
    # The action is no longer needed — the desired state is already reached.
    SKIP = "skip"
    # The workflow must stop: the observed state is inconsistent.
    ABORT = "abort"
    # Hand control back to the user ("I'm stuck").
    SUPERVISE = "supervise"
    # Keep going despite the failure (non-critical action).
    CONTINUE = "continue"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class PolicyDecision:
    """Outcome of a single Policy ruling."""

    decision: Decision
    # Human-readable justification for the verdict.
    reason: str
    # Corrective action that was performed, if any (e.g. "popup_closed").
    action_taken: str = ""
    elapsed_ms: float = 0.0

    def to_dict(self) -> Dict[str, Any]:
        """Serialise the decision to a JSON-friendly dict."""
        payload: Dict[str, Any] = {"decision": self.decision.value}
        payload["reason"] = self.reason
        payload["action_taken"] = self.action_taken
        # Round to one decimal: millisecond precision is enough for traces.
        payload["elapsed_ms"] = round(self.elapsed_ms, 1)
        return payload
|
||||||
|
|
||||||
|
|
||||||
|
class PolicyEngine:
    """Decision engine used when the grounding reports NOT_FOUND.

    Decision cascade:
      1. A popup may be blocking the target → close it and RETRY.
      2. Retries exhausted → ask the gemma4 actor → SKIP / ABORT / SUPERVISE.
      3. Otherwise → RETRY (attempts remain).

    Usage:
        policy = PolicyEngine(executor)
        decision = policy.decide(action, target_spec, grounding_result)
        # branch on decision.decision (Decision.RETRY, Decision.SKIP, ...)
    """

    def __init__(self, executor):
        self._executor = executor

    def decide(
        self,
        action: Dict[str, Any],
        target_spec: Dict[str, Any],
        retry_count: int = 0,
        max_retries: int = 1,
    ) -> PolicyDecision:
        """Rule on what to do after a grounding failure.

        Args:
            action: The action that failed.
            target_spec: The target that could not be located.
            retry_count: Number of retries already performed.
            max_retries: Maximum number of retries allowed.

        Returns:
            PolicyDecision carrying the verdict, a reason and elapsed time.
        """
        started = time.time()

        def _verdict(decision: Decision, reason: str, action_taken: str = "") -> PolicyDecision:
            # Stamp the elapsed time on every exit path.
            return PolicyDecision(
                decision=decision,
                reason=reason,
                action_taken=action_taken,
                elapsed_ms=(time.time() - started) * 1000,
            )

        # Step 1 — first attempt: a popup may be hiding the target.
        if retry_count == 0 and self._try_close_popup():
            return _verdict(
                Decision.RETRY,
                "Popup détectée et fermée, re-tentative",
                "popup_closed",
            )

        # Step 2 — retries exhausted: defer to the gemma4 actor.
        if retry_count >= max_retries:
            answer = self._ask_actor(action, target_spec)
            if answer == "PASSER":
                return _verdict(Decision.SKIP, "Acteur gemma4 : l'état est déjà atteint")
            if answer == "STOPPER":
                return _verdict(Decision.ABORT, "Acteur gemma4 : état incohérent, arrêt")
            # EXECUTER — or any unexpected answer — means supervised pause.
            return _verdict(
                Decision.SUPERVISE,
                f"Acteur gemma4 : {answer}, pause supervisée",
            )

        # Step 3 — retries still available.
        return _verdict(Decision.RETRY, f"Retry {retry_count + 1}/{max_retries}")

    def _try_close_popup(self) -> bool:
        """Attempt to dismiss a popup via the executor's existing VLM handler."""
        try:
            return self._executor._handle_popup_vlm()
        except Exception as e:
            logger.debug(f"Policy: popup handler échoué : {e}")
            return False

    def _ask_actor(self, action: Dict, target_spec: Dict) -> str:
        """Ask gemma4 for a verdict (PASSER/EXECUTER/STOPPER)."""
        try:
            return self._executor._actor_decide(action, target_spec)
        except Exception as e:
            logger.debug(f"Policy: acteur gemma4 échoué : {e}")
            # Degrade to EXECUTER, which the caller maps to supervised mode.
            return "EXECUTER"
|
||||||
294
agent_v0/deploy/windows_client/agent_v1/core/uia_helper.py
Normal file
294
agent_v0/deploy/windows_client/agent_v1/core/uia_helper.py
Normal file
@@ -0,0 +1,294 @@
|
|||||||
|
# core/workflow/uia_helper.py
|
||||||
|
"""
|
||||||
|
UIAHelper — Wrapper Python pour lea_uia.exe (helper Rust UI Automation).
|
||||||
|
|
||||||
|
Expose une API Python simple pour interroger UIA via le binaire Rust.
|
||||||
|
Communique via subprocess + stdin/stdout JSON.
|
||||||
|
|
||||||
|
Pourquoi un helper Rust ?
|
||||||
|
- 5-10x plus rapide que pywinauto (10-20ms vs 50-200ms)
|
||||||
|
- Binaire standalone ~500 Ko, aucune dépendance runtime
|
||||||
|
- Pas de problèmes de threading COM en Python
|
||||||
|
- Crash-safe (le crash du helper n'affecte pas l'agent Python)
|
||||||
|
|
||||||
|
Architecture :
|
||||||
|
Python executor
|
||||||
|
↓ subprocess.run
|
||||||
|
lea_uia.exe query --x 812 --y 436
|
||||||
|
↓ UIA API Windows
|
||||||
|
JSON response
|
||||||
|
↓ stdout
|
||||||
|
Python executor parse JSON
|
||||||
|
|
||||||
|
Si lea_uia.exe n'est pas disponible (Linux, binaire absent, crash) :
|
||||||
|
toutes les méthodes retournent None → fallback vision automatique.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import platform
|
||||||
|
import subprocess
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Timeout par défaut pour les appels UIA (en secondes)
|
||||||
|
_DEFAULT_TIMEOUT = 5.0
|
||||||
|
|
||||||
|
# Masquer la fenêtre console lors du spawn de lea_uia.exe sur Windows.
|
||||||
|
# Sans ce flag, chaque appel (à chaque clic utilisateur pendant
|
||||||
|
# l'enregistrement) fait apparaître une fenêtre cmd noire brièvement
|
||||||
|
# visible à l'écran → ralentit la souris et pollue les screenshots
|
||||||
|
# capturés (le VLM peut "voir" le chemin lea_uia.exe comme texte cliqué).
|
||||||
|
#
|
||||||
|
# La valeur 0x08000000 correspond à CREATE_NO_WINDOW défini dans
|
||||||
|
# l'API Windows. Sur Linux/Mac, la valeur est 0 et `creationflags`
|
||||||
|
# est ignoré. getattr() gère le cas où Python expose déjà la constante
|
||||||
|
# sur Windows.
|
||||||
|
if platform.system() == "Windows":
|
||||||
|
_SUBPROCESS_CREATION_FLAGS = getattr(subprocess, "CREATE_NO_WINDOW", 0x08000000)
|
||||||
|
else:
|
||||||
|
_SUBPROCESS_CREATION_FLAGS = 0
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class UiaElement:
    """Python-side view of a UI Automation element reported by lea_uia.exe."""

    name: str = ""
    control_type: str = ""
    class_name: str = ""
    automation_id: str = ""
    # Screen rectangle as (left, top, right, bottom), in pixels.
    bounding_rect: Tuple[int, int, int, int] = (0, 0, 0, 0)
    is_enabled: bool = False
    is_offscreen: bool = True
    # Ancestor chain, each entry a dict with at least control_type/name keys.
    parent_path: List[Dict[str, str]] = field(default_factory=list)
    process_name: str = ""

    def center(self) -> Tuple[int, int]:
        """Center of the bounding rectangle, in pixels."""
        left, top, right, bottom = self.bounding_rect
        return ((left + right) // 2, (top + bottom) // 2)

    def width(self) -> int:
        """Rectangle width in pixels."""
        left, _, right, _ = self.bounding_rect
        return right - left

    def height(self) -> int:
        """Rectangle height in pixels."""
        _, top, _, bottom = self.bounding_rect
        return bottom - top

    def is_clickable(self) -> bool:
        """Whether the element can receive a click: enabled, on-screen, non-empty."""
        if not self.is_enabled or self.is_offscreen:
            return False
        return self.width() > 0 and self.height() > 0

    def path_signature(self) -> str:
        """Signature of the parent chain, used to re-locate the element later."""
        segments = [
            f"{ancestor['control_type']}[{ancestor['name']}]"
            for ancestor in self.parent_path
            if ancestor.get("name")
        ]
        segments.append(f"{self.control_type}[{self.name}]")
        return " > ".join(segments)

    def to_dict(self) -> Dict[str, Any]:
        """Serialise to a plain dict (rect as a list, JSON-friendly)."""
        return {
            "name": self.name,
            "control_type": self.control_type,
            "class_name": self.class_name,
            "automation_id": self.automation_id,
            "bounding_rect": list(self.bounding_rect),
            "is_enabled": self.is_enabled,
            "is_offscreen": self.is_offscreen,
            "parent_path": self.parent_path,
            "process_name": self.process_name,
        }

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "UiaElement":
        """Build a UiaElement from the helper's JSON payload (lenient on rect)."""
        raw_rect = d.get("bounding_rect", [0, 0, 0, 0])
        # Tolerate malformed payloads: anything that is not a >=4-item list
        # collapses to an empty rectangle.
        if isinstance(raw_rect, list) and len(raw_rect) >= 4:
            rect = tuple(raw_rect[:4])
        else:
            rect = (0, 0, 0, 0)
        return cls(
            name=d.get("name", ""),
            control_type=d.get("control_type", ""),
            class_name=d.get("class_name", ""),
            automation_id=d.get("automation_id", ""),
            bounding_rect=rect,
            is_enabled=d.get("is_enabled", False),
            is_offscreen=d.get("is_offscreen", True),
            parent_path=d.get("parent_path", []),
            process_name=d.get("process_name", ""),
        )
|
||||||
|
|
||||||
|
|
||||||
|
class UIAHelper:
    """Python wrapper around lea_uia.exe (Rust UI Automation helper).

    Each call spawns the helper binary as a short-lived subprocess and
    parses the JSON it prints on stdout. When the helper is unavailable
    (non-Windows platform, binary missing, crash, timeout) every query
    returns None so callers can fall back to vision-based grounding.
    """

    def __init__(self, helper_path: str = "", timeout: float = _DEFAULT_TIMEOUT):
        # An explicit path wins; otherwise probe the standard locations.
        self._helper_path = helper_path or self._find_helper()
        self._timeout = timeout
        # Computed once at construction; exposed via the `available` property.
        self._available = self._check_available()

    def _find_helper(self) -> str:
        """Locate lea_uia.exe in the standard install locations.

        Returns the absolute path of the first existing candidate, or ""
        when none is found.
        """
        candidates = [
            r"C:\Lea\helpers\lea_uia.exe",
            # Development build output (cross-compiled from the repo).
            os.path.join(os.path.dirname(__file__), "..", "..",
                         "agent_rust", "lea_uia", "target",
                         "x86_64-pc-windows-gnu", "release", "lea_uia.exe"),
            "./helpers/lea_uia.exe",
            "lea_uia.exe",
        ]
        for path in candidates:
            if os.path.isfile(path):
                return os.path.abspath(path)
        return ""

    def _check_available(self) -> bool:
        """Check that the helper is usable (Windows + binary present)."""
        if platform.system() != "Windows":
            logger.debug("UIAHelper: Linux/Mac — helper désactivé")
            return False
        if not self._helper_path:
            logger.debug("UIAHelper: lea_uia.exe introuvable")
            return False
        if not os.path.isfile(self._helper_path):
            logger.debug(f"UIAHelper: chemin invalide {self._helper_path}")
            return False
        return True

    @property
    def available(self) -> bool:
        # True only when running on Windows with the binary present.
        return self._available

    @property
    def helper_path(self) -> str:
        # Resolved absolute path of lea_uia.exe, or "" when not found.
        return self._helper_path

    def _run(self, args: List[str]) -> Optional[Dict[str, Any]]:
        """Run lea_uia.exe with the given arguments and parse its JSON output.

        Returns the parsed JSON dict, or None on any failure (helper
        unavailable, non-zero exit, empty/invalid output, timeout). All
        failures are logged at debug level only — callers treat None as
        "fall back to vision".
        """
        if not self._available:
            return None
        try:
            result = subprocess.run(
                [self._helper_path] + args,
                capture_output=True,
                text=True,
                timeout=self._timeout,
                encoding="utf-8",
                errors="replace",
                # CREATE_NO_WINDOW on Windows — avoids a flashing console.
                creationflags=_SUBPROCESS_CREATION_FLAGS,
            )
            if result.returncode != 0:
                logger.debug(
                    f"UIAHelper: exit code {result.returncode}, "
                    f"stderr: {result.stderr[:200]}"
                )
                return None
            output = result.stdout.strip()
            if not output:
                return None
            return json.loads(output)
        except subprocess.TimeoutExpired:
            logger.debug(f"UIAHelper: timeout ({self._timeout}s) sur {args}")
            return None
        except json.JSONDecodeError as e:
            logger.debug(f"UIAHelper: JSON invalide — {e}")
            return None
        except Exception as e:
            # Crash-safe by design: a helper failure must never crash the agent.
            logger.debug(f"UIAHelper: erreur {e}")
            return None

    def health(self) -> bool:
        """Check that the helper and the UIA API respond."""
        data = self._run(["health"])
        return data is not None and data.get("status") == "ok"

    def query_at(
        self,
        x: int,
        y: int,
        with_parents: bool = True,
    ) -> Optional[UiaElement]:
        """Fetch the UIA element at a screen position.

        Args:
            x, y: Absolute pixel coordinates.
            with_parents: Include the parent hierarchy in the result.

        Returns:
            UiaElement if found, None otherwise (no element or UIA unavailable).
        """
        args = ["query", "--x", str(x), "--y", str(y)]
        if not with_parents:
            args.append("--with-parents=false")

        data = self._run(args)
        if not data or data.get("status") != "ok":
            return None

        elem_data = data.get("element")
        if not elem_data:
            return None
        return UiaElement.from_dict(elem_data)

    def find_by_name(
        self,
        name: str,
        control_type: Optional[str] = None,
        automation_id: Optional[str] = None,
        window: Optional[str] = None,
        timeout_ms: int = 2000,
    ) -> Optional[UiaElement]:
        """Search for an element by its name (plus optional filters).

        Args:
            name: Exact element name.
            control_type: Control type (Button, Edit, MenuItem...).
            automation_id: Automation ID filter.
            window: Restrict the search to a specific window.
            timeout_ms: Search timeout in milliseconds.

        Returns:
            UiaElement if found within the timeout, None otherwise.
        """
        args = ["find", "--name", name, "--timeout-ms", str(timeout_ms)]
        if control_type:
            args.extend(["--control-type", control_type])
        if automation_id:
            args.extend(["--automation-id", automation_id])
        if window:
            args.extend(["--window", window])

        data = self._run(args)
        if not data or data.get("status") != "ok":
            return None

        elem_data = data.get("element")
        if not elem_data:
            return None
        return UiaElement.from_dict(elem_data)

    def capture_focused(self, max_depth: int = 3) -> Optional[UiaElement]:
        """Capture the focused element plus its surrounding context.

        NOTE(review): max_depth is forwarded to the helper's --max-depth
        flag; presumably it bounds the captured context depth — confirm
        against the lea_uia CLI.
        """
        data = self._run(["capture", "--max-depth", str(max_depth)])
        if not data or data.get("status") != "ok":
            return None

        elem_data = data.get("element")
        if not elem_data:
            return None
        return UiaElement.from_dict(elem_data)
|
||||||
|
|
||||||
|
|
||||||
|
# Instance globale partagée (singleton léger)
|
||||||
|
_SHARED_HELPER: Optional[UIAHelper] = None
|
||||||
|
|
||||||
|
|
||||||
|
def get_shared_helper() -> UIAHelper:
    """Return the process-wide UIAHelper singleton, creating it on first use."""
    global _SHARED_HELPER
    helper = _SHARED_HELPER
    if helper is None:
        # First call: build the helper once and cache it for all callers.
        helper = UIAHelper()
        _SHARED_HELPER = helper
    return helper
|
||||||
@@ -2,6 +2,17 @@
|
|||||||
"""
|
"""
|
||||||
deploy_windows.py — Script de packaging du client Windows pour Agent V1.
|
deploy_windows.py — Script de packaging du client Windows pour Agent V1.
|
||||||
|
|
||||||
|
⚠️ OBSOLÈTE (avril 2026)
|
||||||
|
Le build officiel du package Windows passe par ``deploy/build_package.sh``
|
||||||
|
(à la racine du repo) qui lit directement ``agent_v0/agent_v1/`` et évite
|
||||||
|
les clones intermédiaires. Ce script est conservé pour référence mais son
|
||||||
|
manifeste ``FILE_MANIFEST`` est incomplet : il n'inclut pas
|
||||||
|
``system_dialog_guard.py``, ``persistent_buffer.py``, ``recovery.py``,
|
||||||
|
``uia_helper.py``, ``grounding.py``, ``policy.py``,
|
||||||
|
``vision/blur_sensitive.py``, ``vision/system_info.py``,
|
||||||
|
``ui/chat_window.py``, ``ui/capture_server.py``, ``ui/shared_state.py``.
|
||||||
|
Ne PAS l'utiliser pour un packaging réel.
|
||||||
|
|
||||||
Copie uniquement les fichiers nécessaires au fonctionnement de l'agent
|
Copie uniquement les fichiers nécessaires au fonctionnement de l'agent
|
||||||
sur le PC cible (Windows), sans le serveur ni les dépendances lourdes.
|
sur le PC cible (Windows), sans le serveur ni les dépendances lourdes.
|
||||||
|
|
||||||
|
|||||||
@@ -21,36 +21,33 @@ from typing import Any, Callable, Dict, List, Optional
|
|||||||
logger = logging.getLogger("lea_ui.server_client")
|
logger = logging.getLogger("lea_ui.server_client")
|
||||||
|
|
||||||
|
|
||||||
def _get_server_host() -> str:
|
def _get_server_url() -> str:
|
||||||
"""Recuperer l'adresse du serveur Linux.
|
"""Recuperer l'URL du serveur RPA (avec /api/v1).
|
||||||
|
|
||||||
Ordre de resolution :
|
Ordre de resolution :
|
||||||
1. Variable d'environnement RPA_SERVER_HOST
|
1. Import depuis agent_v1.config (source de verite unique)
|
||||||
2. Fichier de config agent_config.json (cle "server_host")
|
2. Variable d'environnement RPA_SERVER_URL
|
||||||
3. Fallback localhost
|
3. Fallback http://localhost:5005/api/v1
|
||||||
"""
|
"""
|
||||||
# 1. Variable d'environnement
|
# 1. Import depuis config.py (source de verite)
|
||||||
host = os.environ.get("RPA_SERVER_HOST", "").strip()
|
|
||||||
if host:
|
|
||||||
return host
|
|
||||||
|
|
||||||
# 2. Fichier de config
|
|
||||||
config_paths = [
|
|
||||||
os.path.join(os.path.dirname(__file__), "..", "agent_config.json"),
|
|
||||||
os.path.join(os.path.dirname(__file__), "..", "..", "agent_config.json"),
|
|
||||||
]
|
|
||||||
for config_path in config_paths:
|
|
||||||
try:
|
try:
|
||||||
with open(config_path, "r", encoding="utf-8") as f:
|
from agent_v1.config import SERVER_URL
|
||||||
cfg = json.load(f)
|
return SERVER_URL
|
||||||
host = cfg.get("server_host", "").strip()
|
except ImportError:
|
||||||
if host:
|
pass
|
||||||
return host
|
|
||||||
except (OSError, json.JSONDecodeError):
|
# 2. Variable d'environnement directe
|
||||||
continue
|
url = os.environ.get("RPA_SERVER_URL", "").strip().rstrip("/")
|
||||||
|
if url:
|
||||||
|
return url
|
||||||
|
|
||||||
# 3. Fallback
|
# 3. Fallback
|
||||||
return "localhost"
|
return "http://localhost:5005/api/v1"
|
||||||
|
|
||||||
|
|
||||||
|
def _get_server_base(server_url: str) -> str:
|
||||||
|
"""Extraire la base URL (sans /api/v1) pour les routes racine (/health)."""
|
||||||
|
return server_url.rsplit("/api/v1", 1)[0]
|
||||||
|
|
||||||
|
|
||||||
class LeaServerClient:
|
class LeaServerClient:
|
||||||
@@ -67,12 +64,23 @@ class LeaServerClient:
|
|||||||
chat_port: int = 5004,
|
chat_port: int = 5004,
|
||||||
stream_port: int = 5005,
|
stream_port: int = 5005,
|
||||||
) -> None:
|
) -> None:
|
||||||
self._host = server_host or _get_server_host()
|
# URL unifiée : SERVER_URL contient TOUJOURS /api/v1 (convention INC-1).
|
||||||
|
# _stream_url = URL avec /api/v1 (pour les routes API)
|
||||||
|
# _stream_base = URL sans /api/v1 (pour /health uniquement)
|
||||||
|
self._stream_url = _get_server_url()
|
||||||
|
self._stream_base = _get_server_base(self._stream_url)
|
||||||
|
|
||||||
|
# Extraire le host depuis l'URL pour le chat et pour l'affichage
|
||||||
|
try:
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
parsed = urlparse(self._stream_base)
|
||||||
|
self._host = parsed.hostname or "localhost"
|
||||||
|
except Exception:
|
||||||
|
self._host = server_host or "localhost"
|
||||||
|
|
||||||
self._chat_port = chat_port
|
self._chat_port = chat_port
|
||||||
self._stream_port = stream_port
|
self._stream_port = stream_port
|
||||||
|
|
||||||
self._chat_base = f"http://{self._host}:{self._chat_port}"
|
self._chat_base = f"http://{self._host}:{self._chat_port}"
|
||||||
self._stream_base = f"http://{self._host}:{self._stream_port}"
|
|
||||||
|
|
||||||
# Etat de connexion
|
# Etat de connexion
|
||||||
self._connected = False
|
self._connected = False
|
||||||
@@ -95,8 +103,8 @@ class LeaServerClient:
|
|||||||
self._api_token = os.environ.get("RPA_API_TOKEN", "")
|
self._api_token = os.environ.get("RPA_API_TOKEN", "")
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"LeaServerClient initialise : chat=%s, stream=%s",
|
"LeaServerClient initialise : chat=%s, stream_url=%s, stream_base=%s",
|
||||||
self._chat_base, self._stream_base,
|
self._chat_base, self._stream_url, self._stream_base,
|
||||||
)
|
)
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -146,7 +154,11 @@ class LeaServerClient:
|
|||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
def check_connection(self) -> bool:
|
def check_connection(self) -> bool:
|
||||||
"""Tester la connexion au serveur streaming (port 5005)."""
|
"""Tester la connexion au serveur streaming (port 5005).
|
||||||
|
|
||||||
|
Le health check utilise _stream_base (sans /api/v1) car la route
|
||||||
|
/health est a la racine du serveur FastAPI, pas sous /api/v1.
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
import requests
|
import requests
|
||||||
resp = requests.get(
|
resp = requests.get(
|
||||||
@@ -219,7 +231,7 @@ class LeaServerClient:
|
|||||||
import requests
|
import requests
|
||||||
headers = self._auth_headers()
|
headers = self._auth_headers()
|
||||||
resp = requests.get(
|
resp = requests.get(
|
||||||
f"{self._stream_base}/api/v1/traces/stream/workflows",
|
f"{self._stream_url}/traces/stream/workflows",
|
||||||
headers=headers,
|
headers=headers,
|
||||||
timeout=10,
|
timeout=10,
|
||||||
)
|
)
|
||||||
@@ -276,7 +288,7 @@ class LeaServerClient:
|
|||||||
while self._polling:
|
while self._polling:
|
||||||
try:
|
try:
|
||||||
resp = req_lib.get(
|
resp = req_lib.get(
|
||||||
f"{self._stream_base}/api/v1/traces/stream/replay/next",
|
f"{self._stream_url}/traces/stream/replay/next",
|
||||||
params={"session_id": self._poll_session_id},
|
params={"session_id": self._poll_session_id},
|
||||||
headers=self._auth_headers(),
|
headers=self._auth_headers(),
|
||||||
timeout=5,
|
timeout=5,
|
||||||
@@ -310,7 +322,7 @@ class LeaServerClient:
|
|||||||
try:
|
try:
|
||||||
import requests
|
import requests
|
||||||
resp = requests.get(
|
resp = requests.get(
|
||||||
f"{self._stream_base}/api/v1/traces/stream/replays",
|
f"{self._stream_url}/traces/stream/replays",
|
||||||
headers=self._auth_headers(),
|
headers=self._auth_headers(),
|
||||||
timeout=5,
|
timeout=5,
|
||||||
)
|
)
|
||||||
@@ -338,7 +350,7 @@ class LeaServerClient:
|
|||||||
try:
|
try:
|
||||||
import requests
|
import requests
|
||||||
requests.post(
|
requests.post(
|
||||||
f"{self._stream_base}/api/v1/traces/stream/replay/result",
|
f"{self._stream_url}/traces/stream/replay/result",
|
||||||
json={
|
json={
|
||||||
"session_id": session_id,
|
"session_id": session_id,
|
||||||
"action_id": action_id,
|
"action_id": action_id,
|
||||||
|
|||||||
@@ -1,12 +1,97 @@
|
|||||||
# run_agent_v1.py
|
# run_agent_v1.py
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
|
import atexit
|
||||||
|
|
||||||
# Ajout du répertoire courant au PYTHONPATH pour permettre les imports de modules
|
# Ajout du répertoire courant au PYTHONPATH pour permettre les imports de modules
|
||||||
current_dir = os.path.dirname(os.path.abspath(__file__))
|
current_dir = os.path.dirname(os.path.abspath(__file__))
|
||||||
if current_dir not in sys.path:
|
if current_dir not in sys.path:
|
||||||
sys.path.append(current_dir)
|
sys.path.append(current_dir)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------
|
||||||
|
# Verrou PID — empêche le lancement de plusieurs instances
|
||||||
|
# Même si Lea.bat est double-cliqué ou lancé deux fois,
|
||||||
|
# un seul agent tourne à la fois (defense-in-depth).
|
||||||
|
# ---------------------------------------------------------------
|
||||||
|
LOCK_FILE = os.path.join(current_dir, "lea_agent.lock")
|
||||||
|
|
||||||
|
|
||||||
|
def _pid_is_alive(pid: int) -> bool:
|
||||||
|
"""Vérifie si un processus avec ce PID existe encore (Windows + Unix)."""
|
||||||
|
if sys.platform == "win32":
|
||||||
|
try:
|
||||||
|
import ctypes
|
||||||
|
kernel32 = ctypes.windll.kernel32 # type: ignore[attr-defined]
|
||||||
|
PROCESS_QUERY_LIMITED_INFORMATION = 0x1000
|
||||||
|
handle = kernel32.OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, False, pid)
|
||||||
|
if handle:
|
||||||
|
kernel32.CloseHandle(handle)
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
except Exception:
|
||||||
|
# Fallback : tasklist
|
||||||
|
try:
|
||||||
|
import subprocess
|
||||||
|
result = subprocess.run(
|
||||||
|
["tasklist", "/FI", f"PID eq {pid}", "/NH"],
|
||||||
|
capture_output=True, text=True, timeout=5,
|
||||||
|
)
|
||||||
|
return str(pid) in result.stdout
|
||||||
|
except Exception:
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
# Unix/Linux — os.kill(pid, 0) ne tue pas le process
|
||||||
|
try:
|
||||||
|
os.kill(pid, 0)
|
||||||
|
return True
|
||||||
|
except (OSError, ProcessLookupError):
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _acquire_lock() -> bool:
    """Try to acquire the PID lock. Returns False if another instance runs.

    Best-effort: if the lock file cannot be written, the agent continues
    without a lock rather than refusing to start.
    """
    my_pid = os.getpid()

    # Read the previously recorded PID, if any
    if os.path.isfile(LOCK_FILE):
        try:
            with open(LOCK_FILE, "r", encoding="utf-8") as f:
                old_pid = int(f.read().strip())
            # Is the PID stored in the lock still alive?
            if old_pid != my_pid and _pid_is_alive(old_pid):
                return False  # Another instance is already running
        except (ValueError, OSError):
            pass  # Corrupt file — overwrite it

    # Record our own PID
    try:
        with open(LOCK_FILE, "w", encoding="utf-8") as f:
            f.write(str(my_pid))
    except OSError:
        pass  # Not blocking — continue without a lock
    return True
||||||
|
|
||||||
|
|
||||||
|
def _release_lock():
    """Delete the lock file at shutdown — only if it is ours."""
    try:
        if os.path.isfile(LOCK_FILE):
            with open(LOCK_FILE, "r", encoding="utf-8") as f:
                stored_pid = int(f.read().strip())
            # Only remove the file when it still holds OUR PID; otherwise a
            # newer instance has taken over and the lock belongs to it.
            if stored_pid == os.getpid():
                os.remove(LOCK_FILE)
    except (ValueError, OSError):
        pass
|
||||||
|
|
||||||
|
|
||||||
|
# Check the lock BEFORE any heavy initialisation
if not _acquire_lock():
    # Another Léa instance is already running — exit silently
    sys.exit(0)

# Make sure the lock file is removed on normal interpreter shutdown.
atexit.register(_release_lock)
|
||||||
|
|
||||||
# Charger config.txt et .env comme variables d'environnement
|
# Charger config.txt et .env comme variables d'environnement
|
||||||
# (équivalent du `set` dans Lea.bat, mais fonctionne aussi sans le .bat)
|
# (équivalent du `set` dans Lea.bat, mais fonctionne aussi sans le .bat)
|
||||||
for config_file in ("config.txt", ".env"):
|
for config_file in ("config.txt", ".env"):
|
||||||
@@ -32,7 +117,7 @@ logging.basicConfig(
|
|||||||
level=logging.INFO,
|
level=logging.INFO,
|
||||||
format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
|
format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
|
||||||
)
|
)
|
||||||
logging.info("=== Agent V1 démarrage — config chargée ===")
|
logging.info("=== Agent V1 démarrage — config chargée (PID %d) ===", os.getpid())
|
||||||
logging.info("RPA_SERVER_URL=%s", os.environ.get("RPA_SERVER_URL", "(non défini)"))
|
logging.info("RPA_SERVER_URL=%s", os.environ.get("RPA_SERVER_URL", "(non défini)"))
|
||||||
logging.info("RPA_SERVER_HOST=%s", os.environ.get("RPA_SERVER_HOST", "(non défini)"))
|
logging.info("RPA_SERVER_HOST=%s", os.environ.get("RPA_SERVER_HOST", "(non défini)"))
|
||||||
logging.info("RPA_API_TOKEN=%s", os.environ.get("RPA_API_TOKEN", "(non défini)")[:8] + "...")
|
logging.info("RPA_API_TOKEN=%s", os.environ.get("RPA_API_TOKEN", "(non défini)")[:8] + "...")
|
||||||
|
|||||||
296
agent_v0/server_v1/agent_registry.py
Normal file
296
agent_v0/server_v1/agent_registry.py
Normal file
@@ -0,0 +1,296 @@
|
|||||||
|
# agent_v0/server_v1/agent_registry.py
|
||||||
|
"""
|
||||||
|
Registre des agents Lea enrolles sur le parc.
|
||||||
|
|
||||||
|
Alimente par les endpoints /api/v1/agents/enroll et /api/v1/agents/uninstall
|
||||||
|
que l'installeur Inno Setup (`deploy/installer/Lea.iss`) appelle a
|
||||||
|
l'installation et a la desinstallation sur chaque poste collaborateur.
|
||||||
|
|
||||||
|
Stockage : SQLite simple, cohabite avec rpa_data.db dans data/databases/.
|
||||||
|
Aucune dependance GPU/LLM — ce module doit rester leger (juste sqlite3 +
|
||||||
|
stdlib) pour pouvoir etre importe par le serveur HTTP.
|
||||||
|
|
||||||
|
Schema de la table `enrolled_agents` :
|
||||||
|
id INTEGER PK AUTOINCREMENT
|
||||||
|
machine_id TEXT UNIQUE NOT NULL — identifiant genere par l'installeur
|
||||||
|
user_name TEXT — nom affichage collaborateur
|
||||||
|
user_email TEXT
|
||||||
|
user_id TEXT — identifiant metier (ex: AIVA-001)
|
||||||
|
hostname TEXT
|
||||||
|
os_info TEXT
|
||||||
|
version TEXT — version du client Lea
|
||||||
|
status TEXT DEFAULT 'active' — 'active' | 'uninstalled'
|
||||||
|
enrolled_at TEXT NOT NULL — ISO 8601 UTC
|
||||||
|
last_seen_at TEXT — ISO 8601 UTC (heartbeat / stream)
|
||||||
|
uninstalled_at TEXT
|
||||||
|
uninstall_reason TEXT
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import sqlite3
|
||||||
|
import threading
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Global lock: SQLite tolerates multiple threads, but we serialise
# writes ourselves to avoid races between _init_db and concurrent upserts.
_DB_LOCK = threading.Lock()
|
||||||
|
|
||||||
|
|
||||||
|
def _utc_now_iso() -> str:
|
||||||
|
"""Horodatage ISO 8601 UTC (compatible toutes les autres tables)."""
|
||||||
|
return datetime.now(timezone.utc).isoformat()
|
||||||
|
|
||||||
|
|
||||||
|
class AgentRegistry:
    """CRUD management of agents enrolled on the fleet (SQLite-backed).

    Fed by the /api/v1/agents/enroll and /api/v1/agents/uninstall endpoints
    that the Inno Setup installer calls on each workstation.

    Every public method opens a short-lived connection, serialised by the
    module-level ``_DB_LOCK``, and closes it before returning. Note that
    sqlite3's ``with conn:`` only manages the transaction (commit/rollback)
    and never closes the connection — the previous implementation therefore
    leaked one open connection (and its WAL file handles) per call.
    """

    def __init__(self, db_path: str | Path = "data/databases/rpa_data.db"):
        """Create the registry; parent directory and schema are created on demand."""
        self.db_path = Path(db_path)
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self._init_db()

    # ------------------------------------------------------------------
    # SQLite infrastructure
    # ------------------------------------------------------------------

    def _connect(self) -> sqlite3.Connection:
        """Open a fresh connection (WAL journal, rows addressable by column name).

        check_same_thread=False: we serialise access ourselves via _DB_LOCK,
        required because FastAPI dispatches endpoints on pool threads.
        """
        conn = sqlite3.connect(str(self.db_path), check_same_thread=False)
        conn.row_factory = sqlite3.Row
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("PRAGMA foreign_keys=ON")
        return conn

    @staticmethod
    def _fetch_by_machine(
        conn: sqlite3.Connection, machine_id: str
    ) -> Optional[sqlite3.Row]:
        """Return the row for machine_id using an already-open connection (or None)."""
        return conn.execute(
            "SELECT * FROM enrolled_agents WHERE machine_id = ?",
            (machine_id,),
        ).fetchone()

    def _init_db(self) -> None:
        """Create the table and its indexes if absent (idempotent)."""
        with _DB_LOCK:
            conn = self._connect()
            try:
                conn.execute(
                    """
                    CREATE TABLE IF NOT EXISTS enrolled_agents (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        machine_id TEXT NOT NULL UNIQUE,
                        user_name TEXT,
                        user_email TEXT,
                        user_id TEXT,
                        hostname TEXT,
                        os_info TEXT,
                        version TEXT,
                        status TEXT NOT NULL DEFAULT 'active',
                        enrolled_at TEXT NOT NULL,
                        last_seen_at TEXT,
                        uninstalled_at TEXT,
                        uninstall_reason TEXT
                    )
                    """
                )
                conn.execute(
                    "CREATE INDEX IF NOT EXISTS idx_enrolled_agents_status "
                    "ON enrolled_agents(status)"
                )
                conn.execute(
                    "CREATE INDEX IF NOT EXISTS idx_enrolled_agents_machine "
                    "ON enrolled_agents(machine_id)"
                )
                conn.commit()
            finally:
                # BUGFIX: close explicitly — `with conn` (used before) commits
                # the transaction but never closes the connection.
                conn.close()

    # ------------------------------------------------------------------
    # Reads
    # ------------------------------------------------------------------

    def get(self, machine_id: str) -> Optional[Dict[str, Any]]:
        """Fetch a single agent by machine_id, or None if unknown."""
        with _DB_LOCK:
            conn = self._connect()
            try:
                row = self._fetch_by_machine(conn, machine_id)
                return dict(row) if row else None
            finally:
                conn.close()

    def list_by_status(self, status: str) -> List[Dict[str, Any]]:
        """List agents with the given status ('active' | 'uninstalled'), newest first."""
        with _DB_LOCK:
            conn = self._connect()
            try:
                rows = conn.execute(
                    "SELECT * FROM enrolled_agents WHERE status = ? "
                    "ORDER BY enrolled_at DESC",
                    (status,),
                ).fetchall()
                return [dict(r) for r in rows]
            finally:
                conn.close()

    def count_by_status(self, status: str) -> int:
        """Count agents with the given status."""
        with _DB_LOCK:
            conn = self._connect()
            try:
                row = conn.execute(
                    "SELECT COUNT(*) AS n FROM enrolled_agents WHERE status = ?",
                    (status,),
                ).fetchone()
                return int(row["n"]) if row else 0
            finally:
                conn.close()

    # ------------------------------------------------------------------
    # Writes
    # ------------------------------------------------------------------

    def enroll(
        self,
        *,
        machine_id: str,
        user_name: str | None = None,
        user_email: str | None = None,
        user_id: str | None = None,
        hostname: str | None = None,
        os_info: str | None = None,
        version: str | None = None,
        allow_reactivate: bool = True,
    ) -> Dict[str, Any]:
        """Register a new agent or reactivate a previously uninstalled one.

        Returns:
            dict with keys {"created": bool, "reactivated": bool, "agent": row}

        Raises:
            ValueError: if machine_id is empty.
            AgentAlreadyEnrolledError: if already active (status=active), or
                uninstalled and allow_reactivate is False.
        """
        if not machine_id or not machine_id.strip():
            raise ValueError("machine_id est obligatoire")
        machine_id = machine_id.strip()

        now = _utc_now_iso()

        with _DB_LOCK:
            conn = self._connect()
            try:
                existing = self._fetch_by_machine(conn, machine_id)

                if existing is not None:
                    # Already active, or reactivation forbidden -> explicit conflict.
                    if existing["status"] == "active" or not allow_reactivate:
                        raise AgentAlreadyEnrolledError(dict(existing))

                    # Uninstalled agent: reactivate, refreshing only the fields
                    # actually provided (COALESCE keeps previous values for NULLs).
                    conn.execute(
                        """
                        UPDATE enrolled_agents
                        SET user_name = COALESCE(?, user_name),
                            user_email = COALESCE(?, user_email),
                            user_id = COALESCE(?, user_id),
                            hostname = COALESCE(?, hostname),
                            os_info = COALESCE(?, os_info),
                            version = COALESCE(?, version),
                            status = 'active',
                            enrolled_at = ?,
                            last_seen_at = ?,
                            uninstalled_at = NULL,
                            uninstall_reason = NULL
                        WHERE machine_id = ?
                        """,
                        (
                            user_name, user_email, user_id,
                            hostname, os_info, version,
                            now, now, machine_id,
                        ),
                    )
                    conn.commit()
                    row = self._fetch_by_machine(conn, machine_id)
                    return {"created": False, "reactivated": True, "agent": dict(row)}

                # Brand-new enrolment.
                conn.execute(
                    """
                    INSERT INTO enrolled_agents (
                        machine_id, user_name, user_email, user_id,
                        hostname, os_info, version,
                        status, enrolled_at, last_seen_at
                    ) VALUES (?, ?, ?, ?, ?, ?, ?, 'active', ?, ?)
                    """,
                    (
                        machine_id, user_name, user_email, user_id,
                        hostname, os_info, version,
                        now, now,
                    ),
                )
                conn.commit()
                row = self._fetch_by_machine(conn, machine_id)
                return {"created": True, "reactivated": False, "agent": dict(row)}
            finally:
                conn.close()

    def uninstall(
        self,
        *,
        machine_id: str,
        reason: str | None = None,
    ) -> Optional[Dict[str, Any]]:
        """Mark an agent as uninstalled (soft delete).

        Returns:
            The updated row, or None if the agent does not exist.

        Raises:
            ValueError: if machine_id is empty.
        """
        if not machine_id or not machine_id.strip():
            raise ValueError("machine_id est obligatoire")
        machine_id = machine_id.strip()

        now = _utc_now_iso()
        with _DB_LOCK:
            conn = self._connect()
            try:
                if self._fetch_by_machine(conn, machine_id) is None:
                    return None

                conn.execute(
                    """
                    UPDATE enrolled_agents
                    SET status = 'uninstalled',
                        uninstalled_at = ?,
                        uninstall_reason = ?
                    WHERE machine_id = ?
                    """,
                    (now, reason, machine_id),
                )
                conn.commit()
                row = self._fetch_by_machine(conn, machine_id)
                return dict(row)
            finally:
                conn.close()

    def touch_last_seen(self, machine_id: str) -> None:
        """Update last_seen_at (called from the stream / heartbeat).

        Silently ignores unknown machine_ids (avoids errors from old clients).
        """
        if not machine_id:
            return
        now = _utc_now_iso()
        with _DB_LOCK:
            conn = self._connect()
            try:
                conn.execute(
                    "UPDATE enrolled_agents SET last_seen_at = ? WHERE machine_id = ?",
                    (now, machine_id),
                )
                conn.commit()
            finally:
                conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
class AgentAlreadyEnrolledError(Exception):
    """Raised when attempting to enroll a machine that is already active."""

    def __init__(self, existing_row: Dict[str, Any]):
        # Keep the conflicting row so callers can build a detailed response.
        self.existing = existing_row
        mid = existing_row.get('machine_id')
        status = existing_row.get('status')
        super().__init__(f"machine_id={mid} deja enrole (status={status})")
|
||||||
File diff suppressed because it is too large
Load Diff
622
agent_v0/server_v1/chat_interface.py
Normal file
622
agent_v0/server_v1/chat_interface.py
Normal file
@@ -0,0 +1,622 @@
|
|||||||
|
"""
|
||||||
|
ChatInterface — Interface de chat conversationnelle pour Léa.
|
||||||
|
|
||||||
|
Permet au TIM (Technicien Information Médicale) de parler à Léa en langage
|
||||||
|
naturel :
|
||||||
|
- "Ouvre le Bloc-notes et écris bonjour"
|
||||||
|
- Léa comprend (TaskPlanner) et propose un plan
|
||||||
|
- Le TIM confirme (ou refuse)
|
||||||
|
- Léa exécute (replay) et envoie des updates de progression
|
||||||
|
- Historique conversationnel conservé par session
|
||||||
|
|
||||||
|
C'est une couche LÉGÈRE au-dessus du TaskPlanner. Toute la logique de
|
||||||
|
compréhension reste dans TaskPlanner — ChatInterface gère uniquement
|
||||||
|
l'état conversationnel, la confirmation et le suivi d'exécution.
|
||||||
|
|
||||||
|
États de la session :
|
||||||
|
idle → en attente d'un message
|
||||||
|
planning → TaskPlanner.understand() en cours
|
||||||
|
awaiting_confirmation → plan prêt, attend la confirmation du TIM
|
||||||
|
executing → replay en cours
|
||||||
|
done → dernier tour terminé (retour à idle au prochain message)
|
||||||
|
error → erreur interne (instruction non comprise, exception…)
|
||||||
|
|
||||||
|
Langue : 100% français (c'est l'interface utilisateur).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
import uuid
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Any, Callable, Dict, List, Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# =============================================================================
# States
# =============================================================================

STATE_IDLE = "idle"
STATE_PLANNING = "planning"
STATE_AWAITING_CONFIRMATION = "awaiting_confirmation"
STATE_EXECUTING = "executing"
STATE_DONE = "done"
STATE_ERROR = "error"

# Closed set of all legal session states (used for validation).
VALID_STATES = {
    STATE_IDLE,
    STATE_PLANNING,
    STATE_AWAITING_CONFIRMATION,
    STATE_EXECUTING,
    STATE_DONE,
    STATE_ERROR,
}

# Message roles
ROLE_USER = "user"
ROLE_LEA = "lea"
ROLE_SYSTEM = "system"
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Message
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
@dataclass
class ChatMessage:
    """One message in a conversation's history."""

    role: str       # one of "user", "lea", "system"
    content: str    # message text
    timestamp: float = field(default_factory=time.time)
    # Optional contextual payload (plan, result, progress...)
    meta: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Serialise to a plain dict for JSON transport."""
        payload = dict(
            role=self.role,
            content=self.content,
            timestamp=self.timestamp,
            meta=self.meta,
        )
        return payload
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# ChatSession
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
class ChatSession:
|
||||||
|
"""Une conversation entre un utilisateur et Léa.
|
||||||
|
|
||||||
|
Maintient l'historique, l'état courant, et le dernier plan en attente
|
||||||
|
de confirmation. Thread-safe (un lock par session).
|
||||||
|
|
||||||
|
Dépendances injectées (pour tester facilement) :
|
||||||
|
- task_planner : instance de TaskPlanner (ou mock)
|
||||||
|
- workflows_provider : callable () -> List[Dict] (liste des workflows)
|
||||||
|
- replay_callback : callable (session_id, machine_id, params) -> replay_id
|
||||||
|
- status_provider : callable (replay_id) -> Dict (pour suivre l'exécution)
|
||||||
|
|
||||||
|
Toutes ces dépendances sont optionnelles : ChatSession dégrade
|
||||||
|
gracieusement (fallback) si gemma4 / replay indisponibles.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
session_id: str = "",
|
||||||
|
task_planner: Any = None,
|
||||||
|
workflows_provider: Optional[Callable[[], List[Dict[str, Any]]]] = None,
|
||||||
|
replay_callback: Optional[Callable[..., str]] = None,
|
||||||
|
status_provider: Optional[Callable[[str], Dict[str, Any]]] = None,
|
||||||
|
machine_id: str = "default",
|
||||||
|
):
|
||||||
|
self.session_id = session_id or f"chat_{uuid.uuid4().hex[:12]}"
|
||||||
|
self.machine_id = machine_id
|
||||||
|
self.created_at = time.time()
|
||||||
|
self.updated_at = self.created_at
|
||||||
|
|
||||||
|
self._task_planner = task_planner
|
||||||
|
self._workflows_provider = workflows_provider
|
||||||
|
self._replay_callback = replay_callback
|
||||||
|
self._status_provider = status_provider
|
||||||
|
|
||||||
|
self._state: str = STATE_IDLE
|
||||||
|
self._messages: List[ChatMessage] = []
|
||||||
|
self._pending_plan: Any = None # TaskPlan en attente de confirmation
|
||||||
|
self._active_replay_id: str = "" # Replay courant (si executing)
|
||||||
|
self._last_progress: Dict[str, Any] = {}
|
||||||
|
|
||||||
|
self._lock = threading.RLock()
|
||||||
|
|
||||||
|
# Message d'accueil
|
||||||
|
self._append(
|
||||||
|
ROLE_LEA,
|
||||||
|
"Bonjour ! Je suis Léa. Dites-moi ce que vous voulez que je fasse.",
|
||||||
|
meta={"welcome": True},
|
||||||
|
)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
# Accesseurs
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
|
||||||
|
    @property
    def state(self) -> str:
        """Current session state (one of VALID_STATES); read under the lock."""
        with self._lock:
            return self._state
|
||||||
|
|
||||||
|
def get_history(self) -> List[Dict[str, Any]]:
|
||||||
|
"""Retourne l'historique complet des messages (sérialisé)."""
|
||||||
|
with self._lock:
|
||||||
|
return [m.to_dict() for m in self._messages]
|
||||||
|
|
||||||
|
def get_snapshot(self) -> Dict[str, Any]:
|
||||||
|
"""État complet pour l'UI (historique + état + progression)."""
|
||||||
|
with self._lock:
|
||||||
|
return {
|
||||||
|
"session_id": self.session_id,
|
||||||
|
"state": self._state,
|
||||||
|
"machine_id": self.machine_id,
|
||||||
|
"created_at": self.created_at,
|
||||||
|
"updated_at": self.updated_at,
|
||||||
|
"messages": [m.to_dict() for m in self._messages],
|
||||||
|
"pending_plan": (
|
||||||
|
self._pending_plan.to_dict()
|
||||||
|
if self._pending_plan is not None
|
||||||
|
else None
|
||||||
|
),
|
||||||
|
"active_replay_id": self._active_replay_id,
|
||||||
|
"progress": dict(self._last_progress),
|
||||||
|
}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
# API publique
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
|
||||||
|
    def send_message(self, text: str) -> Dict[str, Any]:
        """Handle an incoming user message.

        Three possible paths depending on the current state:
          1. awaiting_confirmation -> the text is a YES/NO answer
          2. executing             -> acknowledge, take no new action
          3. idle/done/error       -> new instruction, invoke the TaskPlanner

        Returns a dict with at least {"ok": bool, "state": str}.
        """
        text = (text or "").strip()
        if not text:
            # NOTE(review): state is read without the lock here — benign, the
            # value is only informational in this error payload.
            return {
                "ok": False,
                "error": "Message vide",
                "state": self._state,
            }

        with self._lock:
            # Case 1: we are waiting for a confirmation
            if self._state == STATE_AWAITING_CONFIRMATION:
                return self._handle_confirmation_reply(text)

            # Case 2: mid-execution -> record the message but take no action
            if self._state == STATE_EXECUTING:
                self._append(ROLE_USER, text)
                self._append(
                    ROLE_LEA,
                    "Je suis en train d'exécuter le workflow. Un instant…",
                )
                return {"ok": True, "state": self._state}

            # Case 3: a fresh instruction
            self._append(ROLE_USER, text)
            self._set_state(STATE_PLANNING)

        # TaskPlanner is called outside the lock (it can be slow: gemma4)
        return self._plan_and_reply(text)
|
||||||
|
|
||||||
|
    def confirm(self, confirmed: bool = True) -> Dict[str, Any]:
        """Confirm (or refuse) execution of the pending plan.

        Only meaningful in the awaiting_confirmation state; otherwise
        returns ok=False without side effects.
        """
        with self._lock:
            if self._state != STATE_AWAITING_CONFIRMATION:
                return {
                    "ok": False,
                    "error": f"Pas de plan en attente (état={self._state})",
                    "state": self._state,
                }

            if not confirmed:
                # Refusal: drop the plan and return to idle.
                self._append(
                    ROLE_LEA,
                    "D'accord, j'annule. Dites-moi autre chose quand vous voulez.",
                )
                self._pending_plan = None
                self._set_state(STATE_IDLE)
                return {"ok": True, "state": self._state, "confirmed": False}

            plan = self._pending_plan
            if plan is None:
                # Defensive: state said "awaiting" but no plan is stored.
                self._set_state(STATE_IDLE)
                return {
                    "ok": False,
                    "error": "Aucun plan à confirmer",
                    "state": self._state,
                }

            self._set_state(STATE_EXECUTING)

        # Execution happens outside the lock
        return self._execute_plan(plan)
|
||||||
|
|
||||||
|
    def refresh_progress(self) -> Dict[str, Any]:
        """Refresh the progress of the in-flight replay.

        Called by the client (polling) to receive execution updates.
        When the replay has finished, transitions the state to done
        (or error; stays in executing when paused for help).
        """
        with self._lock:
            if self._state != STATE_EXECUTING or not self._active_replay_id:
                # NOTE(review): returns the live dict, not a copy (unlike the
                # final return below) — callers should not mutate it.
                return {"ok": True, "state": self._state, "progress": self._last_progress}

            replay_id = self._active_replay_id
            provider = self._status_provider

        if provider is None:
            return {"ok": True, "state": self._state, "progress": {}}

        # The provider call happens outside the lock (may be slow / remote).
        try:
            status = provider(replay_id) or {}
        except Exception as e:
            logger.warning(f"ChatSession: status_provider erreur: {e}")
            status = {}

        with self._lock:
            self._last_progress = status
            self.updated_at = time.time()

            # Completion detection
            replay_status = str(status.get("status", "")).lower()
            completed = status.get("completed_actions", 0)
            total = status.get("total_actions", 0)

            if replay_status in ("done", "completed", "finished", "success"):
                summary = (
                    f"Workflow terminé ! {completed}/{total} actions réussies."
                    if total
                    else "Workflow terminé."
                )
                self._append(ROLE_LEA, summary, meta={"progress": dict(status)})
                self._set_state(STATE_DONE)
                self._active_replay_id = ""
            elif replay_status in ("failed", "error", "aborted"):
                err = status.get("error") or status.get("message") or "Erreur inconnue"
                self._append(
                    ROLE_LEA,
                    f"Le workflow a échoué : {err}",
                    meta={"progress": dict(status)},
                )
                self._set_state(STATE_ERROR)
                self._active_replay_id = ""
            elif replay_status == "paused_need_help":
                self._append(
                    ROLE_LEA,
                    "Je suis bloquée sur une action, j'ai besoin d'aide…",
                    meta={"progress": dict(status)},
                )
                # stay in executing so the TIM can resume the replay
            # else: still running, no message appended

            return {
                "ok": True,
                "state": self._state,
                "progress": dict(self._last_progress),
            }
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
# Logique interne
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
|
||||||
|
    def _plan_and_reply(self, instruction: str) -> Dict[str, Any]:
        """Call TaskPlanner.understand() and produce the reply.

        Runs the (potentially slow) planner call without holding the session
        lock, then re-acquires it to append messages and switch state.
        """
        plan = None
        error_msg = ""

        if self._task_planner is None:
            error_msg = "Planificateur indisponible"
        else:
            try:
                # Collect known workflows first — a failing provider is not fatal.
                workflows = []
                if self._workflows_provider is not None:
                    try:
                        workflows = self._workflows_provider() or []
                    except Exception as e:
                        logger.warning(f"ChatSession: workflows_provider erreur: {e}")
                        workflows = []

                plan = self._task_planner.understand(
                    instruction=instruction,
                    available_workflows=workflows,
                )
            except Exception as e:
                logger.warning(f"ChatSession: TaskPlanner.understand erreur: {e}")
                error_msg = f"Erreur de compréhension : {e}"

        # Graceful fallback when no plan is produced / gemma4 unavailable
        if plan is None:
            with self._lock:
                self._append(
                    ROLE_LEA,
                    f"Désolée, je n'arrive pas à comprendre pour l'instant. {error_msg}".strip(),
                    meta={"error": error_msg},
                )
                self._set_state(STATE_ERROR)
                return {
                    "ok": False,
                    "state": self._state,
                    "error": error_msg,
                }

        # Instruction not understood by the planner
        if not plan.understood:
            reason = plan.error or "je n'ai pas compris votre demande"
            with self._lock:
                self._append(
                    ROLE_LEA,
                    (
                        f"Désolée, {reason}. "
                        "Pouvez-vous reformuler ? Je connais les workflows que vous m'avez appris."
                    ),
                    meta={"plan": plan.to_dict()},
                )
                self._set_state(STATE_ERROR)
                return {
                    "ok": False,
                    "state": self._state,
                    "plan": plan.to_dict(),
                    "error": reason,
                }

        # Plan understood -> build the confirmation proposal
        proposal = self._format_proposal(plan)

        with self._lock:
            self._pending_plan = plan
            self._append(ROLE_LEA, proposal, meta={"plan": plan.to_dict()})
            self._set_state(STATE_AWAITING_CONFIRMATION)
            return {
                "ok": True,
                "state": self._state,
                "plan": plan.to_dict(),
                "message": proposal,
            }
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _format_proposal(plan: Any) -> str:
|
||||||
|
"""Formuler une proposition en français à partir d'un TaskPlan."""
|
||||||
|
lines = []
|
||||||
|
lines.append(f"J'ai compris : « {plan.instruction} ».")
|
||||||
|
|
||||||
|
if plan.workflow_name:
|
||||||
|
conf_pct = int(round((plan.match_confidence or 0.0) * 100))
|
||||||
|
lines.append(
|
||||||
|
f"Je vais utiliser le workflow « {plan.workflow_name} »"
|
||||||
|
f" (confiance {conf_pct}%)."
|
||||||
|
)
|
||||||
|
elif plan.mode == "free" and plan.steps:
|
||||||
|
lines.append(
|
||||||
|
f"Je n'ai pas de workflow enregistré pour ça, "
|
||||||
|
f"mais j'ai planifié {len(plan.steps)} étape(s) :"
|
||||||
|
)
|
||||||
|
for i, step in enumerate(plan.steps[:5], 1):
|
||||||
|
desc = step.get("description", "") if isinstance(step, dict) else str(step)
|
||||||
|
lines.append(f" {i}. {desc}")
|
||||||
|
if len(plan.steps) > 5:
|
||||||
|
lines.append(f" … et {len(plan.steps) - 5} autre(s) étape(s).")
|
||||||
|
else:
|
||||||
|
lines.append("Je n'ai pas de plan d'action clair pour cette demande.")
|
||||||
|
|
||||||
|
if plan.parameters:
|
||||||
|
params_str = ", ".join(f"{k}={v}" for k, v in plan.parameters.items())
|
||||||
|
lines.append(f"Paramètres détectés : {params_str}.")
|
||||||
|
|
||||||
|
if plan.is_loop:
|
||||||
|
src = plan.loop_source or "éléments à traiter"
|
||||||
|
lines.append(f"Traitement en boucle sur : {src}.")
|
||||||
|
|
||||||
|
lines.append("")
|
||||||
|
lines.append("Est-ce que je peux y aller ? (oui / non)")
|
||||||
|
return "\n".join(lines)
|
||||||
|
|
||||||
|
    def _handle_confirmation_reply(self, text: str) -> Dict[str, Any]:
        """Interpret a user message as a YES/NO confirmation answer.

        Called from send_message while the session lock is already held.
        """
        self._append(ROLE_USER, text)
        yes_tokens = {"oui", "yes", "ok", "y", "go", "vas-y", "allez", "allez-y", "confirme", "confirmer", "continue"}
        no_tokens = {"non", "no", "annule", "annuler", "stop", "arrête", "arrete", "abandonne", "abandonner"}

        # Normalise: lowercase and strip trailing punctuation.
        t = text.strip().lower().rstrip("!.?")

        if t in yes_tokens or any(t.startswith(tok + " ") for tok in yes_tokens):
            # Affirmative — fall through to the execution path below.
            pass
        elif t in no_tokens or any(t.startswith(tok + " ") for tok in no_tokens):
            self._append(
                ROLE_LEA,
                "D'accord, j'annule. Dites-moi autre chose quand vous voulez.",
            )
            self._pending_plan = None
            self._set_state(STATE_IDLE)
            return {"ok": True, "state": self._state, "confirmed": False}
        else:
            self._append(
                ROLE_LEA,
                "Je n'ai pas compris votre réponse. Répondez « oui » pour lancer ou « non » pour annuler.",
            )
            return {"ok": True, "state": self._state, "needs_clarification": True}

        # Affirmative path: consume the pending plan and execute it.
        plan = self._pending_plan
        self._pending_plan = None
        self._set_state(STATE_EXECUTING)
        # Calling _execute_plan while this thread still holds the lock is safe
        # because self._lock is an RLock (re-entrant); _execute_plan re-acquires it.
        return self._execute_plan(plan)
|
||||||
|
|
||||||
|
def _execute_plan(self, plan: Any) -> Dict[str, Any]:
|
||||||
|
"""Lancer le replay correspondant au plan."""
|
||||||
|
if plan is None:
|
||||||
|
with self._lock:
|
||||||
|
self._append(ROLE_LEA, "Rien à exécuter.", meta={})
|
||||||
|
self._set_state(STATE_IDLE)
|
||||||
|
return {"ok": False, "state": self._state, "error": "Aucun plan"}
|
||||||
|
|
||||||
|
if self._replay_callback is None:
|
||||||
|
with self._lock:
|
||||||
|
self._append(
|
||||||
|
ROLE_LEA,
|
||||||
|
"Je ne peux pas exécuter : aucun moteur d'exécution n'est configuré.",
|
||||||
|
)
|
||||||
|
self._set_state(STATE_ERROR)
|
||||||
|
return {
|
||||||
|
"ok": False,
|
||||||
|
"state": self._state,
|
||||||
|
"error": "replay_callback non configuré",
|
||||||
|
}
|
||||||
|
|
||||||
|
# Annoncer le démarrage
|
||||||
|
with self._lock:
|
||||||
|
self._append(
|
||||||
|
ROLE_LEA,
|
||||||
|
"C'est parti ! Je lance le workflow…",
|
||||||
|
meta={"plan": plan.to_dict()},
|
||||||
|
)
|
||||||
|
|
||||||
|
# Appeler le callback
|
||||||
|
try:
|
||||||
|
if plan.workflow_match:
|
||||||
|
replay_id = self._replay_callback(
|
||||||
|
session_id=plan.workflow_match,
|
||||||
|
machine_id=self.machine_id,
|
||||||
|
params=plan.parameters,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# Mode libre : pas encore branché côté chat (on refuse proprement)
|
||||||
|
replay_id = ""
|
||||||
|
raise RuntimeError(
|
||||||
|
"Mode libre non supporté pour l'instant — "
|
||||||
|
"entraînez un workflow pour cette tâche"
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
with self._lock:
|
||||||
|
self._append(
|
||||||
|
ROLE_LEA,
|
||||||
|
f"Je n'ai pas pu lancer le workflow : {e}",
|
||||||
|
meta={"error": str(e)},
|
||||||
|
)
|
||||||
|
self._set_state(STATE_ERROR)
|
||||||
|
return {"ok": False, "state": self._state, "error": str(e)}
|
||||||
|
|
||||||
|
with self._lock:
|
||||||
|
self._active_replay_id = replay_id or ""
|
||||||
|
return {
|
||||||
|
"ok": True,
|
||||||
|
"state": self._state,
|
||||||
|
"replay_id": self._active_replay_id,
|
||||||
|
}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
# Helpers
|
||||||
|
# ---------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _append(self, role: str, content: str, meta: Optional[Dict[str, Any]] = None) -> None:
|
||||||
|
"""Ajouter un message à l'historique (doit être appelé sous lock)."""
|
||||||
|
msg = ChatMessage(role=role, content=content, meta=meta or {})
|
||||||
|
self._messages.append(msg)
|
||||||
|
self.updated_at = msg.timestamp
|
||||||
|
|
||||||
|
def _set_state(self, new_state: str) -> None:
|
||||||
|
"""Changer d'état (doit être appelé sous lock)."""
|
||||||
|
if new_state not in VALID_STATES:
|
||||||
|
raise ValueError(f"État invalide : {new_state}")
|
||||||
|
old = self._state
|
||||||
|
self._state = new_state
|
||||||
|
self.updated_at = time.time()
|
||||||
|
if old != new_state:
|
||||||
|
logger.debug(
|
||||||
|
f"ChatSession {self.session_id}: {old} -> {new_state}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# ChatManager — registre en mémoire des sessions
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
class ChatManager:
|
||||||
|
"""Registre en mémoire des sessions de chat.
|
||||||
|
|
||||||
|
Thread-safe. Utilisé par l'API FastAPI pour gérer plusieurs
|
||||||
|
conversations simultanées.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
task_planner: Any = None,
|
||||||
|
workflows_provider: Optional[Callable[[], List[Dict[str, Any]]]] = None,
|
||||||
|
replay_callback: Optional[Callable[..., str]] = None,
|
||||||
|
status_provider: Optional[Callable[[str], Dict[str, Any]]] = None,
|
||||||
|
):
|
||||||
|
self._task_planner = task_planner
|
||||||
|
self._workflows_provider = workflows_provider
|
||||||
|
self._replay_callback = replay_callback
|
||||||
|
self._status_provider = status_provider
|
||||||
|
self._sessions: Dict[str, ChatSession] = {}
|
||||||
|
self._lock = threading.RLock()
|
||||||
|
|
||||||
|
def create_session(self, machine_id: str = "default") -> ChatSession:
|
||||||
|
"""Créer une nouvelle session de chat."""
|
||||||
|
session = ChatSession(
|
||||||
|
task_planner=self._task_planner,
|
||||||
|
workflows_provider=self._workflows_provider,
|
||||||
|
replay_callback=self._replay_callback,
|
||||||
|
status_provider=self._status_provider,
|
||||||
|
machine_id=machine_id,
|
||||||
|
)
|
||||||
|
with self._lock:
|
||||||
|
self._sessions[session.session_id] = session
|
||||||
|
logger.info(f"ChatManager: session créée {session.session_id}")
|
||||||
|
return session
|
||||||
|
|
||||||
|
def get_session(self, session_id: str) -> Optional[ChatSession]:
|
||||||
|
with self._lock:
|
||||||
|
return self._sessions.get(session_id)
|
||||||
|
|
||||||
|
def list_sessions(self) -> List[Dict[str, Any]]:
|
||||||
|
with self._lock:
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
"session_id": s.session_id,
|
||||||
|
"state": s.state,
|
||||||
|
"machine_id": s.machine_id,
|
||||||
|
"created_at": s.created_at,
|
||||||
|
"updated_at": s.updated_at,
|
||||||
|
"message_count": len(s.get_history()),
|
||||||
|
}
|
||||||
|
for s in self._sessions.values()
|
||||||
|
]
|
||||||
|
|
||||||
|
def delete_session(self, session_id: str) -> bool:
|
||||||
|
with self._lock:
|
||||||
|
return self._sessions.pop(session_id, None) is not None
|
||||||
|
|
||||||
|
def cleanup_old(self, max_age_s: float = 3600 * 24) -> int:
|
||||||
|
"""Supprimer les sessions inactives depuis max_age_s secondes."""
|
||||||
|
now = time.time()
|
||||||
|
removed = 0
|
||||||
|
with self._lock:
|
||||||
|
to_delete = [
|
||||||
|
sid for sid, s in self._sessions.items()
|
||||||
|
if (now - s.updated_at) > max_age_s
|
||||||
|
]
|
||||||
|
for sid in to_delete:
|
||||||
|
del self._sessions[sid]
|
||||||
|
removed += 1
|
||||||
|
return removed
|
||||||
@@ -3,35 +3,81 @@
|
|||||||
Contexte métier pour les appels VLM — rend Léa experte du domaine.
|
Contexte métier pour les appels VLM — rend Léa experte du domaine.
|
||||||
|
|
||||||
Chaque workflow est associé à un domaine métier (médical, comptable, etc.)
|
Chaque workflow est associé à un domaine métier (médical, comptable, etc.)
|
||||||
qui enrichit TOUS les prompts VLM (Observer, Critic, acteur, enrichissement).
|
qui enrichit TOUS les prompts VLM (Observer, Critic, acteur, enrichissement)
|
||||||
|
ET la personnalité de Léa (résumés, questions de clarification, rapports).
|
||||||
|
|
||||||
Un gemma4 qui sait qu'il regarde un DPI et que l'utilisateur fait du codage
|
Un gemma4 qui sait qu'il regarde un DPI et que l'utilisateur fait du codage
|
||||||
CIM-10 prend des décisions bien meilleures qu'un VLM générique.
|
CIM-10 prend des décisions bien meilleures qu'un VLM générique. Et Léa qui
|
||||||
|
dit "J'ai codé 14 dossiers sur 15" plutôt que "J'ai exécuté 112 clics" est
|
||||||
|
bien plus utile pour un TIM.
|
||||||
|
|
||||||
Premier domaine : TIM (Technicien d'Information Médicale)
|
Domaines pré-configurés :
|
||||||
- Logiciels DPI/DMS (dossier patient informatisé)
|
- tim_codage : TIM, codage CIM-10 / CCAM / PMSI, DPI
|
||||||
- Codage CIM-10 / CCAM / GHM
|
- comptabilite : factures, TVA, OCR, plans comptables
|
||||||
- Lecture de comptes rendus médicaux
|
- rh_paie : fiches de paie, employés, charges sociales
|
||||||
- Validation des séjours / RSS / RSA
|
- stocks_logistique : bons, commandes, réceptions, inventaires
|
||||||
|
- generic : fallback bureautique
|
||||||
|
|
||||||
Usage :
|
Usage basique :
|
||||||
ctx = get_domain_context("tim_codage")
|
ctx = get_domain_context("tim_codage")
|
||||||
prompt = f"{ctx.system_prompt}\n\n{user_prompt}"
|
prompt = ctx.enrich_prompt(user_prompt, role="actor")
|
||||||
|
|
||||||
|
Usage langage métier :
|
||||||
|
ctx = get_domain_context("tim_codage")
|
||||||
|
phrase = ctx.summarize_action("click", {"target": "DP"})
|
||||||
|
# → "saisir le diagnostic principal"
|
||||||
|
|
||||||
|
question = ctx.pose_clarification_question(
|
||||||
|
{"blocked_on": "target_not_found", "target": "Fichier patient",
|
||||||
|
"params": {"nom_patient": "Mme Durand"}}
|
||||||
|
)
|
||||||
|
# → "Je ne trouve pas le dossier de Mme Durand..."
|
||||||
|
|
||||||
|
rapport = ctx.describe_workflow_outcome(
|
||||||
|
workflow_name="Codage séjours janvier",
|
||||||
|
success=True,
|
||||||
|
items_count=15,
|
||||||
|
failed_count=1,
|
||||||
|
)
|
||||||
|
# → "J'ai codé 14 dossiers sur 15..."
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import unicodedata
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Mapping, Optional
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _strip_accents(s: str) -> str:
|
||||||
|
"""Supprimer les accents pour les comparaisons insensibles aux diacritiques."""
|
||||||
|
if not s:
|
||||||
|
return ""
|
||||||
|
nkfd = unicodedata.normalize("NFKD", s)
|
||||||
|
return "".join(c for c in nkfd if not unicodedata.combining(c))
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Data class
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class DomainContext:
|
class DomainContext:
|
||||||
"""Contexte métier pour un domaine spécifique."""
|
"""Contexte métier pour un domaine spécifique.
|
||||||
domain_id: str # Identifiant unique (tim_codage, comptabilite, etc.)
|
|
||||||
name: str # Nom lisible (Codage médical TIM)
|
Contient à la fois les hints pour les prompts VLM et les éléments de
|
||||||
description: str # Description courte du métier
|
personnalité de Léa (langage métier, questions, rapports).
|
||||||
|
"""
|
||||||
|
|
||||||
|
domain_id: str # tim_codage, comptabilite, ...
|
||||||
|
name: str # Nom lisible
|
||||||
|
description: str # Description courte
|
||||||
|
|
||||||
# Prompt système injecté dans TOUS les appels VLM
|
# Prompt système injecté dans TOUS les appels VLM
|
||||||
system_prompt: str = ""
|
system_prompt: str = ""
|
||||||
@@ -39,12 +85,41 @@ class DomainContext:
|
|||||||
# Vocabulaire métier (termes que le VLM doit connaître)
|
# Vocabulaire métier (termes que le VLM doit connaître)
|
||||||
vocabulary: List[str] = field(default_factory=list)
|
vocabulary: List[str] = field(default_factory=list)
|
||||||
|
|
||||||
# Applications connues (noms de logiciels que le VLM peut rencontrer)
|
# Applications connues
|
||||||
known_apps: List[str] = field(default_factory=list)
|
known_apps: List[str] = field(default_factory=list)
|
||||||
|
|
||||||
# Écrans types (descriptions des écrans courants du métier)
|
# Écrans types
|
||||||
screen_patterns: Dict[str, str] = field(default_factory=dict)
|
screen_patterns: Dict[str, str] = field(default_factory=dict)
|
||||||
|
|
||||||
|
# --- Personnalité Léa -------------------------------------------------
|
||||||
|
|
||||||
|
# Mapping d'actions techniques (click/type/key_combo) vers description métier,
|
||||||
|
# indexé par un mot-clé lisible trouvé dans la cible/texte.
|
||||||
|
# Format : { (action_type, keyword_lower) : "description métier" }
|
||||||
|
# Exemple : ("click", "dp") → "saisir le diagnostic principal"
|
||||||
|
common_actions: Dict[str, str] = field(default_factory=dict)
|
||||||
|
|
||||||
|
# Synonymes métier : technique → forme lisible
|
||||||
|
# Exemple : {"dp": "diagnostic principal", "das": "diagnostics associés"}
|
||||||
|
vocabulary_synonyms: Dict[str, str] = field(default_factory=dict)
|
||||||
|
|
||||||
|
# Templates de questions de clarification (selon la raison de blocage).
|
||||||
|
# Clé = identifiant de blocage ("target_not_found", "ambiguous_field", ...)
|
||||||
|
# Valeur = template f-string (champs: {target}, {app}, {nom_patient}, ...)
|
||||||
|
clarification_templates: Dict[str, str] = field(default_factory=dict)
|
||||||
|
|
||||||
|
# Templates de résumés de fin de workflow.
|
||||||
|
# Clés attendues :
|
||||||
|
# - "success" : tout a marché
|
||||||
|
# - "partial" : succès partiel (failed_count > 0)
|
||||||
|
# - "failure" : échec complet
|
||||||
|
# - "success_one" : cas 1 élément (pour éviter "1 dossiers")
|
||||||
|
# - "item_singular" : libellé d'un item ("dossier")
|
||||||
|
# - "item_plural" : libellé au pluriel ("dossiers")
|
||||||
|
summary_templates: Dict[str, str] = field(default_factory=dict)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------ API
|
||||||
|
|
||||||
def enrich_prompt(self, prompt: str, role: str = "") -> str:
|
def enrich_prompt(self, prompt: str, role: str = "") -> str:
|
||||||
"""Enrichir un prompt avec le contexte métier.
|
"""Enrichir un prompt avec le contexte métier.
|
||||||
|
|
||||||
@@ -65,6 +140,310 @@ class DomainContext:
|
|||||||
parts.append(prompt)
|
parts.append(prompt)
|
||||||
return "\n\n".join(parts)
|
return "\n\n".join(parts)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Personnalité : résumé d'action en langage métier
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def summarize_action(
|
||||||
|
self,
|
||||||
|
action: str,
|
||||||
|
params: Optional[Mapping[str, Any]] = None,
|
||||||
|
) -> str:
|
||||||
|
"""Résumer une action technique en langage métier.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
action: Type d'action ("click", "type", "key_combo", "wait", "scroll")
|
||||||
|
params: Paramètres de l'action (target, text, keys, ...)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Phrase en français orientée métier. Fallback générique si aucun
|
||||||
|
mapping ne correspond.
|
||||||
|
|
||||||
|
Exemples (domaine tim_codage) :
|
||||||
|
click sur "DP" → "saisir le diagnostic principal"
|
||||||
|
type "E11.9" → "saisir le code CIM-10 E11.9"
|
||||||
|
click sur "Valider" → "valider le codage"
|
||||||
|
"""
|
||||||
|
params = dict(params or {})
|
||||||
|
target = str(params.get("target") or params.get("description") or "").strip()
|
||||||
|
text = str(params.get("text") or "").strip()
|
||||||
|
keys = params.get("keys") or []
|
||||||
|
|
||||||
|
haystack = _strip_accents(f"{target} {text}".lower())
|
||||||
|
|
||||||
|
# 1) Essayer un match mot-clé dans common_actions.
|
||||||
|
# Clés sous la forme "click:mot" ou "type:mot".
|
||||||
|
# Comparaison insensible à la casse ET aux accents.
|
||||||
|
for key, label in self.common_actions.items():
|
||||||
|
if ":" not in key:
|
||||||
|
continue
|
||||||
|
k_action, k_word = key.split(":", 1)
|
||||||
|
if k_action != action:
|
||||||
|
continue
|
||||||
|
k_word_norm = _strip_accents(k_word.lower())
|
||||||
|
if k_word_norm and k_word_norm in haystack:
|
||||||
|
return label
|
||||||
|
|
||||||
|
# 2) Essayer une substitution via vocabulary_synonyms dans la cible.
|
||||||
|
friendly_target = self._apply_synonyms(target)
|
||||||
|
|
||||||
|
if action == "click":
|
||||||
|
if friendly_target:
|
||||||
|
return f"cliquer sur {friendly_target}"
|
||||||
|
return "cliquer"
|
||||||
|
|
||||||
|
if action == "type":
|
||||||
|
if text and friendly_target:
|
||||||
|
return f"saisir « {text} » dans {friendly_target}"
|
||||||
|
if text:
|
||||||
|
return f"saisir « {text} »"
|
||||||
|
return "saisir du texte"
|
||||||
|
|
||||||
|
if action == "key_combo":
|
||||||
|
if isinstance(keys, (list, tuple)) and keys:
|
||||||
|
return f"utiliser le raccourci {'+'.join(str(k) for k in keys)}"
|
||||||
|
return "utiliser un raccourci clavier"
|
||||||
|
|
||||||
|
if action == "wait":
|
||||||
|
return "attendre le chargement de l'écran"
|
||||||
|
|
||||||
|
if action == "scroll":
|
||||||
|
return "faire défiler l'écran"
|
||||||
|
|
||||||
|
# Fallback ultime
|
||||||
|
return f"effectuer l'action {action}"
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Personnalité : question de clarification
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def pose_clarification_question(
|
||||||
|
self,
|
||||||
|
context: Optional[Mapping[str, Any]] = None,
|
||||||
|
) -> str:
|
||||||
|
"""Générer une question pertinente quand Léa bloque.
|
||||||
|
|
||||||
|
Cherche un template dans clarification_templates selon :
|
||||||
|
- context["blocked_on"] (ex: "target_not_found", "ambiguous_field")
|
||||||
|
- context["target"] (la cible visée)
|
||||||
|
- paramètres du workflow (params) disponibles pour substitution
|
||||||
|
|
||||||
|
Args:
|
||||||
|
context: Dictionnaire libre contenant au minimum 'blocked_on' ou
|
||||||
|
'target'. Peut contenir 'params' pour la substitution.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Question en français. Fallback générique si aucun template ne
|
||||||
|
correspond.
|
||||||
|
"""
|
||||||
|
ctx = dict(context or {})
|
||||||
|
blocked_on = str(ctx.get("blocked_on") or "").strip()
|
||||||
|
target = str(ctx.get("target") or "").strip()
|
||||||
|
params = dict(ctx.get("params") or {})
|
||||||
|
|
||||||
|
# Dictionnaire de substitution : champs du context + params + target
|
||||||
|
subs: Dict[str, Any] = {
|
||||||
|
"target": target,
|
||||||
|
"target_friendly": self._apply_synonyms(target) or target or "cet élément",
|
||||||
|
"app": ctx.get("app", ""),
|
||||||
|
}
|
||||||
|
subs.update(params)
|
||||||
|
|
||||||
|
# 1) Essai par clé exacte de blocage
|
||||||
|
template = self.clarification_templates.get(blocked_on, "")
|
||||||
|
|
||||||
|
# 2) Essai par cible (si la cible matche un mot-clé connu)
|
||||||
|
if not template and target:
|
||||||
|
low = target.lower()
|
||||||
|
for key, tpl in self.clarification_templates.items():
|
||||||
|
if key.startswith("target:") and key.split(":", 1)[1].lower() in low:
|
||||||
|
template = tpl
|
||||||
|
break
|
||||||
|
|
||||||
|
# 3) Template générique du domaine
|
||||||
|
if not template:
|
||||||
|
template = self.clarification_templates.get("default", "")
|
||||||
|
|
||||||
|
if template:
|
||||||
|
try:
|
||||||
|
return template.format_map(_SafeDict(subs))
|
||||||
|
except Exception as e: # pragma: no cover - format inattendu
|
||||||
|
logger.warning("clarification template format error: %s", e)
|
||||||
|
|
||||||
|
# 4) Fallback ultime cross-domaine
|
||||||
|
friendly = subs["target_friendly"]
|
||||||
|
return (
|
||||||
|
f"Je ne trouve pas {friendly}. "
|
||||||
|
f"Peux-tu me le montrer ou me confirmer que c'est le bon écran ?"
|
||||||
|
)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Personnalité : rapport final
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def describe_workflow_outcome(
|
||||||
|
self,
|
||||||
|
workflow_name: str = "",
|
||||||
|
success: bool = True,
|
||||||
|
items_count: int = 1,
|
||||||
|
failed_count: int = 0,
|
||||||
|
elapsed_s: float = 0.0,
|
||||||
|
extra: Optional[Mapping[str, Any]] = None,
|
||||||
|
use_llm: bool = False,
|
||||||
|
) -> str:
|
||||||
|
"""Générer un rapport de fin de workflow en langage métier.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
workflow_name: Nom du workflow ("Codage janvier").
|
||||||
|
success: True si le workflow a globalement réussi.
|
||||||
|
items_count: Nombre d'items traités (ex: 15 dossiers). 1 par défaut.
|
||||||
|
failed_count: Nombre d'items en échec.
|
||||||
|
elapsed_s: Durée totale (secondes).
|
||||||
|
extra: Infos supplémentaires (hint pour le LLM).
|
||||||
|
use_llm: Si True, tenter un appel à gemma4 pour produire le
|
||||||
|
rapport. Fallback sur les templates en cas d'échec.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Rapport en français. Toujours une chaîne, jamais None.
|
||||||
|
"""
|
||||||
|
extra = dict(extra or {})
|
||||||
|
done = max(0, items_count - failed_count)
|
||||||
|
|
||||||
|
item_sg = self.summary_templates.get("item_singular", "élément")
|
||||||
|
item_pl = self.summary_templates.get("item_plural", "éléments")
|
||||||
|
item_word = item_sg if done <= 1 else item_pl
|
||||||
|
|
||||||
|
# Données disponibles pour les templates
|
||||||
|
subs = {
|
||||||
|
"workflow_name": workflow_name or "le workflow",
|
||||||
|
"items_count": items_count,
|
||||||
|
"done": done,
|
||||||
|
"failed": failed_count,
|
||||||
|
"item_singular": item_sg,
|
||||||
|
"item_plural": item_pl,
|
||||||
|
"item_word": item_word,
|
||||||
|
"elapsed_s": int(elapsed_s),
|
||||||
|
}
|
||||||
|
subs.update(extra)
|
||||||
|
|
||||||
|
# Choisir le template adéquat
|
||||||
|
if not success and failed_count >= items_count:
|
||||||
|
key = "failure"
|
||||||
|
elif failed_count > 0:
|
||||||
|
key = "partial"
|
||||||
|
elif items_count == 1:
|
||||||
|
key = "success_one" if "success_one" in self.summary_templates else "success"
|
||||||
|
else:
|
||||||
|
key = "success"
|
||||||
|
|
||||||
|
template = self.summary_templates.get(key, "")
|
||||||
|
|
||||||
|
# Optionnel : raffiner via gemma4
|
||||||
|
if use_llm:
|
||||||
|
llm_text = self._llm_refine_summary(template, subs, success)
|
||||||
|
if llm_text:
|
||||||
|
return llm_text
|
||||||
|
|
||||||
|
if template:
|
||||||
|
try:
|
||||||
|
return template.format_map(_SafeDict(subs))
|
||||||
|
except Exception as e: # pragma: no cover
|
||||||
|
logger.warning("summary template format error: %s", e)
|
||||||
|
|
||||||
|
# Fallback générique
|
||||||
|
if success:
|
||||||
|
if items_count <= 1:
|
||||||
|
return f"C'est fait, j'ai terminé « {workflow_name or 'le workflow'} »."
|
||||||
|
return (
|
||||||
|
f"J'ai traité {done} {item_word} sur {items_count}"
|
||||||
|
+ (f", {failed_count} en échec." if failed_count else ".")
|
||||||
|
)
|
||||||
|
return (
|
||||||
|
f"Je n'ai pas pu terminer « {workflow_name or 'le workflow'} ». "
|
||||||
|
f"Je te rends la main."
|
||||||
|
)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Helpers internes
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _apply_synonyms(self, text: str) -> str:
|
||||||
|
"""Remplacer les sigles/termes techniques par leur forme métier.
|
||||||
|
|
||||||
|
Cherche mots entiers (word boundaries) en insensible à la casse.
|
||||||
|
"""
|
||||||
|
if not text or not self.vocabulary_synonyms:
|
||||||
|
return text
|
||||||
|
result = text
|
||||||
|
for short, full in self.vocabulary_synonyms.items():
|
||||||
|
if not short:
|
||||||
|
continue
|
||||||
|
pattern = r"\b" + re.escape(short) + r"\b"
|
||||||
|
result = re.sub(pattern, full, result, flags=re.IGNORECASE)
|
||||||
|
return result
|
||||||
|
|
||||||
|
def _llm_refine_summary(
|
||||||
|
self,
|
||||||
|
template: str,
|
||||||
|
subs: Dict[str, Any],
|
||||||
|
success: bool,
|
||||||
|
) -> str:
|
||||||
|
"""Tenter un raffinement du rapport via gemma4.
|
||||||
|
|
||||||
|
Appel best-effort : toute erreur retourne "" et le caller retombe sur
|
||||||
|
le template brut. Isolé dans une méthode pour pouvoir le monkey-patcher
|
||||||
|
dans les tests.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
import requests as _requests
|
||||||
|
except Exception:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
port = os.environ.get("GEMMA4_PORT", "11435")
|
||||||
|
url = f"http://localhost:{port}/api/chat"
|
||||||
|
|
||||||
|
base = ""
|
||||||
|
if template:
|
||||||
|
try:
|
||||||
|
base = template.format_map(_SafeDict(subs))
|
||||||
|
except Exception:
|
||||||
|
base = ""
|
||||||
|
|
||||||
|
prompt = (
|
||||||
|
f"Tu es Léa, une assistante RPA dans le domaine : {self.name}.\n"
|
||||||
|
f"Tu viens de terminer un workflow. Résume en UNE à DEUX phrases "
|
||||||
|
f"en langage métier, chaleureux mais professionnel, en français.\n\n"
|
||||||
|
f"Données :\n"
|
||||||
|
f"- workflow : {subs.get('workflow_name', '')}\n"
|
||||||
|
f"- items traités : {subs.get('done', 0)} / {subs.get('items_count', 0)}\n"
|
||||||
|
f"- échecs : {subs.get('failed', 0)}\n"
|
||||||
|
f"- succès global : {'oui' if success else 'non'}\n"
|
||||||
|
f"- durée : {subs.get('elapsed_s', 0)}s\n\n"
|
||||||
|
f"Base suggérée (tu peux la reformuler) : {base or '(aucune)'}\n\n"
|
||||||
|
f"Ta phrase :"
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
resp = _requests.post(
|
||||||
|
url,
|
||||||
|
json={
|
||||||
|
"model": "gemma4:e4b",
|
||||||
|
"messages": [{"role": "user", "content": prompt}],
|
||||||
|
"stream": False,
|
||||||
|
"options": {"temperature": 0.3, "num_predict": 200},
|
||||||
|
},
|
||||||
|
timeout=30,
|
||||||
|
)
|
||||||
|
if not resp.ok:
|
||||||
|
return ""
|
||||||
|
content = resp.json().get("message", {}).get("content", "").strip()
|
||||||
|
# Nettoyage basique : supprimer guillemets typographiques en bord
|
||||||
|
content = content.strip("\"' \n")
|
||||||
|
return content
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug("gemma4 refine summary failed: %s", e)
|
||||||
|
return ""
|
||||||
|
|
||||||
def to_dict(self) -> Dict[str, Any]:
|
def to_dict(self) -> Dict[str, Any]:
|
||||||
return {
|
return {
|
||||||
"domain_id": self.domain_id,
|
"domain_id": self.domain_id,
|
||||||
@@ -72,9 +451,24 @@ class DomainContext:
|
|||||||
"description": self.description,
|
"description": self.description,
|
||||||
"known_apps": self.known_apps,
|
"known_apps": self.known_apps,
|
||||||
"vocabulary_count": len(self.vocabulary),
|
"vocabulary_count": len(self.vocabulary),
|
||||||
|
"common_actions_count": len(self.common_actions),
|
||||||
|
"has_clarification_templates": bool(self.clarification_templates),
|
||||||
|
"has_summary_templates": bool(self.summary_templates),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Utilitaires
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
class _SafeDict(dict):
|
||||||
|
"""dict pour str.format_map qui retourne "" pour les clés manquantes."""
|
||||||
|
|
||||||
|
def __missing__(self, key): # type: ignore[override]
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
# Hints par rôle VLM — adaptés au contexte métier
|
# Hints par rôle VLM — adaptés au contexte métier
|
||||||
_ROLE_HINTS = {
|
_ROLE_HINTS = {
|
||||||
"observer": (
|
"observer": (
|
||||||
@@ -100,6 +494,7 @@ _ROLE_HINTS = {
|
|||||||
# Domaines pré-configurés
|
# Domaines pré-configurés
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
_TIM_CODAGE = DomainContext(
|
_TIM_CODAGE = DomainContext(
|
||||||
domain_id="tim_codage",
|
domain_id="tim_codage",
|
||||||
name="Codage médical TIM",
|
name="Codage médical TIM",
|
||||||
@@ -156,8 +551,405 @@ _TIM_CODAGE = DomainContext(
|
|||||||
"recherche_code": "Recherche de code CIM-10 ou CCAM (champ de recherche + arborescence)",
|
"recherche_code": "Recherche de code CIM-10 ou CCAM (champ de recherche + arborescence)",
|
||||||
"validation_ghm": "Écran de validation du groupage avec GHM calculé et valorisation",
|
"validation_ghm": "Écran de validation du groupage avec GHM calculé et valorisation",
|
||||||
},
|
},
|
||||||
|
vocabulary_synonyms={
|
||||||
|
"DP": "diagnostic principal",
|
||||||
|
"DAS": "diagnostics associés",
|
||||||
|
"CMA": "complication associée",
|
||||||
|
"UM": "unité médicale",
|
||||||
|
"CR": "compte rendu",
|
||||||
|
"RSS": "résumé de sortie",
|
||||||
|
"RSA": "résumé anonymisé",
|
||||||
|
"GHM": "groupe homogène de malades",
|
||||||
|
"IPP": "identifiant patient",
|
||||||
|
},
|
||||||
|
common_actions={
|
||||||
|
"click:dp": "saisir le diagnostic principal",
|
||||||
|
"click:diagnostic principal": "saisir le diagnostic principal",
|
||||||
|
"click:das": "ajouter un diagnostic associé",
|
||||||
|
"click:ccam": "saisir un acte CCAM",
|
||||||
|
"click:valider": "valider le codage",
|
||||||
|
"click:valider le codage": "valider le codage",
|
||||||
|
"click:grouper": "calculer le GHM",
|
||||||
|
"click:ghm": "consulter le groupage GHM",
|
||||||
|
"click:dossier patient": "ouvrir le dossier patient",
|
||||||
|
"click:fiche patient": "ouvrir la fiche patient",
|
||||||
|
"click:compte rendu": "consulter le compte rendu",
|
||||||
|
"click:cr": "consulter le compte rendu",
|
||||||
|
"click:rechercher": "rechercher un code CIM-10",
|
||||||
|
"type:cim": "saisir un code CIM-10",
|
||||||
|
},
|
||||||
|
clarification_templates={
|
||||||
|
"default": (
|
||||||
|
"Je ne trouve pas {target_friendly}. "
|
||||||
|
"Tu peux me montrer où il se trouve dans le dossier ?"
|
||||||
|
),
|
||||||
|
"target_not_found": (
|
||||||
|
"Je ne trouve pas {target_friendly}. "
|
||||||
|
"Le dossier de {nom_patient} est peut-être déjà codé ou archivé ?"
|
||||||
|
),
|
||||||
|
"target:fichier patient": (
|
||||||
|
"Je ne trouve pas le dossier de {nom_patient}. "
|
||||||
|
"Il est peut-être archivé ? Tu peux me le montrer ?"
|
||||||
|
),
|
||||||
|
"target:dossier": (
|
||||||
|
"Je ne trouve pas le dossier de {nom_patient}. "
|
||||||
|
"Il est peut-être archivé ? Tu peux me le montrer ?"
|
||||||
|
),
|
||||||
|
"ambiguous_code": (
|
||||||
|
"Le compte rendu mentionne plusieurs codes possibles. "
|
||||||
|
"Est-ce le code CIM-10 {code_a} ou {code_b} que tu préfères ?"
|
||||||
|
),
|
||||||
|
"no_cr": (
|
||||||
|
"Je ne trouve pas de compte rendu pour {nom_patient}. "
|
||||||
|
"Tu veux que je saute ce dossier ou que je continue sans ?"
|
||||||
|
),
|
||||||
|
},
|
||||||
|
summary_templates={
|
||||||
|
"item_singular": "dossier",
|
||||||
|
"item_plural": "dossiers",
|
||||||
|
"success_one": (
|
||||||
|
"J'ai codé le dossier de {nom_patient} en {elapsed_s}s. "
|
||||||
|
"Tu peux vérifier le groupage GHM."
|
||||||
|
),
|
||||||
|
"success": (
|
||||||
|
"J'ai codé {done} dossiers sur {items_count}. "
|
||||||
|
"Tout est passé sans erreur, tu peux valider le groupage."
|
||||||
|
),
|
||||||
|
"partial": (
|
||||||
|
"J'ai codé {done} dossiers sur {items_count}. "
|
||||||
|
"{failed} sont en attente — codes CIM-10 ambigus, à valider manuellement."
|
||||||
|
),
|
||||||
|
"failure": (
|
||||||
|
"Je n'ai pas pu coder les dossiers de {workflow_name}. "
|
||||||
|
"Je te rends la main, les comptes rendus sont peut-être inaccessibles."
|
||||||
|
),
|
||||||
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
_COMPTABILITE = DomainContext(
|
||||||
|
domain_id="comptabilite",
|
||||||
|
name="Comptabilité",
|
||||||
|
description=(
|
||||||
|
"Comptable : saisie de factures fournisseurs et clients, lettrage, "
|
||||||
|
"rapprochement bancaire, déclarations de TVA, bilans, immobilisations."
|
||||||
|
),
|
||||||
|
system_prompt=(
|
||||||
|
"Tu es un assistant expert en comptabilité d'entreprise. "
|
||||||
|
"L'utilisateur est un comptable qui utilise un logiciel de saisie comptable "
|
||||||
|
"(Sage, Cegid, EBP, Quadra, Isacompta) pour saisir des factures, faire "
|
||||||
|
"les rapprochements bancaires, préparer la TVA et les bilans.\n\n"
|
||||||
|
"Vocabulaire du métier :\n"
|
||||||
|
"- Facture : justificatif de vente ou d'achat (numéro, date, HT, TVA, TTC)\n"
|
||||||
|
"- HT/TVA/TTC : montants hors taxes, taxe, toutes taxes\n"
|
||||||
|
"- Compte comptable : numéro du plan comptable général (PCG), ex 401 (fournisseurs), 411 (clients)\n"
|
||||||
|
"- Journal : journal de saisie (achats, ventes, banque, OD)\n"
|
||||||
|
"- Lettrage : association d'une facture avec son paiement\n"
|
||||||
|
"- Rapprochement : comparaison compte comptable / relevé bancaire\n"
|
||||||
|
"- OCR / LAD : reconnaissance automatique des factures scannées\n"
|
||||||
|
"- Écriture : ligne comptable (débit/crédit)\n"
|
||||||
|
"- Exercice : période comptable annuelle\n"
|
||||||
|
"- Bilan / compte de résultat : états financiers\n"
|
||||||
|
"- CA : chiffre d'affaires\n\n"
|
||||||
|
"Écrans courants :\n"
|
||||||
|
"- Saisie d'écritures (numéro de compte, libellé, débit, crédit)\n"
|
||||||
|
"- Import OCR de factures fournisseurs\n"
|
||||||
|
"- Lettrage / rapprochement\n"
|
||||||
|
"- Brouillard / journal\n"
|
||||||
|
"- Balance / grand livre"
|
||||||
|
),
|
||||||
|
vocabulary=[
|
||||||
|
"facture", "HT", "TVA", "TTC", "compte", "journal", "lettrage",
|
||||||
|
"rapprochement", "OCR", "LAD", "écriture", "débit", "crédit",
|
||||||
|
"exercice", "bilan", "compte de résultat", "CA", "immobilisation",
|
||||||
|
"fournisseur", "client", "PCG", "plan comptable",
|
||||||
|
],
|
||||||
|
known_apps=[
|
||||||
|
"Sage", "Cegid", "EBP", "Quadra", "Isacompta", "Ciel Compta",
|
||||||
|
"Odoo", "Pennylane", "Dext", "Agicap",
|
||||||
|
],
|
||||||
|
screen_patterns={
|
||||||
|
"saisie_ecriture": "Saisie d'écriture comptable (compte, libellé, débit, crédit)",
|
||||||
|
"ocr_facture": "Import OCR : zone image + champs extraits (numéro, date, HT, TVA, TTC, fournisseur)",
|
||||||
|
"lettrage": "Liste d'écritures à lettrer (débit vs crédit)",
|
||||||
|
"rapprochement": "Comparaison compte banque / relevé",
|
||||||
|
"balance": "Balance comptable (comptes agrégés avec soldes)",
|
||||||
|
},
|
||||||
|
vocabulary_synonyms={
|
||||||
|
"HT": "montant hors taxes",
|
||||||
|
"TVA": "montant de TVA",
|
||||||
|
"TTC": "montant toutes taxes",
|
||||||
|
"CA": "chiffre d'affaires",
|
||||||
|
"PCG": "plan comptable général",
|
||||||
|
"OD": "opération diverse",
|
||||||
|
},
|
||||||
|
common_actions={
|
||||||
|
"click:valider": "valider l'écriture",
|
||||||
|
"click:enregistrer": "enregistrer la saisie",
|
||||||
|
"click:lettrer": "lettrer les écritures",
|
||||||
|
"click:rapprocher": "rapprocher avec la banque",
|
||||||
|
"click:ocr": "lancer la reconnaissance OCR",
|
||||||
|
"click:facture": "ouvrir la facture",
|
||||||
|
"click:compte": "sélectionner le compte comptable",
|
||||||
|
"type:ht": "saisir le montant hors taxes",
|
||||||
|
"type:tva": "saisir le montant de TVA",
|
||||||
|
"type:ttc": "saisir le montant toutes taxes",
|
||||||
|
},
|
||||||
|
clarification_templates={
|
||||||
|
"default": (
|
||||||
|
"Je ne trouve pas {target_friendly}. "
|
||||||
|
"C'est bien la facture {num_facture} que tu veux saisir ?"
|
||||||
|
),
|
||||||
|
"target_not_found": (
|
||||||
|
"Je ne trouve pas le champ {target_friendly}. "
|
||||||
|
"C'est bien la facture {num_facture} qui doit être saisie ?"
|
||||||
|
),
|
||||||
|
"target:montant": (
|
||||||
|
"Je ne trouve pas le champ « Montant HT ». "
|
||||||
|
"C'est bien la facture {num_facture} que tu veux saisir ?"
|
||||||
|
),
|
||||||
|
"target:tva": (
|
||||||
|
"Je ne trouve pas le champ TVA. Est-ce une facture à taux {taux_tva} % ?"
|
||||||
|
),
|
||||||
|
"ambiguous_account": (
|
||||||
|
"Je ne sais pas sur quel compte imputer : {compte_a} ou {compte_b} ?"
|
||||||
|
),
|
||||||
|
},
|
||||||
|
summary_templates={
|
||||||
|
"item_singular": "facture",
|
||||||
|
"item_plural": "factures",
|
||||||
|
"success_one": (
|
||||||
|
"J'ai saisi la facture {num_facture} en {elapsed_s}s."
|
||||||
|
),
|
||||||
|
"success": (
|
||||||
|
"J'ai saisi {done} factures sur {items_count}. "
|
||||||
|
"Tout est en brouillard, tu peux valider."
|
||||||
|
),
|
||||||
|
"partial": (
|
||||||
|
"J'ai saisi {done} factures sur {items_count}. "
|
||||||
|
"{failed} factures sont en attente — imputation comptable à vérifier."
|
||||||
|
),
|
||||||
|
"failure": (
|
||||||
|
"Je n'ai pas pu saisir les factures de {workflow_name}. "
|
||||||
|
"L'OCR n'a peut-être pas fonctionné, je te rends la main."
|
||||||
|
),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
_RH_PAIE = DomainContext(
|
||||||
|
domain_id="rh_paie",
|
||||||
|
name="Ressources humaines et paie",
|
||||||
|
description=(
|
||||||
|
"Gestionnaire RH / paie : fiches employés, contrats, bulletins de salaire, "
|
||||||
|
"déclarations sociales (DSN), charges, congés, absences."
|
||||||
|
),
|
||||||
|
system_prompt=(
|
||||||
|
"Tu es un assistant expert en gestion RH et paie française. "
|
||||||
|
"L'utilisateur est un gestionnaire RH ou de paie qui utilise un logiciel "
|
||||||
|
"(Silae, Sage Paie, Cegid, ADP, PayFit) pour éditer des bulletins de salaire, "
|
||||||
|
"gérer les contrats, les absences, et envoyer les DSN.\n\n"
|
||||||
|
"Vocabulaire du métier :\n"
|
||||||
|
"- Bulletin de paie : fiche de salaire mensuelle\n"
|
||||||
|
"- DSN : Déclaration Sociale Nominative (mensuelle, transmise à l'URSSAF)\n"
|
||||||
|
"- Brut / Net : salaire avant et après charges\n"
|
||||||
|
"- Charges sociales / patronales : cotisations employeur et salarié\n"
|
||||||
|
"- CDI / CDD : types de contrats\n"
|
||||||
|
"- Période de paie : mois concerné par le bulletin\n"
|
||||||
|
"- SMIC : salaire minimum\n"
|
||||||
|
"- IJSS : indemnités journalières sécurité sociale\n"
|
||||||
|
"- Congés payés : solde de congés\n"
|
||||||
|
"- RTT : réduction du temps de travail\n"
|
||||||
|
"- Saisie sur salaire : retenue judiciaire\n"
|
||||||
|
"- Solde de tout compte : dernier bulletin d'un salarié qui part\n\n"
|
||||||
|
"Écrans courants :\n"
|
||||||
|
"- Fiche employé (identité, contrat, poste, salaire)\n"
|
||||||
|
"- Saisie des variables (heures, absences, primes)\n"
|
||||||
|
"- Bulletin de paie (aperçu avant validation)\n"
|
||||||
|
"- Déclaration DSN\n"
|
||||||
|
"- Gestion des absences / congés"
|
||||||
|
),
|
||||||
|
vocabulary=[
|
||||||
|
"bulletin", "salaire", "brut", "net", "charges sociales", "DSN",
|
||||||
|
"CDI", "CDD", "congés", "RTT", "SMIC", "IJSS", "URSSAF",
|
||||||
|
"employé", "salarié", "contrat", "prime", "heures supplémentaires",
|
||||||
|
"absence", "solde de tout compte", "STC",
|
||||||
|
],
|
||||||
|
known_apps=[
|
||||||
|
"Silae", "Sage Paie", "Cegid Paie", "ADP", "PayFit", "Nibelis",
|
||||||
|
"Cegedim SRH", "Lucca", "HR Access",
|
||||||
|
],
|
||||||
|
screen_patterns={
|
||||||
|
"fiche_employe": "Fiche employé avec identité, contrat, poste",
|
||||||
|
"saisie_variables": "Saisie des variables de paie (heures, absences, primes)",
|
||||||
|
"apercu_bulletin": "Aperçu du bulletin de paie avant validation",
|
||||||
|
"dsn": "Écran DSN (déclaration sociale nominative)",
|
||||||
|
"conges": "Gestion des absences et congés",
|
||||||
|
},
|
||||||
|
vocabulary_synonyms={
|
||||||
|
"DSN": "déclaration sociale",
|
||||||
|
"RTT": "réduction du temps de travail",
|
||||||
|
"STC": "solde de tout compte",
|
||||||
|
"IJSS": "indemnités journalières",
|
||||||
|
"CP": "congés payés",
|
||||||
|
},
|
||||||
|
common_actions={
|
||||||
|
"click:valider": "valider le bulletin",
|
||||||
|
"click:editer": "éditer le bulletin",
|
||||||
|
"click:bulletin": "ouvrir le bulletin de paie",
|
||||||
|
"click:employe": "ouvrir la fiche employé",
|
||||||
|
"click:dsn": "lancer la DSN",
|
||||||
|
"click:conges": "gérer les congés",
|
||||||
|
"click:absence": "saisir une absence",
|
||||||
|
"type:heures": "saisir les heures travaillées",
|
||||||
|
"type:prime": "saisir une prime",
|
||||||
|
},
|
||||||
|
clarification_templates={
|
||||||
|
"default": (
|
||||||
|
"Je ne trouve pas {target_friendly} pour {nom_employe}. "
|
||||||
|
"Tu peux me confirmer la période de paie ?"
|
||||||
|
),
|
||||||
|
"target_not_found": (
|
||||||
|
"Je ne trouve pas {target_friendly} dans la fiche de {nom_employe}. "
|
||||||
|
"Le contrat est peut-être clôturé ?"
|
||||||
|
),
|
||||||
|
"target:employe": (
|
||||||
|
"Je ne trouve pas {nom_employe} dans la liste. "
|
||||||
|
"Est-il encore actif dans l'entreprise ?"
|
||||||
|
),
|
||||||
|
"ambiguous_period": (
|
||||||
|
"Est-ce la période {periode_a} ou {periode_b} que tu veux traiter ?"
|
||||||
|
),
|
||||||
|
},
|
||||||
|
summary_templates={
|
||||||
|
"item_singular": "bulletin",
|
||||||
|
"item_plural": "bulletins",
|
||||||
|
"success_one": (
|
||||||
|
"J'ai édité le bulletin de {nom_employe} en {elapsed_s}s."
|
||||||
|
),
|
||||||
|
"success": (
|
||||||
|
"J'ai édité {done} bulletins sur {items_count}. "
|
||||||
|
"La paie est prête pour validation."
|
||||||
|
),
|
||||||
|
"partial": (
|
||||||
|
"J'ai édité {done} bulletins sur {items_count}. "
|
||||||
|
"{failed} sont en attente — variables de paie à compléter."
|
||||||
|
),
|
||||||
|
"failure": (
|
||||||
|
"Je n'ai pas pu éditer les bulletins de {workflow_name}. "
|
||||||
|
"Il y a peut-être un blocage côté logiciel de paie."
|
||||||
|
),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
_STOCKS_LOGISTIQUE = DomainContext(
|
||||||
|
domain_id="stocks_logistique",
|
||||||
|
name="Stocks et logistique",
|
||||||
|
description=(
|
||||||
|
"Gestionnaire de stocks / logistique : bons de commande, bons de livraison, "
|
||||||
|
"réceptions, inventaires, mouvements de stock, expéditions."
|
||||||
|
),
|
||||||
|
system_prompt=(
|
||||||
|
"Tu es un assistant expert en gestion de stocks et logistique. "
|
||||||
|
"L'utilisateur utilise un ERP ou WMS (SAP, Dynamics, Odoo, Sage, Divalto) "
|
||||||
|
"pour gérer les commandes, les réceptions, les expéditions et les inventaires.\n\n"
|
||||||
|
"Vocabulaire du métier :\n"
|
||||||
|
"- BC : Bon de Commande (achat ou vente)\n"
|
||||||
|
"- BL : Bon de Livraison\n"
|
||||||
|
"- BR : Bon de Réception\n"
|
||||||
|
"- Article / Référence / SKU : produit en stock\n"
|
||||||
|
"- Emplacement : localisation physique (allée, rayon, emplacement)\n"
|
||||||
|
"- Mouvement de stock : entrée, sortie, transfert\n"
|
||||||
|
"- Inventaire : comptage physique pour recaler le stock théorique\n"
|
||||||
|
"- FIFO / LIFO : ordre de sortie des stocks\n"
|
||||||
|
"- ERP : progiciel de gestion intégré\n"
|
||||||
|
"- WMS : Warehouse Management System\n"
|
||||||
|
"- Picking : préparation de commande\n"
|
||||||
|
"- Quantité en stock / disponible / réservée\n\n"
|
||||||
|
"Écrans courants :\n"
|
||||||
|
"- Saisie de bon de commande / réception\n"
|
||||||
|
"- Liste des articles (avec photo, quantité, emplacement)\n"
|
||||||
|
"- Inventaire (comptage)\n"
|
||||||
|
"- Mouvements de stock\n"
|
||||||
|
"- Picking list (liste de préparation)"
|
||||||
|
),
|
||||||
|
vocabulary=[
|
||||||
|
"bon de commande", "BC", "bon de livraison", "BL", "bon de réception", "BR",
|
||||||
|
"article", "référence", "SKU", "emplacement", "stock", "inventaire",
|
||||||
|
"mouvement", "entrée", "sortie", "picking", "FIFO", "LIFO", "ERP", "WMS",
|
||||||
|
"fournisseur", "client", "quantité", "disponible", "réservé",
|
||||||
|
],
|
||||||
|
known_apps=[
|
||||||
|
"SAP", "Dynamics", "Odoo", "Sage X3", "Divalto", "Cegid",
|
||||||
|
"Oracle NetSuite", "Reflex WMS", "Infolog",
|
||||||
|
],
|
||||||
|
screen_patterns={
|
||||||
|
"bon_commande": "Saisie de bon de commande (fournisseur, lignes d'articles, quantités)",
|
||||||
|
"reception": "Bon de réception (rapprochement avec la commande)",
|
||||||
|
"inventaire": "Saisie d'inventaire (article, emplacement, quantité comptée)",
|
||||||
|
"picking": "Liste de préparation avec articles et emplacements",
|
||||||
|
"mouvement": "Mouvement de stock (entrée/sortie/transfert)",
|
||||||
|
},
|
||||||
|
vocabulary_synonyms={
|
||||||
|
"BC": "bon de commande",
|
||||||
|
"BL": "bon de livraison",
|
||||||
|
"BR": "bon de réception",
|
||||||
|
"SKU": "référence produit",
|
||||||
|
"WMS": "gestion d'entrepôt",
|
||||||
|
"ERP": "progiciel de gestion",
|
||||||
|
},
|
||||||
|
common_actions={
|
||||||
|
"click:valider": "valider le bon",
|
||||||
|
"click:commande": "ouvrir le bon de commande",
|
||||||
|
"click:livraison": "ouvrir le bon de livraison",
|
||||||
|
"click:reception": "saisir la réception",
|
||||||
|
"click:inventaire": "démarrer l'inventaire",
|
||||||
|
"click:article": "sélectionner un article",
|
||||||
|
"click:picking": "démarrer la préparation",
|
||||||
|
"type:quantite": "saisir la quantité",
|
||||||
|
"type:reference": "saisir la référence article",
|
||||||
|
},
|
||||||
|
clarification_templates={
|
||||||
|
"default": (
|
||||||
|
"Je ne trouve pas {target_friendly}. "
|
||||||
|
"C'est bien la commande {num_bc} qu'on traite ?"
|
||||||
|
),
|
||||||
|
"target_not_found": (
|
||||||
|
"Je ne trouve pas {target_friendly}. "
|
||||||
|
"La commande {num_bc} est peut-être déjà clôturée ?"
|
||||||
|
),
|
||||||
|
"target:article": (
|
||||||
|
"Je ne trouve pas l'article {ref_article}. "
|
||||||
|
"Il est peut-être archivé ou mal référencé ?"
|
||||||
|
),
|
||||||
|
"quantity_mismatch": (
|
||||||
|
"La quantité reçue ({qte_recue}) ne correspond pas à la commande "
|
||||||
|
"({qte_commandee}). Je saisis un écart ou tu vérifies ?"
|
||||||
|
),
|
||||||
|
},
|
||||||
|
summary_templates={
|
||||||
|
"item_singular": "bon",
|
||||||
|
"item_plural": "bons",
|
||||||
|
"success_one": (
|
||||||
|
"J'ai traité le bon {num_bc} en {elapsed_s}s."
|
||||||
|
),
|
||||||
|
"success": (
|
||||||
|
"J'ai traité {done} bons sur {items_count}. "
|
||||||
|
"Les mouvements de stock sont validés."
|
||||||
|
),
|
||||||
|
"partial": (
|
||||||
|
"J'ai traité {done} bons sur {items_count}. "
|
||||||
|
"{failed} bons sont en attente — écarts de quantité à vérifier."
|
||||||
|
),
|
||||||
|
"failure": (
|
||||||
|
"Je n'ai pas pu traiter les bons de {workflow_name}. "
|
||||||
|
"L'ERP a peut-être refusé une ligne, je te rends la main."
|
||||||
|
),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
_GENERIC = DomainContext(
|
_GENERIC = DomainContext(
|
||||||
domain_id="generic",
|
domain_id="generic",
|
||||||
name="Bureautique générale",
|
name="Bureautique générale",
|
||||||
@@ -166,11 +958,37 @@ _GENERIC = DomainContext(
|
|||||||
"Tu es un assistant RPA qui observe des applications bureautiques. "
|
"Tu es un assistant RPA qui observe des applications bureautiques. "
|
||||||
"Décris précisément ce que tu vois à l'écran."
|
"Décris précisément ce que tu vois à l'écran."
|
||||||
),
|
),
|
||||||
|
summary_templates={
|
||||||
|
"item_singular": "action",
|
||||||
|
"item_plural": "actions",
|
||||||
|
"success_one": "C'est fait, j'ai terminé « {workflow_name} » en {elapsed_s}s.",
|
||||||
|
"success": (
|
||||||
|
"J'ai terminé « {workflow_name} » : {done} {item_word} exécutées "
|
||||||
|
"sur {items_count}."
|
||||||
|
),
|
||||||
|
"partial": (
|
||||||
|
"J'ai terminé « {workflow_name} » partiellement : "
|
||||||
|
"{done} {item_word} sur {items_count} ({failed} en échec)."
|
||||||
|
),
|
||||||
|
"failure": (
|
||||||
|
"Je n'ai pas pu terminer « {workflow_name} ». Je te rends la main."
|
||||||
|
),
|
||||||
|
},
|
||||||
|
clarification_templates={
|
||||||
|
"default": (
|
||||||
|
"Je ne trouve pas {target_friendly} à l'écran. "
|
||||||
|
"Tu peux me le montrer ?"
|
||||||
|
),
|
||||||
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# Registre des domaines disponibles
|
# Registre des domaines disponibles
|
||||||
_DOMAINS: Dict[str, DomainContext] = {
|
_DOMAINS: Dict[str, DomainContext] = {
|
||||||
"tim_codage": _TIM_CODAGE,
|
"tim_codage": _TIM_CODAGE,
|
||||||
|
"comptabilite": _COMPTABILITE,
|
||||||
|
"rh_paie": _RH_PAIE,
|
||||||
|
"stocks_logistique": _STOCKS_LOGISTIQUE,
|
||||||
"generic": _GENERIC,
|
"generic": _GENERIC,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -179,7 +997,8 @@ def get_domain_context(domain_id: str = "generic") -> DomainContext:
|
|||||||
"""Récupérer le contexte métier par ID.
|
"""Récupérer le contexte métier par ID.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
domain_id: Identifiant du domaine (tim_codage, generic, etc.)
|
domain_id: Identifiant du domaine (tim_codage, comptabilite, rh_paie,
|
||||||
|
stocks_logistique, generic, etc.)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
DomainContext correspondant, ou generic si non trouvé.
|
DomainContext correspondant, ou generic si non trouvé.
|
||||||
|
|||||||
373
agent_v0/server_v1/execution_plan_runner.py
Normal file
373
agent_v0/server_v1/execution_plan_runner.py
Normal file
@@ -0,0 +1,373 @@
|
|||||||
|
# agent_v0/server_v1/execution_plan_runner.py
|
||||||
|
"""
|
||||||
|
ExecutionPlanRunner — Adaptateur ExecutionPlan → actions replay.
|
||||||
|
|
||||||
|
Pièce d'intégration du pipeline V4 :
|
||||||
|
RawTrace → IRBuilder → WorkflowIR → ExecutionCompiler → ExecutionPlan → Runtime
|
||||||
|
|
||||||
|
Ce module convertit un `ExecutionPlan` (plan pré-compilé, déterministe) en
|
||||||
|
liste d'actions au format attendu par l'executor replay actuel (clé x_pct,
|
||||||
|
y_pct, target_spec, etc.), puis les injecte dans `_replay_queues`.
|
||||||
|
|
||||||
|
L'ancien chemin `build_replay_from_raw_events()` dans stream_processor.py
|
||||||
|
reste inchangé — les deux chemins coexistent pendant la transition.
|
||||||
|
|
||||||
|
Format d'action produit (compatible executor existant) :
|
||||||
|
{
|
||||||
|
"action_id": "act_...",
|
||||||
|
"type": "click",
|
||||||
|
"x_pct": 0.5,
|
||||||
|
"y_pct": 0.3,
|
||||||
|
"visual_mode": True,
|
||||||
|
"target_spec": {
|
||||||
|
"by_text": "...",
|
||||||
|
"window_title": "...",
|
||||||
|
"vlm_description": "...",
|
||||||
|
"anchor_image_base64": "...",
|
||||||
|
},
|
||||||
|
"expected_window_title": "...",
|
||||||
|
}
|
||||||
|
|
||||||
|
Auteur: Dom, Alice - Avril 2026
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
import threading
|
||||||
|
import uuid
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
from core.workflow.execution_plan import (
|
||||||
|
ExecutionNode,
|
||||||
|
ExecutionPlan,
|
||||||
|
ResolutionStrategy,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Substitution de variables
|
||||||
|
# =========================================================================
|
||||||
|
# Le WorkflowIR utilise la syntaxe `{var}` dans les champs texte.
|
||||||
|
# Ici on supporte les deux : `{var}` (IR natif) et `${var}` (replay legacy).
|
||||||
|
_VARIABLE_RE_CURLY = re.compile(r"\{(\w+)\}")
|
||||||
|
_VARIABLE_RE_DOLLAR = re.compile(r"\$\{(\w+)\}")
|
||||||
|
|
||||||
|
|
||||||
|
def substitute_variables(text: str, variables: Dict[str, Any]) -> str:
|
||||||
|
"""Remplacer `{var}` et `${var}` par leurs valeurs.
|
||||||
|
|
||||||
|
Priorité : variables fournies > placeholder brut (inchangé si inconnu).
|
||||||
|
"""
|
||||||
|
if not text or not variables:
|
||||||
|
return text
|
||||||
|
|
||||||
|
def replacer(match: "re.Match[str]") -> str:
|
||||||
|
var_name = match.group(1)
|
||||||
|
if var_name in variables:
|
||||||
|
return str(variables[var_name])
|
||||||
|
return match.group(0)
|
||||||
|
|
||||||
|
text = _VARIABLE_RE_DOLLAR.sub(replacer, text)
|
||||||
|
text = _VARIABLE_RE_CURLY.sub(replacer, text)
|
||||||
|
return text
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Conversion ExecutionNode → action replay
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def _strategy_to_target_spec(
|
||||||
|
strategy: Optional[ResolutionStrategy],
|
||||||
|
fallbacks: Optional[List[ResolutionStrategy]] = None,
|
||||||
|
intent: str = "",
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Construire un `target_spec` depuis les stratégies de résolution.
|
||||||
|
|
||||||
|
Fusionne la primaire et les fallbacks pour donner un maximum d'indices
|
||||||
|
au resolve_engine :
|
||||||
|
- OCR → by_text
|
||||||
|
- template → anchor_image_base64 (depuis anchor_b64)
|
||||||
|
- VLM → vlm_description
|
||||||
|
|
||||||
|
Règle V4 : la stratégie primaire dicte la méthode préférée.
|
||||||
|
Le champ `resolve_order` liste les méthodes dans l'ordre à essayer.
|
||||||
|
Le resolve_engine honore cet ordre au lieu de sa cascade par défaut.
|
||||||
|
|
||||||
|
resolve_order est la clé du "zéro VLM au runtime" :
|
||||||
|
- ["ocr", "template", "vlm"] → V4 typique (OCR rapide)
|
||||||
|
- ["template", "ocr", "vlm"] → apprentissage : template marche mieux
|
||||||
|
- ["vlm"] → éléments sans texte (icônes)
|
||||||
|
"""
|
||||||
|
spec: Dict[str, Any] = {}
|
||||||
|
|
||||||
|
all_strategies: List[ResolutionStrategy] = []
|
||||||
|
if strategy is not None:
|
||||||
|
all_strategies.append(strategy)
|
||||||
|
if fallbacks:
|
||||||
|
all_strategies.extend(fallbacks)
|
||||||
|
|
||||||
|
by_text_candidate = ""
|
||||||
|
anchor_candidate = ""
|
||||||
|
vlm_candidate = ""
|
||||||
|
uia_data: Dict[str, Any] = {}
|
||||||
|
dom_data: Dict[str, Any] = {}
|
||||||
|
resolve_order: List[str] = []
|
||||||
|
seen_methods: set = set()
|
||||||
|
|
||||||
|
for strat in all_strategies:
|
||||||
|
if not strat:
|
||||||
|
continue
|
||||||
|
if strat.method == "ocr" and strat.target_text and not by_text_candidate:
|
||||||
|
by_text_candidate = strat.target_text
|
||||||
|
elif strat.method == "template":
|
||||||
|
if strat.anchor_b64 and not anchor_candidate:
|
||||||
|
anchor_candidate = strat.anchor_b64
|
||||||
|
if strat.target_text and not by_text_candidate:
|
||||||
|
by_text_candidate = strat.target_text
|
||||||
|
elif strat.method == "vlm" and strat.vlm_description and not vlm_candidate:
|
||||||
|
vlm_candidate = strat.vlm_description
|
||||||
|
elif strat.method == "uia" and strat.uia_name and not uia_data:
|
||||||
|
uia_data = {
|
||||||
|
"name": strat.uia_name,
|
||||||
|
"control_type": strat.uia_control_type,
|
||||||
|
"automation_id": strat.uia_automation_id,
|
||||||
|
"parent_path": strat.uia_parent_path,
|
||||||
|
}
|
||||||
|
elif strat.method == "dom" and strat.dom_selector and not dom_data:
|
||||||
|
dom_data = {
|
||||||
|
"selector": strat.dom_selector,
|
||||||
|
"xpath": strat.dom_xpath,
|
||||||
|
"url_pattern": strat.dom_url_pattern,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Construire l'ordre des méthodes (dans l'ordre primaire → fallbacks)
|
||||||
|
if strat.method and strat.method not in seen_methods:
|
||||||
|
resolve_order.append(strat.method)
|
||||||
|
seen_methods.add(strat.method)
|
||||||
|
|
||||||
|
if by_text_candidate:
|
||||||
|
spec["by_text"] = by_text_candidate
|
||||||
|
if anchor_candidate:
|
||||||
|
spec["anchor_image_base64"] = anchor_candidate
|
||||||
|
if vlm_candidate:
|
||||||
|
spec["vlm_description"] = vlm_candidate
|
||||||
|
elif intent and "vlm_description" not in spec:
|
||||||
|
# L'intention métier devient le prompt VLM de dernier recours
|
||||||
|
spec["vlm_description"] = intent
|
||||||
|
|
||||||
|
# Données UIA — consommées par l'agent Windows via lea_uia.exe
|
||||||
|
if uia_data:
|
||||||
|
spec["uia_target"] = uia_data
|
||||||
|
|
||||||
|
# Données DOM — consommées par l'agent Windows via CDP (futur)
|
||||||
|
if dom_data:
|
||||||
|
spec["dom_target"] = dom_data
|
||||||
|
|
||||||
|
# Ordre de résolution pré-compilé — c'est LA pièce centrale du V4
|
||||||
|
if resolve_order:
|
||||||
|
spec["resolve_order"] = resolve_order
|
||||||
|
|
||||||
|
return spec
|
||||||
|
|
||||||
|
|
||||||
|
def execution_node_to_action(
|
||||||
|
node: ExecutionNode,
|
||||||
|
variables: Optional[Dict[str, Any]] = None,
|
||||||
|
id_prefix: str = "act_plan",
|
||||||
|
) -> Optional[Dict[str, Any]]:
|
||||||
|
"""Convertir un `ExecutionNode` en action replay.
|
||||||
|
|
||||||
|
Retourne `None` si le nœud n'est pas exécutable (type inconnu).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
node: Le nœud à convertir.
|
||||||
|
variables: Dictionnaire de variables pour substituer les {var}.
|
||||||
|
id_prefix: Préfixe pour l'action_id générée.
|
||||||
|
"""
|
||||||
|
variables = variables or {}
|
||||||
|
|
||||||
|
action: Dict[str, Any] = {
|
||||||
|
"action_id": f"{id_prefix}_{uuid.uuid4().hex[:8]}",
|
||||||
|
"plan_node_id": node.node_id,
|
||||||
|
}
|
||||||
|
|
||||||
|
if node.intent:
|
||||||
|
action["intention"] = node.intent
|
||||||
|
if node.step_id:
|
||||||
|
action["plan_step_id"] = node.step_id
|
||||||
|
if node.is_optional:
|
||||||
|
action["is_optional"] = True
|
||||||
|
|
||||||
|
# Métadonnées d'exécution utiles au runtime
|
||||||
|
if node.timeout_ms:
|
||||||
|
action["timeout_ms"] = node.timeout_ms
|
||||||
|
if node.max_retries:
|
||||||
|
action["max_retries"] = node.max_retries
|
||||||
|
if node.recovery_action:
|
||||||
|
action["recovery_action"] = node.recovery_action
|
||||||
|
if node.success_condition:
|
||||||
|
action["success_condition"] = node.success_condition.to_dict()
|
||||||
|
|
||||||
|
action_type = node.action_type
|
||||||
|
|
||||||
|
if action_type == "click":
|
||||||
|
action["type"] = "click"
|
||||||
|
|
||||||
|
strategy = node.strategy_primary
|
||||||
|
fallbacks = node.strategy_fallbacks or []
|
||||||
|
|
||||||
|
# ── Déduction des coordonnées depuis la stratégie primaire ──
|
||||||
|
# - OCR : pas de coordonnées (le runtime trouve via OCR)
|
||||||
|
# - template : l'anchor sera utilisé au runtime
|
||||||
|
# - VLM : la description sera utilisée au runtime
|
||||||
|
# Dans tous les cas le resolve_engine retrouve les pixels au replay.
|
||||||
|
# On expose néanmoins un centre (0.5, 0.5) neutre pour rester
|
||||||
|
# compatible avec les validations de queue existantes.
|
||||||
|
action["x_pct"] = 0.5
|
||||||
|
action["y_pct"] = 0.5
|
||||||
|
action["visual_mode"] = True
|
||||||
|
|
||||||
|
target_spec = _strategy_to_target_spec(
|
||||||
|
strategy=strategy,
|
||||||
|
fallbacks=fallbacks,
|
||||||
|
intent=node.intent,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Titre fenêtre attendu AVANT (pré-vérif stricte)
|
||||||
|
# Si absent, aucune pré-vérif → l'action s'exécute quel que soit l'écran
|
||||||
|
if node.expected_window_before:
|
||||||
|
action["expected_window_before"] = node.expected_window_before
|
||||||
|
target_spec["window_title"] = node.expected_window_before
|
||||||
|
|
||||||
|
# Titre fenêtre attendu APRÈS (post-vérif stricte)
|
||||||
|
# C'est la garantie de passage à l'action suivante
|
||||||
|
if node.success_condition and node.success_condition.expected_title:
|
||||||
|
action["expected_window_title"] = node.success_condition.expected_title
|
||||||
|
action["success_strict"] = (
|
||||||
|
node.success_condition.method == "title_match"
|
||||||
|
)
|
||||||
|
if "window_title" not in target_spec:
|
||||||
|
target_spec["window_title"] = node.success_condition.expected_title
|
||||||
|
|
||||||
|
if target_spec:
|
||||||
|
action["target_spec"] = target_spec
|
||||||
|
|
||||||
|
elif action_type == "type":
|
||||||
|
action["type"] = "type"
|
||||||
|
text = node.text or ""
|
||||||
|
# Substituer les variables avant d'envoyer (ex: {patient} → "DUPONT")
|
||||||
|
action["text"] = substitute_variables(text, variables)
|
||||||
|
if node.variable_name:
|
||||||
|
action["variable_name"] = node.variable_name
|
||||||
|
|
||||||
|
elif action_type in ("key_combo", "key_press"):
|
||||||
|
action["type"] = "key_combo"
|
||||||
|
keys = list(node.keys or [])
|
||||||
|
if not keys:
|
||||||
|
return None
|
||||||
|
action["keys"] = keys
|
||||||
|
|
||||||
|
elif action_type == "wait":
|
||||||
|
action["type"] = "wait"
|
||||||
|
duration = node.duration_ms or 1000
|
||||||
|
action["duration_ms"] = int(duration)
|
||||||
|
|
||||||
|
elif action_type == "scroll":
|
||||||
|
action["type"] = "scroll"
|
||||||
|
# Les stratégies peuvent contenir une zone — pas exploitée ici,
|
||||||
|
# le scroll est implicitement sur la fenêtre active.
|
||||||
|
action["delta"] = -3
|
||||||
|
|
||||||
|
else:
|
||||||
|
logger.debug("execution_node_to_action: type inconnu '%s' ignoré", action_type)
|
||||||
|
return None
|
||||||
|
|
||||||
|
return action
|
||||||
|
|
||||||
|
|
||||||
|
def execution_plan_to_actions(
|
||||||
|
plan: ExecutionPlan,
|
||||||
|
variables: Optional[Dict[str, Any]] = None,
|
||||||
|
id_prefix: str = "act_plan",
|
||||||
|
) -> List[Dict[str, Any]]:
|
||||||
|
"""Convertir un `ExecutionPlan` complet en liste d'actions replay.
|
||||||
|
|
||||||
|
Les variables passées en argument écrasent celles du plan.
|
||||||
|
"""
|
||||||
|
merged_vars: Dict[str, Any] = dict(plan.variables or {})
|
||||||
|
if variables:
|
||||||
|
merged_vars.update(variables)
|
||||||
|
|
||||||
|
actions: List[Dict[str, Any]] = []
|
||||||
|
for node in plan.nodes:
|
||||||
|
action = execution_node_to_action(
|
||||||
|
node=node,
|
||||||
|
variables=merged_vars,
|
||||||
|
id_prefix=id_prefix,
|
||||||
|
)
|
||||||
|
if action is not None:
|
||||||
|
actions.append(action)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"execution_plan_to_actions(%s) : %d nœuds → %d actions replay "
|
||||||
|
"(vars=%d)",
|
||||||
|
plan.plan_id, plan.total_nodes, len(actions), len(merged_vars),
|
||||||
|
)
|
||||||
|
return actions
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Injection dans la queue de replay
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def inject_plan_into_queue(
|
||||||
|
plan: ExecutionPlan,
|
||||||
|
session_id: str,
|
||||||
|
replay_queues: Dict[str, List[Dict[str, Any]]],
|
||||||
|
variables: Optional[Dict[str, Any]] = None,
|
||||||
|
lock: Optional[threading.Lock] = None,
|
||||||
|
replace: bool = True,
|
||||||
|
id_prefix: str = "act_plan",
|
||||||
|
) -> List[Dict[str, Any]]:
|
||||||
|
"""Injecter un `ExecutionPlan` dans la queue de replay d'une session.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
plan: Le plan à exécuter.
|
||||||
|
session_id: La session Agent V1 cible.
|
||||||
|
replay_queues: Le dict global `_replay_queues` partagé par le serveur.
|
||||||
|
variables: Variables à substituer dans les actions.
|
||||||
|
lock: Verrou optionnel à acquérir avant d'écrire (threadsafe).
|
||||||
|
replace: Si True (défaut), remplace la queue existante. Sinon, append.
|
||||||
|
id_prefix: Préfixe pour les action_id générés.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
La liste des actions injectées (après substitution).
|
||||||
|
"""
|
||||||
|
actions = execution_plan_to_actions(
|
||||||
|
plan=plan, variables=variables, id_prefix=id_prefix,
|
||||||
|
)
|
||||||
|
|
||||||
|
def _write() -> None:
|
||||||
|
if replace:
|
||||||
|
replay_queues[session_id] = list(actions)
|
||||||
|
else:
|
||||||
|
replay_queues[session_id].extend(actions)
|
||||||
|
|
||||||
|
if lock is not None:
|
||||||
|
with lock:
|
||||||
|
_write()
|
||||||
|
else:
|
||||||
|
_write()
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"inject_plan_into_queue(%s) : %d actions injectées dans la queue "
|
||||||
|
"de la session '%s' (replace=%s)",
|
||||||
|
plan.plan_id, len(actions), session_id, replace,
|
||||||
|
)
|
||||||
|
return actions
|
||||||
@@ -65,7 +65,8 @@ class LiveSessionState:
|
|||||||
class LiveSessionManager:
|
class LiveSessionManager:
|
||||||
"""Gère les sessions live en mémoire côté serveur avec persistance disque."""
|
"""Gère les sessions live en mémoire côté serveur avec persistance disque."""
|
||||||
|
|
||||||
def __init__(self, persist_dir: str = "data/streaming_sessions"):
|
def __init__(self, persist_dir: str = "data/streaming_sessions",
|
||||||
|
live_sessions_dir: Optional[str] = None):
|
||||||
self._sessions: Dict[str, LiveSessionState] = {}
|
self._sessions: Dict[str, LiveSessionState] = {}
|
||||||
self._lock = threading.Lock()
|
self._lock = threading.Lock()
|
||||||
self._persist_dir = Path(persist_dir)
|
self._persist_dir = Path(persist_dir)
|
||||||
@@ -74,11 +75,16 @@ class LiveSessionManager:
|
|||||||
self._persist_counter = 0 # Compteur pour limiter la fréquence de persistance
|
self._persist_counter = 0 # Compteur pour limiter la fréquence de persistance
|
||||||
self._persist_interval = 10 # Persister toutes les N modifications
|
self._persist_interval = 10 # Persister toutes les N modifications
|
||||||
|
|
||||||
|
# Dossier des sessions live (JSONL + screenshots)
|
||||||
|
self._live_sessions_dir = Path(live_sessions_dir) if live_sessions_dir else None
|
||||||
|
|
||||||
# Charger les sessions persistées au démarrage
|
# Charger les sessions persistées au démarrage
|
||||||
self._load_persisted_sessions()
|
self._load_persisted_sessions()
|
||||||
|
# Reconstruire les sessions depuis les live_events.jsonl sur disque
|
||||||
|
self._discover_sessions_from_disk()
|
||||||
|
|
||||||
def _load_persisted_sessions(self):
|
def _load_persisted_sessions(self):
|
||||||
"""Charger les sessions sauvegardées au démarrage."""
|
"""Charger les sessions sauvegardées au démarrage (JSON state files)."""
|
||||||
count = 0
|
count = 0
|
||||||
for session_file in sorted(self._persist_dir.glob("sess_*.json")):
|
for session_file in sorted(self._persist_dir.glob("sess_*.json")):
|
||||||
try:
|
try:
|
||||||
@@ -92,6 +98,66 @@ class LiveSessionManager:
|
|||||||
if count:
|
if count:
|
||||||
logger.info(f"{count} session(s) restaurée(s) depuis {self._persist_dir}")
|
logger.info(f"{count} session(s) restaurée(s) depuis {self._persist_dir}")
|
||||||
|
|
||||||
|
def _discover_sessions_from_disk(self):
|
||||||
|
"""Découvrir les sessions depuis les live_events.jsonl sur disque.
|
||||||
|
|
||||||
|
Reconstruit les sessions manquantes du session_manager en scannant :
|
||||||
|
- live_sessions/sess_*/live_events.jsonl (sessions racine)
|
||||||
|
- live_sessions/{machine_id}/sess_*/live_events.jsonl (multi-machine)
|
||||||
|
|
||||||
|
Ne touche pas aux sessions déjà chargées depuis le JSON persist.
|
||||||
|
"""
|
||||||
|
if self._live_sessions_dir is None:
|
||||||
|
return
|
||||||
|
live_dir = self._live_sessions_dir
|
||||||
|
if not live_dir.exists():
|
||||||
|
return
|
||||||
|
|
||||||
|
discovered = 0
|
||||||
|
for jsonl_file in sorted(live_dir.glob("**/live_events.jsonl")):
|
||||||
|
session_dir = jsonl_file.parent
|
||||||
|
session_id = session_dir.name
|
||||||
|
if not session_id.startswith("sess_"):
|
||||||
|
continue
|
||||||
|
if session_id in self._sessions:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Déduire le machine_id depuis le chemin parent
|
||||||
|
parent_name = session_dir.parent.name
|
||||||
|
if parent_name == live_dir.name:
|
||||||
|
machine_id = "default"
|
||||||
|
else:
|
||||||
|
machine_id = parent_name
|
||||||
|
|
||||||
|
# Compter events et screenshots
|
||||||
|
events_count = 0
|
||||||
|
try:
|
||||||
|
with open(jsonl_file, 'r', encoding='utf-8') as f:
|
||||||
|
for _ in f:
|
||||||
|
events_count += 1
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
shots_dir = session_dir / "shots"
|
||||||
|
shots_count = len(list(shots_dir.glob("shot_*_full.png"))) if shots_dir.exists() else 0
|
||||||
|
|
||||||
|
# Créer la session en mémoire
|
||||||
|
session = LiveSessionState(
|
||||||
|
session_id=session_id,
|
||||||
|
machine_id=machine_id,
|
||||||
|
finalized=False,
|
||||||
|
)
|
||||||
|
# Stocker le nombre d'events/shots dans les métadonnées
|
||||||
|
session.shot_paths = {f"shot_{i:04d}": "" for i in range(shots_count)}
|
||||||
|
self._sessions[session_id] = session
|
||||||
|
discovered += 1
|
||||||
|
|
||||||
|
if discovered:
|
||||||
|
logger.info(
|
||||||
|
f"{discovered} session(s) découverte(s) depuis {live_dir} "
|
||||||
|
f"(total: {len(self._sessions)} sessions en mémoire)"
|
||||||
|
)
|
||||||
|
|
||||||
def _persist_session(self, session_id: str):
|
def _persist_session(self, session_id: str):
|
||||||
"""Sauvegarder une session sur disque (appelé périodiquement)."""
|
"""Sauvegarder une session sur disque (appelé périodiquement)."""
|
||||||
session = self._sessions.get(session_id)
|
session = self._sessions.get(session_id)
|
||||||
@@ -102,7 +168,7 @@ class LiveSessionManager:
|
|||||||
with open(filepath, 'w', encoding='utf-8') as f:
|
with open(filepath, 'w', encoding='utf-8') as f:
|
||||||
json.dump(session.to_dict(), f, ensure_ascii=False)
|
json.dump(session.to_dict(), f, ensure_ascii=False)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug(f"Erreur persistance session {session_id}: {e}")
|
logger.warning(f"Erreur persistance session {session_id}: {e}")
|
||||||
|
|
||||||
def _maybe_persist(self, session_id: str):
|
def _maybe_persist(self, session_id: str):
|
||||||
"""Persister si le compteur atteint l'intervalle."""
|
"""Persister si le compteur atteint l'intervalle."""
|
||||||
@@ -180,6 +246,17 @@ class LiveSessionManager:
|
|||||||
if meta_val is not None:
|
if meta_val is not None:
|
||||||
info[meta_key] = meta_val
|
info[meta_key] = meta_val
|
||||||
session.last_window_info = info
|
session.last_window_info = info
|
||||||
|
# Exploiter window_capture (envoyé par l'agent avec la capture fenêtre)
|
||||||
|
# pour enrichir last_window_info avec le titre précis de la fenêtre cliquée
|
||||||
|
window_capture = event_data.get("window_capture")
|
||||||
|
if window_capture and isinstance(window_capture, dict):
|
||||||
|
wc_title = window_capture.get("title", "").strip()
|
||||||
|
wc_app = window_capture.get("app_name", "").strip()
|
||||||
|
if wc_title:
|
||||||
|
session.last_window_info["title"] = wc_title
|
||||||
|
if wc_app:
|
||||||
|
session.last_window_info["app_name"] = wc_app
|
||||||
|
|
||||||
# Accumuler les titres/apps pour le nommage automatique
|
# Accumuler les titres/apps pour le nommage automatique
|
||||||
title = session.last_window_info.get("title", "").strip()
|
title = session.last_window_info.get("title", "").strip()
|
||||||
app_name = session.last_window_info.get("app_name", "").strip()
|
app_name = session.last_window_info.get("app_name", "").strip()
|
||||||
@@ -221,18 +298,41 @@ class LiveSessionManager:
|
|||||||
import socket
|
import socket
|
||||||
|
|
||||||
# Construire les événements au format RawSession
|
# Construire les événements au format RawSession
|
||||||
|
# Important : copier TOUTES les données de l'événement (pos, text, keys, button...)
|
||||||
|
# car Event.from_dict() met tout sauf t/type/window/screenshot_id dans event.data,
|
||||||
|
# et le GraphBuilder utilise event.data pour construire les actions.
|
||||||
events = []
|
events = []
|
||||||
for evt in session.events:
|
for evt in session.events:
|
||||||
|
# Extraire window info (plusieurs formats possibles)
|
||||||
|
window_raw = evt.get("window")
|
||||||
|
if isinstance(window_raw, dict):
|
||||||
|
window_info = {
|
||||||
|
"title": window_raw.get("title", session.last_window_info.get("title", "")),
|
||||||
|
"app_name": window_raw.get("app_name", session.last_window_info.get("app_name", "unknown")),
|
||||||
|
}
|
||||||
|
else:
|
||||||
window_info = {
|
window_info = {
|
||||||
"title": evt.get("window_title", session.last_window_info.get("title", "")),
|
"title": evt.get("window_title", session.last_window_info.get("title", "")),
|
||||||
"app_name": evt.get("app_name", session.last_window_info.get("app_name", "unknown")),
|
"app_name": evt.get("app_name", session.last_window_info.get("app_name", "unknown")),
|
||||||
}
|
}
|
||||||
events.append({
|
|
||||||
|
raw_event = {
|
||||||
"t": evt.get("timestamp", 0),
|
"t": evt.get("timestamp", 0),
|
||||||
"type": evt.get("type", "unknown"),
|
"type": evt.get("type", "unknown"),
|
||||||
"window": window_info,
|
"window": window_info,
|
||||||
"screenshot_id": evt.get("screenshot_id"),
|
"screenshot_id": evt.get("screenshot_id"),
|
||||||
})
|
}
|
||||||
|
|
||||||
|
# Copier les données spécifiques au type d'événement
|
||||||
|
# (pos, button, text, keys, etc.) — indispensable pour le replay
|
||||||
|
_skip_keys = {"type", "timestamp", "window", "window_title",
|
||||||
|
"app_name", "screenshot_id", "machine_id",
|
||||||
|
"screen_metadata", "vision_info"}
|
||||||
|
for key, value in evt.items():
|
||||||
|
if key not in _skip_keys and key not in raw_event:
|
||||||
|
raw_event[key] = value
|
||||||
|
|
||||||
|
events.append(raw_event)
|
||||||
|
|
||||||
# Construire les screenshots au format RawSession
|
# Construire les screenshots au format RawSession
|
||||||
screenshots = []
|
screenshots = []
|
||||||
|
|||||||
1322
agent_v0/server_v1/replay_engine.py
Normal file
1322
agent_v0/server_v1/replay_engine.py
Normal file
File diff suppressed because it is too large
Load Diff
143
agent_v0/server_v1/replay_failure_logger.py
Normal file
143
agent_v0/server_v1/replay_failure_logger.py
Normal file
@@ -0,0 +1,143 @@
|
|||||||
|
# agent_v0/server_v1/replay_failure_logger.py
|
||||||
|
"""
|
||||||
|
Logger des echecs de replay pour l'apprentissage futur.
|
||||||
|
|
||||||
|
Chaque echec de resolution visuelle (target_not_found) est sauvegarde dans un
|
||||||
|
fichier JSONL par session, avec le screenshot de ce que l'agent voit au moment
|
||||||
|
de l'echec. Ces donnees alimentent le learning loop : re-entrainement des
|
||||||
|
embeddings, ajustement des seuils, enrichissement des target_spec.
|
||||||
|
|
||||||
|
Structure :
|
||||||
|
data/training/replay_failures/{replay_id}/failures.jsonl
|
||||||
|
data/training/replay_failures/{replay_id}/screenshots/{action_id}.jpg
|
||||||
|
"""
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger("replay_failure_logger")
|
||||||
|
|
||||||
|
# Repertoire racine des echecs de replay
|
||||||
|
_FAILURES_BASE_DIR = Path("data/training/replay_failures")
|
||||||
|
|
||||||
|
# Lock pour les ecritures concurrentes
|
||||||
|
_write_lock = threading.Lock()
|
||||||
|
|
||||||
|
|
||||||
|
def log_replay_failure(
|
||||||
|
replay_id: str,
|
||||||
|
action_id: str,
|
||||||
|
target_spec: Optional[Dict[str, Any]],
|
||||||
|
screenshot_b64: Optional[str],
|
||||||
|
resolution_attempts: Optional[List[Dict[str, Any]]] = None,
|
||||||
|
error: str = "target_not_found",
|
||||||
|
extra: Optional[Dict[str, Any]] = None,
|
||||||
|
) -> Optional[str]:
|
||||||
|
"""Sauvegarder un echec de replay pour l'apprentissage futur.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
replay_id: Identifiant du replay en cours
|
||||||
|
action_id: Identifiant de l'action echouee
|
||||||
|
target_spec: Specification de la cible recherchee
|
||||||
|
screenshot_b64: Screenshot JPEG base64 de ce que l'agent voit
|
||||||
|
resolution_attempts: Liste des tentatives de resolution (methode, score, etc.)
|
||||||
|
error: Type d'erreur (defaut: "target_not_found")
|
||||||
|
extra: Champs supplementaires a stocker
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Chemin du fichier JSONL cree, ou None en cas d'erreur.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Creer le repertoire de la session
|
||||||
|
session_dir = _FAILURES_BASE_DIR / replay_id
|
||||||
|
session_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
# Sauvegarder le screenshot si fourni
|
||||||
|
screenshot_path = None
|
||||||
|
if screenshot_b64:
|
||||||
|
screenshots_dir = session_dir / "screenshots"
|
||||||
|
screenshots_dir.mkdir(exist_ok=True)
|
||||||
|
screenshot_path = str(screenshots_dir / f"{action_id}.jpg")
|
||||||
|
try:
|
||||||
|
img_bytes = base64.b64decode(screenshot_b64)
|
||||||
|
with open(screenshot_path, "wb") as f:
|
||||||
|
f.write(img_bytes)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Impossible de sauvegarder le screenshot : {e}")
|
||||||
|
screenshot_path = None
|
||||||
|
|
||||||
|
# Construire l'entree JSONL
|
||||||
|
entry = {
|
||||||
|
"timestamp": time.strftime("%Y-%m-%dT%H:%M:%S"),
|
||||||
|
"replay_id": replay_id,
|
||||||
|
"action_id": action_id,
|
||||||
|
"target_spec": _sanitize_target_spec(target_spec) if target_spec else None,
|
||||||
|
"screenshot_path": screenshot_path,
|
||||||
|
"resolution_attempts": resolution_attempts or [],
|
||||||
|
"error": error,
|
||||||
|
}
|
||||||
|
if extra:
|
||||||
|
entry.update(extra)
|
||||||
|
|
||||||
|
# Ecrire dans le fichier JSONL (thread-safe)
|
||||||
|
jsonl_path = session_dir / "failures.jsonl"
|
||||||
|
with _write_lock:
|
||||||
|
with open(jsonl_path, "a", encoding="utf-8") as f:
|
||||||
|
f.write(json.dumps(entry, ensure_ascii=False) + "\n")
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
f"Echec replay loggue : replay={replay_id} action={action_id} "
|
||||||
|
f"error={error} -> {jsonl_path}"
|
||||||
|
)
|
||||||
|
return str(jsonl_path)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Impossible de logger l'echec replay : {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _sanitize_target_spec(target_spec: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
|
"""Nettoyer le target_spec pour le stockage (retirer les images base64 volumineuses)."""
|
||||||
|
cleaned = {}
|
||||||
|
for key, value in target_spec.items():
|
||||||
|
# Ne pas stocker les images base64 (trop volumineux pour le JSONL)
|
||||||
|
if key.endswith("_base64") or key.endswith("_b64"):
|
||||||
|
cleaned[key] = f"<{len(str(value))} chars>" if value else None
|
||||||
|
else:
|
||||||
|
cleaned[key] = value
|
||||||
|
return cleaned
|
||||||
|
|
||||||
|
|
||||||
|
def get_failure_count(replay_id: str) -> int:
|
||||||
|
"""Compter le nombre d'echecs pour un replay donne."""
|
||||||
|
jsonl_path = _FAILURES_BASE_DIR / replay_id / "failures.jsonl"
|
||||||
|
if not jsonl_path.exists():
|
||||||
|
return 0
|
||||||
|
try:
|
||||||
|
with open(jsonl_path, "r", encoding="utf-8") as f:
|
||||||
|
return sum(1 for _ in f)
|
||||||
|
except Exception:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
def get_failures(replay_id: str) -> List[Dict[str, Any]]:
|
||||||
|
"""Lire tous les echecs pour un replay donne."""
|
||||||
|
jsonl_path = _FAILURES_BASE_DIR / replay_id / "failures.jsonl"
|
||||||
|
if not jsonl_path.exists():
|
||||||
|
return []
|
||||||
|
failures = []
|
||||||
|
try:
|
||||||
|
with open(jsonl_path, "r", encoding="utf-8") as f:
|
||||||
|
for line in f:
|
||||||
|
line = line.strip()
|
||||||
|
if line:
|
||||||
|
failures.append(json.loads(line))
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Erreur lecture echecs replay {replay_id} : {e}")
|
||||||
|
return failures
|
||||||
@@ -175,6 +175,55 @@ class ReplayLearner:
|
|||||||
|
|
||||||
self.record(outcome)
|
self.record(outcome)
|
||||||
|
|
||||||
|
def record_human_correction(
|
||||||
|
self,
|
||||||
|
session_id: str,
|
||||||
|
action: Dict[str, Any],
|
||||||
|
correction: Dict[str, Any],
|
||||||
|
) -> None:
|
||||||
|
"""Enregistrer une correction humaine (mode apprentissage supervisé).
|
||||||
|
|
||||||
|
L'humain a montré à Léa où cliquer. On stocke cette correction
|
||||||
|
dans target_memory.db pour que la prochaine fois, Léa sache.
|
||||||
|
"""
|
||||||
|
target_spec = action.get("target_spec", {})
|
||||||
|
by_text = target_spec.get("by_text", "")
|
||||||
|
window_title = target_spec.get("window_title", "")
|
||||||
|
x_pct = correction.get("x_pct", 0.0)
|
||||||
|
y_pct = correction.get("y_pct", 0.0)
|
||||||
|
|
||||||
|
# Enregistrer dans le JSONL d'apprentissage
|
||||||
|
outcome = ActionOutcome(
|
||||||
|
session_id=session_id,
|
||||||
|
action_id=action.get("action_id", ""),
|
||||||
|
action_type="click",
|
||||||
|
target_description=by_text,
|
||||||
|
window_title=window_title,
|
||||||
|
resolution_method="human_supervised",
|
||||||
|
resolution_score=1.0, # Confiance maximale — l'humain a montré
|
||||||
|
success=True,
|
||||||
|
)
|
||||||
|
self.record(outcome)
|
||||||
|
|
||||||
|
# Stocker dans target_memory.db pour le lookup futur
|
||||||
|
try:
|
||||||
|
from .replay_memory import get_target_memory_store
|
||||||
|
store = get_target_memory_store()
|
||||||
|
if store:
|
||||||
|
store.record_success(
|
||||||
|
screen_signature="human_correction",
|
||||||
|
target_spec=target_spec,
|
||||||
|
resolved_position={"x_pct": x_pct, "y_pct": y_pct},
|
||||||
|
method="human_supervised",
|
||||||
|
score=1.0,
|
||||||
|
)
|
||||||
|
logger.info(
|
||||||
|
f"[APPRENTISSAGE] Correction stockée dans target_memory : "
|
||||||
|
f"'{by_text}' → ({x_pct:.4f}, {y_pct:.4f})"
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Learning: échec stockage target_memory: {e}")
|
||||||
|
|
||||||
def query_similar(
|
def query_similar(
|
||||||
self,
|
self,
|
||||||
target_description: str = "",
|
target_description: str = "",
|
||||||
|
|||||||
323
agent_v0/server_v1/replay_memory.py
Normal file
323
agent_v0/server_v1/replay_memory.py
Normal file
@@ -0,0 +1,323 @@
|
|||||||
|
# agent_v0/server_v1/replay_memory.py
|
||||||
|
"""
|
||||||
|
replay_memory — Greffe de TargetMemoryStore (Fiche #18) sur le pipeline V4.
|
||||||
|
|
||||||
|
Phase 1 du plan apprentissage Léa (docs/PLAN_APPRENTISSAGE_LEA.md).
|
||||||
|
|
||||||
|
Le runtime V4 appelle :
|
||||||
|
- `memory_lookup()` AVANT la cascade coûteuse (OCR/template/VLM)
|
||||||
|
- `memory_record_success()` APRÈS validation post-condition (`title_match` strict)
|
||||||
|
- `memory_record_failure()` sur les échecs
|
||||||
|
|
||||||
|
Fingerprint léger V4 : les coordonnées clic (x_pct, y_pct) sont stockées dans
|
||||||
|
les deux premières valeurs de `TargetFingerprint.bbox`, et la méthode de
|
||||||
|
résolution ayant réussi dans le champ `etype`.
|
||||||
|
|
||||||
|
Signature d'écran V4 : `sha256(normalize(window_title))[:16]`. Simple et
|
||||||
|
robuste aux données variables car les titres de fenêtre restent stables.
|
||||||
|
Les faux positifs (même titre, écrans différents) sont rattrapés par la
|
||||||
|
post-condition qui décrémentera la fiabilité via `record_failure()`.
|
||||||
|
|
||||||
|
Critère de fiabilité : 2 succès minimum et < 30% d'échecs pour déclencher
|
||||||
|
un hit (paramètres de `TargetMemoryStore.lookup`). C'est exactement la
|
||||||
|
cristallisation par répétition que l'on veut — Léa est un stagiaire qui
|
||||||
|
apprend de l'observation.
|
||||||
|
|
||||||
|
Auteur : Dom, Alice — avril 2026
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import unicodedata
|
||||||
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Singleton du store persistant
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
_MEMORY_SINGLETON: Optional[Any] = None
|
||||||
|
_MEMORY_DISABLED = False
|
||||||
|
|
||||||
|
|
||||||
|
def get_memory_store():
|
||||||
|
"""Retourne le `TargetMemoryStore` partagé, ou None si indisponible.
|
||||||
|
|
||||||
|
Lazy-init : le store n'est créé qu'au premier appel, ce qui évite
|
||||||
|
d'importer `core.learning.target_memory_store` à l'import du module
|
||||||
|
(et donc d'éviter les effets de bord sur le démarrage du serveur).
|
||||||
|
"""
|
||||||
|
global _MEMORY_SINGLETON, _MEMORY_DISABLED
|
||||||
|
|
||||||
|
if _MEMORY_DISABLED:
|
||||||
|
return None
|
||||||
|
if _MEMORY_SINGLETON is not None:
|
||||||
|
return _MEMORY_SINGLETON
|
||||||
|
|
||||||
|
try:
|
||||||
|
from core.learning.target_memory_store import TargetMemoryStore
|
||||||
|
|
||||||
|
base_path = os.environ.get("RPA_LEARNING_DIR", "data/learning")
|
||||||
|
_MEMORY_SINGLETON = TargetMemoryStore(base_path=base_path)
|
||||||
|
logger.info(
|
||||||
|
"replay_memory: TargetMemoryStore initialisé (base=%s)", base_path,
|
||||||
|
)
|
||||||
|
return _MEMORY_SINGLETON
|
||||||
|
except Exception as exc:
|
||||||
|
logger.warning(
|
||||||
|
"replay_memory: TargetMemoryStore indisponible (%s) — "
|
||||||
|
"l'apprentissage persistant est désactivé", exc,
|
||||||
|
)
|
||||||
|
_MEMORY_DISABLED = True
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Normalisation de texte et hash
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def _norm_text(s: str) -> str:
|
||||||
|
"""Normalise un texte pour un hash stable (accents, casse, NBSP, espaces)."""
|
||||||
|
if not s:
|
||||||
|
return ""
|
||||||
|
s = s.replace("\u00A0", " ").strip().lower()
|
||||||
|
s = unicodedata.normalize("NFKD", s)
|
||||||
|
s = "".join(ch for ch in s if not unicodedata.combining(ch))
|
||||||
|
return " ".join(s.split())
|
||||||
|
|
||||||
|
|
||||||
|
def compute_screen_sig(window_title: str) -> str:
|
||||||
|
"""Calcule la signature d'écran V4 à partir du titre de fenêtre.
|
||||||
|
|
||||||
|
Le `window_title` est strict depuis la phase "controle des étapes"
|
||||||
|
(post-condition `title_match` obligatoire). C'est notre clé naturelle.
|
||||||
|
"""
|
||||||
|
norm = _norm_text(window_title)
|
||||||
|
if not norm:
|
||||||
|
return ""
|
||||||
|
return hashlib.sha256(norm.encode("utf-8")).hexdigest()[:16]
|
||||||
|
|
||||||
|
|
||||||
|
class _TargetSpecLike:
|
||||||
|
"""Adaptateur dict → objet pour `TargetMemoryStore._hash_target_spec()`.
|
||||||
|
|
||||||
|
Le hash interne de TargetMemoryStore utilise `getattr(spec, "by_role", ...)`
|
||||||
|
qui ne fonctionne pas avec un dict brut. On expose les attributs nécessaires.
|
||||||
|
|
||||||
|
On intègre aussi `resolve_order` et `vlm_description` dans `context_hints`
|
||||||
|
pour qu'ils entrent dans le hash — deux actions avec le même `by_text`
|
||||||
|
mais un `resolve_order` différent doivent avoir des hashes distincts.
|
||||||
|
"""
|
||||||
|
|
||||||
|
__slots__ = ("by_role", "by_text", "by_position", "context_hints")
|
||||||
|
|
||||||
|
def __init__(self, d: Dict[str, Any]):
|
||||||
|
self.by_role = d.get("by_role", "") or ""
|
||||||
|
self.by_text = d.get("by_text", "") or ""
|
||||||
|
self.by_position = d.get("by_position")
|
||||||
|
|
||||||
|
hints = dict(d.get("context_hints") or {})
|
||||||
|
resolve_order = d.get("resolve_order")
|
||||||
|
if resolve_order:
|
||||||
|
hints["_resolve_order"] = "|".join(resolve_order) if isinstance(
|
||||||
|
resolve_order, list
|
||||||
|
) else str(resolve_order)
|
||||||
|
if d.get("vlm_description"):
|
||||||
|
hints["_vlm_desc"] = str(d["vlm_description"])
|
||||||
|
if d.get("anchor_hint"):
|
||||||
|
hints["_anchor_hint"] = str(d["anchor_hint"])
|
||||||
|
self.context_hints = hints
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Lookup — consulté AVANT la cascade coûteuse
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def memory_lookup(
|
||||||
|
window_title: str,
|
||||||
|
target_spec: Dict[str, Any],
|
||||||
|
) -> Optional[Dict[str, Any]]:
|
||||||
|
"""Cherche une résolution apprise pour cette cible sur cet écran.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict compatible avec le format de sortie de `_resolve_target_sync`
|
||||||
|
(resolved, method, x_pct, y_pct, score, ...) si une entrée fiable
|
||||||
|
est trouvée. None sinon.
|
||||||
|
"""
|
||||||
|
store = get_memory_store()
|
||||||
|
if store is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
screen_sig = compute_screen_sig(window_title)
|
||||||
|
if not screen_sig:
|
||||||
|
return None
|
||||||
|
|
||||||
|
try:
|
||||||
|
spec_shim = _TargetSpecLike(target_spec)
|
||||||
|
fp = store.lookup(screen_sig, spec_shim)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.debug("memory_lookup: erreur lookup (%s)", exc)
|
||||||
|
return None
|
||||||
|
|
||||||
|
if fp is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Fingerprint léger : bbox = (x_pct, y_pct, 0, 0)
|
||||||
|
try:
|
||||||
|
x_pct = float(fp.bbox[0])
|
||||||
|
y_pct = float(fp.bbox[1])
|
||||||
|
except (TypeError, IndexError, ValueError):
|
||||||
|
logger.debug("memory_lookup: fingerprint bbox invalide")
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Sanity check : les pourcentages doivent être dans [0, 1]
|
||||||
|
if not (0.0 <= x_pct <= 1.0 and 0.0 <= y_pct <= 1.0):
|
||||||
|
logger.warning(
|
||||||
|
"memory_lookup: coords invalides (%.3f, %.3f) pour sig=%s — "
|
||||||
|
"entrée ignorée",
|
||||||
|
x_pct, y_pct, screen_sig,
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
|
||||||
|
method = fp.etype or "memory"
|
||||||
|
confidence = float(getattr(fp, "confidence", 0.9) or 0.9)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"memory_lookup HIT : sig=%s method=%s coords=(%.4f, %.4f) conf=%.2f "
|
||||||
|
"target='%s'",
|
||||||
|
screen_sig, method, x_pct, y_pct, confidence,
|
||||||
|
(target_spec.get("by_text") or "")[:60],
|
||||||
|
)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"resolved": True,
|
||||||
|
"method": f"memory_{method}",
|
||||||
|
"x_pct": x_pct,
|
||||||
|
"y_pct": y_pct,
|
||||||
|
"score": confidence,
|
||||||
|
"from_memory": True,
|
||||||
|
"screen_sig": screen_sig,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Record — appelé APRÈS validation post-condition
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def memory_record_success(
|
||||||
|
window_title: str,
|
||||||
|
target_spec: Dict[str, Any],
|
||||||
|
x_pct: float,
|
||||||
|
y_pct: float,
|
||||||
|
method: str,
|
||||||
|
confidence: float = 0.9,
|
||||||
|
) -> bool:
|
||||||
|
"""Enregistre une résolution réussie dans la mémoire persistante.
|
||||||
|
|
||||||
|
À appeler APRÈS validation de la post-condition (`title_match` strict).
|
||||||
|
"""
|
||||||
|
store = get_memory_store()
|
||||||
|
if store is None:
|
||||||
|
return False
|
||||||
|
|
||||||
|
screen_sig = compute_screen_sig(window_title)
|
||||||
|
if not screen_sig:
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Sanity check : coordonnées dans [0, 1]
|
||||||
|
try:
|
||||||
|
x_pct = float(x_pct)
|
||||||
|
y_pct = float(y_pct)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
logger.debug("memory_record_success: coords non numériques, skip")
|
||||||
|
return False
|
||||||
|
if not (0.0 <= x_pct <= 1.0 and 0.0 <= y_pct <= 1.0):
|
||||||
|
logger.debug(
|
||||||
|
"memory_record_success: coords hors [0,1] (%.3f, %.3f), skip",
|
||||||
|
x_pct, y_pct,
|
||||||
|
)
|
||||||
|
return False
|
||||||
|
|
||||||
|
try:
|
||||||
|
from core.learning.target_memory_store import TargetFingerprint
|
||||||
|
|
||||||
|
# Stripper les préfixes "memory_" empilés pour ne garder que
|
||||||
|
# la méthode de résolution originale (ex: template_matching).
|
||||||
|
# Sans ça, le cycle lookup → record → lookup empile "memory_"
|
||||||
|
# indéfiniment : memory_memory_memory_template_matching.
|
||||||
|
method_clean = method or "v4_unknown"
|
||||||
|
while method_clean.startswith("memory_"):
|
||||||
|
method_clean = method_clean[len("memory_"):]
|
||||||
|
method_clean = method_clean or "v4_unknown"
|
||||||
|
fingerprint = TargetFingerprint(
|
||||||
|
element_id=f"v4_{method_clean}",
|
||||||
|
bbox=(x_pct, y_pct, 0.0, 0.0),
|
||||||
|
role=target_spec.get("by_role", "") or None,
|
||||||
|
etype=method_clean,
|
||||||
|
label=(target_spec.get("by_text") or "")[:200] or None,
|
||||||
|
confidence=float(confidence),
|
||||||
|
)
|
||||||
|
|
||||||
|
spec_shim = _TargetSpecLike(target_spec)
|
||||||
|
store.record_success(
|
||||||
|
screen_signature=screen_sig,
|
||||||
|
target_spec=spec_shim,
|
||||||
|
fingerprint=fingerprint,
|
||||||
|
strategy_used=method_clean,
|
||||||
|
confidence=float(confidence),
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"memory_record_success: sig=%s method=%s coords=(%.4f, %.4f) "
|
||||||
|
"target='%s'",
|
||||||
|
screen_sig, method_clean, x_pct, y_pct,
|
||||||
|
(target_spec.get("by_text") or "")[:60],
|
||||||
|
)
|
||||||
|
return True
|
||||||
|
|
||||||
|
except Exception as exc:
|
||||||
|
logger.warning("memory_record_success: échec (%s)", exc)
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def memory_record_failure(
|
||||||
|
window_title: str,
|
||||||
|
target_spec: Dict[str, Any],
|
||||||
|
error_message: str,
|
||||||
|
) -> bool:
|
||||||
|
"""Incrémente le `fail_count` pour cette (signature, target).
|
||||||
|
|
||||||
|
Appelé quand l'action échoue OU quand la post-condition n'est pas
|
||||||
|
satisfaite. Le `TargetMemoryStore.lookup()` ignorera cette entrée
|
||||||
|
si le ratio d'échecs dépasse 30%.
|
||||||
|
"""
|
||||||
|
store = get_memory_store()
|
||||||
|
if store is None:
|
||||||
|
return False
|
||||||
|
|
||||||
|
screen_sig = compute_screen_sig(window_title)
|
||||||
|
if not screen_sig:
|
||||||
|
return False
|
||||||
|
|
||||||
|
try:
|
||||||
|
spec_shim = _TargetSpecLike(target_spec)
|
||||||
|
store.record_failure(
|
||||||
|
screen_signature=screen_sig,
|
||||||
|
target_spec=spec_shim,
|
||||||
|
error_message=(error_message or "unknown")[:200],
|
||||||
|
)
|
||||||
|
logger.debug(
|
||||||
|
"memory_record_failure: sig=%s error='%s'",
|
||||||
|
screen_sig, (error_message or "")[:80],
|
||||||
|
)
|
||||||
|
return True
|
||||||
|
except Exception as exc:
|
||||||
|
logger.debug("memory_record_failure: échec (%s)", exc)
|
||||||
|
return False
|
||||||
2385
agent_v0/server_v1/resolve_engine.py
Normal file
2385
agent_v0/server_v1/resolve_engine.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1791,6 +1791,10 @@ class StreamProcessor:
|
|||||||
# Workflows construits (pour le matching)
|
# Workflows construits (pour le matching)
|
||||||
self._workflows: Dict[str, Any] = {}
|
self._workflows: Dict[str, Any] = {}
|
||||||
|
|
||||||
|
# Shadow learning : dernier pattern UI détecté par session
|
||||||
|
# Stocke {session_id: {"pattern": str, "ocr_text": str, "screen_state": obj, "shot_id": str}}
|
||||||
|
self._pending_ui_patterns: Dict[str, Dict[str, Any]] = {}
|
||||||
|
|
||||||
# Charger les workflows existants depuis le disque
|
# Charger les workflows existants depuis le disque
|
||||||
self._load_persisted_workflows()
|
self._load_persisted_workflows()
|
||||||
|
|
||||||
@@ -1975,6 +1979,9 @@ class StreamProcessor:
|
|||||||
- key_combo/key_press avec uniquement des modificateurs seuls (ctrl, alt, shift, etc.)
|
- key_combo/key_press avec uniquement des modificateurs seuls (ctrl, alt, shift, etc.)
|
||||||
- key_combo/key_press avec liste de touches vide
|
- key_combo/key_press avec liste de touches vide
|
||||||
- text_input avec texte vide
|
- text_input avec texte vide
|
||||||
|
|
||||||
|
Shadow learning : quand un clic suit un pattern UI détecté,
|
||||||
|
on apprend l'association dialogue→bouton.
|
||||||
"""
|
"""
|
||||||
if _is_parasitic_event(event_data):
|
if _is_parasitic_event(event_data):
|
||||||
logger.debug(
|
logger.debug(
|
||||||
@@ -1982,9 +1989,119 @@ class StreamProcessor:
|
|||||||
f"type={event_data.get('type')}, data={event_data.get('keys', event_data.get('text', ''))}"
|
f"type={event_data.get('type')}, data={event_data.get('keys', event_data.get('text', ''))}"
|
||||||
)
|
)
|
||||||
return {"status": "event_filtered", "session_id": session_id, "reason": "parasitic"}
|
return {"status": "event_filtered", "session_id": session_id, "reason": "parasitic"}
|
||||||
|
|
||||||
|
# Shadow learning : si un pattern UI est en attente et qu'on reçoit un clic
|
||||||
|
if event_data.get("type") == "mouse_click":
|
||||||
|
self._try_shadow_learn(session_id, event_data)
|
||||||
|
|
||||||
self.session_manager.add_event(session_id, event_data)
|
self.session_manager.add_event(session_id, event_data)
|
||||||
return {"status": "event_recorded", "session_id": session_id}
|
return {"status": "event_recorded", "session_id": session_id}
|
||||||
|
|
||||||
|
def _try_shadow_learn(self, session_id: str, click_event: Dict[str, Any]):
|
||||||
|
"""Tente d'apprendre un pattern UI depuis un clic observé en Shadow.
|
||||||
|
|
||||||
|
Quand un screenshot contenait un pattern UI détecté (dialogue) et que
|
||||||
|
l'utilisateur clique ensuite, on extrait le texte OCR au point de clic
|
||||||
|
pour apprendre l'association : "quand je vois ce texte → cliquer sur ce bouton".
|
||||||
|
"""
|
||||||
|
with self._data_lock:
|
||||||
|
pending = self._pending_ui_patterns.pop(session_id, None)
|
||||||
|
if not pending:
|
||||||
|
return
|
||||||
|
|
||||||
|
screen_state = pending.get("screen_state")
|
||||||
|
if screen_state is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
# Extraire la position du clic (pixels absolus)
|
||||||
|
pos = click_event.get("pos", [])
|
||||||
|
if not pos or len(pos) != 2:
|
||||||
|
return
|
||||||
|
|
||||||
|
click_x, click_y = pos[0], pos[1]
|
||||||
|
|
||||||
|
# Trouver le texte OCR le plus proche du point de clic
|
||||||
|
# via les ui_elements du ScreenState (ils ont bbox + label)
|
||||||
|
clicked_label = self._find_label_at_position(screen_state, click_x, click_y)
|
||||||
|
if not clicked_label:
|
||||||
|
return
|
||||||
|
|
||||||
|
# Extraire le trigger principal du texte OCR du dialogue
|
||||||
|
ocr_text = pending.get("ocr_text", "")
|
||||||
|
# Utiliser un extrait court comme trigger (max 80 chars, premier segment pertinent)
|
||||||
|
trigger_text = ocr_text[:80].strip().lower()
|
||||||
|
if not trigger_text:
|
||||||
|
return
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
f"Shadow learning: pattern '{pending['pattern_name']}' "
|
||||||
|
f"→ utilisateur a cliqué '{clicked_label}' | trigger='{trigger_text[:40]}...'"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Sauvegarder le pattern appris
|
||||||
|
try:
|
||||||
|
from core.knowledge.ui_patterns import UIPatternLibrary
|
||||||
|
lib = UIPatternLibrary()
|
||||||
|
lib.save_learned_pattern({
|
||||||
|
"category": "dialog",
|
||||||
|
"triggers": [trigger_text],
|
||||||
|
"action": "click",
|
||||||
|
"target": clicked_label,
|
||||||
|
"os": "windows",
|
||||||
|
"confidence": 0.8,
|
||||||
|
})
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Shadow learning: échec sauvegarde pattern: {e}")
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _find_label_at_position(screen_state, click_x: int, click_y: int) -> Optional[str]:
|
||||||
|
"""Trouve le label de l'élément UI le plus proche du point de clic.
|
||||||
|
|
||||||
|
Parcourt les ui_elements du ScreenState et retourne le label de
|
||||||
|
l'élément dont la bbox contient le point, ou le plus proche si aucun
|
||||||
|
ne contient exactement le point.
|
||||||
|
"""
|
||||||
|
ui_elements = getattr(screen_state, "ui_elements", [])
|
||||||
|
if not ui_elements:
|
||||||
|
return None
|
||||||
|
|
||||||
|
best_label = None
|
||||||
|
best_dist = float("inf")
|
||||||
|
|
||||||
|
for elem in ui_elements:
|
||||||
|
bbox = getattr(elem, "bbox", None)
|
||||||
|
label = getattr(elem, "label", "")
|
||||||
|
if not bbox or not label:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# BBox = (x, y, width, height) — extraire les coordonnées
|
||||||
|
try:
|
||||||
|
bx, by = bbox.x, bbox.y
|
||||||
|
bw, bh = bbox.width, bbox.height
|
||||||
|
except AttributeError:
|
||||||
|
# Fallback si bbox est une liste/tuple
|
||||||
|
if hasattr(bbox, '__len__') and len(bbox) >= 4:
|
||||||
|
bx, by, bw, bh = bbox[0], bbox[1], bbox[2], bbox[3]
|
||||||
|
else:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Vérifier si le clic est dans la bbox
|
||||||
|
if bx <= click_x <= bx + bw and by <= click_y <= by + bh:
|
||||||
|
return label.strip()
|
||||||
|
|
||||||
|
# Sinon calculer la distance au centre
|
||||||
|
cx = bx + bw / 2
|
||||||
|
cy = by + bh / 2
|
||||||
|
dist = ((click_x - cx) ** 2 + (click_y - cy) ** 2) ** 0.5
|
||||||
|
if dist < best_dist:
|
||||||
|
best_dist = dist
|
||||||
|
best_label = label.strip()
|
||||||
|
|
||||||
|
# Ne retourner le plus proche que s'il est raisonnablement proche (< 100px)
|
||||||
|
if best_label and best_dist < 100:
|
||||||
|
return best_label
|
||||||
|
return None
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# Screenshots
|
# Screenshots
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
@@ -2042,6 +2159,37 @@ class StreamProcessor:
|
|||||||
self._screen_states[session_id] = []
|
self._screen_states[session_id] = []
|
||||||
self._screen_states[session_id].append(screen_state)
|
self._screen_states[session_id].append(screen_state)
|
||||||
|
|
||||||
|
# Enrichir avec les patterns UI connus
|
||||||
|
try:
|
||||||
|
from core.knowledge.ui_patterns import UIPatternLibrary
|
||||||
|
detected_text = getattr(screen_state.perception, "detected_text", [])
|
||||||
|
if detected_text:
|
||||||
|
ocr_text = " ".join(str(t) for t in detected_text) if isinstance(detected_text, list) else str(detected_text)
|
||||||
|
lib = UIPatternLibrary()
|
||||||
|
pattern = lib.find_pattern(ocr_text)
|
||||||
|
if pattern:
|
||||||
|
result["ui_pattern"] = pattern["pattern"]
|
||||||
|
result["ui_pattern_action"] = pattern["action"]
|
||||||
|
result["ui_pattern_target"] = pattern["target"]
|
||||||
|
logger.info(f"Pattern UI détecté: {pattern['pattern']} → {pattern['target']}")
|
||||||
|
|
||||||
|
# Shadow learning : mémoriser le pattern en attente du clic utilisateur
|
||||||
|
with self._data_lock:
|
||||||
|
self._pending_ui_patterns[session_id] = {
|
||||||
|
"pattern_name": pattern["pattern"],
|
||||||
|
"ocr_text": ocr_text,
|
||||||
|
"screen_state": screen_state,
|
||||||
|
"shot_id": shot_id,
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
# Pas de pattern connu → effacer le pending (l'écran a changé)
|
||||||
|
with self._data_lock:
|
||||||
|
self._pending_ui_patterns.pop(session_id, None)
|
||||||
|
except ImportError:
|
||||||
|
pass
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"Pattern check: {e}")
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Screenshot analysé: {shot_id} | "
|
f"Screenshot analysé: {shot_id} | "
|
||||||
f"{result['ui_elements_count']} UI elements, "
|
f"{result['ui_elements_count']} UI elements, "
|
||||||
|
|||||||
@@ -76,6 +76,15 @@ class StepMetrics:
|
|||||||
confidence_score: float
|
confidence_score: float
|
||||||
retry_count: int = 0
|
retry_count: int = 0
|
||||||
error_details: Optional[str] = None
|
error_details: Optional[str] = None
|
||||||
|
# C1 — Instrumentation vision-aware (ExecutionLoop)
|
||||||
|
# Ces champs proviennent de `StepResult` (core/execution/execution_loop.py).
|
||||||
|
# Tous optionnels avec valeurs par défaut pour rétrocompatibilité.
|
||||||
|
ocr_ms: float = 0.0 # Temps OCR sur ce step
|
||||||
|
ui_ms: float = 0.0 # Temps détection UI sur ce step
|
||||||
|
analyze_ms: float = 0.0 # Temps analyse ScreenState (OCR + UI + reste)
|
||||||
|
total_ms: float = 0.0 # Temps total du step (alias duration_ms)
|
||||||
|
cache_hit: bool = False # True si ScreenState vient du cache perceptuel
|
||||||
|
degraded: bool = False # True si mode dégradé (timeout analyse)
|
||||||
|
|
||||||
def to_dict(self) -> Dict[str, Any]:
|
def to_dict(self) -> Dict[str, Any]:
|
||||||
"""Convert to dictionary for storage."""
|
"""Convert to dictionary for storage."""
|
||||||
@@ -92,7 +101,13 @@ class StepMetrics:
|
|||||||
'status': self.status,
|
'status': self.status,
|
||||||
'confidence_score': self.confidence_score,
|
'confidence_score': self.confidence_score,
|
||||||
'retry_count': self.retry_count,
|
'retry_count': self.retry_count,
|
||||||
'error_details': self.error_details
|
'error_details': self.error_details,
|
||||||
|
'ocr_ms': self.ocr_ms,
|
||||||
|
'ui_ms': self.ui_ms,
|
||||||
|
'analyze_ms': self.analyze_ms,
|
||||||
|
'total_ms': self.total_ms,
|
||||||
|
'cache_hit': self.cache_hit,
|
||||||
|
'degraded': self.degraded,
|
||||||
}
|
}
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@@ -111,7 +126,13 @@ class StepMetrics:
|
|||||||
status=data['status'],
|
status=data['status'],
|
||||||
confidence_score=data['confidence_score'],
|
confidence_score=data['confidence_score'],
|
||||||
retry_count=data.get('retry_count', 0),
|
retry_count=data.get('retry_count', 0),
|
||||||
error_details=data.get('error_details')
|
error_details=data.get('error_details'),
|
||||||
|
ocr_ms=float(data.get('ocr_ms') or 0.0),
|
||||||
|
ui_ms=float(data.get('ui_ms') or 0.0),
|
||||||
|
analyze_ms=float(data.get('analyze_ms') or 0.0),
|
||||||
|
total_ms=float(data.get('total_ms') or 0.0),
|
||||||
|
cache_hit=bool(data.get('cache_hit') or False),
|
||||||
|
degraded=bool(data.get('degraded') or False),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
"""Integration of analytics with ExecutionLoop."""
|
"""Integration of analytics with ExecutionLoop."""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Optional
|
from typing import Any, Optional
|
||||||
from datetime import datetime
|
from datetime import datetime, timedelta
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
from ..analytics_system import get_analytics_system
|
from ..analytics_system import get_analytics_system
|
||||||
@@ -14,17 +14,35 @@ logger = logging.getLogger(__name__)
|
|||||||
class AnalyticsExecutionIntegration:
|
class AnalyticsExecutionIntegration:
|
||||||
"""Integrate analytics collection with workflow execution."""
|
"""Integrate analytics collection with workflow execution."""
|
||||||
|
|
||||||
def __init__(self, enabled: bool = True):
|
def __init__(self, analytics_system: Any = True, enabled: Optional[bool] = None):
|
||||||
"""
|
"""
|
||||||
Initialize analytics integration.
|
Initialize analytics integration.
|
||||||
|
|
||||||
Args:
|
Accepte deux formes d'appel pour la rétrocompatibilité :
|
||||||
enabled: Whether analytics collection is enabled
|
- ``AnalyticsExecutionIntegration(enabled=True)`` → auto-load du système
|
||||||
"""
|
- ``AnalyticsExecutionIntegration(analytics_system_instance)`` →
|
||||||
self.enabled = enabled
|
utilise l'instance fournie (utilisé par ExecutionLoop)
|
||||||
self.analytics = None
|
|
||||||
|
|
||||||
if enabled:
|
Args:
|
||||||
|
analytics_system: Instance d'AnalyticsSystem pré-construite, ou
|
||||||
|
True/False pour activer/désactiver (legacy).
|
||||||
|
enabled: Legacy — si défini, prime sur analytics_system.
|
||||||
|
"""
|
||||||
|
# Détection de la forme d'appel
|
||||||
|
if enabled is not None:
|
||||||
|
# Appel legacy explicite: AnalyticsExecutionIntegration(enabled=...)
|
||||||
|
self.enabled = bool(enabled)
|
||||||
|
self.analytics = None
|
||||||
|
elif isinstance(analytics_system, bool):
|
||||||
|
# Appel legacy: AnalyticsExecutionIntegration(True/False)
|
||||||
|
self.enabled = analytics_system
|
||||||
|
self.analytics = None
|
||||||
|
else:
|
||||||
|
# Nouvelle forme: instance injectée
|
||||||
|
self.enabled = analytics_system is not None
|
||||||
|
self.analytics = analytics_system
|
||||||
|
|
||||||
|
if self.enabled and self.analytics is None:
|
||||||
try:
|
try:
|
||||||
self.analytics = get_analytics_system()
|
self.analytics = get_analytics_system()
|
||||||
logger.info("Analytics integration enabled")
|
logger.info("Analytics integration enabled")
|
||||||
@@ -36,18 +54,21 @@ class AnalyticsExecutionIntegration:
|
|||||||
self,
|
self,
|
||||||
workflow_id: str,
|
workflow_id: str,
|
||||||
execution_id: Optional[str] = None,
|
execution_id: Optional[str] = None,
|
||||||
total_steps: int = 0
|
total_steps: int = 0,
|
||||||
|
mode: Optional[str] = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""
|
"""
|
||||||
Called when workflow execution starts.
|
Appelé au démarrage d'une exécution de workflow.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
workflow_id: Workflow identifier
|
workflow_id: Identifiant du workflow
|
||||||
execution_id: Execution identifier (generated if None)
|
execution_id: Identifiant d'exécution (généré si None)
|
||||||
total_steps: Total number of steps
|
total_steps: Nombre total d'étapes prévues
|
||||||
|
mode: Mode d'exécution (OBSERVATION / COACHING / SUPERVISED /
|
||||||
|
AUTOMATIC). Propagé en contexte pour MetricsCollector.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Execution ID
|
Identifiant d'exécution (celui fourni ou nouvellement généré).
|
||||||
"""
|
"""
|
||||||
if not self.enabled or not self.analytics:
|
if not self.enabled or not self.analytics:
|
||||||
return execution_id or str(uuid.uuid4())
|
return execution_id or str(uuid.uuid4())
|
||||||
@@ -56,11 +77,21 @@ class AnalyticsExecutionIntegration:
|
|||||||
execution_id = str(uuid.uuid4())
|
execution_id = str(uuid.uuid4())
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Start real-time tracking
|
# Démarrage du tracking temps réel
|
||||||
self.analytics.realtime_analytics.track_execution(
|
self.analytics.realtime_analytics.track_execution(
|
||||||
execution_id=execution_id,
|
execution_id=execution_id,
|
||||||
workflow_id=workflow_id,
|
workflow_id=workflow_id,
|
||||||
total_steps=total_steps
|
total_steps=total_steps,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Ouverture de l'ExecutionMetrics côté collector (état "running").
|
||||||
|
# Cela permet à `on_execution_complete` d'appeler
|
||||||
|
# `record_execution_complete` qui clôture proprement.
|
||||||
|
context = {"mode": mode} if mode else {}
|
||||||
|
self.analytics.metrics_collector.record_execution_start(
|
||||||
|
execution_id=execution_id,
|
||||||
|
workflow_id=workflow_id,
|
||||||
|
context=context,
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.debug(f"Started tracking execution: {execution_id}")
|
logger.debug(f"Started tracking execution: {execution_id}")
|
||||||
@@ -101,108 +132,247 @@ class AnalyticsExecutionIntegration:
|
|||||||
execution_id: str,
|
execution_id: str,
|
||||||
workflow_id: str,
|
workflow_id: str,
|
||||||
node_id: str,
|
node_id: str,
|
||||||
action_type: str,
|
*,
|
||||||
started_at: datetime,
|
duration_ms: float,
|
||||||
completed_at: datetime,
|
|
||||||
duration: float,
|
|
||||||
success: bool,
|
success: bool,
|
||||||
error_message: Optional[str] = None
|
action_type: str = "",
|
||||||
|
started_at: Optional[datetime] = None,
|
||||||
|
completed_at: Optional[datetime] = None,
|
||||||
|
error_message: Optional[str] = None,
|
||||||
|
confidence: float = 0.0,
|
||||||
|
target_element: str = "",
|
||||||
|
retry_count: int = 0,
|
||||||
|
ocr_ms: float = 0.0,
|
||||||
|
ui_ms: float = 0.0,
|
||||||
|
analyze_ms: float = 0.0,
|
||||||
|
total_ms: float = 0.0,
|
||||||
|
cache_hit: bool = False,
|
||||||
|
degraded: bool = False,
|
||||||
|
step_id: Optional[str] = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
Called when a step completes.
|
Appelé à la fin d'un step.
|
||||||
|
|
||||||
|
Contrat normalisé (Lot A — avril 2026) : ``duration_ms`` est
|
||||||
|
obligatoire et en millisecondes. Plus de rétrocompat silencieuse
|
||||||
|
sur ``duration`` en secondes.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
execution_id: Execution identifier
|
execution_id: Identifiant d'exécution
|
||||||
workflow_id: Workflow identifier
|
workflow_id: Identifiant du workflow
|
||||||
node_id: Node identifier
|
node_id: Identifiant du node
|
||||||
action_type: Type of action
|
duration_ms: Durée du step en millisecondes (obligatoire)
|
||||||
started_at: Start timestamp
|
success: Vrai si le step a réussi
|
||||||
completed_at: Completion timestamp
|
action_type: Type d'action (``click``, ``type``, …)
|
||||||
duration: Duration in seconds
|
started_at: Timestamp de début (déduit de duration_ms si None)
|
||||||
success: Whether step succeeded
|
completed_at: Timestamp de fin (``now()`` si None)
|
||||||
error_message: Error message if failed
|
error_message: Message d'erreur si ``success=False``
|
||||||
|
confidence: Score de matching [0, 1]
|
||||||
|
target_element: Élément ciblé (optionnel)
|
||||||
|
retry_count: Nombre de retries
|
||||||
|
ocr_ms: Temps OCR (C1)
|
||||||
|
ui_ms: Temps détection UI (C1)
|
||||||
|
analyze_ms: Temps analyse ScreenState (C1)
|
||||||
|
total_ms: Temps total du step (C1, alias duration_ms)
|
||||||
|
cache_hit: ScreenState depuis cache perceptuel (C1)
|
||||||
|
degraded: Mode dégradé activé (C1)
|
||||||
|
step_id: ID unique du step (généré si None)
|
||||||
"""
|
"""
|
||||||
if not self.enabled or not self.analytics:
|
if not self.enabled or not self.analytics:
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Record step metrics
|
duration_ms_final = float(duration_ms)
|
||||||
|
|
||||||
|
# Normaliser les timestamps
|
||||||
|
if completed_at is None:
|
||||||
|
completed_at = datetime.now()
|
||||||
|
if started_at is None:
|
||||||
|
started_at = completed_at - timedelta(milliseconds=duration_ms_final)
|
||||||
|
|
||||||
step_metrics = StepMetrics(
|
step_metrics = StepMetrics(
|
||||||
|
step_id=step_id or f"{execution_id}:{node_id}:{completed_at.isoformat()}",
|
||||||
execution_id=execution_id,
|
execution_id=execution_id,
|
||||||
workflow_id=workflow_id,
|
workflow_id=workflow_id,
|
||||||
node_id=node_id,
|
node_id=node_id,
|
||||||
action_type=action_type,
|
action_type=action_type or "unknown",
|
||||||
|
target_element=target_element,
|
||||||
started_at=started_at,
|
started_at=started_at,
|
||||||
completed_at=completed_at,
|
completed_at=completed_at,
|
||||||
duration=duration,
|
duration_ms=duration_ms_final,
|
||||||
success=success,
|
status="completed" if success else "failed",
|
||||||
error_message=error_message
|
confidence_score=float(confidence),
|
||||||
|
retry_count=retry_count,
|
||||||
|
error_details=error_message,
|
||||||
|
# C1 — vision-aware
|
||||||
|
ocr_ms=float(ocr_ms or 0.0),
|
||||||
|
ui_ms=float(ui_ms or 0.0),
|
||||||
|
analyze_ms=float(analyze_ms or 0.0),
|
||||||
|
total_ms=float(total_ms or duration_ms_final),
|
||||||
|
cache_hit=bool(cache_hit),
|
||||||
|
degraded=bool(degraded),
|
||||||
)
|
)
|
||||||
|
|
||||||
self.analytics.metrics_collector.record_step(step_metrics)
|
self.analytics.metrics_collector.record_step(step_metrics)
|
||||||
|
|
||||||
# Update real-time tracking
|
# Tracking temps réel
|
||||||
|
try:
|
||||||
self.analytics.realtime_analytics.record_step_complete(
|
self.analytics.realtime_analytics.record_step_complete(
|
||||||
execution_id=execution_id,
|
execution_id=execution_id,
|
||||||
success=success
|
success=success,
|
||||||
)
|
)
|
||||||
|
except Exception as rt_err:
|
||||||
|
logger.debug(f"Realtime tracking skipped: {rt_err}")
|
||||||
|
|
||||||
logger.debug(f"Recorded step: {node_id} ({'success' if success else 'failed'})")
|
logger.debug(
|
||||||
|
f"Recorded step: {node_id} "
|
||||||
|
f"({'success' if success else 'failed'}, "
|
||||||
|
f"analyze_ms={analyze_ms:.0f}, cache_hit={cache_hit}, "
|
||||||
|
f"degraded={degraded})"
|
||||||
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error recording step completion: {e}")
|
logger.error(f"Error recording step completion: {e}")
|
||||||
|
|
||||||
|
def on_step_result(
|
||||||
|
self,
|
||||||
|
execution_id: str,
|
||||||
|
workflow_id: str,
|
||||||
|
step_result: Any,
|
||||||
|
) -> None:
|
||||||
|
"""
|
||||||
|
Raccourci C1 — enregistre un `StepResult` complet.
|
||||||
|
|
||||||
|
Évite aux appelants d'extraire manuellement les champs vision-aware.
|
||||||
|
Utilisé par ExecutionLoop pour pousser StepResult au système analytics.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
execution_id: Identifiant d'exécution
|
||||||
|
workflow_id: Identifiant de workflow
|
||||||
|
step_result: Instance de `core.execution.execution_loop.StepResult`
|
||||||
|
"""
|
||||||
|
if not self.enabled or not self.analytics:
|
||||||
|
return
|
||||||
|
|
||||||
|
action_type = "unknown"
|
||||||
|
try:
|
||||||
|
if getattr(step_result, "action_result", None) is not None:
|
||||||
|
ar = step_result.action_result
|
||||||
|
# ExecutionResult.action est optionnel selon la branche
|
||||||
|
action_type = (
|
||||||
|
getattr(ar, "action_type", None)
|
||||||
|
or getattr(ar, "action", None)
|
||||||
|
or "unknown"
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
action_type = "unknown"
|
||||||
|
|
||||||
|
self.on_step_complete(
|
||||||
|
execution_id=execution_id,
|
||||||
|
workflow_id=workflow_id,
|
||||||
|
node_id=getattr(step_result, "node_id", "unknown"),
|
||||||
|
action_type=str(action_type),
|
||||||
|
success=bool(getattr(step_result, "success", False)),
|
||||||
|
error_message=None
|
||||||
|
if getattr(step_result, "success", False)
|
||||||
|
else getattr(step_result, "message", None),
|
||||||
|
duration_ms=float(getattr(step_result, "duration_ms", 0.0) or 0.0),
|
||||||
|
confidence=float(getattr(step_result, "match_confidence", 0.0) or 0.0),
|
||||||
|
ocr_ms=float(getattr(step_result, "ocr_ms", 0.0) or 0.0),
|
||||||
|
ui_ms=float(getattr(step_result, "ui_ms", 0.0) or 0.0),
|
||||||
|
analyze_ms=float(getattr(step_result, "analyze_ms", 0.0) or 0.0),
|
||||||
|
total_ms=float(getattr(step_result, "total_ms", 0.0) or 0.0),
|
||||||
|
cache_hit=bool(getattr(step_result, "cache_hit", False)),
|
||||||
|
degraded=bool(getattr(step_result, "degraded", False)),
|
||||||
|
)
|
||||||
|
|
||||||
def on_execution_complete(
|
def on_execution_complete(
|
||||||
self,
|
self,
|
||||||
execution_id: str,
|
execution_id: str,
|
||||||
workflow_id: str,
|
workflow_id: str,
|
||||||
started_at: datetime,
|
*,
|
||||||
completed_at: datetime,
|
duration_ms: float,
|
||||||
duration: float,
|
|
||||||
status: str,
|
status: str,
|
||||||
error_message: Optional[str] = None,
|
steps_total: Optional[int] = None,
|
||||||
steps_completed: int = 0,
|
steps_completed: int = 0,
|
||||||
steps_failed: int = 0
|
steps_failed: int = 0,
|
||||||
|
error_message: Optional[str] = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
Called when workflow execution completes.
|
Appelé à la fin d'une exécution de workflow.
|
||||||
|
|
||||||
|
Contrat normalisé (Lot A — avril 2026) :
|
||||||
|
- ``duration_ms`` en millisecondes, toujours. Plus de rétrocompat
|
||||||
|
silencieuse sur ``duration`` en secondes.
|
||||||
|
- ``status`` est une chaîne libre (``"completed"``, ``"failed"``,
|
||||||
|
``"stopped"``, ``"timeout"``, …). L'appelant décide.
|
||||||
|
- ``steps_total`` / ``steps_completed`` / ``steps_failed`` : noms
|
||||||
|
alignés sur le dataclass ``ExecutionMetrics``. Si ``steps_total``
|
||||||
|
n'est pas fourni, on le déduit par somme.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
execution_id: Execution identifier
|
execution_id: Identifiant d'exécution
|
||||||
workflow_id: Workflow identifier
|
workflow_id: Identifiant du workflow
|
||||||
started_at: Start timestamp
|
duration_ms: Durée totale en millisecondes
|
||||||
completed_at: Completion timestamp
|
status: Statut final (``"completed"`` / ``"failed"`` / ``"stopped"``)
|
||||||
duration: Duration in seconds
|
steps_total: Nombre total de steps exécutés (tous statuts confondus)
|
||||||
status: Final status (success, failed, timeout)
|
steps_completed: Nombre de steps réussis
|
||||||
error_message: Error message if failed
|
steps_failed: Nombre de steps en échec
|
||||||
steps_completed: Number of steps completed
|
error_message: Message d'erreur si ``status != "completed"``
|
||||||
steps_failed: Number of steps failed
|
|
||||||
"""
|
"""
|
||||||
if not self.enabled or not self.analytics:
|
if not self.enabled or not self.analytics:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# steps_total dérivé si non fourni explicitement
|
||||||
|
if steps_total is None:
|
||||||
|
steps_total = int(steps_completed) + int(steps_failed)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Record execution metrics
|
collector = self.analytics.metrics_collector
|
||||||
|
|
||||||
|
# record_execution_complete clôture proprement un ExecutionMetrics
|
||||||
|
# ouvert par record_execution_start (chemin nominal via
|
||||||
|
# on_execution_start). Si l'état n'est pas présent (tests, legacy),
|
||||||
|
# on pousse un ExecutionMetrics synthétique directement.
|
||||||
|
completed_at = datetime.now()
|
||||||
|
started_at = completed_at - timedelta(milliseconds=float(duration_ms))
|
||||||
|
|
||||||
|
active = getattr(collector, "_active_executions", None)
|
||||||
|
if active is not None and execution_id in active:
|
||||||
|
collector.record_execution_complete(
|
||||||
|
execution_id=execution_id,
|
||||||
|
status=status,
|
||||||
|
steps_total=int(steps_total),
|
||||||
|
steps_completed=int(steps_completed),
|
||||||
|
steps_failed=int(steps_failed),
|
||||||
|
error_message=error_message,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# Fallback explicite : on construit directement un ExecutionMetrics
|
||||||
|
# aligné sur le dataclass (duration_ms, status, steps_*).
|
||||||
execution_metrics = ExecutionMetrics(
|
execution_metrics = ExecutionMetrics(
|
||||||
execution_id=execution_id,
|
execution_id=execution_id,
|
||||||
workflow_id=workflow_id,
|
workflow_id=workflow_id,
|
||||||
started_at=started_at,
|
started_at=started_at,
|
||||||
completed_at=completed_at,
|
completed_at=completed_at,
|
||||||
duration=duration,
|
duration_ms=float(duration_ms),
|
||||||
status=status,
|
status=status,
|
||||||
|
steps_total=int(steps_total),
|
||||||
|
steps_completed=int(steps_completed),
|
||||||
|
steps_failed=int(steps_failed),
|
||||||
error_message=error_message,
|
error_message=error_message,
|
||||||
steps_completed=steps_completed,
|
|
||||||
steps_failed=steps_failed
|
|
||||||
)
|
)
|
||||||
|
# Le collector n'expose pas record_execution(...) : on pousse
|
||||||
|
# dans le buffer protégé par lock pour rester cohérent.
|
||||||
|
with collector._lock:
|
||||||
|
collector._buffer.append(execution_metrics)
|
||||||
|
|
||||||
self.analytics.metrics_collector.record_execution(execution_metrics)
|
# Flush pour garantir la persistance immédiate
|
||||||
|
collector.flush()
|
||||||
|
|
||||||
# Flush to ensure persistence
|
# Clôture du tracking temps réel
|
||||||
self.analytics.metrics_collector.flush()
|
|
||||||
|
|
||||||
# Complete real-time tracking
|
|
||||||
self.analytics.realtime_analytics.complete_execution(
|
self.analytics.realtime_analytics.complete_execution(
|
||||||
execution_id=execution_id,
|
execution_id=execution_id,
|
||||||
status=status
|
status=status,
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.info(f"Recorded execution: {execution_id} ({status})")
|
logger.info(f"Recorded execution: {execution_id} ({status})")
|
||||||
@@ -216,39 +386,54 @@ class AnalyticsExecutionIntegration:
|
|||||||
node_id: str,
|
node_id: str,
|
||||||
strategy: str,
|
strategy: str,
|
||||||
success: bool,
|
success: bool,
|
||||||
duration: float
|
duration_ms: float,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
Called when self-healing attempts recovery.
|
Appelé quand le self-healing tente une récupération.
|
||||||
|
|
||||||
|
Contrat normalisé (Lot A — avril 2026) : ``duration_ms`` en
|
||||||
|
millisecondes, cohérent avec ``on_execution_complete`` et
|
||||||
|
``on_step_complete``. Le StepMetrics construit respecte strictement
|
||||||
|
le dataclass (``status``, ``duration_ms``, ``error_details``,
|
||||||
|
``confidence_score``, ``target_element``, ``step_id``).
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
execution_id: Execution identifier
|
execution_id: Identifiant d'exécution
|
||||||
workflow_id: Workflow identifier
|
workflow_id: Identifiant du workflow
|
||||||
node_id: Node identifier
|
node_id: Node où la récupération est tentée
|
||||||
strategy: Recovery strategy used
|
strategy: Stratégie de récupération employée
|
||||||
success: Whether recovery succeeded
|
success: Vrai si la récupération a réussi
|
||||||
duration: Recovery duration
|
duration_ms: Durée de la tentative en millisecondes
|
||||||
"""
|
"""
|
||||||
if not self.enabled or not self.analytics:
|
if not self.enabled or not self.analytics:
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Record as a special step metric
|
now = datetime.now()
|
||||||
|
started_at = now - timedelta(milliseconds=float(duration_ms))
|
||||||
|
|
||||||
recovery_metrics = StepMetrics(
|
recovery_metrics = StepMetrics(
|
||||||
|
step_id=f"{execution_id}:{node_id}:recovery:{now.isoformat()}",
|
||||||
execution_id=execution_id,
|
execution_id=execution_id,
|
||||||
workflow_id=workflow_id,
|
workflow_id=workflow_id,
|
||||||
node_id=f"{node_id}_recovery",
|
node_id=f"{node_id}_recovery",
|
||||||
action_type=f"recovery_{strategy}",
|
action_type=f"recovery_{strategy}",
|
||||||
started_at=datetime.now(),
|
target_element="",
|
||||||
completed_at=datetime.now(),
|
started_at=started_at,
|
||||||
duration=duration,
|
completed_at=now,
|
||||||
success=success,
|
duration_ms=float(duration_ms),
|
||||||
error_message=None if success else f"Recovery failed: {strategy}"
|
status="completed" if success else "failed",
|
||||||
|
confidence_score=0.0,
|
||||||
|
retry_count=0,
|
||||||
|
error_details=None if success else f"Recovery failed: {strategy}",
|
||||||
)
|
)
|
||||||
|
|
||||||
self.analytics.metrics_collector.record_step(recovery_metrics)
|
self.analytics.metrics_collector.record_step(recovery_metrics)
|
||||||
|
|
||||||
logger.debug(f"Recorded recovery: {strategy} ({'success' if success else 'failed'})")
|
logger.debug(
|
||||||
|
f"Recorded recovery: {strategy} "
|
||||||
|
f"({'success' if success else 'failed'})"
|
||||||
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error recording recovery attempt: {e}")
|
logger.error(f"Error recording recovery attempt: {e}")
|
||||||
|
|
||||||
|
|||||||
643
core/analytics/process_mining_bridge.py
Normal file
643
core/analytics/process_mining_bridge.py
Normal file
@@ -0,0 +1,643 @@
|
|||||||
|
"""
|
||||||
|
Bridge entre les workflows Lea (core) et PM4Py pour le process mining.
|
||||||
|
Genere des diagrammes BPMN et KPIs depuis les traces Shadow.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
from core.analytics.process_mining_bridge import (
|
||||||
|
sessions_to_event_log,
|
||||||
|
workflow_to_event_log,
|
||||||
|
discover_bpmn,
|
||||||
|
compute_kpis,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Depuis des sessions JSONL brutes
|
||||||
|
df = sessions_to_event_log(sessions_data)
|
||||||
|
result = discover_bpmn(df, output_dir="data/analytics/bpmn")
|
||||||
|
kpis = compute_kpis(df)
|
||||||
|
|
||||||
|
# Depuis un workflow core (dict JSON)
|
||||||
|
df = workflow_to_event_log(workflow_dict)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# ---- Import conditionnel PM4Py -----------------------------------------
|
||||||
|
|
||||||
|
try:
|
||||||
|
import pm4py
|
||||||
|
PM4PY_AVAILABLE = True
|
||||||
|
except ImportError:
|
||||||
|
PM4PY_AVAILABLE = False
|
||||||
|
logger.warning("pm4py non installe -- le process mining est desactive")
|
||||||
|
|
||||||
|
|
||||||
|
def _sanitize_label(label: str) -> str:
|
||||||
|
"""
|
||||||
|
Supprime les caracteres de controle (0x00-0x1F sauf tab/newline)
|
||||||
|
qui sont invalides en XML et font planter PM4Py.
|
||||||
|
"""
|
||||||
|
return "".join(
|
||||||
|
c if c in ("\t", "\n", "\r") or ord(c) >= 0x20 else f"<0x{ord(c):02x}>"
|
||||||
|
for c in label
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ---- Types d'evenements a ignorer (bruit) --------------------------------
|
||||||
|
|
||||||
|
_NOISE_EVENT_TYPES = frozenset({
|
||||||
|
"heartbeat",
|
||||||
|
"action_result",
|
||||||
|
"screenshot",
|
||||||
|
})
|
||||||
|
|
||||||
|
# Types d'evenements significatifs pour le process mining
|
||||||
|
_RELEVANT_EVENT_TYPES = frozenset({
|
||||||
|
"mouse_click",
|
||||||
|
"text_input",
|
||||||
|
"key_press",
|
||||||
|
"key_combo",
|
||||||
|
"window_focus_change",
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# Conversion sessions JSONL -> event log PM4Py
|
||||||
|
# ===========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def _build_activity_label(event: dict) -> Optional[str]:
|
||||||
|
"""
|
||||||
|
Construit un label d'activite lisible depuis un event JSONL brut.
|
||||||
|
|
||||||
|
Regles :
|
||||||
|
- mouse_click -> "Clic - <app_name> (<window_title tronque>)"
|
||||||
|
- text_input -> "Saisie '<text>' - <app_name>"
|
||||||
|
- key_press -> "Touche <key> - <app_name>"
|
||||||
|
- key_combo -> "Raccourci <keys> - <app_name>"
|
||||||
|
- window_focus_change -> "Fenetre <to.title> (<to.app_name>)"
|
||||||
|
|
||||||
|
Tous les labels sont sanitises pour supprimer les caracteres de controle
|
||||||
|
(ex: \\x13 pour Ctrl+S) qui sont invalides en XML/BPMN.
|
||||||
|
"""
|
||||||
|
evt = event.get("event", event)
|
||||||
|
etype = evt.get("type", "")
|
||||||
|
|
||||||
|
if etype in _NOISE_EVENT_TYPES:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Extraction fenetre
|
||||||
|
window = evt.get("window", {})
|
||||||
|
app_name = window.get("app_name", "inconnu")
|
||||||
|
win_title = window.get("title", "")
|
||||||
|
# Tronquer le titre a 40 caracteres
|
||||||
|
short_title = (win_title[:40] + "...") if len(win_title) > 40 else win_title
|
||||||
|
|
||||||
|
label: Optional[str] = None
|
||||||
|
|
||||||
|
if etype == "mouse_click":
|
||||||
|
label = f"Clic - {app_name} ({short_title})"
|
||||||
|
|
||||||
|
elif etype == "text_input":
|
||||||
|
text = evt.get("text", "")
|
||||||
|
# Tronquer le texte a 20 caracteres pour rester lisible
|
||||||
|
short_text = (text[:20] + "...") if len(text) > 20 else text
|
||||||
|
label = f"Saisie '{short_text}' - {app_name}"
|
||||||
|
|
||||||
|
elif etype == "key_press":
|
||||||
|
key = evt.get("key", "?")
|
||||||
|
label = f"Touche {key} - {app_name}"
|
||||||
|
|
||||||
|
elif etype == "key_combo":
|
||||||
|
keys = evt.get("keys", [])
|
||||||
|
combo = "+".join(str(k) for k in keys)
|
||||||
|
label = f"Raccourci {combo} - {app_name}"
|
||||||
|
|
||||||
|
elif etype == "window_focus_change":
|
||||||
|
to_info = evt.get("to", {})
|
||||||
|
if not to_info:
|
||||||
|
return None
|
||||||
|
to_title = to_info.get("title", "?")
|
||||||
|
to_app = to_info.get("app_name", "?")
|
||||||
|
label = f"Fenetre {to_title} ({to_app})"
|
||||||
|
|
||||||
|
else:
|
||||||
|
# Types non reconnus : label generique
|
||||||
|
label = f"{etype} - {app_name}"
|
||||||
|
|
||||||
|
return _sanitize_label(label) if label else None
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_timestamp(event: dict) -> Optional[float]:
|
||||||
|
"""Extrait le timestamp unix depuis un event JSONL."""
|
||||||
|
# Le timestamp peut etre au niveau racine ou dans event.timestamp
|
||||||
|
evt = event.get("event", event)
|
||||||
|
ts = evt.get("timestamp") or event.get("timestamp")
|
||||||
|
if ts is not None:
|
||||||
|
return float(ts)
|
||||||
|
# Fallback sur le champ 't' (format simplifie)
|
||||||
|
t = evt.get("t") or event.get("t")
|
||||||
|
if t is not None:
|
||||||
|
return float(t)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def sessions_to_event_log(
|
||||||
|
sessions_data: List[dict],
|
||||||
|
deduplicate_windows: bool = True,
|
||||||
|
) -> pd.DataFrame:
|
||||||
|
"""
|
||||||
|
Convertit des traces de sessions brutes (events JSONL) en event log PM4Py.
|
||||||
|
|
||||||
|
Chaque event pertinent devient une ligne :
|
||||||
|
- case:concept:name = session_id
|
||||||
|
- concept:name = label d'activite (ex: "Clic - Notepad.exe (Bloc-notes)")
|
||||||
|
- time:timestamp = timestamp UTC
|
||||||
|
|
||||||
|
Args:
|
||||||
|
sessions_data: liste de dicts, chaque dict est une ligne JSONL parsee.
|
||||||
|
deduplicate_windows: si True, supprime les window_focus_change
|
||||||
|
consecutifs vers la meme fenetre (bruit typique de Windows).
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
DataFrame pret pour PM4Py.
|
||||||
|
"""
|
||||||
|
rows: List[Dict[str, Any]] = []
|
||||||
|
|
||||||
|
# Regrouper par session_id pour le deduplication
|
||||||
|
sessions: Dict[str, List[dict]] = {}
|
||||||
|
for event in sessions_data:
|
||||||
|
sid = event.get("session_id", "unknown")
|
||||||
|
sessions.setdefault(sid, []).append(event)
|
||||||
|
|
||||||
|
for sid, events in sessions.items():
|
||||||
|
# Trier par timestamp
|
||||||
|
events.sort(key=lambda e: _extract_timestamp(e) or 0.0)
|
||||||
|
last_window_label: Optional[str] = None
|
||||||
|
|
||||||
|
for event in events:
|
||||||
|
label = _build_activity_label(event)
|
||||||
|
if label is None:
|
||||||
|
continue
|
||||||
|
|
||||||
|
ts = _extract_timestamp(event)
|
||||||
|
if ts is None:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Deduplication des changements de fenetre consecutifs
|
||||||
|
evt = event.get("event", event)
|
||||||
|
if deduplicate_windows and evt.get("type") == "window_focus_change":
|
||||||
|
if label == last_window_label:
|
||||||
|
continue
|
||||||
|
last_window_label = label
|
||||||
|
else:
|
||||||
|
last_window_label = None
|
||||||
|
|
||||||
|
rows.append({
|
||||||
|
"case:concept:name": sid,
|
||||||
|
"concept:name": label,
|
||||||
|
"time:timestamp": pd.Timestamp(
|
||||||
|
datetime.fromtimestamp(ts, tz=timezone.utc)
|
||||||
|
),
|
||||||
|
"event_type": evt.get("type", ""),
|
||||||
|
"app_name": evt.get("window", {}).get("app_name", ""),
|
||||||
|
})
|
||||||
|
|
||||||
|
if not rows:
|
||||||
|
logger.warning("Aucun evenement pertinent trouve dans les sessions")
|
||||||
|
return pd.DataFrame(columns=[
|
||||||
|
"case:concept:name",
|
||||||
|
"concept:name",
|
||||||
|
"time:timestamp",
|
||||||
|
"event_type",
|
||||||
|
"app_name",
|
||||||
|
])
|
||||||
|
|
||||||
|
df = pd.DataFrame(rows)
|
||||||
|
df = df.sort_values(["case:concept:name", "time:timestamp"]).reset_index(drop=True)
|
||||||
|
logger.info(
|
||||||
|
"Event log cree : %d evenements, %d sessions, %d activites distinctes",
|
||||||
|
len(df),
|
||||||
|
df["case:concept:name"].nunique(),
|
||||||
|
df["concept:name"].nunique(),
|
||||||
|
)
|
||||||
|
return df
|
||||||
|
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# Conversion workflow core (dict JSON) -> event log PM4Py
|
||||||
|
# ===========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def workflow_to_event_log(workflow_dict: dict) -> pd.DataFrame:
|
||||||
|
"""
|
||||||
|
Convertit un workflow core (dict JSON) en DataFrame PM4Py.
|
||||||
|
|
||||||
|
Utilise les nodes et edges pour reconstituer une trace.
|
||||||
|
Chaque chemin du entry_node vers un end_node = un case.
|
||||||
|
|
||||||
|
Mapping :
|
||||||
|
- case:concept:name = workflow_id + suffixe de chemin
|
||||||
|
- concept:name = node.name
|
||||||
|
- time:timestamp = deduced from edge stats ou created_at
|
||||||
|
"""
|
||||||
|
wf_id = workflow_dict.get("workflow_id", "wf_unknown")
|
||||||
|
nodes = {n["node_id"]: n for n in workflow_dict.get("nodes", [])}
|
||||||
|
edges = workflow_dict.get("edges", [])
|
||||||
|
entry_nodes = workflow_dict.get("entry_nodes", [])
|
||||||
|
created_at = workflow_dict.get("created_at", datetime.now(timezone.utc).isoformat())
|
||||||
|
|
||||||
|
if not nodes or not edges:
|
||||||
|
logger.warning("Workflow vide ou sans edges : %s", wf_id)
|
||||||
|
return pd.DataFrame(columns=[
|
||||||
|
"case:concept:name",
|
||||||
|
"concept:name",
|
||||||
|
"time:timestamp",
|
||||||
|
])
|
||||||
|
|
||||||
|
# Construire un graphe d'adjacence
|
||||||
|
adjacency: Dict[str, List[dict]] = {}
|
||||||
|
for edge in edges:
|
||||||
|
from_node = edge.get("from_node") or edge.get("source_node", "")
|
||||||
|
adjacency.setdefault(from_node, []).append(edge)
|
||||||
|
|
||||||
|
# Parcours DFS pour trouver les chemins (limites a eviter l'explosion)
|
||||||
|
MAX_PATHS = 100
|
||||||
|
paths: List[List[str]] = []
|
||||||
|
|
||||||
|
def _dfs(current: str, path: List[str], visited: set) -> None:
|
||||||
|
if len(paths) >= MAX_PATHS:
|
||||||
|
return
|
||||||
|
if current in visited:
|
||||||
|
# Boucle detectee, sauvegarder le chemin tel quel
|
||||||
|
paths.append(path[:])
|
||||||
|
return
|
||||||
|
visited.add(current)
|
||||||
|
path.append(current)
|
||||||
|
|
||||||
|
outgoing = adjacency.get(current, [])
|
||||||
|
if not outgoing:
|
||||||
|
# End node
|
||||||
|
paths.append(path[:])
|
||||||
|
else:
|
||||||
|
for edge in outgoing:
|
||||||
|
to_node = edge.get("to_node") or edge.get("target_node", "")
|
||||||
|
if to_node:
|
||||||
|
_dfs(to_node, path, visited)
|
||||||
|
path.pop()
|
||||||
|
visited.discard(current)
|
||||||
|
|
||||||
|
for entry in entry_nodes:
|
||||||
|
if entry in nodes:
|
||||||
|
_dfs(entry, [], set())
|
||||||
|
|
||||||
|
# Si pas d'entry nodes, essayer tous les nodes sans edges entrants
|
||||||
|
if not paths:
|
||||||
|
target_nodes = set()
|
||||||
|
for edge in edges:
|
||||||
|
to_node = edge.get("to_node") or edge.get("target_node", "")
|
||||||
|
target_nodes.add(to_node)
|
||||||
|
root_nodes = [nid for nid in nodes if nid not in target_nodes]
|
||||||
|
for root in root_nodes[:3]:
|
||||||
|
_dfs(root, [], set())
|
||||||
|
|
||||||
|
# Construire le DataFrame
|
||||||
|
rows: List[Dict[str, Any]] = []
|
||||||
|
try:
|
||||||
|
base_time = pd.Timestamp(datetime.fromisoformat(created_at))
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
base_time = pd.Timestamp(datetime.now(timezone.utc))
|
||||||
|
|
||||||
|
for i, path in enumerate(paths):
|
||||||
|
case_id = f"{wf_id}_path_{i}"
|
||||||
|
for step_idx, node_id in enumerate(path):
|
||||||
|
node = nodes.get(node_id, {})
|
||||||
|
rows.append({
|
||||||
|
"case:concept:name": case_id,
|
||||||
|
"concept:name": node.get("name", node_id),
|
||||||
|
"time:timestamp": base_time + pd.Timedelta(seconds=step_idx),
|
||||||
|
})
|
||||||
|
|
||||||
|
df = pd.DataFrame(rows)
|
||||||
|
if not df.empty:
|
||||||
|
df = df.sort_values(["case:concept:name", "time:timestamp"]).reset_index(drop=True)
|
||||||
|
logger.info(
|
||||||
|
"Event log depuis workflow : %d evenements, %d chemins",
|
||||||
|
len(df), len(paths),
|
||||||
|
)
|
||||||
|
return df
|
||||||
|
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# Decouverte BPMN
|
||||||
|
# ===========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def discover_bpmn(
|
||||||
|
event_log_df: pd.DataFrame,
|
||||||
|
output_dir: str = "data/analytics/bpmn",
|
||||||
|
name: str = "process",
|
||||||
|
) -> dict:
|
||||||
|
"""
|
||||||
|
Decouvre un modele BPMN depuis un event log via Inductive Miner.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
event_log_df: DataFrame au format PM4Py.
|
||||||
|
output_dir: repertoire de sortie pour les fichiers generes.
|
||||||
|
name: prefixe pour les noms de fichiers.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
{
|
||||||
|
'bpmn_xml_path': str,
|
||||||
|
'bpmn_image_path': str,
|
||||||
|
'petri_net_image_path': str,
|
||||||
|
'dfg_image_path': str,
|
||||||
|
'stats': {
|
||||||
|
'activities': int,
|
||||||
|
'variants': int,
|
||||||
|
'cases': int,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
if not PM4PY_AVAILABLE:
|
||||||
|
raise ImportError("pm4py n'est pas installe. Installez-le : pip install pm4py")
|
||||||
|
|
||||||
|
if event_log_df.empty:
|
||||||
|
raise ValueError("Event log vide, impossible de decouvrir un BPMN")
|
||||||
|
|
||||||
|
out = Path(output_dir)
|
||||||
|
out.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
# Decouverte BPMN par Inductive Miner
|
||||||
|
bpmn_model = pm4py.discover_bpmn_inductive(event_log_df)
|
||||||
|
|
||||||
|
# Export BPMN XML
|
||||||
|
bpmn_xml_path = str(out / f"{name}.bpmn")
|
||||||
|
try:
|
||||||
|
pm4py.write_bpmn(bpmn_model, bpmn_xml_path)
|
||||||
|
except Exception as e:
|
||||||
|
# PM4Py layout peut echouer avec des labels contenant des caracteres
|
||||||
|
# speciaux (accents, guillemets, etc.). Fallback : export via l'exporter
|
||||||
|
# interne sans layout.
|
||||||
|
logger.warning("Layout BPMN echoue (%s), export sans layout", e)
|
||||||
|
from pm4py.objects.bpmn.exporter import exporter as bpmn_exporter
|
||||||
|
bpmn_exporter.apply(bpmn_model, bpmn_xml_path)
|
||||||
|
logger.info("BPMN XML exporte : %s", bpmn_xml_path)
|
||||||
|
|
||||||
|
# Export image BPMN (PNG) — grande taille pour lisibilité
|
||||||
|
bpmn_image_path = str(out / f"{name}_bpmn.png")
|
||||||
|
try:
|
||||||
|
from pm4py.visualization.bpmn import visualizer as bpmn_vis
|
||||||
|
gviz = bpmn_vis.apply(bpmn_model, parameters={
|
||||||
|
"rankdir": "TB",
|
||||||
|
"font_size": "12",
|
||||||
|
})
|
||||||
|
gviz.graph_attr["dpi"] = "150"
|
||||||
|
gviz.graph_attr["size"] = "40,20!"
|
||||||
|
gviz.graph_attr["rankdir"] = "TB"
|
||||||
|
gviz.render(filename=bpmn_image_path.replace(".png", ""), format="png", cleanup=True)
|
||||||
|
logger.info("BPMN PNG exporte : %s", bpmn_image_path)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("BPMN image fallback : %s", e)
|
||||||
|
try:
|
||||||
|
pm4py.save_vis_bpmn(bpmn_model, bpmn_image_path)
|
||||||
|
except Exception:
|
||||||
|
bpmn_image_path = None
|
||||||
|
|
||||||
|
# DFG (Directly-Follows Graph) — grande taille
|
||||||
|
dfg_image_path = str(out / f"{name}_dfg.png")
|
||||||
|
try:
|
||||||
|
from pm4py.visualization.dfg import visualizer as dfg_vis
|
||||||
|
dfg, sa, ea = pm4py.discover_dfg(event_log_df)
|
||||||
|
gviz = dfg_vis.apply(dfg, activities_count=sa, parameters={
|
||||||
|
"start_activities": sa,
|
||||||
|
"end_activities": ea,
|
||||||
|
"rankdir": "TB",
|
||||||
|
"font_size": "11",
|
||||||
|
})
|
||||||
|
gviz.graph_attr["dpi"] = "150"
|
||||||
|
gviz.graph_attr["size"] = "40,20!"
|
||||||
|
gviz.graph_attr["rankdir"] = "TB"
|
||||||
|
gviz.render(filename=dfg_image_path.replace(".png", ""), format="png", cleanup=True)
|
||||||
|
logger.info("DFG PNG exporte : %s", dfg_image_path)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("DFG image fallback : %s", e)
|
||||||
|
try:
|
||||||
|
pm4py.save_vis_dfg(*pm4py.discover_dfg(event_log_df), file_path=dfg_image_path)
|
||||||
|
except Exception:
|
||||||
|
dfg_image_path = None
|
||||||
|
|
||||||
|
# Petri net via Inductive Miner (pour visualisation alternative)
|
||||||
|
petri_image_path = str(out / f"{name}_petri.png")
|
||||||
|
try:
|
||||||
|
net, im, fm = pm4py.discover_petri_net_inductive(event_log_df)
|
||||||
|
pm4py.save_vis_petri_net(net, im, fm, file_path=petri_image_path)
|
||||||
|
logger.info("Petri net PNG exporte : %s", petri_image_path)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("Impossible de generer le Petri net : %s", e)
|
||||||
|
petri_image_path = None
|
||||||
|
|
||||||
|
# Stats de base
|
||||||
|
variants = pm4py.get_variants(event_log_df)
|
||||||
|
n_cases = event_log_df["case:concept:name"].nunique()
|
||||||
|
n_activities = event_log_df["concept:name"].nunique()
|
||||||
|
|
||||||
|
result = {
|
||||||
|
"bpmn_xml_path": bpmn_xml_path,
|
||||||
|
"bpmn_image_path": bpmn_image_path,
|
||||||
|
"petri_net_image_path": petri_image_path,
|
||||||
|
"dfg_image_path": dfg_image_path,
|
||||||
|
"stats": {
|
||||||
|
"activities": n_activities,
|
||||||
|
"variants": len(variants),
|
||||||
|
"cases": n_cases,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
logger.info("Decouverte BPMN terminee : %s", result["stats"])
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# KPIs de process mining
|
||||||
|
# ===========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def compute_kpis(event_log_df: pd.DataFrame) -> dict:
|
||||||
|
"""
|
||||||
|
Calcule les KPIs de process mining.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
{
|
||||||
|
'total_cases': int,
|
||||||
|
'total_events': int,
|
||||||
|
'unique_activities': int,
|
||||||
|
'variants_count': int,
|
||||||
|
'variants_top5': list,
|
||||||
|
'avg_case_duration_seconds': float,
|
||||||
|
'median_case_duration_seconds': float,
|
||||||
|
'avg_events_per_case': float,
|
||||||
|
'activity_stats': {
|
||||||
|
'<activity_name>': {
|
||||||
|
'count': int,
|
||||||
|
'avg_duration_seconds': float,
|
||||||
|
'min_duration_seconds': float,
|
||||||
|
'max_duration_seconds': float,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
'bottlenecks': [...], # top 3 activites les plus lentes
|
||||||
|
'app_distribution': { '<app_name>': int },
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
if event_log_df.empty:
|
||||||
|
return {
|
||||||
|
"total_cases": 0,
|
||||||
|
"total_events": 0,
|
||||||
|
"unique_activities": 0,
|
||||||
|
"variants_count": 0,
|
||||||
|
"variants_top5": [],
|
||||||
|
"avg_case_duration_seconds": 0.0,
|
||||||
|
"median_case_duration_seconds": 0.0,
|
||||||
|
"avg_events_per_case": 0.0,
|
||||||
|
"activity_stats": {},
|
||||||
|
"bottlenecks": [],
|
||||||
|
"app_distribution": {},
|
||||||
|
}
|
||||||
|
|
||||||
|
df = event_log_df.copy()
|
||||||
|
|
||||||
|
# ---- Metriques globales ----
|
||||||
|
total_cases = df["case:concept:name"].nunique()
|
||||||
|
total_events = len(df)
|
||||||
|
unique_activities = df["concept:name"].nunique()
|
||||||
|
|
||||||
|
# ---- Variantes (PM4Py) ----
|
||||||
|
if PM4PY_AVAILABLE:
|
||||||
|
variants = pm4py.get_variants(df)
|
||||||
|
variants_count = len(variants)
|
||||||
|
# Top 5 variantes par frequence
|
||||||
|
sorted_variants = sorted(variants.items(), key=lambda x: x[1], reverse=True)
|
||||||
|
variants_top5 = [
|
||||||
|
{"variant": " -> ".join(v), "count": c}
|
||||||
|
for v, c in sorted_variants[:5]
|
||||||
|
]
|
||||||
|
else:
|
||||||
|
variants_count = 0
|
||||||
|
variants_top5 = []
|
||||||
|
|
||||||
|
# ---- Duree par case ----
|
||||||
|
case_durations: List[float] = []
|
||||||
|
for _case_id, group in df.groupby("case:concept:name"):
|
||||||
|
ts = group["time:timestamp"]
|
||||||
|
if len(ts) >= 2:
|
||||||
|
duration = (ts.max() - ts.min()).total_seconds()
|
||||||
|
case_durations.append(duration)
|
||||||
|
|
||||||
|
avg_case_dur = float(pd.Series(case_durations).mean()) if case_durations else 0.0
|
||||||
|
median_case_dur = float(pd.Series(case_durations).median()) if case_durations else 0.0
|
||||||
|
avg_events_per_case = total_events / total_cases if total_cases > 0 else 0.0
|
||||||
|
|
||||||
|
# ---- Stats par activite ----
|
||||||
|
activity_stats: Dict[str, Dict[str, Any]] = {}
|
||||||
|
# Calculer la duree entre chaque evenement et le suivant dans le meme case
|
||||||
|
df_sorted = df.sort_values(["case:concept:name", "time:timestamp"])
|
||||||
|
df_sorted["next_timestamp"] = df_sorted.groupby("case:concept:name")[
|
||||||
|
"time:timestamp"
|
||||||
|
].shift(-1)
|
||||||
|
df_sorted["duration_to_next"] = (
|
||||||
|
df_sorted["next_timestamp"] - df_sorted["time:timestamp"]
|
||||||
|
).dt.total_seconds()
|
||||||
|
|
||||||
|
for activity, grp in df_sorted.groupby("concept:name"):
|
||||||
|
durations = grp["duration_to_next"].dropna()
|
||||||
|
# Filtrer les durees aberrantes (> 5 min = probablement une pause)
|
||||||
|
durations = durations[durations <= 300]
|
||||||
|
stats: Dict[str, Any] = {
|
||||||
|
"count": len(grp),
|
||||||
|
"avg_duration_seconds": round(float(durations.mean()), 2) if len(durations) > 0 else 0.0,
|
||||||
|
"min_duration_seconds": round(float(durations.min()), 2) if len(durations) > 0 else 0.0,
|
||||||
|
"max_duration_seconds": round(float(durations.max()), 2) if len(durations) > 0 else 0.0,
|
||||||
|
}
|
||||||
|
activity_stats[activity] = stats
|
||||||
|
|
||||||
|
# ---- Goulots d'etranglement (top 3 activites les plus lentes) ----
|
||||||
|
bottlenecks = sorted(
|
||||||
|
[
|
||||||
|
{"activity": act, "avg_duration_seconds": s["avg_duration_seconds"]}
|
||||||
|
for act, s in activity_stats.items()
|
||||||
|
if s["avg_duration_seconds"] > 0
|
||||||
|
],
|
||||||
|
key=lambda x: x["avg_duration_seconds"],
|
||||||
|
reverse=True,
|
||||||
|
)[:3]
|
||||||
|
|
||||||
|
# ---- Distribution par application ----
|
||||||
|
app_distribution: Dict[str, int] = {}
|
||||||
|
if "app_name" in df.columns:
|
||||||
|
app_distribution = df["app_name"].value_counts().to_dict()
|
||||||
|
|
||||||
|
return {
|
||||||
|
"total_cases": total_cases,
|
||||||
|
"total_events": total_events,
|
||||||
|
"unique_activities": unique_activities,
|
||||||
|
"variants_count": variants_count,
|
||||||
|
"variants_top5": variants_top5,
|
||||||
|
"avg_case_duration_seconds": round(avg_case_dur, 2),
|
||||||
|
"median_case_duration_seconds": round(median_case_dur, 2),
|
||||||
|
"avg_events_per_case": round(avg_events_per_case, 1),
|
||||||
|
"activity_stats": activity_stats,
|
||||||
|
"bottlenecks": bottlenecks,
|
||||||
|
"app_distribution": app_distribution,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ===========================================================================
|
||||||
|
# Helpers : chargement sessions JSONL
|
||||||
|
# ===========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def load_jsonl_session(jsonl_path: str) -> List[dict]:
|
||||||
|
"""
|
||||||
|
Charge un fichier live_events.jsonl en liste de dicts.
|
||||||
|
|
||||||
|
Ignore les lignes vides ou invalides.
|
||||||
|
"""
|
||||||
|
events: List[dict] = []
|
||||||
|
path = Path(jsonl_path)
|
||||||
|
if not path.exists():
|
||||||
|
raise FileNotFoundError(f"Fichier JSONL introuvable : {jsonl_path}")
|
||||||
|
|
||||||
|
with open(path, "r", encoding="utf-8") as f:
|
||||||
|
for line_num, line in enumerate(f, 1):
|
||||||
|
line = line.strip()
|
||||||
|
if not line:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
events.append(json.loads(line))
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
logger.warning("Ligne %d invalide dans %s : %s", line_num, jsonl_path, e)
|
||||||
|
|
||||||
|
logger.info("Charge %d evenements depuis %s", len(events), jsonl_path)
|
||||||
|
return events
|
||||||
|
|
||||||
|
|
||||||
|
def load_multiple_sessions(session_dirs: List[str]) -> List[dict]:
|
||||||
|
"""
|
||||||
|
Charge plusieurs sessions depuis leurs repertoires.
|
||||||
|
|
||||||
|
Cherche un fichier live_events.jsonl dans chaque repertoire.
|
||||||
|
"""
|
||||||
|
all_events: List[dict] = []
|
||||||
|
for session_dir in session_dirs:
|
||||||
|
jsonl_path = Path(session_dir) / "live_events.jsonl"
|
||||||
|
if jsonl_path.exists():
|
||||||
|
all_events.extend(load_jsonl_session(str(jsonl_path)))
|
||||||
|
else:
|
||||||
|
logger.warning("Pas de live_events.jsonl dans %s", session_dir)
|
||||||
|
return all_events
|
||||||
60
core/analytics/screen_change_detector.py
Normal file
60
core/analytics/screen_change_detector.py
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
"""
|
||||||
|
Détection rapide de changement d'écran via perceptual hash (pHash).
|
||||||
|
|
||||||
|
Utilise imagehash pour calculer un hash perceptuel par screenshot.
|
||||||
|
La distance de Hamming entre deux hashes indique le degré de changement :
|
||||||
|
- < 5 : même écran (bruit, curseur déplacé)
|
||||||
|
- 5-15 : changement mineur (scroll, popup, champ rempli)
|
||||||
|
- > 15 : nouvel écran (nouvelle fenêtre, navigation)
|
||||||
|
|
||||||
|
Performance : ~15ms par hash sur CPU pour des screenshots 2560x1600.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from PIL import Image
|
||||||
|
import imagehash
|
||||||
|
from typing import Tuple, Optional
|
||||||
|
from enum import Enum
|
||||||
|
|
||||||
|
|
||||||
|
class ScreenChangeLevel(Enum):
|
||||||
|
SAME = "same" # distance < 5
|
||||||
|
MINOR = "minor" # 5 <= distance < 15
|
||||||
|
MAJOR = "major" # distance >= 15
|
||||||
|
|
||||||
|
|
||||||
|
def compute_phash(image: Image.Image, hash_size: int = 8) -> imagehash.ImageHash:
|
||||||
|
"""Calcule le pHash d'une image PIL."""
|
||||||
|
return imagehash.phash(image, hash_size=hash_size)
|
||||||
|
|
||||||
|
|
||||||
|
def compare_screenshots(img1: Image.Image, img2: Image.Image, hash_size: int = 8) -> Tuple[int, ScreenChangeLevel]:
|
||||||
|
"""
|
||||||
|
Compare deux screenshots et retourne la distance + le niveau de changement.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
(distance, level) — distance de Hamming et niveau de changement
|
||||||
|
"""
|
||||||
|
h1 = compute_phash(img1, hash_size)
|
||||||
|
h2 = compute_phash(img2, hash_size)
|
||||||
|
distance = h1 - h2
|
||||||
|
|
||||||
|
if distance < 5:
|
||||||
|
level = ScreenChangeLevel.SAME
|
||||||
|
elif distance < 15:
|
||||||
|
level = ScreenChangeLevel.MINOR
|
||||||
|
else:
|
||||||
|
level = ScreenChangeLevel.MAJOR
|
||||||
|
|
||||||
|
return distance, level
|
||||||
|
|
||||||
|
|
||||||
|
def compare_hashes(hash1: imagehash.ImageHash, hash2: imagehash.ImageHash) -> Tuple[int, ScreenChangeLevel]:
|
||||||
|
"""Compare deux hashes pré-calculés."""
|
||||||
|
distance = hash1 - hash2
|
||||||
|
if distance < 5:
|
||||||
|
level = ScreenChangeLevel.SAME
|
||||||
|
elif distance < 15:
|
||||||
|
level = ScreenChangeLevel.MINOR
|
||||||
|
else:
|
||||||
|
level = ScreenChangeLevel.MAJOR
|
||||||
|
return distance, level
|
||||||
@@ -42,6 +42,8 @@ class TimeSeriesStore:
|
|||||||
ON execution_metrics(started_at);
|
ON execution_metrics(started_at);
|
||||||
|
|
||||||
-- Step metrics table
|
-- Step metrics table
|
||||||
|
-- Les colonnes ocr_ms, ui_ms, analyze_ms, total_ms, cache_hit, degraded
|
||||||
|
-- proviennent de l'instrumentation vision-aware (C1) de ExecutionLoop.
|
||||||
CREATE TABLE IF NOT EXISTS step_metrics (
|
CREATE TABLE IF NOT EXISTS step_metrics (
|
||||||
step_id TEXT PRIMARY KEY,
|
step_id TEXT PRIMARY KEY,
|
||||||
execution_id TEXT NOT NULL,
|
execution_id TEXT NOT NULL,
|
||||||
@@ -56,6 +58,12 @@ class TimeSeriesStore:
|
|||||||
confidence_score REAL,
|
confidence_score REAL,
|
||||||
retry_count INTEGER DEFAULT 0,
|
retry_count INTEGER DEFAULT 0,
|
||||||
error_details TEXT,
|
error_details TEXT,
|
||||||
|
ocr_ms REAL DEFAULT 0.0,
|
||||||
|
ui_ms REAL DEFAULT 0.0,
|
||||||
|
analyze_ms REAL DEFAULT 0.0,
|
||||||
|
total_ms REAL DEFAULT 0.0,
|
||||||
|
cache_hit INTEGER DEFAULT 0,
|
||||||
|
degraded INTEGER DEFAULT 0,
|
||||||
FOREIGN KEY (execution_id) REFERENCES execution_metrics(execution_id)
|
FOREIGN KEY (execution_id) REFERENCES execution_metrics(execution_id)
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -101,12 +109,41 @@ class TimeSeriesStore:
|
|||||||
|
|
||||||
logger.info(f"TimeSeriesStore initialized at {self.db_path}")
|
logger.info(f"TimeSeriesStore initialized at {self.db_path}")
|
||||||
|
|
||||||
|
# Colonnes ajoutées ultérieurement — appliquées via ALTER TABLE si absentes.
|
||||||
|
# (C1 — instrumentation vision-aware, avril 2026)
|
||||||
|
_STEP_METRICS_MIGRATIONS = [
|
||||||
|
("ocr_ms", "REAL DEFAULT 0.0"),
|
||||||
|
("ui_ms", "REAL DEFAULT 0.0"),
|
||||||
|
("analyze_ms", "REAL DEFAULT 0.0"),
|
||||||
|
("total_ms", "REAL DEFAULT 0.0"),
|
||||||
|
("cache_hit", "INTEGER DEFAULT 0"),
|
||||||
|
("degraded", "INTEGER DEFAULT 0"),
|
||||||
|
]
|
||||||
|
|
||||||
def _init_database(self) -> None:
|
def _init_database(self) -> None:
|
||||||
"""Initialize database schema."""
|
"""Initialize database schema and apply lightweight migrations."""
|
||||||
with self._get_connection() as conn:
|
with self._get_connection() as conn:
|
||||||
conn.executescript(self.SCHEMA)
|
conn.executescript(self.SCHEMA)
|
||||||
|
self._migrate_step_metrics(conn)
|
||||||
conn.commit()
|
conn.commit()
|
||||||
|
|
||||||
|
def _migrate_step_metrics(self, conn: sqlite3.Connection) -> None:
|
||||||
|
"""Ajoute les colonnes C1 sur une base `step_metrics` pré-existante."""
|
||||||
|
cursor = conn.execute("PRAGMA table_info(step_metrics)")
|
||||||
|
existing = {row[1] for row in cursor.fetchall()}
|
||||||
|
for column, ddl in self._STEP_METRICS_MIGRATIONS:
|
||||||
|
if column not in existing:
|
||||||
|
try:
|
||||||
|
conn.execute(
|
||||||
|
f"ALTER TABLE step_metrics ADD COLUMN {column} {ddl}"
|
||||||
|
)
|
||||||
|
logger.info(
|
||||||
|
f"Migration step_metrics: ajout colonne {column}"
|
||||||
|
)
|
||||||
|
except sqlite3.OperationalError as e:
|
||||||
|
# Collision bénigne (colonne déjà ajoutée par un autre process)
|
||||||
|
logger.debug(f"Migration colonne {column} ignorée: {e}")
|
||||||
|
|
||||||
@contextmanager
|
@contextmanager
|
||||||
def _get_connection(self):
|
def _get_connection(self):
|
||||||
"""Get database connection context manager."""
|
"""Get database connection context manager."""
|
||||||
@@ -164,13 +201,14 @@ class TimeSeriesStore:
|
|||||||
))
|
))
|
||||||
|
|
||||||
def _write_step_metric(self, conn: sqlite3.Connection, metric: StepMetrics) -> None:
|
def _write_step_metric(self, conn: sqlite3.Connection, metric: StepMetrics) -> None:
|
||||||
"""Write step metric."""
|
"""Write step metric (inclut les champs vision-aware C1)."""
|
||||||
conn.execute("""
|
conn.execute("""
|
||||||
INSERT OR REPLACE INTO step_metrics
|
INSERT OR REPLACE INTO step_metrics
|
||||||
(step_id, execution_id, workflow_id, node_id, action_type, target_element,
|
(step_id, execution_id, workflow_id, node_id, action_type, target_element,
|
||||||
started_at, completed_at, duration_ms, status, confidence_score,
|
started_at, completed_at, duration_ms, status, confidence_score,
|
||||||
retry_count, error_details)
|
retry_count, error_details,
|
||||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
ocr_ms, ui_ms, analyze_ms, total_ms, cache_hit, degraded)
|
||||||
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
""", (
|
""", (
|
||||||
metric.step_id,
|
metric.step_id,
|
||||||
metric.execution_id,
|
metric.execution_id,
|
||||||
@@ -184,7 +222,13 @@ class TimeSeriesStore:
|
|||||||
metric.status,
|
metric.status,
|
||||||
metric.confidence_score,
|
metric.confidence_score,
|
||||||
metric.retry_count,
|
metric.retry_count,
|
||||||
metric.error_details
|
metric.error_details,
|
||||||
|
getattr(metric, 'ocr_ms', 0.0),
|
||||||
|
getattr(metric, 'ui_ms', 0.0),
|
||||||
|
getattr(metric, 'analyze_ms', 0.0),
|
||||||
|
getattr(metric, 'total_ms', 0.0),
|
||||||
|
1 if getattr(metric, 'cache_hit', False) else 0,
|
||||||
|
1 if getattr(metric, 'degraded', False) else 0,
|
||||||
))
|
))
|
||||||
|
|
||||||
def _write_resource_metric(self, conn: sqlite3.Connection, metric: ResourceMetrics) -> None:
|
def _write_resource_metric(self, conn: sqlite3.Connection, metric: ResourceMetrics) -> None:
|
||||||
|
|||||||
31
core/anonymisation/__init__.py
Normal file
31
core/anonymisation/__init__.py
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
# core/anonymisation/__init__.py
"""Server-side targeted PII blurring.

Replaces the old client-side blur (`agent_v0/agent_v1/vision/blur_sensitive.py`)
which blurred every light text area, breaking ICD codes, PMSI amounts and
buttons.

Strategy:
    1. OCR (docTR) on the screenshot -> text + bounding boxes
    2. NER (EDS-NLP when available, regex fallback otherwise) -> PII detection
    3. Filtering: keep only PERSON / LOCATION / PHONE / NIR / EMAIL
    4. Gaussian blur applied only on the bboxes of the filtered PII

Usage:
    from core.anonymisation import blur_pii_on_image
    blurred_path = blur_pii_on_image("shot_0001_full.png")
"""

from .pii_blur import (
    PIIBlurResult,
    PIIEntity,
    PIIBlurrer,
    blur_pii_on_image,
)

__all__ = [
    "PIIBlurResult",
    "PIIEntity",
    "PIIBlurrer",
    "blur_pii_on_image",
]
|
||||||
650
core/anonymisation/pii_blur.py
Normal file
650
core/anonymisation/pii_blur.py
Normal file
@@ -0,0 +1,650 @@
|
|||||||
|
# core/anonymisation/pii_blur.py
|
||||||
|
"""Floutage ciblé des PII côté serveur (Personal Identifiable Information).
|
||||||
|
|
||||||
|
Contexte
|
||||||
|
--------
|
||||||
|
L'ancien blur côté client (`agent_v0/agent_v1/vision/blur_sensitive.py`) était
|
||||||
|
trop agressif : il floutait TOUTES les zones blanches avec texte, ce qui
|
||||||
|
détruisait les codes CIM-10, les montants PMSI, les boutons et rendait les
|
||||||
|
screenshots inutilisables pour le replay ou le grounding VLM. De plus,
|
||||||
|
`opencv-python` n'était pas listé dans les dépendances de l'agent, donc le blur
|
||||||
|
échouait silencieusement en production.
|
||||||
|
|
||||||
|
Stratégie retenue (avril 2026)
|
||||||
|
------------------------------
|
||||||
|
1. Agent = zéro blur → envoie les screenshots bruts via TLS.
|
||||||
|
2. Serveur = OCR (docTR) + NER (EDS-NLP avec fallback regex).
|
||||||
|
3. On floute UNIQUEMENT les entités :
|
||||||
|
- PERSON → noms, prénoms
|
||||||
|
- LOCATION → adresses, villes
|
||||||
|
- PHONE → numéros de téléphone
|
||||||
|
- NIR → numéro de sécurité sociale
|
||||||
|
- EMAIL → adresses électroniques
|
||||||
|
Et on préserve :
|
||||||
|
- codes CIM-10 / CCAM
|
||||||
|
- montants (1250€, 31,50 €)
|
||||||
|
- dates (pas PII au sens RGPD santé)
|
||||||
|
- identifiants techniques (shot_0001, session IDs…)
|
||||||
|
4. Deux fichiers sont stockés :
|
||||||
|
- `shot_XXXX_full.png` → version brute (accès restreint)
|
||||||
|
- `shot_XXXX_full_blurred.png` → version pour affichage
|
||||||
|
|
||||||
|
Performance
|
||||||
|
-----------
|
||||||
|
Objectif : < 2 s par screenshot sur RTX 5070.
|
||||||
|
docTR (db_mobilenet_v3_large + crnn_mobilenet_v3_large) : ~800 ms CPU, ~300 ms GPU.
|
||||||
|
EDS-NLP pipeline minimal : ~100 ms pour un texte d'écran typique.
|
||||||
|
Fallback regex : < 10 ms.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import tempfile
|
||||||
|
import time
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Iterable, List, Optional, Sequence, Tuple, Union
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Types
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Type d'entité PII reconnu. Aligné sur les labels EDS-NLP (`nlp.pipes.eds`)
|
||||||
|
# et enrichi par nos propres patterns regex.
|
||||||
|
PII_LABELS = frozenset({
|
||||||
|
"PERSON", # noms de patient, médecin
|
||||||
|
"LOCATION", # adresses, ville, code postal
|
||||||
|
"ADDRESS", # alias de LOCATION (certains pipelines le produisent)
|
||||||
|
"PHONE", # téléphone
|
||||||
|
"NIR", # numéro sécu FR (15 chiffres)
|
||||||
|
"SECURITY_NUMBER", # alias de NIR
|
||||||
|
"EMAIL", # adresse email
|
||||||
|
})
|
||||||
|
|
||||||
|
# Motifs qu'on NE DOIT PAS flouter même s'ils ressemblent à des PII :
|
||||||
|
# - codes CIM-10 : 1 lettre + 2 chiffres + optionnellement .xx
|
||||||
|
# - codes CCAM : 4 lettres + 3 chiffres
|
||||||
|
# - montants (€, euros)
|
||||||
|
# - dates format fr (dd/mm/yyyy, dd-mm-yy)
|
||||||
|
# - identifiants techniques (ex: shot_0001, session_xxxxx)
|
||||||
|
_RE_ICD10 = re.compile(r"\b[A-Z]\d{2}(\.\d{1,3})?\b")
|
||||||
|
_RE_CCAM = re.compile(r"\b[A-Z]{4}\d{3}\b")
|
||||||
|
_RE_MONEY = re.compile(r"\b\d{1,3}(?:[.,\s]\d{3})*(?:[.,]\d{1,2})?\s?€\b", re.IGNORECASE)
|
||||||
|
_RE_DATE = re.compile(r"\b(0?[1-9]|[12]\d|3[01])[/.-](0?[1-9]|1[0-2])[/.-](\d{2}|\d{4})\b")
|
||||||
|
_RE_TECH_ID = re.compile(r"\b(?:shot|session|sess|frame|trace|req|msg)_[\w-]+\b", re.IGNORECASE)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Entités PII
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class PIIEntity:
    """A PII entity detected in a screenshot."""
    label: str                        # PERSON, LOCATION, PHONE, NIR, EMAIL
    text: str                         # Raw detected text
    bbox: Tuple[int, int, int, int]   # (x1, y1, x2, y2) in pixels
    confidence: float = 1.0           # NER score (1.0 when regex-matched)
    source: str = "ner"               # "ner" (EDS-NLP) or "regex"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class PIIBlurResult:
    """Result of the blur pipeline."""
    raw_path: Path                 # Original (unblurred) screenshot
    blurred_path: Path             # Output image with PII regions blurred
    entities: List[PIIEntity] = field(default_factory=list)  # Detected PII
    elapsed_ms: float = 0.0        # End-to-end wall time
    ocr_ms: float = 0.0            # Time spent in OCR
    ner_ms: float = 0.0            # Time spent in NER
    blur_ms: float = 0.0           # Time spent applying the blur
    ocr_engine: str = "doctr"
    ner_engine: str = "regex"      # or "edsnlp"

    @property
    def count(self) -> int:
        """Number of detected PII entities."""
        return len(self.entities)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Fallback NER par regex (utilisé si EDS-NLP indisponible)
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Précaution : on ne marque comme PHONE que des suites contiguës de 10 chiffres
|
||||||
|
# (FR) ou un format international. Les codes à 3-4 chiffres sont ignorés.
|
||||||
|
_RE_PHONE = re.compile(
|
||||||
|
r"\b(?:(?:\+?33|0)\s?[1-9])(?:[\s.-]?\d{2}){4}\b"
|
||||||
|
)
|
||||||
|
_RE_NIR = re.compile(
|
||||||
|
r"\b[12]\s?\d{2}\s?(?:0[1-9]|1[0-2]|20)\s?(?:\d{2}|2A|2B)\s?\d{3}\s?\d{3}(?:\s?\d{2})?\b"
|
||||||
|
)
|
||||||
|
_RE_EMAIL = re.compile(
|
||||||
|
r"\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b", re.IGNORECASE
|
||||||
|
)
|
||||||
|
# Nom : Prénom Nom (au moins 2 majuscules initiales). Attrape aussi
|
||||||
|
# "Mme Dupont", "M. Martin", "Dr. Bernard".
|
||||||
|
# On utilise [^\S\n] (whitespace SANS newline) pour empêcher le match de sauter
|
||||||
|
# de ligne — les lignes sont typiquement des champs distincts dans une UI métier.
|
||||||
|
_RE_PERSON = re.compile(
|
||||||
|
r"\b(?:M\.?|Mme|Mlle|Dr\.?|Pr\.?|Prof\.?)[^\S\n]+"
|
||||||
|
r"[A-ZÉÈÀÂÎÔÛÇ][a-zéèàâîôûç\-]+"
|
||||||
|
r"(?:[^\S\n]+[A-ZÉÈÀÂÎÔÛÇ][a-zéèàâîôûç\-]+)?"
|
||||||
|
)
|
||||||
|
# Adresse : "12 rue de la Paix", "3, avenue Victor Hugo"
|
||||||
|
# Même principe : on empêche le matching de franchir les sauts de ligne.
|
||||||
|
_RE_ADDRESS = re.compile(
|
||||||
|
r"\b\d{1,4}(?:[^\S\n]?(?:bis|ter|quater))?[,\s]+(?:rue|avenue|av\.?|bd|boulevard|"
|
||||||
|
r"allée|all\.?|place|impasse|chemin|route|rte\.?|quai|cours|voie|passage)"
|
||||||
|
r"[^\S\n]+(?:de[^\S\n]+|du[^\S\n]+|des[^\S\n]+|la[^\S\n]+|le[^\S\n]+|les[^\S\n]+|l'|de[^\S\n]+la[^\S\n]+|d')?"
|
||||||
|
r"[A-Za-zÀ-ÿ\-' ]{2,40}",
|
||||||
|
re.IGNORECASE,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _regex_find_pii(text: str) -> List[Tuple[str, int, int]]:
    """Locate PII spans with regexes; returns (label, start, end) triples.

    Any span overlapping a "technical" match (ICD/CCAM codes, amounts,
    dates, technical identifiers) is dropped so those stay readable even
    when another pattern would have caught them.
    """
    # 1. Collect every range that must NOT be blurred, once up front.
    shielded = [
        m.span()
        for pattern in (_RE_ICD10, _RE_CCAM, _RE_MONEY, _RE_DATE, _RE_TECH_ID)
        for m in pattern.finditer(text)
    ]

    def overlaps_shielded(lo: int, hi: int) -> bool:
        # Non-empty intersection with any protected range.
        return any(lo < p_hi and hi > p_lo for p_lo, p_hi in shielded)

    detectors = (
        ("NIR", _RE_NIR),
        ("EMAIL", _RE_EMAIL),
        ("PHONE", _RE_PHONE),
        ("PERSON", _RE_PERSON),
        ("LOCATION", _RE_ADDRESS),
    )
    found: List[Tuple[str, int, int]] = []
    for tag, pattern in detectors:
        for match in pattern.finditer(text):
            if not overlaps_shielded(match.start(), match.end()):
                found.append((tag, match.start(), match.end()))
    return found
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# NER via EDS-NLP (optionnel)
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Module-level cache: the EDS-NLP pipeline is built at most once.
_edsnlp_pipeline = None


def _get_edsnlp_pipeline():
    """Load an EDS-NLP pipeline when the package is importable.

    Returns None when EDS-NLP is absent or fails to initialise; callers
    then fall back to the regex-based PII detector.
    """
    global _edsnlp_pipeline
    if _edsnlp_pipeline is not None:
        return _edsnlp_pipeline

    try:
        import edsnlp  # type: ignore
    except ImportError:
        logger.info(
            "EDS-NLP non installé — fallback regex utilisé pour la détection PII. "
            "Pour activer EDS-NLP : pip install edsnlp"
        )
        return None

    try:
        pipeline = edsnlp.blank("eds")
        pipeline.add_pipe("eds.sentences")
        pipeline.add_pipe("eds.normalizer")
        # Optional components vary across EDS-NLP versions; add each one
        # best-effort so a missing component never disables the whole NER.
        for component in ("eds.names", "eds.dates", "eds.addresses"):
            try:
                pipeline.add_pipe(component)
            except Exception as e:  # noqa: BLE001
                logger.debug("EDS-NLP : composant %s indisponible (%s)", component, e)
        _edsnlp_pipeline = pipeline
        logger.info("EDS-NLP : pipeline chargée")
        return _edsnlp_pipeline
    except Exception as e:  # noqa: BLE001
        logger.warning("EDS-NLP non utilisable (%s) — fallback regex", e)
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def _edsnlp_find_pii(text: str, nlp) -> List[Tuple[str, int, int]]:
|
||||||
|
"""Utilise EDS-NLP pour trouver des entités PII.
|
||||||
|
|
||||||
|
Les labels EDS-NLP sont mappés vers nos labels canoniques.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
doc = nlp(text)
|
||||||
|
except Exception as e: # noqa: BLE001
|
||||||
|
logger.debug("EDS-NLP : échec sur texte de %d chars (%s)", len(text), e)
|
||||||
|
return []
|
||||||
|
|
||||||
|
mapping = {
|
||||||
|
"person": "PERSON",
|
||||||
|
"name": "PERSON",
|
||||||
|
"patient": "PERSON",
|
||||||
|
"doctor": "PERSON",
|
||||||
|
"location": "LOCATION",
|
||||||
|
"address": "LOCATION",
|
||||||
|
"city": "LOCATION",
|
||||||
|
}
|
||||||
|
hits: List[Tuple[str, int, int]] = []
|
||||||
|
for ent in getattr(doc, "ents", []):
|
||||||
|
raw_label = str(getattr(ent, "label_", "")).lower()
|
||||||
|
mapped = mapping.get(raw_label)
|
||||||
|
if mapped is None:
|
||||||
|
# On accepte aussi si le label EDS-NLP est déjà l'un de nos labels
|
||||||
|
upper = raw_label.upper()
|
||||||
|
if upper in PII_LABELS:
|
||||||
|
mapped = upper
|
||||||
|
if mapped:
|
||||||
|
hits.append((mapped, ent.start_char, ent.end_char))
|
||||||
|
return hits
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# OCR avec bounding boxes par mot (docTR)
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Lazily-built docTR predictor, cached at module level between calls.
_ocr_predictor = None


def _get_ocr_predictor():
    """Load a lightweight docTR predictor (mobilenet backbones) for fast OCR.

    The predictor is created once and cached in `_ocr_predictor`; it is
    moved to CUDA when torch reports an available GPU.
    """
    global _ocr_predictor
    if _ocr_predictor is not None:
        return _ocr_predictor
    from doctr.models import ocr_predictor  # type: ignore
    _ocr_predictor = ocr_predictor(
        det_arch="db_mobilenet_v3_large",
        reco_arch="crnn_mobilenet_v3_large",
        pretrained=True,
    )
    # Use the GPU when available.
    try:
        import torch  # type: ignore
        if torch.cuda.is_available():
            _ocr_predictor = _ocr_predictor.cuda()
            logger.info("pii_blur : docTR chargé sur CUDA")
        else:
            logger.info("pii_blur : docTR chargé sur CPU")
    except Exception:  # noqa: BLE001
        # torch missing or device query failed — keep the CPU predictor.
        logger.info("pii_blur : docTR chargé (device indéterminé)")
    return _ocr_predictor
|
||||||
|
|
||||||
|
|
||||||
|
def _doctr_ocr(image_path: Path) -> Tuple[List[dict], int, int]:
    """Run docTR on one image and return per-word pixel bounding boxes.

    Returns:
        (words, width, height) where words = [{text, x1, y1, x2, y2, line}, ...].
        Coordinates are clamped to the image bounds; `line` is a running
        OCR-line counter later used to keep regexes from matching across lines.
    """
    from doctr.io import DocumentFile  # type: ignore
    from PIL import Image

    predictor = _get_ocr_predictor()
    doc = DocumentFile.from_images([str(image_path)])
    result = predictor(doc)

    # docTR coordinates are normalised (0..1); remap to the real image size.
    with Image.open(image_path) as img:
        W, H = img.size

    words: List[dict] = []
    line_counter = 0
    for page in result.pages:
        for block in page.blocks:
            for line in block.lines:
                for word in line.words:
                    text = word.value
                    # Skip empty / whitespace-only recognitions.
                    if not text or not text.strip():
                        continue
                    (nx1, ny1), (nx2, ny2) = word.geometry
                    x1 = max(0, int(nx1 * W))
                    y1 = max(0, int(ny1 * H))
                    x2 = min(W, int(nx2 * W))
                    y2 = min(H, int(ny2 * H))
                    words.append({
                        "text": text,
                        "x1": x1, "y1": y1, "x2": x2, "y2": y2,
                        "line": line_counter,
                    })
                line_counter += 1
    return words, W, H
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Pipeline principal
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
class PIIBlurrer:
    """Reusable pipeline (keeps the models in memory between calls).

    Example:
        blurrer = PIIBlurrer()
        res = blurrer.blur_image("shot_0001_full.png")
        print(res.count, res.elapsed_ms)
    """

    def __init__(
        self,
        blur_kernel: Tuple[int, int] = (31, 31),
        blur_sigma: float = 15.0,
        bbox_padding: int = 2,
        use_edsnlp: bool = True,
    ) -> None:
        # Gaussian kernel size (forced odd in _apply_blur) and sigma.
        self._blur_kernel = blur_kernel
        self._blur_sigma = blur_sigma
        # Extra pixels added around every entity bbox before blurring.
        self._bbox_padding = bbox_padding
        # When False, skip EDS-NLP entirely and rely on the regex detector.
        self._use_edsnlp = use_edsnlp

    # ------------------------------------------------------------------
    # Public entry point
    # ------------------------------------------------------------------
    def blur_image(
        self,
        input_path: Union[str, Path],
        output_path: Optional[Union[str, Path]] = None,
    ) -> PIIBlurResult:
        """Blur the detected PII and write the blurred version to disk.

        Args:
            input_path: Path to the raw screenshot (PNG/JPG).
            output_path: Output path. Defaults to
                `<stem>_blurred.png` next to the input.

        Returns:
            PIIBlurResult with the timings and the list of detected entities.
        """
        input_path = Path(input_path)
        if not input_path.is_file():
            raise FileNotFoundError(f"Screenshot introuvable : {input_path}")

        if output_path is None:
            output_path = input_path.with_name(
                f"{input_path.stem}_blurred{input_path.suffix or '.png'}"
            )
        else:
            output_path = Path(output_path)

        t_start = time.perf_counter()

        # 1. OCR
        t_ocr = time.perf_counter()
        try:
            words, W, H = _doctr_ocr(input_path)
        except Exception as e:  # noqa: BLE001
            logger.warning("pii_blur : OCR docTR échoué (%s) — pas de blur appliqué", e)
            # Simply copy the original to the "blurred" location.
            _copy_file(input_path, output_path)
            return PIIBlurResult(
                raw_path=input_path,
                blurred_path=output_path,
                entities=[],
                elapsed_ms=(time.perf_counter() - t_start) * 1000,
            )
        ocr_ms = (time.perf_counter() - t_ocr) * 1000

        if not words:
            _copy_file(input_path, output_path)
            return PIIBlurResult(
                raw_path=input_path,
                blurred_path=output_path,
                entities=[],
                elapsed_ms=(time.perf_counter() - t_start) * 1000,
                ocr_ms=ocr_ms,
            )

        # 2. Rebuild the text line by line while keeping the
        # (char offset -> word) mapping so entity bboxes can be located.
        text, char_to_word = _build_text_with_map(words)

        # 3. NER: EDS-NLP when available, regex otherwise
        t_ner = time.perf_counter()
        ner_engine = "regex"
        entities_spans: List[Tuple[str, int, int]] = []
        if self._use_edsnlp:
            nlp = _get_edsnlp_pipeline()
            if nlp is not None:
                entities_spans = _edsnlp_find_pii(text, nlp)
                ner_engine = "edsnlp"
        # Always complete with the regex pass (EDS-NLP does not cover all
        # frequent PII: email, NIR, French phone numbers).
        entities_spans.extend(_regex_find_pii(text))
        ner_ms = (time.perf_counter() - t_ner) * 1000

        # Deduplicate and normalise
        entities_spans = _merge_spans(entities_spans)

        # 4. Convert (label, start, end) -> PIIEntity(label, text, pixel bbox)
        pii_entities: List[PIIEntity] = []
        for label, start, end in entities_spans:
            if label not in PII_LABELS:
                continue
            bbox = _spans_to_bbox(start, end, char_to_word, words, self._bbox_padding, W, H)
            if bbox is None:
                continue
            pii_entities.append(PIIEntity(
                label=label,
                text=text[start:end],
                bbox=bbox,
                confidence=1.0,
                source=("ner" if ner_engine == "edsnlp" else "regex"),
            ))

        # 5. Apply the gaussian blur on the entity bboxes
        t_blur = time.perf_counter()
        _apply_blur(input_path, output_path, pii_entities,
                    kernel=self._blur_kernel, sigma=self._blur_sigma)
        blur_ms = (time.perf_counter() - t_blur) * 1000

        elapsed_ms = (time.perf_counter() - t_start) * 1000
        if pii_entities:
            logger.info(
                "pii_blur : %d PII floutés sur %s (%.0fms : ocr=%.0f ner=%.0f blur=%.0f, ner=%s)",
                len(pii_entities), input_path.name, elapsed_ms,
                ocr_ms, ner_ms, blur_ms, ner_engine,
            )
        else:
            logger.debug(
                "pii_blur : aucune PII détectée dans %s (%.0fms)",
                input_path.name, elapsed_ms,
            )

        return PIIBlurResult(
            raw_path=input_path,
            blurred_path=output_path,
            entities=pii_entities,
            elapsed_ms=elapsed_ms,
            ocr_ms=ocr_ms,
            ner_ms=ner_ms,
            blur_ms=blur_ms,
            ner_engine=ner_engine,
        )
|
||||||
|
|
||||||
|
|
||||||
|
# Instance singleton (lazy)
|
||||||
|
# Lazily-created singleton shared by every call to blur_pii_on_image.
_default_blurrer: Optional[PIIBlurrer] = None


def blur_pii_on_image(
    input_path: Union[str, Path],
    output_path: Optional[Union[str, Path]] = None,
) -> PIIBlurResult:
    """Functional helper: lazily create a singleton PIIBlurrer and apply it."""
    global _default_blurrer
    if _default_blurrer is None:
        _default_blurrer = PIIBlurrer()
    return _default_blurrer.blur_image(input_path, output_path)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Helpers internes
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
def _copy_file(src: Path, dst: Path) -> None:
|
||||||
|
"""Copie bytewise (utilisé quand aucun PII n'est détecté / OCR KO)."""
|
||||||
|
dst.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
with open(src, "rb") as f_in, open(dst, "wb") as f_out:
|
||||||
|
f_out.write(f_in.read())
|
||||||
|
|
||||||
|
|
||||||
|
def _build_text_with_map(words: Sequence[dict]) -> Tuple[str, List[int]]:
|
||||||
|
"""Concatène les mots en texte + mappe chaque caractère vers son index de mot.
|
||||||
|
|
||||||
|
Quand deux mots consécutifs appartiennent à des lignes différentes (champ
|
||||||
|
`line` dans le dict), on insère un `\n` au lieu d'un espace. Cela empêche
|
||||||
|
les regex gloutons (PERSON, LOCATION…) de matcher à travers des lignes
|
||||||
|
logiques, qui sont typiquement des champs distincts dans une UI métier.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
text : str concaténé (mots séparés par un espace ou un \n)
|
||||||
|
char_to_word : list[int] len == len(text), char_to_word[i] = index du mot
|
||||||
|
(ou -1 pour les séparateurs).
|
||||||
|
"""
|
||||||
|
parts: List[str] = []
|
||||||
|
char_to_word: List[int] = []
|
||||||
|
prev_line: Optional[int] = None
|
||||||
|
for i, w in enumerate(words):
|
||||||
|
cur_line = w.get("line")
|
||||||
|
if i > 0:
|
||||||
|
if prev_line is not None and cur_line is not None and cur_line != prev_line:
|
||||||
|
sep = "\n"
|
||||||
|
else:
|
||||||
|
sep = " "
|
||||||
|
parts.append(sep)
|
||||||
|
char_to_word.append(-1)
|
||||||
|
txt = w["text"]
|
||||||
|
parts.append(txt)
|
||||||
|
char_to_word.extend([i] * len(txt))
|
||||||
|
prev_line = cur_line
|
||||||
|
return "".join(parts), char_to_word
|
||||||
|
|
||||||
|
|
||||||
|
def _spans_to_bbox(
|
||||||
|
start: int,
|
||||||
|
end: int,
|
||||||
|
char_to_word: Sequence[int],
|
||||||
|
words: Sequence[dict],
|
||||||
|
padding: int,
|
||||||
|
image_w: int,
|
||||||
|
image_h: int,
|
||||||
|
) -> Optional[Tuple[int, int, int, int]]:
|
||||||
|
"""Convertit une plage [start, end[ dans le texte en bbox englobant les mots."""
|
||||||
|
if end <= start or start >= len(char_to_word):
|
||||||
|
return None
|
||||||
|
word_ids = set()
|
||||||
|
for i in range(start, min(end, len(char_to_word))):
|
||||||
|
wid = char_to_word[i]
|
||||||
|
if wid >= 0:
|
||||||
|
word_ids.add(wid)
|
||||||
|
if not word_ids:
|
||||||
|
return None
|
||||||
|
xs1, ys1, xs2, ys2 = [], [], [], []
|
||||||
|
for wid in word_ids:
|
||||||
|
w = words[wid]
|
||||||
|
xs1.append(w["x1"]); ys1.append(w["y1"])
|
||||||
|
xs2.append(w["x2"]); ys2.append(w["y2"])
|
||||||
|
x1 = max(0, min(xs1) - padding)
|
||||||
|
y1 = max(0, min(ys1) - padding)
|
||||||
|
x2 = min(image_w, max(xs2) + padding)
|
||||||
|
y2 = min(image_h, max(ys2) + padding)
|
||||||
|
if x2 <= x1 or y2 <= y1:
|
||||||
|
return None
|
||||||
|
return (x1, y1, x2, y2)
|
||||||
|
|
||||||
|
|
||||||
|
def _merge_spans(
|
||||||
|
spans: Sequence[Tuple[str, int, int]],
|
||||||
|
) -> List[Tuple[str, int, int]]:
|
||||||
|
"""Déduplique et fusionne les plages qui se chevauchent sur un même label.
|
||||||
|
|
||||||
|
En cas de conflit inter-labels, on garde celui qui couvre le plus large.
|
||||||
|
"""
|
||||||
|
if not spans:
|
||||||
|
return []
|
||||||
|
# Trier par start puis par -width (le plus long d'abord pour les ties)
|
||||||
|
sorted_spans = sorted(spans, key=lambda s: (s[1], -(s[2] - s[1])))
|
||||||
|
merged: List[Tuple[str, int, int]] = []
|
||||||
|
for label, s, e in sorted_spans:
|
||||||
|
if not merged:
|
||||||
|
merged.append((label, s, e))
|
||||||
|
continue
|
||||||
|
last_label, ls, le = merged[-1]
|
||||||
|
if s < le: # chevauchement
|
||||||
|
# On garde l'étendue fusionnée avec le label du plus large
|
||||||
|
new_start = min(ls, s)
|
||||||
|
new_end = max(le, e)
|
||||||
|
new_label = last_label if (le - ls) >= (e - s) else label
|
||||||
|
merged[-1] = (new_label, new_start, new_end)
|
||||||
|
else:
|
||||||
|
merged.append((label, s, e))
|
||||||
|
return merged
|
||||||
|
|
||||||
|
|
||||||
|
def _apply_blur(
    src: Path,
    dst: Path,
    entities: Sequence[PIIEntity],
    kernel: Tuple[int, int],
    sigma: float,
) -> None:
    """Apply a gaussian blur over each entity bbox and write the image.

    Args:
        src: Source image path.
        dst: Destination path (parent directories are created as needed).
        entities: PII entities whose `bbox` regions get blurred.
        kernel: (w, h) gaussian kernel; forced odd and >= 3 for OpenCV.
        sigma: Gaussian sigma (OpenCV) / basis for the PIL fallback radius.
    """
    from PIL import Image

    with Image.open(src) as img:
        if img.mode != "RGB":
            img = img.convert("RGB")

        if not entities:
            # Nothing to blur: write the (RGB-normalised) image straight out.
            dst.parent.mkdir(parents=True, exist_ok=True)
            img.save(dst, format="PNG", optimize=True)
            return

        # Prefer OpenCV when available (faster),
        # otherwise fall back to PIL ImageFilter.GaussianBlur.
        try:
            import cv2  # type: ignore
            import numpy as np  # type: ignore
            arr = np.array(img)
            bgr = cv2.cvtColor(arr, cv2.COLOR_RGB2BGR)
            for ent in entities:
                x1, y1, x2, y2 = ent.bbox
                if x2 <= x1 or y2 <= y1:
                    continue
                roi = bgr[y1:y2, x1:x2]
                if roi.size == 0:
                    continue
                # OpenCV requires odd kernel dimensions; `| 1` forces odd.
                k = (max(3, kernel[0] | 1), max(3, kernel[1] | 1))  # odd
                bgr[y1:y2, x1:x2] = cv2.GaussianBlur(roi, k, sigma)
            out = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
            img = Image.fromarray(out)
        except ImportError:
            from PIL import ImageFilter
            radius = max(sigma / 2, 4.0)
            for ent in entities:
                x1, y1, x2, y2 = ent.bbox
                region = img.crop((x1, y1, x2, y2))
                if region.size[0] == 0 or region.size[1] == 0:
                    continue
                blurred = region.filter(ImageFilter.GaussianBlur(radius=radius))
                img.paste(blurred, (x1, y1))

        dst.parent.mkdir(parents=True, exist_ok=True)
        img.save(dst, format="PNG", optimize=True)
|
||||||
0
core/cognition/__init__.py
Normal file
0
core/cognition/__init__.py
Normal file
191
core/cognition/vram_orchestrator.py
Normal file
191
core/cognition/vram_orchestrator.py
Normal file
@@ -0,0 +1,191 @@
|
|||||||
|
"""
|
||||||
|
Orchestrateur VRAM — gère le chargement/déchargement des modèles selon le mode.
|
||||||
|
|
||||||
|
Deux modes :
|
||||||
|
- SHADOW : streaming server + agent_chat actifs, VLM raisonnement déchargé
|
||||||
|
- REPLAY : VLM raisonnement (qwen2.5vl:7b) chargé, services non-essentiels stoppés
|
||||||
|
|
||||||
|
Bascule automatique ou manuelle selon le contexte.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import time
|
||||||
|
from enum import Enum
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Base URL of the local Ollama HTTP API (overridable via OLLAMA_URL).
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434")
# Vision-language model used for replay-time reasoning (overridable via RPA_REASONING_MODEL).
REASONING_MODEL = os.environ.get("RPA_REASONING_MODEL", "qwen2.5vl:7b")
MIN_VRAM_FOR_REASONING = 5.0  # minimum free GiB required to load the reasoning model
|
||||||
|
|
||||||
|
|
||||||
|
class VRAMMode(Enum):
    """Operating mode governing which models occupy VRAM."""
    # Streaming/observation services active; reasoning VLM unloaded.
    SHADOW = "shadow"
    # Reasoning VLM loaded; non-essential services stopped.
    REPLAY = "replay"
|
||||||
|
|
||||||
|
|
||||||
|
class VRAMOrchestrator:
    """Manages VRAM to avoid conflicts between models.

    Tracks the current mode (SHADOW/REPLAY) and remembers which services
    were stopped so they can be relaunched when switching back.
    """

    def __init__(self):
        # Last mode successfully switched to; None until the first switch.
        self._current_mode: Optional[VRAMMode] = None
        # (service_name, pid) pairs stopped by switch_to_replay().
        self._stopped_services: list = []

    @staticmethod
    def _parse_vram_gb(stdout: str) -> float:
        """Convert nvidia-smi CSV output (one MiB value per GPU line) to GiB.

        Summing the per-GPU lines makes multi-GPU hosts work; the previous
        float(...) over the whole output raised on more than one line and
        silently reported 0.0.
        """
        values = [float(line) for line in stdout.strip().splitlines() if line.strip()]
        return sum(values) / 1024

    def _query_vram_gb(self, field: str) -> float:
        """Query a GPU memory field ('memory.free' or 'memory.used') via nvidia-smi.

        Returns 0.0 when nvidia-smi is unavailable, times out, or produces
        unparseable output — callers treat that as "no VRAM available".
        """
        try:
            result = subprocess.run(
                ["nvidia-smi", f"--query-gpu={field}", "--format=csv,noheader,nounits"],
                capture_output=True, text=True, timeout=5
            )
            return self._parse_vram_gb(result.stdout)
        except Exception:
            return 0.0

    def get_free_vram_gb(self) -> float:
        """Return the free VRAM in GiB (0.0 when it cannot be measured)."""
        return self._query_vram_gb("memory.free")

    def get_used_vram_gb(self) -> float:
        """Return the used VRAM in GiB (0.0 when it cannot be measured)."""
        return self._query_vram_gb("memory.used")

    def switch_to_replay(self) -> bool:
        """Switch to replay mode: free VRAM for the reasoning VLM.

        1. Stops non-essential services (agent_chat)
        2. Restarts Ollama to release loaded models
        3. Preloads the reasoning model

        Returns:
            False only when free VRAM stays below MIN_VRAM_FOR_REASONING;
            True otherwise (preload failures are logged but non-fatal).
        """
        if self._current_mode == VRAMMode.REPLAY:
            logger.info("Déjà en mode REPLAY")
            return True

        logger.info("Bascule en mode REPLAY...")

        # Stop agent_chat if it is running (best effort).
        try:
            result = subprocess.run(
                ["pgrep", "-f", "agent_chat"],
                capture_output=True, text=True, timeout=5
            )
            pids = result.stdout.strip().split('\n')
            for pid in pids:
                if pid.strip():
                    subprocess.run(["kill", pid.strip()], timeout=5)
                    self._stopped_services.append(("agent_chat", pid.strip()))
                    logger.info(f"agent_chat stoppé (PID {pid.strip()})")
        except Exception as e:
            logger.debug(f"Pas d'agent_chat à stopper: {e}")

        # Restart Ollama to release model memory.
        try:
            subprocess.run(["sudo", "systemctl", "restart", "ollama"],
                           timeout=10, check=True)
            time.sleep(2)
            logger.info("Ollama redémarré")
        except Exception as e:
            logger.warning(f"Impossible de redémarrer Ollama: {e}")

        # Check that enough VRAM was actually freed.
        free = self.get_free_vram_gb()
        logger.info(f"VRAM libre: {free:.1f} Go")

        if free < MIN_VRAM_FOR_REASONING:
            logger.warning(f"VRAM insuffisante ({free:.1f} Go < {MIN_VRAM_FOR_REASONING} Go)")
            return False

        # Preload the reasoning model (best effort — the mode switches anyway).
        try:
            import requests
            logger.info(f"Préchargement {REASONING_MODEL}...")
            resp = requests.post(f"{OLLAMA_URL}/api/generate", json={
                "model": REASONING_MODEL,
                "prompt": "test",
                "stream": False,
                "options": {"num_predict": 1}
            }, timeout=60)
            if resp.status_code == 200:
                logger.info(f"{REASONING_MODEL} chargé en VRAM")
                free_after = self.get_free_vram_gb()
                logger.info(f"VRAM libre après chargement: {free_after:.1f} Go")
        except Exception as e:
            logger.warning(f"Préchargement échoué: {e}")

        self._current_mode = VRAMMode.REPLAY
        return True

    def switch_to_shadow(self) -> bool:
        """Switch to shadow mode: relaunch the observation services.

        1. Restarts Ollama (unloads the reasoning VLM)
        2. Relaunches the services stopped by switch_to_replay()
        """
        if self._current_mode == VRAMMode.SHADOW:
            logger.info("Déjà en mode SHADOW")
            return True

        logger.info("Bascule en mode SHADOW...")

        # Restart Ollama.
        try:
            subprocess.run(["sudo", "systemctl", "restart", "ollama"],
                           timeout=10, check=True)
            time.sleep(2)
        except Exception as e:
            logger.warning(f"Impossible de redémarrer Ollama: {e}")

        # Relaunch previously stopped services.
        for service_name, _pid in self._stopped_services:
            try:
                if service_name == "agent_chat":
                    subprocess.Popen(
                        ["python3", "-m", "agent_chat.app"],
                        cwd="/home/dom/ai/rpa_vision_v3",
                        stdout=subprocess.DEVNULL,
                        stderr=subprocess.DEVNULL
                    )
                    logger.info(f"{service_name} relancé")
            except Exception as e:
                logger.warning(f"Impossible de relancer {service_name}: {e}")

        self._stopped_services.clear()
        self._current_mode = VRAMMode.SHADOW
        return True

    def ensure_reasoning_ready(self) -> bool:
        """Ensure the reasoning VLM can be loaded; switch to replay if needed."""
        free = self.get_free_vram_gb()
        if free >= MIN_VRAM_FOR_REASONING:
            return True
        return self.switch_to_replay()

    @property
    def current_mode(self) -> Optional[str]:
        """Current mode name ("shadow"/"replay"), or None before any switch."""
        return self._current_mode.value if self._current_mode else None

    def status(self) -> dict:
        """Return a JSON-friendly snapshot of mode, VRAM figures and stopped services."""
        return {
            "mode": self.current_mode,
            "vram_free_gb": round(self.get_free_vram_gb(), 1),
            "vram_used_gb": round(self.get_used_vram_gb(), 1),
            "reasoning_model": REASONING_MODEL,
            "stopped_services": [s[0] for s in self._stopped_services],
        }
|
||||||
|
|
||||||
|
|
||||||
|
# Process-wide singleton, created lazily by get_orchestrator().
_orchestrator: Optional[VRAMOrchestrator] = None


def get_orchestrator() -> VRAMOrchestrator:
    """Return the shared VRAMOrchestrator, instantiating it on first use."""
    global _orchestrator
    if _orchestrator is None:
        _orchestrator = VRAMOrchestrator()
    return _orchestrator
|
||||||
260
core/cognition/working_memory.py
Normal file
260
core/cognition/working_memory.py
Normal file
@@ -0,0 +1,260 @@
|
|||||||
|
"""
|
||||||
|
Mémoire de travail de Léa — contexte cognitif pendant l'exécution.
|
||||||
|
|
||||||
|
Donne à Léa la conscience de "où elle en est" :
|
||||||
|
- Quel objectif elle poursuit
|
||||||
|
- Quel écran elle voit
|
||||||
|
- Ce qu'elle vient de faire
|
||||||
|
- Ce qu'elle doit faire ensuite
|
||||||
|
- Ce qu'elle a appris en cours de route
|
||||||
|
|
||||||
|
Sans ça, chaque étape est indépendante — Léa est amnésique entre
|
||||||
|
deux actions. Avec ça, elle raisonne en contexte.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Observation:
    """What Léa observes on screen at a given instant."""
    # Capture time of this observation.
    timestamp: datetime
    # Title of the active window, when known.
    window_title: str = ""
    # Name of the foreground application.
    application: str = ""
    # Raw OCR text extracted from the screen.
    ocr_text: str = ""
    # Detected UI pattern (e.g. a dialog), if any.
    ui_pattern: Optional[str] = None
    # Free-text description of the current screen.
    screen_description: str = ""
    # Confidence score for this observation (0.0 = unknown/default).
    confidence: float = 0.0
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ActionRecord:
    """A single action Léa has performed."""
    # When the action was executed.
    timestamp: datetime
    # Kind of action performed (free-form label).
    action_type: str
    # Target of the action (element, window, ...).
    target: str = ""
    # Free-form outcome description.
    result: str = ""
    # Whether the action succeeded.
    success: bool = True
    # Execution time in milliseconds.
    duration_ms: float = 0
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class CognitiveContext:
|
||||||
|
"""Contexte cognitif complet — la "pensée" de Léa à un instant donné.
|
||||||
|
|
||||||
|
C'est le bloc-notes interne qui est réinjecté à chaque décision.
|
||||||
|
Le VLM reçoit ce contexte pour raisonner en connaissance de cause.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Objectif global (ce que Léa essaie d'accomplir)
|
||||||
|
objective: str = ""
|
||||||
|
|
||||||
|
# Étape courante dans le plan
|
||||||
|
current_step: int = 0
|
||||||
|
total_steps: int = 0
|
||||||
|
current_step_description: str = ""
|
||||||
|
|
||||||
|
# Ce que Léa voit maintenant
|
||||||
|
current_observation: Optional[Observation] = None
|
||||||
|
|
||||||
|
# Historique des N dernières actions (mémoire court terme)
|
||||||
|
action_history: List[ActionRecord] = field(default_factory=list)
|
||||||
|
max_history: int = 10
|
||||||
|
|
||||||
|
# Ce que Léa a appris pendant cette session
|
||||||
|
learned_facts: List[str] = field(default_factory=list)
|
||||||
|
|
||||||
|
# Plan : les étapes restantes
|
||||||
|
remaining_steps: List[str] = field(default_factory=list)
|
||||||
|
|
||||||
|
# État émotionnel / confiance
|
||||||
|
confidence: float = 1.0
|
||||||
|
needs_help: bool = False
|
||||||
|
help_reason: str = ""
|
||||||
|
|
||||||
|
# Timing
|
||||||
|
session_id: str = ""
|
||||||
|
machine_id: str = ""
|
||||||
|
started_at: Optional[datetime] = None
|
||||||
|
step_started_at: Optional[datetime] = None
|
||||||
|
step_durations: Dict[str, List[float]] = field(default_factory=dict)
|
||||||
|
|
||||||
|
# Ce que Léa devrait voir à l'écran (comparaison attendu vs réel)
|
||||||
|
expected_screen: str = ""
|
||||||
|
|
||||||
|
def record_action(self, action_type: str, target: str = "",
|
||||||
|
result: str = "", success: bool = True,
|
||||||
|
duration_ms: float = 0):
|
||||||
|
"""Enregistre une action dans l'historique."""
|
||||||
|
self.action_history.append(ActionRecord(
|
||||||
|
timestamp=datetime.now(),
|
||||||
|
action_type=action_type,
|
||||||
|
target=target,
|
||||||
|
result=result,
|
||||||
|
success=success,
|
||||||
|
duration_ms=duration_ms,
|
||||||
|
))
|
||||||
|
if len(self.action_history) > self.max_history:
|
||||||
|
self.action_history = self.action_history[-self.max_history:]
|
||||||
|
|
||||||
|
if not success:
|
||||||
|
self.confidence = max(0, self.confidence - 0.2)
|
||||||
|
else:
|
||||||
|
self.confidence = min(1.0, self.confidence + 0.05)
|
||||||
|
|
||||||
|
def observe(self, window_title: str = "", application: str = "",
|
||||||
|
ocr_text: str = "", ui_pattern: Optional[str] = None,
|
||||||
|
screen_description: str = ""):
|
||||||
|
"""Met à jour l'observation courante."""
|
||||||
|
self.current_observation = Observation(
|
||||||
|
timestamp=datetime.now(),
|
||||||
|
window_title=window_title,
|
||||||
|
application=application,
|
||||||
|
ocr_text=ocr_text,
|
||||||
|
ui_pattern=ui_pattern,
|
||||||
|
screen_description=screen_description,
|
||||||
|
)
|
||||||
|
|
||||||
|
def advance_step(self):
|
||||||
|
"""Passe à l'étape suivante du plan."""
|
||||||
|
# Enregistrer la durée de l'étape précédente
|
||||||
|
if self.step_started_at:
|
||||||
|
duration = (datetime.now() - self.step_started_at).total_seconds()
|
||||||
|
step_key = self.current_step_description or f"step_{self.current_step}"
|
||||||
|
self.step_durations.setdefault(step_key, []).append(duration)
|
||||||
|
|
||||||
|
self.current_step += 1
|
||||||
|
self.step_started_at = datetime.now()
|
||||||
|
if self.remaining_steps:
|
||||||
|
self.current_step_description = self.remaining_steps.pop(0)
|
||||||
|
|
||||||
|
def get_step_timing(self) -> Optional[Dict[str, Any]]:
|
||||||
|
"""Retourne les infos de timing de l'étape en cours."""
|
||||||
|
if not self.step_started_at:
|
||||||
|
return None
|
||||||
|
|
||||||
|
elapsed = (datetime.now() - self.step_started_at).total_seconds()
|
||||||
|
step_key = self.current_step_description or f"step_{self.current_step}"
|
||||||
|
history = self.step_durations.get(step_key, [])
|
||||||
|
avg = sum(history) / len(history) if history else None
|
||||||
|
|
||||||
|
result = {"elapsed_seconds": elapsed}
|
||||||
|
if avg:
|
||||||
|
result["avg_previous"] = avg
|
||||||
|
result["is_slow"] = elapsed > avg * 2
|
||||||
|
return result
|
||||||
|
|
||||||
|
def set_expected_screen(self, description: str):
|
||||||
|
"""Définit ce que Léa devrait voir à l'écran pour cette étape."""
|
||||||
|
self.expected_screen = description
|
||||||
|
|
||||||
|
def check_screen_matches_expected(self) -> Optional[bool]:
|
||||||
|
"""Compare l'observation actuelle avec l'écran attendu."""
|
||||||
|
if not self.expected_screen or not self.current_observation:
|
||||||
|
return None
|
||||||
|
obs_text = (self.current_observation.window_title + " " +
|
||||||
|
self.current_observation.ocr_text).lower()
|
||||||
|
expected_words = self.expected_screen.lower().split()
|
||||||
|
matches = sum(1 for w in expected_words if w in obs_text)
|
||||||
|
return matches / max(len(expected_words), 1) > 0.3
|
||||||
|
|
||||||
|
def learn(self, fact: str):
|
||||||
|
"""Enregistre un fait appris pendant l'exécution."""
|
||||||
|
if fact not in self.learned_facts:
|
||||||
|
self.learned_facts.append(fact)
|
||||||
|
logger.info(f"Fait appris: {fact}")
|
||||||
|
|
||||||
|
def ask_for_help(self, reason: str):
|
||||||
|
"""Signale que Léa a besoin d'aide."""
|
||||||
|
self.needs_help = True
|
||||||
|
self.help_reason = reason
|
||||||
|
self.confidence = max(0, self.confidence - 0.3)
|
||||||
|
logger.warning(f"Léa demande de l'aide: {reason}")
|
||||||
|
|
||||||
|
def to_prompt_context(self) -> str:
|
||||||
|
"""Génère le contexte à injecter dans le prompt VLM.
|
||||||
|
|
||||||
|
C'est ce texte qui donne au VLM la conscience de la situation.
|
||||||
|
"""
|
||||||
|
lines = []
|
||||||
|
|
||||||
|
if self.objective:
|
||||||
|
lines.append(f"OBJECTIF : {self.objective}")
|
||||||
|
|
||||||
|
if self.current_step > 0:
|
||||||
|
lines.append(f"PROGRESSION : étape {self.current_step}/{self.total_steps}")
|
||||||
|
if self.current_step_description:
|
||||||
|
lines.append(f"ÉTAPE EN COURS : {self.current_step_description}")
|
||||||
|
|
||||||
|
if self.current_observation:
|
||||||
|
obs = self.current_observation
|
||||||
|
if obs.window_title:
|
||||||
|
lines.append(f"FENÊTRE ACTIVE : {obs.window_title}")
|
||||||
|
if obs.application:
|
||||||
|
lines.append(f"APPLICATION : {obs.application}")
|
||||||
|
if obs.ui_pattern:
|
||||||
|
lines.append(f"DIALOGUE DÉTECTÉ : {obs.ui_pattern}")
|
||||||
|
|
||||||
|
if self.action_history:
|
||||||
|
last_actions = self.action_history[-3:]
|
||||||
|
lines.append("DERNIÈRES ACTIONS :")
|
||||||
|
for a in last_actions:
|
||||||
|
status = "OK" if a.success else "ÉCHEC"
|
||||||
|
lines.append(f" - {a.action_type} '{a.target}' → {status}")
|
||||||
|
|
||||||
|
if self.learned_facts:
|
||||||
|
lines.append("FAITS APPRIS :")
|
||||||
|
for fact in self.learned_facts[-5:]:
|
||||||
|
lines.append(f" - {fact}")
|
||||||
|
|
||||||
|
if self.remaining_steps:
|
||||||
|
lines.append("PROCHAINES ÉTAPES :")
|
||||||
|
for step in self.remaining_steps[:3]:
|
||||||
|
lines.append(f" - {step}")
|
||||||
|
|
||||||
|
timing = self.get_step_timing()
|
||||||
|
if timing:
|
||||||
|
lines.append(f"TEMPS ÉTAPE : {timing['elapsed_seconds']:.1f}s")
|
||||||
|
if timing.get('avg_previous'):
|
||||||
|
lines.append(f"MOYENNE PRÉCÉDENTE : {timing['avg_previous']:.1f}s")
|
||||||
|
if timing.get('is_slow'):
|
||||||
|
lines.append("⚠ ÉTAPE ANORMALEMENT LENTE")
|
||||||
|
|
||||||
|
if self.expected_screen:
|
||||||
|
match = self.check_screen_matches_expected()
|
||||||
|
if match is False:
|
||||||
|
lines.append(f"⚠ ÉCRAN INATTENDU (attendu: {self.expected_screen})")
|
||||||
|
elif match is True:
|
||||||
|
lines.append(f"ÉCRAN CONFORME : {self.expected_screen}")
|
||||||
|
|
||||||
|
lines.append(f"CONFIANCE : {self.confidence:.0%}")
|
||||||
|
|
||||||
|
if self.needs_help:
|
||||||
|
lines.append(f"BESOIN D'AIDE : {self.help_reason}")
|
||||||
|
|
||||||
|
return "\n".join(lines)
|
||||||
|
|
||||||
|
def to_dict(self) -> Dict[str, Any]:
|
||||||
|
"""Sérialise le contexte pour le stockage/transport."""
|
||||||
|
return {
|
||||||
|
"objective": self.objective,
|
||||||
|
"current_step": self.current_step,
|
||||||
|
"total_steps": self.total_steps,
|
||||||
|
"current_step_description": self.current_step_description,
|
||||||
|
"confidence": self.confidence,
|
||||||
|
"needs_help": self.needs_help,
|
||||||
|
"help_reason": self.help_reason,
|
||||||
|
"action_count": len(self.action_history),
|
||||||
|
"learned_facts": self.learned_facts,
|
||||||
|
"remaining_steps": self.remaining_steps,
|
||||||
|
"last_observation": {
|
||||||
|
"window_title": self.current_observation.window_title,
|
||||||
|
"application": self.current_observation.application,
|
||||||
|
"ui_pattern": self.current_observation.ui_pattern,
|
||||||
|
} if self.current_observation else None,
|
||||||
|
}
|
||||||
@@ -68,7 +68,7 @@ class SystemConfig:
|
|||||||
clip_model: str = "ViT-B-32"
|
clip_model: str = "ViT-B-32"
|
||||||
clip_pretrained: str = "openai"
|
clip_pretrained: str = "openai"
|
||||||
clip_device: str = "cpu"
|
clip_device: str = "cpu"
|
||||||
vlm_model: str = "qwen3-vl:8b"
|
vlm_model: str = "gemma4:latest"
|
||||||
vlm_endpoint: str = "http://localhost:11434"
|
vlm_endpoint: str = "http://localhost:11434"
|
||||||
owl_model: str = "google/owlv2-base-patch16-ensemble"
|
owl_model: str = "google/owlv2-base-patch16-ensemble"
|
||||||
owl_confidence_threshold: float = 0.1
|
owl_confidence_threshold: float = 0.1
|
||||||
@@ -211,7 +211,7 @@ class ConfigurationManager:
|
|||||||
clip_model=os.getenv("CLIP_MODEL", "ViT-B-32"),
|
clip_model=os.getenv("CLIP_MODEL", "ViT-B-32"),
|
||||||
clip_pretrained=os.getenv("CLIP_PRETRAINED", "openai"),
|
clip_pretrained=os.getenv("CLIP_PRETRAINED", "openai"),
|
||||||
clip_device=os.getenv("CLIP_DEVICE", "cpu"),
|
clip_device=os.getenv("CLIP_DEVICE", "cpu"),
|
||||||
vlm_model=os.getenv("VLM_MODEL", "qwen3-vl:8b"),
|
vlm_model=os.getenv("RPA_VLM_MODEL", os.getenv("VLM_MODEL", "gemma4:latest")),
|
||||||
vlm_endpoint=os.getenv("VLM_ENDPOINT", "http://localhost:11434"),
|
vlm_endpoint=os.getenv("VLM_ENDPOINT", "http://localhost:11434"),
|
||||||
owl_model=os.getenv("OWL_MODEL", "google/owlv2-base-patch16-ensemble"),
|
owl_model=os.getenv("OWL_MODEL", "google/owlv2-base-patch16-ensemble"),
|
||||||
owl_confidence_threshold=float(os.getenv("OWL_CONFIDENCE_THRESHOLD", "0.1")),
|
owl_confidence_threshold=float(os.getenv("OWL_CONFIDENCE_THRESHOLD", "0.1")),
|
||||||
@@ -435,7 +435,7 @@ class ModelConfig:
|
|||||||
clip_model: str = "ViT-B-32"
|
clip_model: str = "ViT-B-32"
|
||||||
clip_pretrained: str = "openai"
|
clip_pretrained: str = "openai"
|
||||||
clip_device: str = "cpu"
|
clip_device: str = "cpu"
|
||||||
vlm_model: str = "qwen3-vl:8b"
|
vlm_model: str = "gemma4:latest"
|
||||||
vlm_endpoint: str = "http://localhost:11434"
|
vlm_endpoint: str = "http://localhost:11434"
|
||||||
owl_model: str = "google/owlv2-base-patch16-ensemble"
|
owl_model: str = "google/owlv2-base-patch16-ensemble"
|
||||||
owl_confidence_threshold: float = 0.1
|
owl_confidence_threshold: float = 0.1
|
||||||
@@ -510,7 +510,7 @@ class FAISSConfig:
|
|||||||
class GPUResourceConfig:
|
class GPUResourceConfig:
|
||||||
"""Configuration for GPU resource management - DEPRECATED: Use SystemConfig instead"""
|
"""Configuration for GPU resource management - DEPRECATED: Use SystemConfig instead"""
|
||||||
ollama_endpoint: str = "http://localhost:11434"
|
ollama_endpoint: str = "http://localhost:11434"
|
||||||
vlm_model: str = "qwen3-vl:8b"
|
vlm_model: str = "gemma4:latest"
|
||||||
clip_model: str = "ViT-B-32"
|
clip_model: str = "ViT-B-32"
|
||||||
idle_timeout_seconds: int = 300
|
idle_timeout_seconds: int = 300
|
||||||
vram_threshold_for_clip_gpu_mb: int = 1024
|
vram_threshold_for_clip_gpu_mb: int = 1024
|
||||||
@@ -599,7 +599,7 @@ UPLOADS_PATH=data/training/uploads
|
|||||||
CLIP_MODEL=ViT-B-32
|
CLIP_MODEL=ViT-B-32
|
||||||
CLIP_PRETRAINED=openai
|
CLIP_PRETRAINED=openai
|
||||||
CLIP_DEVICE=cpu
|
CLIP_DEVICE=cpu
|
||||||
VLM_MODEL=qwen3-vl:8b
|
VLM_MODEL=gemma4:latest
|
||||||
VLM_ENDPOINT=http://localhost:11434
|
VLM_ENDPOINT=http://localhost:11434
|
||||||
OWL_MODEL=google/owlv2-base-patch16-ensemble
|
OWL_MODEL=google/owlv2-base-patch16-ensemble
|
||||||
OWL_CONFIDENCE_THRESHOLD=0.1
|
OWL_CONFIDENCE_THRESHOLD=0.1
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ class OllamaClient:
|
|||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
endpoint: str = "http://localhost:11434",
|
endpoint: str = "http://localhost:11434",
|
||||||
model: str = "qwen3-vl:8b",
|
model: str = None,
|
||||||
timeout: int = 180):
|
timeout: int = 180):
|
||||||
"""
|
"""
|
||||||
Initialiser le client Ollama
|
Initialiser le client Ollama
|
||||||
@@ -36,7 +36,12 @@ class OllamaClient:
|
|||||||
timeout: Timeout en secondes
|
timeout: Timeout en secondes
|
||||||
"""
|
"""
|
||||||
self.endpoint = endpoint.rstrip('/')
|
self.endpoint = endpoint.rstrip('/')
|
||||||
|
# Résolution du modèle : paramètre explicite > config centralisée
|
||||||
|
if model is not None:
|
||||||
self.model = model
|
self.model = model
|
||||||
|
else:
|
||||||
|
from core.detection.vlm_config import get_vlm_model
|
||||||
|
self.model = get_vlm_model(endpoint=self.endpoint)
|
||||||
self.timeout = timeout
|
self.timeout = timeout
|
||||||
self._check_connection()
|
self._check_connection()
|
||||||
|
|
||||||
@@ -126,7 +131,12 @@ class OllamaClient:
|
|||||||
messages.append(user_message)
|
messages.append(user_message)
|
||||||
|
|
||||||
# Déterminer si le modèle est un modèle thinking (qwen3)
|
# Déterminer si le modèle est un modèle thinking (qwen3)
|
||||||
is_thinking_model = "qwen3" in self.model.lower()
|
# Les modèles non-thinking (gemma4, qwen2.5vl) n'ont pas besoin
|
||||||
|
# du workaround prefill et supportent le rôle system natif.
|
||||||
|
from core.detection.vlm_config import is_thinking_model as _is_thinking
|
||||||
|
from core.detection.vlm_config import needs_think_false as _needs_think_false
|
||||||
|
is_thinking_model = _is_thinking(self.model)
|
||||||
|
requires_think_false = _needs_think_false(self.model)
|
||||||
|
|
||||||
# WORKAROUND Ollama 0.18.x : think=false est ignoré par le
|
# WORKAROUND Ollama 0.18.x : think=false est ignoré par le
|
||||||
# renderer qwen3-vl-thinking. On utilise un assistant prefill
|
# renderer qwen3-vl-thinking. On utilise un assistant prefill
|
||||||
@@ -168,9 +178,9 @@ class OllamaClient:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
# Garder think=false au cas où une future version d'Ollama le
|
# think=false : requis pour qwen3 (prefill reste le mécanisme
|
||||||
# corrige — le prefill reste le mécanisme principal
|
# principal) ET pour gemma4 (sinon tokens vides sur Ollama >=0.20)
|
||||||
if is_thinking_model:
|
if is_thinking_model or requires_think_false:
|
||||||
payload["think"] = False
|
payload["think"] = False
|
||||||
|
|
||||||
if force_json:
|
if force_json:
|
||||||
@@ -575,7 +585,7 @@ Your answer:"""
|
|||||||
# Fonctions utilitaires
|
# Fonctions utilitaires
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
def create_ollama_client(model: str = "qwen3-vl:8b",
|
def create_ollama_client(model: str = None,
|
||||||
endpoint: str = "http://localhost:11434") -> OllamaClient:
|
endpoint: str = "http://localhost:11434") -> OllamaClient:
|
||||||
"""
|
"""
|
||||||
Créer un client Ollama
|
Créer un client Ollama
|
||||||
|
|||||||
@@ -72,9 +72,9 @@ class BoundingBox:
|
|||||||
class DetectionConfig:
|
class DetectionConfig:
|
||||||
"""Configuration de la détection UI hybride"""
|
"""Configuration de la détection UI hybride"""
|
||||||
# VLM — modèle configurable via variable d'environnement RPA_VLM_MODEL
|
# VLM — modèle configurable via variable d'environnement RPA_VLM_MODEL
|
||||||
# Production (local) : "qwen3-vl:8b" — GPU local, pas de réseau
|
# Par défaut : gemma4:e4b (meilleur grounding + contextualisation)
|
||||||
# Tests (cloud) : "qwen3-vl:235b-cloud" — pas de GPU, plus lent mais libère la VRAM
|
# Fallback : qwen3-vl:8b si gemma4 non disponible
|
||||||
vlm_model: str = os.environ.get("RPA_VLM_MODEL", "qwen3-vl:8b")
|
vlm_model: str = os.environ.get("RPA_VLM_MODEL", os.environ.get("VLM_MODEL", "gemma4:e4b"))
|
||||||
vlm_endpoint: str = "http://localhost:11434"
|
vlm_endpoint: str = "http://localhost:11434"
|
||||||
use_vlm_classification: bool = True # Utiliser VLM pour classifier
|
use_vlm_classification: bool = True # Utiliser VLM pour classifier
|
||||||
|
|
||||||
@@ -865,7 +865,7 @@ JSON array: [{{"id":0,"type":"...","role":"...","text":"..."}}]"""
|
|||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
def create_detector(
|
def create_detector(
|
||||||
vlm_model: str = "qwen3-vl:8b",
|
vlm_model: str = None,
|
||||||
confidence_threshold: float = 0.7,
|
confidence_threshold: float = 0.7,
|
||||||
use_vlm: bool = True
|
use_vlm: bool = True
|
||||||
) -> UIDetector:
|
) -> UIDetector:
|
||||||
@@ -873,13 +873,16 @@ def create_detector(
|
|||||||
Créer un détecteur avec configuration personnalisée
|
Créer un détecteur avec configuration personnalisée
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
vlm_model: Modèle VLM à utiliser
|
vlm_model: Modèle VLM à utiliser (None = résolution automatique via vlm_config)
|
||||||
confidence_threshold: Seuil de confiance
|
confidence_threshold: Seuil de confiance
|
||||||
use_vlm: Utiliser le VLM pour la classification
|
use_vlm: Utiliser le VLM pour la classification
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
UIDetector configuré
|
UIDetector configuré
|
||||||
"""
|
"""
|
||||||
|
if vlm_model is None:
|
||||||
|
from core.detection.vlm_config import get_vlm_model
|
||||||
|
vlm_model = get_vlm_model()
|
||||||
config = DetectionConfig(
|
config = DetectionConfig(
|
||||||
vlm_model=vlm_model,
|
vlm_model=vlm_model,
|
||||||
confidence_threshold=confidence_threshold,
|
confidence_threshold=confidence_threshold,
|
||||||
|
|||||||
@@ -1,622 +0,0 @@
|
|||||||
"""
|
|
||||||
UIDetector - Détection Sémantique d'Éléments UI avec VLM
|
|
||||||
|
|
||||||
Utilise un Vision-Language Model (VLM) pour détecter et classifier
|
|
||||||
les éléments UI avec leurs types et rôles sémantiques.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from typing import List, Dict, Optional, Any, Tuple
|
|
||||||
from pathlib import Path
|
|
||||||
from dataclasses import dataclass
|
|
||||||
import numpy as np
|
|
||||||
from PIL import Image
|
|
||||||
import json
|
|
||||||
import re
|
|
||||||
|
|
||||||
from ..models.ui_element import UIElement, UIElementEmbeddings, VisualFeatures
|
|
||||||
from .ollama_client import OllamaClient, check_ollama_available
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class DetectionConfig:
    """UI detection configuration"""
    vlm_model: str = "qwen3-vl:8b"  # VLM model to use (qwen3-vl:8b recommended)
    vlm_endpoint: str = "http://localhost:11434"  # Ollama endpoint
    confidence_threshold: float = 0.7  # Minimum confidence threshold
    max_elements: int = 50  # Maximum number of elements to detect
    detect_regions: bool = True  # Detect regions of interest first
    use_embeddings: bool = True  # Generate dual embeddings
|
|
||||||
|
|
||||||
|
|
||||||
class UIDetector:
|
|
||||||
"""
|
|
||||||
Détecteur d'éléments UI sémantique
|
|
||||||
|
|
||||||
Utilise un VLM (Vision-Language Model) pour :
|
|
||||||
1. Détecter les régions d'intérêt dans un screenshot
|
|
||||||
2. Classifier le type de chaque élément UI
|
|
||||||
3. Déterminer le rôle sémantique
|
|
||||||
4. Extraire les features visuelles
|
|
||||||
5. Générer des embeddings duaux (image + texte)
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, config: Optional[DetectionConfig] = None):
|
|
||||||
"""
|
|
||||||
Initialiser le détecteur
|
|
||||||
|
|
||||||
Args:
|
|
||||||
config: Configuration (utilise config par défaut si None)
|
|
||||||
"""
|
|
||||||
self.config = config or DetectionConfig()
|
|
||||||
self.vlm_client = None
|
|
||||||
self._initialize_vlm()
|
|
||||||
|
|
||||||
    def _initialize_vlm(self) -> None:
        """Initialize the VLM client (Ollama); fall back to simulation mode on any failure."""
        try:
            # Check whether an Ollama server is reachable at the configured endpoint.
            if check_ollama_available(self.config.vlm_endpoint):
                self.vlm_client = OllamaClient(
                    endpoint=self.config.vlm_endpoint,
                    model=self.config.vlm_model
                )
                print(f"✓ VLM initialized: {self.config.vlm_model} at {self.config.vlm_endpoint}")
            else:
                # No server: leave vlm_client as None so callers use simulation paths.
                print(f"⚠ Ollama not available at {self.config.vlm_endpoint}, using simulation mode")
                self.vlm_client = None
        except Exception as e:
            # Any initialization error degrades to simulation mode instead of crashing.
            print(f"⚠ Failed to initialize VLM: {e}, using simulation mode")
            self.vlm_client = None
|
|
||||||
|
|
||||||
def detect(self,
|
|
||||||
screenshot_path: str,
|
|
||||||
window_context: Optional[Dict[str, Any]] = None) -> List[UIElement]:
|
|
||||||
"""
|
|
||||||
Détecter tous les éléments UI dans un screenshot
|
|
||||||
|
|
||||||
Args:
|
|
||||||
screenshot_path: Chemin vers le screenshot
|
|
||||||
window_context: Contexte de la fenêtre (titre, process, etc.)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Liste d'UIElements détectés
|
|
||||||
"""
|
|
||||||
# Charger image
|
|
||||||
image = self._load_image(screenshot_path)
|
|
||||||
if image is None:
|
|
||||||
return []
|
|
||||||
|
|
||||||
# Détecter régions d'intérêt si activé
|
|
||||||
if self.config.detect_regions:
|
|
||||||
regions = self._detect_regions_of_interest(image, window_context)
|
|
||||||
else:
|
|
||||||
# Utiliser image complète
|
|
||||||
regions = [{"bbox": (0, 0, image.width, image.height), "confidence": 1.0}]
|
|
||||||
|
|
||||||
# Détecter éléments UI dans chaque région
|
|
||||||
ui_elements = []
|
|
||||||
for region in regions:
|
|
||||||
elements = self._detect_elements_in_region(
|
|
||||||
image,
|
|
||||||
region,
|
|
||||||
screenshot_path,
|
|
||||||
window_context
|
|
||||||
)
|
|
||||||
ui_elements.extend(elements)
|
|
||||||
|
|
||||||
# Filtrer par confiance
|
|
||||||
ui_elements = [
|
|
||||||
el for el in ui_elements
|
|
||||||
if el.confidence >= self.config.confidence_threshold
|
|
||||||
]
|
|
||||||
|
|
||||||
# Limiter nombre d'éléments
|
|
||||||
if len(ui_elements) > self.config.max_elements:
|
|
||||||
# Trier par confiance et garder les meilleurs
|
|
||||||
ui_elements.sort(key=lambda x: x.confidence, reverse=True)
|
|
||||||
ui_elements = ui_elements[:self.config.max_elements]
|
|
||||||
|
|
||||||
return ui_elements
|
|
||||||
|
|
||||||
def _load_image(self, screenshot_path: str) -> Optional[Image.Image]:
|
|
||||||
"""Charger une image depuis un fichier"""
|
|
||||||
try:
|
|
||||||
return Image.open(screenshot_path)
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Error loading image {screenshot_path}: {e}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
def _detect_regions_of_interest(self,
|
|
||||||
image: Image.Image,
|
|
||||||
window_context: Optional[Dict] = None) -> List[Dict]:
|
|
||||||
"""
|
|
||||||
Détecter les régions d'intérêt dans l'image
|
|
||||||
|
|
||||||
Utilise le VLM pour identifier les zones contenant des éléments UI.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
image: Image PIL
|
|
||||||
window_context: Contexte de la fenêtre
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Liste de régions {bbox: (x, y, w, h), confidence: float}
|
|
||||||
"""
|
|
||||||
if self.vlm_client is None:
|
|
||||||
# Mode simulation : diviser l'image en grille
|
|
||||||
return self._simulate_region_detection(image)
|
|
||||||
|
|
||||||
# Utiliser VLM pour détecter régions
|
|
||||||
# Pour l'instant, on utilise l'image complète (plus simple et efficace)
|
|
||||||
width, height = image.size
|
|
||||||
return [{
|
|
||||||
"bbox": (0, 0, width, height),
|
|
||||||
"confidence": 1.0
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _simulate_region_detection(self, image: Image.Image) -> List[Dict]:
|
|
||||||
"""Simulation de détection de régions (pour développement)"""
|
|
||||||
width, height = image.size
|
|
||||||
|
|
||||||
# Diviser en grille 3x3 pour simulation
|
|
||||||
regions = []
|
|
||||||
grid_size = 3
|
|
||||||
cell_w = width // grid_size
|
|
||||||
cell_h = height // grid_size
|
|
||||||
|
|
||||||
for i in range(grid_size):
|
|
||||||
for j in range(grid_size):
|
|
||||||
regions.append({
|
|
||||||
"bbox": (j * cell_w, i * cell_h, cell_w, cell_h),
|
|
||||||
"confidence": 0.8
|
|
||||||
})
|
|
||||||
|
|
||||||
return regions
|
|
||||||
|
|
||||||
def _detect_elements_in_region(self,
|
|
||||||
image: Image.Image,
|
|
||||||
region: Dict,
|
|
||||||
screenshot_path: str,
|
|
||||||
window_context: Optional[Dict] = None) -> List[UIElement]:
|
|
||||||
"""
|
|
||||||
Détecter éléments UI dans une région spécifique
|
|
||||||
|
|
||||||
Args:
|
|
||||||
image: Image complète
|
|
||||||
region: Région à analyser
|
|
||||||
screenshot_path: Chemin du screenshot
|
|
||||||
window_context: Contexte de la fenêtre
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Liste d'UIElements dans cette région
|
|
||||||
"""
|
|
||||||
bbox = region["bbox"]
|
|
||||||
x, y, w, h = bbox
|
|
||||||
|
|
||||||
# Extraire crop de la région
|
|
||||||
region_image = image.crop((x, y, x + w, y + h))
|
|
||||||
|
|
||||||
# Détecter éléments avec VLM
|
|
||||||
if self.vlm_client is None:
|
|
||||||
# Mode simulation
|
|
||||||
return self._simulate_element_detection(
|
|
||||||
region_image, bbox, screenshot_path, window_context
|
|
||||||
)
|
|
||||||
|
|
||||||
# Vraie détection avec VLM !
|
|
||||||
return self._detect_with_vlm(
|
|
||||||
region_image, bbox, screenshot_path, window_context
|
|
||||||
)
|
|
||||||
|
|
||||||
def _detect_with_vlm(self,
|
|
||||||
region_image: Image.Image,
|
|
||||||
region_bbox: Tuple[int, int, int, int],
|
|
||||||
screenshot_path: str,
|
|
||||||
window_context: Optional[Dict] = None) -> List[UIElement]:
|
|
||||||
"""
|
|
||||||
Détecter éléments UI avec le VLM (vraie détection)
|
|
||||||
|
|
||||||
Args:
|
|
||||||
region_image: Image de la région
|
|
||||||
region_bbox: Bbox de la région (x, y, w, h)
|
|
||||||
screenshot_path: Chemin du screenshot
|
|
||||||
window_context: Contexte de la fenêtre
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Liste d'UIElements détectés
|
|
||||||
"""
|
|
||||||
x_offset, y_offset, w, h = region_bbox
|
|
||||||
|
|
||||||
# Construire le prompt pour le VLM
|
|
||||||
context_str = ""
|
|
||||||
if window_context:
|
|
||||||
context_str = f"\nWindow context: {window_context.get('title', 'Unknown')}"
|
|
||||||
|
|
||||||
# Approche simplifiée : demander une description structurée
|
|
||||||
prompt = f"""List all interactive UI elements in this screenshot.{context_str}
|
|
||||||
|
|
||||||
For each element, provide:
|
|
||||||
- type (button, text_input, checkbox, link, etc.)
|
|
||||||
- label (visible text)
|
|
||||||
- approximate position (top/middle/bottom, left/center/right)
|
|
||||||
|
|
||||||
Format as JSON array:
|
|
||||||
[{{"type": "button", "label": "Submit", "position": "middle-center"}}]
|
|
||||||
|
|
||||||
Return ONLY the JSON array, no other text."""
|
|
||||||
|
|
||||||
# Appeler le VLM
|
|
||||||
# Note: Utiliser le chemin du screenshot complet plutôt que le crop
|
|
||||||
# car certains VLM gèrent mieux les fichiers que les images PIL
|
|
||||||
result = self.vlm_client.generate(
|
|
||||||
prompt=prompt,
|
|
||||||
image_path=screenshot_path, # Utiliser le chemin au lieu de l'image PIL
|
|
||||||
temperature=0.1,
|
|
||||||
max_tokens=1000
|
|
||||||
)
|
|
||||||
|
|
||||||
if not result["success"]:
|
|
||||||
print(f"❌ VLM detection failed: {result.get('error', 'Unknown error')}")
|
|
||||||
return []
|
|
||||||
|
|
||||||
if not result["response"] or len(result["response"].strip()) == 0:
|
|
||||||
print(f"⚠ VLM returned empty response")
|
|
||||||
return []
|
|
||||||
|
|
||||||
# Parser la réponse JSON
|
|
||||||
elements = self._parse_vlm_response(
|
|
||||||
result["response"],
|
|
||||||
region_bbox,
|
|
||||||
screenshot_path,
|
|
||||||
window_context
|
|
||||||
)
|
|
||||||
|
|
||||||
return elements
|
|
||||||
|
|
||||||
def _parse_vlm_response(self,
                        response: str,
                        region_bbox: Tuple[int, int, int, int],
                        screenshot_path: str,
                        window_context: Optional[Dict] = None) -> List[UIElement]:
    """
    Parse the VLM's JSON answer into UIElements.

    Args:
        response: Raw text answer from the VLM
        region_bbox: Bbox of the analyzed region (x, y, w, h)
        screenshot_path: Path of the screenshot
        window_context: Window context (not read here; kept for signature parity)

    Returns:
        List of UIElements (empty on parse failure)
    """
    x_offset, y_offset, region_w, region_h = region_bbox

    try:
        # Extract the JSON payload (the answer may contain text before/after it)
        json_match = re.search(r'\[.*\]', response, re.DOTALL)
        if not json_match:
            print(f"No JSON array found in VLM response")
            print(f"VLM response was: {response[:500]}...")
            return []

        elements_data = json.loads(json_match.group(0))

        if not isinstance(elements_data, list):
            print(f"VLM response is not a JSON array")
            return []

        elements = []
        for i, elem_data in enumerate(elements_data):
            try:
                # Positions may come as percentages or as textual hints
                if 'x' in elem_data and 'y' in elem_data:
                    # Percentage format: coordinates relative to the region
                    x_pct = float(elem_data.get('x', 0))
                    y_pct = float(elem_data.get('y', 0))
                    w_pct = float(elem_data.get('width', 10))
                    h_pct = float(elem_data.get('height', 5))

                    elem_x = x_offset + int(region_w * x_pct / 100)
                    elem_y = y_offset + int(region_h * y_pct / 100)
                    elem_w = int(region_w * w_pct / 100)
                    elem_h = int(region_h * h_pct / 100)
                else:
                    # Textual position format (top/middle/bottom, left/center/right)
                    position = elem_data.get('position', 'middle-center').lower()

                    # Vertical placement at quarter / half / three-quarter marks
                    if 'top' in position:
                        elem_y = y_offset + region_h // 4
                    elif 'bottom' in position:
                        elem_y = y_offset + 3 * region_h // 4
                    else:  # middle
                        elem_y = y_offset + region_h // 2

                    if 'left' in position:
                        elem_x = x_offset + region_w // 4
                    elif 'right' in position:
                        elem_x = x_offset + 3 * region_w // 4
                    else:  # center
                        elem_x = x_offset + region_w // 2

                    # Default pixel size chosen by element type
                    elem_type = elem_data.get('type', 'button')
                    if elem_type == 'button':
                        elem_w, elem_h = 100, 40
                    elif elem_type == 'text_input':
                        elem_w, elem_h = 200, 35
                    elif elem_type == 'checkbox':
                        elem_w, elem_h = 25, 25
                    else:
                        elem_w, elem_h = 80, 30

                # Build the UIElement
                element = UIElement(
                    element_id=f"vlm_{elem_x}_{elem_y}",
                    type=elem_data.get('type', 'unknown'),
                    role=elem_data.get('role', 'unknown'),
                    bbox=(elem_x, elem_y, elem_w, elem_h),
                    center=(elem_x + elem_w // 2, elem_y + elem_h // 2),
                    label=elem_data.get('label', ''),
                    label_confidence=0.85,  # default confidence for VLM output
                    embeddings=UIElementEmbeddings(),
                    visual_features=VisualFeatures(
                        dominant_color="rgb(128, 128, 128)",
                        has_icon=elem_data.get('type') == 'icon',
                        shape="rectangle",
                        size_category="medium"
                    ),
                    confidence=0.85,  # default confidence for VLM output
                    metadata={
                        "detected_by": "vlm",
                        "model": self.config.vlm_model,
                        "screenshot_path": screenshot_path
                    }
                )

                elements.append(element)

            except (KeyError, ValueError, TypeError) as e:
                # Skip malformed entries, keep the rest
                print(f"Error parsing element {i}: {e}")
                continue

        return elements

    except json.JSONDecodeError as e:
        print(f"Failed to parse VLM JSON response: {e}")
        print(f"Response was: {response[:200]}...")
        return []
|
|
||||||
|
|
||||||
def _simulate_element_detection(self,
                                region_image: Image.Image,
                                region_bbox: Tuple[int, int, int, int],
                                screenshot_path: str,
                                window_context: Optional[Dict] = None) -> List[UIElement]:
    """Simulated element detection (development fallback).

    Emits 2-3 randomly placed, randomly typed fake elements that fit
    inside the given region.
    """
    x_offset, y_offset, w, h = region_bbox

    type_choices = ["button", "text_input", "checkbox", "link", "icon"]
    role_choices = ["primary_action", "cancel", "submit", "form_input", "navigation"]

    fakes = []
    for idx in range(np.random.randint(2, 4)):
        # Random size first, then a random position keeping it in-region.
        box_w = np.random.randint(50, 150)
        box_h = np.random.randint(20, 60)
        box_x = x_offset + np.random.randint(0, max(1, w - box_w))
        box_y = y_offset + np.random.randint(0, max(1, h - box_h))

        fakes.append(UIElement(
            element_id=f"elem_{box_x}_{box_y}",
            type=np.random.choice(type_choices),
            role=np.random.choice(role_choices),
            bbox=(box_x, box_y, box_w, box_h),
            center=(box_x + box_w // 2, box_y + box_h // 2),
            label=f"Element {idx}",
            label_confidence=np.random.uniform(0.7, 0.95),
            embeddings=UIElementEmbeddings(),  # empty embeddings
            visual_features=VisualFeatures(
                dominant_color="rgb(128, 128, 128)",
                has_icon=np.random.choice([True, False]),
                shape="rectangle",
                size_category="medium"
            ),
            confidence=np.random.uniform(0.7, 0.95),
            metadata={"simulated": True, "screenshot_path": screenshot_path}
        ))

    return fakes
|
|
||||||
|
|
||||||
def classify_type(self,
                  element_image: Image.Image,
                  context: Optional[Dict] = None) -> Tuple[str, float]:
    """Classify the UI type of an element.

    Args:
        element_image: Cropped element image.
        context: Extra context for the classifier.

    Returns:
        (type, confidence); ("unknown", 0.0) when the VLM call fails.
    """
    if self.vlm_client is None:
        # Simulation mode: random but plausible answer.
        candidates = ["button", "text_input", "checkbox", "radio", "dropdown",
                      "tab", "link", "icon", "table_row", "menu_item"]
        return np.random.choice(candidates), np.random.uniform(0.7, 0.95)

    # Real VLM-backed classification.
    outcome = self.vlm_client.classify_element_type(element_image, context)
    if outcome["success"]:
        return outcome["type"], outcome["confidence"]
    return "unknown", 0.0
|
|
||||||
|
|
||||||
def classify_role(self,
                  element_image: Image.Image,
                  element_type: str,
                  context: Optional[Dict] = None) -> Tuple[str, float]:
    """Classify the semantic role of an element.

    Args:
        element_image: Cropped element image.
        element_type: Previously determined element type.
        context: Extra context for the classifier.

    Returns:
        (role, confidence); ("unknown", 0.0) when the VLM call fails.
    """
    if self.vlm_client is None:
        # Simulation mode: random but plausible answer.
        candidates = ["primary_action", "cancel", "submit", "form_input",
                      "search_field", "navigation", "settings", "close"]
        return np.random.choice(candidates), np.random.uniform(0.7, 0.95)

    # Real VLM-backed classification.
    outcome = self.vlm_client.classify_element_role(
        element_image,
        element_type,
        context
    )
    if outcome["success"]:
        return outcome["role"], outcome["confidence"]
    return "unknown", 0.0
|
|
||||||
|
|
||||||
def extract_visual_features(self,
                            element_image: Image.Image) -> VisualFeatures:
    """Extract coarse visual features from an element crop.

    Args:
        element_image: Cropped element image.

    Returns:
        VisualFeatures (dominant color, shape, size category, icon guess).
    """
    # Dominant color = per-channel mean over the crop; grayscale images
    # fall back to mid-gray. NOTE(review): this produces an RGB tuple,
    # while other code paths use "rgb(...)" strings — confirm consumers
    # accept both representations.
    pixels = np.array(element_image)
    if len(pixels.shape) == 3:
        dominant_color = tuple(pixels.mean(axis=(0, 1)).astype(int).tolist())
    else:
        dominant_color = (128, 128, 128)

    # Shape from the aspect ratio (guarding against zero height).
    width, height = element_image.size
    aspect_ratio = width / height if height > 0 else 1.0

    if aspect_ratio > 3:
        shape = "horizontal_bar"
    elif aspect_ratio < 0.33:
        shape = "vertical_bar"
    elif 0.8 <= aspect_ratio <= 1.2:
        shape = "square"
    else:
        shape = "rectangle"

    # Bucket the area into small / medium / large.
    area = width * height
    if area < 1000:
        size_category = "small"
    elif area < 10000:
        size_category = "medium"
    else:
        size_category = "large"

    # Heuristic: small, roughly square crops are probably icons.
    has_icon = width < 100 and height < 100 and 0.8 <= aspect_ratio <= 1.2

    return VisualFeatures(
        dominant_color=dominant_color,
        has_icon=has_icon,
        shape=shape,
        size_category=size_category
    )
|
|
||||||
|
|
||||||
def generate_embeddings(self,
                        element_image: Image.Image,
                        element_label: str,
                        embedder: Optional[Any] = None) -> Optional[UIElementEmbeddings]:
    """Generate dual (image + text) embeddings for an element.

    Currently a stub: both embedding branches are TODO, so in practice
    this always returns None.

    Args:
        element_image: Cropped element image.
        element_label: Textual label of the element.
        embedder: Embedder to use (optional).

    Returns:
        UIElementEmbeddings, or None when embeddings are disabled,
        no embedder is supplied, or nothing could be embedded.
    """
    if not self.config.use_embeddings or embedder is None:
        return None

    try:
        # Image embedding branch.
        image_embedding_id = None
        if hasattr(embedder, 'embed_image'):
            # TODO: persist the crop to disk, then embed it.
            pass

        # Text embedding branch.
        text_embedding_id = None
        if element_label and hasattr(embedder, 'embed_text'):
            # TODO: embed the label text.
            pass

        if image_embedding_id or text_embedding_id:
            return UIElementEmbeddings(
                image_embedding_id=image_embedding_id,
                text_embedding_id=text_embedding_id,
                provider="openclip_ViT-B-32",
                dimensions=512
            )
    except Exception as e:
        print(f"Warning: Failed to generate embeddings: {e}")

    return None
|
|
||||||
|
|
||||||
def set_vlm_client(self, client: Any) -> None:
    """Attach the VLM client used for real detection/classification."""
    self.vlm_client = client
|
|
||||||
|
|
||||||
def get_config(self) -> DetectionConfig:
    """Return the active detection configuration."""
    return self.config
|
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# Fonctions utilitaires
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
def create_detector(vlm_model: str = "qwen3-vl:8b",
                    confidence_threshold: float = 0.7) -> UIDetector:
    """Build a UIDetector with a custom configuration.

    Args:
        vlm_model: VLM model name to use.
        confidence_threshold: Minimum confidence for detections.

    Returns:
        A configured UIDetector.
    """
    return UIDetector(DetectionConfig(
        vlm_model=vlm_model,
        confidence_threshold=confidence_threshold
    ))
|
|
||||||
194
core/detection/vlm_config.py
Normal file
194
core/detection/vlm_config.py
Normal file
@@ -0,0 +1,194 @@
|
|||||||
|
"""
|
||||||
|
Configuration centralisée du modèle VLM (Vision-Language Model).
|
||||||
|
|
||||||
|
Point unique de configuration pour le modèle VLM utilisé dans tout le pipeline.
|
||||||
|
Gère la variable d'environnement RPA_VLM_MODEL avec fallback automatique
|
||||||
|
si le modèle configuré n'est pas disponible dans Ollama.
|
||||||
|
|
||||||
|
Ordre de résolution du modèle :
|
||||||
|
1. Variable d'env RPA_VLM_MODEL (prioritaire)
|
||||||
|
2. Variable d'env VLM_MODEL (compatibilité)
|
||||||
|
3. Modèle par défaut : gemma4:latest
|
||||||
|
|
||||||
|
Fallback automatique :
|
||||||
|
Si le modèle choisi n'est pas trouvé dans Ollama, on essaie les
|
||||||
|
modèles de fallback dans l'ordre (FALLBACK_VLM_MODELS).
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Modèle VLM par défaut — Gemma 4 latest (8B dense, Q4_K_M)
|
||||||
|
# Nécessite think=false dans le payload (sinon tokens vides sur Ollama >=0.20)
|
||||||
|
DEFAULT_VLM_MODEL = "gemma4:latest"
|
||||||
|
|
||||||
|
# Modèles de fallback, testés dans l'ordre si le modèle principal n'est pas dispo
|
||||||
|
FALLBACK_VLM_MODELS = ["qwen3-vl:8b", "0000/ui-tars-1.5-7b-q8_0:7b"]
|
||||||
|
|
||||||
|
# Endpoint Ollama par défaut
|
||||||
|
DEFAULT_OLLAMA_ENDPOINT = "http://localhost:11434"
|
||||||
|
|
||||||
|
# Cache du modèle résolu (évite de requêter Ollama à chaque appel)
|
||||||
|
_resolved_model: Optional[str] = None
|
||||||
|
_resolved_model_checked = False
|
||||||
|
|
||||||
|
|
||||||
|
def get_vlm_model(
    endpoint: str = DEFAULT_OLLAMA_ENDPOINT,
    force_check: bool = False,
) -> str:
    """Return the VLM model name to use, with automatic fallback.

    Checks model availability in Ollama on the first call, then caches
    the result for subsequent calls.

    Args:
        endpoint: Ollama API URL
        force_check: Force a fresh check (ignore the cache)

    Returns:
        Name of an available VLM model (e.g. "gemma4:latest")
    """
    global _resolved_model, _resolved_model_checked

    # Fast path: return the cached resolution unless a re-check is forced.
    if _resolved_model_checked and not force_check:
        return _resolved_model

    # Read the configured model from the environment
    # (RPA_VLM_MODEL takes priority; VLM_MODEL kept for compatibility).
    configured = (
        os.environ.get("RPA_VLM_MODEL")
        or os.environ.get("VLM_MODEL")
        or DEFAULT_VLM_MODEL
    )

    # Ask Ollama which models are installed (None = unreachable).
    available = _list_ollama_models(endpoint)

    if available is None:
        # Ollama unreachable — use the configured model without verification.
        logger.warning(
            "Ollama non joignable (%s) — utilisation de '%s' sans vérification",
            endpoint, configured,
        )
        _resolved_model = configured
        _resolved_model_checked = True
        return _resolved_model

    # Is the configured model available?
    if _model_available(configured, available):
        logger.info("VLM model: %s (configuré, disponible)", configured)
        _resolved_model = configured
        _resolved_model_checked = True
        return _resolved_model

    # Fallback: try the alternative models in order.
    logger.warning(
        "Modèle VLM '%s' non trouvé dans Ollama. Recherche d'un fallback...",
        configured,
    )

    # Build the full fallback candidate list (default first, then extras).
    fallback_candidates = [DEFAULT_VLM_MODEL] + FALLBACK_VLM_MODELS
    for candidate in fallback_candidates:
        if candidate == configured:
            continue  # already tested above
        if _model_available(candidate, available):
            logger.info(
                "VLM model: %s (fallback, '%s' non disponible)",
                candidate, configured,
            )
            _resolved_model = candidate
            _resolved_model_checked = True
            return _resolved_model

    # No fallback found — use the configured model anyway
    # (Ollama may pull it on first use).
    logger.warning(
        "Aucun modèle VLM trouvé dans Ollama. "
        "Modèles disponibles : %s. Utilisation de '%s' par défaut.",
        [m for m in available if "vl" in m.lower() or "gemma" in m.lower()],
        configured,
    )
    _resolved_model = configured
    _resolved_model_checked = True
    return _resolved_model
|
||||||
|
|
||||||
|
|
||||||
|
def reset_vlm_model_cache():
    """Clear the cached resolved model.

    Useful after a configuration change or a model pull.
    """
    global _resolved_model, _resolved_model_checked
    _resolved_model_checked = False
    _resolved_model = None
|
||||||
|
|
||||||
|
|
||||||
|
def is_thinking_model(model_name: str) -> bool:
    """Return True for 'thinking' models (qwen3 family).

    Thinking models need an assistant prefill to skip the internal
    reasoning phase, which can run for >180s on image inputs.

    Args:
        model_name: Model name (e.g. "qwen3-vl:8b", "gemma4:e4b")

    Returns:
        True when the model needs the prefill workaround
    """
    lowered = model_name.lower()
    return "qwen3" in lowered
|
||||||
|
|
||||||
|
|
||||||
|
def needs_think_false(model_name: str) -> bool:
    """Return True for models that require think=false in the payload.

    On Ollama >= 0.20, gemma4 emits empty tokens unless thinking is
    explicitly disabled; the flag must be sent in the chat payload.

    Args:
        model_name: Model name (e.g. "gemma4:latest", "gemma4:e4b")

    Returns:
        True when the model requires think=false
    """
    lowered = model_name.lower()
    return "gemma4" in lowered
|
||||||
|
|
||||||
|
|
||||||
|
def _list_ollama_models(endpoint: str) -> Optional[List[str]]:
|
||||||
|
"""Lister les modèles disponibles dans Ollama.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Liste des noms de modèles, ou None si Ollama n'est pas joignable.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
resp = requests.get(f"{endpoint}/api/tags", timeout=5)
|
||||||
|
if resp.status_code == 200:
|
||||||
|
models = resp.json().get("models", [])
|
||||||
|
return [m["name"] for m in models]
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _model_available(model_name: str, available_models: List[str]) -> bool:
|
||||||
|
"""Vérifie si un modèle est disponible dans la liste Ollama.
|
||||||
|
|
||||||
|
Supporte la correspondance exacte et le match sans tag de version
|
||||||
|
(ex: "gemma4:e4b" match "gemma4:e4b" ou "gemma4:e4b-q4_0").
|
||||||
|
"""
|
||||||
|
# Match exact
|
||||||
|
if model_name in available_models:
|
||||||
|
return True
|
||||||
|
|
||||||
|
# Match par préfixe (sans tag) — "gemma4:e4b" match "gemma4:e4b"
|
||||||
|
base_name = model_name.split(":")[0] if ":" in model_name else model_name
|
||||||
|
for m in available_models:
|
||||||
|
if m.startswith(base_name + ":"):
|
||||||
|
return True
|
||||||
|
|
||||||
|
return False
|
||||||
@@ -58,8 +58,18 @@ class CLIPEmbedder(EmbedderBase):
|
|||||||
"Install it with: pip install open-clip-torch"
|
"Install it with: pip install open-clip-torch"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Default to CPU to save GPU for vision models (Qwen3-VL, etc.)
|
|
||||||
if device is None:
|
if device is None:
|
||||||
|
try:
|
||||||
|
import torch
|
||||||
|
if torch.cuda.is_available():
|
||||||
|
free_vram = torch.cuda.mem_get_info()[0] / 1024**3
|
||||||
|
if free_vram > 1.5:
|
||||||
|
device = "cuda"
|
||||||
|
else:
|
||||||
|
device = "cpu"
|
||||||
|
else:
|
||||||
|
device = "cpu"
|
||||||
|
except Exception:
|
||||||
device = "cpu"
|
device = "cpu"
|
||||||
|
|
||||||
self.model_name = model_name
|
self.model_name = model_name
|
||||||
|
|||||||
@@ -11,7 +11,12 @@ from pathlib import Path
|
|||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import json
|
import json
|
||||||
import pickle
|
|
||||||
|
from core.security.signed_serializer import (
|
||||||
|
SignatureVerificationError,
|
||||||
|
load_signed,
|
||||||
|
save_signed,
|
||||||
|
)
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -500,21 +505,23 @@ class FAISSManager:
|
|||||||
# Sauvegarder index FAISS
|
# Sauvegarder index FAISS
|
||||||
faiss.write_index(index_to_save, str(index_path))
|
faiss.write_index(index_to_save, str(index_path))
|
||||||
|
|
||||||
# Sauvegarder métadonnées
|
# Sauvegarder métadonnées (JSON signé HMAC — cf. core.security.signed_serializer)
|
||||||
metadata = {
|
metadata = {
|
||||||
"dimensions": self.dimensions,
|
"dimensions": self.dimensions,
|
||||||
"index_type": self.index_type,
|
"index_type": self.index_type,
|
||||||
"metric": self.metric,
|
"metric": self.metric,
|
||||||
"next_id": self.next_id,
|
"next_id": self.next_id,
|
||||||
"metadata_store": self.metadata_store,
|
# Les clés dict sont des int côté Python ; on les sérialise en str
|
||||||
|
# puis on les reconvertit au chargement. JSON n'autorise pas de
|
||||||
|
# clés non-string.
|
||||||
|
"metadata_store": {str(k): v for k, v in self.metadata_store.items()},
|
||||||
"nlist": self.nlist,
|
"nlist": self.nlist,
|
||||||
"nprobe": self.nprobe,
|
"nprobe": self.nprobe,
|
||||||
"is_trained": self.is_trained,
|
"is_trained": self.is_trained,
|
||||||
"auto_optimize": self.auto_optimize
|
"auto_optimize": self.auto_optimize,
|
||||||
}
|
}
|
||||||
|
|
||||||
with open(metadata_path, 'wb') as f:
|
save_signed(metadata_path, metadata)
|
||||||
pickle.dump(metadata, f)
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def load(cls, index_path: Path, metadata_path: Path, use_gpu: bool = False) -> 'FAISSManager':
|
def load(cls, index_path: Path, metadata_path: Path, use_gpu: bool = False) -> 'FAISSManager':
|
||||||
@@ -529,11 +536,22 @@ class FAISSManager:
|
|||||||
Returns:
|
Returns:
|
||||||
FAISSManager chargé
|
FAISSManager chargé
|
||||||
"""
|
"""
|
||||||
# Charger métadonnées
|
# Charger métadonnées (JSON signé ; fallback legacy pickle avec migration).
|
||||||
with open(metadata_path, 'rb') as f:
|
try:
|
||||||
metadata = pickle.load(f)
|
metadata = load_signed(metadata_path)
|
||||||
|
except SignatureVerificationError:
|
||||||
|
logger.error(
|
||||||
|
"Signature HMAC invalide pour %s — refus de chargement.",
|
||||||
|
metadata_path,
|
||||||
|
)
|
||||||
|
raise
|
||||||
|
|
||||||
# Créer instance
|
# Reconvertir les clés int du metadata_store (JSON force des clés str).
|
||||||
|
if isinstance(metadata.get("metadata_store"), dict):
|
||||||
|
metadata["metadata_store"] = {
|
||||||
|
int(k) if isinstance(k, str) and k.lstrip("-").isdigit() else k: v
|
||||||
|
for k, v in metadata["metadata_store"].items()
|
||||||
|
}
|
||||||
manager = cls(
|
manager = cls(
|
||||||
dimensions=metadata["dimensions"],
|
dimensions=metadata["dimensions"],
|
||||||
index_type=metadata["index_type"],
|
index_type=metadata["index_type"],
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ from .error_handler import ErrorHandler, ErrorType, RecoveryStrategy
|
|||||||
from .workflow_runner import WorkflowRunner, RunResult, RunStatus, RunnerConfig
|
from .workflow_runner import WorkflowRunner, RunResult, RunStatus, RunnerConfig
|
||||||
from .dag_executor import DAGExecutor, WorkflowStep, StepType, StepStatus, DAGExecutionResult
|
from .dag_executor import DAGExecutor, WorkflowStep, StepType, StepStatus, DAGExecutionResult
|
||||||
from .llm_actions import LLMActionHandler
|
from .llm_actions import LLMActionHandler
|
||||||
|
from .observe_reason_act import ORALoop, Observation, Decision, VerificationResult, LoopResult
|
||||||
|
|
||||||
# Import tardif pour éviter import circulaire avec pipeline
|
# Import tardif pour éviter import circulaire avec pipeline
|
||||||
def _get_execution_loop():
|
def _get_execution_loop():
|
||||||
@@ -34,5 +35,11 @@ __all__ = [
|
|||||||
'StepStatus',
|
'StepStatus',
|
||||||
'DAGExecutionResult',
|
'DAGExecutionResult',
|
||||||
'LLMActionHandler',
|
'LLMActionHandler',
|
||||||
|
# ORA — boucle Observe-Raisonne-Agit avec vérification
|
||||||
|
'ORALoop',
|
||||||
|
'Observation',
|
||||||
|
'Decision',
|
||||||
|
'VerificationResult',
|
||||||
|
'LoopResult',
|
||||||
# ExecutionLoop accessible via import direct du module
|
# ExecutionLoop accessible via import direct du module
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -654,7 +654,8 @@ class ActionExecutor:
|
|||||||
if PYAUTOGUI_AVAILABLE:
|
if PYAUTOGUI_AVAILABLE:
|
||||||
pyautogui.click(click_x, click_y)
|
pyautogui.click(click_x, click_y)
|
||||||
time.sleep(0.2)
|
time.sleep(0.2)
|
||||||
pyautogui.write(text, interval=0.05)
|
from .input_handler import safe_type_text
|
||||||
|
safe_type_text(text)
|
||||||
else:
|
else:
|
||||||
logger.info(f" (Simulated click at {click_x:.0f}, {click_y:.0f})")
|
logger.info(f" (Simulated click at {click_x:.0f}, {click_y:.0f})")
|
||||||
logger.info(f" (Simulated typing: {text[:50]}...)")
|
logger.info(f" (Simulated typing: {text[:50]}...)")
|
||||||
|
|||||||
@@ -525,11 +525,25 @@ class DAGExecutor:
|
|||||||
True/False selon le résultat de la condition
|
True/False selon le résultat de la condition
|
||||||
"""
|
"""
|
||||||
condition = action.get("condition", "True")
|
condition = action.get("condition", "True")
|
||||||
# Contexte d'évaluation sécurisé : uniquement les résultats
|
# Contexte d'évaluation sécurisé : uniquement les résultats.
|
||||||
|
# NB : on utilise un évaluateur AST restreint (pas d'eval/exec),
|
||||||
|
# seuls literals, comparaisons, booléens et indexations sont permis.
|
||||||
eval_context = {"results": dict(self._results)}
|
eval_context = {"results": dict(self._results)}
|
||||||
|
|
||||||
|
# Import local pour éviter une dépendance circulaire au chargement.
|
||||||
|
from core.execution.safe_condition_evaluator import (
|
||||||
|
UnsafeExpressionError,
|
||||||
|
safe_eval_condition,
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
result = bool(eval(condition, {"__builtins__": {}}, eval_context))
|
result = bool(safe_eval_condition(condition, eval_context))
|
||||||
|
except UnsafeExpressionError as exc:
|
||||||
|
logger.error(
|
||||||
|
"Condition refusée pour '%s' (expression non sûre) : %s",
|
||||||
|
step.step_id, exc,
|
||||||
|
)
|
||||||
|
result = False
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"Erreur d'évaluation de condition pour '%s' : %s",
|
"Erreur d'évaluation de condition pour '%s' : %s",
|
||||||
|
|||||||
@@ -151,6 +151,13 @@ class StepResult:
|
|||||||
duration_ms: float
|
duration_ms: float
|
||||||
message: str
|
message: str
|
||||||
screenshot_path: Optional[str] = None
|
screenshot_path: Optional[str] = None
|
||||||
|
# C1 — Instrumentation vision-aware
|
||||||
|
ocr_ms: float = 0.0 # Temps OCR du ScreenState de ce step
|
||||||
|
ui_ms: float = 0.0 # Temps détection UI de ce step
|
||||||
|
total_ms: float = 0.0 # Temps total (alias de duration_ms pour cohérence)
|
||||||
|
analyze_ms: float = 0.0 # Temps total analyse ScreenState (OCR + UI + reste)
|
||||||
|
cache_hit: bool = False # True si ScreenState vient du cache
|
||||||
|
degraded: bool = False # True si mode dégradé activé (timeout analyse)
|
||||||
|
|
||||||
|
|
||||||
class ExecutionLoop:
|
class ExecutionLoop:
|
||||||
@@ -175,7 +182,13 @@ class ExecutionLoop:
|
|||||||
capture_interval_ms: int = 500,
|
capture_interval_ms: int = 500,
|
||||||
max_no_match_retries: int = 5,
|
max_no_match_retries: int = 5,
|
||||||
confirmation_callback: Optional[Callable[[str, Dict], bool]] = None,
|
confirmation_callback: Optional[Callable[[str, Dict], bool]] = None,
|
||||||
coaching_callback: Optional[Callable[[str, Dict], "CoachingResponse"]] = None
|
coaching_callback: Optional[Callable[[str, Dict], "CoachingResponse"]] = None,
|
||||||
|
screen_analyzer: Optional[Any] = None,
|
||||||
|
screen_state_cache: Optional[Any] = None,
|
||||||
|
enable_ui_detection: bool = True,
|
||||||
|
enable_ocr: bool = True,
|
||||||
|
analyze_timeout_ms: int = 8000,
|
||||||
|
window_info_provider: Optional[Callable[[], Optional[Dict[str, Any]]]] = None,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Initialiser la boucle d'exécution.
|
Initialiser la boucle d'exécution.
|
||||||
@@ -188,6 +201,15 @@ class ExecutionLoop:
|
|||||||
max_no_match_retries: Nombre max de tentatives si pas de match
|
max_no_match_retries: Nombre max de tentatives si pas de match
|
||||||
confirmation_callback: Callback pour demander confirmation (SUPERVISED)
|
confirmation_callback: Callback pour demander confirmation (SUPERVISED)
|
||||||
coaching_callback: Callback pour décisions coaching (COACHING)
|
coaching_callback: Callback pour décisions coaching (COACHING)
|
||||||
|
screen_analyzer: ScreenAnalyzer pour construire un ScreenState enrichi
|
||||||
|
(lazy init via singleton si None)
|
||||||
|
screen_state_cache: Cache perceptuel (lazy init via singleton si None)
|
||||||
|
enable_ui_detection: Active la détection UI (True par défaut, flag d'urgence)
|
||||||
|
enable_ocr: Active l'OCR (True par défaut)
|
||||||
|
analyze_timeout_ms: Timeout soft pour l'analyse d'un ScreenState.
|
||||||
|
Au-delà, on active le mode dégradé pour les steps suivants.
|
||||||
|
window_info_provider: Callable renvoyant un dict window_info. Si None,
|
||||||
|
on tente `screen_capturer.get_active_window()`.
|
||||||
"""
|
"""
|
||||||
self.pipeline = pipeline
|
self.pipeline = pipeline
|
||||||
self.action_executor = action_executor or ActionExecutor()
|
self.action_executor = action_executor or ActionExecutor()
|
||||||
@@ -204,6 +226,27 @@ class ExecutionLoop:
|
|||||||
self.confirmation_callback = confirmation_callback
|
self.confirmation_callback = confirmation_callback
|
||||||
self.coaching_callback = coaching_callback
|
self.coaching_callback = coaching_callback
|
||||||
|
|
||||||
|
# C1 — Vision-aware execution
|
||||||
|
self._screen_analyzer = screen_analyzer # lazy init si None
|
||||||
|
self._screen_state_cache = screen_state_cache # lazy init si None
|
||||||
|
self.enable_ui_detection = enable_ui_detection
|
||||||
|
self.enable_ocr = enable_ocr
|
||||||
|
self.analyze_timeout_ms = analyze_timeout_ms
|
||||||
|
self._window_info_provider = window_info_provider
|
||||||
|
# Mode dégradé déclenché par un timeout analyse — persiste tant qu'un
|
||||||
|
# probe n'a pas démontré la récupération (voir ci-dessous).
|
||||||
|
self._degraded_mode = False
|
||||||
|
# Auto-rétablissement : compteur de steps rapides consécutifs.
|
||||||
|
# Si l'analyse tourne vite (< analyze_timeout_ms / 2) pendant
|
||||||
|
# _fast_steps_recovery_threshold steps → on quitte le mode dégradé.
|
||||||
|
self._successive_fast_steps = 0
|
||||||
|
self._fast_steps_recovery_threshold = 3
|
||||||
|
# En mode dégradé, on retente l'analyse tous les _probe_interval steps
|
||||||
|
# pour détecter la récupération (les autres steps restent en stub pour
|
||||||
|
# éviter de re-saturer le GPU). 10 par défaut = ~5s à 500ms/step.
|
||||||
|
self._probe_interval = 10
|
||||||
|
self._degraded_step_counter = 0
|
||||||
|
|
||||||
# État interne
|
# État interne
|
||||||
self.state = ExecutionState.IDLE
|
self.state = ExecutionState.IDLE
|
||||||
self.context: Optional[ExecutionContext] = None
|
self.context: Optional[ExecutionContext] = None
|
||||||
@@ -464,15 +507,15 @@ class ExecutionLoop:
|
|||||||
})
|
})
|
||||||
|
|
||||||
# Notify Analytics about step completion
|
# Notify Analytics about step completion
|
||||||
|
# C1 — transmet tous les champs vision-aware (ocr_ms, ui_ms,
|
||||||
|
# analyze_ms, cache_hit, degraded) au système analytics via
|
||||||
|
# on_step_result qui accepte un StepResult complet.
|
||||||
if self._analytics_integration and step_result:
|
if self._analytics_integration and step_result:
|
||||||
try:
|
try:
|
||||||
self._analytics_integration.on_step_complete(
|
self._analytics_integration.on_step_result(
|
||||||
workflow_id=self.context.workflow_id,
|
|
||||||
execution_id=self.context.execution_id,
|
execution_id=self.context.execution_id,
|
||||||
step_id=step_result.node_id,
|
workflow_id=self.context.workflow_id,
|
||||||
success=step_result.success,
|
step_result=step_result,
|
||||||
duration_ms=step_result.duration_ms,
|
|
||||||
confidence=step_result.match_confidence
|
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Analytics step notification failed: {e}")
|
logger.warning(f"Analytics step notification failed: {e}")
|
||||||
@@ -505,10 +548,32 @@ class ExecutionLoop:
|
|||||||
self._notify_state_change(ExecutionState.STOPPED)
|
self._notify_state_change(ExecutionState.STOPPED)
|
||||||
|
|
||||||
# Notify Analytics about execution completion
|
# Notify Analytics about execution completion
|
||||||
|
# Contrat normalisé (Lot A) : duration_ms + status explicite
|
||||||
|
# au lieu du booléen success + duration ambigu.
|
||||||
if self._analytics_integration and self.context:
|
if self._analytics_integration and self.context:
|
||||||
try:
|
try:
|
||||||
success = self.state == ExecutionState.COMPLETED
|
duration_ms = (
|
||||||
duration_ms = (datetime.now() - self.context.started_at).total_seconds() * 1000
|
datetime.now() - self.context.started_at
|
||||||
|
).total_seconds() * 1000
|
||||||
|
|
||||||
|
# Mapping ExecutionState → status analytics
|
||||||
|
if self.state == ExecutionState.COMPLETED:
|
||||||
|
status = "completed"
|
||||||
|
elif self.state == ExecutionState.FAILED:
|
||||||
|
status = "failed"
|
||||||
|
elif self.state == ExecutionState.STOPPED:
|
||||||
|
status = "stopped"
|
||||||
|
elif self.state == ExecutionState.PAUSED:
|
||||||
|
# Pause non résolue à la sortie = blocage non récupéré
|
||||||
|
status = "blocked"
|
||||||
|
else:
|
||||||
|
status = self.state.value
|
||||||
|
|
||||||
|
error_message = (
|
||||||
|
None
|
||||||
|
if status == "completed"
|
||||||
|
else f"Execution ended in state: {self.state.value}"
|
||||||
|
)
|
||||||
|
|
||||||
# Stop resource monitoring
|
# Stop resource monitoring
|
||||||
self._analytics_integration.stop_resource_monitoring(
|
self._analytics_integration.stop_resource_monitoring(
|
||||||
@@ -518,12 +583,12 @@ class ExecutionLoop:
|
|||||||
self._analytics_integration.on_execution_complete(
|
self._analytics_integration.on_execution_complete(
|
||||||
workflow_id=self.context.workflow_id,
|
workflow_id=self.context.workflow_id,
|
||||||
execution_id=self.context.execution_id,
|
execution_id=self.context.execution_id,
|
||||||
success=success,
|
|
||||||
duration_ms=duration_ms,
|
duration_ms=duration_ms,
|
||||||
steps_executed=self.context.steps_executed,
|
status=status,
|
||||||
steps_succeeded=self.context.steps_succeeded,
|
steps_total=self.context.steps_executed,
|
||||||
|
steps_completed=self.context.steps_succeeded,
|
||||||
steps_failed=self.context.steps_failed,
|
steps_failed=self.context.steps_failed,
|
||||||
error_message=None if success else f"Execution ended in state: {self.state.value}"
|
error_message=error_message,
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Analytics completion notification failed: {e}")
|
logger.warning(f"Analytics completion notification failed: {e}")
|
||||||
@@ -547,10 +612,23 @@ class ExecutionLoop:
|
|||||||
|
|
||||||
self.context.last_screenshot_path = screenshot_path
|
self.context.last_screenshot_path = screenshot_path
|
||||||
|
|
||||||
|
# 1bis. Construire un ScreenState enrichi (C1) — avec cache perceptuel
|
||||||
|
screen_state, timings = self._build_screen_state(screenshot_path)
|
||||||
|
logger.debug(
|
||||||
|
f"[Step] ScreenState analyze={timings['analyze_ms']:.0f}ms "
|
||||||
|
f"ocr={timings['ocr_ms']:.0f}ms ui={timings['ui_ms']:.0f}ms "
|
||||||
|
f"cache_hit={timings['cache_hit']} degraded={timings['degraded']}"
|
||||||
|
)
|
||||||
|
|
||||||
# 2. Identifier l'état actuel (matching)
|
# 2. Identifier l'état actuel (matching)
|
||||||
match = self.pipeline.match_current_state(
|
#
|
||||||
screenshot_path,
|
# Lot E — on consomme le ScreenState enrichi déjà construit en 1bis
|
||||||
workflow_id=self.context.workflow_id
|
# (avec ui_elements, detected_text, window_title réels) au lieu de
|
||||||
|
# laisser le pipeline reconstruire un stub avec window_title="Unknown".
|
||||||
|
# Premier vrai matching context-aware.
|
||||||
|
match = self.pipeline.match_current_state_from_state(
|
||||||
|
screen_state,
|
||||||
|
workflow_id=self.context.workflow_id,
|
||||||
)
|
)
|
||||||
|
|
||||||
if not match:
|
if not match:
|
||||||
@@ -564,25 +642,98 @@ class ExecutionLoop:
|
|||||||
|
|
||||||
logger.info(f"Matched node: {current_node_id} (confidence: {confidence:.3f})")
|
logger.info(f"Matched node: {current_node_id} (confidence: {confidence:.3f})")
|
||||||
|
|
||||||
# 3. Obtenir la prochaine action
|
# 3. Obtenir la prochaine action (C3 : sélection d'edge robuste)
|
||||||
|
#
|
||||||
|
# Lot A — contrat dict avec status explicite :
|
||||||
|
# "terminal" → fin légitime du workflow (success=True)
|
||||||
|
# "blocked" → pause supervisée (plus JAMAIS traité comme un succès
|
||||||
|
# pour ne pas déclencher un faux _is_workflow_complete)
|
||||||
|
# "selected" → action à exécuter
|
||||||
|
#
|
||||||
|
# Lot B — on propage la confidence du match courant (source_similarity)
|
||||||
|
# pour que l'EdgeScorer puisse vérifier la précondition
|
||||||
|
# `min_source_similarity` de chaque edge. Sans cette propagation, la
|
||||||
|
# contrainte était silencieusement désactivée (hardcodé à 1.0).
|
||||||
next_action = self.pipeline.get_next_action(
|
next_action = self.pipeline.get_next_action(
|
||||||
self.context.workflow_id,
|
self.context.workflow_id,
|
||||||
current_node_id
|
current_node_id,
|
||||||
|
screen_state=screen_state,
|
||||||
|
source_similarity=confidence,
|
||||||
)
|
)
|
||||||
|
|
||||||
if not next_action:
|
# Rétrocompat défensive : si un pipeline legacy renvoie None ou un dict
|
||||||
# Pas d'action suivante = fin du workflow ou node terminal
|
# sans status, on considère ça comme un blocage (safe default).
|
||||||
|
if not isinstance(next_action, dict) or "status" not in next_action:
|
||||||
|
logger.error(
|
||||||
|
"get_next_action a renvoyé un résultat sans status "
|
||||||
|
f"(legacy?). Valeur reçue: {next_action!r}"
|
||||||
|
)
|
||||||
|
next_action = {"status": "blocked", "reason": "legacy_none_return"}
|
||||||
|
|
||||||
|
action_status = next_action.get("status")
|
||||||
|
|
||||||
|
if action_status == "terminal":
|
||||||
|
# Fin légitime : aucun outgoing_edge sur le node courant
|
||||||
|
total_ms = (time.time() - start_time) * 1000
|
||||||
return StepResult(
|
return StepResult(
|
||||||
success=True,
|
success=True,
|
||||||
node_id=current_node_id,
|
node_id=current_node_id,
|
||||||
edge_id=None,
|
edge_id=None,
|
||||||
action_result=None,
|
action_result=None,
|
||||||
match_confidence=confidence,
|
match_confidence=confidence,
|
||||||
duration_ms=(time.time() - start_time) * 1000,
|
duration_ms=total_ms,
|
||||||
message="No next action (terminal node)",
|
message="Workflow terminated (terminal node)",
|
||||||
screenshot_path=screenshot_path
|
screenshot_path=screenshot_path,
|
||||||
|
ocr_ms=timings["ocr_ms"],
|
||||||
|
ui_ms=timings["ui_ms"],
|
||||||
|
analyze_ms=timings["analyze_ms"],
|
||||||
|
total_ms=total_ms,
|
||||||
|
cache_hit=timings["cache_hit"],
|
||||||
|
degraded=timings["degraded"],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if action_status == "blocked":
|
||||||
|
# Blocage : des edges existent mais aucun n'est valide.
|
||||||
|
# On déclenche une pause supervisée (paused_need_help) et on
|
||||||
|
# remonte l'erreur. On ne retourne PAS success=True.
|
||||||
|
reason = next_action.get("reason", "unknown")
|
||||||
|
logger.warning(
|
||||||
|
f"ExecutionLoop bloqué sur {current_node_id}: {reason} "
|
||||||
|
f"→ pause supervisée demandée"
|
||||||
|
)
|
||||||
|
# On bascule en PAUSED et on arme _pause_requested pour que la
|
||||||
|
# boucle principale attende un resume() humain.
|
||||||
|
self.state = ExecutionState.PAUSED
|
||||||
|
self._pause_requested = True
|
||||||
|
self._notify_state_change(ExecutionState.PAUSED)
|
||||||
|
if self._on_error:
|
||||||
|
try:
|
||||||
|
self._on_error(
|
||||||
|
"blocked",
|
||||||
|
Exception(f"No valid edge from {current_node_id}: {reason}"),
|
||||||
|
)
|
||||||
|
except Exception as cb_err:
|
||||||
|
logger.debug(f"on_error callback failed: {cb_err}")
|
||||||
|
|
||||||
|
total_ms = (time.time() - start_time) * 1000
|
||||||
|
return StepResult(
|
||||||
|
success=False,
|
||||||
|
node_id=current_node_id,
|
||||||
|
edge_id=None,
|
||||||
|
action_result=None,
|
||||||
|
match_confidence=confidence,
|
||||||
|
duration_ms=total_ms,
|
||||||
|
message=f"Blocked: {reason}",
|
||||||
|
screenshot_path=screenshot_path,
|
||||||
|
ocr_ms=timings["ocr_ms"],
|
||||||
|
ui_ms=timings["ui_ms"],
|
||||||
|
analyze_ms=timings["analyze_ms"],
|
||||||
|
total_ms=total_ms,
|
||||||
|
cache_hit=timings["cache_hit"],
|
||||||
|
degraded=timings["degraded"],
|
||||||
|
)
|
||||||
|
|
||||||
|
# À partir d'ici, on est forcément en status="selected"
|
||||||
edge_id = next_action["edge_id"]
|
edge_id = next_action["edge_id"]
|
||||||
self.context.current_edge_id = edge_id
|
self.context.current_edge_id = edge_id
|
||||||
|
|
||||||
@@ -604,7 +755,7 @@ class ExecutionLoop:
|
|||||||
if coaching_response.decision == CoachingDecision.ACCEPT:
|
if coaching_response.decision == CoachingDecision.ACCEPT:
|
||||||
# Utilisateur accepte : exécuter l'action suggérée
|
# Utilisateur accepte : exécuter l'action suggérée
|
||||||
self._coaching_stats['accepted'] += 1
|
self._coaching_stats['accepted'] += 1
|
||||||
action_result = self._execute_action(next_action)
|
action_result = self._execute_action(next_action, screen_state=screen_state)
|
||||||
self._record_coaching_feedback(
|
self._record_coaching_feedback(
|
||||||
next_action, coaching_response, action_result, success=True
|
next_action, coaching_response, action_result, success=True
|
||||||
)
|
)
|
||||||
@@ -615,15 +766,22 @@ class ExecutionLoop:
|
|||||||
self._record_coaching_feedback(
|
self._record_coaching_feedback(
|
||||||
next_action, coaching_response, None, success=False
|
next_action, coaching_response, None, success=False
|
||||||
)
|
)
|
||||||
|
total_ms = (time.time() - start_time) * 1000
|
||||||
return StepResult(
|
return StepResult(
|
||||||
success=False,
|
success=False,
|
||||||
node_id=current_node_id,
|
node_id=current_node_id,
|
||||||
edge_id=edge_id,
|
edge_id=edge_id,
|
||||||
action_result=None,
|
action_result=None,
|
||||||
match_confidence=confidence,
|
match_confidence=confidence,
|
||||||
duration_ms=(time.time() - start_time) * 1000,
|
duration_ms=total_ms,
|
||||||
message="Action rejected by user in COACHING mode",
|
message="Action rejected by user in COACHING mode",
|
||||||
screenshot_path=screenshot_path
|
screenshot_path=screenshot_path,
|
||||||
|
ocr_ms=timings["ocr_ms"],
|
||||||
|
ui_ms=timings["ui_ms"],
|
||||||
|
analyze_ms=timings["analyze_ms"],
|
||||||
|
total_ms=total_ms,
|
||||||
|
cache_hit=timings["cache_hit"],
|
||||||
|
degraded=timings["degraded"],
|
||||||
)
|
)
|
||||||
|
|
||||||
elif coaching_response.decision == CoachingDecision.CORRECT:
|
elif coaching_response.decision == CoachingDecision.CORRECT:
|
||||||
@@ -632,7 +790,7 @@ class ExecutionLoop:
|
|||||||
corrected_action = self._apply_coaching_correction(
|
corrected_action = self._apply_coaching_correction(
|
||||||
next_action, coaching_response.correction
|
next_action, coaching_response.correction
|
||||||
)
|
)
|
||||||
action_result = self._execute_action(corrected_action)
|
action_result = self._execute_action(corrected_action, screen_state=screen_state)
|
||||||
self._record_coaching_feedback(
|
self._record_coaching_feedback(
|
||||||
next_action, coaching_response, action_result,
|
next_action, coaching_response, action_result,
|
||||||
success=action_result.status == ExecutionStatus.SUCCESS if action_result else False
|
success=action_result.status == ExecutionStatus.SUCCESS if action_result else False
|
||||||
@@ -658,23 +816,30 @@ class ExecutionLoop:
|
|||||||
# Mode supervisé : demander confirmation
|
# Mode supervisé : demander confirmation
|
||||||
if not self._request_confirmation(next_action):
|
if not self._request_confirmation(next_action):
|
||||||
logger.info("Action rejected by user")
|
logger.info("Action rejected by user")
|
||||||
|
total_ms = (time.time() - start_time) * 1000
|
||||||
return StepResult(
|
return StepResult(
|
||||||
success=False,
|
success=False,
|
||||||
node_id=current_node_id,
|
node_id=current_node_id,
|
||||||
edge_id=edge_id,
|
edge_id=edge_id,
|
||||||
action_result=None,
|
action_result=None,
|
||||||
match_confidence=confidence,
|
match_confidence=confidence,
|
||||||
duration_ms=(time.time() - start_time) * 1000,
|
duration_ms=total_ms,
|
||||||
message="Action rejected by user",
|
message="Action rejected by user",
|
||||||
screenshot_path=screenshot_path
|
screenshot_path=screenshot_path,
|
||||||
|
ocr_ms=timings["ocr_ms"],
|
||||||
|
ui_ms=timings["ui_ms"],
|
||||||
|
analyze_ms=timings["analyze_ms"],
|
||||||
|
total_ms=total_ms,
|
||||||
|
cache_hit=timings["cache_hit"],
|
||||||
|
degraded=timings["degraded"],
|
||||||
)
|
)
|
||||||
|
|
||||||
# Exécuter l'action
|
# Exécuter l'action
|
||||||
action_result = self._execute_action(next_action)
|
action_result = self._execute_action(next_action, screen_state=screen_state)
|
||||||
|
|
||||||
elif self.context.mode == ExecutionMode.AUTOMATIC:
|
elif self.context.mode == ExecutionMode.AUTOMATIC:
|
||||||
# Mode automatique : exécuter directement
|
# Mode automatique : exécuter directement
|
||||||
action_result = self._execute_action(next_action)
|
action_result = self._execute_action(next_action, screen_state=screen_state)
|
||||||
|
|
||||||
# 5. Mettre à jour les compteurs
|
# 5. Mettre à jour les compteurs
|
||||||
self.context.steps_executed += 1
|
self.context.steps_executed += 1
|
||||||
@@ -693,7 +858,13 @@ class ExecutionLoop:
|
|||||||
match_confidence=confidence,
|
match_confidence=confidence,
|
||||||
duration_ms=duration_ms,
|
duration_ms=duration_ms,
|
||||||
message=action_result.message if action_result else "Observed",
|
message=action_result.message if action_result else "Observed",
|
||||||
screenshot_path=screenshot_path
|
screenshot_path=screenshot_path,
|
||||||
|
ocr_ms=timings["ocr_ms"],
|
||||||
|
ui_ms=timings["ui_ms"],
|
||||||
|
analyze_ms=timings["analyze_ms"],
|
||||||
|
total_ms=duration_ms,
|
||||||
|
cache_hit=timings["cache_hit"],
|
||||||
|
degraded=timings["degraded"],
|
||||||
)
|
)
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
@@ -718,8 +889,18 @@ class ExecutionLoop:
|
|||||||
logger.error(f"Screen capture failed: {e}")
|
logger.error(f"Screen capture failed: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _execute_action(self, action_info: Dict[str, Any]) -> ExecutionResult:
|
def _execute_action(
|
||||||
"""Exécuter une action via l'ActionExecutor."""
|
self,
|
||||||
|
action_info: Dict[str, Any],
|
||||||
|
screen_state: Optional[Any] = None,
|
||||||
|
) -> ExecutionResult:
|
||||||
|
"""
|
||||||
|
Exécuter une action via l'ActionExecutor.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
action_info: dict action {edge_id, action, target_node, ...}
|
||||||
|
screen_state: ScreenState enrichi (si None, fallback stub minimal)
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
# Charger le workflow et l'edge
|
# Charger le workflow et l'edge
|
||||||
workflow = self.pipeline.load_workflow(self.context.workflow_id)
|
workflow = self.pipeline.load_workflow(self.context.workflow_id)
|
||||||
@@ -732,36 +913,10 @@ class ExecutionLoop:
|
|||||||
duration_ms=0
|
duration_ms=0
|
||||||
)
|
)
|
||||||
|
|
||||||
# Créer un ScreenState minimal pour l'exécution
|
# Utiliser le ScreenState enrichi fourni par le loop ; fallback minimal
|
||||||
from core.models.screen_state import (
|
# uniquement si on n'en a pas (legacy, tests).
|
||||||
ScreenState, WindowContext, RawLevel, PerceptionLevel,
|
if screen_state is None:
|
||||||
ContextLevel, EmbeddingRef
|
screen_state = self._build_stub_screen_state()
|
||||||
)
|
|
||||||
|
|
||||||
screen_state = ScreenState(
|
|
||||||
screen_state_id=f"exec_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
|
|
||||||
timestamp=datetime.now(),
|
|
||||||
session_id=self.context.execution_id,
|
|
||||||
window=WindowContext(
|
|
||||||
app_name="unknown",
|
|
||||||
window_title="Unknown",
|
|
||||||
screen_resolution=[1920, 1080],
|
|
||||||
workspace="main"
|
|
||||||
),
|
|
||||||
raw=RawLevel(
|
|
||||||
screenshot_path=self.context.last_screenshot_path or "",
|
|
||||||
capture_method="execution",
|
|
||||||
file_size_bytes=0
|
|
||||||
),
|
|
||||||
perception=PerceptionLevel(
|
|
||||||
embedding=EmbeddingRef(provider="", vector_id="", dimensions=512),
|
|
||||||
detected_text=[],
|
|
||||||
text_detection_method="none",
|
|
||||||
confidence_avg=0.0
|
|
||||||
),
|
|
||||||
context=ContextLevel(),
|
|
||||||
ui_elements=[]
|
|
||||||
)
|
|
||||||
|
|
||||||
# Exécuter l'action
|
# Exécuter l'action
|
||||||
result = self.action_executor.execute_edge(
|
result = self.action_executor.execute_edge(
|
||||||
@@ -782,6 +937,286 @@ class ExecutionLoop:
|
|||||||
error=e
|
error=e
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# C1 — Construction du ScreenState (vision-aware)
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
def _get_screen_analyzer(self):
|
||||||
|
"""
|
||||||
|
Récupérer le ScreenAnalyzer (singleton partagé, lazy).
|
||||||
|
|
||||||
|
Retourne None si indisponible (import error, etc.) — le loop
|
||||||
|
bascule alors en fallback stub.
|
||||||
|
|
||||||
|
Note Lot C : on ne passe plus `session_id` au singleton. Le session_id
|
||||||
|
est désormais un paramètre d'appel de `analyze()`, pour éviter que deux
|
||||||
|
ExecutionLoop partageant le même analyzer se marchent dessus.
|
||||||
|
"""
|
||||||
|
if self._screen_analyzer is not None:
|
||||||
|
return self._screen_analyzer
|
||||||
|
try:
|
||||||
|
from core.pipeline import get_screen_analyzer
|
||||||
|
self._screen_analyzer = get_screen_analyzer()
|
||||||
|
return self._screen_analyzer
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"ScreenAnalyzer indisponible: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _get_screen_state_cache(self):
|
||||||
|
"""Récupérer le cache de ScreenState (singleton partagé, lazy)."""
|
||||||
|
if self._screen_state_cache is not None:
|
||||||
|
return self._screen_state_cache
|
||||||
|
try:
|
||||||
|
from core.pipeline import get_screen_state_cache
|
||||||
|
self._screen_state_cache = get_screen_state_cache()
|
||||||
|
return self._screen_state_cache
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"ScreenStateCache indisponible: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _resolve_window_info(self) -> Optional[Dict[str, Any]]:
|
||||||
|
"""
|
||||||
|
Récupérer les infos de la fenêtre active.
|
||||||
|
|
||||||
|
Ordre de préférence :
|
||||||
|
1. `window_info_provider` fourni au constructeur
|
||||||
|
2. `screen_capturer.get_active_window()`
|
||||||
|
3. None → ScreenAnalyzer utilisera les valeurs par défaut
|
||||||
|
"""
|
||||||
|
if self._window_info_provider is not None:
|
||||||
|
try:
|
||||||
|
return self._window_info_provider()
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"window_info_provider failed: {e}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
raw = self.screen_capturer.get_active_window()
|
||||||
|
if raw:
|
||||||
|
# Normaliser vers le format attendu par ScreenAnalyzer
|
||||||
|
return {
|
||||||
|
"title": raw.get("title", "Unknown"),
|
||||||
|
"app_name": raw.get("app", "unknown"),
|
||||||
|
"window_bounds": [
|
||||||
|
raw.get("x", 0),
|
||||||
|
raw.get("y", 0),
|
||||||
|
raw.get("width", 0),
|
||||||
|
raw.get("height", 0),
|
||||||
|
],
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"get_active_window failed: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _build_screen_state(
|
||||||
|
self,
|
||||||
|
screenshot_path: str,
|
||||||
|
) -> tuple:
|
||||||
|
"""
|
||||||
|
Construire un ScreenState enrichi depuis un screenshot.
|
||||||
|
|
||||||
|
Logique :
|
||||||
|
- Si enable_ui_detection=False ET enable_ocr=False → stub
|
||||||
|
- Si analyseur indisponible → stub
|
||||||
|
- Sinon : cache.get_or_compute(analyzer.analyze)
|
||||||
|
- Timeout soft : si l'analyse dépasse `analyze_timeout_ms`, on log
|
||||||
|
un warning et on active le mode dégradé pour les prochains steps.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
(screen_state, timings_dict)
|
||||||
|
timings_dict: {
|
||||||
|
"analyze_ms", "ocr_ms", "ui_ms", "cache_hit", "degraded"
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
timings = {
|
||||||
|
"analyze_ms": 0.0,
|
||||||
|
"ocr_ms": 0.0,
|
||||||
|
"ui_ms": 0.0,
|
||||||
|
"cache_hit": False,
|
||||||
|
"degraded": False,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Mode "tout désactivé" (flag d'urgence) → stub
|
||||||
|
if not self.enable_ui_detection and not self.enable_ocr:
|
||||||
|
timings["degraded"] = True
|
||||||
|
return self._build_stub_screen_state(screenshot_path), timings
|
||||||
|
|
||||||
|
analyzer = self._get_screen_analyzer()
|
||||||
|
if analyzer is None:
|
||||||
|
timings["degraded"] = True
|
||||||
|
return self._build_stub_screen_state(screenshot_path), timings
|
||||||
|
|
||||||
|
# Mode dégradé : on reste sur stub, sauf "probe" périodique qui teste
|
||||||
|
# si le GPU est redevenu performant. Si oui, on accumule les steps
|
||||||
|
# rapides ; après _fast_steps_recovery_threshold probes rapides
|
||||||
|
# consécutifs on retourne en mode complet.
|
||||||
|
if self._degraded_mode:
|
||||||
|
self._degraded_step_counter += 1
|
||||||
|
if self._degraded_step_counter < self._probe_interval:
|
||||||
|
timings["degraded"] = True
|
||||||
|
return self._build_stub_screen_state(screenshot_path), timings
|
||||||
|
# Sinon on tente un probe réel ci-dessous
|
||||||
|
self._degraded_step_counter = 0
|
||||||
|
|
||||||
|
cache = self._get_screen_state_cache()
|
||||||
|
|
||||||
|
# Invalidation proactive : si l'écran a massivement changé depuis
|
||||||
|
# la dernière entrée du cache, on purge. Le TTL seul (2s) laisserait
|
||||||
|
# passer des entrées obsolètes sur des changements rapides (popup, nav).
|
||||||
|
if cache is not None:
|
||||||
|
try:
|
||||||
|
cache.invalidate_if_changed(screenshot_path, threshold=0.3)
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"invalidate_if_changed a échoué: {e}")
|
||||||
|
|
||||||
|
window_info = self._resolve_window_info()
|
||||||
|
|
||||||
|
# Fonction de calcul (cache miss)
|
||||||
|
# Les flags runtime (enable_ocr, enable_ui_detection) et le session_id
|
||||||
|
# sont passés en kwargs-only à analyze() : AUCUNE mutation de l'analyseur
|
||||||
|
# singleton (Lot C — thread-safety, deux ExecutionLoop peuvent partager
|
||||||
|
# le même analyzer sans se contaminer).
|
||||||
|
execution_id = self.context.execution_id if self.context else ""
|
||||||
|
|
||||||
|
def compute(path: str):
|
||||||
|
t_start = time.time()
|
||||||
|
state = analyzer.analyze(
|
||||||
|
path,
|
||||||
|
window_info=window_info,
|
||||||
|
enable_ocr=self.enable_ocr,
|
||||||
|
enable_ui_detection=self.enable_ui_detection,
|
||||||
|
session_id=execution_id,
|
||||||
|
)
|
||||||
|
elapsed = (time.time() - t_start) * 1000
|
||||||
|
# Annoter le temps dans les métadonnées
|
||||||
|
if hasattr(state, "metadata"):
|
||||||
|
state.metadata["analyze_ms"] = elapsed
|
||||||
|
return state
|
||||||
|
|
||||||
|
t0 = time.time()
|
||||||
|
try:
|
||||||
|
if cache is not None:
|
||||||
|
# Lot D — clé composite context-aware : deux contextes
|
||||||
|
# différents partageant le même screenshot n'entrent plus
|
||||||
|
# en collision. Le workflow_id isole les replays par workflow,
|
||||||
|
# les flags différencient les modes d'analyse (OCR on/off,
|
||||||
|
# UI on/off), et le (window_title, app_name) distingue deux
|
||||||
|
# applications qui présenteraient un rendu visuel similaire.
|
||||||
|
ctx_window_title = (window_info or {}).get("title", "") or ""
|
||||||
|
ctx_app_name = (window_info or {}).get("app_name", "") or ""
|
||||||
|
ctx_workflow_id = (
|
||||||
|
self.context.workflow_id if self.context else ""
|
||||||
|
)
|
||||||
|
state, cache_hit, _ = cache.get_or_compute(
|
||||||
|
screenshot_path,
|
||||||
|
compute,
|
||||||
|
window_title=ctx_window_title,
|
||||||
|
app_name=ctx_app_name,
|
||||||
|
enable_ocr=self.enable_ocr,
|
||||||
|
enable_ui_detection=self.enable_ui_detection,
|
||||||
|
workflow_id=ctx_workflow_id,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
state = compute(screenshot_path)
|
||||||
|
cache_hit = False
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"ScreenState build failed: {e} — fallback stub")
|
||||||
|
timings["degraded"] = True
|
||||||
|
return self._build_stub_screen_state(screenshot_path), timings
|
||||||
|
|
||||||
|
analyze_ms = (time.time() - t0) * 1000
|
||||||
|
timings["analyze_ms"] = analyze_ms
|
||||||
|
timings["cache_hit"] = cache_hit
|
||||||
|
|
||||||
|
# Décomposer OCR vs UI si possible (métadonnées)
|
||||||
|
meta = getattr(state, "metadata", {}) or {}
|
||||||
|
timings["ocr_ms"] = float(meta.get("ocr_ms", 0.0))
|
||||||
|
timings["ui_ms"] = float(meta.get("ui_ms", 0.0))
|
||||||
|
|
||||||
|
# Timeout soft : activer le mode dégradé si > seuil
|
||||||
|
# (cache_hit ignoré : un hit ne prouve rien sur la santé du GPU)
|
||||||
|
if analyze_ms > self.analyze_timeout_ms and not cache_hit:
|
||||||
|
logger.warning(
|
||||||
|
f"ScreenState analysis slow: {analyze_ms:.0f}ms > "
|
||||||
|
f"{self.analyze_timeout_ms}ms → activation mode dégradé"
|
||||||
|
)
|
||||||
|
self._degraded_mode = True
|
||||||
|
self._successive_fast_steps = 0
|
||||||
|
timings["degraded"] = True
|
||||||
|
else:
|
||||||
|
# Step "rapide" : incrémenter le compteur si < timeout / 2.
|
||||||
|
# On ignore les cache hits (pas représentatifs de la perf GPU).
|
||||||
|
fast_threshold_ms = self.analyze_timeout_ms / 2
|
||||||
|
if not cache_hit and analyze_ms < fast_threshold_ms:
|
||||||
|
self._successive_fast_steps += 1
|
||||||
|
|
||||||
|
# Auto-rétablissement : si on était en dégradé et qu'on a
|
||||||
|
# enchaîné assez de steps rapides → retour en mode complet.
|
||||||
|
if (
|
||||||
|
self._degraded_mode
|
||||||
|
and self._successive_fast_steps
|
||||||
|
>= self._fast_steps_recovery_threshold
|
||||||
|
):
|
||||||
|
logger.info(
|
||||||
|
"Mode complet restauré après %d steps rapides "
|
||||||
|
"(dernier analyze_ms=%.0fms < seuil=%.0fms)",
|
||||||
|
self._successive_fast_steps,
|
||||||
|
analyze_ms,
|
||||||
|
fast_threshold_ms,
|
||||||
|
)
|
||||||
|
self._degraded_mode = False
|
||||||
|
self._successive_fast_steps = 0
|
||||||
|
elif not cache_hit:
|
||||||
|
# Step ni lent ni rapide (entre timeout/2 et timeout) : reset
|
||||||
|
self._successive_fast_steps = 0
|
||||||
|
|
||||||
|
# On propage l'état dégradé courant dans les timings (utile pour le
|
||||||
|
# StepResult : tant qu'on n'a pas récupéré assez de steps rapides,
|
||||||
|
# on continue à signaler "degraded=True").
|
||||||
|
timings["degraded"] = self._degraded_mode
|
||||||
|
|
||||||
|
return state, timings
|
||||||
|
|
||||||
|
def _build_stub_screen_state(self, screenshot_path: Optional[str] = None):
    """
    Build a minimal ScreenState (legacy fallback).

    Used when the analyzer is unavailable or when all detection flags
    are disabled (emergency flag).
    """
    # Local import: keeps the screen-state model optional at module load time.
    from core.models.screen_state import (
        ScreenState, WindowContext, RawLevel, PerceptionLevel,
        ContextLevel, EmbeddingRef
    )

    # Prefer the explicit argument, then the last screenshot recorded on the
    # execution context; final "" guarantees a str even with no context.
    path = screenshot_path or (
        self.context.last_screenshot_path if self.context else ""
    ) or ""

    return ScreenState(
        screen_state_id=f"exec_{datetime.now().strftime('%Y%m%d_%H%M%S_%f')}",
        timestamp=datetime.now(),
        session_id=self.context.execution_id if self.context else "stub",
        # Placeholder window context — real values are unknown in stub mode.
        window=WindowContext(
            app_name="unknown",
            window_title="Unknown",
            screen_resolution=[1920, 1080],
            workspace="main",
        ),
        raw=RawLevel(
            screenshot_path=path,
            capture_method="execution",
            file_size_bytes=0,
        ),
        # Empty perception level: no OCR text, no embedding computed.
        perception=PerceptionLevel(
            embedding=EmbeddingRef(provider="", vector_id="", dimensions=512),
            detected_text=[],
            text_detection_method="none",
            confidence_avg=0.0,
        ),
        context=ContextLevel(),
        ui_elements=[],
    )
||||||
|
|
||||||
def _request_confirmation(self, action_info: Dict[str, Any]) -> bool:
|
def _request_confirmation(self, action_info: Dict[str, Any]) -> bool:
|
||||||
"""Demander confirmation à l'utilisateur."""
|
"""Demander confirmation à l'utilisateur."""
|
||||||
if self.confirmation_callback:
|
if self.confirmation_callback:
|
||||||
|
|||||||
724
core/execution/input_handler.py
Normal file
724
core/execution/input_handler.py
Normal file
@@ -0,0 +1,724 @@
|
|||||||
|
"""
|
||||||
|
Module partagé de saisie texte et gestion des dialogues.
|
||||||
|
|
||||||
|
Utilisé par les deux executors :
|
||||||
|
- VWB executor (visual_workflow_builder/backend/api_v3/execute.py)
|
||||||
|
- Core executor (core/execution/action_executor.py)
|
||||||
|
|
||||||
|
Garantit le même comportement AZERTY/VM/Citrix partout.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import subprocess
|
||||||
|
import shutil
|
||||||
|
import time
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
try:
|
||||||
|
import pyautogui
|
||||||
|
PYAUTOGUI_AVAILABLE = True
|
||||||
|
except ImportError:
|
||||||
|
PYAUTOGUI_AVAILABLE = False
|
||||||
|
|
||||||
|
|
||||||
|
def safe_type_text(text: str):
    """Type text in a way that works across VMs/Citrix and AZERTY/QWERTY layouts.

    Priority:
    1. xdotool type with a keyboard-layout refresh -> traverses spice/QEMU VMs
    2. Clipboard (xclip) + Ctrl+V -> fallback
    3. pyautogui.write() -> last resort

    Args:
        text: Text to type. Empty/None is a silent no-op.
    """
    if not text:
        return

    # Method 1: xdotool type, forcing the French layout first so the
    # keysyms xdotool sends match what the guest expects.
    if shutil.which('xdotool') and shutil.which('setxkbmap'):
        try:
            subprocess.run(['setxkbmap', 'fr'], timeout=2)
            subprocess.run(
                ['xdotool', 'type', '--delay', '0', '--clearmodifiers', '--', text],
                timeout=max(30, len(text) * 0.05),
                check=True
            )
            logger.debug(f"Saisie via xdotool type ({len(text)} car.)")
            return
        except Exception as e:
            logger.debug(f"xdotool type échoué: {e}")

    # Method 2: clipboard paste.
    xclip = shutil.which('xclip')
    if xclip and PYAUTOGUI_AVAILABLE:
        try:
            p = subprocess.Popen(
                ['xclip', '-selection', 'clipboard'],
                stdin=subprocess.PIPE,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL
            )
            p.stdin.write(text.encode('utf-8'))
            p.stdin.close()
            # Fix: reap the xclip process (it was previously leaked as a
            # zombie) and make sure it has taken clipboard ownership
            # before we paste. xclip forks to serve the selection, so the
            # parent exits quickly.
            p.wait(timeout=2)
            time.sleep(0.2)
            pyautogui.hotkey('ctrl', 'v')
            time.sleep(0.3)
            logger.debug(f"Saisie via presse-papier ({len(text)} car.)")
            return
        except Exception as e:
            logger.debug(f"xclip échoué: {e}")

    # Method 3: direct typing — layout-dependent, so AZERTY is not guaranteed.
    if PYAUTOGUI_AVAILABLE:
        logger.warning("Saisie via pyautogui.write() (AZERTY non garanti)")
        pyautogui.write(text, interval=0.02)
    else:
        logger.warning(f"Aucune méthode de saisie disponible pour: {text[:50]}")
|
||||||
|
|
||||||
|
|
||||||
|
def check_screen_for_patterns() -> Optional[Dict[str, Any]]:
    """Check whether the screen shows a known UI pattern (dialog, popup).

    Captures the screen, extracts its text via OCR, and looks the text up
    in the UIPatternLibrary.

    Returns:
        Dict describing the matched pattern, or None.
    """
    try:
        # Local imports: the vision/OCR stacks are heavy and optional.
        from core.knowledge.ui_patterns import UIPatternLibrary
        import mss
        from PIL import Image

        lib = UIPatternLibrary()

        with mss.mss() as sct:
            monitor = sct.monitors[0]
            screenshot = sct.grab(monitor)
            screen = Image.frombytes('RGB', screenshot.size, screenshot.bgra, 'raw', 'BGRX')

        try:
            # Try docTR first (it may be importable from different paths).
            try:
                from services.ocr_service import ocr_extract_text
            except ImportError:
                from core.extraction.field_extractor import FieldExtractor
                extractor = FieldExtractor()
                ocr_extract_text = lambda img: extractor.extract_text_from_image(img)

            ocr_text = ocr_extract_text(screen)
        except ImportError:
            logger.debug("OCR non disponible pour pattern check")
            return None

        # Too little text to be a meaningful dialog — bail out early.
        if not ocr_text or len(ocr_text) < 5:
            return None

        pattern = lib.find_pattern(ocr_text)
        # Only report blocking UI (dialogs/popups), not arbitrary matches.
        if pattern and pattern['category'] in ('dialog', 'popup'):
            logger.info(f"Pattern UI détecté: {pattern['pattern']} → {pattern['action']} '{pattern['target']}'")
            return pattern

        return None

    except Exception as e:
        # Best-effort helper: a failed check must never break the caller.
        logger.debug(f"Pattern check échoué: {e}")
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def handle_detected_pattern(pattern: Dict[str, Any]) -> bool:
    """Automatically handle a detected UI pattern.

    Finds the target button via OCR (real on-screen position).
    100% vision-based — zero hard-coded coordinates.

    Args:
        pattern: Pattern dict with 'action', 'target' and optionally
            'alternatives' and 'pattern' keys (from UIPatternLibrary).

    Returns:
        True if the pattern was handled successfully.
    """
    if not PYAUTOGUI_AVAILABLE:
        logger.warning("pyautogui non disponible — impossible de gérer le pattern")
        return False

    action = pattern.get('action')
    target = pattern.get('target', '')
    alternatives = pattern.get('alternatives', [])

    if action == 'click':
        # The primary label first, then any declared synonyms.
        candidates_labels = [target] + alternatives

        try:
            import mss
            from PIL import Image

            # Import OCR (try both project paths).
            try:
                from services.ocr_service import ocr_extract_words
            except ImportError:
                from core.extraction.field_extractor import FieldExtractor
                extractor = FieldExtractor()
                def ocr_extract_words(img):
                    return extractor.extract_words_from_image(img)

            with mss.mss() as sct:
                monitor = sct.monitors[0]
                screenshot = sct.grab(monitor)
                screen = Image.frombytes('RGB', screenshot.size, screenshot.bgra, 'raw', 'BGRX')

            words = ocr_extract_words(screen)

            # Collect all matches, then pick the lowest one on screen
            # (dialog buttons live at the bottom of the dialog).
            all_matches = []

            for candidate in candidates_labels:
                candidate_lower = candidate.lower()
                for word in words:
                    word_text = word['text'].lower()
                    # Skip degenerate one-character comparisons.
                    if len(word_text) < 2 or len(candidate_lower) < 2:
                        continue
                    if word_text == candidate_lower:
                        x1, y1, x2, y2 = word['bbox']
                        all_matches.append({
                            'text': word['text'],
                            'x': int((x1 + x2) / 2),
                            'y': int((y1 + y2) / 2),
                            'match_type': 'exact',
                        })

            # Partial search (handles a missing underlined accelerator letter).
            if not all_matches:
                for candidate in candidates_labels:
                    if len(candidate) > 3:
                        partial = candidate[1:].lower()
                        for word in words:
                            if partial in word['text'].lower():
                                x1, y1, x2, y2 = word['bbox']
                                all_matches.append({
                                    'text': word['text'],
                                    'x': int((x1 + x2) / 2),
                                    'y': int((y1 + y2) / 2),
                                    'match_type': 'partial',
                                })

            if all_matches:
                # Bottom-most candidate wins (see note above).
                best = max(all_matches, key=lambda m: m['y'])
                logger.info(f"Clic sur '{best['text']}' à ({best['x']}, {best['y']})")
                pyautogui.click(best['x'], best['y'])
                time.sleep(1.0)
                return True

            # OCR found nothing: fall back to VLM reasoning on the screen.
            logger.info(f"Bouton '{target}' introuvable par OCR — appel VLM...")
            vlm_result = vlm_reason_about_screen(
                objective=f"Cliquer sur le bouton '{target}'",
                context=f"Un dialogue '{pattern.get('pattern')}' est détecté"
            )
            if vlm_result and vlm_result.get('action') == 'click' and vlm_result.get('target'):
                vlm_target = vlm_result['target']
                # Ground the VLM suggestion back onto the OCR word positions.
                for word in words:
                    if vlm_target.lower() in word['text'].lower():
                        x1, y1, x2, y2 = word['bbox']
                        x = int((x1 + x2) / 2)
                        y = int((y1 + y2) / 2)
                        logger.info(f"VLM → clic sur '{word['text']}' à ({x}, {y})")
                        pyautogui.click(x, y)
                        time.sleep(1.0)
                        return True

            return False

        except Exception as e:
            logger.warning(f"OCR bouton échoué: {e}")
            return False

    elif action == 'hotkey':
        # Hotkey targets are encoded as "ctrl+s"-style strings.
        keys = target.split('+')
        logger.info(f"Raccourci automatique: {target}")
        pyautogui.hotkey(*keys)
        time.sleep(0.5)
        return True

    # Unknown action type: nothing we can do.
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def vlm_reason_about_screen(objective: str = "", context: str = "") -> Optional[Dict[str, Any]]:
    """Ask the VLM to reason about the current screen and propose an action.

    Used when the reflexes (patterns) are not enough.
    The VLM looks at the screen and decides what to do.

    Args:
        objective: What Léa is trying to do (e.g. "cliquer sur Enregistrer")
        context: Additional context (e.g. "un dialogue est apparu")

    Returns:
        Dict with 'action', 'target', 'reasoning', or None if the VLM cannot help.
    """
    try:
        # Heavy/optional dependencies are imported lazily on purpose.
        import mss
        import requests
        import json
        import base64
        import io
        import os
        from PIL import Image

        with mss.mss() as sct:
            monitor = sct.monitors[0]
            screenshot = sct.grab(monitor)
            screen = Image.frombytes('RGB', screenshot.size, screenshot.bgra, 'raw', 'BGRX')

        # JPEG q70 keeps the payload small enough for the local VLM endpoint.
        buffer = io.BytesIO()
        screen.save(buffer, format='JPEG', quality=70)
        image_b64 = base64.b64encode(buffer.getvalue()).decode('utf-8')

        prompt = f"""Analyse cet écran et dis-moi quoi faire.

Objectif : {objective or "Interagir avec l'interface visible"}
Contexte : {context or "Aucun contexte supplémentaire"}

Réponds en JSON strict :
{{
"action": "click" ou "type" ou "wait" ou "nothing",
"target": "texte exact du bouton ou champ à cliquer",
"reasoning": "explication courte de ton choix"
}}

Si tu vois un dialogue ou une popup, indique quel bouton cliquer.
Si l'écran est normal sans action nécessaire, réponds action="nothing".
Réponds UNIQUEMENT le JSON, pas d'explication."""

        # Endpoint and model are configurable through the environment.
        ollama_url = os.environ.get("OLLAMA_URL", "http://localhost:11434")
        model = os.environ.get("RPA_REASONING_MODEL", "qwen2.5vl:7b")

        response = requests.post(
            f"{ollama_url}/api/generate",
            json={
                "model": model,
                "prompt": prompt,
                "images": [image_b64],
                "stream": False,
                # Low temperature: we want deterministic, parseable JSON.
                "options": {"temperature": 0.1, "num_predict": 200}
            },
            timeout=30
        )

        if response.status_code != 200:
            logger.warning(f"VLM reasoning failed: HTTP {response.status_code}")
            return None

        result = response.json()
        text = result.get('response', '').strip()

        # Extract the first {...} blob — models sometimes wrap it in prose.
        import re
        match = re.search(r'\{[\s\S]*\}', text)
        if match:
            parsed = json.loads(match.group())
            logger.info(f"VLM reasoning: {parsed.get('action')} '{parsed.get('target')}' — {parsed.get('reasoning', '')[:80]}")
            return parsed

        logger.debug(f"VLM response not parseable: {text[:100]}")
        return None

    except Exception as e:
        # Best-effort helper: VLM failures must never crash the executor.
        logger.debug(f"VLM reasoning failed: {e}")
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def find_element_on_screen(
    target_text: str,
    target_description: str = "",
    anchor_image_base64: Optional[str] = None,
    anchor_bbox: Optional[Dict] = None,
) -> Optional[Dict[str, Any]]:
    """Locate an element on screen using a three-level cascade.

    Level 1 — OCR (fast, ~1s): docTR exact-text search
    Level 2 — UI-TARS grounding (~3s): specialised GUI model
    Level 3 — VLM reasoning (~10s): reasoning + OCR confirmation

    Args:
        target_text: Text of the element to find (e.g. "Demo", "Enregistrer")
        target_description: Longer description (e.g. "the Demo folder on the desktop")
        anchor_image_base64: Reference image of the anchor (CLIP matching, reserved)
        anchor_bbox: Original anchor position (disambiguates multiple matches)

    Returns:
        {'x': int, 'y': int, 'method': str, 'confidence': float} or None
    """
    # Labels that are really action types carry no searchable text.
    action_types = {'click_anchor', 'double_click_anchor', 'right_click_anchor',
                    'hover_anchor', 'focus_anchor', 'scroll_to_anchor'}
    has_useful_text = target_text and target_text not in action_types

    # No usable label: ask the VLM to caption the anchor crop instead.
    if not has_useful_text and anchor_image_base64:
        caption = _describe_anchor_image(anchor_image_base64)
        if caption:
            logger.info(f"[Grounding] Ancre décrite par VLM: '{caption}'")
            target_description = caption
            target_text = caption

    if not target_text and not target_description:
        logger.debug("find_element_on_screen: ni target_text ni target_description fournis")
        return None

    search_label = target_description or target_text
    logger.info(f"[Grounding] Recherche élément: '{search_label}' (cascade 3 niveaux)")

    # Run the grounding strategies in order, cheapest first; first hit wins.
    strategies = (
        lambda: _grounding_ocr(target_text, anchor_bbox=anchor_bbox),
        lambda: _grounding_ui_tars(target_text, target_description),
        lambda: _grounding_vlm(target_text, target_description),
    )
    for strategy in strategies:
        hit = strategy()
        if hit:
            return hit

    logger.warning(f"[Grounding] ÉCHEC total pour '{search_label}' — aucune méthode n'a trouvé l'élément")
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def _describe_anchor_image(anchor_image_base64: str) -> Optional[str]:
    """Ask the VLM to describe the anchor image in a few words.

    Used when the label is empty — the VLM looks at the anchor crop and
    describes what it sees ("folder icon named Demo", "Save button", etc.)
    so that UI-TARS can search for that element on the full screen.

    Args:
        anchor_image_base64: Anchor crop, base64-encoded; an optional
            "data:...;base64," data-URL prefix is stripped.

    Returns:
        Short description string, or None on failure.
    """
    try:
        import requests
        import os

        # Strip a possible data-URL prefix ("data:image/png;base64,...").
        if ',' in anchor_image_base64:
            anchor_image_base64 = anchor_image_base64.split(',', 1)[1]

        ollama_url = os.environ.get("OLLAMA_URL", "http://localhost:11434")
        # A small model suffices for a 5-word caption. Overridable via env
        # for consistency with the other model choices in this module
        # (default unchanged, so behavior is identical when the var is unset).
        model = os.environ.get("RPA_DESCRIBE_MODEL", "qwen2.5vl:3b")

        logger.info(f"[Grounding] Description ancre via {model}...")
        response = requests.post(
            f"{ollama_url}/api/generate",
            json={
                "model": model,
                "prompt": "Describe this UI element in 5 words maximum. Just the element name, nothing else. Example: 'folder icon named Demo' or 'Save button' or 'Chrome browser icon'",
                "images": [anchor_image_base64],
                "stream": False,
                "options": {"temperature": 0.1, "num_predict": 20}
            },
            timeout=30
        )

        if response.status_code == 200:
            desc = response.json().get('response', '').strip().strip('"').strip("'")
            # Reject empty or degenerate answers.
            if desc and len(desc) > 2:
                return desc

        return None

    except Exception as e:
        logger.warning(f"[Grounding] Description ancre échouée: {e}")
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def _capture_screen():
    """Grab the display and return (PIL.Image, width, height), or (None, 0, 0) on failure."""
    try:
        import mss
        from PIL import Image as PILImage

        with mss.mss() as sct:
            mon = sct.monitors[0]
            shot = sct.grab(mon)
            img = PILImage.frombytes('RGB', shot.size, shot.bgra, 'raw', 'BGRX')
            return img, mon['width'], mon['height']
    except Exception as e:
        # Best-effort: callers treat (None, 0, 0) as "no screen available".
        logger.debug(f"Capture écran échouée: {e}")
        return None, 0, 0
|
||||||
|
|
||||||
|
|
||||||
|
def _grounding_ocr(target_text: str, anchor_bbox: Optional[Dict] = None) -> Optional[Dict[str, Any]]:
    """Level 1 — find the text via OCR (docTR). ~1s.

    Collects ALL matches and picks the most relevant one:
    - If anchor_bbox is given -> the match closest to the original position
    - Otherwise -> the match closest to the screen centre (content area)

    Args:
        target_text: Exact text to search for.
        anchor_bbox: Original anchor position (keys 'x', 'y', 'width',
            'height') used to disambiguate multiple matches.

    Returns:
        {'x', 'y', 'method': 'ocr', 'confidence'} or None.
    """
    logger.debug(f"[Grounding/OCR] target='{target_text}' bbox={anchor_bbox}")
    if not target_text:
        return None

    try:
        screen, screen_w, screen_h = _capture_screen()
        if screen is None:
            return None

        # OCR backend: docTR service if available, FieldExtractor otherwise.
        try:
            from services.ocr_service import ocr_extract_words
        except ImportError:
            from core.extraction.field_extractor import FieldExtractor
            extractor = FieldExtractor()
            def ocr_extract_words(img):
                return extractor.extract_words_from_image(img)

        words = ocr_extract_words(screen)
        if not words:
            logger.debug("[Grounding/OCR] Aucun mot détecté")
            return None

        target_lower = target_text.lower()
        all_matches = []

        # Collect every exact and substring match with its centre point.
        for word in words:
            word_lower = word['text'].lower()
            x1, y1, x2, y2 = word['bbox']
            cx, cy = int((x1 + x2) / 2), int((y1 + y2) / 2)

            if word_lower == target_lower:
                all_matches.append({'text': word['text'], 'x': cx, 'y': cy, 'type': 'exact', 'conf': 0.95})
            elif len(word_lower) >= 3 and len(target_lower) >= 3:
                if target_lower in word_lower or word_lower in target_lower:
                    # Penalise partial matches much shorter than the target.
                    ratio = len(word_lower) / max(len(target_lower), 1)
                    conf = 0.80 if ratio > 0.5 else 0.50
                    all_matches.append({'text': word['text'], 'x': cx, 'y': cy, 'type': 'partial', 'conf': conf})

        # Missing-initial-letter matching (underlined accelerator letters
        # are often dropped by OCR).
        if not all_matches and len(target_lower) > 3:
            partial = target_lower[1:]
            for word in words:
                if partial in word['text'].lower():
                    x1, y1, x2, y2 = word['bbox']
                    all_matches.append({'text': word['text'], 'x': int((x1+x2)/2), 'y': int((y1+y2)/2), 'type': 'partial_cut', 'conf': 0.70})

        if not all_matches:
            logger.debug(f"[Grounding/OCR] '{target_text}' non trouvé parmi {len(words)} mots")
            return None

        # Pick the best match.
        if len(all_matches) == 1:
            best = all_matches[0]
        elif anchor_bbox:
            # Take the match closest to the anchor's original position.
            orig_x = anchor_bbox.get('x', 0) + anchor_bbox.get('width', 0) / 2
            orig_y = anchor_bbox.get('y', 0) + anchor_bbox.get('height', 0) / 2
            best = min(all_matches, key=lambda m: ((m['x'] - orig_x)**2 + (m['y'] - orig_y)**2))
        else:
            # Take the most central match (content area, not title bars).
            center_x, center_y = screen_w / 2, screen_h / 2
            best = min(all_matches, key=lambda m: ((m['x'] - center_x)**2 + (m['y'] - center_y)**2))

        for m in all_matches:
            sel = " ← CHOISI" if m is best else ""
            logger.info(f"  [OCR] Candidat: '{m['text']}' à ({m['x']}, {m['y']}) [{m['type']}]{sel}")

        return {'x': best['x'], 'y': best['y'], 'method': 'ocr', 'confidence': best['conf']}

    except Exception as e:
        logger.debug(f"[Grounding/OCR] Erreur: {e}")
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def _grounding_ui_tars(target_text: str, target_description: str = "") -> Optional[Dict[str, Any]]:
    """Level 2 — UI-TARS visual grounding (~3s).

    Sends the full screenshot plus a "click on X" instruction to the
    UI-TARS model and parses the coordinates from its answer.

    Args:
        target_text: Short label of the element.
        target_description: Longer description (preferred in the prompt).

    Returns:
        {'x', 'y', 'method': 'ui_tars', 'confidence'} or None.
    """
    try:
        # Fix: removed unused "import re" — coordinate parsing is fully
        # delegated to _parse_ui_tars_coordinates().
        import requests
        import base64
        import io
        import os

        screen, screen_w, screen_h = _capture_screen()
        if screen is None:
            return None

        # Encode the screenshot as base64 JPEG (q70 keeps the payload small).
        buffer = io.BytesIO()
        screen.save(buffer, format='JPEG', quality=70)
        image_b64 = base64.b64encode(buffer.getvalue()).decode('utf-8')

        # Build the UI-TARS prompt.
        click_target = target_description or target_text
        prompt = f"click on {click_target}"

        ollama_url = os.environ.get("OLLAMA_URL", "http://localhost:11434")
        model = "0000/ui-tars-1.5-7b-q8_0:7b"

        logger.info(f"[Grounding/UI-TARS] Envoi à {model}: '{prompt}'")

        response = requests.post(
            f"{ollama_url}/api/generate",
            json={
                "model": model,
                "prompt": prompt,
                "images": [image_b64],
                "stream": False,
                "options": {"temperature": 0.1, "num_predict": 50}
            },
            timeout=30
        )

        if response.status_code != 200:
            logger.warning(f"[Grounding/UI-TARS] HTTP {response.status_code}")
            return None

        result = response.json()
        text = result.get('response', '').strip()
        logger.debug(f"[Grounding/UI-TARS] Réponse brute: {text[:200]}")

        # Parse the coordinates out of the UI-TARS answer.
        coords = _parse_ui_tars_coordinates(text, screen_w, screen_h)
        if coords:
            x, y = coords
            # Sanity-check: coordinates must fall inside the screen.
            if 0 <= x <= screen_w and 0 <= y <= screen_h:
                logger.info(f"[Grounding/UI-TARS] Grounding → ({x}, {y})")
                return {'x': x, 'y': y, 'method': 'ui_tars', 'confidence': 0.85}
            else:
                logger.warning(f"[Grounding/UI-TARS] Coordonnées hors écran: ({x}, {y}) pour {screen_w}x{screen_h}")
                return None

        logger.debug(f"[Grounding/UI-TARS] Pas de coordonnées parsées dans: {text[:100]}")
        return None

    except Exception as e:
        logger.debug(f"[Grounding/UI-TARS] Erreur: {e}")
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_ui_tars_coordinates(text: str, screen_w: int, screen_h: int) -> Optional[tuple]:
|
||||||
|
"""Parse les coordonnées retournées par UI-TARS.
|
||||||
|
|
||||||
|
UI-TARS peut retourner :
|
||||||
|
- Coordonnées normalisées (0-1000) : "click at (500, 300)"
|
||||||
|
- Coordonnées en pixels : "click at (960, 540)"
|
||||||
|
- Format (x, y) ou [x, y] ou x,y
|
||||||
|
- Format "Action: click\nCoordinate: (500, 300)" ou "[500, 300]"
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
(x_pixel, y_pixel) ou None
|
||||||
|
"""
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Chercher des patterns de coordonnées
|
||||||
|
patterns = [
|
||||||
|
r'Coordinate:\s*\[?\(?\s*(\d+(?:\.\d+)?)\s*,\s*(\d+(?:\.\d+)?)\s*\)?\]?',
|
||||||
|
r'click\s+(?:at\s+)?\[?\(?\s*(\d+(?:\.\d+)?)\s*,\s*(\d+(?:\.\d+)?)\s*\)?\]?',
|
||||||
|
r'\(\s*(\d+(?:\.\d+)?)\s*,\s*(\d+(?:\.\d+)?)\s*\)',
|
||||||
|
r'\[\s*(\d+(?:\.\d+)?)\s*,\s*(\d+(?:\.\d+)?)\s*\]',
|
||||||
|
]
|
||||||
|
|
||||||
|
for pattern in patterns:
|
||||||
|
match = re.search(pattern, text, re.IGNORECASE)
|
||||||
|
if match:
|
||||||
|
raw_x = float(match.group(1))
|
||||||
|
raw_y = float(match.group(2))
|
||||||
|
|
||||||
|
# UI-TARS utilise souvent des coordonnées normalisées 0-1000
|
||||||
|
if raw_x <= 1000 and raw_y <= 1000 and (raw_x > 1 or raw_y > 1):
|
||||||
|
# Probablement normalisées sur 1000
|
||||||
|
x = int(raw_x * screen_w / 1000)
|
||||||
|
y = int(raw_y * screen_h / 1000)
|
||||||
|
elif raw_x <= 1.0 and raw_y <= 1.0:
|
||||||
|
# Normalisées 0-1
|
||||||
|
x = int(raw_x * screen_w)
|
||||||
|
y = int(raw_y * screen_h)
|
||||||
|
else:
|
||||||
|
# Pixels directs
|
||||||
|
x = int(raw_x)
|
||||||
|
y = int(raw_y)
|
||||||
|
|
||||||
|
return (x, y)
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _grounding_vlm(target_text: str, target_description: str = "") -> Optional[Dict[str, Any]]:
|
||||||
|
"""Niveau 3 — VLM reasoning + confirmation OCR (~10s)."""
|
||||||
|
try:
|
||||||
|
search_label = target_description or target_text
|
||||||
|
|
||||||
|
vlm_result = vlm_reason_about_screen(
|
||||||
|
objective=f"Cliquer sur {search_label}",
|
||||||
|
context=f"Je cherche l'élément '{target_text}' sur l'écran pour cliquer dessus"
|
||||||
|
)
|
||||||
|
|
||||||
|
if not vlm_result:
|
||||||
|
logger.debug("[Grounding/VLM] VLM n'a pas retourné de résultat")
|
||||||
|
return None
|
||||||
|
|
||||||
|
if vlm_result.get('action') != 'click' or not vlm_result.get('target'):
|
||||||
|
logger.debug(f"[Grounding/VLM] VLM action={vlm_result.get('action')}, pas un clic")
|
||||||
|
return None
|
||||||
|
|
||||||
|
vlm_target = vlm_result['target']
|
||||||
|
logger.info(f"[Grounding/VLM] VLM suggère de cliquer sur: '{vlm_target}'")
|
||||||
|
|
||||||
|
# Confirmation par OCR : chercher le target VLM sur l'écran
|
||||||
|
screen, screen_w, screen_h = _capture_screen()
|
||||||
|
if screen is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
try:
|
||||||
|
try:
|
||||||
|
from services.ocr_service import ocr_extract_words
|
||||||
|
except ImportError:
|
||||||
|
from core.extraction.field_extractor import FieldExtractor
|
||||||
|
extractor = FieldExtractor()
|
||||||
|
def ocr_extract_words(img):
|
||||||
|
return extractor.extract_words_from_image(img)
|
||||||
|
|
||||||
|
words = ocr_extract_words(screen)
|
||||||
|
|
||||||
|
vlm_target_lower = vlm_target.lower()
|
||||||
|
for word in words:
|
||||||
|
if vlm_target_lower in word['text'].lower() or word['text'].lower() in vlm_target_lower:
|
||||||
|
x1, y1, x2, y2 = word['bbox']
|
||||||
|
x = int((x1 + x2) / 2)
|
||||||
|
y = int((y1 + y2) / 2)
|
||||||
|
logger.info(f"[Grounding/VLM] Confirmé par OCR: '{word['text']}' à ({x}, {y})")
|
||||||
|
return {'x': x, 'y': y, 'method': 'vlm', 'confidence': 0.75}
|
||||||
|
|
||||||
|
logger.debug(f"[Grounding/VLM] Target VLM '{vlm_target}' non trouvé par OCR")
|
||||||
|
return None
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"[Grounding/VLM] OCR de confirmation échoué: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"[Grounding/VLM] Erreur: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def post_execution_cleanup(execution_mode: str = 'debug'):
|
||||||
|
"""Vérifie l'écran après exécution et gère les dialogues restants.
|
||||||
|
|
||||||
|
Appelé après la dernière étape d'un workflow pour laisser l'écran propre.
|
||||||
|
"""
|
||||||
|
if execution_mode not in ('intelligent', 'debug'):
|
||||||
|
return
|
||||||
|
|
||||||
|
logger.info("Vérification écran final...")
|
||||||
|
time.sleep(1.0)
|
||||||
|
for _ in range(3):
|
||||||
|
detected = check_screen_for_patterns()
|
||||||
|
if detected:
|
||||||
|
logger.info(f"Dialogue résiduel détecté: {detected.get('pattern')}")
|
||||||
|
handle_detected_pattern(detected)
|
||||||
|
time.sleep(1.0)
|
||||||
|
else:
|
||||||
|
vlm_result = vlm_reason_about_screen(
|
||||||
|
objective="Vérifier que l'écran est propre après l'exécution",
|
||||||
|
context="Le workflow vient de se terminer"
|
||||||
|
)
|
||||||
|
if vlm_result and vlm_result.get('action') in ('click', 'type'):
|
||||||
|
logger.info(f"VLM post-workflow: {vlm_result.get('action')} '{vlm_result.get('target')}'")
|
||||||
|
break
|
||||||
@@ -40,12 +40,16 @@ class LLMActionHandler:
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
ollama_endpoint: str = "http://localhost:11434",
|
ollama_endpoint: str = "http://localhost:11434",
|
||||||
model: str = "qwen3-vl:8b",
|
model: str = None,
|
||||||
temperature: float = 0.1,
|
temperature: float = 0.1,
|
||||||
timeout: int = 120,
|
timeout: int = 120,
|
||||||
):
|
):
|
||||||
self.endpoint = ollama_endpoint.rstrip("/")
|
self.endpoint = ollama_endpoint.rstrip("/")
|
||||||
|
if model is not None:
|
||||||
self.model = model
|
self.model = model
|
||||||
|
else:
|
||||||
|
from core.detection.vlm_config import get_vlm_model
|
||||||
|
self.model = get_vlm_model()
|
||||||
self.temperature = temperature
|
self.temperature = temperature
|
||||||
self.timeout = timeout
|
self.timeout = timeout
|
||||||
|
|
||||||
|
|||||||
1738
core/execution/observe_reason_act.py
Normal file
1738
core/execution/observe_reason_act.py
Normal file
File diff suppressed because it is too large
Load Diff
228
core/execution/safe_condition_evaluator.py
Normal file
228
core/execution/safe_condition_evaluator.py
Normal file
@@ -0,0 +1,228 @@
|
|||||||
|
"""
|
||||||
|
Évaluateur de conditions sécurisé pour le DAGExecutor.
|
||||||
|
|
||||||
|
Remplace `eval()` (vulnérable à l'exécution de code arbitraire) par un
|
||||||
|
parseur AST restreint :
|
||||||
|
|
||||||
|
- Seuls les noeuds AST nécessaires sont autorisés (literals, comparaisons,
|
||||||
|
booléens, indexations, accès attribut limité, arithmétique simple).
|
||||||
|
- Les appels de fonction sont interdits.
|
||||||
|
- Les accès à des attributs « dunder » (`__class__`, `__import__`, etc.)
|
||||||
|
sont systématiquement refusés pour éviter les évasions classiques.
|
||||||
|
- Le contexte d'évaluation est fourni explicitement par l'appelant ;
|
||||||
|
aucun builtins n'est exposé.
|
||||||
|
|
||||||
|
Usage typique :
|
||||||
|
>>> evaluator = SafeConditionEvaluator()
|
||||||
|
>>> evaluator.evaluate("results['step_1']['score'] >= 0.8",
|
||||||
|
... {"results": {"step_1": {"score": 0.92}}})
|
||||||
|
True
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import ast
|
||||||
|
import operator
|
||||||
|
from typing import Any, Callable, Dict, Mapping
|
||||||
|
|
||||||
|
|
||||||
|
class UnsafeExpressionError(ValueError):
|
||||||
|
"""Levée lorsqu'une expression contient un noeud AST interdit."""
|
||||||
|
|
||||||
|
|
||||||
|
# Opérateurs arithmétiques & de comparaison autorisés.
|
||||||
|
_BIN_OPS: Dict[type, Callable[[Any, Any], Any]] = {
|
||||||
|
ast.Add: operator.add,
|
||||||
|
ast.Sub: operator.sub,
|
||||||
|
ast.Mult: operator.mul,
|
||||||
|
ast.Div: operator.truediv,
|
||||||
|
ast.FloorDiv: operator.floordiv,
|
||||||
|
ast.Mod: operator.mod,
|
||||||
|
ast.Pow: operator.pow,
|
||||||
|
}
|
||||||
|
|
||||||
|
_BOOL_OPS: Dict[type, Callable[[Any, Any], Any]] = {
|
||||||
|
ast.And: lambda a, b: a and b,
|
||||||
|
ast.Or: lambda a, b: a or b,
|
||||||
|
}
|
||||||
|
|
||||||
|
_UNARY_OPS: Dict[type, Callable[[Any], Any]] = {
|
||||||
|
ast.Not: operator.not_,
|
||||||
|
ast.USub: operator.neg,
|
||||||
|
ast.UAdd: operator.pos,
|
||||||
|
}
|
||||||
|
|
||||||
|
_CMP_OPS: Dict[type, Callable[[Any, Any], bool]] = {
|
||||||
|
ast.Eq: operator.eq,
|
||||||
|
ast.NotEq: operator.ne,
|
||||||
|
ast.Lt: operator.lt,
|
||||||
|
ast.LtE: operator.le,
|
||||||
|
ast.Gt: operator.gt,
|
||||||
|
ast.GtE: operator.ge,
|
||||||
|
ast.In: lambda a, b: a in b,
|
||||||
|
ast.NotIn: lambda a, b: a not in b,
|
||||||
|
ast.Is: operator.is_,
|
||||||
|
ast.IsNot: operator.is_not,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class SafeConditionEvaluator:
|
||||||
|
"""Évalue une expression de condition via un parseur AST restreint."""
|
||||||
|
|
||||||
|
# Longueur max — stoppe les expressions pathologiques très tôt.
|
||||||
|
MAX_EXPRESSION_LENGTH = 1024
|
||||||
|
|
||||||
|
def evaluate(
|
||||||
|
self,
|
||||||
|
expression: str,
|
||||||
|
context: Mapping[str, Any],
|
||||||
|
) -> Any:
|
||||||
|
if not isinstance(expression, str):
|
||||||
|
raise UnsafeExpressionError(
|
||||||
|
"L'expression doit être une chaîne de caractères."
|
||||||
|
)
|
||||||
|
if len(expression) > self.MAX_EXPRESSION_LENGTH:
|
||||||
|
raise UnsafeExpressionError(
|
||||||
|
"Expression trop longue (> 1024 caractères)."
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
tree = ast.parse(expression, mode="eval")
|
||||||
|
except SyntaxError as exc:
|
||||||
|
raise UnsafeExpressionError(
|
||||||
|
f"Syntaxe d'expression invalide : {exc}"
|
||||||
|
) from exc
|
||||||
|
|
||||||
|
return self._eval_node(tree.body, context)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Dispatch AST
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _eval_node(self, node: ast.AST, context: Mapping[str, Any]) -> Any:
|
||||||
|
# Littéraux (Constant remplace Num/Str/Bytes/NameConstant depuis 3.8)
|
||||||
|
if isinstance(node, ast.Constant):
|
||||||
|
return node.value
|
||||||
|
|
||||||
|
# Variables : uniquement celles présentes dans `context`.
|
||||||
|
if isinstance(node, ast.Name):
|
||||||
|
if node.id not in context:
|
||||||
|
raise UnsafeExpressionError(
|
||||||
|
f"Variable '{node.id}' non autorisée."
|
||||||
|
)
|
||||||
|
return context[node.id]
|
||||||
|
|
||||||
|
# Accès attribut — interdit tout attribut dunder.
|
||||||
|
if isinstance(node, ast.Attribute):
|
||||||
|
if node.attr.startswith("_"):
|
||||||
|
raise UnsafeExpressionError(
|
||||||
|
f"Accès à l'attribut privé '{node.attr}' interdit."
|
||||||
|
)
|
||||||
|
value = self._eval_node(node.value, context)
|
||||||
|
return getattr(value, node.attr)
|
||||||
|
|
||||||
|
# Indexation (results['step_1']).
|
||||||
|
if isinstance(node, ast.Subscript):
|
||||||
|
value = self._eval_node(node.value, context)
|
||||||
|
# Python < 3.9 utilise ast.Index, >= 3.9 utilise directement un
|
||||||
|
# noeud. On gère les deux cas.
|
||||||
|
slice_node = node.slice
|
||||||
|
if isinstance(slice_node, ast.Index): # type: ignore[attr-defined]
|
||||||
|
slice_value = self._eval_node(
|
||||||
|
slice_node.value, context # type: ignore[attr-defined]
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
slice_value = self._eval_node(slice_node, context)
|
||||||
|
return value[slice_value]
|
||||||
|
|
||||||
|
# Comparaisons chaînées (a < b <= c).
|
||||||
|
if isinstance(node, ast.Compare):
|
||||||
|
left = self._eval_node(node.left, context)
|
||||||
|
for op_node, comparator in zip(node.ops, node.comparators):
|
||||||
|
op_cls = type(op_node)
|
||||||
|
if op_cls not in _CMP_OPS:
|
||||||
|
raise UnsafeExpressionError(
|
||||||
|
f"Opérateur de comparaison '{op_cls.__name__}' interdit."
|
||||||
|
)
|
||||||
|
right = self._eval_node(comparator, context)
|
||||||
|
if not _CMP_OPS[op_cls](left, right):
|
||||||
|
return False
|
||||||
|
left = right
|
||||||
|
return True
|
||||||
|
|
||||||
|
# Booléen (and / or) — short-circuit manuel.
|
||||||
|
if isinstance(node, ast.BoolOp):
|
||||||
|
op_cls = type(node.op)
|
||||||
|
if op_cls not in _BOOL_OPS:
|
||||||
|
raise UnsafeExpressionError(
|
||||||
|
f"Opérateur booléen '{op_cls.__name__}' interdit."
|
||||||
|
)
|
||||||
|
if isinstance(node.op, ast.And):
|
||||||
|
result: Any = True
|
||||||
|
for sub in node.values:
|
||||||
|
result = self._eval_node(sub, context)
|
||||||
|
if not result:
|
||||||
|
return result
|
||||||
|
return result
|
||||||
|
# Or
|
||||||
|
result = False
|
||||||
|
for sub in node.values:
|
||||||
|
result = self._eval_node(sub, context)
|
||||||
|
if result:
|
||||||
|
return result
|
||||||
|
return result
|
||||||
|
|
||||||
|
# Unaires (-x, not x)
|
||||||
|
if isinstance(node, ast.UnaryOp):
|
||||||
|
op_cls = type(node.op)
|
||||||
|
if op_cls not in _UNARY_OPS:
|
||||||
|
raise UnsafeExpressionError(
|
||||||
|
f"Opérateur unaire '{op_cls.__name__}' interdit."
|
||||||
|
)
|
||||||
|
return _UNARY_OPS[op_cls](self._eval_node(node.operand, context))
|
||||||
|
|
||||||
|
# Binaires (+, -, *, /, %, **, //)
|
||||||
|
if isinstance(node, ast.BinOp):
|
||||||
|
op_cls = type(node.op)
|
||||||
|
if op_cls not in _BIN_OPS:
|
||||||
|
raise UnsafeExpressionError(
|
||||||
|
f"Opérateur binaire '{op_cls.__name__}' interdit."
|
||||||
|
)
|
||||||
|
left = self._eval_node(node.left, context)
|
||||||
|
right = self._eval_node(node.right, context)
|
||||||
|
return _BIN_OPS[op_cls](left, right)
|
||||||
|
|
||||||
|
# Literals composites
|
||||||
|
if isinstance(node, ast.Tuple):
|
||||||
|
return tuple(self._eval_node(e, context) for e in node.elts)
|
||||||
|
if isinstance(node, ast.List):
|
||||||
|
return [self._eval_node(e, context) for e in node.elts]
|
||||||
|
if isinstance(node, ast.Set):
|
||||||
|
return {self._eval_node(e, context) for e in node.elts}
|
||||||
|
if isinstance(node, ast.Dict):
|
||||||
|
return {
|
||||||
|
self._eval_node(k, context) if k is not None else None:
|
||||||
|
self._eval_node(v, context)
|
||||||
|
for k, v in zip(node.keys, node.values)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Tout le reste (Call, Lambda, Comprehensions, Import, etc.) est
|
||||||
|
# refusé explicitement.
|
||||||
|
raise UnsafeExpressionError(
|
||||||
|
f"Noeud AST '{type(node).__name__}' interdit dans les conditions."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def safe_eval_condition(
|
||||||
|
expression: str,
|
||||||
|
context: Mapping[str, Any],
|
||||||
|
) -> Any:
|
||||||
|
"""Helper fonctionnel : évalue `expression` avec le contexte donné."""
|
||||||
|
return SafeConditionEvaluator().evaluate(expression, context)
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"SafeConditionEvaluator",
|
||||||
|
"UnsafeExpressionError",
|
||||||
|
"safe_eval_condition",
|
||||||
|
]
|
||||||
@@ -1697,12 +1697,6 @@ class TargetResolver:
|
|||||||
|
|
||||||
return best_elem, tie_break_criterion
|
return best_elem, tie_break_criterion
|
||||||
|
|
||||||
# Spatial analyzer (lazy load) - Exigence 5.3
|
|
||||||
self._spatial_analyzer: Optional[SpatialAnalyzer] = None
|
|
||||||
self._spatial_relations_cache: Dict[str, List[SpatialRelation]] = {}
|
|
||||||
|
|
||||||
logger.info(f"TargetResolver initialized (threshold={similarity_threshold}, spatial={use_spatial_fallback})")
|
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# Résolution principale
|
# Résolution principale
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
# Configuration Ollama (coherente avec le reste du projet)
|
# Configuration Ollama (coherente avec le reste du projet)
|
||||||
OLLAMA_DEFAULT_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434")
|
OLLAMA_DEFAULT_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434")
|
||||||
OLLAMA_DEFAULT_MODEL = os.environ.get("VLM_MODEL", "qwen3-vl:8b")
|
OLLAMA_DEFAULT_MODEL = os.environ.get("RPA_VLM_MODEL", os.environ.get("VLM_MODEL", "gemma4:e4b"))
|
||||||
|
|
||||||
|
|
||||||
class FieldExtractor:
|
class FieldExtractor:
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
GPU Resource Management Module for RPA Vision V3
|
GPU Resource Management Module for RPA Vision V3
|
||||||
|
|
||||||
This module provides dynamic GPU resource allocation between ML models:
|
This module provides dynamic GPU resource allocation between ML models:
|
||||||
- Ollama VLM (qwen3-vl:8b) for UI classification
|
- Ollama VLM (gemma4:e4b par défaut, configurable via RPA_VLM_MODEL) for UI classification
|
||||||
- CLIP (ViT-B-32) for embedding matching
|
- CLIP (ViT-B-32) for embedding matching
|
||||||
|
|
||||||
The GPUResourceManager optimizes VRAM usage by:
|
The GPUResourceManager optimizes VRAM usage by:
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
GPU Resource Manager - Central orchestrator for GPU resource allocation
|
GPU Resource Manager - Central orchestrator for GPU resource allocation
|
||||||
|
|
||||||
Manages dynamic allocation of GPU resources between:
|
Manages dynamic allocation of GPU resources between:
|
||||||
- Ollama VLM (qwen3-vl:8b) - ~10.5 GB VRAM for UI classification
|
- Ollama VLM (gemma4:e4b par défaut) - ~10 GB VRAM for UI classification
|
||||||
- CLIP (ViT-B-32) - ~500 MB VRAM for embedding matching
|
- CLIP (ViT-B-32) - ~500 MB VRAM for embedding matching
|
||||||
|
|
||||||
Optimizes VRAM usage based on execution mode:
|
Optimizes VRAM usage based on execution mode:
|
||||||
@@ -12,13 +12,14 @@ Optimizes VRAM usage based on execution mode:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import contextlib
|
||||||
import logging
|
import logging
|
||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import Any, Callable, Dict, List, Optional
|
from typing import Any, Callable, Dict, Iterator, List, Optional
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -53,7 +54,7 @@ class VRAMInfo:
|
|||||||
class GPUResourceConfig:
|
class GPUResourceConfig:
|
||||||
"""Configuration for GPU resource management."""
|
"""Configuration for GPU resource management."""
|
||||||
ollama_endpoint: str = "http://localhost:11434"
|
ollama_endpoint: str = "http://localhost:11434"
|
||||||
vlm_model: str = "qwen3-vl:8b"
|
vlm_model: str = "gemma4:e4b"
|
||||||
clip_model: str = "ViT-B-32"
|
clip_model: str = "ViT-B-32"
|
||||||
idle_timeout_seconds: int = 300 # 5 minutes
|
idle_timeout_seconds: int = 300 # 5 minutes
|
||||||
vram_threshold_for_clip_gpu_mb: int = 1024 # 1 GB
|
vram_threshold_for_clip_gpu_mb: int = 1024 # 1 GB
|
||||||
@@ -127,6 +128,12 @@ class GPUResourceManager:
|
|||||||
self._operation_queue: asyncio.Queue = asyncio.Queue()
|
self._operation_queue: asyncio.Queue = asyncio.Queue()
|
||||||
self._operation_lock = asyncio.Lock()
|
self._operation_lock = asyncio.Lock()
|
||||||
|
|
||||||
|
# Lock d'inférence synchrone : sérialise les appels GPU concurrents
|
||||||
|
# (ScreenAnalyzer.analyze, UIDetector, CLIP.encode) entre
|
||||||
|
# ExecutionLoop et stream_processor pour éviter la saturation VRAM
|
||||||
|
# sur RTX 5070 (12 Go). Un seul analyze à la fois sur le GPU.
|
||||||
|
self._inference_lock = threading.Lock()
|
||||||
|
|
||||||
# Event callbacks
|
# Event callbacks
|
||||||
self._on_resource_changed: List[Callable[[ResourceChangedEvent], None]] = []
|
self._on_resource_changed: List[Callable[[ResourceChangedEvent], None]] = []
|
||||||
self._on_mode_changed: List[Callable[[ExecutionMode], None]] = []
|
self._on_mode_changed: List[Callable[[ExecutionMode], None]] = []
|
||||||
@@ -208,6 +215,44 @@ class GPUResourceManager:
|
|||||||
"""Get the current execution mode."""
|
"""Get the current execution mode."""
|
||||||
return self._execution_mode
|
return self._execution_mode
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Inference serialization (sync)
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
@contextlib.contextmanager
|
||||||
|
def acquire_inference(self, timeout: Optional[float] = None) -> Iterator[bool]:
|
||||||
|
"""
|
||||||
|
Context manager synchrone pour sérialiser les inférences GPU.
|
||||||
|
|
||||||
|
Garantit qu'un seul appel d'inférence (ScreenAnalyzer.analyze,
|
||||||
|
UIDetector.detect, CLIP.encode…) tourne à la fois sur le GPU.
|
||||||
|
Évite la saturation VRAM quand ExecutionLoop et stream_processor
|
||||||
|
appellent analyze() simultanément sur une RTX 5070 (12 Go).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
timeout: Délai max d'attente (secondes). None = bloquant.
|
||||||
|
|
||||||
|
Yields:
|
||||||
|
True si le lock est acquis, False en cas de timeout.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
>>> with gpu_manager.acquire_inference(timeout=30.0) as acquired:
|
||||||
|
... if not acquired:
|
||||||
|
... logger.warning("GPU lock timeout")
|
||||||
|
... state = analyzer.analyze(path)
|
||||||
|
"""
|
||||||
|
if timeout is None:
|
||||||
|
self._inference_lock.acquire()
|
||||||
|
acquired = True
|
||||||
|
else:
|
||||||
|
acquired = self._inference_lock.acquire(timeout=timeout)
|
||||||
|
|
||||||
|
try:
|
||||||
|
yield acquired
|
||||||
|
finally:
|
||||||
|
if acquired:
|
||||||
|
self._inference_lock.release()
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# VLM Management
|
# VLM Management
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
|
|||||||
@@ -32,7 +32,7 @@ class OllamaManager:
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
endpoint: str = "http://localhost:11434",
|
endpoint: str = "http://localhost:11434",
|
||||||
model: str = "qwen3-vl:8b",
|
model: str = "gemma4:e4b",
|
||||||
default_keep_alive: str = "5m"
|
default_keep_alive: str = "5m"
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -173,6 +173,10 @@ class GraphBuilder:
|
|||||||
clustering_eps: float = 0.08,
|
clustering_eps: float = 0.08,
|
||||||
clustering_min_samples: int = 2,
|
clustering_min_samples: int = 2,
|
||||||
enable_quality_validation: bool = True,
|
enable_quality_validation: bool = True,
|
||||||
|
ui_detector: Optional[Any] = None,
|
||||||
|
screen_analyzer: Optional[Any] = None,
|
||||||
|
enable_ui_enrichment: bool = True,
|
||||||
|
element_proximity_max_px: float = 50.0,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Initialiser le GraphBuilder.
|
Initialiser le GraphBuilder.
|
||||||
@@ -185,6 +189,17 @@ class GraphBuilder:
|
|||||||
clustering_eps: Epsilon pour DBSCAN (distance max entre points)
|
clustering_eps: Epsilon pour DBSCAN (distance max entre points)
|
||||||
clustering_min_samples: Nombre minimum d'échantillons pour un cluster
|
clustering_min_samples: Nombre minimum d'échantillons pour un cluster
|
||||||
enable_quality_validation: Activer la validation de qualité
|
enable_quality_validation: Activer la validation de qualité
|
||||||
|
ui_detector: UIDetector optionnel. Si fourni, sera utilisé par
|
||||||
|
l'analyzer lazy-initialisé. Sinon, fallback sur le singleton
|
||||||
|
partagé (`get_screen_analyzer()`).
|
||||||
|
screen_analyzer: Instance ScreenAnalyzer à utiliser directement.
|
||||||
|
Si None, lazy init via le singleton partagé C1.
|
||||||
|
enable_ui_enrichment: Active l'enrichissement visuel des
|
||||||
|
ScreenStates lors de `_create_screen_states` (OCR + UIDetector).
|
||||||
|
False = comportement historique (ui_elements=[], detected_text=[]).
|
||||||
|
element_proximity_max_px: Distance maximale (en pixels) entre un
|
||||||
|
clic et le bbox le plus proche pour qu'un UIElement soit
|
||||||
|
considéré comme cible. Au-delà, le clic reste sans ancre.
|
||||||
"""
|
"""
|
||||||
self.embedding_builder = embedding_builder or StateEmbeddingBuilder()
|
self.embedding_builder = embedding_builder or StateEmbeddingBuilder()
|
||||||
self.faiss_manager = faiss_manager
|
self.faiss_manager = faiss_manager
|
||||||
@@ -193,22 +208,73 @@ class GraphBuilder:
|
|||||||
self.clustering_eps = clustering_eps
|
self.clustering_eps = clustering_eps
|
||||||
self.clustering_min_samples = clustering_min_samples
|
self.clustering_min_samples = clustering_min_samples
|
||||||
self.enable_quality_validation = enable_quality_validation
|
self.enable_quality_validation = enable_quality_validation
|
||||||
self._screen_analyzer = None # ScreenAnalyzer (lazy import)
|
self.enable_ui_enrichment = enable_ui_enrichment
|
||||||
|
self.element_proximity_max_px = element_proximity_max_px
|
||||||
|
# UIDetector explicite (optionnel) — injecté dans l'analyzer lazy.
|
||||||
|
self._ui_detector = ui_detector
|
||||||
|
# Instance ScreenAnalyzer. Si fournie, on l'utilise telle quelle ;
|
||||||
|
# sinon, on bascule sur le singleton partagé (lazy init).
|
||||||
|
self._screen_analyzer = screen_analyzer
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"GraphBuilder initialized: "
|
f"GraphBuilder initialized: "
|
||||||
f"min_repetitions={min_pattern_repetitions}, "
|
f"min_repetitions={min_pattern_repetitions}, "
|
||||||
f"eps={clustering_eps}, "
|
f"eps={clustering_eps}, "
|
||||||
f"min_samples={clustering_min_samples}, "
|
f"min_samples={clustering_min_samples}, "
|
||||||
f"quality_validation={enable_quality_validation}"
|
f"quality_validation={enable_quality_validation}, "
|
||||||
|
f"ui_enrichment={enable_ui_enrichment}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Résolution paresseuse du ScreenAnalyzer (singleton C1 par défaut)
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _get_screen_analyzer(self):
|
||||||
|
"""
|
||||||
|
Retourner l'instance ScreenAnalyzer à utiliser.
|
||||||
|
|
||||||
|
Priorité :
|
||||||
|
1. Instance injectée via le constructeur (`screen_analyzer=…`).
|
||||||
|
2. Singleton partagé `get_screen_analyzer()` (C1) — évite le double
|
||||||
|
chargement GPU quand ExecutionLoop et stream_processor tournent.
|
||||||
|
3. En dernier recours (import circulaire, tests), création locale.
|
||||||
|
"""
|
||||||
|
if self._screen_analyzer is not None:
|
||||||
|
return self._screen_analyzer
|
||||||
|
|
||||||
|
try:
|
||||||
|
from core.pipeline import get_screen_analyzer
|
||||||
|
|
||||||
|
self._screen_analyzer = get_screen_analyzer(
|
||||||
|
ui_detector=self._ui_detector,
|
||||||
|
)
|
||||||
|
return self._screen_analyzer
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(
|
||||||
|
f"Impossible d'obtenir le ScreenAnalyzer singleton "
|
||||||
|
f"({e}); fallback sur une instance locale."
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
from core.pipeline.screen_analyzer import ScreenAnalyzer
|
||||||
|
|
||||||
|
self._screen_analyzer = ScreenAnalyzer(
|
||||||
|
ui_detector=self._ui_detector,
|
||||||
|
)
|
||||||
|
return self._screen_analyzer
|
||||||
|
except Exception as e2:
|
||||||
|
logger.error(
|
||||||
|
f"Impossible d'instancier ScreenAnalyzer: {e2}. "
|
||||||
|
"Enrichissement UI désactivé."
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
|
||||||
def build_from_session(
|
def build_from_session(
|
||||||
self,
|
self,
|
||||||
session: RawSession,
|
session: RawSession,
|
||||||
workflow_name: Optional[str] = None,
|
workflow_name: Optional[str] = None,
|
||||||
precomputed_states: Optional[List["ScreenState"]] = None,
|
precomputed_states: Optional[List["ScreenState"]] = None,
|
||||||
precomputed_embeddings: Optional[List] = None,
|
precomputed_embeddings: Optional[List] = None,
|
||||||
|
sequential: bool = False,
|
||||||
) -> Workflow:
|
) -> Workflow:
|
||||||
"""
|
"""
|
||||||
Construire un Workflow complet depuis une RawSession.
|
Construire un Workflow complet depuis une RawSession.
|
||||||
@@ -216,7 +282,7 @@ class GraphBuilder:
|
|||||||
Processus:
|
Processus:
|
||||||
1. Créer ScreenStates depuis screenshots (ou utiliser precomputed_states)
|
1. Créer ScreenStates depuis screenshots (ou utiliser precomputed_states)
|
||||||
2. Calculer embeddings pour chaque état (ou réutiliser precomputed_embeddings)
|
2. Calculer embeddings pour chaque état (ou réutiliser precomputed_embeddings)
|
||||||
3. Détecter patterns via clustering
|
3. Détecter patterns via clustering (ou mode séquentiel)
|
||||||
4. Construire nodes depuis clusters
|
4. Construire nodes depuis clusters
|
||||||
5. Construire edges depuis transitions
|
5. Construire edges depuis transitions
|
||||||
|
|
||||||
@@ -228,6 +294,10 @@ class GraphBuilder:
|
|||||||
precomputed_embeddings: Embeddings déjà calculés (streaming).
|
precomputed_embeddings: Embeddings déjà calculés (streaming).
|
||||||
Si fourni et de la bonne longueur (= len(screen_states)),
|
Si fourni et de la bonne longueur (= len(screen_states)),
|
||||||
saute l'étape 2 (pas de recalcul CLIP).
|
saute l'étape 2 (pas de recalcul CLIP).
|
||||||
|
sequential: Si True, crée un node par état d'écran (pas de
|
||||||
|
clustering DBSCAN). Approprié pour les enregistrements
|
||||||
|
single-pass d'un workflow — chaque screenshot est une étape
|
||||||
|
distincte avec ses actions associées.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Workflow construit avec nodes et edges
|
Workflow construit avec nodes et edges
|
||||||
@@ -242,6 +312,7 @@ class GraphBuilder:
|
|||||||
f"Building workflow from session {session.session_id} "
|
f"Building workflow from session {session.session_id} "
|
||||||
f"with {len(precomputed_states or session.screenshots)} "
|
f"with {len(precomputed_states or session.screenshots)} "
|
||||||
f"{'precomputed states' if precomputed_states else 'screenshots'}"
|
f"{'precomputed states' if precomputed_states else 'screenshots'}"
|
||||||
|
f"{' (mode séquentiel)' if sequential else ''}"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Étape 1: Créer ScreenStates (ou réutiliser ceux pré-calculés)
|
# Étape 1: Créer ScreenStates (ou réutiliser ceux pré-calculés)
|
||||||
@@ -266,7 +337,16 @@ class GraphBuilder:
|
|||||||
embeddings = self._compute_embeddings(screen_states)
|
embeddings = self._compute_embeddings(screen_states)
|
||||||
logger.debug(f"Computed {len(embeddings)} embeddings")
|
logger.debug(f"Computed {len(embeddings)} embeddings")
|
||||||
|
|
||||||
# Étape 3: Détecter patterns
|
# Étape 3: Détecter patterns ou mode séquentiel
|
||||||
|
if sequential:
|
||||||
|
# Mode séquentiel : chaque état d'écran est un node distinct.
|
||||||
|
# Pas de clustering — essentiel pour les enregistrements single-pass
|
||||||
|
# où l'on veut reproduire fidèlement la séquence des actions.
|
||||||
|
clusters = {i: [i] for i in range(len(screen_states))}
|
||||||
|
logger.info(
|
||||||
|
f"Mode séquentiel: {len(clusters)} nodes (1 par état)"
|
||||||
|
)
|
||||||
|
else:
|
||||||
clusters = self._detect_patterns(embeddings, screen_states)
|
clusters = self._detect_patterns(embeddings, screen_states)
|
||||||
logger.info(f"Detected {len(clusters)} patterns")
|
logger.info(f"Detected {len(clusters)} patterns")
|
||||||
|
|
||||||
@@ -275,7 +355,10 @@ class GraphBuilder:
|
|||||||
logger.info(f"Built {len(nodes)} workflow nodes")
|
logger.info(f"Built {len(nodes)} workflow nodes")
|
||||||
|
|
||||||
# Étape 5: Construire edges (passer les embeddings pour éviter recalcul)
|
# Étape 5: Construire edges (passer les embeddings pour éviter recalcul)
|
||||||
edges = self._build_edges(nodes, screen_states, session, embeddings=embeddings)
|
edges = self._build_edges(
|
||||||
|
nodes, screen_states, session, embeddings=embeddings,
|
||||||
|
sequential=sequential,
|
||||||
|
)
|
||||||
logger.info(f"Built {len(edges)} workflow edges")
|
logger.info(f"Built {len(edges)} workflow edges")
|
||||||
|
|
||||||
# Créer Workflow
|
# Créer Workflow
|
||||||
@@ -395,11 +478,28 @@ class GraphBuilder:
|
|||||||
if event.screenshot_id:
|
if event.screenshot_id:
|
||||||
screenshot_to_event[event.screenshot_id] = event
|
screenshot_to_event[event.screenshot_id] = event
|
||||||
|
|
||||||
|
# Récupérer (une seule fois) l'analyzer partagé si l'enrichissement est actif.
|
||||||
|
# Le singleton C1 garantit qu'on ne recharge pas UIDetector/CLIP inutilement.
|
||||||
|
analyzer = None
|
||||||
|
if self.enable_ui_enrichment:
|
||||||
|
analyzer = self._get_screen_analyzer()
|
||||||
|
|
||||||
|
# Cache partagé (C1) : réutiliser les analyses si même screenshot est
|
||||||
|
# repassé plusieurs fois (peu fréquent en construction, utile en tests).
|
||||||
|
try:
|
||||||
|
from core.pipeline import get_screen_state_cache
|
||||||
|
|
||||||
|
state_cache = get_screen_state_cache()
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"ScreenStateCache indisponible ({e}); aucun cache utilisé.")
|
||||||
|
state_cache = None
|
||||||
|
|
||||||
|
enriched_count = 0
|
||||||
for i, screenshot in enumerate(session.screenshots):
|
for i, screenshot in enumerate(session.screenshots):
|
||||||
# Trouver l'événement associé
|
# Trouver l'événement associé
|
||||||
event = screenshot_to_event.get(screenshot.screenshot_id)
|
event = screenshot_to_event.get(screenshot.screenshot_id)
|
||||||
|
|
||||||
# Créer WindowContext depuis l'événement
|
# Construire WindowContext depuis l'événement (si dispo)
|
||||||
screen_env = session.environment.get("screen", {})
|
screen_env = session.environment.get("screen", {})
|
||||||
screen_res = screen_env.get("primary_resolution", [1920, 1080])
|
screen_res = screen_env.get("primary_resolution", [1920, 1080])
|
||||||
if event and event.window:
|
if event and event.window:
|
||||||
@@ -427,59 +527,127 @@ class GraphBuilder:
|
|||||||
os_language=session.environment.get("os_language", "unknown"),
|
os_language=session.environment.get("os_language", "unknown"),
|
||||||
)
|
)
|
||||||
|
|
||||||
# Créer RawLevel
|
# Chemin absolu du screenshot
|
||||||
# Construire chemin absolu : data/training/sessions/{session_id}/{session_id}/{relative_path}
|
screenshot_absolute_path = (
|
||||||
screenshot_absolute_path = f"data/training/sessions/{session.session_id}/{session.session_id}/{screenshot.relative_path}"
|
f"data/training/sessions/{session.session_id}/"
|
||||||
|
f"{session.session_id}/{screenshot.relative_path}"
|
||||||
|
)
|
||||||
screenshot_path = Path(screenshot_absolute_path)
|
screenshot_path = Path(screenshot_absolute_path)
|
||||||
|
|
||||||
|
# Timestamp
|
||||||
|
if isinstance(screenshot.captured_at, str):
|
||||||
|
timestamp = datetime.fromisoformat(
|
||||||
|
screenshot.captured_at.replace('Z', '+00:00')
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
timestamp = screenshot.captured_at
|
||||||
|
|
||||||
|
# ------------------------------------------------------------
|
||||||
|
# Enrichissement visuel : déléguer au ScreenAnalyzer partagé
|
||||||
|
# ------------------------------------------------------------
|
||||||
|
# L'analyzer renvoie un ScreenState complet avec :
|
||||||
|
# - raw (image + file_size)
|
||||||
|
# - perception (OCR + embedding ref)
|
||||||
|
# - ui_elements (détection UIDetector)
|
||||||
|
# On récupère ces niveaux et on rebâtit un état final avec le
|
||||||
|
# WindowContext et les metadata issus de la session brute (les
|
||||||
|
# données "metier" que l'analyzer ignore).
|
||||||
|
# ------------------------------------------------------------
|
||||||
|
detected_text: List[str] = []
|
||||||
|
text_method = "none"
|
||||||
|
ui_elements: List = []
|
||||||
raw = RawLevel(
|
raw = RawLevel(
|
||||||
screenshot_path=str(screenshot_path),
|
screenshot_path=str(screenshot_path),
|
||||||
capture_method="mss",
|
capture_method="mss",
|
||||||
file_size_bytes=screenshot_path.stat().st_size if screenshot_path.exists() else 0
|
file_size_bytes=(
|
||||||
|
screenshot_path.stat().st_size
|
||||||
|
if screenshot_path.exists()
|
||||||
|
else 0
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
# Créer PerceptionLevel — enrichir avec OCR si le screenshot existe
|
if analyzer is not None and screenshot_path.exists():
|
||||||
detected_text = []
|
|
||||||
text_method = "none"
|
|
||||||
|
|
||||||
if screenshot_path.exists():
|
|
||||||
try:
|
try:
|
||||||
if self._screen_analyzer is None:
|
# Construire l'info fenêtre pour donner le contexte à
|
||||||
from core.pipeline.screen_analyzer import ScreenAnalyzer
|
# l'UIDetector (certains détecteurs s'en servent pour
|
||||||
self._screen_analyzer = ScreenAnalyzer(session_id=session.session_id)
|
# filtrer hors-fenêtre).
|
||||||
extracted = self._screen_analyzer._extract_text(str(screenshot_path))
|
window_info = {
|
||||||
if extracted:
|
"app_name": window.app_name,
|
||||||
detected_text = extracted
|
"title": window.window_title,
|
||||||
text_method = self._screen_analyzer._get_ocr_method_name()
|
"screen_resolution": list(window.screen_resolution or []),
|
||||||
except Exception as e:
|
}
|
||||||
logger.debug(f"OCR échoué pour {screenshot_path}: {e}")
|
|
||||||
|
|
||||||
|
analyzed = analyzer.analyze(
|
||||||
|
str(screenshot_path),
|
||||||
|
window_info=window_info,
|
||||||
|
enable_ocr=True,
|
||||||
|
enable_ui_detection=True,
|
||||||
|
session_id=session.session_id,
|
||||||
|
)
|
||||||
|
detected_text = list(analyzed.perception.detected_text or [])
|
||||||
|
text_method = (
|
||||||
|
analyzed.perception.text_detection_method or "none"
|
||||||
|
)
|
||||||
|
ui_elements = list(analyzed.ui_elements or [])
|
||||||
|
# Garder les métriques OCR/UI si présentes (debug)
|
||||||
|
analyzer_metadata = dict(analyzed.metadata or {})
|
||||||
|
raw = analyzed.raw # conserver file_size réel mesuré
|
||||||
|
if ui_elements:
|
||||||
|
enriched_count += 1
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(
|
||||||
|
f"Enrichissement visuel échoué pour {screenshot_path}: {e}. "
|
||||||
|
"Fallback sur ScreenState minimal."
|
||||||
|
)
|
||||||
|
analyzer_metadata = {"analyzer_error": str(e)}
|
||||||
|
else:
|
||||||
|
analyzer_metadata = {}
|
||||||
|
if self.enable_ui_enrichment and not screenshot_path.exists():
|
||||||
|
logger.debug(
|
||||||
|
f"Screenshot introuvable: {screenshot_path} "
|
||||||
|
"— ui_elements restera vide"
|
||||||
|
)
|
||||||
|
|
||||||
|
# PerceptionLevel : vector_id calculé de façon déterministe.
|
||||||
perception = PerceptionLevel(
|
perception = PerceptionLevel(
|
||||||
embedding=EmbeddingRef(
|
embedding=EmbeddingRef(
|
||||||
provider="openclip_ViT-B-32",
|
provider="openclip_ViT-B-32",
|
||||||
vector_id=f"data/embeddings/screens/{session.session_id}_state_{i:04d}.npy",
|
vector_id=(
|
||||||
dimensions=512
|
f"data/embeddings/screens/"
|
||||||
|
f"{session.session_id}_state_{i:04d}.npy"
|
||||||
|
),
|
||||||
|
dimensions=512,
|
||||||
),
|
),
|
||||||
detected_text=detected_text,
|
detected_text=detected_text,
|
||||||
text_detection_method=text_method,
|
text_detection_method=text_method,
|
||||||
confidence_avg=0.85 if detected_text else 0.0
|
confidence_avg=0.85 if detected_text else 0.0,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Créer ContextLevel
|
# ContextLevel (métier)
|
||||||
context = ContextLevel(
|
context = ContextLevel(
|
||||||
current_workflow_candidate=None,
|
current_workflow_candidate=None,
|
||||||
workflow_step=i,
|
workflow_step=i,
|
||||||
user_id=session.user.get("id", "unknown"),
|
user_id=session.user.get("id", "unknown"),
|
||||||
tags=list(session.context.get("tags", [])) if isinstance(session.context.get("tags"), list) else [],
|
tags=(
|
||||||
business_variables={}
|
list(session.context.get("tags", []))
|
||||||
|
if isinstance(session.context.get("tags"), list)
|
||||||
|
else []
|
||||||
|
),
|
||||||
|
business_variables={},
|
||||||
)
|
)
|
||||||
|
|
||||||
# Parser timestamp
|
# Metadata : on garde le lien événement/session + éventuels
|
||||||
if isinstance(screenshot.captured_at, str):
|
# compteurs remontés par l'analyzer.
|
||||||
timestamp = datetime.fromisoformat(screenshot.captured_at.replace('Z', '+00:00'))
|
metadata = {
|
||||||
else:
|
"screenshot_id": screenshot.screenshot_id,
|
||||||
timestamp = screenshot.captured_at
|
"event_type": event.type if event else None,
|
||||||
|
"event_time": event.t if event else None,
|
||||||
|
}
|
||||||
|
# Propager les indicateurs utiles de l'analyzer sans écraser la base.
|
||||||
|
for key in ("ocr_ms", "ui_ms", "analyzer_error"):
|
||||||
|
if key in analyzer_metadata:
|
||||||
|
metadata[key] = analyzer_metadata[key]
|
||||||
|
|
||||||
# Créer ScreenState complet
|
|
||||||
state = ScreenState(
|
state = ScreenState(
|
||||||
screen_state_id=f"{session.session_id}_state_{i:04d}",
|
screen_state_id=f"{session.session_id}_state_{i:04d}",
|
||||||
timestamp=timestamp,
|
timestamp=timestamp,
|
||||||
@@ -488,17 +656,17 @@ class GraphBuilder:
|
|||||||
raw=raw,
|
raw=raw,
|
||||||
perception=perception,
|
perception=perception,
|
||||||
context=context,
|
context=context,
|
||||||
metadata={
|
metadata=metadata,
|
||||||
"screenshot_id": screenshot.screenshot_id,
|
ui_elements=ui_elements,
|
||||||
"event_type": event.type if event else None,
|
|
||||||
"event_time": event.t if event else None
|
|
||||||
},
|
|
||||||
ui_elements=[] # Sera rempli par UIDetector si disponible
|
|
||||||
)
|
)
|
||||||
|
|
||||||
screen_states.append(state)
|
screen_states.append(state)
|
||||||
|
|
||||||
logger.info(f"Created {len(screen_states)} enriched screen states")
|
logger.info(
|
||||||
|
f"Created {len(screen_states)} enriched screen states "
|
||||||
|
f"({enriched_count} avec UI détectée, "
|
||||||
|
f"ui_enrichment={self.enable_ui_enrichment})"
|
||||||
|
)
|
||||||
return screen_states
|
return screen_states
|
||||||
|
|
||||||
def _compute_embeddings(
|
def _compute_embeddings(
|
||||||
@@ -924,6 +1092,99 @@ class GraphBuilder:
|
|||||||
constraints.sort(key=lambda c: role_counts.get(c.get("role", ""), 0), reverse=True)
|
constraints.sort(key=lambda c: role_counts.get(c.get("role", ""), 0), reverse=True)
|
||||||
return constraints[:8]
|
return constraints[:8]
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Association spatiale clic → UIElement
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _find_clicked_element(
|
||||||
|
self,
|
||||||
|
event: Event,
|
||||||
|
ui_elements: List[Any],
|
||||||
|
) -> Optional[Any]:
|
||||||
|
"""
|
||||||
|
Identifier l'UIElement cible d'un clic par proximité spatiale.
|
||||||
|
|
||||||
|
Règle :
|
||||||
|
1. Si un bbox contient strictement la position du clic → match.
|
||||||
|
2. Sinon, on prend le bbox le plus proche (distance euclidienne
|
||||||
|
au bord) sous réserve qu'il soit à <= `element_proximity_max_px`.
|
||||||
|
3. Sinon, aucun ancrage possible → None.
|
||||||
|
|
||||||
|
Cette association transforme un clic "aveugle" (coordonnées brutes)
|
||||||
|
en un clic "intelligent" (rôle + label), permettant au matcher de
|
||||||
|
retrouver l'élément même si la résolution ou la position change.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
event: Événement `mouse_click` (avec `data["pos"] = [x, y]`).
|
||||||
|
ui_elements: Liste des UIElement détectés sur l'écran source.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
UIElement le plus pertinent, ou None si rien ne correspond.
|
||||||
|
"""
|
||||||
|
if not ui_elements:
|
||||||
|
return None
|
||||||
|
if not event or event.type != "mouse_click":
|
||||||
|
return None
|
||||||
|
|
||||||
|
pos = event.data.get("pos") if event.data else None
|
||||||
|
if not pos or len(pos) < 2:
|
||||||
|
return None
|
||||||
|
|
||||||
|
try:
|
||||||
|
click_x = float(pos[0])
|
||||||
|
click_y = float(pos[1])
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return None
|
||||||
|
|
||||||
|
best_contained = None
|
||||||
|
best_contained_area = None
|
||||||
|
best_near = None
|
||||||
|
best_near_distance = None
|
||||||
|
|
||||||
|
for element in ui_elements:
|
||||||
|
bbox = getattr(element, "bbox", None)
|
||||||
|
if bbox is None:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Extraction défensive des coordonnées (BBox Pydantic ou tuple)
|
||||||
|
try:
|
||||||
|
bx = int(getattr(bbox, "x", bbox[0]))
|
||||||
|
by = int(getattr(bbox, "y", bbox[1]))
|
||||||
|
bw = int(getattr(bbox, "width", bbox[2]))
|
||||||
|
bh = int(getattr(bbox, "height", bbox[3]))
|
||||||
|
except (AttributeError, IndexError, TypeError):
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Cas 1 : la position est strictement dans le bbox.
|
||||||
|
if bx <= click_x <= bx + bw and by <= click_y <= by + bh:
|
||||||
|
# Sélectionner le plus petit bbox qui contient (élément le plus spécifique)
|
||||||
|
area = max(1, bw * bh)
|
||||||
|
if best_contained is None or area < best_contained_area:
|
||||||
|
best_contained = element
|
||||||
|
best_contained_area = area
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Cas 2 : calculer la distance au bord le plus proche.
|
||||||
|
dx = max(bx - click_x, 0, click_x - (bx + bw))
|
||||||
|
dy = max(by - click_y, 0, click_y - (by + bh))
|
||||||
|
distance = (dx * dx + dy * dy) ** 0.5
|
||||||
|
|
||||||
|
if best_near is None or distance < best_near_distance:
|
||||||
|
best_near = element
|
||||||
|
best_near_distance = distance
|
||||||
|
|
||||||
|
if best_contained is not None:
|
||||||
|
return best_contained
|
||||||
|
|
||||||
|
if (
|
||||||
|
best_near is not None
|
||||||
|
and best_near_distance is not None
|
||||||
|
and best_near_distance <= self.element_proximity_max_px
|
||||||
|
):
|
||||||
|
return best_near
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
# Patterns d'erreur courants pour la détection fail_fast
|
# Patterns d'erreur courants pour la détection fail_fast
|
||||||
_ERROR_PATTERNS = [
|
_ERROR_PATTERNS = [
|
||||||
"erreur", "error", "échec", "failed", "impossible",
|
"erreur", "error", "échec", "failed", "impossible",
|
||||||
@@ -937,12 +1198,14 @@ class GraphBuilder:
|
|||||||
screen_states: List[ScreenState],
|
screen_states: List[ScreenState],
|
||||||
session: RawSession,
|
session: RawSession,
|
||||||
embeddings: Optional[List[np.ndarray]] = None,
|
embeddings: Optional[List[np.ndarray]] = None,
|
||||||
|
sequential: bool = False,
|
||||||
) -> List[WorkflowEdge]:
|
) -> List[WorkflowEdge]:
|
||||||
"""
|
"""
|
||||||
Construire WorkflowEdges depuis les transitions observées.
|
Construire WorkflowEdges depuis les transitions observées.
|
||||||
|
|
||||||
Algorithme:
|
Algorithme:
|
||||||
1. Mapper chaque ScreenState vers son node (via embedding similarity)
|
1. Mapper chaque ScreenState vers son node (via embedding similarity)
|
||||||
|
En mode séquentiel, le mapping est direct (state i → node i).
|
||||||
2. Identifier les transitions (state_i -> state_j où node change)
|
2. Identifier les transitions (state_i -> state_j où node change)
|
||||||
3. Extraire l'action depuis l'événement entre les deux états
|
3. Extraire l'action depuis l'événement entre les deux états
|
||||||
4. Créer WorkflowEdge avec action, pré-conditions et post-conditions
|
4. Créer WorkflowEdge avec action, pré-conditions et post-conditions
|
||||||
@@ -960,6 +1223,7 @@ class GraphBuilder:
|
|||||||
screen_states: ScreenStates
|
screen_states: ScreenStates
|
||||||
session: Session brute (pour événements)
|
session: Session brute (pour événements)
|
||||||
embeddings: Embeddings pré-calculés (évite un recalcul dans _map_states_to_nodes)
|
embeddings: Embeddings pré-calculés (évite un recalcul dans _map_states_to_nodes)
|
||||||
|
sequential: Mode séquentiel — chaque paire consécutive = transition
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Liste de WorkflowEdges
|
Liste de WorkflowEdges
|
||||||
@@ -975,7 +1239,19 @@ class GraphBuilder:
|
|||||||
node_by_id = {node.node_id: node for node in nodes}
|
node_by_id = {node.node_id: node for node in nodes}
|
||||||
|
|
||||||
# Étape 1: Mapper chaque état vers son node
|
# Étape 1: Mapper chaque état vers son node
|
||||||
state_to_node = self._map_states_to_nodes(screen_states, nodes, embeddings=embeddings)
|
if sequential:
|
||||||
|
# Mode séquentiel : mapping direct state[i] → node[i]
|
||||||
|
state_to_node = {}
|
||||||
|
for i, state in enumerate(screen_states):
|
||||||
|
if i < len(nodes):
|
||||||
|
state_to_node[state.screen_state_id] = nodes[i].node_id
|
||||||
|
logger.debug(
|
||||||
|
f"Mode séquentiel: {len(state_to_node)} states mappés directement"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
state_to_node = self._map_states_to_nodes(
|
||||||
|
screen_states, nodes, embeddings=embeddings
|
||||||
|
)
|
||||||
|
|
||||||
# Étape 2: Récupérer la résolution d'écran pour normaliser les coordonnées
|
# Étape 2: Récupérer la résolution d'écran pour normaliser les coordonnées
|
||||||
screen_env = session.environment.get("screen", {})
|
screen_env = session.environment.get("screen", {})
|
||||||
@@ -989,8 +1265,11 @@ class GraphBuilder:
|
|||||||
current_node_id = state_to_node.get(current_state.screen_state_id)
|
current_node_id = state_to_node.get(current_state.screen_state_id)
|
||||||
next_node_id = state_to_node.get(next_state.screen_state_id)
|
next_node_id = state_to_node.get(next_state.screen_state_id)
|
||||||
|
|
||||||
# Si les deux états sont dans des nodes différents, c'est une transition
|
# En mode séquentiel, chaque paire consécutive est une transition
|
||||||
if current_node_id and next_node_id and current_node_id != next_node_id:
|
# En mode clustering, uniquement si les nodes sont différents
|
||||||
|
if current_node_id and next_node_id and (
|
||||||
|
sequential or current_node_id != next_node_id
|
||||||
|
):
|
||||||
# Trouver TOUS les événements entre les deux états
|
# Trouver TOUS les événements entre les deux états
|
||||||
transition_events = self._find_transition_events(
|
transition_events = self._find_transition_events(
|
||||||
current_state, next_state, session.events
|
current_state, next_state, session.events
|
||||||
@@ -1012,6 +1291,7 @@ class GraphBuilder:
|
|||||||
target_node=target_node,
|
target_node=target_node,
|
||||||
all_events=transition_events,
|
all_events=transition_events,
|
||||||
screen_resolution=screen_resolution,
|
screen_resolution=screen_resolution,
|
||||||
|
source_state=current_state,
|
||||||
)
|
)
|
||||||
edges.append(edge)
|
edges.append(edge)
|
||||||
|
|
||||||
@@ -1094,6 +1374,32 @@ class GraphBuilder:
|
|||||||
|
|
||||||
return state_to_node
|
return state_to_node
|
||||||
|
|
||||||
|
def _get_state_time(self, state: ScreenState, fallback: float = 0) -> float:
|
||||||
|
"""Extraire le timestamp d'un ScreenState.
|
||||||
|
|
||||||
|
Priorité :
|
||||||
|
1. metadata['event_time'] (set par _create_screen_states)
|
||||||
|
2. metadata['shot_timestamp'] (set par le reprocessing)
|
||||||
|
3. state.timestamp converti en epoch si c'est un datetime
|
||||||
|
4. fallback
|
||||||
|
|
||||||
|
Note : event_time peut être 0.0 (timestamps relatifs), donc on
|
||||||
|
vérifie `is not None` et non `> 0`.
|
||||||
|
"""
|
||||||
|
if state.metadata:
|
||||||
|
et = state.metadata.get("event_time")
|
||||||
|
if et is not None:
|
||||||
|
return float(et)
|
||||||
|
st = state.metadata.get("shot_timestamp")
|
||||||
|
if st is not None:
|
||||||
|
return float(st)
|
||||||
|
if state.timestamp:
|
||||||
|
try:
|
||||||
|
return state.timestamp.timestamp()
|
||||||
|
except (AttributeError, OSError):
|
||||||
|
pass
|
||||||
|
return fallback
|
||||||
|
|
||||||
def _find_transition_events(
|
def _find_transition_events(
|
||||||
self,
|
self,
|
||||||
current_state: ScreenState,
|
current_state: ScreenState,
|
||||||
@@ -1108,6 +1414,9 @@ class GraphBuilder:
|
|||||||
C'est essentiel pour le replay : une transition peut nécessiter
|
C'est essentiel pour le replay : une transition peut nécessiter
|
||||||
plusieurs actions (ex: Win+R → taper "notepad" → Entrée).
|
plusieurs actions (ex: Win+R → taper "notepad" → Entrée).
|
||||||
|
|
||||||
|
Timestamps : utilise _get_state_time() qui supporte plusieurs
|
||||||
|
sources (event_time, shot_timestamp, datetime).
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
current_state: État source
|
current_state: État source
|
||||||
next_state: État cible
|
next_state: État cible
|
||||||
@@ -1117,8 +1426,8 @@ class GraphBuilder:
|
|||||||
Liste ordonnée (par timestamp) de tous les événements d'action
|
Liste ordonnée (par timestamp) de tous les événements d'action
|
||||||
entre les deux états. Peut être vide.
|
entre les deux états. Peut être vide.
|
||||||
"""
|
"""
|
||||||
current_time = current_state.metadata.get("event_time", 0)
|
current_time = self._get_state_time(current_state, fallback=0)
|
||||||
next_time = next_state.metadata.get("event_time", float('inf'))
|
next_time = self._get_state_time(next_state, fallback=float('inf'))
|
||||||
|
|
||||||
action_events = []
|
action_events = []
|
||||||
for event in events:
|
for event in events:
|
||||||
@@ -1155,6 +1464,7 @@ class GraphBuilder:
|
|||||||
target_node: Optional[WorkflowNode] = None,
|
target_node: Optional[WorkflowNode] = None,
|
||||||
all_events: Optional[List[Event]] = None,
|
all_events: Optional[List[Event]] = None,
|
||||||
screen_resolution: Tuple[int, int] = (1920, 1080),
|
screen_resolution: Tuple[int, int] = (1920, 1080),
|
||||||
|
source_state: Optional[ScreenState] = None,
|
||||||
) -> WorkflowEdge:
|
) -> WorkflowEdge:
|
||||||
"""
|
"""
|
||||||
Créer un WorkflowEdge depuis une transition observée.
|
Créer un WorkflowEdge depuis une transition observée.
|
||||||
@@ -1180,12 +1490,24 @@ class GraphBuilder:
|
|||||||
# Si on a plusieurs événements, créer une action compound
|
# Si on a plusieurs événements, créer une action compound
|
||||||
events_to_use = all_events or ([event] if event else [])
|
events_to_use = all_events or ([event] if event else [])
|
||||||
|
|
||||||
|
# UIElements de l'écran source — sert à ancrer les clics sur un vrai
|
||||||
|
# élément UI (rôle, texte, bbox) plutôt que sur une coordonnée brute.
|
||||||
|
source_ui_elements = (
|
||||||
|
list(source_state.ui_elements)
|
||||||
|
if source_state and source_state.ui_elements
|
||||||
|
else []
|
||||||
|
)
|
||||||
|
|
||||||
if len(events_to_use) > 1:
|
if len(events_to_use) > 1:
|
||||||
action = self._build_compound_action(
|
action = self._build_compound_action(
|
||||||
events_to_use, screen_resolution
|
events_to_use, screen_resolution,
|
||||||
|
source_ui_elements=source_ui_elements,
|
||||||
)
|
)
|
||||||
elif len(events_to_use) == 1:
|
elif len(events_to_use) == 1:
|
||||||
action = self._build_single_action(events_to_use[0])
|
action = self._build_single_action(
|
||||||
|
events_to_use[0],
|
||||||
|
source_ui_elements=source_ui_elements,
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
action = Action(
|
action = Action(
|
||||||
type="unknown",
|
type="unknown",
|
||||||
@@ -1235,15 +1557,29 @@ class GraphBuilder:
|
|||||||
metadata=edge_metadata,
|
metadata=edge_metadata,
|
||||||
)
|
)
|
||||||
|
|
||||||
def _build_single_action(self, event: Event) -> Action:
|
def _build_single_action(
|
||||||
|
self,
|
||||||
|
event: Event,
|
||||||
|
source_ui_elements: Optional[List[Any]] = None,
|
||||||
|
) -> Action:
|
||||||
"""
|
"""
|
||||||
Construire une Action simple depuis un seul événement.
|
Construire une Action simple depuis un seul événement.
|
||||||
|
|
||||||
Rétrocompatible avec l'ancien format : un type d'action direct
|
Pour un clic, si `source_ui_elements` est fourni, on tente d'ancrer
|
||||||
(mouse_click, key_press, text_input) avec ses paramètres.
|
l'action sur l'UIElement le plus proche (par proximité spatiale).
|
||||||
|
Le TargetSpec devient alors discriminant :
|
||||||
|
- `by_role` = rôle sémantique de l'élément (ex: "primary_action")
|
||||||
|
- `by_text` = label détecté (ex: "Valider")
|
||||||
|
- `selection_policy` = "by_similarity" (laisse le matcher scorer)
|
||||||
|
- `context_hints["anchor_element_id"]` = traçabilité
|
||||||
|
- `context_hints["anchor_bbox"]` = invariant spatial debug
|
||||||
|
|
||||||
|
À défaut d'ancrage (pas d'UIElement ou clic hors de toute bbox
|
||||||
|
proche), on retombe sur `by_role="unknown_element"` (legacy).
|
||||||
"""
|
"""
|
||||||
action_type = event.type
|
action_type = event.type
|
||||||
action_params = {}
|
action_params: Dict[str, Any] = {}
|
||||||
|
target_spec: Optional[TargetSpec] = None
|
||||||
|
|
||||||
if action_type == "mouse_click":
|
if action_type == "mouse_click":
|
||||||
action_params = {
|
action_params = {
|
||||||
@@ -1251,39 +1587,111 @@ class GraphBuilder:
|
|||||||
"position": event.data.get("pos", [0, 0]),
|
"position": event.data.get("pos", [0, 0]),
|
||||||
"wait_after_ms": 500,
|
"wait_after_ms": 500,
|
||||||
}
|
}
|
||||||
target_role = "unknown_element"
|
target_spec = self._build_click_target_spec(
|
||||||
|
event, source_ui_elements or []
|
||||||
|
)
|
||||||
|
|
||||||
elif action_type == "key_press":
|
elif action_type == "key_press":
|
||||||
action_params = {
|
action_params = {
|
||||||
"keys": event.data.get("keys", []),
|
"keys": event.data.get("keys", []),
|
||||||
"wait_after_ms": 200,
|
"wait_after_ms": 200,
|
||||||
}
|
}
|
||||||
target_role = "keyboard_input"
|
target_spec = TargetSpec(
|
||||||
|
by_role="keyboard_input",
|
||||||
|
selection_policy="first",
|
||||||
|
fallback_strategy="visual_similarity",
|
||||||
|
)
|
||||||
|
|
||||||
elif action_type == "text_input":
|
elif action_type == "text_input":
|
||||||
action_params = {
|
action_params = {
|
||||||
"text": event.data.get("text", ""),
|
"text": event.data.get("text", ""),
|
||||||
"wait_after_ms": 300,
|
"wait_after_ms": 300,
|
||||||
}
|
}
|
||||||
target_role = "text_field"
|
target_spec = TargetSpec(
|
||||||
|
by_role="text_field",
|
||||||
|
selection_policy="first",
|
||||||
|
fallback_strategy="visual_similarity",
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
action_params = {}
|
action_params = {}
|
||||||
target_role = "unknown"
|
target_spec = TargetSpec(
|
||||||
|
by_role="unknown",
|
||||||
|
selection_policy="first",
|
||||||
|
fallback_strategy="visual_similarity",
|
||||||
|
)
|
||||||
|
|
||||||
return Action(
|
return Action(
|
||||||
type=action_type,
|
type=action_type,
|
||||||
target=TargetSpec(
|
target=target_spec,
|
||||||
by_role=target_role,
|
parameters=action_params,
|
||||||
|
)
|
||||||
|
|
||||||
|
def _build_click_target_spec(
|
||||||
|
self,
|
||||||
|
event: Event,
|
||||||
|
source_ui_elements: List[Any],
|
||||||
|
) -> TargetSpec:
|
||||||
|
"""
|
||||||
|
Construire un TargetSpec pour un clic, en essayant de l'ancrer à
|
||||||
|
un UIElement détecté sur l'écran source.
|
||||||
|
|
||||||
|
Retourne toujours un TargetSpec valide :
|
||||||
|
- ancré (role + text + context_hints) si un élément proche existe ;
|
||||||
|
- fallback `unknown_element` sinon (comportement historique).
|
||||||
|
"""
|
||||||
|
clicked = self._find_clicked_element(event, source_ui_elements)
|
||||||
|
|
||||||
|
if clicked is None:
|
||||||
|
return TargetSpec(
|
||||||
|
by_role="unknown_element",
|
||||||
selection_policy="first",
|
selection_policy="first",
|
||||||
fallback_strategy="visual_similarity",
|
fallback_strategy="visual_similarity",
|
||||||
),
|
)
|
||||||
parameters=action_params,
|
|
||||||
|
# Extraction défensive des attributs de l'élément.
|
||||||
|
role = getattr(clicked, "role", None) or "unknown_element"
|
||||||
|
label = getattr(clicked, "label", None) or None
|
||||||
|
element_id = getattr(clicked, "element_id", None)
|
||||||
|
|
||||||
|
# Contexte de traçabilité — `context_hints` est le seul dict libre
|
||||||
|
# disponible dans TargetSpec (pas de champ `metadata` dédié).
|
||||||
|
context_hints: Dict[str, Any] = {}
|
||||||
|
if element_id:
|
||||||
|
context_hints["anchor_element_id"] = str(element_id)
|
||||||
|
|
||||||
|
bbox = getattr(clicked, "bbox", None)
|
||||||
|
if bbox is not None:
|
||||||
|
try:
|
||||||
|
context_hints["anchor_bbox"] = {
|
||||||
|
"x": int(getattr(bbox, "x", bbox[0])),
|
||||||
|
"y": int(getattr(bbox, "y", bbox[1])),
|
||||||
|
"width": int(getattr(bbox, "width", bbox[2])),
|
||||||
|
"height": int(getattr(bbox, "height", bbox[3])),
|
||||||
|
}
|
||||||
|
except (AttributeError, IndexError, TypeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Center (utile comme ancre de fallback quand le matcher échoue)
|
||||||
|
center = getattr(clicked, "center", None)
|
||||||
|
if center is not None:
|
||||||
|
try:
|
||||||
|
context_hints["anchor_center"] = [int(center[0]), int(center[1])]
|
||||||
|
except (IndexError, TypeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
return TargetSpec(
|
||||||
|
by_role=role,
|
||||||
|
by_text=label,
|
||||||
|
selection_policy="by_similarity",
|
||||||
|
fallback_strategy="visual_similarity",
|
||||||
|
context_hints=context_hints,
|
||||||
)
|
)
|
||||||
|
|
||||||
def _build_compound_action(
|
def _build_compound_action(
|
||||||
self,
|
self,
|
||||||
events: List[Event],
|
events: List[Event],
|
||||||
screen_resolution: Tuple[int, int] = (1920, 1080),
|
screen_resolution: Tuple[int, int] = (1920, 1080),
|
||||||
|
source_ui_elements: Optional[List[Any]] = None,
|
||||||
) -> Action:
|
) -> Action:
|
||||||
"""
|
"""
|
||||||
Construire une Action compound (multi-étapes) depuis plusieurs événements.
|
Construire une Action compound (multi-étapes) depuis plusieurs événements.
|
||||||
@@ -1360,21 +1768,33 @@ class GraphBuilder:
|
|||||||
# La cible du compound = cible de la dernière action (le clic final, etc.)
|
# La cible du compound = cible de la dernière action (le clic final, etc.)
|
||||||
last_event = events[-1]
|
last_event = events[-1]
|
||||||
if last_event.type == "mouse_click":
|
if last_event.type == "mouse_click":
|
||||||
target_role = "unknown_element"
|
# On tente d'ancrer le clic final aux UIElements détectés,
|
||||||
|
# comme dans _build_single_action.
|
||||||
|
target_spec = self._build_click_target_spec(
|
||||||
|
last_event, source_ui_elements or []
|
||||||
|
)
|
||||||
elif last_event.type == "text_input":
|
elif last_event.type == "text_input":
|
||||||
target_role = "text_field"
|
target_spec = TargetSpec(
|
||||||
|
by_role="text_field",
|
||||||
|
selection_policy="first",
|
||||||
|
fallback_strategy="visual_similarity",
|
||||||
|
)
|
||||||
elif last_event.type == "key_press":
|
elif last_event.type == "key_press":
|
||||||
target_role = "keyboard_input"
|
target_spec = TargetSpec(
|
||||||
|
by_role="keyboard_input",
|
||||||
|
selection_policy="first",
|
||||||
|
fallback_strategy="visual_similarity",
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
target_role = "unknown"
|
target_spec = TargetSpec(
|
||||||
|
by_role="unknown",
|
||||||
|
selection_policy="first",
|
||||||
|
fallback_strategy="visual_similarity",
|
||||||
|
)
|
||||||
|
|
||||||
return Action(
|
return Action(
|
||||||
type="compound",
|
type="compound",
|
||||||
target=TargetSpec(
|
target=target_spec,
|
||||||
by_role=target_role,
|
|
||||||
selection_policy="first",
|
|
||||||
fallback_strategy="visual_similarity",
|
|
||||||
),
|
|
||||||
parameters={
|
parameters={
|
||||||
"steps": steps,
|
"steps": steps,
|
||||||
"step_count": len(steps),
|
"step_count": len(steps),
|
||||||
|
|||||||
0
core/knowledge/__init__.py
Normal file
0
core/knowledge/__init__.py
Normal file
494
core/knowledge/ui_patterns.py
Normal file
494
core/knowledge/ui_patterns.py
Normal file
@@ -0,0 +1,494 @@
|
|||||||
|
"""
|
||||||
|
Base de connaissances des patterns d'interface utilisateur.
|
||||||
|
|
||||||
|
Donne à Léa des "réflexes natifs" : quand elle reconnaît un pattern UI
|
||||||
|
connu (dialogue OK/Annuler, menu, barre d'outils), elle sait immédiatement
|
||||||
|
quoi faire sans avoir besoin de l'apprendre par observation.
|
||||||
|
|
||||||
|
Sources :
|
||||||
|
- GUI-R1 dataset (3K exemples annotés, ritzzai/GUI-R1)
|
||||||
|
- Patterns Windows/Linux courants
|
||||||
|
- Conventions UI universelles
|
||||||
|
|
||||||
|
Utilisation :
|
||||||
|
from core.knowledge.ui_patterns import UIPatternLibrary
|
||||||
|
lib = UIPatternLibrary()
|
||||||
|
match = lib.find_pattern("Voulez-vous enregistrer ?")
|
||||||
|
# → {'action': 'click', 'target': 'Enregistrer', 'zone': 'dialog_center', ...}
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class UIPattern:
    """A known user-interface pattern (dialog, menu, shortcut, ...).

    Matching is trigger-based: when one of ``triggers`` appears in the
    analysed text, the pattern suggests performing ``action`` on ``target``.
    """

    name: str                                   # unique identifier, e.g. "dialog_save"
    category: str                               # "dialog", "window", "menu", "form", "popup", "shortcut", ...
    triggers: List[str]                         # lowercase substrings that activate the pattern
    action: str                                 # suggested action: "click", "hotkey", ...
    target: str                                 # label of the element to act on, or key combination
    typical_zone: str                           # logical screen zone ("dialog_center", "titlebar", ...)
    typical_bbox: Optional[List[float]] = None  # normalised [x1, y1, x2, y2] when known
    os: str = "any"                             # "windows", "linux" or "any"
    confidence: float = 0.9                     # base confidence of the suggestion
    metadata: Dict[str, Any] = field(default_factory=dict)  # extras, e.g. {"alternatives": [...], "source": ...}
|
||||||
|
|
||||||
|
# Native Windows patterns — the baseline reflexes shipped with the library.
BUILTIN_PATTERNS: List[Dict[str, Any]] = [
    # === CONFIRMATION DIALOGS ===
    {
        "name": "dialog_save",
        "category": "dialog",
        "triggers": [
            "voulez-vous enregistrer", "do you want to save",
            "save changes", "enregistrer les modifications",
            "enregistrer sous", "save as",
            "sauvegarder", "unsaved changes",
        ],
        "action": "click",
        "target": "Enregistrer",
        "alternatives": ["Save", "Oui", "Yes"],
        "typical_zone": "dialog_center",
        "typical_bbox": [0.35, 0.55, 0.50, 0.65],
        "os": "any",
    },
    {
        "name": "dialog_cancel",
        "category": "dialog",
        "triggers": [
            "annuler", "cancel", "abandonner", "discard",
        ],
        "action": "click",
        "target": "Annuler",
        "alternatives": ["Cancel", "Non", "No"],
        "typical_zone": "dialog_center",
        "typical_bbox": [0.50, 0.55, 0.65, 0.65],
        "os": "any",
    },
    {
        "name": "dialog_ok",
        "category": "dialog",
        "triggers": [
            "ok", "d'accord", "compris", "information",
            "erreur", "error", "warning", "avertissement",
        ],
        "action": "click",
        "target": "OK",
        "alternatives": ["Fermer", "Close", "Compris"],
        "typical_zone": "dialog_center",
        "typical_bbox": [0.45, 0.60, 0.55, 0.70],
        "os": "any",
    },
    {
        "name": "dialog_yes_no",
        "category": "dialog",
        "triggers": [
            "êtes-vous sûr", "are you sure", "confirmer",
            "confirm", "supprimer", "delete",
        ],
        "action": "click",
        "target": "Oui",
        "alternatives": ["Yes", "Confirmer", "Confirm"],
        "typical_zone": "dialog_center",
        "typical_bbox": [0.35, 0.60, 0.45, 0.68],
        "os": "any",
    },

    # === WINDOW NAVIGATION ===
    {
        "name": "window_close",
        "category": "window",
        "triggers": ["fermer la fenêtre", "close window"],
        "action": "click",
        "target": "X",
        "typical_zone": "titlebar",
        "typical_bbox": [0.96, 0.0, 1.0, 0.04],
        "os": "windows",
    },
    {
        "name": "window_minimize",
        "category": "window",
        "triggers": ["minimiser", "minimize"],
        "action": "click",
        "target": "_",
        "typical_zone": "titlebar",
        "typical_bbox": [0.90, 0.0, 0.94, 0.04],
        "os": "windows",
    },
    {
        "name": "window_maximize",
        "category": "window",
        "triggers": ["maximiser", "maximize", "agrandir"],
        "action": "click",
        "target": "□",
        "typical_zone": "titlebar",
        "typical_bbox": [0.94, 0.0, 0.96, 0.04],
        "os": "windows",
    },

    # === MENUS ===
    {
        "name": "menu_file",
        "category": "menu",
        "triggers": ["menu fichier", "menu file", "ouvrir fichier", "open file"],
        "action": "click",
        "target": "Fichier",
        "alternatives": ["File"],
        "typical_zone": "menu_toolbar",
        "typical_bbox": [0.0, 0.03, 0.06, 0.06],
        "os": "any",
    },
    {
        "name": "menu_edit",
        "category": "menu",
        "triggers": ["édition", "edit", "modifier"],
        "action": "click",
        "target": "Édition",
        "alternatives": ["Edit"],
        "typical_zone": "menu_toolbar",
        "typical_bbox": [0.06, 0.03, 0.12, 0.06],
        "os": "any",
    },

    # === FORMS ===
    {
        "name": "form_submit",
        "category": "form",
        "triggers": [
            "valider", "submit", "envoyer", "send",
            "connexion", "login", "se connecter", "sign in",
        ],
        "action": "click",
        "target": "Valider",
        "alternatives": ["Submit", "Envoyer", "Connexion", "Login", "OK"],
        "typical_zone": "content",
        "typical_bbox": [0.35, 0.70, 0.65, 0.80],
        "os": "any",
    },
    {
        "name": "form_search",
        "category": "form",
        "triggers": ["rechercher", "search", "chercher", "find"],
        "action": "click",
        "target": "Rechercher",
        "alternatives": ["Search", "🔍", "Go"],
        "typical_zone": "menu_toolbar",
        "typical_bbox": [0.30, 0.03, 0.70, 0.06],
        "os": "any",
    },

    # === WEB NAVIGATION ===
    {
        "name": "cookie_accept",
        "category": "popup",
        "triggers": [
            "accepter les cookies", "accept cookies",
            "utilise des cookies", "uses cookies",
            "j'accepte", "accept all", "tout accepter",
            "consent", "consentement",
        ],
        "action": "click",
        "target": "Accepter",
        "alternatives": ["Accept", "Accept All", "Tout accepter", "J'accepte"],
        "typical_zone": "content",
        "typical_bbox": [0.30, 0.80, 0.70, 0.90],
        "os": "any",
    },

    # === UNIVERSAL SHORTCUTS ===
    {
        "name": "shortcut_save",
        "category": "shortcut",
        "triggers": ["sauvegarder", "enregistrer", "save"],
        "action": "hotkey",
        "target": "ctrl+s",
        "typical_zone": "keyboard",
        "os": "any",
    },
    {
        "name": "shortcut_undo",
        "category": "shortcut",
        "triggers": ["annuler action", "undo", "défaire"],
        "action": "hotkey",
        "target": "ctrl+z",
        "typical_zone": "keyboard",
        "os": "any",
    },
    {
        "name": "shortcut_copy",
        "category": "shortcut",
        "triggers": ["copier", "copy"],
        "action": "hotkey",
        "target": "ctrl+c",
        "typical_zone": "keyboard",
        "os": "any",
    },
    {
        "name": "shortcut_paste",
        "category": "shortcut",
        "triggers": ["coller", "paste"],
        "action": "hotkey",
        "target": "ctrl+v",
        "typical_zone": "keyboard",
        "os": "any",
    },
]
|
||||||
|
|
||||||
|
|
||||||
|
class UIPatternLibrary:
    """Library of known UI patterns.

    Gives Léa "native reflexes": when a pattern is recognised in OCR
    text or in the visual context, she immediately knows what to do
    without having to learn it by observation.
    """

    # Default locations of the additional pattern files.
    _PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
    _GUI_R1_PATTERNS_PATH = _PROJECT_ROOT / "data" / "gui_r1_ui_patterns.json"
    _LEARNED_PATTERNS_PATH = _PROJECT_ROOT / "data" / "learned_patterns.json"

    def __init__(self, extra_patterns_path: Optional[str] = None):
        """Load builtin patterns plus the static/learned/custom files.

        Args:
            extra_patterns_path: optional extra JSON pattern file.
        """
        self._patterns: List[UIPattern] = []
        self._load_builtin()

        # Patterns extracted from GUI-R1 (static, generated once).
        self._load_from_file(str(self._GUI_R1_PATTERNS_PATH))

        # Patterns learned through Shadow observation (dynamic).
        self._load_from_file(str(self._LEARNED_PATTERNS_PATH))

        # Explicitly supplied custom file.
        if extra_patterns_path:
            self._load_from_file(extra_patterns_path)

        logger.info(f"UIPatternLibrary: {len(self._patterns)} patterns chargés")

    def _load_builtin(self):
        """Instantiate the hard-coded BUILTIN_PATTERNS as UIPattern objects."""
        for p in BUILTIN_PATTERNS:
            self._patterns.append(UIPattern(
                name=p["name"],
                category=p["category"],
                triggers=p["triggers"],
                action=p["action"],
                target=p["target"],
                typical_zone=p.get("typical_zone", "content"),
                typical_bbox=p.get("typical_bbox"),
                os=p.get("os", "any"),
                metadata={
                    "alternatives": p.get("alternatives", []),
                    "source": "builtin",
                },
            ))

    def _load_from_file(self, path: str):
        """Load patterns from a JSON file; a missing file is not an error.

        Any parse/shape error is logged and swallowed so a corrupt file
        never prevents the library from starting with builtin patterns.
        """
        filepath = Path(path)
        if not filepath.exists():
            logger.debug(f"Fichier patterns non trouvé (OK si premier lancement): {path}")
            return
        try:
            with open(filepath) as f:
                data = json.load(f)
            for p in data.get("patterns", []):
                # Build metadata, folding in source/learned_at/gui_r1_id when present.
                meta = dict(p.get("metadata", {}))
                for key in ("source", "learned_at", "gui_r1_id"):
                    if key in p:
                        meta[key] = p[key]
                self._patterns.append(UIPattern(
                    name=p["name"],
                    category=p.get("category", "custom"),
                    triggers=p.get("triggers", []),
                    action=p.get("action", "click"),
                    target=p.get("target", ""),
                    typical_zone=p.get("typical_zone", "content"),
                    typical_bbox=p.get("typical_bbox"),
                    os=p.get("os", "any"),
                    confidence=p.get("confidence", 0.9),
                    metadata=meta,
                ))
            logger.info(f"Chargé {len(data.get('patterns', []))} patterns depuis {path}")
        except Exception as e:
            logger.error(f"Erreur chargement patterns: {e}")

    def find_pattern(
        self,
        text: str,
        os_filter: Optional[str] = None,
    ) -> Optional[Dict[str, Any]]:
        """Search for a known UI pattern in text (OCR, window title, etc.).

        Args:
            text: text to analyse (may contain OCR noise)
            os_filter: filter by OS ("windows", "linux", None=all)

        Returns:
            Dict with action, target, confidence, etc., or None.
        """
        import re  # hoisted: was re-imported inside the per-trigger loop

        text_lower = text.lower()
        best_match = None
        best_score = 0

        for pattern in self._patterns:
            if os_filter and pattern.os not in ("any", os_filter):
                continue

            score = 0
            matched_trigger = None
            for trigger in pattern.triggers:
                # Short triggers ("ok", ...) require word boundaries so they
                # don't match inside unrelated words; longer triggers use a
                # plain substring test.
                if len(trigger) <= 3:
                    hit = re.search(r'\b' + re.escape(trigger) + r'\b', text_lower) is not None
                else:
                    hit = trigger in text_lower
                if hit:
                    # Longer triggers relative to the text score higher.
                    trigger_score = len(trigger) / max(len(text_lower), 1)
                    if trigger_score > score:
                        score = trigger_score
                        matched_trigger = trigger

            if score > best_score and matched_trigger is not None:
                best_score = score
                best_match = {
                    "pattern": pattern.name,
                    "category": pattern.category,
                    "action": pattern.action,
                    "target": pattern.target,
                    "alternatives": pattern.metadata.get("alternatives", []),
                    "typical_zone": pattern.typical_zone,
                    "typical_bbox": pattern.typical_bbox,
                    # Boost base confidence by the match score, capped at 1.0.
                    "confidence": min(pattern.confidence * (1 + score), 1.0),
                    "matched_trigger": matched_trigger,
                    "os": pattern.os,
                }

        return best_match

    def find_by_category(self, category: str) -> List[Dict[str, Any]]:
        """Return all patterns of a given category."""
        return [
            {
                "name": p.name,
                "action": p.action,
                "target": p.target,
                "triggers": p.triggers,
                "typical_zone": p.typical_zone,
            }
            for p in self._patterns
            if p.category == category
        ]

    def get_dialog_handler(self, dialog_text: str) -> Optional[Dict[str, Any]]:
        """Shortcut: look up a pattern for dialog text.

        Note: the previous implementation called find_pattern() a second
        time when the best match was not a dialog; since find_pattern() is
        deterministic that call returned the identical result, so the
        single call below is behaviourally equivalent and twice as cheap.
        """
        return self.find_pattern(dialog_text)

    def add_pattern(self, pattern_dict: Dict[str, Any]):
        """Add a pattern dynamically (e.g. learned by observation)."""
        self._patterns.append(UIPattern(
            name=pattern_dict["name"],
            category=pattern_dict.get("category", "learned"),
            triggers=pattern_dict.get("triggers", []),
            action=pattern_dict.get("action", "click"),
            target=pattern_dict.get("target", ""),
            typical_zone=pattern_dict.get("typical_zone", "content"),
            typical_bbox=pattern_dict.get("typical_bbox"),
            os=pattern_dict.get("os", "any"),
            confidence=pattern_dict.get("confidence", 0.7),
            metadata={"source": "learned"},
        ))

    def save_to_file(self, path: str):
        """Persist all patterns (builtin + learned) to a JSON file."""
        data = {
            "patterns": [
                {
                    "name": p.name,
                    "category": p.category,
                    "triggers": p.triggers,
                    "action": p.action,
                    "target": p.target,
                    "typical_zone": p.typical_zone,
                    "typical_bbox": p.typical_bbox,
                    "os": p.os,
                    "confidence": p.confidence,
                    "metadata": p.metadata,
                }
                for p in self._patterns
            ]
        }
        with open(path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        logger.info(f"Sauvegardé {len(self._patterns)} patterns dans {path}")

    def save_learned_pattern(self, pattern_dict: Dict[str, Any]):
        """Persist a Shadow-learned pattern into learned_patterns.json.

        The pattern is added in memory AND saved to disk. The file is
        created if absent; existing patterns are preserved. Duplicates
        (same triggers + same target) are skipped.
        """
        from datetime import datetime as dt

        # Load the existing file or start a fresh structure.
        filepath = self._LEARNED_PATTERNS_PATH
        filepath.parent.mkdir(parents=True, exist_ok=True)

        existing: Dict[str, Any] = {"patterns": []}
        if filepath.exists():
            try:
                with open(filepath, encoding="utf-8") as f:
                    existing = json.load(f)
            except (json.JSONDecodeError, OSError):
                logger.warning(f"Fichier {filepath} corrompu, recréation")

        # Deduplicate: identical trigger set + identical target.
        new_triggers = set(t.lower() for t in pattern_dict.get("triggers", []))
        new_target = pattern_dict.get("target", "").lower()
        for existing_p in existing.get("patterns", []):
            existing_triggers = set(t.lower() for t in existing_p.get("triggers", []))
            if existing_triggers == new_triggers and existing_p.get("target", "").lower() == new_target:
                logger.debug(f"Pattern déjà connu, skip: triggers={new_triggers}, target={new_target}")
                return

        # Auto-number and build the full entry.
        count = len(existing.get("patterns", []))
        entry = {
            "name": pattern_dict.get("name", f"learned_dialog_{count + 1:03d}"),
            "category": pattern_dict.get("category", "dialog"),
            "triggers": pattern_dict.get("triggers", []),
            "action": pattern_dict.get("action", "click"),
            "target": pattern_dict.get("target", ""),
            "os": pattern_dict.get("os", "windows"),
            "source": "shadow_learning",
            "learned_at": dt.now().isoformat(timespec="seconds"),
            "confidence": pattern_dict.get("confidence", 0.8),
        }

        # Add in memory (with the auto-generated name), then persist.
        self.add_pattern(entry)
        existing.setdefault("patterns", []).append(entry)

        with open(filepath, "w", encoding="utf-8") as f:
            json.dump(existing, f, indent=2, ensure_ascii=False)
        logger.info(f"Pattern appris sauvegardé: {entry['name']} → {entry['target']}")

    @property
    def stats(self) -> Dict[str, int]:
        # Lightweight summary for telemetry: total count + per-category counts.
        from collections import Counter
        cats = Counter(p.category for p in self._patterns)
        return {"total": len(self._patterns), "by_category": dict(cats)}
|
||||||
@@ -2,7 +2,140 @@
|
|||||||
Pipeline module - Orchestration du flux RPA Vision V3
|
Pipeline module - Orchestration du flux RPA Vision V3
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import threading
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
from .workflow_pipeline import WorkflowPipeline, create_pipeline
|
from .workflow_pipeline import WorkflowPipeline, create_pipeline
|
||||||
from .screen_analyzer import ScreenAnalyzer
|
from .screen_analyzer import ScreenAnalyzer
|
||||||
|
from .screen_state_cache import ScreenStateCache, compute_perceptual_hash
|
||||||
|
from .edge_scorer import EdgeScorer, EdgeScore
|
||||||
|
|
||||||
__all__ = ["WorkflowPipeline", "create_pipeline", "ScreenAnalyzer"]
|
__all__ = [
|
||||||
|
"WorkflowPipeline",
|
||||||
|
"create_pipeline",
|
||||||
|
"ScreenAnalyzer",
|
||||||
|
"ScreenStateCache",
|
||||||
|
"compute_perceptual_hash",
|
||||||
|
"EdgeScorer",
|
||||||
|
"EdgeScore",
|
||||||
|
"get_screen_analyzer",
|
||||||
|
"reset_screen_analyzer",
|
||||||
|
"get_screen_state_cache",
|
||||||
|
"reset_screen_state_cache",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Singleton ScreenAnalyzer
|
||||||
|
# =============================================================================
|
||||||
|
#
|
||||||
|
# Une seule instance est partagée entre ExecutionLoop, GraphBuilder et
|
||||||
|
# stream_processor pour éviter le double chargement GPU (UIDetector + CLIP
|
||||||
|
# = 6-10 Go VRAM, plafond 12 Go sur RTX 5070).
|
||||||
|
#
|
||||||
|
# Thread-safe : protégé par un lock.
|
||||||
|
#
|
||||||
|
# IMPORTANT (Lot C — avril 2026) :
|
||||||
|
# Ce singleton ne porte plus AUCUN contexte d'exécution. Il détient
|
||||||
|
# uniquement les ressources lourdes (modèles OCR, UIDetector, CLIP).
|
||||||
|
# • Les flags runtime (`enable_ocr`, `enable_ui_detection`) et l'identité
|
||||||
|
# de session (`session_id`) se passent en kwargs-only à `analyze()`,
|
||||||
|
# jamais en mutant l'instance. Voir `ScreenAnalyzer.analyze()`.
|
||||||
|
# • L'argument `session_id` de `get_screen_analyzer()` ne sert QUE de
|
||||||
|
# valeur par défaut historique, ignorée après la première création.
|
||||||
|
# À terme, prévoir sa suppression.
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
_SCREEN_ANALYZER_SINGLETON: Optional[ScreenAnalyzer] = None
|
||||||
|
_SCREEN_ANALYZER_LOCK = threading.Lock()
|
||||||
|
|
||||||
|
|
||||||
|
def get_screen_analyzer(
    ui_detector=None,
    ocr_engine: Optional[str] = None,
    session_id: str = "",
    force_new: bool = False,
) -> ScreenAnalyzer:
    """
    Return the shared ScreenAnalyzer instance.

    The instance is created lazily on first call; subsequent calls return
    the same object regardless of the arguments, unless ``force_new=True``
    which rebuilds it (intended for tests).

    Args:
        ui_detector: optional UIDetector (used only when the instance is built)
        ocr_engine: OCR engine ("doctr", "tesseract", None=auto)
        session_id: session id (used only when the instance is built)
        force_new: force creation of a fresh instance (tests)

    Returns:
        The shared ScreenAnalyzer instance.
    """
    global _SCREEN_ANALYZER_SINGLETON

    def _build() -> ScreenAnalyzer:
        # The arguments only matter at (re)construction time.
        return ScreenAnalyzer(
            ui_detector=ui_detector,
            ocr_engine=ocr_engine,
            session_id=session_id,
        )

    if force_new:
        with _SCREEN_ANALYZER_LOCK:
            _SCREEN_ANALYZER_SINGLETON = _build()
        return _SCREEN_ANALYZER_SINGLETON

    # Lock-free fast path: already built.
    existing = _SCREEN_ANALYZER_SINGLETON
    if existing is not None:
        return existing

    with _SCREEN_ANALYZER_LOCK:
        # Double-checked locking: another thread may have built it meanwhile.
        if _SCREEN_ANALYZER_SINGLETON is None:
            _SCREEN_ANALYZER_SINGLETON = _build()
        return _SCREEN_ANALYZER_SINGLETON
|
||||||
|
|
||||||
|
|
||||||
|
def reset_screen_analyzer() -> None:
    """Drop the shared ScreenAnalyzer instance (for tests only)."""
    global _SCREEN_ANALYZER_SINGLETON
    with _SCREEN_ANALYZER_LOCK:
        _SCREEN_ANALYZER_SINGLETON = None
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Singleton ScreenStateCache (partagé)
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
_SCREEN_STATE_CACHE_SINGLETON: Optional[ScreenStateCache] = None
|
||||||
|
_SCREEN_STATE_CACHE_LOCK = threading.Lock()
|
||||||
|
|
||||||
|
|
||||||
|
def get_screen_state_cache(
    ttl_seconds: float = 2.0,
    max_entries: int = 16,
) -> ScreenStateCache:
    """
    Return the shared ScreenState cache (created on first request).

    Args:
        ttl_seconds: entry time-to-live, used only when the cache is built
        max_entries: capacity bound, used only when the cache is built
    """
    global _SCREEN_STATE_CACHE_SINGLETON

    # Lock-free fast path: already built.
    cached = _SCREEN_STATE_CACHE_SINGLETON
    if cached is not None:
        return cached

    with _SCREEN_STATE_CACHE_LOCK:
        # Double-checked locking: re-test under the lock.
        if _SCREEN_STATE_CACHE_SINGLETON is None:
            _SCREEN_STATE_CACHE_SINGLETON = ScreenStateCache(
                ttl_seconds=ttl_seconds,
                max_entries=max_entries,
            )
        return _SCREEN_STATE_CACHE_SINGLETON
|
||||||
|
|
||||||
|
|
||||||
|
def reset_screen_state_cache() -> None:
    """Drop the shared ScreenState cache (for tests only)."""
    global _SCREEN_STATE_CACHE_SINGLETON
    with _SCREEN_STATE_CACHE_LOCK:
        _SCREEN_STATE_CACHE_SINGLETON = None
|
||||||
|
|||||||
380
core/pipeline/edge_scorer.py
Normal file
380
core/pipeline/edge_scorer.py
Normal file
@@ -0,0 +1,380 @@
|
|||||||
|
"""
|
||||||
|
EdgeScorer — Sélection robuste d'un edge parmi plusieurs candidats.
|
||||||
|
|
||||||
|
Au lieu de prendre "le premier edge sortant" (comportement legacy),
|
||||||
|
ce module :
|
||||||
|
|
||||||
|
1. Applique un **filtre dur** : rejette les edges dont les `pre_conditions`
|
||||||
|
(EdgeConstraints) échouent étant donné le ScreenState courant.
|
||||||
|
2. Applique un **ranking léger** : score composite
|
||||||
|
- `stats.success_rate` (pondéré fort)
|
||||||
|
- match du `target_spec` (présence d'un UI element compatible)
|
||||||
|
- récence (dernière exécution réussie)
|
||||||
|
3. Retourne le meilleur edge, ou `None` si aucun ne passe le filtre.
|
||||||
|
|
||||||
|
API principale :
|
||||||
|
>>> scorer = EdgeScorer()
|
||||||
|
>>> edge = scorer.select_best(edges, screen_state=state)
|
||||||
|
|
||||||
|
Les scores individuels sont exposés via `score_edge()` pour les tests
|
||||||
|
et la télémétrie.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import List, Optional, Sequence
|
||||||
|
|
||||||
|
from core.models.screen_state import ScreenState
|
||||||
|
from core.models.workflow_graph import WorkflowEdge
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Résultat de scoring (utile pour la télémétrie / debug)
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class EdgeScore:
    """Detailed scoring result for a single edge candidate."""

    edge: WorkflowEdge                  # the edge that was scored
    total: float                        # weighted composite score
    success_rate: float                 # component: historical success rate
    target_match: float                 # component: target_spec / UI-element match
    recency: float                      # component: recency of last successful run
    passed_preconditions: bool          # hard filter: did pre_conditions hold?
    precondition_reason: str = "OK"     # human-readable reason when the filter fails

    def __lt__(self, other: "EdgeScore") -> bool:
        # Ordering used by sorted(): a higher total means a better edge.
        return self.total < other.total
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Scorer
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
class EdgeScorer:
|
||||||
|
"""
|
||||||
|
Sélectionne le meilleur edge sortant étant donné un ScreenState.
|
||||||
|
|
||||||
|
Les poids par défaut peuvent être ajustés à la construction.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
weight_success_rate: float = 0.55,
|
||||||
|
weight_target_match: float = 0.35,
|
||||||
|
weight_recency: float = 0.10,
|
||||||
|
default_success_rate: float = 0.5,
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Args:
|
||||||
|
weight_success_rate: poids du `edge.stats.success_rate`
|
||||||
|
weight_target_match: poids du match `target_spec` / `ui_elements`
|
||||||
|
weight_recency: poids de la récence de la dernière exécution
|
||||||
|
default_success_rate: valeur quand l'edge n'a jamais été exécuté
|
||||||
|
"""
|
||||||
|
total = weight_success_rate + weight_target_match + weight_recency
|
||||||
|
if total <= 0:
|
||||||
|
raise ValueError("La somme des poids doit être > 0")
|
||||||
|
# Normalisation silencieuse
|
||||||
|
self.w_success = weight_success_rate / total
|
||||||
|
self.w_target = weight_target_match / total
|
||||||
|
self.w_recency = weight_recency / total
|
||||||
|
self.default_success_rate = default_success_rate
|
||||||
|
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
# API publique
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def select_best(
|
||||||
|
self,
|
||||||
|
edges: Sequence[WorkflowEdge],
|
||||||
|
screen_state: Optional[ScreenState] = None,
|
||||||
|
strategy: str = "best",
|
||||||
|
source_similarity: float = 1.0,
|
||||||
|
) -> Optional[WorkflowEdge]:
|
||||||
|
"""
|
||||||
|
Sélectionne le meilleur edge.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
edges: Liste des edges candidats (généralement les sortants d'un node)
|
||||||
|
screen_state: État courant pour évaluer pre_conditions et target_spec
|
||||||
|
strategy: "best" (défaut, score complet) ou "first" (legacy, premier edge)
|
||||||
|
source_similarity: confiance du matching qui a identifié le node
|
||||||
|
source courant (valeur propagée depuis `match_current_state`).
|
||||||
|
Utilisée pour évaluer la précondition ``min_source_similarity``
|
||||||
|
de chaque edge. Défaut à ``1.0`` pour compat avec les appelants
|
||||||
|
qui ne la fournissent pas encore.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Meilleur edge ou None si aucun ne passe les pre_conditions
|
||||||
|
"""
|
||||||
|
if not edges:
|
||||||
|
return None
|
||||||
|
|
||||||
|
if strategy == "first":
|
||||||
|
# Comportement legacy — retourne le premier edge quoi qu'il arrive
|
||||||
|
return edges[0]
|
||||||
|
|
||||||
|
scores = self.rank(
|
||||||
|
edges, screen_state=screen_state, source_similarity=source_similarity
|
||||||
|
)
|
||||||
|
|
||||||
|
# Filtrer ceux qui ont passé les pre_conditions
|
||||||
|
valid = [s for s in scores if s.passed_preconditions]
|
||||||
|
if not valid:
|
||||||
|
# Aucun edge valide → log pour debug, retourner None
|
||||||
|
reasons = "; ".join(
|
||||||
|
f"{s.edge.edge_id}: {s.precondition_reason}" for s in scores[:5]
|
||||||
|
)
|
||||||
|
logger.warning(
|
||||||
|
f"[EdgeScorer] Aucun edge valide parmi {len(edges)} candidats. "
|
||||||
|
f"Raisons: {reasons}"
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
|
||||||
|
best = valid[0].edge # déjà trié par score décroissant
|
||||||
|
logger.debug(
|
||||||
|
f"[EdgeScorer] Sélection {best.edge_id} "
|
||||||
|
f"(score={valid[0].total:.3f}, parmi {len(valid)} valides)"
|
||||||
|
)
|
||||||
|
return best
|
||||||
|
|
||||||
|
def rank(
|
||||||
|
self,
|
||||||
|
edges: Sequence[WorkflowEdge],
|
||||||
|
screen_state: Optional[ScreenState] = None,
|
||||||
|
source_similarity: float = 1.0,
|
||||||
|
) -> List[EdgeScore]:
|
||||||
|
"""
|
||||||
|
Retourne la liste des edges triés par score décroissant,
|
||||||
|
avec le détail pour chaque edge.
|
||||||
|
|
||||||
|
Tiebreak : `success_rate` le plus haut.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
edges: edges candidats
|
||||||
|
screen_state: état courant (pour pre_conditions + target_match)
|
||||||
|
source_similarity: confiance du match courant, propagée aux
|
||||||
|
pre_conditions pour vérifier ``min_source_similarity``
|
||||||
|
"""
|
||||||
|
scored = [
|
||||||
|
self.score_edge(edge, screen_state, source_similarity=source_similarity)
|
||||||
|
for edge in edges
|
||||||
|
]
|
||||||
|
# Tri : score total décroissant, puis success_rate décroissant
|
||||||
|
scored.sort(key=lambda s: (s.total, s.success_rate), reverse=True)
|
||||||
|
return scored
|
||||||
|
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
# Scoring par edge
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def score_edge(
    self,
    edge: WorkflowEdge,
    screen_state: Optional[ScreenState] = None,
    source_similarity: float = 1.0,
) -> EdgeScore:
    """
    Compute the score of a single edge.

    Pre-conditions are evaluated here but act purely as a hard filter:
    the total score is still computed, only `passed_preconditions` is
    set to False when they fail.

    Args:
        edge: edge to score
        screen_state: current state (window, texts, ui_elements)
        source_similarity: confidence of the current matching, injected
            into ``EdgeConstraints.check_preconditions`` to evaluate
            ``min_source_similarity``.
    """
    # 1. Pre-conditions: hard filter.
    preconditions_ok, precondition_reason = self._check_preconditions(
        edge, screen_state, source_similarity=source_similarity
    )

    # 2-4. Individual score components.
    rate = self._score_success_rate(edge)        # depends on existing stats
    match = self._score_target_match(edge, screen_state)  # UI element present?
    freshness = self._score_recency(edge)

    weighted_total = (
        self.w_success * rate
        + self.w_target * match
        + self.w_recency * freshness
    )

    return EdgeScore(
        edge=edge,
        total=weighted_total,
        success_rate=rate,
        target_match=match,
        recency=freshness,
        passed_preconditions=preconditions_ok,
        precondition_reason=precondition_reason,
    )
|
||||||
|
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
# Composantes du score
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _check_preconditions(
|
||||||
|
self,
|
||||||
|
edge: WorkflowEdge,
|
||||||
|
screen_state: Optional[ScreenState],
|
||||||
|
source_similarity: float = 1.0,
|
||||||
|
) -> tuple[bool, str]:
|
||||||
|
"""
|
||||||
|
Vérifier les pre_conditions de l'edge.
|
||||||
|
|
||||||
|
Si pas de ScreenState, on ne peut rien vérifier → on laisse passer
|
||||||
|
(mais on loggue).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
edge: edge à évaluer
|
||||||
|
screen_state: état courant (None si non dispo)
|
||||||
|
source_similarity: confiance du matching courant propagée par
|
||||||
|
l'appelant (EdgeScorer.score_edge/rank/select_best). Elle
|
||||||
|
alimente ``EdgeConstraints.check_preconditions`` pour rendre
|
||||||
|
effective la contrainte ``min_source_similarity``.
|
||||||
|
"""
|
||||||
|
constraints = edge.constraints
|
||||||
|
if constraints is None:
|
||||||
|
return True, "OK (pas de contraintes)"
|
||||||
|
|
||||||
|
if screen_state is None:
|
||||||
|
# Pas de ScreenState → on ne peut évaluer ni fenêtre, ni textes,
|
||||||
|
# mais la similarité source reste vérifiable.
|
||||||
|
try:
|
||||||
|
ok, reason = constraints.check_preconditions(
|
||||||
|
window_title="",
|
||||||
|
app_name="",
|
||||||
|
detected_texts=[],
|
||||||
|
source_similarity=source_similarity,
|
||||||
|
)
|
||||||
|
if not ok:
|
||||||
|
return ok, reason
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"[EdgeScorer] Erreur check_preconditions: {e}")
|
||||||
|
return True, f"Erreur ignorée: {e}"
|
||||||
|
return True, "OK (pas de ScreenState pour évaluer)"
|
||||||
|
|
||||||
|
window_title = screen_state.window.window_title if screen_state.window else ""
|
||||||
|
app_name = screen_state.window.app_name if screen_state.window else ""
|
||||||
|
detected_texts = (
|
||||||
|
screen_state.perception.detected_text
|
||||||
|
if screen_state.perception
|
||||||
|
else []
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
ok, reason = constraints.check_preconditions(
|
||||||
|
window_title=window_title,
|
||||||
|
app_name=app_name,
|
||||||
|
detected_texts=detected_texts,
|
||||||
|
source_similarity=source_similarity,
|
||||||
|
)
|
||||||
|
return ok, reason
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"[EdgeScorer] Erreur check_preconditions: {e}")
|
||||||
|
# En cas d'erreur, on ne bloque pas l'edge
|
||||||
|
return True, f"Erreur ignorée: {e}"
|
||||||
|
|
||||||
|
def _score_success_rate(self, edge: WorkflowEdge) -> float:
|
||||||
|
"""Score basé sur `edge.stats.success_rate`."""
|
||||||
|
if edge.stats is None or edge.stats.execution_count == 0:
|
||||||
|
return self.default_success_rate
|
||||||
|
return max(0.0, min(1.0, edge.stats.success_rate))
|
||||||
|
|
||||||
|
def _score_target_match(
|
||||||
|
self,
|
||||||
|
edge: WorkflowEdge,
|
||||||
|
screen_state: Optional[ScreenState],
|
||||||
|
) -> float:
|
||||||
|
"""
|
||||||
|
Score de correspondance entre le `target_spec` de l'action et
|
||||||
|
les `ui_elements` de l'écran courant.
|
||||||
|
|
||||||
|
Retourne :
|
||||||
|
- 1.0 si un élément matche strictement (texte ou rôle)
|
||||||
|
- 0.5 si aucun screen_state fourni (neutre, pas pénalisant)
|
||||||
|
- 0.0 si aucun élément compatible
|
||||||
|
"""
|
||||||
|
if screen_state is None:
|
||||||
|
return 0.5
|
||||||
|
|
||||||
|
target = edge.action.target if edge.action else None
|
||||||
|
if target is None:
|
||||||
|
return 0.5
|
||||||
|
|
||||||
|
ui_elements = screen_state.ui_elements or []
|
||||||
|
if not ui_elements:
|
||||||
|
# Pas d'UI détectée → on ne peut pas trancher, neutre
|
||||||
|
return 0.5
|
||||||
|
|
||||||
|
target_text = (target.by_text or "").lower().strip()
|
||||||
|
target_role = (target.by_role or "").lower().strip()
|
||||||
|
|
||||||
|
best = 0.0
|
||||||
|
for el in ui_elements:
|
||||||
|
score = 0.0
|
||||||
|
el_label = getattr(el, "label", "") or ""
|
||||||
|
el_role = getattr(el, "role", "") or ""
|
||||||
|
el_type = getattr(el, "type", "") or ""
|
||||||
|
|
||||||
|
if target_text:
|
||||||
|
if target_text == el_label.lower().strip():
|
||||||
|
score = max(score, 1.0)
|
||||||
|
elif target_text in el_label.lower():
|
||||||
|
score = max(score, 0.8)
|
||||||
|
|
||||||
|
if target_role:
|
||||||
|
if target_role == el_role.lower() or target_role == el_type.lower():
|
||||||
|
score = max(score, 0.9)
|
||||||
|
|
||||||
|
if not target_text and not target_role and target.by_position:
|
||||||
|
# Si seule la position est fournie, on considère toujours match possible
|
||||||
|
score = 0.6
|
||||||
|
|
||||||
|
if score > best:
|
||||||
|
best = score
|
||||||
|
|
||||||
|
# Si on n'a rien trouvé mais qu'un target est demandé → 0.0 (fort négatif)
|
||||||
|
if best == 0.0 and (target_text or target_role):
|
||||||
|
return 0.0
|
||||||
|
|
||||||
|
return best if best > 0 else 0.5
|
||||||
|
|
||||||
|
def _score_recency(self, edge: WorkflowEdge) -> float:
|
||||||
|
"""
|
||||||
|
Score de récence basé sur `edge.stats.last_executed`.
|
||||||
|
|
||||||
|
Échelle :
|
||||||
|
- exécuté dans les dernières 24h : 1.0
|
||||||
|
- exécuté dans les 7 derniers jours : 0.7
|
||||||
|
- exécuté il y a plus longtemps : 0.3
|
||||||
|
- jamais exécuté : 0.5 (neutre)
|
||||||
|
"""
|
||||||
|
if edge.stats is None or edge.stats.last_executed is None:
|
||||||
|
return 0.5
|
||||||
|
|
||||||
|
delta = datetime.now() - edge.stats.last_executed
|
||||||
|
seconds = delta.total_seconds()
|
||||||
|
if seconds < 24 * 3600:
|
||||||
|
return 1.0
|
||||||
|
if seconds < 7 * 24 * 3600:
|
||||||
|
return 0.7
|
||||||
|
return 0.3
|
||||||
@@ -9,13 +9,33 @@ Orchestre les 4 niveaux du ScreenState :
|
|||||||
|
|
||||||
Ce module comble le chaînon manquant entre la capture brute (Couche 0)
|
Ce module comble le chaînon manquant entre la capture brute (Couche 0)
|
||||||
et la construction d'embeddings (Couche 3).
|
et la construction d'embeddings (Couche 3).
|
||||||
|
|
||||||
|
=============================================================================
|
||||||
|
Thread-safety & partage multi-loops (Lot C — avril 2026)
|
||||||
|
=============================================================================
|
||||||
|
Cet analyseur peut être partagé entre plusieurs `ExecutionLoop` (singleton
|
||||||
|
`get_screen_analyzer()`). Pour éviter la contamination croisée :
|
||||||
|
|
||||||
|
• `analyze()` NE MUTE JAMAIS `self._ocr`, `self._ui_detector`,
|
||||||
|
`self._ocr_initialized`, `self._ui_detector_initialized` pour gérer les
|
||||||
|
flags runtime (enable_ocr / enable_ui_detection). Ces flags sont par
|
||||||
|
appel, résolus en variables locales.
|
||||||
|
• `session_id` circule en paramètre d'appel et renseigne la metadata du
|
||||||
|
ScreenState ; l'attribut `self.session_id` n'est qu'un défaut historique
|
||||||
|
(rétrocompat) et n'est plus la source de vérité.
|
||||||
|
• L'init lazy des composants lourds (OCR, UIDetector) est protégée par un
|
||||||
|
`_init_lock` par instance pour empêcher une double initialisation
|
||||||
|
concurrente.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import contextlib
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional, Dict, Any, List
|
from typing import Optional, Dict, Any, List, Tuple
|
||||||
|
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
@@ -32,6 +52,44 @@ from core.models.ui_element import UIElement
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# Lock d'inférence local au module : sert de fallback si le GPUResourceManager
|
||||||
|
# n'est pas disponible (import error, tests). Partagé entre toutes les instances
|
||||||
|
# ScreenAnalyzer du process, cohérent avec le singleton get_screen_analyzer().
|
||||||
|
_ANALYZE_FALLBACK_LOCK = threading.Lock()
|
||||||
|
|
||||||
|
|
||||||
|
def _acquire_gpu_context(timeout: Optional[float] = None):
|
||||||
|
"""
|
||||||
|
Retourne un context manager pour sérialiser les appels GPU.
|
||||||
|
|
||||||
|
Préfère `GPUResourceManager.acquire_inference()` si disponible (coordination
|
||||||
|
globale), sinon bascule sur un lock threading local au module.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
from core.gpu import get_gpu_resource_manager
|
||||||
|
|
||||||
|
manager = get_gpu_resource_manager()
|
||||||
|
return manager.acquire_inference(timeout=timeout)
|
||||||
|
except Exception as e: # pragma: no cover - fallback defensif
|
||||||
|
logger.debug(f"GPUResourceManager indisponible, fallback lock local: {e}")
|
||||||
|
|
||||||
|
@contextlib.contextmanager
|
||||||
|
def _fallback():
|
||||||
|
if timeout is None:
|
||||||
|
_ANALYZE_FALLBACK_LOCK.acquire()
|
||||||
|
yield True
|
||||||
|
_ANALYZE_FALLBACK_LOCK.release()
|
||||||
|
else:
|
||||||
|
got = _ANALYZE_FALLBACK_LOCK.acquire(timeout=timeout)
|
||||||
|
try:
|
||||||
|
yield got
|
||||||
|
finally:
|
||||||
|
if got:
|
||||||
|
_ANALYZE_FALLBACK_LOCK.release()
|
||||||
|
|
||||||
|
return _fallback()
|
||||||
|
|
||||||
|
|
||||||
class ScreenAnalyzer:
|
class ScreenAnalyzer:
|
||||||
"""
|
"""
|
||||||
Construit un ScreenState complet (4 niveaux) depuis un screenshot.
|
Construit un ScreenState complet (4 niveaux) depuis un screenshot.
|
||||||
@@ -44,6 +102,14 @@ class ScreenAnalyzer:
|
|||||||
>>> state = analyzer.analyze("/path/to/screenshot.png")
|
>>> state = analyzer.analyze("/path/to/screenshot.png")
|
||||||
>>> print(state.perception.detected_text)
|
>>> print(state.perception.detected_text)
|
||||||
>>> print(len(state.ui_elements))
|
>>> print(len(state.ui_elements))
|
||||||
|
|
||||||
|
Runtime overrides (kwargs-only) sur analyze() :
|
||||||
|
>>> state = analyzer.analyze(
|
||||||
|
... path,
|
||||||
|
... enable_ocr=False, # bypass OCR pour cet appel
|
||||||
|
... enable_ui_detection=False, # bypass UIDetector
|
||||||
|
... session_id="session_42", # session par appel
|
||||||
|
... )
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
@@ -56,18 +122,27 @@ class ScreenAnalyzer:
|
|||||||
Args:
|
Args:
|
||||||
ui_detector: Instance de UIDetector (créé si None)
|
ui_detector: Instance de UIDetector (créé si None)
|
||||||
ocr_engine: Moteur OCR à utiliser ("doctr", "tesseract", None=auto)
|
ocr_engine: Moteur OCR à utiliser ("doctr", "tesseract", None=auto)
|
||||||
session_id: ID de la session en cours
|
session_id: ID de session par défaut (rétrocompat ; préférer passer
|
||||||
|
`session_id` en kwarg de `analyze()` pour chaque appel).
|
||||||
"""
|
"""
|
||||||
self._ui_detector = ui_detector
|
self._ui_detector = ui_detector
|
||||||
self._ocr_engine_name = ocr_engine
|
self._ocr_engine_name = ocr_engine
|
||||||
self._ocr = None
|
self._ocr = None
|
||||||
|
# Session par défaut (rétrocompat). La source de vérité est désormais
|
||||||
|
# le paramètre `session_id` de `analyze()`.
|
||||||
self.session_id = session_id
|
self.session_id = session_id
|
||||||
|
# Compteur d'états — protégé par _state_lock pour être safe en parallèle.
|
||||||
self._state_counter = 0
|
self._state_counter = 0
|
||||||
|
self._state_lock = threading.Lock()
|
||||||
|
|
||||||
# Initialisation lazy pour éviter les imports lourds au démarrage
|
# Initialisation lazy pour éviter les imports lourds au démarrage.
|
||||||
self._ui_detector_initialized = ui_detector is not None
|
self._ui_detector_initialized = ui_detector is not None
|
||||||
self._ocr_initialized = False
|
self._ocr_initialized = False
|
||||||
|
|
||||||
|
# Lock dédié à l'init lazy : empêche deux threads d'initialiser
|
||||||
|
# simultanément OCR ou UIDetector (double chargement GPU).
|
||||||
|
self._init_lock = threading.Lock()
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# API publique
|
# API publique
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
@@ -77,28 +152,85 @@ class ScreenAnalyzer:
|
|||||||
screenshot_path: str,
|
screenshot_path: str,
|
||||||
window_info: Optional[Dict[str, Any]] = None,
|
window_info: Optional[Dict[str, Any]] = None,
|
||||||
context: Optional[Dict[str, Any]] = None,
|
context: Optional[Dict[str, Any]] = None,
|
||||||
|
*,
|
||||||
|
enable_ocr: bool = True,
|
||||||
|
enable_ui_detection: bool = True,
|
||||||
|
session_id: str = "",
|
||||||
) -> ScreenState:
|
) -> ScreenState:
|
||||||
"""
|
"""
|
||||||
Analyser un screenshot et construire un ScreenState complet.
|
Analyser un screenshot et construire un ScreenState complet.
|
||||||
|
|
||||||
|
Les flags `enable_ocr`, `enable_ui_detection` et `session_id` sont
|
||||||
|
**par appel, kwargs-only**, pour ne pas polluer l'état partagé du
|
||||||
|
singleton quand plusieurs `ExecutionLoop` se partagent l'analyseur.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
screenshot_path: Chemin vers le fichier image
|
screenshot_path: Chemin vers le fichier image
|
||||||
window_info: Infos fenêtre active {"title": ..., "app_name": ...}
|
window_info: Infos fenêtre active {"title": ..., "app_name": ...}
|
||||||
context: Contexte métier optionnel
|
context: Contexte métier optionnel
|
||||||
|
enable_ocr: Active l'OCR pour cet appel (True par défaut).
|
||||||
|
False → `detected_text=[]`, aucune init d'OCR déclenchée.
|
||||||
|
enable_ui_detection: Active la détection UI pour cet appel
|
||||||
|
(True par défaut). False → `ui_elements=[]`.
|
||||||
|
session_id: ID de session pour cet appel. Si vide, on retombe sur
|
||||||
|
`self.session_id` (rétrocompat). Cette valeur est propagée
|
||||||
|
dans `ScreenState.session_id` et `metadata["session_id"]`.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
ScreenState avec les 4 niveaux remplis
|
ScreenState avec les 4 niveaux remplis.
|
||||||
"""
|
"""
|
||||||
screenshot_path = str(screenshot_path)
|
screenshot_path = str(screenshot_path)
|
||||||
|
|
||||||
|
# Résolution de la session : priorité au kwarg, fallback sur l'état
|
||||||
|
# interne (legacy). Variable locale uniquement — pas de mutation.
|
||||||
|
effective_session_id = session_id or self.session_id
|
||||||
|
|
||||||
|
# Compteur incrémenté sous lock pour identifiants uniques même en
|
||||||
|
# parallèle. C'est la seule mutation tolérée : elle n'impacte pas le
|
||||||
|
# comportement OCR/UI.
|
||||||
|
with self._state_lock:
|
||||||
self._state_counter += 1
|
self._state_counter += 1
|
||||||
|
state_counter = self._state_counter
|
||||||
|
|
||||||
state_id = f"{self.session_id}_state_{self._state_counter:04d}" if self.session_id else f"state_{self._state_counter:04d}"
|
state_id = (
|
||||||
|
f"{effective_session_id}_state_{state_counter:04d}"
|
||||||
|
if effective_session_id
|
||||||
|
else f"state_{state_counter:04d}"
|
||||||
|
)
|
||||||
|
|
||||||
# Niveau 1 : Raw
|
# Niveau 1 : Raw (léger, hors lock GPU)
|
||||||
raw = self._build_raw_level(screenshot_path)
|
raw = self._build_raw_level(screenshot_path)
|
||||||
|
|
||||||
# Niveau 2 : Perception (OCR)
|
# Résolution locale des instances OCR / UIDetector selon les flags.
|
||||||
detected_text = self._extract_text(screenshot_path)
|
# Aucune mutation de self ici : on décide simplement ce qu'on utilise.
|
||||||
|
ocr_instance = self._resolve_ocr_instance(enable_ocr=enable_ocr)
|
||||||
|
ui_detector_instance = self._resolve_ui_detector_instance(
|
||||||
|
enable_ui_detection=enable_ui_detection
|
||||||
|
)
|
||||||
|
|
||||||
|
# Niveaux 2 et 3 : OCR + détection UI sont les étapes lourdes en GPU.
|
||||||
|
# On sérialise via GPUResourceManager.acquire_inference() pour éviter
|
||||||
|
# que ExecutionLoop et stream_processor saturent simultanément la VRAM
|
||||||
|
# sur RTX 5070 (12 Go). Timeout généreux : un appel peut prendre 15-20s.
|
||||||
|
with _acquire_gpu_context(timeout=60.0) as acquired:
|
||||||
|
if not acquired:
|
||||||
|
logger.warning(
|
||||||
|
"Timeout en attendant le lock GPU pour ScreenAnalyzer.analyze() "
|
||||||
|
"→ exécution sans sérialisation (risque saturation VRAM)"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Niveau 2 : Perception (OCR) — mesure du temps OCR
|
||||||
|
ocr_t0 = time.time()
|
||||||
|
detected_text = self._extract_text_with(ocr_instance, screenshot_path)
|
||||||
|
ocr_ms = (time.time() - ocr_t0) * 1000
|
||||||
|
|
||||||
|
# Niveau 3 : UI Elements — mesure du temps détection
|
||||||
|
ui_t0 = time.time()
|
||||||
|
ui_elements = self._detect_ui_elements_with(
|
||||||
|
ui_detector_instance, screenshot_path, window_info
|
||||||
|
)
|
||||||
|
ui_ms = (time.time() - ui_t0) * 1000
|
||||||
|
|
||||||
perception = PerceptionLevel(
|
perception = PerceptionLevel(
|
||||||
embedding=EmbeddingRef(
|
embedding=EmbeddingRef(
|
||||||
provider="openclip_ViT-B-32",
|
provider="openclip_ViT-B-32",
|
||||||
@@ -106,13 +238,10 @@ class ScreenAnalyzer:
|
|||||||
dimensions=512,
|
dimensions=512,
|
||||||
),
|
),
|
||||||
detected_text=detected_text,
|
detected_text=detected_text,
|
||||||
text_detection_method=self._get_ocr_method_name(),
|
text_detection_method=self._get_ocr_method_name(ocr_instance),
|
||||||
confidence_avg=0.85 if detected_text else 0.0,
|
confidence_avg=0.85 if detected_text else 0.0,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Niveau 3 : UI Elements
|
|
||||||
ui_elements = self._detect_ui_elements(screenshot_path, window_info)
|
|
||||||
|
|
||||||
# Niveau 4 : Contexte
|
# Niveau 4 : Contexte
|
||||||
window_ctx = self._build_window_context(window_info)
|
window_ctx = self._build_window_context(window_info)
|
||||||
context_level = self._build_context_level(context)
|
context_level = self._build_context_level(context)
|
||||||
@@ -120,22 +249,28 @@ class ScreenAnalyzer:
|
|||||||
state = ScreenState(
|
state = ScreenState(
|
||||||
screen_state_id=state_id,
|
screen_state_id=state_id,
|
||||||
timestamp=datetime.now(),
|
timestamp=datetime.now(),
|
||||||
session_id=self.session_id,
|
session_id=effective_session_id,
|
||||||
window=window_ctx,
|
window=window_ctx,
|
||||||
raw=raw,
|
raw=raw,
|
||||||
perception=perception,
|
perception=perception,
|
||||||
context=context_level,
|
context=context_level,
|
||||||
metadata={
|
metadata={
|
||||||
"analyzer_version": "1.0",
|
"analyzer_version": "1.1",
|
||||||
|
"session_id": effective_session_id,
|
||||||
"ui_elements_count": len(ui_elements),
|
"ui_elements_count": len(ui_elements),
|
||||||
"text_regions_count": len(detected_text),
|
"text_regions_count": len(detected_text),
|
||||||
|
"ocr_ms": ocr_ms,
|
||||||
|
"ui_ms": ui_ms,
|
||||||
|
"ocr_enabled": enable_ocr,
|
||||||
|
"ui_detection_enabled": enable_ui_detection,
|
||||||
},
|
},
|
||||||
ui_elements=ui_elements,
|
ui_elements=ui_elements,
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"ScreenState {state_id} construit: "
|
f"ScreenState {state_id} construit: "
|
||||||
f"{len(ui_elements)} éléments UI, {len(detected_text)} textes détectés"
|
f"{len(ui_elements)} éléments UI, {len(detected_text)} textes détectés "
|
||||||
|
f"(ocr={enable_ocr}, ui={enable_ui_detection})"
|
||||||
)
|
)
|
||||||
return state
|
return state
|
||||||
|
|
||||||
@@ -145,11 +280,16 @@ class ScreenAnalyzer:
|
|||||||
save_dir: str = "data/screens",
|
save_dir: str = "data/screens",
|
||||||
window_info: Optional[Dict[str, Any]] = None,
|
window_info: Optional[Dict[str, Any]] = None,
|
||||||
context: Optional[Dict[str, Any]] = None,
|
context: Optional[Dict[str, Any]] = None,
|
||||||
|
*,
|
||||||
|
enable_ocr: bool = True,
|
||||||
|
enable_ui_detection: bool = True,
|
||||||
|
session_id: str = "",
|
||||||
) -> ScreenState:
|
) -> ScreenState:
|
||||||
"""
|
"""
|
||||||
Analyser une PIL Image (utile quand on a déjà l'image en mémoire).
|
Analyser une PIL Image (utile quand on a déjà l'image en mémoire).
|
||||||
|
|
||||||
Sauvegarde l'image sur disque puis appelle analyze().
|
Sauvegarde l'image sur disque puis appelle analyze(). Les flags
|
||||||
|
runtime sont propagés à `analyze()` en kwargs-only.
|
||||||
"""
|
"""
|
||||||
save_path = Path(save_dir)
|
save_path = Path(save_dir)
|
||||||
save_path.mkdir(parents=True, exist_ok=True)
|
save_path.mkdir(parents=True, exist_ok=True)
|
||||||
@@ -159,7 +299,49 @@ class ScreenAnalyzer:
|
|||||||
filepath = save_path / filename
|
filepath = save_path / filename
|
||||||
|
|
||||||
image.save(str(filepath))
|
image.save(str(filepath))
|
||||||
return self.analyze(str(filepath), window_info=window_info, context=context)
|
return self.analyze(
|
||||||
|
str(filepath),
|
||||||
|
window_info=window_info,
|
||||||
|
context=context,
|
||||||
|
enable_ocr=enable_ocr,
|
||||||
|
enable_ui_detection=enable_ui_detection,
|
||||||
|
session_id=session_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Résolution des instances OCR / UI selon les flags d'appel
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
def _resolve_ocr_instance(self, *, enable_ocr: bool):
|
||||||
|
"""
|
||||||
|
Retourner l'instance OCR à utiliser pour cet appel.
|
||||||
|
|
||||||
|
- `enable_ocr=False` → None (pas d'init, pas d'appel OCR)
|
||||||
|
- sinon → init lazy sous lock si nécessaire, puis retour de `self._ocr`
|
||||||
|
|
||||||
|
Ne mute `self._ocr` / `self._ocr_initialized` QUE pendant l'init lazy
|
||||||
|
réelle, jamais pour bypasser l'OCR d'un appel.
|
||||||
|
"""
|
||||||
|
if not enable_ocr:
|
||||||
|
return None
|
||||||
|
if not self._ocr_initialized:
|
||||||
|
with self._init_lock:
|
||||||
|
# Double-check : un autre thread a pu initialiser entretemps.
|
||||||
|
if not self._ocr_initialized:
|
||||||
|
self._ensure_ocr_locked()
|
||||||
|
return self._ocr
|
||||||
|
|
||||||
|
def _resolve_ui_detector_instance(self, *, enable_ui_detection: bool):
|
||||||
|
"""
|
||||||
|
Retourner l'instance UIDetector pour cet appel (idem _resolve_ocr_instance).
|
||||||
|
"""
|
||||||
|
if not enable_ui_detection:
|
||||||
|
return None
|
||||||
|
if not self._ui_detector_initialized:
|
||||||
|
with self._init_lock:
|
||||||
|
if not self._ui_detector_initialized:
|
||||||
|
self._ensure_ui_detector_locked()
|
||||||
|
return self._ui_detector
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# Niveau 1 : Raw
|
# Niveau 1 : Raw
|
||||||
@@ -182,23 +364,24 @@ class ScreenAnalyzer:
|
|||||||
# Niveau 2 : Perception — OCR
|
# Niveau 2 : Perception — OCR
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
|
|
||||||
def _extract_text(self, screenshot_path: str) -> List[str]:
|
def _extract_text_with(self, ocr_callable, screenshot_path: str) -> List[str]:
|
||||||
"""Extraire le texte d'un screenshot via OCR."""
|
"""Extraire le texte via un callable OCR donné (peut être None)."""
|
||||||
self._ensure_ocr()
|
if ocr_callable is None:
|
||||||
|
|
||||||
if self._ocr is None:
|
|
||||||
return []
|
return []
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return self._ocr(screenshot_path)
|
return ocr_callable(screenshot_path)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"OCR échoué: {e}")
|
logger.warning(f"OCR échoué: {e}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def _ensure_ocr(self) -> None:
|
def _ensure_ocr_locked(self) -> None:
|
||||||
"""Initialiser le moteur OCR (lazy)."""
|
"""
|
||||||
if self._ocr_initialized:
|
Initialiser le moteur OCR (appelé sous `self._init_lock`).
|
||||||
return
|
|
||||||
|
Ne doit PAS être appelé hors de `_resolve_ocr_instance()`.
|
||||||
|
"""
|
||||||
|
# Mutation intentionnelle : on installe l'instance OCR réelle.
|
||||||
|
# Protégée par le lock d'init (pas le lock GPU).
|
||||||
self._ocr_initialized = True
|
self._ocr_initialized = True
|
||||||
|
|
||||||
engine = self._ocr_engine_name
|
engine = self._ocr_engine_name
|
||||||
@@ -257,8 +440,9 @@ class ScreenAnalyzer:
|
|||||||
|
|
||||||
return ocr_func
|
return ocr_func
|
||||||
|
|
||||||
def _get_ocr_method_name(self) -> str:
|
def _get_ocr_method_name(self, ocr_instance=None) -> str:
|
||||||
if self._ocr is None:
|
"""Nom du moteur OCR effectivement utilisé pour cet appel."""
|
||||||
|
if ocr_instance is None:
|
||||||
return "none"
|
return "none"
|
||||||
if self._ocr_engine_name:
|
if self._ocr_engine_name:
|
||||||
return self._ocr_engine_name
|
return self._ocr_engine_name
|
||||||
@@ -268,19 +452,18 @@ class ScreenAnalyzer:
|
|||||||
# Niveau 3 : UI Elements
|
# Niveau 3 : UI Elements
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
|
|
||||||
def _detect_ui_elements(
|
def _detect_ui_elements_with(
|
||||||
self,
|
self,
|
||||||
|
ui_detector,
|
||||||
screenshot_path: str,
|
screenshot_path: str,
|
||||||
window_info: Optional[Dict[str, Any]] = None,
|
window_info: Optional[Dict[str, Any]] = None,
|
||||||
) -> List[UIElement]:
|
) -> List[UIElement]:
|
||||||
"""Détecter les éléments UI dans le screenshot."""
|
"""Détecter les éléments UI via un détecteur donné (peut être None)."""
|
||||||
self._ensure_ui_detector()
|
if ui_detector is None:
|
||||||
|
|
||||||
if self._ui_detector is None:
|
|
||||||
return []
|
return []
|
||||||
|
|
||||||
try:
|
try:
|
||||||
elements = self._ui_detector.detect(
|
elements = ui_detector.detect(
|
||||||
screenshot_path, window_context=window_info
|
screenshot_path, window_context=window_info
|
||||||
)
|
)
|
||||||
return elements
|
return elements
|
||||||
@@ -288,10 +471,10 @@ class ScreenAnalyzer:
|
|||||||
logger.warning(f"Détection UI échouée: {e}")
|
logger.warning(f"Détection UI échouée: {e}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def _ensure_ui_detector(self) -> None:
|
def _ensure_ui_detector_locked(self) -> None:
|
||||||
"""Initialiser le UIDetector (lazy)."""
|
"""
|
||||||
if self._ui_detector_initialized:
|
Initialiser le UIDetector (appelé sous `self._init_lock`).
|
||||||
return
|
"""
|
||||||
self._ui_detector_initialized = True
|
self._ui_detector_initialized = True
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|||||||
409
core/pipeline/screen_state_cache.py
Normal file
409
core/pipeline/screen_state_cache.py
Normal file
@@ -0,0 +1,409 @@
|
|||||||
|
"""
|
||||||
|
ScreenStateCache — Cache perceptuel de ScreenState (context-aware).
|
||||||
|
|
||||||
|
Objectif : éviter de réanalyser un screenshot identique (5-15s VLM/OCR)
|
||||||
|
à chaque step de la boucle d'exécution.
|
||||||
|
|
||||||
|
Principe (Lot D — avril 2026) :
|
||||||
|
- Clé = composite de 6 éléments pour éviter les collisions silencieuses
|
||||||
|
entre contextes différents partageant un même screenshot :
|
||||||
|
1. phash (dhash 8x8 du screenshot) — calculé en ~2-5ms
|
||||||
|
2. window_title (titre fenêtre active)
|
||||||
|
3. app_name (nom process actif)
|
||||||
|
4. enable_ocr (flag runtime)
|
||||||
|
5. enable_ui_detection (flag runtime)
|
||||||
|
6. workflow_id (isolation inter-workflows)
|
||||||
|
- TTL par défaut : 2 secondes (configurable)
|
||||||
|
- Invalidation explicite possible (par clé composite ou globale)
|
||||||
|
- invalidate_if_changed reste piloté par le phash seul (détection de
|
||||||
|
changement visuel majeur, indépendant du contexte)
|
||||||
|
- Thread-safe (lock interne)
|
||||||
|
|
||||||
|
API principale :
|
||||||
|
>>> cache = ScreenStateCache(ttl_seconds=2.0)
|
||||||
|
>>> state, hit, ms = cache.get_or_compute(
|
||||||
|
... screenshot_path, compute_fn,
|
||||||
|
... window_title="App", app_name="app.exe",
|
||||||
|
... enable_ocr=True, enable_ui_detection=True,
|
||||||
|
... workflow_id="wf_123",
|
||||||
|
... )
|
||||||
|
|
||||||
|
La fonction `compute_fn` prend le chemin du screenshot et doit retourner
|
||||||
|
un `ScreenState`. Elle n'est appelée qu'en cache miss.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import logging
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Callable, Optional, Tuple
|
||||||
|
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
|
from core.models.screen_state import ScreenState
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Hash perceptuel (dhash simple, sans dépendance imagehash)
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def _hamming_distance_hex(a: str, b: str) -> int:
|
||||||
|
"""
|
||||||
|
Distance de Hamming entre deux chaînes hexadécimales de même longueur.
|
||||||
|
|
||||||
|
Retourne le nombre de bits qui diffèrent entre les deux hashes.
|
||||||
|
Si les longueurs diffèrent, on pad à droite par des zéros.
|
||||||
|
"""
|
||||||
|
if len(a) != len(b):
|
||||||
|
max_len = max(len(a), len(b))
|
||||||
|
a = a.ljust(max_len, "0")
|
||||||
|
b = b.ljust(max_len, "0")
|
||||||
|
try:
|
||||||
|
xor = int(a, 16) ^ int(b, 16)
|
||||||
|
return bin(xor).count("1")
|
||||||
|
except ValueError:
|
||||||
|
# Fallback : comparaison caractère à caractère
|
||||||
|
return sum(1 for ca, cb in zip(a, b) if ca != cb) * 4
|
||||||
|
|
||||||
|
|
||||||
|
def compute_perceptual_hash(screenshot_path: str, size: int = 8) -> str:
    """
    Compute a dhash (difference hash) for a screenshot.

    Algorithm:
    1. Convert to grayscale.
    2. Resize to (size+1) x size.
    3. Compare each pixel with its right-hand neighbour (dhash).
    4. Return a hexadecimal hash of size*size bits.

    Robust to small variations (cursor, blink, compression).
    Typical cost: 2-5 ms on a 1920x1080 capture.

    Args:
        screenshot_path: Path to the image file.
        size: Hash size (8 = 64 bits, default).

    Returns:
        Hexadecimal string (size*size/4 characters). On image failure,
        falls back to a truncated md5 of the raw file bytes; if the file
        cannot be read at all, returns a unique timestamp-based sentinel.
    """
    try:
        # Fix: use a context manager — Image.open() is lazy and otherwise
        # keeps the file handle open until garbage collection.
        with Image.open(screenshot_path) as img:
            gray = img.convert("L").resize((size + 1, size), Image.LANCZOS)
            pixels = list(gray.getdata())

        # dhash: each bit encodes whether a pixel is brighter than its
        # right neighbour. Bits are packed into the integer as we go.
        value = 0
        for row in range(size):
            base = row * (size + 1)
            for col in range(size):
                bit = 1 if pixels[base + col] > pixels[base + col + 1] else 0
                value = (value << 1) | bit
        return format(value, f"0{size * size // 4}x")
    except Exception as e:
        logger.warning(f"Hash perceptuel échoué pour {screenshot_path}: {e}")
        # Fallback: hash of the raw file content (stable, not perceptual).
        try:
            data = Path(screenshot_path).read_bytes()
            return hashlib.md5(data).hexdigest()[:16]
        except Exception:
            # File unreadable — return a sentinel that never matches a
            # previous entry, so the caller always treats it as a miss.
            return f"unhashable_{int(time.time() * 1000)}"
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Clé composite (Lot D)
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
def _make_cache_key(
|
||||||
|
phash: str,
|
||||||
|
window_title: str,
|
||||||
|
app_name: str,
|
||||||
|
enable_ocr: bool,
|
||||||
|
enable_ui_detection: bool,
|
||||||
|
workflow_id: str,
|
||||||
|
) -> str:
|
||||||
|
"""
|
||||||
|
Construire une clé composite stable pour le cache.
|
||||||
|
|
||||||
|
Combine les 6 dimensions du contexte d'exécution dans une chaîne
|
||||||
|
hexadécimale (md5 tronqué à 16 caractères), préfixée par le phash pour
|
||||||
|
conserver une lisibilité minimale en debug (log : `aabb…|ctx=1234…`).
|
||||||
|
|
||||||
|
NB : On hash plutôt que concaténer brut pour :
|
||||||
|
- Borner la taille de la clé même si window_title est long
|
||||||
|
- Éviter les collisions triviales (séparateur présent dans un titre)
|
||||||
|
- Rendre la clé opaque (pas de PII en clair dans les logs de cache)
|
||||||
|
|
||||||
|
Args:
|
||||||
|
phash: Hash perceptuel du screenshot (dhash 8x8)
|
||||||
|
window_title: Titre de la fenêtre active (str)
|
||||||
|
app_name: Nom du process actif (str)
|
||||||
|
enable_ocr: Flag runtime OCR (bool)
|
||||||
|
enable_ui_detection: Flag runtime détection UI (bool)
|
||||||
|
workflow_id: ID du workflow en cours (str, "" pour legacy)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Clé composite `{phash}|{ctx_hash}` où ctx_hash = md5(16)
|
||||||
|
"""
|
||||||
|
# Sérialisation déterministe ; `|` comme séparateur interne puisque hashé.
|
||||||
|
ctx_repr = (
|
||||||
|
f"{window_title or ''}\x1f"
|
||||||
|
f"{app_name or ''}\x1f"
|
||||||
|
f"{int(bool(enable_ocr))}\x1f"
|
||||||
|
f"{int(bool(enable_ui_detection))}\x1f"
|
||||||
|
f"{workflow_id or ''}"
|
||||||
|
)
|
||||||
|
ctx_hash = hashlib.md5(ctx_repr.encode("utf-8")).hexdigest()[:16]
|
||||||
|
return f"{phash}|{ctx_hash}"
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Entry
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class _CacheEntry:
    """One cached ScreenState plus the bookkeeping for expiry/invalidation."""
    # The cached state returned to callers on a hit.
    state: ScreenState
    # Insertion timestamp, compared against the cache TTL on every read.
    created_at: float
    # Perceptual hash alone (used by invalidate_if_changed, which compares
    # visual distance independently of the composite context key).
    phash: str
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Cache
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
class ScreenStateCache:
    """
    ScreenState cache with TTL and a context-aware composite key.

    Thread-safe: every read/write of the underlying store — and of the
    metric counters — happens under an internal lock.

    Clock choice (fix): entry ages are tracked with ``time.monotonic()`` so
    TTL expiry cannot be skewed by wall-clock adjustments (NTP, DST), and
    latencies are measured with ``time.perf_counter()``. Both timestamps
    are internal; no public API exposes them.
    """

    def __init__(self, ttl_seconds: float = 2.0, max_entries: int = 16):
        """
        Args:
            ttl_seconds: Lifetime of a cache entry, in seconds.
            max_entries: Maximum entry count before simple oldest-first
                eviction.
        """
        self.ttl_seconds = ttl_seconds
        self.max_entries = max_entries
        # Key = composite key (_make_cache_key), value = _CacheEntry
        self._store: dict[str, _CacheEntry] = {}
        self._lock = threading.Lock()

        # Simple metrics (useful for debugging / logs)
        self.hits = 0
        self.misses = 0
        self.invalidations = 0

    # -------------------------------------------------------------------------
    # Low-level API (by composite key)
    # -------------------------------------------------------------------------

    def _get(self, composite_key: str) -> Optional[ScreenState]:
        """Return the state for this composite key if still fresh, else None."""
        with self._lock:
            entry = self._store.get(composite_key)
            if entry is None:
                return None
            # Monotonic clock: the TTL comparison cannot be broken by a
            # system clock jump between _set() and _get().
            if time.monotonic() - entry.created_at > self.ttl_seconds:
                # Expired — drop eagerly so stale entries don't linger.
                self._store.pop(composite_key, None)
                return None
            return entry.state

    def _set(self, composite_key: str, phash: str, state: ScreenState) -> None:
        """Store a state under this composite key (oldest-first eviction)."""
        with self._lock:
            # Simple eviction: only when full AND inserting a *new* key;
            # overwriting an existing key never evicts.
            if (
                len(self._store) >= self.max_entries
                and composite_key not in self._store
            ):
                oldest_key = min(
                    self._store, key=lambda k: self._store[k].created_at
                )
                self._store.pop(oldest_key, None)

            self._store[composite_key] = _CacheEntry(
                state=state,
                created_at=time.monotonic(),
                phash=phash,
            )

    def invalidate(self, composite_key: Optional[str] = None) -> None:
        """
        Invalidate one entry or the whole cache.

        Args:
            composite_key: Key to invalidate. If None, clears the entire
                cache (counted as a single invalidation).
        """
        with self._lock:
            if composite_key is None:
                self._store.clear()
            else:
                self._store.pop(composite_key, None)
            self.invalidations += 1

    def invalidate_if_changed(
        self,
        screenshot_path: str,
        threshold: float = 0.3,
    ) -> bool:
        """
        Invalidate the cache if the screen has changed enough.

        Compares the dhash of the current screenshot with the phash (alone)
        of every cached entry. The decision is deliberately independent of
        the rest of the composite key: a major visual change makes every
        entry obsolete regardless of context.

        Args:
            screenshot_path: Path of the current screenshot.
            threshold: Fraction of bits that must differ (0.0-1.0).
                0.3 = 30% (~19 bits of 64) = significant change.

        Returns:
            True if the cache was invalidated, False otherwise.
        """
        # Cheap lock-free pre-check; re-checked under the lock below.
        if not self._store:
            return False

        current_phash = compute_perceptual_hash(screenshot_path)

        # Total bits: 64 for a standard 8x8 dhash. Derived from the hex
        # length of the current hash to stay generic.
        total_bits = len(current_phash) * 4
        if total_bits == 0:
            return False

        threshold_bits = threshold * total_bits

        with self._lock:
            if not self._store:
                return False

            # Minimum Hamming distance against the cached phashes
            # (entry.phash, not the composite key).
            min_distance = None
            for entry in self._store.values():
                distance = _hamming_distance_hex(current_phash, entry.phash)
                if min_distance is None or distance < min_distance:
                    min_distance = distance

            if min_distance is not None and min_distance > threshold_bits:
                size_before = len(self._store)
                self._store.clear()
                self.invalidations += 1
                logger.debug(
                    f"[ScreenStateCache] invalidate_if_changed: "
                    f"distance={min_distance}/{total_bits} > "
                    f"threshold={threshold_bits:.1f} → {size_before} entrées purgées"
                )
                return True
            return False

    # -------------------------------------------------------------------------
    # High-level API (context-aware)
    # -------------------------------------------------------------------------

    def get_or_compute(
        self,
        screenshot_path: str,
        compute_fn: Callable[[str], ScreenState],
        *,
        window_title: str = "",
        app_name: str = "",
        enable_ocr: bool = True,
        enable_ui_detection: bool = True,
        workflow_id: str = "",
        force_refresh: bool = False,
    ) -> Tuple[ScreenState, bool, float]:
        """
        Fetch or compute the ScreenState for a screenshot + context.

        Cache key = composite(phash, window_title, app_name, enable_ocr,
        enable_ui_detection, workflow_id). Two different contexts sharing
        the same screenshot do NOT collide.

        Backward compatibility: every context kwarg has a default, so a
        legacy caller that has not been migrated shares the same cache
        entry as any other legacy caller (previous behaviour).

        NOTE: ``compute_fn`` runs outside the lock, so two threads missing
        on the same key may both compute; the later write wins. Deliberate
        trade-off so a slow computation never serialises all cache access.

        Args:
            screenshot_path: Screenshot path.
            compute_fn: Builds a ScreenState on cache miss.
            window_title: Active window title (visual context).
            app_name: Active process name (application context).
            enable_ocr: Runtime flag — separates states with/without OCR.
            enable_ui_detection: Runtime flag — separates states
                with/without UI detection.
            workflow_id: Workflow ID — inter-workflow isolation.
            force_refresh: Skip the cache and recompute.

        Returns:
            Tuple (state, cache_hit, elapsed_ms).
        """
        t0 = time.perf_counter()
        phash = compute_perceptual_hash(screenshot_path)
        composite_key = _make_cache_key(
            phash=phash,
            window_title=window_title,
            app_name=app_name,
            enable_ocr=enable_ocr,
            enable_ui_detection=enable_ui_detection,
            workflow_id=workflow_id,
        )

        if not force_refresh:
            cached = self._get(composite_key)
            if cached is not None:
                # Fix: counter updates under the lock — `+=` on an attribute
                # is not atomic, and stats() reads these under the lock.
                with self._lock:
                    self.hits += 1
                elapsed_ms = (time.perf_counter() - t0) * 1000
                logger.debug(
                    f"[ScreenStateCache] HIT key={composite_key[:24]}… "
                    f"({elapsed_ms:.1f}ms)"
                )
                return cached, True, elapsed_ms

        # Cache miss → full computation (outside the lock, see NOTE above)
        with self._lock:
            self.misses += 1
        state = compute_fn(screenshot_path)
        self._set(composite_key, phash, state)
        elapsed_ms = (time.perf_counter() - t0) * 1000
        logger.debug(
            f"[ScreenStateCache] MISS key={composite_key[:24]}… "
            f"({elapsed_ms:.1f}ms)"
        )
        return state, False, elapsed_ms

    def stats(self) -> dict:
        """Return the cache metrics as a plain dict (snapshot under lock)."""
        with self._lock:
            total = self.hits + self.misses
            return {
                "hits": self.hits,
                "misses": self.misses,
                "invalidations": self.invalidations,
                "hit_rate": self.hits / total if total > 0 else 0.0,
                "size": len(self._store),
                "max_entries": self.max_entries,
                "ttl_seconds": self.ttl_seconds,
            }

    def __len__(self) -> int:
        # Current number of live (possibly expired-but-unreaped) entries.
        with self._lock:
            return len(self._store)
|
||||||
@@ -137,10 +137,14 @@ class WorkflowPipeline:
|
|||||||
else:
|
else:
|
||||||
logger.warning(f"UI Detector not available: {e}")
|
logger.warning(f"UI Detector not available: {e}")
|
||||||
|
|
||||||
# 6. Graph Builder
|
# 6. Graph Builder — reçoit l'UIDetector pour enrichir les
|
||||||
|
# ScreenStates avec ui_elements + OCR pendant _create_screen_states.
|
||||||
|
# Sans ça, les TargetSpec ne peuvent pas être ancrés (by_role=unknown).
|
||||||
self.graph_builder = GraphBuilder(
|
self.graph_builder = GraphBuilder(
|
||||||
embedding_builder=self.embedding_builder,
|
embedding_builder=self.embedding_builder,
|
||||||
faiss_manager=self.faiss_manager
|
faiss_manager=self.faiss_manager,
|
||||||
|
ui_detector=self.ui_detector,
|
||||||
|
enable_ui_enrichment=enable_ui_detection,
|
||||||
)
|
)
|
||||||
logger.info("✓ Graph Builder initialized")
|
logger.info("✓ Graph Builder initialized")
|
||||||
|
|
||||||
@@ -355,87 +359,177 @@ class WorkflowPipeline:
|
|||||||
# Mode MATCHING : Reconnaissance de l'état actuel
|
# Mode MATCHING : Reconnaissance de l'état actuel
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
|
|
||||||
def match_current_state(
|
def match_current_state_from_state(
|
||||||
self,
|
self,
|
||||||
screenshot_path: str,
|
screen_state: ScreenState,
|
||||||
workflow_id: Optional[str] = None,
|
workflow_id: Optional[str] = None,
|
||||||
window_title: Optional[str] = None
|
*,
|
||||||
|
min_similarity: float = 0.5,
|
||||||
) -> Optional[Dict[str, Any]]:
|
) -> Optional[Dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
Identifier dans quel node se trouve l'écran actuel.
|
Matcher un ``ScreenState`` enrichi contre les nodes d'un workflow.
|
||||||
|
|
||||||
|
Lot E — premier vrai matching context-aware. Cette méthode consomme
|
||||||
|
directement le ``ScreenState`` déjà construit par ``ExecutionLoop``
|
||||||
|
(avec ``window_title``, ``detected_text`` et ``ui_elements``
|
||||||
|
renseignés par le ``ScreenAnalyzer``) au lieu de reconstruire un
|
||||||
|
stub vide avec ``window_title="Unknown"``.
|
||||||
|
|
||||||
|
Stratégie :
|
||||||
|
1. Si le ``HierarchicalMatcher`` est disponible ET que le workflow
|
||||||
|
cible est chargeable, on privilégie le matching multi-niveau
|
||||||
|
(fenêtre → région → élément) qui exploite pleinement les
|
||||||
|
``ui_elements`` et le ``window_title``.
|
||||||
|
2. Sinon on retombe sur le matching par embedding via FAISS
|
||||||
|
(même logique que l'ancien ``match_current_state``, mais avec
|
||||||
|
le ``ScreenState`` fourni, pas un stub).
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
screenshot_path: Chemin vers le screenshot actuel
|
screen_state: ``ScreenState`` complet (ui_elements + detected_text
|
||||||
workflow_id: ID du workflow à matcher (tous si None)
|
+ window_info) construit en amont par l'``ExecutionLoop``.
|
||||||
window_title: Titre de fenêtre pour contexte
|
workflow_id: ID du workflow cible (tous si None).
|
||||||
|
min_similarity: seuil minimum de confidence pour considérer un
|
||||||
|
match valide. Conserve la sémantique historique (0.5 pour
|
||||||
|
le hiérarchique, 0.85 pour le FAISS fallback).
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Dict avec node_id, workflow_id, confidence, ou None si pas de match
|
Dict avec ``node_id``, ``workflow_id``, ``confidence`` (+ détails
|
||||||
|
du matching hiérarchique si applicable), ou ``None`` si aucun
|
||||||
|
match ne dépasse le seuil.
|
||||||
"""
|
"""
|
||||||
logger.debug(f"Matching screenshot: {screenshot_path}")
|
logger.debug(
|
||||||
|
"Matching ScreenState (app=%s, title=%s, ui_elements=%d, "
|
||||||
# Créer un ScreenState temporaire
|
"detected_text=%d)",
|
||||||
from core.models.screen_state import (
|
screen_state.window.app_name,
|
||||||
WindowContext, RawLevel, PerceptionLevel, ContextLevel, EmbeddingRef
|
screen_state.window.window_title,
|
||||||
|
len(screen_state.ui_elements),
|
||||||
|
len(screen_state.perception.detected_text),
|
||||||
)
|
)
|
||||||
|
|
||||||
screenshot_path = Path(screenshot_path)
|
# --- Stratégie 1 : matching hiérarchique si workflow disponible ---
|
||||||
|
if workflow_id:
|
||||||
window = WindowContext(
|
workflow = self.load_workflow(workflow_id)
|
||||||
app_name="unknown",
|
if workflow is not None and getattr(workflow, "nodes", None):
|
||||||
window_title=window_title or "Unknown",
|
try:
|
||||||
screen_resolution=[1920, 1080],
|
hier_result = self._match_hierarchical_from_state(
|
||||||
workspace="main"
|
screen_state=screen_state,
|
||||||
|
workflow=workflow,
|
||||||
|
workflow_id=workflow_id,
|
||||||
|
min_similarity=min_similarity,
|
||||||
|
)
|
||||||
|
if hier_result is not None:
|
||||||
|
return hier_result
|
||||||
|
except Exception as exc:
|
||||||
|
# Ne jamais casser le matching sur une erreur du
|
||||||
|
# matcher hiérarchique : on retombe sur FAISS.
|
||||||
|
logger.debug(
|
||||||
|
f"Hierarchical matching failed, fallback FAISS: {exc}"
|
||||||
)
|
)
|
||||||
|
|
||||||
raw = RawLevel(
|
# --- Stratégie 2 : fallback embedding + FAISS ---
|
||||||
screenshot_path=str(screenshot_path),
|
return self._match_via_faiss(
|
||||||
capture_method="manual",
|
screen_state=screen_state,
|
||||||
file_size_bytes=screenshot_path.stat().st_size if screenshot_path.exists() else 0
|
workflow_id=workflow_id,
|
||||||
|
min_similarity=min_similarity,
|
||||||
)
|
)
|
||||||
|
|
||||||
perception = PerceptionLevel(
|
def _match_hierarchical_from_state(
|
||||||
embedding=EmbeddingRef(
|
self,
|
||||||
provider="openclip_ViT-B-32",
|
screen_state: ScreenState,
|
||||||
vector_id="temp",
|
workflow: Workflow,
|
||||||
dimensions=512
|
workflow_id: str,
|
||||||
),
|
min_similarity: float,
|
||||||
detected_text=[],
|
) -> Optional[Dict[str, Any]]:
|
||||||
text_detection_method="pending",
|
"""
|
||||||
confidence_avg=0.0
|
Déléguer le matching au ``HierarchicalMatcher`` en extrayant
|
||||||
|
``window_info``, ``detected_elements`` et le screenshot à partir du
|
||||||
|
``ScreenState`` fourni. Factorise la logique de ``match_hierarchical``
|
||||||
|
sans re-ouvrir l'image si ce n'est pas nécessaire.
|
||||||
|
"""
|
||||||
|
# Reconstruire window_info à partir du ScreenState (pas "Unknown")
|
||||||
|
window_info = {
|
||||||
|
"title": screen_state.window.window_title,
|
||||||
|
"app_name": screen_state.window.app_name,
|
||||||
|
"window_title": screen_state.window.window_title,
|
||||||
|
}
|
||||||
|
detected_elements = list(screen_state.ui_elements)
|
||||||
|
|
||||||
|
# Ouvrir le screenshot si nécessaire (le matcher peut en avoir besoin
|
||||||
|
# pour du matching au niveau région). Si le chemin n'existe pas, on
|
||||||
|
# passe None et laisse le matcher travailler avec window + elements.
|
||||||
|
screenshot = None
|
||||||
|
path = screen_state.raw.screenshot_path
|
||||||
|
if path:
|
||||||
|
try:
|
||||||
|
from PIL import Image
|
||||||
|
screenshot = Image.open(path)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.debug(f"Screenshot unavailable for hierarchical match: {exc}")
|
||||||
|
|
||||||
|
# Contexte temporel par workflow
|
||||||
|
if workflow_id not in self._temporal_context:
|
||||||
|
self._temporal_context[workflow_id] = TemporalContext()
|
||||||
|
temporal_context = self._temporal_context[workflow_id]
|
||||||
|
|
||||||
|
result: MatchResult = self.hierarchical_matcher.match(
|
||||||
|
screenshot=screenshot,
|
||||||
|
workflow=workflow,
|
||||||
|
window_info=window_info,
|
||||||
|
detected_elements=detected_elements,
|
||||||
|
temporal_context=temporal_context,
|
||||||
)
|
)
|
||||||
|
|
||||||
context = ContextLevel(
|
if result.confidence < min_similarity:
|
||||||
current_workflow_candidate=workflow_id,
|
logger.debug(
|
||||||
workflow_step=None,
|
f"Hierarchical match below threshold: {result.confidence:.3f} "
|
||||||
user_id="matcher",
|
f"(min={min_similarity})"
|
||||||
tags=[],
|
|
||||||
business_variables={}
|
|
||||||
)
|
)
|
||||||
|
return None
|
||||||
|
|
||||||
current_state = ScreenState(
|
# Mémoriser le match pour le boost temporel suivant
|
||||||
screen_state_id=f"match_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
|
temporal_context.add_match(result.node_id, result.confidence)
|
||||||
timestamp=datetime.now(),
|
|
||||||
session_id="matching",
|
|
||||||
window=window,
|
|
||||||
raw=raw,
|
|
||||||
perception=perception,
|
|
||||||
context=context,
|
|
||||||
ui_elements=[]
|
|
||||||
)
|
|
||||||
|
|
||||||
# Calculer embedding
|
return {
|
||||||
state_embedding = self.embedding_builder.build(current_state)
|
"node_id": result.node_id,
|
||||||
|
"workflow_id": workflow_id,
|
||||||
|
"confidence": result.confidence,
|
||||||
|
"window_confidence": result.window_confidence,
|
||||||
|
"region_confidence": result.region_confidence,
|
||||||
|
"element_confidence": result.element_confidence,
|
||||||
|
"temporal_boost": result.temporal_boost,
|
||||||
|
"matched_variant": result.matched_variant,
|
||||||
|
"alternatives": [
|
||||||
|
{"node_id": alt.node_id, "confidence": alt.confidence}
|
||||||
|
for alt in result.alternatives
|
||||||
|
],
|
||||||
|
"match_time_ms": result.match_time_ms,
|
||||||
|
"match_type": "hierarchical",
|
||||||
|
}
|
||||||
|
|
||||||
|
def _match_via_faiss(
|
||||||
|
self,
|
||||||
|
screen_state: ScreenState,
|
||||||
|
workflow_id: Optional[str],
|
||||||
|
min_similarity: float,
|
||||||
|
) -> Optional[Dict[str, Any]]:
|
||||||
|
"""
|
||||||
|
Fallback embedding + recherche FAISS. On réutilise le ``ScreenState``
|
||||||
|
fourni (donc ses ``ui_elements`` et son ``window_title`` réels)
|
||||||
|
au lieu d'en recréer un stub.
|
||||||
|
"""
|
||||||
|
# Le seuil FAISS historique était 0.85. On l'honore comme plancher
|
||||||
|
# par défaut mais on respecte un ``min_similarity`` plus permissif
|
||||||
|
# si l'appelant en fournit un (hiérarchique pouvant déjà avoir échoué).
|
||||||
|
threshold = max(min_similarity, 0.85)
|
||||||
|
|
||||||
|
state_embedding = self.embedding_builder.build(screen_state)
|
||||||
query_vector = state_embedding.get_vector()
|
query_vector = state_embedding.get_vector()
|
||||||
|
|
||||||
# Rechercher dans FAISS
|
|
||||||
results = self.faiss_manager.search(query_vector, k=5)
|
results = self.faiss_manager.search(query_vector, k=5)
|
||||||
|
|
||||||
if not results:
|
if not results:
|
||||||
logger.debug("No match found in FAISS")
|
logger.debug("No match found in FAISS")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Filtrer par workflow si spécifié
|
|
||||||
for result in results:
|
for result in results:
|
||||||
metadata = result.get("metadata", {})
|
metadata = result.get("metadata", {})
|
||||||
result_workflow_id = metadata.get("workflow_id")
|
result_workflow_id = metadata.get("workflow_id")
|
||||||
@@ -444,17 +538,136 @@ class WorkflowPipeline:
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
similarity = result.get("similarity", 0)
|
similarity = result.get("similarity", 0)
|
||||||
if similarity >= 0.85: # Seuil de matching
|
if similarity >= threshold:
|
||||||
return {
|
return {
|
||||||
"node_id": metadata.get("node_id"),
|
"node_id": metadata.get("node_id"),
|
||||||
"workflow_id": result_workflow_id,
|
"workflow_id": result_workflow_id,
|
||||||
"confidence": similarity,
|
"confidence": similarity,
|
||||||
"state_embedding_id": state_embedding.embedding_id
|
"state_embedding_id": state_embedding.embedding_id,
|
||||||
|
"match_type": "faiss",
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.debug(f"Best match below threshold: {results[0].get('similarity', 0):.3f}")
|
logger.debug(
|
||||||
|
f"Best FAISS match below threshold: "
|
||||||
|
f"{results[0].get('similarity', 0):.3f} (min={threshold})"
|
||||||
|
)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def match_current_state(
|
||||||
|
self,
|
||||||
|
screenshot_path: str,
|
||||||
|
workflow_id: Optional[str] = None,
|
||||||
|
window_title: Optional[str] = None,
|
||||||
|
) -> Optional[Dict[str, Any]]:
|
||||||
|
"""
|
||||||
|
Identifier dans quel node se trouve l'écran actuel (API legacy).
|
||||||
|
|
||||||
|
Lot E — cette méthode est désormais un **wrapper** de rétrocompat :
|
||||||
|
elle construit un ``ScreenState`` enrichi via ``ScreenAnalyzer``
|
||||||
|
(au lieu d'un stub avec ``window_title="Unknown"``) puis délègue
|
||||||
|
à ``match_current_state_from_state``. Garantit la compat pour les
|
||||||
|
callers externes qui ne manipulent que le chemin du screenshot.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
screenshot_path: Chemin vers le screenshot actuel.
|
||||||
|
workflow_id: ID du workflow à matcher (tous si None).
|
||||||
|
window_title: Titre de fenêtre pour contexte (utilisé comme
|
||||||
|
hint si le ScreenAnalyzer n'est pas disponible).
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict avec ``node_id``, ``workflow_id``, ``confidence``, ou
|
||||||
|
``None`` si pas de match.
|
||||||
|
"""
|
||||||
|
logger.debug(f"Matching screenshot: {screenshot_path}")
|
||||||
|
|
||||||
|
# Construire un ScreenState enrichi via le ScreenAnalyzer partagé.
|
||||||
|
screen_state = self._build_screen_state_for_matching(
|
||||||
|
screenshot_path=screenshot_path,
|
||||||
|
workflow_id=workflow_id,
|
||||||
|
window_title=window_title,
|
||||||
|
)
|
||||||
|
|
||||||
|
return self.match_current_state_from_state(
|
||||||
|
screen_state=screen_state,
|
||||||
|
workflow_id=workflow_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
def _build_screen_state_for_matching(
|
||||||
|
self,
|
||||||
|
screenshot_path: str,
|
||||||
|
workflow_id: Optional[str],
|
||||||
|
window_title: Optional[str],
|
||||||
|
) -> ScreenState:
|
||||||
|
"""
|
||||||
|
Construire un ``ScreenState`` pour l'API legacy ``match_current_state``.
|
||||||
|
|
||||||
|
Tente d'utiliser le ``ScreenAnalyzer`` partagé ; en cas d'échec,
|
||||||
|
retombe sur un stub minimaliste (équivalent fonctionnel de l'ancien
|
||||||
|
comportement, mais clairement isolé ici).
|
||||||
|
"""
|
||||||
|
from core.models.screen_state import (
|
||||||
|
WindowContext, RawLevel, PerceptionLevel, ContextLevel, EmbeddingRef
|
||||||
|
)
|
||||||
|
|
||||||
|
path = Path(screenshot_path)
|
||||||
|
|
||||||
|
# Tentative 1 : ScreenAnalyzer partagé (résultat enrichi)
|
||||||
|
try:
|
||||||
|
from core.pipeline import get_screen_analyzer
|
||||||
|
analyzer = get_screen_analyzer()
|
||||||
|
if analyzer is not None:
|
||||||
|
window_info = None
|
||||||
|
if window_title:
|
||||||
|
window_info = {"title": window_title, "app_name": "unknown"}
|
||||||
|
return analyzer.analyze(
|
||||||
|
str(path),
|
||||||
|
window_info=window_info,
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
logger.debug(
|
||||||
|
f"ScreenAnalyzer unavailable in match_current_state wrapper: {exc}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Tentative 2 : stub minimal (comportement legacy d'urgence)
|
||||||
|
window = WindowContext(
|
||||||
|
app_name="unknown",
|
||||||
|
window_title=window_title or "Unknown",
|
||||||
|
screen_resolution=[1920, 1080],
|
||||||
|
workspace="main",
|
||||||
|
)
|
||||||
|
raw = RawLevel(
|
||||||
|
screenshot_path=str(path),
|
||||||
|
capture_method="manual",
|
||||||
|
file_size_bytes=path.stat().st_size if path.exists() else 0,
|
||||||
|
)
|
||||||
|
perception = PerceptionLevel(
|
||||||
|
embedding=EmbeddingRef(
|
||||||
|
provider="openclip_ViT-B-32",
|
||||||
|
vector_id="temp",
|
||||||
|
dimensions=512,
|
||||||
|
),
|
||||||
|
detected_text=[],
|
||||||
|
text_detection_method="pending",
|
||||||
|
confidence_avg=0.0,
|
||||||
|
)
|
||||||
|
context = ContextLevel(
|
||||||
|
current_workflow_candidate=workflow_id,
|
||||||
|
workflow_step=None,
|
||||||
|
user_id="matcher",
|
||||||
|
tags=[],
|
||||||
|
business_variables={},
|
||||||
|
)
|
||||||
|
return ScreenState(
|
||||||
|
screen_state_id=f"match_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
|
||||||
|
timestamp=datetime.now(),
|
||||||
|
session_id="matching",
|
||||||
|
window=window,
|
||||||
|
raw=raw,
|
||||||
|
perception=perception,
|
||||||
|
context=context,
|
||||||
|
ui_elements=[],
|
||||||
|
)
|
||||||
|
|
||||||
def match_hierarchical(
|
def match_hierarchical(
|
||||||
self,
|
self,
|
||||||
screenshot_path: str,
|
screenshot_path: str,
|
||||||
@@ -548,17 +761,56 @@ class WorkflowPipeline:
|
|||||||
def get_next_action(
|
def get_next_action(
|
||||||
self,
|
self,
|
||||||
workflow_id: str,
|
workflow_id: str,
|
||||||
current_node_id: str
|
current_node_id: str,
|
||||||
) -> Optional[Dict[str, Any]]:
|
screen_state: Optional[ScreenState] = None,
|
||||||
|
strategy: str = "best",
|
||||||
|
source_similarity: float = 1.0,
|
||||||
|
) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Obtenir la prochaine action à exécuter.
|
Obtenir la prochaine action à exécuter.
|
||||||
|
|
||||||
|
Contrat normalisé (Lot A — avril 2026) : retourne **toujours** un
|
||||||
|
dict avec une clé ``status`` non-ambiguë. Le ``None`` ambigu qui
|
||||||
|
confondait "workflow terminé" et "aucun edge valide" a été
|
||||||
|
supprimé : l'appelant (ExecutionLoop) peut désormais distinguer
|
||||||
|
ces cas pour déclencher une pause supervisée plutôt qu'une fin
|
||||||
|
de workflow faux-positive.
|
||||||
|
|
||||||
|
Sélection d'edge (C3) :
|
||||||
|
- Filtre dur sur ``pre_conditions`` (EdgeConstraints)
|
||||||
|
- Ranking par score composite (success_rate, target_match, recency)
|
||||||
|
- Tiebreak : success_rate le plus haut
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
workflow_id: ID du workflow
|
workflow_id: ID du workflow
|
||||||
current_node_id: ID du node actuel
|
current_node_id: ID du node actuel
|
||||||
|
screen_state: État courant, requis pour évaluer les
|
||||||
|
``pre_conditions`` et le match ``target_spec``. Si None,
|
||||||
|
fallback sur la logique sans filtre de contraintes.
|
||||||
|
strategy: ``"best"`` (défaut, scoring complet) ou ``"first"``
|
||||||
|
(mode legacy, premier edge sans tri)
|
||||||
|
source_similarity: confiance du matching (``match_current_state``)
|
||||||
|
qui a identifié ``current_node_id``. Propagée à l'EdgeScorer
|
||||||
|
pour activer la précondition ``min_source_similarity`` des
|
||||||
|
edges. Défaut ``1.0`` pour compat avec les appelants qui
|
||||||
|
ne la fournissent pas encore (Lot B — avril 2026).
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Dict avec action, target_node, confidence, ou None
|
Dict avec l'une des formes suivantes :
|
||||||
|
|
||||||
|
- ``{"status": "selected", "edge_id": str, "action": dict,
|
||||||
|
"target_node": str, "confidence": float, "score": float}``
|
||||||
|
→ edge sélectionné, l'ExecutionLoop doit l'exécuter.
|
||||||
|
|
||||||
|
- ``{"status": "terminal"}`` → le node courant n'a pas
|
||||||
|
d'outgoing_edge (fin légitime de workflow).
|
||||||
|
|
||||||
|
- ``{"status": "blocked", "reason": str}`` → il existe des
|
||||||
|
outgoing_edges mais aucun ne satisfait les conditions
|
||||||
|
(``reason="no_valid_edge"``), ou le workflow est introuvable
|
||||||
|
(``reason="workflow_not_found"``). L'ExecutionLoop doit
|
||||||
|
déclencher une pause supervisée et ne **jamais** traiter
|
||||||
|
ce cas comme un succès.
|
||||||
"""
|
"""
|
||||||
workflow = self._workflows.get(workflow_id)
|
workflow = self._workflows.get(workflow_id)
|
||||||
if not workflow:
|
if not workflow:
|
||||||
@@ -569,23 +821,44 @@ class WorkflowPipeline:
|
|||||||
self._workflows[workflow_id] = workflow
|
self._workflows[workflow_id] = workflow
|
||||||
else:
|
else:
|
||||||
logger.error(f"Workflow not found: {workflow_id}")
|
logger.error(f"Workflow not found: {workflow_id}")
|
||||||
return None
|
return {"status": "blocked", "reason": "workflow_not_found"}
|
||||||
|
|
||||||
# Trouver les edges sortants du node actuel
|
# Trouver les edges sortants du node actuel
|
||||||
outgoing_edges = workflow.get_outgoing_edges(current_node_id)
|
outgoing_edges = workflow.get_outgoing_edges(current_node_id)
|
||||||
|
|
||||||
if not outgoing_edges:
|
if not outgoing_edges:
|
||||||
|
# Aucun outgoing_edge = fin légitime du workflow
|
||||||
logger.info(f"No outgoing edges from node {current_node_id}")
|
logger.info(f"No outgoing edges from node {current_node_id}")
|
||||||
return None
|
return {"status": "terminal"}
|
||||||
|
|
||||||
# Pour l'instant, prendre le premier edge (TODO: logique de sélection)
|
# Sélection robuste via EdgeScorer (C3)
|
||||||
edge = outgoing_edges[0]
|
from core.pipeline.edge_scorer import EdgeScorer
|
||||||
|
|
||||||
|
scorer = EdgeScorer()
|
||||||
|
edge = scorer.select_best(
|
||||||
|
outgoing_edges,
|
||||||
|
screen_state=screen_state,
|
||||||
|
strategy=strategy,
|
||||||
|
source_similarity=source_similarity,
|
||||||
|
)
|
||||||
|
|
||||||
|
if edge is None:
|
||||||
|
# Il y avait des candidats mais aucun n'a passé les filtres.
|
||||||
|
# On NE retourne PAS "terminal" : l'ExecutionLoop doit traiter
|
||||||
|
# ce cas comme un blocage et demander de l'aide.
|
||||||
|
logger.warning(
|
||||||
|
f"No valid edge from {current_node_id} "
|
||||||
|
f"({len(outgoing_edges)} candidates rejected)"
|
||||||
|
)
|
||||||
|
return {"status": "blocked", "reason": "no_valid_edge"}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
"status": "selected",
|
||||||
"edge_id": edge.edge_id,
|
"edge_id": edge.edge_id,
|
||||||
"action": edge.action.to_dict(),
|
"action": edge.action.to_dict(),
|
||||||
"target_node": edge.to_node,
|
"target_node": edge.to_node,
|
||||||
"confidence": edge.stats.success_rate if edge.stats else 1.0
|
"confidence": edge.stats.success_rate if edge.stats else 1.0,
|
||||||
|
"score": edge.stats.success_rate if edge.stats else 1.0,
|
||||||
}
|
}
|
||||||
|
|
||||||
def should_execute_automatically(self, workflow_id: str) -> bool:
|
def should_execute_automatically(self, workflow_id: str) -> bool:
|
||||||
@@ -759,10 +1032,11 @@ class WorkflowPipeline:
|
|||||||
current_node_id = match_result["node_id"]
|
current_node_id = match_result["node_id"]
|
||||||
logger.info(f"Matched current state to node: {current_node_id} (confidence: {match_result['confidence']:.3f})")
|
logger.info(f"Matched current state to node: {current_node_id} (confidence: {match_result['confidence']:.3f})")
|
||||||
|
|
||||||
# 2. Obtenir la prochaine action
|
# 2. Obtenir la prochaine action (contrat dict avec status explicite)
|
||||||
action_info = self.get_next_action(workflow_id, current_node_id)
|
action_info = self.get_next_action(workflow_id, current_node_id)
|
||||||
|
action_status = action_info.get("status")
|
||||||
|
|
||||||
if not action_info:
|
if action_status == "terminal":
|
||||||
return {
|
return {
|
||||||
"execution_id": execution_id,
|
"execution_id": execution_id,
|
||||||
"workflow_id": workflow_id,
|
"workflow_id": workflow_id,
|
||||||
@@ -771,7 +1045,19 @@ class WorkflowPipeline:
|
|||||||
"message": "Workflow completed - no more actions",
|
"message": "Workflow completed - no more actions",
|
||||||
"current_node": current_node_id,
|
"current_node": current_node_id,
|
||||||
"execution_time_ms": (datetime.now() - start_time).total_seconds() * 1000,
|
"execution_time_ms": (datetime.now() - start_time).total_seconds() * 1000,
|
||||||
"correlation_id": execution_id
|
"correlation_id": execution_id,
|
||||||
|
}
|
||||||
|
|
||||||
|
if action_status == "blocked":
|
||||||
|
return {
|
||||||
|
"execution_id": execution_id,
|
||||||
|
"workflow_id": workflow_id,
|
||||||
|
"success": False,
|
||||||
|
"step_type": "action_selection",
|
||||||
|
"error": f"No valid edge: {action_info.get('reason', 'unknown')}",
|
||||||
|
"current_node": current_node_id,
|
||||||
|
"execution_time_ms": (datetime.now() - start_time).total_seconds() * 1000,
|
||||||
|
"correlation_id": execution_id,
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.info(f"Next action: {action_info['action']['type']} -> {action_info['target_node']}")
|
logger.info(f"Next action: {action_info['action']['type']} -> {action_info['target_node']}")
|
||||||
|
|||||||
@@ -125,18 +125,19 @@ class WorkflowPipelineEnhanced:
|
|||||||
current_node_id = match_result["node_id"]
|
current_node_id = match_result["node_id"]
|
||||||
logger.info(f"Matched current state to node: {current_node_id} (confidence: {match_result['confidence']:.3f})")
|
logger.info(f"Matched current state to node: {current_node_id} (confidence: {match_result['confidence']:.3f})")
|
||||||
|
|
||||||
# 2. Obtenir la prochaine action
|
# 2. Obtenir la prochaine action (contrat dict avec status explicite)
|
||||||
action_info = self.get_next_action(workflow_id, current_node_id)
|
action_info = self.get_next_action(workflow_id, current_node_id)
|
||||||
|
action_status = action_info.get("status")
|
||||||
|
|
||||||
if not action_info:
|
if action_status == "terminal":
|
||||||
# Workflow terminé
|
# Workflow terminé (aucun outgoing_edge = fin légitime)
|
||||||
performance_metrics.total_execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000
|
performance_metrics.total_execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000
|
||||||
|
|
||||||
result = WorkflowExecutionResult.workflow_complete(
|
result = WorkflowExecutionResult.workflow_complete(
|
||||||
execution_id=execution_id,
|
execution_id=execution_id,
|
||||||
workflow_id=workflow_id,
|
workflow_id=workflow_id,
|
||||||
current_node=current_node_id,
|
current_node=current_node_id,
|
||||||
performance_metrics=performance_metrics
|
performance_metrics=performance_metrics,
|
||||||
)
|
)
|
||||||
result.correlation_id = correlation_id
|
result.correlation_id = correlation_id
|
||||||
result.match_result = match_result
|
result.match_result = match_result
|
||||||
@@ -144,6 +145,27 @@ class WorkflowPipelineEnhanced:
|
|||||||
logger.info(f"Workflow {workflow_id} completed at node {current_node_id}")
|
logger.info(f"Workflow {workflow_id} completed at node {current_node_id}")
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
if action_status == "blocked":
|
||||||
|
# Des edges existent mais aucun ne passe les filtres :
|
||||||
|
# c'est un blocage, pas une fin de workflow.
|
||||||
|
performance_metrics.total_execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000
|
||||||
|
|
||||||
|
result = WorkflowExecutionResult.error(
|
||||||
|
execution_id=execution_id,
|
||||||
|
workflow_id=workflow_id,
|
||||||
|
error_message=f"No valid edge: {action_info.get('reason', 'unknown')}",
|
||||||
|
step_type="action_selection",
|
||||||
|
current_node=current_node_id,
|
||||||
|
performance_metrics=performance_metrics,
|
||||||
|
)
|
||||||
|
result.correlation_id = correlation_id
|
||||||
|
|
||||||
|
logger.warning(
|
||||||
|
f"Workflow {workflow_id} blocked at node {current_node_id}: "
|
||||||
|
f"{action_info.get('reason')}"
|
||||||
|
)
|
||||||
|
return result
|
||||||
|
|
||||||
logger.info(f"Next action: {action_info['action']['type']} -> {action_info['target_node']}")
|
logger.info(f"Next action: {action_info['action']['type']} -> {action_info['target_node']}")
|
||||||
|
|
||||||
# 3. Charger le workflow pour obtenir l'edge complet
|
# 3. Charger le workflow pour obtenir l'edge complet
|
||||||
|
|||||||
@@ -1,327 +0,0 @@
|
|||||||
e)a, field_namg(datin_loggsanitize_fordator.valieturn r()
|
|
||||||
or_validatet_inputalidator = g""
|
|
||||||
v
|
|
||||||
"iséesnées sanit Don
|
|
||||||
Returns:
|
|
||||||
amp
|
|
||||||
chNom du ame: field_ntiser
|
|
||||||
s à saniata: Donnée d
|
|
||||||
|
|
||||||
Args:ging.
|
|
||||||
le loges pours donnéSanitise de """
|
|
||||||
-> str:
|
|
||||||
"data") me: str = nay, field_ta: An(da_loggingize_for sanita
|
|
||||||
|
|
||||||
|
|
||||||
defarsed_dat return p
|
|
||||||
")
|
|
||||||
errors)}t.uljoin(res {'; '.ed:ion failalidator(f"JSON vlidationErrise InputVa ralid:
|
|
||||||
is_vat.not resul if
|
|
||||||
")
|
|
||||||
"json_datafield_name=e, th=max_sizr, max_lengring(json_stalidate_stvalidator.vt =
|
|
||||||
resuldata)s(parsed_on.dump = js json_strtor()
|
|
||||||
put_validaet_in gidator =s
|
|
||||||
vales injectionur lontenu poider le c
|
|
||||||
# Valt")
|
|
||||||
dicng orbe strimust N data "JSOionError(putValidat raise In se:
|
|
||||||
|
|
||||||
elson_data_data = jparsed")
|
|
||||||
size}max_ze of { maximum siexceedsN data rror(f"JSOValidationEaise Input r_size:
|
|
||||||
lized) > maxlen(seria if a)
|
|
||||||
s(json_dat json.dumpalized =eri sialisée
|
|
||||||
ére sla taillrifier # Véct):
|
|
||||||
ata, di_de(jsonncsinsta elif i
|
|
||||||
t: {e}") JSON formaidror(f"InvalErdationalise InputV raie:
|
|
||||||
ror as JSONDecodeErt json. excep n_data)
|
|
||||||
loads(jsojson.= d_data parse
|
|
||||||
try:
|
|
||||||
size}")
|
|
||||||
{max_mum size of axiceeds m data exONor(f"JSrrtionEputValidaise In ra
|
|
||||||
max_size:a) >(json_datf len i
|
|
||||||
data, str):json_isinstance( if ""
|
|
||||||
" invalides
|
|
||||||
sont ess donnéSi letionError: InputValida s:
|
|
||||||
Raise
|
|
||||||
|
|
||||||
ON validéess JS Donnéeurns:
|
|
||||||
|
|
||||||
Ret s
|
|
||||||
n caractèremale exille maax_size: Tai mou dict)
|
|
||||||
string nnées JSON (: Do_data json
|
|
||||||
|
|
||||||
Args: .
|
|
||||||
nnées JSONdo Valide des "
|
|
||||||
|
|
||||||
"") -> dict:= 10000x_size: int t], man[str, dicnion_data: Uput(jsoe_json_inalidat
|
|
||||||
|
|
||||||
|
|
||||||
def ved_pathurn normaliz ret
|
|
||||||
|
|
||||||
")ath}malized_pories: {norwed directllon apath not ior(f"File ionErratlide InputVa rais ):
|
|
||||||
rslowed_di_dir in al for allowedr)d_diallowe.startswith(_obj)str(pathot any( if n)
|
|
||||||
alized_pathPath(normpath_obj = :
|
|
||||||
_dirsif allowed
|
|
||||||
i spécifiésautorisés soires répertrifier lesVé
|
|
||||||
# ")
|
|
||||||
xt}n: {file_extensio engerous filer(f"DaolationErroyVi Securit raisensions:
|
|
||||||
xtegerous_ext in danf file_e()
|
|
||||||
ix.lowerath).suffied_pnormalizxt = Path( file_e p', '.sh'}
|
|
||||||
.ph', ' '.jscr', '.vbs', '.s, '.cmd',xe', '.bat'{'.ensions = ngerous_exte dauses
|
|
||||||
angereons densies exter l Vérifi
|
|
||||||
#_path}")
|
|
||||||
{file detected:attemptl raversa t"Pathrror(fationEyViol Securitise ra"/"):
|
|
||||||
ith(path.startswd_or normalizelized_path in norma ".." ifl
|
|
||||||
rsaraveh tives de patntat les teVérifier # )
|
|
||||||
|
|
||||||
_pathle.normpath(fih = os.pathpatrmalized_ noin
|
|
||||||
ser le chem# Normali
|
|
||||||
ng")
|
|
||||||
t be a strile path mus"Fir(dationErroalise InputV raitr):
|
|
||||||
th, se_pailsinstance(ft i if no
|
|
||||||
"""
|
|
||||||
ngereux dae chemin estError: Si lionnputValidat I
|
|
||||||
aises:
|
|
||||||
R
|
|
||||||
sénormalit min validé e Che
|
|
||||||
Returns:
|
|
||||||
|
|
||||||
orisésutres ars: Répertoilowed_di al valider
|
|
||||||
n àhemie_path: C filgs:
|
|
||||||
Ar
|
|
||||||
chier.
|
|
||||||
hemin de fialide un c V"
|
|
||||||
" ":
|
|
||||||
trne) -> s No] =str]List[ional[rs: Optwed_di: str, allole_pathath_input(fifile_plidate_vae
|
|
||||||
|
|
||||||
|
|
||||||
def ized_valuresult.sanitreturn
|
|
||||||
|
|
||||||
.errors)}").join(resulte}: {'; 'field_named for {dation failf"ValinError(idatio InputValserai is_valid:
|
|
||||||
t.ul not res
|
|
||||||
if_name)
|
|
||||||
_html, fieldength, allow, max_lring(valuealidate_stidator.vval = resultor()
|
|
||||||
idatt_input_valator = ge"
|
|
||||||
valid""ue
|
|
||||||
échotionlidai la vaor: SdationErrnputVali Is:
|
|
||||||
se
|
|
||||||
Rai
|
|
||||||
nitisée sa Valeureturns:
|
|
||||||
R
|
|
||||||
p
|
|
||||||
du chamm d_name: No fiel HTML
|
|
||||||
oriser leow_html: Aut all ximale
|
|
||||||
Longueur mamax_length: r
|
|
||||||
r à valideue: Valeu val Args:
|
|
||||||
|
|
||||||
|
|
||||||
ée string.e une entranitisalide et s
|
|
||||||
V"""r:
|
|
||||||
t") -> st= "inpue: str e, field_namalsool = Fw_html: b allo
|
|
||||||
1000, ength: int =max_lvalue: str, ut(ing_inpvalidate_str
|
|
||||||
|
|
||||||
|
|
||||||
def r_instancern _validato)
|
|
||||||
retudator(alie = InputVancinstalidator_ _v one:
|
|
||||||
tance is Nor_insf _validat
|
|
||||||
itancer_insal _validatolob"
|
|
||||||
g""r
|
|
||||||
alidateuu vstance d Inturns:
|
|
||||||
Re
|
|
||||||
r.
|
|
||||||
teuida du valobaleinstance glourne l' Ret""
|
|
||||||
"or:
|
|
||||||
lidatputVa-> Inr() dato_valit_inputef geNone
|
|
||||||
|
|
||||||
|
|
||||||
d= ] putValidatoronal[Inance: Optilidator_instidateur
|
|
||||||
_va du val globalencesta
|
|
||||||
# In )
|
|
||||||
|
|
||||||
}"
|
|
||||||
_valuezedue: {saniti f"Val . "
|
|
||||||
field_name}ype} in {ation_tvioltected: {iolation dey vf"Securit rning(
|
|
||||||
ger.wa logame)
|
|
||||||
e, field_ng(valuor_logginf.sanitize_f selalue =tized_v sani""
|
|
||||||
té."ride sécuion violatg une Lo """:
|
|
||||||
ny) -> Nonevalue: A_name: str, ldier, fn_type: stolatioon(self, viati_violitylog_secur _
|
|
||||||
def _}]"
|
|
||||||
e_(data).__namntable:{typeme}[unpri{field_nareturn f"
|
|
||||||
ion:cept Except ex
|
|
||||||
ata_str
|
|
||||||
turn d re
|
|
||||||
tr)
|
|
||||||
scape(data_s html.e data_str =
|
|
||||||
dangereuxres es caractèhapper l # Éc
|
|
||||||
."
|
|
||||||
"..r[:200] + ata_stata_str = d d
|
|
||||||
0:r) > 20ata_st if len(d s
|
|
||||||
our les log taille pr la # Limite
|
|
||||||
|
|
||||||
ta)r(dastr = st data_ else:
|
|
||||||
|
|
||||||
, ':')),'s=('eparatore, s_ascii=Trunsurea, e(dat.dumps json = data_str
|
|
||||||
ct, list)): (dia,nstance(datsi if i
|
|
||||||
try:le
|
|
||||||
aila tter lg et limi en strinonvertir # C
|
|
||||||
]"
|
|
||||||
{len(data)}_}:size=a).__name_(dattypeme}[{{field_naturn f" re :
|
|
||||||
))istta, (dict, ltance(daisinsif el )}]"
|
|
||||||
lue(datave_vasensitish:{hash_e}[haield_namf"{f return
|
|
||||||
> 20:d len(data)str) ane(data, sinstanc if is
|
|
||||||
ensiblenées ss donhasher lerisé, En mode sécu # itive:
|
|
||||||
ensself.log_s not if ""
|
|
||||||
|
|
||||||
"r logging pouestisénées saniDon
|
|
||||||
Returns:
|
|
||||||
|
|
||||||
pom du chameld_name: N fi er
|
|
||||||
itis sanes àata: Donné d gs:
|
|
||||||
Ar
|
|
||||||
sécurisé.
|
|
||||||
le logging pouronnéess dnitise de Sa ""
|
|
||||||
" ) -> str:
|
|
||||||
ata"tr = "dd_name: sy, fiel: Anlf, dataging(seogze_for_lef saniti
|
|
||||||
dngs)
|
|
||||||
ors, warninitized, err sa_valid,ult(isationReslid return Va
|
|
||||||
s) == 0error= len(valid is_
|
|
||||||
itized)
|
|
||||||
, san7F]', ''\x1F\x0C\x0E-\x0B8\x0-\x0r'[\x0e.sub(= r sanitized ôle
|
|
||||||
ntrctères de cocaraoyer les # Nett
|
|
||||||
|
|
||||||
anitized).escape(s = html sanitized :
|
|
||||||
allow_html if not ire
|
|
||||||
si nécessatizer HTML# Sani
|
|
||||||
)
|
|
||||||
"SQL patternspicious Noains suntld_name} cofiepend(f"{ngs.ap warni else:
|
|
||||||
|
|
||||||
value)e,nam", field_ attemptionjectQL inlation("NoSecurity_vioog_s._l self ")
|
|
||||||
ernection pattl NoSQL injs potentiae} containd_nam{fiel(f"penderrors.ap
|
|
||||||
_mode:lf.strictse if lue):
|
|
||||||
(vaern.searchif patt ns:
|
|
||||||
atterf._nosql_prn in selte for patSQL
|
|
||||||
njections Nofier les i # Véri
|
|
||||||
")
|
|
||||||
QL pattern Suspiciousontains seld_name} c{fiappend(f"arnings. w:
|
|
||||||
else e)
|
|
||||||
, valu_nameeld, fipt"ection attem"SQL injiolation(security_vg_loself._ )
|
|
||||||
on pattern"L injectiotential SQontains p_name} c"{fieldppend(f.aors err e:
|
|
||||||
.strict_modself if alue):
|
|
||||||
rn.search(vatteif p patterns:
|
|
||||||
sql_f._eln spattern i for ons SQL
|
|
||||||
tir les injecVérifie #
|
|
||||||
|
|
||||||
x_length] value[:matized = sani ers")
|
|
||||||
th} charact{max_lengcated to _name} trunf"{fieldend(s.app warning else:
|
|
||||||
|
|
||||||
}")ax_length{mf length oimum eeds maxe} exc"{field_nam(fpend errors.ap ct_mode:
|
|
||||||
f self.stri ih:
|
|
||||||
lengtalue) > max_ if len(vueur
|
|
||||||
longVérifier la
|
|
||||||
# s)
|
|
||||||
ors, warningne, errt(False, NoonResulidati return Val tring")
|
|
||||||
t be a smusd_name} f"{fielrs.append( erro
|
|
||||||
, str):ce(valueisinstan if not
|
|
||||||
ue
|
|
||||||
d = valanitize sgs = []
|
|
||||||
nin war
|
|
||||||
errors = []"
|
|
||||||
"" alidation
|
|
||||||
vt de Résulta eturns:
|
|
||||||
R
|
|
||||||
s
|
|
||||||
our les logdu champ pNom : ld_name fie HTML
|
|
||||||
toriser le w_html: Au allo e
|
|
||||||
aximalgueur mh: Lonengt max_lder
|
|
||||||
valiue: Valeur à val:
|
|
||||||
Args
|
|
||||||
.
|
|
||||||
tèresde carac chaîne Valide une"
|
|
||||||
"" lt:
|
|
||||||
esuValidationRput") -> : str = "infield_name= False, tml: bool allow_h ,
|
|
||||||
000h: int = 1 max_lengtstr,f, value: (selring validate_st def
|
|
||||||
ERNS]
|
|
||||||
TTN_PAJECTIOlf.NOSQL_INttern in seor paE) fCASe.IGNOREttern, re(pa.compil= [rerns patteself._nosql_ RNS]
|
|
||||||
TE_PATL_INJECTION in self.SQfor patternNORECASE) re.IGtern,compile(pate. = [rerns_sql_pattf. selformance
|
|
||||||
pour pers patterns lepiler # Com
|
|
||||||
ata
|
|
||||||
ive_d.log_sensitive = configsit_sen self.log
|
|
||||||
ationinput_valid.strict_se configels not None _mode istrictct_mode if striict_mode = self.str nfig()
|
|
||||||
security_coig = get_ conf""
|
|
||||||
"g)
|
|
||||||
selon confi auto (None =strictde: Mode strict_mo
|
|
||||||
Args:
|
|
||||||
|
|
||||||
ur.datese le vali Initiali """
|
|
||||||
:
|
|
||||||
one)l] = N[boo: Optionalt_mode stric_(self,it_def __in
|
|
||||||
]
|
|
||||||
)"
|
|
||||||
\.|db\.is r"(th
|
|
||||||
\})",\s*\$.* r"(\{
|
|
||||||
meout\b)",etTil\b|\bs\(|\bevaction\s*"(funr nin)",
|
|
||||||
in|\$gt|\$lt|\$\$e|\$regex|\$n"(\$where| r [
|
|
||||||
TTERNS =CTION_PAL_INJEOSQ N n NoSQL
|
|
||||||
ctiour injengereux poatterns da # P]
|
|
||||||
|
|
||||||
"
|
|
||||||
b)\qlbsp_executes"(\
|
|
||||||
r",dshell\b)bxp_cm r"(\
|
|
||||||
)",[\'\";]r"( )\b)",
|
|
||||||
ONERRORAD|T|ONLOBSCRIP|VIPTAVASCRSCRIPT|J(\b( r" */)",
|
|
||||||
--|#|/\*|\ r"( ",
|
|
||||||
+)s*=\s*\d\AND)\s+\d+(UNION|OR|\b r"(
|
|
||||||
b)",\UTE)EXEC|EXECE|ALTER|OP|CREATDRELETE|ERT|UPDATE|Db(SELECT|INS r"(\
|
|
||||||
RNS = [N_PATTE_INJECTIOSQL
|
|
||||||
SQLnjection ereux pour irns dangtte# Pa
|
|
||||||
|
|
||||||
""teur."s utilisaeur d'entréeidatVal"" "ator:
|
|
||||||
Valids Inputclas
|
|
||||||
|
|
||||||
pass
|
|
||||||
""
|
|
||||||
ée."tectécurité déolation de s"Vi"" Error):
|
|
||||||
tValidationnError(InpuyViolatioSecurit
|
|
||||||
|
|
||||||
class pass
|
|
||||||
"
|
|
||||||
rée.""nton d'ealidatieur de v""Err "
|
|
||||||
ion):r(ExceptidationErroputValass In= []
|
|
||||||
|
|
||||||
|
|
||||||
clf.warnings sel:
|
|
||||||
None isarnings self.w ifors = []
|
|
||||||
elf.err sne:
|
|
||||||
is Nororser if self.
|
|
||||||
lf):init__(seost_def __p
|
|
||||||
r]
|
|
||||||
[sts: Listningwar[str]
|
|
||||||
istrs: L erroue: Any
|
|
||||||
ed_val sanitiz: bool
|
|
||||||
lid
|
|
||||||
is_va"""
|
|
||||||
une entrée.dation d' de valitat"Résul""lt:
|
|
||||||
ationResuclass Validaclass
|
|
||||||
dat
|
|
||||||
|
|
||||||
@_)
|
|
||||||
ame_etLogger(__ngging.g
|
|
||||||
logger = lolue
|
|
||||||
ive_vaash_sensitonfig, h_cecurityimport get_srity_config .secu
|
|
||||||
|
|
||||||
from dataclassrtpoimdataclasses
|
|
||||||
from Union, SetOptional,, List, Any, Dict import ng
|
|
||||||
from typirt Pathimpoib thlfrom pajson
|
|
||||||
|
|
||||||
import l htmortlogging
|
|
||||||
impe
|
|
||||||
import port r
|
|
||||||
imrt ospo"
|
|
||||||
|
|
||||||
im"ggées
|
|
||||||
"données loization des 7.4: Sanit
|
|
||||||
Exigence s chiers de fin des chemintioalida3: VExigence 7.
|
|
||||||
SQL/NoSQLonsti injeccontre lesion ectotence 7.2: PrExigé.
|
|
||||||
a sécuritur lteur polisatrées utiion des envalidat
|
|
||||||
Système de m
|
|
||||||
stedation Syut Vali"""
|
|
||||||
Inp
|
|
||||||
308
core/security/signed_serializer.py
Normal file
308
core/security/signed_serializer.py
Normal file
@@ -0,0 +1,308 @@
|
|||||||
|
"""
|
||||||
|
Sérialiseur signé — RPA Vision V3
|
||||||
|
|
||||||
|
Remplace les usages de `pickle.load` (vulnérables à la désérialisation arbitraire
|
||||||
|
de code) par une sérialisation JSON signée via HMAC-SHA256.
|
||||||
|
|
||||||
|
Principes :
|
||||||
|
- Les données sont sérialisées en JSON (avec support des types numpy / datetime
|
||||||
|
via un encodeur custom).
|
||||||
|
- Une signature HMAC-SHA256 est calculée sur le JSON avec une clé secrète
|
||||||
|
dérivée de `RPA_SIGNING_KEY` (ou, à défaut, de `TOKEN_SECRET_KEY`).
|
||||||
|
- À la lecture, la signature est vérifiée AVANT tout parsing applicatif.
|
||||||
|
- Rétrocompatibilité : un fallback `pickle.load` est disponible pour migrer
|
||||||
|
les anciens fichiers. Il logue un WARNING et doit être suivi d'une
|
||||||
|
ré-écriture en JSON signé.
|
||||||
|
|
||||||
|
ATTENTION : n'utiliser le fallback pickle que sur des fichiers dont la source
|
||||||
|
est réputée sûre (locale + protégée). Le fallback est désactivable via la
|
||||||
|
variable d'environnement `RPA_ALLOW_PICKLE_FALLBACK=0`.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import hashlib
|
||||||
|
import hmac
|
||||||
|
import io
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import pickle
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Callable, Optional, Union
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
# Clé de signature
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
_SIGNATURE_ALGO = "sha256"
|
||||||
|
_SIGNATURE_HEADER = b"RPA_SIGNED_V1\n" # Marqueur de format signé
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_signing_key() -> bytes:
    """Resolve the HMAC signing key from the environment.

    Priority order:
    1. ``RPA_SIGNING_KEY`` (dedicated to file signing)
    2. ``TOKEN_SECRET_KEY`` (key already used to sign API tokens)
    3. A locally derived dev key (with a WARNING)

    The dev key is stable for a given machine (derived from the hostname)
    so that local reads/writes stay consistent without configuration,
    while still refusing to validate files produced elsewhere.

    Returns:
        The signing key as bytes.
    """
    explicit = os.getenv("RPA_SIGNING_KEY", "").strip()
    if explicit:
        return explicit.encode("utf-8")

    fallback = os.getenv("TOKEN_SECRET_KEY", "").strip()
    if fallback:
        return fallback.encode("utf-8")

    # Derived dev key: not cryptographically strong, only meant to avoid
    # hard failures in local development. Logged explicitly.
    logger.warning(
        "RPA_SIGNING_KEY et TOKEN_SECRET_KEY non définis — "
        "utilisation d'une clé dérivée locale. "
        "Définir RPA_SIGNING_KEY en production."
    )
    # Bug fix: os.uname() exists only on POSIX and raises AttributeError on
    # Windows. socket.gethostname() yields the machine name portably (and
    # matches uname().nodename on POSIX systems).
    import socket

    seed = f"rpa-vision-v3::{socket.gethostname()}::dev-signing"
    return hashlib.sha256(seed.encode("utf-8")).digest()
|
||||||
|
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
# Encodage JSON étendu (numpy, datetime, Path, bytes)
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class _RPAJSONEncoder(json.JSONEncoder):
    """JSON encoder with support for numpy / datetime / Path / bytes."""

    def default(self, obj: Any) -> Any:  # noqa: D401 - standard json API
        # Numpy containers first: arrays carry dtype + shape alongside a
        # base64 buffer so they can be rebuilt losslessly.
        if isinstance(obj, np.ndarray):
            encoded = base64.b64encode(obj.tobytes()).decode("ascii")
            return {
                "__type__": "ndarray",
                "dtype": str(obj.dtype),
                "shape": list(obj.shape),
                "data": encoded,
            }
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.bool_):
            return bool(obj)
        # Temporal / filesystem / binary / set types become tagged dicts
        # that _json_object_hook knows how to reverse.
        if isinstance(obj, datetime):
            return {"__type__": "datetime", "iso": obj.isoformat()}
        if isinstance(obj, timedelta):
            return {"__type__": "timedelta", "seconds": obj.total_seconds()}
        if isinstance(obj, Path):
            return {"__type__": "path", "value": str(obj)}
        if isinstance(obj, bytes):
            encoded = base64.b64encode(obj).decode("ascii")
            return {"__type__": "bytes", "data": encoded}
        if isinstance(obj, set):
            return {"__type__": "set", "items": list(obj)}
        # Anything else: defer to the base class (raises TypeError).
        return super().default(obj)
|
||||||
|
|
||||||
|
|
||||||
|
def _json_object_hook(obj: Any) -> Any:
    """Rebuild extended types (ndarray, datetime, ...) from tagged dicts."""
    if not isinstance(obj, dict):
        return obj
    tag = obj.get("__type__")
    if tag == "ndarray":
        buffer = base64.b64decode(obj["data"])
        flat = np.frombuffer(buffer, dtype=np.dtype(obj["dtype"]))
        # .copy() yields a writable array (frombuffer views are read-only).
        return flat.reshape(obj["shape"]).copy()
    if tag == "datetime":
        return datetime.fromisoformat(obj["iso"])
    if tag == "timedelta":
        return timedelta(seconds=float(obj["seconds"]))
    if tag == "path":
        return Path(obj["value"])
    if tag == "bytes":
        return base64.b64decode(obj["data"])
    if tag == "set":
        return set(obj.get("items", []))
    # Untagged (or unknown tag): hand the dict back unchanged.
    return obj
|
||||||
|
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
# Erreurs dédiées
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class SignedSerializerError(Exception):
    """Base error for the signed-serializer module."""
|
||||||
|
|
||||||
|
|
||||||
|
class SignatureVerificationError(SignedSerializerError):
    """Invalid HMAC signature: the file was altered or forged."""
|
||||||
|
|
||||||
|
|
||||||
|
class UnsupportedFormatError(SignedSerializerError):
    """File is neither in the signed format nor recognized as legacy pickle."""
|
||||||
|
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
# API publique
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _compute_hmac(payload: bytes, key: bytes) -> str:
    """Return the hex HMAC-SHA256 digest of *payload* under *key*."""
    mac = hmac.new(key, msg=payload, digestmod=hashlib.sha256)
    return mac.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def dumps_signed(data: Any, key: Optional[bytes] = None) -> bytes:
    """Serialize *data* as HMAC-SHA256-signed JSON.

    Binary format returned:
    b"RPA_SIGNED_V1\\n" + utf8(json({"hmac": "<hex>", "payload_b64": <b64>}))

    The HMAC covers the canonical JSON of the payload (sorted keys,
    compact separators) so that the same object always produces the
    same signature.

    Args:
        data: Object to serialize (numpy / datetime / Path / bytes supported).
        key: Explicit HMAC key; defaults to the environment-derived key.

    Returns:
        The signed blob as bytes.
    """
    if key is None:
        key = _resolve_signing_key()
    # Canonical JSON: deterministic output for a given object.
    canonical = json.dumps(
        data,
        cls=_RPAJSONEncoder,
        sort_keys=True,
        separators=(",", ":"),
        ensure_ascii=False,
    )
    payload = canonical.encode("utf-8")
    envelope = {
        "hmac": _compute_hmac(payload, key),
        "payload_b64": base64.b64encode(payload).decode("ascii"),
    }
    envelope_json = json.dumps(envelope, separators=(",", ":"), ensure_ascii=False)
    return _SIGNATURE_HEADER + envelope_json.encode("utf-8")
|
||||||
|
|
||||||
|
|
||||||
|
def loads_signed(raw: bytes, key: Optional[bytes] = None) -> Any:
    """Deserialize a blob produced by `dumps_signed` after HMAC verification.

    The signature is checked BEFORE any application-level parsing of the
    payload, so tampered or forged files are rejected early.

    Args:
        raw: Signed blob, starting with the RPA_SIGNED_V1 marker.
        key: Explicit HMAC key; defaults to the environment-derived key.

    Returns:
        The deserialized object (extended types rebuilt via object hook).

    Raises:
        UnsupportedFormatError: Signed-format marker is missing.
        SignedSerializerError: Malformed envelope or invalid base64 payload.
        SignatureVerificationError: HMAC mismatch (altered file or wrong key).
    """
    if not raw.startswith(_SIGNATURE_HEADER):
        raise UnsupportedFormatError("Marqueur RPA_SIGNED_V1 absent.")
    signing_key = key if key is not None else _resolve_signing_key()
    # Envelope = JSON body after the fixed header.
    body = raw[len(_SIGNATURE_HEADER):]
    try:
        envelope = json.loads(body.decode("utf-8"))
    except (UnicodeDecodeError, json.JSONDecodeError) as exc:
        raise SignedSerializerError(f"Enveloppe JSON invalide : {exc}") from exc

    if not isinstance(envelope, dict):
        raise SignedSerializerError("Enveloppe inattendue.")
    signature = envelope.get("hmac")
    payload_b64 = envelope.get("payload_b64")
    if not isinstance(signature, str) or not isinstance(payload_b64, str):
        raise SignedSerializerError("Enveloppe mal formée (hmac / payload_b64).")

    try:
        payload_bytes = base64.b64decode(payload_b64.encode("ascii"), validate=True)
    except Exception as exc:  # noqa: BLE001 - base64 can raise several error types
        raise SignedSerializerError(f"Payload base64 invalide : {exc}") from exc

    # Constant-time comparison to avoid timing side channels on the HMAC.
    expected = _compute_hmac(payload_bytes, signing_key)
    if not hmac.compare_digest(expected, signature):
        raise SignatureVerificationError(
            "Signature HMAC invalide — fichier altéré ou clé différente."
        )

    # Signature verified: only now parse the payload into Python objects.
    return json.loads(payload_bytes.decode("utf-8"), object_hook=_json_object_hook)
|
||||||
|
|
||||||
|
|
||||||
|
def _pickle_fallback_allowed() -> bool:
    """Whether the legacy pickle fallback is enabled (env kill-switch)."""
    # Enabled by default; only the explicit value "0" disables it.
    flag = os.getenv("RPA_ALLOW_PICKLE_FALLBACK", "1")
    return flag != "0"
|
||||||
|
|
||||||
|
|
||||||
|
def save_signed(path: Union[str, Path], data: Any, key: Optional[bytes] = None) -> None:
    """Write *data* to disk in the signed-JSON format.

    Args:
        path: Destination file; parent directories are created if needed.
        data: Object to serialize (see `dumps_signed`).
        key: Explicit HMAC key; defaults to the environment-derived key.
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    blob = dumps_signed(data, key=key)
    # Write to a sibling .tmp file then atomically swap it into place so
    # concurrent readers never observe a half-written file.
    tmp_path = target.with_suffix(target.suffix + ".tmp")
    tmp_path.write_bytes(blob)
    os.replace(tmp_path, target)
|
||||||
|
|
||||||
|
|
||||||
|
def load_signed(
    path: Union[str, Path],
    *,
    allow_pickle_fallback: bool = True,
    migrate_on_fallback: bool = True,
    pickle_loader: Optional[Callable[[io.BufferedReader], Any]] = None,
    key: Optional[bytes] = None,
) -> Any:
    """Load a file previously written by `save_signed`.

    If the file is not in the signed format, and `allow_pickle_fallback`
    is true (AND `RPA_ALLOW_PICKLE_FALLBACK != "0"`), a `pickle.load()`
    is attempted to migrate legacy files. In that case a WARNING is
    emitted and the file is rewritten as signed JSON when
    `migrate_on_fallback` is True.

    Args:
        path: File path
        allow_pickle_fallback: Enable legacy compatibility
        migrate_on_fallback: Rewrite as signed JSON after fallback
        pickle_loader: Alternative callable (for tests / restricted unpickler)
        key: Explicit HMAC key (otherwise derived from the environment)

    Raises:
        SignatureVerificationError: invalid HMAC (tampered file)
        UnsupportedFormatError: unknown format and fallback disabled
    """
    path = Path(path)
    with open(path, "rb") as fp:
        raw = fp.read()

    if raw.startswith(_SIGNATURE_HEADER):
        return loads_signed(raw, key=key)

    if not allow_pickle_fallback or not _pickle_fallback_allowed():
        raise UnsupportedFormatError(
            f"{path} n'est pas au format signé et le fallback pickle est désactivé."
        )

    logger.warning(
        "Chargement legacy pickle pour %s — ce format est obsolète et "
        "sera ré-écrit en JSON signé. Voir docs/SECURITY.md.",
        path,
    )

    # By default we refuse any undocumented type in this risky file: the
    # caller can provide a custom `pickle_loader` (e.g. a restricted
    # Unpickler). The load is logged above for traceability.
    loader = pickle_loader or (lambda f: pickle.load(f))  # noqa: S301 - usage legacy
    # Fix: unpickle the bytes already read instead of re-opening the file.
    # Re-reading left a TOCTOU window where the on-disk content could change
    # between the signed-header check on `raw` and the pickle load, so the
    # bytes that were inspected and the bytes that were parsed could differ.
    # NOTE(review): a custom `pickle_loader` now receives an io.BytesIO
    # rather than a real file handle — confirm no loader relies on `f.name`.
    data = loader(io.BytesIO(raw))

    if migrate_on_fallback:
        try:
            save_signed(path, data, key=key)
            logger.info("Fichier %s migré en JSON signé.", path)
        except Exception as exc:  # noqa: BLE001
            logger.error(
                "Migration JSON signé échouée pour %s : %s", path, exc
            )

    return data
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"SignedSerializerError",
|
||||||
|
"SignatureVerificationError",
|
||||||
|
"UnsupportedFormatError",
|
||||||
|
"dumps_signed",
|
||||||
|
"loads_signed",
|
||||||
|
"save_signed",
|
||||||
|
"load_signed",
|
||||||
|
]
|
||||||
@@ -26,11 +26,15 @@ from PIL import Image
|
|||||||
import logging
|
import logging
|
||||||
import threading
|
import threading
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
import pickle
|
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from core.models import BBox
|
from core.models import BBox
|
||||||
from core.embedding.fusion_engine import FusionEngine
|
from core.embedding.fusion_engine import FusionEngine
|
||||||
|
from core.security.signed_serializer import (
|
||||||
|
SignatureVerificationError,
|
||||||
|
load_signed,
|
||||||
|
save_signed,
|
||||||
|
)
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -521,29 +525,74 @@ class VisualEmbeddingManager:
|
|||||||
|
|
||||||
logger.debug(f"Éviction de {num_to_remove} entrées du cache")
|
logger.debug(f"Éviction de {num_to_remove} entrées du cache")
|
||||||
|
|
||||||
|
def _entry_to_dict(self, entry: "EmbeddingCacheEntry") -> Dict[str, Any]:
|
||||||
|
"""Convertit une entrée du cache en dict JSON-serialisable."""
|
||||||
|
return {
|
||||||
|
"embedding": entry.embedding, # numpy → encodé par signed_serializer
|
||||||
|
"signature": entry.signature,
|
||||||
|
"created_at": entry.created_at,
|
||||||
|
"access_count": entry.access_count,
|
||||||
|
"last_accessed": entry.last_accessed,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _dict_to_entry(self, data: Any) -> Optional["EmbeddingCacheEntry"]:
    """Rebuild an EmbeddingCacheEntry from its dict form (signed-JSON
    format), or accept an already-typed object (legacy pickle fallback).

    Returns None when the payload is not usable.
    """
    # Legacy pickle path: the object may already be fully typed.
    if isinstance(data, EmbeddingCacheEntry):
        return data
    if not isinstance(data, dict):
        return None
    try:
        entry = EmbeddingCacheEntry(
            embedding=np.asarray(data["embedding"]),
            signature=data["signature"],
            created_at=data["created_at"],
            access_count=int(data.get("access_count", 0)),
            last_accessed=data.get("last_accessed"),
        )
    except (KeyError, TypeError, ValueError) as exc:
        logger.warning(f"Entrée de cache invalide ignorée: {exc}")
        return None
    return entry
|
||||||
|
|
||||||
def _load_persistent_cache(self):
    """Load the persistent cache from disk (HMAC-signed JSON, with a
    legacy-pickle fallback and automatic migration handled by
    ``load_signed``)."""
    if not self.cache_persistence_path or not os.path.exists(self.cache_persistence_path):
        return

    try:
        cached_data = load_signed(self.cache_persistence_path)
    except SignatureVerificationError:
        # Tampered file or different key: drop the cache, never trust it.
        logger.error(
            "Cache persistant %s altéré (HMAC invalide) — ignoré.",
            self.cache_persistence_path,
        )
        return
    except Exception as e:
        logger.warning(f"Erreur lors du chargement du cache persistant: {e}")
        return

    if not isinstance(cached_data, dict):
        logger.warning("Format de cache inattendu — ignoré.")
        return

    # Drop entries older than 24h.
    cutoff_time = datetime.now() - timedelta(hours=24)
    loaded = 0
    for signature, payload in cached_data.items():
        entry = self._dict_to_entry(payload)
        if entry is None:
            continue
        if entry.created_at > cutoff_time:
            self._embedding_cache[signature] = entry
            loaded += 1

    logger.info(f"Cache persistant chargé: {loaded} entrées")
def _save_persistent_cache(self):
|
def _save_persistent_cache(self):
|
||||||
"""Sauvegarde le cache sur disque"""
|
"""Sauvegarde le cache sur disque en JSON signé HMAC."""
|
||||||
if not self.cache_persistence_path:
|
if not self.cache_persistence_path:
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -552,9 +601,12 @@ class VisualEmbeddingManager:
|
|||||||
os.makedirs(os.path.dirname(self.cache_persistence_path), exist_ok=True)
|
os.makedirs(os.path.dirname(self.cache_persistence_path), exist_ok=True)
|
||||||
|
|
||||||
with self._cache_lock:
|
with self._cache_lock:
|
||||||
with open(self.cache_persistence_path, 'wb') as f:
|
serializable = {
|
||||||
pickle.dump(dict(self._embedding_cache), f)
|
signature: self._entry_to_dict(entry)
|
||||||
|
for signature, entry in self._embedding_cache.items()
|
||||||
|
}
|
||||||
|
|
||||||
|
save_signed(self.cache_persistence_path, serializable)
|
||||||
logger.debug("Cache persistant sauvegardé")
|
logger.debug("Cache persistant sauvegardé")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
@@ -14,8 +14,9 @@ import asyncio
|
|||||||
import logging
|
import logging
|
||||||
import json
|
import json
|
||||||
import base64
|
import base64
|
||||||
import pickle
|
|
||||||
import gzip
|
import gzip
|
||||||
|
import pickle # noqa: S403 - usage legacy restreint au fallback de migration
|
||||||
|
import io
|
||||||
from typing import Dict, List, Optional, Any, Tuple
|
from typing import Dict, List, Optional, Any, Tuple
|
||||||
from dataclasses import dataclass, asdict
|
from dataclasses import dataclass, asdict
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
@@ -24,6 +25,12 @@ import numpy as np
|
|||||||
|
|
||||||
from core.visual.visual_target_manager import VisualTarget, VisualTargetManager
|
from core.visual.visual_target_manager import VisualTarget, VisualTargetManager
|
||||||
from core.visual.screenshot_validation_manager import ScreenshotValidationManager, ValidationResult
|
from core.visual.screenshot_validation_manager import ScreenshotValidationManager, ValidationResult
|
||||||
|
from core.security.signed_serializer import (
|
||||||
|
SignatureVerificationError,
|
||||||
|
UnsupportedFormatError,
|
||||||
|
dumps_signed,
|
||||||
|
loads_signed,
|
||||||
|
)
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -435,7 +442,7 @@ class VisualPersistenceManager:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
async def _serialize_workflow_data(self, workflow_data: VisualWorkflowData) -> bytes:
|
async def _serialize_workflow_data(self, workflow_data: VisualWorkflowData) -> bytes:
|
||||||
"""Sérialise les données d'un workflow"""
|
"""Sérialise les données d'un workflow en JSON signé HMAC."""
|
||||||
# Convertir en dictionnaire
|
# Convertir en dictionnaire
|
||||||
data_dict = asdict(workflow_data)
|
data_dict = asdict(workflow_data)
|
||||||
|
|
||||||
@@ -456,13 +463,28 @@ class VisualPersistenceManager:
|
|||||||
]
|
]
|
||||||
data_dict['validation_history'] = serialized_history
|
data_dict['validation_history'] = serialized_history
|
||||||
|
|
||||||
# Convertir en bytes
|
# JSON signé HMAC (cf. core.security.signed_serializer)
|
||||||
return pickle.dumps(data_dict)
|
return dumps_signed(data_dict)
|
||||||
|
|
||||||
async def _deserialize_workflow_data(self, data: bytes) -> VisualWorkflowData:
|
async def _deserialize_workflow_data(self, data: bytes) -> VisualWorkflowData:
|
||||||
"""Désérialise les données d'un workflow"""
|
"""Désérialise les données d'un workflow (JSON signé HMAC ;
|
||||||
# Désérialiser le dictionnaire
|
fallback pickle legacy avec WARNING pour migrer les anciens fichiers)."""
|
||||||
data_dict = pickle.loads(data)
|
try:
|
||||||
|
data_dict = loads_signed(data)
|
||||||
|
except SignatureVerificationError:
|
||||||
|
# Fichier altéré ou clé différente : on refuse sans fallback.
|
||||||
|
logger.error("Workflow visuel : signature HMAC invalide — refus.")
|
||||||
|
raise
|
||||||
|
except UnsupportedFormatError:
|
||||||
|
# Ancien format pickle : fallback explicite et bruyant.
|
||||||
|
import os
|
||||||
|
if os.getenv("RPA_ALLOW_PICKLE_FALLBACK", "1") == "0":
|
||||||
|
raise
|
||||||
|
logger.warning(
|
||||||
|
"Workflow visuel au format pickle legacy — lecture de compat, "
|
||||||
|
"ré-écrire en JSON signé dès que possible."
|
||||||
|
)
|
||||||
|
data_dict = pickle.loads(data) # noqa: S301 - fallback legacy
|
||||||
|
|
||||||
# Reconstruire les objets
|
# Reconstruire les objets
|
||||||
workflow_data = VisualWorkflowData(
|
workflow_data = VisualWorkflowData(
|
||||||
|
|||||||
369
core/workflow/execution_compiler.py
Normal file
369
core/workflow/execution_compiler.py
Normal file
@@ -0,0 +1,369 @@
|
|||||||
|
# core/workflow/execution_compiler.py
|
||||||
|
"""
|
||||||
|
ExecutionCompiler — Compile un WorkflowIR en ExecutionPlan.
|
||||||
|
|
||||||
|
Pièce maîtresse de l'architecture V4.
|
||||||
|
"Le LLM prépare et compile. Le runtime exécute."
|
||||||
|
|
||||||
|
Le compilateur :
|
||||||
|
1. Prend chaque étape du WorkflowIR
|
||||||
|
2. Compile une stratégie de résolution pour chaque action (OCR > template > VLM)
|
||||||
|
3. Définit les timeouts, retries, fallbacks et recovery
|
||||||
|
4. Produit un ExecutionPlan déterministe et borné
|
||||||
|
|
||||||
|
L'objectif : zéro VLM au runtime pour les cas normaux.
|
||||||
|
Le VLM est un exception handler, pas le chemin principal.
|
||||||
|
|
||||||
|
Le compilateur utilise :
|
||||||
|
- Les données de l'enregistrement (crops, textes OCR) pour pré-compiler
|
||||||
|
- L'historique d'apprentissage (ReplayLearner) pour choisir la meilleure stratégie
|
||||||
|
- Le contexte métier (DomainContext) pour adapter les paramètres
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
import uuid
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
from .workflow_ir import WorkflowIR, Step, Action
|
||||||
|
from .execution_plan import (
|
||||||
|
ExecutionPlan, ExecutionNode, ResolutionStrategy, SuccessCondition,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Temps estimé par type d'action (ms)
|
||||||
|
_ACTION_TIME_ESTIMATES = {
|
||||||
|
"click": 200, # OCR lookup + clic
|
||||||
|
"type": 500, # Frappe char-by-char
|
||||||
|
"key_combo": 100,
|
||||||
|
"wait": 0, # Le duration_ms est dans l'action
|
||||||
|
"scroll": 200,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class ExecutionCompiler:
    """Compile a WorkflowIR into an ExecutionPlan.

    Usage:
        compiler = ExecutionCompiler()
        plan = compiler.compile(workflow_ir, target_machine="VM_Win11")
        plan.save("data/plans/")
    """

    def __init__(self, learning_dir: str = ""):
        # Directory where ReplayLearner persists past replay outcomes.
        self._learning_dir = learning_dir or "data/learning/replay_results"

    def compile(
        self,
        ir: WorkflowIR,
        target_machine: str = "",
        target_resolution: str = "1280x800",
        params: Optional[Dict[str, str]] = None,
        surface_profile=None,
    ) -> ExecutionPlan:
        """Compile a WorkflowIR into an ExecutionPlan.

        Args:
            ir: The WorkflowIR to compile
            target_machine: Target machine (used to adapt strategies)
            target_resolution: Resolution of the target machine
            params: Variables to substitute
            surface_profile: Optional SurfaceProfile used to tune parameters.
                When provided, timeouts/thresholds/retries come from the profile.
        """
        t_start = time.time()

        plan = ExecutionPlan(
            plan_id=f"plan_{uuid.uuid4().hex[:8]}",
            workflow_id=ir.workflow_id,
            version=ir.version,
            created_at=time.time(),
            domain=ir.domain,
            target_machine=target_machine,
            target_resolution=target_resolution,
            variables=params or {v.name: v.default for v in ir.variables},
        )

        # Consult the learning history once, before compiling any step.
        learned_strategies = self._load_learned_strategies()

        # Compile each step into one node per action.
        for step in ir.steps:
            nodes = self._compile_step(step, ir, learned_strategies, surface_profile)
            plan.nodes.extend(nodes)

        # Compilation statistics.
        plan.total_nodes = len(plan.nodes)
        plan.nodes_with_ocr = self._count_primary(plan, "ocr")
        plan.nodes_with_template = self._count_primary(plan, "template")
        plan.nodes_with_vlm = self._count_primary(plan, "vlm")
        plan.estimated_duration_s = sum(
            _ACTION_TIME_ESTIMATES.get(n.action_type, 200) + n.duration_ms
            for n in plan.nodes
        ) / 1000.0

        elapsed = time.time() - t_start
        logger.info(
            f"Compilation: {plan.total_nodes} nœuds en {elapsed:.1f}s — "
            f"OCR={plan.nodes_with_ocr}, template={plan.nodes_with_template}, "
            f"VLM={plan.nodes_with_vlm} (exception handler)"
        )

        return plan

    @staticmethod
    def _count_primary(plan: ExecutionPlan, method: str) -> int:
        """Count nodes whose primary resolution strategy uses *method*."""
        return sum(
            1 for n in plan.nodes
            if n.strategy_primary and n.strategy_primary.method == method
        )

    def _compile_step(
        self,
        step: Step,
        ir: WorkflowIR,
        learned: Dict[str, str],
        surface_profile=None,
    ) -> List[ExecutionNode]:
        """Compile one step into a list of execution nodes (one per action)."""
        nodes = []
        for i, action in enumerate(step.actions):
            node = self._compile_action(
                action=action,
                step=step,
                action_index=i,
                ir=ir,
                learned=learned,
                surface_profile=surface_profile,
            )
            nodes.append(node)
        return nodes

    def _compile_action(
        self,
        action: Action,
        step: Step,
        action_index: int,
        ir: WorkflowIR,
        learned: Dict[str, str],
        surface_profile=None,
    ) -> ExecutionNode:
        """Compile one action into an execution node with its resolution strategy."""

        node = ExecutionNode(
            node_id=f"n_{step.step_id}_{action_index}",
            action_type=action.type,
            intent=step.intent,
            step_id=step.step_id,
            is_optional=step.is_optional,
        )

        # Defaults, overridden by the surface_profile when provided.
        default_click_timeout = 10000
        default_click_retries = 2
        if surface_profile is not None:
            default_click_timeout = getattr(surface_profile, "timeout_click_ms", 10000)
            default_click_retries = getattr(surface_profile, "max_retries", 2)

        if action.type == "click":
            # Compile the resolution strategies for this click.
            node.strategy_primary, node.strategy_fallbacks = self._compile_click_resolution(
                action, step, learned, surface_profile,
            )
            node.timeout_ms = default_click_timeout
            node.max_retries = default_click_retries
            node.recovery_action = "escape"

            # STRICT success condition based on the expected window title.
            # When expected_window_after is set, use title_match (strict);
            # otherwise fall back to screen_changed (weak).
            expected_after = getattr(action, "expected_window_after", "")
            if expected_after and expected_after != "unknown_window":
                node.success_condition = SuccessCondition(
                    method="title_match",
                    expected_title=expected_after,
                    description=step.postcondition or f"Fenêtre attendue: {expected_after}",
                )
            elif step.postcondition:
                node.success_condition = SuccessCondition(
                    method="screen_changed",
                    description=step.postcondition,
                )

            # Strict precondition: the active window BEFORE the click must
            # match. Stored as a dedicated field for the executor.
            expected_before = getattr(action, "expected_window_before", "")
            if expected_before and expected_before != "unknown_window":
                node.expected_window_before = expected_before

        elif action.type == "type":
            node.text = action.text
            node.variable_name = action.text.strip("{}") if action.variable else ""
            node.timeout_ms = 5000
            node.max_retries = 0  # No retry on keystrokes
            node.recovery_action = "undo"

        elif action.type == "key_combo":
            node.keys = action.keys
            node.timeout_ms = 3000
            node.max_retries = 0
            node.recovery_action = "undo"

        elif action.type == "wait":
            node.duration_ms = action.duration_ms or 1000
            # Fix: derive the timeout from the effective duration. The previous
            # `action.duration_ms + 2000` raised TypeError when duration_ms was
            # None and ignored the 1000 ms default applied just above.
            node.timeout_ms = node.duration_ms + 2000
            node.max_retries = 0
            node.recovery_action = "none"

        elif action.type == "scroll":
            node.timeout_ms = 3000
            node.max_retries = 0
            node.recovery_action = "none"

        return node

    def _compile_click_resolution(
        self,
        action: Action,
        step: Step,
        learned: Dict[str, str],
        surface_profile=None,
    ) -> tuple:
        """Compile the resolution strategies for a click.

        Uses the visual enrichment data (action._enrichment) when available:
        - by_text (OCR)
        - anchor_image_base64 (template)
        - vlm_description (VLM)
        - uia_snapshot (UIA on native Windows)

        Priority order (varies with the surface):
        1. UIA (snapshot available AND native surface AND helper available) — 10-20ms
        2. Exact OCR (visible text) — 100-200ms
        3. Template matching (crop available) — 10ms
        4. VLM — exception handler

        Learning history may reorder when a strategy worked better before.
        """
        primary = None
        fallbacks = []

        # Read the visual enrichment when available.
        enrichment = getattr(action, "_enrichment", None) or {}
        by_text_from_enrich = enrichment.get("by_text", "")
        anchor_b64 = enrichment.get("anchor_image_base64", "")
        vlm_desc_from_enrich = enrichment.get("vlm_description", "")
        uia_snapshot = enrichment.get("uia_snapshot") or {}

        # Text source: enrichment > anchor_hint > target.
        target_text = by_text_from_enrich or action.anchor_hint or action.target
        # Never use "unknown_window" as OCR text.
        if target_text == "unknown_window":
            target_text = ""

        learned_method = learned.get(target_text, "")

        # Is UIA usable on this surface?
        uia_eligible = False
        if surface_profile is not None:
            from .surface_classifier import SurfaceType
            surface_type = getattr(surface_profile, "surface_type", None)
            uia_available = getattr(surface_profile, "uia_available", False)
            uia_eligible = (
                uia_available
                and surface_type == SurfaceType.WINDOWS_NATIVE
            )
        else:
            # Without an explicit profile, enable UIA when a snapshot exists
            # (the agent decides at runtime whether it can use it).
            uia_eligible = bool(uia_snapshot)

        # UIA strategy — fastest and most precise on native Windows.
        if uia_snapshot and uia_snapshot.get("name") and uia_eligible:
            primary = ResolutionStrategy(
                method="uia",
                uia_name=uia_snapshot.get("name", ""),
                uia_control_type=uia_snapshot.get("control_type", ""),
                uia_automation_id=uia_snapshot.get("automation_id", ""),
                uia_parent_path=uia_snapshot.get("parent_path", []),
                threshold=0.95,
            )

        # OCR strategy — visible text is the best anchor.
        if target_text:
            ocr_strategy = ResolutionStrategy(
                method="ocr",
                target_text=target_text,
                threshold=0.7,
            )
            if primary is None and (
                not learned_method
                or learned_method in ("ocr", "som_text_match", "hybrid_text_direct", "v4_ocr")
            ):
                primary = ocr_strategy
            else:
                fallbacks.append(ocr_strategy)

        # Template strategy — the visual crop from the recording.
        if anchor_b64:
            template_strategy = ResolutionStrategy(
                method="template",
                target_text=target_text,
                anchor_b64=anchor_b64,
                threshold=0.85,
            )
            if primary is None and learned_method in (
                "anchor_template", "template_matching", "v4_template"
            ):
                primary = template_strategy
            else:
                fallbacks.append(template_strategy)

        # VLM strategy — exception handler (last resort).
        vlm_description = vlm_desc_from_enrich or action.target or step.intent
        if vlm_description and vlm_description != "unknown_window":
            fallbacks.append(ResolutionStrategy(
                method="vlm",
                vlm_description=vlm_description,
                threshold=0.6,
            ))

        # No primary found: promote the first fallback.
        if primary is None:
            if fallbacks:
                primary = fallbacks.pop(0)
            else:
                # Last resort: VLM with the business intent.
                primary = ResolutionStrategy(
                    method="vlm",
                    vlm_description=step.intent or "élément UI",
                    threshold=0.5,
                )

        return primary, fallbacks

    def _load_learned_strategies(self) -> Dict[str, str]:
        """Load learned strategies (ReplayLearner) as target → best method."""
        try:
            from agent_v0.server_v1.replay_learner import ReplayLearner
            learner = ReplayLearner(learning_dir=self._learning_dir)
            strategies: Dict[str, str] = {}
            # NOTE(review): reads ReplayLearner's private `_recent` attribute —
            # confirm this is the intended API.
            for outcome in learner._recent:
                if outcome.success and outcome.resolution_method and outcome.target_description:
                    strategies[outcome.target_description] = outcome.resolution_method
            return strategies
        except Exception:
            # Learning history is optional; compile proceeds without it.
            return {}
|
||||||
285
core/workflow/execution_plan.py
Normal file
285
core/workflow/execution_plan.py
Normal file
@@ -0,0 +1,285 @@
|
|||||||
|
# core/workflow/execution_plan.py
|
||||||
|
"""
|
||||||
|
ExecutionPlan — Plan d'exécution strict, borné et versionné.
|
||||||
|
|
||||||
|
C'est ce que le runtime exécute. Pas d'improvisation — tout est pré-compilé :
|
||||||
|
- chaque nœud a une stratégie de résolution primaire + fallbacks
|
||||||
|
- chaque nœud a un timeout, un retry policy, une condition de succès
|
||||||
|
- le VLM n'intervient qu'en exception handler (pas en chemin principal)
|
||||||
|
|
||||||
|
Le runtime ne fait que : exécuter → observer → vérifier → suite ou fallback.
|
||||||
|
|
||||||
|
Cycle : WorkflowIR → ExecutionCompiler → ExecutionPlan → Runtime
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
import uuid
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ResolutionStrategy:
    """Visual resolution strategy for one UI element.

    Pre-compiled — the runtime does not need the VLM to resolve.
    """
    method: str  # "uia", "ocr", "template", "position", "vlm", "dom"
    target_text: str = ""  # Text to look for (OCR)
    anchor_b64: str = ""  # Reference crop (template matching)
    zone: Dict[str, float] = field(default_factory=dict)  # Search zone {x_min, y_min, x_max, y_max}
    position_hint: str = ""  # e.g. "en haut à droite", "dans la barre des tâches"
    vlm_description: str = ""  # VLM description (last resort)
    threshold: float = 0.8  # Confidence threshold

    # UIA strategy (Windows UI Automation).
    # Used when the recording captured a UIA snapshot at click time.
    # On replay, the Windows agent calls lea_uia.exe find --name ... to
    # relocate the element by its logical path (100% reliable on native Windows).
    uia_name: str = ""  # Name property of the element
    uia_control_type: str = ""  # ControlType (Button, Edit, MenuItem, ...)
    uia_automation_id: str = ""  # AutomationId (optional)
    uia_parent_path: List[Dict[str, str]] = field(default_factory=list)

    # DOM strategy (web with CDP enabled) — groundwork for later.
    dom_selector: str = ""  # CSS selector
    dom_xpath: str = ""  # XPath
    dom_url_pattern: str = ""  # URL pattern to match

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a dict, omitting empty optional fields.

        NOTE(review): `anchor_b64` is deliberately truncated for readability,
        so `from_dict(to_dict(x))` cannot restore the full crop — confirm this
        output is never used for round-trip persistence.
        """
        d: Dict[str, Any] = {"method": self.method}
        if self.target_text:
            d["target_text"] = self.target_text
        if self.anchor_b64:
            d["anchor_b64"] = self.anchor_b64[:50] + "..."  # Truncated for readability
        # Remaining optional fields carry their value verbatim when non-empty.
        for name in (
            "zone",
            "position_hint",
            "vlm_description",
            "uia_name",
            "uia_control_type",
            "uia_automation_id",
            "uia_parent_path",
            "dom_selector",
            "dom_xpath",
            "dom_url_pattern",
        ):
            value = getattr(self, name)
            if value:
                d[name] = value
        d["threshold"] = self.threshold
        return d

    @classmethod
    def from_dict(cls, d: Dict) -> "ResolutionStrategy":
        """Build a strategy from a dict, silently dropping unknown keys."""
        known = cls.__dataclass_fields__
        return cls(**{k: v for k, v in d.items() if k in known})
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class SuccessCondition:
    """Success condition of a node — how to verify the action worked."""
    method: str = "screen_changed"  # "screen_changed", "title_match", "text_visible", "none"
    expected_title: str = ""  # Window title expected after the action
    expected_text: str = ""  # Text that must appear
    description: str = ""  # Description for the Critic VLM (exception handler)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a dict, omitting empty optional fields."""
        d: Dict[str, Any] = {"method": self.method}
        for name in ("expected_title", "expected_text", "description"):
            value = getattr(self, name)
            if value:
                d[name] = value
        return d

    @classmethod
    def from_dict(cls, d: Dict) -> "SuccessCondition":
        """Build a condition from a dict, silently dropping unknown keys."""
        return cls(**{k: v for k, v in d.items() if k in cls.__dataclass_fields__})
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ExecutionNode:
    """Execution node -- one action to execute with its full resolution strategy.

    Serialization contract: ``to_dict()`` must round-trip every field that
    ``from_dict()`` reads; defaults in ``from_dict()`` exist only for plans
    written by older versions.
    """

    node_id: str
    action_type: str  # click, type, key_combo, wait, scroll
    intent: str = ""  # business intent (for logging/audit)

    # Pre-compiled visual resolution. String forward references: the
    # strategy classes are defined elsewhere in this module, and quoting
    # keeps class creation independent of definition order.
    strategy_primary: Optional["ResolutionStrategy"] = None
    strategy_fallbacks: List["ResolutionStrategy"] = field(default_factory=list)

    # Action payload
    text: str = ""  # text to type
    keys: List[str] = field(default_factory=list)
    duration_ms: int = 0
    variable_name: str = ""  # set when the text comes from a variable

    # Execution bounds
    timeout_ms: int = 10000  # timeout for this action
    max_retries: int = 1  # number of retries allowed
    retry_delay_ms: int = 2000  # delay between retries

    # Verification
    success_condition: Optional["SuccessCondition"] = None

    # Strict window control (pre-condition)
    expected_window_before: str = ""  # active window must match BEFORE the action

    # Recovery
    recovery_action: str = "escape"  # "escape", "undo", "close", "none"

    # Context
    step_id: str = ""  # reference back to the WorkflowIR step
    is_optional: bool = False

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-friendly dict (optional fields only when set)."""
        d: Dict[str, Any] = {
            "node_id": self.node_id,
            "action_type": self.action_type,
        }
        if self.intent:
            d["intent"] = self.intent
        if self.strategy_primary:
            d["strategy_primary"] = self.strategy_primary.to_dict()
        if self.strategy_fallbacks:
            d["strategy_fallbacks"] = [s.to_dict() for s in self.strategy_fallbacks]
        if self.text:
            d["text"] = self.text
        if self.keys:
            d["keys"] = self.keys
        if self.duration_ms:
            d["duration_ms"] = self.duration_ms
        if self.variable_name:
            d["variable_name"] = self.variable_name
        d["timeout_ms"] = self.timeout_ms
        d["max_retries"] = self.max_retries
        # BUGFIX: retry_delay_ms was previously never serialized even though
        # from_dict() reads it -- a round trip silently reset it to 2000.
        d["retry_delay_ms"] = self.retry_delay_ms
        if self.success_condition:
            d["success_condition"] = self.success_condition.to_dict()
        if self.expected_window_before:
            d["expected_window_before"] = self.expected_window_before
        d["recovery_action"] = self.recovery_action
        # BUGFIX: step_id was previously never serialized, losing the link
        # back to the originating WorkflowIR step on reload.
        if self.step_id:
            d["step_id"] = self.step_id
        if self.is_optional:
            d["is_optional"] = True
        return d

    @classmethod
    def from_dict(cls, d: Dict) -> "ExecutionNode":
        """Rebuild a node from to_dict() output (tolerates missing keys)."""
        primary = ResolutionStrategy.from_dict(d["strategy_primary"]) if d.get("strategy_primary") else None
        fallbacks = [ResolutionStrategy.from_dict(f) for f in d.get("strategy_fallbacks", [])]
        success = SuccessCondition.from_dict(d["success_condition"]) if d.get("success_condition") else None
        return cls(
            node_id=d["node_id"],
            action_type=d["action_type"],
            intent=d.get("intent", ""),
            strategy_primary=primary,
            strategy_fallbacks=fallbacks,
            text=d.get("text", ""),
            keys=d.get("keys", []),
            duration_ms=d.get("duration_ms", 0),
            variable_name=d.get("variable_name", ""),
            timeout_ms=d.get("timeout_ms", 10000),
            max_retries=d.get("max_retries", 1),
            retry_delay_ms=d.get("retry_delay_ms", 2000),
            success_condition=success,
            expected_window_before=d.get("expected_window_before", ""),
            recovery_action=d.get("recovery_action", "escape"),
            step_id=d.get("step_id", ""),
            is_optional=d.get("is_optional", False),
        )
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ExecutionPlan:
    """Versioned execution plan -- what the runtime executes."""
    plan_id: str
    workflow_id: str  # reference to the source WorkflowIR
    version: int = 1
    created_at: float = 0.0

    # Execution nodes (ordered sequence)
    nodes: List[ExecutionNode] = field(default_factory=list)

    # Variables to substitute before execution
    variables: Dict[str, str] = field(default_factory=dict)

    # Global configuration
    domain: str = "generic"
    target_machine: str = ""  # target machine
    target_resolution: str = ""  # e.g. "1280x800", "1920x1080"

    # Compilation metrics
    total_nodes: int = 0
    nodes_with_ocr: int = 0  # OCR resolution (fast, precise)
    nodes_with_template: int = 0  # template resolution (fast)
    nodes_with_vlm: int = 0  # VLM resolution (slow, last resort)
    estimated_duration_s: float = 0.0

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the plan; compilation metrics are nested under "stats"."""
        return {
            "plan_id": self.plan_id,
            "workflow_id": self.workflow_id,
            "version": self.version,
            "created_at": self.created_at,
            "domain": self.domain,
            "target_machine": self.target_machine,
            "target_resolution": self.target_resolution,
            "variables": self.variables,
            "nodes": [n.to_dict() for n in self.nodes],
            "stats": {
                "total_nodes": self.total_nodes,
                "nodes_with_ocr": self.nodes_with_ocr,
                "nodes_with_template": self.nodes_with_template,
                # rounded to one decimal for readable JSON output
                "estimated_duration_s": round(self.estimated_duration_s, 1),
                "nodes_with_vlm": self.nodes_with_vlm,
            },
        }

    def to_json(self, indent: int = 2) -> str:
        """Serialize to JSON, keeping non-ASCII characters readable."""
        return json.dumps(self.to_dict(), ensure_ascii=False, indent=indent)

    @classmethod
    def from_dict(cls, d: Dict) -> "ExecutionPlan":
        """Rebuild a plan from to_dict() output; missing stats fall back to 0
        (total_nodes falls back to the actual node count)."""
        nodes = [ExecutionNode.from_dict(n) for n in d.get("nodes", [])]
        stats = d.get("stats", {})
        return cls(
            plan_id=d["plan_id"],
            workflow_id=d.get("workflow_id", ""),
            version=d.get("version", 1),
            created_at=d.get("created_at", 0),
            domain=d.get("domain", "generic"),
            target_machine=d.get("target_machine", ""),
            target_resolution=d.get("target_resolution", ""),
            variables=d.get("variables", {}),
            nodes=nodes,
            total_nodes=stats.get("total_nodes", len(nodes)),
            nodes_with_ocr=stats.get("nodes_with_ocr", 0),
            nodes_with_template=stats.get("nodes_with_template", 0),
            nodes_with_vlm=stats.get("nodes_with_vlm", 0),
            estimated_duration_s=stats.get("estimated_duration_s", 0),
        )

    @classmethod
    def from_json(cls, json_str: str) -> "ExecutionPlan":
        """Parse a JSON string produced by to_json()."""
        return cls.from_dict(json.loads(json_str))

    def save(self, directory: str) -> Path:
        """Write the plan as <plan_id>.json under *directory* (created if needed).

        Returns the path of the written file.
        """
        dir_path = Path(directory)
        dir_path.mkdir(parents=True, exist_ok=True)
        file_path = dir_path / f"{self.plan_id}.json"
        file_path.write_text(self.to_json(), encoding="utf-8")
        return file_path

    @classmethod
    def load(cls, file_path: str) -> "ExecutionPlan":
        """Read back a plan previously written by save()."""
        return cls.from_json(Path(file_path).read_text(encoding="utf-8"))
|
||||||
627
core/workflow/ir_builder.py
Normal file
627
core/workflow/ir_builder.py
Normal file
@@ -0,0 +1,627 @@
|
|||||||
|
# core/workflow/ir_builder.py
|
||||||
|
"""
|
||||||
|
IRBuilder — Transforme une RawTrace en WorkflowIR.
|
||||||
|
|
||||||
|
C'est le "compilateur de savoir-faire" :
|
||||||
|
RawTrace (clics bruts) → WorkflowIR (connaissance structurée)
|
||||||
|
|
||||||
|
Le builder utilise gemma4 pour COMPRENDRE ce que l'utilisateur a fait :
|
||||||
|
- Segmenter les actions en étapes logiques
|
||||||
|
- Identifier l'intention de chaque étape
|
||||||
|
- Détecter les variables (données qui changent entre les exécutions)
|
||||||
|
- Définir les pré/postconditions
|
||||||
|
|
||||||
|
Le builder est appelé UNE SEULE FOIS après l'enregistrement.
|
||||||
|
Le WorkflowIR produit est ensuite réutilisé pour chaque replay.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
from .workflow_ir import WorkflowIR, Step, Action, Variable
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class IRBuilder:
|
||||||
|
"""Construit un WorkflowIR depuis une RawTrace (événements bruts).
|
||||||
|
|
||||||
|
Usage :
|
||||||
|
builder = IRBuilder()
|
||||||
|
ir = builder.build(
|
||||||
|
events=raw_events,
|
||||||
|
session_id="sess_xxx",
|
||||||
|
domain="tim_codage",
|
||||||
|
)
|
||||||
|
ir.save("data/workflows/")
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, gemma4_port: str = ""):
|
||||||
|
self._gemma4_port = gemma4_port or os.environ.get("GEMMA4_PORT", "11435")
|
||||||
|
self._gemma4_url = f"http://localhost:{self._gemma4_port}/api/chat"
|
||||||
|
|
||||||
|
def build(
    self,
    events: List[Dict[str, Any]],
    session_id: str = "",
    session_dir: str = "",
    domain: str = "generic",
    name: str = "",
) -> WorkflowIR:
    """Build a WorkflowIR from raw recorded events.

    Pipeline:
        1. Filter out noise events
        2. Segment into logical steps (by window/intent change)
        3. For each step, identify the intent via gemma4
        4. Detect variables
        5. Define pre/postconditions

    Args:
        events: raw recorder events (each possibly wrapped in {"event": ...}).
        session_id: recording session id, stored as the IR's provenance.
        session_dir: session directory with screenshots; when missing or
            invalid, visual enrichment is disabled (warning logged).
        domain: business-domain key, forwarded to the intent analysis.
        name: workflow display name; defaults to a timestamped one.

    Returns:
        The populated WorkflowIR (empty steps when nothing was actionable).
    """
    t_start = time.time()

    # Resolve session_dir_path for visual enrichment
    session_dir_path = Path(session_dir) if session_dir else None
    if session_dir_path and not session_dir_path.is_dir():
        logger.warning(
            f"IRBuilder: session_dir '{session_dir}' introuvable — "
            f"enrichissement visuel désactivé"
        )
        session_dir_path = None

    # Create the empty WorkflowIR
    ir = WorkflowIR.new(
        name=name or f"Workflow du {time.strftime('%d/%m/%Y %H:%M')}",
        domain=domain,
        learned_from=session_id,
    )

    # 1. Keep only the useful (actionable) events
    actionable = self._filter_events(events)
    if not actionable:
        logger.warning("IRBuilder: aucun événement actionable")
        return ir

    # 2. Detect the applications involved
    ir.applications = self._detect_applications(actionable)

    # 3. Segment into logical steps
    segments = self._segment_into_steps(actionable)

    # 4. Build a Step from each segment
    for i, segment in enumerate(segments):
        step = self._build_step(
            segment=segment,
            step_index=i,
            total_steps=len(segments),
            workflow_name=ir.name,
            domain=domain,
            session_dir_path=session_dir_path,
        )
        ir.steps.append(step)

    # 5. Strict control: fill expected_window_before/after for each action.
    # This is the key to robustness: every action knows which window it
    # must run in AND which window it must end up in.
    self._attach_window_expectations(ir, actionable)

    # 6. Detect variables
    ir.variables = self._detect_variables(ir.steps, actionable)

    elapsed = time.time() - t_start
    logger.info(
        f"IRBuilder: WorkflowIR construit en {elapsed:.1f}s — "
        f"{len(ir.steps)} étapes, {len(ir.variables)} variables, "
        f"{len(ir.applications)} applications"
    )

    return ir
|
||||||
|
|
||||||
|
def _filter_events(self, events: List[Dict]) -> List[Dict]:
|
||||||
|
"""Filtrer les événements parasites.
|
||||||
|
|
||||||
|
Exclusions :
|
||||||
|
1. Types d'événements de bruit (heartbeat, focus_change, action_result)
|
||||||
|
2. Clics dont la CIBLE UIA est dans Léa elle-même
|
||||||
|
(via uia_snapshot.parent_path — on vérifie où va le clic, pas d'où
|
||||||
|
il vient). Un clic "sur la taskbar" peut avoir window.title="Léa"
|
||||||
|
si Léa avait le focus, mais sa cible UIA est la taskbar.
|
||||||
|
"""
|
||||||
|
ignored_types = {"heartbeat", "focus_change", "action_result", "window_focus_change"}
|
||||||
|
lea_markers = (
|
||||||
|
"léa", "lea -", "léa -", "lea —", "léa —",
|
||||||
|
"lea assistante", "léa assistante",
|
||||||
|
"agent v1",
|
||||||
|
)
|
||||||
|
|
||||||
|
def _uia_target_is_lea(uia_snapshot: dict) -> bool:
|
||||||
|
"""L'élément UIA cliqué est-il dans la fenêtre de Léa ?"""
|
||||||
|
if not uia_snapshot:
|
||||||
|
return False
|
||||||
|
# Vérifier le nom de l'élément lui-même
|
||||||
|
name = (uia_snapshot.get("name", "") or "").lower()
|
||||||
|
if any(m in name for m in lea_markers):
|
||||||
|
return True
|
||||||
|
# Vérifier les parents
|
||||||
|
for parent in uia_snapshot.get("parent_path", []):
|
||||||
|
p_name = (parent.get("name", "") or "").lower()
|
||||||
|
if any(m in p_name for m in lea_markers):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
result = []
|
||||||
|
filtered_lea = 0
|
||||||
|
for raw_evt in events:
|
||||||
|
evt = raw_evt.get("event", raw_evt)
|
||||||
|
evt_type = evt.get("type", "")
|
||||||
|
if evt_type in ignored_types:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Filtrer uniquement les clics dont la CIBLE est dans Léa
|
||||||
|
# (pas les clics depuis Léa vers l'extérieur)
|
||||||
|
if evt_type == "mouse_click":
|
||||||
|
uia = evt.get("uia_snapshot") or {}
|
||||||
|
if _uia_target_is_lea(uia):
|
||||||
|
filtered_lea += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
result.append(evt)
|
||||||
|
|
||||||
|
if filtered_lea > 0:
|
||||||
|
logger.info(
|
||||||
|
f"IRBuilder: {filtered_lea} clic(s) filtré(s) "
|
||||||
|
f"(cible UIA dans la fenêtre Léa)"
|
||||||
|
)
|
||||||
|
return result
|
||||||
|
|
||||||
|
def _attach_window_expectations(self, ir: WorkflowIR, events: List[Dict]) -> None:
    """Fill expected_window_before/after for every action of the workflow.

    This is THE key to strict control: each action knows the window it
    must execute in AND the one that must appear afterwards. Any
    divergence at replay time → immediate STOP.

    We rebuild the sequence of "actionable" events (clicks, typing,
    key combos) and align each workflow Action with its source event,
    recovering:
        - expected_window_before: window title AT THE MOMENT of the click
        - expected_window_after: window title of the NEXT click

    Critical filter: Léa's own window is NEVER a valid target window
    (it is the agent overlay, not the business app). "unknown_window"
    and empty titles are ignored.
    """
    def _is_valid_target_window(title: str) -> bool:
        """A window title is valid as expected_window_* when it is:
        - non-empty and not "unknown_window"
        - not Léa's own window
        """
        if not title or title == "unknown_window":
            return False
        title_lower = title.lower()
        lea_markers = (
            "léa", "lea -", "léa -", "lea —", "léa —",
            "lea assistante", "léa assistante",
            "agent v1",
        )
        for marker in lea_markers:
            if marker in title_lower:
                return False
        return True

    def _extract_uia_root_window(uia_snapshot: dict) -> str:
        """Extract the root window name from a UIA snapshot.

        parent_path holds the clicked element's ancestor hierarchy. The
        first entry with control_type "fenêtre"/"window" is the window
        CONTAINING the clicked element -- the true target.
        """
        if not uia_snapshot:
            return ""
        for parent in uia_snapshot.get("parent_path", []):
            ct = (parent.get("control_type", "") or "").lower()
            if ct in ("fenêtre", "window"):
                name = (parent.get("name", "") or "").strip()
                if name:
                    return name
        return ""

    # Extract the actionable event sequence with their titles.
    # Source of truth for clicks: the UIA parent_path (where the click
    # really lands), falling back to window.title.
    # For type/key_combo events: window.title only.
    event_sequence: List[Dict[str, Any]] = []
    for evt in events:
        t = evt.get("type", "")
        if t not in ("mouse_click", "text_input", "key_combo", "key_press", "scroll"):
            continue

        # Reference title: UIA target takes priority for clicks
        effective_title = ""
        if t == "mouse_click":
            uia = evt.get("uia_snapshot") or {}
            uia_root = _extract_uia_root_window(uia)
            if uia_root and _is_valid_target_window(uia_root):
                effective_title = uia_root

        # Fall back to window.title
        if not effective_title:
            raw_title = evt.get("window", {}).get("title", "") or ""
            if _is_valid_target_window(raw_title):
                effective_title = raw_title

        event_sequence.append({"type": t, "title": effective_title})

    # Align with the workflow's actions (same order as the events)
    flat_actions: List[tuple] = []
    for si, step in enumerate(ir.steps):
        for ai, action in enumerate(step.actions):
            if action.type in ("click", "type", "key_combo"):
                flat_actions.append((si, ai, action))

    # Bound: only the overlap of the two lists can be aligned
    n = min(len(flat_actions), len(event_sequence))

    for i in range(n):
        si, ai, action = flat_actions[i]
        title_now = event_sequence[i]["title"]
        if title_now:
            action.expected_window_before = title_now

        # Look for the next event with a valid title that DIFFERS from
        # the current one (otherwise there is no transition to verify)
        for j in range(i + 1, len(event_sequence)):
            next_title = event_sequence[j]["title"]
            if next_title and next_title != title_now:
                action.expected_window_after = next_title
                break
|
||||||
|
|
||||||
|
def _detect_applications(self, events: List[Dict]) -> List[str]:
|
||||||
|
"""Détecter les applications utilisées."""
|
||||||
|
apps = set()
|
||||||
|
for evt in events:
|
||||||
|
title = evt.get("window", {}).get("title", "")
|
||||||
|
if title and title != "unknown_window":
|
||||||
|
for sep in [" – ", " - ", " — "]:
|
||||||
|
if sep in title:
|
||||||
|
apps.add(title.split(sep)[-1].strip())
|
||||||
|
break
|
||||||
|
return sorted(apps)
|
||||||
|
|
||||||
|
def _segment_into_steps(self, events: List[Dict]) -> List[List[Dict]]:
|
||||||
|
"""Segmenter les événements en étapes logiques.
|
||||||
|
|
||||||
|
Critères de coupure :
|
||||||
|
- Changement d'application (fenêtre différente)
|
||||||
|
- Pause longue (> 5s entre deux événements)
|
||||||
|
- Transition logique (clic → frappe → clic = étapes différentes)
|
||||||
|
"""
|
||||||
|
if not events:
|
||||||
|
return []
|
||||||
|
|
||||||
|
segments = []
|
||||||
|
current_segment = [events[0]]
|
||||||
|
current_app = self._get_app_name(events[0])
|
||||||
|
|
||||||
|
for evt in events[1:]:
|
||||||
|
app = self._get_app_name(evt)
|
||||||
|
evt_type = evt.get("type", "")
|
||||||
|
|
||||||
|
# Coupure par changement d'application
|
||||||
|
app_changed = app and current_app and app != current_app
|
||||||
|
|
||||||
|
# Coupure par pause longue
|
||||||
|
prev_ts = float(current_segment[-1].get("timestamp", 0))
|
||||||
|
curr_ts = float(evt.get("timestamp", 0))
|
||||||
|
long_pause = (curr_ts - prev_ts) > 5.0 if prev_ts > 0 and curr_ts > 0 else False
|
||||||
|
|
||||||
|
# Coupure par transition clic → nouveau clic (nouvelle intention)
|
||||||
|
transition = (
|
||||||
|
evt_type == "mouse_click"
|
||||||
|
and len(current_segment) >= 2
|
||||||
|
and current_segment[-1].get("type") not in ("mouse_click",)
|
||||||
|
)
|
||||||
|
|
||||||
|
if app_changed or long_pause:
|
||||||
|
if current_segment:
|
||||||
|
segments.append(current_segment)
|
||||||
|
current_segment = [evt]
|
||||||
|
current_app = app
|
||||||
|
else:
|
||||||
|
current_segment.append(evt)
|
||||||
|
|
||||||
|
if current_segment:
|
||||||
|
segments.append(current_segment)
|
||||||
|
|
||||||
|
return segments
|
||||||
|
|
||||||
|
def _get_app_name(self, evt: Dict) -> str:
|
||||||
|
"""Extraire le nom d'application depuis un événement."""
|
||||||
|
title = evt.get("window", {}).get("title", "")
|
||||||
|
for sep in [" – ", " - ", " — "]:
|
||||||
|
if sep in title:
|
||||||
|
return title.split(sep)[-1].strip()
|
||||||
|
return title
|
||||||
|
|
||||||
|
def _build_step(
    self,
    segment: List[Dict],
    step_index: int,
    total_steps: int,
    workflow_name: str,
    domain: str,
    session_dir_path: Optional[Path] = None,
) -> Step:
    """Turn one segment of raw events into a Step.

    Converts each event into an (optionally enriched) Action, then asks
    gemma4 for the segment's intent and pre/postconditions; when gemma4
    yields no intent, the plain textual description is used instead.
    """
    converted = [
        act
        for evt in segment
        if (act := self._event_to_action(evt, session_dir_path=session_dir_path))
    ]

    # Textual description of the segment (also the intent fallback)
    description = self._describe_segment(segment)

    intent, pre, post = self._analyze_intent(
        description, step_index, total_steps, workflow_name, domain,
    )

    return Step(
        step_id=f"s{step_index + 1}",
        intent=intent or description,
        precondition=pre,
        postcondition=post,
        actions=converted,
    )
|
||||||
|
|
||||||
|
def _event_to_action(self, evt: Dict, session_dir_path: Optional[Path] = None) -> Optional[Action]:
    """Convert a raw event into an enriched Action.

    For clicks: calls enrich_click_from_screenshot() when the session_dir
    is available, to obtain:
        - by_text (exact OCR text of the clicked element)
        - anchor_image_base64 (80x80 crop for template matching)
        - vlm_description (positional description)
        - window_capture (rect for targeted grounding)

    This enrichment is THE key for the ExecutionCompiler to produce full
    V4 plans with every strategy (OCR + template + VLM).

    Returns None for event types that do not map to an Action.
    """
    evt_type = evt.get("type", "")

    if evt_type == "mouse_click":
        window = evt.get("window", {}).get("title", "")
        pos = evt.get("pos", [0, 0])

        # Base action (fallback when no enrichment is possible)
        action = Action(
            type="click",
            target=window,
            anchor_hint=evt.get("vision_info", {}).get("text", "") if isinstance(evt.get("vision_info"), dict) else "",
        )

        # Visual enrichment via enrich_click_from_screenshot:
        # direct access to the OCR crop + anchor for the ExecutionCompiler
        if session_dir_path and isinstance(pos, list) and len(pos) == 2:
            enrichment = self._enrich_click(
                evt, session_dir_path, window, int(pos[0]), int(pos[1]),
            )
            if enrichment:
                # The OCR text becomes the anchor_hint for primary OCR
                by_text = enrichment.get("by_text", "")
                if by_text:
                    action.anchor_hint = by_text
                # Stash the enrichment metadata on the action
                # (consumed by the ExecutionCompiler to build strategies)
                action._enrichment = enrichment

        # Read the UIA snapshot if the Windows agent captured one.
        # Expected format in the event:
        # evt["uia_snapshot"] = {
        #     "name": "Enregistrer",
        #     "control_type": "bouton",
        #     "automation_id": "btnSave",
        #     "parent_path": [{"name": "...", "control_type": "..."}],
        # }
        # When present, it is merged into _enrichment so that the
        # ExecutionCompiler can create a priority UIA strategy.
        uia_snapshot = evt.get("uia_snapshot")
        if uia_snapshot and isinstance(uia_snapshot, dict):
            if not hasattr(action, "_enrichment") or action._enrichment is None:
                action._enrichment = {}
            action._enrichment["uia_snapshot"] = uia_snapshot

        return action

    elif evt_type == "text_input":
        text = evt.get("text", "")
        if text:
            return Action(type="type", text=text)
    elif evt_type in ("key_combo", "key_press"):
        keys = evt.get("keys", [])
        if keys:
            return Action(type="key_combo", keys=keys)
    elif evt_type == "scroll":
        return Action(type="scroll")

    # Unknown type, or an empty text_input/key event: nothing to emit
    return None
|
||||||
|
|
||||||
|
def _enrich_click(
    self,
    evt: Dict,
    session_dir_path: Path,
    window_title: str,
    click_x: int,
    click_y: int,
) -> Optional[Dict[str, Any]]:
    """Enrich a click with OCR + crop + description.

    Reuses enrich_click_from_screenshot from the stream_processor
    (battle-tested). Returns a dict with by_text, anchor_image_base64,
    vlm_description, etc., or None when the screenshot is missing or
    any part of the enrichment fails (best effort by design -- failures
    are logged at debug level only).
    """
    try:
        from agent_v0.server_v1.stream_processor import enrich_click_from_screenshot

        # Locate the full screenshot for this event
        screenshot_id = evt.get("screenshot_id", "")
        if not screenshot_id:
            return None

        full_path = session_dir_path / "shots" / f"{screenshot_id}_full.png"
        if not full_path.is_file():
            return None

        # Screen resolution (defaults, widened by the captured rect)
        screen_w = 1280
        screen_h = 800
        window_capture = evt.get("window_capture", {})
        if window_capture.get("window_rect"):
            rect = window_capture["window_rect"]
            # NOTE(review): assumes rect[2]/rect[3] are large enough to
            # bound the screen (right/bottom or width/height) -- confirm
            # the window_rect convention used by the recorder.
            screen_w = max(screen_w, rect[2])
            screen_h = max(screen_h, rect[3])

        return enrich_click_from_screenshot(
            screenshot_path=full_path,
            click_x=click_x,
            click_y=click_y,
            screen_w=screen_w,
            screen_h=screen_h,
            window_title=window_title,
            vision_info=evt.get("vision_info") if isinstance(evt.get("vision_info"), dict) else None,
            session_dir=session_dir_path,
            screenshot_id=screenshot_id,
        )
    except Exception as e:
        # Deliberate best-effort: enrichment must never break IR building
        logger.debug(f"IRBuilder._enrich_click: {e}")
        return None
|
||||||
|
|
||||||
|
def _describe_segment(self, segment: List[Dict]) -> str:
|
||||||
|
"""Décrire un segment en langage naturel (pour gemma4)."""
|
||||||
|
parts = []
|
||||||
|
window = ""
|
||||||
|
for evt in segment:
|
||||||
|
evt_type = evt.get("type", "")
|
||||||
|
w = evt.get("window", {}).get("title", "")
|
||||||
|
if w and w != window:
|
||||||
|
window = w
|
||||||
|
parts.append(f"[{w}]")
|
||||||
|
if evt_type == "mouse_click":
|
||||||
|
text = evt.get("vision_info", {}).get("text", "")
|
||||||
|
parts.append(f"clic sur '{text}'" if text else "clic")
|
||||||
|
elif evt_type == "text_input":
|
||||||
|
text = evt.get("text", "")
|
||||||
|
parts.append(f"saisie '{text[:30]}'")
|
||||||
|
elif evt_type in ("key_combo", "key_press"):
|
||||||
|
keys = evt.get("keys", [])
|
||||||
|
parts.append(f"touche {'+'.join(keys)}")
|
||||||
|
return " → ".join(parts) if parts else "action"
|
||||||
|
|
||||||
|
def _analyze_intent(
    self,
    segment_desc: str,
    step_index: int,
    total_steps: int,
    workflow_name: str,
    domain: str,
) -> tuple:
    """Ask gemma4 to understand the intent of a segment.

    Best effort: the domain context is optional, and any network or
    parse failure falls back to using the raw segment description as
    the intent with empty pre/postconditions.

    Returns:
        (intent, precondition, postcondition)
    """
    import requests as _requests

    # Load the business-domain context (optional)
    domain_prompt = ""
    try:
        from agent_v0.server_v1.domain_context import get_domain_context
        ctx = get_domain_context(domain)
        if ctx.system_prompt:
            domain_prompt = f"\nContexte métier : {ctx.name}\n"
    except Exception:
        pass

    # French prompt: the model is asked for exactly three labelled lines,
    # which _parse_intent_response() then extracts.
    prompt = (
        f"{domain_prompt}"
        f"Workflow : {workflow_name} (étape {step_index + 1}/{total_steps})\n"
        f"Actions observées : {segment_desc}\n\n"
        f"Réponds en 3 lignes :\n"
        f"INTENTION: que veut faire l'utilisateur avec ces actions (1 phrase)\n"
        f"AVANT: état attendu de l'écran avant cette étape (1 phrase)\n"
        f"APRÈS: état attendu de l'écran après cette étape (1 phrase)"
    )

    try:
        resp = _requests.post(
            self._gemma4_url,
            json={
                "model": "gemma4:e4b",
                "messages": [{"role": "user", "content": prompt}],
                "stream": False,
                "think": True,
                # low temperature: we want deterministic, terse labelling
                "options": {"temperature": 0.1, "num_predict": 800},
            },
            timeout=30,
        )
        if resp.ok:
            content = resp.json().get("message", {}).get("content", "")
            return self._parse_intent_response(content)
    except Exception as e:
        logger.debug(f"IRBuilder: gemma4 indisponible ({e})")

    # Fallback: the textual description stands in for the intent
    return (segment_desc, "", "")
|
||||||
|
|
||||||
|
def _parse_intent_response(self, content: str) -> tuple:
|
||||||
|
"""Parser la réponse gemma4 (INTENTION/AVANT/APRÈS)."""
|
||||||
|
intent = ""
|
||||||
|
precondition = ""
|
||||||
|
postcondition = ""
|
||||||
|
|
||||||
|
for line in content.split("\n"):
|
||||||
|
clean = line.strip()
|
||||||
|
upper = clean.upper()
|
||||||
|
if upper.startswith("INTENTION:"):
|
||||||
|
intent = clean.split(":", 1)[1].strip()
|
||||||
|
elif upper.startswith("AVANT:"):
|
||||||
|
precondition = clean.split(":", 1)[1].strip()
|
||||||
|
elif upper.startswith(("APRÈS:", "APRES:")):
|
||||||
|
postcondition = clean.split(":", 1)[1].strip()
|
||||||
|
|
||||||
|
return (intent, precondition, postcondition)
|
||||||
|
|
||||||
|
def _detect_variables(self, steps: List[Step], events: List[Dict]) -> List[Variable]:
|
||||||
|
"""Détecter les variables dans le workflow.
|
||||||
|
|
||||||
|
Une variable est une donnée qui change entre les exécutions :
|
||||||
|
- Texte saisi par l'utilisateur (noms, codes, dates)
|
||||||
|
- Données lues à l'écran (résultats de recherche)
|
||||||
|
"""
|
||||||
|
variables = []
|
||||||
|
seen_texts = set()
|
||||||
|
|
||||||
|
for step in steps:
|
||||||
|
for action in step.actions:
|
||||||
|
if action.type == "type" and action.text:
|
||||||
|
text = action.text.strip()
|
||||||
|
if text and text not in seen_texts and len(text) > 2:
|
||||||
|
seen_texts.add(text)
|
||||||
|
var_name = f"texte_{len(variables) + 1}"
|
||||||
|
variables.append(Variable(
|
||||||
|
name=var_name,
|
||||||
|
description=f"Texte saisi : '{text[:50]}'",
|
||||||
|
source="user",
|
||||||
|
default=text,
|
||||||
|
))
|
||||||
|
# Marquer l'action comme variable
|
||||||
|
action.variable = True
|
||||||
|
action.text = f"{{{var_name}}}"
|
||||||
|
|
||||||
|
return variables
|
||||||
693
core/workflow/shadow_observer.py
Normal file
693
core/workflow/shadow_observer.py
Normal file
@@ -0,0 +1,693 @@
|
|||||||
|
# core/workflow/shadow_observer.py
|
||||||
|
"""
|
||||||
|
ShadowObserver — Observation en temps réel de ce que Léa comprend.
|
||||||
|
|
||||||
|
C'est le "mode Shadow amélioré" : pendant que l'utilisateur enregistre
|
||||||
|
une démonstration, Léa lui dit ce qu'elle comprend au fur et à mesure.
|
||||||
|
|
||||||
|
Contrairement à l'IRBuilder (qui analyse TOUT à la fin en appelant gemma4),
|
||||||
|
le ShadowObserver travaille en incrémental :
|
||||||
|
- À chaque événement reçu, il met à jour sa compréhension locale.
|
||||||
|
- Il segmente dès qu'un critère de coupure est détecté.
|
||||||
|
- Il émet des notifications légères ("Léa a compris : tu viens d'ouvrir le
|
||||||
|
Bloc-notes") via un callback.
|
||||||
|
- Il détecte les variables (texte saisi) pendant la frappe.
|
||||||
|
|
||||||
|
Le ShadowObserver n'est pas la source de vérité — c'est une couche
|
||||||
|
d'observation. La source de vérité reste `live_events.jsonl`.
|
||||||
|
Le WorkflowIR final est toujours reconstruit par l'IRBuilder après
|
||||||
|
validation, mais la compréhension temps réel accélère la boucle de
|
||||||
|
rétroaction avec l'utilisateur.
|
||||||
|
|
||||||
|
Usage :
|
||||||
|
|
||||||
|
def on_notify(event):
|
||||||
|
print(f"[{event.niveau}] {event.message}")
|
||||||
|
|
||||||
|
observer = ShadowObserver(notify_callback=on_notify)
|
||||||
|
observer.start("sess_abc")
|
||||||
|
observer.observe_event(event1)
|
||||||
|
observer.observe_event(event2)
|
||||||
|
...
|
||||||
|
comprehension = observer.get_understanding()
|
||||||
|
# → [{"step": 1, "intent": "Ouvrir le Bloc-notes", "confidence": 0.8}, ...]
|
||||||
|
observer.stop()
|
||||||
|
|
||||||
|
Contraintes :
|
||||||
|
- 100% asynchrone côté performance : la méthode observe_event() ne doit
|
||||||
|
jamais bloquer la capture (pas d'appel réseau synchrone).
|
||||||
|
- Optionnel : activable via paramètre, ne modifie pas la capture existante.
|
||||||
|
- 100% français dans les messages utilisateur.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
import uuid
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from enum import Enum
|
||||||
|
from typing import Any, Callable, Dict, List, Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Types d'événements observationnels
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
class NiveauNotification(str, Enum):
    """Importance level of a notification.

    - INFO: passive information ("Léa is watching...")
    - DECOUVERTE: Léa just understood something new
    - QUESTION: Léa would like a confirmation (non-blocking)
    - VARIABLE: a variable was detected
    """

    INFO = "info"
    DECOUVERTE = "decouverte"
    QUESTION = "question"
    VARIABLE = "variable"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class NotificationShadow:
    """Notification emitted by the ShadowObserver toward the user GUI."""

    notif_id: str
    niveau: NiveauNotification
    message: str  # User-facing text (French)
    session_id: str
    step_index: int = -1  # Index of the step concerned, -1 if global
    data: Dict[str, Any] = field(default_factory=dict)
    timestamp: float = 0.0

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain, JSON-friendly dict (enum flattened to its value)."""
        return dict(
            notif_id=self.notif_id,
            niveau=self.niveau.value,
            message=self.message,
            session_id=self.session_id,
            step_index=self.step_index,
            data=self.data,
            timestamp=self.timestamp,
        )
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class UnderstoodStep:
    """Logical step understood in real time by the ShadowObserver.

    A simplified counterpart of `Step` (core.workflow.workflow_ir),
    optimized for incremental construction. The ShadowValidator converts
    it into a final `Step` once the user has validated it.
    """

    step_index: int
    intent: str  # Human intent (e.g. "Ouvrir le Bloc-notes")
    intent_provisoire: bool = True  # True until gemma4 has confirmed
    confidence: float = 0.5  # Confidence score (0..1)
    app_name: str = ""  # Main application
    window_title: str = ""  # Window title at the start of the segment
    events: List[Dict[str, Any]] = field(default_factory=list)
    variables_detectees: List[str] = field(default_factory=list)
    started_at: float = 0.0
    ended_at: float = 0.0
    validated: bool = False  # User validated the step
    corrected: bool = False  # User corrected the intent
    cancelled: bool = False  # User cancelled the step

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict; events are summarized as a count and
        mutable lists are copied."""
        return dict(
            step_index=self.step_index,
            intent=self.intent,
            intent_provisoire=self.intent_provisoire,
            confidence=round(self.confidence, 3),
            app_name=self.app_name,
            window_title=self.window_title,
            events_count=len(self.events),
            variables_detectees=list(self.variables_detectees),
            started_at=self.started_at,
            ended_at=self.ended_at,
            validated=self.validated,
            corrected=self.corrected,
            cancelled=self.cancelled,
        )
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Observer
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
# Segmentation constants (seconds). We deliberately do not reuse the
# IRBuilder's constants: observation is incremental, so shorter thresholds
# are affordable here for better reactivity.
_SEUIL_PAUSE_LONGUE_S = 4.0
_SEUIL_CONFIANCE_BASE = 0.5
# NOTE(review): _SEUIL_CONFIANCE_APP_CHANGE appears unused in this module
# as far as visible — confirm before removing.
_SEUIL_CONFIANCE_APP_CHANGE = 0.8

# Event types ignored by the observer (no user intent to learn from)
_EVENT_TYPES_IGNORES = {
    "heartbeat",
    "focus_change",
    "action_result",
    "window_focus_change",
}
|
||||||
|
|
||||||
|
|
||||||
|
class ShadowObserver:
    """Observe capture events in real time and maintain Léa's understanding.

    Thread-safe: may be called from several threads (capture, API, worker).
    All session state is guarded by a reentrant lock, so nested public
    calls (e.g. stop() -> get_understanding()) do not deadlock.

    `notify_callback` is invoked synchronously with a lightweight dataclass
    — notifications are meant to be forwarded via WebSocket/HTTP long-poll
    from the API layer. NOTE(review): when triggered from observe_event()
    the callback runs while the calling thread still holds the observer
    lock, so the callback must be fast and must not block.
    """

    # Signature of the notification callback.
    NotifyCallback = Callable[[NotificationShadow], None]

    def __init__(
        self,
        notify_callback: Optional[NotifyCallback] = None,
        *,
        enable_gemma4: bool = False,
        gemma4_callback: Optional[Callable[[UnderstoodStep], None]] = None,
    ):
        """
        Args:
            notify_callback: Called for every notification (must be fast,
                no blocking I/O).
            enable_gemma4: If True, each closed step is handed to
                `gemma4_callback` so the caller can enrich intents
                asynchronously (never blocking the capture path).
            gemma4_callback: Background enrichment hook (gemma4 or any
                other LLM). Receives a *copy* of the closed step.
        """
        self._notify_callback = notify_callback
        self._enable_gemma4 = enable_gemma4
        self._gemma4_callback = gemma4_callback

        self._lock = threading.RLock()
        # session_id -> mutable observation state (see start() for the schema).
        self._sessions: Dict[str, Dict[str, Any]] = {}

    # ----- Lifecycle ------------------------------------------------------

    def start(self, session_id: str) -> None:
        """Start observing a session (restarting resets its state)."""
        with self._lock:
            self._sessions[session_id] = {
                "steps": [],            # List[UnderstoodStep] (closed steps)
                "current_step": None,   # Optional[UnderstoodStep]
                "last_event_ts": 0.0,
                "last_notif_ts": 0.0,
                "total_events": 0,
                "notifications": [],    # History of NotificationShadow
                "started_at": time.time(),
                "stopped_at": 0.0,
            }
        self._notifier(
            session_id,
            NiveauNotification.INFO,
            "Léa t'observe. Fais ta tâche normalement, je vais apprendre.",
        )

    def stop(self, session_id: str) -> None:
        """Stop observing and close the in-progress segment, if any."""
        with self._lock:
            state = self._sessions.get(session_id)
            if not state:
                return
            current = state.get("current_step")
            if current is not None and current.events:
                current.ended_at = state["last_event_ts"] or time.time()
                state["steps"].append(current)
            # An event-less current step is simply discarded.
            state["current_step"] = None
            state["stopped_at"] = time.time()

        nb_steps = len(self.get_understanding(session_id))
        if nb_steps > 0:
            self._notifier(
                session_id,
                NiveauNotification.DECOUVERTE,
                f"J'ai observé {nb_steps} étape(s). Tu veux que je te les "
                f"montre pour validation ?",
            )

    def reset(self, session_id: str) -> None:
        """Drop all state for a session (after finalization)."""
        with self._lock:
            self._sessions.pop(session_id, None)

    # ----- Observation ----------------------------------------------------

    def observe_event(self, session_id: str, event: Dict[str, Any]) -> None:
        """Ingest one capture event.

        Called for every event received by the server; it must stay FAST
        (no synchronous network I/O).
        """
        evt_type = event.get("type", "")
        if evt_type in _EVENT_TYPES_IGNORES:
            return

        with self._lock:
            state = self._sessions.get(session_id)
            if not state:
                # Auto-start for robustness if start() was never called.
                self.start(session_id)
                state = self._sessions[session_id]

            state["total_events"] += 1

            # 1. Close the current segment if a cut criterion is met.
            current = state.get("current_step")
            should_cut, cut_reason = self._should_cut(state, event)
            if should_cut and current is not None:
                current.ended_at = state["last_event_ts"] or time.time()
                state["steps"].append(current)
                self._emit_step_closed(session_id, current, cut_reason)
                current = None
                state["current_step"] = None

            # 2. Open a new segment when needed.
            if current is None:
                current = UnderstoodStep(
                    step_index=len(state["steps"]) + 1,
                    intent=self._initial_intent(event),
                    intent_provisoire=True,
                    confidence=_SEUIL_CONFIANCE_BASE,
                    app_name=self._get_app_name(event),
                    window_title=self._get_window_title(event),
                    started_at=float(event.get("timestamp", 0)) or time.time(),
                )
                state["current_step"] = current

            # 3. Attach the event to the current segment.
            current.events.append(event)
            state["last_event_ts"] = float(event.get("timestamp", 0)) or time.time()

            # 4. Refresh the provisional intent from accumulated context.
            current.intent = self._refine_intent(current, event)

            # 5. Variable detection while the user is typing.
            if evt_type == "text_input":
                self._handle_text_input(session_id, current, event)

            # 6. Periodic summary (about every 5 s).
            self._maybe_emit_heartbeat(session_id, state)

    # ----- Public API -----------------------------------------------------

    def get_understanding(
        self, session_id: str, include_current: bool = True
    ) -> List[Dict[str, Any]]:
        """Return what Léa has understood so far.

        Returns:
            List of dicts like:
            [{"step": 1, "intent": "Ouvrir le Bloc-notes",
              "confidence": 0.9, "app_name": "Bloc-notes",
              "events_count": 4, ...}, ...]
        """
        with self._lock:
            state = self._sessions.get(session_id)
            if not state:
                return []
            steps = list(state["steps"])
            if include_current and state.get("current_step") is not None:
                steps.append(state["current_step"])

            summaries = []
            for step in steps:
                entry = step.to_dict()
                # Public API exposes "step" instead of "step_index".
                entry["step"] = entry.pop("step_index")
                summaries.append(entry)
            return summaries

    def get_notifications(
        self, session_id: str, since_ts: float = 0.0
    ) -> List[Dict[str, Any]]:
        """Return notifications emitted at or after `since_ts`."""
        with self._lock:
            state = self._sessions.get(session_id)
            if not state:
                return []
            return [
                n.to_dict()
                for n in state["notifications"]
                if n.timestamp >= since_ts
            ]

    def get_current_step(self, session_id: str) -> Optional[Dict[str, Any]]:
        """Return the step currently being built, or None."""
        with self._lock:
            state = self._sessions.get(session_id)
            if not state:
                return None
            current = state.get("current_step")
            return None if current is None else current.to_dict()

    def get_steps_internal(
        self, session_id: str, include_current: bool = True
    ) -> List[UnderstoodStep]:
        """Internal variant: return `UnderstoodStep` objects.

        Used by the ShadowValidator to rebuild a WorkflowIR. Steps are
        copied so external code cannot mutate the observer's state.
        """
        with self._lock:
            state = self._sessions.get(session_id)
            if not state:
                return []
            steps = list(state["steps"])
            if include_current and state.get("current_step") is not None:
                steps.append(state["current_step"])
            return [self._copy_step(s) for s in steps]

    def has_session(self, session_id: str) -> bool:
        """True if the session is (still) known to the observer."""
        with self._lock:
            return session_id in self._sessions

    # ----- Internals: segmentation ----------------------------------------

    def _should_cut(
        self, state: Dict[str, Any], event: Dict[str, Any]
    ) -> tuple:
        """Decide whether `event` should open a new segment.

        Returns:
            (should_cut, reason) — reason is "" when not cutting.
        """
        current = state.get("current_step")
        if current is None or not current.events:
            return (False, "")

        # Cut: the user switched to another application.
        new_app = self._get_app_name(event)
        if new_app and current.app_name and new_app != current.app_name:
            return (True, "changement_application")

        # Cut: long pause between two events.
        prev_ts = float(current.events[-1].get("timestamp", 0))
        curr_ts = float(event.get("timestamp", 0))
        if prev_ts > 0 and curr_ts > 0:
            if (curr_ts - prev_ts) > _SEUIL_PAUSE_LONGUE_S:
                return (True, "pause_longue")

        # A "heavy" combo such as ctrl+s (save) marks a logical end, but it
        # stays attached to the current step on purpose: the cut will happen
        # on the NEXT event (app change or pause), never here.
        return (False, "")

    def _initial_intent(self, event: Dict[str, Any]) -> str:
        """Provisional intent for a brand-new segment."""
        app = self._get_app_name(event) or self._get_window_title(event)
        evt_type = event.get("type", "")
        if evt_type == "mouse_click":
            hint = event.get("vision_info", {}).get("text", "")
            if hint:
                return f"Cliquer sur « {hint} »"
            if app:
                return f"Interagir avec {app}"
            return "Cliquer quelque part"
        if evt_type == "text_input":
            text = event.get("text", "")[:40]
            return "Saisir du texte" + (f" « {text} »" if text else "")
        if evt_type in ("key_combo", "key_press"):
            # str() guards against non-string key values; other call sites
            # already coerce the same way.
            keys = [str(k) for k in event.get("keys", [])]
            return f"Appuyer sur {'+'.join(keys)}" if keys else "Raccourci clavier"
        return f"Action dans {app}" if app else "Action"

    def _refine_intent(
        self, step: UnderstoodStep, event: Dict[str, Any]
    ) -> str:
        """Refine the intent as more events accumulate.

        Cheap heuristics only — no gemma4 here, to stay fast on the
        capture path.
        """
        types = [e.get("type", "") for e in step.events]
        has_click = "mouse_click" in types
        has_type = "text_input" in types
        has_key = any(t in ("key_combo", "key_press") for t in types)
        app = step.app_name or self._get_window_title(event)

        # Case 1: click + typing + Enter -> "search for X".
        if has_click and has_type:
            texts = [e.get("text", "") for e in step.events if e.get("type") == "text_input"]
            pressed_enter = any(
                "enter" in [str(k).lower() for k in e.get("keys", [])]
                for e in step.events
                if e.get("type") in ("key_combo", "key_press")
            )
            if texts and pressed_enter:
                premier_texte = next((t for t in texts if t), "")
                if premier_texte:
                    step.confidence = min(0.85, step.confidence + 0.05)
                    return f"Rechercher « {premier_texte[:30]} »"

        # Case 2: typing only -> "write text".
        if has_type and not has_click:
            texts = [e.get("text", "") for e in step.events if e.get("type") == "text_input"]
            premier_texte = next((t for t in texts if t), "")
            if premier_texte:
                return f"Écrire « {premier_texte[:40]} »"
            return "Écrire du texte"

        # Case 3: well-known shortcuts (save / copy / paste).
        if has_key:
            for e in step.events:
                if e.get("type") in ("key_combo", "key_press"):
                    keys = [str(k).lower() for k in e.get("keys", [])]
                    if "ctrl" in keys and "s" in keys:
                        step.confidence = min(0.9, step.confidence + 0.1)
                        return f"Sauvegarder{' dans ' + app if app else ''}"
                    if "ctrl" in keys and "c" in keys:
                        return f"Copier{' depuis ' + app if app else ''}"
                    if "ctrl" in keys and "v" in keys:
                        return f"Coller{' dans ' + app if app else ''}"

        # Case 4: click only, with an identifiable application.
        if has_click and app:
            # First non-empty OCR/vision hint among the clicks, if any.
            hint = next(
                (e.get("vision_info", {}).get("text", "")
                 for e in step.events
                 if e.get("type") == "mouse_click"
                 and e.get("vision_info", {}).get("text", "")),
                "",
            )
            if hint:
                return f"Cliquer sur « {hint} » dans {app}"
            return f"Interagir avec {app}"

        return step.intent

    def _handle_text_input(
        self,
        session_id: str,
        step: UnderstoodStep,
        event: Dict[str, Any],
    ) -> None:
        """Detect and notify a candidate variable on a text-input event."""
        text = (event.get("text") or "").strip()
        if not text or len(text) < 3:
            return

        # Provisional variable name, numbered within the step.
        var_name = f"texte_{len(step.variables_detectees) + 1}"
        step.variables_detectees.append(var_name)

        # Heuristic: guess a plausible type for the typed value.
        var_type = self._guess_variable_type(text)

        self._notifier(
            session_id,
            NiveauNotification.VARIABLE,
            f"Variable détectée : tu as tapé « {text[:40]} » — c'est {var_type} ?",
            step_index=step.step_index,
            data={
                "variable_name": var_name,
                "value": text,
                "variable_type": var_type,
            },
        )

    def _guess_variable_type(self, text: str) -> str:
        """Guess a human-readable label for the type of a typed value."""
        t = text.strip()
        # Date (very rough: dd/mm/yyyy or dd-mm-yyyy shaped).
        if len(t) == 10 and t[2] in "/-" and t[5] in "/-":
            return "une date"
        if t.isdigit():
            return "un numéro"
        if "@" in t and "." in t:
            return "une adresse e-mail"
        # Short alphanumeric with no lowercase -> looks like a code.
        if len(t) <= 10 and t.replace(" ", "").replace("-", "").isalnum() and not any(c.islower() for c in t):
            return "un code"
        if " " in t and len(t) > 10:
            return "un texte libre"
        return "un texte"

    # ----- Internals: notifications ---------------------------------------

    def _notifier(
        self,
        session_id: str,
        niveau: NiveauNotification,
        message: str,
        *,
        step_index: int = -1,
        data: Optional[Dict[str, Any]] = None,
    ) -> None:
        """Create, store and emit a notification."""
        notif = NotificationShadow(
            notif_id=uuid.uuid4().hex[:12],
            niveau=niveau,
            message=message,
            session_id=session_id,
            step_index=step_index,
            data=data or {},
            timestamp=time.time(),
        )

        with self._lock:
            state = self._sessions.get(session_id)
            if state is not None:
                state["notifications"].append(notif)
                state["last_notif_ts"] = notif.timestamp

        if self._notify_callback is not None:
            try:
                self._notify_callback(notif)
            except Exception as e:
                # The callback must never break the capture path.
                logger.debug("ShadowObserver: callback a échoué : %s", e)

    def _emit_step_closed(
        self,
        session_id: str,
        step: UnderstoodStep,
        reason: str,
    ) -> None:
        """Notify that a step was just closed (and optionally enrich it)."""
        raison_humaine = {
            "changement_application": "tu es passé à une autre application",
            "pause_longue": "tu as fait une pause",
        }.get(reason, "")

        suffixe = f" ({raison_humaine})" if raison_humaine else ""
        self._notifier(
            session_id,
            NiveauNotification.DECOUVERTE,
            f"Nouvelle étape comprise : {step.intent}{suffixe}",
            step_index=step.step_index,
            data={"step": step.to_dict()},
        )

        if self._enable_gemma4 and self._gemma4_callback is not None:
            # Non-blocking: delegated to the caller (which may use a thread).
            try:
                self._gemma4_callback(self._copy_step(step))
            except Exception as e:
                logger.debug("ShadowObserver: gemma4_callback a échoué : %s", e)

    def _maybe_emit_heartbeat(
        self,
        session_id: str,
        state: Dict[str, Any],
    ) -> None:
        """Emit a periodic summary (at most every ~5 s of notification quiet)."""
        now = time.time()
        if now - state.get("last_notif_ts", 0) < 5.0:
            return
        nb_steps = len(state["steps"])
        if state.get("current_step") is not None:
            nb_steps += 1
        if nb_steps == 0:
            return
        self._notifier(
            session_id,
            NiveauNotification.INFO,
            f"J'ai compris {nb_steps} étape(s) jusqu'ici.",
            data={"steps_count": nb_steps},
        )

    # ----- Utilities ------------------------------------------------------

    @staticmethod
    def _get_app_name(event: Dict[str, Any]) -> str:
        """Extract an application name from an event."""
        window = event.get("window") or {}
        if isinstance(window, dict):
            title = window.get("title", "")
            app_name = window.get("app_name", "")
        else:
            title = event.get("window_title", "")
            app_name = ""

        # Prefer the explicit app_name when available.
        if app_name and app_name != "unknown":
            return app_name

        # Otherwise derive it from the window title ("Document – App" patterns).
        for sep in [" – ", " - ", " — "]:
            if sep in title:
                return title.split(sep)[-1].strip()
        return title.strip() if title else ""

    @staticmethod
    def _get_window_title(event: Dict[str, Any]) -> str:
        """Extract the raw window title from an event."""
        window = event.get("window") or {}
        if isinstance(window, dict):
            return window.get("title", "") or ""
        return event.get("window_title", "") or ""

    @staticmethod
    def _copy_step(step: UnderstoodStep) -> UnderstoodStep:
        """Shallow copy to avoid mutation leaks (event dicts stay shared)."""
        return UnderstoodStep(
            step_index=step.step_index,
            intent=step.intent,
            intent_provisoire=step.intent_provisoire,
            confidence=step.confidence,
            app_name=step.app_name,
            window_title=step.window_title,
            events=list(step.events),
            variables_detectees=list(step.variables_detectees),
            started_at=step.started_at,
            ended_at=step.ended_at,
            validated=step.validated,
            corrected=step.corrected,
            cancelled=step.cancelled,
        )
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Singleton partagé (optionnel)
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
# Lazily created process-wide observer, guarded by a lock so concurrent
# first calls create only one instance.
_shared_observer: Optional[ShadowObserver] = None
_shared_lock = threading.Lock()


def get_shared_observer() -> ShadowObserver:
    """Shared observer for the API layer (lazy initialization)."""
    global _shared_observer
    with _shared_lock:
        if _shared_observer is None:
            _shared_observer = ShadowObserver()
        return _shared_observer
|
||||||
468
core/workflow/shadow_validator.py
Normal file
468
core/workflow/shadow_validator.py
Normal file
@@ -0,0 +1,468 @@
|
|||||||
|
# core/workflow/shadow_validator.py
|
||||||
|
"""
|
||||||
|
ShadowValidator — Applique les feedbacks utilisateur et reconstruit un WorkflowIR.
|
||||||
|
|
||||||
|
Le ShadowObserver observe et comprend en temps réel. Le ShadowValidator,
|
||||||
|
lui, prend les décisions de l'utilisateur (valider, corriger, annuler,
|
||||||
|
combiner) et reconstruit un WorkflowIR final « propre » qui sera
|
||||||
|
persisté et exécutable par le runtime.
|
||||||
|
|
||||||
|
Opérations supportées :
|
||||||
|
- validate(step_index) : marquer l'étape comme validée
|
||||||
|
- correct(step_index, new_intent) : corriger l'intention
|
||||||
|
- undo(step_index) : annuler l'étape (elle sera exclue du WorkflowIR)
|
||||||
|
- merge_with_next(step_index) : fusionner avec l'étape suivante
|
||||||
|
- cancel() : annuler tout le workflow
|
||||||
|
- split(step_index, at_event_index) : couper une étape en deux (bonus)
|
||||||
|
|
||||||
|
Le validator ne touche PAS aux événements bruts (events.jsonl) — il
|
||||||
|
travaille sur la liste des `UnderstoodStep` fournie par le ShadowObserver.
|
||||||
|
|
||||||
|
Une fois toutes les actions appliquées, `build_workflow_ir()` produit
|
||||||
|
un WorkflowIR exécutable à partir des étapes validées/corrigées.
|
||||||
|
|
||||||
|
Usage :
|
||||||
|
|
||||||
|
validator = ShadowValidator()
|
||||||
|
validator.set_steps(observer.get_steps_internal(session_id))
|
||||||
|
|
||||||
|
validator.apply_feedback({"action": "validate", "step_index": 1})
|
||||||
|
validator.apply_feedback({
|
||||||
|
"action": "correct",
|
||||||
|
"step_index": 2,
|
||||||
|
"new_intent": "Sauvegarder le document",
|
||||||
|
})
|
||||||
|
validator.apply_feedback({"action": "undo", "step_index": 3})
|
||||||
|
|
||||||
|
ir = validator.build_workflow_ir(
|
||||||
|
session_id="sess_abc",
|
||||||
|
name="Mon workflow",
|
||||||
|
domain="generic",
|
||||||
|
)
|
||||||
|
ir.save("data/workflows/")
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
|
from .shadow_observer import UnderstoodStep
|
||||||
|
from .workflow_ir import Action, Step, Variable, WorkflowIR
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# Feedback actions supported by apply_feedback()
FEEDBACK_ACTIONS = {
    "validate",
    "correct",
    "undo",
    "cancel",
    "merge_next",
    "split",
}
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class FeedbackResult:
    """Outcome of a single feedback operation applied by the validator."""

    ok: bool               # True when the feedback was applied successfully
    action: str            # feedback action name ("validate", "correct", ...)
    step_index: int        # 1-based step index targeted (-1 when not applicable)
    message: str           # human-readable summary of what happened
    data: Dict[str, Any]   # extra payload (intents, counts, ...)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict; `data` is shallow-copied to avoid aliasing."""
        payload: Dict[str, Any] = {
            "ok": self.ok,
            "action": self.action,
            "step_index": self.step_index,
            "message": self.message,
        }
        payload["data"] = dict(self.data)
        return payload
|
||||||
|
|
||||||
|
|
||||||
|
class ShadowValidator:
    """Apply user feedback to understood steps and produce a WorkflowIR.

    The validator works on a cloned list of `UnderstoodStep` (it never
    touches the raw events); once all feedback has been applied,
    `build_workflow_ir()` turns the surviving steps into an executable IR.
    """

    def __init__(self) -> None:
        # Working copy of the observed steps (cloned in set_steps).
        self._steps: List[UnderstoodStep] = []
        # Set to True by the "cancel" action; build_workflow_ir then returns None.
        self._cancelled_workflow: bool = False
        # Chronological log of every FeedbackResult produced.
        self._history: List[FeedbackResult] = []

    # ----- API -----------------------------------------------------------

    def set_steps(self, steps: List[UnderstoodStep]) -> None:
        """Initialize the validator with the list of observed steps.

        Steps are cloned so feedback never mutates the observer's objects;
        the cancel flag and the history are reset.
        """
        self._steps = [self._clone(s) for s in steps]
        self._cancelled_workflow = False
        self._history = []

    @property
    def steps(self) -> List[UnderstoodStep]:
        """Read-only view (shallow copy) of the current steps."""
        return list(self._steps)

    @property
    def history(self) -> List[FeedbackResult]:
        """History of the feedbacks applied so far (shallow copy)."""
        return list(self._history)

    @property
    def is_cancelled(self) -> bool:
        # True once a "cancel" feedback has been applied.
        return self._cancelled_workflow

    def apply_feedback(self, feedback: Dict[str, Any]) -> FeedbackResult:
        """Apply one user feedback.

        The `feedback` dict has the form:
            {
                "action": "validate" | "correct" | "undo" | "cancel" | "merge_next" | "split",
                "step_index": 1,       # 1-based index (as in get_understanding)
                "new_intent": "...",   # for "correct"
                "at_event_index": 3,   # for "split"
            }

        Returns:
            FeedbackResult (also appended to the history).
        """
        action = (feedback.get("action") or "").strip()
        if action not in FEEDBACK_ACTIONS:
            return self._record(FeedbackResult(
                ok=False, action=action, step_index=-1,
                message=f"Action inconnue : « {action} »",
                data={"supported": sorted(FEEDBACK_ACTIONS)},
            ))

        # "cancel" targets the whole workflow — no step index required.
        if action == "cancel":
            return self._do_cancel()

        # NOTE(review): int() will raise on a non-numeric step_index — confirm
        # callers always send an int or a numeric string.
        step_index = int(feedback.get("step_index", -1))
        if not self._is_valid_step_index(step_index):
            return self._record(FeedbackResult(
                ok=False, action=action, step_index=step_index,
                message=f"Index d'étape invalide : {step_index}",
                data={"nb_steps": len(self._steps)},
            ))

        if action == "validate":
            return self._do_validate(step_index)
        if action == "correct":
            return self._do_correct(step_index, feedback.get("new_intent", ""))
        if action == "undo":
            return self._do_undo(step_index)
        if action == "merge_next":
            return self._do_merge_next(step_index)
        if action == "split":
            return self._do_split(
                step_index, int(feedback.get("at_event_index", -1))
            )

        # Defensive fallback: action is in FEEDBACK_ACTIONS but unhandled above.
        return self._record(FeedbackResult(
            ok=False, action=action, step_index=step_index,
            message="Action non implémentée", data={},
        ))

    def apply_feedbacks(
        self, feedbacks: List[Dict[str, Any]]
    ) -> List[FeedbackResult]:
        """Apply several feedbacks in order; returns one result per feedback."""
        return [self.apply_feedback(f) for f in feedbacks]

    # ----- Operations ----------------------------------------------------

    def _do_validate(self, step_index: int) -> FeedbackResult:
        # Mark the step as explicitly validated and boost its confidence.
        step = self._get_step(step_index)
        step.validated = True
        step.intent_provisoire = False
        step.confidence = max(step.confidence, 0.95)
        return self._record(FeedbackResult(
            ok=True, action="validate", step_index=step_index,
            message=f"Étape {step_index} validée : {step.intent}",
            data={"intent": step.intent},
        ))

    def _do_correct(
        self, step_index: int, new_intent: str
    ) -> FeedbackResult:
        # Replace the step's intent; an empty/blank intent is rejected.
        new_intent = (new_intent or "").strip()
        if not new_intent:
            return self._record(FeedbackResult(
                ok=False, action="correct", step_index=step_index,
                message="Nouvelle intention vide",
                data={},
            ))
        step = self._get_step(step_index)
        old_intent = step.intent
        step.intent = new_intent
        step.corrected = True
        step.validated = True  # Correcting implicitly validates the step
        step.intent_provisoire = False
        step.confidence = 1.0
        return self._record(FeedbackResult(
            ok=True, action="correct", step_index=step_index,
            message=f"Étape {step_index} corrigée : « {old_intent} » → « {new_intent} »",
            data={"old_intent": old_intent, "new_intent": new_intent},
        ))

    def _do_undo(self, step_index: int) -> FeedbackResult:
        # Soft-delete: the step stays in the list but is skipped at build time.
        step = self._get_step(step_index)
        step.cancelled = True
        return self._record(FeedbackResult(
            ok=True, action="undo", step_index=step_index,
            message=f"Étape {step_index} annulée : {step.intent}",
            data={"intent": step.intent},
        ))

    def _do_merge_next(self, step_index: int) -> FeedbackResult:
        """Merge the step with the following one."""
        # step_index is 1-based, so the last step has index == len(self._steps)
        # and cannot be merged forward.
        if step_index >= len(self._steps):
            return self._record(FeedbackResult(
                ok=False, action="merge_next", step_index=step_index,
                message="Aucune étape suivante à fusionner",
                data={},
            ))
        step = self._get_step(step_index)
        next_step = self._get_step(step_index + 1)

        # The merged step keeps the longer intent, the union of events and
        # detected variables, and counts as implicitly validated.
        merged = UnderstoodStep(
            step_index=step.step_index,
            intent=step.intent if len(step.intent) >= len(next_step.intent) else next_step.intent,
            intent_provisoire=False,
            confidence=max(step.confidence, next_step.confidence),
            app_name=step.app_name or next_step.app_name,
            window_title=step.window_title or next_step.window_title,
            events=list(step.events) + list(next_step.events),
            variables_detectees=list(step.variables_detectees)
            + list(next_step.variables_detectees),
            started_at=step.started_at or next_step.started_at,
            ended_at=next_step.ended_at or step.ended_at,
            validated=True,
            corrected=step.corrected or next_step.corrected,
            cancelled=False,
        )

        # Replace [step, next_step] with [merged]
        idx0 = step_index - 1  # 1-based → 0-based
        self._steps.pop(idx0 + 1)  # next_step
        self._steps[idx0] = merged
        self._renumber()

        return self._record(FeedbackResult(
            ok=True, action="merge_next", step_index=step_index,
            message=f"Étapes {step_index} et {step_index + 1} fusionnées",
            data={"intent": merged.intent},
        ))

    def _do_split(
        self, step_index: int, at_event_index: int
    ) -> FeedbackResult:
        """Split a step in two at event `at_event_index`.

        `at_event_index` is 0-based within the step's events; both halves
        must be non-empty, hence the bounds check below.
        """
        step = self._get_step(step_index)
        if at_event_index <= 0 or at_event_index >= len(step.events):
            return self._record(FeedbackResult(
                ok=False, action="split", step_index=step_index,
                message=f"Index de coupe invalide : {at_event_index}",
                data={"nb_events": len(step.events)},
            ))

        left_events = step.events[:at_event_index]
        right_events = step.events[at_event_index:]

        # Both halves become provisional again (confidence slightly lowered)
        # so the user re-validates them.
        left = UnderstoodStep(
            step_index=step.step_index,
            intent=step.intent + " (1/2)",
            intent_provisoire=True,
            confidence=step.confidence * 0.9,
            app_name=step.app_name,
            window_title=step.window_title,
            events=left_events,
            started_at=step.started_at,
            # NOTE(review): unlike `right`, no ended_at is set here — the left
            # half keeps UnderstoodStep's default; confirm that is intended.
        )
        right = UnderstoodStep(
            step_index=step.step_index + 1,
            intent=step.intent + " (2/2)",
            intent_provisoire=True,
            confidence=step.confidence * 0.9,
            app_name=step.app_name,
            window_title=step.window_title,
            events=right_events,
            started_at=float(right_events[0].get("timestamp", 0))
            if right_events else step.started_at,
            ended_at=step.ended_at,
        )

        idx0 = step_index - 1
        self._steps[idx0] = left
        self._steps.insert(idx0 + 1, right)
        self._renumber()

        return self._record(FeedbackResult(
            ok=True, action="split", step_index=step_index,
            message=f"Étape {step_index} coupée en 2",
            data={"nb_steps": len(self._steps)},
        ))

    def _do_cancel(self) -> FeedbackResult:
        # Abort the whole workflow; build_workflow_ir() will return None.
        self._cancelled_workflow = True
        return self._record(FeedbackResult(
            ok=True, action="cancel", step_index=-1,
            message="Workflow annulé",
            data={},
        ))

    # ----- WorkflowIR construction ---------------------------------------

    def build_workflow_ir(
        self,
        session_id: str = "",
        name: str = "",
        domain: str = "generic",
        *,
        require_all_validated: bool = False,
    ) -> Optional[WorkflowIR]:
        """Build a WorkflowIR from the validated/corrected steps.

        Args:
            session_id: Identifier of the source session.
            name: Workflow name (defaults to a timestamped one).
            domain: Business domain.
            require_all_validated: If True, raise if at least one
                non-cancelled step was not explicitly validated.

        Returns:
            WorkflowIR, or None if the workflow was cancelled.

        Raises:
            ValueError: when `require_all_validated` is True and an
                unvalidated step remains.
        """
        if self._cancelled_workflow:
            logger.info("ShadowValidator: workflow annulé, pas de build")
            return None

        ir = WorkflowIR.new(
            name=name or f"Workflow du {time.strftime('%d/%m/%Y %H:%M')}",
            domain=domain,
            learned_from=session_id,
        )

        variables: List[Variable] = []
        seen_texts = set()  # typed strings already promoted to variables
        applications: set = set()

        for step in self._steps:
            # Steps cancelled via "undo" are excluded from the IR.
            if step.cancelled:
                continue
            if require_all_validated and not step.validated:
                raise ValueError(
                    f"Étape {step.step_index} non validée : {step.intent}"
                )

            if step.app_name:
                applications.add(step.app_name)

            actions = []
            for evt in step.events:
                action = self._event_to_action(evt)
                if action is None:
                    continue

                # Variable detection (typed text): each distinct typed string
                # longer than 2 chars becomes a workflow variable and the
                # literal text is replaced by a {placeholder}.
                if action.type == "type" and action.text:
                    text = action.text.strip()
                    if text and text not in seen_texts and len(text) > 2:
                        seen_texts.add(text)
                        var_name = f"texte_{len(variables) + 1}"
                        variables.append(Variable(
                            name=var_name,
                            description=f"Texte saisi : « {text[:50]} »",
                            source="user",
                            default=text,
                        ))
                        action.variable = True
                        action.text = "{" + var_name + "}"

                actions.append(action)

            ir_step = Step(
                step_id=f"s{len(ir.steps) + 1}",
                intent=step.intent,
                actions=actions,
            )
            ir.steps.append(ir_step)

        ir.variables = variables
        ir.applications = sorted(applications)
        ir.updated_at = time.time()

        logger.info(
            f"ShadowValidator: WorkflowIR construit — {len(ir.steps)} étapes, "
            f"{len(ir.variables)} variables"
        )
        return ir

    # ----- Utilities ------------------------------------------------------

    def _is_valid_step_index(self, step_index: int) -> bool:
        # Step indices are 1-based, matching what the user sees.
        return 1 <= step_index <= len(self._steps)

    def _get_step(self, step_index: int) -> UnderstoodStep:
        # 1-based accessor; the caller must have validated the index.
        return self._steps[step_index - 1]

    def _renumber(self) -> None:
        # Re-assign contiguous 1-based indices after a merge/split.
        for i, s in enumerate(self._steps, start=1):
            s.step_index = i

    def _record(self, result: FeedbackResult) -> FeedbackResult:
        # Append to the history and pass the result through for chaining.
        self._history.append(result)
        return result

    @staticmethod
    def _clone(step: UnderstoodStep) -> UnderstoodStep:
        """Copy a step; list fields are copied, event dicts are shared."""
        return UnderstoodStep(
            step_index=step.step_index,
            intent=step.intent,
            intent_provisoire=step.intent_provisoire,
            confidence=step.confidence,
            app_name=step.app_name,
            window_title=step.window_title,
            events=list(step.events),
            variables_detectees=list(step.variables_detectees),
            started_at=step.started_at,
            ended_at=step.ended_at,
            validated=step.validated,
            corrected=step.corrected,
            cancelled=step.cancelled,
        )

    @staticmethod
    def _event_to_action(evt: Dict[str, Any]) -> Optional[Action]:
        """Convert a raw event into an Action (mirrors IRBuilder).

        Returns None for event types that do not map to an action
        (or for empty text/key payloads).
        """
        evt_type = evt.get("type", "")

        if evt_type == "mouse_click":
            # `window` may be a dict (nested format) or absent (flat key).
            window = evt.get("window") or {}
            if isinstance(window, dict):
                target = window.get("title", "")
            else:
                target = evt.get("window_title", "")
            return Action(
                type="click",
                target=target or "",
                anchor_hint=(evt.get("vision_info") or {}).get("text", ""),
            )
        if evt_type == "text_input":
            text = evt.get("text", "")
            if text:
                return Action(type="type", text=text)
        if evt_type in ("key_combo", "key_press"):
            keys = evt.get("keys", [])
            if keys:
                return Action(type="key_combo", keys=list(keys))
        if evt_type == "scroll":
            return Action(type="scroll")
        return None
|
||||||
337
core/workflow/surface_classifier.py
Normal file
337
core/workflow/surface_classifier.py
Normal file
@@ -0,0 +1,337 @@
|
|||||||
|
# core/workflow/surface_classifier.py
|
||||||
|
"""
|
||||||
|
SurfaceClassifier — détecte le type de surface applicative au moment de l'exécution.
|
||||||
|
|
||||||
|
4 types de surfaces reconnus :
|
||||||
|
- citrix : session Citrix/RDP/TSE (wfica32.exe, mstsc.exe, CDViewer.exe)
|
||||||
|
→ vision pure obligatoire, paramètres tolérants
|
||||||
|
- windows_native : application Windows native (notepad.exe, explorer.exe, DPI...)
|
||||||
|
→ vision + UIA bonus, paramètres standards
|
||||||
|
- web_local : navigateur local (chrome.exe, firefox.exe, msedge.exe)
|
||||||
|
→ vision + DOM/CDP bonus (si activé), paramètres rapides
|
||||||
|
- unknown : fallback → vision pure, paramètres par défaut
|
||||||
|
|
||||||
|
Le classifier s'exécute UNE SEULE FOIS au début d'une session ou d'un replay.
|
||||||
|
Son résultat détermine :
|
||||||
|
1. Quels helpers sont activés (UIA ? CDP ?)
|
||||||
|
2. Les paramètres de résolution (timeouts, seuils OCR)
|
||||||
|
3. La stratégie de recovery
|
||||||
|
|
||||||
|
Principe : la vision reste le fondement. Le classifier décide juste
|
||||||
|
des bonus à activer et des paramètres à tuner.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import platform
|
||||||
|
import subprocess
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from enum import Enum
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class SurfaceType(str, Enum):
    """Application surface types.

    Inherits from ``str`` so members compare and serialize as their
    plain string values.
    """
    CITRIX = "citrix"                  # Citrix/RDP/TSE session — pure vision
    WINDOWS_NATIVE = "windows_native"  # native Windows app — vision + UIA bonus
    WEB_LOCAL = "web_local"            # local browser — vision + DOM/CDP bonus
    UNKNOWN = "unknown"                # fallback — pure vision, safe defaults
|
||||||
|
|
||||||
|
|
||||||
|
# Processus connus par type de surface
|
||||||
|
_CITRIX_PROCESSES = {
|
||||||
|
"wfica32.exe", # Citrix Workspace (Windows 10+)
|
||||||
|
"cdviewer.exe", # Citrix Desktop Viewer
|
||||||
|
"cdviewer.exe",
|
||||||
|
"mstsc.exe", # Microsoft Remote Desktop
|
||||||
|
"vmware-vmx.exe", # VMware (cas RDS)
|
||||||
|
"xen.exe", # Citrix XenApp
|
||||||
|
"receiver.exe", # Citrix Receiver (ancien)
|
||||||
|
"selfservice.exe", # Citrix Self-Service Plug-in
|
||||||
|
}
|
||||||
|
|
||||||
|
_BROWSER_PROCESSES = {
|
||||||
|
"chrome.exe",
|
||||||
|
"msedge.exe",
|
||||||
|
"firefox.exe",
|
||||||
|
"brave.exe",
|
||||||
|
"opera.exe",
|
||||||
|
"vivaldi.exe",
|
||||||
|
}
|
||||||
|
|
||||||
|
# Processus système Windows qui ne sont PAS des surfaces applicatives
|
||||||
|
_SYSTEM_PROCESSES = {
|
||||||
|
"explorer.exe", # Shell Windows (cas spécial — on le compte comme natif)
|
||||||
|
"searchhost.exe", # Recherche Windows
|
||||||
|
"startmenuexperiencehost.exe",
|
||||||
|
"shellexperiencehost.exe",
|
||||||
|
"applicationframehost.exe",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class SurfaceProfile:
    """Complete profile of a detected surface.

    Bundles the detection result (type, process, title, confidence), the
    helper capabilities available on that surface, and the resolution
    parameters tuned for it.
    """
    surface_type: SurfaceType
    process_name: str = ""     # process owning the active window
    window_title: str = ""     # title of the active window
    confidence: float = 1.0    # detection confidence (0-1)

    # Available capabilities
    uia_available: bool = False  # the UIA helper can be used
    cdp_available: bool = False  # Chrome DevTools Protocol reachable
    ocr_available: bool = True   # OCR is always available (docTR)
    vlm_available: bool = True   # VLM is always available (qwen2.5vl)

    # Parameters tuned for the surface
    timeout_click_ms: int = 10000
    timeout_resolve_ms: int = 5000
    ocr_threshold: float = 0.75
    template_threshold: float = 0.85
    max_retries: int = 2
    retry_delay_ms: int = 2000

    # Metadata
    detected_at: float = 0.0
    details: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the profile as a plain, JSON-friendly dict."""
        capabilities = {
            "uia": self.uia_available,
            "cdp": self.cdp_available,
            "ocr": self.ocr_available,
            "vlm": self.vlm_available,
        }
        parameters = {
            "timeout_click_ms": self.timeout_click_ms,
            "timeout_resolve_ms": self.timeout_resolve_ms,
            "ocr_threshold": self.ocr_threshold,
            "template_threshold": self.template_threshold,
            "max_retries": self.max_retries,
            "retry_delay_ms": self.retry_delay_ms,
        }
        return {
            "surface_type": self.surface_type.value,
            "process_name": self.process_name,
            "window_title": self.window_title,
            "confidence": round(self.confidence, 3),
            "capabilities": capabilities,
            "parameters": parameters,
            "details": self.details,
        }

    def resolve_order(self) -> List[str]:
        """Build the resolver order for this surface and its capabilities.

        UIA/DOM helpers (when available) come first as fast paths; the
        vision pipeline (ocr → template → vlm) is always appended.
        """
        resolvers: List[str] = []
        if self.surface_type == SurfaceType.WINDOWS_NATIVE and self.uia_available:
            resolvers.append("uia")
        if self.surface_type == SurfaceType.WEB_LOCAL and self.cdp_available:
            resolvers.append("dom")
        return resolvers + ["ocr", "template", "vlm"]
|
||||||
|
|
||||||
|
|
||||||
|
class SurfaceClassifier:
    """Detect the surface type and configure adapted parameters.

    Usage:
        classifier = SurfaceClassifier()
        profile = classifier.classify(process_name="notepad.exe", window_title="Sans titre – Bloc-notes")
        if profile.uia_available:
            # Use lea_uia.exe
    """

    def __init__(self, uia_helper_path: str = ""):
        """
        Args:
            uia_helper_path: Path to lea_uia.exe (optional; auto-detected otherwise)
        """
        self._uia_helper_path = uia_helper_path or self._find_uia_helper()

    def _find_uia_helper(self) -> str:
        """Locate lea_uia.exe in the standard locations; return "" if absent."""
        candidates = [
            r"C:\Lea\helpers\lea_uia.exe",
            r".\helpers\lea_uia.exe",
            # Development build output of the Rust helper.
            os.path.join(os.path.dirname(__file__), "..", "..", "agent_rust", "lea_uia",
                         "target", "x86_64-pc-windows-gnu", "release", "lea_uia.exe"),
        ]
        for path in candidates:
            if os.path.isfile(path):
                return os.path.abspath(path)
        return ""

    def classify(
        self,
        process_name: str = "",
        window_title: str = "",
        screen_info: Optional[Dict] = None,
    ) -> SurfaceProfile:
        """Classify a surface from the window context.

        Args:
            process_name: Process name (e.g. "notepad.exe")
            window_title: Title of the active window
            screen_info: Screen info (resolution, DPI, detected compression).
                NOTE(review): currently unused by this implementation.
        """
        import time

        process_lower = process_name.lower().strip()
        title_lower = window_title.lower()

        # Citrix detection — absolute priority
        if process_lower in _CITRIX_PROCESSES:
            return self._build_citrix_profile(process_name, window_title, time.time())

        # Citrix title markers (e.g. "Session Citrix", "Citrix Receiver")
        if any(marker in title_lower for marker in ["citrix", "ica session", "rdp session"]):
            return self._build_citrix_profile(process_name, window_title, time.time())

        # Browser
        if process_lower in _BROWSER_PROCESSES:
            # Special case: a browser hosting an embedded Citrix session
            if "citrix" in title_lower:
                return self._build_citrix_profile(process_name, window_title, time.time())
            return self._build_web_profile(process_name, window_title, time.time())

        # Native Windows application (system shell processes excluded here)
        if process_lower.endswith(".exe") and process_lower not in _SYSTEM_PROCESSES:
            return self._build_windows_profile(process_name, window_title, time.time())

        # Windows shell (explorer.exe) — counted as native
        if process_lower == "explorer.exe":
            return self._build_windows_profile(process_name, window_title, time.time())

        # Unknown — safe fallback
        return self._build_unknown_profile(process_name, window_title, time.time())

    def _build_citrix_profile(self, process: str, title: str, ts: float) -> SurfaceProfile:
        """Citrix profile — pure vision, tolerant parameters."""
        return SurfaceProfile(
            surface_type=SurfaceType.CITRIX,
            process_name=process,
            window_title=title,
            confidence=0.95,
            uia_available=False,  # UIA is not available inside Citrix
            cdp_available=False,
            ocr_available=True,
            vlm_available=True,
            # Citrix: JPEG compression + latency → aggressive retries
            timeout_click_ms=15000,
            timeout_resolve_ms=10000,
            ocr_threshold=0.65,  # More tolerant (compression artifacts)
            template_threshold=0.75,  # More tolerant
            max_retries=3,
            retry_delay_ms=3000,
            detected_at=ts,
            details={"reason": "citrix_process_or_title"},
        )

    def _build_windows_profile(self, process: str, title: str, ts: float) -> SurfaceProfile:
        """Native Windows profile — vision + UIA bonus."""
        uia_ok = self._check_uia_available()
        return SurfaceProfile(
            surface_type=SurfaceType.WINDOWS_NATIVE,
            process_name=process,
            window_title=title,
            confidence=0.9,
            uia_available=uia_ok,
            cdp_available=False,
            ocr_available=True,
            vlm_available=True,
            timeout_click_ms=8000,
            timeout_resolve_ms=5000,
            ocr_threshold=0.75,
            template_threshold=0.85,
            max_retries=2,
            retry_delay_ms=2000,
            detected_at=ts,
            details={
                "reason": "native_windows_process",
                "uia_helper": self._uia_helper_path if uia_ok else "",
            },
        )

    def _build_web_profile(self, process: str, title: str, ts: float) -> SurfaceProfile:
        """Local web profile — vision (+ CDP later)."""
        cdp_ok = self._check_cdp_available()
        return SurfaceProfile(
            surface_type=SurfaceType.WEB_LOCAL,
            process_name=process,
            window_title=title,
            confidence=0.9,
            uia_available=False,  # UIA support is limited for browsers
            cdp_available=cdp_ok,
            ocr_available=True,
            vlm_available=True,
            # Local web: fast, text renders cleanly
            timeout_click_ms=5000,
            timeout_resolve_ms=3000,
            ocr_threshold=0.80,
            template_threshold=0.88,
            max_retries=1,
            retry_delay_ms=1000,
            detected_at=ts,
            details={"reason": "browser_process"},
        )

    def _build_unknown_profile(self, process: str, title: str, ts: float) -> SurfaceProfile:
        """Unknown profile — safe default parameters."""
        return SurfaceProfile(
            surface_type=SurfaceType.UNKNOWN,
            process_name=process,
            window_title=title,
            confidence=0.5,
            uia_available=False,
            cdp_available=False,
            ocr_available=True,
            vlm_available=True,
            timeout_click_ms=10000,
            timeout_resolve_ms=5000,
            ocr_threshold=0.70,
            template_threshold=0.80,
            max_retries=2,
            retry_delay_ms=2000,
            detected_at=ts,
            details={"reason": "fallback"},
        )

    def _check_uia_available(self) -> bool:
        """Check that lea_uia.exe exists and responds.

        On Windows: runs `lea_uia.exe health` and expects a JSON
        {"status": "ok"} on stdout.
        On Linux: always False (stub).
        """
        if platform.system() != "Windows":
            return False
        if not self._uia_helper_path or not os.path.isfile(self._uia_helper_path):
            return False
        try:
            result = subprocess.run(
                [self._uia_helper_path, "health"],
                capture_output=True,
                text=True,
                timeout=5,
            )
            if result.returncode != 0:
                return False
            import json
            data = json.loads(result.stdout.strip())
            return data.get("status") == "ok"
        except Exception as e:
            # Best-effort probe: any failure just means "no UIA bonus".
            logger.debug(f"UIA health check failed: {e}")
            return False

    def _check_cdp_available(self) -> bool:
        """Check that the Chrome DevTools Protocol is reachable.

        Probes the CDP endpoint on localhost:9222 with a 1s timeout.
        """
        try:
            import urllib.request
            with urllib.request.urlopen(
                "http://localhost:9222/json/version", timeout=1
            ) as resp:
                return resp.status == 200
        except Exception:
            # No endpoint (or no network) → no CDP bonus.
            return False
|
||||||
294
core/workflow/uia_helper.py
Normal file
294
core/workflow/uia_helper.py
Normal file
@@ -0,0 +1,294 @@
|
|||||||
|
# core/workflow/uia_helper.py
|
||||||
|
"""
|
||||||
|
UIAHelper — Wrapper Python pour lea_uia.exe (helper Rust UI Automation).
|
||||||
|
|
||||||
|
Expose une API Python simple pour interroger UIA via le binaire Rust.
|
||||||
|
Communique via subprocess + stdin/stdout JSON.
|
||||||
|
|
||||||
|
Pourquoi un helper Rust ?
|
||||||
|
- 5-10x plus rapide que pywinauto (10-20ms vs 50-200ms)
|
||||||
|
- Binaire standalone ~500 Ko, aucune dépendance runtime
|
||||||
|
- Pas de problèmes de threading COM en Python
|
||||||
|
- Crash-safe (le crash du helper n'affecte pas l'agent Python)
|
||||||
|
|
||||||
|
Architecture :
|
||||||
|
Python executor
|
||||||
|
↓ subprocess.run
|
||||||
|
lea_uia.exe query --x 812 --y 436
|
||||||
|
↓ UIA API Windows
|
||||||
|
JSON response
|
||||||
|
↓ stdout
|
||||||
|
Python executor parse JSON
|
||||||
|
|
||||||
|
Si lea_uia.exe n'est pas disponible (Linux, binaire absent, crash) :
|
||||||
|
toutes les méthodes retournent None → fallback vision automatique.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import platform
|
||||||
|
import subprocess
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Timeout par défaut pour les appels UIA (en secondes)
|
||||||
|
_DEFAULT_TIMEOUT = 5.0
|
||||||
|
|
||||||
|
# Masquer la fenêtre console lors du spawn de lea_uia.exe sur Windows.
|
||||||
|
# Sans ce flag, chaque appel (à chaque clic utilisateur pendant
|
||||||
|
# l'enregistrement) fait apparaître une fenêtre cmd noire brièvement
|
||||||
|
# visible à l'écran → ralentit la souris et pollue les screenshots
|
||||||
|
# capturés (le VLM peut "voir" le chemin lea_uia.exe comme texte cliqué).
|
||||||
|
#
|
||||||
|
# La valeur 0x08000000 correspond à CREATE_NO_WINDOW défini dans
|
||||||
|
# l'API Windows. Sur Linux/Mac, la valeur est 0 et `creationflags`
|
||||||
|
# est ignoré. getattr() gère le cas où Python expose déjà la constante
|
||||||
|
# sur Windows.
|
||||||
|
if platform.system() == "Windows":
|
||||||
|
_SUBPROCESS_CREATION_FLAGS = getattr(subprocess, "CREATE_NO_WINDOW", 0x08000000)
|
||||||
|
else:
|
||||||
|
_SUBPROCESS_CREATION_FLAGS = 0
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class UiaElement:
    """Python-side view of a single UI Automation element."""

    name: str = ""
    control_type: str = ""
    class_name: str = ""
    automation_id: str = ""
    bounding_rect: Tuple[int, int, int, int] = (0, 0, 0, 0)
    is_enabled: bool = False
    is_offscreen: bool = True
    parent_path: List[Dict[str, str]] = field(default_factory=list)
    process_name: str = ""

    def center(self) -> Tuple[int, int]:
        """Return the pixel coordinates of the rectangle's center."""
        left, top, right, bottom = self.bounding_rect
        return ((left + right) // 2, (top + bottom) // 2)

    def width(self) -> int:
        """Rectangle width in pixels."""
        rect = self.bounding_rect
        return rect[2] - rect[0]

    def height(self) -> int:
        """Rectangle height in pixels."""
        rect = self.bounding_rect
        return rect[3] - rect[1]

    def is_clickable(self) -> bool:
        """Can the element receive a click (enabled, on-screen, non-empty)?"""
        if not self.is_enabled or self.is_offscreen:
            return False
        return self.width() > 0 and self.height() > 0

    def path_signature(self) -> str:
        """Build the ancestor-path signature used to re-locate the element."""
        parts: List[str] = []
        for ancestor in self.parent_path:
            if ancestor.get("name"):
                parts.append(f"{ancestor['control_type']}[{ancestor['name']}]")
        parts.append(f"{self.control_type}[{self.name}]")
        return " > ".join(parts)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict (rect becomes a list, JSON-friendly)."""
        return {
            "name": self.name,
            "control_type": self.control_type,
            "class_name": self.class_name,
            "automation_id": self.automation_id,
            "bounding_rect": list(self.bounding_rect),
            "is_enabled": self.is_enabled,
            "is_offscreen": self.is_offscreen,
            "parent_path": self.parent_path,
            "process_name": self.process_name,
        }

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "UiaElement":
        """Deserialize from a helper-produced dict; tolerant of bad rects."""
        raw_rect = d.get("bounding_rect", [0, 0, 0, 0])
        if isinstance(raw_rect, list) and len(raw_rect) >= 4:
            rect = tuple(raw_rect[:4])
        else:
            rect = (0, 0, 0, 0)
        return cls(
            name=d.get("name", ""),
            control_type=d.get("control_type", ""),
            class_name=d.get("class_name", ""),
            automation_id=d.get("automation_id", ""),
            bounding_rect=rect,
            is_enabled=d.get("is_enabled", False),
            is_offscreen=d.get("is_offscreen", True),
            parent_path=d.get("parent_path", []),
            process_name=d.get("process_name", ""),
        )
|
||||||
|
|
||||||
|
|
||||||
|
class UIAHelper:
    """Python wrapper around the ``lea_uia.exe`` UI Automation helper.

    Each call spawns the helper binary, parses its JSON stdout and converts
    the payload to :class:`UiaElement`. All public methods degrade
    gracefully: they return ``None``/``False`` when the helper is
    unavailable (non-Windows host, missing binary) or when a call fails,
    times out or returns invalid JSON.
    """

    def __init__(self, helper_path: str = "", timeout: float = _DEFAULT_TIMEOUT):
        """
        Args:
            helper_path: Explicit path to lea_uia.exe; auto-discovered if empty.
            timeout: Per-call subprocess timeout, in seconds.
        """
        self._helper_path = helper_path or self._find_helper()
        self._timeout = timeout
        self._available = self._check_available()

    def _find_helper(self) -> str:
        """Locate lea_uia.exe in the standard install/build locations.

        Returns:
            Absolute path of the first existing candidate, or "" if none exists.
        """
        candidates = [
            r"C:\Lea\helpers\lea_uia.exe",
            # Local Rust build tree (dev setup).
            os.path.join(os.path.dirname(__file__), "..", "..",
                         "agent_rust", "lea_uia", "target",
                         "x86_64-pc-windows-gnu", "release", "lea_uia.exe"),
            "./helpers/lea_uia.exe",
            "lea_uia.exe",
        ]
        for path in candidates:
            if os.path.isfile(path):
                return os.path.abspath(path)
        return ""

    def _check_available(self) -> bool:
        """Check that the helper is usable (Windows host + binary present)."""
        if platform.system() != "Windows":
            logger.debug("UIAHelper: Linux/Mac — helper désactivé")
            return False
        if not self._helper_path:
            logger.debug("UIAHelper: lea_uia.exe introuvable")
            return False
        if not os.path.isfile(self._helper_path):
            # Lazy %-formatting: the message is only built when DEBUG is on.
            logger.debug("UIAHelper: chemin invalide %s", self._helper_path)
            return False
        return True

    @property
    def available(self) -> bool:
        """Whether the helper can actually be invoked on this host."""
        return self._available

    @property
    def helper_path(self) -> str:
        """Resolved path of lea_uia.exe ("" when not found)."""
        return self._helper_path

    def _run(self, args: List[str]) -> Optional[Dict[str, Any]]:
        """Execute lea_uia.exe with ``args`` and parse its JSON stdout.

        Returns:
            The parsed JSON dict, or None on any failure (helper unavailable,
            non-zero exit code, empty output, timeout, invalid JSON).
        """
        if not self._available:
            return None
        try:
            result = subprocess.run(
                [self._helper_path] + args,
                capture_output=True,
                text=True,
                timeout=self._timeout,
                encoding="utf-8",
                errors="replace",
                # Avoid flashing a console window on every call (Windows).
                creationflags=_SUBPROCESS_CREATION_FLAGS,
            )
            if result.returncode != 0:
                logger.debug(
                    "UIAHelper: exit code %s, stderr: %s",
                    result.returncode, result.stderr[:200],
                )
                return None
            output = result.stdout.strip()
            if not output:
                return None
            return json.loads(output)
        except subprocess.TimeoutExpired:
            logger.debug("UIAHelper: timeout (%ss) sur %s", self._timeout, args)
            return None
        except json.JSONDecodeError as e:
            logger.debug("UIAHelper: JSON invalide — %s", e)
            return None
        except Exception as e:  # defensive: never let the helper crash the caller
            logger.debug("UIAHelper: erreur %s", e)
            return None

    @staticmethod
    def _element_from(data: Optional[Dict[str, Any]]) -> Optional[UiaElement]:
        """Shared response handling for query/find/capture calls.

        Returns the parsed UiaElement when the helper answered with
        status "ok" and a non-empty "element" payload, else None.
        """
        if not data or data.get("status") != "ok":
            return None
        elem_data = data.get("element")
        if not elem_data:
            return None
        return UiaElement.from_dict(elem_data)

    def health(self) -> bool:
        """Return True when UIA answers the "health" probe with status ok."""
        data = self._run(["health"])
        return data is not None and data.get("status") == "ok"

    def query_at(
        self,
        x: int,
        y: int,
        with_parents: bool = True,
    ) -> Optional[UiaElement]:
        """Get the UIA element at a screen position.

        Args:
            x, y: Absolute pixel coordinates
            with_parents: Include the parent hierarchy

        Returns:
            UiaElement if found, None otherwise (no element or UIA unavailable)
        """
        args = ["query", "--x", str(x), "--y", str(y)]
        if not with_parents:
            args.append("--with-parents=false")
        return self._element_from(self._run(args))

    def find_by_name(
        self,
        name: str,
        control_type: Optional[str] = None,
        automation_id: Optional[str] = None,
        window: Optional[str] = None,
        timeout_ms: int = 2000,
    ) -> Optional[UiaElement]:
        """Search for an element by exact name (+ optional filters).

        Args:
            name: Exact element name
            control_type: Control type (Button, Edit, MenuItem...)
            automation_id: Automation ID
            window: Restrict the search to a specific window
            timeout_ms: Search timeout in milliseconds
        """
        args = ["find", "--name", name, "--timeout-ms", str(timeout_ms)]
        if control_type:
            args.extend(["--control-type", control_type])
        if automation_id:
            args.extend(["--automation-id", automation_id])
        if window:
            args.extend(["--window", window])
        return self._element_from(self._run(args))

    def capture_focused(self, max_depth: int = 3) -> Optional[UiaElement]:
        """Capture the currently focused element and its context."""
        return self._element_from(self._run(["capture", "--max-depth", str(max_depth)]))
|
||||||
|
|
||||||
|
|
||||||
|
# Process-wide shared instance (lightweight singleton).
_SHARED_HELPER: Optional[UIAHelper] = None


def get_shared_helper() -> UIAHelper:
    """Return the shared UIAHelper, creating it lazily on first use."""
    global _SHARED_HELPER
    if _SHARED_HELPER is None:
        _SHARED_HELPER = UIAHelper()
    return _SHARED_HELPER
|
||||||
278
core/workflow/workflow_ir.py
Normal file
278
core/workflow/workflow_ir.py
Normal file
@@ -0,0 +1,278 @@
|
|||||||
|
# core/workflow/workflow_ir.py
|
||||||
|
"""
|
||||||
|
WorkflowIR — Représentation Intermédiaire d'un workflow.
|
||||||
|
|
||||||
|
C'est la CONNAISSANCE que Léa a acquise en observant un utilisateur.
|
||||||
|
Pas les clics bruts (RawTrace), pas le plan d'exécution (ExecutionPlan).
|
||||||
|
C'est ce que Léa a COMPRIS.
|
||||||
|
|
||||||
|
Format générique — fonctionne pour n'importe quel métier :
|
||||||
|
- TIM qui code des dossiers patients
|
||||||
|
- Comptable qui saisit des factures
|
||||||
|
- RH qui édite des fiches de paie
|
||||||
|
- Logisticien qui gère des stocks
|
||||||
|
|
||||||
|
Le domaine métier est une couche par-dessus (domain_context),
|
||||||
|
pas dans le WorkflowIR lui-même.
|
||||||
|
|
||||||
|
Cycle de vie :
|
||||||
|
RawTrace (capture) → WorkflowIR (compréhension) → ExecutionPlan (exécution)
|
||||||
|
|
||||||
|
Le WorkflowIR est :
|
||||||
|
- versionné (chaque recompilation incrémente la version)
|
||||||
|
- indépendant de la résolution d'écran
|
||||||
|
- indépendant du poste cible
|
||||||
|
- paramétrable (variables substituables)
|
||||||
|
- enrichi par l'apprentissage (chaque replay améliore le IR)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import time
|
||||||
|
import uuid
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Structures de données
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Variable:
    """A substitutable parameter of a workflow."""

    name: str                  # Identifier (e.g. "patient", "facture_num")
    description: str = ""      # Human-readable description
    source: str = "user"       # Origin: "user", "screen", "file", "previous_step"
    default: str = ""          # Fallback value
    required: bool = True

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict."""
        return {
            "name": self.name,
            "description": self.description,
            "source": self.source,
            "default": self.default,
            "required": self.required,
        }

    @classmethod
    def from_dict(cls, d: Dict) -> "Variable":
        """Build from a dict, silently ignoring unknown keys."""
        known = cls.__dataclass_fields__
        return cls(**{key: value for key, value in d.items() if key in known})
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Action:
    """Elementary action within a step."""

    type: str                                       # click, type, key_combo, wait, scroll
    target: str = ""                                # Target description ("bouton Enregistrer")
    text: str = ""                                  # Text to type (for type)
    keys: List[str] = field(default_factory=list)   # Keys (for key_combo)
    duration_ms: int = 0                            # Duration (for wait)
    variable: bool = False                          # True when text contains a {var} placeholder
    anchor_hint: str = ""                           # Visual hint to help resolution
    # Strict step control — the action may only run when the active window
    # matches `expected_window_before`, and may only hand over to the next
    # action once the resulting window matches `expected_window_after`.
    # Both fields are extracted by the IRBuilder from the raw events.
    expected_window_before: str = ""
    expected_window_after: str = ""

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a compact dict, omitting empty/false fields."""
        out: Dict[str, Any] = {"type": self.type}
        optional_fields = (
            ("target", self.target),
            ("text", self.text),
            ("keys", self.keys),
            ("duration_ms", self.duration_ms),
            ("variable", self.variable),
            ("anchor_hint", self.anchor_hint),
            ("expected_window_before", self.expected_window_before),
            ("expected_window_after", self.expected_window_after),
        )
        for key, value in optional_fields:
            if value:
                out[key] = value
        return out

    @classmethod
    def from_dict(cls, d: Dict) -> "Action":
        """Build from a dict, silently ignoring unknown keys."""
        known = cls.__dataclass_fields__
        return cls(**{key: value for key, value in d.items() if key in known})
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Step:
    """One logical step of a workflow — a single business intent."""

    step_id: str
    intent: str                 # "Ouvrir le dossier", "Saisir le code"
    precondition: str = ""      # "L'application est sur l'écran de liste"
    postcondition: str = ""     # "Le dossier est affiché"
    actions: List[Action] = field(default_factory=list)
    is_optional: bool = False   # Optional step (may be skipped)
    is_loop: bool = False       # Repeated step (once per item)
    loop_variable: str = ""     # Loop variable name

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a compact dict, omitting unset optional fields."""
        out: Dict[str, Any] = {
            "step_id": self.step_id,
            "intent": self.intent,
            "actions": [action.to_dict() for action in self.actions],
        }
        if self.precondition:
            out["precondition"] = self.precondition
        if self.postcondition:
            out["postcondition"] = self.postcondition
        if self.is_optional:
            out["is_optional"] = True
        if self.is_loop:
            out["is_loop"] = True
            out["loop_variable"] = self.loop_variable
        return out

    @classmethod
    def from_dict(cls, d: Dict) -> "Step":
        """Build from a dict; missing optional keys fall back to defaults."""
        return cls(
            step_id=d["step_id"],
            intent=d.get("intent", ""),
            precondition=d.get("precondition", ""),
            postcondition=d.get("postcondition", ""),
            actions=[Action.from_dict(raw) for raw in d.get("actions", [])],
            is_optional=d.get("is_optional", False),
            is_loop=d.get("is_loop", False),
            loop_variable=d.get("loop_variable", ""),
        )
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class WorkflowIR:
    """Intermediate Representation of a workflow — the compiled knowledge.

    This is what Léa understood by observing the user.
    Independent of the machine, the screen resolution and the runtime.
    """

    workflow_id: str
    version: int = 1
    name: str = ""
    description: str = ""
    domain: str = "generic"      # Business domain (tim_codage, compta, rh, stocks...)
    learned_from: str = ""       # Source session_id
    created_at: float = 0.0
    updated_at: float = 0.0

    # Content
    variables: List[Variable] = field(default_factory=list)
    steps: List[Step] = field(default_factory=list)

    # Learning metadata
    replay_count: int = 0        # Number of replays performed
    success_rate: float = 0.0    # Average success rate
    last_replay_at: float = 0.0

    # Applications used (detected during learning)
    applications: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-ready dict (success_rate rounded to 3 digits)."""
        return {
            "workflow_id": self.workflow_id,
            "version": self.version,
            "name": self.name,
            "description": self.description,
            "domain": self.domain,
            "learned_from": self.learned_from,
            "created_at": self.created_at,
            "updated_at": self.updated_at,
            "variables": [v.to_dict() for v in self.variables],
            "steps": [s.to_dict() for s in self.steps],
            "replay_count": self.replay_count,
            "success_rate": round(self.success_rate, 3),
            "last_replay_at": self.last_replay_at,
            "applications": self.applications,
        }

    def to_json(self, indent: int = 2) -> str:
        """Serialize to a JSON string (non-ASCII kept readable)."""
        return json.dumps(self.to_dict(), ensure_ascii=False, indent=indent)

    @classmethod
    def from_dict(cls, d: Dict) -> "WorkflowIR":
        """Rebuild a WorkflowIR from a dict produced by to_dict().

        Raises:
            KeyError: if "workflow_id" is missing.
        """
        return cls(
            workflow_id=d["workflow_id"],
            version=d.get("version", 1),
            name=d.get("name", ""),
            description=d.get("description", ""),
            domain=d.get("domain", "generic"),
            learned_from=d.get("learned_from", ""),
            # Float defaults (was 0) so the field types stay consistent.
            created_at=d.get("created_at", 0.0),
            updated_at=d.get("updated_at", 0.0),
            variables=[Variable.from_dict(v) for v in d.get("variables", [])],
            steps=[Step.from_dict(s) for s in d.get("steps", [])],
            replay_count=d.get("replay_count", 0),
            success_rate=d.get("success_rate", 0.0),
            last_replay_at=d.get("last_replay_at", 0.0),
            applications=d.get("applications", []),
        )

    @classmethod
    def from_json(cls, json_str: str) -> "WorkflowIR":
        """Parse a JSON string produced by to_json()."""
        return cls.from_dict(json.loads(json_str))

    def save(self, directory: str) -> Path:
        """Save as <workflow_id>_v<version>.json under *directory*.

        Returns:
            The path of the written file.
        """
        dir_path = Path(directory)
        dir_path.mkdir(parents=True, exist_ok=True)
        file_path = dir_path / f"{self.workflow_id}_v{self.version}.json"
        file_path.write_text(self.to_json(), encoding="utf-8")
        logger.info("WorkflowIR sauvegardé : %s", file_path)
        return file_path

    @classmethod
    def load(cls, file_path: str) -> "WorkflowIR":
        """Load a WorkflowIR from a JSON file."""
        return cls.from_json(Path(file_path).read_text(encoding="utf-8"))

    def increment_version(self) -> "WorkflowIR":
        """Return a deep copy with version+1 and a fresh updated_at stamp."""
        import copy  # local import: only needed on recompilation

        new = copy.deepcopy(self)
        new.version += 1
        new.updated_at = time.time()
        return new

    def add_step(self, intent: str, actions: Optional[List[Dict]] = None, **kwargs) -> Step:
        """Append a step; its step_id is auto-numbered (s1, s2, ...).

        Args:
            intent: Business intent of the step.
            actions: Optional list of action dicts (see Action.from_dict).
            **kwargs: Extra Step fields (precondition, is_optional, ...).

        Returns:
            The newly created Step.
        """
        step = Step(
            step_id=f"s{len(self.steps) + 1}",
            intent=intent,
            actions=[Action.from_dict(a) for a in (actions or [])],
            **kwargs,
        )
        self.steps.append(step)
        return step

    def add_variable(self, name: str, **kwargs) -> Variable:
        """Append a substitutable variable and return it."""
        var = Variable(name=name, **kwargs)
        self.variables.append(var)
        return var

    @staticmethod
    def new(name: str, domain: str = "generic", learned_from: str = "") -> "WorkflowIR":
        """Create a fresh, empty WorkflowIR."""
        # Single timestamp so created_at and updated_at match exactly
        # (the original called time.time() twice and could differ).
        now = time.time()
        return WorkflowIR(
            workflow_id=f"wf_{uuid.uuid4().hex[:12]}",
            version=1,
            name=name,
            domain=domain,
            learned_from=learned_from,
            created_at=now,
            updated_at=now,
        )
|
||||||
@@ -1,100 +0,0 @@
|
|||||||
{
|
|
||||||
"workflow_id": "demo_calculator",
|
|
||||||
"name": "Demo - Calculatrice",
|
|
||||||
"description": "Ouvre la calculatrice et effectue un calcul simple",
|
|
||||||
"version": "1.0.0",
|
|
||||||
"created_at": "2024-11-29T10:00:00",
|
|
||||||
"updated_at": "2024-11-29T10:00:00",
|
|
||||||
"learning_state": "OBSERVATION",
|
|
||||||
"execution_count": 0,
|
|
||||||
"entry_nodes": ["start"],
|
|
||||||
"end_nodes": ["end"],
|
|
||||||
"nodes": [
|
|
||||||
{
|
|
||||||
"node_id": "start",
|
|
||||||
"name": "Desktop",
|
|
||||||
"description": "Écran de départ",
|
|
||||||
"template": {
|
|
||||||
"title_pattern": ".*"
|
|
||||||
},
|
|
||||||
"is_entry": true,
|
|
||||||
"is_end": false,
|
|
||||||
"metadata": {}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"node_id": "calc_open",
|
|
||||||
"name": "Calculatrice ouverte",
|
|
||||||
"description": "La calculatrice est visible",
|
|
||||||
"template": {
|
|
||||||
"title_pattern": ".*(calc|gnome-calculator).*"
|
|
||||||
},
|
|
||||||
"is_entry": false,
|
|
||||||
"is_end": false,
|
|
||||||
"metadata": {}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"node_id": "end",
|
|
||||||
"name": "Calcul effectué",
|
|
||||||
"description": "Le calcul est affiché",
|
|
||||||
"template": {
|
|
||||||
"title_pattern": ".*"
|
|
||||||
},
|
|
||||||
"is_entry": false,
|
|
||||||
"is_end": true,
|
|
||||||
"metadata": {}
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"edges": [
|
|
||||||
{
|
|
||||||
"edge_id": "open_calc",
|
|
||||||
"source_node": "start",
|
|
||||||
"target_node": "calc_open",
|
|
||||||
"action": {
|
|
||||||
"type": "compound",
|
|
||||||
"target": {
|
|
||||||
"by_role": null,
|
|
||||||
"selection_policy": "first"
|
|
||||||
},
|
|
||||||
"parameters": {
|
|
||||||
"steps": [
|
|
||||||
{"type": "key_press", "key": "super"},
|
|
||||||
{"type": "wait", "duration_ms": 500},
|
|
||||||
{"type": "text_input", "text": "calculator"},
|
|
||||||
{"type": "key_press", "key": "Return"}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"constraints": {
|
|
||||||
"timeout_ms": 5000
|
|
||||||
},
|
|
||||||
"confidence_threshold": 0.7
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"edge_id": "do_calc",
|
|
||||||
"source_node": "calc_open",
|
|
||||||
"target_node": "end",
|
|
||||||
"action": {
|
|
||||||
"type": "text_input",
|
|
||||||
"target": {
|
|
||||||
"by_role": "button",
|
|
||||||
"selection_policy": "first"
|
|
||||||
},
|
|
||||||
"parameters": {
|
|
||||||
"text": "${expression}=",
|
|
||||||
"defaults": {
|
|
||||||
"expression": "2+2"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"constraints": {
|
|
||||||
"timeout_ms": 3000
|
|
||||||
},
|
|
||||||
"confidence_threshold": 0.8
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"author": "RPA Vision V3",
|
|
||||||
"tags": ["demo", "calculator"],
|
|
||||||
"difficulty": "easy"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -146,8 +146,14 @@ REQUIRED_FILES=(
|
|||||||
"agent_v1/core/__init__.py"
|
"agent_v1/core/__init__.py"
|
||||||
"agent_v1/core/captor.py"
|
"agent_v1/core/captor.py"
|
||||||
"agent_v1/core/executor.py"
|
"agent_v1/core/executor.py"
|
||||||
|
"agent_v1/core/grounding.py"
|
||||||
|
"agent_v1/core/policy.py"
|
||||||
|
"agent_v1/core/recovery.py"
|
||||||
|
"agent_v1/core/system_dialog_guard.py"
|
||||||
|
"agent_v1/core/uia_helper.py"
|
||||||
"agent_v1/network/__init__.py"
|
"agent_v1/network/__init__.py"
|
||||||
"agent_v1/network/streamer.py"
|
"agent_v1/network/streamer.py"
|
||||||
|
"agent_v1/network/persistent_buffer.py"
|
||||||
"agent_v1/session/__init__.py"
|
"agent_v1/session/__init__.py"
|
||||||
"agent_v1/session/storage.py"
|
"agent_v1/session/storage.py"
|
||||||
"agent_v1/ui/__init__.py"
|
"agent_v1/ui/__init__.py"
|
||||||
@@ -156,6 +162,8 @@ REQUIRED_FILES=(
|
|||||||
"agent_v1/ui/chat_window.py"
|
"agent_v1/ui/chat_window.py"
|
||||||
"agent_v1/ui/capture_server.py"
|
"agent_v1/ui/capture_server.py"
|
||||||
"agent_v1/ui/notifications.py"
|
"agent_v1/ui/notifications.py"
|
||||||
|
"agent_v1/ui/activity_panel.py"
|
||||||
|
"agent_v1/ui/messages.py"
|
||||||
"agent_v1/vision/__init__.py"
|
"agent_v1/vision/__init__.py"
|
||||||
"agent_v1/vision/capturer.py"
|
"agent_v1/vision/capturer.py"
|
||||||
"agent_v1/vision/blur_sensitive.py"
|
"agent_v1/vision/blur_sensitive.py"
|
||||||
|
|||||||
19
deploy/configs/config_dev_windows.txt
Normal file
19
deploy/configs/config_dev_windows.txt
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
# ============================================================
|
||||||
|
# Configuration Lea — Poste Dev / Chef de projet (Windows)
|
||||||
|
# ============================================================
|
||||||
|
#
|
||||||
|
# Poste : PC dev chef de projet
|
||||||
|
# Objectif : enrichir connaissance Windows, evaluer robustesse
|
||||||
|
# Serveur : 192.168.1.40:5005 (RTX 5070)
|
||||||
|
#
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
RPA_SERVER_URL=http://192.168.1.40:5005/api/v1
|
||||||
|
RPA_API_TOKEN=86031addb338e449fccdb1a983f61807aec15d42d482b9c7748ad607dc23caab
|
||||||
|
RPA_MACHINE_ID=DEV_WINDOWS
|
||||||
|
RPA_USER_LABEL=Dev
|
||||||
|
|
||||||
|
# --- Parametres avances (ne pas modifier sauf indication) ---
|
||||||
|
# RPA_OLLAMA_HOST=localhost
|
||||||
|
RPA_BLUR_SENSITIVE=false
|
||||||
|
RPA_LOG_RETENTION_DAYS=180
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user