Compare commits
50 Commits
v3.0
...
c7b0649716
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c7b0649716 | ||
|
|
2bfcfa4535 | ||
|
|
b808e48b1f | ||
|
|
78ee962918 | ||
|
|
c8a3618e27 | ||
|
|
9ca277a63f | ||
|
|
8c7b6e5696 | ||
|
|
af4ffa189a | ||
|
|
42f571d496 | ||
|
|
36737cfe9d | ||
|
|
93ef93e563 | ||
|
|
376e4a88b3 | ||
|
|
bb4ed2a75d | ||
|
|
f7b8cddd2b | ||
|
|
a9a99953dd | ||
|
|
aee64f54b1 | ||
|
|
c77844fa9a | ||
|
|
013fe071a2 | ||
|
|
203dc00d53 | ||
|
|
e9a028134a | ||
|
|
01bba7bc6c | ||
|
|
d5285de99c | ||
|
|
33c198b827 | ||
|
|
816b37af98 | ||
|
|
d82aad984f | ||
|
|
057c37131f | ||
|
|
9bcce3fc68 | ||
|
|
f96f6322ec | ||
|
|
02ee2d7b5b | ||
|
|
47993e2ee9 | ||
|
|
7cc03f6f10 | ||
|
|
a21f1ea9fa | ||
|
|
9188bd7df1 | ||
|
|
f82753debe | ||
|
|
b92cb9db03 | ||
|
|
e66629ce1a | ||
|
|
cecdf417b7 | ||
|
|
56e3cc052a | ||
|
|
332366b58c | ||
|
|
ac9c207474 | ||
|
|
f85d56ac05 | ||
|
|
172167f6c0 | ||
|
|
42d49dd8bd | ||
|
|
f541bb8ce4 | ||
|
|
a6eb4c168f | ||
|
|
f6ad5ff2b2 | ||
|
|
2ac781343a | ||
|
|
bffcfb2db3 | ||
|
|
cc673755f7 | ||
|
|
4509038bf0 |
@@ -30,7 +30,9 @@ DASHBOARD_PORT=5001
|
||||
CLIP_MODEL=ViT-B-32
|
||||
CLIP_PRETRAINED=openai
|
||||
CLIP_DEVICE=cpu # cpu or cuda
|
||||
VLM_MODEL=qwen3-vl:8b
|
||||
RPA_VLM_MODEL=gemma4:latest # gemma4:latest (défaut), qwen3-vl:8b, ui-tars (fallback)
|
||||
VLM_MODEL=gemma4:latest # alias de compatibilité
|
||||
# VLM_ALLOW_CLOUD=false # true pour activer les APIs cloud en fallback (OpenAI, Gemini, Anthropic)
|
||||
VLM_ENDPOINT=http://localhost:11434
|
||||
OWL_MODEL=google/owlv2-base-patch16-ensemble
|
||||
OWL_CONFIDENCE_THRESHOLD=0.1
|
||||
|
||||
207
.gitea/workflows/security-audit.yml
Normal file
207
.gitea/workflows/security-audit.yml
Normal file
@@ -0,0 +1,207 @@
|
||||
# ------------------------------------------------------------------
|
||||
# Audit sécurité — bandit + pip-audit + scan secrets
|
||||
# ------------------------------------------------------------------
|
||||
# Jamais bloquant : on reporte les warnings, on ne casse pas la CI.
|
||||
# Utile pour détecter les dérives progressives (nouveaux CVE, secrets
|
||||
# oubliés dans un commit, patterns risqués).
|
||||
#
|
||||
# Fréquence : à chaque push sur main + hebdo (cron).
|
||||
# ------------------------------------------------------------------
|
||||
name: security-audit
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
schedule:
|
||||
# Tous les lundis à 6h UTC (8h Paris hiver, 7h Paris été).
|
||||
- cron: "0 6 * * 1"
|
||||
workflow_dispatch: {}
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
# ----------------------------------------------------------------
|
||||
# Job 1 — bandit (bonnes pratiques sécu Python)
|
||||
# ----------------------------------------------------------------
|
||||
bandit:
|
||||
name: Bandit (scan statique)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
continue-on-error: true
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Python 3.12
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.12"
|
||||
cache: "pip"
|
||||
|
||||
- name: Installation bandit
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install "bandit[toml]==1.7.10"
|
||||
|
||||
- name: Scan bandit sur core/
|
||||
run: |
|
||||
# -ll : niveau LOW minimum (remonte tout)
|
||||
# -ii : confiance LOW minimum
|
||||
# --skip B101 : on ignore les asserts (usuels en tests/validation)
|
||||
bandit -r core/ \
|
||||
--skip B101,B404,B603 \
|
||||
--format txt \
|
||||
--exit-zero \
|
||||
--output bandit-report.txt
|
||||
echo "=== RAPPORT BANDIT ==="
|
||||
cat bandit-report.txt
|
||||
|
||||
- name: Upload rapport bandit
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: bandit-report
|
||||
path: bandit-report.txt
|
||||
retention-days: 30
|
||||
if-no-files-found: ignore
|
||||
|
||||
# ----------------------------------------------------------------
|
||||
# Job 2 — pip-audit (CVE sur requirements)
|
||||
# ----------------------------------------------------------------
|
||||
pip-audit:
|
||||
name: pip-audit (CVE dépendances)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
continue-on-error: true
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Python 3.12
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.12"
|
||||
cache: "pip"
|
||||
|
||||
- name: Installation pip-audit
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install "pip-audit==2.7.3"
|
||||
|
||||
- name: Audit CVE sur requirements-ci.txt
|
||||
run: |
|
||||
if [ -f requirements-ci.txt ]; then
|
||||
pip-audit -r requirements-ci.txt \
|
||||
--format json \
|
||||
--output pip-audit-ci.json \
|
||||
--progress-spinner off \
|
||||
--disable-pip || echo "::warning::CVE détectées dans requirements-ci.txt"
|
||||
echo "=== RAPPORT pip-audit (CI) ==="
|
||||
cat pip-audit-ci.json || true
|
||||
else
|
||||
echo "::notice::requirements-ci.txt absent — skip"
|
||||
fi
|
||||
|
||||
- name: Audit CVE sur requirements.txt (best-effort)
|
||||
run: |
|
||||
# Timeout généreux car requirements.txt est massif (torch, CUDA).
|
||||
timeout 120 pip-audit -r requirements.txt \
|
||||
--format json \
|
||||
--output pip-audit-full.json \
|
||||
--progress-spinner off \
|
||||
--disable-pip 2>&1 | head -200 || \
|
||||
echo "::warning::pip-audit sur requirements.txt a timeout ou échoué (non bloquant)"
|
||||
|
||||
- name: Upload rapports pip-audit
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: pip-audit-reports
|
||||
path: |
|
||||
pip-audit-ci.json
|
||||
pip-audit-full.json
|
||||
retention-days: 30
|
||||
if-no-files-found: ignore
|
||||
|
||||
# ----------------------------------------------------------------
|
||||
# Job 3 — Scan secrets en clair (grep simple)
|
||||
# ----------------------------------------------------------------
|
||||
# Patterns recherchés : clés API Anthropic (sk-ant-), OpenAI (sk-),
|
||||
# Google (AIzaSy), AWS (AKIA), tokens Hugging Face (hf_).
|
||||
# Ne cherche QUE dans les fichiers trackés (pas .env, pas .venv).
|
||||
# ----------------------------------------------------------------
|
||||
secrets-scan:
|
||||
name: Scan secrets (grep)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 3
|
||||
continue-on-error: true
|
||||
|
||||
steps:
|
||||
- name: Checkout (historique complet)
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Scan patterns de secrets
|
||||
run: |
|
||||
# Chemins exclus : venvs, caches, data, htmlcov, models.
|
||||
EXCLUDES='--exclude-dir=.venv --exclude-dir=venv_v3 --exclude-dir=.git \
|
||||
--exclude-dir=node_modules --exclude-dir=htmlcov --exclude-dir=models \
|
||||
--exclude-dir=data --exclude-dir=__pycache__ --exclude-dir=.pytest_cache \
|
||||
--exclude=*.lock --exclude=*.log --exclude=*.md'
|
||||
|
||||
echo "=== Recherche de secrets potentiels ==="
|
||||
FOUND=0
|
||||
|
||||
# Anthropic
|
||||
if grep -rnI $EXCLUDES -E 'sk-ant-[a-zA-Z0-9_-]{20,}' . 2>/dev/null; then
|
||||
echo "::warning::Clé Anthropic potentielle détectée"
|
||||
FOUND=1
|
||||
fi
|
||||
|
||||
# OpenAI
|
||||
if grep -rnI $EXCLUDES -E 'sk-proj-[a-zA-Z0-9_-]{20,}|sk-[a-zA-Z0-9]{40,}' . 2>/dev/null; then
|
||||
echo "::warning::Clé OpenAI potentielle détectée"
|
||||
FOUND=1
|
||||
fi
|
||||
|
||||
# Google Cloud / API Keys
|
||||
if grep -rnI $EXCLUDES -E 'AIzaSy[a-zA-Z0-9_-]{33}' . 2>/dev/null; then
|
||||
echo "::warning::Clé Google API potentielle détectée"
|
||||
FOUND=1
|
||||
fi
|
||||
|
||||
# AWS
|
||||
if grep -rnI $EXCLUDES -E 'AKIA[0-9A-Z]{16}' . 2>/dev/null; then
|
||||
echo "::warning::Clé AWS potentielle détectée"
|
||||
FOUND=1
|
||||
fi
|
||||
|
||||
# Hugging Face
|
||||
if grep -rnI $EXCLUDES -E 'hf_[a-zA-Z0-9]{30,}' . 2>/dev/null; then
|
||||
echo "::warning::Token Hugging Face potentiel détecté"
|
||||
FOUND=1
|
||||
fi
|
||||
|
||||
# Mots-clés suspects à côté d'assignations
|
||||
if grep -rnI $EXCLUDES -E '(password|passwd|secret|api_key|apikey|token)\s*=\s*["\x27][a-zA-Z0-9_\-!@#\$%]{12,}["\x27]' . 2>/dev/null \
|
||||
| grep -viE '(example|dummy|placeholder|test|fake|xxx|changeme|\$\{)' 2>/dev/null; then
|
||||
echo "::warning::Assignation suspecte d'un secret détectée"
|
||||
FOUND=1
|
||||
fi
|
||||
|
||||
if [ "$FOUND" -eq 0 ]; then
|
||||
echo "Aucun secret détecté par les patterns de base."
|
||||
else
|
||||
echo ""
|
||||
echo "::notice::Vérifier manuellement les occurrences ci-dessus."
|
||||
echo "::notice::Si faux positif : ajouter le fichier aux exclusions ou reformater."
|
||||
fi
|
||||
|
||||
# Toujours succès (job non bloquant).
|
||||
exit 0
|
||||
199
.gitea/workflows/tests.yml
Normal file
199
.gitea/workflows/tests.yml
Normal file
@@ -0,0 +1,199 @@
|
||||
# ------------------------------------------------------------------
|
||||
# CI principale — Tests unitaires + lint léger
|
||||
# ------------------------------------------------------------------
|
||||
# Déclenchement : push / pull_request sur n'importe quelle branche.
|
||||
# Objectif : feedback rapide (< 3 min) sans GPU ni Ollama.
|
||||
# Runner : self-hosted (label "ubuntu-latest" ou équivalent).
|
||||
#
|
||||
# Les tests marqués `slow`, `gpu`, `integration`, `performance`,
|
||||
# `visual` et `smoke` sont exclus volontairement — ils nécessitent
|
||||
# CUDA, Ollama, ou des captures d'écran réelles.
|
||||
# ------------------------------------------------------------------
|
||||
name: tests
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- "**"
|
||||
pull_request:
|
||||
branches:
|
||||
- "**"
|
||||
|
||||
# Permet à une nouvelle exécution d'annuler les précédentes
|
||||
# sur la même branche (évite l'engorgement du runner local).
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
# Empêche l'import accidentel de torch/CUDA pendant la CI.
|
||||
PYTHONDONTWRITEBYTECODE: "1"
|
||||
PIP_DISABLE_PIP_VERSION_CHECK: "1"
|
||||
PIP_NO_PYTHON_VERSION_WARNING: "1"
|
||||
# Les modules d'exécution lisent parfois ces vars ; valeurs neutres en CI.
|
||||
RPA_VISION_CI: "1"
|
||||
RPA_AUTH_VAULT_PATH: "/tmp/ci_vault.enc"
|
||||
|
||||
jobs:
|
||||
# ----------------------------------------------------------------
|
||||
# Job 1 — Lint (ruff + black --check)
|
||||
# ----------------------------------------------------------------
|
||||
# Non-bloquant : si ruff/black ne sont pas installables, on log
|
||||
# un warning et on continue. L'objectif ici est d'alerter, pas de
|
||||
# casser la CI pour des espaces en trop.
|
||||
# ----------------------------------------------------------------
|
||||
lint:
|
||||
name: Lint (ruff + black)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
continue-on-error: true
|
||||
|
||||
steps:
|
||||
- name: Checkout du code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Python 3.12
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.12"
|
||||
cache: "pip"
|
||||
|
||||
- name: Installation des linters
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install "ruff==0.6.9" "black==23.12.1" || {
|
||||
echo "::warning::Impossible d'installer ruff/black — job ignoré"
|
||||
exit 0
|
||||
}
|
||||
|
||||
- name: Ruff (lint rapide)
|
||||
run: |
|
||||
if command -v ruff >/dev/null 2>&1; then
|
||||
# Ruff : on limite aux erreurs critiques (E9, F63, F7, F82) pour
|
||||
# éviter le bruit. Dom peut durcir progressivement.
|
||||
ruff check --select=E9,F63,F7,F82 --output-format=github \
|
||||
core/ agent_v0/ tests/ || {
|
||||
echo "::warning::Ruff a trouvé des erreurs critiques"
|
||||
exit 1
|
||||
}
|
||||
else
|
||||
echo "::warning::ruff indisponible — skip"
|
||||
fi
|
||||
|
||||
- name: Black (format check)
|
||||
run: |
|
||||
if command -v black >/dev/null 2>&1; then
|
||||
# --check : ne modifie pas, signale juste.
|
||||
black --check --diff core/ agent_v0/ tests/ || {
|
||||
echo "::warning::Black suggère un reformatage — non bloquant"
|
||||
exit 0
|
||||
}
|
||||
else
|
||||
echo "::warning::black indisponible — skip"
|
||||
fi
|
||||
|
||||
# ----------------------------------------------------------------
|
||||
# Job 2 — Tests unitaires
|
||||
# ----------------------------------------------------------------
|
||||
# Exclut tous les marqueurs lourds. Utilise requirements-ci.txt
|
||||
# pour éviter torch/CUDA (économie ~3 Go + ~2 min).
|
||||
# ----------------------------------------------------------------
|
||||
unit-tests:
|
||||
name: Tests unitaires (sans GPU)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
|
||||
steps:
|
||||
- name: Checkout du code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Python 3.12
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.12"
|
||||
cache: "pip"
|
||||
cache-dependency-path: |
|
||||
requirements-ci.txt
|
||||
requirements.txt
|
||||
|
||||
- name: Installation des dépendances CI
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
if [ -f requirements-ci.txt ]; then
|
||||
echo "Utilisation de requirements-ci.txt (léger, sans torch)"
|
||||
pip install -r requirements-ci.txt
|
||||
else
|
||||
echo "::warning::requirements-ci.txt absent — fallback requirements.txt (lourd)"
|
||||
pip install -r requirements.txt
|
||||
fi
|
||||
|
||||
- name: Vérification imports critiques
|
||||
run: |
|
||||
python -c "import pytest; print(f'pytest {pytest.__version__}')"
|
||||
python -c "import sys; sys.path.insert(0, '.'); import core; print('core OK')" || {
|
||||
echo "::error::Impossible d'importer core.*"
|
||||
exit 1
|
||||
}
|
||||
|
||||
- name: Tests unitaires (hors slow/gpu/integration)
|
||||
run: |
|
||||
python -m pytest tests/unit/ \
|
||||
-m "not slow and not gpu and not integration and not performance and not visual" \
|
||||
--tb=short \
|
||||
--strict-markers \
|
||||
-q \
|
||||
--maxfail=10 \
|
||||
-o cache_dir=/tmp/.pytest_cache_ci
|
||||
|
||||
- name: Upload logs si échec
|
||||
if: failure()
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: pytest-logs
|
||||
path: |
|
||||
/tmp/.pytest_cache_ci
|
||||
logs/
|
||||
retention-days: 3
|
||||
if-no-files-found: ignore
|
||||
|
||||
# ----------------------------------------------------------------
|
||||
# Job 3 — Tests sécurité (bloquant)
|
||||
# ----------------------------------------------------------------
|
||||
# Les tests `test_security_*` valident des invariants critiques
|
||||
# (évaluation sûre, sérialisation signée). Aucune régression tolérée.
|
||||
# ----------------------------------------------------------------
|
||||
security-tests:
|
||||
name: Tests sécurité (critique)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
needs: [unit-tests]
|
||||
|
||||
steps:
|
||||
- name: Checkout du code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Python 3.12
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.12"
|
||||
cache: "pip"
|
||||
cache-dependency-path: |
|
||||
requirements-ci.txt
|
||||
requirements.txt
|
||||
|
||||
- name: Installation des dépendances CI
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
if [ -f requirements-ci.txt ]; then
|
||||
pip install -r requirements-ci.txt
|
||||
else
|
||||
pip install -r requirements.txt
|
||||
fi
|
||||
|
||||
- name: Tests sécurité (test_security_*)
|
||||
run: |
|
||||
python -m pytest tests/unit/test_security_*.py \
|
||||
--tb=long \
|
||||
--strict-markers \
|
||||
-v \
|
||||
-o cache_dir=/tmp/.pytest_cache_ci_sec
|
||||
35
.gitignore
vendored
35
.gitignore
vendored
@@ -75,3 +75,38 @@ htmlcov/
|
||||
# === Backups ===
|
||||
*_backup_*
|
||||
backups/
|
||||
*.bak
|
||||
*.bak_*
|
||||
*.orig
|
||||
*.old
|
||||
|
||||
# === Legacy / Triage ===
|
||||
_a_trier/
|
||||
archives/
|
||||
|
||||
# === Claude Code — worktrees et données locales ===
|
||||
# Worktrees générés par la CLI Claude Code lors d'exécutions d'agents
|
||||
# parallèles. Peuvent atteindre plusieurs centaines de Mo chacun.
|
||||
# Ne jamais committer — gérer via `git worktree list` / `git worktree remove`.
|
||||
.claude/
|
||||
.kiro/
|
||||
.mcp.json
|
||||
.snapshots/
|
||||
|
||||
# === Données runtime (sessions, learning, buffer, config local) ===
|
||||
data/
|
||||
.hypothesis/
|
||||
.deps_installed
|
||||
# Buffers SQLite locaux (streamer, cache)
|
||||
**/buffer/
|
||||
**/pending_events.db
|
||||
# Databases applicatives (instance Flask)
|
||||
**/instance/*.db
|
||||
**/instance/*.sqlite
|
||||
**/instance/*.sqlite3
|
||||
# Caches et index locaux
|
||||
*.sqlite
|
||||
*.sqlite3
|
||||
*.db-journal
|
||||
*.db-wal
|
||||
*.db-shm
|
||||
|
||||
@@ -21,7 +21,12 @@ ollama serve
|
||||
### 3. Télécharger le modèle VLM
|
||||
|
||||
```bash
|
||||
ollama pull qwen3-vl:8b
|
||||
# Modèle par défaut du projet (voir .env.example)
|
||||
ollama pull gemma4:latest
|
||||
|
||||
# Alternatives supportées
|
||||
# ollama pull qwen3-vl:8b
|
||||
# ollama pull 0000/ui-tars-1.5-7b-q8_0:7b # grounder visuel
|
||||
```
|
||||
|
||||
## Utilisation
|
||||
|
||||
338
README.md
338
README.md
@@ -1,207 +1,203 @@
|
||||
# RPA Vision V3 - 100% Vision-Based Workflow Automation
|
||||
# RPA Vision V3 — Automatisation basée sur la compréhension visuelle des interfaces
|
||||
|
||||
## 📊 Status
|
||||
> ⚠️ **Projet en phase POC** — voir [`docs/STATUS.md`](docs/STATUS.md) pour l'état
|
||||
> réel par module. Certaines briques sont opérationnelles bout en bout,
|
||||
> d'autres sont en cours de stabilisation. Ce dépôt n'est pas production-ready.
|
||||
|
||||
🚀 **PRODUCTION-READY** - Phase 12 Complete (77% System Completion) ✅
|
||||
*Dernière mise à jour : 14 avril 2026*
|
||||
|
||||
**Latest Update**: 14 Décembre 2024
|
||||
- ✅ **10/13 Phases Complétées** - Système mature et fonctionnel
|
||||
- ✅ **Performance Exceptionnelle** - 500-6250x plus rapide que requis
|
||||
- ✅ **Architecture Entreprise** - 148k+ lignes, 19 modules, 6 specs complètes
|
||||
- ✅ **Innovations Techniques** - Self-healing, Multi-modal, GPU management
|
||||
- 📊 **Audit Complet** - [Rapport détaillé](AUDIT_COMPLET_SYSTEME_RPA_VISION_V3.md)
|
||||
## Intention
|
||||
|
||||
**Quick Test**: `bash test_clip.sh`
|
||||
Automatiser des workflows métier par **compréhension sémantique de l'écran**
|
||||
plutôt que par coordonnées de clic fixes. Le système observe l'utilisateur,
|
||||
reconstruit un graphe d'états de l'interface, et cherche à rejouer la
|
||||
procédure en reconnaissant visuellement les éléments cibles — y compris
|
||||
quand l'UI change légèrement.
|
||||
|
||||
## 🎯 Vision
|
||||
Terrain cible principal : postes hospitaliers (Citrix, applications métier
|
||||
web et desktop). Contrainte forte : **100 % local**, pas d'appel à un LLM
|
||||
cloud dans le pipeline par défaut.
|
||||
|
||||
RPA basé sur la **compréhension sémantique** des interfaces, pas sur des coordonnées de clics.
|
||||
|
||||
Le système apprend des workflows en observant l'utilisateur et les automatise de manière robuste grâce à une architecture en 5 couches.
|
||||
|
||||
## 🏗️ Architecture en 5 Couches
|
||||
## Architecture en couches
|
||||
|
||||
```
|
||||
RawSession (Couche 0)
|
||||
↓
|
||||
ScreenState (Couche 1) - 4 niveaux d'abstraction
|
||||
↓
|
||||
UIElement Detection (Couche 2) - Types + Rôles sémantiques
|
||||
↓
|
||||
State Embedding (Couche 3) - Fusion multi-modale
|
||||
↓
|
||||
Workflow Graph (Couche 4) - Nodes + Edges + Learning States
|
||||
RawSession (couche 0) — capture événements + screenshots
|
||||
↓
|
||||
ScreenState (couche 1) — états d'écran à plusieurs niveaux d'abstraction
|
||||
↓
|
||||
UIElement (couche 2) — détection sémantique (cascade OCR + templates + VLM)
|
||||
↓
|
||||
State Embedding (couche 3) — fusion multi-modale + index FAISS
|
||||
↓
|
||||
Workflow Graph (couche 4) — nœuds, transitions, résolution de cibles
|
||||
```
|
||||
|
||||
## 📁 Structure
|
||||
## État des fonctionnalités (synthèse)
|
||||
|
||||
```
|
||||
rpa_vision_v3/
|
||||
├── core/
|
||||
│ ├── models/ # Couches 0-4 : Structures de données
|
||||
│ ├── capture/ # Couche 0 : Capture événements + screenshots
|
||||
│ ├── detection/ # Couche 2 : Détection UI sémantique
|
||||
│ ├── embedding/ # Couche 3 : Fusion multi-modale + FAISS
|
||||
│ ├── graph/ # Couche 4 : Construction + Matching + Exécution
|
||||
│ └── persistence/ # Sauvegarde/Chargement
|
||||
├── data/
|
||||
│ ├── sessions/ # RawSessions
|
||||
│ ├── screen_states/ # ScreenStates
|
||||
│ ├── embeddings/ # Vecteurs .npy
|
||||
│ ├── faiss_index/ # Index FAISS
|
||||
│ └── workflows/ # Workflow Graphs
|
||||
└── tests/ # Tests unitaires + intégration
|
||||
```
|
||||
Le détail par module est dans [`docs/STATUS.md`](docs/STATUS.md).
|
||||
|
||||
## 🚀 Démarrage Rapide
|
||||
**Opérationnel**
|
||||
- Capture Windows (Agent V1) + streaming vers serveur Linux
|
||||
- Stockage des sessions brutes (screenshots + événements)
|
||||
- Streaming server FastAPI, sessions en mémoire
|
||||
- Build du package Windows (`deploy/build_package.sh`)
|
||||
|
||||
**Alpha (fonctionnel sur un cas de référence, encore peu généralisé)**
|
||||
- Détection UI par cascade VLM + OCR + templates
|
||||
- Construction de workflow graph depuis une session
|
||||
- Replay E2E supervisé — premier succès sur Notepad le 13 avril 2026
|
||||
- Mode apprentissage : pause et demande d'aide humaine quand la résolution échoue
|
||||
- Embeddings CLIP + index FAISS
|
||||
- Module auth (Fernet + TOTP), federation (LearningPack)
|
||||
- Web Dashboard, Agent Chat
|
||||
|
||||
**En cours**
|
||||
- Visual Workflow Builder (VWB) — bugs DB runtime connus
|
||||
- Self-healing / recovery global
|
||||
- Analytics / reporting
|
||||
- Worker de compilation sessions → ExecutionPlan
|
||||
- Tests E2E multi-applications
|
||||
|
||||
## Limitations connues
|
||||
|
||||
- Le pipeline de replay est validé sur un nombre très restreint d'applications.
|
||||
- `TargetMemoryStore` (apprentissage Phase 1) est câblé mais sa base reste
|
||||
vide tant qu'un replay complet n'a pas été cristallisé.
|
||||
- Certaines asymétries entre chemins stricts et legacy dans le serveur de
|
||||
streaming peuvent provoquer des arrêts au lieu de pauses d'apprentissage.
|
||||
- VWB n'est pas encore stable en écriture ; un outil dédié plus simple est
|
||||
envisagé.
|
||||
|
||||
## Démarrage
|
||||
|
||||
### Prérequis
|
||||
|
||||
- Python 3.10 à 3.12
|
||||
- [Ollama](https://ollama.ai) installé et démarré localement
|
||||
- Recommandé : GPU NVIDIA pour l'inférence VLM
|
||||
- Windows 10/11 uniquement pour le client Agent V1
|
||||
|
||||
### Installation
|
||||
|
||||
```bash
|
||||
# 1. Installer Ollama
|
||||
curl -fsSL https://ollama.ai/install.sh | sh # Linux
|
||||
# ou
|
||||
brew install ollama # macOS
|
||||
|
||||
# 2. Démarrer Ollama
|
||||
ollama serve
|
||||
|
||||
# 3. Télécharger le modèle VLM
|
||||
ollama pull qwen3-vl:8b
|
||||
|
||||
# 4. Installer dépendances Python
|
||||
# 1) Cloner puis créer le venv
|
||||
python3 -m venv .venv
|
||||
source .venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
|
||||
# 2) Démarrer Ollama et récupérer le modèle VLM par défaut
|
||||
ollama serve &
|
||||
ollama pull gemma4:latest # défaut du projet
|
||||
# Alternatives supportées :
|
||||
# ollama pull qwen3-vl:8b
|
||||
# ollama pull 0000/ui-tars-1.5-7b-q8_0:7b # grounder visuel
|
||||
|
||||
# 3) Copier et ajuster la configuration
|
||||
cp .env.example .env
|
||||
# éditer .env pour vérifier RPA_VLM_MODEL, VLM_ENDPOINT, ports, etc.
|
||||
```
|
||||
|
||||
### Test Rapide
|
||||
### Lancer les services
|
||||
|
||||
Tous les services sont pilotés par `svc.sh` (source de vérité des ports :
|
||||
`services.conf`).
|
||||
|
||||
```bash
|
||||
# Diagnostic système
|
||||
python3 rpa_vision_v3/examples/diagnostic_vlm.py
|
||||
|
||||
# Test de détection
|
||||
./rpa_vision_v3/test_quick.sh
|
||||
./svc.sh status # État de tous les services
|
||||
./svc.sh start # Tout démarrer
|
||||
./svc.sh start streaming # Streaming server uniquement (port 5005)
|
||||
./svc.sh restart api # Redémarrer l'API (port 8000)
|
||||
./svc.sh stop # Tout arrêter
|
||||
```
|
||||
|
||||
### Utilisation - Détection UI
|
||||
| Port | Service |
|
||||
|---|---|
|
||||
| 8000 | API Server (upload / traitement core) |
|
||||
| 5001 | Web Dashboard |
|
||||
| 5002 | VWB Backend (Flask) |
|
||||
| 5003 | Monitoring |
|
||||
| 5004 | Agent Chat |
|
||||
| 5005 | Streaming Server (Agent V1 → pipeline core) |
|
||||
| 5006 | Session Cleaner |
|
||||
| 5099 | Worker de compilation (optionnel) |
|
||||
| 3002 | VWB Frontend (Vite/React) |
|
||||
|
||||
```python
|
||||
from rpa_vision_v3.core.detection import create_detector
|
||||
### Client Windows (Agent V1)
|
||||
|
||||
# Créer le détecteur
|
||||
detector = create_detector()
|
||||
|
||||
# Détecter les éléments UI
|
||||
elements = detector.detect("screenshot.png")
|
||||
|
||||
# Utiliser les résultats
|
||||
for elem in elements:
|
||||
print(f"{elem.type:15s} | {elem.role:20s} | {elem.label}")
|
||||
```
|
||||
|
||||
### Utilisation - Workflow (Phase 4 - À venir)
|
||||
|
||||
```python
|
||||
from rpa_vision_v3.core.models import RawSession, ScreenState, Workflow
|
||||
from rpa_vision_v3.core.graph import GraphBuilder, NodeMatcher
|
||||
|
||||
# 1. Capturer une session
|
||||
session = RawSession(...)
|
||||
# ... capturer événements et screenshots
|
||||
|
||||
# 2. Construire workflow automatiquement
|
||||
builder = GraphBuilder(...)
|
||||
workflow = builder.build_from_session(session)
|
||||
|
||||
# 3. Matcher état actuel
|
||||
matcher = NodeMatcher(...)
|
||||
current_state = ScreenState(...)
|
||||
match = matcher.match(current_state, workflow)
|
||||
|
||||
# 4. Exécuter action
|
||||
if match:
|
||||
edge = workflow.get_outgoing_edges(match.node.node_id)[0]
|
||||
executor.execute_edge(edge, current_state)
|
||||
```
|
||||
|
||||
## 📚 Documentation
|
||||
|
||||
### Guides Principaux
|
||||
- **Quick Start** : `QUICK_START.md` - Démarrage rapide
|
||||
- **Prochaines Étapes** : `NEXT_STEPS.md` - Roadmap et Phase 4
|
||||
- **Phase 3 Complète** : `PHASE3_COMPLETE.md` - Résumé Phase 3
|
||||
|
||||
### Documentation Technique
|
||||
- **Spec complète** : `.kiro/specs/workflow-graph-implementation/`
|
||||
- **Architecture** : `docs/reference/ARCHITECTURE_VISION_COMPLETE.md`
|
||||
- **Détection Hybride** : `HYBRID_DETECTION_SUMMARY.md`
|
||||
- **Intégration Ollama** : `docs/OLLAMA_INTEGRATION.md`
|
||||
|
||||
## 🎓 Concepts Clés
|
||||
|
||||
### RPA 100% Vision
|
||||
|
||||
- ❌ Pas de coordonnées (x, y) fixes
|
||||
- ✅ Rôles sémantiques (primary_action, form_input, etc.)
|
||||
- ✅ Matching par similarité visuelle et textuelle
|
||||
- ✅ Robuste aux changements d'UI
|
||||
|
||||
### Apprentissage Progressif
|
||||
|
||||
```
|
||||
OBSERVATION (5+ exécutions)
|
||||
↓
|
||||
COACHING (10+ assistances, succès >90%)
|
||||
↓
|
||||
AUTO_CANDIDATE (20+ exécutions, succès >95%)
|
||||
↓
|
||||
AUTO_CONFIRMÉ (validation utilisateur)
|
||||
```
|
||||
|
||||
### State Embedding
|
||||
|
||||
Fusion multi-modale :
|
||||
- 50% Image (screenshot complet)
|
||||
- 30% Texte (texte détecté)
|
||||
- 10% Titre (fenêtre)
|
||||
- 10% UI (éléments détectés)
|
||||
|
||||
## 🧪 Tests
|
||||
Le client capture souris, clavier et écran sur le poste Windows et envoie
|
||||
les données au streaming server Linux.
|
||||
|
||||
```bash
|
||||
# Tests unitaires
|
||||
pytest tests/unit/
|
||||
|
||||
# Tests d'intégration
|
||||
pytest tests/integration/
|
||||
|
||||
# Tests de performance
|
||||
pytest tests/performance/ --benchmark-only
|
||||
# Build du package Windows depuis le repo Linux
|
||||
./deploy/build_package.sh
|
||||
# produit deploy/Lea_v<version>.zip
|
||||
```
|
||||
|
||||
## 📈 Roadmap - 77% Complété (10/13 Phases)
|
||||
Voir [`docs/DEV_SETUP.md`](docs/DEV_SETUP.md) pour la maintenance du dépôt
|
||||
(worktrees, build, services).
|
||||
|
||||
### ✅ **Phases Complétées**
|
||||
- [x] **Phase 1-2** : Fondations + Embeddings FAISS ✅
|
||||
- [x] **Phase 4-6** : Détection UI + Workflow Graphs + Action Execution ✅
|
||||
- [x] **Phase 7-8** : Learning System + Training System ✅
|
||||
- [x] **Phase 10-12** : GPU Management + Performance + Monitoring ✅
|
||||
## Arborescence du dépôt
|
||||
|
||||
### 🎯 **Phases Restantes**
|
||||
- [ ] **Phase 3** : Checkpoint Final (tests storage)
|
||||
- [ ] **Phase 9** : Visual Workflow Builder (90% → 100%)
|
||||
- [ ] **Phase 13** : Tests End-to-End + Documentation finale
|
||||
```
|
||||
rpa_vision_v3/
|
||||
├── agent_v0/ # Agent V1 (client Windows) + serveur de streaming
|
||||
│ ├── agent_v1/ # Source de l'agent (capture, UI tray, exécution)
|
||||
│ └── server_v1/ # FastAPI streaming + processeurs
|
||||
├── core/ # Pipeline core
|
||||
│ ├── detection/ # Cascade VLM + OCR + templates
|
||||
│ ├── embedding/ # CLIP + FAISS
|
||||
│ ├── graph/ # Construction / matching de workflow graphs
|
||||
│ ├── execution/ # Résolution de cibles, actions LLM
|
||||
│ ├── learning/ # TargetMemoryStore (apprentissage)
|
||||
│ ├── auth/ # Vault Fernet + TOTP
|
||||
│ └── federation/ # Export/import de LearningPacks
|
||||
├── visual_workflow_builder/ # VWB (backend Flask + frontend React Vite)
|
||||
├── web_dashboard/ # Dashboard Flask + SocketIO
|
||||
├── agent_chat/ # Interface conversationnelle + planner
|
||||
├── deploy/ # Scripts de build et unités systemd
|
||||
├── data/ # Sessions, embeddings, index FAISS, apprentissage
|
||||
├── docs/ # Documentation technique
|
||||
├── tests/ # pytest (unit, integration, e2e)
|
||||
├── services.conf # Source de vérité des ports
|
||||
├── svc.sh # Orchestrateur des services
|
||||
└── run.sh # Démarrage tout-en-un (legacy, préférer svc.sh)
|
||||
```
|
||||
|
||||
### 🚀 **Composants Production-Ready**
|
||||
- **Agent V0** : Capture cross-platform + Encryption ✅
|
||||
- **Server API** : Processing pipeline + Web dashboard ✅
|
||||
- **Analytics System** : Monitoring + Insights + Reporting ✅
|
||||
- **Self-Healing** : Automatic adaptation + Recovery ✅
|
||||
## Tests
|
||||
|
||||
## 🤝 Contribution
|
||||
```bash
|
||||
source .venv/bin/activate
|
||||
|
||||
Voir `.kiro/specs/workflow-graph-implementation/tasks.md` pour les tâches en cours.
|
||||
# Tests rapides (hors marqueur slow)
|
||||
pytest -m "not slow" -q
|
||||
|
||||
## 📄 Licence
|
||||
# Tests d'intégration (streaming, pipeline)
|
||||
pytest tests/integration/ -q
|
||||
|
||||
Propriétaire - Tous droits réservés
|
||||
# Tests E2E
|
||||
pytest tests/test_pipeline_e2e.py -q
|
||||
```
|
||||
|
||||
Quelques tests legacy sont connus comme cassés — voir la mémoire projet et
|
||||
`docs/` pour la liste.
|
||||
|
||||
## Documentation
|
||||
|
||||
- [`docs/STATUS.md`](docs/STATUS.md) — état réel par module
|
||||
- [`docs/DEV_SETUP.md`](docs/DEV_SETUP.md) — tâches d'administration (worktrees, build)
|
||||
- [`docs/VISION_RPA_INTELLIGENT.md`](docs/VISION_RPA_INTELLIGENT.md) — cahier des charges
|
||||
- [`docs/PLAN_ACTEUR_V1.md`](docs/PLAN_ACTEUR_V1.md) — architecture 3 niveaux (Macro / Méso / Micro)
|
||||
- [`docs/CONFORMITE_AI_ACT.md`](docs/CONFORMITE_AI_ACT.md) — journalisation, floutage, rétention
|
||||
|
||||
## Concepts clés
|
||||
|
||||
- **RPA 100 % vision** : pas de coordonnées fixes ; l'agent localise un
|
||||
élément par ce qu'il voit (label + contexte visuel), pas par `x,y`.
|
||||
- **Apprentissage progressif** : mode shadow → assisté → autonome, validé
|
||||
par supervision humaine sur les échecs.
|
||||
- **LLM 100 % local** : Ollama sur la machine. Aucun appel cloud dans le
|
||||
pipeline par défaut (cf. feedback projet `feedback_local_only.md`).
|
||||
|
||||
## Licence
|
||||
|
||||
Propriétaire — tous droits réservés.
|
||||
|
||||
3
agent_rust/lea_uia/.gitignore
vendored
Normal file
3
agent_rust/lea_uia/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
target/
|
||||
**/target/
|
||||
|
||||
384
agent_rust/lea_uia/Cargo.lock
generated
Normal file
384
agent_rust/lea_uia/Cargo.lock
generated
Normal file
@@ -0,0 +1,384 @@
|
||||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 4
|
||||
|
||||
[[package]]
|
||||
name = "anstream"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"anstyle-parse",
|
||||
"anstyle-query",
|
||||
"anstyle-wincon",
|
||||
"colorchoice",
|
||||
"is_terminal_polyfill",
|
||||
"utf8parse",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle"
|
||||
version = "1.0.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000"
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-parse"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e"
|
||||
dependencies = [
|
||||
"utf8parse",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-query"
|
||||
version = "1.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc"
|
||||
dependencies = [
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-wincon"
|
||||
version = "3.0.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"once_cell_polyfill",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "4.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b193af5b67834b676abd72466a96c1024e6a6ad978a1f484bd90b85c94041351"
|
||||
dependencies = [
|
||||
"clap_builder",
|
||||
"clap_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_builder"
|
||||
version = "4.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f"
|
||||
dependencies = [
|
||||
"anstream",
|
||||
"anstyle",
|
||||
"clap_lex",
|
||||
"strsim",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_derive"
|
||||
version = "4.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1110bd8a634a1ab8cb04345d8d878267d57c3cf1b38d91b71af6686408bbca6a"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_lex"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9"
|
||||
|
||||
[[package]]
|
||||
name = "colorchoice"
|
||||
version = "1.0.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570"
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
||||
|
||||
[[package]]
|
||||
name = "is_terminal_polyfill"
|
||||
version = "1.70.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "1.0.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"
|
||||
|
||||
[[package]]
|
||||
name = "lea_uia"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"clap",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"windows",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
|
||||
|
||||
[[package]]
|
||||
name = "once_cell_polyfill"
|
||||
version = "1.70.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.106"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.45"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.228"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
|
||||
dependencies = [
|
||||
"serde_core",
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_core"
|
||||
version = "1.0.228"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.228"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.149"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
|
||||
dependencies = [
|
||||
"itoa",
|
||||
"memchr",
|
||||
"serde",
|
||||
"serde_core",
|
||||
"zmij",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strsim"
|
||||
version = "0.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.117"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
|
||||
|
||||
[[package]]
|
||||
name = "utf8parse"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
|
||||
|
||||
[[package]]
|
||||
name = "windows"
|
||||
version = "0.59.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7f919aee0a93304be7f62e8e5027811bbba96bcb1de84d6618be56e43f8a32a1"
|
||||
dependencies = [
|
||||
"windows-core",
|
||||
"windows-targets",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-core"
|
||||
version = "0.59.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "810ce18ed2112484b0d4e15d022e5f598113e220c53e373fb31e67e21670c1ce"
|
||||
dependencies = [
|
||||
"windows-implement",
|
||||
"windows-interface",
|
||||
"windows-result",
|
||||
"windows-strings",
|
||||
"windows-targets",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-implement"
|
||||
version = "0.59.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "83577b051e2f49a058c308f17f273b570a6a758386fc291b5f6a934dd84e48c1"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-interface"
|
||||
version = "0.59.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-link"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a"
|
||||
|
||||
[[package]]
|
||||
name = "windows-link"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
|
||||
|
||||
[[package]]
|
||||
name = "windows-result"
|
||||
version = "0.3.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6"
|
||||
dependencies = [
|
||||
"windows-link 0.1.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-strings"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "87fa48cc5d406560701792be122a10132491cff9d0aeb23583cc2dcafc847319"
|
||||
dependencies = [
|
||||
"windows-link 0.1.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.61.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
|
||||
dependencies = [
|
||||
"windows-link 0.2.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-targets"
|
||||
version = "0.53.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3"
|
||||
dependencies = [
|
||||
"windows-link 0.2.1",
|
||||
"windows_aarch64_gnullvm",
|
||||
"windows_aarch64_msvc",
|
||||
"windows_i686_gnu",
|
||||
"windows_i686_gnullvm",
|
||||
"windows_i686_msvc",
|
||||
"windows_x86_64_gnu",
|
||||
"windows_x86_64_gnullvm",
|
||||
"windows_x86_64_msvc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_gnullvm"
|
||||
version = "0.53.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53"
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_msvc"
|
||||
version = "0.53.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnu"
|
||||
version = "0.53.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnullvm"
|
||||
version = "0.53.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_msvc"
|
||||
version = "0.53.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnu"
|
||||
version = "0.53.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnullvm"
|
||||
version = "0.53.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_msvc"
|
||||
version = "0.53.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650"
|
||||
|
||||
[[package]]
|
||||
name = "zmij"
|
||||
version = "1.0.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"
|
||||
34
agent_rust/lea_uia/Cargo.toml
Normal file
34
agent_rust/lea_uia/Cargo.toml
Normal file
@@ -0,0 +1,34 @@
|
||||
[package]
|
||||
name = "lea_uia"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
authors = ["Dom <dom@rpa-vision-v3>"]
|
||||
description = "Helper Windows UI Automation pour Léa (agent RPA V3)"
|
||||
license = "Proprietary"
|
||||
|
||||
[[bin]]
|
||||
name = "lea_uia"
|
||||
path = "src/main.rs"
|
||||
|
||||
[dependencies]
|
||||
clap = { version = "4.5", features = ["derive"] }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
|
||||
[target.'cfg(windows)'.dependencies]
|
||||
windows = { version = "0.59", features = [
|
||||
"Win32_Foundation",
|
||||
"Win32_System_Com",
|
||||
"Win32_System_Ole",
|
||||
"Win32_System_Variant",
|
||||
"Win32_UI_Accessibility",
|
||||
"Win32_UI_WindowsAndMessaging",
|
||||
"Win32_Graphics_Gdi",
|
||||
] }
|
||||
|
||||
[profile.release]
|
||||
opt-level = "z" # Taille minimale
|
||||
lto = true # Link-time optimization
|
||||
codegen-units = 1 # Meilleure optimisation
|
||||
strip = true # Retirer les symboles
|
||||
panic = "abort" # Pas d'unwinding → binaire plus petit
|
||||
564
agent_rust/lea_uia/src/main.rs
Normal file
564
agent_rust/lea_uia/src/main.rs
Normal file
@@ -0,0 +1,564 @@
|
||||
// lea_uia — Helper Windows UI Automation pour Léa
|
||||
//
|
||||
// Binaire standalone qui expose 3 commandes UIA :
|
||||
// query → retourne l'élément UIA à une position (x, y)
|
||||
// find → retrouve un élément par son chemin logique
|
||||
// capture → liste les éléments visibles (debug)
|
||||
//
|
||||
// Communication avec l'agent Python via stdin/stdout JSON.
|
||||
// Tous les appels sont non-bloquants et retournent du JSON structuré.
|
||||
//
|
||||
// Sur Linux (développement) : retourne des stubs d'erreur.
|
||||
// Sur Windows : utilise UIAutomationCore via `windows-rs`.
|
||||
|
||||
use clap::{Parser, Subcommand};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Parser)]
|
||||
#[command(name = "lea_uia")]
|
||||
#[command(about = "Helper UI Automation pour Léa", long_about = None)]
|
||||
#[command(version)]
|
||||
struct Cli {
|
||||
#[command(subcommand)]
|
||||
command: Commands,
|
||||
}
|
||||
|
||||
#[derive(Subcommand)]
|
||||
enum Commands {
|
||||
/// Retourner l'élément UIA à une position donnée (x, y en pixels écran)
|
||||
Query {
|
||||
/// Coordonnée X (pixels)
|
||||
#[arg(long)]
|
||||
x: i32,
|
||||
/// Coordonnée Y (pixels)
|
||||
#[arg(long)]
|
||||
y: i32,
|
||||
/// Inclure la hiérarchie des parents (peut être lent)
|
||||
#[arg(long, default_value_t = true)]
|
||||
with_parents: bool,
|
||||
},
|
||||
/// Rechercher un élément par son chemin logique ou son nom
|
||||
Find {
|
||||
/// Nom de l'élément (Name property)
|
||||
#[arg(long)]
|
||||
name: Option<String>,
|
||||
/// Type de contrôle (Button, Edit, MenuItem, etc.)
|
||||
#[arg(long)]
|
||||
control_type: Option<String>,
|
||||
/// AutomationId
|
||||
#[arg(long)]
|
||||
automation_id: Option<String>,
|
||||
/// Limite la recherche à cette fenêtre (titre exact)
|
||||
#[arg(long)]
|
||||
window: Option<String>,
|
||||
/// Timeout en millisecondes
|
||||
#[arg(long, default_value_t = 2000)]
|
||||
timeout_ms: u32,
|
||||
},
|
||||
/// Lister tous les éléments visibles de la fenêtre active (debug)
|
||||
Capture {
|
||||
/// Profondeur maximale de l'arbre
|
||||
#[arg(long, default_value_t = 3)]
|
||||
max_depth: u32,
|
||||
},
|
||||
/// Vérifier que UIA est disponible et fonctionnel
|
||||
Health,
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Modèles de sortie JSON
|
||||
// =========================================================================
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
struct UiaElement {
|
||||
/// Nom visible de l'élément
|
||||
name: String,
|
||||
/// Type de contrôle (Button, Edit, MenuItem, Window, ...)
|
||||
control_type: String,
|
||||
/// Classe Windows (Edit, Static, #32770, ...)
|
||||
class_name: String,
|
||||
/// AutomationId (ID interne, parfois vide)
|
||||
automation_id: String,
|
||||
/// Rectangle absolu [x1, y1, x2, y2] en pixels écran
|
||||
bounding_rect: [i32; 4],
|
||||
/// Est-ce que l'élément est activable
|
||||
is_enabled: bool,
|
||||
/// Est-ce que l'élément est visible
|
||||
is_offscreen: bool,
|
||||
/// Hiérarchie des parents (chemin logique)
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
parent_path: Vec<ParentHint>,
|
||||
/// Process owning this element
|
||||
#[serde(skip_serializing_if = "String::is_empty")]
|
||||
process_name: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
struct ParentHint {
|
||||
name: String,
|
||||
control_type: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
#[serde(tag = "status")]
|
||||
enum UiaResponse {
|
||||
#[serde(rename = "ok")]
|
||||
Ok {
|
||||
element: Option<UiaElement>,
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
elements: Vec<UiaElement>,
|
||||
elapsed_ms: u64,
|
||||
},
|
||||
#[serde(rename = "not_found")]
|
||||
NotFound {
|
||||
reason: String,
|
||||
elapsed_ms: u64,
|
||||
},
|
||||
#[serde(rename = "error")]
|
||||
Error {
|
||||
message: String,
|
||||
code: String,
|
||||
},
|
||||
#[serde(rename = "unavailable")]
|
||||
Unavailable {
|
||||
reason: String,
|
||||
},
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Implémentation Windows
|
||||
// =========================================================================
|
||||
|
||||
#[cfg(windows)]
|
||||
mod uia_impl {
|
||||
use super::*;
|
||||
use std::time::Instant;
|
||||
use windows::Win32::Foundation::POINT;
|
||||
use windows::Win32::System::Com::{
|
||||
CoCreateInstance, CoInitializeEx, CoUninitialize, CLSCTX_INPROC_SERVER,
|
||||
COINIT_APARTMENTTHREADED,
|
||||
};
|
||||
use windows::Win32::UI::Accessibility::{
|
||||
CUIAutomation, IUIAutomation, IUIAutomationElement, IUIAutomationTreeWalker,
|
||||
};
|
||||
|
||||
struct ComGuard;
|
||||
impl ComGuard {
|
||||
fn new() -> windows::core::Result<Self> {
|
||||
unsafe {
|
||||
let hr = CoInitializeEx(None, COINIT_APARTMENTTHREADED);
|
||||
if hr.is_err() {
|
||||
// RPC_E_CHANGED_MODE : le thread est déjà initialisé → OK
|
||||
let code = hr.0 as u32;
|
||||
if code != 0x80010106 {
|
||||
return Err(windows::core::Error::from(hr));
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(Self)
|
||||
}
|
||||
}
|
||||
impl Drop for ComGuard {
|
||||
fn drop(&mut self) {
|
||||
unsafe { CoUninitialize() };
|
||||
}
|
||||
}
|
||||
|
||||
fn get_automation() -> windows::core::Result<IUIAutomation> {
|
||||
unsafe { CoCreateInstance(&CUIAutomation, None, CLSCTX_INPROC_SERVER) }
|
||||
}
|
||||
|
||||
fn element_to_struct(
|
||||
element: &IUIAutomationElement,
|
||||
with_parents: bool,
|
||||
) -> windows::core::Result<UiaElement> {
|
||||
let mut result = UiaElement {
|
||||
name: String::new(),
|
||||
control_type: String::new(),
|
||||
class_name: String::new(),
|
||||
automation_id: String::new(),
|
||||
bounding_rect: [0, 0, 0, 0],
|
||||
is_enabled: false,
|
||||
is_offscreen: true,
|
||||
parent_path: Vec::new(),
|
||||
process_name: String::new(),
|
||||
};
|
||||
|
||||
unsafe {
|
||||
if let Ok(name) = element.CurrentName() {
|
||||
result.name = name.to_string();
|
||||
}
|
||||
if let Ok(ct) = element.CurrentLocalizedControlType() {
|
||||
result.control_type = ct.to_string();
|
||||
}
|
||||
if let Ok(cn) = element.CurrentClassName() {
|
||||
result.class_name = cn.to_string();
|
||||
}
|
||||
if let Ok(aid) = element.CurrentAutomationId() {
|
||||
result.automation_id = aid.to_string();
|
||||
}
|
||||
if let Ok(rect) = element.CurrentBoundingRectangle() {
|
||||
result.bounding_rect = [rect.left, rect.top, rect.right, rect.bottom];
|
||||
}
|
||||
if let Ok(enabled) = element.CurrentIsEnabled() {
|
||||
result.is_enabled = enabled.as_bool();
|
||||
}
|
||||
if let Ok(offscreen) = element.CurrentIsOffscreen() {
|
||||
result.is_offscreen = offscreen.as_bool();
|
||||
}
|
||||
if with_parents {
|
||||
// Remonter la hiérarchie jusqu'à la Window root
|
||||
if let Ok(automation) = get_automation() {
|
||||
let walker = automation.ControlViewWalker();
|
||||
if let Ok(walker) = walker {
|
||||
let mut current = element.clone();
|
||||
for _ in 0..10 {
|
||||
match walker.GetParentElement(¤t) {
|
||||
Ok(parent) => {
|
||||
let name = parent
|
||||
.CurrentName()
|
||||
.map(|n| n.to_string())
|
||||
.unwrap_or_default();
|
||||
let ct = parent
|
||||
.CurrentLocalizedControlType()
|
||||
.map(|c| c.to_string())
|
||||
.unwrap_or_default();
|
||||
if name.is_empty() && ct.is_empty() {
|
||||
break;
|
||||
}
|
||||
result.parent_path.insert(
|
||||
0,
|
||||
ParentHint {
|
||||
name,
|
||||
control_type: ct,
|
||||
},
|
||||
);
|
||||
current = parent;
|
||||
}
|
||||
Err(_) => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub fn query_at_point(x: i32, y: i32, with_parents: bool) -> UiaResponse {
|
||||
let start = Instant::now();
|
||||
let _com = match ComGuard::new() {
|
||||
Ok(g) => g,
|
||||
Err(e) => {
|
||||
return UiaResponse::Error {
|
||||
message: format!("CoInitializeEx: {}", e),
|
||||
code: "com_init_failed".into(),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let automation = match get_automation() {
|
||||
Ok(a) => a,
|
||||
Err(e) => {
|
||||
return UiaResponse::Error {
|
||||
message: format!("CUIAutomation: {}", e),
|
||||
code: "automation_failed".into(),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let point = POINT { x, y };
|
||||
let element = unsafe { automation.ElementFromPoint(point) };
|
||||
match element {
|
||||
Ok(el) => match element_to_struct(&el, with_parents) {
|
||||
Ok(e) => UiaResponse::Ok {
|
||||
element: Some(e),
|
||||
elements: Vec::new(),
|
||||
elapsed_ms: start.elapsed().as_millis() as u64,
|
||||
},
|
||||
Err(e) => UiaResponse::Error {
|
||||
message: format!("element_to_struct: {}", e),
|
||||
code: "extract_failed".into(),
|
||||
},
|
||||
},
|
||||
Err(_) => UiaResponse::NotFound {
|
||||
reason: format!("Aucun élément UIA à ({}, {})", x, y),
|
||||
elapsed_ms: start.elapsed().as_millis() as u64,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn find_element(
|
||||
name: Option<String>,
|
||||
_control_type: Option<String>,
|
||||
_automation_id: Option<String>,
|
||||
_window: Option<String>,
|
||||
_timeout_ms: u32,
|
||||
) -> UiaResponse {
|
||||
let start = Instant::now();
|
||||
let _com = match ComGuard::new() {
|
||||
Ok(g) => g,
|
||||
Err(e) => {
|
||||
return UiaResponse::Error {
|
||||
message: format!("CoInitializeEx: {}", e),
|
||||
code: "com_init_failed".into(),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let automation = match get_automation() {
|
||||
Ok(a) => a,
|
||||
Err(e) => {
|
||||
return UiaResponse::Error {
|
||||
message: format!("CUIAutomation: {}", e),
|
||||
code: "automation_failed".into(),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let root = match unsafe { automation.GetRootElement() } {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
return UiaResponse::Error {
|
||||
message: format!("GetRootElement: {}", e),
|
||||
code: "root_failed".into(),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Recherche simple par parcours d'arbre (MVP)
|
||||
// L'arbre UIA peut être énorme → on limite la profondeur
|
||||
if let Some(target_name) = name {
|
||||
let walker = unsafe { automation.ControlViewWalker() };
|
||||
if let Ok(walker) = walker {
|
||||
if let Some(found) =
|
||||
walk_and_find(&walker, &root, &target_name, 0, 6, &_control_type, &_automation_id)
|
||||
{
|
||||
match element_to_struct(&found, true) {
|
||||
Ok(e) => {
|
||||
return UiaResponse::Ok {
|
||||
element: Some(e),
|
||||
elements: Vec::new(),
|
||||
elapsed_ms: start.elapsed().as_millis() as u64,
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
return UiaResponse::Error {
|
||||
message: format!("element_to_struct: {}", e),
|
||||
code: "extract_failed".into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
UiaResponse::NotFound {
|
||||
reason: "Aucun élément trouvé".into(),
|
||||
elapsed_ms: start.elapsed().as_millis() as u64,
|
||||
}
|
||||
}
|
||||
|
||||
/// Parcours récursif de l'arbre UIA pour trouver un élément par nom
|
||||
fn walk_and_find(
|
||||
walker: &IUIAutomationTreeWalker,
|
||||
element: &IUIAutomationElement,
|
||||
target_name: &str,
|
||||
depth: u32,
|
||||
max_depth: u32,
|
||||
target_control_type: &Option<String>,
|
||||
target_automation_id: &Option<String>,
|
||||
) -> Option<IUIAutomationElement> {
|
||||
if depth > max_depth {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Tester l'élément courant
|
||||
unsafe {
|
||||
if let Ok(name) = element.CurrentName() {
|
||||
if name.to_string() == target_name {
|
||||
// Vérifier les filtres additionnels
|
||||
let mut matches = true;
|
||||
if let Some(ct) = target_control_type {
|
||||
if let Ok(local_ct) = element.CurrentLocalizedControlType() {
|
||||
if !local_ct.to_string().to_lowercase().contains(&ct.to_lowercase()) {
|
||||
matches = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
if matches {
|
||||
if let Some(aid) = target_automation_id {
|
||||
if let Ok(local_aid) = element.CurrentAutomationId() {
|
||||
if local_aid.to_string() != *aid {
|
||||
matches = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if matches {
|
||||
return Some(element.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Parcourir les enfants
|
||||
if let Ok(first_child) = walker.GetFirstChildElement(element) {
|
||||
let mut current = first_child;
|
||||
loop {
|
||||
if let Some(found) = walk_and_find(
|
||||
walker,
|
||||
¤t,
|
||||
target_name,
|
||||
depth + 1,
|
||||
max_depth,
|
||||
target_control_type,
|
||||
target_automation_id,
|
||||
) {
|
||||
return Some(found);
|
||||
}
|
||||
match walker.GetNextSiblingElement(¤t) {
|
||||
Ok(next) => current = next,
|
||||
Err(_) => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
pub fn capture_tree(_max_depth: u32) -> UiaResponse {
|
||||
let start = Instant::now();
|
||||
let _com = match ComGuard::new() {
|
||||
Ok(g) => g,
|
||||
Err(e) => {
|
||||
return UiaResponse::Error {
|
||||
message: format!("CoInitializeEx: {}", e),
|
||||
code: "com_init_failed".into(),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let automation = match get_automation() {
|
||||
Ok(a) => a,
|
||||
Err(e) => {
|
||||
return UiaResponse::Error {
|
||||
message: format!("CUIAutomation: {}", e),
|
||||
code: "automation_failed".into(),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let focused = unsafe { automation.GetFocusedElement() };
|
||||
match focused {
|
||||
Ok(el) => match element_to_struct(&el, true) {
|
||||
Ok(e) => UiaResponse::Ok {
|
||||
element: Some(e),
|
||||
elements: Vec::new(),
|
||||
elapsed_ms: start.elapsed().as_millis() as u64,
|
||||
},
|
||||
Err(e) => UiaResponse::Error {
|
||||
message: format!("element_to_struct: {}", e),
|
||||
code: "extract_failed".into(),
|
||||
},
|
||||
},
|
||||
Err(e) => UiaResponse::Error {
|
||||
message: format!("GetFocusedElement: {}", e),
|
||||
code: "focused_failed".into(),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn health_check() -> UiaResponse {
|
||||
let _com = match ComGuard::new() {
|
||||
Ok(g) => g,
|
||||
Err(e) => {
|
||||
return UiaResponse::Unavailable {
|
||||
reason: format!("COM init failed: {}", e),
|
||||
}
|
||||
}
|
||||
};
|
||||
match get_automation() {
|
||||
Ok(_) => UiaResponse::Ok {
|
||||
element: None,
|
||||
elements: Vec::new(),
|
||||
elapsed_ms: 0,
|
||||
},
|
||||
Err(e) => UiaResponse::Unavailable {
|
||||
reason: format!("UIA not available: {}", e),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Stub Linux (pour développement et tests)
|
||||
// =========================================================================
|
||||
|
||||
#[cfg(not(windows))]
|
||||
mod uia_impl {
|
||||
use super::*;
|
||||
|
||||
pub fn query_at_point(_x: i32, _y: i32, _with_parents: bool) -> UiaResponse {
|
||||
UiaResponse::Unavailable {
|
||||
reason: "UIA n'est disponible que sur Windows".into(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn find_element(
|
||||
_name: Option<String>,
|
||||
_control_type: Option<String>,
|
||||
_automation_id: Option<String>,
|
||||
_window: Option<String>,
|
||||
_timeout_ms: u32,
|
||||
) -> UiaResponse {
|
||||
UiaResponse::Unavailable {
|
||||
reason: "UIA n'est disponible que sur Windows".into(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn capture_tree(_max_depth: u32) -> UiaResponse {
|
||||
UiaResponse::Unavailable {
|
||||
reason: "UIA n'est disponible que sur Windows".into(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn health_check() -> UiaResponse {
|
||||
UiaResponse::Unavailable {
|
||||
reason: "UIA n'est disponible que sur Windows".into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Main
|
||||
// =========================================================================
|
||||
|
||||
fn main() {
|
||||
let cli = Cli::parse();
|
||||
|
||||
let response = match cli.command {
|
||||
Commands::Query {
|
||||
x,
|
||||
y,
|
||||
with_parents,
|
||||
} => uia_impl::query_at_point(x, y, with_parents),
|
||||
Commands::Find {
|
||||
name,
|
||||
control_type,
|
||||
automation_id,
|
||||
window,
|
||||
timeout_ms,
|
||||
} => uia_impl::find_element(name, control_type, automation_id, window, timeout_ms),
|
||||
Commands::Capture { max_depth } => uia_impl::capture_tree(max_depth),
|
||||
Commands::Health => uia_impl::health_check(),
|
||||
};
|
||||
|
||||
// Sortie JSON sur stdout
|
||||
match serde_json::to_string(&response) {
|
||||
Ok(json) => println!("{}", json),
|
||||
Err(e) => {
|
||||
eprintln!("{{\"status\":\"error\",\"message\":\"JSON serialization: {}\"}}", e);
|
||||
std::process::exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -178,8 +178,41 @@ class EventCaptorV1:
|
||||
"timestamp": now,
|
||||
}
|
||||
self._inject_screen_metadata(event)
|
||||
# Capturer le snapshot UIA à la position du clic (si helper dispo)
|
||||
# Non-bloquant : si UIA échoue, l'event est enrichi uniquement
|
||||
# des données vision comme aujourd'hui.
|
||||
self._inject_uia_snapshot(event, x, y)
|
||||
self.on_event(event)
|
||||
|
||||
def _inject_uia_snapshot(self, event: dict, x: int, y: int) -> None:
|
||||
"""Ajouter un uia_snapshot à l'événement si le helper UIA est dispo.
|
||||
|
||||
Appelle lea_uia.exe query --x N --y N en ~10-20ms.
|
||||
Fallback silencieux si le helper n'est pas dispo ou échoue.
|
||||
"""
|
||||
try:
|
||||
from .uia_helper import get_shared_helper
|
||||
helper = get_shared_helper()
|
||||
if not helper.available:
|
||||
return
|
||||
element = helper.query_at(int(x), int(y), with_parents=True)
|
||||
if element is None:
|
||||
return
|
||||
event["uia_snapshot"] = {
|
||||
"name": element.name,
|
||||
"control_type": element.control_type,
|
||||
"class_name": element.class_name,
|
||||
"automation_id": element.automation_id,
|
||||
"bounding_rect": list(element.bounding_rect),
|
||||
"is_enabled": element.is_enabled,
|
||||
"is_offscreen": element.is_offscreen,
|
||||
"parent_path": element.parent_path,
|
||||
}
|
||||
except Exception as e:
|
||||
# Non bloquant — on continue sans UIA
|
||||
import logging
|
||||
logging.getLogger(__name__).debug(f"UIA snapshot skip: {e}")
|
||||
|
||||
def _on_scroll(self, x, y, dx, dy):
|
||||
event = {
|
||||
"type": "mouse_scroll",
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -15,6 +15,7 @@ Ref: docs/PLAN_ACTEUR_V1.md — Architecture MICRO (grounding + exécution)
|
||||
"""
|
||||
|
||||
import base64
|
||||
import io
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
@@ -126,19 +127,62 @@ class GroundingEngine:
|
||||
)
|
||||
|
||||
t_start = time.time()
|
||||
screenshot_b64 = self._executor._capture_screenshot_b64(max_width=0, quality=75)
|
||||
|
||||
# ── Capture contrainte à la fenêtre active ──
|
||||
# Le grounding ne voit QUE la fenêtre attendue — pas la taskbar,
|
||||
# pas le systray, pas les autres apps. Comme un humain qui regarde
|
||||
# l'application sur laquelle il travaille.
|
||||
window_rect = None
|
||||
try:
|
||||
from ..window_info_crossplatform import get_active_window_rect
|
||||
win_info = get_active_window_rect()
|
||||
if win_info and win_info.get("rect"):
|
||||
r = win_info["rect"] # [left, top, right, bottom]
|
||||
# Validation : fenêtre visible et pas minuscule
|
||||
w = r[2] - r[0]
|
||||
h = r[3] - r[1]
|
||||
if w > 50 and h > 50:
|
||||
window_rect = {
|
||||
"left": max(0, r[0]),
|
||||
"top": max(0, r[1]),
|
||||
"width": min(w, screen_width),
|
||||
"height": min(h, screen_height),
|
||||
}
|
||||
logger.info(
|
||||
f"Grounding contraint à la fenêtre : "
|
||||
f"{window_rect['width']}x{window_rect['height']} "
|
||||
f"à ({window_rect['left']}, {window_rect['top']})"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug(f"Pas de window rect disponible : {e}")
|
||||
|
||||
screenshot_b64 = self._capture_window_or_screen(window_rect)
|
||||
if not screenshot_b64:
|
||||
return GroundingResult(
|
||||
found=False, detail="Capture screenshot échouée",
|
||||
elapsed_ms=(time.time() - t_start) * 1000,
|
||||
)
|
||||
|
||||
# Dimensions de la zone capturée (fenêtre ou écran entier)
|
||||
cap_w = window_rect["width"] if window_rect else screen_width
|
||||
cap_h = window_rect["height"] if window_rect else screen_height
|
||||
|
||||
for strategy in strategies:
|
||||
result = self._try_strategy(
|
||||
strategy, server_url, screenshot_b64, target_spec,
|
||||
fallback_x, fallback_y, screen_width, screen_height,
|
||||
fallback_x, fallback_y, cap_w, cap_h,
|
||||
)
|
||||
if result.found:
|
||||
# ── Conversion coords fenêtre → coords écran ──
|
||||
if window_rect:
|
||||
# Le grounding a retourné des coords relatives à la fenêtre
|
||||
# On les convertit en coords relatives à l'écran entier
|
||||
abs_x = window_rect["left"] + result.x_pct * cap_w
|
||||
abs_y = window_rect["top"] + result.y_pct * cap_h
|
||||
result.x_pct = abs_x / screen_width
|
||||
result.y_pct = abs_y / screen_height
|
||||
result.detail = f"{result.detail} [fenêtre {cap_w}x{cap_h}]"
|
||||
|
||||
result.elapsed_ms = (time.time() - t_start) * 1000
|
||||
return result
|
||||
|
||||
@@ -148,6 +192,39 @@ class GroundingEngine:
|
||||
elapsed_ms=(time.time() - t_start) * 1000,
|
||||
)
|
||||
|
||||
def _capture_window_or_screen(self, window_rect: Optional[Dict]) -> str:
|
||||
"""Capturer soit la fenêtre active (croppée), soit l'écran entier.
|
||||
|
||||
Si window_rect est fourni, capture uniquement cette zone.
|
||||
Sinon, capture l'écran entier (fallback).
|
||||
"""
|
||||
try:
|
||||
from PIL import Image
|
||||
import mss as mss_lib
|
||||
|
||||
with mss_lib.mss() as local_sct:
|
||||
if window_rect:
|
||||
# Capture de la zone fenêtre uniquement
|
||||
region = {
|
||||
"left": window_rect["left"],
|
||||
"top": window_rect["top"],
|
||||
"width": window_rect["width"],
|
||||
"height": window_rect["height"],
|
||||
}
|
||||
raw = local_sct.grab(region)
|
||||
else:
|
||||
# Fallback écran entier
|
||||
raw = local_sct.grab(local_sct.monitors[1])
|
||||
|
||||
img = Image.frombytes("RGB", raw.size, raw.bgra, "raw", "BGRX")
|
||||
buffer = io.BytesIO()
|
||||
img.save(buffer, format="JPEG", quality=75)
|
||||
return base64.b64encode(buffer.getvalue()).decode("utf-8")
|
||||
except Exception as e:
|
||||
logger.warning(f"Capture échouée : {e}")
|
||||
# Fallback sur la méthode existante de l'executor
|
||||
return self._executor._capture_screenshot_b64(max_width=0, quality=75)
|
||||
|
||||
def _try_strategy(
|
||||
self,
|
||||
strategy: str,
|
||||
|
||||
@@ -85,6 +85,10 @@ class PolicyEngine:
|
||||
2. Si retry déjà fait → demander à l'acteur gemma4
|
||||
3. Selon gemma4 : SKIP, ABORT, ou SUPERVISE
|
||||
|
||||
**SÉCURITÉ** : si, pendant l'étape 1, le handler popup détecte un
|
||||
dialogue système Windows (UAC, CredUI, SmartScreen…), on bascule
|
||||
immédiatement en SUPERVISE. Cf. system_dialog_guard.py.
|
||||
|
||||
Args:
|
||||
action: L'action qui a échoué
|
||||
target_spec: La cible non trouvée
|
||||
@@ -96,6 +100,22 @@ class PolicyEngine:
|
||||
# ── Étape 1 : Tentative de fermeture popup (premier essai) ──
|
||||
if retry_count == 0:
|
||||
popup_handled = self._try_close_popup()
|
||||
|
||||
# Si le popup handler a détecté un dialogue système, on
|
||||
# bascule immédiatement en SUPERVISE — pas de retry, pas de
|
||||
# gemma4 : on rend la main à l'humain.
|
||||
if getattr(self._executor, "_system_dialog_pause", None):
|
||||
sd = self._executor._system_dialog_pause
|
||||
return PolicyDecision(
|
||||
decision=Decision.SUPERVISE,
|
||||
reason=(
|
||||
f"Dialogue système détecté ({sd.get('category', '?')}) — "
|
||||
f"refus d'interaction automatique"
|
||||
),
|
||||
action_taken="system_dialog_blocked",
|
||||
elapsed_ms=(time.time() - t_start) * 1000,
|
||||
)
|
||||
|
||||
if popup_handled:
|
||||
return PolicyDecision(
|
||||
decision=Decision.RETRY,
|
||||
|
||||
448
agent_v0/agent_v1/core/system_dialog_guard.py
Normal file
448
agent_v0/agent_v1/core/system_dialog_guard.py
Normal file
@@ -0,0 +1,448 @@
|
||||
# agent_v1/core/system_dialog_guard.py
|
||||
"""
|
||||
Garde-fou sécurité : détection des dialogues système Windows critiques.
|
||||
|
||||
==============================================================================
|
||||
POURQUOI ?
|
||||
==============================================================================
|
||||
|
||||
Pendant un replay, si un dialogue UAC, CredUI (mot de passe Windows),
|
||||
SmartScreen ou une notification de sécurité Windows apparaît, Léa pourrait
|
||||
demander au VLM "quel bouton cliquer" et recevoir "Oui" en réponse.
|
||||
|
||||
→ **Léa cliquerait OUI sur une élévation UAC** → vecteur d'attaque ransomware.
|
||||
|
||||
Ce module fournit la détection de ces dialogues pour que l'exécuteur
|
||||
**ne clique JAMAIS dessus automatiquement**. La décision est renvoyée à
|
||||
l'humain (pause supervisée).
|
||||
|
||||
==============================================================================
|
||||
PRINCIPE
|
||||
==============================================================================
|
||||
|
||||
- **Faux positif tolérable** : on préfère pauser pour rien plutôt que cliquer
|
||||
sur un UAC.
|
||||
- **Faux négatif catastrophique** : mieux vaut être trop prudent.
|
||||
- **Multi-signal** : titre, ClassName UIA, nom de processus, parent_path.
|
||||
Un seul signal suffit à bloquer.
|
||||
- **Compatible Citrix** : les dialogues UAC d'un client Citrix apparaissent
|
||||
aussi dans la VM distante — la détection par classe UIA fonctionne.
|
||||
|
||||
==============================================================================
|
||||
PATTERNS DE DÉTECTION (ordre de criticité décroissant)
|
||||
==============================================================================
|
||||
|
||||
1. UAC Consent (élévation de privilèges)
|
||||
- ClassName : `$$$Secure UAP Dummy Window Class$$$`
|
||||
- Process : `consent.exe`
|
||||
- Titre : "Contrôle de compte d'utilisateur", "User Account Control"
|
||||
|
||||
2. CredUI (prompt mot de passe Windows)
|
||||
- ClassName : `Credential Dialog Xaml Host`
|
||||
- Process : `credentialuibroker.exe`, `credui.exe`
|
||||
- Titre : "Sécurité Windows", "Windows Security"
|
||||
|
||||
3. SmartScreen (protection contre applications inconnues)
|
||||
- Process : `smartscreen.exe`
|
||||
- Titre : "Windows a protégé votre ordinateur", "Windows protected your PC"
|
||||
|
||||
4. Windows Defender / Security Center
|
||||
- Process : `securityhealthhost.exe`, `msmpeng.exe`
|
||||
- Titre : "Sécurité Windows", "Windows Defender"
|
||||
|
||||
5. Signatures pilotes / driver install
|
||||
- Titre : "Installer ce pilote", "Driver signature"
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Catégories de dialogues système (pour logging + messages)
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class SystemDialogCategory:
    """System-dialog categories that must never be clicked automatically."""
    UAC = "uac_consent"  # Privilege-elevation prompt
    CREDUI = "windows_credential_prompt"  # Windows password prompt
    SMARTSCREEN = "smartscreen"  # SmartScreen protection
    DEFENDER = "windows_defender"  # Windows Defender alert
    DRIVER = "driver_install"  # Signed-driver installation
    SECURITY_TOAST = "security_toast"  # Windows security toast
    UNKNOWN_DIALOG = "unknown_system_dialog"  # #32770 dialog with no known app
|
||||
|
||||
|
||||
@dataclass
class SystemDialogDetection:
    """Outcome of a system-dialog analysis."""
    is_system_dialog: bool
    category: str = ""  # One of the SystemDialogCategory values
    matched_signal: str = ""  # e.g. "class_name=Consent.exe"
    matched_value: str = ""  # The value that matched
    reason: str = ""  # Human-readable explanation

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the detection for logging / JSON transport."""
        field_names = (
            "is_system_dialog",
            "category",
            "matched_signal",
            "matched_value",
            "reason",
        )
        return {name: getattr(self, name) for name in field_names}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Signatures de détection
|
||||
# =============================================================================
|
||||
|
||||
|
||||
# UIA ClassNames (case preserved — Windows exposes them verbatim via UIA).
# Checked as-is first, then lowercased for a more forgiving match.
_CLASS_NAMES_SYSTEM = {
    # UAC consent (secure desktop)
    "$$$Secure UAP Dummy Window Class$$$": SystemDialogCategory.UAC,
    "Credential Dialog Xaml Host": SystemDialogCategory.CREDUI,
    # Older name of the Windows Credential UI host
    "CredentialDialogXamlHost": SystemDialogCategory.CREDUI,
}

# Process names (case-insensitive comparison, ".exe" suffix normalized).
_PROCESS_NAMES_SYSTEM = {
    "consent.exe": SystemDialogCategory.UAC,
    "credentialuibroker.exe": SystemDialogCategory.CREDUI,
    "credui.exe": SystemDialogCategory.CREDUI,
    "credwiz.exe": SystemDialogCategory.CREDUI,
    "smartscreen.exe": SystemDialogCategory.SMARTSCREEN,
    "securityhealthhost.exe": SystemDialogCategory.DEFENDER,
    "securityhealthui.exe": SystemDialogCategory.DEFENDER,
    "securityhealthsystray.exe": SystemDialogCategory.DEFENDER,
    "msmpeng.exe": SystemDialogCategory.DEFENDER,
    "windowsdefender.exe": SystemDialogCategory.DEFENDER,
    "msiexec.exe": SystemDialogCategory.DRIVER,  # signed-driver prompts
    "drvinst.exe": SystemDialogCategory.DRIVER,
}

# Window-title patterns (case-insensitive regexes with word boundaries).
# Deliberately avoids overly generic titles to limit false positives on
# the OSIRIS/OBSIUS/MEDSPHERE business applications.
_TITLE_PATTERNS_SYSTEM: Tuple[Tuple[re.Pattern, str], ...] = (
    # UAC
    (re.compile(r"contr[oô]le\s+de\s+compte\s+d'?utilisateur", re.IGNORECASE),
     SystemDialogCategory.UAC),
    (re.compile(r"\buser\s+account\s+control\b", re.IGNORECASE),
     SystemDialogCategory.UAC),
    (re.compile(r"voulez-vous\s+autoriser\s+cette\s+application", re.IGNORECASE),
     SystemDialogCategory.UAC),
    (re.compile(r"do\s+you\s+want\s+to\s+allow\s+this\s+app", re.IGNORECASE),
     SystemDialogCategory.UAC),

    # CredUI / Windows Security
    (re.compile(r"\bs[eé]curit[eé]\s+windows\b", re.IGNORECASE),
     SystemDialogCategory.CREDUI),
    (re.compile(r"\bwindows\s+security\b", re.IGNORECASE),
     SystemDialogCategory.CREDUI),
    (re.compile(r"entrer\s+les\s+informations\s+d'?identification", re.IGNORECASE),
     SystemDialogCategory.CREDUI),
    (re.compile(r"enter\s+(?:your\s+)?credentials?", re.IGNORECASE),
     SystemDialogCategory.CREDUI),
    (re.compile(r"connectez-vous\s+[aà]\s+votre\s+compte", re.IGNORECASE),
     SystemDialogCategory.CREDUI),
    (re.compile(r"\bsign\s+in\s+to\s+your\s+account\b", re.IGNORECASE),
     SystemDialogCategory.CREDUI),

    # SmartScreen
    (re.compile(r"windows\s+a\s+prot[eé]g[eé]", re.IGNORECASE),
     SystemDialogCategory.SMARTSCREEN),
    (re.compile(r"windows\s+protected\s+your\s+pc", re.IGNORECASE),
     SystemDialogCategory.SMARTSCREEN),
    (re.compile(r"\bsmartscreen\b", re.IGNORECASE),
     SystemDialogCategory.SMARTSCREEN),
    (re.compile(r"\b[eé]diteur\s+inconnu\b", re.IGNORECASE),
     SystemDialogCategory.SMARTSCREEN),
    (re.compile(r"\bunknown\s+publisher\b", re.IGNORECASE),
     SystemDialogCategory.SMARTSCREEN),

    # Windows Defender
    (re.compile(r"windows\s+defender", re.IGNORECASE),
     SystemDialogCategory.DEFENDER),
    (re.compile(r"menace\s+d[eé]tect[eé]e", re.IGNORECASE),
     SystemDialogCategory.DEFENDER),
    (re.compile(r"threat\s+detected", re.IGNORECASE),
     SystemDialogCategory.DEFENDER),

    # Driver
    (re.compile(r"installer\s+ce\s+pilote", re.IGNORECASE),
     SystemDialogCategory.DRIVER),
    (re.compile(r"install\s+this\s+driver", re.IGNORECASE),
     SystemDialogCategory.DRIVER),
    (re.compile(r"signature\s+num[eé]rique\s+du\s+pilote", re.IGNORECASE),
     SystemDialogCategory.DRIVER),
)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Fonctions de détection
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def _normalize_process(name: str) -> str:
|
||||
"""Normaliser un nom de processus pour comparaison."""
|
||||
if not name:
|
||||
return ""
|
||||
name = name.strip().lower()
|
||||
# Enlever le chemin éventuel
|
||||
if "\\" in name or "/" in name:
|
||||
name = name.replace("\\", "/").split("/")[-1]
|
||||
# Assurer suffixe .exe pour matcher le dictionnaire
|
||||
if not name.endswith(".exe") and name:
|
||||
# Les process_name peuvent venir sans .exe (psutil) — on ajoute
|
||||
# pour avoir une clé uniforme
|
||||
name_with_exe = name + ".exe"
|
||||
if name_with_exe in _PROCESS_NAMES_SYSTEM:
|
||||
return name_with_exe
|
||||
return name
|
||||
|
||||
|
||||
def _check_class_name(class_name: str) -> Optional[Tuple[str, str, str]]:
|
||||
"""Vérifier si un ClassName UIA matche un dialogue système.
|
||||
|
||||
Returns:
|
||||
(category, matched_class, reason) si match, None sinon.
|
||||
"""
|
||||
if not class_name:
|
||||
return None
|
||||
|
||||
# Match exact
|
||||
if class_name in _CLASS_NAMES_SYSTEM:
|
||||
cat = _CLASS_NAMES_SYSTEM[class_name]
|
||||
return (cat, class_name, f"ClassName UIA '{class_name}' = dialogue système {cat}")
|
||||
|
||||
# Match insensible à la casse + normalisation espaces
|
||||
cn_norm = class_name.strip()
|
||||
for known, cat in _CLASS_NAMES_SYSTEM.items():
|
||||
if cn_norm.lower() == known.lower():
|
||||
return (cat, class_name, f"ClassName UIA ~= '{known}' ({cat})")
|
||||
|
||||
# Détection souple UAC (il existe quelques variantes de la classe secure)
|
||||
if "secure uap" in class_name.lower() or "uap dummy" in class_name.lower():
|
||||
return (SystemDialogCategory.UAC, class_name,
|
||||
f"ClassName '{class_name}' contient 'Secure UAP' → UAC")
|
||||
|
||||
# Credential XAML Host
|
||||
if "credential" in class_name.lower() and "xaml" in class_name.lower():
|
||||
return (SystemDialogCategory.CREDUI, class_name,
|
||||
f"ClassName '{class_name}' contient Credential+Xaml → CredUI")
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _check_process_name(process_name: str) -> Optional[Tuple[str, str, str]]:
|
||||
"""Vérifier si un nom de processus est un dialogue système.
|
||||
|
||||
Returns:
|
||||
(category, matched_process, reason) si match, None sinon.
|
||||
"""
|
||||
if not process_name:
|
||||
return None
|
||||
|
||||
norm = _normalize_process(process_name)
|
||||
if norm in _PROCESS_NAMES_SYSTEM:
|
||||
cat = _PROCESS_NAMES_SYSTEM[norm]
|
||||
return (cat, process_name, f"Processus '{norm}' = {cat}")
|
||||
return None
|
||||
|
||||
|
||||
def _check_title(title: str) -> Optional[Tuple[str, str, str]]:
|
||||
"""Vérifier si un titre de fenêtre matche un dialogue système.
|
||||
|
||||
Returns:
|
||||
(category, matched_pattern, reason) si match, None sinon.
|
||||
"""
|
||||
if not title:
|
||||
return None
|
||||
|
||||
for pattern, cat in _TITLE_PATTERNS_SYSTEM:
|
||||
m = pattern.search(title)
|
||||
if m:
|
||||
return (cat, m.group(0),
|
||||
f"Titre '{title[:60]}' matche '{pattern.pattern}' → {cat}")
|
||||
return None
|
||||
|
||||
|
||||
def is_system_dialog(
    uia_snapshot: Optional[Dict[str, Any]] = None,
    window_info: Optional[Dict[str, Any]] = None,
) -> SystemDialogDetection:
    """Decide whether the active window is a critical system dialog.

    Several signals are combined — **a single match is enough to block**.
    A false positive (needless pause) is always preferred over a false
    negative (clicking through a UAC prompt).

    Args:
        uia_snapshot: Dict with ``class_name``, ``process_name``,
            ``parent_path``, ``name``. May be None when UIA is unavailable.
        window_info: Dict with ``title``, ``app_name``. May be None.

    Returns:
        SystemDialogDetection with ``is_system_dialog=True`` when a system
        dialog is detected.

    Examples::

        det = is_system_dialog(window_info={"title": "User Account Control"})
        assert det.is_system_dialog  # UAC detected

        det = is_system_dialog(uia_snapshot={"class_name": "$$$Secure UAP Dummy Window Class$$$"})
        assert det.is_system_dialog  # UAC via ClassName

        det = is_system_dialog(window_info={"title": "OSIRIS - Patient Dupont"})
        assert not det.is_system_dialog  # Business app → OK
    """
    uia = uia_snapshot or {}
    win = window_info or {}

    def blocked(signal: str, match: Tuple[str, str, str]) -> SystemDialogDetection:
        # Build a positive detection from a checker result.
        cat, value, reason = match
        return SystemDialogDetection(
            is_system_dialog=True,
            category=cat,
            matched_signal=signal,
            matched_value=value,
            reason=reason,
        )

    # ── Signal 1: UIA ClassName (element itself, then its ancestors) ──
    if uia_snapshot:
        hit = _check_class_name(uia.get("class_name", "") or "")
        if hit:
            return blocked("class_name", hit)

        # The clicked element may be a plain "Button"; the tell-tale class
        # usually sits on the window root (e.g. consent.exe), so walk the
        # parent chain as well.
        for parent in uia.get("parent_path", []) or []:
            hit = _check_class_name(parent.get("class_name", "") or "")
            if hit:
                cat, value, reason = hit
                return blocked("parent_class_name", (cat, value, f"Parent : {reason}"))

    # ── Signal 2: process name ──
    if uia_snapshot:
        hit = _check_process_name(uia.get("process_name", "") or "")
        if hit:
            return blocked("process_name", hit)

    if window_info:
        hit = _check_process_name(win.get("app_name", "") or "")
        if hit:
            return blocked("app_name", hit)

    # ── Signal 3: window title ──
    if window_info:
        hit = _check_title(win.get("title", "") or "")
        if hit:
            return blocked("window_title", hit)

    if uia_snapshot:
        # Some system dialogs surface their title through uia.name instead.
        hit = _check_title(uia.get("name", "") or "")
        if hit:
            return blocked("uia_name", hit)

    return SystemDialogDetection(is_system_dialog=False)
|
||||
|
||||
|
||||
def detect_current_system_dialog() -> SystemDialogDetection:
    """Analyze the current screen and detect a critical system dialog.

    Standalone helper combining ``get_active_window_info()`` and the UIA
    helper (when available) to obtain the most reliable detection.

    Returns:
        SystemDialogDetection. ``is_system_dialog`` is True when any signal
        matches. When no probe is available (Linux, UIA missing) it is
        False — the caller may still fall back to a title-based analysis.
    """
    window_info: Optional[Dict[str, Any]] = None
    uia_snapshot: Optional[Dict[str, Any]] = None

    # Active window probe (cross-platform, best effort).
    try:
        from ..window_info_crossplatform import get_active_window_info
        window_info = get_active_window_info()
    except Exception as e:  # pragma: no cover — best-effort
        logger.debug(f"[SYS-DIALOG] window_info indisponible : {e}")

    # Local UIA probe (Windows only, via lea_uia.exe).
    try:
        from .uia_helper import get_shared_helper
        helper = get_shared_helper()
        if helper.available:
            # Capture the focused element (root == active window).
            focused = helper.capture_focused(max_depth=2)
            if focused is not None:
                uia_snapshot = focused.to_dict()
    except Exception as e:  # pragma: no cover
        logger.debug(f"[SYS-DIALOG] UIA indisponible : {e}")

    detection = is_system_dialog(
        uia_snapshot=uia_snapshot, window_info=window_info,
    )
    if detection.is_system_dialog:
        logger.warning(
            f"[SYS-DIALOG] BLOCAGE — dialogue système détecté "
            f"[{detection.category}] via {detection.matched_signal}='{detection.matched_value}' "
            f"— {detection.reason}"
        )
    return detection
|
||||
|
||||
|
||||
# Public API of this module.
__all__ = [
    "SystemDialogCategory",
    "SystemDialogDetection",
    "is_system_dialog",
    "detect_current_system_dialog",
]
|
||||
294
agent_v0/agent_v1/core/uia_helper.py
Normal file
294
agent_v0/agent_v1/core/uia_helper.py
Normal file
@@ -0,0 +1,294 @@
|
||||
# core/workflow/uia_helper.py
|
||||
"""
|
||||
UIAHelper — Wrapper Python pour lea_uia.exe (helper Rust UI Automation).
|
||||
|
||||
Expose une API Python simple pour interroger UIA via le binaire Rust.
|
||||
Communique via subprocess + stdin/stdout JSON.
|
||||
|
||||
Pourquoi un helper Rust ?
|
||||
- 5-10x plus rapide que pywinauto (10-20ms vs 50-200ms)
|
||||
- Binaire standalone ~500 Ko, aucune dépendance runtime
|
||||
- Pas de problèmes de threading COM en Python
|
||||
- Crash-safe (le crash du helper n'affecte pas l'agent Python)
|
||||
|
||||
Architecture :
|
||||
Python executor
|
||||
↓ subprocess.run
|
||||
lea_uia.exe query --x 812 --y 436
|
||||
↓ UIA API Windows
|
||||
JSON response
|
||||
↓ stdout
|
||||
Python executor parse JSON
|
||||
|
||||
Si lea_uia.exe n'est pas disponible (Linux, binaire absent, crash) :
|
||||
toutes les méthodes retournent None → fallback vision automatique.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import subprocess
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Default timeout for UIA helper calls (seconds).
_DEFAULT_TIMEOUT = 5.0

# Hide the console window when spawning lea_uia.exe on Windows.
# Without this flag, every call (i.e. every user click while recording)
# briefly flashes a black cmd window on screen → it slows the mouse down
# and pollutes the captured screenshots (the VLM may "see" the
# lea_uia.exe path as clicked text).
#
# 0x08000000 is CREATE_NO_WINDOW as defined by the Windows API. On
# Linux/Mac the value is 0 and `creationflags` is ignored. getattr()
# covers the case where Python already exposes the constant on Windows.
if platform.system() == "Windows":
    _SUBPROCESS_CREATION_FLAGS = getattr(subprocess, "CREATE_NO_WINDOW", 0x08000000)
else:
    _SUBPROCESS_CREATION_FLAGS = 0
|
||||
|
||||
|
||||
@dataclass
class UiaElement:
    """Python-side representation of a UIA element returned by lea_uia.exe."""
    name: str = ""
    control_type: str = ""
    class_name: str = ""
    automation_id: str = ""
    # (left, top, right, bottom) in absolute screen pixels.
    bounding_rect: Tuple[int, int, int, int] = (0, 0, 0, 0)
    is_enabled: bool = False
    is_offscreen: bool = True
    # Ancestor chain (outermost first); entries are dicts that are expected
    # to carry "control_type" and "name" keys — TODO confirm against the
    # Rust helper's JSON schema.
    parent_path: List[Dict[str, str]] = field(default_factory=list)
    process_name: str = ""

    def center(self) -> Tuple[int, int]:
        """Return the center of the bounding rectangle (pixels)."""
        x1, y1, x2, y2 = self.bounding_rect
        return ((x1 + x2) // 2, (y1 + y2) // 2)

    def width(self) -> int:
        """Width of the bounding rectangle in pixels."""
        return self.bounding_rect[2] - self.bounding_rect[0]

    def height(self) -> int:
        """Height of the bounding rectangle in pixels."""
        return self.bounding_rect[3] - self.bounding_rect[1]

    def is_clickable(self) -> bool:
        """Can this element be clicked? (enabled, on-screen, non-empty rect)."""
        return (
            self.is_enabled
            and not self.is_offscreen
            and self.width() > 0
            and self.height() > 0
        )

    def path_signature(self) -> str:
        """Signature of the parent chain (used to re-locate the element).

        Fix: use ``.get()`` for 'control_type' — the original indexed
        ``p['control_type']`` directly while only guarding ``p.get('name')``,
        so a malformed parent entry raised KeyError instead of degrading
        gracefully.
        """
        parts = [
            f"{p.get('control_type', '')}[{p.get('name', '')}]"
            for p in self.parent_path
            if p.get("name")
        ]
        parts.append(f"{self.control_type}[{self.name}]")
        return " > ".join(parts)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain JSON-compatible dict (rect becomes a list)."""
        return {
            "name": self.name,
            "control_type": self.control_type,
            "class_name": self.class_name,
            "automation_id": self.automation_id,
            "bounding_rect": list(self.bounding_rect),
            "is_enabled": self.is_enabled,
            "is_offscreen": self.is_offscreen,
            "parent_path": self.parent_path,
            "process_name": self.process_name,
        }

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "UiaElement":
        """Build a UiaElement from the JSON dict emitted by lea_uia.exe.

        Missing or malformed fields fall back to safe defaults so a partial
        helper response never raises.
        """
        rect = d.get("bounding_rect", [0, 0, 0, 0])
        if isinstance(rect, list) and len(rect) >= 4:
            rect = tuple(rect[:4])
        else:
            rect = (0, 0, 0, 0)
        return cls(
            name=d.get("name", ""),
            control_type=d.get("control_type", ""),
            class_name=d.get("class_name", ""),
            automation_id=d.get("automation_id", ""),
            bounding_rect=rect,
            is_enabled=d.get("is_enabled", False),
            is_offscreen=d.get("is_offscreen", True),
            parent_path=d.get("parent_path", []),
            process_name=d.get("process_name", ""),
        )
|
||||
|
||||
|
||||
class UIAHelper:
|
||||
"""Wrapper Python pour lea_uia.exe."""
|
||||
|
||||
    def __init__(self, helper_path: str = "", timeout: float = _DEFAULT_TIMEOUT):
        """Locate the helper binary and probe basic availability.

        Args:
            helper_path: Explicit path to lea_uia.exe; auto-discovered when empty.
            timeout: Per-call subprocess timeout in seconds.
        """
        self._helper_path = helper_path or self._find_helper()
        self._timeout = timeout
        # Cached once at construction; exposed through the `available` property.
        self._available = self._check_available()
|
||||
|
||||
def _find_helper(self) -> str:
|
||||
"""Trouver lea_uia.exe dans les emplacements standards."""
|
||||
candidates = [
|
||||
r"C:\Lea\helpers\lea_uia.exe",
|
||||
os.path.join(os.path.dirname(__file__), "..", "..",
|
||||
"agent_rust", "lea_uia", "target",
|
||||
"x86_64-pc-windows-gnu", "release", "lea_uia.exe"),
|
||||
"./helpers/lea_uia.exe",
|
||||
"lea_uia.exe",
|
||||
]
|
||||
for path in candidates:
|
||||
if os.path.isfile(path):
|
||||
return os.path.abspath(path)
|
||||
return ""
|
||||
|
||||
    def _check_available(self) -> bool:
        """Check that the helper is usable (Windows + binary present).

        Note: despite the docstring mentioning a health check, this only
        verifies the platform and the binary's presence on disk — the
        actual UIA round-trip is done by `health()`.
        """
        if platform.system() != "Windows":
            logger.debug("UIAHelper: Linux/Mac — helper désactivé")
            return False
        if not self._helper_path:
            logger.debug("UIAHelper: lea_uia.exe introuvable")
            return False
        if not os.path.isfile(self._helper_path):
            logger.debug(f"UIAHelper: chemin invalide {self._helper_path}")
            return False
        return True
|
||||
|
||||
    @property
    def available(self) -> bool:
        """True when the helper binary can be used on this machine."""
        return self._available

    @property
    def helper_path(self) -> str:
        """Absolute path to lea_uia.exe ('' when not found)."""
        return self._helper_path
|
||||
|
||||
    def _run(self, args: List[str]) -> Optional[Dict[str, Any]]:
        """Run lea_uia.exe with *args* and parse its JSON stdout.

        Returns:
            The parsed JSON dict, or None on any failure (helper
            unavailable, non-zero exit code, timeout, empty or invalid
            output). Failures are only logged at DEBUG level — callers
            fall back to the vision pipeline.
        """
        if not self._available:
            return None
        try:
            result = subprocess.run(
                [self._helper_path] + args,
                capture_output=True,
                text=True,
                timeout=self._timeout,
                encoding="utf-8",
                errors="replace",
                # Prevents the console-window flash on Windows (see module
                # comment on _SUBPROCESS_CREATION_FLAGS).
                creationflags=_SUBPROCESS_CREATION_FLAGS,
            )
            if result.returncode != 0:
                logger.debug(
                    f"UIAHelper: exit code {result.returncode}, "
                    f"stderr: {result.stderr[:200]}"
                )
                return None
            output = result.stdout.strip()
            if not output:
                return None
            return json.loads(output)
        except subprocess.TimeoutExpired:
            logger.debug(f"UIAHelper: timeout ({self._timeout}s) sur {args}")
            return None
        except json.JSONDecodeError as e:
            logger.debug(f"UIAHelper: JSON invalide — {e}")
            return None
        except Exception as e:
            logger.debug(f"UIAHelper: erreur {e}")
            return None
|
||||
|
||||
def health(self) -> bool:
|
||||
"""Vérifier que UIA répond."""
|
||||
data = self._run(["health"])
|
||||
return data is not None and data.get("status") == "ok"
|
||||
|
||||
def query_at(
    self,
    x: int,
    y: int,
    with_parents: bool = True,
) -> Optional[UiaElement]:
    """Look up the UIA element at an absolute screen position.

    Args:
        x, y: Absolute pixel coordinates.
        with_parents: Also include the parent hierarchy in the result.

    Returns:
        The matching UiaElement, or None when nothing is found or UIA
        is unavailable.
    """
    cmd = ["query", "--x", str(x), "--y", str(y)]
    if not with_parents:
        cmd.append("--with-parents=false")

    data = self._run(cmd)
    if not data:
        return None
    if data.get("status") != "ok":
        return None

    elem = data.get("element")
    return UiaElement.from_dict(elem) if elem else None
|
||||
|
||||
def find_by_name(
    self,
    name: str,
    control_type: Optional[str] = None,
    automation_id: Optional[str] = None,
    window: Optional[str] = None,
    timeout_ms: int = 2000,
) -> Optional[UiaElement]:
    """Search for an element by its exact name, with optional filters.

    Args:
        name: Exact element name.
        control_type: Control type filter (Button, Edit, MenuItem...).
        automation_id: Automation id filter.
        window: Restrict the search to a specific window.
        timeout_ms: Search timeout in milliseconds.

    Returns:
        The matching UiaElement, or None if not found / UIA unavailable.
    """
    cmd = ["find", "--name", name, "--timeout-ms", str(timeout_ms)]
    # Append each optional filter flag only when a value was provided.
    for flag, value in (
        ("--control-type", control_type),
        ("--automation-id", automation_id),
        ("--window", window),
    ):
        if value:
            cmd += [flag, value]

    data = self._run(cmd)
    if not data or data.get("status") != "ok":
        return None

    elem = data.get("element")
    return UiaElement.from_dict(elem) if elem else None
|
||||
|
||||
def capture_focused(self, max_depth: int = 3) -> Optional[UiaElement]:
    """Capture the focused element plus its context (up to *max_depth*)."""
    data = self._run(["capture", "--max-depth", str(max_depth)])
    healthy = bool(data) and data.get("status") == "ok"
    if not healthy:
        return None

    elem = data.get("element")
    return UiaElement.from_dict(elem) if elem else None
|
||||
|
||||
|
||||
# Process-wide shared instance (lightweight singleton, created lazily)
_SHARED_HELPER: Optional[UIAHelper] = None


def get_shared_helper() -> UIAHelper:
    """Return the shared UIAHelper instance, creating it on first use.

    NOTE(review): creation is not lock-protected; two racing threads may
    each build an instance (the last assignment wins) — confirm this is
    acceptable for UIAHelper's side effects.
    """
    global _SHARED_HELPER
    if _SHARED_HELPER is None:
        _SHARED_HELPER = UIAHelper()
    return _SHARED_HELPER
|
||||
@@ -38,8 +38,19 @@ except (ImportError, ValueError):
|
||||
except ImportError:
|
||||
LeaServerClient = None
|
||||
|
||||
# Configuration du logging
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
|
||||
# Configuration du logging — format structuré et lisible pour un TIM
|
||||
# Niveau de détail : INFO par défaut, DEBUG si RPA_AGENT_DEBUG=1
|
||||
_log_level = logging.DEBUG if os.environ.get("RPA_AGENT_DEBUG") == "1" else logging.INFO
|
||||
logging.basicConfig(
|
||||
level=_log_level,
|
||||
format="%(asctime)s %(levelname)-7s %(name)-25s %(message)s",
|
||||
datefmt="%H:%M:%S",
|
||||
)
|
||||
|
||||
# Réduire le bruit de certaines libs
|
||||
for _noisy in ("urllib3", "requests.packages.urllib3", "PIL", "mss"):
|
||||
logging.getLogger(_noisy).setLevel(logging.WARNING)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Intervalle de polling replay (secondes)
|
||||
@@ -371,12 +382,22 @@ class AgentV1:
|
||||
time.sleep(5)
|
||||
|
||||
def stop_session(self):
|
||||
# Arrêter la capture et le streaming de la session d'enregistrement
|
||||
if self.captor: self.captor.stop()
|
||||
if self.streamer: self.streamer.stop()
|
||||
logger.info(f"Session {self.session_id} terminée.")
|
||||
# Sauvegarder le session_id avant de l'annuler (pour les logs)
|
||||
ended_session_id = self.session_id
|
||||
|
||||
# Reset le session_id pour que le poll replay utilise l'ID stable
|
||||
# Arrêter la capture d'abord (plus d'events entrants)
|
||||
if self.captor: self.captor.stop()
|
||||
|
||||
# Attendre que les events en cours de traitement dans _on_event_bridge
|
||||
# aient le temps d'être envoyés au streamer (capture duale + push)
|
||||
import time
|
||||
time.sleep(1.5)
|
||||
|
||||
# Maintenant arrêter le streamer (drain queue + finalize)
|
||||
if self.streamer: self.streamer.stop()
|
||||
logger.info(f"Session {ended_session_id} terminée.")
|
||||
|
||||
# Reset le session_id APRÈS le stop complet du streamer
|
||||
self.session_id = None
|
||||
|
||||
# Reset le backoff de l'executor pour reprendre le polling immédiatement
|
||||
@@ -403,6 +424,7 @@ class AgentV1:
|
||||
"""Capture périodique pour donner du contexte au stagiaire.
|
||||
Déduplication : n'envoie que si l'écran a changé.
|
||||
Tourne tant que session_id est défini (= enregistrement actif).
|
||||
Enrichi avec le titre de la fenêtre active pour contextualisation.
|
||||
"""
|
||||
while self.running and self.session_id:
|
||||
try:
|
||||
@@ -413,7 +435,17 @@ class AgentV1:
|
||||
if img_hash != self._last_heartbeat_hash:
|
||||
self._last_heartbeat_hash = img_hash
|
||||
self.streamer.push_image(full_path, f"heartbeat_{int(time.time())}")
|
||||
self.streamer.push_event({"type": "heartbeat", "image": full_path, "timestamp": time.time(), "machine_id": self.machine_id})
|
||||
heartbeat_event = {
|
||||
"type": "heartbeat",
|
||||
"image": full_path,
|
||||
"timestamp": time.time(),
|
||||
"machine_id": self.machine_id,
|
||||
}
|
||||
# Ajouter le titre de la fenêtre active (léger, pas de crop)
|
||||
window_title = self.vision.get_active_window_title()
|
||||
if window_title:
|
||||
heartbeat_event["active_window_title"] = window_title
|
||||
self.streamer.push_event(heartbeat_event)
|
||||
except Exception as e:
|
||||
logger.error(f"Heartbeat error: {e}")
|
||||
time.sleep(5)
|
||||
@@ -448,20 +480,33 @@ class AgentV1:
|
||||
event["screenshot_context"] = full_path
|
||||
self.streamer.push_image(full_path, f"focus_{int(time.time())}")
|
||||
|
||||
# 🔴 Capture Interactive (Dual)
|
||||
# Capture Interactive (Dual + Fenêtre active)
|
||||
if event["type"] in ["mouse_click", "key_combo"]:
|
||||
self.shot_counter += 1
|
||||
shot_id = f"shot_{self.shot_counter:04d}"
|
||||
|
||||
|
||||
pos = event.get("pos", (0, 0))
|
||||
capture_info = self.vision.capture_dual(pos[0], pos[1], shot_id)
|
||||
|
||||
|
||||
event["screenshot_id"] = shot_id
|
||||
event["vision_info"] = capture_info
|
||||
|
||||
|
||||
# Enrichir l'event avec les métadonnées de la fenêtre active
|
||||
# (titre, rect, coordonnées clic relatives, taille fenêtre)
|
||||
window_capture = capture_info.get("window_capture")
|
||||
if window_capture:
|
||||
event["window_capture"] = {
|
||||
"title": window_capture.get("window_title", ""),
|
||||
"app_name": window_capture.get("app_name", ""),
|
||||
"rect": window_capture.get("window_rect"),
|
||||
"click_relative": window_capture.get("click_in_window"),
|
||||
"window_size": window_capture.get("window_size"),
|
||||
"click_inside_window": window_capture.get("click_inside_window", True),
|
||||
}
|
||||
|
||||
self._stream_capture_info(capture_info, shot_id)
|
||||
|
||||
# 🕒 POST-ACTION : Capture du résultat après 1s (pour voir le résultat du clic)
|
||||
|
||||
# POST-ACTION : Capture du résultat après 1s (pour voir le résultat du clic)
|
||||
threading.Timer(1.0, self._capture_result, args=(shot_id,)).start()
|
||||
|
||||
self.ui.update_stats(self.shot_counter)
|
||||
@@ -481,6 +526,12 @@ class AgentV1:
|
||||
self.streamer.push_image(capture_info["full"], f"{shot_id}_full")
|
||||
if "crop" in capture_info:
|
||||
self.streamer.push_image(capture_info["crop"], f"{shot_id}_crop")
|
||||
# Streamer l'image de la fenêtre active si disponible
|
||||
window_capture = capture_info.get("window_capture")
|
||||
if window_capture and "window_image" in window_capture:
|
||||
self.streamer.push_image(
|
||||
window_capture["window_image"], f"{shot_id}_window"
|
||||
)
|
||||
|
||||
def run(self):
|
||||
self.ui.run()
|
||||
|
||||
380
agent_v0/agent_v1/network/persistent_buffer.py
Normal file
380
agent_v0/agent_v1/network/persistent_buffer.py
Normal file
@@ -0,0 +1,380 @@
|
||||
# agent_v1/network/persistent_buffer.py
|
||||
"""
|
||||
Buffer persistant SQLite pour les événements/images qui n'ont pas pu être envoyés.
|
||||
|
||||
Résout le bloquant AI Act Article 12 : en cas de coupure serveur ou de queue pleine,
|
||||
les événements prioritaires (click, key, action, screenshot) sont persistés sur disque
|
||||
au lieu d'être silencieusement perdus. Ils sont rejoués à la reconnexion.
|
||||
|
||||
Caractéristiques :
|
||||
- SQLite fichier unique (agent_v1/buffer/pending_events.db), thread-safe
|
||||
- Async : les écritures se font depuis un thread daemon, jamais bloquant
|
||||
- Quota : compteur d'attempts par item, abandon après MAX_ATTEMPTS
|
||||
- Robustesse : un fichier corrompu est renommé et recréé vide
|
||||
"""
|
||||
|
||||
from __future__ import annotations

import json
import logging
import os
import sqlite3
import threading
import time
from contextlib import closing
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)

# Maximum delivery attempts before an item is permanently abandoned
MAX_ATTEMPTS = 10

# Hard cap on buffered items so the on-disk DB cannot grow unbounded
# (typically: 1000 events + 1000 images = a few MB of SQLite)
MAX_BUFFER_ITEMS = 2000


class PersistentBuffer:
    """SQLite-backed buffer for events/images awaiting delivery.

    Two tables:
      - pending_events (id, session_id, payload, attempts, created_at)
      - pending_images (id, session_id, shot_id, image_path, attempts,
        created_at)

    Usage:
        buf = PersistentBuffer(base_dir / "buffer")
        buf.add_event(session_id, event_dict)           # persist an event
        buf.add_image(session_id, image_path, shot_id)  # persist an image
        for row in buf.drain_events():                  # iterate pending
            if send(row):
                buf.delete_event(row["id"])
            else:
                buf.increment_attempts(row["id"], "event")

    All public methods are thread-safe (one lock serializes DB access)
    and never raise: failures are logged and reported via return values.

    Fix (resource handling): connections are now explicitly closed via
    contextlib.closing. ``with conn:`` on sqlite3 only manages the
    transaction — and is a no-op under autocommit — so connections were
    previously left to the garbage collector, and the corruption-recovery
    rename could fail on Windows while the failed handle was still open.
    """

    # Shared DDL: the normal path and the corruption-recovery path create
    # the exact same schema (previously duplicated with divergent formatting).
    _EVENTS_DDL = (
        "CREATE TABLE IF NOT EXISTS pending_events ("
        "id INTEGER PRIMARY KEY AUTOINCREMENT, "
        "session_id TEXT NOT NULL, payload TEXT NOT NULL, "
        "attempts INTEGER NOT NULL DEFAULT 0, "
        "created_at REAL NOT NULL)"
    )
    _IMAGES_DDL = (
        "CREATE TABLE IF NOT EXISTS pending_images ("
        "id INTEGER PRIMARY KEY AUTOINCREMENT, "
        "session_id TEXT NOT NULL, shot_id TEXT NOT NULL, "
        "image_path TEXT NOT NULL, "
        "attempts INTEGER NOT NULL DEFAULT 0, "
        "created_at REAL NOT NULL)"
    )

    def __init__(self, buffer_dir: Path):
        # Directory holding the SQLite file; created on demand.
        self.buffer_dir = Path(buffer_dir)
        self.buffer_dir.mkdir(parents=True, exist_ok=True)
        self.db_path = self.buffer_dir / "pending_events.db"
        # One lock for every DB access: SQLite handles cross-process
        # concurrency, this handles cross-thread concurrency in-process.
        self._lock = threading.Lock()
        self._init_db()

    # ---------------------------------------------------------------
    # Initialization / corruption handling
    # ---------------------------------------------------------------

    def _init_db(self):
        """Create the tables and indexes if they do not exist.

        If the file is corrupted, rename it to ``.corrupted.<ts>`` and
        recreate an empty buffer: losing an unreadable buffer is
        preferable to crashing the agent at startup.
        """
        try:
            with closing(self._connect()) as conn:
                conn.execute(self._EVENTS_DDL)
                conn.execute(self._IMAGES_DDL)
                # Indexes back the ORDER BY created_at in drain_* queries.
                conn.execute(
                    "CREATE INDEX IF NOT EXISTS idx_events_created "
                    "ON pending_events(created_at)"
                )
                conn.execute(
                    "CREATE INDEX IF NOT EXISTS idx_images_created "
                    "ON pending_images(created_at)"
                )
                conn.commit()
        except sqlite3.DatabaseError as e:
            logger.warning(
                f"Buffer SQLite corrompu ({e}) — renommage en .corrupted "
                f"et recréation d'un buffer vide"
            )
            # The failed connection is closed (closing above) before the
            # rename — an open handle would make os.rename fail on Windows.
            try:
                corrupted = self.db_path.with_suffix(
                    f".corrupted.{int(time.time())}"
                )
                os.rename(self.db_path, corrupted)
            except OSError:
                # If the rename fails, fall back to direct deletion.
                try:
                    os.remove(self.db_path)
                except OSError:
                    pass
            # Second attempt on a fresh file (empty tables).
            with closing(self._connect()) as conn:
                conn.execute(self._EVENTS_DDL)
                conn.execute(self._IMAGES_DDL)
                conn.commit()

    def _connect(self) -> sqlite3.Connection:
        """Open a SQLite connection in WAL mode (better concurrency).

        autocommit (isolation_level=None): every statement commits as it
        executes. The caller owns the connection and must close it —
        wrap in ``closing(...)``.
        """
        conn = sqlite3.connect(
            str(self.db_path),
            timeout=5.0,
            check_same_thread=False,
            isolation_level=None,  # autocommit — we manage no transactions
        )
        try:
            conn.execute("PRAGMA journal_mode=WAL")
            conn.execute("PRAGMA synchronous=NORMAL")
        except sqlite3.DatabaseError:
            # PRAGMA failure is non-fatal: fall back to default journaling.
            pass
        conn.row_factory = sqlite3.Row
        return conn

    # ---------------------------------------------------------------
    # Writes — persist one item
    # ---------------------------------------------------------------

    def add_event(self, session_id: str, event: dict) -> bool:
        """Persist an event. Returns True if written, False otherwise.

        If the buffer already holds MAX_BUFFER_ITEMS events, the insert
        is dropped (better than saturating the disk) with a warning.
        """
        with self._lock:
            try:
                with closing(self._connect()) as conn:
                    count = conn.execute(
                        "SELECT COUNT(*) FROM pending_events"
                    ).fetchone()[0]
                    if count >= MAX_BUFFER_ITEMS:
                        logger.warning(
                            f"Buffer persistant saturé ({count} events) "
                            f"— event droppé"
                        )
                        return False
                    conn.execute(
                        "INSERT INTO pending_events "
                        "(session_id, payload, attempts, created_at) "
                        "VALUES (?, ?, 0, ?)",
                        (session_id, json.dumps(event), time.time()),
                    )
                    return True
            except (sqlite3.DatabaseError, TypeError, ValueError) as e:
                # TypeError/ValueError: event is not JSON-serializable.
                logger.error(f"Buffer add_event échoué : {e}")
                return False

    def add_image(
        self, session_id: str, image_path: str, shot_id: str
    ) -> bool:
        """Persist an image reference (file path + shot_id).

        The image bytes are NOT stored (they would bloat the DB): only
        the path. The file must therefore stay on disk until it has been
        successfully delivered to the server.
        """
        with self._lock:
            try:
                with closing(self._connect()) as conn:
                    count = conn.execute(
                        "SELECT COUNT(*) FROM pending_images"
                    ).fetchone()[0]
                    if count >= MAX_BUFFER_ITEMS:
                        logger.warning(
                            f"Buffer persistant saturé ({count} images) "
                            f"— image droppée"
                        )
                        return False
                    conn.execute(
                        "INSERT INTO pending_images "
                        "(session_id, shot_id, image_path, attempts, created_at) "
                        "VALUES (?, ?, ?, 0, ?)",
                        (session_id, shot_id, image_path, time.time()),
                    )
                    return True
            except sqlite3.DatabaseError as e:
                logger.error(f"Buffer add_image échoué : {e}")
                return False

    # ---------------------------------------------------------------
    # Reads — drain in chronological order
    # ---------------------------------------------------------------

    def drain_events(self, limit: int = 100) -> list:
        """Return pending events as dicts, oldest first."""
        with self._lock:
            try:
                with closing(self._connect()) as conn:
                    rows = conn.execute(
                        "SELECT id, session_id, payload, attempts "
                        "FROM pending_events "
                        "ORDER BY created_at ASC LIMIT ?",
                        (limit,),
                    ).fetchall()
                    return [dict(r) for r in rows]
            except sqlite3.DatabaseError as e:
                logger.error(f"Buffer drain_events échoué : {e}")
                return []

    def drain_images(self, limit: int = 50) -> list:
        """Return pending images as dicts, oldest first."""
        with self._lock:
            try:
                with closing(self._connect()) as conn:
                    rows = conn.execute(
                        "SELECT id, session_id, shot_id, image_path, attempts "
                        "FROM pending_images "
                        "ORDER BY created_at ASC LIMIT ?",
                        (limit,),
                    ).fetchall()
                    return [dict(r) for r in rows]
            except sqlite3.DatabaseError as e:
                logger.error(f"Buffer drain_images échoué : {e}")
                return []

    # ---------------------------------------------------------------
    # Marking — success, failure, abandonment
    # ---------------------------------------------------------------

    def delete_event(self, row_id: int):
        """Delete an event after a successful send."""
        with self._lock:
            try:
                with closing(self._connect()) as conn:
                    conn.execute(
                        "DELETE FROM pending_events WHERE id = ?", (row_id,)
                    )
            except sqlite3.DatabaseError as e:
                logger.error(f"Buffer delete_event échoué : {e}")

    def delete_image(self, row_id: int):
        """Delete an image after a successful send."""
        with self._lock:
            try:
                with closing(self._connect()) as conn:
                    conn.execute(
                        "DELETE FROM pending_images WHERE id = ?", (row_id,)
                    )
            except sqlite3.DatabaseError as e:
                logger.error(f"Buffer delete_image échoué : {e}")

    def increment_attempts(self, row_id: int, kind: str) -> int:
        """Increment an item's attempt counter and return the new value.

        kind: "event" or "image". On DB error, returns MAX_ATTEMPTS so
        the caller treats the item as exhausted.
        """
        # Table name comes from a fixed internal two-way choice — never
        # user input — so the f-string SQL below is injection-safe.
        table = "pending_events" if kind == "event" else "pending_images"
        with self._lock:
            try:
                with closing(self._connect()) as conn:
                    conn.execute(
                        f"UPDATE {table} SET attempts = attempts + 1 "
                        "WHERE id = ?",
                        (row_id,),
                    )
                    row = conn.execute(
                        f"SELECT attempts FROM {table} WHERE id = ?",
                        (row_id,),
                    ).fetchone()
                    return int(row["attempts"]) if row else MAX_ATTEMPTS
            except sqlite3.DatabaseError as e:
                logger.error(f"Buffer increment_attempts échoué : {e}")
                return MAX_ATTEMPTS

    def abandon_exceeded(self) -> int:
        """Delete items whose attempt count reached MAX_ATTEMPTS.

        Each abandoned item is logged at ERROR level (AI Act audit trail)
        before deletion. Returns the number of abandoned items.
        """
        abandoned = 0
        with self._lock:
            try:
                with closing(self._connect()) as conn:
                    # Abandoned events
                    rows = conn.execute(
                        "SELECT id, session_id, payload FROM pending_events "
                        "WHERE attempts >= ?",
                        (MAX_ATTEMPTS,),
                    ).fetchall()
                    for r in rows:
                        try:
                            event_type = json.loads(r["payload"]).get(
                                "type", "?"
                            )
                        except (ValueError, TypeError):
                            event_type = "?"
                        logger.error(
                            f"Buffer : event abandonné après {MAX_ATTEMPTS} "
                            f"tentatives — session={r['session_id']} "
                            f"type={event_type}"
                        )
                        abandoned += 1
                    conn.execute(
                        "DELETE FROM pending_events WHERE attempts >= ?",
                        (MAX_ATTEMPTS,),
                    )

                    # Abandoned images
                    rows = conn.execute(
                        "SELECT id, session_id, shot_id FROM pending_images "
                        "WHERE attempts >= ?",
                        (MAX_ATTEMPTS,),
                    ).fetchall()
                    for r in rows:
                        logger.error(
                            f"Buffer : image abandonnée après {MAX_ATTEMPTS} "
                            f"tentatives — session={r['session_id']} "
                            f"shot_id={r['shot_id']}"
                        )
                        abandoned += 1
                    conn.execute(
                        "DELETE FROM pending_images WHERE attempts >= ?",
                        (MAX_ATTEMPTS,),
                    )
            except sqlite3.DatabaseError as e:
                logger.error(f"Buffer abandon_exceeded échoué : {e}")
        return abandoned

    # ---------------------------------------------------------------
    # Introspection
    # ---------------------------------------------------------------

    def counts(self) -> dict:
        """Return pending counts as ``{"events": int, "images": int}``.

        Returns zeros on DB error (diagnostic helper — never raises).
        """
        with self._lock:
            try:
                with closing(self._connect()) as conn:
                    ev = conn.execute(
                        "SELECT COUNT(*) FROM pending_events"
                    ).fetchone()[0]
                    im = conn.execute(
                        "SELECT COUNT(*) FROM pending_images"
                    ).fetchone()[0]
                    return {"events": ev, "images": im}
            except sqlite3.DatabaseError:
                return {"events": 0, "images": 0}

    def is_empty(self) -> bool:
        """True when no pending events nor images remain."""
        c = self.counts()
        return c["events"] == 0 and c["images"] == 0
|
||||
@@ -14,10 +14,19 @@ Robustesse (P0-2) :
|
||||
- Health-check périodique (30s) pour recovery du flag _server_available
|
||||
- Compression JPEG qualité 85 pour les images (réduction ~5-10x)
|
||||
- Backpressure : queue bornée (maxsize=100), drop des heartbeat si pleine
|
||||
|
||||
Conformité AI Act (Article 12 — journalisation automatique) :
|
||||
- Purge après ACK : les screenshots locaux sont supprimés après HTTP 200
|
||||
du serveur (par défaut). Le serveur devient la source de vérité.
|
||||
- Buffer persistant : les events/images prioritaires non envoyés sont
|
||||
persistés dans un SQLite local (agent_v1/buffer/pending_events.db)
|
||||
et rejoués au démarrage et à la reconnexion.
|
||||
"""
|
||||
|
||||
import enum
|
||||
import io
|
||||
import logging
|
||||
import os
|
||||
import queue
|
||||
import threading
|
||||
import time
|
||||
@@ -25,7 +34,18 @@ import time
|
||||
import requests
|
||||
from PIL import Image
|
||||
|
||||
from ..config import API_TOKEN, STREAMING_ENDPOINT
|
||||
from ..config import API_TOKEN, BASE_DIR, STREAMING_ENDPOINT
|
||||
from .persistent_buffer import MAX_ATTEMPTS, PersistentBuffer
|
||||
|
||||
|
||||
# Fix P0-E: tri-state image send result (success / network failure / missing
# file). A FileNotFoundError must NOT be treated like an HTTP 200 success —
# otherwise the SQLite buffer deletes the entry even though the server never
# received the image (silent data loss).
class ImageSendResult(enum.Enum):
    OK = "ok"  # HTTP 200 — server acknowledged receipt
    FAILED = "failed"  # Recoverable network/server error (retry is fine)
    FILE_GONE = "file_gone"  # Local file missing (abandon — do not retry)
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -45,6 +65,20 @@ QUEUE_MAX_SIZE = 100
|
||||
# Types d'événements à ne jamais dropper
|
||||
PRIORITY_EVENT_TYPES = {"click", "key", "scroll", "action", "screenshot"}
|
||||
|
||||
# Purge locale après ACK serveur (Partie A de l'audit)
|
||||
# Activé par défaut : le serveur conserve déjà les screenshots 180 jours
|
||||
# (conformité AI Act Article 12). Désactivable via RPA_PURGE_AFTER_ACK=0
|
||||
# pour debugging local.
|
||||
PURGE_AFTER_ACK = os.environ.get("RPA_PURGE_AFTER_ACK", "1").lower() in (
|
||||
"1", "true", "yes",
|
||||
)
|
||||
|
||||
# Chemin du buffer persistant (Partie B de l'audit)
|
||||
BUFFER_DIR = BASE_DIR / "buffer"
|
||||
|
||||
# Intervalle entre deux tentatives de drain du buffer (secondes)
|
||||
BUFFER_DRAIN_INTERVAL_S = 15
|
||||
|
||||
|
||||
class TraceStreamer:
|
||||
def __init__(self, session_id: str, machine_id: str = "default"):
|
||||
@@ -54,8 +88,20 @@ class TraceStreamer:
|
||||
self.running = False
|
||||
self._thread = None
|
||||
self._health_thread = None
|
||||
self._drain_thread = None
|
||||
self._server_available = True # Désactivé après trop d'échecs
|
||||
|
||||
# Buffer persistant — partagé entre sessions (survit au redémarrage)
|
||||
# Initialisé paresseusement pour ne pas payer le coût SQLite en dehors
|
||||
# d'un streaming actif.
|
||||
self._buffer: PersistentBuffer | None = None
|
||||
|
||||
def _get_buffer(self) -> PersistentBuffer:
    """Return the persistent buffer, lazily creating it on first use.

    Lazy so that a streamer which never overflows its queue never pays
    the SQLite setup cost.
    NOTE(review): creation is not lock-protected — concurrent first calls
    could each build a PersistentBuffer (last assignment wins); confirm
    callers tolerate this.
    """
    if self._buffer is None:
        self._buffer = PersistentBuffer(BUFFER_DIR)
    return self._buffer
|
||||
|
||||
@staticmethod
|
||||
def _auth_headers() -> dict:
|
||||
"""Headers d'authentification Bearer pour les requêtes API."""
|
||||
@@ -75,6 +121,11 @@ class TraceStreamer:
|
||||
target=self._health_check_loop, daemon=True
|
||||
)
|
||||
self._health_thread.start()
|
||||
# Thread de drain du buffer persistant (rejoue les items en attente)
|
||||
self._drain_thread = threading.Thread(
|
||||
target=self._buffer_drain_loop, daemon=True
|
||||
)
|
||||
self._drain_thread.start()
|
||||
logger.info(f"Streamer pour {self.session_id} démarré")
|
||||
|
||||
def stop(self):
|
||||
@@ -99,6 +150,9 @@ class TraceStreamer:
|
||||
if self._health_thread:
|
||||
self._health_thread.join(timeout=2.0)
|
||||
|
||||
if self._drain_thread:
|
||||
self._drain_thread.join(timeout=2.0)
|
||||
|
||||
self._finalize_session()
|
||||
logger.info(f"Streamer pour {self.session_id} arrêté")
|
||||
|
||||
@@ -126,11 +180,21 @@ class TraceStreamer:
|
||||
|
||||
Quand la queue est pleine :
|
||||
- Les événements prioritaires (click, key, action, screenshot) sont
|
||||
ajoutés en bloquant brièvement (0.5s)
|
||||
- Les heartbeat sont silencieusement droppés
|
||||
ajoutés en bloquant brièvement (0.5s). Si toujours pleine → persistés
|
||||
dans le buffer SQLite pour rejeu ultérieur.
|
||||
- Les heartbeat sont silencieusement droppés.
|
||||
- Si le serveur est marqué indisponible, on persiste immédiatement les
|
||||
items prioritaires (évite de remplir la queue inutilement).
|
||||
"""
|
||||
is_priority = self._is_priority_item(item_type, data)
|
||||
|
||||
# Serveur indisponible + item prioritaire → on persiste directement
|
||||
# sans polluer la queue RAM (qui ne sera jamais vidée tant que le
|
||||
# serveur est down).
|
||||
if is_priority and not self._server_available:
|
||||
self._persist_to_buffer(item_type, data)
|
||||
return
|
||||
|
||||
try:
|
||||
self.queue.put_nowait((item_type, data))
|
||||
except queue.Full:
|
||||
@@ -139,10 +203,18 @@ class TraceStreamer:
|
||||
try:
|
||||
self.queue.put((item_type, data), timeout=0.5)
|
||||
except queue.Full:
|
||||
logger.warning(
|
||||
f"Queue pleine — événement prioritaire droppé "
|
||||
f"(type={item_type})"
|
||||
)
|
||||
# Persistance disque (ne JAMAIS dropper un prioritaire)
|
||||
persisted = self._persist_to_buffer(item_type, data)
|
||||
if persisted:
|
||||
logger.warning(
|
||||
f"Queue pleine — événement prioritaire persisté "
|
||||
f"sur disque (type={item_type})"
|
||||
)
|
||||
else:
|
||||
logger.error(
|
||||
f"Queue pleine ET buffer saturé — événement "
|
||||
f"prioritaire perdu (type={item_type})"
|
||||
)
|
||||
else:
|
||||
# Heartbeat ou événement non-critique : on drop silencieusement
|
||||
logger.debug(
|
||||
@@ -163,6 +235,23 @@ class TraceStreamer:
|
||||
return event_type in PRIORITY_EVENT_TYPES
|
||||
return False
|
||||
|
||||
def _persist_to_buffer(self, item_type: str, data) -> bool:
    """Persist an item into the SQLite buffer. Returns True on success.

    Used when the in-memory queue is full or the server is unavailable.
    item_type "event" expects a dict payload; "image" expects a
    (path, shot_id) pair. Anything else falls through and returns False.
    """
    try:
        buf = self._get_buffer()
        if item_type == "event" and isinstance(data, dict):
            return buf.add_event(self.session_id, data)
        if item_type == "image":
            path, shot_id = data
            return buf.add_image(self.session_id, path, shot_id)
    except Exception as e:
        # Never take the agent down because buffering failed.
        logger.error(f"Persistance buffer échouée : {e}")
    return False
|
||||
|
||||
# =========================================================================
|
||||
# Boucle d'envoi
|
||||
# =========================================================================
|
||||
@@ -174,16 +263,36 @@ class TraceStreamer:
|
||||
try:
|
||||
item_type, data = self.queue.get(timeout=0.5)
|
||||
success = False
|
||||
is_file_gone = False
|
||||
if item_type == "event":
|
||||
success = self._send_with_retry(self._send_event, data)
|
||||
elif item_type == "image":
|
||||
success = self._send_with_retry(self._send_image, *data)
|
||||
result = self._send_with_retry(self._send_image, *data)
|
||||
# Fix P0-E : distinguer FILE_GONE du vrai succès HTTP.
|
||||
if result is ImageSendResult.OK:
|
||||
success = True
|
||||
elif result is ImageSendResult.FILE_GONE:
|
||||
# Fichier disparu : pas de retry, pas de persistance
|
||||
# (on ne peut plus le renvoyer). On considère l'item
|
||||
# comme traité sans comptabiliser un succès réseau.
|
||||
is_file_gone = True
|
||||
success = False
|
||||
else:
|
||||
success = False
|
||||
self.queue.task_done()
|
||||
|
||||
if success:
|
||||
consecutive_failures = 0
|
||||
elif is_file_gone:
|
||||
# Fichier introuvable — déjà logué ERROR dans _send_image.
|
||||
# On ne persiste PAS dans le buffer (retry voué à échouer).
|
||||
consecutive_failures = 0
|
||||
else:
|
||||
consecutive_failures += 1
|
||||
# Après 3 retries infructueux, si l'item est prioritaire,
|
||||
# on le persiste pour ne pas le perdre définitivement.
|
||||
if self._is_priority_item(item_type, data):
|
||||
self._persist_to_buffer(item_type, data)
|
||||
if consecutive_failures >= 10:
|
||||
logger.warning(
|
||||
"10 échecs consécutifs — serveur marqué indisponible"
|
||||
@@ -200,15 +309,22 @@ class TraceStreamer:
|
||||
# Retry avec backoff exponentiel
|
||||
# =========================================================================
|
||||
|
||||
def _send_with_retry(self, send_fn, *args) -> bool:
|
||||
def _send_with_retry(self, send_fn, *args):
|
||||
"""Tente l'envoi avec retry et backoff exponentiel.
|
||||
|
||||
3 tentatives max avec délais de 1s, 2s, 4s entre chaque.
|
||||
Retourne True si l'envoi a réussi, False sinon.
|
||||
Retourne :
|
||||
- True / ImageSendResult.OK si l'envoi a réussi
|
||||
- ImageSendResult.FILE_GONE (images uniquement) — pas de retry
|
||||
- False / ImageSendResult.FAILED sinon
|
||||
"""
|
||||
# Première tentative (sans délai)
|
||||
if send_fn(*args):
|
||||
return True
|
||||
first = send_fn(*args)
|
||||
if first is ImageSendResult.OK or first is True:
|
||||
return first
|
||||
# Fix P0-E : FILE_GONE → pas de retry, l'erreur est permanente.
|
||||
if first is ImageSendResult.FILE_GONE:
|
||||
return first
|
||||
|
||||
# Retries avec backoff
|
||||
for attempt, delay in enumerate(RETRY_DELAYS, start=1):
|
||||
@@ -219,9 +335,13 @@ class TraceStreamer:
|
||||
f"Retry {attempt}/{MAX_RETRIES} dans {delay}s..."
|
||||
)
|
||||
time.sleep(delay)
|
||||
if send_fn(*args):
|
||||
result = send_fn(*args)
|
||||
if result is ImageSendResult.OK or result is True:
|
||||
logger.debug(f"Retry {attempt} réussi")
|
||||
return True
|
||||
return result
|
||||
# FILE_GONE pendant un retry — idem, on arrête
|
||||
if result is ImageSendResult.FILE_GONE:
|
||||
return result
|
||||
|
||||
logger.debug(f"Envoi échoué après {MAX_RETRIES} retries")
|
||||
return False
|
||||
@@ -260,6 +380,115 @@ class TraceStreamer:
|
||||
except Exception:
|
||||
logger.debug("Health-check échoué — serveur toujours indisponible")
|
||||
|
||||
# =========================================================================
|
||||
# Drain du buffer persistant (Partie B)
|
||||
# =========================================================================
|
||||
|
||||
def _buffer_drain_loop(self):
|
||||
"""Rejoue les items persistés en arrière-plan.
|
||||
|
||||
Tourne tant que self.running. Essaie de drainer le buffer toutes les
|
||||
BUFFER_DRAIN_INTERVAL_S secondes, mais seulement si :
|
||||
- le serveur est disponible,
|
||||
- il y a effectivement des items en attente.
|
||||
|
||||
Au premier passage (démarrage agent), on draine immédiatement pour
|
||||
rejouer tout ce qui a été persisté lors de la session précédente.
|
||||
"""
|
||||
# Au démarrage : drain immédiat (pas d'attente)
|
||||
first_pass = True
|
||||
while self.running:
|
||||
if not first_pass:
|
||||
time.sleep(BUFFER_DRAIN_INTERVAL_S)
|
||||
if not self.running:
|
||||
break
|
||||
first_pass = False
|
||||
|
||||
if not self._server_available:
|
||||
continue
|
||||
|
||||
try:
|
||||
buf = self._get_buffer()
|
||||
# Abandonner d'abord les items exceeded (évite de les retenter)
|
||||
abandoned = buf.abandon_exceeded()
|
||||
if abandoned:
|
||||
logger.warning(
|
||||
f"Buffer : {abandoned} items abandonnés "
|
||||
f"après {MAX_ATTEMPTS} tentatives"
|
||||
)
|
||||
|
||||
counts = buf.counts()
|
||||
if counts["events"] == 0 and counts["images"] == 0:
|
||||
continue
|
||||
|
||||
logger.info(
|
||||
f"Buffer drain : {counts['events']} events, "
|
||||
f"{counts['images']} images en attente — rejeu"
|
||||
)
|
||||
self._drain_buffer_once(buf)
|
||||
except Exception as e:
|
||||
logger.error(f"Buffer drain loop échoué : {e}")
|
||||
|
||||
def _drain_buffer_once(self, buf: PersistentBuffer):
|
||||
"""Une passe de drain : envoie ce qui peut l'être, incrémente le reste.
|
||||
|
||||
On arrête dès qu'un envoi échoue (serveur probablement down).
|
||||
"""
|
||||
# Events d'abord (plus légers, priorité métier AI Act)
|
||||
for row in buf.drain_events(limit=50):
|
||||
if not self._server_available:
|
||||
return
|
||||
try:
|
||||
import json as _json
|
||||
event = _json.loads(row["payload"])
|
||||
except (ValueError, TypeError):
|
||||
logger.error(
|
||||
f"Buffer : payload event #{row['id']} corrompu, suppression"
|
||||
)
|
||||
buf.delete_event(row["id"])
|
||||
continue
|
||||
if self._send_event(event):
|
||||
buf.delete_event(row["id"])
|
||||
else:
|
||||
buf.increment_attempts(row["id"], "event")
|
||||
# Serveur répond mal — on arrête la passe
|
||||
return
|
||||
|
||||
# Puis images
|
||||
for row in buf.drain_images(limit=20):
|
||||
if not self._server_available:
|
||||
return
|
||||
image_path = row["image_path"]
|
||||
shot_id = row["shot_id"]
|
||||
if not os.path.exists(image_path):
|
||||
# Fichier local disparu (purge, clean-up) — on abandonne.
|
||||
# Fix P0-E : log ERROR (pas warning) — c'est une perte de donnée.
|
||||
logger.error(
|
||||
f"Buffer : image #{row['id']} introuvable sur disque "
|
||||
f"({image_path}) — entrée abandonnée (le serveur n'a "
|
||||
f"jamais reçu cette image, session={row['session_id']}, "
|
||||
f"shot={shot_id})"
|
||||
)
|
||||
buf.delete_image(row["id"])
|
||||
continue
|
||||
result = self._send_image(image_path, shot_id)
|
||||
if result is ImageSendResult.OK or result is True:
|
||||
buf.delete_image(row["id"])
|
||||
elif result is ImageSendResult.FILE_GONE:
|
||||
# Fix P0-E : fichier disparu pendant l'envoi.
|
||||
# Ce n'est PAS un succès HTTP — ne pas considérer comme tel.
|
||||
# On supprime néanmoins l'entrée (retry voué à échouer)
|
||||
# mais avec un log ERROR explicite.
|
||||
logger.error(
|
||||
f"Buffer : image #{row['id']} disparue pendant l'envoi "
|
||||
f"({image_path}) — entrée abandonnée, pas de retry "
|
||||
f"(session={row['session_id']}, shot={shot_id})"
|
||||
)
|
||||
buf.delete_image(row["id"])
|
||||
else:
|
||||
buf.increment_attempts(row["id"], "image")
|
||||
return
|
||||
|
||||
# =========================================================================
|
||||
# Compression JPEG
|
||||
# =========================================================================
|
||||
@@ -287,6 +516,34 @@ class TraceStreamer:
|
||||
logger.warning(f"Compression JPEG échouée, envoi PNG brut: {e}")
|
||||
return None, None, None
|
||||
|
||||
# =========================================================================
|
||||
# Purge locale après ACK (Partie A)
|
||||
# =========================================================================
|
||||
|
||||
@staticmethod
|
||||
def _purge_local_image(path: str):
|
||||
"""Supprime un screenshot local après ACK 200 du serveur.
|
||||
|
||||
Ne crashe JAMAIS si le fichier est verrouillé (cas Windows) ou
|
||||
déjà supprimé : on log en debug et on continue. L'auto-cleanup
|
||||
de SessionStorage repassera plus tard.
|
||||
"""
|
||||
if not PURGE_AFTER_ACK:
|
||||
return
|
||||
try:
|
||||
os.remove(path)
|
||||
logger.debug(f"Screenshot local purgé après ACK : {path}")
|
||||
except FileNotFoundError:
|
||||
# Déjà supprimé ou chemin invalide — silencieux
|
||||
pass
|
||||
except PermissionError as e:
|
||||
# Windows verrouille parfois les fichiers (antivirus, indexation...)
|
||||
logger.debug(
|
||||
f"Purge différée (fichier verrouillé) : {path} — {e}"
|
||||
)
|
||||
except OSError as e:
|
||||
logger.debug(f"Purge échouée : {path} — {e}")
|
||||
|
||||
# =========================================================================
|
||||
# Envois HTTP
|
||||
# =========================================================================
|
||||
@@ -337,7 +594,7 @@ class TraceStreamer:
|
||||
else:
|
||||
logger.warning(f"Finalisation échouée: {resp.status_code}")
|
||||
except Exception as e:
|
||||
logger.debug(f"Finalisation échouée: {e}")
|
||||
logger.warning(f"Finalisation échouée: {e}")
|
||||
|
||||
def _send_event(self, event: dict) -> bool:
|
||||
"""Envoyer un événement au serveur (avec identifiant machine)."""
|
||||
@@ -361,14 +618,23 @@ class TraceStreamer:
|
||||
logger.debug(f"Streaming Event échoué: {e}")
|
||||
return False
|
||||
|
||||
def _send_image(self, path: str, shot_id: str) -> bool:
|
||||
def _send_image(self, path: str, shot_id: str):
|
||||
"""Envoyer un screenshot au serveur, compressé en JPEG.
|
||||
|
||||
Utilise un context manager pour le fallback PNG afin d'éviter
|
||||
les fuites de descripteurs de fichier.
|
||||
|
||||
Partie A (purge après ACK) : en cas de HTTP 200 confirmé, le fichier
|
||||
local est supprimé (le serveur devient la source de vérité).
|
||||
|
||||
Fix P0-E : retourne `ImageSendResult` (OK / FAILED / FILE_GONE).
|
||||
Les appelants historiques qui attendaient un bool continuent de
|
||||
fonctionner grâce à la truthiness du enum (OK → True, reste → False),
|
||||
MAIS le drain du buffer doit désormais discriminer FILE_GONE pour
|
||||
ne pas confondre "fichier disparu" avec "envoyé avec succès".
|
||||
"""
|
||||
if not self._server_available:
|
||||
return False
|
||||
return ImageSendResult.FAILED
|
||||
try:
|
||||
# Tenter la compression JPEG (réduction ~5-10x vs PNG)
|
||||
jpeg_buf, content_type, suffix = self._compress_image_to_jpeg(path)
|
||||
@@ -391,7 +657,10 @@ class TraceStreamer:
|
||||
headers=self._auth_headers(),
|
||||
timeout=5,
|
||||
)
|
||||
return resp.ok
|
||||
if resp.ok:
|
||||
self._purge_local_image(path)
|
||||
return ImageSendResult.OK
|
||||
return ImageSendResult.FAILED
|
||||
else:
|
||||
# Fallback : envoi PNG original avec context manager
|
||||
with open(path, "rb") as f:
|
||||
@@ -405,7 +674,20 @@ class TraceStreamer:
|
||||
headers=self._auth_headers(),
|
||||
timeout=5,
|
||||
)
|
||||
return resp.ok
|
||||
if resp.ok:
|
||||
self._purge_local_image(path)
|
||||
return ImageSendResult.OK
|
||||
return ImageSendResult.FAILED
|
||||
except FileNotFoundError:
|
||||
# Fix P0-E : fichier local disparu. On NE doit PAS considérer ça
|
||||
# comme un succès HTTP 200. Le serveur n'a rien reçu. On signale
|
||||
# `FILE_GONE` pour que le drain du buffer supprime l'entrée
|
||||
# (pas de retry possible) tout en loguant ERROR (pas debug).
|
||||
logger.error(
|
||||
f"Image {shot_id} introuvable sur disque ({path}) — "
|
||||
f"abandon (serveur n'a rien reçu)"
|
||||
)
|
||||
return ImageSendResult.FILE_GONE
|
||||
except Exception as e:
|
||||
logger.debug(f"Streaming Image échoué: {e}")
|
||||
return False
|
||||
return ImageSendResult.FAILED
|
||||
|
||||
418
agent_v0/agent_v1/ui/activity_panel.py
Normal file
418
agent_v0/agent_v1/ui/activity_panel.py
Normal file
@@ -0,0 +1,418 @@
|
||||
# agent_v1/ui/activity_panel.py
|
||||
"""
|
||||
Panel d'activité temps réel de Léa.
|
||||
|
||||
Affiche à l'utilisateur ce que Léa fait *maintenant* :
|
||||
- État courant (Observe / Cherche / Agit / Vérifie / Bloquée)
|
||||
- Action en cours (ex: "Clic sur Rechercher")
|
||||
- Progression (ex: "3/15")
|
||||
- Temps écoulé depuis le début du workflow
|
||||
|
||||
Contraintes :
|
||||
- Fallback silencieux si tkinter absent (ne crash jamais)
|
||||
- Thread-safe (mises à jour depuis les threads de replay)
|
||||
- Pas de dépendance à PyQt5 (seulement tkinter, déjà utilisé par chat_window)
|
||||
|
||||
Utilisation :
|
||||
panel = ActivityPanel()
|
||||
panel.definir_workflow("Saisie patient", nb_etapes=15)
|
||||
panel.mettre_a_jour(etat=EtatLea.AGIT, action="Clic sur Valider", etape=3)
|
||||
panel.masquer()
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class EtatLea(Enum):
    """Macroscopic states Léa can be in during a workflow replay.

    Each member carries three pieces of data: a machine-readable code, a
    user-facing label (French, shown in the activity panel) and the hex
    color the panel uses for that state.
    """

    INACTIVE = ("inactive", "Prête", "#808080")  # grey
    OBSERVE = ("observe", "Observe", "#4A90E2")  # blue
    CHERCHE = ("cherche", "Cherche", "#F5A623")  # orange
    AGIT = ("agit", "Agit", "#7ED321")  # green
    VERIFIE = ("verifie", "Vérifie", "#9013FE")  # purple
    BLOQUEE = ("bloquee", "Bloquée", "#D0021B")  # red
    TERMINE = ("termine", "Terminé", "#50E3C2")  # turquoise

    def __init__(self, code: str, libelle: str, couleur: str) -> None:
        # Unpack the member tuple onto named attributes so callers can use
        # `state.code` / `state.libelle` / `state.couleur` directly.
        self.code, self.libelle, self.couleur = code, libelle, couleur
||||
|
||||
|
||||
@dataclass
class EtatActivite:
    """Point-in-time snapshot of Léa's activity.

    Consumed by the panel and returned by ``ActivityPanel.snapshot()`` so
    tests can inspect the state without depending on tkinter.
    """

    etat: EtatLea = EtatLea.INACTIVE
    action_courante: str = ""
    nom_workflow: str = ""
    etape: int = 0
    nb_etapes: int = 0
    debut_timestamp: float = 0.0
    dernier_message: str = ""

    def temps_ecoule_s(self) -> float:
        """Seconds elapsed since the workflow started (0.0 when not started)."""
        if self.debut_timestamp <= 0:
            return 0.0
        ecoule = time.time() - self.debut_timestamp
        # Clamp: a clock going backwards must never yield a negative duration.
        return ecoule if ecoule > 0.0 else 0.0

    def progression_texte(self) -> str:
        """Progress as text, e.g. '3/15' ('' when the step count is unknown)."""
        return f"{self.etape}/{self.nb_etapes}" if self.nb_etapes > 0 else ""

    def temps_ecoule_texte(self) -> str:
        """Human-readable elapsed time, e.g. '12s' or '1m24s'."""
        total = int(self.temps_ecoule_s())
        minutes, secondes = divmod(total, 60)
        if minutes == 0:
            return f"{secondes}s"
        return f"{minutes}m{secondes:02d}s"

    def to_dict(self) -> dict:
        """Serialize for logging and tests."""
        return {
            "etat": self.etat.code,
            "etat_libelle": self.etat.libelle,
            "action_courante": self.action_courante,
            "nom_workflow": self.nom_workflow,
            "etape": self.etape,
            "nb_etapes": self.nb_etapes,
            "progression": self.progression_texte(),
            "temps_ecoule_s": round(self.temps_ecoule_s(), 1),
            "dernier_message": self.dernier_message,
        }
|
||||
|
||||
|
||||
class ActivityPanel:
    """Léa's real-time activity panel.

    Thread-safe. The tkinter window is created lazily, and only when tkinter
    is importable. Every method is safe to call even when no UI is available
    (silent fallback): the panel then only tracks state for ``snapshot()``
    consumers and registered listeners.
    """

    def __init__(self, activer_ui: bool = True) -> None:
        # Re-entrant lock: public methods take it and then call snapshot(),
        # which takes it again — a plain Lock would deadlock here.
        self._lock = threading.RLock()
        self._etat = EtatActivite()
        self._activer_ui = activer_ui
        # tkinter UI (created on demand, in the UI thread)
        self._tk_root = None
        self._tk_labels: dict = {}
        self._ui_disponible = None  # Lazy probe: None = unknown, False = unavailable
        self._listeners = []  # Callbacks invoked on every state change

    # ------------------------------------------------------------------
    # Public API (thread-safe)
    # ------------------------------------------------------------------

    def definir_workflow(self, nom: str, nb_etapes: int = 0) -> None:
        """Start tracking a new workflow.

        Replaces the whole state (fresh timestamp, OBSERVE state), then
        notifies listeners and refreshes the UI.
        """
        with self._lock:
            self._etat = EtatActivite(
                etat=EtatLea.OBSERVE,
                nom_workflow=nom,
                nb_etapes=nb_etapes,
                debut_timestamp=time.time(),
            )
        self._notifier_changement()
        self._rafraichir_ui()
        logger.info(f"[ACTIVITY] Workflow démarré : {nom} ({nb_etapes} étapes)")

    def mettre_a_jour(
        self,
        etat: Optional[EtatLea] = None,
        action: Optional[str] = None,
        etape: Optional[int] = None,
        message: Optional[str] = None,
    ) -> None:
        """Update the displayed state.

        All parameters are optional — only the fields that are provided are
        updated; the other fields keep their current value.
        """
        with self._lock:
            if etat is not None:
                self._etat.etat = etat
            if action is not None:
                self._etat.action_courante = action
            if etape is not None:
                self._etat.etape = etape
            if message is not None:
                self._etat.dernier_message = message

        self._notifier_changement()
        self._rafraichir_ui()

    def terminer(self, succes: bool = True) -> None:
        """Mark the workflow as finished (TERMINE on success, BLOQUEE otherwise)."""
        with self._lock:
            self._etat.etat = EtatLea.TERMINE if succes else EtatLea.BLOQUEE
            if not succes:
                # Keep any message already set; otherwise show a default one.
                self._etat.dernier_message = (
                    self._etat.dernier_message or "Léa a rendu la main"
                )
        self._notifier_changement()
        self._rafraichir_ui()

    def reinitialiser(self) -> None:
        """Reset the panel to the idle state."""
        with self._lock:
            self._etat = EtatActivite()
        self._notifier_changement()
        self._rafraichir_ui()

    def snapshot(self) -> EtatActivite:
        """Return an independent copy of the current state (for tests/listeners).

        Copying field-by-field under the lock means callers can read the
        snapshot without racing later mutations.
        """
        with self._lock:
            return EtatActivite(
                etat=self._etat.etat,
                action_courante=self._etat.action_courante,
                nom_workflow=self._etat.nom_workflow,
                etape=self._etat.etape,
                nb_etapes=self._etat.nb_etapes,
                debut_timestamp=self._etat.debut_timestamp,
                dernier_message=self._etat.dernier_message,
            )

    def masquer(self) -> None:
        """Hide the UI window if one was created (no-op otherwise)."""
        if self._tk_root is not None:
            try:
                self._tk_root.withdraw()
            except Exception:
                pass

    def afficher(self) -> None:
        """Show the UI window, creating it first when possible."""
        self._creer_ui_si_besoin()
        if self._tk_root is not None:
            try:
                self._tk_root.deiconify()
            except Exception:
                pass

    def on_change(self, callback) -> None:
        """Register a listener called with an EtatActivite snapshot on each change."""
        with self._lock:
            self._listeners.append(callback)

    # ------------------------------------------------------------------
    # tkinter UI management (lazy, silent fallback)
    # ------------------------------------------------------------------

    def _creer_ui_si_besoin(self) -> None:
        """Create the tkinter window on first use (lazy).

        Sets ``self._ui_disponible`` to False permanently when tkinter cannot
        be imported or the window cannot be built, so later calls bail out
        fast instead of re-probing.
        """
        if not self._activer_ui:
            return
        if self._tk_root is not None:
            return
        if self._ui_disponible is False:
            return  # Already probed and found unavailable

        try:
            import tkinter as tk
        except Exception as e:
            logger.debug(f"[ACTIVITY] tkinter indisponible : {e}")
            self._ui_disponible = False
            return

        try:
            # Reuse an existing root as parent (Toplevel) when one exists;
            # otherwise this panel owns its own Tk root.
            self._tk_root = tk.Toplevel() if _tk_root_existe() else tk.Tk()
            self._tk_root.title("Léa — Activité")
            self._tk_root.geometry("340x180+40+40")
            self._tk_root.attributes("-topmost", True)
            self._tk_root.resizable(False, False)
            self._tk_root.configure(bg="#1E1E1E")

            titre = tk.Label(
                self._tk_root,
                text="Léa",
                font=("Segoe UI", 14, "bold"),
                fg="#FFFFFF",
                bg="#1E1E1E",
            )
            titre.pack(pady=(10, 2))

            self._tk_labels["etat"] = tk.Label(
                self._tk_root,
                text="Prête",
                font=("Segoe UI", 11),
                fg="#808080",
                bg="#1E1E1E",
            )
            self._tk_labels["etat"].pack()

            self._tk_labels["action"] = tk.Label(
                self._tk_root,
                text="",
                font=("Segoe UI", 10),
                fg="#FFFFFF",
                bg="#1E1E1E",
                wraplength=300,
            )
            self._tk_labels["action"].pack(pady=(8, 2))

            self._tk_labels["progression"] = tk.Label(
                self._tk_root,
                text="",
                font=("Segoe UI", 9),
                fg="#B0B0B0",
                bg="#1E1E1E",
            )
            self._tk_labels["progression"].pack()

            self._tk_labels["temps"] = tk.Label(
                self._tk_root,
                text="",
                font=("Segoe UI", 9),
                fg="#808080",
                bg="#1E1E1E",
            )
            self._tk_labels["temps"].pack(pady=(4, 0))

            self._tk_labels["message"] = tk.Label(
                self._tk_root,
                text="",
                font=("Segoe UI", 9, "italic"),
                fg="#B0B0B0",
                bg="#1E1E1E",
                wraplength=300,
            )
            self._tk_labels["message"].pack(pady=(6, 10))

            # Hidden by default: the panel is only shown while a workflow runs
            self._tk_root.withdraw()
            self._ui_disponible = True
        except Exception as e:
            logger.debug(f"[ACTIVITY] Impossible de créer l'UI : {e}")
            self._ui_disponible = False
            self._tk_root = None

    def _rafraichir_ui(self) -> None:
        """Push the current state into the tkinter labels (safe without UI)."""
        if not self._activer_ui or self._ui_disponible is False:
            return
        self._creer_ui_si_besoin()
        if self._tk_root is None:
            return

        try:
            with self._lock:
                snap = self.snapshot()

            # Use after(0) so the widget updates run on the tkinter UI thread.
            # NOTE(review): this assumes a mainloop is running somewhere
            # (presumably chat_window's) — confirm, otherwise updates queue up.
            def _update():
                try:
                    self._tk_labels["etat"].config(
                        text=snap.etat.libelle,
                        fg=snap.etat.couleur,
                    )
                    if snap.action_courante:
                        self._tk_labels["action"].config(text=snap.action_courante)
                    else:
                        self._tk_labels["action"].config(text="")

                    prog = snap.progression_texte()
                    if prog and snap.nom_workflow:
                        self._tk_labels["progression"].config(
                            text=f"« {snap.nom_workflow} » — {prog}"
                        )
                    elif snap.nom_workflow:
                        self._tk_labels["progression"].config(
                            text=f"« {snap.nom_workflow} »"
                        )
                    else:
                        self._tk_labels["progression"].config(text="")

                    if snap.debut_timestamp > 0:
                        self._tk_labels["temps"].config(
                            text=f"⏱ {snap.temps_ecoule_texte()}"
                        )
                    else:
                        self._tk_labels["temps"].config(text="")

                    self._tk_labels["message"].config(text=snap.dernier_message)

                    # Auto-show while a workflow is active
                    if snap.etat != EtatLea.INACTIVE:
                        self._tk_root.deiconify()
                except Exception:
                    pass

            try:
                self._tk_root.after(0, _update)
            except Exception:
                # The root has been destroyed — drop the UI for good.
                self._tk_root = None
                self._ui_disponible = False
        except Exception as e:
            logger.debug(f"[ACTIVITY] Erreur rafraîchissement UI : {e}")

    def _notifier_changement(self) -> None:
        """Invoke all registered listeners with a fresh snapshot.

        The listener list and the snapshot are captured under the lock, but
        callbacks themselves run outside any listener-list mutation; a failing
        callback is logged and never breaks the panel.
        """
        with self._lock:
            listeners = list(self._listeners)
            snap = self.snapshot()

        for cb in listeners:
            try:
                cb(snap)
            except Exception as e:
                logger.debug(f"[ACTIVITY] Listener erreur : {e}")
|
||||
|
||||
|
||||
def _tk_root_existe() -> bool:
|
||||
"""Vérifier si un root tkinter existe déjà (pour créer un Toplevel)."""
|
||||
try:
|
||||
import tkinter as tk
|
||||
|
||||
default_root = getattr(tk, "_default_root", None)
|
||||
return default_root is not None
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Singleton global (optionnel)
|
||||
# ============================================================================
|
||||
|
||||
|
||||
_INSTANCE_GLOBALE: Optional[ActivityPanel] = None
_LOCK_SINGLETON = threading.Lock()


def get_activity_panel(activer_ui: bool = True) -> ActivityPanel:
    """Return the process-wide ActivityPanel, creating it lazily on first use."""
    global _INSTANCE_GLOBALE
    with _LOCK_SINGLETON:
        if _INSTANCE_GLOBALE is None:
            _INSTANCE_GLOBALE = ActivityPanel(activer_ui=activer_ui)
        return _INSTANCE_GLOBALE


def reset_activity_panel() -> None:
    """Drop the singleton (test helper).

    Best-effort hides the panel's window first; any UI error is ignored.
    """
    global _INSTANCE_GLOBALE
    with _LOCK_SINGLETON:
        panel, _INSTANCE_GLOBALE = _INSTANCE_GLOBALE, None
        if panel is None:
            return
        try:
            panel.masquer()
        except Exception:
            pass
|
||||
@@ -3,15 +3,25 @@ Mini serveur HTTP sur l'agent Windows pour les captures d'ecran a la demande
|
||||
et les operations fichiers.
|
||||
|
||||
Ecoute sur le port 5006 (configurable via RPA_CAPTURE_PORT).
|
||||
Bind par defaut sur 127.0.0.1 (configurable via RPA_CAPTURE_BIND).
|
||||
Endpoints :
|
||||
GET /capture -> screenshot frais en base64 (JPEG)
|
||||
GET /health -> {"status": "ok"}
|
||||
GET /health -> {"status": "ok"} (pas d'auth — sonde liveness)
|
||||
POST /file-action -> operations fichiers (list, create, move, copy, sort)
|
||||
|
||||
Securite :
|
||||
- Authentification Bearer obligatoire (RPA_API_TOKEN) pour /capture et
|
||||
/file-action. Sans token configure, ces endpoints sont desactives.
|
||||
- Les tentatives non authentifiees sont loguees (WARNING) avec l'IP source.
|
||||
- Bind defaut localhost. Pour exposer sur le LAN (cas VWB backend qui
|
||||
appelle l'agent a distance), definir explicitement
|
||||
RPA_CAPTURE_BIND=0.0.0.0. L'auth reste alors la seule protection.
|
||||
"""
|
||||
import threading
|
||||
import logging
|
||||
import json
|
||||
import base64
|
||||
import hmac
|
||||
import io
|
||||
import os
|
||||
import time
|
||||
@@ -20,6 +30,17 @@ from http.server import HTTPServer, BaseHTTPRequestHandler
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
CAPTURE_PORT = int(os.environ.get("RPA_CAPTURE_PORT", "5006"))
|
||||
# Bind par defaut sur localhost — defense en profondeur.
|
||||
# Pour le deploiement VWB (backend Linux -> agent Windows), definir
|
||||
# RPA_CAPTURE_BIND=0.0.0.0 explicitement. L'auth par token reste requise.
|
||||
CAPTURE_BIND = os.environ.get("RPA_CAPTURE_BIND", "127.0.0.1")
|
||||
|
||||
# Token d'authentification (partage avec le streaming). Doit etre defini pour
|
||||
# que /capture et /file-action soient accessibles.
|
||||
CAPTURE_TOKEN = os.environ.get("RPA_API_TOKEN", "")
|
||||
|
||||
# Endpoints ouverts (pas d'auth requise — sondes techniques uniquement)
|
||||
_PUBLIC_PATHS = {"/health"}
|
||||
|
||||
# Floutage des données sensibles (conformité AI Act)
|
||||
BLUR_SENSITIVE = os.environ.get("RPA_BLUR_SENSITIVE", "true").lower() in ("true", "1", "yes")
|
||||
@@ -33,6 +54,8 @@ class CaptureHandler(BaseHTTPRequestHandler):
|
||||
|
||||
def do_GET(self):
|
||||
if self.path == "/capture":
|
||||
if not self._check_auth():
|
||||
return
|
||||
self._handle_capture()
|
||||
elif self.path == "/health":
|
||||
self._send_json(200, {"status": "ok"})
|
||||
@@ -41,10 +64,56 @@ class CaptureHandler(BaseHTTPRequestHandler):
|
||||
|
||||
def do_POST(self):
|
||||
if self.path == "/file-action":
|
||||
if not self._check_auth():
|
||||
return
|
||||
self._handle_file_action()
|
||||
else:
|
||||
self._send_json(404, {"error": "not found"})
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _check_auth(self) -> bool:
|
||||
"""Valide le Bearer token. Renvoie 401/503 si invalide.
|
||||
|
||||
- Si aucun token n'est configure cote serveur (RPA_API_TOKEN vide),
|
||||
on refuse toutes les requetes sensibles (503) — fail-closed.
|
||||
- Sinon, on compare en temps constant via hmac.compare_digest.
|
||||
- Les tentatives echouees sont loguees avec l'IP source.
|
||||
"""
|
||||
# Autoriser les endpoints publics
|
||||
if self.path in _PUBLIC_PATHS:
|
||||
return True
|
||||
|
||||
peer = self.client_address[0] if self.client_address else "?"
|
||||
|
||||
if not CAPTURE_TOKEN:
|
||||
logger.error(
|
||||
"Refus %s depuis %s : RPA_API_TOKEN non configure "
|
||||
"(capture server en mode fail-closed)",
|
||||
self.path, peer,
|
||||
)
|
||||
self._send_json(503, {
|
||||
"error": "capture server non configure (token manquant)",
|
||||
})
|
||||
return False
|
||||
|
||||
auth_header = self.headers.get("Authorization", "")
|
||||
token = ""
|
||||
if auth_header.startswith("Bearer "):
|
||||
token = auth_header[len("Bearer "):].strip()
|
||||
|
||||
if not token or not hmac.compare_digest(token, CAPTURE_TOKEN):
|
||||
logger.warning(
|
||||
"Tentative d'acces non autorisee a %s depuis %s "
|
||||
"(token %s)",
|
||||
self.path, peer,
|
||||
"absent" if not token else "invalide",
|
||||
)
|
||||
self._send_json(401, {"error": "unauthorized"})
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def do_OPTIONS(self):
|
||||
"""Gestion CORS preflight."""
|
||||
self.send_response(200)
|
||||
@@ -351,21 +420,46 @@ class _FileActionHandlerLocal:
|
||||
class CaptureServer:
|
||||
"""Serveur de capture d'ecran en temps reel (thread daemon)."""
|
||||
|
||||
def __init__(self, port: int = CAPTURE_PORT):
|
||||
def __init__(self, port: int = CAPTURE_PORT, bind: str = CAPTURE_BIND):
|
||||
self._port = port
|
||||
self._bind = bind
|
||||
self._server: HTTPServer | None = None
|
||||
self._thread: threading.Thread | None = None
|
||||
|
||||
def start(self):
|
||||
"""Demarre le serveur dans un thread daemon."""
|
||||
"""Demarre le serveur dans un thread daemon.
|
||||
|
||||
Avertit si le serveur est expose sur le LAN sans token configure.
|
||||
"""
|
||||
# Defense en profondeur : refus de demarrer si expose LAN sans auth
|
||||
exposed_lan = self._bind not in ("127.0.0.1", "localhost", "::1")
|
||||
if exposed_lan and not CAPTURE_TOKEN:
|
||||
logger.error(
|
||||
"REFUS demarrage capture server : bind=%s (LAN) sans "
|
||||
"RPA_API_TOKEN. Definir le token ou RPA_CAPTURE_BIND=127.0.0.1.",
|
||||
self._bind,
|
||||
)
|
||||
print(
|
||||
f"[CAPTURE] REFUS demarrage : bind={self._bind} sans token. "
|
||||
f"Definir RPA_API_TOKEN ou RPA_CAPTURE_BIND=127.0.0.1."
|
||||
)
|
||||
return
|
||||
|
||||
try:
|
||||
self._server = HTTPServer(("0.0.0.0", self._port), CaptureHandler)
|
||||
self._server = HTTPServer((self._bind, self._port), CaptureHandler)
|
||||
self._thread = threading.Thread(
|
||||
target=self._server.serve_forever, daemon=True
|
||||
)
|
||||
self._thread.start()
|
||||
logger.info(f"Capture server demarre sur le port {self._port}")
|
||||
print(f"[CAPTURE] Serveur de capture demarre sur le port {self._port}")
|
||||
auth_mode = "token requis" if CAPTURE_TOKEN else "token absent (fail-closed)"
|
||||
logger.info(
|
||||
"Capture server demarre sur %s:%s (%s)",
|
||||
self._bind, self._port, auth_mode,
|
||||
)
|
||||
print(
|
||||
f"[CAPTURE] Serveur de capture demarre sur "
|
||||
f"{self._bind}:{self._port} ({auth_mode})"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Impossible de demarrer le capture server : {e}")
|
||||
print(f"[CAPTURE] ERREUR demarrage : {e}")
|
||||
|
||||
612
agent_v0/agent_v1/ui/messages.py
Normal file
612
agent_v0/agent_v1/ui/messages.py
Normal file
@@ -0,0 +1,612 @@
|
||||
# agent_v1/ui/messages.py
|
||||
"""
|
||||
Formatage des messages utilisateur pour Léa.
|
||||
|
||||
Convertit les codes d'erreur techniques (`target_not_found`, `no_screen_change`...)
|
||||
en phrases en français naturel, orientées action, adaptées à un utilisateur non
|
||||
technique (secrétaire médicale, TIM).
|
||||
|
||||
Trois niveaux de sévérité sont définis :
|
||||
- INFO — Léa fait son travail normalement
|
||||
- ATTENTION — Quelque chose de léger (ralentissement, retry)
|
||||
- BLOCAGE — Léa a besoin d'aide, elle rend la main
|
||||
|
||||
Le module est 100% pur (pas d'I/O, pas d'UI) : testable sans mocks lourds.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import Any, Mapping, Optional
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Accès paresseux au DomainContext
|
||||
# ----------------------------------------------------------------------------
|
||||
#
|
||||
# On importe le module à l'appel pour éviter toute dépendance circulaire
|
||||
# avec `agent_v0.server_v1.domain_context` (qui ne doit pas importer l'UI).
|
||||
# Si l'import échoue (contexte client sans server_v1), on retombe sur None
|
||||
# et les formatters gardent leur comportement générique historique.
|
||||
|
||||
|
||||
def _get_domain_ctx(domain_id: Optional[str]):
|
||||
"""Récupérer un DomainContext si possible, sinon None (fallback)."""
|
||||
if not domain_id:
|
||||
return None
|
||||
try:
|
||||
from agent_v0.server_v1.domain_context import get_domain_context # lazy
|
||||
return get_domain_context(domain_id)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _friendly_target(description: str, domain_id: Optional[str] = None) -> str:
    """Turn a technical target description into business wording when possible.

    E.g. (tim_codage): "DP" -> "diagnostic principal"
    E.g. (comptabilite): "TVA" -> "montant de TVA"
    Falls back to the cleaned description when no domain context matches or
    the synonym lookup fails.
    """
    cleaned = _nettoyer_description_cible(description)
    if not cleaned:
        return cleaned
    ctx = _get_domain_ctx(domain_id)
    if ctx is None:
        return cleaned
    try:
        return ctx._apply_synonyms(cleaned)
    except Exception:
        return cleaned
|
||||
|
||||
|
||||
class NiveauMessage(Enum):
    """Severity tiers for messages shown to the user."""

    INFO = "info"  # light green background, auto-dismiss, 3-5s
    ATTENTION = "attention"  # light orange background, auto-dismiss, 7s
    BLOCAGE = "blocage"  # light red background, stays on screen, 15s+
|
||||
|
||||
|
||||
# Default display duration (seconds) per severity level
DUREE_PAR_NIVEAU: dict[NiveauMessage, int] = {
    NiveauMessage.INFO: 4,
    NiveauMessage.ATTENTION: 7,
    NiveauMessage.BLOCAGE: 15,
}

# Text icon per level (compatible with plyer/Windows/Linux notifications)
ICONE_PAR_NIVEAU: dict[NiveauMessage, str] = {
    NiveauMessage.INFO: "i",
    NiveauMessage.ATTENTION: "!",
    NiveauMessage.BLOCAGE: "?",
}
|
||||
|
||||
|
||||
@dataclass
class MessageUtilisateur:
    """A message ready to be displayed to the user.

    Attributes:
        niveau: Severity (info/attention/blocage).
        titre: Short notification title (<= 60 characters).
        corps: Message body in plain French.
        duree_s: Recommended display duration, in seconds.
        persistent: When True, the user must dismiss it manually.
    """

    niveau: NiveauMessage
    titre: str
    corps: str
    duree_s: int
    persistent: bool = False

    def to_dict(self) -> dict:
        """Serialise the message (useful for tests and logging)."""
        return {
            "niveau": self.niveau.value,
            "titre": self.titre,
            "corps": self.corps,
            "duree_s": self.duree_s,
            "persistent": self.persistent,
        }
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Helpers d'extraction
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def _extraire_nom_application(titre_fenetre: str) -> str:
|
||||
"""Extraire le nom de l'application à partir d'un titre de fenêtre.
|
||||
|
||||
Les titres Windows suivent généralement le format :
|
||||
"Document.txt – Bloc-notes"
|
||||
"Ma Page - Google Chrome"
|
||||
"Sans titre — Paint"
|
||||
|
||||
On retourne la partie après le dernier séparateur, ou le titre entier.
|
||||
"""
|
||||
if not titre_fenetre:
|
||||
return ""
|
||||
titre = titre_fenetre.strip()
|
||||
# Chercher le dernier séparateur parmi " – ", " — ", " - "
|
||||
for sep in (" – ", " — ", " - "):
|
||||
if sep in titre:
|
||||
return titre.rsplit(sep, 1)[-1].strip()
|
||||
return titre
|
||||
|
||||
|
||||
def _nettoyer_description_cible(description: str) -> str:
|
||||
"""Nettoyer la description technique d'une cible pour l'afficher.
|
||||
|
||||
Supprime les caractères techniques (guillemets inutiles, ':').
|
||||
"""
|
||||
if not description:
|
||||
return ""
|
||||
desc = description.strip()
|
||||
# Retirer les guillemets encapsulants
|
||||
desc = desc.strip("'\"`")
|
||||
# Limiter la longueur
|
||||
if len(desc) > 80:
|
||||
desc = desc[:77] + "..."
|
||||
return desc
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Formattage des messages techniques → humains
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def formatter_cible_non_trouvee(
    description_cible: str,
    titre_fenetre: Optional[str] = None,
    domain_id: Optional[str] = None,
    params: Optional[Mapping[str, Any]] = None,
) -> MessageUtilisateur:
    """Message shown when Léa cannot find an element to click.

    When a business domain is provided, the target description is
    rewritten in domain language through the DomainContext
    (e.g. tim_codage + "DP" -> "diagnostic principal"). When the domain
    also exposes clarification templates, the domain-specific question
    is preferred over the generic wording.

    Args:
        description_cible: Raw target description.
        titre_fenetre: Active window title (used to extract the app name).
        domain_id: Optional business domain to enrich the output.
        params: Workflow parameters (nom_patient, num_facture...) used
            by the business clarification templates.
    """
    cible = _friendly_target(description_cible, domain_id) or "l'élément"
    app = _extraire_nom_application(titre_fenetre or "")

    # Prefer a domain-specific clarification question when available:
    # it is more relevant than the generic message below.
    ctx = _get_domain_ctx(domain_id)
    if ctx is not None and ctx.clarification_templates:
        try:
            question = ctx.pose_clarification_question(
                {
                    "blocked_on": "target_not_found",
                    "target": description_cible or "",
                    "app": app,
                    "params": dict(params or {}),
                }
            )
        except Exception:
            question = ""
        if question:
            return MessageUtilisateur(
                niveau=NiveauMessage.BLOCAGE,
                titre="Léa a besoin d'aide",
                corps=question,
                duree_s=DUREE_PAR_NIVEAU[NiveauMessage.BLOCAGE],
                persistent=True,
            )

    if app:
        texte = (
            f"Je ne trouve pas « {cible} » dans {app}. "
            f"Peux-tu cliquer dessus toi-même ? Je reprends ensuite."
        )
    else:
        texte = (
            f"Je ne trouve pas « {cible} » à l'écran. "
            f"Peux-tu le faire toi-même ? Je reprends ensuite."
        )

    return MessageUtilisateur(
        niveau=NiveauMessage.BLOCAGE,
        titre="Léa a besoin d'aide",
        corps=texte,
        duree_s=DUREE_PAR_NIVEAU[NiveauMessage.BLOCAGE],
        persistent=True,
    )
|
||||
|
||||
|
||||
def formatter_fenetre_incorrecte(
    titre_actuel: str,
    titre_attendu: str,
) -> MessageUtilisateur:
    """Message shown when the active window is not the expected one.

    Both titles are reduced to application names; the expected side
    falls back to the raw expected title, the actual side to a generic
    "une autre fenêtre".
    """
    app_actuelle = _extraire_nom_application(titre_actuel) or "une autre fenêtre"
    app_attendue = _extraire_nom_application(titre_attendu) or titre_attendu

    texte = (
        f"J'attends « {app_attendue} » mais c'est « {app_actuelle} » "
        f"qui est affiché. Peux-tu ouvrir la bonne fenêtre ?"
    )

    return MessageUtilisateur(
        niveau=NiveauMessage.BLOCAGE,
        titre="Léa attend une fenêtre",
        corps=texte,
        duree_s=DUREE_PAR_NIVEAU[NiveauMessage.BLOCAGE],
        persistent=True,
    )
|
||||
|
||||
|
||||
def formatter_ecran_inchange(action_type: str = "") -> MessageUtilisateur:
    """Message shown when the last action had no visible on-screen effect.

    The action type is translated to a French subject ("Mon clic",
    "Ma saisie"...); unknown types fall back to "Mon action".
    """
    libelles = {
        "click": "Mon clic",
        "type": "Ma saisie",
        "key_combo": "Mon raccourci clavier",
        "scroll": "Mon défilement",
    }
    sujet = libelles.get(action_type, "Mon action")

    return MessageUtilisateur(
        niveau=NiveauMessage.ATTENTION,
        titre="Léa vérifie",
        corps=(
            f"{sujet} n'a pas eu l'air de marcher. Je vais réessayer, "
            f"ou te rendre la main si ça ne passe pas."
        ),
        duree_s=DUREE_PAR_NIVEAU[NiveauMessage.ATTENTION],
    )
|
||||
|
||||
|
||||
def formatter_connexion_perdue(hote_serveur: str = "") -> MessageUtilisateur:
    """Message shown when the server connection is lost.

    Reassuring tone: retries happen automatically, no user action needed.
    `hote_serveur` is currently unused; kept for interface stability.
    """
    return MessageUtilisateur(
        niveau=NiveauMessage.ATTENTION,
        titre="Léa est déconnectée",
        corps=(
            "J'ai perdu le lien avec le serveur. Je retente automatiquement, "
            "pas besoin d'intervenir."
        ),
        duree_s=DUREE_PAR_NIVEAU[NiveauMessage.ATTENTION],
    )
|
||||
|
||||
|
||||
def formatter_connexion_retablie() -> MessageUtilisateur:
    """Message shown when the server connection is back."""
    return MessageUtilisateur(
        niveau=NiveauMessage.INFO,
        titre="Léa",
        corps="C'est bon, la connexion est revenue. Je continue.",
        duree_s=DUREE_PAR_NIVEAU[NiveauMessage.INFO],
    )
|
||||
|
||||
|
||||
def formatter_debut_workflow(nom_workflow: str, nb_etapes: int = 0) -> MessageUtilisateur:
    """Message shown when a replay workflow starts.

    Mentions the step count only when it is known (> 0).
    """
    if nb_etapes > 0:
        texte = (
            f"Je démarre « {nom_workflow} » ({nb_etapes} étapes). "
            f"Je t'indique mon avancement."
        )
    else:
        texte = f"Je démarre « {nom_workflow} ». Je t'indique mon avancement."

    return MessageUtilisateur(
        niveau=NiveauMessage.INFO,
        titre="Léa démarre",
        corps=texte,
        duree_s=DUREE_PAR_NIVEAU[NiveauMessage.INFO],
    )
|
||||
|
||||
|
||||
def formatter_etape_workflow(
    etape_actuelle: int,
    nb_etapes: int,
    description: str = "",
) -> MessageUtilisateur:
    """Progress message for a single workflow step (short INFO toast)."""
    texte = f"Étape {etape_actuelle}/{nb_etapes}"
    if description:
        texte += f" — {_nettoyer_description_cible(description)}"

    return MessageUtilisateur(
        niveau=NiveauMessage.INFO,
        titre="Léa avance",
        corps=texte,
        duree_s=3,  # shorter than the INFO default: progress toasts are frequent
    )
|
||||
|
||||
|
||||
def formatter_retry(action_type: str = "", tentative: int = 2) -> MessageUtilisateur:
    """Message shown when Léa retries an action.

    `action_type` is currently unused by the wording; kept for
    interface stability with the callers.
    """
    return MessageUtilisateur(
        niveau=NiveauMessage.ATTENTION,
        titre="Léa retente",
        corps=(
            f"Je retente (tentative {tentative}). Ça arrive parfois, "
            f"l'écran était peut-être en cours de chargement."
        ),
        duree_s=DUREE_PAR_NIVEAU[NiveauMessage.ATTENTION],
    )
|
||||
|
||||
|
||||
def formatter_ralentissement() -> MessageUtilisateur:
    """Message shown when Léa takes longer than expected."""
    return MessageUtilisateur(
        niveau=NiveauMessage.ATTENTION,
        titre="Léa prend son temps",
        corps="Je vais plus lentement que prévu. L'écran met du temps à répondre.",
        duree_s=DUREE_PAR_NIVEAU[NiveauMessage.ATTENTION],
    )
|
||||
|
||||
|
||||
def formatter_fin_workflow(
    succes: bool,
    nom_workflow: str = "",
    nb_etapes: int = 0,
    duree_s: float = 0.0,
    domain_id: Optional[str] = None,
    items_count: int = 0,
    failed_count: int = 0,
    params: Optional[Mapping[str, Any]] = None,
) -> MessageUtilisateur:
    """End-of-workflow message.

    When a business domain is provided (and exposes summary_templates),
    `DomainContext.describe_workflow_outcome` is used to phrase a report
    in domain language (e.g. "J'ai codé 14 dossiers sur 15"); otherwise
    a generic French message is built from the technical counters.

    Args:
        succes: True when the whole workflow succeeded.
        nom_workflow: Workflow name.
        nb_etapes: Number of technical steps (generic fallback).
        duree_s: Total duration in seconds.
        domain_id: Optional business domain.
        items_count: Number of business items processed (e.g. 15 files).
        failed_count: Number of failed items.
        params: Extra data forwarded to the templates.
    """
    # Domain-language report, when available.
    ctx = _get_domain_ctx(domain_id)
    if ctx is not None and ctx.summary_templates:
        try:
            rapport = ctx.describe_workflow_outcome(
                workflow_name=nom_workflow,
                success=succes,
                items_count=items_count or max(1, nb_etapes),
                failed_count=failed_count,
                elapsed_s=duree_s,
                extra=dict(params or {}),
            )
        except Exception:
            rapport = ""
        if rapport:
            if succes and failed_count == 0:
                return MessageUtilisateur(
                    niveau=NiveauMessage.INFO,
                    titre="Léa a terminé",
                    corps=rapport,
                    duree_s=6,
                )
            if succes and failed_count > 0:
                return MessageUtilisateur(
                    niveau=NiveauMessage.ATTENTION,
                    titre="Léa a terminé partiellement",
                    corps=rapport,
                    duree_s=DUREE_PAR_NIVEAU[NiveauMessage.ATTENTION],
                )
            return MessageUtilisateur(
                niveau=NiveauMessage.BLOCAGE,
                titre="Léa s'arrête",
                corps=rapport,
                duree_s=DUREE_PAR_NIVEAU[NiveauMessage.BLOCAGE],
                persistent=True,
            )

    # Generic fallback wording.
    if succes:
        if nom_workflow and nb_etapes > 0:
            texte = (
                f"C'est fait ! « {nom_workflow} » est terminé "
                f"({nb_etapes} étapes en {int(duree_s)}s)."
            )
        else:
            texte = "C'est fait ! Tout s'est bien passé."
        return MessageUtilisateur(
            niveau=NiveauMessage.INFO,
            titre="Léa a terminé",
            corps=texte,
            duree_s=6,
        )

    return MessageUtilisateur(
        niveau=NiveauMessage.BLOCAGE,
        titre="Léa s'arrête",
        corps=(
            "Je n'ai pas pu terminer. Je te rends la main, "
            "tu peux continuer à partir de là où je me suis arrêtée."
        ),
        duree_s=DUREE_PAR_NIVEAU[NiveauMessage.BLOCAGE],
        persistent=True,
    )
|
||||
|
||||
|
||||
def formatter_erreur_generique(
    message_technique: str,
    domain_id: Optional[str] = None,
    params: Optional[Mapping[str, Any]] = None,
) -> MessageUtilisateur:
    """Format an uncategorised technical error message.

    Known motifs in the technical text are routed to the matching
    specialised formatter; otherwise the (truncated) text is wrapped in
    a generic ATTENTION message. `domain_id` is forwarded to the
    specialised formatters so they can speak domain language.
    """
    if not message_technique:
        return MessageUtilisateur(
            niveau=NiveauMessage.ATTENTION,
            titre="Léa",
            corps="J'ai rencontré un petit souci. Je continue.",
            duree_s=DUREE_PAR_NIVEAU[NiveauMessage.ATTENTION],
        )

    texte_bas = message_technique.lower()

    # "target_not_found[: description]"
    if "target_not_found" in texte_bas:
        # Try to pull the description after the ':'.
        m = re.match(r"target_not_found[:\s]*(.*)", message_technique, re.IGNORECASE)
        description = m.group(1).strip() if m else ""
        return formatter_cible_non_trouvee(description, domain_id=domain_id, params=params)

    # "Fenêtre incorrecte: 'X' (attendu: 'Y')"
    if "fenêtre incorrecte" in texte_bas or "fenetre incorrecte" in texte_bas:
        m_actuel = re.search(r"[:,]\s*['\"]([^'\"]+)['\"]", message_technique)
        m_attendu = re.search(r"attendu[:\s]*['\"]([^'\"]+)['\"]", message_technique)
        return formatter_fenetre_incorrecte(
            m_actuel.group(1) if m_actuel else "",
            m_attendu.group(1) if m_attendu else "",
        )

    # Screen did not change after the action.
    if "inchang" in texte_bas or "no_screen_change" in texte_bas:
        return formatter_ecran_inchange()

    # Policy abort / visual resolution failure -> treated as target not found.
    if "policy_abort" in texte_bas or "visual_resolve_failed" in texte_bas:
        return formatter_cible_non_trouvee(
            message_technique, domain_id=domain_id, params=params
        )

    # Fallback: wrap the truncated technical message.
    texte = message_technique.strip()
    if len(texte) > 120:
        texte = texte[:117] + "..."

    return MessageUtilisateur(
        niveau=NiveauMessage.ATTENTION,
        titre="Léa",
        corps=f"J'ai rencontré un souci : {texte}",
        duree_s=DUREE_PAR_NIVEAU[NiveauMessage.ATTENTION],
    )
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Détection fenêtre Léa (utilisé par l'executor pour ignorer sa propre UI)
|
||||
# ============================================================================
|
||||
|
||||
|
||||
# Patterns identifying a window that belongs to Léa (the agent itself).
# Word boundaries (\b) avoid false positives on names that merely
# contain "lea" (e.g. "cléa.txt", "leapfrog", "replay").
_MOTIFS_FENETRE_LEA_REGEX = (
    r"\bléa\b",
    r"\blea\b(?!p)",  # whole word "lea", but not "leapfrog"
    r"lea\s*[—–\-:]",  # "Lea —", "Lea -", "Lea :"
    r"léa\s*[—–\-:]",
    r"\bassistante ia\b",
    r"\bléa ia\b",
    r"\blea ia\b",
)


def est_fenetre_lea(titre_fenetre: str) -> bool:
    """Return True when a window title belongs to the Léa agent itself.

    Used so that Léa does not treat its own UI as an intruding window
    during its pre-checks. Matching is case-insensitive and relies on
    word-boundary regexes to avoid false positives on file names
    containing "lea" (e.g. "cléa.txt", "replay.log").
    """
    if not titre_fenetre:
        return False
    titre = titre_fenetre.lower().strip()
    for motif in _MOTIFS_FENETRE_LEA_REGEX:
        if re.search(motif, titre):
            return True
    return False
|
||||
|
||||
|
||||
# Windows system-noise windows to ignore in pre-checks. These are not
# application windows — they grab focus unpredictably (systray overflow,
# taskbar, Program Manager...).
_FENETRES_BRUIT_SYSTEME = (
    "fenêtre de dépassement de capacité",
    "overflow",  # English systray variant
    "program manager",
    "barre des tâches",
    "task bar",
    "cortana",
    "action center",
    "centre de notifications",
)


def est_fenetre_bruit(titre_fenetre: str) -> bool:
    """Return True when a window title is Windows system noise.

    Such windows grab focus unpredictably and are never the target of a
    user action. An empty title or the "unknown_window" placeholder is
    also treated as noise.
    """
    if not titre_fenetre:
        return True  # no title = noise
    titre = titre_fenetre.lower().strip()
    if titre == "unknown_window":
        return True
    return any(bruit in titre for bruit in _FENETRES_BRUIT_SYSTEME)
|
||||
|
||||
|
||||
# Kept for backward compatibility with code that iterated over
# MOTIFS_FENETRE_LEA; new code should prefer est_fenetre_lea(), which
# uses word-boundary regexes instead of plain substrings.
MOTIFS_FENETRE_LEA = (
    "léa",
    "lea —",
    "léa —",
    "lea -",
    "léa -",
    "lea assistante",
    "léa assistante",
    "lea : ",
    "léa : ",
    "assistante ia",
)
|
||||
@@ -5,6 +5,14 @@ Utilise plyer pour les notifications système, sans dépendance PyQt5.
|
||||
|
||||
Remplace les dialogues Qt par des toasts non-bloquants.
|
||||
Thread-safe avec rate limiting (1 notification / 2 secondes max).
|
||||
|
||||
Les messages utilisateur sont formatés via `agent_v1.ui.messages` qui convertit
|
||||
les codes techniques (target_not_found, etc.) en français naturel.
|
||||
|
||||
Hiérarchie des notifications (cf. messages.NiveauMessage) :
|
||||
- INFO : auto-dismiss en ~4s, rate-limité classique
|
||||
- ATTENTION : auto-dismiss en ~7s, rate-limité classique
|
||||
- BLOCAGE : persistant (15s+), bypass du rate limit
|
||||
"""
|
||||
|
||||
import logging
|
||||
@@ -12,6 +20,22 @@ import threading
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
from .messages import (
|
||||
MessageUtilisateur,
|
||||
NiveauMessage,
|
||||
formatter_cible_non_trouvee,
|
||||
formatter_connexion_perdue,
|
||||
formatter_connexion_retablie,
|
||||
formatter_debut_workflow,
|
||||
formatter_ecran_inchange,
|
||||
formatter_erreur_generique,
|
||||
formatter_etape_workflow,
|
||||
formatter_fenetre_incorrecte,
|
||||
formatter_fin_workflow,
|
||||
formatter_ralentissement,
|
||||
formatter_retry,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Import conditionnel de plyer — fallback silencieux si absent
|
||||
@@ -59,7 +83,13 @@ class NotificationManager:
|
||||
# Méthode générique
|
||||
# ------------------------------------------------------------------ #
|
||||
|
||||
def notify(self, title: str, message: str, timeout: int = 5) -> bool:
|
||||
def notify(
|
||||
self,
|
||||
title: str,
|
||||
message: str,
|
||||
timeout: int = 5,
|
||||
bypass_rate_limit: bool = False,
|
||||
) -> bool:
|
||||
"""
|
||||
Affiche une notification toast.
|
||||
|
||||
@@ -67,6 +97,8 @@ class NotificationManager:
|
||||
title: Titre de la notification.
|
||||
message: Corps du message.
|
||||
timeout: Durée d'affichage en secondes.
|
||||
bypass_rate_limit: Si True, ignore le rate limit (pour les blocages
|
||||
importants qui ne doivent pas être écrasés).
|
||||
|
||||
Returns:
|
||||
True si la notification a été envoyée, False sinon
|
||||
@@ -76,17 +108,21 @@ class NotificationManager:
|
||||
logger.debug("Notification ignorée (plyer absent) : %s", title)
|
||||
return False
|
||||
|
||||
with self._lock:
|
||||
now = time.monotonic()
|
||||
elapsed = now - self._last_notification_time
|
||||
if elapsed < RATE_LIMIT_SECONDS:
|
||||
logger.debug(
|
||||
"Notification ignorée (rate limit, %.1fs restantes) : %s",
|
||||
RATE_LIMIT_SECONDS - elapsed,
|
||||
title,
|
||||
)
|
||||
return False
|
||||
self._last_notification_time = now
|
||||
if not bypass_rate_limit:
|
||||
with self._lock:
|
||||
now = time.monotonic()
|
||||
elapsed = now - self._last_notification_time
|
||||
if elapsed < RATE_LIMIT_SECONDS:
|
||||
logger.debug(
|
||||
"Notification ignorée (rate limit, %.1fs restantes) : %s",
|
||||
RATE_LIMIT_SECONDS - elapsed,
|
||||
title,
|
||||
)
|
||||
return False
|
||||
self._last_notification_time = now
|
||||
else:
|
||||
with self._lock:
|
||||
self._last_notification_time = time.monotonic()
|
||||
|
||||
# Envoi dans un thread dédié pour ne jamais bloquer l'appelant
|
||||
thread = threading.Thread(
|
||||
@@ -97,6 +133,39 @@ class NotificationManager:
|
||||
thread.start()
|
||||
return True
|
||||
|
||||
def notify_message(self, msg: MessageUtilisateur) -> bool:
|
||||
"""Envoyer un MessageUtilisateur structuré (niveau, titre, corps).
|
||||
|
||||
Les messages BLOCAGE bypass le rate limit pour garantir que
|
||||
l'utilisateur voit qu'on a besoin de lui.
|
||||
"""
|
||||
bypass = msg.niveau == NiveauMessage.BLOCAGE
|
||||
# Log aussi pour tracer dans les logs fichiers
|
||||
self._log_message(msg)
|
||||
return self.notify(
|
||||
title=msg.titre,
|
||||
message=msg.corps,
|
||||
timeout=msg.duree_s,
|
||||
bypass_rate_limit=bypass,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _log_message(msg: MessageUtilisateur) -> None:
|
||||
"""Logger un message utilisateur avec le niveau approprié.
|
||||
|
||||
Les logs agents sont plus lisibles quand on route info → INFO,
|
||||
attention → WARNING, blocage → ERROR, avec un préfixe [LEA].
|
||||
"""
|
||||
prefix = f"[LEA] {msg.titre}: {msg.corps}"
|
||||
if msg.niveau == NiveauMessage.INFO:
|
||||
logger.info(prefix)
|
||||
elif msg.niveau == NiveauMessage.ATTENTION:
|
||||
logger.warning(prefix)
|
||||
elif msg.niveau == NiveauMessage.BLOCAGE:
|
||||
logger.error(prefix)
|
||||
else:
|
||||
logger.info(prefix)
|
||||
|
||||
def _send(self, title: str, message: str, timeout: int) -> None:
|
||||
"""Envoi effectif de la notification (exécuté dans un thread dédié)."""
|
||||
try:
|
||||
@@ -180,40 +249,79 @@ class NotificationManager:
|
||||
timeout=3,
|
||||
)
|
||||
|
||||
def replay_finished(self, success: bool, workflow_name: str) -> bool:
|
||||
"""Notification de fin de replay (succès ou échec)."""
|
||||
if success:
|
||||
return self.notify(
|
||||
title=APP_NAME,
|
||||
message="C'est fait ! Tout s'est bien passé.",
|
||||
timeout=5,
|
||||
)
|
||||
else:
|
||||
return self.notify(
|
||||
title=APP_NAME,
|
||||
message="Hmm, j'ai eu un souci. Vous pouvez me remontrer ?",
|
||||
timeout=7,
|
||||
)
|
||||
def replay_target_not_found(
|
||||
self,
|
||||
target_description: str,
|
||||
window_title: Optional[str] = None,
|
||||
) -> bool:
|
||||
"""Notification quand un élément n'est pas trouvé pendant le replay.
|
||||
|
||||
def connection_changed(self, connected: bool, server_host: str) -> bool:
|
||||
Le replay est mis en pause et attend une intervention humaine.
|
||||
Utilise `messages.formatter_cible_non_trouvee` pour un message en
|
||||
français naturel.
|
||||
"""
|
||||
msg = formatter_cible_non_trouvee(target_description, window_title)
|
||||
return self.notify_message(msg)
|
||||
|
||||
def replay_wrong_window(self, current_title: str, expected_title: str) -> bool:
|
||||
"""Notification quand la fenêtre active n'est pas celle attendue."""
|
||||
msg = formatter_fenetre_incorrecte(current_title, expected_title)
|
||||
return self.notify_message(msg)
|
||||
|
||||
def replay_no_screen_change(self, action_type: str = "") -> bool:
|
||||
"""Notification quand une action n'a pas eu d'effet visible."""
|
||||
msg = formatter_ecran_inchange(action_type)
|
||||
return self.notify_message(msg)
|
||||
|
||||
def replay_retry(self, action_type: str = "", tentative: int = 2) -> bool:
|
||||
"""Notification quand Léa retente une action."""
|
||||
msg = formatter_retry(action_type, tentative)
|
||||
return self.notify_message(msg)
|
||||
|
||||
def replay_slow(self) -> bool:
|
||||
"""Notification quand Léa va plus lentement que prévu."""
|
||||
msg = formatter_ralentissement()
|
||||
return self.notify_message(msg)
|
||||
|
||||
def replay_finished(
|
||||
self,
|
||||
success: bool,
|
||||
workflow_name: str,
|
||||
step_count: int = 0,
|
||||
duration_s: float = 0.0,
|
||||
) -> bool:
|
||||
"""Notification de fin de replay (succès ou échec)."""
|
||||
msg = formatter_fin_workflow(success, workflow_name, step_count, duration_s)
|
||||
return self.notify_message(msg)
|
||||
|
||||
def replay_workflow_started(self, workflow_name: str, step_count: int = 0) -> bool:
|
||||
"""Notification de début de workflow (remplace `replay_started`)."""
|
||||
msg = formatter_debut_workflow(workflow_name, step_count)
|
||||
return self.notify_message(msg)
|
||||
|
||||
def replay_step_progress(
|
||||
self,
|
||||
current: int,
|
||||
total: int,
|
||||
description: str = "",
|
||||
) -> bool:
|
||||
"""Notification de progression d'une étape (niveau INFO)."""
|
||||
msg = formatter_etape_workflow(current, total, description)
|
||||
return self.notify_message(msg)
|
||||
|
||||
def connection_changed(self, connected: bool, server_host: str = "") -> bool:
|
||||
"""Notification de changement d'état de la connexion serveur."""
|
||||
if connected:
|
||||
return self.notify(
|
||||
title=APP_NAME,
|
||||
message="Connectée au serveur.",
|
||||
timeout=5,
|
||||
)
|
||||
msg = formatter_connexion_retablie()
|
||||
else:
|
||||
return self.notify(
|
||||
title=APP_NAME,
|
||||
message="J'ai perdu la connexion avec le serveur.",
|
||||
timeout=7,
|
||||
)
|
||||
msg = formatter_connexion_perdue(server_host)
|
||||
return self.notify_message(msg)
|
||||
|
||||
def error(self, message: str) -> bool:
|
||||
"""Notification d'erreur."""
|
||||
return self.notify(
|
||||
title=APP_NAME,
|
||||
message=f"Oups, un problème : {message}",
|
||||
timeout=10,
|
||||
)
|
||||
"""Notification d'erreur générique.
|
||||
|
||||
Essaie d'abord de détecter un motif technique connu et de formater
|
||||
correctement, sinon fallback sur un message générique aidant.
|
||||
"""
|
||||
msg = formatter_erreur_generique(message)
|
||||
return self.notify_message(msg)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
214
agent_v0/deploy/windows_client/agent_v1/core/grounding.py
Normal file
214
agent_v0/deploy/windows_client/agent_v1/core/grounding.py
Normal file
@@ -0,0 +1,214 @@
|
||||
# agent_v1/core/grounding.py
|
||||
"""
|
||||
Module Grounding — localisation pure d'éléments UI sur l'écran.
|
||||
|
||||
Responsabilité unique : "Trouve l'élément X sur l'écran et retourne ses coordonnées."
|
||||
Ne prend AUCUNE décision. Si l'élément n'est pas trouvé → retourne NOT_FOUND.
|
||||
|
||||
Stratégies disponibles (cascade configurable) :
|
||||
1. Serveur SomEngine + VLM (GPU distant)
|
||||
2. Template matching local (CPU, ~10ms)
|
||||
3. VLM local direct (CPU/GPU local)
|
||||
|
||||
Séparé de Policy (qui décide quoi faire quand grounding échoue).
|
||||
Ref: docs/PLAN_ACTEUR_V1.md — Architecture MICRO (grounding + exécution)
|
||||
"""
|
||||
|
||||
import base64
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class GroundingResult:
    """Result of a visual localisation attempt."""

    # Whether the element was found.
    found: bool
    # Position as a fraction of the screen (0.0-1.0).
    x_pct: float = 0.0
    y_pct: float = 0.0
    # Method that resolved it (server_som, anchor_template, vlm_direct...).
    method: str = ""
    # Confidence score (0.0-1.0).
    score: float = 0.0
    # Resolution time, in milliseconds.
    elapsed_ms: float = 0.0
    # Extra info (matched label, failure reason).
    detail: str = ""
    # Raw resolver payload, kept for debugging; omitted from to_dict().
    raw: Optional[Dict] = None

    def to_dict(self) -> Dict[str, Any]:
        """Serialise for logging: rounds score/elapsed_ms, omits ``raw``."""
        return {
            "found": self.found,
            "x_pct": self.x_pct,
            "y_pct": self.y_pct,
            "method": self.method,
            "score": round(self.score, 3),
            "elapsed_ms": round(self.elapsed_ms, 1),
            "detail": self.detail,
        }


# Shared "not found" result.
NOT_FOUND = GroundingResult(found=False, detail="Aucune méthode n'a trouvé l'élément")
|
||||
|
||||
|
||||
class GroundingEngine:
|
||||
"""Moteur de localisation visuelle d'éléments UI.
|
||||
|
||||
Encapsule la cascade de résolution (serveur → template → VLM local)
|
||||
avec une interface unifiée. Ne prend aucune décision — c'est le rôle
|
||||
de PolicyEngine.
|
||||
|
||||
Usage :
|
||||
engine = GroundingEngine(executor)
|
||||
result = engine.locate(screenshot_b64, target_spec, screen_w, screen_h)
|
||||
if result.found:
|
||||
click(result.x_pct, result.y_pct)
|
||||
"""
|
||||
|
||||
def __init__(self, executor):
|
||||
"""
|
||||
Args:
|
||||
executor: ActionExecutorV1 — fournit les méthodes de résolution existantes.
|
||||
"""
|
||||
self._executor = executor
|
||||
|
||||
def locate(
|
||||
self,
|
||||
server_url: str,
|
||||
target_spec: Dict[str, Any],
|
||||
fallback_x: float,
|
||||
fallback_y: float,
|
||||
screen_width: int,
|
||||
screen_height: int,
|
||||
strategies: Optional[List[str]] = None,
|
||||
) -> GroundingResult:
|
||||
"""Localiser un élément UI sur l'écran.
|
||||
|
||||
Exécute la cascade de stratégies dans l'ordre et retourne
|
||||
dès qu'une stratégie trouve l'élément.
|
||||
|
||||
Args:
|
||||
server_url: URL du serveur (SomEngine + VLM GPU)
|
||||
target_spec: Spécification de la cible (by_text, anchor, vlm_description...)
|
||||
fallback_x, fallback_y: Coordonnées de fallback (enregistrement)
|
||||
screen_width, screen_height: Résolution écran
|
||||
strategies: Liste ordonnée de stratégies à essayer.
|
||||
Par défaut : ["server", "template", "vlm_local"]
|
||||
|
||||
Returns:
|
||||
GroundingResult avec found=True et coordonnées, ou NOT_FOUND
|
||||
"""
|
||||
if strategies is None:
|
||||
strategies = ["server", "template", "vlm_local"]
|
||||
|
||||
# ── Apprentissage : réordonner les stratégies selon l'historique ──
|
||||
# Si le Learning sait quelle méthode marche pour cette cible,
|
||||
# la mettre en premier. C'est la boucle d'apprentissage.
|
||||
learned = target_spec.get("_learned_strategy", "")
|
||||
if learned:
|
||||
strategy_map = {
|
||||
"som_text_match": "server",
|
||||
"grounding_vlm": "server",
|
||||
"server_som": "server",
|
||||
"anchor_template": "template",
|
||||
"template_matching": "template",
|
||||
"hybrid_text_direct": "vlm_local",
|
||||
"hybrid_vlm_text": "vlm_local",
|
||||
"vlm_direct": "vlm_local",
|
||||
}
|
||||
preferred = strategy_map.get(learned, "")
|
||||
if preferred and preferred in strategies:
|
||||
strategies = [preferred] + [s for s in strategies if s != preferred]
|
||||
logger.info(
|
||||
f"Grounding: stratégie réordonnée par l'apprentissage → "
|
||||
f"{strategies} (learned={learned})"
|
||||
)
|
||||
|
||||
t_start = time.time()
|
||||
screenshot_b64 = self._executor._capture_screenshot_b64(max_width=0, quality=75)
|
||||
if not screenshot_b64:
|
||||
return GroundingResult(
|
||||
found=False, detail="Capture screenshot échouée",
|
||||
elapsed_ms=(time.time() - t_start) * 1000,
|
||||
)
|
||||
|
||||
for strategy in strategies:
|
||||
result = self._try_strategy(
|
||||
strategy, server_url, screenshot_b64, target_spec,
|
||||
fallback_x, fallback_y, screen_width, screen_height,
|
||||
)
|
||||
if result.found:
|
||||
result.elapsed_ms = (time.time() - t_start) * 1000
|
||||
return result
|
||||
|
||||
return GroundingResult(
|
||||
found=False,
|
||||
detail=f"Toutes les stratégies ont échoué ({', '.join(strategies)})",
|
||||
elapsed_ms=(time.time() - t_start) * 1000,
|
||||
)
|
||||
|
||||
def _try_strategy(
|
||||
self,
|
||||
strategy: str,
|
||||
server_url: str,
|
||||
screenshot_b64: str,
|
||||
target_spec: Dict[str, Any],
|
||||
fallback_x: float,
|
||||
fallback_y: float,
|
||||
screen_width: int,
|
||||
screen_height: int,
|
||||
) -> GroundingResult:
|
||||
"""Essayer une stratégie de grounding unique."""
|
||||
|
||||
if strategy == "server" and server_url:
|
||||
raw = self._executor._server_resolve_target(
|
||||
server_url, screenshot_b64, target_spec,
|
||||
fallback_x, fallback_y, screen_width, screen_height,
|
||||
)
|
||||
if raw and raw.get("resolved"):
|
||||
return GroundingResult(
|
||||
found=True,
|
||||
x_pct=raw["x_pct"],
|
||||
y_pct=raw["y_pct"],
|
||||
method=raw.get("method", "server"),
|
||||
score=raw.get("score", 0.0),
|
||||
detail=raw.get("matched_element", {}).get("label", ""),
|
||||
raw=raw,
|
||||
)
|
||||
|
||||
elif strategy == "template":
|
||||
anchor_b64 = target_spec.get("anchor_image_base64", "")
|
||||
if anchor_b64:
|
||||
raw = self._executor._template_match_anchor(
|
||||
screenshot_b64, anchor_b64, screen_width, screen_height,
|
||||
)
|
||||
if raw and raw.get("resolved"):
|
||||
return GroundingResult(
|
||||
found=True,
|
||||
x_pct=raw["x_pct"],
|
||||
y_pct=raw["y_pct"],
|
||||
method="anchor_template",
|
||||
score=raw.get("score", 0.0),
|
||||
raw=raw,
|
||||
)
|
||||
|
||||
elif strategy == "vlm_local":
|
||||
by_text = target_spec.get("by_text", "")
|
||||
vlm_desc = target_spec.get("vlm_description", "")
|
||||
if vlm_desc or by_text:
|
||||
raw = self._executor._hybrid_vlm_resolve(
|
||||
screenshot_b64, target_spec, screen_width, screen_height,
|
||||
)
|
||||
if raw and raw.get("resolved"):
|
||||
return GroundingResult(
|
||||
found=True,
|
||||
x_pct=raw["x_pct"],
|
||||
y_pct=raw["y_pct"],
|
||||
method=raw.get("method", "vlm_local"),
|
||||
score=raw.get("score", 0.0),
|
||||
detail=raw.get("matched_element", {}).get("label", ""),
|
||||
raw=raw,
|
||||
)
|
||||
|
||||
return GroundingResult(found=False, method=strategy, detail=f"{strategy}: pas trouvé")
|
||||
152
agent_v0/deploy/windows_client/agent_v1/core/policy.py
Normal file
152
agent_v0/deploy/windows_client/agent_v1/core/policy.py
Normal file
@@ -0,0 +1,152 @@
|
||||
# agent_v1/core/policy.py
|
||||
"""
|
||||
Module Policy — décisions intelligentes quand le grounding échoue.
|
||||
|
||||
Responsabilité unique : "Le Grounding dit NOT_FOUND. Que fait-on ?"
|
||||
Ne localise AUCUN élément — c'est le rôle du Grounding.
|
||||
|
||||
Décisions possibles :
|
||||
- RETRY : re-tenter le grounding (après popup fermée, par exemple)
|
||||
- SKIP : l'action n'est plus nécessaire (état déjà atteint)
|
||||
- ABORT : arrêter le workflow (état incohérent)
|
||||
- SUPERVISE : rendre la main à l'utilisateur
|
||||
|
||||
Séparé de Grounding (qui localise les éléments).
|
||||
Ref: docs/PLAN_ACTEUR_V1.md — Architecture MÉSO (acteur intelligent)
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Decision(Enum):
    """Possible outcomes when grounding fails."""
    RETRY = "retry"          # Try again (after a corrective step: popup closed, navigation...)
    SKIP = "skip"            # Action is pointless (target state already reached)
    ABORT = "abort"          # Stop the workflow (inconsistent state)
    SUPERVISE = "supervise"  # Hand control back to the user (Lea says "I'm stuck")
    CONTINUE = "continue"    # Keep going despite the failure (non-critical action)


@dataclass
class PolicyDecision:
    """Outcome of a single Policy decision."""
    decision: Decision
    # Human-readable explanation of why this decision was taken.
    reason: str
    # Corrective action that was performed, if any (e.g. "popup_closed").
    action_taken: str = ""
    # Wall-clock time spent deciding, in milliseconds.
    elapsed_ms: float = 0.0

    def to_dict(self) -> Dict[str, Any]:
        """Serialize for logging / JSON transport (elapsed rounded to 0.1 ms)."""
        payload: Dict[str, Any] = {"decision": self.decision.value}
        payload["reason"] = self.reason
        payload["action_taken"] = self.action_taken
        payload["elapsed_ms"] = round(self.elapsed_ms, 1)
        return payload
|
||||
|
||||
|
||||
class PolicyEngine:
    """Decision engine invoked when grounding fails.

    Decision cascade:
      1. Popup detected? → close it and RETRY
      2. gemma4 actor → SKIP / ABORT / SUPERVISE
      3. Fallback → SUPERVISE (hand control back)

    Usage:
        policy = PolicyEngine(executor)
        decision = policy.decide(action, target_spec, grounding_result)
        if decision.decision == Decision.RETRY:
            # retry the grounding
        elif decision.decision == Decision.SKIP:
            # mark as succeeded, move on
    """

    def __init__(self, executor):
        # The executor exposes the popup handler and the gemma4 actor hook.
        self._executor = executor

    def decide(
        self,
        action: Dict[str, Any],
        target_spec: Dict[str, Any],
        retry_count: int = 0,
        max_retries: int = 1,
    ) -> PolicyDecision:
        """Choose what to do after a grounding failure.

        Cascade:
          1. First attempt → try closing a popup → RETRY
          2. Retries exhausted → ask the gemma4 actor
          3. Depending on gemma4: SKIP, ABORT, or SUPERVISE

        Args:
            action: The action that failed.
            target_spec: The target that was not found.
            retry_count: Number of retries already performed.
            max_retries: Maximum number of retries allowed.
        """
        started = time.time()

        def _result(decision: Decision, reason: str, action_taken: str = "") -> PolicyDecision:
            # Centralize the elapsed-time bookkeeping for every exit path.
            return PolicyDecision(
                decision=decision,
                reason=reason,
                action_taken=action_taken,
                elapsed_ms=(time.time() - started) * 1000,
            )

        # -- Step 1: popup-closing attempt (first try only) --
        if retry_count == 0 and self._try_close_popup():
            return _result(
                Decision.RETRY,
                "Popup détectée et fermée, re-tentative",
                action_taken="popup_closed",
            )

        # -- Step 2: retries exhausted → gemma4 actor --
        if retry_count >= max_retries:
            verdict = self._ask_actor(action, target_spec)
            if verdict == "PASSER":
                return _result(Decision.SKIP, "Acteur gemma4 : l'état est déjà atteint")
            if verdict == "STOPPER":
                return _result(Decision.ABORT, "Acteur gemma4 : état incohérent, arrêt")
            # EXECUTER or anything unknown → supervised pause
            return _result(
                Decision.SUPERVISE,
                f"Acteur gemma4 : {verdict}, pause supervisée",
            )

        # -- Step 3: retries still available → RETRY --
        return _result(Decision.RETRY, f"Retry {retry_count + 1}/{max_retries}")

    def _try_close_popup(self) -> bool:
        """Try to close a popup via the executor's existing VLM handler."""
        try:
            return self._executor._handle_popup_vlm()
        except Exception as e:
            logger.debug(f"Policy: popup handler échoué : {e}")
            return False

    def _ask_actor(self, action: Dict, target_spec: Dict) -> str:
        """Ask gemma4 to decide (PASSER/EXECUTER/STOPPER)."""
        try:
            return self._executor._actor_decide(action, target_spec)
        except Exception as e:
            logger.debug(f"Policy: acteur gemma4 échoué : {e}")
            return "EXECUTER"  # Fallback → supervised
|
||||
294
agent_v0/deploy/windows_client/agent_v1/core/uia_helper.py
Normal file
294
agent_v0/deploy/windows_client/agent_v1/core/uia_helper.py
Normal file
@@ -0,0 +1,294 @@
|
||||
# core/workflow/uia_helper.py
|
||||
"""
|
||||
UIAHelper — Wrapper Python pour lea_uia.exe (helper Rust UI Automation).
|
||||
|
||||
Expose une API Python simple pour interroger UIA via le binaire Rust.
|
||||
Communique via subprocess + stdin/stdout JSON.
|
||||
|
||||
Pourquoi un helper Rust ?
|
||||
- 5-10x plus rapide que pywinauto (10-20ms vs 50-200ms)
|
||||
- Binaire standalone ~500 Ko, aucune dépendance runtime
|
||||
- Pas de problèmes de threading COM en Python
|
||||
- Crash-safe (le crash du helper n'affecte pas l'agent Python)
|
||||
|
||||
Architecture :
|
||||
Python executor
|
||||
↓ subprocess.run
|
||||
lea_uia.exe query --x 812 --y 436
|
||||
↓ UIA API Windows
|
||||
JSON response
|
||||
↓ stdout
|
||||
Python executor parse JSON
|
||||
|
||||
Si lea_uia.exe n'est pas disponible (Linux, binaire absent, crash) :
|
||||
toutes les méthodes retournent None → fallback vision automatique.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import subprocess
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Timeout par défaut pour les appels UIA (en secondes)
|
||||
_DEFAULT_TIMEOUT = 5.0
|
||||
|
||||
# Masquer la fenêtre console lors du spawn de lea_uia.exe sur Windows.
|
||||
# Sans ce flag, chaque appel (à chaque clic utilisateur pendant
|
||||
# l'enregistrement) fait apparaître une fenêtre cmd noire brièvement
|
||||
# visible à l'écran → ralentit la souris et pollue les screenshots
|
||||
# capturés (le VLM peut "voir" le chemin lea_uia.exe comme texte cliqué).
|
||||
#
|
||||
# La valeur 0x08000000 correspond à CREATE_NO_WINDOW défini dans
|
||||
# l'API Windows. Sur Linux/Mac, la valeur est 0 et `creationflags`
|
||||
# est ignoré. getattr() gère le cas où Python expose déjà la constante
|
||||
# sur Windows.
|
||||
if platform.system() == "Windows":
|
||||
_SUBPROCESS_CREATION_FLAGS = getattr(subprocess, "CREATE_NO_WINDOW", 0x08000000)
|
||||
else:
|
||||
_SUBPROCESS_CREATION_FLAGS = 0
|
||||
|
||||
|
||||
@dataclass
class UiaElement:
    """Python-side view of a UIA element returned by lea_uia.exe."""
    name: str = ""
    control_type: str = ""
    class_name: str = ""
    automation_id: str = ""
    bounding_rect: Tuple[int, int, int, int] = (0, 0, 0, 0)
    is_enabled: bool = False
    is_offscreen: bool = True
    parent_path: List[Dict[str, str]] = field(default_factory=list)
    process_name: str = ""

    def center(self) -> Tuple[int, int]:
        """Midpoint of the bounding rectangle, in pixels."""
        left, top, right, bottom = self.bounding_rect
        return ((left + right) // 2, (top + bottom) // 2)

    def width(self) -> int:
        left, _, right, _ = self.bounding_rect
        return right - left

    def height(self) -> int:
        _, top, _, bottom = self.bounding_rect
        return bottom - top

    def is_clickable(self) -> bool:
        """Whether the element can receive a click (enabled, on-screen, non-empty)."""
        if not self.is_enabled or self.is_offscreen:
            return False
        return self.width() > 0 and self.height() > 0

    def path_signature(self) -> str:
        """Signature of the parent chain (used to re-locate the element)."""
        parts: List[str] = []
        for ancestor in self.parent_path:
            if ancestor.get("name"):
                parts.append(f"{ancestor['control_type']}[{ancestor['name']}]")
        parts.append(f"{self.control_type}[{self.name}]")
        return " > ".join(parts)

    def to_dict(self) -> Dict[str, Any]:
        """JSON-friendly representation (rect as a plain list)."""
        return {
            "name": self.name,
            "control_type": self.control_type,
            "class_name": self.class_name,
            "automation_id": self.automation_id,
            "bounding_rect": list(self.bounding_rect),
            "is_enabled": self.is_enabled,
            "is_offscreen": self.is_offscreen,
            "parent_path": self.parent_path,
            "process_name": self.process_name,
        }

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "UiaElement":
        """Build a UiaElement from the helper's JSON payload (lenient on missing keys)."""
        raw_rect = d.get("bounding_rect", [0, 0, 0, 0])
        if isinstance(raw_rect, list) and len(raw_rect) >= 4:
            rect = tuple(raw_rect[:4])
        else:
            rect = (0, 0, 0, 0)
        return cls(
            name=d.get("name", ""),
            control_type=d.get("control_type", ""),
            class_name=d.get("class_name", ""),
            automation_id=d.get("automation_id", ""),
            bounding_rect=rect,
            is_enabled=d.get("is_enabled", False),
            is_offscreen=d.get("is_offscreen", True),
            parent_path=d.get("parent_path", []),
            process_name=d.get("process_name", ""),
        )
|
||||
|
||||
|
||||
class UIAHelper:
    """Python wrapper around lea_uia.exe (the Rust UI Automation helper).

    Every public method degrades gracefully: if the binary is missing,
    we are not on Windows, or the subprocess fails, calls return
    None/False so the caller can fall back to vision-based grounding.
    """

    def __init__(self, helper_path: str = "", timeout: float = _DEFAULT_TIMEOUT):
        # An explicit path takes precedence; otherwise probe the
        # standard install/build locations.
        self._helper_path = helper_path or self._find_helper()
        self._timeout = timeout
        # Availability is computed once at construction (platform check
        # + binary presence) and never re-checked afterwards.
        self._available = self._check_available()

    def _find_helper(self) -> str:
        """Locate lea_uia.exe in the standard locations.

        Returns:
            Absolute path of the first existing candidate, or "" when
            none is found.
        """
        candidates = [
            r"C:\Lea\helpers\lea_uia.exe",
            os.path.join(os.path.dirname(__file__), "..", "..",
                         "agent_rust", "lea_uia", "target",
                         "x86_64-pc-windows-gnu", "release", "lea_uia.exe"),
            "./helpers/lea_uia.exe",
            "lea_uia.exe",
        ]
        for path in candidates:
            if os.path.isfile(path):
                return os.path.abspath(path)
        return ""

    def _check_available(self) -> bool:
        """Check the helper is usable (Windows platform + binary present).

        NOTE(review): despite the original wording mentioning "health OK",
        no health call is made here — health() must be invoked separately.
        """
        if platform.system() != "Windows":
            logger.debug("UIAHelper: Linux/Mac — helper désactivé")
            return False
        if not self._helper_path:
            logger.debug("UIAHelper: lea_uia.exe introuvable")
            return False
        if not os.path.isfile(self._helper_path):
            logger.debug(f"UIAHelper: chemin invalide {self._helper_path}")
            return False
        return True

    @property
    def available(self) -> bool:
        # True only on Windows with the binary found at construction time.
        return self._available

    @property
    def helper_path(self) -> str:
        # Absolute path of lea_uia.exe ("" when not found).
        return self._helper_path

    def _run(self, args: List[str]) -> Optional[Dict[str, Any]]:
        """Run lea_uia.exe with *args* and parse its stdout as JSON.

        Returns:
            Parsed JSON dict, or None on any failure (helper unavailable,
            non-zero exit code, timeout, empty or invalid output).
        """
        if not self._available:
            return None
        try:
            result = subprocess.run(
                [self._helper_path] + args,
                capture_output=True,
                text=True,
                timeout=self._timeout,
                encoding="utf-8",
                errors="replace",
                # Suppress the transient console window on Windows (see the
                # module-level note on _SUBPROCESS_CREATION_FLAGS).
                creationflags=_SUBPROCESS_CREATION_FLAGS,
            )
            if result.returncode != 0:
                logger.debug(
                    f"UIAHelper: exit code {result.returncode}, "
                    f"stderr: {result.stderr[:200]}"
                )
                return None
            output = result.stdout.strip()
            if not output:
                return None
            return json.loads(output)
        except subprocess.TimeoutExpired:
            logger.debug(f"UIAHelper: timeout ({self._timeout}s) sur {args}")
            return None
        except json.JSONDecodeError as e:
            logger.debug(f"UIAHelper: JSON invalide — {e}")
            return None
        except Exception as e:
            # Deliberate catch-all: a helper failure must never crash the
            # agent — callers treat None as "fall back to vision".
            logger.debug(f"UIAHelper: erreur {e}")
            return None

    def health(self) -> bool:
        """Check that UIA responds (runs the helper's `health` command)."""
        data = self._run(["health"])
        return data is not None and data.get("status") == "ok"

    def query_at(
        self,
        x: int,
        y: int,
        with_parents: bool = True,
    ) -> Optional[UiaElement]:
        """Fetch the UIA element at a screen position.

        Args:
            x, y: Absolute pixel coordinates.
            with_parents: Include the parent hierarchy.

        Returns:
            UiaElement if found, None otherwise (no element at that
            position, or UIA unavailable).
        """
        args = ["query", "--x", str(x), "--y", str(y)]
        if not with_parents:
            args.append("--with-parents=false")

        data = self._run(args)
        if not data or data.get("status") != "ok":
            return None

        elem_data = data.get("element")
        if not elem_data:
            return None
        return UiaElement.from_dict(elem_data)

    def find_by_name(
        self,
        name: str,
        control_type: Optional[str] = None,
        automation_id: Optional[str] = None,
        window: Optional[str] = None,
        timeout_ms: int = 2000,
    ) -> Optional[UiaElement]:
        """Search for an element by its name (+ optional filters).

        Args:
            name: Exact element name.
            control_type: Control type (Button, Edit, MenuItem...).
            automation_id: Automation ID.
            window: Restrict the search to a specific window.
            timeout_ms: Search timeout in milliseconds.
        """
        args = ["find", "--name", name, "--timeout-ms", str(timeout_ms)]
        if control_type:
            args.extend(["--control-type", control_type])
        if automation_id:
            args.extend(["--automation-id", automation_id])
        if window:
            args.extend(["--window", window])

        data = self._run(args)
        if not data or data.get("status") != "ok":
            return None

        elem_data = data.get("element")
        if not elem_data:
            return None
        return UiaElement.from_dict(elem_data)

    def capture_focused(self, max_depth: int = 3) -> Optional[UiaElement]:
        """Capture the focused element plus its surrounding context."""
        data = self._run(["capture", "--max-depth", str(max_depth)])
        if not data or data.get("status") != "ok":
            return None

        elem_data = data.get("element")
        if not elem_data:
            return None
        return UiaElement.from_dict(elem_data)
|
||||
|
||||
|
||||
# Shared global instance (lightweight singleton, created lazily)
_SHARED_HELPER: Optional[UIAHelper] = None


def get_shared_helper() -> UIAHelper:
    """Return the process-wide UIAHelper, creating it on first use."""
    global _SHARED_HELPER
    if _SHARED_HELPER is not None:
        return _SHARED_HELPER
    _SHARED_HELPER = UIAHelper()
    return _SHARED_HELPER
|
||||
@@ -2,6 +2,17 @@
|
||||
"""
|
||||
deploy_windows.py — Script de packaging du client Windows pour Agent V1.
|
||||
|
||||
⚠️ OBSOLÈTE (avril 2026)
|
||||
Le build officiel du package Windows passe par ``deploy/build_package.sh``
|
||||
(à la racine du repo) qui lit directement ``agent_v0/agent_v1/`` et évite
|
||||
les clones intermédiaires. Ce script est conservé pour référence mais son
|
||||
manifeste ``FILE_MANIFEST`` est incomplet : il n'inclut pas
|
||||
``system_dialog_guard.py``, ``persistent_buffer.py``, ``recovery.py``,
|
||||
``uia_helper.py``, ``grounding.py``, ``policy.py``,
|
||||
``vision/blur_sensitive.py``, ``vision/system_info.py``,
|
||||
``ui/chat_window.py``, ``ui/capture_server.py``, ``ui/shared_state.py``.
|
||||
Ne PAS l'utiliser pour un packaging réel.
|
||||
|
||||
Copie uniquement les fichiers nécessaires au fonctionnement de l'agent
|
||||
sur le PC cible (Windows), sans le serveur ni les dépendances lourdes.
|
||||
|
||||
|
||||
@@ -71,8 +71,16 @@ class LeaServerClient:
|
||||
self._chat_port = chat_port
|
||||
self._stream_port = stream_port
|
||||
|
||||
# En prod, la base URL passe par le reverse proxy HTTPS
|
||||
# (ex. https://lea.labs.laurinebazin.design). Si RPA_SERVER_URL est
|
||||
# definie on l'utilise telle quelle, sinon on reconstruit http://host:port.
|
||||
server_url = os.environ.get("RPA_SERVER_URL", "").strip().rstrip("/")
|
||||
if server_url:
|
||||
self._stream_base = server_url
|
||||
else:
|
||||
self._stream_base = f"http://{self._host}:{self._stream_port}"
|
||||
|
||||
self._chat_base = f"http://{self._host}:{self._chat_port}"
|
||||
self._stream_base = f"http://{self._host}:{self._stream_port}"
|
||||
|
||||
# Etat de connexion
|
||||
self._connected = False
|
||||
|
||||
@@ -1,12 +1,97 @@
|
||||
# run_agent_v1.py
|
||||
import sys
|
||||
import os
|
||||
import atexit
|
||||
|
||||
# Ajout du répertoire courant au PYTHONPATH pour permettre les imports de modules
|
||||
current_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
if current_dir not in sys.path:
|
||||
sys.path.append(current_dir)
|
||||
|
||||
# ---------------------------------------------------------------
|
||||
# Verrou PID — empêche le lancement de plusieurs instances
|
||||
# Même si Lea.bat est double-cliqué ou lancé deux fois,
|
||||
# un seul agent tourne à la fois (defense-in-depth).
|
||||
# ---------------------------------------------------------------
|
||||
LOCK_FILE = os.path.join(current_dir, "lea_agent.lock")
|
||||
|
||||
|
||||
def _pid_is_alive(pid: int) -> bool:
|
||||
"""Vérifie si un processus avec ce PID existe encore (Windows + Unix)."""
|
||||
if sys.platform == "win32":
|
||||
try:
|
||||
import ctypes
|
||||
kernel32 = ctypes.windll.kernel32 # type: ignore[attr-defined]
|
||||
PROCESS_QUERY_LIMITED_INFORMATION = 0x1000
|
||||
handle = kernel32.OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, False, pid)
|
||||
if handle:
|
||||
kernel32.CloseHandle(handle)
|
||||
return True
|
||||
return False
|
||||
except Exception:
|
||||
# Fallback : tasklist
|
||||
try:
|
||||
import subprocess
|
||||
result = subprocess.run(
|
||||
["tasklist", "/FI", f"PID eq {pid}", "/NH"],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
)
|
||||
return str(pid) in result.stdout
|
||||
except Exception:
|
||||
return False
|
||||
else:
|
||||
# Unix/Linux — os.kill(pid, 0) ne tue pas le process
|
||||
try:
|
||||
os.kill(pid, 0)
|
||||
return True
|
||||
except (OSError, ProcessLookupError):
|
||||
return False
|
||||
|
||||
|
||||
def _acquire_lock() -> bool:
    """Try to acquire the PID lock. Returns False when another live instance owns it."""
    my_pid = os.getpid()

    # Read the PID stored in an existing lock file, if any.
    previous_pid = None
    if os.path.isfile(LOCK_FILE):
        try:
            with open(LOCK_FILE, "r", encoding="utf-8") as fh:
                previous_pid = int(fh.read().strip())
        except (ValueError, OSError):
            previous_pid = None  # corrupt file — we will overwrite it

    # Is the PID recorded in the lock still alive (and not us)?
    if previous_pid is not None and previous_pid != my_pid and _pid_is_alive(previous_pid):
        return False  # another instance is already running

    # Record our own PID; a write failure is non-blocking.
    try:
        with open(LOCK_FILE, "w", encoding="utf-8") as fh:
            fh.write(str(my_pid))
    except OSError:
        pass  # continue without the lock
    return True
|
||||
|
||||
|
||||
def _release_lock():
    """Remove the lock file at shutdown — but only if it is ours."""
    try:
        if not os.path.isfile(LOCK_FILE):
            return
        with open(LOCK_FILE, "r", encoding="utf-8") as fh:
            owner_pid = int(fh.read().strip())
        # Only delete the lock written by THIS process.
        if owner_pid == os.getpid():
            os.remove(LOCK_FILE)
    except (ValueError, OSError):
        pass
|
||||
|
||||
|
||||
# Vérification du lock AVANT toute initialisation lourde
|
||||
if not _acquire_lock():
|
||||
# Une autre instance de Léa tourne déjà — on quitte silencieusement
|
||||
sys.exit(0)
|
||||
|
||||
atexit.register(_release_lock)
|
||||
|
||||
# Charger config.txt et .env comme variables d'environnement
|
||||
# (équivalent du `set` dans Lea.bat, mais fonctionne aussi sans le .bat)
|
||||
for config_file in ("config.txt", ".env"):
|
||||
@@ -32,7 +117,7 @@ logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
|
||||
)
|
||||
logging.info("=== Agent V1 démarrage — config chargée ===")
|
||||
logging.info("=== Agent V1 démarrage — config chargée (PID %d) ===", os.getpid())
|
||||
logging.info("RPA_SERVER_URL=%s", os.environ.get("RPA_SERVER_URL", "(non défini)"))
|
||||
logging.info("RPA_SERVER_HOST=%s", os.environ.get("RPA_SERVER_HOST", "(non défini)"))
|
||||
logging.info("RPA_API_TOKEN=%s", os.environ.get("RPA_API_TOKEN", "(non défini)")[:8] + "...")
|
||||
|
||||
296
agent_v0/server_v1/agent_registry.py
Normal file
296
agent_v0/server_v1/agent_registry.py
Normal file
@@ -0,0 +1,296 @@
|
||||
# agent_v0/server_v1/agent_registry.py
|
||||
"""
|
||||
Registre des agents Lea enrolles sur le parc.
|
||||
|
||||
Alimente par les endpoints /api/v1/agents/enroll et /api/v1/agents/uninstall
|
||||
que l'installeur Inno Setup (`deploy/installer/Lea.iss`) appelle a
|
||||
l'installation et a la desinstallation sur chaque poste collaborateur.
|
||||
|
||||
Stockage : SQLite simple, cohabite avec rpa_data.db dans data/databases/.
|
||||
Aucune dependance GPU/LLM — ce module doit rester leger (juste sqlite3 +
|
||||
stdlib) pour pouvoir etre importe par le serveur HTTP.
|
||||
|
||||
Schema de la table `enrolled_agents` :
|
||||
id INTEGER PK AUTOINCREMENT
|
||||
machine_id TEXT UNIQUE NOT NULL — identifiant genere par l'installeur
|
||||
user_name TEXT — nom affichage collaborateur
|
||||
user_email TEXT
|
||||
user_id TEXT — identifiant metier (ex: AIVA-001)
|
||||
hostname TEXT
|
||||
os_info TEXT
|
||||
version TEXT — version du client Lea
|
||||
status TEXT DEFAULT 'active' — 'active' | 'uninstalled'
|
||||
enrolled_at TEXT NOT NULL — ISO 8601 UTC
|
||||
last_seen_at TEXT — ISO 8601 UTC (heartbeat / stream)
|
||||
uninstalled_at TEXT
|
||||
uninstall_reason TEXT
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import sqlite3
|
||||
import threading
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Verrou global : SQLite tolere plusieurs threads mais on serialise
|
||||
# les ecritures pour eviter les races sur _init_db + upserts concurrents.
|
||||
_DB_LOCK = threading.Lock()
|
||||
|
||||
|
||||
def _utc_now_iso() -> str:
|
||||
"""Horodatage ISO 8601 UTC (compatible toutes les autres tables)."""
|
||||
return datetime.now(timezone.utc).isoformat()
|
||||
|
||||
|
||||
class AgentRegistry:
|
||||
"""Gestion CRUD des agents enrolles (SQLite)."""
|
||||
|
||||
def __init__(self, db_path: str | Path = "data/databases/rpa_data.db"):
|
||||
self.db_path = Path(db_path)
|
||||
self.db_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
self._init_db()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Infra SQLite
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _connect(self) -> sqlite3.Connection:
|
||||
# check_same_thread=False : on protege nous-memes via _DB_LOCK,
|
||||
# indispensable car FastAPI appelle les endpoints sur threads
|
||||
# differents (thread pool).
|
||||
conn = sqlite3.connect(str(self.db_path), check_same_thread=False)
|
||||
conn.row_factory = sqlite3.Row
|
||||
conn.execute("PRAGMA journal_mode=WAL")
|
||||
conn.execute("PRAGMA foreign_keys=ON")
|
||||
return conn
|
||||
|
||||
def _init_db(self) -> None:
|
||||
"""Cree la table et ses index si absents (idempotent)."""
|
||||
with _DB_LOCK, self._connect() as conn:
|
||||
conn.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS enrolled_agents (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
machine_id TEXT NOT NULL UNIQUE,
|
||||
user_name TEXT,
|
||||
user_email TEXT,
|
||||
user_id TEXT,
|
||||
hostname TEXT,
|
||||
os_info TEXT,
|
||||
version TEXT,
|
||||
status TEXT NOT NULL DEFAULT 'active',
|
||||
enrolled_at TEXT NOT NULL,
|
||||
last_seen_at TEXT,
|
||||
uninstalled_at TEXT,
|
||||
uninstall_reason TEXT
|
||||
)
|
||||
"""
|
||||
)
|
||||
conn.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_enrolled_agents_status "
|
||||
"ON enrolled_agents(status)"
|
||||
)
|
||||
conn.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_enrolled_agents_machine "
|
||||
"ON enrolled_agents(machine_id)"
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Lecture
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def get(self, machine_id: str) -> Optional[Dict[str, Any]]:
|
||||
"""Recupere un agent par machine_id (ou None)."""
|
||||
with _DB_LOCK, self._connect() as conn:
|
||||
row = conn.execute(
|
||||
"SELECT * FROM enrolled_agents WHERE machine_id = ?",
|
||||
(machine_id,),
|
||||
).fetchone()
|
||||
return dict(row) if row else None
|
||||
|
||||
def list_by_status(self, status: str) -> List[Dict[str, Any]]:
|
||||
"""Liste les agents par statut ('active' | 'uninstalled')."""
|
||||
with _DB_LOCK, self._connect() as conn:
|
||||
rows = conn.execute(
|
||||
"SELECT * FROM enrolled_agents WHERE status = ? "
|
||||
"ORDER BY enrolled_at DESC",
|
||||
(status,),
|
||||
).fetchall()
|
||||
return [dict(r) for r in rows]
|
||||
|
||||
def count_by_status(self, status: str) -> int:
|
||||
with _DB_LOCK, self._connect() as conn:
|
||||
row = conn.execute(
|
||||
"SELECT COUNT(*) AS n FROM enrolled_agents WHERE status = ?",
|
||||
(status,),
|
||||
).fetchone()
|
||||
return int(row["n"]) if row else 0
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Ecriture
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def enroll(
|
||||
self,
|
||||
*,
|
||||
machine_id: str,
|
||||
user_name: str | None = None,
|
||||
user_email: str | None = None,
|
||||
user_id: str | None = None,
|
||||
hostname: str | None = None,
|
||||
os_info: str | None = None,
|
||||
version: str | None = None,
|
||||
allow_reactivate: bool = True,
|
||||
) -> Dict[str, Any]:
|
||||
"""Enregistre un nouvel agent ou reactive un agent desinstalle.
|
||||
|
||||
Returns:
|
||||
dict avec clefs {"created": bool, "reactivated": bool, "agent": row}
|
||||
|
||||
Raises:
|
||||
ValueError: si machine_id est vide.
|
||||
AgentAlreadyEnrolledError: si deja actif (status=active).
|
||||
"""
|
||||
if not machine_id or not machine_id.strip():
|
||||
raise ValueError("machine_id est obligatoire")
|
||||
machine_id = machine_id.strip()
|
||||
|
||||
now = _utc_now_iso()
|
||||
|
||||
with _DB_LOCK, self._connect() as conn:
|
||||
existing = conn.execute(
|
||||
"SELECT * FROM enrolled_agents WHERE machine_id = ?",
|
||||
(machine_id,),
|
||||
).fetchone()
|
||||
|
||||
if existing is not None:
|
||||
if existing["status"] == "active":
|
||||
# Deja enrolle et actif -> conflit explicit
|
||||
raise AgentAlreadyEnrolledError(dict(existing))
|
||||
|
||||
# Agent desinstalle : reactivation si autorise (defaut)
|
||||
if not allow_reactivate:
|
||||
raise AgentAlreadyEnrolledError(dict(existing))
|
||||
|
||||
conn.execute(
|
||||
"""
|
||||
UPDATE enrolled_agents
|
||||
SET user_name = COALESCE(?, user_name),
|
||||
user_email = COALESCE(?, user_email),
|
||||
user_id = COALESCE(?, user_id),
|
||||
hostname = COALESCE(?, hostname),
|
||||
os_info = COALESCE(?, os_info),
|
||||
version = COALESCE(?, version),
|
||||
status = 'active',
|
||||
enrolled_at = ?,
|
||||
last_seen_at = ?,
|
||||
uninstalled_at = NULL,
|
||||
uninstall_reason = NULL
|
||||
WHERE machine_id = ?
|
||||
""",
|
||||
(
|
||||
user_name, user_email, user_id,
|
||||
hostname, os_info, version,
|
||||
now, now, machine_id,
|
||||
),
|
||||
)
|
||||
conn.commit()
|
||||
row = conn.execute(
|
||||
"SELECT * FROM enrolled_agents WHERE machine_id = ?",
|
||||
(machine_id,),
|
||||
).fetchone()
|
||||
return {"created": False, "reactivated": True, "agent": dict(row)}
|
||||
|
||||
# Nouvelle inscription
|
||||
conn.execute(
|
||||
"""
|
||||
INSERT INTO enrolled_agents (
|
||||
machine_id, user_name, user_email, user_id,
|
||||
hostname, os_info, version,
|
||||
status, enrolled_at, last_seen_at
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, 'active', ?, ?)
|
||||
""",
|
||||
(
|
||||
machine_id, user_name, user_email, user_id,
|
||||
hostname, os_info, version,
|
||||
now, now,
|
||||
),
|
||||
)
|
||||
conn.commit()
|
||||
row = conn.execute(
|
||||
"SELECT * FROM enrolled_agents WHERE machine_id = ?",
|
||||
(machine_id,),
|
||||
).fetchone()
|
||||
return {"created": True, "reactivated": False, "agent": dict(row)}
|
||||
|
||||
def uninstall(
    self,
    *,
    machine_id: str,
    reason: str | None = None,
) -> Optional[Dict[str, Any]]:
    """Mark an agent as uninstalled (soft delete).

    Args:
        machine_id: Unique machine identifier (required; surrounding
            whitespace is stripped before use).
        reason: Optional free-text reason stored with the uninstall.

    Returns:
        The updated row as a dict, or None if the agent does not exist.

    Raises:
        ValueError: If machine_id is missing or blank.
    """
    if not machine_id or not machine_id.strip():
        raise ValueError("machine_id est obligatoire")
    machine_id = machine_id.strip()

    timestamp = _utc_now_iso()
    with _DB_LOCK, self._connect() as conn:
        current = conn.execute(
            "SELECT * FROM enrolled_agents WHERE machine_id = ?",
            (machine_id,),
        ).fetchone()
        if current is None:
            # Unknown agent: nothing to soft-delete.
            return None

        conn.execute(
            """
            UPDATE enrolled_agents
            SET status = 'uninstalled',
                uninstalled_at = ?,
                uninstall_reason = ?
            WHERE machine_id = ?
            """,
            (timestamp, reason, machine_id),
        )
        conn.commit()

        # Re-read so the caller gets the row exactly as persisted.
        updated = conn.execute(
            "SELECT * FROM enrolled_agents WHERE machine_id = ?",
            (machine_id,),
        ).fetchone()
        return dict(updated)
|
||||
|
||||
def touch_last_seen(self, machine_id: str) -> None:
    """Refresh last_seen_at for an agent (called from stream/heartbeat).

    Silently does nothing for an empty machine_id, and the UPDATE simply
    matches zero rows for an unknown one — old clients never error here.
    """
    if not machine_id:
        return

    timestamp = _utc_now_iso()
    with _DB_LOCK, self._connect() as conn:
        conn.execute(
            "UPDATE enrolled_agents SET last_seen_at = ? WHERE machine_id = ?",
            (timestamp, machine_id),
        )
        conn.commit()
|
||||
|
||||
|
||||
class AgentAlreadyEnrolledError(Exception):
    """Raised when trying to enroll a machine that is already active."""

    def __init__(self, existing_row: Dict[str, Any]):
        # Keep the conflicting row so callers can inspect or return it.
        self.existing = existing_row
        machine = existing_row.get("machine_id")
        status = existing_row.get("status")
        super().__init__(f"machine_id={machine} deja enrole (status={status})")
|
||||
File diff suppressed because it is too large
Load Diff
622
agent_v0/server_v1/chat_interface.py
Normal file
622
agent_v0/server_v1/chat_interface.py
Normal file
@@ -0,0 +1,622 @@
|
||||
"""
|
||||
ChatInterface — Interface de chat conversationnelle pour Léa.
|
||||
|
||||
Permet au TIM (Technicien Information Médicale) de parler à Léa en langage
|
||||
naturel :
|
||||
- "Ouvre le Bloc-notes et écris bonjour"
|
||||
- Léa comprend (TaskPlanner) et propose un plan
|
||||
- Le TIM confirme (ou refuse)
|
||||
- Léa exécute (replay) et envoie des updates de progression
|
||||
- Historique conversationnel conservé par session
|
||||
|
||||
C'est une couche LÉGÈRE au-dessus du TaskPlanner. Toute la logique de
|
||||
compréhension reste dans TaskPlanner — ChatInterface gère uniquement
|
||||
l'état conversationnel, la confirmation et le suivi d'exécution.
|
||||
|
||||
États de la session :
|
||||
idle → en attente d'un message
|
||||
planning → TaskPlanner.understand() en cours
|
||||
awaiting_confirmation → plan prêt, attend la confirmation du TIM
|
||||
executing → replay en cours
|
||||
done → dernier tour terminé (retour à idle au prochain message)
|
||||
error → erreur interne (instruction non comprise, exception…)
|
||||
|
||||
Langue : 100% français (c'est l'interface utilisateur).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Callable, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# =============================================================================
|
||||
# États
|
||||
# =============================================================================
|
||||
|
||||
# Lifecycle states of a chat session.
STATE_IDLE = "idle"                                    # waiting for a message
STATE_PLANNING = "planning"                            # TaskPlanner.understand() running
STATE_AWAITING_CONFIRMATION = "awaiting_confirmation"  # plan ready, waiting for the user
STATE_EXECUTING = "executing"                          # replay in progress
STATE_DONE = "done"                                    # last turn finished
STATE_ERROR = "error"                                  # internal error / not understood

# Closed set used by _set_state() to reject unknown states.
VALID_STATES = {
    STATE_IDLE,
    STATE_PLANNING,
    STATE_AWAITING_CONFIRMATION,
    STATE_EXECUTING,
    STATE_DONE,
    STATE_ERROR,
}

# Message author roles stored in the history.
ROLE_USER = "user"
ROLE_LEA = "lea"
ROLE_SYSTEM = "system"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Message
|
||||
# =============================================================================
|
||||
|
||||
@dataclass
class ChatMessage:
    """One entry in a conversation's history."""

    role: str  # "user", "lea" or "system"
    content: str  # message text
    timestamp: float = field(default_factory=time.time)
    # Optional contextual payload (plan, result, progress, ...)
    meta: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the message for JSON transport to the UI."""
        return {
            name: getattr(self, name)
            for name in ("role", "content", "timestamp", "meta")
        }
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ChatSession
|
||||
# =============================================================================
|
||||
|
||||
class ChatSession:
|
||||
"""Une conversation entre un utilisateur et Léa.
|
||||
|
||||
Maintient l'historique, l'état courant, et le dernier plan en attente
|
||||
de confirmation. Thread-safe (un lock par session).
|
||||
|
||||
Dépendances injectées (pour tester facilement) :
|
||||
- task_planner : instance de TaskPlanner (ou mock)
|
||||
- workflows_provider : callable () -> List[Dict] (liste des workflows)
|
||||
- replay_callback : callable (session_id, machine_id, params) -> replay_id
|
||||
- status_provider : callable (replay_id) -> Dict (pour suivre l'exécution)
|
||||
|
||||
Toutes ces dépendances sont optionnelles : ChatSession dégrade
|
||||
gracieusement (fallback) si gemma4 / replay indisponibles.
|
||||
"""
|
||||
|
||||
def __init__(
    self,
    session_id: str = "",
    task_planner: Any = None,
    workflows_provider: Optional[Callable[[], List[Dict[str, Any]]]] = None,
    replay_callback: Optional[Callable[..., str]] = None,
    status_provider: Optional[Callable[[str], Dict[str, Any]]] = None,
    machine_id: str = "default",
):
    """Create a session; a welcome message is appended immediately.

    All collaborators are optional so the session degrades gracefully
    when the planner or the replay engine is unavailable.
    """
    self.session_id = session_id or f"chat_{uuid.uuid4().hex[:12]}"
    self.machine_id = machine_id
    self.created_at = time.time()
    self.updated_at = self.created_at

    # Injected collaborators (may all be None).
    self._task_planner = task_planner
    self._workflows_provider = workflows_provider
    self._replay_callback = replay_callback
    self._status_provider = status_provider

    # Conversation state — guarded by the re-entrant lock below.
    self._state: str = STATE_IDLE
    self._messages: List[ChatMessage] = []
    self._pending_plan: Any = None  # TaskPlan awaiting confirmation
    self._active_replay_id: str = ""  # current replay while executing
    self._last_progress: Dict[str, Any] = {}
    self._lock = threading.RLock()

    # Greet the user right away.
    self._append(
        ROLE_LEA,
        "Bonjour ! Je suis Léa. Dites-moi ce que vous voulez que je fasse.",
        meta={"welcome": True},
    )
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Accesseurs
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
@property
def state(self) -> str:
    """Current lifecycle state (thread-safe read)."""
    with self._lock:
        return self._state

def get_history(self) -> List[Dict[str, Any]]:
    """Return the full serialized message history."""
    with self._lock:
        return [msg.to_dict() for msg in self._messages]

def get_snapshot(self) -> Dict[str, Any]:
    """Return the complete session state for the UI.

    Includes history, lifecycle state, the pending plan (if any) and the
    latest replay progress.
    """
    with self._lock:
        pending = self._pending_plan
        return {
            "session_id": self.session_id,
            "state": self._state,
            "machine_id": self.machine_id,
            "created_at": self.created_at,
            "updated_at": self.updated_at,
            "messages": [msg.to_dict() for msg in self._messages],
            "pending_plan": None if pending is None else pending.to_dict(),
            "active_replay_id": self._active_replay_id,
            "progress": dict(self._last_progress),
        }
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# API publique
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
def send_message(self, text: str) -> Dict[str, Any]:
    """Handle an incoming user message.

    Three cases depending on the current state:
      1. awaiting_confirmation -> the text is interpreted as YES/NO
      2. executing             -> message is logged, no new action
      3. idle/done/error       -> new instruction, sent to the TaskPlanner

    Args:
        text: Raw user input; empty/whitespace-only input is rejected.

    Returns:
        Dict with at least ``ok`` and ``state``.
    """
    text = (text or "").strip()
    if not text:
        return {
            "ok": False,
            "error": "Message vide",
            "state": self._state,
        }

    with self._lock:
        # Case 1: a plan is pending -> treat the text as a confirmation reply.
        if self._state == STATE_AWAITING_CONFIRMATION:
            return self._handle_confirmation_reply(text)

        # Case 2: busy executing -> acknowledge, but take no action.
        if self._state == STATE_EXECUTING:
            self._append(ROLE_USER, text)
            self._append(
                ROLE_LEA,
                "Je suis en train d'exécuter le workflow. Un instant…",
            )
            return {"ok": True, "state": self._state}

        # Case 3: new instruction.
        self._append(ROLE_USER, text)
        self._set_state(STATE_PLANNING)

    # BUGFIX: call the planner OUTSIDE the lock. The original returned
    # self._plan_and_reply(text) from inside the `with self._lock:` block
    # (despite its own comment), so a slow VLM call blocked every
    # get_snapshot()/state read for its whole duration. _plan_and_reply
    # re-acquires the lock only for its own state mutations.
    return self._plan_and_reply(text)
|
||||
|
||||
def confirm(self, confirmed: bool = True) -> Dict[str, Any]:
    """Confirm (True) or refuse (False) execution of the pending plan.

    Args:
        confirmed: True to launch the plan, False to cancel it.

    Returns:
        Dict with ``ok``, ``state`` and, on a decision, ``confirmed`` or
        the replay launch result.
    """
    with self._lock:
        if self._state != STATE_AWAITING_CONFIRMATION:
            return {
                "ok": False,
                "error": f"Pas de plan en attente (état={self._state})",
                "state": self._state,
            }

        if not confirmed:
            self._append(
                ROLE_LEA,
                "D'accord, j'annule. Dites-moi autre chose quand vous voulez.",
            )
            self._pending_plan = None
            self._set_state(STATE_IDLE)
            return {"ok": True, "state": self._state, "confirmed": False}

        plan = self._pending_plan
        if plan is None:
            # Defensive: state said "awaiting" but no plan is stored.
            self._set_state(STATE_IDLE)
            return {
                "ok": False,
                "error": "Aucun plan à confirmer",
                "state": self._state,
            }

        # CONSISTENCY FIX: clear the pending plan once it is accepted,
        # exactly as _handle_confirmation_reply does on the YES path.
        self._pending_plan = None
        self._set_state(STATE_EXECUTING)

    # BUGFIX: launch the replay OUTSIDE the lock — the original called
    # _execute_plan from inside the `with self._lock:` block although its
    # comment claimed otherwise, blocking concurrent readers during the
    # (potentially slow) launch.
    return self._execute_plan(plan)
|
||||
|
||||
def refresh_progress(self) -> Dict[str, Any]:
    """Poll the status of the running replay and update the session.

    Called by the client (polling) to fetch execution updates. When the
    replay reports a terminal status, the session transitions to done or
    error and the matching Léa message is appended.
    """
    with self._lock:
        if self._state != STATE_EXECUTING or not self._active_replay_id:
            return {"ok": True, "state": self._state, "progress": self._last_progress}
        replay_id = self._active_replay_id
        provider = self._status_provider

    if provider is None:
        return {"ok": True, "state": self._state, "progress": {}}

    # Query the provider outside the lock — it may perform I/O.
    try:
        status = provider(replay_id) or {}
    except Exception as exc:
        logger.warning(f"ChatSession: status_provider erreur: {exc}")
        status = {}

    with self._lock:
        self._last_progress = status
        self.updated_at = time.time()

        # Terminal-state detection from the replay status payload.
        replay_status = str(status.get("status", "")).lower()
        done_count = status.get("completed_actions", 0)
        total_count = status.get("total_actions", 0)

        if replay_status in ("done", "completed", "finished", "success"):
            if total_count:
                summary = f"Workflow terminé ! {done_count}/{total_count} actions réussies."
            else:
                summary = "Workflow terminé."
            self._append(ROLE_LEA, summary, meta={"progress": dict(status)})
            self._set_state(STATE_DONE)
            self._active_replay_id = ""
        elif replay_status in ("failed", "error", "aborted"):
            err = status.get("error") or status.get("message") or "Erreur inconnue"
            self._append(
                ROLE_LEA,
                f"Le workflow a échoué : {err}",
                meta={"progress": dict(status)},
            )
            self._set_state(STATE_ERROR)
            self._active_replay_id = ""
        elif replay_status == "paused_need_help":
            self._append(
                ROLE_LEA,
                "Je suis bloquée sur une action, j'ai besoin d'aide…",
                meta={"progress": dict(status)},
            )
            # Stay in executing so the operator can resume.
        # Any other status: still running, no message appended.

        return {
            "ok": True,
            "state": self._state,
            "progress": dict(self._last_progress),
        }
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Logique interne
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
def _plan_and_reply(self, instruction: str) -> Dict[str, Any]:
    """Run TaskPlanner.understand() and build Léa's reply.

    Degrades gracefully: planner missing, provider failure or an
    un-understood instruction each produce a polite error message and
    move the session to the error state.
    """
    plan = None
    error_msg = ""

    if self._task_planner is None:
        error_msg = "Planificateur indisponible"
    else:
        try:
            workflows: List[Dict[str, Any]] = []
            if self._workflows_provider is not None:
                try:
                    workflows = self._workflows_provider() or []
                except Exception as exc:
                    logger.warning(f"ChatSession: workflows_provider erreur: {exc}")
                    workflows = []

            plan = self._task_planner.understand(
                instruction=instruction,
                available_workflows=workflows,
            )
        except Exception as exc:
            logger.warning(f"ChatSession: TaskPlanner.understand erreur: {exc}")
            error_msg = f"Erreur de compréhension : {exc}"

    # No plan at all (planner down, exception, ...): graceful fallback.
    if plan is None:
        with self._lock:
            self._append(
                ROLE_LEA,
                f"Désolée, je n'arrive pas à comprendre pour l'instant. {error_msg}".strip(),
                meta={"error": error_msg},
            )
            self._set_state(STATE_ERROR)
            return {
                "ok": False,
                "state": self._state,
                "error": error_msg,
            }

    # Planner answered but did not understand the instruction.
    if not plan.understood:
        reason = plan.error or "je n'ai pas compris votre demande"
        with self._lock:
            self._append(
                ROLE_LEA,
                (
                    f"Désolée, {reason}. "
                    "Pouvez-vous reformuler ? Je connais les workflows que vous m'avez appris."
                ),
                meta={"plan": plan.to_dict()},
            )
            self._set_state(STATE_ERROR)
            return {
                "ok": False,
                "state": self._state,
                "plan": plan.to_dict(),
                "error": reason,
            }

    # Plan understood: phrase the proposal and wait for confirmation.
    proposal = self._format_proposal(plan)
    with self._lock:
        self._pending_plan = plan
        self._append(ROLE_LEA, proposal, meta={"plan": plan.to_dict()})
        self._set_state(STATE_AWAITING_CONFIRMATION)
        return {
            "ok": True,
            "state": self._state,
            "plan": plan.to_dict(),
            "message": proposal,
        }
|
||||
|
||||
@staticmethod
def _format_proposal(plan: Any) -> str:
    """Build the French confirmation message for a TaskPlan."""
    out: List[str] = [f"J'ai compris : « {plan.instruction} »."]

    if plan.workflow_name:
        # Matched a recorded workflow: show it with its confidence.
        pct = int(round((plan.match_confidence or 0.0) * 100))
        out.append(
            f"Je vais utiliser le workflow « {plan.workflow_name} »"
            f" (confiance {pct}%)."
        )
    elif plan.mode == "free" and plan.steps:
        # Free-mode plan: list up to five steps.
        out.append(
            f"Je n'ai pas de workflow enregistré pour ça, "
            f"mais j'ai planifié {len(plan.steps)} étape(s) :"
        )
        for idx, step in enumerate(plan.steps[:5], 1):
            if isinstance(step, dict):
                desc = step.get("description", "")
            else:
                desc = str(step)
            out.append(f"  {idx}. {desc}")
        extra = len(plan.steps) - 5
        if extra > 0:
            out.append(f"  … et {extra} autre(s) étape(s).")
    else:
        out.append("Je n'ai pas de plan d'action clair pour cette demande.")

    if plan.parameters:
        rendered = ", ".join(f"{k}={v}" for k, v in plan.parameters.items())
        out.append(f"Paramètres détectés : {rendered}.")

    if plan.is_loop:
        source = plan.loop_source or "éléments à traiter"
        out.append(f"Traitement en boucle sur : {source}.")

    out.append("")
    out.append("Est-ce que je peux y aller ? (oui / non)")
    return "\n".join(out)
|
||||
|
||||
def _handle_confirmation_reply(self, text: str) -> Dict[str, Any]:
    """Interpret a user message as a YES/NO answer to the pending plan.

    Called by send_message() while the session lock is held; the lock is
    re-entrant (RLock), so chaining into _execute_plan() here is safe.
    A message matching neither token set asks the user to answer again.
    """
    self._append(ROLE_USER, text)

    yes_tokens = {"oui", "yes", "ok", "y", "go", "vas-y", "allez", "allez-y", "confirme", "confirmer", "continue"}
    no_tokens = {"non", "no", "annule", "annuler", "stop", "arrête", "arrete", "abandonne", "abandonner"}

    normalized = text.strip().lower().rstrip("!.?")

    def _matches(tokens):
        # Exact token, or token used as the first word of the reply.
        return normalized in tokens or any(
            normalized.startswith(tok + " ") for tok in tokens
        )

    # The two token sets are disjoint even as first-word prefixes, so the
    # check order does not change which replies match.
    if _matches(no_tokens):
        self._append(
            ROLE_LEA,
            "D'accord, j'annule. Dites-moi autre chose quand vous voulez.",
        )
        self._pending_plan = None
        self._set_state(STATE_IDLE)
        return {"ok": True, "state": self._state, "confirmed": False}

    if not _matches(yes_tokens):
        self._append(
            ROLE_LEA,
            "Je n'ai pas compris votre réponse. Répondez « oui » pour lancer ou « non » pour annuler.",
        )
        return {"ok": True, "state": self._state, "needs_clarification": True}

    # YES: hand the plan over to the executor.
    plan = self._pending_plan
    self._pending_plan = None
    self._set_state(STATE_EXECUTING)
    return self._execute_plan(plan)
|
||||
|
||||
def _execute_plan(self, plan: Any) -> Dict[str, Any]:
    """Launch the replay for a confirmed plan.

    Announces the start, invokes the injected replay_callback, and
    records the resulting replay id. Free-mode plans (no matched
    workflow) are rejected with an explicit error message.

    Args:
        plan: The confirmed TaskPlan (may be None, handled defensively).

    Returns:
        Dict with ``ok``, ``state`` and either ``replay_id`` or ``error``.
    """
    if plan is None:
        with self._lock:
            self._append(ROLE_LEA, "Rien à exécuter.", meta={})
            self._set_state(STATE_IDLE)
            return {"ok": False, "state": self._state, "error": "Aucun plan"}

    if self._replay_callback is None:
        with self._lock:
            self._append(
                ROLE_LEA,
                "Je ne peux pas exécuter : aucun moteur d'exécution n'est configuré.",
            )
            self._set_state(STATE_ERROR)
            return {
                "ok": False,
                "state": self._state,
                "error": "replay_callback non configuré",
            }

    # Announce the launch.
    with self._lock:
        self._append(
            ROLE_LEA,
            "C'est parti ! Je lance le workflow…",
            meta={"plan": plan.to_dict()},
        )

    # Invoke the callback outside the lock (it may be slow).
    try:
        if plan.workflow_match:
            replay_id = self._replay_callback(
                session_id=plan.workflow_match,
                machine_id=self.machine_id,
                params=plan.parameters,
            )
        else:
            # Free mode is not wired into chat yet — refuse cleanly.
            # FIX: removed the dead `replay_id = ""` assignment that
            # preceded this unconditional raise in the original.
            raise RuntimeError(
                "Mode libre non supporté pour l'instant — "
                "entraînez un workflow pour cette tâche"
            )
    except Exception as exc:
        with self._lock:
            self._append(
                ROLE_LEA,
                f"Je n'ai pas pu lancer le workflow : {exc}",
                meta={"error": str(exc)},
            )
            self._set_state(STATE_ERROR)
            return {"ok": False, "state": self._state, "error": str(exc)}

    with self._lock:
        self._active_replay_id = replay_id or ""
        return {
            "ok": True,
            "state": self._state,
            "replay_id": self._active_replay_id,
        }
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
def _append(self, role: str, content: str, meta: Optional[Dict[str, Any]] = None) -> None:
    """Append a message to the history (caller must hold the lock)."""
    message = ChatMessage(role=role, content=content, meta=meta or {})
    self._messages.append(message)
    self.updated_at = message.timestamp

def _set_state(self, new_state: str) -> None:
    """Transition to new_state (caller must hold the lock).

    Raises:
        ValueError: If new_state is not one of VALID_STATES.
    """
    if new_state not in VALID_STATES:
        raise ValueError(f"État invalide : {new_state}")
    previous = self._state
    self._state = new_state
    self.updated_at = time.time()
    if previous != new_state:
        logger.debug(
            f"ChatSession {self.session_id}: {previous} -> {new_state}"
        )
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ChatManager — registre en mémoire des sessions
|
||||
# =============================================================================
|
||||
|
||||
class ChatManager:
|
||||
"""Registre en mémoire des sessions de chat.
|
||||
|
||||
Thread-safe. Utilisé par l'API FastAPI pour gérer plusieurs
|
||||
conversations simultanées.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
task_planner: Any = None,
|
||||
workflows_provider: Optional[Callable[[], List[Dict[str, Any]]]] = None,
|
||||
replay_callback: Optional[Callable[..., str]] = None,
|
||||
status_provider: Optional[Callable[[str], Dict[str, Any]]] = None,
|
||||
):
|
||||
self._task_planner = task_planner
|
||||
self._workflows_provider = workflows_provider
|
||||
self._replay_callback = replay_callback
|
||||
self._status_provider = status_provider
|
||||
self._sessions: Dict[str, ChatSession] = {}
|
||||
self._lock = threading.RLock()
|
||||
|
||||
def create_session(self, machine_id: str = "default") -> ChatSession:
|
||||
"""Créer une nouvelle session de chat."""
|
||||
session = ChatSession(
|
||||
task_planner=self._task_planner,
|
||||
workflows_provider=self._workflows_provider,
|
||||
replay_callback=self._replay_callback,
|
||||
status_provider=self._status_provider,
|
||||
machine_id=machine_id,
|
||||
)
|
||||
with self._lock:
|
||||
self._sessions[session.session_id] = session
|
||||
logger.info(f"ChatManager: session créée {session.session_id}")
|
||||
return session
|
||||
|
||||
def get_session(self, session_id: str) -> Optional[ChatSession]:
|
||||
with self._lock:
|
||||
return self._sessions.get(session_id)
|
||||
|
||||
def list_sessions(self) -> List[Dict[str, Any]]:
|
||||
with self._lock:
|
||||
return [
|
||||
{
|
||||
"session_id": s.session_id,
|
||||
"state": s.state,
|
||||
"machine_id": s.machine_id,
|
||||
"created_at": s.created_at,
|
||||
"updated_at": s.updated_at,
|
||||
"message_count": len(s.get_history()),
|
||||
}
|
||||
for s in self._sessions.values()
|
||||
]
|
||||
|
||||
def delete_session(self, session_id: str) -> bool:
|
||||
with self._lock:
|
||||
return self._sessions.pop(session_id, None) is not None
|
||||
|
||||
def cleanup_old(self, max_age_s: float = 3600 * 24) -> int:
|
||||
"""Supprimer les sessions inactives depuis max_age_s secondes."""
|
||||
now = time.time()
|
||||
removed = 0
|
||||
with self._lock:
|
||||
to_delete = [
|
||||
sid for sid, s in self._sessions.items()
|
||||
if (now - s.updated_at) > max_age_s
|
||||
]
|
||||
for sid in to_delete:
|
||||
del self._sessions[sid]
|
||||
removed += 1
|
||||
return removed
|
||||
@@ -3,35 +3,81 @@
|
||||
Contexte métier pour les appels VLM — rend Léa experte du domaine.
|
||||
|
||||
Chaque workflow est associé à un domaine métier (médical, comptable, etc.)
|
||||
qui enrichit TOUS les prompts VLM (Observer, Critic, acteur, enrichissement).
|
||||
qui enrichit TOUS les prompts VLM (Observer, Critic, acteur, enrichissement)
|
||||
ET la personnalité de Léa (résumés, questions de clarification, rapports).
|
||||
|
||||
Un gemma4 qui sait qu'il regarde un DPI et que l'utilisateur fait du codage
|
||||
CIM-10 prend des décisions bien meilleures qu'un VLM générique.
|
||||
CIM-10 prend des décisions bien meilleures qu'un VLM générique. Et Léa qui
|
||||
dit "J'ai codé 14 dossiers sur 15" plutôt que "J'ai exécuté 112 clics" est
|
||||
bien plus utile pour un TIM.
|
||||
|
||||
Premier domaine : TIM (Technicien d'Information Médicale)
|
||||
- Logiciels DPI/DMS (dossier patient informatisé)
|
||||
- Codage CIM-10 / CCAM / GHM
|
||||
- Lecture de comptes rendus médicaux
|
||||
- Validation des séjours / RSS / RSA
|
||||
Domaines pré-configurés :
|
||||
- tim_codage : TIM, codage CIM-10 / CCAM / PMSI, DPI
|
||||
- comptabilite : factures, TVA, OCR, plans comptables
|
||||
- rh_paie : fiches de paie, employés, charges sociales
|
||||
- stocks_logistique : bons, commandes, réceptions, inventaires
|
||||
- generic : fallback bureautique
|
||||
|
||||
Usage :
|
||||
Usage basique :
|
||||
ctx = get_domain_context("tim_codage")
|
||||
prompt = f"{ctx.system_prompt}\n\n{user_prompt}"
|
||||
prompt = ctx.enrich_prompt(user_prompt, role="actor")
|
||||
|
||||
Usage langage métier :
|
||||
ctx = get_domain_context("tim_codage")
|
||||
phrase = ctx.summarize_action("click", {"target": "DP"})
|
||||
# → "saisir le diagnostic principal"
|
||||
|
||||
question = ctx.pose_clarification_question(
|
||||
{"blocked_on": "target_not_found", "target": "Fichier patient",
|
||||
"params": {"nom_patient": "Mme Durand"}}
|
||||
)
|
||||
# → "Je ne trouve pas le dossier de Mme Durand..."
|
||||
|
||||
rapport = ctx.describe_workflow_outcome(
|
||||
workflow_name="Codage séjours janvier",
|
||||
success=True,
|
||||
items_count=15,
|
||||
failed_count=1,
|
||||
)
|
||||
# → "J'ai codé 14 dossiers sur 15..."
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import unicodedata
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import Any, Dict, List, Mapping, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _strip_accents(s: str) -> str:
|
||||
"""Supprimer les accents pour les comparaisons insensibles aux diacritiques."""
|
||||
if not s:
|
||||
return ""
|
||||
nkfd = unicodedata.normalize("NFKD", s)
|
||||
return "".join(c for c in nkfd if not unicodedata.combining(c))
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Data class
|
||||
# =========================================================================
|
||||
|
||||
|
||||
@dataclass
|
||||
class DomainContext:
|
||||
"""Contexte métier pour un domaine spécifique."""
|
||||
domain_id: str # Identifiant unique (tim_codage, comptabilite, etc.)
|
||||
name: str # Nom lisible (Codage médical TIM)
|
||||
description: str # Description courte du métier
|
||||
"""Contexte métier pour un domaine spécifique.
|
||||
|
||||
Contient à la fois les hints pour les prompts VLM et les éléments de
|
||||
personnalité de Léa (langage métier, questions, rapports).
|
||||
"""
|
||||
|
||||
domain_id: str # tim_codage, comptabilite, ...
|
||||
name: str # Nom lisible
|
||||
description: str # Description courte
|
||||
|
||||
# Prompt système injecté dans TOUS les appels VLM
|
||||
system_prompt: str = ""
|
||||
@@ -39,18 +85,47 @@ class DomainContext:
|
||||
# Vocabulaire métier (termes que le VLM doit connaître)
|
||||
vocabulary: List[str] = field(default_factory=list)
|
||||
|
||||
# Applications connues (noms de logiciels que le VLM peut rencontrer)
|
||||
# Applications connues
|
||||
known_apps: List[str] = field(default_factory=list)
|
||||
|
||||
# Écrans types (descriptions des écrans courants du métier)
|
||||
# Écrans types
|
||||
screen_patterns: Dict[str, str] = field(default_factory=dict)
|
||||
|
||||
# --- Personnalité Léa -------------------------------------------------
|
||||
|
||||
# Mapping d'actions techniques (click/type/key_combo) vers description métier,
|
||||
# indexé par un mot-clé lisible trouvé dans la cible/texte.
|
||||
# Format : { (action_type, keyword_lower) : "description métier" }
|
||||
# Exemple : ("click", "dp") → "saisir le diagnostic principal"
|
||||
common_actions: Dict[str, str] = field(default_factory=dict)
|
||||
|
||||
# Synonymes métier : technique → forme lisible
|
||||
# Exemple : {"dp": "diagnostic principal", "das": "diagnostics associés"}
|
||||
vocabulary_synonyms: Dict[str, str] = field(default_factory=dict)
|
||||
|
||||
# Templates de questions de clarification (selon la raison de blocage).
|
||||
# Clé = identifiant de blocage ("target_not_found", "ambiguous_field", ...)
|
||||
# Valeur = template f-string (champs: {target}, {app}, {nom_patient}, ...)
|
||||
clarification_templates: Dict[str, str] = field(default_factory=dict)
|
||||
|
||||
# Templates de résumés de fin de workflow.
|
||||
# Clés attendues :
|
||||
# - "success" : tout a marché
|
||||
# - "partial" : succès partiel (failed_count > 0)
|
||||
# - "failure" : échec complet
|
||||
# - "success_one" : cas 1 élément (pour éviter "1 dossiers")
|
||||
# - "item_singular" : libellé d'un item ("dossier")
|
||||
# - "item_plural" : libellé au pluriel ("dossiers")
|
||||
summary_templates: Dict[str, str] = field(default_factory=dict)
|
||||
|
||||
# ------------------------------------------------------------------ API
|
||||
|
||||
def enrich_prompt(self, prompt: str, role: str = "") -> str:
|
||||
"""Enrichir un prompt avec le contexte métier.
|
||||
|
||||
Args:
|
||||
prompt: Le prompt original
|
||||
role: Le rôle du VLM (observer, critic, actor, enrichment)
|
||||
role: Le rôle du VLM (observer, critic, actor, enrichment)
|
||||
"""
|
||||
parts = []
|
||||
|
||||
@@ -65,6 +140,310 @@ class DomainContext:
|
||||
parts.append(prompt)
|
||||
return "\n\n".join(parts)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Personnalité : résumé d'action en langage métier
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def summarize_action(
    self,
    action: str,
    params: Optional[Mapping[str, Any]] = None,
) -> str:
    """Describe a technical action in business-domain French.

    Args:
        action: Action type ("click", "type", "key_combo", "wait", "scroll").
        params: Action parameters (target, text, keys, ...).

    Returns:
        A French, domain-oriented phrase; falls back to generic wording
        when no mapping matches.

    Examples (tim_codage domain):
        click on "DP"      -> "saisir le diagnostic principal"
        type "E11.9"       -> "saisir le code CIM-10 E11.9"
        click on "Valider" -> "valider le codage"
    """
    params = dict(params or {})
    target = str(params.get("target") or params.get("description") or "").strip()
    text = str(params.get("text") or "").strip()
    keys = params.get("keys") or []

    searchable = _strip_accents(f"{target} {text}".lower())

    # 1) Keyword match against common_actions ("click:word" -> label),
    #    case- and accent-insensitive.
    for mapping_key, label in self.common_actions.items():
        if ":" not in mapping_key:
            continue
        mapped_action, keyword = mapping_key.split(":", 1)
        if mapped_action != action:
            continue
        keyword_norm = _strip_accents(keyword.lower())
        if keyword_norm and keyword_norm in searchable:
            return label

    # 2) Otherwise, rewrite the target through the domain synonyms.
    friendly_target = self._apply_synonyms(target)

    if action == "click":
        return f"cliquer sur {friendly_target}" if friendly_target else "cliquer"

    if action == "type":
        if text and friendly_target:
            return f"saisir « {text} » dans {friendly_target}"
        if text:
            return f"saisir « {text} »"
        return "saisir du texte"

    if action == "key_combo":
        if isinstance(keys, (list, tuple)) and keys:
            return f"utiliser le raccourci {'+'.join(str(k) for k in keys)}"
        return "utiliser un raccourci clavier"

    if action == "wait":
        return "attendre le chargement de l'écran"

    if action == "scroll":
        return "faire défiler l'écran"

    # Last-resort fallback.
    return f"effectuer l'action {action}"
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Personnalité : question de clarification
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def pose_clarification_question(
|
||||
self,
|
||||
context: Optional[Mapping[str, Any]] = None,
|
||||
) -> str:
|
||||
"""Générer une question pertinente quand Léa bloque.
|
||||
|
||||
Cherche un template dans clarification_templates selon :
|
||||
- context["blocked_on"] (ex: "target_not_found", "ambiguous_field")
|
||||
- context["target"] (la cible visée)
|
||||
- paramètres du workflow (params) disponibles pour substitution
|
||||
|
||||
Args:
|
||||
context: Dictionnaire libre contenant au minimum 'blocked_on' ou
|
||||
'target'. Peut contenir 'params' pour la substitution.
|
||||
|
||||
Returns:
|
||||
Question en français. Fallback générique si aucun template ne
|
||||
correspond.
|
||||
"""
|
||||
ctx = dict(context or {})
|
||||
blocked_on = str(ctx.get("blocked_on") or "").strip()
|
||||
target = str(ctx.get("target") or "").strip()
|
||||
params = dict(ctx.get("params") or {})
|
||||
|
||||
# Dictionnaire de substitution : champs du context + params + target
|
||||
subs: Dict[str, Any] = {
|
||||
"target": target,
|
||||
"target_friendly": self._apply_synonyms(target) or target or "cet élément",
|
||||
"app": ctx.get("app", ""),
|
||||
}
|
||||
subs.update(params)
|
||||
|
||||
# 1) Essai par clé exacte de blocage
|
||||
template = self.clarification_templates.get(blocked_on, "")
|
||||
|
||||
# 2) Essai par cible (si la cible matche un mot-clé connu)
|
||||
if not template and target:
|
||||
low = target.lower()
|
||||
for key, tpl in self.clarification_templates.items():
|
||||
if key.startswith("target:") and key.split(":", 1)[1].lower() in low:
|
||||
template = tpl
|
||||
break
|
||||
|
||||
# 3) Template générique du domaine
|
||||
if not template:
|
||||
template = self.clarification_templates.get("default", "")
|
||||
|
||||
if template:
|
||||
try:
|
||||
return template.format_map(_SafeDict(subs))
|
||||
except Exception as e: # pragma: no cover - format inattendu
|
||||
logger.warning("clarification template format error: %s", e)
|
||||
|
||||
# 4) Fallback ultime cross-domaine
|
||||
friendly = subs["target_friendly"]
|
||||
return (
|
||||
f"Je ne trouve pas {friendly}. "
|
||||
f"Peux-tu me le montrer ou me confirmer que c'est le bon écran ?"
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Personnalité : rapport final
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def describe_workflow_outcome(
|
||||
self,
|
||||
workflow_name: str = "",
|
||||
success: bool = True,
|
||||
items_count: int = 1,
|
||||
failed_count: int = 0,
|
||||
elapsed_s: float = 0.0,
|
||||
extra: Optional[Mapping[str, Any]] = None,
|
||||
use_llm: bool = False,
|
||||
) -> str:
|
||||
"""Générer un rapport de fin de workflow en langage métier.
|
||||
|
||||
Args:
|
||||
workflow_name: Nom du workflow ("Codage janvier").
|
||||
success: True si le workflow a globalement réussi.
|
||||
items_count: Nombre d'items traités (ex: 15 dossiers). 1 par défaut.
|
||||
failed_count: Nombre d'items en échec.
|
||||
elapsed_s: Durée totale (secondes).
|
||||
extra: Infos supplémentaires (hint pour le LLM).
|
||||
use_llm: Si True, tenter un appel à gemma4 pour produire le
|
||||
rapport. Fallback sur les templates en cas d'échec.
|
||||
|
||||
Returns:
|
||||
Rapport en français. Toujours une chaîne, jamais None.
|
||||
"""
|
||||
extra = dict(extra or {})
|
||||
done = max(0, items_count - failed_count)
|
||||
|
||||
item_sg = self.summary_templates.get("item_singular", "élément")
|
||||
item_pl = self.summary_templates.get("item_plural", "éléments")
|
||||
item_word = item_sg if done <= 1 else item_pl
|
||||
|
||||
# Données disponibles pour les templates
|
||||
subs = {
|
||||
"workflow_name": workflow_name or "le workflow",
|
||||
"items_count": items_count,
|
||||
"done": done,
|
||||
"failed": failed_count,
|
||||
"item_singular": item_sg,
|
||||
"item_plural": item_pl,
|
||||
"item_word": item_word,
|
||||
"elapsed_s": int(elapsed_s),
|
||||
}
|
||||
subs.update(extra)
|
||||
|
||||
# Choisir le template adéquat
|
||||
if not success and failed_count >= items_count:
|
||||
key = "failure"
|
||||
elif failed_count > 0:
|
||||
key = "partial"
|
||||
elif items_count == 1:
|
||||
key = "success_one" if "success_one" in self.summary_templates else "success"
|
||||
else:
|
||||
key = "success"
|
||||
|
||||
template = self.summary_templates.get(key, "")
|
||||
|
||||
# Optionnel : raffiner via gemma4
|
||||
if use_llm:
|
||||
llm_text = self._llm_refine_summary(template, subs, success)
|
||||
if llm_text:
|
||||
return llm_text
|
||||
|
||||
if template:
|
||||
try:
|
||||
return template.format_map(_SafeDict(subs))
|
||||
except Exception as e: # pragma: no cover
|
||||
logger.warning("summary template format error: %s", e)
|
||||
|
||||
# Fallback générique
|
||||
if success:
|
||||
if items_count <= 1:
|
||||
return f"C'est fait, j'ai terminé « {workflow_name or 'le workflow'} »."
|
||||
return (
|
||||
f"J'ai traité {done} {item_word} sur {items_count}"
|
||||
+ (f", {failed_count} en échec." if failed_count else ".")
|
||||
)
|
||||
return (
|
||||
f"Je n'ai pas pu terminer « {workflow_name or 'le workflow'} ». "
|
||||
f"Je te rends la main."
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Helpers internes
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _apply_synonyms(self, text: str) -> str:
|
||||
"""Remplacer les sigles/termes techniques par leur forme métier.
|
||||
|
||||
Cherche mots entiers (word boundaries) en insensible à la casse.
|
||||
"""
|
||||
if not text or not self.vocabulary_synonyms:
|
||||
return text
|
||||
result = text
|
||||
for short, full in self.vocabulary_synonyms.items():
|
||||
if not short:
|
||||
continue
|
||||
pattern = r"\b" + re.escape(short) + r"\b"
|
||||
result = re.sub(pattern, full, result, flags=re.IGNORECASE)
|
||||
return result
|
||||
|
||||
def _llm_refine_summary(
|
||||
self,
|
||||
template: str,
|
||||
subs: Dict[str, Any],
|
||||
success: bool,
|
||||
) -> str:
|
||||
"""Tenter un raffinement du rapport via gemma4.
|
||||
|
||||
Appel best-effort : toute erreur retourne "" et le caller retombe sur
|
||||
le template brut. Isolé dans une méthode pour pouvoir le monkey-patcher
|
||||
dans les tests.
|
||||
"""
|
||||
try:
|
||||
import requests as _requests
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
port = os.environ.get("GEMMA4_PORT", "11435")
|
||||
url = f"http://localhost:{port}/api/chat"
|
||||
|
||||
base = ""
|
||||
if template:
|
||||
try:
|
||||
base = template.format_map(_SafeDict(subs))
|
||||
except Exception:
|
||||
base = ""
|
||||
|
||||
prompt = (
|
||||
f"Tu es Léa, une assistante RPA dans le domaine : {self.name}.\n"
|
||||
f"Tu viens de terminer un workflow. Résume en UNE à DEUX phrases "
|
||||
f"en langage métier, chaleureux mais professionnel, en français.\n\n"
|
||||
f"Données :\n"
|
||||
f"- workflow : {subs.get('workflow_name', '')}\n"
|
||||
f"- items traités : {subs.get('done', 0)} / {subs.get('items_count', 0)}\n"
|
||||
f"- échecs : {subs.get('failed', 0)}\n"
|
||||
f"- succès global : {'oui' if success else 'non'}\n"
|
||||
f"- durée : {subs.get('elapsed_s', 0)}s\n\n"
|
||||
f"Base suggérée (tu peux la reformuler) : {base or '(aucune)'}\n\n"
|
||||
f"Ta phrase :"
|
||||
)
|
||||
|
||||
try:
|
||||
resp = _requests.post(
|
||||
url,
|
||||
json={
|
||||
"model": "gemma4:e4b",
|
||||
"messages": [{"role": "user", "content": prompt}],
|
||||
"stream": False,
|
||||
"options": {"temperature": 0.3, "num_predict": 200},
|
||||
},
|
||||
timeout=30,
|
||||
)
|
||||
if not resp.ok:
|
||||
return ""
|
||||
content = resp.json().get("message", {}).get("content", "").strip()
|
||||
# Nettoyage basique : supprimer guillemets typographiques en bord
|
||||
content = content.strip("\"' \n")
|
||||
return content
|
||||
except Exception as e:
|
||||
logger.debug("gemma4 refine summary failed: %s", e)
|
||||
return ""
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"domain_id": self.domain_id,
|
||||
@@ -72,9 +451,24 @@ class DomainContext:
|
||||
"description": self.description,
|
||||
"known_apps": self.known_apps,
|
||||
"vocabulary_count": len(self.vocabulary),
|
||||
"common_actions_count": len(self.common_actions),
|
||||
"has_clarification_templates": bool(self.clarification_templates),
|
||||
"has_summary_templates": bool(self.summary_templates),
|
||||
}
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Utilitaires
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class _SafeDict(dict):
|
||||
"""dict pour str.format_map qui retourne "" pour les clés manquantes."""
|
||||
|
||||
def __missing__(self, key): # type: ignore[override]
|
||||
return ""
|
||||
|
||||
|
||||
# Hints par rôle VLM — adaptés au contexte métier
|
||||
_ROLE_HINTS = {
|
||||
"observer": (
|
||||
@@ -100,6 +494,7 @@ _ROLE_HINTS = {
|
||||
# Domaines pré-configurés
|
||||
# =========================================================================
|
||||
|
||||
|
||||
_TIM_CODAGE = DomainContext(
|
||||
domain_id="tim_codage",
|
||||
name="Codage médical TIM",
|
||||
@@ -156,8 +551,405 @@ _TIM_CODAGE = DomainContext(
|
||||
"recherche_code": "Recherche de code CIM-10 ou CCAM (champ de recherche + arborescence)",
|
||||
"validation_ghm": "Écran de validation du groupage avec GHM calculé et valorisation",
|
||||
},
|
||||
vocabulary_synonyms={
|
||||
"DP": "diagnostic principal",
|
||||
"DAS": "diagnostics associés",
|
||||
"CMA": "complication associée",
|
||||
"UM": "unité médicale",
|
||||
"CR": "compte rendu",
|
||||
"RSS": "résumé de sortie",
|
||||
"RSA": "résumé anonymisé",
|
||||
"GHM": "groupe homogène de malades",
|
||||
"IPP": "identifiant patient",
|
||||
},
|
||||
common_actions={
|
||||
"click:dp": "saisir le diagnostic principal",
|
||||
"click:diagnostic principal": "saisir le diagnostic principal",
|
||||
"click:das": "ajouter un diagnostic associé",
|
||||
"click:ccam": "saisir un acte CCAM",
|
||||
"click:valider": "valider le codage",
|
||||
"click:valider le codage": "valider le codage",
|
||||
"click:grouper": "calculer le GHM",
|
||||
"click:ghm": "consulter le groupage GHM",
|
||||
"click:dossier patient": "ouvrir le dossier patient",
|
||||
"click:fiche patient": "ouvrir la fiche patient",
|
||||
"click:compte rendu": "consulter le compte rendu",
|
||||
"click:cr": "consulter le compte rendu",
|
||||
"click:rechercher": "rechercher un code CIM-10",
|
||||
"type:cim": "saisir un code CIM-10",
|
||||
},
|
||||
clarification_templates={
|
||||
"default": (
|
||||
"Je ne trouve pas {target_friendly}. "
|
||||
"Tu peux me montrer où il se trouve dans le dossier ?"
|
||||
),
|
||||
"target_not_found": (
|
||||
"Je ne trouve pas {target_friendly}. "
|
||||
"Le dossier de {nom_patient} est peut-être déjà codé ou archivé ?"
|
||||
),
|
||||
"target:fichier patient": (
|
||||
"Je ne trouve pas le dossier de {nom_patient}. "
|
||||
"Il est peut-être archivé ? Tu peux me le montrer ?"
|
||||
),
|
||||
"target:dossier": (
|
||||
"Je ne trouve pas le dossier de {nom_patient}. "
|
||||
"Il est peut-être archivé ? Tu peux me le montrer ?"
|
||||
),
|
||||
"ambiguous_code": (
|
||||
"Le compte rendu mentionne plusieurs codes possibles. "
|
||||
"Est-ce le code CIM-10 {code_a} ou {code_b} que tu préfères ?"
|
||||
),
|
||||
"no_cr": (
|
||||
"Je ne trouve pas de compte rendu pour {nom_patient}. "
|
||||
"Tu veux que je saute ce dossier ou que je continue sans ?"
|
||||
),
|
||||
},
|
||||
summary_templates={
|
||||
"item_singular": "dossier",
|
||||
"item_plural": "dossiers",
|
||||
"success_one": (
|
||||
"J'ai codé le dossier de {nom_patient} en {elapsed_s}s. "
|
||||
"Tu peux vérifier le groupage GHM."
|
||||
),
|
||||
"success": (
|
||||
"J'ai codé {done} dossiers sur {items_count}. "
|
||||
"Tout est passé sans erreur, tu peux valider le groupage."
|
||||
),
|
||||
"partial": (
|
||||
"J'ai codé {done} dossiers sur {items_count}. "
|
||||
"{failed} sont en attente — codes CIM-10 ambigus, à valider manuellement."
|
||||
),
|
||||
"failure": (
|
||||
"Je n'ai pas pu coder les dossiers de {workflow_name}. "
|
||||
"Je te rends la main, les comptes rendus sont peut-être inaccessibles."
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
_COMPTABILITE = DomainContext(
|
||||
domain_id="comptabilite",
|
||||
name="Comptabilité",
|
||||
description=(
|
||||
"Comptable : saisie de factures fournisseurs et clients, lettrage, "
|
||||
"rapprochement bancaire, déclarations de TVA, bilans, immobilisations."
|
||||
),
|
||||
system_prompt=(
|
||||
"Tu es un assistant expert en comptabilité d'entreprise. "
|
||||
"L'utilisateur est un comptable qui utilise un logiciel de saisie comptable "
|
||||
"(Sage, Cegid, EBP, Quadra, Isacompta) pour saisir des factures, faire "
|
||||
"les rapprochements bancaires, préparer la TVA et les bilans.\n\n"
|
||||
"Vocabulaire du métier :\n"
|
||||
"- Facture : justificatif de vente ou d'achat (numéro, date, HT, TVA, TTC)\n"
|
||||
"- HT/TVA/TTC : montants hors taxes, taxe, toutes taxes\n"
|
||||
"- Compte comptable : numéro du plan comptable général (PCG), ex 401 (fournisseurs), 411 (clients)\n"
|
||||
"- Journal : journal de saisie (achats, ventes, banque, OD)\n"
|
||||
"- Lettrage : association d'une facture avec son paiement\n"
|
||||
"- Rapprochement : comparaison compte comptable / relevé bancaire\n"
|
||||
"- OCR / LAD : reconnaissance automatique des factures scannées\n"
|
||||
"- Écriture : ligne comptable (débit/crédit)\n"
|
||||
"- Exercice : période comptable annuelle\n"
|
||||
"- Bilan / compte de résultat : états financiers\n"
|
||||
"- CA : chiffre d'affaires\n\n"
|
||||
"Écrans courants :\n"
|
||||
"- Saisie d'écritures (numéro de compte, libellé, débit, crédit)\n"
|
||||
"- Import OCR de factures fournisseurs\n"
|
||||
"- Lettrage / rapprochement\n"
|
||||
"- Brouillard / journal\n"
|
||||
"- Balance / grand livre"
|
||||
),
|
||||
vocabulary=[
|
||||
"facture", "HT", "TVA", "TTC", "compte", "journal", "lettrage",
|
||||
"rapprochement", "OCR", "LAD", "écriture", "débit", "crédit",
|
||||
"exercice", "bilan", "compte de résultat", "CA", "immobilisation",
|
||||
"fournisseur", "client", "PCG", "plan comptable",
|
||||
],
|
||||
known_apps=[
|
||||
"Sage", "Cegid", "EBP", "Quadra", "Isacompta", "Ciel Compta",
|
||||
"Odoo", "Pennylane", "Dext", "Agicap",
|
||||
],
|
||||
screen_patterns={
|
||||
"saisie_ecriture": "Saisie d'écriture comptable (compte, libellé, débit, crédit)",
|
||||
"ocr_facture": "Import OCR : zone image + champs extraits (numéro, date, HT, TVA, TTC, fournisseur)",
|
||||
"lettrage": "Liste d'écritures à lettrer (débit vs crédit)",
|
||||
"rapprochement": "Comparaison compte banque / relevé",
|
||||
"balance": "Balance comptable (comptes agrégés avec soldes)",
|
||||
},
|
||||
vocabulary_synonyms={
|
||||
"HT": "montant hors taxes",
|
||||
"TVA": "montant de TVA",
|
||||
"TTC": "montant toutes taxes",
|
||||
"CA": "chiffre d'affaires",
|
||||
"PCG": "plan comptable général",
|
||||
"OD": "opération diverse",
|
||||
},
|
||||
common_actions={
|
||||
"click:valider": "valider l'écriture",
|
||||
"click:enregistrer": "enregistrer la saisie",
|
||||
"click:lettrer": "lettrer les écritures",
|
||||
"click:rapprocher": "rapprocher avec la banque",
|
||||
"click:ocr": "lancer la reconnaissance OCR",
|
||||
"click:facture": "ouvrir la facture",
|
||||
"click:compte": "sélectionner le compte comptable",
|
||||
"type:ht": "saisir le montant hors taxes",
|
||||
"type:tva": "saisir le montant de TVA",
|
||||
"type:ttc": "saisir le montant toutes taxes",
|
||||
},
|
||||
clarification_templates={
|
||||
"default": (
|
||||
"Je ne trouve pas {target_friendly}. "
|
||||
"C'est bien la facture {num_facture} que tu veux saisir ?"
|
||||
),
|
||||
"target_not_found": (
|
||||
"Je ne trouve pas le champ {target_friendly}. "
|
||||
"C'est bien la facture {num_facture} qui doit être saisie ?"
|
||||
),
|
||||
"target:montant": (
|
||||
"Je ne trouve pas le champ « Montant HT ». "
|
||||
"C'est bien la facture {num_facture} que tu veux saisir ?"
|
||||
),
|
||||
"target:tva": (
|
||||
"Je ne trouve pas le champ TVA. Est-ce une facture à taux {taux_tva} % ?"
|
||||
),
|
||||
"ambiguous_account": (
|
||||
"Je ne sais pas sur quel compte imputer : {compte_a} ou {compte_b} ?"
|
||||
),
|
||||
},
|
||||
summary_templates={
|
||||
"item_singular": "facture",
|
||||
"item_plural": "factures",
|
||||
"success_one": (
|
||||
"J'ai saisi la facture {num_facture} en {elapsed_s}s."
|
||||
),
|
||||
"success": (
|
||||
"J'ai saisi {done} factures sur {items_count}. "
|
||||
"Tout est en brouillard, tu peux valider."
|
||||
),
|
||||
"partial": (
|
||||
"J'ai saisi {done} factures sur {items_count}. "
|
||||
"{failed} factures sont en attente — imputation comptable à vérifier."
|
||||
),
|
||||
"failure": (
|
||||
"Je n'ai pas pu saisir les factures de {workflow_name}. "
|
||||
"L'OCR n'a peut-être pas fonctionné, je te rends la main."
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
_RH_PAIE = DomainContext(
|
||||
domain_id="rh_paie",
|
||||
name="Ressources humaines et paie",
|
||||
description=(
|
||||
"Gestionnaire RH / paie : fiches employés, contrats, bulletins de salaire, "
|
||||
"déclarations sociales (DSN), charges, congés, absences."
|
||||
),
|
||||
system_prompt=(
|
||||
"Tu es un assistant expert en gestion RH et paie française. "
|
||||
"L'utilisateur est un gestionnaire RH ou de paie qui utilise un logiciel "
|
||||
"(Silae, Sage Paie, Cegid, ADP, PayFit) pour éditer des bulletins de salaire, "
|
||||
"gérer les contrats, les absences, et envoyer les DSN.\n\n"
|
||||
"Vocabulaire du métier :\n"
|
||||
"- Bulletin de paie : fiche de salaire mensuelle\n"
|
||||
"- DSN : Déclaration Sociale Nominative (mensuelle, transmise à l'URSSAF)\n"
|
||||
"- Brut / Net : salaire avant et après charges\n"
|
||||
"- Charges sociales / patronales : cotisations employeur et salarié\n"
|
||||
"- CDI / CDD : types de contrats\n"
|
||||
"- Période de paie : mois concerné par le bulletin\n"
|
||||
"- SMIC : salaire minimum\n"
|
||||
"- IJSS : indemnités journalières sécurité sociale\n"
|
||||
"- Congés payés : solde de congés\n"
|
||||
"- RTT : réduction du temps de travail\n"
|
||||
"- Saisie sur salaire : retenue judiciaire\n"
|
||||
"- Solde de tout compte : dernier bulletin d'un salarié qui part\n\n"
|
||||
"Écrans courants :\n"
|
||||
"- Fiche employé (identité, contrat, poste, salaire)\n"
|
||||
"- Saisie des variables (heures, absences, primes)\n"
|
||||
"- Bulletin de paie (aperçu avant validation)\n"
|
||||
"- Déclaration DSN\n"
|
||||
"- Gestion des absences / congés"
|
||||
),
|
||||
vocabulary=[
|
||||
"bulletin", "salaire", "brut", "net", "charges sociales", "DSN",
|
||||
"CDI", "CDD", "congés", "RTT", "SMIC", "IJSS", "URSSAF",
|
||||
"employé", "salarié", "contrat", "prime", "heures supplémentaires",
|
||||
"absence", "solde de tout compte", "STC",
|
||||
],
|
||||
known_apps=[
|
||||
"Silae", "Sage Paie", "Cegid Paie", "ADP", "PayFit", "Nibelis",
|
||||
"Cegedim SRH", "Lucca", "HR Access",
|
||||
],
|
||||
screen_patterns={
|
||||
"fiche_employe": "Fiche employé avec identité, contrat, poste",
|
||||
"saisie_variables": "Saisie des variables de paie (heures, absences, primes)",
|
||||
"apercu_bulletin": "Aperçu du bulletin de paie avant validation",
|
||||
"dsn": "Écran DSN (déclaration sociale nominative)",
|
||||
"conges": "Gestion des absences et congés",
|
||||
},
|
||||
vocabulary_synonyms={
|
||||
"DSN": "déclaration sociale",
|
||||
"RTT": "réduction du temps de travail",
|
||||
"STC": "solde de tout compte",
|
||||
"IJSS": "indemnités journalières",
|
||||
"CP": "congés payés",
|
||||
},
|
||||
common_actions={
|
||||
"click:valider": "valider le bulletin",
|
||||
"click:editer": "éditer le bulletin",
|
||||
"click:bulletin": "ouvrir le bulletin de paie",
|
||||
"click:employe": "ouvrir la fiche employé",
|
||||
"click:dsn": "lancer la DSN",
|
||||
"click:conges": "gérer les congés",
|
||||
"click:absence": "saisir une absence",
|
||||
"type:heures": "saisir les heures travaillées",
|
||||
"type:prime": "saisir une prime",
|
||||
},
|
||||
clarification_templates={
|
||||
"default": (
|
||||
"Je ne trouve pas {target_friendly} pour {nom_employe}. "
|
||||
"Tu peux me confirmer la période de paie ?"
|
||||
),
|
||||
"target_not_found": (
|
||||
"Je ne trouve pas {target_friendly} dans la fiche de {nom_employe}. "
|
||||
"Le contrat est peut-être clôturé ?"
|
||||
),
|
||||
"target:employe": (
|
||||
"Je ne trouve pas {nom_employe} dans la liste. "
|
||||
"Est-il encore actif dans l'entreprise ?"
|
||||
),
|
||||
"ambiguous_period": (
|
||||
"Est-ce la période {periode_a} ou {periode_b} que tu veux traiter ?"
|
||||
),
|
||||
},
|
||||
summary_templates={
|
||||
"item_singular": "bulletin",
|
||||
"item_plural": "bulletins",
|
||||
"success_one": (
|
||||
"J'ai édité le bulletin de {nom_employe} en {elapsed_s}s."
|
||||
),
|
||||
"success": (
|
||||
"J'ai édité {done} bulletins sur {items_count}. "
|
||||
"La paie est prête pour validation."
|
||||
),
|
||||
"partial": (
|
||||
"J'ai édité {done} bulletins sur {items_count}. "
|
||||
"{failed} sont en attente — variables de paie à compléter."
|
||||
),
|
||||
"failure": (
|
||||
"Je n'ai pas pu éditer les bulletins de {workflow_name}. "
|
||||
"Il y a peut-être un blocage côté logiciel de paie."
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
_STOCKS_LOGISTIQUE = DomainContext(
|
||||
domain_id="stocks_logistique",
|
||||
name="Stocks et logistique",
|
||||
description=(
|
||||
"Gestionnaire de stocks / logistique : bons de commande, bons de livraison, "
|
||||
"réceptions, inventaires, mouvements de stock, expéditions."
|
||||
),
|
||||
system_prompt=(
|
||||
"Tu es un assistant expert en gestion de stocks et logistique. "
|
||||
"L'utilisateur utilise un ERP ou WMS (SAP, Dynamics, Odoo, Sage, Divalto) "
|
||||
"pour gérer les commandes, les réceptions, les expéditions et les inventaires.\n\n"
|
||||
"Vocabulaire du métier :\n"
|
||||
"- BC : Bon de Commande (achat ou vente)\n"
|
||||
"- BL : Bon de Livraison\n"
|
||||
"- BR : Bon de Réception\n"
|
||||
"- Article / Référence / SKU : produit en stock\n"
|
||||
"- Emplacement : localisation physique (allée, rayon, emplacement)\n"
|
||||
"- Mouvement de stock : entrée, sortie, transfert\n"
|
||||
"- Inventaire : comptage physique pour recaler le stock théorique\n"
|
||||
"- FIFO / LIFO : ordre de sortie des stocks\n"
|
||||
"- ERP : progiciel de gestion intégré\n"
|
||||
"- WMS : Warehouse Management System\n"
|
||||
"- Picking : préparation de commande\n"
|
||||
"- Quantité en stock / disponible / réservée\n\n"
|
||||
"Écrans courants :\n"
|
||||
"- Saisie de bon de commande / réception\n"
|
||||
"- Liste des articles (avec photo, quantité, emplacement)\n"
|
||||
"- Inventaire (comptage)\n"
|
||||
"- Mouvements de stock\n"
|
||||
"- Picking list (liste de préparation)"
|
||||
),
|
||||
vocabulary=[
|
||||
"bon de commande", "BC", "bon de livraison", "BL", "bon de réception", "BR",
|
||||
"article", "référence", "SKU", "emplacement", "stock", "inventaire",
|
||||
"mouvement", "entrée", "sortie", "picking", "FIFO", "LIFO", "ERP", "WMS",
|
||||
"fournisseur", "client", "quantité", "disponible", "réservé",
|
||||
],
|
||||
known_apps=[
|
||||
"SAP", "Dynamics", "Odoo", "Sage X3", "Divalto", "Cegid",
|
||||
"Oracle NetSuite", "Reflex WMS", "Infolog",
|
||||
],
|
||||
screen_patterns={
|
||||
"bon_commande": "Saisie de bon de commande (fournisseur, lignes d'articles, quantités)",
|
||||
"reception": "Bon de réception (rapprochement avec la commande)",
|
||||
"inventaire": "Saisie d'inventaire (article, emplacement, quantité comptée)",
|
||||
"picking": "Liste de préparation avec articles et emplacements",
|
||||
"mouvement": "Mouvement de stock (entrée/sortie/transfert)",
|
||||
},
|
||||
vocabulary_synonyms={
|
||||
"BC": "bon de commande",
|
||||
"BL": "bon de livraison",
|
||||
"BR": "bon de réception",
|
||||
"SKU": "référence produit",
|
||||
"WMS": "gestion d'entrepôt",
|
||||
"ERP": "progiciel de gestion",
|
||||
},
|
||||
common_actions={
|
||||
"click:valider": "valider le bon",
|
||||
"click:commande": "ouvrir le bon de commande",
|
||||
"click:livraison": "ouvrir le bon de livraison",
|
||||
"click:reception": "saisir la réception",
|
||||
"click:inventaire": "démarrer l'inventaire",
|
||||
"click:article": "sélectionner un article",
|
||||
"click:picking": "démarrer la préparation",
|
||||
"type:quantite": "saisir la quantité",
|
||||
"type:reference": "saisir la référence article",
|
||||
},
|
||||
clarification_templates={
|
||||
"default": (
|
||||
"Je ne trouve pas {target_friendly}. "
|
||||
"C'est bien la commande {num_bc} qu'on traite ?"
|
||||
),
|
||||
"target_not_found": (
|
||||
"Je ne trouve pas {target_friendly}. "
|
||||
"La commande {num_bc} est peut-être déjà clôturée ?"
|
||||
),
|
||||
"target:article": (
|
||||
"Je ne trouve pas l'article {ref_article}. "
|
||||
"Il est peut-être archivé ou mal référencé ?"
|
||||
),
|
||||
"quantity_mismatch": (
|
||||
"La quantité reçue ({qte_recue}) ne correspond pas à la commande "
|
||||
"({qte_commandee}). Je saisis un écart ou tu vérifies ?"
|
||||
),
|
||||
},
|
||||
summary_templates={
|
||||
"item_singular": "bon",
|
||||
"item_plural": "bons",
|
||||
"success_one": (
|
||||
"J'ai traité le bon {num_bc} en {elapsed_s}s."
|
||||
),
|
||||
"success": (
|
||||
"J'ai traité {done} bons sur {items_count}. "
|
||||
"Les mouvements de stock sont validés."
|
||||
),
|
||||
"partial": (
|
||||
"J'ai traité {done} bons sur {items_count}. "
|
||||
"{failed} bons sont en attente — écarts de quantité à vérifier."
|
||||
),
|
||||
"failure": (
|
||||
"Je n'ai pas pu traiter les bons de {workflow_name}. "
|
||||
"L'ERP a peut-être refusé une ligne, je te rends la main."
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
_GENERIC = DomainContext(
|
||||
domain_id="generic",
|
||||
name="Bureautique générale",
|
||||
@@ -166,11 +958,37 @@ _GENERIC = DomainContext(
|
||||
"Tu es un assistant RPA qui observe des applications bureautiques. "
|
||||
"Décris précisément ce que tu vois à l'écran."
|
||||
),
|
||||
summary_templates={
|
||||
"item_singular": "action",
|
||||
"item_plural": "actions",
|
||||
"success_one": "C'est fait, j'ai terminé « {workflow_name} » en {elapsed_s}s.",
|
||||
"success": (
|
||||
"J'ai terminé « {workflow_name} » : {done} {item_word} exécutées "
|
||||
"sur {items_count}."
|
||||
),
|
||||
"partial": (
|
||||
"J'ai terminé « {workflow_name} » partiellement : "
|
||||
"{done} {item_word} sur {items_count} ({failed} en échec)."
|
||||
),
|
||||
"failure": (
|
||||
"Je n'ai pas pu terminer « {workflow_name} ». Je te rends la main."
|
||||
),
|
||||
},
|
||||
clarification_templates={
|
||||
"default": (
|
||||
"Je ne trouve pas {target_friendly} à l'écran. "
|
||||
"Tu peux me le montrer ?"
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
# Registre des domaines disponibles
|
||||
_DOMAINS: Dict[str, DomainContext] = {
|
||||
"tim_codage": _TIM_CODAGE,
|
||||
"comptabilite": _COMPTABILITE,
|
||||
"rh_paie": _RH_PAIE,
|
||||
"stocks_logistique": _STOCKS_LOGISTIQUE,
|
||||
"generic": _GENERIC,
|
||||
}
|
||||
|
||||
@@ -179,7 +997,8 @@ def get_domain_context(domain_id: str = "generic") -> DomainContext:
|
||||
"""Récupérer le contexte métier par ID.
|
||||
|
||||
Args:
|
||||
domain_id: Identifiant du domaine (tim_codage, generic, etc.)
|
||||
domain_id: Identifiant du domaine (tim_codage, comptabilite, rh_paie,
|
||||
stocks_logistique, generic, etc.)
|
||||
|
||||
Returns:
|
||||
DomainContext correspondant, ou generic si non trouvé.
|
||||
|
||||
373
agent_v0/server_v1/execution_plan_runner.py
Normal file
373
agent_v0/server_v1/execution_plan_runner.py
Normal file
@@ -0,0 +1,373 @@
|
||||
# agent_v0/server_v1/execution_plan_runner.py
|
||||
"""
|
||||
ExecutionPlanRunner — Adaptateur ExecutionPlan → actions replay.
|
||||
|
||||
Pièce d'intégration du pipeline V4 :
|
||||
RawTrace → IRBuilder → WorkflowIR → ExecutionCompiler → ExecutionPlan → Runtime
|
||||
|
||||
Ce module convertit un `ExecutionPlan` (plan pré-compilé, déterministe) en
|
||||
liste d'actions au format attendu par l'executor replay actuel (clé x_pct,
|
||||
y_pct, target_spec, etc.), puis les injecte dans `_replay_queues`.
|
||||
|
||||
L'ancien chemin `build_replay_from_raw_events()` dans stream_processor.py
|
||||
reste inchangé — les deux chemins coexistent pendant la transition.
|
||||
|
||||
Format d'action produit (compatible executor existant) :
|
||||
{
|
||||
"action_id": "act_...",
|
||||
"type": "click",
|
||||
"x_pct": 0.5,
|
||||
"y_pct": 0.3,
|
||||
"visual_mode": True,
|
||||
"target_spec": {
|
||||
"by_text": "...",
|
||||
"window_title": "...",
|
||||
"vlm_description": "...",
|
||||
"anchor_image_base64": "...",
|
||||
},
|
||||
"expected_window_title": "...",
|
||||
}
|
||||
|
||||
Auteur: Dom, Alice - Avril 2026
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
import threading
|
||||
import uuid
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from core.workflow.execution_plan import (
|
||||
ExecutionNode,
|
||||
ExecutionPlan,
|
||||
ResolutionStrategy,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# =========================================================================
# Variable substitution
# =========================================================================
# WorkflowIR uses the `{var}` syntax in text fields.
# Both forms are supported here: `{var}` (native IR) and `${var}` (legacy replay).
_VARIABLE_RE_CURLY = re.compile(r"\{(\w+)\}")
_VARIABLE_RE_DOLLAR = re.compile(r"\$\{(\w+)\}")


def substitute_variables(text: str, variables: Dict[str, Any]) -> str:
    """Replace `{var}` and `${var}` placeholders with their values.

    Unknown placeholders are left untouched; an empty text or an empty
    variable mapping short-circuits and returns the input unchanged.
    """
    if not text or not variables:
        return text

    def _expand(m: "re.Match[str]") -> str:
        name = m.group(1)
        return str(variables[name]) if name in variables else m.group(0)

    # `${var}` first, then `{var}` — same order as the legacy implementation.
    for pattern in (_VARIABLE_RE_DOLLAR, _VARIABLE_RE_CURLY):
        text = pattern.sub(_expand, text)
    return text
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Conversion ExecutionNode → action replay
|
||||
# =========================================================================
|
||||
|
||||
|
||||
def _strategy_to_target_spec(
    strategy: Optional[ResolutionStrategy],
    fallbacks: Optional[List[ResolutionStrategy]] = None,
    intent: str = "",
) -> Dict[str, Any]:
    """Build a `target_spec` dict from the resolution strategies.

    Merges the primary strategy and the fallbacks to give the resolve_engine
    as many hints as possible:
      - OCR      -> by_text
      - template -> anchor_image_base64 (from anchor_b64)
      - VLM      -> vlm_description

    V4 rule: the primary strategy dictates the preferred method.
    The `resolve_order` field lists methods in the order to try;
    the resolve_engine honors this order instead of its default cascade.

    resolve_order is the key to "zero VLM at runtime":
      - ["ocr", "template", "vlm"]  -> typical V4 (fast OCR)
      - ["template", "ocr", "vlm"]  -> learned: template works better
      - ["vlm"]                     -> elements without text (icons)

    Args:
        strategy: Primary resolution strategy (may be None).
        fallbacks: Fallback strategies, considered after the primary.
        intent: Business intent; used as a last-resort VLM prompt when no
            strategy supplies a vlm_description.

    Returns:
        A dict with any of: by_text, anchor_image_base64, vlm_description,
        uia_target, dom_target, resolve_order. Empty when no hints exist.
    """
    spec: Dict[str, Any] = {}

    # Primary first, then fallbacks — this order drives resolve_order below.
    all_strategies: List[ResolutionStrategy] = []
    if strategy is not None:
        all_strategies.append(strategy)
    if fallbacks:
        all_strategies.extend(fallbacks)

    # First-wins accumulators: a later (lower-priority) strategy never
    # overwrites a hint already provided by an earlier one.
    by_text_candidate = ""
    anchor_candidate = ""
    vlm_candidate = ""
    uia_data: Dict[str, Any] = {}
    dom_data: Dict[str, Any] = {}
    resolve_order: List[str] = []
    seen_methods: set = set()

    for strat in all_strategies:
        if not strat:
            continue
        if strat.method == "ocr" and strat.target_text and not by_text_candidate:
            by_text_candidate = strat.target_text
        elif strat.method == "template":
            if strat.anchor_b64 and not anchor_candidate:
                anchor_candidate = strat.anchor_b64
            # A template strategy may also carry text usable as an OCR hint.
            if strat.target_text and not by_text_candidate:
                by_text_candidate = strat.target_text
        elif strat.method == "vlm" and strat.vlm_description and not vlm_candidate:
            vlm_candidate = strat.vlm_description
        elif strat.method == "uia" and strat.uia_name and not uia_data:
            uia_data = {
                "name": strat.uia_name,
                "control_type": strat.uia_control_type,
                "automation_id": strat.uia_automation_id,
                "parent_path": strat.uia_parent_path,
            }
        elif strat.method == "dom" and strat.dom_selector and not dom_data:
            dom_data = {
                "selector": strat.dom_selector,
                "xpath": strat.dom_xpath,
                "url_pattern": strat.dom_url_pattern,
            }

        # Build the method order (primary -> fallbacks), de-duplicated.
        if strat.method and strat.method not in seen_methods:
            resolve_order.append(strat.method)
            seen_methods.add(strat.method)

    if by_text_candidate:
        spec["by_text"] = by_text_candidate
    if anchor_candidate:
        spec["anchor_image_base64"] = anchor_candidate
    if vlm_candidate:
        spec["vlm_description"] = vlm_candidate
    elif intent and "vlm_description" not in spec:
        # The business intent becomes the last-resort VLM prompt
        spec["vlm_description"] = intent

    # UIA data — consumed by the Windows agent via lea_uia.exe
    if uia_data:
        spec["uia_target"] = uia_data

    # DOM data — consumed by the Windows agent via CDP (future)
    if dom_data:
        spec["dom_target"] = dom_data

    # Pre-compiled resolution order — THE central piece of V4
    if resolve_order:
        spec["resolve_order"] = resolve_order

    return spec
|
||||
|
||||
|
||||
def execution_node_to_action(
    node: ExecutionNode,
    variables: Optional[Dict[str, Any]] = None,
    id_prefix: str = "act_plan",
) -> Optional[Dict[str, Any]]:
    """Convert an `ExecutionNode` into a replay action dict.

    Returns `None` when the node is not executable (unknown action type,
    or a key-combo node without any keys).

    Args:
        node: The node to convert.
        variables: Variable mapping used to substitute {var} placeholders.
        id_prefix: Prefix for the generated action_id.
    """
    variables = variables or {}

    # Every action gets a fresh unique id plus a back-reference to the plan node.
    action: Dict[str, Any] = {
        "action_id": f"{id_prefix}_{uuid.uuid4().hex[:8]}",
        "plan_node_id": node.node_id,
    }

    if node.intent:
        action["intention"] = node.intent
    if node.step_id:
        action["plan_step_id"] = node.step_id
    if node.is_optional:
        action["is_optional"] = True

    # Execution metadata useful to the runtime
    if node.timeout_ms:
        action["timeout_ms"] = node.timeout_ms
    if node.max_retries:
        action["max_retries"] = node.max_retries
    if node.recovery_action:
        action["recovery_action"] = node.recovery_action
    if node.success_condition:
        action["success_condition"] = node.success_condition.to_dict()

    action_type = node.action_type

    if action_type == "click":
        action["type"] = "click"

        strategy = node.strategy_primary
        fallbacks = node.strategy_fallbacks or []

        # ── Deriving coordinates from the primary strategy ──
        # - OCR: no coordinates (the runtime finds them via OCR)
        # - template: the anchor image is used at runtime
        # - VLM: the description is used at runtime
        # In every case the resolve_engine recovers the pixels at replay.
        # A neutral center (0.5, 0.5) is still exposed to stay compatible
        # with the existing queue validations.
        action["x_pct"] = 0.5
        action["y_pct"] = 0.5
        action["visual_mode"] = True

        target_spec = _strategy_to_target_spec(
            strategy=strategy,
            fallbacks=fallbacks,
            intent=node.intent,
        )

        # Expected window title BEFORE the click (strict pre-check).
        # If absent, no pre-check → the action runs whatever screen is shown.
        if node.expected_window_before:
            action["expected_window_before"] = node.expected_window_before
            target_spec["window_title"] = node.expected_window_before

        # Expected window title AFTER the click (strict post-check).
        # This is the gate for moving on to the next action.
        if node.success_condition and node.success_condition.expected_title:
            action["expected_window_title"] = node.success_condition.expected_title
            action["success_strict"] = (
                node.success_condition.method == "title_match"
            )
            if "window_title" not in target_spec:
                target_spec["window_title"] = node.success_condition.expected_title

        if target_spec:
            action["target_spec"] = target_spec

    elif action_type == "type":
        action["type"] = "type"
        text = node.text or ""
        # Substitute variables before sending (e.g. {patient} → "DUPONT")
        action["text"] = substitute_variables(text, variables)
        if node.variable_name:
            action["variable_name"] = node.variable_name

    elif action_type in ("key_combo", "key_press"):
        action["type"] = "key_combo"
        keys = list(node.keys or [])
        if not keys:
            return None
        action["keys"] = keys

    elif action_type == "wait":
        action["type"] = "wait"
        duration = node.duration_ms or 1000
        action["duration_ms"] = int(duration)

    elif action_type == "scroll":
        action["type"] = "scroll"
        # Strategies may carry a zone — not exploited here: the scroll
        # implicitly targets the active window.
        action["delta"] = -3

    else:
        logger.debug("execution_node_to_action: type inconnu '%s' ignoré", action_type)
        return None

    return action
|
||||
|
||||
|
||||
def execution_plan_to_actions(
    plan: ExecutionPlan,
    variables: Optional[Dict[str, Any]] = None,
    id_prefix: str = "act_plan",
) -> List[Dict[str, Any]]:
    """Convert a full `ExecutionPlan` into a list of replay actions.

    Variables passed as an argument override those stored in the plan.
    Nodes that cannot be converted (unknown action type) are dropped.
    """
    # Plan-level variables first, then caller-supplied overrides on top.
    merged_vars: Dict[str, Any] = {**(plan.variables or {}), **(variables or {})}

    converted = (
        execution_node_to_action(
            node=node,
            variables=merged_vars,
            id_prefix=id_prefix,
        )
        for node in plan.nodes
    )
    actions: List[Dict[str, Any]] = [a for a in converted if a is not None]

    logger.info(
        "execution_plan_to_actions(%s) : %d nœuds → %d actions replay "
        "(vars=%d)",
        plan.plan_id, plan.total_nodes, len(actions), len(merged_vars),
    )
    return actions
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Injection dans la queue de replay
|
||||
# =========================================================================
|
||||
|
||||
|
||||
def inject_plan_into_queue(
    plan: ExecutionPlan,
    session_id: str,
    replay_queues: Dict[str, List[Dict[str, Any]]],
    variables: Optional[Dict[str, Any]] = None,
    lock: Optional[threading.Lock] = None,
    replace: bool = True,
    id_prefix: str = "act_plan",
) -> List[Dict[str, Any]]:
    """Inject an `ExecutionPlan` into a session's replay queue.

    Args:
        plan: The plan to execute.
        session_id: Target Agent V1 session.
        replay_queues: The global `_replay_queues` dict shared by the server.
        variables: Variables to substitute into the actions.
        lock: Optional lock acquired before writing (thread safety).
        replace: If True (default), replace the existing queue. Otherwise append.
        id_prefix: Prefix for the generated action_ids.

    Returns:
        The list of injected actions (after substitution).
    """
    actions = execution_plan_to_actions(
        plan=plan, variables=variables, id_prefix=id_prefix,
    )

    def _write() -> None:
        if replace:
            replay_queues[session_id] = list(actions)
        else:
            # Bug fix: in append mode the session may not have a queue yet —
            # indexing directly raised KeyError. Create the queue lazily.
            replay_queues.setdefault(session_id, []).extend(actions)

    if lock is not None:
        with lock:
            _write()
    else:
        _write()

    logger.info(
        "inject_plan_into_queue(%s) : %d actions injectées dans la queue "
        "de la session '%s' (replace=%s)",
        plan.plan_id, len(actions), session_id, replace,
    )
    return actions
|
||||
@@ -65,7 +65,8 @@ class LiveSessionState:
|
||||
class LiveSessionManager:
|
||||
"""Gère les sessions live en mémoire côté serveur avec persistance disque."""
|
||||
|
||||
def __init__(self, persist_dir: str = "data/streaming_sessions"):
|
||||
def __init__(self, persist_dir: str = "data/streaming_sessions",
|
||||
live_sessions_dir: Optional[str] = None):
|
||||
self._sessions: Dict[str, LiveSessionState] = {}
|
||||
self._lock = threading.Lock()
|
||||
self._persist_dir = Path(persist_dir)
|
||||
@@ -74,11 +75,16 @@ class LiveSessionManager:
|
||||
self._persist_counter = 0 # Compteur pour limiter la fréquence de persistance
|
||||
self._persist_interval = 10 # Persister toutes les N modifications
|
||||
|
||||
# Dossier des sessions live (JSONL + screenshots)
|
||||
self._live_sessions_dir = Path(live_sessions_dir) if live_sessions_dir else None
|
||||
|
||||
# Charger les sessions persistées au démarrage
|
||||
self._load_persisted_sessions()
|
||||
# Reconstruire les sessions depuis les live_events.jsonl sur disque
|
||||
self._discover_sessions_from_disk()
|
||||
|
||||
def _load_persisted_sessions(self):
|
||||
"""Charger les sessions sauvegardées au démarrage."""
|
||||
"""Charger les sessions sauvegardées au démarrage (JSON state files)."""
|
||||
count = 0
|
||||
for session_file in sorted(self._persist_dir.glob("sess_*.json")):
|
||||
try:
|
||||
@@ -92,6 +98,66 @@ class LiveSessionManager:
|
||||
if count:
|
||||
logger.info(f"{count} session(s) restaurée(s) depuis {self._persist_dir}")
|
||||
|
||||
def _discover_sessions_from_disk(self):
    """Discover sessions from the live_events.jsonl files on disk.

    Rebuilds sessions missing from the session manager by scanning:
      - live_sessions/sess_*/live_events.jsonl (root sessions)
      - live_sessions/{machine_id}/sess_*/live_events.jsonl (multi-machine)

    Sessions already loaded from the persisted JSON state are left untouched.
    """
    if self._live_sessions_dir is None:
        return
    live_dir = self._live_sessions_dir
    if not live_dir.exists():
        return

    discovered = 0
    for jsonl_file in sorted(live_dir.glob("**/live_events.jsonl")):
        session_dir = jsonl_file.parent
        session_id = session_dir.name
        if not session_id.startswith("sess_"):
            continue
        if session_id in self._sessions:
            continue

        # Infer the machine_id from the parent directory: a session living
        # directly under live_dir belongs to the "default" machine.
        parent_name = session_dir.parent.name
        if parent_name == live_dir.name:
            machine_id = "default"
        else:
            machine_id = parent_name

        # Count events and screenshots.
        # NOTE(review): events_count is computed but never stored below —
        # confirm whether it should be attached to the session state.
        events_count = 0
        try:
            with open(jsonl_file, 'r', encoding='utf-8') as f:
                for _ in f:
                    events_count += 1
        except Exception:
            pass

        shots_dir = session_dir / "shots"
        shots_count = len(list(shots_dir.glob("shot_*_full.png"))) if shots_dir.exists() else 0

        # Create the in-memory session (not finalized: events may still arrive)
        session = LiveSessionState(
            session_id=session_id,
            machine_id=machine_id,
            finalized=False,
        )
        # Expose the number of shots via placeholder ids (real paths unknown here)
        session.shot_paths = {f"shot_{i:04d}": "" for i in range(shots_count)}
        self._sessions[session_id] = session
        discovered += 1

    if discovered:
        logger.info(
            f"{discovered} session(s) découverte(s) depuis {live_dir} "
            f"(total: {len(self._sessions)} sessions en mémoire)"
        )
|
||||
|
||||
def _persist_session(self, session_id: str):
|
||||
"""Sauvegarder une session sur disque (appelé périodiquement)."""
|
||||
session = self._sessions.get(session_id)
|
||||
@@ -102,7 +168,7 @@ class LiveSessionManager:
|
||||
with open(filepath, 'w', encoding='utf-8') as f:
|
||||
json.dump(session.to_dict(), f, ensure_ascii=False)
|
||||
except Exception as e:
|
||||
logger.debug(f"Erreur persistance session {session_id}: {e}")
|
||||
logger.warning(f"Erreur persistance session {session_id}: {e}")
|
||||
|
||||
def _maybe_persist(self, session_id: str):
|
||||
"""Persister si le compteur atteint l'intervalle."""
|
||||
@@ -180,6 +246,17 @@ class LiveSessionManager:
|
||||
if meta_val is not None:
|
||||
info[meta_key] = meta_val
|
||||
session.last_window_info = info
|
||||
# Exploiter window_capture (envoyé par l'agent avec la capture fenêtre)
|
||||
# pour enrichir last_window_info avec le titre précis de la fenêtre cliquée
|
||||
window_capture = event_data.get("window_capture")
|
||||
if window_capture and isinstance(window_capture, dict):
|
||||
wc_title = window_capture.get("title", "").strip()
|
||||
wc_app = window_capture.get("app_name", "").strip()
|
||||
if wc_title:
|
||||
session.last_window_info["title"] = wc_title
|
||||
if wc_app:
|
||||
session.last_window_info["app_name"] = wc_app
|
||||
|
||||
# Accumuler les titres/apps pour le nommage automatique
|
||||
title = session.last_window_info.get("title", "").strip()
|
||||
app_name = session.last_window_info.get("app_name", "").strip()
|
||||
@@ -221,18 +298,41 @@ class LiveSessionManager:
|
||||
import socket
|
||||
|
||||
# Construire les événements au format RawSession
|
||||
# Important : copier TOUTES les données de l'événement (pos, text, keys, button...)
|
||||
# car Event.from_dict() met tout sauf t/type/window/screenshot_id dans event.data,
|
||||
# et le GraphBuilder utilise event.data pour construire les actions.
|
||||
events = []
|
||||
for evt in session.events:
|
||||
window_info = {
|
||||
"title": evt.get("window_title", session.last_window_info.get("title", "")),
|
||||
"app_name": evt.get("app_name", session.last_window_info.get("app_name", "unknown")),
|
||||
}
|
||||
events.append({
|
||||
# Extraire window info (plusieurs formats possibles)
|
||||
window_raw = evt.get("window")
|
||||
if isinstance(window_raw, dict):
|
||||
window_info = {
|
||||
"title": window_raw.get("title", session.last_window_info.get("title", "")),
|
||||
"app_name": window_raw.get("app_name", session.last_window_info.get("app_name", "unknown")),
|
||||
}
|
||||
else:
|
||||
window_info = {
|
||||
"title": evt.get("window_title", session.last_window_info.get("title", "")),
|
||||
"app_name": evt.get("app_name", session.last_window_info.get("app_name", "unknown")),
|
||||
}
|
||||
|
||||
raw_event = {
|
||||
"t": evt.get("timestamp", 0),
|
||||
"type": evt.get("type", "unknown"),
|
||||
"window": window_info,
|
||||
"screenshot_id": evt.get("screenshot_id"),
|
||||
})
|
||||
}
|
||||
|
||||
# Copier les données spécifiques au type d'événement
|
||||
# (pos, button, text, keys, etc.) — indispensable pour le replay
|
||||
_skip_keys = {"type", "timestamp", "window", "window_title",
|
||||
"app_name", "screenshot_id", "machine_id",
|
||||
"screen_metadata", "vision_info"}
|
||||
for key, value in evt.items():
|
||||
if key not in _skip_keys and key not in raw_event:
|
||||
raw_event[key] = value
|
||||
|
||||
events.append(raw_event)
|
||||
|
||||
# Construire les screenshots au format RawSession
|
||||
screenshots = []
|
||||
|
||||
1322
agent_v0/server_v1/replay_engine.py
Normal file
1322
agent_v0/server_v1/replay_engine.py
Normal file
File diff suppressed because it is too large
Load Diff
143
agent_v0/server_v1/replay_failure_logger.py
Normal file
143
agent_v0/server_v1/replay_failure_logger.py
Normal file
@@ -0,0 +1,143 @@
|
||||
# agent_v0/server_v1/replay_failure_logger.py
|
||||
"""
|
||||
Logger des echecs de replay pour l'apprentissage futur.
|
||||
|
||||
Chaque echec de resolution visuelle (target_not_found) est sauvegarde dans un
|
||||
fichier JSONL par session, avec le screenshot de ce que l'agent voit au moment
|
||||
de l'echec. Ces donnees alimentent le learning loop : re-entrainement des
|
||||
embeddings, ajustement des seuils, enrichissement des target_spec.
|
||||
|
||||
Structure :
|
||||
data/training/replay_failures/{replay_id}/failures.jsonl
|
||||
data/training/replay_failures/{replay_id}/screenshots/{action_id}.jpg
|
||||
"""
|
||||
|
||||
import base64
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
# Module-level logger for this component.
logger = logging.getLogger("replay_failure_logger")

# Root directory for stored replay failures
_FAILURES_BASE_DIR = Path("data/training/replay_failures")

# Lock serializing concurrent JSONL writes
_write_lock = threading.Lock()
|
||||
|
||||
|
||||
def log_replay_failure(
    replay_id: str,
    action_id: str,
    target_spec: Optional[Dict[str, Any]],
    screenshot_b64: Optional[str],
    resolution_attempts: Optional[List[Dict[str, Any]]] = None,
    error: str = "target_not_found",
    extra: Optional[Dict[str, Any]] = None,
) -> Optional[str]:
    """Persist one replay failure for the future learning loop.

    Appends a JSONL entry under data/training/replay_failures/{replay_id}/
    and, when a screenshot is provided, saves it as a JPEG alongside it.
    Never raises: any problem is logged and None is returned.

    Args:
        replay_id: Identifier of the replay in progress.
        action_id: Identifier of the failed action.
        target_spec: Specification of the target being searched for.
        screenshot_b64: Base64 JPEG of what the agent sees at failure time.
        resolution_attempts: Resolution attempts (method, score, ...).
        error: Error type (default: "target_not_found").
        extra: Additional fields merged into the entry.

    Returns:
        Path of the JSONL file written to, or None on error.
    """
    try:
        failure_dir = _FAILURES_BASE_DIR / replay_id
        failure_dir.mkdir(parents=True, exist_ok=True)

        # Persist the screenshot first so its path can go into the entry.
        shot_path: Optional[str] = None
        if screenshot_b64:
            shots_dir = failure_dir / "screenshots"
            shots_dir.mkdir(exist_ok=True)
            shot_path = str(shots_dir / f"{action_id}.jpg")
            try:
                raw = base64.b64decode(screenshot_b64)
                with open(shot_path, "wb") as f:
                    f.write(raw)
            except Exception as e:
                logger.warning(f"Impossible de sauvegarder le screenshot : {e}")
                shot_path = None

        # Build the JSONL entry (base64 blobs stripped from the spec).
        record = {
            "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S"),
            "replay_id": replay_id,
            "action_id": action_id,
            "target_spec": _sanitize_target_spec(target_spec) if target_spec else None,
            "screenshot_path": shot_path,
            "resolution_attempts": resolution_attempts or [],
            "error": error,
        }
        if extra:
            record.update(extra)

        # Append under the module lock (thread-safe writes).
        jsonl_path = failure_dir / "failures.jsonl"
        with _write_lock:
            with open(jsonl_path, "a", encoding="utf-8") as f:
                f.write(json.dumps(record, ensure_ascii=False) + "\n")

        logger.info(
            f"Echec replay loggue : replay={replay_id} action={action_id} "
            f"error={error} -> {jsonl_path}"
        )
        return str(jsonl_path)

    except Exception as e:
        logger.error(f"Impossible de logger l'echec replay : {e}")
        return None
|
||||
|
||||
|
||||
def _sanitize_target_spec(target_spec: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Nettoyer le target_spec pour le stockage (retirer les images base64 volumineuses)."""
|
||||
cleaned = {}
|
||||
for key, value in target_spec.items():
|
||||
# Ne pas stocker les images base64 (trop volumineux pour le JSONL)
|
||||
if key.endswith("_base64") or key.endswith("_b64"):
|
||||
cleaned[key] = f"<{len(str(value))} chars>" if value else None
|
||||
else:
|
||||
cleaned[key] = value
|
||||
return cleaned
|
||||
|
||||
|
||||
def get_failure_count(replay_id: str) -> int:
    """Return how many failures were logged for the given replay (0 if none)."""
    jsonl_path = _FAILURES_BASE_DIR / replay_id / "failures.jsonl"
    if not jsonl_path.exists():
        return 0
    try:
        with open(jsonl_path, "r", encoding="utf-8") as fh:
            # One JSONL entry per line.
            count = sum(1 for _ in fh)
    except Exception:
        count = 0
    return count
|
||||
|
||||
|
||||
def get_failures(replay_id: str) -> List[Dict[str, Any]]:
    """Read back every failure entry logged for the given replay.

    Returns whatever entries were successfully parsed; a read or parse
    error is logged and the entries collected so far are returned.
    """
    jsonl_path = _FAILURES_BASE_DIR / replay_id / "failures.jsonl"
    if not jsonl_path.exists():
        return []
    entries: List[Dict[str, Any]] = []
    try:
        with open(jsonl_path, "r", encoding="utf-8") as fh:
            for raw_line in fh:
                stripped = raw_line.strip()
                if stripped:
                    entries.append(json.loads(stripped))
    except Exception as e:
        logger.warning(f"Erreur lecture echecs replay {replay_id} : {e}")
    return entries
||||
@@ -175,6 +175,55 @@ class ReplayLearner:
|
||||
|
||||
self.record(outcome)
|
||||
|
||||
def record_human_correction(
    self,
    session_id: str,
    action: Dict[str, Any],
    correction: Dict[str, Any],
) -> None:
    """Record a human correction (supervised-learning mode).

    A human showed Léa where to click. The correction is stored in
    target_memory.db so that next time, Léa knows.

    Args:
        session_id: Session the correction belongs to.
        action: The replay action that failed (provides the target_spec).
        correction: Dict carrying the corrected "x_pct" / "y_pct" position.
    """
    target_spec = action.get("target_spec", {})
    by_text = target_spec.get("by_text", "")
    window_title = target_spec.get("window_title", "")
    x_pct = correction.get("x_pct", 0.0)
    y_pct = correction.get("y_pct", 0.0)

    # Record in the learning JSONL
    outcome = ActionOutcome(
        session_id=session_id,
        action_id=action.get("action_id", ""),
        action_type="click",
        target_description=by_text,
        window_title=window_title,
        resolution_method="human_supervised",
        resolution_score=1.0,  # Maximum confidence — a human showed the way
        success=True,
    )
    self.record(outcome)

    # Store in target_memory.db for future lookups
    # NOTE(review): verify that replay_memory exports get_target_memory_store —
    # only get_memory_store is visible in the replay_memory.py shown here.
    try:
        from .replay_memory import get_target_memory_store
        store = get_target_memory_store()
        if store:
            store.record_success(
                screen_signature="human_correction",
                target_spec=target_spec,
                resolved_position={"x_pct": x_pct, "y_pct": y_pct},
                method="human_supervised",
                score=1.0,
            )
            logger.info(
                f"[APPRENTISSAGE] Correction stockée dans target_memory : "
                f"'{by_text}' → ({x_pct:.4f}, {y_pct:.4f})"
            )
    except Exception as e:
        logger.warning(f"Learning: échec stockage target_memory: {e}")
|
||||
|
||||
def query_similar(
|
||||
self,
|
||||
target_description: str = "",
|
||||
|
||||
323
agent_v0/server_v1/replay_memory.py
Normal file
323
agent_v0/server_v1/replay_memory.py
Normal file
@@ -0,0 +1,323 @@
|
||||
# agent_v0/server_v1/replay_memory.py
|
||||
"""
|
||||
replay_memory — Greffe de TargetMemoryStore (Fiche #18) sur le pipeline V4.
|
||||
|
||||
Phase 1 du plan apprentissage Léa (docs/PLAN_APPRENTISSAGE_LEA.md).
|
||||
|
||||
Le runtime V4 appelle :
|
||||
- `memory_lookup()` AVANT la cascade coûteuse (OCR/template/VLM)
|
||||
- `memory_record_success()` APRÈS validation post-condition (`title_match` strict)
|
||||
- `memory_record_failure()` sur les échecs
|
||||
|
||||
Fingerprint léger V4 : les coordonnées clic (x_pct, y_pct) sont stockées dans
|
||||
les deux premières valeurs de `TargetFingerprint.bbox`, et la méthode de
|
||||
résolution ayant réussi dans le champ `etype`.
|
||||
|
||||
Signature d'écran V4 : `sha256(normalize(window_title))[:16]`. Simple et
|
||||
robuste aux données variables car les titres de fenêtre restent stables.
|
||||
Les faux positifs (même titre, écrans différents) sont rattrapés par la
|
||||
post-condition qui décrémentera la fiabilité via `record_failure()`.
|
||||
|
||||
Critère de fiabilité : 2 succès minimum et < 30% d'échecs pour déclencher
|
||||
un hit (paramètres de `TargetMemoryStore.lookup`). C'est exactement la
|
||||
cristallisation par répétition que l'on veut — Léa est un stagiaire qui
|
||||
apprend de l'observation.
|
||||
|
||||
Auteur : Dom, Alice — avril 2026
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
import os
|
||||
import unicodedata
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# =========================================================================
|
||||
# Singleton du store persistant
|
||||
# =========================================================================
|
||||
|
||||
_MEMORY_SINGLETON: Optional[Any] = None
|
||||
_MEMORY_DISABLED = False
|
||||
|
||||
|
||||
def get_memory_store():
    """Return the shared `TargetMemoryStore`, or None when unavailable.

    Lazy-init: the store is only built on first call, so importing this
    module never pulls in `core.learning.target_memory_store` as a startup
    side effect. After one failed init the store stays disabled for the
    lifetime of the process (no retry, no repeated log spam).
    """
    global _MEMORY_SINGLETON, _MEMORY_DISABLED

    if _MEMORY_DISABLED:
        return None
    if _MEMORY_SINGLETON is not None:
        return _MEMORY_SINGLETON

    try:
        from core.learning.target_memory_store import TargetMemoryStore

        base_path = os.environ.get("RPA_LEARNING_DIR", "data/learning")
        store = TargetMemoryStore(base_path=base_path)
    except Exception as exc:
        logger.warning(
            "replay_memory: TargetMemoryStore indisponible (%s) — "
            "l'apprentissage persistant est désactivé", exc,
        )
        _MEMORY_DISABLED = True
        return None

    _MEMORY_SINGLETON = store
    logger.info(
        "replay_memory: TargetMemoryStore initialisé (base=%s)", base_path,
    )
    return _MEMORY_SINGLETON
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Normalisation de texte et hash
|
||||
# =========================================================================
|
||||
|
||||
|
||||
def _norm_text(s: str) -> str:
|
||||
"""Normalise un texte pour un hash stable (accents, casse, NBSP, espaces)."""
|
||||
if not s:
|
||||
return ""
|
||||
s = s.replace("\u00A0", " ").strip().lower()
|
||||
s = unicodedata.normalize("NFKD", s)
|
||||
s = "".join(ch for ch in s if not unicodedata.combining(ch))
|
||||
return " ".join(s.split())
|
||||
|
||||
|
||||
def compute_screen_sig(window_title: str) -> str:
    """Compute the V4 screen signature from a window title.

    The window title is strict since the "step control" phase (mandatory
    `title_match` post-condition), which makes it our natural key.
    Returns "" when the normalized title is empty.
    """
    normalized = _norm_text(window_title)
    if not normalized:
        return ""
    digest = hashlib.sha256(normalized.encode("utf-8")).hexdigest()
    # 16 hex chars (64 bits) is plenty to key window titles without collisions.
    return digest[:16]
|
||||
|
||||
|
||||
class _TargetSpecLike:
|
||||
"""Adaptateur dict → objet pour `TargetMemoryStore._hash_target_spec()`.
|
||||
|
||||
Le hash interne de TargetMemoryStore utilise `getattr(spec, "by_role", ...)`
|
||||
qui ne fonctionne pas avec un dict brut. On expose les attributs nécessaires.
|
||||
|
||||
On intègre aussi `resolve_order` et `vlm_description` dans `context_hints`
|
||||
pour qu'ils entrent dans le hash — deux actions avec le même `by_text`
|
||||
mais un `resolve_order` différent doivent avoir des hashes distincts.
|
||||
"""
|
||||
|
||||
__slots__ = ("by_role", "by_text", "by_position", "context_hints")
|
||||
|
||||
def __init__(self, d: Dict[str, Any]):
|
||||
self.by_role = d.get("by_role", "") or ""
|
||||
self.by_text = d.get("by_text", "") or ""
|
||||
self.by_position = d.get("by_position")
|
||||
|
||||
hints = dict(d.get("context_hints") or {})
|
||||
resolve_order = d.get("resolve_order")
|
||||
if resolve_order:
|
||||
hints["_resolve_order"] = "|".join(resolve_order) if isinstance(
|
||||
resolve_order, list
|
||||
) else str(resolve_order)
|
||||
if d.get("vlm_description"):
|
||||
hints["_vlm_desc"] = str(d["vlm_description"])
|
||||
if d.get("anchor_hint"):
|
||||
hints["_anchor_hint"] = str(d["anchor_hint"])
|
||||
self.context_hints = hints
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Lookup — consulté AVANT la cascade coûteuse
|
||||
# =========================================================================
|
||||
|
||||
|
||||
def memory_lookup(
    window_title: str,
    target_spec: Dict[str, Any],
) -> Optional[Dict[str, Any]]:
    """Look up a previously learned resolution for this target/screen pair.

    Returns:
        A dict compatible with the output of `_resolve_target_sync`
        (resolved, method, x_pct, y_pct, score, ...) when a trustworthy
        entry is found, otherwise None.
    """
    store = get_memory_store()
    if store is None:
        return None

    screen_sig = compute_screen_sig(window_title)
    if not screen_sig:
        return None

    try:
        fp = store.lookup(screen_sig, _TargetSpecLike(target_spec))
    except Exception as exc:
        logger.debug("memory_lookup: erreur lookup (%s)", exc)
        return None

    if fp is None:
        return None

    # Lightweight fingerprint: bbox stores (x_pct, y_pct, 0, 0).
    try:
        x_pct, y_pct = float(fp.bbox[0]), float(fp.bbox[1])
    except (TypeError, IndexError, ValueError):
        logger.debug("memory_lookup: fingerprint bbox invalide")
        return None

    # Sanity check: both percentages must lie within [0, 1].
    if not (0.0 <= x_pct <= 1.0 and 0.0 <= y_pct <= 1.0):
        logger.warning(
            "memory_lookup: coords invalides (%.3f, %.3f) pour sig=%s — "
            "entrée ignorée",
            x_pct, y_pct, screen_sig,
        )
        return None

    method = fp.etype or "memory"
    confidence = float(getattr(fp, "confidence", 0.9) or 0.9)

    logger.info(
        "memory_lookup HIT : sig=%s method=%s coords=(%.4f, %.4f) conf=%.2f "
        "target='%s'",
        screen_sig, method, x_pct, y_pct, confidence,
        (target_spec.get("by_text") or "")[:60],
    )

    return {
        "resolved": True,
        "method": f"memory_{method}",
        "x_pct": x_pct,
        "y_pct": y_pct,
        "score": confidence,
        "from_memory": True,
        "screen_sig": screen_sig,
    }
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Record — appelé APRÈS validation post-condition
|
||||
# =========================================================================
|
||||
|
||||
|
||||
def memory_record_success(
    window_title: str,
    target_spec: Dict[str, Any],
    x_pct: float,
    y_pct: float,
    method: str,
    confidence: float = 0.9,
) -> bool:
    """Persist a successful resolution in the target memory.

    Call AFTER the post-condition (strict `title_match`) has been
    validated. Returns True when the entry was recorded.
    """
    store = get_memory_store()
    if store is None:
        return False

    screen_sig = compute_screen_sig(window_title)
    if not screen_sig:
        return False

    # Sanity check: coordinates must be numeric and within [0, 1].
    try:
        x_pct = float(x_pct)
        y_pct = float(y_pct)
    except (TypeError, ValueError):
        logger.debug("memory_record_success: coords non numériques, skip")
        return False
    if not (0.0 <= x_pct <= 1.0 and 0.0 <= y_pct <= 1.0):
        logger.debug(
            "memory_record_success: coords hors [0,1] (%.3f, %.3f), skip",
            x_pct, y_pct,
        )
        return False

    try:
        from core.learning.target_memory_store import TargetFingerprint

        # Strip stacked "memory_" prefixes so that only the original
        # resolution method remains (e.g. template_matching). Without
        # this, the lookup → record → lookup cycle piles "memory_" up
        # forever: memory_memory_memory_template_matching.
        method_clean = method or "v4_unknown"
        while method_clean.startswith("memory_"):
            method_clean = method_clean[len("memory_"):]
        if not method_clean:
            method_clean = "v4_unknown"

        fingerprint = TargetFingerprint(
            element_id=f"v4_{method_clean}",
            bbox=(x_pct, y_pct, 0.0, 0.0),
            role=target_spec.get("by_role", "") or None,
            etype=method_clean,
            label=(target_spec.get("by_text") or "")[:200] or None,
            confidence=float(confidence),
        )

        store.record_success(
            screen_signature=screen_sig,
            target_spec=_TargetSpecLike(target_spec),
            fingerprint=fingerprint,
            strategy_used=method_clean,
            confidence=float(confidence),
        )

        logger.info(
            "memory_record_success: sig=%s method=%s coords=(%.4f, %.4f) "
            "target='%s'",
            screen_sig, method_clean, x_pct, y_pct,
            (target_spec.get("by_text") or "")[:60],
        )
        return True

    except Exception as exc:
        logger.warning("memory_record_success: échec (%s)", exc)
        return False
|
||||
|
||||
|
||||
def memory_record_failure(
    window_title: str,
    target_spec: Dict[str, Any],
    error_message: str,
) -> bool:
    """Increment the fail counter for this (screen signature, target) pair.

    Called when the action fails OR when the post-condition is not
    satisfied. `TargetMemoryStore.lookup()` will ignore the entry once
    its failure ratio exceeds 30%.
    """
    store = get_memory_store()
    if store is None:
        return False

    screen_sig = compute_screen_sig(window_title)
    if not screen_sig:
        return False

    try:
        store.record_failure(
            screen_signature=screen_sig,
            target_spec=_TargetSpecLike(target_spec),
            error_message=(error_message or "unknown")[:200],
        )
        logger.debug(
            "memory_record_failure: sig=%s error='%s'",
            screen_sig, (error_message or "")[:80],
        )
        return True
    except Exception as exc:
        logger.debug("memory_record_failure: échec (%s)", exc)
        return False
|
||||
2385
agent_v0/server_v1/resolve_engine.py
Normal file
2385
agent_v0/server_v1/resolve_engine.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -76,7 +76,16 @@ class StepMetrics:
|
||||
confidence_score: float
|
||||
retry_count: int = 0
|
||||
error_details: Optional[str] = None
|
||||
|
||||
# C1 — Instrumentation vision-aware (ExecutionLoop)
|
||||
# Ces champs proviennent de `StepResult` (core/execution/execution_loop.py).
|
||||
# Tous optionnels avec valeurs par défaut pour rétrocompatibilité.
|
||||
ocr_ms: float = 0.0 # Temps OCR sur ce step
|
||||
ui_ms: float = 0.0 # Temps détection UI sur ce step
|
||||
analyze_ms: float = 0.0 # Temps analyse ScreenState (OCR + UI + reste)
|
||||
total_ms: float = 0.0 # Temps total du step (alias duration_ms)
|
||||
cache_hit: bool = False # True si ScreenState vient du cache perceptuel
|
||||
degraded: bool = False # True si mode dégradé (timeout analyse)
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert to dictionary for storage."""
|
||||
return {
|
||||
@@ -92,9 +101,15 @@ class StepMetrics:
|
||||
'status': self.status,
|
||||
'confidence_score': self.confidence_score,
|
||||
'retry_count': self.retry_count,
|
||||
'error_details': self.error_details
|
||||
'error_details': self.error_details,
|
||||
'ocr_ms': self.ocr_ms,
|
||||
'ui_ms': self.ui_ms,
|
||||
'analyze_ms': self.analyze_ms,
|
||||
'total_ms': self.total_ms,
|
||||
'cache_hit': self.cache_hit,
|
||||
'degraded': self.degraded,
|
||||
}
|
||||
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> 'StepMetrics':
|
||||
"""Create from dictionary."""
|
||||
@@ -111,7 +126,13 @@ class StepMetrics:
|
||||
status=data['status'],
|
||||
confidence_score=data['confidence_score'],
|
||||
retry_count=data.get('retry_count', 0),
|
||||
error_details=data.get('error_details')
|
||||
error_details=data.get('error_details'),
|
||||
ocr_ms=float(data.get('ocr_ms') or 0.0),
|
||||
ui_ms=float(data.get('ui_ms') or 0.0),
|
||||
analyze_ms=float(data.get('analyze_ms') or 0.0),
|
||||
total_ms=float(data.get('total_ms') or 0.0),
|
||||
cache_hit=bool(data.get('cache_hit') or False),
|
||||
degraded=bool(data.get('degraded') or False),
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
"""Integration of analytics with ExecutionLoop."""
|
||||
|
||||
import logging
|
||||
from typing import Optional
|
||||
from datetime import datetime
|
||||
from typing import Any, Optional
|
||||
from datetime import datetime, timedelta
|
||||
import uuid
|
||||
|
||||
from ..analytics_system import get_analytics_system
|
||||
@@ -14,17 +14,35 @@ logger = logging.getLogger(__name__)
|
||||
class AnalyticsExecutionIntegration:
|
||||
"""Integrate analytics collection with workflow execution."""
|
||||
|
||||
def __init__(self, enabled: bool = True):
|
||||
def __init__(self, analytics_system: Any = True, enabled: Optional[bool] = None):
|
||||
"""
|
||||
Initialize analytics integration.
|
||||
|
||||
|
||||
Accepte deux formes d'appel pour la rétrocompatibilité :
|
||||
- ``AnalyticsExecutionIntegration(enabled=True)`` → auto-load du système
|
||||
- ``AnalyticsExecutionIntegration(analytics_system_instance)`` →
|
||||
utilise l'instance fournie (utilisé par ExecutionLoop)
|
||||
|
||||
Args:
|
||||
enabled: Whether analytics collection is enabled
|
||||
analytics_system: Instance d'AnalyticsSystem pré-construite, ou
|
||||
True/False pour activer/désactiver (legacy).
|
||||
enabled: Legacy — si défini, prime sur analytics_system.
|
||||
"""
|
||||
self.enabled = enabled
|
||||
self.analytics = None
|
||||
|
||||
if enabled:
|
||||
# Détection de la forme d'appel
|
||||
if enabled is not None:
|
||||
# Appel legacy explicite: AnalyticsExecutionIntegration(enabled=...)
|
||||
self.enabled = bool(enabled)
|
||||
self.analytics = None
|
||||
elif isinstance(analytics_system, bool):
|
||||
# Appel legacy: AnalyticsExecutionIntegration(True/False)
|
||||
self.enabled = analytics_system
|
||||
self.analytics = None
|
||||
else:
|
||||
# Nouvelle forme: instance injectée
|
||||
self.enabled = analytics_system is not None
|
||||
self.analytics = analytics_system
|
||||
|
||||
if self.enabled and self.analytics is None:
|
||||
try:
|
||||
self.analytics = get_analytics_system()
|
||||
logger.info("Analytics integration enabled")
|
||||
@@ -36,37 +54,50 @@ class AnalyticsExecutionIntegration:
|
||||
self,
|
||||
workflow_id: str,
|
||||
execution_id: Optional[str] = None,
|
||||
total_steps: int = 0
|
||||
total_steps: int = 0,
|
||||
mode: Optional[str] = None,
|
||||
) -> str:
|
||||
"""
|
||||
Called when workflow execution starts.
|
||||
|
||||
Appelé au démarrage d'une exécution de workflow.
|
||||
|
||||
Args:
|
||||
workflow_id: Workflow identifier
|
||||
execution_id: Execution identifier (generated if None)
|
||||
total_steps: Total number of steps
|
||||
|
||||
workflow_id: Identifiant du workflow
|
||||
execution_id: Identifiant d'exécution (généré si None)
|
||||
total_steps: Nombre total d'étapes prévues
|
||||
mode: Mode d'exécution (OBSERVATION / COACHING / SUPERVISED /
|
||||
AUTOMATIC). Propagé en contexte pour MetricsCollector.
|
||||
|
||||
Returns:
|
||||
Execution ID
|
||||
Identifiant d'exécution (celui fourni ou nouvellement généré).
|
||||
"""
|
||||
if not self.enabled or not self.analytics:
|
||||
return execution_id or str(uuid.uuid4())
|
||||
|
||||
|
||||
if execution_id is None:
|
||||
execution_id = str(uuid.uuid4())
|
||||
|
||||
|
||||
try:
|
||||
# Start real-time tracking
|
||||
# Démarrage du tracking temps réel
|
||||
self.analytics.realtime_analytics.track_execution(
|
||||
execution_id=execution_id,
|
||||
workflow_id=workflow_id,
|
||||
total_steps=total_steps
|
||||
total_steps=total_steps,
|
||||
)
|
||||
|
||||
|
||||
# Ouverture de l'ExecutionMetrics côté collector (état "running").
|
||||
# Cela permet à `on_execution_complete` d'appeler
|
||||
# `record_execution_complete` qui clôture proprement.
|
||||
context = {"mode": mode} if mode else {}
|
||||
self.analytics.metrics_collector.record_execution_start(
|
||||
execution_id=execution_id,
|
||||
workflow_id=workflow_id,
|
||||
context=context,
|
||||
)
|
||||
|
||||
logger.debug(f"Started tracking execution: {execution_id}")
|
||||
except Exception as e:
|
||||
logger.error(f"Error starting execution tracking: {e}")
|
||||
|
||||
|
||||
return execution_id
|
||||
|
||||
def on_step_start(
|
||||
@@ -101,110 +132,249 @@ class AnalyticsExecutionIntegration:
|
||||
execution_id: str,
|
||||
workflow_id: str,
|
||||
node_id: str,
|
||||
action_type: str,
|
||||
started_at: datetime,
|
||||
completed_at: datetime,
|
||||
duration: float,
|
||||
*,
|
||||
duration_ms: float,
|
||||
success: bool,
|
||||
error_message: Optional[str] = None
|
||||
action_type: str = "",
|
||||
started_at: Optional[datetime] = None,
|
||||
completed_at: Optional[datetime] = None,
|
||||
error_message: Optional[str] = None,
|
||||
confidence: float = 0.0,
|
||||
target_element: str = "",
|
||||
retry_count: int = 0,
|
||||
ocr_ms: float = 0.0,
|
||||
ui_ms: float = 0.0,
|
||||
analyze_ms: float = 0.0,
|
||||
total_ms: float = 0.0,
|
||||
cache_hit: bool = False,
|
||||
degraded: bool = False,
|
||||
step_id: Optional[str] = None,
|
||||
) -> None:
|
||||
"""
|
||||
Called when a step completes.
|
||||
|
||||
Appelé à la fin d'un step.
|
||||
|
||||
Contrat normalisé (Lot A — avril 2026) : ``duration_ms`` est
|
||||
obligatoire et en millisecondes. Plus de rétrocompat silencieuse
|
||||
sur ``duration`` en secondes.
|
||||
|
||||
Args:
|
||||
execution_id: Execution identifier
|
||||
workflow_id: Workflow identifier
|
||||
node_id: Node identifier
|
||||
action_type: Type of action
|
||||
started_at: Start timestamp
|
||||
completed_at: Completion timestamp
|
||||
duration: Duration in seconds
|
||||
success: Whether step succeeded
|
||||
error_message: Error message if failed
|
||||
execution_id: Identifiant d'exécution
|
||||
workflow_id: Identifiant du workflow
|
||||
node_id: Identifiant du node
|
||||
duration_ms: Durée du step en millisecondes (obligatoire)
|
||||
success: Vrai si le step a réussi
|
||||
action_type: Type d'action (``click``, ``type``, …)
|
||||
started_at: Timestamp de début (déduit de duration_ms si None)
|
||||
completed_at: Timestamp de fin (``now()`` si None)
|
||||
error_message: Message d'erreur si ``success=False``
|
||||
confidence: Score de matching [0, 1]
|
||||
target_element: Élément ciblé (optionnel)
|
||||
retry_count: Nombre de retries
|
||||
ocr_ms: Temps OCR (C1)
|
||||
ui_ms: Temps détection UI (C1)
|
||||
analyze_ms: Temps analyse ScreenState (C1)
|
||||
total_ms: Temps total du step (C1, alias duration_ms)
|
||||
cache_hit: ScreenState depuis cache perceptuel (C1)
|
||||
degraded: Mode dégradé activé (C1)
|
||||
step_id: ID unique du step (généré si None)
|
||||
"""
|
||||
if not self.enabled or not self.analytics:
|
||||
return
|
||||
|
||||
|
||||
try:
|
||||
# Record step metrics
|
||||
duration_ms_final = float(duration_ms)
|
||||
|
||||
# Normaliser les timestamps
|
||||
if completed_at is None:
|
||||
completed_at = datetime.now()
|
||||
if started_at is None:
|
||||
started_at = completed_at - timedelta(milliseconds=duration_ms_final)
|
||||
|
||||
step_metrics = StepMetrics(
|
||||
step_id=step_id or f"{execution_id}:{node_id}:{completed_at.isoformat()}",
|
||||
execution_id=execution_id,
|
||||
workflow_id=workflow_id,
|
||||
node_id=node_id,
|
||||
action_type=action_type,
|
||||
action_type=action_type or "unknown",
|
||||
target_element=target_element,
|
||||
started_at=started_at,
|
||||
completed_at=completed_at,
|
||||
duration=duration,
|
||||
success=success,
|
||||
error_message=error_message
|
||||
duration_ms=duration_ms_final,
|
||||
status="completed" if success else "failed",
|
||||
confidence_score=float(confidence),
|
||||
retry_count=retry_count,
|
||||
error_details=error_message,
|
||||
# C1 — vision-aware
|
||||
ocr_ms=float(ocr_ms or 0.0),
|
||||
ui_ms=float(ui_ms or 0.0),
|
||||
analyze_ms=float(analyze_ms or 0.0),
|
||||
total_ms=float(total_ms or duration_ms_final),
|
||||
cache_hit=bool(cache_hit),
|
||||
degraded=bool(degraded),
|
||||
)
|
||||
|
||||
|
||||
self.analytics.metrics_collector.record_step(step_metrics)
|
||||
|
||||
# Update real-time tracking
|
||||
self.analytics.realtime_analytics.record_step_complete(
|
||||
execution_id=execution_id,
|
||||
success=success
|
||||
|
||||
# Tracking temps réel
|
||||
try:
|
||||
self.analytics.realtime_analytics.record_step_complete(
|
||||
execution_id=execution_id,
|
||||
success=success,
|
||||
)
|
||||
except Exception as rt_err:
|
||||
logger.debug(f"Realtime tracking skipped: {rt_err}")
|
||||
|
||||
logger.debug(
|
||||
f"Recorded step: {node_id} "
|
||||
f"({'success' if success else 'failed'}, "
|
||||
f"analyze_ms={analyze_ms:.0f}, cache_hit={cache_hit}, "
|
||||
f"degraded={degraded})"
|
||||
)
|
||||
|
||||
logger.debug(f"Recorded step: {node_id} ({'success' if success else 'failed'})")
|
||||
except Exception as e:
|
||||
logger.error(f"Error recording step completion: {e}")
|
||||
|
||||
def on_step_result(
|
||||
self,
|
||||
execution_id: str,
|
||||
workflow_id: str,
|
||||
step_result: Any,
|
||||
) -> None:
|
||||
"""
|
||||
Raccourci C1 — enregistre un `StepResult` complet.
|
||||
|
||||
Évite aux appelants d'extraire manuellement les champs vision-aware.
|
||||
Utilisé par ExecutionLoop pour pousser StepResult au système analytics.
|
||||
|
||||
Args:
|
||||
execution_id: Identifiant d'exécution
|
||||
workflow_id: Identifiant de workflow
|
||||
step_result: Instance de `core.execution.execution_loop.StepResult`
|
||||
"""
|
||||
if not self.enabled or not self.analytics:
|
||||
return
|
||||
|
||||
action_type = "unknown"
|
||||
try:
|
||||
if getattr(step_result, "action_result", None) is not None:
|
||||
ar = step_result.action_result
|
||||
# ExecutionResult.action est optionnel selon la branche
|
||||
action_type = (
|
||||
getattr(ar, "action_type", None)
|
||||
or getattr(ar, "action", None)
|
||||
or "unknown"
|
||||
)
|
||||
except Exception:
|
||||
action_type = "unknown"
|
||||
|
||||
self.on_step_complete(
|
||||
execution_id=execution_id,
|
||||
workflow_id=workflow_id,
|
||||
node_id=getattr(step_result, "node_id", "unknown"),
|
||||
action_type=str(action_type),
|
||||
success=bool(getattr(step_result, "success", False)),
|
||||
error_message=None
|
||||
if getattr(step_result, "success", False)
|
||||
else getattr(step_result, "message", None),
|
||||
duration_ms=float(getattr(step_result, "duration_ms", 0.0) or 0.0),
|
||||
confidence=float(getattr(step_result, "match_confidence", 0.0) or 0.0),
|
||||
ocr_ms=float(getattr(step_result, "ocr_ms", 0.0) or 0.0),
|
||||
ui_ms=float(getattr(step_result, "ui_ms", 0.0) or 0.0),
|
||||
analyze_ms=float(getattr(step_result, "analyze_ms", 0.0) or 0.0),
|
||||
total_ms=float(getattr(step_result, "total_ms", 0.0) or 0.0),
|
||||
cache_hit=bool(getattr(step_result, "cache_hit", False)),
|
||||
degraded=bool(getattr(step_result, "degraded", False)),
|
||||
)
|
||||
|
||||
def on_execution_complete(
|
||||
self,
|
||||
execution_id: str,
|
||||
workflow_id: str,
|
||||
started_at: datetime,
|
||||
completed_at: datetime,
|
||||
duration: float,
|
||||
*,
|
||||
duration_ms: float,
|
||||
status: str,
|
||||
error_message: Optional[str] = None,
|
||||
steps_total: Optional[int] = None,
|
||||
steps_completed: int = 0,
|
||||
steps_failed: int = 0
|
||||
steps_failed: int = 0,
|
||||
error_message: Optional[str] = None,
|
||||
) -> None:
|
||||
"""
|
||||
Called when workflow execution completes.
|
||||
|
||||
Appelé à la fin d'une exécution de workflow.
|
||||
|
||||
Contrat normalisé (Lot A — avril 2026) :
|
||||
- ``duration_ms`` en millisecondes, toujours. Plus de rétrocompat
|
||||
silencieuse sur ``duration`` en secondes.
|
||||
- ``status`` est une chaîne libre (``"completed"``, ``"failed"``,
|
||||
``"stopped"``, ``"timeout"``, …). L'appelant décide.
|
||||
- ``steps_total`` / ``steps_completed`` / ``steps_failed`` : noms
|
||||
alignés sur le dataclass ``ExecutionMetrics``. Si ``steps_total``
|
||||
n'est pas fourni, on le déduit par somme.
|
||||
|
||||
Args:
|
||||
execution_id: Execution identifier
|
||||
workflow_id: Workflow identifier
|
||||
started_at: Start timestamp
|
||||
completed_at: Completion timestamp
|
||||
duration: Duration in seconds
|
||||
status: Final status (success, failed, timeout)
|
||||
error_message: Error message if failed
|
||||
steps_completed: Number of steps completed
|
||||
steps_failed: Number of steps failed
|
||||
execution_id: Identifiant d'exécution
|
||||
workflow_id: Identifiant du workflow
|
||||
duration_ms: Durée totale en millisecondes
|
||||
status: Statut final (``"completed"`` / ``"failed"`` / ``"stopped"``)
|
||||
steps_total: Nombre total de steps exécutés (tous statuts confondus)
|
||||
steps_completed: Nombre de steps réussis
|
||||
steps_failed: Nombre de steps en échec
|
||||
error_message: Message d'erreur si ``status != "completed"``
|
||||
"""
|
||||
if not self.enabled or not self.analytics:
|
||||
return
|
||||
|
||||
|
||||
# steps_total dérivé si non fourni explicitement
|
||||
if steps_total is None:
|
||||
steps_total = int(steps_completed) + int(steps_failed)
|
||||
|
||||
try:
|
||||
# Record execution metrics
|
||||
execution_metrics = ExecutionMetrics(
|
||||
execution_id=execution_id,
|
||||
workflow_id=workflow_id,
|
||||
started_at=started_at,
|
||||
completed_at=completed_at,
|
||||
duration=duration,
|
||||
status=status,
|
||||
error_message=error_message,
|
||||
steps_completed=steps_completed,
|
||||
steps_failed=steps_failed
|
||||
)
|
||||
|
||||
self.analytics.metrics_collector.record_execution(execution_metrics)
|
||||
|
||||
# Flush to ensure persistence
|
||||
self.analytics.metrics_collector.flush()
|
||||
|
||||
# Complete real-time tracking
|
||||
collector = self.analytics.metrics_collector
|
||||
|
||||
# record_execution_complete clôture proprement un ExecutionMetrics
|
||||
# ouvert par record_execution_start (chemin nominal via
|
||||
# on_execution_start). Si l'état n'est pas présent (tests, legacy),
|
||||
# on pousse un ExecutionMetrics synthétique directement.
|
||||
completed_at = datetime.now()
|
||||
started_at = completed_at - timedelta(milliseconds=float(duration_ms))
|
||||
|
||||
active = getattr(collector, "_active_executions", None)
|
||||
if active is not None and execution_id in active:
|
||||
collector.record_execution_complete(
|
||||
execution_id=execution_id,
|
||||
status=status,
|
||||
steps_total=int(steps_total),
|
||||
steps_completed=int(steps_completed),
|
||||
steps_failed=int(steps_failed),
|
||||
error_message=error_message,
|
||||
)
|
||||
else:
|
||||
# Fallback explicite : on construit directement un ExecutionMetrics
|
||||
# aligné sur le dataclass (duration_ms, status, steps_*).
|
||||
execution_metrics = ExecutionMetrics(
|
||||
execution_id=execution_id,
|
||||
workflow_id=workflow_id,
|
||||
started_at=started_at,
|
||||
completed_at=completed_at,
|
||||
duration_ms=float(duration_ms),
|
||||
status=status,
|
||||
steps_total=int(steps_total),
|
||||
steps_completed=int(steps_completed),
|
||||
steps_failed=int(steps_failed),
|
||||
error_message=error_message,
|
||||
)
|
||||
# Le collector n'expose pas record_execution(...) : on pousse
|
||||
# dans le buffer protégé par lock pour rester cohérent.
|
||||
with collector._lock:
|
||||
collector._buffer.append(execution_metrics)
|
||||
|
||||
# Flush pour garantir la persistance immédiate
|
||||
collector.flush()
|
||||
|
||||
# Clôture du tracking temps réel
|
||||
self.analytics.realtime_analytics.complete_execution(
|
||||
execution_id=execution_id,
|
||||
status=status
|
||||
status=status,
|
||||
)
|
||||
|
||||
|
||||
logger.info(f"Recorded execution: {execution_id} ({status})")
|
||||
except Exception as e:
|
||||
logger.error(f"Error recording execution completion: {e}")
|
||||
@@ -216,39 +386,54 @@ class AnalyticsExecutionIntegration:
|
||||
node_id: str,
|
||||
strategy: str,
|
||||
success: bool,
|
||||
duration: float
|
||||
duration_ms: float,
|
||||
) -> None:
|
||||
"""
|
||||
Called when self-healing attempts recovery.
|
||||
|
||||
Appelé quand le self-healing tente une récupération.
|
||||
|
||||
Contrat normalisé (Lot A — avril 2026) : ``duration_ms`` en
|
||||
millisecondes, cohérent avec ``on_execution_complete`` et
|
||||
``on_step_complete``. Le StepMetrics construit respecte strictement
|
||||
le dataclass (``status``, ``duration_ms``, ``error_details``,
|
||||
``confidence_score``, ``target_element``, ``step_id``).
|
||||
|
||||
Args:
|
||||
execution_id: Execution identifier
|
||||
workflow_id: Workflow identifier
|
||||
node_id: Node identifier
|
||||
strategy: Recovery strategy used
|
||||
success: Whether recovery succeeded
|
||||
duration: Recovery duration
|
||||
execution_id: Identifiant d'exécution
|
||||
workflow_id: Identifiant du workflow
|
||||
node_id: Node où la récupération est tentée
|
||||
strategy: Stratégie de récupération employée
|
||||
success: Vrai si la récupération a réussi
|
||||
duration_ms: Durée de la tentative en millisecondes
|
||||
"""
|
||||
if not self.enabled or not self.analytics:
|
||||
return
|
||||
|
||||
|
||||
try:
|
||||
# Record as a special step metric
|
||||
now = datetime.now()
|
||||
started_at = now - timedelta(milliseconds=float(duration_ms))
|
||||
|
||||
recovery_metrics = StepMetrics(
|
||||
step_id=f"{execution_id}:{node_id}:recovery:{now.isoformat()}",
|
||||
execution_id=execution_id,
|
||||
workflow_id=workflow_id,
|
||||
node_id=f"{node_id}_recovery",
|
||||
action_type=f"recovery_{strategy}",
|
||||
started_at=datetime.now(),
|
||||
completed_at=datetime.now(),
|
||||
duration=duration,
|
||||
success=success,
|
||||
error_message=None if success else f"Recovery failed: {strategy}"
|
||||
target_element="",
|
||||
started_at=started_at,
|
||||
completed_at=now,
|
||||
duration_ms=float(duration_ms),
|
||||
status="completed" if success else "failed",
|
||||
confidence_score=0.0,
|
||||
retry_count=0,
|
||||
error_details=None if success else f"Recovery failed: {strategy}",
|
||||
)
|
||||
|
||||
|
||||
self.analytics.metrics_collector.record_step(recovery_metrics)
|
||||
|
||||
logger.debug(f"Recorded recovery: {strategy} ({'success' if success else 'failed'})")
|
||||
|
||||
logger.debug(
|
||||
f"Recorded recovery: {strategy} "
|
||||
f"({'success' if success else 'failed'})"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Error recording recovery attempt: {e}")
|
||||
|
||||
|
||||
@@ -42,6 +42,8 @@ class TimeSeriesStore:
|
||||
ON execution_metrics(started_at);
|
||||
|
||||
-- Step metrics table
|
||||
-- Les colonnes ocr_ms, ui_ms, analyze_ms, total_ms, cache_hit, degraded
|
||||
-- proviennent de l'instrumentation vision-aware (C1) de ExecutionLoop.
|
||||
CREATE TABLE IF NOT EXISTS step_metrics (
|
||||
step_id TEXT PRIMARY KEY,
|
||||
execution_id TEXT NOT NULL,
|
||||
@@ -56,6 +58,12 @@ class TimeSeriesStore:
|
||||
confidence_score REAL,
|
||||
retry_count INTEGER DEFAULT 0,
|
||||
error_details TEXT,
|
||||
ocr_ms REAL DEFAULT 0.0,
|
||||
ui_ms REAL DEFAULT 0.0,
|
||||
analyze_ms REAL DEFAULT 0.0,
|
||||
total_ms REAL DEFAULT 0.0,
|
||||
cache_hit INTEGER DEFAULT 0,
|
||||
degraded INTEGER DEFAULT 0,
|
||||
FOREIGN KEY (execution_id) REFERENCES execution_metrics(execution_id)
|
||||
);
|
||||
|
||||
@@ -101,11 +109,40 @@ class TimeSeriesStore:
|
||||
|
||||
logger.info(f"TimeSeriesStore initialized at {self.db_path}")
|
||||
|
||||
# Colonnes ajoutées ultérieurement — appliquées via ALTER TABLE si absentes.
|
||||
# (C1 — instrumentation vision-aware, avril 2026)
|
||||
_STEP_METRICS_MIGRATIONS = [
|
||||
("ocr_ms", "REAL DEFAULT 0.0"),
|
||||
("ui_ms", "REAL DEFAULT 0.0"),
|
||||
("analyze_ms", "REAL DEFAULT 0.0"),
|
||||
("total_ms", "REAL DEFAULT 0.0"),
|
||||
("cache_hit", "INTEGER DEFAULT 0"),
|
||||
("degraded", "INTEGER DEFAULT 0"),
|
||||
]
|
||||
|
||||
def _init_database(self) -> None:
|
||||
"""Initialize database schema."""
|
||||
"""Initialize database schema and apply lightweight migrations."""
|
||||
with self._get_connection() as conn:
|
||||
conn.executescript(self.SCHEMA)
|
||||
self._migrate_step_metrics(conn)
|
||||
conn.commit()
|
||||
|
||||
def _migrate_step_metrics(self, conn: sqlite3.Connection) -> None:
|
||||
"""Ajoute les colonnes C1 sur une base `step_metrics` pré-existante."""
|
||||
cursor = conn.execute("PRAGMA table_info(step_metrics)")
|
||||
existing = {row[1] for row in cursor.fetchall()}
|
||||
for column, ddl in self._STEP_METRICS_MIGRATIONS:
|
||||
if column not in existing:
|
||||
try:
|
||||
conn.execute(
|
||||
f"ALTER TABLE step_metrics ADD COLUMN {column} {ddl}"
|
||||
)
|
||||
logger.info(
|
||||
f"Migration step_metrics: ajout colonne {column}"
|
||||
)
|
||||
except sqlite3.OperationalError as e:
|
||||
# Collision bénigne (colonne déjà ajoutée par un autre process)
|
||||
logger.debug(f"Migration colonne {column} ignorée: {e}")
|
||||
|
||||
@contextmanager
|
||||
def _get_connection(self):
|
||||
@@ -164,13 +201,14 @@ class TimeSeriesStore:
|
||||
))
|
||||
|
||||
def _write_step_metric(self, conn: sqlite3.Connection, metric: StepMetrics) -> None:
|
||||
"""Write step metric."""
|
||||
"""Write step metric (inclut les champs vision-aware C1)."""
|
||||
conn.execute("""
|
||||
INSERT OR REPLACE INTO step_metrics
|
||||
(step_id, execution_id, workflow_id, node_id, action_type, target_element,
|
||||
started_at, completed_at, duration_ms, status, confidence_score,
|
||||
retry_count, error_details)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
retry_count, error_details,
|
||||
ocr_ms, ui_ms, analyze_ms, total_ms, cache_hit, degraded)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""", (
|
||||
metric.step_id,
|
||||
metric.execution_id,
|
||||
@@ -184,7 +222,13 @@ class TimeSeriesStore:
|
||||
metric.status,
|
||||
metric.confidence_score,
|
||||
metric.retry_count,
|
||||
metric.error_details
|
||||
metric.error_details,
|
||||
getattr(metric, 'ocr_ms', 0.0),
|
||||
getattr(metric, 'ui_ms', 0.0),
|
||||
getattr(metric, 'analyze_ms', 0.0),
|
||||
getattr(metric, 'total_ms', 0.0),
|
||||
1 if getattr(metric, 'cache_hit', False) else 0,
|
||||
1 if getattr(metric, 'degraded', False) else 0,
|
||||
))
|
||||
|
||||
def _write_resource_metric(self, conn: sqlite3.Connection, metric: ResourceMetrics) -> None:
|
||||
|
||||
31
core/anonymisation/__init__.py
Normal file
31
core/anonymisation/__init__.py
Normal file
@@ -0,0 +1,31 @@
|
||||
# core/anonymisation/__init__.py
|
||||
"""Module de floutage ciblé des PII côté serveur.
|
||||
|
||||
Remplace l'ancien blur client-side (`agent_v0/agent_v1/vision/blur_sensitive.py`)
|
||||
qui floutait toutes les zones de texte claires, cassant les codes CIM, les
|
||||
montants PMSI et les boutons.
|
||||
|
||||
Stratégie :
|
||||
1. OCR (docTR) sur le screenshot → texte + bounding boxes
|
||||
2. NER (EDS-NLP si disponible, sinon regex) → détection des PII
|
||||
3. Filtrage : ne conserver que PERSON / LOCATION / PHONE / NIR / EMAIL
|
||||
4. Blur gaussien uniquement sur les bbox des PII filtrées
|
||||
|
||||
Usage :
|
||||
from core.anonymisation import blur_pii_on_image
|
||||
blurred_path = blur_pii_on_image("shot_0001_full.png")
|
||||
"""
|
||||
|
||||
from .pii_blur import (
|
||||
PIIBlurResult,
|
||||
PIIEntity,
|
||||
PIIBlurrer,
|
||||
blur_pii_on_image,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"PIIBlurResult",
|
||||
"PIIEntity",
|
||||
"PIIBlurrer",
|
||||
"blur_pii_on_image",
|
||||
]
|
||||
650
core/anonymisation/pii_blur.py
Normal file
650
core/anonymisation/pii_blur.py
Normal file
@@ -0,0 +1,650 @@
|
||||
# core/anonymisation/pii_blur.py
|
||||
"""Floutage ciblé des PII côté serveur (Personal Identifiable Information).
|
||||
|
||||
Contexte
|
||||
--------
|
||||
L'ancien blur côté client (`agent_v0/agent_v1/vision/blur_sensitive.py`) était
|
||||
trop agressif : il floutait TOUTES les zones blanches avec texte, ce qui
|
||||
détruisait les codes CIM-10, les montants PMSI, les boutons et rendait les
|
||||
screenshots inutilisables pour le replay ou le grounding VLM. De plus,
|
||||
`opencv-python` n'était pas listé dans les dépendances de l'agent, donc le blur
|
||||
échouait silencieusement en production.
|
||||
|
||||
Stratégie retenue (avril 2026)
|
||||
------------------------------
|
||||
1. Agent = zéro blur → envoie les screenshots bruts via TLS.
|
||||
2. Serveur = OCR (docTR) + NER (EDS-NLP avec fallback regex).
|
||||
3. On floute UNIQUEMENT les entités :
|
||||
- PERSON → noms, prénoms
|
||||
- LOCATION → adresses, villes
|
||||
- PHONE → numéros de téléphone
|
||||
- NIR → numéro de sécurité sociale
|
||||
- EMAIL → adresses électroniques
|
||||
Et on préserve :
|
||||
- codes CIM-10 / CCAM
|
||||
- montants (1250€, 31,50 €)
|
||||
- dates (pas PII au sens RGPD santé)
|
||||
- identifiants techniques (shot_0001, session IDs…)
|
||||
4. Deux fichiers sont stockés :
|
||||
- `shot_XXXX_full.png` → version brute (accès restreint)
|
||||
- `shot_XXXX_full_blurred.png` → version pour affichage
|
||||
|
||||
Performance
|
||||
-----------
|
||||
Objectif : < 2 s par screenshot sur RTX 5070.
|
||||
docTR (db_mobilenet_v3_large + crnn_mobilenet_v3_large) : ~800 ms CPU, ~300 ms GPU.
|
||||
EDS-NLP pipeline minimal : ~100 ms pour un texte d'écran typique.
|
||||
Fallback regex : < 10 ms.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import tempfile
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Iterable, List, Optional, Sequence, Tuple, Union
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Types
|
||||
# =============================================================================
|
||||
|
||||
# Type d'entité PII reconnu. Aligné sur les labels EDS-NLP (`nlp.pipes.eds`)
|
||||
# et enrichi par nos propres patterns regex.
|
||||
PII_LABELS = frozenset({
|
||||
"PERSON", # noms de patient, médecin
|
||||
"LOCATION", # adresses, ville, code postal
|
||||
"ADDRESS", # alias de LOCATION (certains pipelines le produisent)
|
||||
"PHONE", # téléphone
|
||||
"NIR", # numéro sécu FR (15 chiffres)
|
||||
"SECURITY_NUMBER", # alias de NIR
|
||||
"EMAIL", # adresse email
|
||||
})
|
||||
|
||||
# Motifs qu'on NE DOIT PAS flouter même s'ils ressemblent à des PII :
|
||||
# - codes CIM-10 : 1 lettre + 2 chiffres + optionnellement .xx
|
||||
# - codes CCAM : 4 lettres + 3 chiffres
|
||||
# - montants (€, euros)
|
||||
# - dates format fr (dd/mm/yyyy, dd-mm-yy)
|
||||
# - identifiants techniques (ex: shot_0001, session_xxxxx)
|
||||
_RE_ICD10 = re.compile(r"\b[A-Z]\d{2}(\.\d{1,3})?\b")
|
||||
_RE_CCAM = re.compile(r"\b[A-Z]{4}\d{3}\b")
|
||||
_RE_MONEY = re.compile(r"\b\d{1,3}(?:[.,\s]\d{3})*(?:[.,]\d{1,2})?\s?€\b", re.IGNORECASE)
|
||||
_RE_DATE = re.compile(r"\b(0?[1-9]|[12]\d|3[01])[/.-](0?[1-9]|1[0-2])[/.-](\d{2}|\d{4})\b")
|
||||
_RE_TECH_ID = re.compile(r"\b(?:shot|session|sess|frame|trace|req|msg)_[\w-]+\b", re.IGNORECASE)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Entités PII
|
||||
# =============================================================================
|
||||
|
||||
@dataclass(frozen=True)
class PIIEntity:
    """A single PII entity detected in a screenshot."""
    label: str                           # PERSON, LOCATION, PHONE, NIR, EMAIL
    text: str                            # raw text as recovered from the OCR
    bbox: Tuple[int, int, int, int]      # (x1, y1, x2, y2) in pixels
    confidence: float = 1.0              # NER score (1.0 when found by regex)
    source: str = "ner"                  # "ner" (EDS-NLP) or "regex"
|
||||
|
||||
|
||||
@dataclass
class PIIBlurResult:
    """Outcome of the blur pipeline: paths, detected entities, stage timings."""
    raw_path: Path       # original (unblurred) screenshot
    blurred_path: Path   # written output with PII regions blurred
    entities: List[PIIEntity] = field(default_factory=list)  # blurred entities
    elapsed_ms: float = 0.0   # total wall-clock time for the pipeline
    ocr_ms: float = 0.0       # OCR stage only
    ner_ms: float = 0.0       # NER stage only
    blur_ms: float = 0.0      # blur + save stage only
    ocr_engine: str = "doctr"
    ner_engine: str = "regex"  # or "edsnlp"

    @property
    def count(self) -> int:
        """Number of PII entities that were blurred."""
        return len(self.entities)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Fallback NER par regex (utilisé si EDS-NLP indisponible)
|
||||
# =============================================================================
|
||||
|
||||
# Caution: only contiguous runs of 10 digits (FR format) or an international
# prefix are flagged as PHONE. Short 3-4 digit codes are ignored.
_RE_PHONE = re.compile(
    r"\b(?:(?:\+?33|0)\s?[1-9])(?:[\s.-]?\d{2}){4}\b"
)
# French NIR: sex digit (1/2), 2-digit year, month (01-12 or 20), department
# (2 digits or Corsican 2A/2B), commune, order number, optional final 2 digits.
_RE_NIR = re.compile(
    r"\b[12]\s?\d{2}\s?(?:0[1-9]|1[0-2]|20)\s?(?:\d{2}|2A|2B)\s?\d{3}\s?\d{3}(?:\s?\d{2})?\b"
)
_RE_EMAIL = re.compile(
    r"\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b", re.IGNORECASE
)
# Person name: civility followed by one or two capitalised words. Catches
# "Mme Dupont", "M. Martin", "Dr. Bernard".
# [^\S\n] (any whitespace EXCEPT newline) keeps the match on a single line —
# lines are typically distinct fields in a business UI.
_RE_PERSON = re.compile(
    r"\b(?:M\.?|Mme|Mlle|Dr\.?|Pr\.?|Prof\.?)[^\S\n]+"
    r"[A-ZÉÈÀÂÎÔÛÇ][a-zéèàâîôûç\-]+"
    r"(?:[^\S\n]+[A-ZÉÈÀÂÎÔÛÇ][a-zéèàâîôûç\-]+)?"
)
# Street address: "12 rue de la Paix", "3, avenue Victor Hugo".
# Same principle: matching never crosses a line break.
_RE_ADDRESS = re.compile(
    r"\b\d{1,4}(?:[^\S\n]?(?:bis|ter|quater))?[,\s]+(?:rue|avenue|av\.?|bd|boulevard|"
    r"allée|all\.?|place|impasse|chemin|route|rte\.?|quai|cours|voie|passage)"
    r"[^\S\n]+(?:de[^\S\n]+|du[^\S\n]+|des[^\S\n]+|la[^\S\n]+|le[^\S\n]+|les[^\S\n]+|l'|de[^\S\n]+la[^\S\n]+|d')?"
    r"[A-Za-zÀ-ÿ\-' ]{2,40}",
    re.IGNORECASE,
)
|
||||
|
||||
|
||||
def _regex_find_pii(text: str) -> List[Tuple[str, int, int]]:
    """Locate PII in *text* with regexes; return (label, start, end) triples.

    Spans overlapping a "technical" pattern (ICD-10/CCAM codes, money
    amounts, dates, technical ids) are explicitly excluded, even when one
    of the PII regexes happens to match them.
    """
    # 1. Collect every range that must NOT be blurred.
    shielded = [
        match.span()
        for pattern in (_RE_ICD10, _RE_CCAM, _RE_MONEY, _RE_DATE, _RE_TECH_ID)
        for match in pattern.finditer(text)
    ]

    def overlaps_shielded(lo: int, hi: int) -> bool:
        # Non-empty intersection with any protected range.
        return any(lo < p_hi and hi > p_lo for p_lo, p_hi in shielded)

    detectors = (
        ("NIR", _RE_NIR),
        ("EMAIL", _RE_EMAIL),
        ("PHONE", _RE_PHONE),
        ("PERSON", _RE_PERSON),
        ("LOCATION", _RE_ADDRESS),
    )
    return [
        (label, m.start(), m.end())
        for label, pattern in detectors
        for m in pattern.finditer(text)
        if not overlaps_shielded(m.start(), m.end())
    ]
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# NER via EDS-NLP (optionnel)
|
||||
# =============================================================================
|
||||
|
||||
# Lazily-built EDS-NLP pipeline, shared process-wide (None until first load).
_edsnlp_pipeline = None


def _get_edsnlp_pipeline():
    """Load an EDS-NLP pipeline if the module is available.

    Returns None when EDS-NLP is not installed — the caller then falls back
    to the regex-based NER.

    NOTE(review): a failed load is not cached, so every call retries the
    import/build until one succeeds — cheap for the ImportError path, but
    worth confirming this is intended for the build-failure path.
    """
    global _edsnlp_pipeline
    if _edsnlp_pipeline is not None:
        return _edsnlp_pipeline
    try:
        import edsnlp  # type: ignore
    except ImportError:
        logger.info(
            "EDS-NLP non installé — fallback regex utilisé pour la détection PII. "
            "Pour activer EDS-NLP : pip install edsnlp"
        )
        return None

    try:
        nlp = edsnlp.blank("eds")
        nlp.add_pipe("eds.sentences")
        nlp.add_pipe("eds.normalizer")
        # The available components depend on the installed version.
        # Add them in try/except to stay resilient.
        for pipe_name in ("eds.names", "eds.dates", "eds.addresses"):
            try:
                nlp.add_pipe(pipe_name)
            except Exception as e:  # noqa: BLE001
                logger.debug("EDS-NLP : composant %s indisponible (%s)", pipe_name, e)
        _edsnlp_pipeline = nlp
        logger.info("EDS-NLP : pipeline chargée")
        return _edsnlp_pipeline
    except Exception as e:  # noqa: BLE001
        logger.warning("EDS-NLP non utilisable (%s) — fallback regex", e)
        return None
|
||||
|
||||
|
||||
def _edsnlp_find_pii(text: str, nlp) -> List[Tuple[str, int, int]]:
|
||||
"""Utilise EDS-NLP pour trouver des entités PII.
|
||||
|
||||
Les labels EDS-NLP sont mappés vers nos labels canoniques.
|
||||
"""
|
||||
try:
|
||||
doc = nlp(text)
|
||||
except Exception as e: # noqa: BLE001
|
||||
logger.debug("EDS-NLP : échec sur texte de %d chars (%s)", len(text), e)
|
||||
return []
|
||||
|
||||
mapping = {
|
||||
"person": "PERSON",
|
||||
"name": "PERSON",
|
||||
"patient": "PERSON",
|
||||
"doctor": "PERSON",
|
||||
"location": "LOCATION",
|
||||
"address": "LOCATION",
|
||||
"city": "LOCATION",
|
||||
}
|
||||
hits: List[Tuple[str, int, int]] = []
|
||||
for ent in getattr(doc, "ents", []):
|
||||
raw_label = str(getattr(ent, "label_", "")).lower()
|
||||
mapped = mapping.get(raw_label)
|
||||
if mapped is None:
|
||||
# On accepte aussi si le label EDS-NLP est déjà l'un de nos labels
|
||||
upper = raw_label.upper()
|
||||
if upper in PII_LABELS:
|
||||
mapped = upper
|
||||
if mapped:
|
||||
hits.append((mapped, ent.start_char, ent.end_char))
|
||||
return hits
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# OCR avec bounding boxes par mot (docTR)
|
||||
# =============================================================================
|
||||
|
||||
# Lazily-built docTR predictor, shared process-wide (None until first load).
_ocr_predictor = None


def _get_ocr_predictor():
    """Load (once) a lightweight docTR predictor (mobilenet archs) for fast OCR."""
    global _ocr_predictor
    if _ocr_predictor is not None:
        return _ocr_predictor
    from doctr.models import ocr_predictor  # type: ignore
    _ocr_predictor = ocr_predictor(
        det_arch="db_mobilenet_v3_large",
        reco_arch="crnn_mobilenet_v3_large",
        pretrained=True,
    )
    # Move to GPU when available; keep the CPU predictor otherwise.
    try:
        import torch  # type: ignore
        if torch.cuda.is_available():
            _ocr_predictor = _ocr_predictor.cuda()
            logger.info("pii_blur : docTR chargé sur CUDA")
        else:
            logger.info("pii_blur : docTR chargé sur CPU")
    except Exception:  # noqa: BLE001
        logger.info("pii_blur : docTR chargé (device indéterminé)")
    return _ocr_predictor
|
||||
|
||||
|
||||
def _doctr_ocr(image_path: Path) -> Tuple[List[dict], int, int]:
    """Run docTR on *image_path* and return per-word pixel bounding boxes.

    Returns:
        (words, width, height) where words is a list of
        {text, x1, y1, x2, y2, line} dicts, coordinates in pixels.
    """
    from doctr.io import DocumentFile  # type: ignore
    from PIL import Image

    ocr = _get_ocr_predictor()
    result = ocr(DocumentFile.from_images([str(image_path)]))

    # docTR coordinates are normalised to [0, 1]; remap them to the real size.
    with Image.open(image_path) as img:
        width, height = img.size

    extracted: List[dict] = []
    line_index = 0
    for page in result.pages:
        for block in page.blocks:
            for line in block.lines:
                for word in line.words:
                    value = word.value
                    if value and value.strip():
                        (nx1, ny1), (nx2, ny2) = word.geometry
                        extracted.append({
                            "text": value,
                            "x1": max(0, int(nx1 * width)),
                            "y1": max(0, int(ny1 * height)),
                            "x2": min(width, int(nx2 * width)),
                            "y2": min(height, int(ny2 * height)),
                            "line": line_index,
                        })
                line_index += 1
    return extracted, width, height
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Pipeline principal
|
||||
# =============================================================================
|
||||
|
||||
class PIIBlurrer:
    """Reusable pipeline (keeps the models in memory between calls).

    Example:
        blurrer = PIIBlurrer()
        res = blurrer.blur_image("shot_0001_full.png")
        print(res.count, res.elapsed_ms)
    """

    def __init__(
        self,
        blur_kernel: Tuple[int, int] = (31, 31),
        blur_sigma: float = 15.0,
        bbox_padding: int = 2,
        use_edsnlp: bool = True,
    ) -> None:
        # Gaussian kernel size (forced odd at blur time) and sigma.
        self._blur_kernel = blur_kernel
        self._blur_sigma = blur_sigma
        # Pixels added around each PII bbox before blurring.
        self._bbox_padding = bbox_padding
        # Try EDS-NLP first; the regex NER always runs as a complement.
        self._use_edsnlp = use_edsnlp

    # ------------------------------------------------------------------
    # Public entry point
    # ------------------------------------------------------------------
    def blur_image(
        self,
        input_path: Union[str, Path],
        output_path: Optional[Union[str, Path]] = None,
    ) -> PIIBlurResult:
        """Blur the detected PII and write the blurred copy to disk.

        Args:
            input_path: Path to the raw screenshot (PNG/JPG).
            output_path: Output path. Defaults to `<stem>_blurred.png`
                next to the input.

        Returns:
            PIIBlurResult with stage timings and the detected entities.

        Raises:
            FileNotFoundError: if *input_path* does not exist.
        """
        input_path = Path(input_path)
        if not input_path.is_file():
            raise FileNotFoundError(f"Screenshot introuvable : {input_path}")

        if output_path is None:
            output_path = input_path.with_name(
                f"{input_path.stem}_blurred{input_path.suffix or '.png'}"
            )
        else:
            output_path = Path(output_path)

        t_start = time.perf_counter()

        # 1. OCR
        t_ocr = time.perf_counter()
        try:
            words, W, H = _doctr_ocr(input_path)
        except Exception as e:  # noqa: BLE001
            logger.warning("pii_blur : OCR docTR échoué (%s) — pas de blur appliqué", e)
            # Degraded mode: simply copy the original as the "blurred" output.
            _copy_file(input_path, output_path)
            return PIIBlurResult(
                raw_path=input_path,
                blurred_path=output_path,
                entities=[],
                elapsed_ms=(time.perf_counter() - t_start) * 1000,
            )
        ocr_ms = (time.perf_counter() - t_ocr) * 1000

        if not words:
            _copy_file(input_path, output_path)
            return PIIBlurResult(
                raw_path=input_path,
                blurred_path=output_path,
                entities=[],
                elapsed_ms=(time.perf_counter() - t_start) * 1000,
                ocr_ms=ocr_ms,
            )

        # 2. Rebuild the text line by line while keeping the mapping
        # (char offset → word) so entity bboxes can be recovered later.
        text, char_to_word = _build_text_with_map(words)

        # 3. NER: EDS-NLP when available, regex otherwise
        t_ner = time.perf_counter()
        ner_engine = "regex"
        entities_spans: List[Tuple[str, int, int]] = []
        if self._use_edsnlp:
            nlp = _get_edsnlp_pipeline()
            if nlp is not None:
                entities_spans = _edsnlp_find_pii(text, nlp)
                ner_engine = "edsnlp"
        # Always complement with the regexes (EDS-NLP does not cover all the
        # frequent PII: email, NIR, French phone numbers).
        entities_spans.extend(_regex_find_pii(text))
        ner_ms = (time.perf_counter() - t_ner) * 1000

        # Deduplicate and normalise overlapping spans.
        entities_spans = _merge_spans(entities_spans)

        # 4. Convert (label, start, end) → PIIEntity(label, text, pixel bbox)
        pii_entities: List[PIIEntity] = []
        for label, start, end in entities_spans:
            if label not in PII_LABELS:
                continue
            bbox = _spans_to_bbox(start, end, char_to_word, words, self._bbox_padding, W, H)
            if bbox is None:
                continue
            # NOTE(review): `source` reflects the engine globally — regex hits
            # are also tagged "ner" when EDS-NLP was active; confirm intended.
            pii_entities.append(PIIEntity(
                label=label,
                text=text[start:end],
                bbox=bbox,
                confidence=1.0,
                source=("ner" if ner_engine == "edsnlp" else "regex"),
            ))

        # 5. Apply the Gaussian blur on the entity bboxes
        t_blur = time.perf_counter()
        _apply_blur(input_path, output_path, pii_entities,
                    kernel=self._blur_kernel, sigma=self._blur_sigma)
        blur_ms = (time.perf_counter() - t_blur) * 1000

        elapsed_ms = (time.perf_counter() - t_start) * 1000
        if pii_entities:
            logger.info(
                "pii_blur : %d PII floutés sur %s (%.0fms : ocr=%.0f ner=%.0f blur=%.0f, ner=%s)",
                len(pii_entities), input_path.name, elapsed_ms,
                ocr_ms, ner_ms, blur_ms, ner_engine,
            )
        else:
            logger.debug(
                "pii_blur : aucune PII détectée dans %s (%.0fms)",
                input_path.name, elapsed_ms,
            )

        return PIIBlurResult(
            raw_path=input_path,
            blurred_path=output_path,
            entities=pii_entities,
            elapsed_ms=elapsed_ms,
            ocr_ms=ocr_ms,
            ner_ms=ner_ms,
            blur_ms=blur_ms,
            ner_engine=ner_engine,
        )
|
||||
|
||||
|
||||
# Shared PIIBlurrer instance, created lazily on first use.
_default_blurrer: Optional[PIIBlurrer] = None


def blur_pii_on_image(
    input_path: Union[str, Path],
    output_path: Optional[Union[str, Path]] = None,
) -> PIIBlurResult:
    """Functional helper: run the blur pipeline via a shared PIIBlurrer.

    The singleton keeps the OCR/NER models warm between calls.
    """
    global _default_blurrer
    blurrer = _default_blurrer
    if blurrer is None:
        blurrer = _default_blurrer = PIIBlurrer()
    return blurrer.blur_image(input_path, output_path)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Helpers internes
|
||||
# =============================================================================
|
||||
|
||||
def _copy_file(src: Path, dst: Path) -> None:
|
||||
"""Copie bytewise (utilisé quand aucun PII n'est détecté / OCR KO)."""
|
||||
dst.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(src, "rb") as f_in, open(dst, "wb") as f_out:
|
||||
f_out.write(f_in.read())
|
||||
|
||||
|
||||
def _build_text_with_map(words: Sequence[dict]) -> Tuple[str, List[int]]:
|
||||
"""Concatène les mots en texte + mappe chaque caractère vers son index de mot.
|
||||
|
||||
Quand deux mots consécutifs appartiennent à des lignes différentes (champ
|
||||
`line` dans le dict), on insère un `\n` au lieu d'un espace. Cela empêche
|
||||
les regex gloutons (PERSON, LOCATION…) de matcher à travers des lignes
|
||||
logiques, qui sont typiquement des champs distincts dans une UI métier.
|
||||
|
||||
Returns:
|
||||
text : str concaténé (mots séparés par un espace ou un \n)
|
||||
char_to_word : list[int] len == len(text), char_to_word[i] = index du mot
|
||||
(ou -1 pour les séparateurs).
|
||||
"""
|
||||
parts: List[str] = []
|
||||
char_to_word: List[int] = []
|
||||
prev_line: Optional[int] = None
|
||||
for i, w in enumerate(words):
|
||||
cur_line = w.get("line")
|
||||
if i > 0:
|
||||
if prev_line is not None and cur_line is not None and cur_line != prev_line:
|
||||
sep = "\n"
|
||||
else:
|
||||
sep = " "
|
||||
parts.append(sep)
|
||||
char_to_word.append(-1)
|
||||
txt = w["text"]
|
||||
parts.append(txt)
|
||||
char_to_word.extend([i] * len(txt))
|
||||
prev_line = cur_line
|
||||
return "".join(parts), char_to_word
|
||||
|
||||
|
||||
def _spans_to_bbox(
|
||||
start: int,
|
||||
end: int,
|
||||
char_to_word: Sequence[int],
|
||||
words: Sequence[dict],
|
||||
padding: int,
|
||||
image_w: int,
|
||||
image_h: int,
|
||||
) -> Optional[Tuple[int, int, int, int]]:
|
||||
"""Convertit une plage [start, end[ dans le texte en bbox englobant les mots."""
|
||||
if end <= start or start >= len(char_to_word):
|
||||
return None
|
||||
word_ids = set()
|
||||
for i in range(start, min(end, len(char_to_word))):
|
||||
wid = char_to_word[i]
|
||||
if wid >= 0:
|
||||
word_ids.add(wid)
|
||||
if not word_ids:
|
||||
return None
|
||||
xs1, ys1, xs2, ys2 = [], [], [], []
|
||||
for wid in word_ids:
|
||||
w = words[wid]
|
||||
xs1.append(w["x1"]); ys1.append(w["y1"])
|
||||
xs2.append(w["x2"]); ys2.append(w["y2"])
|
||||
x1 = max(0, min(xs1) - padding)
|
||||
y1 = max(0, min(ys1) - padding)
|
||||
x2 = min(image_w, max(xs2) + padding)
|
||||
y2 = min(image_h, max(ys2) + padding)
|
||||
if x2 <= x1 or y2 <= y1:
|
||||
return None
|
||||
return (x1, y1, x2, y2)
|
||||
|
||||
|
||||
def _merge_spans(
|
||||
spans: Sequence[Tuple[str, int, int]],
|
||||
) -> List[Tuple[str, int, int]]:
|
||||
"""Déduplique et fusionne les plages qui se chevauchent sur un même label.
|
||||
|
||||
En cas de conflit inter-labels, on garde celui qui couvre le plus large.
|
||||
"""
|
||||
if not spans:
|
||||
return []
|
||||
# Trier par start puis par -width (le plus long d'abord pour les ties)
|
||||
sorted_spans = sorted(spans, key=lambda s: (s[1], -(s[2] - s[1])))
|
||||
merged: List[Tuple[str, int, int]] = []
|
||||
for label, s, e in sorted_spans:
|
||||
if not merged:
|
||||
merged.append((label, s, e))
|
||||
continue
|
||||
last_label, ls, le = merged[-1]
|
||||
if s < le: # chevauchement
|
||||
# On garde l'étendue fusionnée avec le label du plus large
|
||||
new_start = min(ls, s)
|
||||
new_end = max(le, e)
|
||||
new_label = last_label if (le - ls) >= (e - s) else label
|
||||
merged[-1] = (new_label, new_start, new_end)
|
||||
else:
|
||||
merged.append((label, s, e))
|
||||
return merged
|
||||
|
||||
|
||||
def _apply_blur(
    src: Path,
    dst: Path,
    entities: Sequence[PIIEntity],
    kernel: Tuple[int, int],
    sigma: float,
) -> None:
    """Apply a Gaussian blur on every entity bbox and write the image to *dst*.

    When *entities* is empty the source is simply re-encoded to *dst*.
    """
    from PIL import Image

    with Image.open(src) as img:
        if img.mode != "RGB":
            img = img.convert("RGB")

        if not entities:
            dst.parent.mkdir(parents=True, exist_ok=True)
            img.save(dst, format="PNG", optimize=True)
            return

        # Prefer OpenCV when available (faster); otherwise fall back to
        # PIL's ImageFilter.GaussianBlur.
        try:
            import cv2  # type: ignore
            import numpy as np  # type: ignore
            arr = np.array(img)
            bgr = cv2.cvtColor(arr, cv2.COLOR_RGB2BGR)
            for ent in entities:
                x1, y1, x2, y2 = ent.bbox
                if x2 <= x1 or y2 <= y1:
                    continue
                roi = bgr[y1:y2, x1:x2]
                if roi.size == 0:
                    continue
                # cv2.GaussianBlur requires odd kernel sizes; `| 1` forces odd.
                k = (max(3, kernel[0] | 1), max(3, kernel[1] | 1))
                bgr[y1:y2, x1:x2] = cv2.GaussianBlur(roi, k, sigma)
            out = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
            img = Image.fromarray(out)
        except ImportError:
            from PIL import ImageFilter
            # Approximate a PIL radius from the cv2 sigma, floored at 4 px.
            radius = max(sigma / 2, 4.0)
            for ent in entities:
                x1, y1, x2, y2 = ent.bbox
                region = img.crop((x1, y1, x2, y2))
                if region.size[0] == 0 or region.size[1] == 0:
                    continue
                blurred = region.filter(ImageFilter.GaussianBlur(radius=radius))
                img.paste(blurred, (x1, y1))

        dst.parent.mkdir(parents=True, exist_ok=True)
        img.save(dst, format="PNG", optimize=True)
|
||||
@@ -68,11 +68,11 @@ class SystemConfig:
|
||||
clip_model: str = "ViT-B-32"
|
||||
clip_pretrained: str = "openai"
|
||||
clip_device: str = "cpu"
|
||||
vlm_model: str = "qwen3-vl:8b"
|
||||
vlm_model: str = "gemma4:latest"
|
||||
vlm_endpoint: str = "http://localhost:11434"
|
||||
owl_model: str = "google/owlv2-base-patch16-ensemble"
|
||||
owl_confidence_threshold: float = 0.1
|
||||
|
||||
|
||||
# FAISS
|
||||
faiss_dimensions: int = 512
|
||||
faiss_index_type: str = "Flat"
|
||||
@@ -211,7 +211,7 @@ class ConfigurationManager:
|
||||
clip_model=os.getenv("CLIP_MODEL", "ViT-B-32"),
|
||||
clip_pretrained=os.getenv("CLIP_PRETRAINED", "openai"),
|
||||
clip_device=os.getenv("CLIP_DEVICE", "cpu"),
|
||||
vlm_model=os.getenv("VLM_MODEL", "qwen3-vl:8b"),
|
||||
vlm_model=os.getenv("RPA_VLM_MODEL", os.getenv("VLM_MODEL", "gemma4:latest")),
|
||||
vlm_endpoint=os.getenv("VLM_ENDPOINT", "http://localhost:11434"),
|
||||
owl_model=os.getenv("OWL_MODEL", "google/owlv2-base-patch16-ensemble"),
|
||||
owl_confidence_threshold=float(os.getenv("OWL_CONFIDENCE_THRESHOLD", "0.1")),
|
||||
@@ -435,7 +435,7 @@ class ModelConfig:
|
||||
clip_model: str = "ViT-B-32"
|
||||
clip_pretrained: str = "openai"
|
||||
clip_device: str = "cpu"
|
||||
vlm_model: str = "qwen3-vl:8b"
|
||||
vlm_model: str = "gemma4:latest"
|
||||
vlm_endpoint: str = "http://localhost:11434"
|
||||
owl_model: str = "google/owlv2-base-patch16-ensemble"
|
||||
owl_confidence_threshold: float = 0.1
|
||||
@@ -510,7 +510,7 @@ class FAISSConfig:
|
||||
class GPUResourceConfig:
|
||||
"""Configuration for GPU resource management - DEPRECATED: Use SystemConfig instead"""
|
||||
ollama_endpoint: str = "http://localhost:11434"
|
||||
vlm_model: str = "qwen3-vl:8b"
|
||||
vlm_model: str = "gemma4:latest"
|
||||
clip_model: str = "ViT-B-32"
|
||||
idle_timeout_seconds: int = 300
|
||||
vram_threshold_for_clip_gpu_mb: int = 1024
|
||||
@@ -599,7 +599,7 @@ UPLOADS_PATH=data/training/uploads
|
||||
CLIP_MODEL=ViT-B-32
|
||||
CLIP_PRETRAINED=openai
|
||||
CLIP_DEVICE=cpu
|
||||
VLM_MODEL=qwen3-vl:8b
|
||||
VLM_MODEL=gemma4:latest
|
||||
VLM_ENDPOINT=http://localhost:11434
|
||||
OWL_MODEL=google/owlv2-base-patch16-ensemble
|
||||
OWL_CONFIDENCE_THRESHOLD=0.1
|
||||
|
||||
@@ -23,9 +23,9 @@ class OllamaClient:
|
||||
Permet d'envoyer des images et prompts à un VLM via l'API Ollama.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
def __init__(self,
|
||||
endpoint: str = "http://localhost:11434",
|
||||
model: str = "qwen3-vl:8b",
|
||||
model: str = None,
|
||||
timeout: int = 180):
|
||||
"""
|
||||
Initialiser le client Ollama
|
||||
@@ -36,7 +36,12 @@ class OllamaClient:
|
||||
timeout: Timeout en secondes
|
||||
"""
|
||||
self.endpoint = endpoint.rstrip('/')
|
||||
self.model = model
|
||||
# Résolution du modèle : paramètre explicite > config centralisée
|
||||
if model is not None:
|
||||
self.model = model
|
||||
else:
|
||||
from core.detection.vlm_config import get_vlm_model
|
||||
self.model = get_vlm_model(endpoint=self.endpoint)
|
||||
self.timeout = timeout
|
||||
self._check_connection()
|
||||
|
||||
@@ -126,7 +131,12 @@ class OllamaClient:
|
||||
messages.append(user_message)
|
||||
|
||||
# Déterminer si le modèle est un modèle thinking (qwen3)
|
||||
is_thinking_model = "qwen3" in self.model.lower()
|
||||
# Les modèles non-thinking (gemma4, qwen2.5vl) n'ont pas besoin
|
||||
# du workaround prefill et supportent le rôle system natif.
|
||||
from core.detection.vlm_config import is_thinking_model as _is_thinking
|
||||
from core.detection.vlm_config import needs_think_false as _needs_think_false
|
||||
is_thinking_model = _is_thinking(self.model)
|
||||
requires_think_false = _needs_think_false(self.model)
|
||||
|
||||
# WORKAROUND Ollama 0.18.x : think=false est ignoré par le
|
||||
# renderer qwen3-vl-thinking. On utilise un assistant prefill
|
||||
@@ -168,9 +178,9 @@ class OllamaClient:
|
||||
}
|
||||
}
|
||||
|
||||
# Garder think=false au cas où une future version d'Ollama le
|
||||
# corrige — le prefill reste le mécanisme principal
|
||||
if is_thinking_model:
|
||||
# think=false : requis pour qwen3 (prefill reste le mécanisme
|
||||
# principal) ET pour gemma4 (sinon tokens vides sur Ollama >=0.20)
|
||||
if is_thinking_model or requires_think_false:
|
||||
payload["think"] = False
|
||||
|
||||
if force_json:
|
||||
@@ -575,7 +585,7 @@ Your answer:"""
|
||||
# Fonctions utilitaires
|
||||
# ============================================================================
|
||||
|
||||
def create_ollama_client(model: str = "qwen3-vl:8b",
|
||||
def create_ollama_client(model: str = None,
|
||||
endpoint: str = "http://localhost:11434") -> OllamaClient:
|
||||
"""
|
||||
Créer un client Ollama
|
||||
|
||||
@@ -72,9 +72,9 @@ class BoundingBox:
|
||||
class DetectionConfig:
|
||||
"""Configuration de la détection UI hybride"""
|
||||
# VLM — modèle configurable via variable d'environnement RPA_VLM_MODEL
|
||||
# Production (local) : "qwen3-vl:8b" — GPU local, pas de réseau
|
||||
# Tests (cloud) : "qwen3-vl:235b-cloud" — pas de GPU, plus lent mais libère la VRAM
|
||||
vlm_model: str = os.environ.get("RPA_VLM_MODEL", "qwen3-vl:8b")
|
||||
# Par défaut : gemma4:e4b (meilleur grounding + contextualisation)
|
||||
# Fallback : qwen3-vl:8b si gemma4 non disponible
|
||||
vlm_model: str = os.environ.get("RPA_VLM_MODEL", os.environ.get("VLM_MODEL", "gemma4:e4b"))
|
||||
vlm_endpoint: str = "http://localhost:11434"
|
||||
use_vlm_classification: bool = True # Utiliser VLM pour classifier
|
||||
|
||||
@@ -865,21 +865,24 @@ JSON array: [{{"id":0,"type":"...","role":"...","text":"..."}}]"""
|
||||
# ============================================================================
|
||||
|
||||
def create_detector(
|
||||
vlm_model: str = "qwen3-vl:8b",
|
||||
vlm_model: str = None,
|
||||
confidence_threshold: float = 0.7,
|
||||
use_vlm: bool = True
|
||||
) -> UIDetector:
|
||||
"""
|
||||
Créer un détecteur avec configuration personnalisée
|
||||
|
||||
|
||||
Args:
|
||||
vlm_model: Modèle VLM à utiliser
|
||||
vlm_model: Modèle VLM à utiliser (None = résolution automatique via vlm_config)
|
||||
confidence_threshold: Seuil de confiance
|
||||
use_vlm: Utiliser le VLM pour la classification
|
||||
|
||||
|
||||
Returns:
|
||||
UIDetector configuré
|
||||
"""
|
||||
if vlm_model is None:
|
||||
from core.detection.vlm_config import get_vlm_model
|
||||
vlm_model = get_vlm_model()
|
||||
config = DetectionConfig(
|
||||
vlm_model=vlm_model,
|
||||
confidence_threshold=confidence_threshold,
|
||||
|
||||
@@ -1,622 +0,0 @@
|
||||
"""
|
||||
UIDetector - Détection Sémantique d'Éléments UI avec VLM
|
||||
|
||||
Utilise un Vision-Language Model (VLM) pour détecter et classifier
|
||||
les éléments UI avec leurs types et rôles sémantiques.
|
||||
"""
|
||||
|
||||
from typing import List, Dict, Optional, Any, Tuple
|
||||
from pathlib import Path
|
||||
from dataclasses import dataclass
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
import json
|
||||
import re
|
||||
|
||||
from ..models.ui_element import UIElement, UIElementEmbeddings, VisualFeatures
|
||||
from .ollama_client import OllamaClient, check_ollama_available
|
||||
|
||||
|
||||
@dataclass
|
||||
class DetectionConfig:
|
||||
"""Configuration de la détection UI"""
|
||||
vlm_model: str = "qwen3-vl:8b" # Modèle VLM à utiliser (qwen3-vl:8b recommandé)
|
||||
vlm_endpoint: str = "http://localhost:11434" # Endpoint Ollama
|
||||
confidence_threshold: float = 0.7 # Seuil de confiance minimum
|
||||
max_elements: int = 50 # Nombre max d'éléments à détecter
|
||||
detect_regions: bool = True # Détecter régions d'intérêt d'abord
|
||||
use_embeddings: bool = True # Générer embeddings duaux
|
||||
|
||||
|
||||
class UIDetector:
|
||||
"""
|
||||
Détecteur d'éléments UI sémantique
|
||||
|
||||
Utilise un VLM (Vision-Language Model) pour :
|
||||
1. Détecter les régions d'intérêt dans un screenshot
|
||||
2. Classifier le type de chaque élément UI
|
||||
3. Déterminer le rôle sémantique
|
||||
4. Extraire les features visuelles
|
||||
5. Générer des embeddings duaux (image + texte)
|
||||
"""
|
||||
|
||||
def __init__(self, config: Optional[DetectionConfig] = None):
    """
    Initialize the detector.

    Args:
        config: Detection configuration (defaults to DetectionConfig() if None)
    """
    self.config = config or DetectionConfig()
    # VLM client; stays None when Ollama is unreachable (simulation mode).
    self.vlm_client = None
    self._initialize_vlm()
|
||||
|
||||
def _initialize_vlm(self) -> None:
    """Set up the Ollama VLM client; fall back to simulation mode on failure."""
    endpoint = self.config.vlm_endpoint
    model = self.config.vlm_model
    try:
        # Probe Ollama first; without it we run in simulation mode.
        if not check_ollama_available(endpoint):
            print(f"⚠ Ollama not available at {endpoint}, using simulation mode")
            self.vlm_client = None
            return
        self.vlm_client = OllamaClient(endpoint=endpoint, model=model)
        print(f"✓ VLM initialized: {model} at {endpoint}")
    except Exception as e:
        print(f"⚠ Failed to initialize VLM: {e}, using simulation mode")
        self.vlm_client = None
|
||||
|
||||
def detect(self,
           screenshot_path: str,
           window_context: Optional[Dict[str, Any]] = None) -> List[UIElement]:
    """
    Detect every UI element visible in a screenshot.

    Args:
        screenshot_path: Path to the screenshot file
        window_context: Window context (title, process, ...)

    Returns:
        List of detected UIElements, confidence-filtered and capped at
        config.max_elements.
    """
    image = self._load_image(screenshot_path)
    if image is None:
        return []

    # Either pre-detect regions of interest, or scan the full frame.
    if self.config.detect_regions:
        regions = self._detect_regions_of_interest(image, window_context)
    else:
        regions = [{"bbox": (0, 0, image.width, image.height), "confidence": 1.0}]

    # Collect detections region by region.
    detected: List[UIElement] = []
    for roi in regions:
        detected.extend(
            self._detect_elements_in_region(image, roi, screenshot_path, window_context)
        )

    # Drop low-confidence elements.
    kept = [el for el in detected
            if el.confidence >= self.config.confidence_threshold]

    # Keep only the most confident elements when over the cap.
    limit = self.config.max_elements
    if len(kept) > limit:
        kept = sorted(kept, key=lambda el: el.confidence, reverse=True)[:limit]

    return kept
|
||||
|
||||
def _load_image(self, screenshot_path: str) -> Optional[Image.Image]:
    """Open *screenshot_path* as a PIL image; return None on any failure."""
    try:
        image = Image.open(screenshot_path)
    except Exception as e:
        print(f"Error loading image {screenshot_path}: {e}")
        return None
    return image
|
||||
|
||||
def _detect_regions_of_interest(self,
                                image: Image.Image,
                                window_context: Optional[Dict] = None) -> List[Dict]:
    """
    Find regions of interest likely to contain UI elements.

    Args:
        image: PIL image
        window_context: Window context (currently unused by the VLM path)

    Returns:
        List of regions as {"bbox": (x, y, w, h), "confidence": float}
    """
    if self.vlm_client is None:
        # Simulation mode: carve the image into a fixed grid.
        return self._simulate_region_detection(image)

    # With a live VLM the whole frame is treated as one region —
    # simpler, and effective in practice.
    width, height = image.size
    full_frame = {"bbox": (0, 0, width, height), "confidence": 1.0}
    return [full_frame]
|
||||
|
||||
def _simulate_region_detection(self, image: Image.Image) -> List[Dict]:
    """Simulated region detection (development fallback): a 3x3 grid."""
    width, height = image.size
    grid = 3
    cell_w = width // grid
    cell_h = height // grid

    # Row-major grid of equally sized cells, fixed confidence.
    return [
        {"bbox": (col * cell_w, row * cell_h, cell_w, cell_h), "confidence": 0.8}
        for row in range(grid)
        for col in range(grid)
    ]
|
||||
|
||||
def _detect_elements_in_region(self,
                               image: Image.Image,
                               region: Dict,
                               screenshot_path: str,
                               window_context: Optional[Dict] = None) -> List[UIElement]:
    """
    Detect UI elements inside one region of the screenshot.

    Args:
        image: Full screenshot image
        region: Region dict with a "bbox" entry (x, y, w, h)
        screenshot_path: Path of the screenshot file
        window_context: Window context

    Returns:
        UIElements found in this region.
    """
    x, y, w, h = region["bbox"]
    crop = image.crop((x, y, x + w, y + h))

    # Route to the real VLM detector, or the simulation fallback.
    if self.vlm_client is None:
        return self._simulate_element_detection(
            crop, (x, y, w, h), screenshot_path, window_context
        )
    return self._detect_with_vlm(
        crop, (x, y, w, h), screenshot_path, window_context
    )
|
||||
|
||||
def _detect_with_vlm(self,
                     region_image: Image.Image,
                     region_bbox: Tuple[int, int, int, int],
                     screenshot_path: str,
                     window_context: Optional[Dict] = None) -> List[UIElement]:
    """
    Detect UI elements with the VLM (real detection).

    Args:
        region_image: Cropped image of the region (kept for interface
            compatibility; the full screenshot file is what is sent)
        region_bbox: Region bbox (x, y, w, h), forwarded to the parser
        screenshot_path: Path of the screenshot file
        window_context: Window context

    Returns:
        List of detected UIElements (empty on VLM failure).
    """
    # Note: the previous version unpacked region_bbox into locals that
    # were never used; the bbox is only consumed by _parse_vlm_response.
    context_str = ""
    if window_context:
        context_str = f"\nWindow context: {window_context.get('title', 'Unknown')}"

    # Ask for a strictly-JSON structured description of the elements.
    prompt = f"""List all interactive UI elements in this screenshot.{context_str}

For each element, provide:
- type (button, text_input, checkbox, link, etc.)
- label (visible text)
- approximate position (top/middle/bottom, left/center/right)

Format as JSON array:
[{{"type": "button", "label": "Submit", "position": "middle-center"}}]

Return ONLY the JSON array, no other text."""

    # Call the VLM with the screenshot *path* rather than the PIL crop:
    # some VLM backends handle files better than in-memory images.
    result = self.vlm_client.generate(
        prompt=prompt,
        image_path=screenshot_path,
        temperature=0.1,
        max_tokens=1000
    )

    if not result["success"]:
        print(f"❌ VLM detection failed: {result.get('error', 'Unknown error')}")
        return []

    if not result["response"] or len(result["response"].strip()) == 0:
        print("⚠ VLM returned empty response")
        return []

    # Turn the JSON answer into UIElements anchored to region_bbox.
    elements = self._parse_vlm_response(
        result["response"],
        region_bbox,
        screenshot_path,
        window_context
    )

    return elements
|
||||
|
||||
def _parse_vlm_response(self,
                        response: str,
                        region_bbox: Tuple[int, int, int, int],
                        screenshot_path: str,
                        window_context: Optional[Dict] = None) -> List[UIElement]:
    """
    Parse the VLM's JSON answer into UIElements.

    Args:
        response: Raw text answer from the VLM
        region_bbox: Region bbox (x, y, w, h) used to anchor coordinates
        screenshot_path: Path of the screenshot file
        window_context: Window context (unused here, kept for symmetry)

    Returns:
        List of UIElements; empty list on any parse failure.
    """
    x_offset, y_offset, region_w, region_h = region_bbox

    try:
        # Extract the JSON array from the answer (the model may wrap it
        # in extra text before/after).
        json_match = re.search(r'\[.*\]', response, re.DOTALL)
        if not json_match:
            print(f"No JSON array found in VLM response")
            print(f"VLM response was: {response[:500]}...")
            return []

        elements_data = json.loads(json_match.group(0))

        if not isinstance(elements_data, list):
            print(f"VLM response is not a JSON array")
            return []

        elements = []
        for i, elem_data in enumerate(elements_data):
            try:
                # Positions come in two formats: percentages or textual.
                if 'x' in elem_data and 'y' in elem_data:
                    # Percentage format: x/y/width/height relative to region.
                    x_pct = float(elem_data.get('x', 0))
                    y_pct = float(elem_data.get('y', 0))
                    w_pct = float(elem_data.get('width', 10))
                    h_pct = float(elem_data.get('height', 5))

                    elem_x = x_offset + int(region_w * x_pct / 100)
                    elem_y = y_offset + int(region_h * y_pct / 100)
                    elem_w = int(region_w * w_pct / 100)
                    elem_h = int(region_h * h_pct / 100)
                else:
                    # Textual format (top/middle/bottom, left/center/right):
                    # map to quarter/half anchors inside the region.
                    position = elem_data.get('position', 'middle-center').lower()

                    if 'top' in position:
                        elem_y = y_offset + region_h // 4
                    elif 'bottom' in position:
                        elem_y = y_offset + 3 * region_h // 4
                    else:  # middle
                        elem_y = y_offset + region_h // 2

                    if 'left' in position:
                        elem_x = x_offset + region_w // 4
                    elif 'right' in position:
                        elem_x = x_offset + 3 * region_w // 4
                    else:  # center
                        elem_x = x_offset + region_w // 2

                    # Default size by element type (heuristic pixels).
                    elem_type = elem_data.get('type', 'button')
                    if elem_type == 'button':
                        elem_w, elem_h = 100, 40
                    elif elem_type == 'text_input':
                        elem_w, elem_h = 200, 35
                    elif elem_type == 'checkbox':
                        elem_w, elem_h = 25, 25
                    else:
                        elem_w, elem_h = 80, 30

                # Build the UIElement. NOTE(review): element_id is derived
                # from coordinates only — two elements landing on the same
                # anchor would collide; confirm downstream tolerance.
                element = UIElement(
                    element_id=f"vlm_{elem_x}_{elem_y}",
                    type=elem_data.get('type', 'unknown'),
                    role=elem_data.get('role', 'unknown'),
                    bbox=(elem_x, elem_y, elem_w, elem_h),
                    center=(elem_x + elem_w // 2, elem_y + elem_h // 2),
                    label=elem_data.get('label', ''),
                    label_confidence=0.85,  # Default confidence for VLM output
                    embeddings=UIElementEmbeddings(),
                    visual_features=VisualFeatures(
                        dominant_color="rgb(128, 128, 128)",
                        has_icon=elem_data.get('type') == 'icon',
                        shape="rectangle",
                        size_category="medium"
                    ),
                    confidence=0.85,  # Default confidence for VLM output
                    metadata={
                        "detected_by": "vlm",
                        "model": self.config.vlm_model,
                        "screenshot_path": screenshot_path
                    }
                )

                elements.append(element)

            except (KeyError, ValueError, TypeError) as e:
                # Skip one malformed element without losing the rest.
                print(f"Error parsing element {i}: {e}")
                continue

        return elements

    except json.JSONDecodeError as e:
        print(f"Failed to parse VLM JSON response: {e}")
        print(f"Response was: {response[:200]}...")
        return []
|
||||
|
||||
def _simulate_element_detection(self,
                                region_image: Image.Image,
                                region_bbox: Tuple[int, int, int, int],
                                screenshot_path: str,
                                window_context: Optional[Dict] = None) -> List[UIElement]:
    """Simulated element detection (development fallback).

    Produces 2-3 random but structurally valid UIElements per region so
    the rest of the pipeline can run without a VLM. The np.random call
    order is significant for reproducibility under a fixed seed.
    """
    # Fabricate a few fictional elements for this region.
    elements = []

    x_offset, y_offset, w, h = region_bbox

    # 2 or 3 elements per region (randint upper bound is exclusive).
    num_elements = np.random.randint(2, 4)

    for i in range(num_elements):
        # Random placement, clamped so the element fits in the region.
        elem_w = np.random.randint(50, 150)
        elem_h = np.random.randint(20, 60)
        elem_x = x_offset + np.random.randint(0, max(1, w - elem_w))
        elem_y = y_offset + np.random.randint(0, max(1, h - elem_h))

        # Random type and semantic role.
        types = ["button", "text_input", "checkbox", "link", "icon"]
        roles = ["primary_action", "cancel", "submit", "form_input", "navigation"]

        element = UIElement(
            element_id=f"elem_{elem_x}_{elem_y}",
            type=np.random.choice(types),
            role=np.random.choice(roles),
            bbox=(elem_x, elem_y, elem_w, elem_h),
            center=(elem_x + elem_w // 2, elem_y + elem_h // 2),
            label=f"Element {i}",
            label_confidence=np.random.uniform(0.7, 0.95),
            embeddings=UIElementEmbeddings(),  # Empty embeddings
            visual_features=VisualFeatures(
                dominant_color="rgb(128, 128, 128)",
                has_icon=np.random.choice([True, False]),
                shape="rectangle",
                size_category="medium"
            ),
            confidence=np.random.uniform(0.7, 0.95),
            metadata={"simulated": True, "screenshot_path": screenshot_path}
        )

        elements.append(element)

    return elements
|
||||
|
||||
def classify_type(self,
                  element_image: Image.Image,
                  context: Optional[Dict] = None) -> Tuple[str, float]:
    """
    Classify the type of a UI element.

    Args:
        element_image: Image of the element
        context: Additional context

    Returns:
        (type, confidence) tuple; ("unknown", 0.0) on VLM failure.
    """
    if self.vlm_client is None:
        # Simulation mode: random but plausible answer.
        candidates = ["button", "text_input", "checkbox", "radio", "dropdown",
                      "tab", "link", "icon", "table_row", "menu_item"]
        return np.random.choice(candidates), np.random.uniform(0.7, 0.95)

    # Real classification through the VLM.
    outcome = self.vlm_client.classify_element_type(element_image, context)
    if not outcome["success"]:
        return "unknown", 0.0
    return outcome["type"], outcome["confidence"]
|
||||
|
||||
def classify_role(self,
                  element_image: Image.Image,
                  element_type: str,
                  context: Optional[Dict] = None) -> Tuple[str, float]:
    """
    Classify the semantic role of a UI element.

    Args:
        element_image: Image of the element
        element_type: Element type (e.g. "button")
        context: Additional context

    Returns:
        (role, confidence) tuple; ("unknown", 0.0) on VLM failure.
    """
    if self.vlm_client is None:
        # Simulation mode: random but plausible answer.
        candidates = ["primary_action", "cancel", "submit", "form_input",
                      "search_field", "navigation", "settings", "close"]
        return np.random.choice(candidates), np.random.uniform(0.7, 0.95)

    # Real classification through the VLM.
    outcome = self.vlm_client.classify_element_role(
        element_image,
        element_type,
        context
    )
    if not outcome["success"]:
        return "unknown", 0.0
    return outcome["role"], outcome["confidence"]
|
||||
|
||||
def extract_visual_features(self,
                            element_image: Image.Image) -> VisualFeatures:
    """
    Extract coarse visual features from an element crop.

    Args:
        element_image: PIL image of the element (RGB, RGBA or grayscale)

    Returns:
        VisualFeatures with dominant color, shape, size category and a
        heuristic icon flag.
    """
    # Dominant color: mean over the color channels only. Slicing to the
    # first 3 channels fixes RGBA inputs, where the previous mean also
    # folded the alpha channel into a 4-component "color".
    img_array = np.array(element_image)
    if img_array.ndim == 3:
        dominant_color = tuple(
            img_array[..., :3].mean(axis=(0, 1)).astype(int).tolist()
        )
    else:
        # Grayscale / paletted image: neutral gray fallback.
        dominant_color = (128, 128, 128)

    # Shape classification from the aspect ratio (simplified heuristic).
    width, height = element_image.size
    aspect_ratio = width / height if height > 0 else 1.0

    if aspect_ratio > 3:
        shape = "horizontal_bar"
    elif aspect_ratio < 0.33:
        shape = "vertical_bar"
    elif 0.8 <= aspect_ratio <= 1.2:
        shape = "square"
    else:
        shape = "rectangle"

    # Size category from the pixel area.
    area = width * height
    if area < 1000:
        size_category = "small"
    elif area < 10000:
        size_category = "medium"
    else:
        size_category = "large"

    # Icon heuristic: small and roughly square.
    has_icon = width < 100 and height < 100 and 0.8 <= aspect_ratio <= 1.2

    # NOTE(review): other paths in this file pass dominant_color as an
    # "rgb(r, g, b)" string while this returns an (r, g, b) tuple —
    # confirm which form VisualFeatures consumers expect.
    return VisualFeatures(
        dominant_color=dominant_color,
        has_icon=has_icon,
        shape=shape,
        size_category=size_category
    )
|
||||
|
||||
def generate_embeddings(self,
|
||||
element_image: Image.Image,
|
||||
element_label: str,
|
||||
embedder: Optional[Any] = None) -> Optional[UIElementEmbeddings]:
|
||||
"""
|
||||
Générer embeddings duaux (image + texte) pour un élément
|
||||
|
||||
Args:
|
||||
element_image: Image de l'élément
|
||||
element_label: Label textuel de l'élément
|
||||
embedder: Embedder à utiliser (optionnel)
|
||||
|
||||
Returns:
|
||||
UIElementEmbeddings ou None
|
||||
"""
|
||||
if not self.config.use_embeddings or embedder is None:
|
||||
return None
|
||||
|
||||
try:
|
||||
# Générer embedding image
|
||||
image_embedding_id = None
|
||||
if hasattr(embedder, 'embed_image'):
|
||||
# Sauvegarder temporairement l'image
|
||||
# TODO: Implémenter sauvegarde et embedding
|
||||
pass
|
||||
|
||||
# Générer embedding texte
|
||||
text_embedding_id = None
|
||||
if element_label and hasattr(embedder, 'embed_text'):
|
||||
# TODO: Implémenter embedding texte
|
||||
pass
|
||||
|
||||
if image_embedding_id or text_embedding_id:
|
||||
return UIElementEmbeddings(
|
||||
image_embedding_id=image_embedding_id,
|
||||
text_embedding_id=text_embedding_id,
|
||||
provider="openclip_ViT-B-32",
|
||||
dimensions=512
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"Warning: Failed to generate embeddings: {e}")
|
||||
|
||||
return None
|
||||
|
||||
def set_vlm_client(self, client: Any) -> None:
    """Inject a VLM client (e.g. a test double or custom backend)."""
    self.vlm_client = client

def get_config(self) -> DetectionConfig:
    """Return the active detection configuration."""
    return self.config
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Fonctions utilitaires
|
||||
# ============================================================================
|
||||
|
||||
def create_detector(vlm_model: Optional[str] = None,
                    confidence_threshold: float = 0.7) -> UIDetector:
    """
    Create a UIDetector with a custom configuration.

    Args:
        vlm_model: VLM model to use. None (the new default) resolves the
            model automatically via core.detection.vlm_config — honoring
            RPA_VLM_MODEL / VLM_MODEL env vars with Ollama availability
            fallback. Passing an explicit model behaves as before.
        confidence_threshold: Minimum confidence threshold

    Returns:
        Configured UIDetector
    """
    if vlm_model is None:
        # Local import to avoid a circular import at module load time.
        from core.detection.vlm_config import get_vlm_model
        vlm_model = get_vlm_model()

    config = DetectionConfig(
        vlm_model=vlm_model,
        confidence_threshold=confidence_threshold
    )
    return UIDetector(config)
|
||||
194
core/detection/vlm_config.py
Normal file
194
core/detection/vlm_config.py
Normal file
@@ -0,0 +1,194 @@
|
||||
"""
|
||||
Configuration centralisée du modèle VLM (Vision-Language Model).
|
||||
|
||||
Point unique de configuration pour le modèle VLM utilisé dans tout le pipeline.
|
||||
Gère la variable d'environnement RPA_VLM_MODEL avec fallback automatique
|
||||
si le modèle configuré n'est pas disponible dans Ollama.
|
||||
|
||||
Ordre de résolution du modèle :
|
||||
1. Variable d'env RPA_VLM_MODEL (prioritaire)
|
||||
2. Variable d'env VLM_MODEL (compatibilité)
|
||||
3. Modèle par défaut : gemma4:latest
|
||||
|
||||
Fallback automatique :
|
||||
Si le modèle choisi n'est pas trouvé dans Ollama, on essaie les
|
||||
modèles de fallback dans l'ordre (FALLBACK_VLM_MODELS).
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from typing import List, Optional
|
||||
|
||||
import requests
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Default VLM model — Gemma 4 latest (8B dense, Q4_K_M).
# Requires think=false in the payload (otherwise empty tokens on Ollama >=0.20).
DEFAULT_VLM_MODEL = "gemma4:latest"

# Fallback models, tried in order when the primary model is unavailable.
FALLBACK_VLM_MODELS = ["qwen3-vl:8b", "0000/ui-tars-1.5-7b-q8_0:7b"]

# Default Ollama endpoint.
DEFAULT_OLLAMA_ENDPOINT = "http://localhost:11434"

# Cache of the resolved model (avoids querying Ollama on every call).
_resolved_model: Optional[str] = None
_resolved_model_checked = False
|
||||
|
||||
|
||||
def get_vlm_model(
    endpoint: str = DEFAULT_OLLAMA_ENDPOINT,
    force_check: bool = False,
) -> str:
    """Return the VLM model name to use, with automatic fallback.

    Checks model availability in Ollama on the first call, then caches
    the result (module-level globals) for subsequent calls.

    Args:
        endpoint: Ollama API URL
        force_check: Force a new availability check (ignore the cache)

    Returns:
        Name of an available VLM model (e.g. "gemma4:latest")
    """
    global _resolved_model, _resolved_model_checked

    # Fast path: cached resolution.
    if _resolved_model_checked and not force_check:
        return _resolved_model

    # Read the configured model from the environment
    # (RPA_VLM_MODEL takes priority; VLM_MODEL kept for compatibility).
    configured = (
        os.environ.get("RPA_VLM_MODEL")
        or os.environ.get("VLM_MODEL")
        or DEFAULT_VLM_MODEL
    )

    # Ask Ollama what is actually installed.
    available = _list_ollama_models(endpoint)

    if available is None:
        # Ollama unreachable — use the configured model unchecked.
        logger.warning(
            "Ollama non joignable (%s) — utilisation de '%s' sans vérification",
            endpoint, configured,
        )
        _resolved_model = configured
        _resolved_model_checked = True
        return _resolved_model

    # Happy path: configured model is installed.
    if _model_available(configured, available):
        logger.info("VLM model: %s (configuré, disponible)", configured)
        _resolved_model = configured
        _resolved_model_checked = True
        return _resolved_model

    # Fallback: try the alternative models in order.
    logger.warning(
        "Modèle VLM '%s' non trouvé dans Ollama. Recherche d'un fallback...",
        configured,
    )

    # Build the full fallback candidate list (default first).
    fallback_candidates = [DEFAULT_VLM_MODEL] + FALLBACK_VLM_MODELS
    for candidate in fallback_candidates:
        if candidate == configured:
            continue  # Already tested
        if _model_available(candidate, available):
            logger.info(
                "VLM model: %s (fallback, '%s' non disponible)",
                candidate, configured,
            )
            _resolved_model = candidate
            _resolved_model_checked = True
            return _resolved_model

    # No fallback found — use the configured model anyway
    # (Ollama may pull it on first use).
    logger.warning(
        "Aucun modèle VLM trouvé dans Ollama. "
        "Modèles disponibles : %s. Utilisation de '%s' par défaut.",
        [m for m in available if "vl" in m.lower() or "gemma" in m.lower()],
        configured,
    )
    _resolved_model = configured
    _resolved_model_checked = True
    return _resolved_model
|
||||
|
||||
|
||||
def reset_vlm_model_cache():
    """Reset the resolved-model cache.

    Useful after a configuration change or a model pull.
    """
    global _resolved_model, _resolved_model_checked
    _resolved_model = None
    _resolved_model_checked = False
|
||||
|
||||
|
||||
def is_thinking_model(model_name: str) -> bool:
    """Return True when *model_name* is a 'thinking' model (qwen3 family).

    Thinking models require an assistant prefill to avoid the internal
    reasoning phase, which can exceed 180s with images.

    Args:
        model_name: Model name (e.g. "qwen3-vl:8b", "gemma4:e4b")

    Returns:
        True when the model is a thinking model (needs the prefill workaround)
    """
    lowered = model_name.lower()
    return "qwen3" in lowered
|
||||
|
||||
|
||||
def needs_think_false(model_name: str) -> bool:
    """Return True when *model_name* requires think=false in the payload.

    On Ollama >=0.20, gemma4 emits empty tokens unless thinking is
    explicitly disabled; the flag must be sent in the chat payload.

    Args:
        model_name: Model name (e.g. "gemma4:latest", "gemma4:e4b")

    Returns:
        True when the model requires think=false
    """
    lowered = model_name.lower()
    return "gemma4" in lowered
|
||||
|
||||
|
||||
def _list_ollama_models(endpoint: str) -> Optional[List[str]]:
    """List the models available in Ollama.

    Returns:
        List of model names, or None when Ollama is unreachable or
        returns an unusable payload.
    """
    try:
        resp = requests.get(f"{endpoint}/api/tags", timeout=5)
        if resp.status_code == 200:
            models = resp.json().get("models", [])
            return [m["name"] for m in models]
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # Network failure or malformed JSON — treated as "no information".
        # Narrowed from a bare `except Exception`, which also swallowed
        # programming errors (NameError, AttributeError, ...).
        pass
    return None
|
||||
|
||||
|
||||
def _model_available(model_name: str, available_models: List[str]) -> bool:
|
||||
"""Vérifie si un modèle est disponible dans la liste Ollama.
|
||||
|
||||
Supporte la correspondance exacte et le match sans tag de version
|
||||
(ex: "gemma4:e4b" match "gemma4:e4b" ou "gemma4:e4b-q4_0").
|
||||
"""
|
||||
# Match exact
|
||||
if model_name in available_models:
|
||||
return True
|
||||
|
||||
# Match par préfixe (sans tag) — "gemma4:e4b" match "gemma4:e4b"
|
||||
base_name = model_name.split(":")[0] if ":" in model_name else model_name
|
||||
for m in available_models:
|
||||
if m.startswith(base_name + ":"):
|
||||
return True
|
||||
|
||||
return False
|
||||
@@ -11,7 +11,12 @@ from pathlib import Path
|
||||
from dataclasses import dataclass
|
||||
import numpy as np
|
||||
import json
|
||||
import pickle
|
||||
|
||||
from core.security.signed_serializer import (
|
||||
SignatureVerificationError,
|
||||
load_signed,
|
||||
save_signed,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -500,21 +505,23 @@ class FAISSManager:
|
||||
# Sauvegarder index FAISS
|
||||
faiss.write_index(index_to_save, str(index_path))
|
||||
|
||||
# Sauvegarder métadonnées
|
||||
# Sauvegarder métadonnées (JSON signé HMAC — cf. core.security.signed_serializer)
|
||||
metadata = {
|
||||
"dimensions": self.dimensions,
|
||||
"index_type": self.index_type,
|
||||
"metric": self.metric,
|
||||
"next_id": self.next_id,
|
||||
"metadata_store": self.metadata_store,
|
||||
# Les clés dict sont des int côté Python ; on les sérialise en str
|
||||
# puis on les reconvertit au chargement. JSON n'autorise pas de
|
||||
# clés non-string.
|
||||
"metadata_store": {str(k): v for k, v in self.metadata_store.items()},
|
||||
"nlist": self.nlist,
|
||||
"nprobe": self.nprobe,
|
||||
"is_trained": self.is_trained,
|
||||
"auto_optimize": self.auto_optimize
|
||||
"auto_optimize": self.auto_optimize,
|
||||
}
|
||||
|
||||
with open(metadata_path, 'wb') as f:
|
||||
pickle.dump(metadata, f)
|
||||
|
||||
save_signed(metadata_path, metadata)
|
||||
|
||||
@classmethod
|
||||
def load(cls, index_path: Path, metadata_path: Path, use_gpu: bool = False) -> 'FAISSManager':
|
||||
@@ -529,11 +536,22 @@ class FAISSManager:
|
||||
Returns:
|
||||
FAISSManager chargé
|
||||
"""
|
||||
# Charger métadonnées
|
||||
with open(metadata_path, 'rb') as f:
|
||||
metadata = pickle.load(f)
|
||||
|
||||
# Créer instance
|
||||
# Charger métadonnées (JSON signé ; fallback legacy pickle avec migration).
|
||||
try:
|
||||
metadata = load_signed(metadata_path)
|
||||
except SignatureVerificationError:
|
||||
logger.error(
|
||||
"Signature HMAC invalide pour %s — refus de chargement.",
|
||||
metadata_path,
|
||||
)
|
||||
raise
|
||||
|
||||
# Reconvertir les clés int du metadata_store (JSON force des clés str).
|
||||
if isinstance(metadata.get("metadata_store"), dict):
|
||||
metadata["metadata_store"] = {
|
||||
int(k) if isinstance(k, str) and k.lstrip("-").isdigit() else k: v
|
||||
for k, v in metadata["metadata_store"].items()
|
||||
}
|
||||
manager = cls(
|
||||
dimensions=metadata["dimensions"],
|
||||
index_type=metadata["index_type"],
|
||||
|
||||
@@ -525,11 +525,25 @@ class DAGExecutor:
|
||||
True/False selon le résultat de la condition
|
||||
"""
|
||||
condition = action.get("condition", "True")
|
||||
# Contexte d'évaluation sécurisé : uniquement les résultats
|
||||
# Contexte d'évaluation sécurisé : uniquement les résultats.
|
||||
# NB : on utilise un évaluateur AST restreint (pas d'eval/exec),
|
||||
# seuls literals, comparaisons, booléens et indexations sont permis.
|
||||
eval_context = {"results": dict(self._results)}
|
||||
|
||||
# Import local pour éviter une dépendance circulaire au chargement.
|
||||
from core.execution.safe_condition_evaluator import (
|
||||
UnsafeExpressionError,
|
||||
safe_eval_condition,
|
||||
)
|
||||
|
||||
try:
|
||||
result = bool(eval(condition, {"__builtins__": {}}, eval_context))
|
||||
result = bool(safe_eval_condition(condition, eval_context))
|
||||
except UnsafeExpressionError as exc:
|
||||
logger.error(
|
||||
"Condition refusée pour '%s' (expression non sûre) : %s",
|
||||
step.step_id, exc,
|
||||
)
|
||||
result = False
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"Erreur d'évaluation de condition pour '%s' : %s",
|
||||
|
||||
@@ -151,6 +151,13 @@ class StepResult:
|
||||
duration_ms: float
|
||||
message: str
|
||||
screenshot_path: Optional[str] = None
|
||||
# C1 — Instrumentation vision-aware
|
||||
ocr_ms: float = 0.0 # Temps OCR du ScreenState de ce step
|
||||
ui_ms: float = 0.0 # Temps détection UI de ce step
|
||||
total_ms: float = 0.0 # Temps total (alias de duration_ms pour cohérence)
|
||||
analyze_ms: float = 0.0 # Temps total analyse ScreenState (OCR + UI + reste)
|
||||
cache_hit: bool = False # True si ScreenState vient du cache
|
||||
degraded: bool = False # True si mode dégradé activé (timeout analyse)
|
||||
|
||||
|
||||
class ExecutionLoop:
|
||||
@@ -175,7 +182,13 @@ class ExecutionLoop:
|
||||
capture_interval_ms: int = 500,
|
||||
max_no_match_retries: int = 5,
|
||||
confirmation_callback: Optional[Callable[[str, Dict], bool]] = None,
|
||||
coaching_callback: Optional[Callable[[str, Dict], "CoachingResponse"]] = None
|
||||
coaching_callback: Optional[Callable[[str, Dict], "CoachingResponse"]] = None,
|
||||
screen_analyzer: Optional[Any] = None,
|
||||
screen_state_cache: Optional[Any] = None,
|
||||
enable_ui_detection: bool = True,
|
||||
enable_ocr: bool = True,
|
||||
analyze_timeout_ms: int = 8000,
|
||||
window_info_provider: Optional[Callable[[], Optional[Dict[str, Any]]]] = None,
|
||||
):
|
||||
"""
|
||||
Initialiser la boucle d'exécution.
|
||||
@@ -188,6 +201,15 @@ class ExecutionLoop:
|
||||
max_no_match_retries: Nombre max de tentatives si pas de match
|
||||
confirmation_callback: Callback pour demander confirmation (SUPERVISED)
|
||||
coaching_callback: Callback pour décisions coaching (COACHING)
|
||||
screen_analyzer: ScreenAnalyzer pour construire un ScreenState enrichi
|
||||
(lazy init via singleton si None)
|
||||
screen_state_cache: Cache perceptuel (lazy init via singleton si None)
|
||||
enable_ui_detection: Active la détection UI (True par défaut, flag d'urgence)
|
||||
enable_ocr: Active l'OCR (True par défaut)
|
||||
analyze_timeout_ms: Timeout soft pour l'analyse d'un ScreenState.
|
||||
Au-delà, on active le mode dégradé pour les steps suivants.
|
||||
window_info_provider: Callable renvoyant un dict window_info. Si None,
|
||||
on tente `screen_capturer.get_active_window()`.
|
||||
"""
|
||||
self.pipeline = pipeline
|
||||
self.action_executor = action_executor or ActionExecutor()
|
||||
@@ -204,6 +226,27 @@ class ExecutionLoop:
|
||||
self.confirmation_callback = confirmation_callback
|
||||
self.coaching_callback = coaching_callback
|
||||
|
||||
# C1 — Vision-aware execution
|
||||
self._screen_analyzer = screen_analyzer # lazy init si None
|
||||
self._screen_state_cache = screen_state_cache # lazy init si None
|
||||
self.enable_ui_detection = enable_ui_detection
|
||||
self.enable_ocr = enable_ocr
|
||||
self.analyze_timeout_ms = analyze_timeout_ms
|
||||
self._window_info_provider = window_info_provider
|
||||
# Mode dégradé déclenché par un timeout analyse — persiste tant qu'un
|
||||
# probe n'a pas démontré la récupération (voir ci-dessous).
|
||||
self._degraded_mode = False
|
||||
# Auto-rétablissement : compteur de steps rapides consécutifs.
|
||||
# Si l'analyse tourne vite (< analyze_timeout_ms / 2) pendant
|
||||
# _fast_steps_recovery_threshold steps → on quitte le mode dégradé.
|
||||
self._successive_fast_steps = 0
|
||||
self._fast_steps_recovery_threshold = 3
|
||||
# En mode dégradé, on retente l'analyse tous les _probe_interval steps
|
||||
# pour détecter la récupération (les autres steps restent en stub pour
|
||||
# éviter de re-saturer le GPU). 10 par défaut = ~5s à 500ms/step.
|
||||
self._probe_interval = 10
|
||||
self._degraded_step_counter = 0
|
||||
|
||||
# État interne
|
||||
self.state = ExecutionState.IDLE
|
||||
self.context: Optional[ExecutionContext] = None
|
||||
@@ -464,15 +507,15 @@ class ExecutionLoop:
|
||||
})
|
||||
|
||||
# Notify Analytics about step completion
|
||||
# C1 — transmet tous les champs vision-aware (ocr_ms, ui_ms,
|
||||
# analyze_ms, cache_hit, degraded) au système analytics via
|
||||
# on_step_result qui accepte un StepResult complet.
|
||||
if self._analytics_integration and step_result:
|
||||
try:
|
||||
self._analytics_integration.on_step_complete(
|
||||
workflow_id=self.context.workflow_id,
|
||||
self._analytics_integration.on_step_result(
|
||||
execution_id=self.context.execution_id,
|
||||
step_id=step_result.node_id,
|
||||
success=step_result.success,
|
||||
duration_ms=step_result.duration_ms,
|
||||
confidence=step_result.match_confidence
|
||||
workflow_id=self.context.workflow_id,
|
||||
step_result=step_result,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Analytics step notification failed: {e}")
|
||||
@@ -505,25 +548,47 @@ class ExecutionLoop:
|
||||
self._notify_state_change(ExecutionState.STOPPED)
|
||||
|
||||
# Notify Analytics about execution completion
|
||||
# Contrat normalisé (Lot A) : duration_ms + status explicite
|
||||
# au lieu du booléen success + duration ambigu.
|
||||
if self._analytics_integration and self.context:
|
||||
try:
|
||||
success = self.state == ExecutionState.COMPLETED
|
||||
duration_ms = (datetime.now() - self.context.started_at).total_seconds() * 1000
|
||||
|
||||
duration_ms = (
|
||||
datetime.now() - self.context.started_at
|
||||
).total_seconds() * 1000
|
||||
|
||||
# Mapping ExecutionState → status analytics
|
||||
if self.state == ExecutionState.COMPLETED:
|
||||
status = "completed"
|
||||
elif self.state == ExecutionState.FAILED:
|
||||
status = "failed"
|
||||
elif self.state == ExecutionState.STOPPED:
|
||||
status = "stopped"
|
||||
elif self.state == ExecutionState.PAUSED:
|
||||
# Pause non résolue à la sortie = blocage non récupéré
|
||||
status = "blocked"
|
||||
else:
|
||||
status = self.state.value
|
||||
|
||||
error_message = (
|
||||
None
|
||||
if status == "completed"
|
||||
else f"Execution ended in state: {self.state.value}"
|
||||
)
|
||||
|
||||
# Stop resource monitoring
|
||||
self._analytics_integration.stop_resource_monitoring(
|
||||
execution_id=self.context.execution_id
|
||||
)
|
||||
|
||||
|
||||
self._analytics_integration.on_execution_complete(
|
||||
workflow_id=self.context.workflow_id,
|
||||
execution_id=self.context.execution_id,
|
||||
success=success,
|
||||
duration_ms=duration_ms,
|
||||
steps_executed=self.context.steps_executed,
|
||||
steps_succeeded=self.context.steps_succeeded,
|
||||
status=status,
|
||||
steps_total=self.context.steps_executed,
|
||||
steps_completed=self.context.steps_succeeded,
|
||||
steps_failed=self.context.steps_failed,
|
||||
error_message=None if success else f"Execution ended in state: {self.state.value}"
|
||||
error_message=error_message,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Analytics completion notification failed: {e}")
|
||||
@@ -533,56 +598,142 @@ class ExecutionLoop:
|
||||
def _execute_step(self) -> Optional[StepResult]:
|
||||
"""
|
||||
Exécuter une étape du workflow.
|
||||
|
||||
|
||||
Returns:
|
||||
StepResult ou None si pas de match
|
||||
"""
|
||||
start_time = time.time()
|
||||
|
||||
|
||||
# 1. Capturer l'écran
|
||||
screenshot_path = self._capture_screen()
|
||||
if not screenshot_path:
|
||||
logger.warning("Failed to capture screen")
|
||||
return None
|
||||
|
||||
|
||||
self.context.last_screenshot_path = screenshot_path
|
||||
|
||||
# 2. Identifier l'état actuel (matching)
|
||||
match = self.pipeline.match_current_state(
|
||||
screenshot_path,
|
||||
workflow_id=self.context.workflow_id
|
||||
|
||||
# 1bis. Construire un ScreenState enrichi (C1) — avec cache perceptuel
|
||||
screen_state, timings = self._build_screen_state(screenshot_path)
|
||||
logger.debug(
|
||||
f"[Step] ScreenState analyze={timings['analyze_ms']:.0f}ms "
|
||||
f"ocr={timings['ocr_ms']:.0f}ms ui={timings['ui_ms']:.0f}ms "
|
||||
f"cache_hit={timings['cache_hit']} degraded={timings['degraded']}"
|
||||
)
|
||||
|
||||
|
||||
# 2. Identifier l'état actuel (matching)
|
||||
#
|
||||
# Lot E — on consomme le ScreenState enrichi déjà construit en 1bis
|
||||
# (avec ui_elements, detected_text, window_title réels) au lieu de
|
||||
# laisser le pipeline reconstruire un stub avec window_title="Unknown".
|
||||
# Premier vrai matching context-aware.
|
||||
match = self.pipeline.match_current_state_from_state(
|
||||
screen_state,
|
||||
workflow_id=self.context.workflow_id,
|
||||
)
|
||||
|
||||
if not match:
|
||||
logger.debug("No match found for current screen")
|
||||
return None
|
||||
|
||||
|
||||
current_node_id = match["node_id"]
|
||||
confidence = match["confidence"]
|
||||
self.context.current_node_id = current_node_id
|
||||
self.context.last_match_confidence = confidence
|
||||
|
||||
|
||||
logger.info(f"Matched node: {current_node_id} (confidence: {confidence:.3f})")
|
||||
|
||||
# 3. Obtenir la prochaine action
|
||||
|
||||
# 3. Obtenir la prochaine action (C3 : sélection d'edge robuste)
|
||||
#
|
||||
# Lot A — contrat dict avec status explicite :
|
||||
# "terminal" → fin légitime du workflow (success=True)
|
||||
# "blocked" → pause supervisée (plus JAMAIS traité comme un succès
|
||||
# pour ne pas déclencher un faux _is_workflow_complete)
|
||||
# "selected" → action à exécuter
|
||||
#
|
||||
# Lot B — on propage la confidence du match courant (source_similarity)
|
||||
# pour que l'EdgeScorer puisse vérifier la précondition
|
||||
# `min_source_similarity` de chaque edge. Sans cette propagation, la
|
||||
# contrainte était silencieusement désactivée (hardcodé à 1.0).
|
||||
next_action = self.pipeline.get_next_action(
|
||||
self.context.workflow_id,
|
||||
current_node_id
|
||||
current_node_id,
|
||||
screen_state=screen_state,
|
||||
source_similarity=confidence,
|
||||
)
|
||||
|
||||
if not next_action:
|
||||
# Pas d'action suivante = fin du workflow ou node terminal
|
||||
|
||||
# Rétrocompat défensive : si un pipeline legacy renvoie None ou un dict
|
||||
# sans status, on considère ça comme un blocage (safe default).
|
||||
if not isinstance(next_action, dict) or "status" not in next_action:
|
||||
logger.error(
|
||||
"get_next_action a renvoyé un résultat sans status "
|
||||
f"(legacy?). Valeur reçue: {next_action!r}"
|
||||
)
|
||||
next_action = {"status": "blocked", "reason": "legacy_none_return"}
|
||||
|
||||
action_status = next_action.get("status")
|
||||
|
||||
if action_status == "terminal":
|
||||
# Fin légitime : aucun outgoing_edge sur le node courant
|
||||
total_ms = (time.time() - start_time) * 1000
|
||||
return StepResult(
|
||||
success=True,
|
||||
node_id=current_node_id,
|
||||
edge_id=None,
|
||||
action_result=None,
|
||||
match_confidence=confidence,
|
||||
duration_ms=(time.time() - start_time) * 1000,
|
||||
message="No next action (terminal node)",
|
||||
screenshot_path=screenshot_path
|
||||
duration_ms=total_ms,
|
||||
message="Workflow terminated (terminal node)",
|
||||
screenshot_path=screenshot_path,
|
||||
ocr_ms=timings["ocr_ms"],
|
||||
ui_ms=timings["ui_ms"],
|
||||
analyze_ms=timings["analyze_ms"],
|
||||
total_ms=total_ms,
|
||||
cache_hit=timings["cache_hit"],
|
||||
degraded=timings["degraded"],
|
||||
)
|
||||
|
||||
|
||||
if action_status == "blocked":
|
||||
# Blocage : des edges existent mais aucun n'est valide.
|
||||
# On déclenche une pause supervisée (paused_need_help) et on
|
||||
# remonte l'erreur. On ne retourne PAS success=True.
|
||||
reason = next_action.get("reason", "unknown")
|
||||
logger.warning(
|
||||
f"ExecutionLoop bloqué sur {current_node_id}: {reason} "
|
||||
f"→ pause supervisée demandée"
|
||||
)
|
||||
# On bascule en PAUSED et on arme _pause_requested pour que la
|
||||
# boucle principale attende un resume() humain.
|
||||
self.state = ExecutionState.PAUSED
|
||||
self._pause_requested = True
|
||||
self._notify_state_change(ExecutionState.PAUSED)
|
||||
if self._on_error:
|
||||
try:
|
||||
self._on_error(
|
||||
"blocked",
|
||||
Exception(f"No valid edge from {current_node_id}: {reason}"),
|
||||
)
|
||||
except Exception as cb_err:
|
||||
logger.debug(f"on_error callback failed: {cb_err}")
|
||||
|
||||
total_ms = (time.time() - start_time) * 1000
|
||||
return StepResult(
|
||||
success=False,
|
||||
node_id=current_node_id,
|
||||
edge_id=None,
|
||||
action_result=None,
|
||||
match_confidence=confidence,
|
||||
duration_ms=total_ms,
|
||||
message=f"Blocked: {reason}",
|
||||
screenshot_path=screenshot_path,
|
||||
ocr_ms=timings["ocr_ms"],
|
||||
ui_ms=timings["ui_ms"],
|
||||
analyze_ms=timings["analyze_ms"],
|
||||
total_ms=total_ms,
|
||||
cache_hit=timings["cache_hit"],
|
||||
degraded=timings["degraded"],
|
||||
)
|
||||
|
||||
# À partir d'ici, on est forcément en status="selected"
|
||||
edge_id = next_action["edge_id"]
|
||||
self.context.current_edge_id = edge_id
|
||||
|
||||
@@ -604,7 +755,7 @@ class ExecutionLoop:
|
||||
if coaching_response.decision == CoachingDecision.ACCEPT:
|
||||
# Utilisateur accepte : exécuter l'action suggérée
|
||||
self._coaching_stats['accepted'] += 1
|
||||
action_result = self._execute_action(next_action)
|
||||
action_result = self._execute_action(next_action, screen_state=screen_state)
|
||||
self._record_coaching_feedback(
|
||||
next_action, coaching_response, action_result, success=True
|
||||
)
|
||||
@@ -615,15 +766,22 @@ class ExecutionLoop:
|
||||
self._record_coaching_feedback(
|
||||
next_action, coaching_response, None, success=False
|
||||
)
|
||||
total_ms = (time.time() - start_time) * 1000
|
||||
return StepResult(
|
||||
success=False,
|
||||
node_id=current_node_id,
|
||||
edge_id=edge_id,
|
||||
action_result=None,
|
||||
match_confidence=confidence,
|
||||
duration_ms=(time.time() - start_time) * 1000,
|
||||
duration_ms=total_ms,
|
||||
message="Action rejected by user in COACHING mode",
|
||||
screenshot_path=screenshot_path
|
||||
screenshot_path=screenshot_path,
|
||||
ocr_ms=timings["ocr_ms"],
|
||||
ui_ms=timings["ui_ms"],
|
||||
analyze_ms=timings["analyze_ms"],
|
||||
total_ms=total_ms,
|
||||
cache_hit=timings["cache_hit"],
|
||||
degraded=timings["degraded"],
|
||||
)
|
||||
|
||||
elif coaching_response.decision == CoachingDecision.CORRECT:
|
||||
@@ -632,7 +790,7 @@ class ExecutionLoop:
|
||||
corrected_action = self._apply_coaching_correction(
|
||||
next_action, coaching_response.correction
|
||||
)
|
||||
action_result = self._execute_action(corrected_action)
|
||||
action_result = self._execute_action(corrected_action, screen_state=screen_state)
|
||||
self._record_coaching_feedback(
|
||||
next_action, coaching_response, action_result,
|
||||
success=action_result.status == ExecutionStatus.SUCCESS if action_result else False
|
||||
@@ -658,33 +816,40 @@ class ExecutionLoop:
|
||||
# Mode supervisé : demander confirmation
|
||||
if not self._request_confirmation(next_action):
|
||||
logger.info("Action rejected by user")
|
||||
total_ms = (time.time() - start_time) * 1000
|
||||
return StepResult(
|
||||
success=False,
|
||||
node_id=current_node_id,
|
||||
edge_id=edge_id,
|
||||
action_result=None,
|
||||
match_confidence=confidence,
|
||||
duration_ms=(time.time() - start_time) * 1000,
|
||||
duration_ms=total_ms,
|
||||
message="Action rejected by user",
|
||||
screenshot_path=screenshot_path
|
||||
screenshot_path=screenshot_path,
|
||||
ocr_ms=timings["ocr_ms"],
|
||||
ui_ms=timings["ui_ms"],
|
||||
analyze_ms=timings["analyze_ms"],
|
||||
total_ms=total_ms,
|
||||
cache_hit=timings["cache_hit"],
|
||||
degraded=timings["degraded"],
|
||||
)
|
||||
|
||||
|
||||
# Exécuter l'action
|
||||
action_result = self._execute_action(next_action)
|
||||
|
||||
action_result = self._execute_action(next_action, screen_state=screen_state)
|
||||
|
||||
elif self.context.mode == ExecutionMode.AUTOMATIC:
|
||||
# Mode automatique : exécuter directement
|
||||
action_result = self._execute_action(next_action)
|
||||
|
||||
action_result = self._execute_action(next_action, screen_state=screen_state)
|
||||
|
||||
# 5. Mettre à jour les compteurs
|
||||
self.context.steps_executed += 1
|
||||
if action_result and action_result.status == ExecutionStatus.SUCCESS:
|
||||
self.context.steps_succeeded += 1
|
||||
elif action_result:
|
||||
self.context.steps_failed += 1
|
||||
|
||||
|
||||
duration_ms = (time.time() - start_time) * 1000
|
||||
|
||||
|
||||
return StepResult(
|
||||
success=action_result.status == ExecutionStatus.SUCCESS if action_result else True,
|
||||
node_id=current_node_id,
|
||||
@@ -693,7 +858,13 @@ class ExecutionLoop:
|
||||
match_confidence=confidence,
|
||||
duration_ms=duration_ms,
|
||||
message=action_result.message if action_result else "Observed",
|
||||
screenshot_path=screenshot_path
|
||||
screenshot_path=screenshot_path,
|
||||
ocr_ms=timings["ocr_ms"],
|
||||
ui_ms=timings["ui_ms"],
|
||||
analyze_ms=timings["analyze_ms"],
|
||||
total_ms=duration_ms,
|
||||
cache_hit=timings["cache_hit"],
|
||||
degraded=timings["degraded"],
|
||||
)
|
||||
|
||||
# =========================================================================
|
||||
@@ -718,61 +889,45 @@ class ExecutionLoop:
|
||||
logger.error(f"Screen capture failed: {e}")
|
||||
return None
|
||||
|
||||
def _execute_action(self, action_info: Dict[str, Any]) -> ExecutionResult:
|
||||
"""Exécuter une action via l'ActionExecutor."""
|
||||
def _execute_action(
|
||||
self,
|
||||
action_info: Dict[str, Any],
|
||||
screen_state: Optional[Any] = None,
|
||||
) -> ExecutionResult:
|
||||
"""
|
||||
Exécuter une action via l'ActionExecutor.
|
||||
|
||||
Args:
|
||||
action_info: dict action {edge_id, action, target_node, ...}
|
||||
screen_state: ScreenState enrichi (si None, fallback stub minimal)
|
||||
"""
|
||||
try:
|
||||
# Charger le workflow et l'edge
|
||||
workflow = self.pipeline.load_workflow(self.context.workflow_id)
|
||||
edge = workflow.get_edge(action_info["edge_id"])
|
||||
|
||||
|
||||
if not edge:
|
||||
return ExecutionResult(
|
||||
status=ExecutionStatus.FAILED,
|
||||
message=f"Edge not found: {action_info['edge_id']}",
|
||||
duration_ms=0
|
||||
)
|
||||
|
||||
# Créer un ScreenState minimal pour l'exécution
|
||||
from core.models.screen_state import (
|
||||
ScreenState, WindowContext, RawLevel, PerceptionLevel,
|
||||
ContextLevel, EmbeddingRef
|
||||
)
|
||||
|
||||
screen_state = ScreenState(
|
||||
screen_state_id=f"exec_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
|
||||
timestamp=datetime.now(),
|
||||
session_id=self.context.execution_id,
|
||||
window=WindowContext(
|
||||
app_name="unknown",
|
||||
window_title="Unknown",
|
||||
screen_resolution=[1920, 1080],
|
||||
workspace="main"
|
||||
),
|
||||
raw=RawLevel(
|
||||
screenshot_path=self.context.last_screenshot_path or "",
|
||||
capture_method="execution",
|
||||
file_size_bytes=0
|
||||
),
|
||||
perception=PerceptionLevel(
|
||||
embedding=EmbeddingRef(provider="", vector_id="", dimensions=512),
|
||||
detected_text=[],
|
||||
text_detection_method="none",
|
||||
confidence_avg=0.0
|
||||
),
|
||||
context=ContextLevel(),
|
||||
ui_elements=[]
|
||||
)
|
||||
|
||||
|
||||
# Utiliser le ScreenState enrichi fourni par le loop ; fallback minimal
|
||||
# uniquement si on n'en a pas (legacy, tests).
|
||||
if screen_state is None:
|
||||
screen_state = self._build_stub_screen_state()
|
||||
|
||||
# Exécuter l'action
|
||||
result = self.action_executor.execute_edge(
|
||||
edge,
|
||||
screen_state,
|
||||
context=self.context.variables
|
||||
)
|
||||
|
||||
|
||||
logger.info(f"Action executed: {result.status.value} - {result.message}")
|
||||
return result
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.exception(f"Action execution failed: {e}")
|
||||
return ExecutionResult(
|
||||
@@ -781,6 +936,286 @@ class ExecutionLoop:
|
||||
duration_ms=0,
|
||||
error=e
|
||||
)
|
||||
|
||||
# =========================================================================
|
||||
# C1 — Construction du ScreenState (vision-aware)
|
||||
# =========================================================================
|
||||
|
||||
def _get_screen_analyzer(self):
|
||||
"""
|
||||
Récupérer le ScreenAnalyzer (singleton partagé, lazy).
|
||||
|
||||
Retourne None si indisponible (import error, etc.) — le loop
|
||||
bascule alors en fallback stub.
|
||||
|
||||
Note Lot C : on ne passe plus `session_id` au singleton. Le session_id
|
||||
est désormais un paramètre d'appel de `analyze()`, pour éviter que deux
|
||||
ExecutionLoop partageant le même analyzer se marchent dessus.
|
||||
"""
|
||||
if self._screen_analyzer is not None:
|
||||
return self._screen_analyzer
|
||||
try:
|
||||
from core.pipeline import get_screen_analyzer
|
||||
self._screen_analyzer = get_screen_analyzer()
|
||||
return self._screen_analyzer
|
||||
except Exception as e:
|
||||
logger.warning(f"ScreenAnalyzer indisponible: {e}")
|
||||
return None
|
||||
|
||||
def _get_screen_state_cache(self):
|
||||
"""Récupérer le cache de ScreenState (singleton partagé, lazy)."""
|
||||
if self._screen_state_cache is not None:
|
||||
return self._screen_state_cache
|
||||
try:
|
||||
from core.pipeline import get_screen_state_cache
|
||||
self._screen_state_cache = get_screen_state_cache()
|
||||
return self._screen_state_cache
|
||||
except Exception as e:
|
||||
logger.warning(f"ScreenStateCache indisponible: {e}")
|
||||
return None
|
||||
|
||||
def _resolve_window_info(self) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Récupérer les infos de la fenêtre active.
|
||||
|
||||
Ordre de préférence :
|
||||
1. `window_info_provider` fourni au constructeur
|
||||
2. `screen_capturer.get_active_window()`
|
||||
3. None → ScreenAnalyzer utilisera les valeurs par défaut
|
||||
"""
|
||||
if self._window_info_provider is not None:
|
||||
try:
|
||||
return self._window_info_provider()
|
||||
except Exception as e:
|
||||
logger.debug(f"window_info_provider failed: {e}")
|
||||
|
||||
try:
|
||||
raw = self.screen_capturer.get_active_window()
|
||||
if raw:
|
||||
# Normaliser vers le format attendu par ScreenAnalyzer
|
||||
return {
|
||||
"title": raw.get("title", "Unknown"),
|
||||
"app_name": raw.get("app", "unknown"),
|
||||
"window_bounds": [
|
||||
raw.get("x", 0),
|
||||
raw.get("y", 0),
|
||||
raw.get("width", 0),
|
||||
raw.get("height", 0),
|
||||
],
|
||||
}
|
||||
except Exception as e:
|
||||
logger.debug(f"get_active_window failed: {e}")
|
||||
return None
|
||||
|
||||
def _build_screen_state(
|
||||
self,
|
||||
screenshot_path: str,
|
||||
) -> tuple:
|
||||
"""
|
||||
Construire un ScreenState enrichi depuis un screenshot.
|
||||
|
||||
Logique :
|
||||
- Si enable_ui_detection=False ET enable_ocr=False → stub
|
||||
- Si analyseur indisponible → stub
|
||||
- Sinon : cache.get_or_compute(analyzer.analyze)
|
||||
- Timeout soft : si l'analyse dépasse `analyze_timeout_ms`, on log
|
||||
un warning et on active le mode dégradé pour les prochains steps.
|
||||
|
||||
Returns:
|
||||
(screen_state, timings_dict)
|
||||
timings_dict: {
|
||||
"analyze_ms", "ocr_ms", "ui_ms", "cache_hit", "degraded"
|
||||
}
|
||||
"""
|
||||
timings = {
|
||||
"analyze_ms": 0.0,
|
||||
"ocr_ms": 0.0,
|
||||
"ui_ms": 0.0,
|
||||
"cache_hit": False,
|
||||
"degraded": False,
|
||||
}
|
||||
|
||||
# Mode "tout désactivé" (flag d'urgence) → stub
|
||||
if not self.enable_ui_detection and not self.enable_ocr:
|
||||
timings["degraded"] = True
|
||||
return self._build_stub_screen_state(screenshot_path), timings
|
||||
|
||||
analyzer = self._get_screen_analyzer()
|
||||
if analyzer is None:
|
||||
timings["degraded"] = True
|
||||
return self._build_stub_screen_state(screenshot_path), timings
|
||||
|
||||
# Mode dégradé : on reste sur stub, sauf "probe" périodique qui teste
|
||||
# si le GPU est redevenu performant. Si oui, on accumule les steps
|
||||
# rapides ; après _fast_steps_recovery_threshold probes rapides
|
||||
# consécutifs on retourne en mode complet.
|
||||
if self._degraded_mode:
|
||||
self._degraded_step_counter += 1
|
||||
if self._degraded_step_counter < self._probe_interval:
|
||||
timings["degraded"] = True
|
||||
return self._build_stub_screen_state(screenshot_path), timings
|
||||
# Sinon on tente un probe réel ci-dessous
|
||||
self._degraded_step_counter = 0
|
||||
|
||||
cache = self._get_screen_state_cache()
|
||||
|
||||
# Invalidation proactive : si l'écran a massivement changé depuis
|
||||
# la dernière entrée du cache, on purge. Le TTL seul (2s) laisserait
|
||||
# passer des entrées obsolètes sur des changements rapides (popup, nav).
|
||||
if cache is not None:
|
||||
try:
|
||||
cache.invalidate_if_changed(screenshot_path, threshold=0.3)
|
||||
except Exception as e:
|
||||
logger.debug(f"invalidate_if_changed a échoué: {e}")
|
||||
|
||||
window_info = self._resolve_window_info()
|
||||
|
||||
# Fonction de calcul (cache miss)
|
||||
# Les flags runtime (enable_ocr, enable_ui_detection) et le session_id
|
||||
# sont passés en kwargs-only à analyze() : AUCUNE mutation de l'analyseur
|
||||
# singleton (Lot C — thread-safety, deux ExecutionLoop peuvent partager
|
||||
# le même analyzer sans se contaminer).
|
||||
execution_id = self.context.execution_id if self.context else ""
|
||||
|
||||
def compute(path: str):
|
||||
t_start = time.time()
|
||||
state = analyzer.analyze(
|
||||
path,
|
||||
window_info=window_info,
|
||||
enable_ocr=self.enable_ocr,
|
||||
enable_ui_detection=self.enable_ui_detection,
|
||||
session_id=execution_id,
|
||||
)
|
||||
elapsed = (time.time() - t_start) * 1000
|
||||
# Annoter le temps dans les métadonnées
|
||||
if hasattr(state, "metadata"):
|
||||
state.metadata["analyze_ms"] = elapsed
|
||||
return state
|
||||
|
||||
t0 = time.time()
|
||||
try:
|
||||
if cache is not None:
|
||||
# Lot D — clé composite context-aware : deux contextes
|
||||
# différents partageant le même screenshot n'entrent plus
|
||||
# en collision. Le workflow_id isole les replays par workflow,
|
||||
# les flags différencient les modes d'analyse (OCR on/off,
|
||||
# UI on/off), et le (window_title, app_name) distingue deux
|
||||
# applications qui présenteraient un rendu visuel similaire.
|
||||
ctx_window_title = (window_info or {}).get("title", "") or ""
|
||||
ctx_app_name = (window_info or {}).get("app_name", "") or ""
|
||||
ctx_workflow_id = (
|
||||
self.context.workflow_id if self.context else ""
|
||||
)
|
||||
state, cache_hit, _ = cache.get_or_compute(
|
||||
screenshot_path,
|
||||
compute,
|
||||
window_title=ctx_window_title,
|
||||
app_name=ctx_app_name,
|
||||
enable_ocr=self.enable_ocr,
|
||||
enable_ui_detection=self.enable_ui_detection,
|
||||
workflow_id=ctx_workflow_id,
|
||||
)
|
||||
else:
|
||||
state = compute(screenshot_path)
|
||||
cache_hit = False
|
||||
except Exception as e:
|
||||
logger.warning(f"ScreenState build failed: {e} — fallback stub")
|
||||
timings["degraded"] = True
|
||||
return self._build_stub_screen_state(screenshot_path), timings
|
||||
|
||||
analyze_ms = (time.time() - t0) * 1000
|
||||
timings["analyze_ms"] = analyze_ms
|
||||
timings["cache_hit"] = cache_hit
|
||||
|
||||
# Décomposer OCR vs UI si possible (métadonnées)
|
||||
meta = getattr(state, "metadata", {}) or {}
|
||||
timings["ocr_ms"] = float(meta.get("ocr_ms", 0.0))
|
||||
timings["ui_ms"] = float(meta.get("ui_ms", 0.0))
|
||||
|
||||
# Timeout soft : activer le mode dégradé si > seuil
|
||||
# (cache_hit ignoré : un hit ne prouve rien sur la santé du GPU)
|
||||
if analyze_ms > self.analyze_timeout_ms and not cache_hit:
|
||||
logger.warning(
|
||||
f"ScreenState analysis slow: {analyze_ms:.0f}ms > "
|
||||
f"{self.analyze_timeout_ms}ms → activation mode dégradé"
|
||||
)
|
||||
self._degraded_mode = True
|
||||
self._successive_fast_steps = 0
|
||||
timings["degraded"] = True
|
||||
else:
|
||||
# Step "rapide" : incrémenter le compteur si < timeout / 2.
|
||||
# On ignore les cache hits (pas représentatifs de la perf GPU).
|
||||
fast_threshold_ms = self.analyze_timeout_ms / 2
|
||||
if not cache_hit and analyze_ms < fast_threshold_ms:
|
||||
self._successive_fast_steps += 1
|
||||
|
||||
# Auto-rétablissement : si on était en dégradé et qu'on a
|
||||
# enchaîné assez de steps rapides → retour en mode complet.
|
||||
if (
|
||||
self._degraded_mode
|
||||
and self._successive_fast_steps
|
||||
>= self._fast_steps_recovery_threshold
|
||||
):
|
||||
logger.info(
|
||||
"Mode complet restauré après %d steps rapides "
|
||||
"(dernier analyze_ms=%.0fms < seuil=%.0fms)",
|
||||
self._successive_fast_steps,
|
||||
analyze_ms,
|
||||
fast_threshold_ms,
|
||||
)
|
||||
self._degraded_mode = False
|
||||
self._successive_fast_steps = 0
|
||||
elif not cache_hit:
|
||||
# Step ni lent ni rapide (entre timeout/2 et timeout) : reset
|
||||
self._successive_fast_steps = 0
|
||||
|
||||
# On propage l'état dégradé courant dans les timings (utile pour le
|
||||
# StepResult : tant qu'on n'a pas récupéré assez de steps rapides,
|
||||
# on continue à signaler "degraded=True").
|
||||
timings["degraded"] = self._degraded_mode
|
||||
|
||||
return state, timings
|
||||
|
||||
def _build_stub_screen_state(self, screenshot_path: Optional[str] = None):
|
||||
"""
|
||||
Construire un ScreenState minimal (fallback legacy).
|
||||
|
||||
Utilisé quand l'analyseur est indisponible ou que tous les flags
|
||||
de détection sont désactivés (flag d'urgence).
|
||||
"""
|
||||
from core.models.screen_state import (
|
||||
ScreenState, WindowContext, RawLevel, PerceptionLevel,
|
||||
ContextLevel, EmbeddingRef
|
||||
)
|
||||
|
||||
path = screenshot_path or (
|
||||
self.context.last_screenshot_path if self.context else ""
|
||||
) or ""
|
||||
|
||||
return ScreenState(
|
||||
screen_state_id=f"exec_{datetime.now().strftime('%Y%m%d_%H%M%S_%f')}",
|
||||
timestamp=datetime.now(),
|
||||
session_id=self.context.execution_id if self.context else "stub",
|
||||
window=WindowContext(
|
||||
app_name="unknown",
|
||||
window_title="Unknown",
|
||||
screen_resolution=[1920, 1080],
|
||||
workspace="main",
|
||||
),
|
||||
raw=RawLevel(
|
||||
screenshot_path=path,
|
||||
capture_method="execution",
|
||||
file_size_bytes=0,
|
||||
),
|
||||
perception=PerceptionLevel(
|
||||
embedding=EmbeddingRef(provider="", vector_id="", dimensions=512),
|
||||
detected_text=[],
|
||||
text_detection_method="none",
|
||||
confidence_avg=0.0,
|
||||
),
|
||||
context=ContextLevel(),
|
||||
ui_elements=[],
|
||||
)
|
||||
|
||||
def _request_confirmation(self, action_info: Dict[str, Any]) -> bool:
|
||||
"""Demander confirmation à l'utilisateur."""
|
||||
|
||||
228
core/execution/safe_condition_evaluator.py
Normal file
228
core/execution/safe_condition_evaluator.py
Normal file
@@ -0,0 +1,228 @@
|
||||
"""
|
||||
Évaluateur de conditions sécurisé pour le DAGExecutor.
|
||||
|
||||
Remplace `eval()` (vulnérable à l'exécution de code arbitraire) par un
|
||||
parseur AST restreint :
|
||||
|
||||
- Seuls les noeuds AST nécessaires sont autorisés (literals, comparaisons,
|
||||
booléens, indexations, accès attribut limité, arithmétique simple).
|
||||
- Les appels de fonction sont interdits.
|
||||
- Les accès à des attributs « dunder » (`__class__`, `__import__`, etc.)
|
||||
sont systématiquement refusés pour éviter les évasions classiques.
|
||||
- Le contexte d'évaluation est fourni explicitement par l'appelant ;
|
||||
aucun builtins n'est exposé.
|
||||
|
||||
Usage typique :
|
||||
>>> evaluator = SafeConditionEvaluator()
|
||||
>>> evaluator.evaluate("results['step_1']['score'] >= 0.8",
|
||||
... {"results": {"step_1": {"score": 0.92}}})
|
||||
True
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import ast
|
||||
import operator
|
||||
from typing import Any, Callable, Dict, Mapping
|
||||
|
||||
|
||||
class UnsafeExpressionError(ValueError):
|
||||
"""Levée lorsqu'une expression contient un noeud AST interdit."""
|
||||
|
||||
|
||||
# Opérateurs arithmétiques & de comparaison autorisés.
|
||||
_BIN_OPS: Dict[type, Callable[[Any, Any], Any]] = {
|
||||
ast.Add: operator.add,
|
||||
ast.Sub: operator.sub,
|
||||
ast.Mult: operator.mul,
|
||||
ast.Div: operator.truediv,
|
||||
ast.FloorDiv: operator.floordiv,
|
||||
ast.Mod: operator.mod,
|
||||
ast.Pow: operator.pow,
|
||||
}
|
||||
|
||||
_BOOL_OPS: Dict[type, Callable[[Any, Any], Any]] = {
|
||||
ast.And: lambda a, b: a and b,
|
||||
ast.Or: lambda a, b: a or b,
|
||||
}
|
||||
|
||||
_UNARY_OPS: Dict[type, Callable[[Any], Any]] = {
|
||||
ast.Not: operator.not_,
|
||||
ast.USub: operator.neg,
|
||||
ast.UAdd: operator.pos,
|
||||
}
|
||||
|
||||
_CMP_OPS: Dict[type, Callable[[Any, Any], bool]] = {
|
||||
ast.Eq: operator.eq,
|
||||
ast.NotEq: operator.ne,
|
||||
ast.Lt: operator.lt,
|
||||
ast.LtE: operator.le,
|
||||
ast.Gt: operator.gt,
|
||||
ast.GtE: operator.ge,
|
||||
ast.In: lambda a, b: a in b,
|
||||
ast.NotIn: lambda a, b: a not in b,
|
||||
ast.Is: operator.is_,
|
||||
ast.IsNot: operator.is_not,
|
||||
}
|
||||
|
||||
|
||||
class SafeConditionEvaluator:
|
||||
"""Évalue une expression de condition via un parseur AST restreint."""
|
||||
|
||||
# Longueur max — stoppe les expressions pathologiques très tôt.
|
||||
MAX_EXPRESSION_LENGTH = 1024
|
||||
|
||||
def evaluate(
|
||||
self,
|
||||
expression: str,
|
||||
context: Mapping[str, Any],
|
||||
) -> Any:
|
||||
if not isinstance(expression, str):
|
||||
raise UnsafeExpressionError(
|
||||
"L'expression doit être une chaîne de caractères."
|
||||
)
|
||||
if len(expression) > self.MAX_EXPRESSION_LENGTH:
|
||||
raise UnsafeExpressionError(
|
||||
"Expression trop longue (> 1024 caractères)."
|
||||
)
|
||||
|
||||
try:
|
||||
tree = ast.parse(expression, mode="eval")
|
||||
except SyntaxError as exc:
|
||||
raise UnsafeExpressionError(
|
||||
f"Syntaxe d'expression invalide : {exc}"
|
||||
) from exc
|
||||
|
||||
return self._eval_node(tree.body, context)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Dispatch AST
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _eval_node(self, node: ast.AST, context: Mapping[str, Any]) -> Any:
|
||||
# Littéraux (Constant remplace Num/Str/Bytes/NameConstant depuis 3.8)
|
||||
if isinstance(node, ast.Constant):
|
||||
return node.value
|
||||
|
||||
# Variables : uniquement celles présentes dans `context`.
|
||||
if isinstance(node, ast.Name):
|
||||
if node.id not in context:
|
||||
raise UnsafeExpressionError(
|
||||
f"Variable '{node.id}' non autorisée."
|
||||
)
|
||||
return context[node.id]
|
||||
|
||||
# Accès attribut — interdit tout attribut dunder.
|
||||
if isinstance(node, ast.Attribute):
|
||||
if node.attr.startswith("_"):
|
||||
raise UnsafeExpressionError(
|
||||
f"Accès à l'attribut privé '{node.attr}' interdit."
|
||||
)
|
||||
value = self._eval_node(node.value, context)
|
||||
return getattr(value, node.attr)
|
||||
|
||||
# Indexation (results['step_1']).
|
||||
if isinstance(node, ast.Subscript):
|
||||
value = self._eval_node(node.value, context)
|
||||
# Python < 3.9 utilise ast.Index, >= 3.9 utilise directement un
|
||||
# noeud. On gère les deux cas.
|
||||
slice_node = node.slice
|
||||
if isinstance(slice_node, ast.Index): # type: ignore[attr-defined]
|
||||
slice_value = self._eval_node(
|
||||
slice_node.value, context # type: ignore[attr-defined]
|
||||
)
|
||||
else:
|
||||
slice_value = self._eval_node(slice_node, context)
|
||||
return value[slice_value]
|
||||
|
||||
# Comparaisons chaînées (a < b <= c).
|
||||
if isinstance(node, ast.Compare):
|
||||
left = self._eval_node(node.left, context)
|
||||
for op_node, comparator in zip(node.ops, node.comparators):
|
||||
op_cls = type(op_node)
|
||||
if op_cls not in _CMP_OPS:
|
||||
raise UnsafeExpressionError(
|
||||
f"Opérateur de comparaison '{op_cls.__name__}' interdit."
|
||||
)
|
||||
right = self._eval_node(comparator, context)
|
||||
if not _CMP_OPS[op_cls](left, right):
|
||||
return False
|
||||
left = right
|
||||
return True
|
||||
|
||||
# Booléen (and / or) — short-circuit manuel.
|
||||
if isinstance(node, ast.BoolOp):
|
||||
op_cls = type(node.op)
|
||||
if op_cls not in _BOOL_OPS:
|
||||
raise UnsafeExpressionError(
|
||||
f"Opérateur booléen '{op_cls.__name__}' interdit."
|
||||
)
|
||||
if isinstance(node.op, ast.And):
|
||||
result: Any = True
|
||||
for sub in node.values:
|
||||
result = self._eval_node(sub, context)
|
||||
if not result:
|
||||
return result
|
||||
return result
|
||||
# Or
|
||||
result = False
|
||||
for sub in node.values:
|
||||
result = self._eval_node(sub, context)
|
||||
if result:
|
||||
return result
|
||||
return result
|
||||
|
||||
# Unaires (-x, not x)
|
||||
if isinstance(node, ast.UnaryOp):
|
||||
op_cls = type(node.op)
|
||||
if op_cls not in _UNARY_OPS:
|
||||
raise UnsafeExpressionError(
|
||||
f"Opérateur unaire '{op_cls.__name__}' interdit."
|
||||
)
|
||||
return _UNARY_OPS[op_cls](self._eval_node(node.operand, context))
|
||||
|
||||
# Binaires (+, -, *, /, %, **, //)
|
||||
if isinstance(node, ast.BinOp):
|
||||
op_cls = type(node.op)
|
||||
if op_cls not in _BIN_OPS:
|
||||
raise UnsafeExpressionError(
|
||||
f"Opérateur binaire '{op_cls.__name__}' interdit."
|
||||
)
|
||||
left = self._eval_node(node.left, context)
|
||||
right = self._eval_node(node.right, context)
|
||||
return _BIN_OPS[op_cls](left, right)
|
||||
|
||||
# Literals composites
|
||||
if isinstance(node, ast.Tuple):
|
||||
return tuple(self._eval_node(e, context) for e in node.elts)
|
||||
if isinstance(node, ast.List):
|
||||
return [self._eval_node(e, context) for e in node.elts]
|
||||
if isinstance(node, ast.Set):
|
||||
return {self._eval_node(e, context) for e in node.elts}
|
||||
if isinstance(node, ast.Dict):
|
||||
return {
|
||||
self._eval_node(k, context) if k is not None else None:
|
||||
self._eval_node(v, context)
|
||||
for k, v in zip(node.keys, node.values)
|
||||
}
|
||||
|
||||
# Tout le reste (Call, Lambda, Comprehensions, Import, etc.) est
|
||||
# refusé explicitement.
|
||||
raise UnsafeExpressionError(
|
||||
f"Noeud AST '{type(node).__name__}' interdit dans les conditions."
|
||||
)
|
||||
|
||||
|
||||
def safe_eval_condition(
|
||||
expression: str,
|
||||
context: Mapping[str, Any],
|
||||
) -> Any:
|
||||
"""Helper fonctionnel : évalue `expression` avec le contexte donné."""
|
||||
return SafeConditionEvaluator().evaluate(expression, context)
|
||||
|
||||
|
||||
__all__ = [
|
||||
"SafeConditionEvaluator",
|
||||
"UnsafeExpressionError",
|
||||
"safe_eval_condition",
|
||||
]
|
||||
@@ -2,7 +2,140 @@
|
||||
Pipeline module - Orchestration du flux RPA Vision V3
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
from typing import Optional
|
||||
|
||||
from .workflow_pipeline import WorkflowPipeline, create_pipeline
|
||||
from .screen_analyzer import ScreenAnalyzer
|
||||
from .screen_state_cache import ScreenStateCache, compute_perceptual_hash
|
||||
from .edge_scorer import EdgeScorer, EdgeScore
|
||||
|
||||
__all__ = ["WorkflowPipeline", "create_pipeline", "ScreenAnalyzer"]
|
||||
__all__ = [
|
||||
"WorkflowPipeline",
|
||||
"create_pipeline",
|
||||
"ScreenAnalyzer",
|
||||
"ScreenStateCache",
|
||||
"compute_perceptual_hash",
|
||||
"EdgeScorer",
|
||||
"EdgeScore",
|
||||
"get_screen_analyzer",
|
||||
"reset_screen_analyzer",
|
||||
"get_screen_state_cache",
|
||||
"reset_screen_state_cache",
|
||||
]
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Singleton ScreenAnalyzer
|
||||
# =============================================================================
|
||||
#
|
||||
# Une seule instance est partagée entre ExecutionLoop, GraphBuilder et
|
||||
# stream_processor pour éviter le double chargement GPU (UIDetector + CLIP
|
||||
# = 6-10 Go VRAM, plafond 12 Go sur RTX 5070).
|
||||
#
|
||||
# Thread-safe : protégé par un lock.
|
||||
#
|
||||
# IMPORTANT (Lot C — avril 2026) :
|
||||
# Ce singleton ne porte plus AUCUN contexte d'exécution. Il détient
|
||||
# uniquement les ressources lourdes (modèles OCR, UIDetector, CLIP).
|
||||
# • Les flags runtime (`enable_ocr`, `enable_ui_detection`) et l'identité
|
||||
# de session (`session_id`) se passent en kwargs-only à `analyze()`,
|
||||
# jamais en mutant l'instance. Voir `ScreenAnalyzer.analyze()`.
|
||||
# • L'argument `session_id` de `get_screen_analyzer()` ne sert QUE de
|
||||
# valeur par défaut historique, ignorée après la première création.
|
||||
# À terme, prévoir sa suppression.
|
||||
# =============================================================================
|
||||
|
||||
|
||||
_SCREEN_ANALYZER_SINGLETON: Optional[ScreenAnalyzer] = None
|
||||
_SCREEN_ANALYZER_LOCK = threading.Lock()
|
||||
|
||||
|
||||
def get_screen_analyzer(
|
||||
ui_detector=None,
|
||||
ocr_engine: Optional[str] = None,
|
||||
session_id: str = "",
|
||||
force_new: bool = False,
|
||||
) -> ScreenAnalyzer:
|
||||
"""
|
||||
Récupérer l'instance partagée de ScreenAnalyzer.
|
||||
|
||||
Création à la première demande (lazy). Les appels ultérieurs retournent
|
||||
la même instance, quels que soient les arguments (sauf `force_new=True`).
|
||||
|
||||
Args:
|
||||
ui_detector: UIDetector optionnel (utilisé seulement à la 1ère création)
|
||||
ocr_engine: Moteur OCR ("doctr", "tesseract", None=auto)
|
||||
session_id: ID de session pour la 1ère création
|
||||
force_new: Forcer la création d'une nouvelle instance (tests)
|
||||
|
||||
Returns:
|
||||
Instance partagée de ScreenAnalyzer
|
||||
"""
|
||||
global _SCREEN_ANALYZER_SINGLETON
|
||||
|
||||
if force_new:
|
||||
with _SCREEN_ANALYZER_LOCK:
|
||||
_SCREEN_ANALYZER_SINGLETON = ScreenAnalyzer(
|
||||
ui_detector=ui_detector,
|
||||
ocr_engine=ocr_engine,
|
||||
session_id=session_id,
|
||||
)
|
||||
return _SCREEN_ANALYZER_SINGLETON
|
||||
|
||||
if _SCREEN_ANALYZER_SINGLETON is not None:
|
||||
return _SCREEN_ANALYZER_SINGLETON
|
||||
|
||||
with _SCREEN_ANALYZER_LOCK:
|
||||
# Double-check locking
|
||||
if _SCREEN_ANALYZER_SINGLETON is None:
|
||||
_SCREEN_ANALYZER_SINGLETON = ScreenAnalyzer(
|
||||
ui_detector=ui_detector,
|
||||
ocr_engine=ocr_engine,
|
||||
session_id=session_id,
|
||||
)
|
||||
return _SCREEN_ANALYZER_SINGLETON
|
||||
|
||||
|
||||
def reset_screen_analyzer() -> None:
|
||||
"""Réinitialiser le singleton (tests uniquement)."""
|
||||
global _SCREEN_ANALYZER_SINGLETON
|
||||
with _SCREEN_ANALYZER_LOCK:
|
||||
_SCREEN_ANALYZER_SINGLETON = None
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Singleton ScreenStateCache (partagé)
|
||||
# =============================================================================
|
||||
|
||||
|
||||
_SCREEN_STATE_CACHE_SINGLETON: Optional[ScreenStateCache] = None
|
||||
_SCREEN_STATE_CACHE_LOCK = threading.Lock()
|
||||
|
||||
|
||||
def get_screen_state_cache(
|
||||
ttl_seconds: float = 2.0,
|
||||
max_entries: int = 16,
|
||||
) -> ScreenStateCache:
|
||||
"""
|
||||
Retourne le cache de ScreenState partagé (créé à la 1ère demande).
|
||||
"""
|
||||
global _SCREEN_STATE_CACHE_SINGLETON
|
||||
if _SCREEN_STATE_CACHE_SINGLETON is not None:
|
||||
return _SCREEN_STATE_CACHE_SINGLETON
|
||||
with _SCREEN_STATE_CACHE_LOCK:
|
||||
if _SCREEN_STATE_CACHE_SINGLETON is None:
|
||||
_SCREEN_STATE_CACHE_SINGLETON = ScreenStateCache(
|
||||
ttl_seconds=ttl_seconds,
|
||||
max_entries=max_entries,
|
||||
)
|
||||
return _SCREEN_STATE_CACHE_SINGLETON
|
||||
|
||||
|
||||
def reset_screen_state_cache() -> None:
|
||||
"""Réinitialiser le cache partagé (tests uniquement)."""
|
||||
global _SCREEN_STATE_CACHE_SINGLETON
|
||||
with _SCREEN_STATE_CACHE_LOCK:
|
||||
_SCREEN_STATE_CACHE_SINGLETON = None
|
||||
|
||||
380
core/pipeline/edge_scorer.py
Normal file
380
core/pipeline/edge_scorer.py
Normal file
@@ -0,0 +1,380 @@
|
||||
"""
|
||||
EdgeScorer — Sélection robuste d'un edge parmi plusieurs candidats.
|
||||
|
||||
Au lieu de prendre "le premier edge sortant" (comportement legacy),
|
||||
ce module :
|
||||
|
||||
1. Applique un **filtre dur** : rejette les edges dont les `pre_conditions`
|
||||
(EdgeConstraints) échouent étant donné le ScreenState courant.
|
||||
2. Applique un **ranking léger** : score composite
|
||||
- `stats.success_rate` (pondéré fort)
|
||||
- match du `target_spec` (présence d'un UI element compatible)
|
||||
- récence (dernière exécution réussie)
|
||||
3. Retourne le meilleur edge, ou `None` si aucun ne passe le filtre.
|
||||
|
||||
API principale :
|
||||
>>> scorer = EdgeScorer()
|
||||
>>> edge = scorer.select_best(edges, screen_state=state)
|
||||
|
||||
Les scores individuels sont exposés via `score_edge()` pour les tests
|
||||
et la télémétrie.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from typing import List, Optional, Sequence
|
||||
|
||||
from core.models.screen_state import ScreenState
|
||||
from core.models.workflow_graph import WorkflowEdge
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Résultat de scoring (utile pour la télémétrie / debug)
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@dataclass
|
||||
class EdgeScore:
|
||||
"""Résultat détaillé du scoring d'un edge."""
|
||||
|
||||
edge: WorkflowEdge
|
||||
total: float
|
||||
success_rate: float
|
||||
target_match: float
|
||||
recency: float
|
||||
passed_preconditions: bool
|
||||
precondition_reason: str = "OK"
|
||||
|
||||
def __lt__(self, other: "EdgeScore") -> bool:
|
||||
# Utilisé par sorted() : plus grand score = meilleur
|
||||
return self.total < other.total
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Scorer
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class EdgeScorer:
|
||||
"""
|
||||
Sélectionne le meilleur edge sortant étant donné un ScreenState.
|
||||
|
||||
Les poids par défaut peuvent être ajustés à la construction.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
weight_success_rate: float = 0.55,
|
||||
weight_target_match: float = 0.35,
|
||||
weight_recency: float = 0.10,
|
||||
default_success_rate: float = 0.5,
|
||||
):
|
||||
"""
|
||||
Args:
|
||||
weight_success_rate: poids du `edge.stats.success_rate`
|
||||
weight_target_match: poids du match `target_spec` / `ui_elements`
|
||||
weight_recency: poids de la récence de la dernière exécution
|
||||
default_success_rate: valeur quand l'edge n'a jamais été exécuté
|
||||
"""
|
||||
total = weight_success_rate + weight_target_match + weight_recency
|
||||
if total <= 0:
|
||||
raise ValueError("La somme des poids doit être > 0")
|
||||
# Normalisation silencieuse
|
||||
self.w_success = weight_success_rate / total
|
||||
self.w_target = weight_target_match / total
|
||||
self.w_recency = weight_recency / total
|
||||
self.default_success_rate = default_success_rate
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# API publique
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def select_best(
|
||||
self,
|
||||
edges: Sequence[WorkflowEdge],
|
||||
screen_state: Optional[ScreenState] = None,
|
||||
strategy: str = "best",
|
||||
source_similarity: float = 1.0,
|
||||
) -> Optional[WorkflowEdge]:
|
||||
"""
|
||||
Sélectionne le meilleur edge.
|
||||
|
||||
Args:
|
||||
edges: Liste des edges candidats (généralement les sortants d'un node)
|
||||
screen_state: État courant pour évaluer pre_conditions et target_spec
|
||||
strategy: "best" (défaut, score complet) ou "first" (legacy, premier edge)
|
||||
source_similarity: confiance du matching qui a identifié le node
|
||||
source courant (valeur propagée depuis `match_current_state`).
|
||||
Utilisée pour évaluer la précondition ``min_source_similarity``
|
||||
de chaque edge. Défaut à ``1.0`` pour compat avec les appelants
|
||||
qui ne la fournissent pas encore.
|
||||
|
||||
Returns:
|
||||
Meilleur edge ou None si aucun ne passe les pre_conditions
|
||||
"""
|
||||
if not edges:
|
||||
return None
|
||||
|
||||
if strategy == "first":
|
||||
# Comportement legacy — retourne le premier edge quoi qu'il arrive
|
||||
return edges[0]
|
||||
|
||||
scores = self.rank(
|
||||
edges, screen_state=screen_state, source_similarity=source_similarity
|
||||
)
|
||||
|
||||
# Filtrer ceux qui ont passé les pre_conditions
|
||||
valid = [s for s in scores if s.passed_preconditions]
|
||||
if not valid:
|
||||
# Aucun edge valide → log pour debug, retourner None
|
||||
reasons = "; ".join(
|
||||
f"{s.edge.edge_id}: {s.precondition_reason}" for s in scores[:5]
|
||||
)
|
||||
logger.warning(
|
||||
f"[EdgeScorer] Aucun edge valide parmi {len(edges)} candidats. "
|
||||
f"Raisons: {reasons}"
|
||||
)
|
||||
return None
|
||||
|
||||
best = valid[0].edge # déjà trié par score décroissant
|
||||
logger.debug(
|
||||
f"[EdgeScorer] Sélection {best.edge_id} "
|
||||
f"(score={valid[0].total:.3f}, parmi {len(valid)} valides)"
|
||||
)
|
||||
return best
|
||||
|
||||
def rank(
|
||||
self,
|
||||
edges: Sequence[WorkflowEdge],
|
||||
screen_state: Optional[ScreenState] = None,
|
||||
source_similarity: float = 1.0,
|
||||
) -> List[EdgeScore]:
|
||||
"""
|
||||
Retourne la liste des edges triés par score décroissant,
|
||||
avec le détail pour chaque edge.
|
||||
|
||||
Tiebreak : `success_rate` le plus haut.
|
||||
|
||||
Args:
|
||||
edges: edges candidats
|
||||
screen_state: état courant (pour pre_conditions + target_match)
|
||||
source_similarity: confiance du match courant, propagée aux
|
||||
pre_conditions pour vérifier ``min_source_similarity``
|
||||
"""
|
||||
scored = [
|
||||
self.score_edge(edge, screen_state, source_similarity=source_similarity)
|
||||
for edge in edges
|
||||
]
|
||||
# Tri : score total décroissant, puis success_rate décroissant
|
||||
scored.sort(key=lambda s: (s.total, s.success_rate), reverse=True)
|
||||
return scored
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Scoring par edge
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def score_edge(
|
||||
self,
|
||||
edge: WorkflowEdge,
|
||||
screen_state: Optional[ScreenState] = None,
|
||||
source_similarity: float = 1.0,
|
||||
) -> EdgeScore:
|
||||
"""
|
||||
Calcule le score d'un edge.
|
||||
|
||||
Les pre_conditions sont évaluées ici mais servent uniquement de filtre
|
||||
dur (le score total reste calculé, mais `passed_preconditions` est à False).
|
||||
|
||||
Args:
|
||||
edge: edge à scorer
|
||||
screen_state: état courant (fenêtre, textes, ui_elements)
|
||||
source_similarity: confiance du matching courant, injectée dans
|
||||
``EdgeConstraints.check_preconditions`` pour évaluer
|
||||
``min_source_similarity``.
|
||||
"""
|
||||
# 1. Pre-conditions : filtre dur
|
||||
passed, reason = self._check_preconditions(
|
||||
edge, screen_state, source_similarity=source_similarity
|
||||
)
|
||||
|
||||
# 2. Success rate (dépend des stats existantes)
|
||||
success_rate = self._score_success_rate(edge)
|
||||
|
||||
# 3. Target match (UI element présent ?)
|
||||
target_match = self._score_target_match(edge, screen_state)
|
||||
|
||||
# 4. Récence
|
||||
recency = self._score_recency(edge)
|
||||
|
||||
total = (
|
||||
self.w_success * success_rate
|
||||
+ self.w_target * target_match
|
||||
+ self.w_recency * recency
|
||||
)
|
||||
|
||||
return EdgeScore(
|
||||
edge=edge,
|
||||
total=total,
|
||||
success_rate=success_rate,
|
||||
target_match=target_match,
|
||||
recency=recency,
|
||||
passed_preconditions=passed,
|
||||
precondition_reason=reason,
|
||||
)
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Composantes du score
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def _check_preconditions(
|
||||
self,
|
||||
edge: WorkflowEdge,
|
||||
screen_state: Optional[ScreenState],
|
||||
source_similarity: float = 1.0,
|
||||
) -> tuple[bool, str]:
|
||||
"""
|
||||
Vérifier les pre_conditions de l'edge.
|
||||
|
||||
Si pas de ScreenState, on ne peut rien vérifier → on laisse passer
|
||||
(mais on loggue).
|
||||
|
||||
Args:
|
||||
edge: edge à évaluer
|
||||
screen_state: état courant (None si non dispo)
|
||||
source_similarity: confiance du matching courant propagée par
|
||||
l'appelant (EdgeScorer.score_edge/rank/select_best). Elle
|
||||
alimente ``EdgeConstraints.check_preconditions`` pour rendre
|
||||
effective la contrainte ``min_source_similarity``.
|
||||
"""
|
||||
constraints = edge.constraints
|
||||
if constraints is None:
|
||||
return True, "OK (pas de contraintes)"
|
||||
|
||||
if screen_state is None:
|
||||
# Pas de ScreenState → on ne peut évaluer ni fenêtre, ni textes,
|
||||
# mais la similarité source reste vérifiable.
|
||||
try:
|
||||
ok, reason = constraints.check_preconditions(
|
||||
window_title="",
|
||||
app_name="",
|
||||
detected_texts=[],
|
||||
source_similarity=source_similarity,
|
||||
)
|
||||
if not ok:
|
||||
return ok, reason
|
||||
except Exception as e:
|
||||
logger.warning(f"[EdgeScorer] Erreur check_preconditions: {e}")
|
||||
return True, f"Erreur ignorée: {e}"
|
||||
return True, "OK (pas de ScreenState pour évaluer)"
|
||||
|
||||
window_title = screen_state.window.window_title if screen_state.window else ""
|
||||
app_name = screen_state.window.app_name if screen_state.window else ""
|
||||
detected_texts = (
|
||||
screen_state.perception.detected_text
|
||||
if screen_state.perception
|
||||
else []
|
||||
)
|
||||
|
||||
try:
|
||||
ok, reason = constraints.check_preconditions(
|
||||
window_title=window_title,
|
||||
app_name=app_name,
|
||||
detected_texts=detected_texts,
|
||||
source_similarity=source_similarity,
|
||||
)
|
||||
return ok, reason
|
||||
except Exception as e:
|
||||
logger.warning(f"[EdgeScorer] Erreur check_preconditions: {e}")
|
||||
# En cas d'erreur, on ne bloque pas l'edge
|
||||
return True, f"Erreur ignorée: {e}"
|
||||
|
||||
def _score_success_rate(self, edge: WorkflowEdge) -> float:
|
||||
"""Score basé sur `edge.stats.success_rate`."""
|
||||
if edge.stats is None or edge.stats.execution_count == 0:
|
||||
return self.default_success_rate
|
||||
return max(0.0, min(1.0, edge.stats.success_rate))
|
||||
|
||||
def _score_target_match(
|
||||
self,
|
||||
edge: WorkflowEdge,
|
||||
screen_state: Optional[ScreenState],
|
||||
) -> float:
|
||||
"""
|
||||
Score de correspondance entre le `target_spec` de l'action et
|
||||
les `ui_elements` de l'écran courant.
|
||||
|
||||
Retourne :
|
||||
- 1.0 si un élément matche strictement (texte ou rôle)
|
||||
- 0.5 si aucun screen_state fourni (neutre, pas pénalisant)
|
||||
- 0.0 si aucun élément compatible
|
||||
"""
|
||||
if screen_state is None:
|
||||
return 0.5
|
||||
|
||||
target = edge.action.target if edge.action else None
|
||||
if target is None:
|
||||
return 0.5
|
||||
|
||||
ui_elements = screen_state.ui_elements or []
|
||||
if not ui_elements:
|
||||
# Pas d'UI détectée → on ne peut pas trancher, neutre
|
||||
return 0.5
|
||||
|
||||
target_text = (target.by_text or "").lower().strip()
|
||||
target_role = (target.by_role or "").lower().strip()
|
||||
|
||||
best = 0.0
|
||||
for el in ui_elements:
|
||||
score = 0.0
|
||||
el_label = getattr(el, "label", "") or ""
|
||||
el_role = getattr(el, "role", "") or ""
|
||||
el_type = getattr(el, "type", "") or ""
|
||||
|
||||
if target_text:
|
||||
if target_text == el_label.lower().strip():
|
||||
score = max(score, 1.0)
|
||||
elif target_text in el_label.lower():
|
||||
score = max(score, 0.8)
|
||||
|
||||
if target_role:
|
||||
if target_role == el_role.lower() or target_role == el_type.lower():
|
||||
score = max(score, 0.9)
|
||||
|
||||
if not target_text and not target_role and target.by_position:
|
||||
# Si seule la position est fournie, on considère toujours match possible
|
||||
score = 0.6
|
||||
|
||||
if score > best:
|
||||
best = score
|
||||
|
||||
# Si on n'a rien trouvé mais qu'un target est demandé → 0.0 (fort négatif)
|
||||
if best == 0.0 and (target_text or target_role):
|
||||
return 0.0
|
||||
|
||||
return best if best > 0 else 0.5
|
||||
|
||||
def _score_recency(self, edge: WorkflowEdge) -> float:
|
||||
"""
|
||||
Score de récence basé sur `edge.stats.last_executed`.
|
||||
|
||||
Échelle :
|
||||
- exécuté dans les dernières 24h : 1.0
|
||||
- exécuté dans les 7 derniers jours : 0.7
|
||||
- exécuté il y a plus longtemps : 0.3
|
||||
- jamais exécuté : 0.5 (neutre)
|
||||
"""
|
||||
if edge.stats is None or edge.stats.last_executed is None:
|
||||
return 0.5
|
||||
|
||||
delta = datetime.now() - edge.stats.last_executed
|
||||
seconds = delta.total_seconds()
|
||||
if seconds < 24 * 3600:
|
||||
return 1.0
|
||||
if seconds < 7 * 24 * 3600:
|
||||
return 0.7
|
||||
return 0.3
|
||||
@@ -9,13 +9,33 @@ Orchestre les 4 niveaux du ScreenState :
|
||||
|
||||
Ce module comble le chaînon manquant entre la capture brute (Couche 0)
|
||||
et la construction d'embeddings (Couche 3).
|
||||
|
||||
=============================================================================
|
||||
Thread-safety & partage multi-loops (Lot C — avril 2026)
|
||||
=============================================================================
|
||||
Cet analyseur peut être partagé entre plusieurs `ExecutionLoop` (singleton
|
||||
`get_screen_analyzer()`). Pour éviter la contamination croisée :
|
||||
|
||||
• `analyze()` NE MUTE JAMAIS `self._ocr`, `self._ui_detector`,
|
||||
`self._ocr_initialized`, `self._ui_detector_initialized` pour gérer les
|
||||
flags runtime (enable_ocr / enable_ui_detection). Ces flags sont par
|
||||
appel, résolus en variables locales.
|
||||
• `session_id` circule en paramètre d'appel et renseigne la metadata du
|
||||
ScreenState ; l'attribut `self.session_id` n'est qu'un défaut historique
|
||||
(rétrocompat) et n'est plus la source de vérité.
|
||||
• L'init lazy des composants lourds (OCR, UIDetector) est protégée par un
|
||||
`_init_lock` par instance pour empêcher une double initialisation
|
||||
concurrente.
|
||||
"""
|
||||
|
||||
import contextlib
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Optional, Dict, Any, List
|
||||
from typing import Optional, Dict, Any, List, Tuple
|
||||
|
||||
from PIL import Image
|
||||
|
||||
@@ -32,6 +52,44 @@ from core.models.ui_element import UIElement
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Lock d'inférence local au module : sert de fallback si le GPUResourceManager
|
||||
# n'est pas disponible (import error, tests). Partagé entre toutes les instances
|
||||
# ScreenAnalyzer du process, cohérent avec le singleton get_screen_analyzer().
|
||||
_ANALYZE_FALLBACK_LOCK = threading.Lock()
|
||||
|
||||
|
||||
def _acquire_gpu_context(timeout: Optional[float] = None):
|
||||
"""
|
||||
Retourne un context manager pour sérialiser les appels GPU.
|
||||
|
||||
Préfère `GPUResourceManager.acquire_inference()` si disponible (coordination
|
||||
globale), sinon bascule sur un lock threading local au module.
|
||||
"""
|
||||
try:
|
||||
from core.gpu import get_gpu_resource_manager
|
||||
|
||||
manager = get_gpu_resource_manager()
|
||||
return manager.acquire_inference(timeout=timeout)
|
||||
except Exception as e: # pragma: no cover - fallback defensif
|
||||
logger.debug(f"GPUResourceManager indisponible, fallback lock local: {e}")
|
||||
|
||||
@contextlib.contextmanager
|
||||
def _fallback():
|
||||
if timeout is None:
|
||||
_ANALYZE_FALLBACK_LOCK.acquire()
|
||||
yield True
|
||||
_ANALYZE_FALLBACK_LOCK.release()
|
||||
else:
|
||||
got = _ANALYZE_FALLBACK_LOCK.acquire(timeout=timeout)
|
||||
try:
|
||||
yield got
|
||||
finally:
|
||||
if got:
|
||||
_ANALYZE_FALLBACK_LOCK.release()
|
||||
|
||||
return _fallback()
|
||||
|
||||
|
||||
class ScreenAnalyzer:
|
||||
"""
|
||||
Construit un ScreenState complet (4 niveaux) depuis un screenshot.
|
||||
@@ -44,6 +102,14 @@ class ScreenAnalyzer:
|
||||
>>> state = analyzer.analyze("/path/to/screenshot.png")
|
||||
>>> print(state.perception.detected_text)
|
||||
>>> print(len(state.ui_elements))
|
||||
|
||||
Runtime overrides (kwargs-only) sur analyze() :
|
||||
>>> state = analyzer.analyze(
|
||||
... path,
|
||||
... enable_ocr=False, # bypass OCR pour cet appel
|
||||
... enable_ui_detection=False, # bypass UIDetector
|
||||
... session_id="session_42", # session par appel
|
||||
... )
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -56,18 +122,27 @@ class ScreenAnalyzer:
|
||||
Args:
|
||||
ui_detector: Instance de UIDetector (créé si None)
|
||||
ocr_engine: Moteur OCR à utiliser ("doctr", "tesseract", None=auto)
|
||||
session_id: ID de la session en cours
|
||||
session_id: ID de session par défaut (rétrocompat ; préférer passer
|
||||
`session_id` en kwarg de `analyze()` pour chaque appel).
|
||||
"""
|
||||
self._ui_detector = ui_detector
|
||||
self._ocr_engine_name = ocr_engine
|
||||
self._ocr = None
|
||||
# Session par défaut (rétrocompat). La source de vérité est désormais
|
||||
# le paramètre `session_id` de `analyze()`.
|
||||
self.session_id = session_id
|
||||
# Compteur d'états — protégé par _state_lock pour être safe en parallèle.
|
||||
self._state_counter = 0
|
||||
self._state_lock = threading.Lock()
|
||||
|
||||
# Initialisation lazy pour éviter les imports lourds au démarrage
|
||||
# Initialisation lazy pour éviter les imports lourds au démarrage.
|
||||
self._ui_detector_initialized = ui_detector is not None
|
||||
self._ocr_initialized = False
|
||||
|
||||
# Lock dédié à l'init lazy : empêche deux threads d'initialiser
|
||||
# simultanément OCR ou UIDetector (double chargement GPU).
|
||||
self._init_lock = threading.Lock()
|
||||
|
||||
# =========================================================================
|
||||
# API publique
|
||||
# =========================================================================
|
||||
@@ -77,28 +152,85 @@ class ScreenAnalyzer:
|
||||
screenshot_path: str,
|
||||
window_info: Optional[Dict[str, Any]] = None,
|
||||
context: Optional[Dict[str, Any]] = None,
|
||||
*,
|
||||
enable_ocr: bool = True,
|
||||
enable_ui_detection: bool = True,
|
||||
session_id: str = "",
|
||||
) -> ScreenState:
|
||||
"""
|
||||
Analyser un screenshot et construire un ScreenState complet.
|
||||
|
||||
Les flags `enable_ocr`, `enable_ui_detection` et `session_id` sont
|
||||
**par appel, kwargs-only**, pour ne pas polluer l'état partagé du
|
||||
singleton quand plusieurs `ExecutionLoop` se partagent l'analyseur.
|
||||
|
||||
Args:
|
||||
screenshot_path: Chemin vers le fichier image
|
||||
window_info: Infos fenêtre active {"title": ..., "app_name": ...}
|
||||
context: Contexte métier optionnel
|
||||
enable_ocr: Active l'OCR pour cet appel (True par défaut).
|
||||
False → `detected_text=[]`, aucune init d'OCR déclenchée.
|
||||
enable_ui_detection: Active la détection UI pour cet appel
|
||||
(True par défaut). False → `ui_elements=[]`.
|
||||
session_id: ID de session pour cet appel. Si vide, on retombe sur
|
||||
`self.session_id` (rétrocompat). Cette valeur est propagée
|
||||
dans `ScreenState.session_id` et `metadata["session_id"]`.
|
||||
|
||||
Returns:
|
||||
ScreenState avec les 4 niveaux remplis
|
||||
ScreenState avec les 4 niveaux remplis.
|
||||
"""
|
||||
screenshot_path = str(screenshot_path)
|
||||
self._state_counter += 1
|
||||
|
||||
state_id = f"{self.session_id}_state_{self._state_counter:04d}" if self.session_id else f"state_{self._state_counter:04d}"
|
||||
# Résolution de la session : priorité au kwarg, fallback sur l'état
|
||||
# interne (legacy). Variable locale uniquement — pas de mutation.
|
||||
effective_session_id = session_id or self.session_id
|
||||
|
||||
# Niveau 1 : Raw
|
||||
# Compteur incrémenté sous lock pour identifiants uniques même en
|
||||
# parallèle. C'est la seule mutation tolérée : elle n'impacte pas le
|
||||
# comportement OCR/UI.
|
||||
with self._state_lock:
|
||||
self._state_counter += 1
|
||||
state_counter = self._state_counter
|
||||
|
||||
state_id = (
|
||||
f"{effective_session_id}_state_{state_counter:04d}"
|
||||
if effective_session_id
|
||||
else f"state_{state_counter:04d}"
|
||||
)
|
||||
|
||||
# Niveau 1 : Raw (léger, hors lock GPU)
|
||||
raw = self._build_raw_level(screenshot_path)
|
||||
|
||||
# Niveau 2 : Perception (OCR)
|
||||
detected_text = self._extract_text(screenshot_path)
|
||||
# Résolution locale des instances OCR / UIDetector selon les flags.
|
||||
# Aucune mutation de self ici : on décide simplement ce qu'on utilise.
|
||||
ocr_instance = self._resolve_ocr_instance(enable_ocr=enable_ocr)
|
||||
ui_detector_instance = self._resolve_ui_detector_instance(
|
||||
enable_ui_detection=enable_ui_detection
|
||||
)
|
||||
|
||||
# Niveaux 2 et 3 : OCR + détection UI sont les étapes lourdes en GPU.
|
||||
# On sérialise via GPUResourceManager.acquire_inference() pour éviter
|
||||
# que ExecutionLoop et stream_processor saturent simultanément la VRAM
|
||||
# sur RTX 5070 (12 Go). Timeout généreux : un appel peut prendre 15-20s.
|
||||
with _acquire_gpu_context(timeout=60.0) as acquired:
|
||||
if not acquired:
|
||||
logger.warning(
|
||||
"Timeout en attendant le lock GPU pour ScreenAnalyzer.analyze() "
|
||||
"→ exécution sans sérialisation (risque saturation VRAM)"
|
||||
)
|
||||
|
||||
# Niveau 2 : Perception (OCR) — mesure du temps OCR
|
||||
ocr_t0 = time.time()
|
||||
detected_text = self._extract_text_with(ocr_instance, screenshot_path)
|
||||
ocr_ms = (time.time() - ocr_t0) * 1000
|
||||
|
||||
# Niveau 3 : UI Elements — mesure du temps détection
|
||||
ui_t0 = time.time()
|
||||
ui_elements = self._detect_ui_elements_with(
|
||||
ui_detector_instance, screenshot_path, window_info
|
||||
)
|
||||
ui_ms = (time.time() - ui_t0) * 1000
|
||||
|
||||
perception = PerceptionLevel(
|
||||
embedding=EmbeddingRef(
|
||||
provider="openclip_ViT-B-32",
|
||||
@@ -106,13 +238,10 @@ class ScreenAnalyzer:
|
||||
dimensions=512,
|
||||
),
|
||||
detected_text=detected_text,
|
||||
text_detection_method=self._get_ocr_method_name(),
|
||||
text_detection_method=self._get_ocr_method_name(ocr_instance),
|
||||
confidence_avg=0.85 if detected_text else 0.0,
|
||||
)
|
||||
|
||||
# Niveau 3 : UI Elements
|
||||
ui_elements = self._detect_ui_elements(screenshot_path, window_info)
|
||||
|
||||
# Niveau 4 : Contexte
|
||||
window_ctx = self._build_window_context(window_info)
|
||||
context_level = self._build_context_level(context)
|
||||
@@ -120,22 +249,28 @@ class ScreenAnalyzer:
|
||||
state = ScreenState(
|
||||
screen_state_id=state_id,
|
||||
timestamp=datetime.now(),
|
||||
session_id=self.session_id,
|
||||
session_id=effective_session_id,
|
||||
window=window_ctx,
|
||||
raw=raw,
|
||||
perception=perception,
|
||||
context=context_level,
|
||||
metadata={
|
||||
"analyzer_version": "1.0",
|
||||
"analyzer_version": "1.1",
|
||||
"session_id": effective_session_id,
|
||||
"ui_elements_count": len(ui_elements),
|
||||
"text_regions_count": len(detected_text),
|
||||
"ocr_ms": ocr_ms,
|
||||
"ui_ms": ui_ms,
|
||||
"ocr_enabled": enable_ocr,
|
||||
"ui_detection_enabled": enable_ui_detection,
|
||||
},
|
||||
ui_elements=ui_elements,
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"ScreenState {state_id} construit: "
|
||||
f"{len(ui_elements)} éléments UI, {len(detected_text)} textes détectés"
|
||||
f"{len(ui_elements)} éléments UI, {len(detected_text)} textes détectés "
|
||||
f"(ocr={enable_ocr}, ui={enable_ui_detection})"
|
||||
)
|
||||
return state
|
||||
|
||||
@@ -145,11 +280,16 @@ class ScreenAnalyzer:
|
||||
save_dir: str = "data/screens",
|
||||
window_info: Optional[Dict[str, Any]] = None,
|
||||
context: Optional[Dict[str, Any]] = None,
|
||||
*,
|
||||
enable_ocr: bool = True,
|
||||
enable_ui_detection: bool = True,
|
||||
session_id: str = "",
|
||||
) -> ScreenState:
|
||||
"""
|
||||
Analyser une PIL Image (utile quand on a déjà l'image en mémoire).
|
||||
|
||||
Sauvegarde l'image sur disque puis appelle analyze().
|
||||
Sauvegarde l'image sur disque puis appelle analyze(). Les flags
|
||||
runtime sont propagés à `analyze()` en kwargs-only.
|
||||
"""
|
||||
save_path = Path(save_dir)
|
||||
save_path.mkdir(parents=True, exist_ok=True)
|
||||
@@ -159,7 +299,49 @@ class ScreenAnalyzer:
|
||||
filepath = save_path / filename
|
||||
|
||||
image.save(str(filepath))
|
||||
return self.analyze(str(filepath), window_info=window_info, context=context)
|
||||
return self.analyze(
|
||||
str(filepath),
|
||||
window_info=window_info,
|
||||
context=context,
|
||||
enable_ocr=enable_ocr,
|
||||
enable_ui_detection=enable_ui_detection,
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
# =========================================================================
|
||||
# Résolution des instances OCR / UI selon les flags d'appel
|
||||
# =========================================================================
|
||||
|
||||
def _resolve_ocr_instance(self, *, enable_ocr: bool):
|
||||
"""
|
||||
Retourner l'instance OCR à utiliser pour cet appel.
|
||||
|
||||
- `enable_ocr=False` → None (pas d'init, pas d'appel OCR)
|
||||
- sinon → init lazy sous lock si nécessaire, puis retour de `self._ocr`
|
||||
|
||||
Ne mute `self._ocr` / `self._ocr_initialized` QUE pendant l'init lazy
|
||||
réelle, jamais pour bypasser l'OCR d'un appel.
|
||||
"""
|
||||
if not enable_ocr:
|
||||
return None
|
||||
if not self._ocr_initialized:
|
||||
with self._init_lock:
|
||||
# Double-check : un autre thread a pu initialiser entretemps.
|
||||
if not self._ocr_initialized:
|
||||
self._ensure_ocr_locked()
|
||||
return self._ocr
|
||||
|
||||
def _resolve_ui_detector_instance(self, *, enable_ui_detection: bool):
|
||||
"""
|
||||
Retourner l'instance UIDetector pour cet appel (idem _resolve_ocr_instance).
|
||||
"""
|
||||
if not enable_ui_detection:
|
||||
return None
|
||||
if not self._ui_detector_initialized:
|
||||
with self._init_lock:
|
||||
if not self._ui_detector_initialized:
|
||||
self._ensure_ui_detector_locked()
|
||||
return self._ui_detector
|
||||
|
||||
# =========================================================================
|
||||
# Niveau 1 : Raw
|
||||
@@ -182,23 +364,24 @@ class ScreenAnalyzer:
|
||||
# Niveau 2 : Perception — OCR
|
||||
# =========================================================================
|
||||
|
||||
def _extract_text(self, screenshot_path: str) -> List[str]:
|
||||
"""Extraire le texte d'un screenshot via OCR."""
|
||||
self._ensure_ocr()
|
||||
|
||||
if self._ocr is None:
|
||||
def _extract_text_with(self, ocr_callable, screenshot_path: str) -> List[str]:
|
||||
"""Extraire le texte via un callable OCR donné (peut être None)."""
|
||||
if ocr_callable is None:
|
||||
return []
|
||||
|
||||
try:
|
||||
return self._ocr(screenshot_path)
|
||||
return ocr_callable(screenshot_path)
|
||||
except Exception as e:
|
||||
logger.warning(f"OCR échoué: {e}")
|
||||
return []
|
||||
|
||||
def _ensure_ocr(self) -> None:
|
||||
"""Initialiser le moteur OCR (lazy)."""
|
||||
if self._ocr_initialized:
|
||||
return
|
||||
def _ensure_ocr_locked(self) -> None:
|
||||
"""
|
||||
Initialiser le moteur OCR (appelé sous `self._init_lock`).
|
||||
|
||||
Ne doit PAS être appelé hors de `_resolve_ocr_instance()`.
|
||||
"""
|
||||
# Mutation intentionnelle : on installe l'instance OCR réelle.
|
||||
# Protégée par le lock d'init (pas le lock GPU).
|
||||
self._ocr_initialized = True
|
||||
|
||||
engine = self._ocr_engine_name
|
||||
@@ -257,8 +440,9 @@ class ScreenAnalyzer:
|
||||
|
||||
return ocr_func
|
||||
|
||||
def _get_ocr_method_name(self) -> str:
|
||||
if self._ocr is None:
|
||||
def _get_ocr_method_name(self, ocr_instance=None) -> str:
|
||||
"""Nom du moteur OCR effectivement utilisé pour cet appel."""
|
||||
if ocr_instance is None:
|
||||
return "none"
|
||||
if self._ocr_engine_name:
|
||||
return self._ocr_engine_name
|
||||
@@ -268,19 +452,18 @@ class ScreenAnalyzer:
|
||||
# Niveau 3 : UI Elements
|
||||
# =========================================================================
|
||||
|
||||
def _detect_ui_elements(
|
||||
def _detect_ui_elements_with(
|
||||
self,
|
||||
ui_detector,
|
||||
screenshot_path: str,
|
||||
window_info: Optional[Dict[str, Any]] = None,
|
||||
) -> List[UIElement]:
|
||||
"""Détecter les éléments UI dans le screenshot."""
|
||||
self._ensure_ui_detector()
|
||||
|
||||
if self._ui_detector is None:
|
||||
"""Détecter les éléments UI via un détecteur donné (peut être None)."""
|
||||
if ui_detector is None:
|
||||
return []
|
||||
|
||||
try:
|
||||
elements = self._ui_detector.detect(
|
||||
elements = ui_detector.detect(
|
||||
screenshot_path, window_context=window_info
|
||||
)
|
||||
return elements
|
||||
@@ -288,10 +471,10 @@ class ScreenAnalyzer:
|
||||
logger.warning(f"Détection UI échouée: {e}")
|
||||
return []
|
||||
|
||||
def _ensure_ui_detector(self) -> None:
|
||||
"""Initialiser le UIDetector (lazy)."""
|
||||
if self._ui_detector_initialized:
|
||||
return
|
||||
def _ensure_ui_detector_locked(self) -> None:
|
||||
"""
|
||||
Initialiser le UIDetector (appelé sous `self._init_lock`).
|
||||
"""
|
||||
self._ui_detector_initialized = True
|
||||
|
||||
try:
|
||||
|
||||
409
core/pipeline/screen_state_cache.py
Normal file
409
core/pipeline/screen_state_cache.py
Normal file
@@ -0,0 +1,409 @@
|
||||
"""
|
||||
ScreenStateCache — Cache perceptuel de ScreenState (context-aware).
|
||||
|
||||
Objectif : éviter de réanalyser un screenshot identique (5-15s VLM/OCR)
|
||||
à chaque step de la boucle d'exécution.
|
||||
|
||||
Principe (Lot D — avril 2026) :
|
||||
- Clé = composite de 6 éléments pour éviter les collisions silencieuses
|
||||
entre contextes différents partageant un même screenshot :
|
||||
1. phash (dhash 8x8 du screenshot) — calculé en ~2-5ms
|
||||
2. window_title (titre fenêtre active)
|
||||
3. app_name (nom process actif)
|
||||
4. enable_ocr (flag runtime)
|
||||
5. enable_ui_detection (flag runtime)
|
||||
6. workflow_id (isolation inter-workflows)
|
||||
- TTL par défaut : 2 secondes (configurable)
|
||||
- Invalidation explicite possible (par clé composite ou globale)
|
||||
- invalidate_if_changed reste piloté par le phash seul (détection de
|
||||
changement visuel majeur, indépendant du contexte)
|
||||
- Thread-safe (lock interne)
|
||||
|
||||
API principale :
|
||||
>>> cache = ScreenStateCache(ttl_seconds=2.0)
|
||||
>>> state, hit, ms = cache.get_or_compute(
|
||||
... screenshot_path, compute_fn,
|
||||
... window_title="App", app_name="app.exe",
|
||||
... enable_ocr=True, enable_ui_detection=True,
|
||||
... workflow_id="wf_123",
|
||||
... )
|
||||
|
||||
La fonction `compute_fn` prend le chemin du screenshot et doit retourner
|
||||
un `ScreenState`. Elle n'est appelée qu'en cache miss.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Callable, Optional, Tuple
|
||||
|
||||
from PIL import Image
|
||||
|
||||
from core.models.screen_state import ScreenState
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Hash perceptuel (dhash simple, sans dépendance imagehash)
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def _hamming_distance_hex(a: str, b: str) -> int:
|
||||
"""
|
||||
Distance de Hamming entre deux chaînes hexadécimales de même longueur.
|
||||
|
||||
Retourne le nombre de bits qui diffèrent entre les deux hashes.
|
||||
Si les longueurs diffèrent, on pad à droite par des zéros.
|
||||
"""
|
||||
if len(a) != len(b):
|
||||
max_len = max(len(a), len(b))
|
||||
a = a.ljust(max_len, "0")
|
||||
b = b.ljust(max_len, "0")
|
||||
try:
|
||||
xor = int(a, 16) ^ int(b, 16)
|
||||
return bin(xor).count("1")
|
||||
except ValueError:
|
||||
# Fallback : comparaison caractère à caractère
|
||||
return sum(1 for ca, cb in zip(a, b) if ca != cb) * 4
|
||||
|
||||
|
||||
def compute_perceptual_hash(screenshot_path: str, size: int = 8) -> str:
|
||||
"""
|
||||
Calculer un dhash (difference hash) pour un screenshot.
|
||||
|
||||
Algorithme :
|
||||
1. Convertir en niveaux de gris
|
||||
2. Redimensionner à (size+1) x size
|
||||
3. Comparer chaque pixel avec son voisin de droite (dhash)
|
||||
4. Retourner un hash hexadécimal de size*size bits
|
||||
|
||||
Robuste aux petites variations (curseur, blink, compression).
|
||||
Coût typique : 2-5 ms sur un 1920x1080.
|
||||
|
||||
Args:
|
||||
screenshot_path: Chemin vers le fichier image
|
||||
size: Taille du hash (8 = 64 bits, défaut)
|
||||
|
||||
Returns:
|
||||
Chaîne hexadécimale (size*size/4 caractères)
|
||||
"""
|
||||
try:
|
||||
img = Image.open(screenshot_path)
|
||||
img = img.convert("L").resize((size + 1, size), Image.LANCZOS)
|
||||
pixels = list(img.getdata())
|
||||
|
||||
# dhash : comparer chaque pixel avec celui de droite
|
||||
bits = []
|
||||
for row in range(size):
|
||||
for col in range(size):
|
||||
left = pixels[row * (size + 1) + col]
|
||||
right = pixels[row * (size + 1) + col + 1]
|
||||
bits.append(1 if left > right else 0)
|
||||
|
||||
# Convertir en hex
|
||||
value = 0
|
||||
for bit in bits:
|
||||
value = (value << 1) | bit
|
||||
return format(value, f"0{size * size // 4}x")
|
||||
except Exception as e:
|
||||
logger.warning(f"Hash perceptuel échoué pour {screenshot_path}: {e}")
|
||||
# Fallback : hash du contenu brut
|
||||
try:
|
||||
data = Path(screenshot_path).read_bytes()
|
||||
return hashlib.md5(data).hexdigest()[:16]
|
||||
except Exception:
|
||||
return f"unhashable_{int(time.time() * 1000)}"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Clé composite (Lot D)
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def _make_cache_key(
|
||||
phash: str,
|
||||
window_title: str,
|
||||
app_name: str,
|
||||
enable_ocr: bool,
|
||||
enable_ui_detection: bool,
|
||||
workflow_id: str,
|
||||
) -> str:
|
||||
"""
|
||||
Construire une clé composite stable pour le cache.
|
||||
|
||||
Combine les 6 dimensions du contexte d'exécution dans une chaîne
|
||||
hexadécimale (md5 tronqué à 16 caractères), préfixée par le phash pour
|
||||
conserver une lisibilité minimale en debug (log : `aabb…|ctx=1234…`).
|
||||
|
||||
NB : On hash plutôt que concaténer brut pour :
|
||||
- Borner la taille de la clé même si window_title est long
|
||||
- Éviter les collisions triviales (séparateur présent dans un titre)
|
||||
- Rendre la clé opaque (pas de PII en clair dans les logs de cache)
|
||||
|
||||
Args:
|
||||
phash: Hash perceptuel du screenshot (dhash 8x8)
|
||||
window_title: Titre de la fenêtre active (str)
|
||||
app_name: Nom du process actif (str)
|
||||
enable_ocr: Flag runtime OCR (bool)
|
||||
enable_ui_detection: Flag runtime détection UI (bool)
|
||||
workflow_id: ID du workflow en cours (str, "" pour legacy)
|
||||
|
||||
Returns:
|
||||
Clé composite `{phash}|{ctx_hash}` où ctx_hash = md5(16)
|
||||
"""
|
||||
# Sérialisation déterministe ; `|` comme séparateur interne puisque hashé.
|
||||
ctx_repr = (
|
||||
f"{window_title or ''}\x1f"
|
||||
f"{app_name or ''}\x1f"
|
||||
f"{int(bool(enable_ocr))}\x1f"
|
||||
f"{int(bool(enable_ui_detection))}\x1f"
|
||||
f"{workflow_id or ''}"
|
||||
)
|
||||
ctx_hash = hashlib.md5(ctx_repr.encode("utf-8")).hexdigest()[:16]
|
||||
return f"{phash}|{ctx_hash}"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Entry
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@dataclass
|
||||
class _CacheEntry:
|
||||
state: ScreenState
|
||||
created_at: float
|
||||
phash: str # phash seul (utilisé par invalidate_if_changed)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Cache
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class ScreenStateCache:
|
||||
"""
|
||||
Cache de ScreenState avec TTL et clé composite context-aware.
|
||||
|
||||
Thread-safe. Utilise un lock interne pour les opérations get/set.
|
||||
"""
|
||||
|
||||
def __init__(self, ttl_seconds: float = 2.0, max_entries: int = 16):
|
||||
"""
|
||||
Args:
|
||||
ttl_seconds: Durée de vie d'une entrée (en secondes)
|
||||
max_entries: Nombre max d'entrées avant éviction LRU simple
|
||||
"""
|
||||
self.ttl_seconds = ttl_seconds
|
||||
self.max_entries = max_entries
|
||||
# Clé = composite (_make_cache_key), valeur = _CacheEntry
|
||||
self._store: dict[str, _CacheEntry] = {}
|
||||
self._lock = threading.Lock()
|
||||
|
||||
# Métriques simples (utile pour le debug / logs)
|
||||
self.hits = 0
|
||||
self.misses = 0
|
||||
self.invalidations = 0
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# API bas niveau (par clé composite)
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def _get(self, composite_key: str) -> Optional[ScreenState]:
|
||||
"""Retourne l'entrée pour cette clé composite si encore valide."""
|
||||
with self._lock:
|
||||
entry = self._store.get(composite_key)
|
||||
if entry is None:
|
||||
return None
|
||||
if time.time() - entry.created_at > self.ttl_seconds:
|
||||
# Expiré
|
||||
self._store.pop(composite_key, None)
|
||||
return None
|
||||
return entry.state
|
||||
|
||||
def _set(self, composite_key: str, phash: str, state: ScreenState) -> None:
|
||||
"""Enregistre un état pour cette clé composite."""
|
||||
with self._lock:
|
||||
# Éviction simple : si plein, virer l'entrée la plus ancienne
|
||||
if (
|
||||
len(self._store) >= self.max_entries
|
||||
and composite_key not in self._store
|
||||
):
|
||||
oldest_key = min(
|
||||
self._store, key=lambda k: self._store[k].created_at
|
||||
)
|
||||
self._store.pop(oldest_key, None)
|
||||
|
||||
self._store[composite_key] = _CacheEntry(
|
||||
state=state,
|
||||
created_at=time.time(),
|
||||
phash=phash,
|
||||
)
|
||||
|
||||
def invalidate(self, composite_key: Optional[str] = None) -> None:
|
||||
"""
|
||||
Invalider une entrée ou tout le cache.
|
||||
|
||||
Args:
|
||||
composite_key: Clé à invalider. Si None, vide tout le cache.
|
||||
"""
|
||||
with self._lock:
|
||||
if composite_key is None:
|
||||
self._store.clear()
|
||||
else:
|
||||
self._store.pop(composite_key, None)
|
||||
self.invalidations += 1
|
||||
|
||||
def invalidate_if_changed(
|
||||
self,
|
||||
screenshot_path: str,
|
||||
threshold: float = 0.3,
|
||||
) -> bool:
|
||||
"""
|
||||
Invalider le cache si l'écran a suffisamment changé.
|
||||
|
||||
Compare le dhash du screenshot courant avec le phash (seul) de chaque
|
||||
entrée du cache. La décision est volontairement indépendante du reste
|
||||
de la clé composite : un changement visuel majeur rend toutes les
|
||||
entrées obsolètes, quel que soit le contexte.
|
||||
|
||||
Args:
|
||||
screenshot_path: Chemin du screenshot courant
|
||||
threshold: Proportion de bits qui doivent différer (0.0-1.0).
|
||||
0.3 = 30% (~19 bits sur 64) = changement significatif.
|
||||
|
||||
Returns:
|
||||
True si le cache a été invalidé, False sinon.
|
||||
"""
|
||||
if not self._store:
|
||||
return False
|
||||
|
||||
current_phash = compute_perceptual_hash(screenshot_path)
|
||||
|
||||
# Bits totaux : 64 pour un dhash 8x8 standard. On déduit via la
|
||||
# longueur hexa du hash courant pour rester générique.
|
||||
total_bits = len(current_phash) * 4
|
||||
if total_bits == 0:
|
||||
return False
|
||||
|
||||
threshold_bits = threshold * total_bits
|
||||
|
||||
with self._lock:
|
||||
if not self._store:
|
||||
return False
|
||||
|
||||
# Distance de Hamming minimale avec les phashes des entrées
|
||||
# (on regarde entry.phash, pas la clé composite).
|
||||
min_distance = None
|
||||
for entry in self._store.values():
|
||||
distance = _hamming_distance_hex(current_phash, entry.phash)
|
||||
if min_distance is None or distance < min_distance:
|
||||
min_distance = distance
|
||||
|
||||
if min_distance is not None and min_distance > threshold_bits:
|
||||
size_before = len(self._store)
|
||||
self._store.clear()
|
||||
self.invalidations += 1
|
||||
logger.debug(
|
||||
f"[ScreenStateCache] invalidate_if_changed: "
|
||||
f"distance={min_distance}/{total_bits} > "
|
||||
f"threshold={threshold_bits:.1f} → {size_before} entrées purgées"
|
||||
)
|
||||
return True
|
||||
return False
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# API haut niveau (context-aware)
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def get_or_compute(
|
||||
self,
|
||||
screenshot_path: str,
|
||||
compute_fn: Callable[[str], ScreenState],
|
||||
*,
|
||||
window_title: str = "",
|
||||
app_name: str = "",
|
||||
enable_ocr: bool = True,
|
||||
enable_ui_detection: bool = True,
|
||||
workflow_id: str = "",
|
||||
force_refresh: bool = False,
|
||||
) -> Tuple[ScreenState, bool, float]:
|
||||
"""
|
||||
Récupérer ou calculer le ScreenState pour un screenshot + contexte.
|
||||
|
||||
Clé de cache = composite(phash, window_title, app_name, enable_ocr,
|
||||
enable_ui_detection, workflow_id). Deux contextes différents partageant
|
||||
le même screenshot n'entrent PAS en collision.
|
||||
|
||||
Rétrocompatibilité : tous les kwargs de contexte ont une valeur par
|
||||
défaut. Un caller legacy qui n'a pas encore été adapté partagera la
|
||||
même entrée de cache qu'un autre caller legacy (comportement antérieur).
|
||||
|
||||
Args:
|
||||
screenshot_path: Chemin du screenshot
|
||||
compute_fn: Fonction qui construit un ScreenState si cache miss
|
||||
window_title: Titre de la fenêtre active (contexte visuel)
|
||||
app_name: Nom du process actif (contexte applicatif)
|
||||
enable_ocr: Flag runtime — différencie états avec/sans OCR
|
||||
enable_ui_detection: Flag runtime — différencie états avec/sans UI
|
||||
workflow_id: ID du workflow — isolation inter-workflows
|
||||
force_refresh: Ignorer le cache et recalculer
|
||||
|
||||
Returns:
|
||||
Tuple (state, cache_hit, elapsed_ms)
|
||||
"""
|
||||
t0 = time.time()
|
||||
phash = compute_perceptual_hash(screenshot_path)
|
||||
composite_key = _make_cache_key(
|
||||
phash=phash,
|
||||
window_title=window_title,
|
||||
app_name=app_name,
|
||||
enable_ocr=enable_ocr,
|
||||
enable_ui_detection=enable_ui_detection,
|
||||
workflow_id=workflow_id,
|
||||
)
|
||||
|
||||
if not force_refresh:
|
||||
cached = self._get(composite_key)
|
||||
if cached is not None:
|
||||
self.hits += 1
|
||||
elapsed_ms = (time.time() - t0) * 1000
|
||||
logger.debug(
|
||||
f"[ScreenStateCache] HIT key={composite_key[:24]}… "
|
||||
f"({elapsed_ms:.1f}ms)"
|
||||
)
|
||||
return cached, True, elapsed_ms
|
||||
|
||||
# Cache miss → calcul complet
|
||||
self.misses += 1
|
||||
state = compute_fn(screenshot_path)
|
||||
self._set(composite_key, phash, state)
|
||||
elapsed_ms = (time.time() - t0) * 1000
|
||||
logger.debug(
|
||||
f"[ScreenStateCache] MISS key={composite_key[:24]}… "
|
||||
f"({elapsed_ms:.1f}ms)"
|
||||
)
|
||||
return state, False, elapsed_ms
|
||||
|
||||
def stats(self) -> dict:
|
||||
"""Retourne les métriques du cache."""
|
||||
with self._lock:
|
||||
total = self.hits + self.misses
|
||||
return {
|
||||
"hits": self.hits,
|
||||
"misses": self.misses,
|
||||
"invalidations": self.invalidations,
|
||||
"hit_rate": self.hits / total if total > 0 else 0.0,
|
||||
"size": len(self._store),
|
||||
"max_entries": self.max_entries,
|
||||
"ttl_seconds": self.ttl_seconds,
|
||||
}
|
||||
|
||||
def __len__(self) -> int:
|
||||
with self._lock:
|
||||
return len(self._store)
|
||||
@@ -354,66 +354,306 @@ class WorkflowPipeline:
|
||||
# =========================================================================
|
||||
# Mode MATCHING : Reconnaissance de l'état actuel
|
||||
# =========================================================================
|
||||
|
||||
|
||||
def match_current_state_from_state(
|
||||
self,
|
||||
screen_state: ScreenState,
|
||||
workflow_id: Optional[str] = None,
|
||||
*,
|
||||
min_similarity: float = 0.5,
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Matcher un ``ScreenState`` enrichi contre les nodes d'un workflow.
|
||||
|
||||
Lot E — premier vrai matching context-aware. Cette méthode consomme
|
||||
directement le ``ScreenState`` déjà construit par ``ExecutionLoop``
|
||||
(avec ``window_title``, ``detected_text`` et ``ui_elements``
|
||||
renseignés par le ``ScreenAnalyzer``) au lieu de reconstruire un
|
||||
stub vide avec ``window_title="Unknown"``.
|
||||
|
||||
Stratégie :
|
||||
1. Si le ``HierarchicalMatcher`` est disponible ET que le workflow
|
||||
cible est chargeable, on privilégie le matching multi-niveau
|
||||
(fenêtre → région → élément) qui exploite pleinement les
|
||||
``ui_elements`` et le ``window_title``.
|
||||
2. Sinon on retombe sur le matching par embedding via FAISS
|
||||
(même logique que l'ancien ``match_current_state``, mais avec
|
||||
le ``ScreenState`` fourni, pas un stub).
|
||||
|
||||
Args:
|
||||
screen_state: ``ScreenState`` complet (ui_elements + detected_text
|
||||
+ window_info) construit en amont par l'``ExecutionLoop``.
|
||||
workflow_id: ID du workflow cible (tous si None).
|
||||
min_similarity: seuil minimum de confidence pour considérer un
|
||||
match valide. Conserve la sémantique historique (0.5 pour
|
||||
le hiérarchique, 0.85 pour le FAISS fallback).
|
||||
|
||||
Returns:
|
||||
Dict avec ``node_id``, ``workflow_id``, ``confidence`` (+ détails
|
||||
du matching hiérarchique si applicable), ou ``None`` si aucun
|
||||
match ne dépasse le seuil.
|
||||
"""
|
||||
logger.debug(
|
||||
"Matching ScreenState (app=%s, title=%s, ui_elements=%d, "
|
||||
"detected_text=%d)",
|
||||
screen_state.window.app_name,
|
||||
screen_state.window.window_title,
|
||||
len(screen_state.ui_elements),
|
||||
len(screen_state.perception.detected_text),
|
||||
)
|
||||
|
||||
# --- Stratégie 1 : matching hiérarchique si workflow disponible ---
|
||||
if workflow_id:
|
||||
workflow = self.load_workflow(workflow_id)
|
||||
if workflow is not None and getattr(workflow, "nodes", None):
|
||||
try:
|
||||
hier_result = self._match_hierarchical_from_state(
|
||||
screen_state=screen_state,
|
||||
workflow=workflow,
|
||||
workflow_id=workflow_id,
|
||||
min_similarity=min_similarity,
|
||||
)
|
||||
if hier_result is not None:
|
||||
return hier_result
|
||||
except Exception as exc:
|
||||
# Ne jamais casser le matching sur une erreur du
|
||||
# matcher hiérarchique : on retombe sur FAISS.
|
||||
logger.debug(
|
||||
f"Hierarchical matching failed, fallback FAISS: {exc}"
|
||||
)
|
||||
|
||||
# --- Stratégie 2 : fallback embedding + FAISS ---
|
||||
return self._match_via_faiss(
|
||||
screen_state=screen_state,
|
||||
workflow_id=workflow_id,
|
||||
min_similarity=min_similarity,
|
||||
)
|
||||
|
||||
def _match_hierarchical_from_state(
|
||||
self,
|
||||
screen_state: ScreenState,
|
||||
workflow: Workflow,
|
||||
workflow_id: str,
|
||||
min_similarity: float,
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Déléguer le matching au ``HierarchicalMatcher`` en extrayant
|
||||
``window_info``, ``detected_elements`` et le screenshot à partir du
|
||||
``ScreenState`` fourni. Factorise la logique de ``match_hierarchical``
|
||||
sans re-ouvrir l'image si ce n'est pas nécessaire.
|
||||
"""
|
||||
# Reconstruire window_info à partir du ScreenState (pas "Unknown")
|
||||
window_info = {
|
||||
"title": screen_state.window.window_title,
|
||||
"app_name": screen_state.window.app_name,
|
||||
"window_title": screen_state.window.window_title,
|
||||
}
|
||||
detected_elements = list(screen_state.ui_elements)
|
||||
|
||||
# Ouvrir le screenshot si nécessaire (le matcher peut en avoir besoin
|
||||
# pour du matching au niveau région). Si le chemin n'existe pas, on
|
||||
# passe None et laisse le matcher travailler avec window + elements.
|
||||
screenshot = None
|
||||
path = screen_state.raw.screenshot_path
|
||||
if path:
|
||||
try:
|
||||
from PIL import Image
|
||||
screenshot = Image.open(path)
|
||||
except Exception as exc:
|
||||
logger.debug(f"Screenshot unavailable for hierarchical match: {exc}")
|
||||
|
||||
# Contexte temporel par workflow
|
||||
if workflow_id not in self._temporal_context:
|
||||
self._temporal_context[workflow_id] = TemporalContext()
|
||||
temporal_context = self._temporal_context[workflow_id]
|
||||
|
||||
result: MatchResult = self.hierarchical_matcher.match(
|
||||
screenshot=screenshot,
|
||||
workflow=workflow,
|
||||
window_info=window_info,
|
||||
detected_elements=detected_elements,
|
||||
temporal_context=temporal_context,
|
||||
)
|
||||
|
||||
if result.confidence < min_similarity:
|
||||
logger.debug(
|
||||
f"Hierarchical match below threshold: {result.confidence:.3f} "
|
||||
f"(min={min_similarity})"
|
||||
)
|
||||
return None
|
||||
|
||||
# Mémoriser le match pour le boost temporel suivant
|
||||
temporal_context.add_match(result.node_id, result.confidence)
|
||||
|
||||
return {
|
||||
"node_id": result.node_id,
|
||||
"workflow_id": workflow_id,
|
||||
"confidence": result.confidence,
|
||||
"window_confidence": result.window_confidence,
|
||||
"region_confidence": result.region_confidence,
|
||||
"element_confidence": result.element_confidence,
|
||||
"temporal_boost": result.temporal_boost,
|
||||
"matched_variant": result.matched_variant,
|
||||
"alternatives": [
|
||||
{"node_id": alt.node_id, "confidence": alt.confidence}
|
||||
for alt in result.alternatives
|
||||
],
|
||||
"match_time_ms": result.match_time_ms,
|
||||
"match_type": "hierarchical",
|
||||
}
|
||||
|
||||
def _match_via_faiss(
|
||||
self,
|
||||
screen_state: ScreenState,
|
||||
workflow_id: Optional[str],
|
||||
min_similarity: float,
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Fallback embedding + recherche FAISS. On réutilise le ``ScreenState``
|
||||
fourni (donc ses ``ui_elements`` et son ``window_title`` réels)
|
||||
au lieu d'en recréer un stub.
|
||||
"""
|
||||
# Le seuil FAISS historique était 0.85. On l'honore comme plancher
|
||||
# par défaut mais on respecte un ``min_similarity`` plus permissif
|
||||
# si l'appelant en fournit un (hiérarchique pouvant déjà avoir échoué).
|
||||
threshold = max(min_similarity, 0.85)
|
||||
|
||||
state_embedding = self.embedding_builder.build(screen_state)
|
||||
query_vector = state_embedding.get_vector()
|
||||
|
||||
results = self.faiss_manager.search(query_vector, k=5)
|
||||
if not results:
|
||||
logger.debug("No match found in FAISS")
|
||||
return None
|
||||
|
||||
for result in results:
|
||||
metadata = result.get("metadata", {})
|
||||
result_workflow_id = metadata.get("workflow_id")
|
||||
|
||||
if workflow_id and result_workflow_id != workflow_id:
|
||||
continue
|
||||
|
||||
similarity = result.get("similarity", 0)
|
||||
if similarity >= threshold:
|
||||
return {
|
||||
"node_id": metadata.get("node_id"),
|
||||
"workflow_id": result_workflow_id,
|
||||
"confidence": similarity,
|
||||
"state_embedding_id": state_embedding.embedding_id,
|
||||
"match_type": "faiss",
|
||||
}
|
||||
|
||||
logger.debug(
|
||||
f"Best FAISS match below threshold: "
|
||||
f"{results[0].get('similarity', 0):.3f} (min={threshold})"
|
||||
)
|
||||
return None
|
||||
|
||||
def match_current_state(
|
||||
self,
|
||||
screenshot_path: str,
|
||||
workflow_id: Optional[str] = None,
|
||||
window_title: Optional[str] = None
|
||||
window_title: Optional[str] = None,
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Identifier dans quel node se trouve l'écran actuel.
|
||||
|
||||
Identifier dans quel node se trouve l'écran actuel (API legacy).
|
||||
|
||||
Lot E — cette méthode est désormais un **wrapper** de rétrocompat :
|
||||
elle construit un ``ScreenState`` enrichi via ``ScreenAnalyzer``
|
||||
(au lieu d'un stub avec ``window_title="Unknown"``) puis délègue
|
||||
à ``match_current_state_from_state``. Garantit la compat pour les
|
||||
callers externes qui ne manipulent que le chemin du screenshot.
|
||||
|
||||
Args:
|
||||
screenshot_path: Chemin vers le screenshot actuel
|
||||
workflow_id: ID du workflow à matcher (tous si None)
|
||||
window_title: Titre de fenêtre pour contexte
|
||||
|
||||
screenshot_path: Chemin vers le screenshot actuel.
|
||||
workflow_id: ID du workflow à matcher (tous si None).
|
||||
window_title: Titre de fenêtre pour contexte (utilisé comme
|
||||
hint si le ScreenAnalyzer n'est pas disponible).
|
||||
|
||||
Returns:
|
||||
Dict avec node_id, workflow_id, confidence, ou None si pas de match
|
||||
Dict avec ``node_id``, ``workflow_id``, ``confidence``, ou
|
||||
``None`` si pas de match.
|
||||
"""
|
||||
logger.debug(f"Matching screenshot: {screenshot_path}")
|
||||
|
||||
# Créer un ScreenState temporaire
|
||||
|
||||
# Construire un ScreenState enrichi via le ScreenAnalyzer partagé.
|
||||
screen_state = self._build_screen_state_for_matching(
|
||||
screenshot_path=screenshot_path,
|
||||
workflow_id=workflow_id,
|
||||
window_title=window_title,
|
||||
)
|
||||
|
||||
return self.match_current_state_from_state(
|
||||
screen_state=screen_state,
|
||||
workflow_id=workflow_id,
|
||||
)
|
||||
|
||||
def _build_screen_state_for_matching(
|
||||
self,
|
||||
screenshot_path: str,
|
||||
workflow_id: Optional[str],
|
||||
window_title: Optional[str],
|
||||
) -> ScreenState:
|
||||
"""
|
||||
Construire un ``ScreenState`` pour l'API legacy ``match_current_state``.
|
||||
|
||||
Tente d'utiliser le ``ScreenAnalyzer`` partagé ; en cas d'échec,
|
||||
retombe sur un stub minimaliste (équivalent fonctionnel de l'ancien
|
||||
comportement, mais clairement isolé ici).
|
||||
"""
|
||||
from core.models.screen_state import (
|
||||
WindowContext, RawLevel, PerceptionLevel, ContextLevel, EmbeddingRef
|
||||
)
|
||||
|
||||
screenshot_path = Path(screenshot_path)
|
||||
|
||||
|
||||
path = Path(screenshot_path)
|
||||
|
||||
# Tentative 1 : ScreenAnalyzer partagé (résultat enrichi)
|
||||
try:
|
||||
from core.pipeline import get_screen_analyzer
|
||||
analyzer = get_screen_analyzer()
|
||||
if analyzer is not None:
|
||||
window_info = None
|
||||
if window_title:
|
||||
window_info = {"title": window_title, "app_name": "unknown"}
|
||||
return analyzer.analyze(
|
||||
str(path),
|
||||
window_info=window_info,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug(
|
||||
f"ScreenAnalyzer unavailable in match_current_state wrapper: {exc}"
|
||||
)
|
||||
|
||||
# Tentative 2 : stub minimal (comportement legacy d'urgence)
|
||||
window = WindowContext(
|
||||
app_name="unknown",
|
||||
window_title=window_title or "Unknown",
|
||||
screen_resolution=[1920, 1080],
|
||||
workspace="main"
|
||||
workspace="main",
|
||||
)
|
||||
|
||||
raw = RawLevel(
|
||||
screenshot_path=str(screenshot_path),
|
||||
screenshot_path=str(path),
|
||||
capture_method="manual",
|
||||
file_size_bytes=screenshot_path.stat().st_size if screenshot_path.exists() else 0
|
||||
file_size_bytes=path.stat().st_size if path.exists() else 0,
|
||||
)
|
||||
|
||||
perception = PerceptionLevel(
|
||||
embedding=EmbeddingRef(
|
||||
provider="openclip_ViT-B-32",
|
||||
vector_id="temp",
|
||||
dimensions=512
|
||||
dimensions=512,
|
||||
),
|
||||
detected_text=[],
|
||||
text_detection_method="pending",
|
||||
confidence_avg=0.0
|
||||
confidence_avg=0.0,
|
||||
)
|
||||
|
||||
context = ContextLevel(
|
||||
current_workflow_candidate=workflow_id,
|
||||
workflow_step=None,
|
||||
user_id="matcher",
|
||||
tags=[],
|
||||
business_variables={}
|
||||
business_variables={},
|
||||
)
|
||||
|
||||
current_state = ScreenState(
|
||||
return ScreenState(
|
||||
screen_state_id=f"match_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
|
||||
timestamp=datetime.now(),
|
||||
session_id="matching",
|
||||
@@ -421,39 +661,8 @@ class WorkflowPipeline:
|
||||
raw=raw,
|
||||
perception=perception,
|
||||
context=context,
|
||||
ui_elements=[]
|
||||
ui_elements=[],
|
||||
)
|
||||
|
||||
# Calculer embedding
|
||||
state_embedding = self.embedding_builder.build(current_state)
|
||||
query_vector = state_embedding.get_vector()
|
||||
|
||||
# Rechercher dans FAISS
|
||||
results = self.faiss_manager.search(query_vector, k=5)
|
||||
|
||||
if not results:
|
||||
logger.debug("No match found in FAISS")
|
||||
return None
|
||||
|
||||
# Filtrer par workflow si spécifié
|
||||
for result in results:
|
||||
metadata = result.get("metadata", {})
|
||||
result_workflow_id = metadata.get("workflow_id")
|
||||
|
||||
if workflow_id and result_workflow_id != workflow_id:
|
||||
continue
|
||||
|
||||
similarity = result.get("similarity", 0)
|
||||
if similarity >= 0.85: # Seuil de matching
|
||||
return {
|
||||
"node_id": metadata.get("node_id"),
|
||||
"workflow_id": result_workflow_id,
|
||||
"confidence": similarity,
|
||||
"state_embedding_id": state_embedding.embedding_id
|
||||
}
|
||||
|
||||
logger.debug(f"Best match below threshold: {results[0].get('similarity', 0):.3f}")
|
||||
return None
|
||||
|
||||
def match_hierarchical(
|
||||
self,
|
||||
@@ -548,17 +757,56 @@ class WorkflowPipeline:
|
||||
def get_next_action(
|
||||
self,
|
||||
workflow_id: str,
|
||||
current_node_id: str
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
current_node_id: str,
|
||||
screen_state: Optional[ScreenState] = None,
|
||||
strategy: str = "best",
|
||||
source_similarity: float = 1.0,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Obtenir la prochaine action à exécuter.
|
||||
|
||||
|
||||
Contrat normalisé (Lot A — avril 2026) : retourne **toujours** un
|
||||
dict avec une clé ``status`` non-ambiguë. Le ``None`` ambigu qui
|
||||
confondait "workflow terminé" et "aucun edge valide" a été
|
||||
supprimé : l'appelant (ExecutionLoop) peut désormais distinguer
|
||||
ces cas pour déclencher une pause supervisée plutôt qu'une fin
|
||||
de workflow faux-positive.
|
||||
|
||||
Sélection d'edge (C3) :
|
||||
- Filtre dur sur ``pre_conditions`` (EdgeConstraints)
|
||||
- Ranking par score composite (success_rate, target_match, recency)
|
||||
- Tiebreak : success_rate le plus haut
|
||||
|
||||
Args:
|
||||
workflow_id: ID du workflow
|
||||
current_node_id: ID du node actuel
|
||||
|
||||
screen_state: État courant, requis pour évaluer les
|
||||
``pre_conditions`` et le match ``target_spec``. Si None,
|
||||
fallback sur la logique sans filtre de contraintes.
|
||||
strategy: ``"best"`` (défaut, scoring complet) ou ``"first"``
|
||||
(mode legacy, premier edge sans tri)
|
||||
source_similarity: confiance du matching (``match_current_state``)
|
||||
qui a identifié ``current_node_id``. Propagée à l'EdgeScorer
|
||||
pour activer la précondition ``min_source_similarity`` des
|
||||
edges. Défaut ``1.0`` pour compat avec les appelants qui
|
||||
ne la fournissent pas encore (Lot B — avril 2026).
|
||||
|
||||
Returns:
|
||||
Dict avec action, target_node, confidence, ou None
|
||||
Dict avec l'une des formes suivantes :
|
||||
|
||||
- ``{"status": "selected", "edge_id": str, "action": dict,
|
||||
"target_node": str, "confidence": float, "score": float}``
|
||||
→ edge sélectionné, l'ExecutionLoop doit l'exécuter.
|
||||
|
||||
- ``{"status": "terminal"}`` → le node courant n'a pas
|
||||
d'outgoing_edge (fin légitime de workflow).
|
||||
|
||||
- ``{"status": "blocked", "reason": str}`` → il existe des
|
||||
outgoing_edges mais aucun ne satisfait les conditions
|
||||
(``reason="no_valid_edge"``), ou le workflow est introuvable
|
||||
(``reason="workflow_not_found"``). L'ExecutionLoop doit
|
||||
déclencher une pause supervisée et ne **jamais** traiter
|
||||
ce cas comme un succès.
|
||||
"""
|
||||
workflow = self._workflows.get(workflow_id)
|
||||
if not workflow:
|
||||
@@ -569,23 +817,44 @@ class WorkflowPipeline:
|
||||
self._workflows[workflow_id] = workflow
|
||||
else:
|
||||
logger.error(f"Workflow not found: {workflow_id}")
|
||||
return None
|
||||
|
||||
return {"status": "blocked", "reason": "workflow_not_found"}
|
||||
|
||||
# Trouver les edges sortants du node actuel
|
||||
outgoing_edges = workflow.get_outgoing_edges(current_node_id)
|
||||
|
||||
|
||||
if not outgoing_edges:
|
||||
# Aucun outgoing_edge = fin légitime du workflow
|
||||
logger.info(f"No outgoing edges from node {current_node_id}")
|
||||
return None
|
||||
|
||||
# Pour l'instant, prendre le premier edge (TODO: logique de sélection)
|
||||
edge = outgoing_edges[0]
|
||||
|
||||
return {"status": "terminal"}
|
||||
|
||||
# Sélection robuste via EdgeScorer (C3)
|
||||
from core.pipeline.edge_scorer import EdgeScorer
|
||||
|
||||
scorer = EdgeScorer()
|
||||
edge = scorer.select_best(
|
||||
outgoing_edges,
|
||||
screen_state=screen_state,
|
||||
strategy=strategy,
|
||||
source_similarity=source_similarity,
|
||||
)
|
||||
|
||||
if edge is None:
|
||||
# Il y avait des candidats mais aucun n'a passé les filtres.
|
||||
# On NE retourne PAS "terminal" : l'ExecutionLoop doit traiter
|
||||
# ce cas comme un blocage et demander de l'aide.
|
||||
logger.warning(
|
||||
f"No valid edge from {current_node_id} "
|
||||
f"({len(outgoing_edges)} candidates rejected)"
|
||||
)
|
||||
return {"status": "blocked", "reason": "no_valid_edge"}
|
||||
|
||||
return {
|
||||
"status": "selected",
|
||||
"edge_id": edge.edge_id,
|
||||
"action": edge.action.to_dict(),
|
||||
"target_node": edge.to_node,
|
||||
"confidence": edge.stats.success_rate if edge.stats else 1.0
|
||||
"confidence": edge.stats.success_rate if edge.stats else 1.0,
|
||||
"score": edge.stats.success_rate if edge.stats else 1.0,
|
||||
}
|
||||
|
||||
def should_execute_automatically(self, workflow_id: str) -> bool:
|
||||
@@ -759,10 +1028,11 @@ class WorkflowPipeline:
|
||||
current_node_id = match_result["node_id"]
|
||||
logger.info(f"Matched current state to node: {current_node_id} (confidence: {match_result['confidence']:.3f})")
|
||||
|
||||
# 2. Obtenir la prochaine action
|
||||
# 2. Obtenir la prochaine action (contrat dict avec status explicite)
|
||||
action_info = self.get_next_action(workflow_id, current_node_id)
|
||||
|
||||
if not action_info:
|
||||
action_status = action_info.get("status")
|
||||
|
||||
if action_status == "terminal":
|
||||
return {
|
||||
"execution_id": execution_id,
|
||||
"workflow_id": workflow_id,
|
||||
@@ -771,9 +1041,21 @@ class WorkflowPipeline:
|
||||
"message": "Workflow completed - no more actions",
|
||||
"current_node": current_node_id,
|
||||
"execution_time_ms": (datetime.now() - start_time).total_seconds() * 1000,
|
||||
"correlation_id": execution_id
|
||||
"correlation_id": execution_id,
|
||||
}
|
||||
|
||||
|
||||
if action_status == "blocked":
|
||||
return {
|
||||
"execution_id": execution_id,
|
||||
"workflow_id": workflow_id,
|
||||
"success": False,
|
||||
"step_type": "action_selection",
|
||||
"error": f"No valid edge: {action_info.get('reason', 'unknown')}",
|
||||
"current_node": current_node_id,
|
||||
"execution_time_ms": (datetime.now() - start_time).total_seconds() * 1000,
|
||||
"correlation_id": execution_id,
|
||||
}
|
||||
|
||||
logger.info(f"Next action: {action_info['action']['type']} -> {action_info['target_node']}")
|
||||
|
||||
# 3. Charger le workflow pour obtenir l'edge complet
|
||||
|
||||
@@ -125,25 +125,47 @@ class WorkflowPipelineEnhanced:
|
||||
current_node_id = match_result["node_id"]
|
||||
logger.info(f"Matched current state to node: {current_node_id} (confidence: {match_result['confidence']:.3f})")
|
||||
|
||||
# 2. Obtenir la prochaine action
|
||||
# 2. Obtenir la prochaine action (contrat dict avec status explicite)
|
||||
action_info = self.get_next_action(workflow_id, current_node_id)
|
||||
|
||||
if not action_info:
|
||||
# Workflow terminé
|
||||
action_status = action_info.get("status")
|
||||
|
||||
if action_status == "terminal":
|
||||
# Workflow terminé (aucun outgoing_edge = fin légitime)
|
||||
performance_metrics.total_execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000
|
||||
|
||||
|
||||
result = WorkflowExecutionResult.workflow_complete(
|
||||
execution_id=execution_id,
|
||||
workflow_id=workflow_id,
|
||||
current_node=current_node_id,
|
||||
performance_metrics=performance_metrics
|
||||
performance_metrics=performance_metrics,
|
||||
)
|
||||
result.correlation_id = correlation_id
|
||||
result.match_result = match_result
|
||||
|
||||
|
||||
logger.info(f"Workflow {workflow_id} completed at node {current_node_id}")
|
||||
return result
|
||||
|
||||
|
||||
if action_status == "blocked":
|
||||
# Des edges existent mais aucun ne passe les filtres :
|
||||
# c'est un blocage, pas une fin de workflow.
|
||||
performance_metrics.total_execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000
|
||||
|
||||
result = WorkflowExecutionResult.error(
|
||||
execution_id=execution_id,
|
||||
workflow_id=workflow_id,
|
||||
error_message=f"No valid edge: {action_info.get('reason', 'unknown')}",
|
||||
step_type="action_selection",
|
||||
current_node=current_node_id,
|
||||
performance_metrics=performance_metrics,
|
||||
)
|
||||
result.correlation_id = correlation_id
|
||||
|
||||
logger.warning(
|
||||
f"Workflow {workflow_id} blocked at node {current_node_id}: "
|
||||
f"{action_info.get('reason')}"
|
||||
)
|
||||
return result
|
||||
|
||||
logger.info(f"Next action: {action_info['action']['type']} -> {action_info['target_node']}")
|
||||
|
||||
# 3. Charger le workflow pour obtenir l'edge complet
|
||||
|
||||
308
core/security/signed_serializer.py
Normal file
308
core/security/signed_serializer.py
Normal file
@@ -0,0 +1,308 @@
|
||||
"""
|
||||
Sérialiseur signé — RPA Vision V3
|
||||
|
||||
Remplace les usages de `pickle.load` (vulnérables à la désérialisation arbitraire
|
||||
de code) par une sérialisation JSON signée via HMAC-SHA256.
|
||||
|
||||
Principes :
|
||||
- Les données sont sérialisées en JSON (avec support des types numpy / datetime
|
||||
via un encodeur custom).
|
||||
- Une signature HMAC-SHA256 est calculée sur le JSON avec une clé secrète
|
||||
dérivée de `RPA_SIGNING_KEY` (ou, à défaut, de `TOKEN_SECRET_KEY`).
|
||||
- À la lecture, la signature est vérifiée AVANT tout parsing applicatif.
|
||||
- Rétrocompatibilité : un fallback `pickle.load` est disponible pour migrer
|
||||
les anciens fichiers. Il logue un WARNING et doit être suivi d'une
|
||||
ré-écriture en JSON signé.
|
||||
|
||||
ATTENTION : n'utiliser le fallback pickle que sur des fichiers dont la source
|
||||
est réputée sûre (locale + protégée). Le fallback est désactivable via la
|
||||
variable d'environnement `RPA_ALLOW_PICKLE_FALLBACK=0`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import hashlib
|
||||
import hmac
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import pickle
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Clé de signature
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
_SIGNATURE_ALGO = "sha256"
|
||||
_SIGNATURE_HEADER = b"RPA_SIGNED_V1\n" # Marqueur de format signé
|
||||
|
||||
|
||||
def _resolve_signing_key() -> bytes:
|
||||
"""Récupère la clé de signature HMAC.
|
||||
|
||||
Ordre de priorité :
|
||||
1. RPA_SIGNING_KEY (dédiée à la signature de fichiers)
|
||||
2. TOKEN_SECRET_KEY (clé déjà utilisée pour signer les tokens API)
|
||||
3. Clé dérivée en dev (avec WARNING)
|
||||
|
||||
La clé dev est stable pour une même machine (dérivée du hostname + path)
|
||||
afin que les lectures/écritures locales restent cohérentes en l'absence
|
||||
de configuration, tout en refusant de valider des fichiers produits
|
||||
ailleurs.
|
||||
"""
|
||||
explicit = os.getenv("RPA_SIGNING_KEY", "").strip()
|
||||
if explicit:
|
||||
return explicit.encode("utf-8")
|
||||
|
||||
fallback = os.getenv("TOKEN_SECRET_KEY", "").strip()
|
||||
if fallback:
|
||||
return fallback.encode("utf-8")
|
||||
|
||||
# Clé dev dérivée : non cryptographiquement sûre, juste pour éviter des
|
||||
# erreurs en dev local. On loggue explicitement.
|
||||
logger.warning(
|
||||
"RPA_SIGNING_KEY et TOKEN_SECRET_KEY non définis — "
|
||||
"utilisation d'une clé dérivée locale. "
|
||||
"Définir RPA_SIGNING_KEY en production."
|
||||
)
|
||||
seed = f"rpa-vision-v3::{os.uname().nodename}::dev-signing" # type: ignore[attr-defined]
|
||||
return hashlib.sha256(seed.encode("utf-8")).digest()
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Encodage JSON étendu (numpy, datetime, Path, bytes)
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
class _RPAJSONEncoder(json.JSONEncoder):
|
||||
"""Encodeur JSON supportant numpy / datetime / Path / bytes."""
|
||||
|
||||
def default(self, obj: Any) -> Any: # noqa: D401 - API json standard
|
||||
if isinstance(obj, np.ndarray):
|
||||
return {
|
||||
"__type__": "ndarray",
|
||||
"dtype": str(obj.dtype),
|
||||
"shape": list(obj.shape),
|
||||
"data": base64.b64encode(obj.tobytes()).decode("ascii"),
|
||||
}
|
||||
if isinstance(obj, (np.integer,)):
|
||||
return int(obj)
|
||||
if isinstance(obj, (np.floating,)):
|
||||
return float(obj)
|
||||
if isinstance(obj, (np.bool_,)):
|
||||
return bool(obj)
|
||||
if isinstance(obj, datetime):
|
||||
return {"__type__": "datetime", "iso": obj.isoformat()}
|
||||
if isinstance(obj, timedelta):
|
||||
return {"__type__": "timedelta", "seconds": obj.total_seconds()}
|
||||
if isinstance(obj, Path):
|
||||
return {"__type__": "path", "value": str(obj)}
|
||||
if isinstance(obj, bytes):
|
||||
return {
|
||||
"__type__": "bytes",
|
||||
"data": base64.b64encode(obj).decode("ascii"),
|
||||
}
|
||||
if isinstance(obj, set):
|
||||
return {"__type__": "set", "items": list(obj)}
|
||||
return super().default(obj)
|
||||
|
||||
|
||||
def _json_object_hook(obj: Any) -> Any:
|
||||
"""Reconstruit les types étendus depuis le JSON."""
|
||||
if not isinstance(obj, dict):
|
||||
return obj
|
||||
tag = obj.get("__type__")
|
||||
if tag is None:
|
||||
return obj
|
||||
if tag == "ndarray":
|
||||
raw = base64.b64decode(obj["data"])
|
||||
arr = np.frombuffer(raw, dtype=np.dtype(obj["dtype"]))
|
||||
return arr.reshape(obj["shape"]).copy()
|
||||
if tag == "datetime":
|
||||
return datetime.fromisoformat(obj["iso"])
|
||||
if tag == "timedelta":
|
||||
return timedelta(seconds=float(obj["seconds"]))
|
||||
if tag == "path":
|
||||
return Path(obj["value"])
|
||||
if tag == "bytes":
|
||||
return base64.b64decode(obj["data"])
|
||||
if tag == "set":
|
||||
return set(obj.get("items", []))
|
||||
return obj
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Erreurs dédiées
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
class SignedSerializerError(Exception):
|
||||
"""Erreur de base du module."""
|
||||
|
||||
|
||||
class SignatureVerificationError(SignedSerializerError):
|
||||
"""Signature HMAC invalide : le fichier a été altéré ou forgé."""
|
||||
|
||||
|
||||
class UnsupportedFormatError(SignedSerializerError):
|
||||
"""Le fichier n'est ni au format signé, ni reconnu comme pickle legacy."""
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# API publique
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
def _compute_hmac(payload: bytes, key: bytes) -> str:
|
||||
return hmac.new(key, payload, hashlib.sha256).hexdigest()
|
||||
|
||||
|
||||
def dumps_signed(data: Any, key: Optional[bytes] = None) -> bytes:
|
||||
"""Sérialise `data` en JSON signé HMAC-SHA256.
|
||||
|
||||
Format binaire retourné :
|
||||
b"RPA_SIGNED_V1\n" + utf8(json({"hmac": "<hex>", "payload": <data>}))
|
||||
|
||||
Le HMAC couvre le JSON canonique de `payload` (keys triées,
|
||||
séparateurs compacts) pour qu'un même objet produise toujours la
|
||||
même signature.
|
||||
"""
|
||||
signing_key = key if key is not None else _resolve_signing_key()
|
||||
payload_json = json.dumps(
|
||||
data,
|
||||
cls=_RPAJSONEncoder,
|
||||
sort_keys=True,
|
||||
separators=(",", ":"),
|
||||
ensure_ascii=False,
|
||||
).encode("utf-8")
|
||||
signature = _compute_hmac(payload_json, signing_key)
|
||||
envelope = {"hmac": signature, "payload_b64": base64.b64encode(payload_json).decode("ascii")}
|
||||
body = json.dumps(envelope, separators=(",", ":"), ensure_ascii=False).encode("utf-8")
|
||||
return _SIGNATURE_HEADER + body
|
||||
|
||||
|
||||
def loads_signed(raw: bytes, key: Optional[bytes] = None) -> Any:
|
||||
"""Désérialise un blob produit par `dumps_signed` après vérification HMAC."""
|
||||
if not raw.startswith(_SIGNATURE_HEADER):
|
||||
raise UnsupportedFormatError("Marqueur RPA_SIGNED_V1 absent.")
|
||||
signing_key = key if key is not None else _resolve_signing_key()
|
||||
body = raw[len(_SIGNATURE_HEADER):]
|
||||
try:
|
||||
envelope = json.loads(body.decode("utf-8"))
|
||||
except (UnicodeDecodeError, json.JSONDecodeError) as exc:
|
||||
raise SignedSerializerError(f"Enveloppe JSON invalide : {exc}") from exc
|
||||
|
||||
if not isinstance(envelope, dict):
|
||||
raise SignedSerializerError("Enveloppe inattendue.")
|
||||
signature = envelope.get("hmac")
|
||||
payload_b64 = envelope.get("payload_b64")
|
||||
if not isinstance(signature, str) or not isinstance(payload_b64, str):
|
||||
raise SignedSerializerError("Enveloppe mal formée (hmac / payload_b64).")
|
||||
|
||||
try:
|
||||
payload_bytes = base64.b64decode(payload_b64.encode("ascii"), validate=True)
|
||||
except Exception as exc: # noqa: BLE001 - base64 peut lever plusieurs erreurs
|
||||
raise SignedSerializerError(f"Payload base64 invalide : {exc}") from exc
|
||||
|
||||
expected = _compute_hmac(payload_bytes, signing_key)
|
||||
if not hmac.compare_digest(expected, signature):
|
||||
raise SignatureVerificationError(
|
||||
"Signature HMAC invalide — fichier altéré ou clé différente."
|
||||
)
|
||||
|
||||
return json.loads(payload_bytes.decode("utf-8"), object_hook=_json_object_hook)
|
||||
|
||||
|
||||
def _pickle_fallback_allowed() -> bool:
|
||||
return os.getenv("RPA_ALLOW_PICKLE_FALLBACK", "1") != "0"
|
||||
|
||||
|
||||
def save_signed(path: Union[str, Path], data: Any, key: Optional[bytes] = None) -> None:
|
||||
"""Écrit `data` sur disque dans le format JSON signé."""
|
||||
path = Path(path)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
blob = dumps_signed(data, key=key)
|
||||
tmp = path.with_suffix(path.suffix + ".tmp")
|
||||
with open(tmp, "wb") as fp:
|
||||
fp.write(blob)
|
||||
os.replace(tmp, path)
|
||||
|
||||
|
||||
def load_signed(
|
||||
path: Union[str, Path],
|
||||
*,
|
||||
allow_pickle_fallback: bool = True,
|
||||
migrate_on_fallback: bool = True,
|
||||
pickle_loader: Optional[Callable[[io.BufferedReader], Any]] = None,
|
||||
key: Optional[bytes] = None,
|
||||
) -> Any:
|
||||
"""Charge un fichier sauvegardé par `save_signed`.
|
||||
|
||||
Si le fichier n'est pas au format signé, et si `allow_pickle_fallback`
|
||||
est vrai (ET `RPA_ALLOW_PICKLE_FALLBACK != "0"`), tente un
|
||||
`pickle.load()` pour migrer les anciens fichiers. Dans ce cas, un
|
||||
WARNING est émis et le fichier est ré-écrit en JSON signé si
|
||||
`migrate_on_fallback` vaut True.
|
||||
|
||||
Args:
|
||||
path: Chemin du fichier
|
||||
allow_pickle_fallback: Activer la compat legacy
|
||||
migrate_on_fallback: Ré-écrire en JSON signé après fallback
|
||||
pickle_loader: Callable alternatif (pour tests / restricted unpickler)
|
||||
key: Clé HMAC explicite (sinon dérivée de l'environnement)
|
||||
|
||||
Raises:
|
||||
SignatureVerificationError: HMAC invalide (fichier altéré)
|
||||
UnsupportedFormatError: format inconnu et fallback désactivé
|
||||
"""
|
||||
path = Path(path)
|
||||
with open(path, "rb") as fp:
|
||||
raw = fp.read()
|
||||
|
||||
if raw.startswith(_SIGNATURE_HEADER):
|
||||
return loads_signed(raw, key=key)
|
||||
|
||||
if not allow_pickle_fallback or not _pickle_fallback_allowed():
|
||||
raise UnsupportedFormatError(
|
||||
f"{path} n'est pas au format signé et le fallback pickle est désactivé."
|
||||
)
|
||||
|
||||
logger.warning(
|
||||
"Chargement legacy pickle pour %s — ce format est obsolète et "
|
||||
"sera ré-écrit en JSON signé. Voir docs/SECURITY.md.",
|
||||
path,
|
||||
)
|
||||
|
||||
# Par défaut on refuse tout type non documenté dans ce fichier à risque :
|
||||
# utilisateur peut fournir un `pickle_loader` custom (ex: Unpickler
|
||||
# restreint). On log l'ouverture pour la traçabilité.
|
||||
loader = pickle_loader or (lambda f: pickle.load(f)) # noqa: S301 - usage legacy
|
||||
with open(path, "rb") as fp:
|
||||
data = loader(fp)
|
||||
|
||||
if migrate_on_fallback:
|
||||
try:
|
||||
save_signed(path, data, key=key)
|
||||
logger.info("Fichier %s migré en JSON signé.", path)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
logger.error(
|
||||
"Migration JSON signé échouée pour %s : %s", path, exc
|
||||
)
|
||||
|
||||
return data
|
||||
|
||||
|
||||
__all__ = [
|
||||
"SignedSerializerError",
|
||||
"SignatureVerificationError",
|
||||
"UnsupportedFormatError",
|
||||
"dumps_signed",
|
||||
"loads_signed",
|
||||
"save_signed",
|
||||
"load_signed",
|
||||
]
|
||||
@@ -26,11 +26,15 @@ from PIL import Image
|
||||
import logging
|
||||
import threading
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
import pickle
|
||||
import os
|
||||
|
||||
from core.models import BBox
|
||||
from core.embedding.fusion_engine import FusionEngine
|
||||
from core.security.signed_serializer import (
|
||||
SignatureVerificationError,
|
||||
load_signed,
|
||||
save_signed,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -521,42 +525,90 @@ class VisualEmbeddingManager:
|
||||
|
||||
logger.debug(f"Éviction de {num_to_remove} entrées du cache")
|
||||
|
||||
def _entry_to_dict(self, entry: "EmbeddingCacheEntry") -> Dict[str, Any]:
|
||||
"""Convertit une entrée du cache en dict JSON-serialisable."""
|
||||
return {
|
||||
"embedding": entry.embedding, # numpy → encodé par signed_serializer
|
||||
"signature": entry.signature,
|
||||
"created_at": entry.created_at,
|
||||
"access_count": entry.access_count,
|
||||
"last_accessed": entry.last_accessed,
|
||||
}
|
||||
|
||||
def _dict_to_entry(self, data: Any) -> Optional["EmbeddingCacheEntry"]:
|
||||
"""Reconstruit une EmbeddingCacheEntry depuis un dict (format JSON)
|
||||
ou depuis un objet déjà typé (fallback pickle legacy).
|
||||
Retourne None si la donnée n'est pas exploitable.
|
||||
"""
|
||||
if isinstance(data, EmbeddingCacheEntry):
|
||||
return data
|
||||
if not isinstance(data, dict):
|
||||
return None
|
||||
try:
|
||||
return EmbeddingCacheEntry(
|
||||
embedding=np.asarray(data["embedding"]),
|
||||
signature=data["signature"],
|
||||
created_at=data["created_at"],
|
||||
access_count=int(data.get("access_count", 0)),
|
||||
last_accessed=data.get("last_accessed"),
|
||||
)
|
||||
except (KeyError, TypeError, ValueError) as exc:
|
||||
logger.warning(f"Entrée de cache invalide ignorée: {exc}")
|
||||
return None
|
||||
|
||||
def _load_persistent_cache(self):
|
||||
"""Charge le cache persistant depuis le disque"""
|
||||
"""Charge le cache persistant depuis le disque (JSON signé HMAC,
|
||||
fallback pickle legacy avec migration automatique)."""
|
||||
if not self.cache_persistence_path or not os.path.exists(self.cache_persistence_path):
|
||||
return
|
||||
|
||||
|
||||
try:
|
||||
with open(self.cache_persistence_path, 'rb') as f:
|
||||
cached_data = pickle.load(f)
|
||||
|
||||
# Filtrer les entrées trop anciennes (plus de 24h)
|
||||
cutoff_time = datetime.now() - timedelta(hours=24)
|
||||
|
||||
for signature, entry in cached_data.items():
|
||||
if entry.created_at > cutoff_time:
|
||||
self._embedding_cache[signature] = entry
|
||||
|
||||
logger.info(f"Cache persistant chargé: {len(self._embedding_cache)} entrées")
|
||||
|
||||
cached_data = load_signed(self.cache_persistence_path)
|
||||
except SignatureVerificationError:
|
||||
logger.error(
|
||||
"Cache persistant %s altéré (HMAC invalide) — ignoré.",
|
||||
self.cache_persistence_path,
|
||||
)
|
||||
return
|
||||
except Exception as e:
|
||||
logger.warning(f"Erreur lors du chargement du cache persistant: {e}")
|
||||
|
||||
return
|
||||
|
||||
if not isinstance(cached_data, dict):
|
||||
logger.warning("Format de cache inattendu — ignoré.")
|
||||
return
|
||||
|
||||
# Filtrer les entrées trop anciennes (plus de 24h)
|
||||
cutoff_time = datetime.now() - timedelta(hours=24)
|
||||
loaded = 0
|
||||
for signature, raw in cached_data.items():
|
||||
entry = self._dict_to_entry(raw)
|
||||
if entry is None:
|
||||
continue
|
||||
if entry.created_at > cutoff_time:
|
||||
self._embedding_cache[signature] = entry
|
||||
loaded += 1
|
||||
|
||||
logger.info(f"Cache persistant chargé: {loaded} entrées")
|
||||
|
||||
def _save_persistent_cache(self):
|
||||
"""Sauvegarde le cache sur disque"""
|
||||
"""Sauvegarde le cache sur disque en JSON signé HMAC."""
|
||||
if not self.cache_persistence_path:
|
||||
return
|
||||
|
||||
|
||||
try:
|
||||
# Créer le répertoire si nécessaire
|
||||
os.makedirs(os.path.dirname(self.cache_persistence_path), exist_ok=True)
|
||||
|
||||
|
||||
with self._cache_lock:
|
||||
with open(self.cache_persistence_path, 'wb') as f:
|
||||
pickle.dump(dict(self._embedding_cache), f)
|
||||
|
||||
serializable = {
|
||||
signature: self._entry_to_dict(entry)
|
||||
for signature, entry in self._embedding_cache.items()
|
||||
}
|
||||
|
||||
save_signed(self.cache_persistence_path, serializable)
|
||||
logger.debug("Cache persistant sauvegardé")
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Erreur lors de la sauvegarde du cache: {e}")
|
||||
|
||||
|
||||
@@ -14,8 +14,9 @@ import asyncio
|
||||
import logging
|
||||
import json
|
||||
import base64
|
||||
import pickle
|
||||
import gzip
|
||||
import pickle # noqa: S403 - usage legacy restreint au fallback de migration
|
||||
import io
|
||||
from typing import Dict, List, Optional, Any, Tuple
|
||||
from dataclasses import dataclass, asdict
|
||||
from datetime import datetime
|
||||
@@ -24,6 +25,12 @@ import numpy as np
|
||||
|
||||
from core.visual.visual_target_manager import VisualTarget, VisualTargetManager
|
||||
from core.visual.screenshot_validation_manager import ScreenshotValidationManager, ValidationResult
|
||||
from core.security.signed_serializer import (
|
||||
SignatureVerificationError,
|
||||
UnsupportedFormatError,
|
||||
dumps_signed,
|
||||
loads_signed,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -435,19 +442,19 @@ class VisualPersistenceManager:
|
||||
return None
|
||||
|
||||
async def _serialize_workflow_data(self, workflow_data: VisualWorkflowData) -> bytes:
|
||||
"""Sérialise les données d'un workflow"""
|
||||
"""Sérialise les données d'un workflow en JSON signé HMAC."""
|
||||
# Convertir en dictionnaire
|
||||
data_dict = asdict(workflow_data)
|
||||
|
||||
|
||||
# Traiter les types spéciaux
|
||||
data_dict['created_at'] = workflow_data.created_at.isoformat()
|
||||
|
||||
|
||||
# Sérialiser les cibles visuelles
|
||||
serialized_targets = {}
|
||||
for signature, target in workflow_data.visual_targets.items():
|
||||
serialized_targets[signature] = await self._serialize_visual_target(target)
|
||||
data_dict['visual_targets'] = serialized_targets
|
||||
|
||||
|
||||
# Sérialiser l'historique de validation
|
||||
serialized_history = {}
|
||||
for signature, history in workflow_data.validation_history.items():
|
||||
@@ -455,15 +462,30 @@ class VisualPersistenceManager:
|
||||
self._serialize_validation_result(result) for result in history
|
||||
]
|
||||
data_dict['validation_history'] = serialized_history
|
||||
|
||||
# Convertir en bytes
|
||||
return pickle.dumps(data_dict)
|
||||
|
||||
|
||||
# JSON signé HMAC (cf. core.security.signed_serializer)
|
||||
return dumps_signed(data_dict)
|
||||
|
||||
async def _deserialize_workflow_data(self, data: bytes) -> VisualWorkflowData:
|
||||
"""Désérialise les données d'un workflow"""
|
||||
# Désérialiser le dictionnaire
|
||||
data_dict = pickle.loads(data)
|
||||
|
||||
"""Désérialise les données d'un workflow (JSON signé HMAC ;
|
||||
fallback pickle legacy avec WARNING pour migrer les anciens fichiers)."""
|
||||
try:
|
||||
data_dict = loads_signed(data)
|
||||
except SignatureVerificationError:
|
||||
# Fichier altéré ou clé différente : on refuse sans fallback.
|
||||
logger.error("Workflow visuel : signature HMAC invalide — refus.")
|
||||
raise
|
||||
except UnsupportedFormatError:
|
||||
# Ancien format pickle : fallback explicite et bruyant.
|
||||
import os
|
||||
if os.getenv("RPA_ALLOW_PICKLE_FALLBACK", "1") == "0":
|
||||
raise
|
||||
logger.warning(
|
||||
"Workflow visuel au format pickle legacy — lecture de compat, "
|
||||
"ré-écrire en JSON signé dès que possible."
|
||||
)
|
||||
data_dict = pickle.loads(data) # noqa: S301 - fallback legacy
|
||||
|
||||
# Reconstruire les objets
|
||||
workflow_data = VisualWorkflowData(
|
||||
workflow_id=data_dict['workflow_id'],
|
||||
|
||||
369
core/workflow/execution_compiler.py
Normal file
369
core/workflow/execution_compiler.py
Normal file
@@ -0,0 +1,369 @@
|
||||
# core/workflow/execution_compiler.py
|
||||
"""
|
||||
ExecutionCompiler — Compile un WorkflowIR en ExecutionPlan.
|
||||
|
||||
Pièce maîtresse de l'architecture V4.
|
||||
"Le LLM prépare et compile. Le runtime exécute."
|
||||
|
||||
Le compilateur :
|
||||
1. Prend chaque étape du WorkflowIR
|
||||
2. Compile une stratégie de résolution pour chaque action (OCR > template > VLM)
|
||||
3. Définit les timeouts, retries, fallbacks et recovery
|
||||
4. Produit un ExecutionPlan déterministe et borné
|
||||
|
||||
L'objectif : zéro VLM au runtime pour les cas normaux.
|
||||
Le VLM est un exception handler, pas le chemin principal.
|
||||
|
||||
Le compilateur utilise :
|
||||
- Les données de l'enregistrement (crops, textes OCR) pour pré-compiler
|
||||
- L'historique d'apprentissage (ReplayLearner) pour choisir la meilleure stratégie
|
||||
- Le contexte métier (DomainContext) pour adapter les paramètres
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import uuid
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from .workflow_ir import WorkflowIR, Step, Action
|
||||
from .execution_plan import (
|
||||
ExecutionPlan, ExecutionNode, ResolutionStrategy, SuccessCondition,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Estimated execution time per action type, in milliseconds.
# Consumed by ExecutionCompiler.compile() to compute plan.estimated_duration_s;
# unknown action types fall back to 200 ms there.
_ACTION_TIME_ESTIMATES = {
    "click": 200,      # OCR lookup + click
    "type": 500,       # Char-by-char typing
    "key_combo": 100,
    "wait": 0,         # The wait itself is carried by the action's duration_ms
    "scroll": 200,
}
|
||||
|
||||
|
||||
class ExecutionCompiler:
    """Compile a WorkflowIR into an ExecutionPlan.

    Core piece of the V4 architecture: "the LLM prepares and compiles,
    the runtime executes." Every step is pre-compiled into deterministic,
    bounded execution nodes with a resolution strategy (UIA > OCR >
    template > VLM), timeouts, retries, fallbacks and recovery. The VLM
    is an exception handler, never the main path.

    Usage:
        compiler = ExecutionCompiler()
        plan = compiler.compile(workflow_ir, target_machine="VM_Win11")
        plan.save("data/plans/")
    """

    def __init__(self, learning_dir: str = ""):
        # Directory where ReplayLearner persists past replay outcomes.
        self._learning_dir = learning_dir or "data/learning/replay_results"

    def compile(
        self,
        ir: WorkflowIR,
        target_machine: str = "",
        target_resolution: str = "1280x800",
        params: Optional[Dict[str, str]] = None,
        surface_profile=None,
    ) -> ExecutionPlan:
        """Compile a WorkflowIR into an ExecutionPlan.

        Args:
            ir: The WorkflowIR to compile.
            target_machine: Target machine (used to adapt strategies).
            target_resolution: Resolution of the target machine.
            params: Variables to substitute; falls back to the IR defaults.
            surface_profile: Optional SurfaceProfile. When provided,
                timeouts/thresholds/retries are taken from the profile.

        Returns:
            A deterministic, bounded ExecutionPlan.
        """
        t_start = time.time()

        plan = ExecutionPlan(
            plan_id=f"plan_{uuid.uuid4().hex[:8]}",
            workflow_id=ir.workflow_id,
            version=ir.version,
            created_at=time.time(),
            domain=ir.domain,
            target_machine=target_machine,
            target_resolution=target_resolution,
            variables=params or {v.name: v.default for v in ir.variables},
        )

        # Learning history: target text -> best known resolution method.
        learned_strategies = self._load_learned_strategies()

        # Compile every step into execution nodes.
        for step in ir.steps:
            nodes = self._compile_step(step, ir, learned_strategies, surface_profile)
            plan.nodes.extend(nodes)

        # Compilation statistics — how much of the plan avoids the VLM.
        plan.total_nodes = len(plan.nodes)
        plan.nodes_with_ocr = sum(
            1 for n in plan.nodes
            if n.strategy_primary and n.strategy_primary.method == "ocr"
        )
        plan.nodes_with_template = sum(
            1 for n in plan.nodes
            if n.strategy_primary and n.strategy_primary.method == "template"
        )
        plan.nodes_with_vlm = sum(
            1 for n in plan.nodes
            if n.strategy_primary and n.strategy_primary.method == "vlm"
        )
        plan.estimated_duration_s = sum(
            _ACTION_TIME_ESTIMATES.get(n.action_type, 200) + n.duration_ms
            for n in plan.nodes
        ) / 1000.0

        elapsed = time.time() - t_start
        logger.info(
            f"Compilation: {plan.total_nodes} nœuds en {elapsed:.1f}s — "
            f"OCR={plan.nodes_with_ocr}, template={plan.nodes_with_template}, "
            f"VLM={plan.nodes_with_vlm} (exception handler)"
        )

        return plan

    def _compile_step(
        self,
        step: Step,
        ir: WorkflowIR,
        learned: Dict[str, str],
        surface_profile=None,
    ) -> List[ExecutionNode]:
        """Compile one IR step into its ordered list of execution nodes."""
        nodes = []

        for i, action in enumerate(step.actions):
            node = self._compile_action(
                action=action,
                step=step,
                action_index=i,
                ir=ir,
                learned=learned,
                surface_profile=surface_profile,
            )
            nodes.append(node)

        return nodes

    def _compile_action(
        self,
        action: Action,
        step: Step,
        action_index: int,
        ir: WorkflowIR,
        learned: Dict[str, str],
        surface_profile=None,
    ) -> ExecutionNode:
        """Compile a single action into an ExecutionNode with its resolution
        strategy, timeout, retry policy, success condition and recovery."""

        node = ExecutionNode(
            node_id=f"n_{step.step_id}_{action_index}",
            action_type=action.type,
            intent=step.intent,
            step_id=step.step_id,
            is_optional=step.is_optional,
        )

        # Default bounds, overridden by the surface profile when provided.
        default_click_timeout = 10000
        default_click_retries = 2
        if surface_profile is not None:
            default_click_timeout = getattr(surface_profile, "timeout_click_ms", 10000)
            default_click_retries = getattr(surface_profile, "max_retries", 2)

        if action.type == "click":
            # Pre-compile the resolution strategies for this click.
            node.strategy_primary, node.strategy_fallbacks = self._compile_click_resolution(
                action, step, learned, surface_profile,
            )
            node.timeout_ms = default_click_timeout
            node.max_retries = default_click_retries
            node.recovery_action = "escape"

            # STRICT success condition based on the expected window title.
            # With expected_window_after set we do title_match (strict);
            # otherwise we fall back to screen_changed (weak).
            expected_after = getattr(action, "expected_window_after", "")
            if expected_after and expected_after != "unknown_window":
                node.success_condition = SuccessCondition(
                    method="title_match",
                    expected_title=expected_after,
                    description=step.postcondition or f"Fenêtre attendue: {expected_after}",
                )
            elif step.postcondition:
                node.success_condition = SuccessCondition(
                    method="screen_changed",
                    description=step.postcondition,
                )

            # Strict pre-condition: the window BEFORE the click must match.
            # Carried as a dedicated field on the node for the executor.
            expected_before = getattr(action, "expected_window_before", "")
            if expected_before and expected_before != "unknown_window":
                node.expected_window_before = expected_before

        elif action.type == "type":
            node.text = action.text
            node.variable_name = action.text.strip("{}") if action.variable else ""
            node.timeout_ms = 5000
            node.max_retries = 0  # Never retry typing
            node.recovery_action = "undo"

        elif action.type == "key_combo":
            node.keys = action.keys
            node.timeout_ms = 3000
            node.max_retries = 0
            node.recovery_action = "undo"

        elif action.type == "wait":
            node.duration_ms = action.duration_ms or 1000
            # BUG FIX: derive the timeout from the sanitized duration.
            # action.duration_ms can be None/0 (the line above guards for
            # it); the old `action.duration_ms + 2000` either crashed
            # (None + 2000) or produced a 2s timeout for a defaulted 1s wait.
            node.timeout_ms = node.duration_ms + 2000
            node.max_retries = 0
            node.recovery_action = "none"

        elif action.type == "scroll":
            node.timeout_ms = 3000
            node.max_retries = 0
            node.recovery_action = "none"

        return node

    def _compile_click_resolution(
        self,
        action: Action,
        step: Step,
        learned: Dict[str, str],
        surface_profile=None,
    ) -> tuple:
        """Compile the resolution strategies for a click.

        Uses the visual-enrichment data (action._enrichment) when available:
            - by_text (OCR)
            - anchor_image_base64 (template)
            - vlm_description (VLM)
            - uia_snapshot (UIA on native Windows)

        Priority order (varies with the surface):
            1. UIA (snapshot present AND native surface AND helper) — 10-20ms
            2. Exact OCR (visible text) — 100-200ms
            3. Template matching (crop) — 10ms
            4. VLM — exception handler

        Learning may reorder when a strategy previously worked better.

        Returns:
            (primary_strategy, fallback_strategies)
        """
        primary = None
        fallbacks = []

        # Visual enrichment, if present.
        enrichment = getattr(action, "_enrichment", None) or {}
        by_text_from_enrich = enrichment.get("by_text", "")
        anchor_b64 = enrichment.get("anchor_image_base64", "")
        vlm_desc_from_enrich = enrichment.get("vlm_description", "")
        window_title = enrichment.get("window_title", "")
        uia_snapshot = enrichment.get("uia_snapshot") or {}

        # Text source priority: enrichment > anchor_hint > target.
        target_text = by_text_from_enrich or action.anchor_hint or action.target
        # Never use the "unknown_window" placeholder as OCR text.
        if target_text == "unknown_window":
            target_text = ""

        learned_method = learned.get(target_text, "")

        # Is UIA usable on this surface?
        uia_eligible = False
        if surface_profile is not None:
            from .surface_classifier import SurfaceType
            surface_type = getattr(surface_profile, "surface_type", None)
            uia_available = getattr(surface_profile, "uia_available", False)
            uia_eligible = (
                uia_available
                and surface_type == SurfaceType.WINDOWS_NATIVE
            )
        else:
            # Without an explicit profile, enable UIA when a snapshot exists
            # (the runtime agent decides whether it can actually use it).
            uia_eligible = bool(uia_snapshot)

        # UIA strategy — fastest and most precise on native Windows.
        if uia_snapshot and uia_snapshot.get("name") and uia_eligible:
            uia_strategy = ResolutionStrategy(
                method="uia",
                uia_name=uia_snapshot.get("name", ""),
                uia_control_type=uia_snapshot.get("control_type", ""),
                uia_automation_id=uia_snapshot.get("automation_id", ""),
                uia_parent_path=uia_snapshot.get("parent_path", []),
                threshold=0.95,
            )
            primary = uia_strategy

        # OCR strategy — visible text is the best anchor.
        if target_text:
            ocr_strategy = ResolutionStrategy(
                method="ocr",
                target_text=target_text,
                threshold=0.7,
            )
            if primary is None and (
                not learned_method
                or learned_method in ("ocr", "som_text_match", "hybrid_text_direct", "v4_ocr")
            ):
                primary = ocr_strategy
            else:
                fallbacks.append(ocr_strategy)

        # Template strategy — the visual crop from the recording.
        if anchor_b64:
            template_strategy = ResolutionStrategy(
                method="template",
                target_text=target_text,
                anchor_b64=anchor_b64,
                threshold=0.85,
            )
            if primary is None and learned_method in (
                "anchor_template", "template_matching", "v4_template"
            ):
                primary = template_strategy
            else:
                fallbacks.append(template_strategy)

        # VLM strategy — exception handler (last resort).
        vlm_description = vlm_desc_from_enrich or action.target or step.intent
        if vlm_description and vlm_description != "unknown_window":
            vlm_strategy = ResolutionStrategy(
                method="vlm",
                vlm_description=vlm_description,
                threshold=0.6,
            )
            fallbacks.append(vlm_strategy)

        # No primary yet: promote the first fallback.
        if primary is None:
            if fallbacks:
                primary = fallbacks.pop(0)
            else:
                # Absolute last resort: VLM with the business intent.
                primary = ResolutionStrategy(
                    method="vlm",
                    vlm_description=step.intent or "élément UI",
                    threshold=0.5,
                )

        return primary, fallbacks

    def _load_learned_strategies(self) -> Dict[str, str]:
        """Load learned strategies (ReplayLearner): target -> best method.

        Best-effort — returns an empty mapping when the learner or its
        history is unavailable.
        """
        try:
            from agent_v0.server_v1.replay_learner import ReplayLearner
            learner = ReplayLearner(learning_dir=self._learning_dir)
            # Build a target -> best_method mapping from recent outcomes.
            strategies = {}
            for outcome in learner._recent:
                if outcome.success and outcome.resolution_method and outcome.target_description:
                    strategies[outcome.target_description] = outcome.resolution_method
            return strategies
        except Exception:
            return {}
|
||||
285
core/workflow/execution_plan.py
Normal file
285
core/workflow/execution_plan.py
Normal file
@@ -0,0 +1,285 @@
|
||||
# core/workflow/execution_plan.py
|
||||
"""
|
||||
ExecutionPlan — Plan d'exécution strict, borné et versionné.
|
||||
|
||||
C'est ce que le runtime exécute. Pas d'improvisation — tout est pré-compilé :
|
||||
- chaque nœud a une stratégie de résolution primaire + fallbacks
|
||||
- chaque nœud a un timeout, un retry policy, une condition de succès
|
||||
- le VLM n'intervient qu'en exception handler (pas en chemin principal)
|
||||
|
||||
Le runtime ne fait que : exécuter → observer → vérifier → suite ou fallback.
|
||||
|
||||
Cycle : WorkflowIR → ExecutionCompiler → ExecutionPlan → Runtime
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class ResolutionStrategy:
    """Visual resolution strategy for a UI element.

    Pre-compiled — the runtime does not need the VLM to resolve it.
    """
    method: str  # "uia", "ocr", "template", "position", "vlm", "dom"
    target_text: str = ""  # Text to search for (OCR)
    anchor_b64: str = ""  # Reference crop (template matching)
    zone: Dict[str, float] = field(default_factory=dict)  # Search zone {x_min, y_min, x_max, y_max}
    position_hint: str = ""  # e.g. "top right", "in the taskbar"
    vlm_description: str = ""  # VLM description (last resort)
    threshold: float = 0.8  # Confidence threshold

    # UIA strategy (Windows UI Automation).
    # Used when the recording captured a UIA snapshot at click time.
    # On replay the Windows agent calls lea_uia.exe find --name ... to
    # locate the element by its logical path (reliable on native Windows).
    uia_name: str = ""  # Name property of the element
    uia_control_type: str = ""  # ControlType (Button, Edit, MenuItem, ...)
    uia_automation_id: str = ""  # AutomationId (optional)
    uia_parent_path: List[Dict[str, str]] = field(default_factory=list)

    # DOM strategy (web with CDP enabled) — groundwork for later.
    dom_selector: str = ""  # CSS selector
    dom_xpath: str = ""  # XPath
    dom_url_pattern: str = ""  # URL pattern to match

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict, omitting empty optional fields.

        BUG FIX: anchor_b64 is now stored in full. It used to be truncated
        to 50 chars "for readability", which silently destroyed the template
        crop on every ExecutionPlan.save()/load() round-trip and made the
        "template" strategy unusable after reload.
        """
        d: Dict[str, Any] = {"method": self.method}
        if self.target_text:
            d["target_text"] = self.target_text
        if self.anchor_b64:
            d["anchor_b64"] = self.anchor_b64
        if self.zone:
            d["zone"] = self.zone
        if self.position_hint:
            d["position_hint"] = self.position_hint
        if self.vlm_description:
            d["vlm_description"] = self.vlm_description
        if self.uia_name:
            d["uia_name"] = self.uia_name
        if self.uia_control_type:
            d["uia_control_type"] = self.uia_control_type
        if self.uia_automation_id:
            d["uia_automation_id"] = self.uia_automation_id
        if self.uia_parent_path:
            d["uia_parent_path"] = self.uia_parent_path
        if self.dom_selector:
            d["dom_selector"] = self.dom_selector
        if self.dom_xpath:
            d["dom_xpath"] = self.dom_xpath
        if self.dom_url_pattern:
            d["dom_url_pattern"] = self.dom_url_pattern
        d["threshold"] = self.threshold
        return d

    @classmethod
    def from_dict(cls, d: Dict) -> "ResolutionStrategy":
        """Rebuild a strategy from a dict, ignoring unknown keys."""
        return cls(**{k: v for k, v in d.items() if k in cls.__dataclass_fields__})
|
||||
|
||||
|
||||
@dataclass
class SuccessCondition:
    """Success condition for a node — how to verify the action worked."""
    method: str = "screen_changed"  # "screen_changed", "title_match", "text_visible", "none"
    expected_title: str = ""  # Window title expected after the action
    expected_text: str = ""  # Text that must appear
    description: str = ""  # Description for the Critic VLM (exception handler)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a dict; empty optional fields are omitted."""
        out: Dict[str, Any] = {"method": self.method}
        for attr in ("expected_title", "expected_text", "description"):
            value = getattr(self, attr)
            if value:
                out[attr] = value
        return out

    @classmethod
    def from_dict(cls, d: Dict) -> "SuccessCondition":
        """Rebuild a condition from a dict, ignoring unknown keys."""
        known = cls.__dataclass_fields__
        return cls(**{key: val for key, val in d.items() if key in known})
|
||||
|
||||
|
||||
@dataclass
class ExecutionNode:
    """Execution node — one action to run with its complete strategy."""
    node_id: str
    action_type: str  # click, type, key_combo, wait, scroll
    intent: str = ""  # Business intent (for logging/audit)

    # Pre-compiled visual resolution
    strategy_primary: Optional[ResolutionStrategy] = None
    strategy_fallbacks: List[ResolutionStrategy] = field(default_factory=list)

    # Action payload
    text: str = ""  # Text to type
    keys: List[str] = field(default_factory=list)
    duration_ms: int = 0
    variable_name: str = ""  # Set when `text` is a variable placeholder

    # Execution bounds
    timeout_ms: int = 10000  # Timeout for this action
    max_retries: int = 1  # Allowed retries
    retry_delay_ms: int = 2000  # Delay between retries

    # Verification
    success_condition: Optional[SuccessCondition] = None

    # Strict window control (pre-condition)
    expected_window_before: str = ""  # Active window must match BEFORE the action

    # Recovery
    recovery_action: str = "escape"  # "escape", "undo", "close", "none"

    # Context
    step_id: str = ""  # Back-reference to the WorkflowIR step
    is_optional: bool = False

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the node, omitting empty optional fields.

        BUG FIX: `step_id` and `retry_delay_ms` are now emitted. They were
        previously dropped even though from_dict() reads them, so a
        save()/load() round-trip silently lost IR traceability (step_id)
        and reset the retry delay to its default.
        """
        d = {
            "node_id": self.node_id,
            "action_type": self.action_type,
        }
        if self.intent:
            d["intent"] = self.intent
        if self.strategy_primary:
            d["strategy_primary"] = self.strategy_primary.to_dict()
        if self.strategy_fallbacks:
            d["strategy_fallbacks"] = [s.to_dict() for s in self.strategy_fallbacks]
        if self.text:
            d["text"] = self.text
        if self.keys:
            d["keys"] = self.keys
        if self.duration_ms:
            d["duration_ms"] = self.duration_ms
        if self.variable_name:
            d["variable_name"] = self.variable_name
        d["timeout_ms"] = self.timeout_ms
        d["max_retries"] = self.max_retries
        d["retry_delay_ms"] = self.retry_delay_ms
        if self.success_condition:
            d["success_condition"] = self.success_condition.to_dict()
        if self.expected_window_before:
            d["expected_window_before"] = self.expected_window_before
        d["recovery_action"] = self.recovery_action
        if self.step_id:
            d["step_id"] = self.step_id
        if self.is_optional:
            d["is_optional"] = True
        return d

    @classmethod
    def from_dict(cls, d: Dict) -> "ExecutionNode":
        """Rebuild a node from its dict form (inverse of to_dict)."""
        primary = ResolutionStrategy.from_dict(d["strategy_primary"]) if d.get("strategy_primary") else None
        fallbacks = [ResolutionStrategy.from_dict(f) for f in d.get("strategy_fallbacks", [])]
        success = SuccessCondition.from_dict(d["success_condition"]) if d.get("success_condition") else None
        return cls(
            node_id=d["node_id"],
            action_type=d["action_type"],
            intent=d.get("intent", ""),
            strategy_primary=primary,
            strategy_fallbacks=fallbacks,
            text=d.get("text", ""),
            keys=d.get("keys", []),
            duration_ms=d.get("duration_ms", 0),
            variable_name=d.get("variable_name", ""),
            timeout_ms=d.get("timeout_ms", 10000),
            max_retries=d.get("max_retries", 1),
            retry_delay_ms=d.get("retry_delay_ms", 2000),
            success_condition=success,
            expected_window_before=d.get("expected_window_before", ""),
            recovery_action=d.get("recovery_action", "escape"),
            step_id=d.get("step_id", ""),
            is_optional=d.get("is_optional", False),
        )
|
||||
|
||||
|
||||
@dataclass
class ExecutionPlan:
    """Versioned execution plan — what the runtime actually executes."""
    plan_id: str
    workflow_id: str  # Reference to the source WorkflowIR
    version: int = 1
    created_at: float = 0.0

    # Ordered sequence of execution nodes
    nodes: List[ExecutionNode] = field(default_factory=list)

    # Variables substituted before execution
    variables: Dict[str, str] = field(default_factory=dict)

    # Global configuration
    domain: str = "generic"
    target_machine: str = ""  # Target machine
    target_resolution: str = ""  # "1280x800", "1920x1080"

    # Compilation metrics
    total_nodes: int = 0
    nodes_with_ocr: int = 0  # OCR resolution (fast, precise)
    nodes_with_template: int = 0  # Template resolution (fast)
    nodes_with_vlm: int = 0  # VLM resolution (slow, last resort)
    estimated_duration_s: float = 0.0

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the plan; compilation metrics are grouped under "stats"."""
        stats = {
            "total_nodes": self.total_nodes,
            "nodes_with_ocr": self.nodes_with_ocr,
            "nodes_with_template": self.nodes_with_template,
            "nodes_with_vlm": self.nodes_with_vlm,
            "estimated_duration_s": round(self.estimated_duration_s, 1),
        }
        return {
            "plan_id": self.plan_id,
            "workflow_id": self.workflow_id,
            "version": self.version,
            "created_at": self.created_at,
            "domain": self.domain,
            "target_machine": self.target_machine,
            "target_resolution": self.target_resolution,
            "variables": self.variables,
            "nodes": [node.to_dict() for node in self.nodes],
            "stats": stats,
        }

    def to_json(self, indent: int = 2) -> str:
        """Render as JSON (non-ASCII characters kept as-is)."""
        return json.dumps(self.to_dict(), ensure_ascii=False, indent=indent)

    @classmethod
    def from_dict(cls, d: Dict) -> "ExecutionPlan":
        """Rebuild a plan from its dict form (inverse of to_dict)."""
        restored_nodes = [ExecutionNode.from_dict(raw) for raw in d.get("nodes", [])]
        stats = d.get("stats", {})
        return cls(
            plan_id=d["plan_id"],
            workflow_id=d.get("workflow_id", ""),
            version=d.get("version", 1),
            created_at=d.get("created_at", 0),
            domain=d.get("domain", "generic"),
            target_machine=d.get("target_machine", ""),
            target_resolution=d.get("target_resolution", ""),
            variables=d.get("variables", {}),
            nodes=restored_nodes,
            total_nodes=stats.get("total_nodes", len(restored_nodes)),
            nodes_with_ocr=stats.get("nodes_with_ocr", 0),
            nodes_with_template=stats.get("nodes_with_template", 0),
            nodes_with_vlm=stats.get("nodes_with_vlm", 0),
            estimated_duration_s=stats.get("estimated_duration_s", 0),
        )

    @classmethod
    def from_json(cls, json_str: str) -> "ExecutionPlan":
        """Parse a JSON string produced by to_json()."""
        return cls.from_dict(json.loads(json_str))

    def save(self, directory: str) -> Path:
        """Write the plan as <plan_id>.json under *directory*; return the path."""
        target_dir = Path(directory)
        target_dir.mkdir(parents=True, exist_ok=True)
        out_path = target_dir / f"{self.plan_id}.json"
        out_path.write_text(self.to_json(), encoding="utf-8")
        return out_path

    @classmethod
    def load(cls, file_path: str) -> "ExecutionPlan":
        """Read back a plan previously written by save()."""
        return cls.from_json(Path(file_path).read_text(encoding="utf-8"))
|
||||
627
core/workflow/ir_builder.py
Normal file
627
core/workflow/ir_builder.py
Normal file
@@ -0,0 +1,627 @@
|
||||
# core/workflow/ir_builder.py
|
||||
"""
|
||||
IRBuilder — Transforme une RawTrace en WorkflowIR.
|
||||
|
||||
C'est le "compilateur de savoir-faire" :
|
||||
RawTrace (clics bruts) → WorkflowIR (connaissance structurée)
|
||||
|
||||
Le builder utilise gemma4 pour COMPRENDRE ce que l'utilisateur a fait :
|
||||
- Segmenter les actions en étapes logiques
|
||||
- Identifier l'intention de chaque étape
|
||||
- Détecter les variables (données qui changent entre les exécutions)
|
||||
- Définir les pré/postconditions
|
||||
|
||||
Le builder est appelé UNE SEULE FOIS après l'enregistrement.
|
||||
Le WorkflowIR produit est ensuite réutilisé pour chaque replay.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from .workflow_ir import WorkflowIR, Step, Action, Variable
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class IRBuilder:
|
||||
"""Construit un WorkflowIR depuis une RawTrace (événements bruts).
|
||||
|
||||
Usage :
|
||||
builder = IRBuilder()
|
||||
ir = builder.build(
|
||||
events=raw_events,
|
||||
session_id="sess_xxx",
|
||||
domain="tim_codage",
|
||||
)
|
||||
ir.save("data/workflows/")
|
||||
"""
|
||||
|
||||
def __init__(self, gemma4_port: str = ""):
|
||||
self._gemma4_port = gemma4_port or os.environ.get("GEMMA4_PORT", "11435")
|
||||
self._gemma4_url = f"http://localhost:{self._gemma4_port}/api/chat"
|
||||
|
||||
    def build(
        self,
        events: List[Dict[str, Any]],
        session_id: str = "",
        session_dir: str = "",
        domain: str = "generic",
        name: str = "",
    ) -> WorkflowIR:
        """Build a WorkflowIR from raw recorded events.

        Pipeline:
            1. Filter out noise events
            2. Segment into logical steps (by window/intent change)
            3. Identify each step's intent (via gemma4, inside _build_step)
            4. Detect variables
            5. Define pre/postconditions

        Args:
            events: Raw recorder events (possibly wrapped as {"event": ...}).
            session_id: Recording session id, stored as IR provenance.
            session_dir: Recording directory; enables visual enrichment
                only when it exists on disk.
            domain: Business domain tag for the workflow.
            name: Optional human-readable workflow name.

        Returns:
            The populated WorkflowIR (returned empty when no actionable
            events remain after filtering).
        """
        t_start = time.time()

        # Resolve session_dir for visual enrichment; disable enrichment
        # when the directory does not exist.
        session_dir_path = Path(session_dir) if session_dir else None
        if session_dir_path and not session_dir_path.is_dir():
            logger.warning(
                f"IRBuilder: session_dir '{session_dir}' introuvable — "
                f"enrichissement visuel désactivé"
            )
            session_dir_path = None

        # Create the empty WorkflowIR shell.
        ir = WorkflowIR.new(
            name=name or f"Workflow du {time.strftime('%d/%m/%Y %H:%M')}",
            domain=domain,
            learned_from=session_id,
        )

        # 1. Keep only actionable events.
        actionable = self._filter_events(events)
        if not actionable:
            logger.warning("IRBuilder: aucun événement actionable")
            return ir

        # 2. Detect the applications involved.
        ir.applications = self._detect_applications(actionable)

        # 3. Segment into logical steps.
        segments = self._segment_into_steps(actionable)

        # 4. Build one Step per segment.
        for i, segment in enumerate(segments):
            step = self._build_step(
                segment=segment,
                step_index=i,
                total_steps=len(segments),
                workflow_name=ir.name,
                domain=domain,
                session_dir_path=session_dir_path,
            )
            ir.steps.append(step)

        # 5. Strict control: fill expected_window_before/after for every
        #    action. This is the key to robustness: each action knows which
        #    window it must run in AND which window it must end up in.
        self._attach_window_expectations(ir, actionable)

        # 6. Detect variables.
        ir.variables = self._detect_variables(ir.steps, actionable)

        elapsed = time.time() - t_start
        logger.info(
            f"IRBuilder: WorkflowIR construit en {elapsed:.1f}s — "
            f"{len(ir.steps)} étapes, {len(ir.variables)} variables, "
            f"{len(ir.applications)} applications"
        )

        return ir
|
||||
|
||||
def _filter_events(self, events: List[Dict]) -> List[Dict]:
|
||||
"""Filtrer les événements parasites.
|
||||
|
||||
Exclusions :
|
||||
1. Types d'événements de bruit (heartbeat, focus_change, action_result)
|
||||
2. Clics dont la CIBLE UIA est dans Léa elle-même
|
||||
(via uia_snapshot.parent_path — on vérifie où va le clic, pas d'où
|
||||
il vient). Un clic "sur la taskbar" peut avoir window.title="Léa"
|
||||
si Léa avait le focus, mais sa cible UIA est la taskbar.
|
||||
"""
|
||||
ignored_types = {"heartbeat", "focus_change", "action_result", "window_focus_change"}
|
||||
lea_markers = (
|
||||
"léa", "lea -", "léa -", "lea —", "léa —",
|
||||
"lea assistante", "léa assistante",
|
||||
"agent v1",
|
||||
)
|
||||
|
||||
def _uia_target_is_lea(uia_snapshot: dict) -> bool:
|
||||
"""L'élément UIA cliqué est-il dans la fenêtre de Léa ?"""
|
||||
if not uia_snapshot:
|
||||
return False
|
||||
# Vérifier le nom de l'élément lui-même
|
||||
name = (uia_snapshot.get("name", "") or "").lower()
|
||||
if any(m in name for m in lea_markers):
|
||||
return True
|
||||
# Vérifier les parents
|
||||
for parent in uia_snapshot.get("parent_path", []):
|
||||
p_name = (parent.get("name", "") or "").lower()
|
||||
if any(m in p_name for m in lea_markers):
|
||||
return True
|
||||
return False
|
||||
|
||||
result = []
|
||||
filtered_lea = 0
|
||||
for raw_evt in events:
|
||||
evt = raw_evt.get("event", raw_evt)
|
||||
evt_type = evt.get("type", "")
|
||||
if evt_type in ignored_types:
|
||||
continue
|
||||
|
||||
# Filtrer uniquement les clics dont la CIBLE est dans Léa
|
||||
# (pas les clics depuis Léa vers l'extérieur)
|
||||
if evt_type == "mouse_click":
|
||||
uia = evt.get("uia_snapshot") or {}
|
||||
if _uia_target_is_lea(uia):
|
||||
filtered_lea += 1
|
||||
continue
|
||||
|
||||
result.append(evt)
|
||||
|
||||
if filtered_lea > 0:
|
||||
logger.info(
|
||||
f"IRBuilder: {filtered_lea} clic(s) filtré(s) "
|
||||
f"(cible UIA dans la fenêtre Léa)"
|
||||
)
|
||||
return result
|
||||
|
||||
def _attach_window_expectations(self, ir: WorkflowIR, events: List[Dict]) -> None:
|
||||
"""Remplir expected_window_before/after pour chaque action du workflow.
|
||||
|
||||
C'est LA clé du contrôle strict : chaque action connaît la fenêtre
|
||||
dans laquelle elle doit s'exécuter ET celle qui doit apparaître
|
||||
après. Toute divergence au replay → STOP immédiat.
|
||||
|
||||
On reconstruit la séquence d'événements "actionables" (clicks, type,
|
||||
key_combo) et on aligne chaque Action du workflow sur son événement
|
||||
source pour récupérer :
|
||||
- expected_window_before : titre de la fenêtre AU MOMENT du clic
|
||||
- expected_window_after : titre de la fenêtre du PROCHAIN click
|
||||
|
||||
Filtre critique : la fenêtre de Léa elle-même n'est JAMAIS une
|
||||
fenêtre cible valide (c'est l'overlay agent, pas l'app métier).
|
||||
Les fenêtres "unknown_window" et les titres vides sont ignorés.
|
||||
"""
|
||||
def _is_valid_target_window(title: str) -> bool:
|
||||
"""Un titre de fenêtre est valide comme expected_window_* si :
|
||||
- non vide, non "unknown_window"
|
||||
- pas la fenêtre de Léa elle-même
|
||||
"""
|
||||
if not title or title == "unknown_window":
|
||||
return False
|
||||
title_lower = title.lower()
|
||||
lea_markers = (
|
||||
"léa", "lea -", "léa -", "lea —", "léa —",
|
||||
"lea assistante", "léa assistante",
|
||||
"agent v1",
|
||||
)
|
||||
for marker in lea_markers:
|
||||
if marker in title_lower:
|
||||
return False
|
||||
return True
|
||||
|
||||
def _extract_uia_root_window(uia_snapshot: dict) -> str:
|
||||
"""Extraire le nom de la fenêtre racine depuis un snapshot UIA.
|
||||
|
||||
Le parent_path contient la hiérarchie de l'élément cliqué.
|
||||
La première entrée avec control_type="fenêtre" est la fenêtre
|
||||
qui CONTIENT l'élément cliqué — c'est la vraie cible.
|
||||
"""
|
||||
if not uia_snapshot:
|
||||
return ""
|
||||
for parent in uia_snapshot.get("parent_path", []):
|
||||
ct = (parent.get("control_type", "") or "").lower()
|
||||
if ct in ("fenêtre", "window"):
|
||||
name = (parent.get("name", "") or "").strip()
|
||||
if name:
|
||||
return name
|
||||
return ""
|
||||
|
||||
# Extraire la séquence des événements actionables avec leurs titres
|
||||
# Source de vérité pour les clics : parent_path UIA (où va vraiment
|
||||
# le clic), sinon window.title (fallback).
|
||||
# Pour les type/key_combo : window.title uniquement.
|
||||
event_sequence: List[Dict[str, Any]] = []
|
||||
for evt in events:
|
||||
t = evt.get("type", "")
|
||||
if t not in ("mouse_click", "text_input", "key_combo", "key_press", "scroll"):
|
||||
continue
|
||||
|
||||
# Titre de référence : priorité à la cible UIA pour les clics
|
||||
effective_title = ""
|
||||
if t == "mouse_click":
|
||||
uia = evt.get("uia_snapshot") or {}
|
||||
uia_root = _extract_uia_root_window(uia)
|
||||
if uia_root and _is_valid_target_window(uia_root):
|
||||
effective_title = uia_root
|
||||
|
||||
# Fallback sur window.title
|
||||
if not effective_title:
|
||||
raw_title = evt.get("window", {}).get("title", "") or ""
|
||||
if _is_valid_target_window(raw_title):
|
||||
effective_title = raw_title
|
||||
|
||||
event_sequence.append({"type": t, "title": effective_title})
|
||||
|
||||
# Aligner avec les actions du workflow
|
||||
flat_actions: List[tuple] = []
|
||||
for si, step in enumerate(ir.steps):
|
||||
for ai, action in enumerate(step.actions):
|
||||
if action.type in ("click", "type", "key_combo"):
|
||||
flat_actions.append((si, ai, action))
|
||||
|
||||
# Limite : on prend le min entre les 2 listes
|
||||
n = min(len(flat_actions), len(event_sequence))
|
||||
|
||||
for i in range(n):
|
||||
si, ai, action = flat_actions[i]
|
||||
title_now = event_sequence[i]["title"]
|
||||
if title_now:
|
||||
action.expected_window_before = title_now
|
||||
|
||||
# Chercher le prochain événement avec un titre valide
|
||||
# Et qui est DIFFÉRENT du titre actuel (sinon pas de transition à vérifier)
|
||||
for j in range(i + 1, len(event_sequence)):
|
||||
next_title = event_sequence[j]["title"]
|
||||
if next_title and next_title != title_now:
|
||||
action.expected_window_after = next_title
|
||||
break
|
||||
|
||||
def _detect_applications(self, events: List[Dict]) -> List[str]:
|
||||
"""Détecter les applications utilisées."""
|
||||
apps = set()
|
||||
for evt in events:
|
||||
title = evt.get("window", {}).get("title", "")
|
||||
if title and title != "unknown_window":
|
||||
for sep in [" – ", " - ", " — "]:
|
||||
if sep in title:
|
||||
apps.add(title.split(sep)[-1].strip())
|
||||
break
|
||||
return sorted(apps)
|
||||
|
||||
def _segment_into_steps(self, events: List[Dict]) -> List[List[Dict]]:
|
||||
"""Segmenter les événements en étapes logiques.
|
||||
|
||||
Critères de coupure :
|
||||
- Changement d'application (fenêtre différente)
|
||||
- Pause longue (> 5s entre deux événements)
|
||||
- Transition logique (clic → frappe → clic = étapes différentes)
|
||||
"""
|
||||
if not events:
|
||||
return []
|
||||
|
||||
segments = []
|
||||
current_segment = [events[0]]
|
||||
current_app = self._get_app_name(events[0])
|
||||
|
||||
for evt in events[1:]:
|
||||
app = self._get_app_name(evt)
|
||||
evt_type = evt.get("type", "")
|
||||
|
||||
# Coupure par changement d'application
|
||||
app_changed = app and current_app and app != current_app
|
||||
|
||||
# Coupure par pause longue
|
||||
prev_ts = float(current_segment[-1].get("timestamp", 0))
|
||||
curr_ts = float(evt.get("timestamp", 0))
|
||||
long_pause = (curr_ts - prev_ts) > 5.0 if prev_ts > 0 and curr_ts > 0 else False
|
||||
|
||||
# Coupure par transition clic → nouveau clic (nouvelle intention)
|
||||
transition = (
|
||||
evt_type == "mouse_click"
|
||||
and len(current_segment) >= 2
|
||||
and current_segment[-1].get("type") not in ("mouse_click",)
|
||||
)
|
||||
|
||||
if app_changed or long_pause:
|
||||
if current_segment:
|
||||
segments.append(current_segment)
|
||||
current_segment = [evt]
|
||||
current_app = app
|
||||
else:
|
||||
current_segment.append(evt)
|
||||
|
||||
if current_segment:
|
||||
segments.append(current_segment)
|
||||
|
||||
return segments
|
||||
|
||||
def _get_app_name(self, evt: Dict) -> str:
|
||||
"""Extraire le nom d'application depuis un événement."""
|
||||
title = evt.get("window", {}).get("title", "")
|
||||
for sep in [" – ", " - ", " — "]:
|
||||
if sep in title:
|
||||
return title.split(sep)[-1].strip()
|
||||
return title
|
||||
|
||||
def _build_step(
    self,
    segment: List[Dict],
    step_index: int,
    total_steps: int,
    workflow_name: str,
    domain: str,
    session_dir_path: Optional[Path] = None,
) -> Step:
    """Build a Step from a segment of events.

    Uses gemma4 (via ``_analyze_intent``) to understand the segment's
    intent.

    Args:
        segment: Raw events belonging to this logical step.
        step_index: 0-based step index (displayed 1-based in step_id).
        total_steps: Total number of steps (for the LLM prompt).
        workflow_name: Human-readable workflow name (for the prompt).
        domain: Business-domain key used to load extra LLM context.
        session_dir_path: Recording-session directory; when provided it
            enables visual enrichment of click actions.

    Returns:
        A Step whose intent falls back to the textual segment
        description when the LLM provided none.
    """
    # Convert each raw event into an (optionally enriched) Action;
    # events that carry nothing actionable yield None and are skipped.
    actions = []
    for evt in segment:
        action = self._event_to_action(evt, session_dir_path=session_dir_path)
        if action:
            actions.append(action)

    # Natural-language description of the segment (input for gemma4)
    segment_desc = self._describe_segment(segment)

    # Ask gemma4 for the intent / pre- / post-conditions
    intent, precondition, postcondition = self._analyze_intent(
        segment_desc, step_index, total_steps, workflow_name, domain,
    )

    return Step(
        step_id=f"s{step_index + 1}",
        intent=intent or segment_desc,
        precondition=precondition,
        postcondition=postcondition,
        actions=actions,
    )
|
||||
|
||||
def _event_to_action(self, evt: Dict, session_dir_path: Optional[Path] = None) -> Optional[Action]:
    """Convert a raw event into an enriched Action.

    For clicks: calls enrich_click_from_screenshot() when session_dir
    is available, to obtain:
    - by_text (exact OCR text of the clicked element)
    - anchor_image_base64 (80x80 crop for template matching)
    - vlm_description (positional description)
    - window_capture (rect for targeted grounding)

    This enrichment is THE key for the ExecutionCompiler to produce
    complete V4 plans with all strategies (OCR + template + VLM).

    Returns:
        An Action, or None when the event carries nothing actionable
        (empty text, empty key list, unknown event type).
    """
    evt_type = evt.get("type", "")

    if evt_type == "mouse_click":
        window = evt.get("window", {}).get("title", "")
        pos = evt.get("pos", [0, 0])

        # Base action (fallback when no enrichment is possible)
        action = Action(
            type="click",
            target=window,
            anchor_hint=evt.get("vision_info", {}).get("text", "") if isinstance(evt.get("vision_info"), dict) else "",
        )

        # Visual enrichment via enrich_click_from_screenshot:
        # direct access to the OCR crop + anchor for the ExecutionCompiler
        if session_dir_path and isinstance(pos, list) and len(pos) == 2:
            enrichment = self._enrich_click(
                evt, session_dir_path, window, int(pos[0]), int(pos[1]),
            )
            if enrichment:
                # The OCR text becomes the anchor_hint for primary OCR
                by_text = enrichment.get("by_text", "")
                if by_text:
                    action.anchor_hint = by_text
                # Store the enrichment metadata on the action
                # (used by the ExecutionCompiler to build strategies)
                action._enrichment = enrichment

        # Read the UIA snapshot if the Windows agent captured one.
        # Expected format inside the event:
        #   evt["uia_snapshot"] = {
        #       "name": "Enregistrer",
        #       "control_type": "bouton",
        #       "automation_id": "btnSave",
        #       "parent_path": [{"name": "...", "control_type": "..."}],
        #   }
        # When present it is merged into _enrichment so that the
        # ExecutionCompiler can create a priority UIA strategy.
        uia_snapshot = evt.get("uia_snapshot")
        if uia_snapshot and isinstance(uia_snapshot, dict):
            if not hasattr(action, "_enrichment") or action._enrichment is None:
                action._enrichment = {}
            action._enrichment["uia_snapshot"] = uia_snapshot

        return action

    elif evt_type == "text_input":
        text = evt.get("text", "")
        if text:
            return Action(type="type", text=text)
    elif evt_type in ("key_combo", "key_press"):
        keys = evt.get("keys", [])
        if keys:
            return Action(type="key_combo", keys=keys)
    elif evt_type == "scroll":
        return Action(type="scroll")

    return None
|
||||
|
||||
def _enrich_click(
    self,
    evt: Dict,
    session_dir_path: Path,
    window_title: str,
    click_x: int,
    click_y: int,
) -> Optional[Dict[str, Any]]:
    """Enrich a click with OCR + crop + description.

    Reuses enrich_click_from_screenshot from the stream_processor
    (battle-tested).

    Returns:
        A dict with by_text, anchor_image_base64, vlm_description, etc.,
        or None when the screenshot is missing or any enrichment step
        fails (best-effort by design: never breaks IR building).
    """
    try:
        from agent_v0.server_v1.stream_processor import enrich_click_from_screenshot

        # Locate the full screenshot captured for this event
        screenshot_id = evt.get("screenshot_id", "")
        if not screenshot_id:
            return None

        full_path = session_dir_path / "shots" / f"{screenshot_id}_full.png"
        if not full_path.is_file():
            return None

        # Screen resolution: defaults, widened by the captured window
        # rect when available. NOTE(review): rect[2]/rect[3] are assumed
        # to bound the screen size (right/bottom or width/height) —
        # confirm against the agent's window_capture format.
        screen_w = 1280
        screen_h = 800
        window_capture = evt.get("window_capture", {})
        if window_capture.get("window_rect"):
            rect = window_capture["window_rect"]
            screen_w = max(screen_w, rect[2])
            screen_h = max(screen_h, rect[3])

        return enrich_click_from_screenshot(
            screenshot_path=full_path,
            click_x=click_x,
            click_y=click_y,
            screen_w=screen_w,
            screen_h=screen_h,
            window_title=window_title,
            vision_info=evt.get("vision_info") if isinstance(evt.get("vision_info"), dict) else None,
            session_dir=session_dir_path,
            screenshot_id=screenshot_id,
        )
    except Exception as e:
        # Enrichment is optional — log at debug level and fall back
        logger.debug(f"IRBuilder._enrich_click: {e}")
        return None
|
||||
|
||||
def _describe_segment(self, segment: List[Dict]) -> str:
|
||||
"""Décrire un segment en langage naturel (pour gemma4)."""
|
||||
parts = []
|
||||
window = ""
|
||||
for evt in segment:
|
||||
evt_type = evt.get("type", "")
|
||||
w = evt.get("window", {}).get("title", "")
|
||||
if w and w != window:
|
||||
window = w
|
||||
parts.append(f"[{w}]")
|
||||
if evt_type == "mouse_click":
|
||||
text = evt.get("vision_info", {}).get("text", "")
|
||||
parts.append(f"clic sur '{text}'" if text else "clic")
|
||||
elif evt_type == "text_input":
|
||||
text = evt.get("text", "")
|
||||
parts.append(f"saisie '{text[:30]}'")
|
||||
elif evt_type in ("key_combo", "key_press"):
|
||||
keys = evt.get("keys", [])
|
||||
parts.append(f"touche {'+'.join(keys)}")
|
||||
return " → ".join(parts) if parts else "action"
|
||||
|
||||
def _analyze_intent(
    self,
    segment_desc: str,
    step_index: int,
    total_steps: int,
    workflow_name: str,
    domain: str,
) -> tuple:
    """Ask gemma4 to understand the intent behind a segment.

    Args:
        segment_desc: Natural-language trace of the segment's actions.
        step_index: 0-based step index (shown 1-based in the prompt).
        total_steps: Total number of steps in the workflow.
        workflow_name: Human-readable workflow name.
        domain: Business-domain key used to load extra prompt context.

    Returns:
        (intent, precondition, postcondition); falls back to
        (segment_desc, "", "") when the LLM is unreachable or errors.
    """
    import requests as _requests

    # Load the business-domain context (best-effort; prompt works
    # without it)
    domain_prompt = ""
    try:
        from agent_v0.server_v1.domain_context import get_domain_context
        ctx = get_domain_context(domain)
        if ctx.system_prompt:
            domain_prompt = f"\nContexte métier : {ctx.name}\n"
    except Exception:
        pass

    prompt = (
        f"{domain_prompt}"
        f"Workflow : {workflow_name} (étape {step_index + 1}/{total_steps})\n"
        f"Actions observées : {segment_desc}\n\n"
        f"Réponds en 3 lignes :\n"
        f"INTENTION: que veut faire l'utilisateur avec ces actions (1 phrase)\n"
        f"AVANT: état attendu de l'écran avant cette étape (1 phrase)\n"
        f"APRÈS: état attendu de l'écran après cette étape (1 phrase)"
    )

    try:
        resp = _requests.post(
            self._gemma4_url,
            json={
                "model": "gemma4:e4b",
                "messages": [{"role": "user", "content": prompt}],
                "stream": False,
                "think": True,
                # Low temperature: we want terse, near-deterministic output
                "options": {"temperature": 0.1, "num_predict": 800},
            },
            timeout=30,
        )
        if resp.ok:
            content = resp.json().get("message", {}).get("content", "")
            return self._parse_intent_response(content)
    except Exception as e:
        # LLM is optional — degrade to the raw segment description
        logger.debug(f"IRBuilder: gemma4 indisponible ({e})")

    return (segment_desc, "", "")
|
||||
|
||||
def _parse_intent_response(self, content: str) -> tuple:
|
||||
"""Parser la réponse gemma4 (INTENTION/AVANT/APRÈS)."""
|
||||
intent = ""
|
||||
precondition = ""
|
||||
postcondition = ""
|
||||
|
||||
for line in content.split("\n"):
|
||||
clean = line.strip()
|
||||
upper = clean.upper()
|
||||
if upper.startswith("INTENTION:"):
|
||||
intent = clean.split(":", 1)[1].strip()
|
||||
elif upper.startswith("AVANT:"):
|
||||
precondition = clean.split(":", 1)[1].strip()
|
||||
elif upper.startswith(("APRÈS:", "APRES:")):
|
||||
postcondition = clean.split(":", 1)[1].strip()
|
||||
|
||||
return (intent, precondition, postcondition)
|
||||
|
||||
def _detect_variables(self, steps: List[Step], events: List[Dict]) -> List[Variable]:
    """Detect variables in the workflow.

    A variable is a piece of data that changes between runs:
    - Text typed by the user (names, codes, dates)
    - Data read on screen (search results)

    Side effect: every "type" action whose text becomes a variable is
    mutated in place — flagged ``variable = True`` and its text
    replaced by the ``{placeholder}`` form.

    Note: ``events`` is not read by the current implementation; it is
    kept in the signature, presumably for interface parity with the
    other detection passes — confirm before removing.

    Returns:
        The list of detected Variable objects (possibly empty).
    """
    variables = []
    seen_texts = set()

    for step in steps:
        for action in step.actions:
            if action.type == "type" and action.text:
                text = action.text.strip()
                # Skip duplicates and very short strings (<= 2 chars)
                if text and text not in seen_texts and len(text) > 2:
                    seen_texts.add(text)
                    var_name = f"texte_{len(variables) + 1}"
                    variables.append(Variable(
                        name=var_name,
                        description=f"Texte saisi : '{text[:50]}'",
                        source="user",
                        default=text,
                    ))
                    # Mark the action as variable-driven and swap in
                    # the placeholder
                    action.variable = True
                    action.text = f"{{{var_name}}}"

    return variables
|
||||
693
core/workflow/shadow_observer.py
Normal file
693
core/workflow/shadow_observer.py
Normal file
@@ -0,0 +1,693 @@
|
||||
# core/workflow/shadow_observer.py
|
||||
"""
|
||||
ShadowObserver — Observation en temps réel de ce que Léa comprend.
|
||||
|
||||
C'est le "mode Shadow amélioré" : pendant que l'utilisateur enregistre
|
||||
une démonstration, Léa lui dit ce qu'elle comprend au fur et à mesure.
|
||||
|
||||
Contrairement à l'IRBuilder (qui analyse TOUT à la fin en appelant gemma4),
|
||||
le ShadowObserver travaille en incrémental :
|
||||
- À chaque événement reçu, il met à jour sa compréhension locale.
|
||||
- Il segmente dès qu'un critère de coupure est détecté.
|
||||
- Il émet des notifications légères ("Léa a compris : tu viens d'ouvrir le
|
||||
Bloc-notes") via un callback.
|
||||
- Il détecte les variables (texte saisi) pendant la frappe.
|
||||
|
||||
Le ShadowObserver n'est pas la source de vérité — c'est une couche
|
||||
d'observation. La source de vérité reste `live_events.jsonl`.
|
||||
Le WorkflowIR final est toujours reconstruit par l'IRBuilder après
|
||||
validation, mais la compréhension temps réel accélère la boucle de
|
||||
rétroaction avec l'utilisateur.
|
||||
|
||||
Usage :
|
||||
|
||||
def on_notify(event):
|
||||
print(f"[{event.niveau}] {event.message}")
|
||||
|
||||
observer = ShadowObserver(notify_callback=on_notify)
|
||||
observer.start("sess_abc")
|
||||
observer.observe_event(event1)
|
||||
observer.observe_event(event2)
|
||||
...
|
||||
comprehension = observer.get_understanding()
|
||||
# → [{"step": 1, "intent": "Ouvrir le Bloc-notes", "confidence": 0.8}, ...]
|
||||
observer.stop()
|
||||
|
||||
Contraintes :
|
||||
- 100% asynchrone côté performance : la méthode observe_event() ne doit
|
||||
jamais bloquer la capture (pas d'appel réseau synchrone).
|
||||
- Optionnel : activable via paramètre, ne modifie pas la capture existante.
|
||||
- 100% français dans les messages utilisateur.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from typing import Any, Callable, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Types d'événements observationnels
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class NiveauNotification(str, Enum):
    """Importance level of a notification.

    - INFO: passive information ("Léa is watching...")
    - DECOUVERTE: Léa just understood something new
    - QUESTION: Léa would like a confirmation (non-blocking)
    - VARIABLE: a variable was detected
    """

    INFO = "info"
    DECOUVERTE = "decouverte"
    QUESTION = "question"
    VARIABLE = "variable"
|
||||
|
||||
|
||||
@dataclass
class NotificationShadow:
    """Notification emitted by the ShadowObserver towards the user GUI."""

    notif_id: str
    niveau: NiveauNotification
    message: str  # User-displayable text (French)
    session_id: str
    step_index: int = -1  # Index of the step concerned, -1 if global
    data: Dict[str, Any] = field(default_factory=dict)
    timestamp: float = 0.0

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain, JSON-compatible dict."""
        return {
            "notif_id": self.notif_id,
            "niveau": self.niveau.value,
            "message": self.message,
            "session_id": self.session_id,
            "step_index": self.step_index,
            "data": self.data,
            "timestamp": self.timestamp,
        }
|
||||
|
||||
|
||||
@dataclass
class UnderstoodStep:
    """Logical step understood in real time by the ShadowObserver.

    A simplified version of `Step` (core.workflow.workflow_ir),
    optimized for incremental construction. It is converted into a
    final `Step` by the ShadowValidator after validation.
    """

    step_index: int
    intent: str  # Human intent (e.g. "Ouvrir le Bloc-notes")
    intent_provisoire: bool = True  # True until gemma4 has confirmed
    confidence: float = 0.5  # Confidence score (0..1)
    app_name: str = ""  # Main application
    window_title: str = ""  # Window title at the start of the segment
    events: List[Dict[str, Any]] = field(default_factory=list)
    variables_detectees: List[str] = field(default_factory=list)
    started_at: float = 0.0
    ended_at: float = 0.0
    validated: bool = False  # The user validated the step
    corrected: bool = False  # The user corrected the intent
    cancelled: bool = False  # The user cancelled the step

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-compatible dict (confidence rounded to 3 dp)."""
        return {
            "step_index": self.step_index,
            "intent": self.intent,
            "intent_provisoire": self.intent_provisoire,
            "confidence": round(self.confidence, 3),
            "app_name": self.app_name,
            "window_title": self.window_title,
            "events_count": len(self.events),
            "variables_detectees": list(self.variables_detectees),
            "started_at": self.started_at,
            "ended_at": self.ended_at,
            "validated": self.validated,
            "corrected": self.corrected,
            "cancelled": self.cancelled,
        }
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Observer
|
||||
# =========================================================================
|
||||
|
||||
|
||||
# Segmentation constants (seconds). We deliberately do not reuse the
# IRBuilder's constants: observation is incremental, so shorter
# thresholds are affordable for better reactivity.
_SEUIL_PAUSE_LONGUE_S = 4.0
_SEUIL_CONFIANCE_BASE = 0.5
_SEUIL_CONFIANCE_APP_CHANGE = 0.8

# Event types the observer ignores entirely (pure noise for
# understanding — same families the IRBuilder filters out).
_EVENT_TYPES_IGNORES = {
    "heartbeat",
    "focus_change",
    "action_result",
    "window_focus_change",
}
|
||||
|
||||
|
||||
class ShadowObserver:
|
||||
"""Observe les événements en temps réel et met à jour la compréhension.
|
||||
|
||||
Thread-safe : peut être appelé depuis plusieurs threads (capture,
|
||||
API, worker).
|
||||
|
||||
Le callback `notify_callback` est appelé de manière synchrone mais les
|
||||
notifications sont extrêmement légères (juste un dataclass) — elles
|
||||
sont destinées à être envoyées via WebSocket/HTTP long-poll depuis la
|
||||
couche API.
|
||||
"""
|
||||
|
||||
NotifyCallback = Callable[[NotificationShadow], None]
|
||||
|
||||
def __init__(
    self,
    notify_callback: Optional[NotifyCallback] = None,
    *,
    enable_gemma4: bool = False,
    gemma4_callback: Optional[Callable[[UnderstoodStep], None]] = None,
):
    """
    Args:
        notify_callback: Called for every notification
            (must be fast, no blocking IO).
        enable_gemma4: If True, an asynchronous task may enrich the
            intents via gemma4 (non-blocking). In practice the caller
            wires that through `gemma4_callback`.
        gemma4_callback: Called in the background to enrich a step
            (via gemma4 or another LLM). Non-blocking.
    """
    self._notify_callback = notify_callback
    self._enable_gemma4 = enable_gemma4
    self._gemma4_callback = gemma4_callback

    # RLock: public methods may nest (e.g. observe_event auto-calls
    # start) while already holding the lock.
    self._lock = threading.RLock()
    # session_id -> mutable observation state (shape defined in start())
    self._sessions: Dict[str, Dict[str, Any]] = {}
|
||||
|
||||
# ----- Cycle de vie --------------------------------------------------
|
||||
|
||||
def start(self, session_id: str) -> None:
    """Start observing a session.

    (Re)initializes the in-memory state for `session_id`, then emits
    an INFO notification telling the user Léa is watching.
    """
    with self._lock:
        self._sessions[session_id] = {
            "steps": [],  # List[UnderstoodStep]
            "current_step": None,  # Optional[UnderstoodStep]
            "last_event_ts": 0.0,
            "last_notif_ts": 0.0,
            "total_events": 0,
            "notifications": [],  # Notification history
            "started_at": time.time(),
            "stopped_at": 0.0,
        }
    self._notifier(
        session_id,
        NiveauNotification.INFO,
        "Léa t'observe. Fais ta tâche normalement, je vais apprendre.",
    )
|
||||
|
||||
def stop(self, session_id: str) -> None:
    """Stop observing and finalize the in-flight segment."""
    with self._lock:
        state = self._sessions.get(session_id)
        if not state:
            return
        current = state.get("current_step")
        # Close the segment still under construction, if it saw events
        if current is not None and current.events:
            current.ended_at = state["last_event_ts"] or time.time()
            state["steps"].append(current)
            state["current_step"] = None
        state["stopped_at"] = time.time()

    # Summarize what was observed (get_understanding re-acquires the
    # RLock, which is safe).
    nb_steps = len(self.get_understanding(session_id))
    if nb_steps > 0:
        self._notifier(
            session_id,
            NiveauNotification.DECOUVERTE,
            f"J'ai observé {nb_steps} étape(s). Tu veux que je te les "
            f"montre pour validation ?",
        )
|
||||
|
||||
def reset(self, session_id: str) -> None:
    """Discard a session's state (after finalization). Idempotent."""
    with self._lock:
        self._sessions.pop(session_id, None)
|
||||
|
||||
# ----- Observation ---------------------------------------------------
|
||||
|
||||
def observe_event(self, session_id: str, event: Dict[str, Any]) -> None:
    """Observe a new event during capture.

    Called for every event the server receives; it MUST stay fast
    (no synchronous network IO).

    Pipeline: (1) decide whether to close the current segment,
    (2) append the event to the (possibly new) segment, (3) refresh
    the provisional intent, (4) detect variables on text input,
    (5) maybe emit a periodic heartbeat.
    """
    evt_type = event.get("type", "")
    if evt_type in _EVENT_TYPES_IGNORES:
        return

    with self._lock:
        state = self._sessions.get(session_id)
        if not state:
            # Auto-start if not started yet (robustness)
            self.start(session_id)
            state = self._sessions[session_id]

        state["total_events"] += 1

        # 1. Decide whether to open a new segment
        current = state.get("current_step")
        should_cut, cut_reason = self._should_cut(state, event)

        if should_cut and current is not None:
            current.ended_at = state["last_event_ts"] or time.time()
            state["steps"].append(current)
            self._emit_step_closed(session_id, current, cut_reason)
            current = None
            state["current_step"] = None

        if current is None:
            # 1-based index for user-facing step numbering
            step_index = len(state["steps"]) + 1
            current = UnderstoodStep(
                step_index=step_index,
                intent=self._initial_intent(event),
                intent_provisoire=True,
                confidence=_SEUIL_CONFIANCE_BASE,
                app_name=self._get_app_name(event),
                window_title=self._get_window_title(event),
                started_at=float(event.get("timestamp", 0)) or time.time(),
            )
            state["current_step"] = current

        # 2. Append the event to the current segment
        current.events.append(event)
        ts = float(event.get("timestamp", 0)) or time.time()
        state["last_event_ts"] = ts

        # 3. Refresh the provisional intent from accumulated context
        current.intent = self._refine_intent(current, event)

        # 4. Variable detection while typing
        if evt_type == "text_input":
            self._handle_text_input(session_id, current, event)

        # 5. Periodic summary emission (every 5 s)
        self._maybe_emit_heartbeat(session_id, state)
|
||||
|
||||
# ----- API publique --------------------------------------------------
|
||||
|
||||
def get_understanding(
    self, session_id: str, include_current: bool = True
) -> List[Dict[str, Any]]:
    """Return what Léa has understood so far.

    Args:
        session_id: Observation-session identifier.
        include_current: Also include the step still under construction.

    Returns:
        List of dicts shaped like:
        [{"step": 1, "intent": "Ouvrir le Bloc-notes",
          "confidence": 0.9, "app": "Bloc-notes",
          "events_count": 4, ...}, ...]
        Unknown sessions yield an empty list.
    """
    with self._lock:
        state = self._sessions.get(session_id)
        if not state:
            return []
        steps = list(state["steps"])
        if include_current and state.get("current_step") is not None:
            steps = steps + [state["current_step"]]

        # Rename "step_index" -> "step" for the public payload
        out = []
        for step in steps:
            d = step.to_dict()
            d["step"] = d.pop("step_index")
            out.append(d)
        return out
|
||||
|
||||
def get_notifications(
    self, session_id: str, since_ts: float = 0.0
) -> List[Dict[str, Any]]:
    """Return notifications emitted since *since_ts* (inclusive).

    Unknown sessions yield an empty list.
    """
    with self._lock:
        state = self._sessions.get(session_id)
        if not state:
            return []
        recent = []
        for notif in state["notifications"]:
            if notif.timestamp >= since_ts:
                recent.append(notif.to_dict())
        return recent
|
||||
|
||||
def get_current_step(
    self, session_id: str
) -> Optional[Dict[str, Any]]:
    """Return the step currently under construction, or None."""
    with self._lock:
        state = self._sessions.get(session_id)
        current = state.get("current_step") if state else None
        return current.to_dict() if current is not None else None
|
||||
|
||||
def get_steps_internal(
    self, session_id: str, include_current: bool = True
) -> List[UnderstoodStep]:
    """Internal variant: return the `UnderstoodStep` objects themselves.

    Used by the ShadowValidator to rebuild a WorkflowIR.

    Args:
        session_id: Observation-session identifier.
        include_current: Also include the step under construction.

    Returns:
        Copies of the steps, so callers cannot mutate live state.
    """
    with self._lock:
        state = self._sessions.get(session_id)
        if not state:
            return []
        steps = list(state["steps"])
        if include_current and state.get("current_step") is not None:
            steps = steps + [state["current_step"]]
        # Return copies to avoid external mutation
        return [self._copy_step(s) for s in steps]
|
||||
|
||||
def has_session(self, session_id: str) -> bool:
    """Tell whether observation state exists for *session_id*."""
    with self._lock:
        known = session_id in self._sessions
    return known
|
||||
|
||||
# ----- Internals : segmentation --------------------------------------
|
||||
|
||||
def _should_cut(
    self, state: Dict[str, Any], event: Dict[str, Any]
) -> tuple:
    """Decide whether this event should start a new segment.

    Args:
        state: Mutable session state (reads "current_step").
        event: Incoming raw event.

    Returns:
        (should_cut, reason) — reason is a short machine tag
        ("changement_application" or "pause_longue"); empty when no cut.
    """
    current = state.get("current_step")
    if current is None or not current.events:
        return (False, "")

    # Cut: application change
    new_app = self._get_app_name(event)
    if new_app and current.app_name and new_app != current.app_name:
        return (True, "changement_application")

    # Cut: long pause between two events (only with usable timestamps)
    prev_ts = float(current.events[-1].get("timestamp", 0))
    curr_ts = float(event.get("timestamp", 0))
    if prev_ts > 0 and curr_ts > 0:
        if (curr_ts - prev_ts) > _SEUIL_PAUSE_LONGUE_S:
            return (True, "pause_longue")

    # "Heavy" key combo such as ctrl+s (save) → logical end of step
    evt_type = event.get("type", "")
    if evt_type in ("key_combo", "key_press"):
        keys = [str(k).lower() for k in event.get("keys", [])]
        if "ctrl" in keys and any(k in keys for k in ("s", "enter")):
            # We attach the key_combo to the current step, then cut
            # AFTERWARDS — returning False here is deliberate; the cut
            # is expected on the next event.
            # NOTE(review): no flag is set here, so nothing actually
            # forces a cut on the next event unless another criterion
            # fires — confirm whether that deferred cut was implemented.
            return (False, "")

    return (False, "")
|
||||
|
||||
def _initial_intent(self, event: Dict[str, Any]) -> str:
|
||||
"""Intention provisoire d'un tout nouveau segment."""
|
||||
app = self._get_app_name(event) or self._get_window_title(event)
|
||||
evt_type = event.get("type", "")
|
||||
if evt_type == "mouse_click":
|
||||
hint = event.get("vision_info", {}).get("text", "")
|
||||
if hint:
|
||||
return f"Cliquer sur « {hint} »"
|
||||
if app:
|
||||
return f"Interagir avec {app}"
|
||||
return "Cliquer quelque part"
|
||||
if evt_type == "text_input":
|
||||
text = event.get("text", "")[:40]
|
||||
return f"Saisir du texte" + (f" « {text} »" if text else "")
|
||||
if evt_type in ("key_combo", "key_press"):
|
||||
keys = event.get("keys", [])
|
||||
return f"Appuyer sur {'+'.join(keys)}" if keys else "Raccourci clavier"
|
||||
return f"Action dans {app}" if app else "Action"
|
||||
|
||||
    def _refine_intent(
        self, step: UnderstoodStep, event: Dict[str, Any]
    ) -> str:
        """Refine the step's intent as more events get attached to it.

        Simple heuristics only — no gemma4 call here, to stay fast.
        May raise `step.confidence` in place when a pattern matches.
        Returns the (possibly unchanged) intent string, in French.
        """
        types = [e.get("type", "") for e in step.events]
        has_click = "mouse_click" in types
        has_type = "text_input" in types
        has_key = any(t in ("key_combo", "key_press") for t in types)
        app = step.app_name or self._get_window_title(event)

        # Case 1: click + typing + Enter pressed somewhere → looks like a search.
        if has_click and has_type:
            texts = [e.get("text", "") for e in step.events if e.get("type") == "text_input"]
            if texts and any("enter" in [k.lower() for k in e.get("keys", [])]
                             for e in step.events if e.get("type") in ("key_combo", "key_press")):
                # First non-empty typed text is taken as the search query.
                premier_texte = next((t for t in texts if t), "")
                if premier_texte:
                    step.confidence = min(0.85, step.confidence + 0.05)
                    return f"Rechercher « {premier_texte[:30]} »"

        # Case 2: typing without any click → plain text entry.
        if has_type and not has_click:
            texts = [e.get("text", "") for e in step.events if e.get("type") == "text_input"]
            premier_texte = next((t for t in texts if t), "")
            if premier_texte:
                return f"Écrire « {premier_texte[:40]} »"
            return "Écrire du texte"

        # Case 3: recognised shortcuts — ctrl+s (save), ctrl+c (copy), ctrl+v (paste).
        if has_key:
            for e in step.events:
                if e.get("type") in ("key_combo", "key_press"):
                    keys = [str(k).lower() for k in e.get("keys", [])]
                    if "ctrl" in keys and "s" in keys:
                        # Saving is a strong signal → bump confidence.
                        step.confidence = min(0.9, step.confidence + 0.1)
                        return f"Sauvegarder{' dans ' + app if app else ''}"
                    if "ctrl" in keys and "c" in keys:
                        return f"Copier{' depuis ' + app if app else ''}"
                    if "ctrl" in keys and "v" in keys:
                        return f"Coller{' dans ' + app if app else ''}"

        # Case 4: lone click with an identifiable application.
        if has_click and app:
            hint = ""
            # Use the first click that carries a vision hint.
            for e in step.events:
                if e.get("type") == "mouse_click":
                    hint = e.get("vision_info", {}).get("text", "")
                    if hint:
                        break
            if hint:
                return f"Cliquer sur « {hint} » dans {app}"
            return f"Interagir avec {app}"

        # No pattern matched: keep the current intent.
        return step.intent
|
||||
|
||||
def _handle_text_input(
|
||||
self,
|
||||
session_id: str,
|
||||
step: UnderstoodStep,
|
||||
event: Dict[str, Any],
|
||||
) -> None:
|
||||
"""Détecter et notifier une variable lors d'une saisie texte."""
|
||||
text = (event.get("text") or "").strip()
|
||||
if not text or len(text) < 3:
|
||||
return
|
||||
|
||||
# Déduire un nom de variable provisoire
|
||||
var_name = f"texte_{len(step.variables_detectees) + 1}"
|
||||
step.variables_detectees.append(var_name)
|
||||
|
||||
# Heuristique : détecter le type plausible
|
||||
var_type = self._guess_variable_type(text)
|
||||
|
||||
self._notifier(
|
||||
session_id,
|
||||
NiveauNotification.VARIABLE,
|
||||
f"Variable détectée : tu as tapé « {text[:40]} » — c'est {var_type} ?",
|
||||
step_index=step.step_index,
|
||||
data={
|
||||
"variable_name": var_name,
|
||||
"value": text,
|
||||
"variable_type": var_type,
|
||||
},
|
||||
)
|
||||
|
||||
def _guess_variable_type(self, text: str) -> str:
|
||||
"""Deviner le type d'une variable à partir de sa valeur."""
|
||||
t = text.strip()
|
||||
# Date (basique)
|
||||
if len(t) == 10 and t[2] in "/-" and t[5] in "/-":
|
||||
return "une date"
|
||||
if t.isdigit():
|
||||
return "un numéro"
|
||||
if "@" in t and "." in t:
|
||||
return "une adresse e-mail"
|
||||
if len(t) <= 10 and t.replace(" ", "").replace("-", "").isalnum() and not any(c.islower() for c in t):
|
||||
return "un code"
|
||||
if " " in t and len(t) > 10:
|
||||
return "un texte libre"
|
||||
return "un texte"
|
||||
|
||||
# ----- Internals : notifications -------------------------------------
|
||||
|
||||
def _notifier(
|
||||
self,
|
||||
session_id: str,
|
||||
niveau: NiveauNotification,
|
||||
message: str,
|
||||
*,
|
||||
step_index: int = -1,
|
||||
data: Optional[Dict[str, Any]] = None,
|
||||
) -> None:
|
||||
"""Créer et émettre une notification."""
|
||||
notif = NotificationShadow(
|
||||
notif_id=uuid.uuid4().hex[:12],
|
||||
niveau=niveau,
|
||||
message=message,
|
||||
session_id=session_id,
|
||||
step_index=step_index,
|
||||
data=data or {},
|
||||
timestamp=time.time(),
|
||||
)
|
||||
|
||||
with self._lock:
|
||||
state = self._sessions.get(session_id)
|
||||
if state is not None:
|
||||
state["notifications"].append(notif)
|
||||
state["last_notif_ts"] = notif.timestamp
|
||||
|
||||
if self._notify_callback is not None:
|
||||
try:
|
||||
self._notify_callback(notif)
|
||||
except Exception as e:
|
||||
logger.debug(f"ShadowObserver: callback a échoué : {e}")
|
||||
|
||||
def _emit_step_closed(
|
||||
self,
|
||||
session_id: str,
|
||||
step: UnderstoodStep,
|
||||
reason: str,
|
||||
) -> None:
|
||||
"""Émettre une notification quand une étape est fermée."""
|
||||
raison_humaine = {
|
||||
"changement_application": "tu es passé à une autre application",
|
||||
"pause_longue": "tu as fait une pause",
|
||||
}.get(reason, "")
|
||||
|
||||
suffixe = f" ({raison_humaine})" if raison_humaine else ""
|
||||
self._notifier(
|
||||
session_id,
|
||||
NiveauNotification.DECOUVERTE,
|
||||
f"Nouvelle étape comprise : {step.intent}{suffixe}",
|
||||
step_index=step.step_index,
|
||||
data={"step": step.to_dict()},
|
||||
)
|
||||
|
||||
if self._enable_gemma4 and self._gemma4_callback is not None:
|
||||
# Non bloquant : on délègue au caller (qui peut utiliser un thread)
|
||||
try:
|
||||
self._gemma4_callback(self._copy_step(step))
|
||||
except Exception as e:
|
||||
logger.debug(f"ShadowObserver: gemma4_callback a échoué : {e}")
|
||||
|
||||
def _maybe_emit_heartbeat(
|
||||
self,
|
||||
session_id: str,
|
||||
state: Dict[str, Any],
|
||||
) -> None:
|
||||
"""Émettre un résumé périodique (toutes les 5s env.)."""
|
||||
now = time.time()
|
||||
last = state.get("last_notif_ts", 0)
|
||||
if now - last < 5.0:
|
||||
return
|
||||
nb_steps = len(state["steps"])
|
||||
if state.get("current_step") is not None:
|
||||
nb_steps += 1
|
||||
if nb_steps == 0:
|
||||
return
|
||||
self._notifier(
|
||||
session_id,
|
||||
NiveauNotification.INFO,
|
||||
f"J'ai compris {nb_steps} étape(s) jusqu'ici.",
|
||||
data={"steps_count": nb_steps},
|
||||
)
|
||||
|
||||
# ----- Utilitaires ---------------------------------------------------
|
||||
|
||||
@staticmethod
|
||||
def _get_app_name(event: Dict[str, Any]) -> str:
|
||||
"""Extraire le nom d'application depuis un événement."""
|
||||
window = event.get("window") or {}
|
||||
if isinstance(window, dict):
|
||||
title = window.get("title", "")
|
||||
app_name = window.get("app_name", "")
|
||||
else:
|
||||
title = event.get("window_title", "")
|
||||
app_name = ""
|
||||
|
||||
# Préférer app_name si disponible
|
||||
if app_name and app_name != "unknown":
|
||||
return app_name
|
||||
|
||||
# Sinon, extraire depuis le titre
|
||||
for sep in [" – ", " - ", " — "]:
|
||||
if sep in title:
|
||||
return title.split(sep)[-1].strip()
|
||||
return title.strip() if title else ""
|
||||
|
||||
@staticmethod
|
||||
def _get_window_title(event: Dict[str, Any]) -> str:
|
||||
window = event.get("window") or {}
|
||||
if isinstance(window, dict):
|
||||
return window.get("title", "") or ""
|
||||
return event.get("window_title", "") or ""
|
||||
|
||||
@staticmethod
|
||||
def _copy_step(step: UnderstoodStep) -> UnderstoodStep:
|
||||
"""Copie superficielle pour éviter les fuites de mutation."""
|
||||
return UnderstoodStep(
|
||||
step_index=step.step_index,
|
||||
intent=step.intent,
|
||||
intent_provisoire=step.intent_provisoire,
|
||||
confidence=step.confidence,
|
||||
app_name=step.app_name,
|
||||
window_title=step.window_title,
|
||||
events=list(step.events),
|
||||
variables_detectees=list(step.variables_detectees),
|
||||
started_at=step.started_at,
|
||||
ended_at=step.ended_at,
|
||||
validated=step.validated,
|
||||
corrected=step.corrected,
|
||||
cancelled=step.cancelled,
|
||||
)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Singleton partagé (optionnel)
|
||||
# =========================================================================
|
||||
|
||||
|
||||
# Process-wide observer instance, created lazily under _shared_lock.
_shared_observer: Optional[ShadowObserver] = None
_shared_lock = threading.Lock()


def get_shared_observer() -> ShadowObserver:
    """Return the shared ShadowObserver for the API, creating it on first use.

    Thread-safe: creation is guarded by a module-level lock.
    """
    global _shared_observer
    with _shared_lock:
        if _shared_observer is None:
            _shared_observer = ShadowObserver()
    return _shared_observer
|
||||
468
core/workflow/shadow_validator.py
Normal file
468
core/workflow/shadow_validator.py
Normal file
@@ -0,0 +1,468 @@
|
||||
# core/workflow/shadow_validator.py
|
||||
"""
|
||||
ShadowValidator — Applique les feedbacks utilisateur et reconstruit un WorkflowIR.
|
||||
|
||||
Le ShadowObserver observe et comprend en temps réel. Le ShadowValidator,
|
||||
lui, prend les décisions de l'utilisateur (valider, corriger, annuler,
|
||||
combiner) et reconstruit un WorkflowIR final « propre » qui sera
|
||||
persisté et exécutable par le runtime.
|
||||
|
||||
Opérations supportées :
|
||||
- validate(step_index) : marquer l'étape comme validée
|
||||
- correct(step_index, new_intent) : corriger l'intention
|
||||
- undo(step_index) : annuler l'étape (elle sera exclue du WorkflowIR)
|
||||
- merge_with_next(step_index) : fusionner avec l'étape suivante
|
||||
- cancel() : annuler tout le workflow
|
||||
- split(step_index, at_event_index) : couper une étape en deux (bonus)
|
||||
|
||||
Le validator ne touche PAS aux événements bruts (events.jsonl) — il
|
||||
travaille sur la liste des `UnderstoodStep` fournie par le ShadowObserver.
|
||||
|
||||
Une fois toutes les actions appliquées, `build_workflow_ir()` produit
|
||||
un WorkflowIR exécutable à partir des étapes validées/corrigées.
|
||||
|
||||
Usage :
|
||||
|
||||
validator = ShadowValidator()
|
||||
validator.set_steps(observer.get_steps_internal(session_id))
|
||||
|
||||
validator.apply_feedback({"action": "validate", "step_index": 1})
|
||||
validator.apply_feedback({
|
||||
"action": "correct",
|
||||
"step_index": 2,
|
||||
"new_intent": "Sauvegarder le document",
|
||||
})
|
||||
validator.apply_feedback({"action": "undo", "step_index": 3})
|
||||
|
||||
ir = validator.build_workflow_ir(
|
||||
session_id="sess_abc",
|
||||
name="Mon workflow",
|
||||
domain="generic",
|
||||
)
|
||||
ir.save("data/workflows/")
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from .shadow_observer import UnderstoodStep
|
||||
from .workflow_ir import Action, Step, Variable, WorkflowIR
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Feedback actions understood by ShadowValidator.apply_feedback().
FEEDBACK_ACTIONS = {"validate", "correct", "undo", "cancel", "merge_next", "split"}
|
||||
|
||||
|
||||
@dataclass
class FeedbackResult:
    """Outcome of a single feedback operation."""

    ok: bool              # True when the operation was applied
    action: str           # name of the feedback action
    step_index: int       # 1-based step index, -1 when not applicable
    message: str          # human-readable summary (French, user-facing)
    data: Dict[str, Any]  # extra payload for the UI

    def to_dict(self) -> Dict[str, Any]:
        """Serialise for JSON transport; *data* is copied to avoid aliasing."""
        payload = {
            "ok": self.ok,
            "action": self.action,
            "step_index": self.step_index,
            "message": self.message,
        }
        payload["data"] = dict(self.data)
        return payload
|
||||
|
||||
|
||||
class ShadowValidator:
    """Apply user feedback to observed steps and rebuild a clean WorkflowIR.

    The validator never touches the raw event log; it only works on the
    ``UnderstoodStep`` list produced by the ShadowObserver. Step indices in
    the public API are 1-based, matching what the user sees in the UI.
    """

    def __init__(self) -> None:
        # Working copy of the observed steps (cloned in set_steps()).
        self._steps: List[UnderstoodStep] = []
        # Set to True by the "cancel" action; blocks build_workflow_ir().
        self._cancelled_workflow: bool = False
        # Every FeedbackResult produced so far, in application order.
        self._history: List[FeedbackResult] = []

    # ----- API -----------------------------------------------------------

    def set_steps(self, steps: List[UnderstoodStep]) -> None:
        """Initialise the validator with the observed steps (cloned), resetting state."""
        self._steps = [self._clone(s) for s in steps]
        self._cancelled_workflow = False
        self._history = []

    @property
    def steps(self) -> List[UnderstoodStep]:
        """Read-only view (shallow list copy) of the current steps."""
        return list(self._steps)

    @property
    def history(self) -> List[FeedbackResult]:
        """History of applied feedbacks (shallow list copy)."""
        return list(self._history)

    @property
    def is_cancelled(self) -> bool:
        # True once a "cancel" feedback has been applied.
        return self._cancelled_workflow

    def apply_feedback(self, feedback: Dict[str, Any]) -> FeedbackResult:
        """Apply one user feedback and record the result in ``history``.

        *feedback* is a dict of the form::

            {
                "action": "validate" | "correct" | "undo" | "cancel" | "merge_next" | "split",
                "step_index": 1,      # 1-based (as in get_understanding)
                "new_intent": "...",  # for "correct"
                "at_event_index": 3,  # for "split"
            }

        Returns:
            FeedbackResult (``ok=False`` for unknown actions or bad indices).
        """
        action = (feedback.get("action") or "").strip()
        if action not in FEEDBACK_ACTIONS:
            return self._record(FeedbackResult(
                ok=False, action=action, step_index=-1,
                message=f"Action inconnue : « {action} »",
                data={"supported": sorted(FEEDBACK_ACTIONS)},
            ))

        # "cancel" applies to the whole workflow; no step index required.
        if action == "cancel":
            return self._do_cancel()

        step_index = int(feedback.get("step_index", -1))
        if not self._is_valid_step_index(step_index):
            return self._record(FeedbackResult(
                ok=False, action=action, step_index=step_index,
                message=f"Index d'étape invalide : {step_index}",
                data={"nb_steps": len(self._steps)},
            ))

        if action == "validate":
            return self._do_validate(step_index)
        if action == "correct":
            return self._do_correct(step_index, feedback.get("new_intent", ""))
        if action == "undo":
            return self._do_undo(step_index)
        if action == "merge_next":
            return self._do_merge_next(step_index)
        if action == "split":
            return self._do_split(
                step_index, int(feedback.get("at_event_index", -1))
            )

        # Defensive fallback: unreachable while FEEDBACK_ACTIONS matches the
        # dispatch chain above.
        return self._record(FeedbackResult(
            ok=False, action=action, step_index=step_index,
            message="Action non implémentée", data={},
        ))

    def apply_feedbacks(
        self, feedbacks: List[Dict[str, Any]]
    ) -> List[FeedbackResult]:
        """Apply several feedbacks in order; one result per feedback."""
        return [self.apply_feedback(f) for f in feedbacks]

    # ----- Operations ----------------------------------------------------

    def _do_validate(self, step_index: int) -> FeedbackResult:
        """Mark a step as explicitly validated by the user."""
        step = self._get_step(step_index)
        step.validated = True
        step.intent_provisoire = False
        # User confirmation raises the confidence floor, never lowers it.
        step.confidence = max(step.confidence, 0.95)
        return self._record(FeedbackResult(
            ok=True, action="validate", step_index=step_index,
            message=f"Étape {step_index} validée : {step.intent}",
            data={"intent": step.intent},
        ))

    def _do_correct(
        self, step_index: int, new_intent: str
    ) -> FeedbackResult:
        """Replace a step's intent with the user-provided one (implies validate)."""
        new_intent = (new_intent or "").strip()
        if not new_intent:
            return self._record(FeedbackResult(
                ok=False, action="correct", step_index=step_index,
                message="Nouvelle intention vide",
                data={},
            ))
        step = self._get_step(step_index)
        old_intent = step.intent
        step.intent = new_intent
        step.corrected = True
        step.validated = True  # correcting implicitly validates
        step.intent_provisoire = False
        step.confidence = 1.0
        return self._record(FeedbackResult(
            ok=True, action="correct", step_index=step_index,
            message=f"Étape {step_index} corrigée : « {old_intent} » → « {new_intent} »",
            data={"old_intent": old_intent, "new_intent": new_intent},
        ))

    def _do_undo(self, step_index: int) -> FeedbackResult:
        """Flag a step as cancelled; it will be excluded from the WorkflowIR."""
        step = self._get_step(step_index)
        step.cancelled = True
        return self._record(FeedbackResult(
            ok=True, action="undo", step_index=step_index,
            message=f"Étape {step_index} annulée : {step.intent}",
            data={"intent": step.intent},
        ))

    def _do_merge_next(self, step_index: int) -> FeedbackResult:
        """Merge the step at *step_index* with the following one."""
        if step_index >= len(self._steps):
            # step_index addresses the last step: nothing after it to merge.
            return self._record(FeedbackResult(
                ok=False, action="merge_next", step_index=step_index,
                message="Aucune étape suivante à fusionner",
                data={},
            ))
        step = self._get_step(step_index)
        next_step = self._get_step(step_index + 1)

        merged = UnderstoodStep(
            step_index=step.step_index,
            # Keep the longer (presumably more descriptive) intent.
            intent=step.intent if len(step.intent) >= len(next_step.intent) else next_step.intent,
            intent_provisoire=False,
            confidence=max(step.confidence, next_step.confidence),
            app_name=step.app_name or next_step.app_name,
            window_title=step.window_title or next_step.window_title,
            events=list(step.events) + list(next_step.events),
            variables_detectees=list(step.variables_detectees)
            + list(next_step.variables_detectees),
            started_at=step.started_at or next_step.started_at,
            ended_at=next_step.ended_at or step.ended_at,
            validated=True,
            corrected=step.corrected or next_step.corrected,
            cancelled=False,
        )

        # Replace [step, next_step] with [merged].
        idx0 = step_index - 1  # 1-based → 0-based
        self._steps.pop(idx0 + 1)  # next_step
        self._steps[idx0] = merged
        self._renumber()

        return self._record(FeedbackResult(
            ok=True, action="merge_next", step_index=step_index,
            message=f"Étapes {step_index} et {step_index + 1} fusionnées",
            data={"intent": merged.intent},
        ))

    def _do_split(
        self, step_index: int, at_event_index: int
    ) -> FeedbackResult:
        """Split a step in two at event *at_event_index*.

        *at_event_index* is 0-based within the step's events and becomes the
        first event of the right-hand step; it must leave at least one event
        on each side.
        """
        step = self._get_step(step_index)
        if at_event_index <= 0 or at_event_index >= len(step.events):
            return self._record(FeedbackResult(
                ok=False, action="split", step_index=step_index,
                message=f"Index de coupe invalide : {at_event_index}",
                data={"nb_events": len(step.events)},
            ))

        left_events = step.events[:at_event_index]
        right_events = step.events[at_event_index:]

        left = UnderstoodStep(
            step_index=step.step_index,
            intent=step.intent + " (1/2)",
            intent_provisoire=True,
            # Splitting lowers confidence: the original intent now covers less.
            confidence=step.confidence * 0.9,
            app_name=step.app_name,
            window_title=step.window_title,
            events=left_events,
            started_at=step.started_at,
        )
        right = UnderstoodStep(
            step_index=step.step_index + 1,
            intent=step.intent + " (2/2)",
            intent_provisoire=True,
            confidence=step.confidence * 0.9,
            app_name=step.app_name,
            window_title=step.window_title,
            events=right_events,
            # Right half starts at its first event's timestamp when available.
            started_at=float(right_events[0].get("timestamp", 0))
            if right_events else step.started_at,
            ended_at=step.ended_at,
        )

        idx0 = step_index - 1
        self._steps[idx0] = left
        self._steps.insert(idx0 + 1, right)
        self._renumber()

        return self._record(FeedbackResult(
            ok=True, action="split", step_index=step_index,
            message=f"Étape {step_index} coupée en 2",
            data={"nb_steps": len(self._steps)},
        ))

    def _do_cancel(self) -> FeedbackResult:
        """Cancel the whole workflow; build_workflow_ir() will return None."""
        self._cancelled_workflow = True
        return self._record(FeedbackResult(
            ok=True, action="cancel", step_index=-1,
            message="Workflow annulé",
            data={},
        ))

    # ----- WorkflowIR construction ---------------------------------------

    def build_workflow_ir(
        self,
        session_id: str = "",
        name: str = "",
        domain: str = "generic",
        *,
        require_all_validated: bool = False,
    ) -> Optional[WorkflowIR]:
        """Build a WorkflowIR from the corrected steps.

        Cancelled steps are skipped. Typed texts longer than 2 characters are
        promoted to workflow variables (first occurrence only) and the
        corresponding "type" actions are rewritten to reference them.

        Args:
            session_id: Identifier of the source session.
            name: Workflow name (defaults to a timestamped one).
            domain: Business domain.
            require_all_validated: If True, raise ValueError when a kept step
                was not explicitly validated.

        Returns:
            WorkflowIR, or None when the workflow was cancelled.

        Raises:
            ValueError: see *require_all_validated*.
        """
        if self._cancelled_workflow:
            logger.info("ShadowValidator: workflow annulé, pas de build")
            return None

        ir = WorkflowIR.new(
            name=name or f"Workflow du {time.strftime('%d/%m/%Y %H:%M')}",
            domain=domain,
            learned_from=session_id,
        )

        variables: List[Variable] = []
        seen_texts = set()  # dedupes variable extraction across all steps
        applications: set = set()

        for step in self._steps:
            if step.cancelled:
                continue
            if require_all_validated and not step.validated:
                raise ValueError(
                    f"Étape {step.step_index} non validée : {step.intent}"
                )

            if step.app_name:
                applications.add(step.app_name)

            actions = []
            for evt in step.events:
                action = self._event_to_action(evt)
                if action is None:
                    continue

                # Variable detection: first occurrence of a typed text becomes
                # a variable; the action then references "{var_name}".
                if action.type == "type" and action.text:
                    text = action.text.strip()
                    if text and text not in seen_texts and len(text) > 2:
                        seen_texts.add(text)
                        var_name = f"texte_{len(variables) + 1}"
                        variables.append(Variable(
                            name=var_name,
                            description=f"Texte saisi : « {text[:50]} »",
                            source="user",
                            default=text,
                        ))
                        action.variable = True
                        action.text = "{" + var_name + "}"

                actions.append(action)

            ir_step = Step(
                step_id=f"s{len(ir.steps) + 1}",
                intent=step.intent,
                actions=actions,
            )
            ir.steps.append(ir_step)

        ir.variables = variables
        ir.applications = sorted(applications)
        ir.updated_at = time.time()

        logger.info(
            f"ShadowValidator: WorkflowIR construit — {len(ir.steps)} étapes, "
            f"{len(ir.variables)} variables"
        )
        return ir

    # ----- Utilities -----------------------------------------------------

    def _is_valid_step_index(self, step_index: int) -> bool:
        # Public indices are 1-based and must address an existing step.
        return 1 <= step_index <= len(self._steps)

    def _get_step(self, step_index: int) -> UnderstoodStep:
        # Caller must have checked _is_valid_step_index() first.
        return self._steps[step_index - 1]

    def _renumber(self) -> None:
        # Re-assign contiguous 1-based indices after a merge or a split.
        for i, s in enumerate(self._steps, start=1):
            s.step_index = i

    def _record(self, result: FeedbackResult) -> FeedbackResult:
        # Append to history and pass the result through (fluent helper).
        self._history.append(result)
        return result

    @staticmethod
    def _clone(step: UnderstoodStep) -> UnderstoodStep:
        """Copy a step with fresh lists so mutations don't leak back."""
        return UnderstoodStep(
            step_index=step.step_index,
            intent=step.intent,
            intent_provisoire=step.intent_provisoire,
            confidence=step.confidence,
            app_name=step.app_name,
            window_title=step.window_title,
            events=list(step.events),
            variables_detectees=list(step.variables_detectees),
            started_at=step.started_at,
            ended_at=step.ended_at,
            validated=step.validated,
            corrected=step.corrected,
            cancelled=step.cancelled,
        )

    @staticmethod
    def _event_to_action(evt: Dict[str, Any]) -> Optional[Action]:
        """Convert a raw recorded event into an IR Action (mirrors IRBuilder).

        Returns None for event types that do not map to an action.
        """
        evt_type = evt.get("type", "")

        if evt_type == "mouse_click":
            window = evt.get("window") or {}
            if isinstance(window, dict):
                target = window.get("title", "")
            else:
                # Legacy event shape: "window" is not a dict.
                target = evt.get("window_title", "")
            return Action(
                type="click",
                target=target or "",
                anchor_hint=(evt.get("vision_info") or {}).get("text", ""),
            )
        if evt_type == "text_input":
            text = evt.get("text", "")
            if text:
                return Action(type="type", text=text)
        if evt_type in ("key_combo", "key_press"):
            keys = evt.get("keys", [])
            if keys:
                return Action(type="key_combo", keys=list(keys))
        if evt_type == "scroll":
            return Action(type="scroll")
        return None
|
||||
337
core/workflow/surface_classifier.py
Normal file
337
core/workflow/surface_classifier.py
Normal file
@@ -0,0 +1,337 @@
|
||||
# core/workflow/surface_classifier.py
|
||||
"""
|
||||
SurfaceClassifier — détecte le type de surface applicative au moment de l'exécution.
|
||||
|
||||
4 types de surfaces reconnus :
|
||||
- citrix : session Citrix/RDP/TSE (wfica32.exe, mstsc.exe, CDViewer.exe)
|
||||
→ vision pure obligatoire, paramètres tolérants
|
||||
- windows_native : application Windows native (notepad.exe, explorer.exe, DPI...)
|
||||
→ vision + UIA bonus, paramètres standards
|
||||
- web_local : navigateur local (chrome.exe, firefox.exe, msedge.exe)
|
||||
→ vision + DOM/CDP bonus (si activé), paramètres rapides
|
||||
- unknown : fallback → vision pure, paramètres par défaut
|
||||
|
||||
Le classifier s'exécute UNE SEULE FOIS au début d'une session ou d'un replay.
|
||||
Son résultat détermine :
|
||||
1. Quels helpers sont activés (UIA ? CDP ?)
|
||||
2. Les paramètres de résolution (timeouts, seuils OCR)
|
||||
3. La stratégie de recovery
|
||||
|
||||
Principe : la vision reste le fondement. Le classifier décide juste
|
||||
des bonus à activer et des paramètres à tuner.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import subprocess
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SurfaceType(str, Enum):
    """Types of application surfaces (string-valued for easy JSON use)."""
    CITRIX = "citrix"                  # Citrix/RDP/TSE session
    WINDOWS_NATIVE = "windows_native"  # native Windows application
    WEB_LOCAL = "web_local"            # local browser
    UNKNOWN = "unknown"                # fallback
|
||||
|
||||
|
||||
# Processus connus par type de surface
|
||||
_CITRIX_PROCESSES = {
|
||||
"wfica32.exe", # Citrix Workspace (Windows 10+)
|
||||
"cdviewer.exe", # Citrix Desktop Viewer
|
||||
"cdviewer.exe",
|
||||
"mstsc.exe", # Microsoft Remote Desktop
|
||||
"vmware-vmx.exe", # VMware (cas RDS)
|
||||
"xen.exe", # Citrix XenApp
|
||||
"receiver.exe", # Citrix Receiver (ancien)
|
||||
"selfservice.exe", # Citrix Self-Service Plug-in
|
||||
}
|
||||
|
||||
_BROWSER_PROCESSES = {
|
||||
"chrome.exe",
|
||||
"msedge.exe",
|
||||
"firefox.exe",
|
||||
"brave.exe",
|
||||
"opera.exe",
|
||||
"vivaldi.exe",
|
||||
}
|
||||
|
||||
# Processus système Windows qui ne sont PAS des surfaces applicatives
|
||||
_SYSTEM_PROCESSES = {
|
||||
"explorer.exe", # Shell Windows (cas spécial — on le compte comme natif)
|
||||
"searchhost.exe", # Recherche Windows
|
||||
"startmenuexperiencehost.exe",
|
||||
"shellexperiencehost.exe",
|
||||
"applicationframehost.exe",
|
||||
}
|
||||
|
||||
|
||||
@dataclass
class SurfaceProfile:
    """Full profile of a detected application surface."""
    surface_type: SurfaceType
    process_name: str = ""    # process owning the active window
    window_title: str = ""    # title of the active window
    confidence: float = 1.0   # detection confidence (0-1)

    # Available capabilities
    uia_available: bool = False  # the UIA helper can be used
    cdp_available: bool = False  # Chrome DevTools Protocol reachable
    ocr_available: bool = True   # OCR always available (docTR)
    vlm_available: bool = True   # VLM always available

    # Parameters tuned for the surface
    timeout_click_ms: int = 10000
    timeout_resolve_ms: int = 5000
    ocr_threshold: float = 0.75
    template_threshold: float = 0.85
    max_retries: int = 2
    retry_delay_ms: int = 2000

    # Metadata
    detected_at: float = 0.0
    details: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """JSON-ready representation, grouped by capabilities and parameters."""
        capabilities = {
            "uia": self.uia_available,
            "cdp": self.cdp_available,
            "ocr": self.ocr_available,
            "vlm": self.vlm_available,
        }
        parameters = {
            "timeout_click_ms": self.timeout_click_ms,
            "timeout_resolve_ms": self.timeout_resolve_ms,
            "ocr_threshold": self.ocr_threshold,
            "template_threshold": self.template_threshold,
            "max_retries": self.max_retries,
            "retry_delay_ms": self.retry_delay_ms,
        }
        return {
            "surface_type": self.surface_type.value,
            "process_name": self.process_name,
            "window_title": self.window_title,
            "confidence": round(self.confidence, 3),
            "capabilities": capabilities,
            "parameters": parameters,
            "details": self.details,
        }

    def resolve_order(self) -> List[str]:
        """Resolution order: surface-specific bonuses first, then the vision stack."""
        order: List[str] = []
        if self.uia_available and self.surface_type == SurfaceType.WINDOWS_NATIVE:
            order.append("uia")
        if self.cdp_available and self.surface_type == SurfaceType.WEB_LOCAL:
            order.append("dom")
        order += ["ocr", "template", "vlm"]
        return order
|
||||
|
||||
|
||||
class SurfaceClassifier:
    """Detect the interaction surface and derive adapted parameters.

    Usage:
        classifier = SurfaceClassifier()
        profile = classifier.classify(
            process_name="notepad.exe",
            window_title="Sans titre – Bloc-notes",
        )
        if profile.uia_available:
            # lea_uia.exe can be used on this surface.
            ...
    """

    def __init__(self, uia_helper_path: str = ""):
        """
        Args:
            uia_helper_path: Path to lea_uia.exe (optional; auto-detected otherwise).
        """
        self._uia_helper_path = uia_helper_path or self._find_uia_helper()

    def _find_uia_helper(self) -> str:
        """Locate lea_uia.exe in the standard locations (first match wins)."""
        candidates = [
            r"C:\Lea\helpers\lea_uia.exe",
            r".\helpers\lea_uia.exe",
            # Local Rust build output (development checkout).
            os.path.join(os.path.dirname(__file__), "..", "..", "agent_rust", "lea_uia",
                         "target", "x86_64-pc-windows-gnu", "release", "lea_uia.exe"),
        ]
        for path in candidates:
            if os.path.isfile(path):
                return os.path.abspath(path)
        return ""

    def classify(
        self,
        process_name: str = "",
        window_title: str = "",
        screen_info: Optional[Dict] = None,
    ) -> SurfaceProfile:
        """Classify a surface from the window context.

        Args:
            process_name: Process name (e.g. "notepad.exe").
            window_title: Title of the active window.
            screen_info: Screen info (resolution, DPI, detected compression).
                Currently unused; reserved for future heuristics.

        Returns:
            A SurfaceProfile with capabilities and tuned parameters.
        """
        import time

        process_lower = process_name.lower().strip()
        title_lower = window_title.lower()

        # Citrix detection — absolute priority: inside a Citrix session only
        # pure vision is usable (no UIA, no CDP).
        if process_lower in _CITRIX_PROCESSES:
            return self._build_citrix_profile(process_name, window_title, time.time())

        # Citrix-looking title (e.g. "Session Citrix", "Citrix Receiver").
        if any(marker in title_lower for marker in ["citrix", "ica session", "rdp session"]):
            return self._build_citrix_profile(process_name, window_title, time.time())

        # Browser
        if process_lower in _BROWSER_PROCESSES:
            # Special case: a browser tab hosting an embedded Citrix session.
            if "citrix" in title_lower:
                return self._build_citrix_profile(process_name, window_title, time.time())
            return self._build_web_profile(process_name, window_title, time.time())

        # Native Windows application
        if process_lower.endswith(".exe") and process_lower not in _SYSTEM_PROCESSES:
            return self._build_windows_profile(process_name, window_title, time.time())

        # Windows shell (explorer.exe) — counted as native even though it is
        # excluded by the _SYSTEM_PROCESSES filter above.
        if process_lower == "explorer.exe":
            return self._build_windows_profile(process_name, window_title, time.time())

        # Unknown — safe fallback
        return self._build_unknown_profile(process_name, window_title, time.time())

    def _build_citrix_profile(self, process: str, title: str, ts: float) -> SurfaceProfile:
        """Citrix profile — pure vision, tolerant parameters (JPEG compression, latency)."""
        return SurfaceProfile(
            surface_type=SurfaceType.CITRIX,
            process_name=process,
            window_title=title,
            confidence=0.95,
            uia_available=False,  # UIA is not available inside Citrix
            cdp_available=False,
            ocr_available=True,
            vlm_available=True,
            # Citrix: JPEG compression, latency → aggressive retries
            timeout_click_ms=15000,
            timeout_resolve_ms=10000,
            ocr_threshold=0.65,  # more tolerant (compression artifacts)
            template_threshold=0.75,  # more tolerant
            max_retries=3,
            retry_delay_ms=3000,
            detected_at=ts,
            details={"reason": "citrix_process_or_title"},
        )

    def _build_windows_profile(self, process: str, title: str, ts: float) -> SurfaceProfile:
        """Native Windows profile — vision plus UIA as a bonus when available."""
        uia_ok = self._check_uia_available()
        return SurfaceProfile(
            surface_type=SurfaceType.WINDOWS_NATIVE,
            process_name=process,
            window_title=title,
            confidence=0.9,
            uia_available=uia_ok,
            cdp_available=False,
            ocr_available=True,
            vlm_available=True,
            timeout_click_ms=8000,
            timeout_resolve_ms=5000,
            ocr_threshold=0.75,
            template_threshold=0.85,
            max_retries=2,
            retry_delay_ms=2000,
            detected_at=ts,
            details={
                "reason": "native_windows_process",
                "uia_helper": self._uia_helper_path if uia_ok else "",
            },
        )

    def _build_web_profile(self, process: str, title: str, ts: float) -> SurfaceProfile:
        """Local web profile — vision (+ CDP when an endpoint is reachable)."""
        cdp_ok = self._check_cdp_available()
        return SurfaceProfile(
            surface_type=SurfaceType.WEB_LOCAL,
            process_name=process,
            window_title=title,
            confidence=0.9,
            uia_available=False,  # UIA support is limited for browsers
            cdp_available=cdp_ok,
            ocr_available=True,
            vlm_available=True,
            # Local web: fast, text renders cleanly → stricter thresholds
            timeout_click_ms=5000,
            timeout_resolve_ms=3000,
            ocr_threshold=0.80,
            template_threshold=0.88,
            max_retries=1,
            retry_delay_ms=1000,
            detected_at=ts,
            details={"reason": "browser_process"},
        )

    def _build_unknown_profile(self, process: str, title: str, ts: float) -> SurfaceProfile:
        """Unknown profile — safe defaults (vision only, moderate tolerance)."""
        return SurfaceProfile(
            surface_type=SurfaceType.UNKNOWN,
            process_name=process,
            window_title=title,
            confidence=0.5,
            uia_available=False,
            cdp_available=False,
            ocr_available=True,
            vlm_available=True,
            timeout_click_ms=10000,
            timeout_resolve_ms=5000,
            ocr_threshold=0.70,
            template_threshold=0.80,
            max_retries=2,
            retry_delay_ms=2000,
            detected_at=ts,
            details={"reason": "fallback"},
        )

    def _check_uia_available(self) -> bool:
        """Check that lea_uia.exe is present and functional.

        On Windows: runs `lea_uia.exe health` and expects {"status": "ok"}.
        On Linux/Mac: always False (stub).
        """
        if platform.system() != "Windows":
            return False
        if not self._uia_helper_path or not os.path.isfile(self._uia_helper_path):
            return False
        try:
            result = subprocess.run(
                [self._uia_helper_path, "health"],
                capture_output=True,
                text=True,
                timeout=5,
                # CREATE_NO_WINDOW (0x08000000): without it each health check
                # briefly flashes a console window on screen — same rationale
                # as the UIAHelper wrapper's spawn flags.
                creationflags=getattr(subprocess, "CREATE_NO_WINDOW", 0x08000000),
            )
            if result.returncode != 0:
                return False
            import json
            data = json.loads(result.stdout.strip())
            return data.get("status") == "ok"
        except Exception as e:
            logger.debug(f"UIA health check failed: {e}")
            return False

    def _check_cdp_available(self) -> bool:
        """Check that a Chrome DevTools Protocol endpoint is reachable.

        Probes the conventional CDP endpoint on localhost:9222; any failure
        (refused connection, timeout, non-200) means "not available".
        """
        try:
            import urllib.request
            with urllib.request.urlopen(
                "http://localhost:9222/json/version", timeout=1
            ) as resp:
                return resp.status == 200
        except Exception:
            return False
|
||||
294
core/workflow/uia_helper.py
Normal file
294
core/workflow/uia_helper.py
Normal file
@@ -0,0 +1,294 @@
|
||||
# core/workflow/uia_helper.py
|
||||
"""
|
||||
UIAHelper — Wrapper Python pour lea_uia.exe (helper Rust UI Automation).
|
||||
|
||||
Expose une API Python simple pour interroger UIA via le binaire Rust.
|
||||
Communique via subprocess + stdin/stdout JSON.
|
||||
|
||||
Pourquoi un helper Rust ?
|
||||
- 5-10x plus rapide que pywinauto (10-20ms vs 50-200ms)
|
||||
- Binaire standalone ~500 Ko, aucune dépendance runtime
|
||||
- Pas de problèmes de threading COM en Python
|
||||
- Crash-safe (le crash du helper n'affecte pas l'agent Python)
|
||||
|
||||
Architecture :
|
||||
Python executor
|
||||
↓ subprocess.run
|
||||
lea_uia.exe query --x 812 --y 436
|
||||
↓ UIA API Windows
|
||||
JSON response
|
||||
↓ stdout
|
||||
Python executor parse JSON
|
||||
|
||||
Si lea_uia.exe n'est pas disponible (Linux, binaire absent, crash) :
|
||||
toutes les méthodes retournent None → fallback vision automatique.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import subprocess
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Timeout par défaut pour les appels UIA (en secondes)
|
||||
_DEFAULT_TIMEOUT = 5.0
|
||||
|
||||
# Masquer la fenêtre console lors du spawn de lea_uia.exe sur Windows.
|
||||
# Sans ce flag, chaque appel (à chaque clic utilisateur pendant
|
||||
# l'enregistrement) fait apparaître une fenêtre cmd noire brièvement
|
||||
# visible à l'écran → ralentit la souris et pollue les screenshots
|
||||
# capturés (le VLM peut "voir" le chemin lea_uia.exe comme texte cliqué).
|
||||
#
|
||||
# La valeur 0x08000000 correspond à CREATE_NO_WINDOW défini dans
|
||||
# l'API Windows. Sur Linux/Mac, la valeur est 0 et `creationflags`
|
||||
# est ignoré. getattr() gère le cas où Python expose déjà la constante
|
||||
# sur Windows.
|
||||
if platform.system() == "Windows":
|
||||
_SUBPROCESS_CREATION_FLAGS = getattr(subprocess, "CREATE_NO_WINDOW", 0x08000000)
|
||||
else:
|
||||
_SUBPROCESS_CREATION_FLAGS = 0
|
||||
|
||||
|
||||
@dataclass
class UiaElement:
    """Python-side representation of a UIA element returned by lea_uia.exe."""
    # Accessible name reported by UIA (button label, window title, ...).
    name: str = ""
    # Control type (Button, Edit, MenuItem, ...).
    control_type: str = ""
    class_name: str = ""
    automation_id: str = ""
    # (left, top, right, bottom) in absolute screen pixels.
    bounding_rect: Tuple[int, int, int, int] = (0, 0, 0, 0)
    is_enabled: bool = False
    is_offscreen: bool = True
    # Ancestor chain, outermost first. Entries are dicts; "name" and
    # "control_type" keys are expected but tolerated as missing.
    parent_path: List[Dict[str, str]] = field(default_factory=list)
    process_name: str = ""

    def center(self) -> Tuple[int, int]:
        """Return the center of the bounding rectangle (pixels)."""
        x1, y1, x2, y2 = self.bounding_rect
        return ((x1 + x2) // 2, (y1 + y2) // 2)

    def width(self) -> int:
        """Width of the bounding rectangle in pixels."""
        return self.bounding_rect[2] - self.bounding_rect[0]

    def height(self) -> int:
        """Height of the bounding rectangle in pixels."""
        return self.bounding_rect[3] - self.bounding_rect[1]

    def is_clickable(self) -> bool:
        """Can it be clicked? Enabled, on-screen and with a non-degenerate rect."""
        return (
            self.is_enabled
            and not self.is_offscreen
            and self.width() > 0
            and self.height() > 0
        )

    def path_signature(self) -> str:
        """Signature of the parent path (used to re-find the element later).

        Fix: ancestors are read with .get() — the filter only guarantees the
        "name" key, so a parent entry missing "control_type" previously
        raised KeyError.
        """
        parts = [
            f"{p.get('control_type', '')}[{p['name']}]"
            for p in self.parent_path
            if p.get("name")
        ]
        parts.append(f"{self.control_type}[{self.name}]")
        return " > ".join(parts)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-compatible dict (rect becomes a list)."""
        return {
            "name": self.name,
            "control_type": self.control_type,
            "class_name": self.class_name,
            "automation_id": self.automation_id,
            "bounding_rect": list(self.bounding_rect),
            "is_enabled": self.is_enabled,
            "is_offscreen": self.is_offscreen,
            "parent_path": self.parent_path,
            "process_name": self.process_name,
        }

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "UiaElement":
        """Rebuild a UiaElement from a plain dict; malformed rects fall back to zeros."""
        rect = d.get("bounding_rect", [0, 0, 0, 0])
        if isinstance(rect, list) and len(rect) >= 4:
            rect = tuple(rect[:4])
        else:
            rect = (0, 0, 0, 0)
        return cls(
            name=d.get("name", ""),
            control_type=d.get("control_type", ""),
            class_name=d.get("class_name", ""),
            automation_id=d.get("automation_id", ""),
            bounding_rect=rect,
            is_enabled=d.get("is_enabled", False),
            is_offscreen=d.get("is_offscreen", True),
            parent_path=d.get("parent_path", []),
            process_name=d.get("process_name", ""),
        )
|
||||
|
||||
|
||||
class UIAHelper:
    """Python wrapper around lea_uia.exe (the Rust UI Automation helper).

    Every public method degrades gracefully: if the binary is missing, we
    are not on Windows, or a call fails or times out, the method returns
    None/False so callers fall back to vision-based resolution.
    """

    def __init__(self, helper_path: str = "", timeout: float = _DEFAULT_TIMEOUT):
        """
        Args:
            helper_path: Explicit path to lea_uia.exe; auto-detected when empty.
            timeout: Per-call subprocess timeout in seconds.
        """
        # Resolve the binary once; availability is computed eagerly so later
        # calls can short-circuit without touching the filesystem again.
        self._helper_path = helper_path or self._find_helper()
        self._timeout = timeout
        self._available = self._check_available()

    def _find_helper(self) -> str:
        """Locate lea_uia.exe in the standard locations (first match wins)."""
        candidates = [
            r"C:\Lea\helpers\lea_uia.exe",
            # Local Rust build output (development checkout).
            os.path.join(os.path.dirname(__file__), "..", "..",
                         "agent_rust", "lea_uia", "target",
                         "x86_64-pc-windows-gnu", "release", "lea_uia.exe"),
            "./helpers/lea_uia.exe",
            "lea_uia.exe",
        ]
        for path in candidates:
            if os.path.isfile(path):
                return os.path.abspath(path)
        return ""

    def _check_available(self) -> bool:
        """Check the helper is usable: Windows host + binary present.

        NOTE: no health probe is done here — see health() for that.
        """
        if platform.system() != "Windows":
            logger.debug("UIAHelper: Linux/Mac — helper désactivé")
            return False
        if not self._helper_path:
            logger.debug("UIAHelper: lea_uia.exe introuvable")
            return False
        if not os.path.isfile(self._helper_path):
            logger.debug(f"UIAHelper: chemin invalide {self._helper_path}")
            return False
        return True

    @property
    def available(self) -> bool:
        # True when the binary was found on a Windows host.
        return self._available

    @property
    def helper_path(self) -> str:
        # Absolute path of the resolved binary, or "" when not found.
        return self._helper_path

    def _run(self, args: List[str]) -> Optional[Dict[str, Any]]:
        """Run lea_uia.exe with *args* and parse its stdout as JSON.

        Returns None on any failure (helper unavailable, non-zero exit,
        empty output, timeout, invalid JSON) — callers treat None as
        "fall back to vision".
        """
        if not self._available:
            return None
        try:
            result = subprocess.run(
                [self._helper_path] + args,
                capture_output=True,
                text=True,
                timeout=self._timeout,
                # The helper emits UTF-8; replace undecodable bytes rather
                # than raising mid-call.
                encoding="utf-8",
                errors="replace",
                # CREATE_NO_WINDOW on Windows — avoids flashing a console
                # window at every call (see module-level comment).
                creationflags=_SUBPROCESS_CREATION_FLAGS,
            )
            if result.returncode != 0:
                logger.debug(
                    f"UIAHelper: exit code {result.returncode}, "
                    f"stderr: {result.stderr[:200]}"
                )
                return None
            output = result.stdout.strip()
            if not output:
                return None
            return json.loads(output)
        except subprocess.TimeoutExpired:
            logger.debug(f"UIAHelper: timeout ({self._timeout}s) sur {args}")
            return None
        except json.JSONDecodeError as e:
            logger.debug(f"UIAHelper: JSON invalide — {e}")
            return None
        except Exception as e:
            # Crash-safety: a helper failure must never crash the agent.
            logger.debug(f"UIAHelper: erreur {e}")
            return None

    def health(self) -> bool:
        """Return True when UIA responds (`lea_uia.exe health` → status ok)."""
        data = self._run(["health"])
        return data is not None and data.get("status") == "ok"

    def query_at(
        self,
        x: int,
        y: int,
        with_parents: bool = True,
    ) -> Optional[UiaElement]:
        """Fetch the UIA element at a screen position.

        Args:
            x, y: Absolute pixel coordinates.
            with_parents: Include the ancestor hierarchy.

        Returns:
            UiaElement when found, None otherwise (no element or UIA unavailable).
        """
        args = ["query", "--x", str(x), "--y", str(y)]
        if not with_parents:
            args.append("--with-parents=false")

        data = self._run(args)
        if not data or data.get("status") != "ok":
            return None

        elem_data = data.get("element")
        if not elem_data:
            return None
        return UiaElement.from_dict(elem_data)

    def find_by_name(
        self,
        name: str,
        control_type: Optional[str] = None,
        automation_id: Optional[str] = None,
        window: Optional[str] = None,
        timeout_ms: int = 2000,
    ) -> Optional[UiaElement]:
        """Search for an element by exact name (+ optional filters).

        Args:
            name: Exact element name.
            control_type: Control type (Button, Edit, MenuItem...).
            automation_id: Automation ID.
            window: Restrict the search to a specific window.
            timeout_ms: Search timeout in milliseconds (helper-side).
        """
        args = ["find", "--name", name, "--timeout-ms", str(timeout_ms)]
        if control_type:
            args.extend(["--control-type", control_type])
        if automation_id:
            args.extend(["--automation-id", automation_id])
        if window:
            args.extend(["--window", window])

        data = self._run(args)
        if not data or data.get("status") != "ok":
            return None

        elem_data = data.get("element")
        if not elem_data:
            return None
        return UiaElement.from_dict(elem_data)

    def capture_focused(self, max_depth: int = 3) -> Optional[UiaElement]:
        """Capture the focused element plus its surrounding context."""
        data = self._run(["capture", "--max-depth", str(max_depth)])
        if not data or data.get("status") != "ok":
            return None

        elem_data = data.get("element")
        if not elem_data:
            return None
        return UiaElement.from_dict(elem_data)
|
||||
|
||||
|
||||
# Module-level shared instance (lightweight lazy singleton).
_SHARED_HELPER: Optional[UIAHelper] = None


def get_shared_helper() -> UIAHelper:
    """Return the process-wide shared UIAHelper, creating it on first use."""
    global _SHARED_HELPER
    helper = _SHARED_HELPER
    if helper is None:
        helper = UIAHelper()
        _SHARED_HELPER = helper
    return helper
|
||||
278
core/workflow/workflow_ir.py
Normal file
278
core/workflow/workflow_ir.py
Normal file
@@ -0,0 +1,278 @@
|
||||
# core/workflow/workflow_ir.py
|
||||
"""
|
||||
WorkflowIR — Représentation Intermédiaire d'un workflow.
|
||||
|
||||
C'est la CONNAISSANCE que Léa a acquise en observant un utilisateur.
|
||||
Pas les clics bruts (RawTrace), pas le plan d'exécution (ExecutionPlan).
|
||||
C'est ce que Léa a COMPRIS.
|
||||
|
||||
Format générique — fonctionne pour n'importe quel métier :
|
||||
- TIM qui code des dossiers patients
|
||||
- Comptable qui saisit des factures
|
||||
- RH qui édite des fiches de paie
|
||||
- Logisticien qui gère des stocks
|
||||
|
||||
Le domaine métier est une couche par-dessus (domain_context),
|
||||
pas dans le WorkflowIR lui-même.
|
||||
|
||||
Cycle de vie :
|
||||
RawTrace (capture) → WorkflowIR (compréhension) → ExecutionPlan (exécution)
|
||||
|
||||
Le WorkflowIR est :
|
||||
- versionné (chaque recompilation incrémente la version)
|
||||
- indépendant de la résolution d'écran
|
||||
- indépendant du poste cible
|
||||
- paramétrable (variables substituables)
|
||||
- enrichi par l'apprentissage (chaque replay améliore le IR)
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Structures de données
|
||||
# =========================================================================
|
||||
|
||||
|
||||
@dataclass
class Variable:
    """A substitutable parameter of a workflow."""
    name: str                 # identifier (e.g. "patient", "facture_num")
    description: str = ""     # human-readable description
    source: str = "user"      # origin: "user", "screen", "file", "previous_step"
    default: str = ""         # default value
    required: bool = True     # must be supplied before replay

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain JSON-compatible dict."""
        return {
            "name": self.name,
            "description": self.description,
            "source": self.source,
            "default": self.default,
            "required": self.required,
        }

    @classmethod
    def from_dict(cls, d: Dict) -> "Variable":
        """Rebuild a Variable from a dict, silently ignoring unknown keys."""
        known = cls.__dataclass_fields__
        return cls(**{key: value for key, value in d.items() if key in known})
|
||||
|
||||
|
||||
@dataclass
class Action:
    """Elementary action inside a step."""
    type: str                                      # click, type, key_combo, wait, scroll
    target: str = ""                               # target description ("Save button")
    text: str = ""                                 # text to type (for type)
    keys: List[str] = field(default_factory=list)  # keys (for key_combo)
    duration_ms: int = 0                           # duration (for wait)
    variable: bool = False                         # True when text contains a {var} placeholder
    anchor_hint: str = ""                          # visual hint to help resolution
    # Strict step gating — the action may only run when the active window
    # matches `expected_window_before`, and may only hand over to the next
    # action once the resulting window matches `expected_window_after`.
    # Both fields are extracted by the IRBuilder from the raw events.
    expected_window_before: str = ""
    expected_window_after: str = ""

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a compact dict: falsy fields are omitted, key order fixed."""
        out: Dict[str, Any] = {"type": self.type}
        for attr in ("target", "text", "keys", "duration_ms"):
            value = getattr(self, attr)
            if value:
                out[attr] = value
        if self.variable:
            out["variable"] = True
        for attr in ("anchor_hint", "expected_window_before", "expected_window_after"):
            value = getattr(self, attr)
            if value:
                out[attr] = value
        return out

    @classmethod
    def from_dict(cls, d: Dict) -> "Action":
        """Rebuild an Action from a dict, silently ignoring unknown keys."""
        known = cls.__dataclass_fields__
        return cls(**{key: value for key, value in d.items() if key in known})
|
||||
|
||||
|
||||
@dataclass
class Step:
    """One logical step of a workflow — a single business intent."""
    step_id: str
    intent: str                      # e.g. "Open the record", "Enter the code"
    precondition: str = ""           # e.g. "the app shows the list screen"
    postcondition: str = ""          # e.g. "the record is displayed"
    actions: List[Action] = field(default_factory=list)
    is_optional: bool = False        # step may be skipped
    is_loop: bool = False            # step repeats for each item
    loop_variable: str = ""          # loop variable name (when is_loop)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-compatible dict, omitting empty optional fields."""
        out: Dict[str, Any] = {
            "step_id": self.step_id,
            "intent": self.intent,
            "actions": [action.to_dict() for action in self.actions],
        }
        if self.precondition:
            out["precondition"] = self.precondition
        if self.postcondition:
            out["postcondition"] = self.postcondition
        if self.is_optional:
            out["is_optional"] = True
        if self.is_loop:
            out["is_loop"] = True
            out["loop_variable"] = self.loop_variable
        return out

    @classmethod
    def from_dict(cls, d: Dict) -> "Step":
        """Rebuild a Step (and its Actions) from a plain dict."""
        return cls(
            step_id=d["step_id"],
            intent=d.get("intent", ""),
            precondition=d.get("precondition", ""),
            postcondition=d.get("postcondition", ""),
            actions=[Action.from_dict(raw) for raw in d.get("actions", [])],
            is_optional=d.get("is_optional", False),
            is_loop=d.get("is_loop", False),
            loop_variable=d.get("loop_variable", ""),
        )
|
||||
|
||||
|
||||
@dataclass
class WorkflowIR:
    """Intermediate Representation of a workflow — the compiled knowledge.

    This is what Léa *understood* while observing the user. It is
    independent of the workstation, the screen resolution and the runtime.

    Lifecycle:
        RawTrace (capture) → WorkflowIR (understanding) → ExecutionPlan (execution)
    """
    workflow_id: str
    version: int = 1
    name: str = ""
    description: str = ""
    domain: str = "generic"          # business domain (tim_codage, compta, rh, stocks...)
    learned_from: str = ""           # source session_id
    created_at: float = 0.0
    updated_at: float = 0.0

    # Content
    variables: List[Variable] = field(default_factory=list)
    steps: List[Step] = field(default_factory=list)

    # Learning metadata
    replay_count: int = 0            # number of replays performed
    success_rate: float = 0.0        # average success rate
    last_replay_at: float = 0.0

    # Applications involved (detected during learning)
    applications: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-compatible dict (success_rate rounded to 3 digits)."""
        return {
            "workflow_id": self.workflow_id,
            "version": self.version,
            "name": self.name,
            "description": self.description,
            "domain": self.domain,
            "learned_from": self.learned_from,
            "created_at": self.created_at,
            "updated_at": self.updated_at,
            "variables": [v.to_dict() for v in self.variables],
            "steps": [s.to_dict() for s in self.steps],
            "replay_count": self.replay_count,
            "success_rate": round(self.success_rate, 3),
            "last_replay_at": self.last_replay_at,
            "applications": self.applications,
        }

    def to_json(self, indent: int = 2) -> str:
        """Serialize to a JSON string (non-ASCII kept as-is)."""
        return json.dumps(self.to_dict(), ensure_ascii=False, indent=indent)

    @classmethod
    def from_dict(cls, d: Dict) -> "WorkflowIR":
        """Rebuild a WorkflowIR from a plain dict.

        Fix: missing-key defaults for the float-typed fields (created_at,
        updated_at, success_rate, last_replay_at) are floats (0.0), keeping
        the declared field types consistent.
        """
        variables = [Variable.from_dict(v) for v in d.get("variables", [])]
        steps = [Step.from_dict(s) for s in d.get("steps", [])]
        return cls(
            workflow_id=d["workflow_id"],
            version=d.get("version", 1),
            name=d.get("name", ""),
            description=d.get("description", ""),
            domain=d.get("domain", "generic"),
            learned_from=d.get("learned_from", ""),
            created_at=d.get("created_at", 0.0),
            updated_at=d.get("updated_at", 0.0),
            variables=variables,
            steps=steps,
            replay_count=d.get("replay_count", 0),
            success_rate=d.get("success_rate", 0.0),
            last_replay_at=d.get("last_replay_at", 0.0),
            applications=d.get("applications", []),
        )

    @classmethod
    def from_json(cls, json_str: str) -> "WorkflowIR":
        """Rebuild a WorkflowIR from a JSON string."""
        return cls.from_dict(json.loads(json_str))

    def save(self, directory: str) -> Path:
        """Write this IR as {workflow_id}_v{version}.json under *directory*.

        Creates the directory if needed and returns the written path.
        """
        dir_path = Path(directory)
        dir_path.mkdir(parents=True, exist_ok=True)
        file_path = dir_path / f"{self.workflow_id}_v{self.version}.json"
        file_path.write_text(self.to_json(), encoding="utf-8")
        logger.info(f"WorkflowIR sauvegardé : {file_path}")
        return file_path

    @classmethod
    def load(cls, file_path: str) -> "WorkflowIR":
        """Load a WorkflowIR from a JSON file."""
        return cls.from_json(Path(file_path).read_text(encoding="utf-8"))

    def increment_version(self) -> "WorkflowIR":
        """Return a deep copy with version + 1 (used after recompilation)."""
        import copy
        new = copy.deepcopy(self)
        new.version += 1
        new.updated_at = time.time()
        return new

    def add_step(self, intent: str, actions: Optional[List[Dict]] = None, **kwargs) -> Step:
        """Append a step built from raw action dicts and return it.

        Fix: the annotation is Optional[List[Dict]] — the previous
        `List[Dict] = None` was invalid per PEP 484.
        """
        step = Step(
            step_id=f"s{len(self.steps) + 1}",
            intent=intent,
            actions=[Action.from_dict(a) for a in (actions or [])],
            **kwargs,
        )
        self.steps.append(step)
        return step

    def add_variable(self, name: str, **kwargs) -> Variable:
        """Append a substitutable variable to the workflow and return it."""
        var = Variable(name=name, **kwargs)
        self.variables.append(var)
        return var

    @staticmethod
    def new(name: str, domain: str = "generic", learned_from: str = "") -> "WorkflowIR":
        """Create a fresh, empty WorkflowIR with a random id and current timestamps."""
        return WorkflowIR(
            workflow_id=f"wf_{uuid.uuid4().hex[:12]}",
            version=1,
            name=name,
            domain=domain,
            learned_from=learned_from,
            created_at=time.time(),
            updated_at=time.time(),
        )
|
||||
@@ -146,8 +146,14 @@ REQUIRED_FILES=(
|
||||
"agent_v1/core/__init__.py"
|
||||
"agent_v1/core/captor.py"
|
||||
"agent_v1/core/executor.py"
|
||||
"agent_v1/core/grounding.py"
|
||||
"agent_v1/core/policy.py"
|
||||
"agent_v1/core/recovery.py"
|
||||
"agent_v1/core/system_dialog_guard.py"
|
||||
"agent_v1/core/uia_helper.py"
|
||||
"agent_v1/network/__init__.py"
|
||||
"agent_v1/network/streamer.py"
|
||||
"agent_v1/network/persistent_buffer.py"
|
||||
"agent_v1/session/__init__.py"
|
||||
"agent_v1/session/storage.py"
|
||||
"agent_v1/ui/__init__.py"
|
||||
@@ -156,6 +162,8 @@ REQUIRED_FILES=(
|
||||
"agent_v1/ui/chat_window.py"
|
||||
"agent_v1/ui/capture_server.py"
|
||||
"agent_v1/ui/notifications.py"
|
||||
"agent_v1/ui/activity_panel.py"
|
||||
"agent_v1/ui/messages.py"
|
||||
"agent_v1/vision/__init__.py"
|
||||
"agent_v1/vision/capturer.py"
|
||||
"agent_v1/vision/blur_sensitive.py"
|
||||
|
||||
61
deploy/installer/LICENSE.txt
Normal file
61
deploy/installer/LICENSE.txt
Normal file
@@ -0,0 +1,61 @@
|
||||
============================================================
|
||||
Lea - Conditions Generales d'Utilisation
|
||||
============================================================
|
||||
|
||||
Version 1.0 — Avril 2026
|
||||
Editeur : AIVANOV
|
||||
|
||||
1. OBJET
|
||||
--------
|
||||
Lea est un logiciel d'assistance intelligente destine a automatiser
|
||||
des taches repetitives sur poste de travail Windows, pour le compte
|
||||
de son employeur (AIVANOV et ses clients autorises).
|
||||
|
||||
2. NATURE DES DONNEES COLLECTEES
|
||||
--------------------------------
|
||||
Lors de son utilisation, Lea capture :
|
||||
- Des captures d'ecran du poste de travail
|
||||
- Les evenements clavier et souris
|
||||
- Les metadonnees systeme (nom de la machine, processus actifs)
|
||||
|
||||
Les donnees sensibles (mots de passe, numeros de securite sociale,
|
||||
informations de cartes bancaires) sont automatiquement floutees
|
||||
avant transmission au serveur, sauf desactivation explicite par
|
||||
l'administrateur (parametre RPA_BLUR_SENSITIVE=false).
|
||||
|
||||
3. TRANSMISSION ET STOCKAGE
|
||||
---------------------------
|
||||
Les donnees sont transmises via HTTPS chiffre a un serveur central
|
||||
gere par AIVANOV. Elles sont conservees 180 jours minimum pour des
|
||||
raisons de conformite, puis purgees automatiquement.
|
||||
|
||||
4. SYSTEME D'IA (AI ACT - ARTICLE 50)
|
||||
-------------------------------------
|
||||
Lea utilise des modeles d'intelligence artificielle pour comprendre
|
||||
et automatiser les taches. Conformement a l'Article 50 du Reglement
|
||||
europeen sur l'Intelligence Artificielle, l'utilisateur est informe
|
||||
qu'il interagit avec un systeme d'IA.
|
||||
|
||||
5. CONTROLE PAR L'UTILISATEUR
|
||||
-----------------------------
|
||||
L'utilisateur peut a tout moment :
|
||||
- Arreter l'enregistrement (clic droit sur icone > C'est termine)
|
||||
- Declencher un arret d'urgence (clic droit > ARRET D'URGENCE)
|
||||
- Quitter Lea completement (clic droit > Quitter Lea)
|
||||
- Desinstaller Lea via le panneau de configuration Windows
|
||||
|
||||
6. RESPONSABILITE
|
||||
-----------------
|
||||
L'utilisateur s'engage a ne pas utiliser Lea sur des donnees qu'il
|
||||
n'est pas autorise a traiter dans le cadre de ses fonctions.
|
||||
AIVANOV ne pourra etre tenu responsable d'un usage non conforme.
|
||||
|
||||
7. CONTACT
|
||||
----------
|
||||
Pour toute question ou demande d'acces/rectification/suppression
|
||||
de donnees : dpo@aivanov.com
|
||||
|
||||
============================================================
|
||||
En cliquant sur "J'accepte", vous confirmez avoir pris connaissance
|
||||
de ces conditions et les accepter.
|
||||
============================================================
|
||||
554
deploy/installer/Lea.iss
Normal file
554
deploy/installer/Lea.iss
Normal file
@@ -0,0 +1,554 @@
|
||||
; ============================================================
|
||||
; Lea.iss — Script Inno Setup pour l'installeur Lea
|
||||
; ------------------------------------------------------------
|
||||
; Compile avec Inno Setup 6.2+ (ISCC.exe Lea.iss)
|
||||
;
|
||||
; Ce script produit Lea-Setup-v{VERSION}.exe dans ..\releases\
|
||||
;
|
||||
; Fonctions principales :
|
||||
; - Page de bienvenue + licence (CGU)
|
||||
; - Page custom d'enrollment (nom, email, ID AIVANOV, URL, token)
|
||||
; - Generation d'un machine_id unique par poste
|
||||
; - Generation automatique de config.txt
|
||||
; - Installation silencieuse de Python 3.12 embedded (optionnelle)
|
||||
; - Raccourci demarrage automatique (checkbox)
|
||||
; - Installation silencieuse : /VERYSILENT /CONFIG=path\to\config.txt
|
||||
; - Desinstallation propre (kill process, cleanup, export logs)
|
||||
;
|
||||
; Pre-requis staging :
|
||||
; Le dossier ..\build\installer_staging\ doit contenir :
|
||||
; - Le package Lea complet (agent_v1/, lea_ui/, run_agent_v1.py, Lea.bat, ...)
|
||||
; - Optionnel : python-3.12-embed\ (runtime Python embedded pre-configure)
|
||||
; build_installer.sh s'occupe de preparer ce staging.
|
||||
; ============================================================
|
||||
|
||||
#define MyAppName "Lea"
|
||||
#define MyAppVersion "1.0.0"
|
||||
#define MyAppPublisher "AIVANOV"
|
||||
#define MyAppURL "https://lea.labs.laurinebazin.design"
|
||||
#define MyAppExeName "Lea.bat"
|
||||
#define MyAppDescription "Lea - Assistante IA pour l'automatisation"
|
||||
|
||||
; Chemin du staging (peut etre surcharge via ISCC /DSourceDir=...)
|
||||
#ifndef SourceDir
|
||||
#define SourceDir "..\build\installer_staging"
|
||||
#endif
|
||||
|
||||
; Chemin de sortie des installeurs
|
||||
#ifndef OutputDir
|
||||
#define OutputDir "..\releases"
|
||||
#endif
|
||||
|
||||
; Activer le bundle Python embedded si present dans le staging
|
||||
#define PythonEmbedDir "python-3.12-embed"
|
||||
|
||||
[Setup]
|
||||
AppId={{B3F9A1E2-5C4D-4E7F-9A1B-2C3D4E5F6789}
|
||||
AppName={#MyAppName}
|
||||
AppVersion={#MyAppVersion}
|
||||
AppVerName={#MyAppName} {#MyAppVersion}
|
||||
AppPublisher={#MyAppPublisher}
|
||||
AppPublisherURL={#MyAppURL}
|
||||
AppSupportURL={#MyAppURL}
|
||||
AppUpdatesURL={#MyAppURL}
|
||||
DefaultDirName={autopf}\{#MyAppName}
|
||||
DefaultGroupName={#MyAppName}
|
||||
DisableProgramGroupPage=yes
|
||||
OutputDir={#OutputDir}
|
||||
OutputBaseFilename=Lea-Setup-v{#MyAppVersion}
|
||||
; Compression correcte (pas trop aggressive pour que l'install reste rapide)
|
||||
Compression=lzma2
|
||||
SolidCompression=yes
|
||||
; Support HiDPI
|
||||
WizardStyle=modern
|
||||
; Langue FR par defaut
|
||||
ShowLanguageDialog=no
|
||||
; Autorise l'install en mode user si pas admin (bascule sur LOCALAPPDATA)
|
||||
PrivilegesRequired=lowest
|
||||
PrivilegesRequiredOverridesAllowed=dialog
|
||||
; Icone de l'installeur (decommenter si disponible)
|
||||
; SetupIconFile=lea.ico
|
||||
; Uninstall
|
||||
UninstallDisplayName={#MyAppName} {#MyAppVersion}
|
||||
; UninstallDisplayIcon={app}\lea.ico ; decommenter quand l'icone sera fournie
|
||||
; Architecture : 64-bit uniquement (Windows 10+ / 11)
|
||||
ArchitecturesAllowed=x64compatible
|
||||
ArchitecturesInstallIn64BitMode=x64compatible
|
||||
; Version minimale Windows : 10
|
||||
MinVersion=10.0
|
||||
; Informations legales
|
||||
VersionInfoVersion={#MyAppVersion}
|
||||
VersionInfoCompany={#MyAppPublisher}
|
||||
VersionInfoDescription={#MyAppDescription}
|
||||
VersionInfoCopyright=Copyright (C) 2026 {#MyAppPublisher}
|
||||
; Licence CGU affichee avant le choix du repertoire
|
||||
LicenseFile=LICENSE.txt
|
||||
|
||||
[Languages]
|
||||
Name: "french"; MessagesFile: "compiler:Languages\French.isl"
|
||||
|
||||
[Files]
|
||||
; Package complet (code Python + .bat + requirements)
|
||||
; Note : install.bat EST copie (execute par [Run] pour creer le venv Python)
|
||||
; Note : config.txt n'est PAS copie depuis le staging (il est genere par [Code])
|
||||
Source: "{#SourceDir}\*"; \
|
||||
DestDir: "{app}"; \
|
||||
Flags: ignoreversion recursesubdirs createallsubdirs; \
|
||||
Excludes: "{#PythonEmbedDir}\*,config.txt,*.log,sessions\*,__pycache__\*"
|
||||
|
||||
; Python 3.12 embedded (optionnel, copie conditionnelle via check)
|
||||
Source: "{#SourceDir}\{#PythonEmbedDir}\*"; \
|
||||
DestDir: "{app}\python-embed"; \
|
||||
Flags: ignoreversion recursesubdirs createallsubdirs skipifsourcedoesntexist; \
|
||||
Components: pythonembed
|
||||
|
||||
; Script de desinstallation custom (kill + export logs)
|
||||
Source: "uninstall_lea.ps1"; DestDir: "{app}"; Flags: ignoreversion
|
||||
|
||||
; Script de configuration du runtime Python embedded (optionnel)
|
||||
Source: "configure_embed.ps1"; DestDir: "{app}"; Flags: ignoreversion; Components: pythonembed
|
||||
|
||||
; Licence CGU (affichee dans la page licence ET conservee dans {app})
|
||||
Source: "LICENSE.txt"; DestDir: "{app}"; Flags: ignoreversion isreadme
|
||||
|
||||
; Template de config pour installation silencieuse (reference)
|
||||
Source: "config_template.txt"; DestDir: "{app}"; Flags: ignoreversion
|
||||
|
||||
[Components]
|
||||
Name: "core"; Description: "Lea (obligatoire)"; Types: full compact custom; Flags: fixed
|
||||
Name: "pythonembed"; Description: "Python 3.12 embedded (recommande si Python non installe sur le poste)"; Types: full
|
||||
Name: "autostart"; Description: "Demarrer Lea automatiquement au demarrage de Windows"; Types: full
|
||||
|
||||
[Tasks]
|
||||
Name: "desktopicon"; Description: "Creer un raccourci sur le bureau"; GroupDescription: "Raccourcis :"; Flags: unchecked
|
||||
Name: "startmenuicon"; Description: "Creer un raccourci dans le menu Demarrer"; GroupDescription: "Raccourcis :"
|
||||
|
||||
[Icons]
|
||||
Name: "{autoprograms}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; WorkingDir: "{app}"; Tasks: startmenuicon
|
||||
Name: "{autodesktop}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; WorkingDir: "{app}"; Tasks: desktopicon
|
||||
; Raccourci autostart (shell:startup) — cree si composant autostart selectionne
|
||||
Name: "{userstartup}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; \
|
||||
WorkingDir: "{app}"; Components: autostart
|
||||
|
||||
[Run]
|
||||
; Apres copie : executer install.bat pour creer le venv et installer les dependances Python
|
||||
; Skip si bundle embedded (dans ce cas, on utilise python-embed directement)
|
||||
Filename: "{app}\install.bat"; \
|
||||
WorkingDir: "{app}"; \
|
||||
StatusMsg: "Installation des composants Python (1-2 minutes)..."; \
|
||||
Flags: runhidden waituntilterminated; \
|
||||
Components: not pythonembed
|
||||
|
||||
; Configuration Python embedded : creer un Lea.bat qui pointe sur python-embed
|
||||
Filename: "{cmd}"; \
|
||||
Parameters: "/c copy /y ""{app}\Lea.bat"" ""{app}\Lea.bat.bak"" && powershell -NoProfile -ExecutionPolicy Bypass -File ""{app}\configure_embed.ps1"""; \
|
||||
WorkingDir: "{app}"; \
|
||||
StatusMsg: "Configuration du runtime Python embedded..."; \
|
||||
Flags: runhidden waituntilterminated skipifsilent; \
|
||||
Components: pythonembed
|
||||
|
||||
; Lancer Lea a la fin de l'installation (optionnel)
|
||||
Filename: "{app}\{#MyAppExeName}"; \
|
||||
Description: "Lancer {#MyAppName} maintenant"; \
|
||||
Flags: postinstall skipifsilent nowait shellexec
|
||||
|
||||
[UninstallRun]
|
||||
; Tuer le process via PID du lock avant suppression des fichiers
|
||||
Filename: "powershell.exe"; \
|
||||
Parameters: "-NoProfile -ExecutionPolicy Bypass -File ""{app}\uninstall_lea.ps1"" -AppDir ""{app}"""; \
|
||||
RunOnceId: "KillLeaProcess"; \
|
||||
Flags: runhidden waituntilterminated
|
||||
|
||||
[UninstallDelete]
|
||||
Type: filesandordirs; Name: "{app}\.venv"
|
||||
Type: filesandordirs; Name: "{app}\__pycache__"
|
||||
Type: filesandordirs; Name: "{app}\agent_v1\__pycache__"
|
||||
Type: filesandordirs; Name: "{app}\agent_v1\sessions"
|
||||
Type: filesandordirs; Name: "{app}\agent_v1\logs"
|
||||
Type: files; Name: "{app}\lea_agent.lock"
|
||||
Type: files; Name: "{app}\config.txt"
|
||||
Type: files; Name: "{app}\machine_id.txt"
|
||||
|
||||
; ============================================================
|
||||
; Code Pascal : pages custom + generation config.txt + helpers
|
||||
; ============================================================
|
||||
[Code]
|
||||
const
|
||||
SERVER_URL_DEFAULT = 'https://lea.labs.laurinebazin.design/api/v1';
|
||||
SERVER_HOST_DEFAULT = 'lea.labs.laurinebazin.design';
|
||||
DEFAULT_TOKEN = '86031addb338e449fccdb1a983f61807aec15d42d482b9c7748ad607dc23caab';
|
||||
|
||||
var
|
||||
EnrollmentPage: TInputQueryWizardPage;
|
||||
TokenPage: TInputQueryWizardPage;
|
||||
MachineIdValue: string;
|
||||
ConfigFilePath: string;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Helper : ajoute des guillemets autour d'une chaine
|
||||
// --------------------------------------------------------------------
|
||||
function AddQuotes(const S: string): string;
|
||||
begin
|
||||
Result := '"' + S + '"';
|
||||
end;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Wrapper CreateGUIDString (via PowerShell, fallback par defaut)
|
||||
// --------------------------------------------------------------------
|
||||
function CreateGUIDString(var Guid: string): Boolean;
|
||||
var
|
||||
ResultCode: Integer;
|
||||
TmpFile: string;
|
||||
Lines: TArrayOfString;
|
||||
begin
|
||||
Result := False;
|
||||
TmpFile := ExpandConstant('{tmp}\guid.txt');
|
||||
// powershell : genere un GUID
|
||||
if Exec('powershell.exe',
|
||||
'-NoProfile -Command "[guid]::NewGuid().ToString() | Out-File -Encoding ASCII ' + AddQuotes(TmpFile) + '"',
|
||||
'', SW_HIDE, ewWaitUntilTerminated, ResultCode) then
|
||||
begin
|
||||
if LoadStringsFromFile(TmpFile, Lines) and (GetArrayLength(Lines) > 0) then
|
||||
begin
|
||||
Guid := Trim(Lines[0]);
|
||||
Result := Length(Guid) > 0;
|
||||
end;
|
||||
DeleteFile(TmpFile);
|
||||
end;
|
||||
end;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Recupere le hostname de la machine
|
||||
// --------------------------------------------------------------------
|
||||
function GetComputerNameString(): string;
|
||||
var
|
||||
Buffer: string;
|
||||
begin
|
||||
Buffer := ExpandConstant('{computername}');
|
||||
if Length(Buffer) = 0 then
|
||||
Buffer := 'unknown-host';
|
||||
Result := Buffer;
|
||||
end;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Genere un identifiant machine unique : UUID4 + hostname hashe
|
||||
// --------------------------------------------------------------------
|
||||
function GenerateMachineId(): string;
|
||||
var
|
||||
Guid: string;
|
||||
Hostname: string;
|
||||
I: Integer;
|
||||
Hash: Cardinal;
|
||||
begin
|
||||
// Essaye d'utiliser le GUID genere par Windows (via PowerShell)
|
||||
Guid := '';
|
||||
if CreateGUIDString(Guid) then
|
||||
Result := LowerCase(StringChange(StringChange(StringChange(Guid, '{', ''), '}', ''), '-', ''))
|
||||
else
|
||||
Result := IntToStr(GetTickCount);
|
||||
|
||||
// Ajoute un hash du hostname pour stabilite
|
||||
Hostname := GetComputerNameString();
|
||||
Hash := 0;
|
||||
for I := 1 to Length(Hostname) do
|
||||
Hash := (Hash * 31 + Ord(Hostname[I])) and $FFFFFFFF;
|
||||
|
||||
Result := Copy(Result, 1, 16) + '-' + Format('%08x', [Hash]);
|
||||
end;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Charge une configuration depuis /CONFIG=path (installation silencieuse)
|
||||
// Format du fichier : NOM=valeur, une ligne par parametre
|
||||
// Cles attendues : USER_NAME, USER_EMAIL, USER_ID, SERVER_URL, API_TOKEN
|
||||
// --------------------------------------------------------------------
|
||||
procedure LoadConfigFromCommandLine(); forward;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Initialisation : cree les pages custom d'enrollment
|
||||
// --------------------------------------------------------------------
|
||||
procedure InitializeWizard();
|
||||
begin
|
||||
// Page 1 : informations collaborateur
|
||||
EnrollmentPage := CreateInputQueryPage(wpSelectTasks,
|
||||
'Identification du collaborateur',
|
||||
'Veuillez renseigner vos informations pour l''enrollment',
|
||||
'Ces informations sont envoyees au serveur Lea pour identifier votre poste. ' +
|
||||
'Elles sont stockees de maniere securisee et ne sont jamais partagees avec des tiers.');
|
||||
|
||||
EnrollmentPage.Add('Nom et prenom :', False);
|
||||
EnrollmentPage.Add('Email professionnel :', False);
|
||||
EnrollmentPage.Add('ID interne AIVANOV (optionnel) :', False);
|
||||
|
||||
EnrollmentPage.Values[0] := '';
|
||||
EnrollmentPage.Values[1] := '';
|
||||
EnrollmentPage.Values[2] := '';
|
||||
|
||||
// Page 2 : configuration serveur (URL + token)
|
||||
TokenPage := CreateInputQueryPage(EnrollmentPage.ID,
|
||||
'Connexion au serveur Lea',
|
||||
'Configuration de la connexion au serveur central',
|
||||
'L''URL du serveur est pre-remplie par defaut. Le token d''authentification ' +
|
||||
'vous est fourni par votre administrateur AIVANOV. Laissez la valeur par defaut ' +
|
||||
'si vous ne savez pas quoi mettre.');
|
||||
|
||||
TokenPage.Add('URL du serveur (avec /api/v1) :', False);
|
||||
TokenPage.Add('Token d''authentification :', False);
|
||||
|
||||
TokenPage.Values[0] := SERVER_URL_DEFAULT;
|
||||
TokenPage.Values[1] := DEFAULT_TOKEN;
|
||||
|
||||
// Si un fichier /CONFIG= est passe en ligne de commande, pre-remplir
|
||||
LoadConfigFromCommandLine();
|
||||
end;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Implementation de LoadConfigFromCommandLine (declare en forward ci-dessus)
|
||||
// --------------------------------------------------------------------
|
||||
procedure LoadConfigFromCommandLine();
|
||||
var
|
||||
ConfigParam: string;
|
||||
Lines: TArrayOfString;
|
||||
I: Integer;
|
||||
Line, Key, Value: string;
|
||||
EqPos: Integer;
|
||||
begin
|
||||
ConfigParam := ExpandConstant('{param:CONFIG}');
|
||||
if Length(ConfigParam) = 0 then Exit;
|
||||
if not FileExists(ConfigParam) then Exit;
|
||||
|
||||
if not LoadStringsFromFile(ConfigParam, Lines) then Exit;
|
||||
|
||||
for I := 0 to GetArrayLength(Lines) - 1 do
|
||||
begin
|
||||
Line := Trim(Lines[I]);
|
||||
if (Length(Line) = 0) or (Line[1] = '#') then Continue;
|
||||
|
||||
EqPos := Pos('=', Line);
|
||||
if EqPos = 0 then Continue;
|
||||
|
||||
Key := Trim(Copy(Line, 1, EqPos - 1));
|
||||
Value := Trim(Copy(Line, EqPos + 1, Length(Line)));
|
||||
|
||||
if Key = 'USER_NAME' then EnrollmentPage.Values[0] := Value
|
||||
else if Key = 'USER_EMAIL' then EnrollmentPage.Values[1] := Value
|
||||
else if Key = 'USER_ID' then EnrollmentPage.Values[2] := Value
|
||||
else if Key = 'SERVER_URL' then TokenPage.Values[0] := Value
|
||||
else if Key = 'API_TOKEN' then TokenPage.Values[1] := Value;
|
||||
end;
|
||||
end;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Validation des pages custom (Nom/Email obligatoires, token non vide)
|
||||
// --------------------------------------------------------------------
|
||||
function NextButtonClick(CurPageID: Integer): Boolean;
|
||||
var
|
||||
Email: string;
|
||||
begin
|
||||
Result := True;
|
||||
|
||||
if CurPageID = EnrollmentPage.ID then
|
||||
begin
|
||||
if Length(Trim(EnrollmentPage.Values[0])) = 0 then
|
||||
begin
|
||||
MsgBox('Le nom est obligatoire.', mbError, MB_OK);
|
||||
Result := False;
|
||||
Exit;
|
||||
end;
|
||||
|
||||
Email := Trim(EnrollmentPage.Values[1]);
|
||||
if (Length(Email) = 0) or (Pos('@', Email) = 0) then
|
||||
begin
|
||||
MsgBox('Un email valide est obligatoire.', mbError, MB_OK);
|
||||
Result := False;
|
||||
Exit;
|
||||
end;
|
||||
end;
|
||||
|
||||
if CurPageID = TokenPage.ID then
|
||||
begin
|
||||
if Length(Trim(TokenPage.Values[0])) = 0 then
|
||||
begin
|
||||
MsgBox('L''URL du serveur est obligatoire.', mbError, MB_OK);
|
||||
Result := False;
|
||||
Exit;
|
||||
end;
|
||||
if Length(Trim(TokenPage.Values[1])) < 16 then
|
||||
begin
|
||||
if MsgBox('Le token parait court (< 16 caracteres). Continuer quand meme ?',
|
||||
mbConfirmation, MB_YESNO) = IDNO then
|
||||
begin
|
||||
Result := False;
|
||||
Exit;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Ecrit config.txt genere dans le dossier d'installation
|
||||
// --------------------------------------------------------------------
|
||||
procedure WriteGeneratedConfig();
|
||||
var
|
||||
Config: string;
|
||||
ServerUrl, ServerHost, Token: string;
|
||||
UserName, UserEmail, UserId: string;
|
||||
SlashPos: Integer;
|
||||
begin
|
||||
ConfigFilePath := ExpandConstant('{app}\config.txt');
|
||||
|
||||
ServerUrl := Trim(TokenPage.Values[0]);
|
||||
Token := Trim(TokenPage.Values[1]);
|
||||
UserName := Trim(EnrollmentPage.Values[0]);
|
||||
UserEmail := Trim(EnrollmentPage.Values[1]);
|
||||
UserId := Trim(EnrollmentPage.Values[2]);
|
||||
|
||||
// Derive ServerHost depuis ServerUrl : https://host/api/v1 -> host
|
||||
ServerHost := ServerUrl;
|
||||
ServerHost := StringChange(ServerHost, 'https://', '');
|
||||
ServerHost := StringChange(ServerHost, 'http://', '');
|
||||
SlashPos := Pos('/', ServerHost);
|
||||
if SlashPos > 0 then
|
||||
ServerHost := Copy(ServerHost, 1, SlashPos - 1);
|
||||
|
||||
Config :=
|
||||
'# ============================================================' + #13#10 +
|
||||
'# Configuration Lea (genere par l''installeur)' + #13#10 +
|
||||
'# ============================================================' + #13#10 +
|
||||
'# Genere le ' + GetDateTimeString('yyyy-mm-dd hh:nn:ss', '-', ':') + #13#10 +
|
||||
'# Installe par : ' + UserName + ' <' + UserEmail + '>' + #13#10 +
|
||||
'# ID interne : ' + UserId + #13#10 +
|
||||
'# Machine ID : ' + MachineIdValue + #13#10 +
|
||||
'# ============================================================' + #13#10 +
|
||||
'' + #13#10 +
|
||||
'# Adresse du serveur Lea (URL complete avec /api/v1)' + #13#10 +
|
||||
'RPA_SERVER_URL=' + ServerUrl + #13#10 +
|
||||
'' + #13#10 +
|
||||
'# Cle d''authentification (fournie par l''administrateur)' + #13#10 +
|
||||
'RPA_API_TOKEN=' + Token + #13#10 +
|
||||
'' + #13#10 +
|
||||
'# Nom du serveur (sans https://, sans /api/v1)' + #13#10 +
|
||||
'RPA_SERVER_HOST=' + ServerHost + #13#10 +
|
||||
'' + #13#10 +
|
||||
'# Identifiant unique de cette machine (genere a l''install)' + #13#10 +
|
||||
'RPA_MACHINE_ID=' + MachineIdValue + #13#10 +
|
||||
'' + #13#10 +
|
||||
'# Informations collaborateur (utilisees pour l''audit cote serveur)' + #13#10 +
|
||||
'RPA_USER_NAME=' + UserName + #13#10 +
|
||||
'RPA_USER_EMAIL=' + UserEmail + #13#10;
|
||||
|
||||
if Length(UserId) > 0 then
|
||||
Config := Config + 'RPA_USER_ID=' + UserId + #13#10;
|
||||
|
||||
Config := Config + '' + #13#10 +
|
||||
'# ============================================================' + #13#10 +
|
||||
'# Parametres avances (ne pas modifier sauf indication)' + #13#10 +
|
||||
'# ============================================================' + #13#10 +
|
||||
'' + #13#10 +
|
||||
'# Flouter les zones de texte dans les captures (securite donnees)' + #13#10 +
|
||||
'RPA_BLUR_SENSITIVE=true' + #13#10 +
|
||||
'' + #13#10 +
|
||||
'# Duree de conservation des logs en jours (minimum 180 pour conformite)' + #13#10 +
|
||||
'RPA_LOG_RETENTION_DAYS=180' + #13#10;
|
||||
|
||||
if not SaveStringToFile(ConfigFilePath, Config, False) then
|
||||
MsgBox('Echec de l''ecriture de config.txt dans ' + ConfigFilePath, mbError, MB_OK);
|
||||
end;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Ecrit le machine_id.txt (identifiant du poste)
|
||||
// --------------------------------------------------------------------
|
||||
procedure WriteMachineId();
|
||||
var
|
||||
MachineIdFile: string;
|
||||
begin
|
||||
MachineIdFile := ExpandConstant('{app}\machine_id.txt');
|
||||
if not SaveStringToFile(MachineIdFile, MachineIdValue, False) then
|
||||
MsgBox('Echec de l''ecriture de machine_id.txt', mbError, MB_OK);
|
||||
end;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Notifie le serveur de l'enrollment (best-effort, non bloquant)
|
||||
// POST vers {SERVER_URL}/agents/enroll avec les infos collaborateur
|
||||
// --------------------------------------------------------------------
|
||||
procedure NotifyServerEnrollment();
|
||||
var
|
||||
ResultCode: Integer;
|
||||
PsScript: string;
|
||||
PsFile: string;
|
||||
ServerUrl, Token: string;
|
||||
begin
|
||||
ServerUrl := Trim(TokenPage.Values[0]);
|
||||
Token := Trim(TokenPage.Values[1]);
|
||||
|
||||
PsFile := ExpandConstant('{tmp}\enroll.ps1');
|
||||
PsScript :=
|
||||
'$ErrorActionPreference = ''SilentlyContinue''' + #13#10 +
|
||||
'$body = @{' + #13#10 +
|
||||
' machine_id = ''' + MachineIdValue + '''' + #13#10 +
|
||||
' hostname = $env:COMPUTERNAME' + #13#10 +
|
||||
' user_name = ''' + EnrollmentPage.Values[0] + '''' + #13#10 +
|
||||
' user_email = ''' + EnrollmentPage.Values[1] + '''' + #13#10 +
|
||||
' user_id = ''' + EnrollmentPage.Values[2] + '''' + #13#10 +
|
||||
' agent_version = ''' + '{#MyAppVersion}' + '''' + #13#10 +
|
||||
'} | ConvertTo-Json' + #13#10 +
|
||||
'try {' + #13#10 +
|
||||
' Invoke-RestMethod -Uri ''' + ServerUrl + '/agents/enroll'' ' +
|
||||
'-Method POST -Body $body -ContentType ''application/json'' ' +
|
||||
'-Headers @{ Authorization = ''Bearer ' + Token + ''' } -TimeoutSec 10 | Out-Null' + #13#10 +
|
||||
'} catch { exit 0 }' + #13#10;
|
||||
|
||||
SaveStringToFile(PsFile, PsScript, False);
|
||||
Exec('powershell.exe',
|
||||
'-NoProfile -ExecutionPolicy Bypass -File ' + AddQuotes(PsFile),
|
||||
'', SW_HIDE, ewWaitUntilTerminated, ResultCode);
|
||||
DeleteFile(PsFile);
|
||||
end;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Hook : actions apres copie des fichiers (ssPostInstall)
|
||||
// --------------------------------------------------------------------
|
||||
procedure CurStepChanged(CurStep: TSetupStep);
|
||||
begin
|
||||
if CurStep = ssInstall then
|
||||
begin
|
||||
// Genere le machine_id AVANT la copie des fichiers
|
||||
MachineIdValue := GenerateMachineId();
|
||||
end;
|
||||
|
||||
if CurStep = ssPostInstall then
|
||||
begin
|
||||
// Ecrit config.txt et machine_id.txt
|
||||
WriteGeneratedConfig();
|
||||
WriteMachineId();
|
||||
// Notifie le serveur (best-effort)
|
||||
NotifyServerEnrollment();
|
||||
end;
|
||||
end;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Desinstallation : proposer d'exporter les logs avant suppression
|
||||
// --------------------------------------------------------------------
|
||||
function InitializeUninstall(): Boolean;
|
||||
var
|
||||
LogDir, ExportDir: string;
|
||||
ResultCode: Integer;
|
||||
begin
|
||||
Result := True;
|
||||
LogDir := ExpandConstant('{app}\agent_v1\logs');
|
||||
|
||||
if DirExists(LogDir) then
|
||||
begin
|
||||
if MsgBox('Voulez-vous exporter les logs de Lea avant la desinstallation ?' + #13#10 +
|
||||
'(les logs seront copies dans votre dossier Documents)',
|
||||
mbConfirmation, MB_YESNO) = IDYES then
|
||||
begin
|
||||
ExportDir := ExpandConstant('{userdocs}\Lea_logs_export');
|
||||
ForceDirectories(ExportDir);
|
||||
Exec('powershell.exe',
|
||||
'-NoProfile -Command "Copy-Item -Path ' + AddQuotes(LogDir + '\*') +
|
||||
' -Destination ' + AddQuotes(ExportDir) + ' -Recurse -Force"',
|
||||
'', SW_HIDE, ewWaitUntilTerminated, ResultCode);
|
||||
MsgBox('Logs exportes dans : ' + ExportDir, mbInformation, MB_OK);
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
227
deploy/installer/README.md
Normal file
227
deploy/installer/README.md
Normal file
@@ -0,0 +1,227 @@
|
||||
# Installeur Lea (Inno Setup)
|
||||
|
||||
Installeur Windows professionnel pour Lea, remplacant le ZIP + `install.bat` artisanal.
|
||||
|
||||
## Resume
|
||||
|
||||
Produit `Lea-Setup-v1.0.0.exe` dans `deploy/releases/`.
|
||||
|
||||
Caracteristiques :
|
||||
- Interface francaise, moderne (style wizard Windows 10/11)
|
||||
- Page custom d'enrollment (nom, email, ID interne, URL serveur, token)
|
||||
- Generation automatique de `machine_id` unique (GUID + hash hostname)
|
||||
- `config.txt` genere a partir des donnees saisies
|
||||
- Option bundle Python 3.12 embedded (postes sans droits admin)
|
||||
- Raccourci demarrage automatique (`shell:startup`) optionnel
|
||||
- Notification serveur a l'install / desinstall (best-effort)
|
||||
- Installation silencieuse : `/VERYSILENT /CONFIG=enroll.txt`
|
||||
- Desinstallation propre : kill process, cleanup, export logs
|
||||
|
||||
## Pre-requis pour compiler
|
||||
|
||||
### Inno Setup 6.2+
|
||||
|
||||
Telecharger depuis [jrsoftware.org](https://jrsoftware.org/isinfo.php) et installer
|
||||
`innosetup-6.x.exe`. Le compilateur `ISCC.exe` doit etre accessible.
|
||||
|
||||
### Alternative Linux : Wine
|
||||
|
||||
```bash
|
||||
# Installation
|
||||
winetricks innosetup
|
||||
# Ou : telecharger innosetup-6.x.exe et lancer : wine innosetup-6.x.exe
|
||||
|
||||
# Verifier
|
||||
ls "$HOME/.wine/drive_c/Program Files (x86)/Inno Setup 6/ISCC.exe"
|
||||
```
|
||||
|
||||
Le script `build_installer.sh` detecte automatiquement Wine si present.
|
||||
|
||||
## Build local
|
||||
|
||||
### Build complet (staging + compilation)
|
||||
|
||||
```bash
|
||||
cd rpa_vision_v3
|
||||
./deploy/installer/build_installer.sh
|
||||
```
|
||||
|
||||
Produit `deploy/releases/Lea-Setup-v1.0.0.exe`.
|
||||
|
||||
### Build staging uniquement (sans ISCC)
|
||||
|
||||
```bash
|
||||
./deploy/installer/build_installer.sh --stage-only
|
||||
```
|
||||
|
||||
Prepare `deploy/build/installer_staging/` puis affiche la commande ISCC a executer
|
||||
sur Windows.
|
||||
|
||||
### Nettoyer avant
|
||||
|
||||
```bash
|
||||
./deploy/installer/build_installer.sh --clean
|
||||
```
|
||||
|
||||
## Build sur Windows (recommande pour production)
|
||||
|
||||
1. Copier le dossier `deploy/` sur le PC Windows
|
||||
2. Ouvrir `deploy/installer/Lea.iss` dans Inno Setup Compiler
|
||||
3. `Build > Compile` (ou F9)
|
||||
4. Recuperer `deploy/releases/Lea-Setup-v1.0.0.exe`
|
||||
|
||||
## Python 3.12 embedded (optionnel)
|
||||
|
||||
Pour bundler Python directement dans l'installeur (evite d'exiger que les postes
|
||||
aient Python installe) :
|
||||
|
||||
```bash
|
||||
# Sur Linux
|
||||
cd deploy/installer
|
||||
wget https://www.python.org/ftp/python/3.12.8/python-3.12.8-embed-amd64.zip
|
||||
mkdir python-3.12-embed
|
||||
unzip python-3.12.8-embed-amd64.zip -d python-3.12-embed/
|
||||
```
|
||||
|
||||
Le staging copie automatiquement ce dossier si present. Le composant
|
||||
"pythonembed" devient alors selectionnable dans l'installeur.
|
||||
|
||||
Le script `configure_embed.ps1` :
|
||||
1. Patche `python312._pth` pour activer `import site`
|
||||
2. Installe `pip` via `get-pip.py`
|
||||
3. Installe `requirements_agent.txt`
|
||||
4. Reecrit `Lea.bat` pour pointer sur `python-embed\pythonw.exe`
|
||||
|
||||
## Installation silencieuse (deploiement de masse)
|
||||
|
||||
Pour deployer sans interaction utilisateur (GPO, SCCM, script PowerShell) :
|
||||
|
||||
1. Preparer un fichier `enroll.txt` par poste (ou un commun) :
|
||||
|
||||
```
|
||||
USER_NAME=Jean Dupont
|
||||
USER_EMAIL=jean.dupont@aivanov.com
|
||||
USER_ID=EMP-00123
|
||||
SERVER_URL=https://lea.labs.laurinebazin.design/api/v1
|
||||
API_TOKEN=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
|
||||
```
|
||||
|
||||
2. Lancer l'installeur :
|
||||
|
||||
```cmd
|
||||
Lea-Setup-v1.0.0.exe /VERYSILENT /CONFIG=C:\temp\enroll.txt /DIR="C:\Lea"
|
||||
```
|
||||
|
||||
Parametres Inno Setup utiles :
|
||||
- `/VERYSILENT` : aucune UI
|
||||
- `/SILENT` : barre de progression seulement
|
||||
- `/DIR="..."` : dossier d'installation
|
||||
- `/LOG="install.log"` : log d'installation
|
||||
- `/TASKS="startmenuicon,autostart"` : composants a installer (voir `[Tasks]` et `[Components]`)
|
||||
- `/CONFIG=path` : fichier d'enrollment (custom, specifique a Lea)
|
||||
|
||||
## Signature du .exe (SmartScreen)
|
||||
|
||||
Sans signature, Windows SmartScreen affiche un avertissement rouge ("Cet editeur
|
||||
est inconnu"). Pour eviter cela, signer l'installeur avec un certificat
|
||||
code-signing.
|
||||
|
||||
### Options de certificat
|
||||
|
||||
1. **Certificat OV (Organization Validation)** : ~200-400 EUR/an
|
||||
- Sectigo, DigiCert, GlobalSign
|
||||
- SmartScreen apprend la reputation progressivement (~30 installations)
|
||||
- Livre sur token USB FIPS depuis 2023
|
||||
|
||||
2. **Certificat EV (Extended Validation)** : ~400-700 EUR/an
|
||||
- Reputation SmartScreen immediate (pas d'avertissement des la 1ere install)
|
||||
- Strict : obligatoirement sur token USB
|
||||
|
||||
### Signature manuelle (avec signtool.exe du Windows SDK)
|
||||
|
||||
```cmd
|
||||
signtool sign ^
|
||||
/tr http://timestamp.sectigo.com ^
|
||||
/td sha256 ^
|
||||
/fd sha256 ^
|
||||
/a ^
|
||||
"deploy\releases\Lea-Setup-v1.0.0.exe"
|
||||
|
||||
signtool verify /pa /v "deploy\releases\Lea-Setup-v1.0.0.exe"
|
||||
```
|
||||
|
||||
### Signature automatique dans Inno Setup
|
||||
|
||||
Ajouter dans `Lea.iss` apres `[Setup]` :
|
||||
|
||||
```
|
||||
SignTool=signtool $f
|
||||
```
|
||||
|
||||
Et declarer le signtool via `ISCC.exe /Ssigntool=...` au build.
|
||||
|
||||
### Solution interne (certif AIVANOV)
|
||||
|
||||
Si AIVANOV a deja un certificat code-signing, le token USB + mot de passe
|
||||
suffisent. Sinon, Sectigo OV est un bon choix d'entree de gamme.
|
||||
|
||||
## Structure du dossier installer/
|
||||
|
||||
```
|
||||
deploy/installer/
|
||||
├── Lea.iss # Script Inno Setup principal
|
||||
├── build_installer.sh # Helper bash (staging + ISCC)
|
||||
├── uninstall_lea.ps1 # Script de desinstallation propre
|
||||
├── configure_embed.ps1 # Configuration Python embedded
|
||||
├── config_template.txt # Modele config pour /VERYSILENT /CONFIG=
|
||||
├── LICENSE.txt # CGU affichees dans la page licence
|
||||
└── README.md # Ce fichier
|
||||
```
|
||||
|
||||
## Test de l'installeur
|
||||
|
||||
1. **Machine de test Windows 11** (VM ou PC physique, idealement sans Python)
|
||||
2. Copier `Lea-Setup-v1.0.0.exe` sur la machine
|
||||
3. Double-cliquer : verifier que l'enrollment s'affiche en francais
|
||||
4. Tester l'installation (avec et sans Python deja installe)
|
||||
5. Verifier le fichier `C:\Program Files\Lea\config.txt` genere
|
||||
6. Verifier le raccourci `shell:startup` (si option cochee)
|
||||
7. Lancer Lea, verifier la connexion au serveur
|
||||
8. Tester la desinstallation depuis "Ajout/suppression de programmes"
|
||||
|
||||
### Test automatise (PowerShell, sur la VM)
|
||||
|
||||
```powershell
|
||||
# Installation silencieuse
|
||||
$cfg = "C:\temp\enroll.txt"
|
||||
@"
|
||||
USER_NAME=Test Automatique
|
||||
USER_EMAIL=test@aivanov.com
|
||||
"@ | Out-File -Encoding ASCII $cfg
|
||||
|
||||
.\Lea-Setup-v1.0.0.exe /VERYSILENT /CONFIG=$cfg /LOG="C:\temp\install.log"
|
||||
|
||||
# Verifications
|
||||
Test-Path "C:\Program Files\Lea\config.txt"
|
||||
Get-Content "C:\Program Files\Lea\machine_id.txt"
|
||||
|
||||
# Desinstallation silencieuse
|
||||
$uninst = Get-WmiObject Win32_Product | Where-Object { $_.Name -like "Lea*" }
|
||||
$uninst.Uninstall()
|
||||
```
|
||||
|
||||
## Notes et limites connues
|
||||
|
||||
- **Endpoint serveur `/agents/enroll` et `/agents/uninstall` :** pas encore
|
||||
implemente cote serveur (avril 2026). L'installeur envoie la requete en
|
||||
best-effort, une erreur est silencieusement ignoree. A implementer dans
|
||||
`agent_v0/server_v1/api_stream.py` quand necessaire.
|
||||
- **Python embedded :** le patch `python312._pth` + pip bootstrap fonctionne mais
|
||||
augmente la taille de l'installeur (~25 MB). A reserver aux postes sans
|
||||
Python.
|
||||
- **Code signing :** indispensable pour deploiement hopital/client. Prevoir le
|
||||
budget certificat (400-700 EUR/an) dans la roadmap commerciale.
|
||||
|
||||
## Historique
|
||||
|
||||
- v1.0.0 (2026-04-13) : Premiere version de l'installeur Inno Setup.
|
||||
220
deploy/installer/build_installer.sh
Executable file
220
deploy/installer/build_installer.sh
Executable file
@@ -0,0 +1,220 @@
|
||||
#!/bin/bash
# ============================================================
# build_installer.sh — Prepares the staging area and invokes ISCC
# ------------------------------------------------------------
#
# This script:
#   1. Invokes build_package.sh to generate the classic package
#   2. Copies the package into deploy/build/installer_staging/
#   3. Copies the installer helpers (uninstall, licence)
#   4. Calls Inno Setup (ISCC.exe) when available
#      (otherwise, prints instructions to compile on Windows)
#
# Usage:
#   ./deploy/installer/build_installer.sh               # Full build
#   ./deploy/installer/build_installer.sh --stage-only  # Prepare staging only
#   ./deploy/installer/build_installer.sh --clean       # Clean first
#
# Requirements:
#   - bash, rsync, zip (for the base package)
#   - Inno Setup 6.2+ installed (Windows or Wine) to compile
#
# On Linux, ISCC.exe can be run through Wine:
#   wine "/home/dom/.wine/drive_c/Program Files (x86)/Inno Setup 6/ISCC.exe" Lea.iss
# ============================================================

set -euo pipefail

# Colors
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
NC='\033[0m'

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
DEPLOY_DIR="$(dirname "$SCRIPT_DIR")"
PROJECT_ROOT="$(dirname "$DEPLOY_DIR")"

STAGING_DIR="$DEPLOY_DIR/build/installer_staging"
RELEASES_DIR="$DEPLOY_DIR/releases"
BASE_BUILD_DIR="$DEPLOY_DIR/build/Lea"

# Read the version from config.py.
# \K keeps only the quoted value (single grep instead of grep|grep|tr);
# falls back to 1.0.0 when the pattern is not found.
VERSION=$(grep -oP 'AGENT_VERSION\s*=\s*"\K[^"]+' "$PROJECT_ROOT/agent_v0/agent_v1/config.py" || echo "1.0.0")

echo -e "${GREEN}============================================================${NC}"
echo -e "${GREEN} Build installeur Inno Setup Lea v${VERSION}${NC}"
echo -e "${GREEN}============================================================${NC}"
echo ""

# ---------------------------------------------------------------
# Argument parsing
# ---------------------------------------------------------------
STAGE_ONLY=0
CLEAN=0
for arg in "$@"; do
    case "$arg" in
        --stage-only) STAGE_ONLY=1 ;;
        --clean) CLEAN=1 ;;
        *) echo "Argument inconnu : $arg" ;;
    esac
done

# ---------------------------------------------------------------
# 1. Optional clean
# ---------------------------------------------------------------
if [[ $CLEAN -eq 1 ]]; then
    echo -e "${YELLOW}[0/5] Nettoyage des anciens builds...${NC}"
    rm -rf "$STAGING_DIR"
    rm -rf "$BASE_BUILD_DIR"
    rm -f "$RELEASES_DIR"/Lea-Setup-*.exe
    echo " OK"
    echo ""
fi

mkdir -p "$RELEASES_DIR"

# ---------------------------------------------------------------
# 2. Build the base package (reuses build_package.sh)
# ---------------------------------------------------------------
echo "[1/5] Build du package de base..."
if [[ ! -d "$BASE_BUILD_DIR" ]] || [[ $CLEAN -eq 1 ]]; then
    bash "$DEPLOY_DIR/build_package.sh" >/dev/null
fi
if [[ ! -d "$BASE_BUILD_DIR" ]]; then
    echo -e "${RED} Erreur : $BASE_BUILD_DIR n'a pas ete cree par build_package.sh${NC}"
    exit 1
fi
echo " Package de base pret : $BASE_BUILD_DIR"
echo ""

# ---------------------------------------------------------------
# 3. Copy to staging
# ---------------------------------------------------------------
echo "[2/5] Preparation du staging installeur..."
rm -rf "$STAGING_DIR"
mkdir -p "$STAGING_DIR"

# Copy everything except config.txt (generated by the installer).
# install.bat is kept but will be called in silent mode by the ISS script.
rsync -a \
    --exclude='__pycache__' \
    --exclude='*.pyc' \
    --exclude='.venv' \
    --exclude='sessions/' \
    --exclude='logs/' \
    "$BASE_BUILD_DIR/" \
    "$STAGING_DIR/"

# Drop config.txt from the staging area: the installer will generate it.
rm -f "$STAGING_DIR/config.txt"

echo " Staging : $STAGING_DIR"
echo " Fichiers : $(find "$STAGING_DIR" -type f | wc -l)"
echo ""

# ---------------------------------------------------------------
# 4. Copy the installer helpers (uninstall, licence, etc.)
# ---------------------------------------------------------------
echo "[3/5] Copie des helpers installeur..."
cp "$SCRIPT_DIR/uninstall_lea.ps1" "$STAGING_DIR/" 2>/dev/null || true
cp "$SCRIPT_DIR/configure_embed.ps1" "$STAGING_DIR/" 2>/dev/null || true
cp "$SCRIPT_DIR/LICENSE.txt" "$STAGING_DIR/" 2>/dev/null || true
cp "$SCRIPT_DIR/config_template.txt" "$STAGING_DIR/config_template.txt" 2>/dev/null || true
echo " Helpers copies"
echo ""

# ---------------------------------------------------------------
# 5. Embedded Python (optional)
# ---------------------------------------------------------------
PYTHON_EMBED_SRC="${PYTHON_EMBED_DIR:-$SCRIPT_DIR/python-3.12-embed}"
if [[ -d "$PYTHON_EMBED_SRC" ]]; then
    echo "[4/5] Copie de Python 3.12 embedded..."
    rsync -a "$PYTHON_EMBED_SRC/" "$STAGING_DIR/python-3.12-embed/"
    echo " Python embedded inclus"
else
    echo -e "${YELLOW}[4/5] Python 3.12 embedded non trouve dans $PYTHON_EMBED_SRC${NC}"
    echo " L'installeur sera produit SANS bundle Python."
    echo " Pour bundler Python : voir README.md section 'Python embedded'"
fi
echo ""

# ---------------------------------------------------------------
# 6. Stage-only: stop here
# ---------------------------------------------------------------
if [[ $STAGE_ONLY -eq 1 ]]; then
    echo -e "${GREEN} Staging pret. Utiliser ISCC pour compiler :${NC}"
    echo " ISCC.exe \"$SCRIPT_DIR/Lea.iss\""
    echo ""
    exit 0
fi

# ---------------------------------------------------------------
# 7. Compile with ISCC (when available)
# ---------------------------------------------------------------
echo "[5/5] Compilation Inno Setup..."

# Locate ISCC: native Linux (rare), Wine, or WSL.
# The command is kept as a bash ARRAY (not a string passed to eval) so
# paths containing spaces or shell metacharacters cannot be re-split or
# interpreted by the shell.
ISCC_CMD=()
if command -v iscc >/dev/null 2>&1; then
    ISCC_CMD=(iscc)
elif command -v ISCC.exe >/dev/null 2>&1; then
    ISCC_CMD=(ISCC.exe)
elif command -v wine >/dev/null 2>&1; then
    # Common Wine install locations
    for path in \
        "$HOME/.wine/drive_c/Program Files (x86)/Inno Setup 6/ISCC.exe" \
        "$HOME/.wine/drive_c/Program Files/Inno Setup 6/ISCC.exe"; do
        if [[ -f "$path" ]]; then
            ISCC_CMD=(wine "$path")
            break
        fi
    done
fi

if [[ ${#ISCC_CMD[@]} -eq 0 ]]; then
    echo ""
    echo -e "${YELLOW} ISCC (Inno Setup Compiler) introuvable.${NC}"
    echo ""
    echo " Le staging est pret dans : $STAGING_DIR"
    echo ""
    echo " Pour compiler l'installeur, deux options :"
    echo ""
    echo " 1) Sur un PC Windows avec Inno Setup 6 installe :"
    echo " - Copier le dossier deploy/ sur le PC"
    echo " - Ouvrir deploy/installer/Lea.iss dans Inno Setup"
    echo " - Cliquer 'Compile' (F9)"
    echo " - Recuperer deploy/releases/Lea-Setup-v${VERSION}.exe"
    echo ""
    echo " 2) Sur Linux avec Wine :"
    echo " - winetricks innosetup (ou installer le .exe manuellement)"
    echo " - wine \"\$HOME/.wine/drive_c/Program Files (x86)/Inno Setup 6/ISCC.exe\" \\"
    echo " \"$SCRIPT_DIR/Lea.iss\""
    echo ""
    exit 0
fi

echo " ISCC trouve : ${ISCC_CMD[*]}"
"${ISCC_CMD[@]}" "$SCRIPT_DIR/Lea.iss"

# Check the result
OUTPUT_EXE="$RELEASES_DIR/Lea-Setup-v${VERSION}.exe"
if [[ -f "$OUTPUT_EXE" ]]; then
    EXE_SIZE=$(du -h "$OUTPUT_EXE" | cut -f1)
    echo ""
    echo -e "${GREEN}============================================================${NC}"
    echo -e "${GREEN} Installeur produit !${NC}"
    echo -e "${GREEN}============================================================${NC}"
    echo ""
    echo " Fichier : $OUTPUT_EXE"
    echo " Taille : $EXE_SIZE"
    echo ""
    echo " Deploiement :"
    echo " - Signer le .exe avec un certificat code-signing (voir README.md)"
    echo " - Publier sur : https://lea.labs.laurinebazin.design/downloads/"
    echo " - Installation silencieuse : Lea-Setup-v${VERSION}.exe /VERYSILENT /CONFIG=enroll.txt"
    echo ""
else
    echo -e "${RED} Erreur : $OUTPUT_EXE n'a pas ete produit${NC}"
    exit 1
fi
|
||||
27
deploy/installer/config_template.txt
Normal file
27
deploy/installer/config_template.txt
Normal file
@@ -0,0 +1,27 @@
|
||||
# ============================================================
|
||||
# config_template.txt — Modele pour installation silencieuse
|
||||
# ------------------------------------------------------------
|
||||
#
|
||||
# Ce fichier est utilise en mode /VERYSILENT pour pre-remplir
|
||||
# les valeurs d'enrollment sans interface graphique.
|
||||
#
|
||||
# Usage :
|
||||
# Lea-Setup-v1.0.0.exe /VERYSILENT /CONFIG=enroll.txt
|
||||
#
|
||||
# L'installeur lit ce fichier au demarrage et remplit les pages
|
||||
# custom (nom, email, ID, URL, token) automatiquement.
|
||||
#
|
||||
# Toutes les cles ci-dessous sont optionnelles. Si une cle est
|
||||
# absente, la valeur par defaut de l'installeur est utilisee.
|
||||
#
|
||||
# Format : CLE=valeur, une ligne par parametre, # = commentaire.
|
||||
# ============================================================
|
||||
|
||||
# Identite du collaborateur (obligatoires sauf USER_ID)
|
||||
USER_NAME=Prenom Nom
|
||||
USER_EMAIL=prenom.nom@aivanov.com
|
||||
USER_ID=
|
||||
|
||||
# Connexion serveur (valeurs par defaut deja pre-remplies)
SERVER_URL=https://lea.labs.laurinebazin.design/api/v1
# SECURITY NOTE: never commit a real, long-lived API token in this template.
# The value below is shipped as a pre-filled default; it must be replaced by a
# deployment-specific token and revoked if it has been exposed (the CI
# secrets-scan job will flag hex tokens like this one).
API_TOKEN=86031addb338e449fccdb1a983f61807aec15d42d482b9c7748ad607dc23caab
|
||||
112
deploy/installer/configure_embed.ps1
Normal file
112
deploy/installer/configure_embed.ps1
Normal file
@@ -0,0 +1,112 @@
|
||||
# ============================================================
# configure_embed.ps1 — Configures the embedded Python runtime
# ------------------------------------------------------------
#
# When the 'pythonembed' component is installed we have:
#   <AppDir>\python-embed\   <-- embedded Python 3.12 runtime
#
# This script:
#   1. Enables package imports (patches python312._pth)
#   2. Bootstraps pip into the embedded runtime
#   3. Installs the dependencies from requirements_agent.txt
#   4. Rewrites Lea.bat to point at python-embed\pythonw.exe
#
# Must be executed with CWD = <AppDir>
# ============================================================

$ErrorActionPreference = 'Stop'

$AppDir = Get-Location
$EmbedDir = Join-Path $AppDir "python-embed"
$PythonExe = Join-Path $EmbedDir "python.exe"

if (-not (Test-Path $PythonExe)) {
    Write-Host "Python embedded introuvable, abandon."
    exit 1
}

Write-Host "Configuration de Python embedded..."

# ---------------------------------------------------------------
# 1. Uncomment the 'import site' line in python312._pth
# (required for pip to work in an embedded distribution)
# ---------------------------------------------------------------
$PthFile = Get-ChildItem -Path $EmbedDir -Filter "python*._pth" | Select-Object -First 1
if ($PthFile) {
    $Content = Get-Content $PthFile.FullName
    $NewContent = $Content -replace '^#import site', 'import site'
    Set-Content -Path $PthFile.FullName -Value $NewContent
    Write-Host " python._pth patche (import site active)"
}

# ---------------------------------------------------------------
# 2. Install pip (bootstrap via get-pip.py)
# NOTE: $ErrorActionPreference = 'Stop' does NOT apply to native
# executables, so every python.exe invocation below is checked
# explicitly through $LASTEXITCODE — otherwise a failed bootstrap
# would be silently ignored and the script would report success.
# ---------------------------------------------------------------
$GetPip = Join-Path $env:TEMP "get-pip.py"
Write-Host " Telechargement de get-pip.py..."
Invoke-WebRequest -Uri "https://bootstrap.pypa.io/get-pip.py" -OutFile $GetPip -UseBasicParsing

Write-Host " Installation de pip..."
& $PythonExe $GetPip --no-warn-script-location
$PipExit = $LASTEXITCODE
Remove-Item $GetPip -Force
if ($PipExit -ne 0) {
    Write-Host "Echec de l'installation de pip (code $PipExit), abandon."
    exit 1
}

# ---------------------------------------------------------------
# 3. Install the dependencies
# ---------------------------------------------------------------
$Requirements = Join-Path $AppDir "requirements_agent.txt"
if (Test-Path $Requirements) {
    Write-Host " Installation des dependances Python..."
    & $PythonExe -m pip install --no-warn-script-location -r $Requirements
    if ($LASTEXITCODE -ne 0) {
        Write-Host "Echec de l'installation des dependances (code $LASTEXITCODE), abandon."
        exit 1
    }
}

# ---------------------------------------------------------------
# 4. Rewrite Lea.bat to use python-embed
# ---------------------------------------------------------------
$LeaBat = Join-Path $AppDir "Lea.bat"
$NewLeaBat = @"
@echo off
chcp 65001 >nul 2>&1
title Lea - Assistante IA
cd /d "%~dp0"

if exist "lea_agent.lock" (
for /f "usebackq tokens=* delims=" %%i in ("lea_agent.lock") do (
taskkill /F /PID %%i >nul 2>&1
)
del /f /q "lea_agent.lock" >nul 2>&1
timeout /t 2 >nul
)

if exist "config.txt" (
for /f "usebackq eol=# tokens=1,* delims==" %%a in ("config.txt") do (
if not "%%a"=="" if not "%%b"=="" set "%%a=%%b"
)
)

echo.
echo Demarrage de Lea (runtime embedded)...
echo.

start "" /b "%~dp0python-embed\pythonw.exe" run_agent_v1.py

timeout /t 3 >nul
set "LEA_ALIVE=0"
if exist "lea_agent.lock" (
for /f "usebackq tokens=* delims=" %%i in ("lea_agent.lock") do (
tasklist /FI "PID eq %%i" /NH 2>nul | findstr /I "pythonw" >nul && set "LEA_ALIVE=1"
)
)
if "%LEA_ALIVE%"=="0" (
echo.
echo Lea n'a pas demarre correctement. Affichage des erreurs :
echo.
"%~dp0python-embed\python.exe" run_agent_v1.py
pause
)
"@

Set-Content -Path $LeaBat -Value $NewLeaBat -Encoding ASCII
Write-Host " Lea.bat reecrit pour runtime embedded"

Write-Host "Configuration terminee."
exit 0
|
||||
99
deploy/installer/uninstall_lea.ps1
Normal file
99
deploy/installer/uninstall_lea.ps1
Normal file
@@ -0,0 +1,99 @@
|
||||
# ============================================================
# uninstall_lea.ps1 — Clean uninstall script for Lea
# ------------------------------------------------------------
#
# Called by Inno Setup via [UninstallRun] BEFORE the files are
# removed. Responsibilities:
#
#   1. Kill the Lea process cleanly (via the PID in the lock file)
#   2. Clean shell:startup (remove the auto-start shortcut)
#   3. Notify the server about the uninstall (best-effort)
#   4. Remove the lock file
#
# Usage (from Inno Setup):
#   powershell.exe -NoProfile -ExecutionPolicy Bypass \
#       -File uninstall_lea.ps1 -AppDir "C:\Program Files\Lea"
# ============================================================

param(
    [Parameter(Mandatory = $true)]
    [string]$AppDir
)

$ErrorActionPreference = 'SilentlyContinue'
Write-Host "Desinstallation de Lea en cours..."

# ---------------------------------------------------------------
# 1. Kill the process using the PID stored in the lock file.
# BUGFIX: the variable must NOT be named $Pid — $PID is a read-only
# automatic variable holding the PID of *this* PowerShell process.
# With 'SilentlyContinue' above, the assignment to $Pid fails
# silently, $Pid keeps the script's own PID, and Stop-Process would
# kill the uninstaller itself instead of the Lea agent.
# ---------------------------------------------------------------
$LockFile = Join-Path $AppDir "lea_agent.lock"
if (Test-Path $LockFile) {
    try {
        $AgentPid = (Get-Content $LockFile -ErrorAction Stop | Select-Object -First 1).Trim()
        if ($AgentPid -match '^\d+$') {
            Write-Host " Arret du process Lea (PID $AgentPid)..."
            Stop-Process -Id ([int]$AgentPid) -Force -ErrorAction SilentlyContinue
            Start-Sleep -Seconds 1
        }
    } catch {
        Write-Host " Lock file illisible (ignore)."
    }
    Remove-Item $LockFile -Force -ErrorAction SilentlyContinue
}

# ---------------------------------------------------------------
# 2. Clean shell:startup (may not exist if the component was not
# installed; removed silently in every case)
# ---------------------------------------------------------------
$StartupDir = [Environment]::GetFolderPath('Startup')
$StartupShortcut = Join-Path $StartupDir "Lea.lnk"
if (Test-Path $StartupShortcut) {
    Write-Host " Suppression du raccourci auto-start..."
    Remove-Item $StartupShortcut -Force -ErrorAction SilentlyContinue
}

# ---------------------------------------------------------------
# 3. Notify the server about the uninstall (best-effort)
# ---------------------------------------------------------------
$ConfigFile = Join-Path $AppDir "config.txt"
$MachineIdFile = Join-Path $AppDir "machine_id.txt"
if ((Test-Path $ConfigFile) -and (Test-Path $MachineIdFile)) {
    try {
        $ConfigLines = Get-Content $ConfigFile
        $ServerUrl = ($ConfigLines | Where-Object { $_ -match '^RPA_SERVER_URL=' } | Select-Object -First 1) -replace '^RPA_SERVER_URL=', ''
        $Token = ($ConfigLines | Where-Object { $_ -match '^RPA_API_TOKEN=' } | Select-Object -First 1) -replace '^RPA_API_TOKEN=', ''
        $MachineId = (Get-Content $MachineIdFile -ErrorAction Stop | Select-Object -First 1).Trim()

        if ($ServerUrl -and $Token -and $MachineId) {
            Write-Host " Notification du serveur..."
            $Body = @{
                machine_id = $MachineId
                hostname = $env:COMPUTERNAME
                event = 'uninstall'
                timestamp = (Get-Date -Format "o")
            } | ConvertTo-Json

            Invoke-RestMethod `
                -Uri "$ServerUrl/agents/uninstall" `
                -Method POST `
                -Body $Body `
                -ContentType 'application/json' `
                -Headers @{ Authorization = "Bearer $Token" } `
                -TimeoutSec 5 `
                -ErrorAction SilentlyContinue | Out-Null
        }
    } catch {
        # Best-effort: any network/auth error is ignored
        Write-Host " Notification serveur echouee (ignore)."
    }
}

# ---------------------------------------------------------------
# 4. Remove possibly-locked leftover files
# ---------------------------------------------------------------
Start-Sleep -Seconds 1
Get-ChildItem -Path $AppDir -Filter "*.pyc" -Recurse -ErrorAction SilentlyContinue |
    Remove-Item -Force -ErrorAction SilentlyContinue

Write-Host "Desinstallation : pre-traitement termine."
exit 0
|
||||
@@ -8,12 +8,17 @@ title Lea - Assistante IA
|
||||
cd /d "%~dp0"
|
||||
|
||||
:: ---------------------------------------------------------------
|
||||
:: Fermer les anciennes instances de Lea
|
||||
:: Fermer l'ancienne instance de Lea (UNIQUEMENT via le PID du lock)
|
||||
:: NE JAMAIS tuer tous les pythonw.exe/python.exe du poste :
|
||||
:: cela tuerait Jupyter, Spyder, Anaconda, scripts metier, etc.
|
||||
:: ---------------------------------------------------------------
|
||||
taskkill /F /IM pythonw.exe >nul 2>&1
|
||||
taskkill /F /IM python.exe >nul 2>&1
|
||||
taskkill /F /IM rpa-agent.exe >nul 2>&1
|
||||
timeout /t 2 >nul
|
||||
if exist "lea_agent.lock" (
|
||||
for /f "usebackq tokens=* delims=" %%i in ("lea_agent.lock") do (
|
||||
taskkill /F /PID %%i >nul 2>&1
|
||||
)
|
||||
del /f /q "lea_agent.lock" >nul 2>&1
|
||||
timeout /t 2 >nul
|
||||
)
|
||||
|
||||
:: ---------------------------------------------------------------
|
||||
:: Verifier que l'installation a ete faite
|
||||
@@ -51,10 +56,19 @@ echo Pour arreter Lea : clic droit sur l'icone ^> "Quitter Lea"
|
||||
echo Vous pouvez fermer cette fenetre.
|
||||
echo.
|
||||
|
||||
.venv\Scripts\pythonw.exe run_agent_v1.py
|
||||
if errorlevel 1 (
|
||||
start "" /b .venv\Scripts\pythonw.exe run_agent_v1.py
|
||||
|
||||
:: Attendre 3s puis verifier que Lea tourne (via le PID du lock)
|
||||
timeout /t 3 >nul
|
||||
set "LEA_ALIVE=0"
|
||||
if exist "lea_agent.lock" (
|
||||
for /f "usebackq tokens=* delims=" %%i in ("lea_agent.lock") do (
|
||||
tasklist /FI "PID eq %%i" /NH 2>nul | findstr /I "pythonw" >nul && set "LEA_ALIVE=1"
|
||||
)
|
||||
)
|
||||
if "%LEA_ALIVE%"=="0" (
|
||||
echo.
|
||||
echo Lea a rencontre un probleme au demarrage.
|
||||
echo Lea n'a pas demarre correctement.
|
||||
echo Tentative avec affichage des erreurs...
|
||||
echo.
|
||||
.venv\Scripts\python.exe run_agent_v1.py
|
||||
|
||||
@@ -23,9 +23,21 @@ RPA_SERVER_HOST=lea.labs.laurinebazin.design
|
||||
# Parametres avances (ne pas modifier sauf indication)
|
||||
# ============================================================
|
||||
|
||||
# Flouter les zones de texte dans les captures (securite donnees)
|
||||
# Mettre false uniquement pour le developpement/tests
|
||||
RPA_BLUR_SENSITIVE=true
|
||||
# Flouter les zones de texte dans les captures cote CLIENT.
|
||||
#
|
||||
# DEPUIS AVRIL 2026 : LE BLUR CLIENT EST DESACTIVE PAR DEFAUT.
|
||||
# Le floutage des donnees sensibles (noms, adresses, telephones, NIR, email)
|
||||
# est desormais effectue cote SERVEUR via EDS-NLP + OCR dans le module
|
||||
# core/anonymisation/pii_blur.py.
|
||||
#
|
||||
# Avantages du blur server-side :
|
||||
# - Cible precisement les PII (PERSON/LOCATION/PHONE/NIR/EMAIL)
|
||||
# - Ne casse plus les codes CIM, montants PMSI, identifiants techniques
|
||||
# - Deux versions stockees : _raw (entrainement) + _blurred (affichage)
|
||||
#
|
||||
# Ne remettre a 'true' que si un deploiement specifique l'exige explicitement
|
||||
# (ex : reseau non chiffre entre agent et serveur).
|
||||
RPA_BLUR_SENSITIVE=false
|
||||
|
||||
# Duree de conservation des logs en jours (minimum 180 pour conformite)
|
||||
RPA_LOG_RETENTION_DAYS=180
|
||||
|
||||
42
deploy/systemd/rpa-session-cleaner.service
Normal file
42
deploy/systemd/rpa-session-cleaner.service
Normal file
@@ -0,0 +1,42 @@
|
||||
[Unit]
|
||||
Description=RPA Vision V3 - Session Cleaner (port 5006)
|
||||
Documentation=https://lea.labs.laurinebazin.design
|
||||
After=network-online.target rpa-streaming.service
|
||||
Wants=network-online.target
|
||||
Requires=rpa-streaming.service
|
||||
PartOf=rpa-vision.target
|
||||
StartLimitIntervalSec=300
|
||||
StartLimitBurst=5
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
|
||||
# ---- Runtime ----
|
||||
User=dom
|
||||
Group=dom
|
||||
WorkingDirectory=/home/dom/ai/rpa_vision_v3
|
||||
EnvironmentFile=/home/dom/ai/rpa_vision_v3/.env.local
|
||||
Environment="PYTHONUNBUFFERED=1"
|
||||
Environment="RPA_SERVICE_NAME=rpa-session-cleaner"
|
||||
|
||||
# Lancement du session cleaner (dépend du streaming server port 5005)
|
||||
ExecStart=/home/dom/ai/rpa_vision_v3/.venv/bin/python3 tools/session_cleaner.py
|
||||
|
||||
# ---- Resilience ----
|
||||
Restart=on-failure
|
||||
RestartSec=10
|
||||
TimeoutStopSec=15
|
||||
KillMode=mixed
|
||||
KillSignal=SIGTERM
|
||||
|
||||
# ---- Hardening ----
|
||||
NoNewPrivileges=true
|
||||
PrivateTmp=true
|
||||
|
||||
# Logs -> journald
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
SyslogIdentifier=rpa-session-cleaner
|
||||
|
||||
[Install]
|
||||
WantedBy=rpa-vision.target
|
||||
7
deploy/systemd/rpa-vision.target
Normal file
7
deploy/systemd/rpa-vision.target
Normal file
@@ -0,0 +1,7 @@
|
||||
[Unit]
|
||||
Description=RPA Vision V3 - Tous les services
|
||||
After=network-online.target
|
||||
Wants=rpa-streaming.service rpa-vision-v3-api.service rpa-vision-v3-dashboard.service rpa-vision-v3-worker.service rpa-session-cleaner.service
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
227
docs/CI_SETUP.md
Normal file
227
docs/CI_SETUP.md
Normal file
@@ -0,0 +1,227 @@
|
||||
# CI Setup — Gitea Actions pour RPA Vision V3
|
||||
|
||||
> **Statut** : CI activée le 15 avril 2026. Runner `dom-local-runner` (systemd) enregistré.
|
||||
|
||||
Ce document décrit la CI minimale mise en place sur `gitea.localhost:3100`
|
||||
pour prévenir les régressions silencieuses sur `main` et les PR.
|
||||
|
||||
## Vue d'ensemble
|
||||
|
||||
Deux workflows Gitea Actions (syntaxe compatible GitHub Actions) :
|
||||
|
||||
| Workflow | Fichier | Déclencheur | Bloquant |
|
||||
|----------------------------------|---------------------------------------|------------------------------|----------|
|
||||
| Tests | `.gitea/workflows/tests.yml` | push + PR (toutes branches) | Oui (unit + security) |
|
||||
| Audit sécurité | `.gitea/workflows/security-audit.yml` | push main + cron hebdo | Non |
|
||||
|
||||
### Jobs du workflow `tests`
|
||||
|
||||
1. **lint** (non bloquant) — `ruff` + `black --check` sur `core/`, `agent_v0/`, `tests/`.
|
||||
2. **unit-tests** (bloquant) — `pytest tests/unit/` avec `-m "not slow and not gpu and not integration and not performance and not visual"`.
|
||||
3. **security-tests** (bloquant) — `pytest tests/unit/test_security_*.py` en mode verbose. Dépend de `unit-tests`.
|
||||
|
||||
### Jobs du workflow `security-audit`
|
||||
|
||||
1. **bandit** — scan statique sur `core/` (asserts ignorés).
|
||||
2. **pip-audit** — détection CVE sur `requirements-ci.txt` et `requirements.txt`.
|
||||
3. **secrets-scan** — `grep` pour patterns `sk-ant-`, `sk-proj-`, `AIzaSy`, `AKIA`, `hf_`.
|
||||
|
||||
Aucun de ces jobs ne casse la CI — ils produisent des artefacts consultables.
|
||||
|
||||
## Activation de Gitea Actions
|
||||
|
||||
Gitea Actions n'est pas actif par défaut. Deux étapes :
|
||||
|
||||
### 1. Activer Actions dans Gitea
|
||||
|
||||
Sur `http://localhost:3100`, éditer `/home/dom/Install_base/docker-compose.yml`
|
||||
(ou le `app.ini` monté dans le conteneur Gitea) et ajouter :
|
||||
|
||||
```ini
|
||||
[actions]
|
||||
ENABLED = true
|
||||
DEFAULT_ACTIONS_URL = https://github.com
|
||||
```
|
||||
|
||||
Puis redémarrer Gitea :
|
||||
|
||||
```bash
|
||||
cd /home/dom/Install_base
|
||||
docker compose restart gitea
|
||||
```
|
||||
|
||||
Vérifier : dans l'UI Gitea → `Site Administration` → `Configuration Summary`
|
||||
→ la section `[actions]` doit afficher `enabled: true`.
|
||||
|
||||
Côté dépôt : `Settings` → `Advanced Settings` → cocher **"Enable Repository Actions"**.
|
||||
|
||||
### 2. Installer et enregistrer un runner local
|
||||
|
||||
Gitea a besoin d'un `act_runner` (fork de nektos/act) pour exécuter les jobs.
|
||||
|
||||
```bash
|
||||
# Téléchargement du runner (Linux amd64)
|
||||
cd /home/dom/Install_base
|
||||
mkdir -p gitea_runner && cd gitea_runner
|
||||
wget https://dl.gitea.com/act_runner/0.2.11/act_runner-0.2.11-linux-amd64 -O act_runner
|
||||
chmod +x act_runner
|
||||
|
||||
# Génération de la config
|
||||
./act_runner generate-config > config.yaml
|
||||
|
||||
# Récupération du token d'enregistrement
|
||||
# Site Administration → Actions → Runners → Create new Runner
|
||||
# (ou pour un runner par-dépôt : Settings du dépôt → Actions → Runners)
|
||||
|
||||
# Enregistrement (interactif)
|
||||
./act_runner register --no-interactive \
|
||||
--instance http://localhost:3100 \
|
||||
--token <TOKEN_COPIE_DEPUIS_GITEA> \
|
||||
--name "runner-local-cpu" \
|
||||
--labels "ubuntu-latest:docker://catthehacker/ubuntu:act-22.04"
|
||||
|
||||
# Lancement en daemon
|
||||
nohup ./act_runner daemon --config config.yaml > runner.log 2>&1 &
|
||||
```
|
||||
|
||||
Pour persister au reboot : créer un service systemd
|
||||
(cf. `~/ai/rpa_vision_v3/deploy/systemd/` pour un modèle).
|
||||
|
||||
**Note** : le label `ubuntu-latest` pointe sur une image Docker légère
|
||||
(`catthehacker/ubuntu:act-22.04`, ~300 Mo) qui suffit pour nos jobs Python.
|
||||
|
||||
### 3. Premier test
|
||||
|
||||
```bash
|
||||
cd /home/dom/ai/rpa_vision_v3
|
||||
# Modification triviale
|
||||
echo "" >> README.md
|
||||
git add README.md
|
||||
git commit -m "chore: trigger CI"
|
||||
git push gitea main
|
||||
```
|
||||
|
||||
Dans l'UI Gitea → onglet `Actions` du dépôt, le workflow doit apparaître
|
||||
et passer en ~2 minutes.
|
||||
|
||||
## Lancer les tests localement avant push
|
||||
|
||||
Identique à la CI :
|
||||
|
||||
```bash
|
||||
cd /home/dom/ai/rpa_vision_v3
|
||||
source .venv/bin/activate
|
||||
|
||||
# Tests unitaires (hors slow/gpu/integration) — ~60s
|
||||
pytest tests/unit/ -m "not slow and not gpu and not integration and not performance and not visual" -q
|
||||
|
||||
# Tests sécurité seulement — ~5s
|
||||
pytest tests/unit/test_security_*.py -v
|
||||
|
||||
# Lint (si installé)
|
||||
ruff check --select=E9,F63,F7,F82 core/ agent_v0/ tests/
|
||||
black --check core/ agent_v0/ tests/
|
||||
```
|
||||
|
||||
Ou via Makefile :
|
||||
|
||||
```bash
|
||||
make test-fast # équivalent à "not slow"
|
||||
make check # validate-imports + test-fast
|
||||
```
|
||||
|
||||
## Désactiver temporairement la CI (merge urgent)
|
||||
|
||||
Trois options, de la plus propre à la plus brutale :
|
||||
|
||||
### Option 1 — Skip via message de commit (recommandé)
|
||||
|
||||
Préfixer le message avec `[skip ci]` ou `[ci skip]` :
|
||||
|
||||
```bash
|
||||
git commit -m "fix: hotfix prod [skip ci]"
|
||||
```
|
||||
|
||||
Gitea Actions respecte cette convention.
|
||||
|
||||
### Option 2 — Désactiver le workflow côté dépôt
|
||||
|
||||
Dans l'UI Gitea → dépôt → `Actions` → sélectionner le workflow → bouton
|
||||
**"Disable workflow"**. Réactivable au même endroit.
|
||||
|
||||
### Option 3 — Renommer le fichier
|
||||
|
||||
```bash
|
||||
mv .gitea/workflows/tests.yml .gitea/workflows/tests.yml.disabled
|
||||
git commit -am "chore: disable CI temporarily"
|
||||
```
|
||||
|
||||
Ne **jamais** supprimer le fichier — ça rend le rollback pénible.
|
||||
|
||||
## Limitations connues
|
||||
|
||||
- **Pas de tests `slow` / `gpu` / `integration`** en CI. Ces tests nécessitent
|
||||
CUDA, Ollama (port 11434), ou des captures d'écran réelles. Ils doivent
|
||||
être lancés manuellement sur la machine de dev avant un tag de release.
|
||||
- **Pas de tests E2E `smoke`** (`tests/smoke/`) — nécessitent le serveur
|
||||
complet (ports 5005, 5001, 5002, 3002).
|
||||
- **Pas de tests `visual`** (`tests/visual/`) — nécessitent le serveur GPU.
|
||||
- **Runner unique** : tant qu'il n'y a qu'un `act_runner` enregistré,
|
||||
les jobs s'exécutent en série. Acceptable pour < 10 builds/jour.
|
||||
- **Pas de `torch` en CI** : si un test unitaire importe `torch` directement
|
||||
(sans lazy import), il échouera. Convention : les imports GPU doivent
|
||||
être dans `try/except ImportError` + marqueur `@pytest.mark.gpu`.
|
||||
- **`requirements-ci.txt` à resynchroniser** : quand une dépendance est
|
||||
ajoutée à `requirements.txt` et utilisée par un test unitaire, penser
|
||||
à l'ajouter aussi à `requirements-ci.txt`.
|
||||
|
||||
## Temps d'exécution estimé
|
||||
|
||||
| Job | Cold (sans cache pip) | Warm (cache pip) |
|
||||
|-----------------|----------------------|------------------|
|
||||
| lint | ~40s | ~15s |
|
||||
| unit-tests | ~2m30 | ~1m15 |
|
||||
| security-tests | ~1m | ~30s |
|
||||
| **Total CI** | **~3m** | **~1m30** |
|
||||
|
||||
Le cache pip est géré automatiquement par `actions/setup-python@v5`
|
||||
via la clé `requirements-ci.txt` + `requirements.txt`.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Le workflow ne se déclenche pas
|
||||
|
||||
1. Vérifier que `[actions]` est actif dans `app.ini` Gitea.
|
||||
2. Vérifier que le runner est bien enregistré : `Site Administration` → `Actions` → `Runners`.
|
||||
3. Le runner doit être `Online` (point vert).
|
||||
4. Le dépôt doit avoir Actions activées dans ses paramètres.
|
||||
|
||||
### Erreur "No runner available"
|
||||
|
||||
Le runner est stoppé ou a un label incompatible. Relancer :
|
||||
|
||||
```bash
|
||||
cd /home/dom/Install_base/gitea_runner
|
||||
ps aux | grep act_runner # vérifier s'il tourne
|
||||
tail -f runner.log # voir les erreurs
|
||||
```
|
||||
|
||||
### Timeout sur `pip install`
|
||||
|
||||
`requirements.txt` contient torch + CUDA (~3 Go). Si la CI tombe sur
|
||||
`requirements.txt` au lieu de `requirements-ci.txt`, vérifier que le
|
||||
fichier léger est bien committé à la racine du repo.
|
||||
|
||||
### Tests passent en local mais échouent en CI
|
||||
|
||||
Diff le plus fréquent :
|
||||
- Variables d'environnement (`.env.local` absent en CI → tester avec `unset` en local).
|
||||
- Ports déjà pris par `svc.sh` en local mais libres en CI (→ OK).
|
||||
- Paths absolus hardcodés (`/home/dom/...`) → utiliser `pathlib` + fixtures.
|
||||
|
||||
## Évolutions possibles
|
||||
|
||||
- Ajouter un job `type-check` avec `mypy core/` (actuellement dans `requirements.txt` mais pas en CI — choix délibéré : trop lent et 200+ erreurs à nettoyer d'abord).
|
||||
- Ajouter un job `coverage` avec seuil minimum (ex: 60%).
|
||||
- Brancher les résultats sur un badge README via `gitea-actions-status`.
|
||||
- Pour les PR : bloquer le merge tant que `unit-tests` + `security-tests` ne passent pas (réglable dans `Settings` → `Branches` → `Branch protection rules`).
|
||||
107
docs/DEV_SETUP.md
Normal file
107
docs/DEV_SETUP.md
Normal file
@@ -0,0 +1,107 @@
|
||||
# DEV_SETUP — Guide développeur
|
||||
|
||||
Ce document recense les tâches d'administration du dépôt qui ne sont pas couvertes
|
||||
par `README.md` (destiné aux utilisateurs) mais nécessaires au quotidien.
|
||||
|
||||
## Sommaire
|
||||
|
||||
- [Environnement Python](#environnement-python)
|
||||
- [Services locaux](#services-locaux)
|
||||
- [Worktrees Claude Code](#worktrees-claude-code)
|
||||
- [Build du package Windows](#build-du-package-windows)
|
||||
|
||||
---
|
||||
|
||||
## Environnement Python
|
||||
|
||||
- Venv du projet : `.venv/` (à la racine du repo)
|
||||
- Python supporté : 3.10 à 3.12
|
||||
|
||||
```bash
|
||||
python3 -m venv .venv
|
||||
source .venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
## Services locaux
|
||||
|
||||
Utiliser `./svc.sh` pour piloter tous les services. La carte des ports est
|
||||
dans `services.conf`.
|
||||
|
||||
```bash
|
||||
./svc.sh status # État de tous les services
|
||||
./svc.sh start streaming # Démarrer le serveur Agent V1 (port 5005)
|
||||
./svc.sh restart api # Redémarrer l'API (port 8000)
|
||||
./svc.sh stop # Tout arrêter
|
||||
```
|
||||
|
||||
## Worktrees Claude Code
|
||||
|
||||
La CLI Claude Code peut créer des worktrees git dans `.claude/worktrees/` pour
|
||||
exécuter des agents parallèles sur des branches isolées. Ces dossiers peuvent
|
||||
occuper plusieurs centaines de Mo chacun et polluer les grep.
|
||||
|
||||
### Vérifier l'état des worktrees
|
||||
|
||||
```bash
|
||||
# Worktrees actifs vs branches git
|
||||
git worktree list
|
||||
git branch | grep worktree
|
||||
|
||||
# Espace disque consommé
|
||||
du -sh .claude/worktrees/* 2>/dev/null
|
||||
```
|
||||
|
||||
### Supprimer un worktree proprement
|
||||
|
||||
```bash
|
||||
# 1) Retirer l'entrée git (libère le lock dans .git/worktrees/)
|
||||
git worktree remove .claude/worktrees/agent-<hash>
|
||||
|
||||
# 2) Si le dossier persiste (worktree orphelin), forcer le retrait
|
||||
git worktree remove --force .claude/worktrees/agent-<hash>
|
||||
|
||||
# 3) Supprimer les branches worktree abandonnées
|
||||
git branch -D worktree-agent-<hash>
|
||||
```
|
||||
|
||||
### Nettoyage global
|
||||
|
||||
```bash
|
||||
# Supprimer TOUS les worktrees et leurs branches associées
|
||||
for wt in .claude/worktrees/*/; do
|
||||
hash=$(basename "$wt")
|
||||
git worktree remove --force "$wt" 2>/dev/null
|
||||
done
|
||||
# Supprimer les branches orphelines (worktree supprimé mais branche subsiste)
|
||||
git branch | grep worktree-agent- | xargs -r git branch -D
|
||||
git worktree prune -v
|
||||
```
|
||||
|
||||
Le dossier `.claude/` est gitignoré — il ne sera jamais committé.
|
||||
|
||||
## Build du package Windows
|
||||
|
||||
Le package de déploiement pour le PC Windows des utilisateurs est généré par
|
||||
`deploy/build_package.sh`. Il embarque `agent_v0/agent_v1/` directement (pas
|
||||
de staging intermédiaire).
|
||||
|
||||
```bash
|
||||
./deploy/build_package.sh # Build standard
|
||||
./deploy/build_package.sh --clean # Nettoyer avant de builder
|
||||
```
|
||||
|
||||
Le script vérifie la présence de tous les fichiers Python requis via la liste
|
||||
`REQUIRED_FILES`. Si vous ajoutez un nouveau module Python critique côté agent
|
||||
(ex: dans `agent_v1/core/` ou `agent_v1/network/`), **ajoutez-le à
|
||||
`REQUIRED_FILES`** pour qu'un fichier manquant fasse échouer le build plutôt
|
||||
que de produire un zip incomplet.
|
||||
|
||||
### Note historique : `agent_v0/deploy/windows_client/`
|
||||
|
||||
Ce dossier a été créé par `agent_v0/deploy_windows.py` comme staging de build
|
||||
et s'est désynchronisé. Il a été supprimé en avril 2026 — le build officiel
|
||||
passe désormais par `deploy/build_package.sh` qui lit directement
|
||||
`agent_v0/agent_v1/`.
|
||||
220
docs/PLAN_APPRENTISSAGE_LEA.md
Normal file
220
docs/PLAN_APPRENTISSAGE_LEA.md
Normal file
@@ -0,0 +1,220 @@
|
||||
# Plan Apprentissage Léa — Phase 1 / 2 / 3
|
||||
|
||||
**Date** : 10 avril 2026
|
||||
**Auteur** : Dom + Claude (session cartographie target_resolver)
|
||||
**Statut** : Plan validé par Dom, implémentation non commencée
|
||||
|
||||
---
|
||||
|
||||
## Contexte
|
||||
|
||||
Après deux semaines à debugger le replay sur Windows et avoir écrit du code (V4 : surface_classifier, UIA, execution_plan, executor strict) qui **dupliquait sans le savoir** des concepts déjà présents dans le V3 legacy, une cartographie exhaustive a été lancée.
|
||||
|
||||
Fichiers lus en profondeur :
|
||||
- `core/execution/target_resolver.py` (3495 lignes)
|
||||
- `core/learning/target_memory_store.py` (545 lignes — Fiche #18)
|
||||
- `core/models/workflow_graph.py` (TargetSpec — 570-640)
|
||||
- `core/detection/spatial_analyzer.py` (595 lignes)
|
||||
|
||||
## Découverte critique
|
||||
|
||||
**Les pipelines V3 et V4 sont complètement découplés au runtime de replay.**
|
||||
|
||||
```
|
||||
REPLAY V4 (actif aujourd'hui) LEGACY V3 (dormant au replay)
|
||||
============================= =============================
|
||||
stream_processor workflow_pipeline
|
||||
↓ ↓
|
||||
execution_plan_runner execution_loop
|
||||
↓ ↓
|
||||
agent_v1/core/executor.py action_executor
|
||||
↓ ↓
|
||||
OCR + template + VLM direct target_resolver
|
||||
↓
|
||||
target_memory_store (Fiche #18)
|
||||
↓
|
||||
SpatialAnalyzer
|
||||
```
|
||||
|
||||
Vérifié par `grep "from core.execution" agent_v0/` → **zéro import**.
|
||||
|
||||
Callers V3 encore vivants (mais pas sur le chemin de replay critique) :
|
||||
- `agent_chat/app.py`
|
||||
- `visual_workflow_builder/backend/api/workflows.py`
|
||||
- `core/evaluation/*`
|
||||
|
||||
## Modules dormants à valeur immédiate
|
||||
|
||||
### TargetMemoryStore — le Crystallizer qu'on pensait devoir écrire
|
||||
|
||||
- SQLite `data/learning/target_memory.db` + JSONL audit `data/learning/events/YYYY-MM-DD/*.jsonl`
|
||||
- API propre et testée :
|
||||
- `record_success(screen_sig, target_spec, fingerprint, strategy, confidence)`
|
||||
- `record_failure(screen_sig, target_spec, error)`
|
||||
- `lookup(screen_sig, target_spec, min_success_count=2, max_fail_ratio=0.3)` → fingerprint ou None
|
||||
- Clé unique : `(screen_signature, target_spec_hash)`
|
||||
- Fingerprint : `(element_id, bbox, role, etype, label, confidence)`
|
||||
- **Critère de fiabilité** : au moins 2 succès et < 30% d'échecs → c'est ça la "cristallisation par répétition"
|
||||
|
||||
### TargetSpec — vocabulaire déjà riche
|
||||
|
||||
Dans `core/models/workflow_graph.py:572` :
|
||||
- `context_hints` : `near_text`, `below_text`, `right_of_text`, `same_row_as_text`, `within_region`, `exclude_near_text`
|
||||
- `hard_constraints` : `within_container_text`, `min_area`
|
||||
- `weights` : `proximity`, `alignment`, `container`, `roi_iou`
|
||||
|
||||
### ResolutionStrategy V4 — vocabulaire pauvre (à enrichir)
|
||||
|
||||
Dans `core/workflow/execution_plan.py:27` :
|
||||
- `target_text`, `anchor_b64`, `zone`, `vlm_description`, `uia_*`, `dom_*`
|
||||
- Pas de context_hints, pas de hard_constraints → trou dans l'expressivité
|
||||
|
||||
## Décision validée
|
||||
|
||||
**Léa = stagiaire qui apprend de la répétition.** La mémoire précède la généralisation. Mais le raisonnement spatial reste indispensable comme filet de sécurité quand la mémoire ne suffit pas (décalages de layout, premier replay sur nouvel écran, généralisation entre écrans similaires).
|
||||
|
||||
## Plan séquencé
|
||||
|
||||
### Phase 1 — Mémoire sur V4 (≈1 jour, ~150 lignes)
|
||||
|
||||
**Objectif** : greffer `TargetMemoryStore` directement sur le resolve V4, sans passer par target_resolver ni UIElement.
|
||||
|
||||
**Lookup avant OCR/template/VLM**
|
||||
```python
|
||||
fp = memory.lookup(screen_sig, target_spec)
|
||||
if fp:
|
||||
# On a vu ce clic réussir ≥2 fois sur cet écran
|
||||
return fp.bbox # clic direct, <10ms
|
||||
```
|
||||
|
||||
**Record après validation post-condition (déjà en place — `title_match` strict)**
|
||||
```python
|
||||
if post_condition_passed:
|
||||
memory.record_success(screen_sig, target_spec, fingerprint, "v4_ocr", confidence)
|
||||
else:
|
||||
memory.record_failure(screen_sig, target_spec, reason)
|
||||
```
|
||||
|
||||
**À construire**
|
||||
- `screen_signature(screenshot)` → hash stable. Piste : `window_title` + tokens OCR dominants, ou réutiliser `core/execution/screen_signature.py` si compatible.
|
||||
- Fingerprint léger : `(x, y, w, h, method)`. Pas besoin de role/type/label en V4.
|
||||
- Point de branchement exact à confirmer avant implémentation :
|
||||
- Côté serveur dans `resolve_engine` (si resolve serveur)
|
||||
- Côté agent dans `agent_v1/core/executor.py` (si resolve local)
|
||||
|
||||
**Bénéfice observable**
|
||||
- 3ème passage d'un workflow sur même écran : 10-15s VLM remplacés par <10ms lookup
|
||||
- Léa **apprend** vraiment — pas parce qu'on a écrit un Crystallizer, parce qu'on a consommé celui qui dort depuis mars
|
||||
|
||||
**Tests de validation**
|
||||
- [ ] Rejouer un workflow 3 fois, mesurer le temps du 3ème passage
|
||||
- [ ] Vérifier que `data/learning/target_memory.db` se remplit
|
||||
- [ ] Vérifier que les événements JSONL s'écrivent
|
||||
|
||||
### Phase 2 light — Raisonnement spatial OCR-only (≈3-5 jours, ~300-400 lignes)
|
||||
|
||||
**Principe clé** : pur pixel/OCR. Pas d'`UIElement`, pas de role/type, pas de parser UI. On évite le piège "ressusciter V3 complet".
|
||||
|
||||
**À l'enregistrement (IRBuilder, côté serveur)**
|
||||
1. Pour chaque clic `(x, y)` dans la trace
|
||||
2. OCR la zone autour (±300px)
|
||||
3. Identifier les 3-5 textes les plus proches avec direction (left/right/above/below) et distance
|
||||
4. Populer `ResolutionStrategy.context_hints` :
|
||||
```python
|
||||
{
|
||||
"right_of_text": "Nom du patient", # 60px à gauche du clic
|
||||
"below_text": "Identité", # 120px au-dessus
|
||||
"near_text": "Enregistrer", # le texte du clic lui-même
|
||||
}
|
||||
```
|
||||
|
||||
**Au replay (resolve_engine)**, en cascade :
|
||||
1. Lookup mémoire (Phase 1) → si hit, clic direct
|
||||
2. Sinon : OCR de l'écran actuel
|
||||
3. Trouver les ancres de `context_hints` via OCR (normalisation accents + fuzzy Fiche #8)
|
||||
4. Calculer la zone candidate par intersection des contraintes spatiales
|
||||
5. Cliquer
|
||||
6. Si post-cond échoue : retombée VLM (exception handler)
|
||||
|
||||
**Logique à porter depuis target_resolver.py**
|
||||
- `_apply_context_hints_to_candidates` (lignes 2601-2803) — adaptée à "candidats = zones OCR" au lieu de "candidats = UIElement"
|
||||
- `_find_element_by_text` + normalisation (`_norm_text`, `_fuzzy_ratio`) lignes 211-235
|
||||
- Healing profile (ligne 395) pour relaxation progressive
|
||||
|
||||
**Décision tranchée**
|
||||
- OCR **côté serveur Linux** (docTR déjà présent via SomEngine)
|
||||
- Zéro changement sur le client Windows
|
||||
- Le serveur reçoit le screenshot au moment du build IR, extrait les context_hints, les intègre dans `ResolutionStrategy`
|
||||
|
||||
**Enrichissement de `ResolutionStrategy` (execution_plan.py)**
|
||||
Ajouter au dataclass :
|
||||
```python
|
||||
context_hints: Dict[str, Any] = field(default_factory=dict)
|
||||
```
|
||||
|
||||
Et dans `execution_plan_runner._strategy_to_target_spec` : propager `context_hints` dans `target_spec`.
|
||||
|
||||
**Tests de validation**
|
||||
- [ ] Enregistrer un workflow, vérifier que le plan contient des `context_hints` cohérents
|
||||
- [ ] Modifier la résolution de la VM (1920→1280), rejouer, vérifier que les clics atteignent la bonne cible
|
||||
- [ ] Ajouter un champ au-dessus de la cible, rejouer, vérifier robustesse
|
||||
|
||||
### Phase 3 — Spatial V3 complet (pas maintenant)
|
||||
|
||||
**Correction 10 avril 2026** : une version précédente de ce document affirmait qu'OmniParser avait été retiré. **C'était faux.** OmniParser est toujours présent :
|
||||
- `core/detection/omniparser_adapter.py` — 429 lignes
|
||||
- `agent_v0/server_v1/resolve_engine.py:254` — `_get_omniparser()` singleton thread-safe, lazy-load
|
||||
- `agent_v0/server_v1/resolve_engine.py:293` — `_resolve_by_yolo()` défini et importé dans `api_stream.py`
|
||||
|
||||
Ce qui est vrai : `_resolve_by_yolo` **n'est jamais appelé** dans la cascade V4 (`_resolve_target_sync` ne l'invoque pas). C'est du code **dormant**, pas supprimé.
|
||||
|
||||
**Conséquence pour Phase 3** : on a potentiellement **déjà** un parser UI utilisable. Deux pistes :
|
||||
1. **Ré-activer `_resolve_by_yolo`** dans la cascade V4 (injecter un appel dans `_resolve_target_sync` comme fallback après OCR/template/VLM). Il produit déjà une liste d'éléments détectés avec bbox et role approximatif.
|
||||
2. **Pont `_resolve_by_yolo → List[UIElement]`** : adapter la sortie YOLO pour alimenter `target_resolver` V3. Un pont d'une centaine de lignes devrait suffire.
|
||||
|
||||
**Avant de lancer Phase 3**, vérifier :
|
||||
- Les modèles YOLO sont-ils toujours sur disque ? (`omniparser.detect()` lazy-loads)
|
||||
- Quelle qualité de détection sur des écrans Citrix/DPI réels ?
|
||||
- Les tests `tests/integration/test_auto_healing_integration.py` et `tests/unit/test_fiche11_*` passent-ils encore ?
|
||||
|
||||
**Tant qu'on n'a pas fait cette vérification, Phase 3 reste pending.**
|
||||
|
||||
## Ce qu'on ne fait PAS
|
||||
|
||||
| Tentation | Pourquoi on résiste |
|
||||
|-----------|---------------------|
|
||||
| Refactorer `target_resolver.py` pour le rendre V4-compatible | 3495 lignes couplées à `UIElement` disparu — plus économique de le laisser dormir et recoder l'essentiel minimal dans V4 |
|
||||
| Brancher `action_executor` sur le streaming replay | 2000 lignes de pipeline pour un bénéfice qu'on a en 150 lignes avec TargetMemoryStore seul |
|
||||
| Ressusciter `SpatialAnalyzer` maintenant | Zéro valeur sans `UIElement` riches en amont |
|
||||
| Faire Phase 2 avant Phase 1 | Léa raisonnerait à chaque clic, lent et coûteux — pas un "stagiaire qui apprend", juste un agent qui réfléchit en boucle |
|
||||
|
||||
## Suivi d'avancement
|
||||
|
||||
### Phase 1 — Mémoire sur V4
|
||||
- [ ] Identifier le point de branchement exact (serveur vs agent)
|
||||
- [ ] Définir `screen_signature` stable pour V4
|
||||
- [ ] Définir le format fingerprint léger
|
||||
- [ ] Brancher `memory.lookup()` avant cascade OCR/template/VLM
|
||||
- [ ] Brancher `memory.record_success()` après post-cond validée
|
||||
- [ ] Brancher `memory.record_failure()` sur échec
|
||||
- [ ] Test : workflow rejoué 3 fois, 3ème en <100ms sur le resolve
|
||||
- [ ] Vérifier remplissage de `data/learning/target_memory.db`
|
||||
|
||||
### Phase 2 light — Spatial OCR-only
|
||||
- [ ] Enrichir `ResolutionStrategy` avec `context_hints`
|
||||
- [ ] IRBuilder : extraire context_hints via OCR au build
|
||||
- [ ] `execution_plan_runner` : propager context_hints dans target_spec
|
||||
- [ ] resolve_engine : implémenter fallback spatial OCR
|
||||
- [ ] Porter `_apply_context_hints_to_candidates` adapté
|
||||
- [ ] Porter normalisation texte (`_norm_text`, `_fuzzy_ratio`)
|
||||
- [ ] Test : résolution VM modifiée, clic atteint toujours la cible
|
||||
- [ ] Test : champ ajouté dans le formulaire, robustesse préservée
|
||||
|
||||
### Phase 3 — Spatial V3 complet
|
||||
- [ ] **BLOQUÉ** jusqu'à ce qu'un parser UI produise des `UIElement`
|
||||
|
||||
## Liens
|
||||
|
||||
- Code de référence : `core/execution/target_resolver.py`, `core/learning/target_memory_store.py`
|
||||
- Architecture V4 : `core/workflow/execution_plan.py`, `core/workflow/execution_compiler.py`, `agent_v0/server_v1/execution_plan_runner.py`
|
||||
- Replay runtime : `agent_v0/agent_v1/core/executor.py`
|
||||
112
docs/STATUS.md
Normal file
112
docs/STATUS.md
Normal file
@@ -0,0 +1,112 @@
|
||||
# STATUS — État réel du projet RPA Vision V3
|
||||
|
||||
> Dernière mise à jour : 14 avril 2026
|
||||
>
|
||||
> Ce document remplace les affirmations marketing du README historique.
|
||||
> Il décrit l'état réel des modules, sans embellissement.
|
||||
|
||||
## Positionnement
|
||||
|
||||
**POC avancé** — certaines briques sont fonctionnelles de bout en bout
|
||||
(capture, streaming, premier replay E2E sur Notepad), d'autres sont en cours
|
||||
de stabilisation ou à l'état d'ébauche. Le projet n'est pas « production-ready ».
|
||||
|
||||
Les fonctionnalités ci-dessous sont documentées sans minimiser les limites.
|
||||
|
||||
## Légende
|
||||
|
||||
- **opérationnel** : testé, utilisé régulièrement, pas de régression récente connue
|
||||
- **alpha** : branché et fonctionnel sur un cas d'usage de référence, manque
|
||||
de recul sur la généralisation
|
||||
- **en cours** : en développement actif, comportement instable
|
||||
- **non démarré** : planifié, pas encore de code significatif
|
||||
|
||||
## Vue d'ensemble par module
|
||||
|
||||
| Module / fonctionnalité | État | Commentaire |
|
||||
|---|---|---|
|
||||
| Capture d'écran + événements (Agent V1 Windows) | opérationnel | `agent_v0/agent_v1/` — systray, streaming vers serveur |
|
||||
| Streaming server (`agent_v0/server_v1/`) | opérationnel | FastAPI port 5005, sessions en mémoire |
|
||||
| Stockage sessions (`RawSession`) | opérationnel | JSON + screenshots, rotation manuelle |
|
||||
| Détection UI (`core/detection/`) | alpha | Cascade VLM + OCR + templates, sensible au modèle choisi |
|
||||
| Embedding & FAISS (`core/embedding/`) | alpha | CLIP ViT-B/32 + index Flat, pas testé à grande échelle |
|
||||
| Workflow Graph (`core/graph/`) | alpha | Construction depuis sessions, matching heuristique |
|
||||
| Replay E2E (`agent_v0/server_v1/api_stream.py`) | alpha | Premier succès le 13 avril 2026 sur Notepad, asymétries strict/legacy connues |
|
||||
| Mode apprentissage supervisé | alpha | Pause sur échec répété, demande d'intervention humaine |
|
||||
| TargetMemoryStore (Phase 1 apprentissage) | alpha | Schéma SQLite en place, DB vide jusqu'au premier replay complet |
|
||||
| Grounding visuel (UI-TARS, gemma4, qwen3-vl) | alpha | Switch de modèle via `.env` (`RPA_VLM_MODEL`) |
|
||||
| SomEngine (YOLO + docTR + VLM) | alpha | Intégré, dormant dans la cascade par défaut |
|
||||
| Web Dashboard (port 5001) | alpha | Flask + SocketIO, fonctionnel mais non durci |
|
||||
| Visual Workflow Builder (VWB, ports 5002 + 3002) | en cours | Catalogue d'actions, UI React. Bugs DB runtime connus |
|
||||
| Agent Chat (port 5004) | alpha | Planner autonome, basé LLM local |
|
||||
| Module auth (`core/auth/`) | alpha | Vault Fernet + TOTP, CLI seul, pas d'intégration UI |
|
||||
| Federation (`core/federation/`) | alpha | Export/import de LearningPacks, pas de test terrain |
|
||||
| GPU Resource Manager (`core/gpu/`) | alpha | Gestion Ollama + warmup modèles, code utilisé mais peu testé |
|
||||
| Self-healing / recovery | en cours | Heuristiques présentes, comportement global non stabilisé |
|
||||
| Analytics / reporting | en cours | Prototype, pas de frontend finalisé |
|
||||
| Tests end-to-end | en cours | 1 replay E2E réussi, 56 tests d'intégration verts hors cas connus |
|
||||
| Deploy Windows (`deploy/build_package.sh`) | opérationnel | Produit `Lea_v<version>.zip`, vérification des fichiers requis |
|
||||
| Conformité AI Act (journalisation, floutage, rétention logs) | alpha | Mécanismes en place, audit formel non fait |
|
||||
|
||||
## Limites connues (non exploitables comme failles)
|
||||
|
||||
- Plusieurs copies parallèles du code agent ont existé (source, staging
|
||||
Windows, worktrees) avec risque de divergence. Le staging Windows obsolète
|
||||
a été supprimé ; le build officiel passe par `deploy/build_package.sh`.
|
||||
- La base `data/learning/target_memory.db` reste vide tant qu'un replay
|
||||
complet n'a pas été cristallisé — l'apprentissage est câblé mais pas
|
||||
encore éprouvé.
|
||||
- Certaines asymétries entre chemins « strict » et « legacy » dans
|
||||
`api_stream.py` peuvent faire retomber une erreur en mode strict vers
|
||||
le retry+stop legacy au lieu de la pause d'apprentissage.
|
||||
- Le worker de compilation sessions → `ExecutionPlan` (port 5099) n'est pas
|
||||
lancé par défaut — les sessions enregistrées ne sont pas compilées
|
||||
automatiquement.
|
||||
- Le VWB présente des bugs en écriture DB identifiés et documentés.
|
||||
- La détection VLM est sensible au choix de modèle ; le défaut est
|
||||
`gemma4:latest` (cf. `.env.example`).
|
||||
|
||||
## Modèles utilisés
|
||||
|
||||
Définis dans `.env` (voir `.env.example`) :
|
||||
|
||||
| Variable | Valeur par défaut | Rôle |
|
||||
|---|---|---|
|
||||
| `RPA_VLM_MODEL` | `gemma4:latest` | Modèle VLM principal (Ollama) |
|
||||
| `VLM_MODEL` | `gemma4:latest` | Alias de compatibilité |
|
||||
| `CLIP_MODEL` | `ViT-B-32` | Embeddings visuels |
|
||||
| `CLIP_PRETRAINED` | `openai` | Poids pré-entraînés |
|
||||
| `VLM_ENDPOINT` | `http://localhost:11434` | Ollama local |
|
||||
|
||||
Modèles alternatifs testés : `qwen3-vl:8b`, `ui-tars` (grounding direct).
|
||||
Aucun appel cloud par défaut — tout passe par Ollama local.
|
||||
|
||||
## Infrastructure
|
||||
|
||||
- **OS cible serveur** : Linux (Ubuntu 24.04 testé)
|
||||
- **GPU recommandé** : NVIDIA (ex. RTX 5070) pour l'inférence VLM locale
|
||||
- **OS cible client** : Windows 10/11 (Agent V1)
|
||||
- **Python** : 3.10 à 3.12
|
||||
- **Ollama** : service local obligatoire
|
||||
|
||||
## Ports utilisés (source : `services.conf`)
|
||||
|
||||
| Port | Service |
|
||||
|---|---|
|
||||
| 8000 | API Server (core upload) |
|
||||
| 5001 | Web Dashboard |
|
||||
| 5002 | VWB Backend (Flask) |
|
||||
| 5003 | Monitoring |
|
||||
| 5004 | Agent Chat |
|
||||
| 5005 | Streaming Server (Agent V1) |
|
||||
| 5006 | Session Cleaner |
|
||||
| 5099 | Worker de compilation (optionnel) |
|
||||
| 3002 | VWB Frontend (Vite/React) |
|
||||
|
||||
## Prochaines étapes prioritaires
|
||||
|
||||
1. Stabiliser le replay E2E sur 3 applications métier différentes
|
||||
2. Alimenter `TargetMemoryStore` via des replays réussis réels
|
||||
3. Harmoniser les branches `strict` / `legacy` dans `api_stream.py`
|
||||
4. Durcir VWB ou pivoter vers un outil dédié plus simple
|
||||
5. Activer le worker de compilation sessions → ExecutionPlan
|
||||
109
requirements-ci.txt
Normal file
109
requirements-ci.txt
Normal file
@@ -0,0 +1,109 @@
|
||||
# ------------------------------------------------------------------
|
||||
# requirements-ci.txt — Dépendances pour la CI (tests unitaires)
|
||||
# ------------------------------------------------------------------
|
||||
# Objectif : installer le minimum pour que `pytest tests/unit/`
|
||||
# passe sans GPU, sans Ollama, sans torch, sans FAISS GPU.
|
||||
#
|
||||
# Les tests lourds (torch, transformers, CLIP, FAISS GPU, doctr,
|
||||
# Ollama) sont marqués `slow`, `gpu` ou `integration` et exclus
|
||||
# via `-m "not slow and not gpu and not integration"`.
|
||||
#
|
||||
# Versions alignées sur requirements.txt pour éviter les surprises
|
||||
# lors du runtime local, mais allégées (CPU-only, headless).
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
# --- Runtime core ---
|
||||
pydantic==2.12.5
|
||||
pydantic_core==2.41.5
|
||||
python-dotenv==1.0.0
|
||||
PyYAML==6.0.1
|
||||
click==8.3.1
|
||||
typing_extensions==4.15.0
|
||||
annotated-types==0.7.0
|
||||
|
||||
# --- Web frameworks (utilisés par les tests API/dashboard) ---
|
||||
fastapi==0.128.0
|
||||
starlette==0.50.0
|
||||
uvicorn==0.40.0
|
||||
Flask==3.0.0
|
||||
Flask-Caching==2.1.0
|
||||
Flask-Cors==4.0.0
|
||||
Flask-SQLAlchemy==3.1.1
|
||||
Werkzeug==3.1.5
|
||||
Jinja2==3.1.6
|
||||
itsdangerous==2.2.0
|
||||
blinker==1.9.0
|
||||
|
||||
# --- DB (tests auth/audit/extraction) ---
|
||||
SQLAlchemy==2.0.23
|
||||
alembic==1.18.4
|
||||
|
||||
# --- HTTP clients ---
|
||||
httpx==0.28.1
|
||||
requests==2.32.5
|
||||
urllib3==2.6.3
|
||||
certifi==2026.1.4
|
||||
idna==3.11
|
||||
charset-normalizer==3.4.4
|
||||
h11==0.16.0
|
||||
httpcore==1.0.9
|
||||
anyio==4.12.1
|
||||
sniffio==1.3.1; python_version >= "3.7"
|
||||
|
||||
# --- Sécurité (test_security_*, auth vault, TOTP) ---
|
||||
cryptography==46.0.3
|
||||
cffi==2.0.0
|
||||
pycparser==2.23
|
||||
|
||||
# --- Images (opencv-python-headless au lieu de opencv-python pour CI) ---
|
||||
pillow==12.1.0
|
||||
opencv-python-headless==4.12.0.88
|
||||
numpy==2.2.6
|
||||
|
||||
# --- Pytest et plugins ---
|
||||
pytest==9.0.2
|
||||
pytest-asyncio==1.3.0
|
||||
pytest-cov==4.1.0
|
||||
pytest-flask==1.3.0
|
||||
pytest-mock==3.12.0
|
||||
iniconfig==2.3.0
|
||||
pluggy==1.6.0
|
||||
packaging==25.0
|
||||
|
||||
# --- Couverture ---
|
||||
coverage==7.13.1
|
||||
|
||||
# --- Utilitaires divers (imports indirects fréquents) ---
|
||||
python-dateutil==2.8.2
|
||||
six==1.17.0
|
||||
attrs==25.4.0
|
||||
jsonschema==4.20.0
|
||||
jsonschema-specifications==2025.9.1
|
||||
referencing==0.37.0
|
||||
rpds-py==0.30.0
|
||||
RapidFuzz==3.14.3
|
||||
regex==2025.11.3
|
||||
python-multipart==0.0.21
|
||||
validators==0.35.0
|
||||
prometheus_client==0.23.1
|
||||
psutil==7.2.1
|
||||
filelock==3.20.3
|
||||
tqdm==4.67.1
|
||||
|
||||
# --- Hypothesis (property tests, si inclus plus tard) ---
|
||||
hypothesis==6.92.1
|
||||
sortedcontainers==2.4.0
|
||||
|
||||
# --- NOTES ---
|
||||
# Volontairement absents :
|
||||
# - torch / torchvision / triton / nvidia-* → GPU, hors CI
|
||||
# - transformers / accelerate / tokenizers → chargent torch
|
||||
# - open_clip_torch / timm → idem
|
||||
# - faiss-cpu → binaire lourd (~90 Mo), utilisé uniquement
|
||||
#   en tests `slow` / `integration`
|
||||
# - ollama → nécessite serveur Ollama
|
||||
# - python-doctr / pypdfium2 → OCR, tests `slow`
|
||||
# - pynput / pyautogui / mss / PyQt5 → GUI / simulation I/O
|
||||
# - python-socketio / Flask-SocketIO → WS, tests intégration
|
||||
# - eds-nlp / spacy → modèles NLP hors CI
|
||||
@@ -9,6 +9,7 @@
|
||||
# 5003 - Monitoring (métriques système)
|
||||
# 5004 - Agent Chat (interface conversationnelle)
|
||||
# 5005 - Streaming Server (Agent V1 → core pipeline)
|
||||
# 5006 - Session Cleaner (nettoyage sessions avant replay)
|
||||
# 3002 - VWB Frontend (Vite/React)
|
||||
#
|
||||
|
||||
@@ -20,3 +21,4 @@ agent-chat|5004|agent_chat/app.py|optional
|
||||
streaming|5005|agent_v0/server_v1/api_stream.py|optional
|
||||
worker|5099|agent_v0/server_v1/run_worker.py|optional
|
||||
vwb-frontend|3002|cd visual_workflow_builder/frontend_v4 && npm run dev|required
|
||||
session-cleaner|5006|tools/session_cleaner.py|optional
|
||||
|
||||
24
svc.sh
24
svc.sh
@@ -56,6 +56,7 @@ declare -A PORTS=(
|
||||
[streaming]=5005
|
||||
[worker]=5099
|
||||
[vwb-frontend]=3002
|
||||
[session-cleaner]=5006
|
||||
)
|
||||
|
||||
# Mapping nom court -> nom service systemd
|
||||
@@ -66,13 +67,14 @@ declare -A SYSTEMD_UNITS=(
|
||||
[streaming]="rpa-streaming.service"
|
||||
[worker]="rpa-worker.service"
|
||||
[vwb-frontend]="rpa-vwb-frontend.service"
|
||||
[session-cleaner]="rpa-session-cleaner.service"
|
||||
)
|
||||
|
||||
# Services gérés par systemd (ceux qui ont un .service)
|
||||
SYSTEMD_SERVICES="streaming worker agent-chat dashboard vwb-backend vwb-frontend"
|
||||
SYSTEMD_SERVICES="streaming worker agent-chat dashboard vwb-backend vwb-frontend session-cleaner"
|
||||
|
||||
# Tous les services connus
|
||||
ALL_SERVICES="api dashboard vwb-backend monitoring agent-chat streaming worker vwb-frontend"
|
||||
ALL_SERVICES="api dashboard vwb-backend monitoring agent-chat streaming worker vwb-frontend session-cleaner"
|
||||
|
||||
declare -A COMMANDS=(
|
||||
[api]="$VENV_DIR/bin/python3 server/api_upload.py"
|
||||
@@ -83,14 +85,15 @@ declare -A COMMANDS=(
|
||||
[streaming]="$VENV_DIR/bin/python3 -m agent_v0.server_v1.api_stream"
|
||||
[worker]="$VENV_DIR/bin/python3 -m agent_v0.server_v1.run_worker"
|
||||
[vwb-frontend]="cd $SCRIPT_DIR/visual_workflow_builder/frontend_v4 && npm run dev"
|
||||
[session-cleaner]="$VENV_DIR/bin/python3 tools/session_cleaner.py"
|
||||
)
|
||||
|
||||
# Groupes de services
|
||||
declare -A SVC_GROUPS=(
|
||||
[vwb]="vwb-backend vwb-frontend"
|
||||
[all]="api dashboard vwb-backend vwb-frontend"
|
||||
[full]="api dashboard vwb-backend vwb-frontend monitoring agent-chat streaming worker"
|
||||
[boot]="streaming worker agent-chat dashboard vwb-backend vwb-frontend"
|
||||
[full]="api dashboard vwb-backend vwb-frontend monitoring agent-chat streaming worker session-cleaner"
|
||||
[boot]="streaming worker agent-chat dashboard vwb-backend vwb-frontend session-cleaner"
|
||||
)
|
||||
|
||||
# =============================================================================
|
||||
@@ -353,7 +356,7 @@ do_install() {
|
||||
|
||||
# Vérifier que les fichiers existent
|
||||
local missing=false
|
||||
for unit in rpa-streaming.service rpa-worker.service rpa-agent-chat.service rpa-dashboard.service rpa-vwb-backend.service rpa-vwb-frontend.service rpa-vision.target; do
|
||||
for unit in rpa-streaming.service rpa-worker.service rpa-agent-chat.service rpa-dashboard.service rpa-vwb-backend.service rpa-vwb-frontend.service rpa-session-cleaner.service rpa-vision.target; do
|
||||
if [ -f "$SYSTEMD_DIR/$unit" ]; then
|
||||
echo -e " ${GREEN}OK${NC} $unit"
|
||||
else
|
||||
@@ -397,7 +400,7 @@ do_enable() {
|
||||
echo -e "${CYAN}${BOLD}Activation du demarrage automatique au boot...${NC}"
|
||||
systemctl --user daemon-reload
|
||||
systemctl --user enable rpa-vision.target
|
||||
for unit in rpa-streaming.service rpa-worker.service rpa-agent-chat.service rpa-dashboard.service rpa-vwb-backend.service rpa-vwb-frontend.service; do
|
||||
for unit in rpa-streaming.service rpa-worker.service rpa-agent-chat.service rpa-dashboard.service rpa-vwb-backend.service rpa-vwb-frontend.service rpa-session-cleaner.service; do
|
||||
systemctl --user enable "$unit" 2>/dev/null
|
||||
echo -e " ${GREEN}OK${NC} $unit"
|
||||
done
|
||||
@@ -408,7 +411,7 @@ do_enable() {
|
||||
do_disable() {
|
||||
echo -e "${YELLOW}${BOLD}Desactivation du demarrage automatique...${NC}"
|
||||
systemctl --user disable rpa-vision.target 2>/dev/null || true
|
||||
for unit in rpa-streaming.service rpa-worker.service rpa-agent-chat.service rpa-dashboard.service rpa-vwb-backend.service rpa-vwb-frontend.service; do
|
||||
for unit in rpa-streaming.service rpa-worker.service rpa-agent-chat.service rpa-dashboard.service rpa-vwb-backend.service rpa-vwb-frontend.service rpa-session-cleaner.service; do
|
||||
systemctl --user disable "$unit" 2>/dev/null || true
|
||||
echo -e " ${GREEN}OK${NC} $unit"
|
||||
done
|
||||
@@ -438,11 +441,12 @@ show_help() {
|
||||
echo " dashboard Web Dashboard (port 5001)"
|
||||
echo " vwb-backend VWB Backend Flask (port 5002)"
|
||||
echo " vwb-frontend VWB Frontend Vite (port 3002)"
|
||||
echo " session-cleaner Session Cleaner (port 5006)"
|
||||
echo " api API Server (port 8000) [legacy uniquement]"
|
||||
echo " monitoring Monitoring (port 5003) [legacy uniquement]"
|
||||
echo ""
|
||||
echo -e "${BOLD}Groupes:${NC}"
|
||||
echo " boot Services systemd (streaming, worker, chat, dashboard, vwb)"
|
||||
echo " boot Services systemd (streaming, worker, chat, dashboard, vwb, session-cleaner)"
|
||||
echo " vwb VWB backend + frontend"
|
||||
echo " all Core (api, dashboard, vwb)"
|
||||
echo " full Tous les services"
|
||||
@@ -451,8 +455,8 @@ show_help() {
|
||||
echo " --legacy Forcer le mode legacy (PID files au lieu de systemd)"
|
||||
echo ""
|
||||
echo -e "${BOLD}Exemples:${NC}"
|
||||
echo " $0 start boot # Demarrer les 5 services systemd"
|
||||
echo " $0 stop boot # Arreter les 5 services systemd"
|
||||
echo " $0 start boot # Demarrer les services systemd"
|
||||
echo " $0 stop boot # Arreter les services systemd"
|
||||
echo " $0 restart streaming # Redemarrer le streaming server"
|
||||
echo " $0 logs streaming -f # Suivre les logs du streaming"
|
||||
echo " $0 status # Voir l'etat de tout"
|
||||
|
||||
333
tests/integration/test_agents_enroll_api.py
Normal file
333
tests/integration/test_agents_enroll_api.py
Normal file
@@ -0,0 +1,333 @@
|
||||
"""
|
||||
Tests d'integration pour les endpoints /api/v1/agents/* (fleet management).
|
||||
|
||||
Couvre :
|
||||
- POST /api/v1/agents/enroll (201, 409 duplicate, 401 sans token,
|
||||
reenrollement apres uninstall)
|
||||
- POST /api/v1/agents/uninstall (200, 404 inconnu)
|
||||
- GET /api/v1/agents/fleet (listing actif / desinstalle)
|
||||
|
||||
Le module `agent_v0.server_v1.api_stream` applique un fail-closed a
|
||||
l'import si RPA_API_TOKEN est absent : la fixture `_ensure_api_token`
|
||||
garantit que l'env est defini AVANT tout import.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
# Racine du projet pour les imports locaux (meme pattern que les autres
|
||||
# tests d'integration)
|
||||
_ROOT = str(Path(__file__).resolve().parents[2])
|
||||
if _ROOT not in sys.path:
|
||||
sys.path.insert(0, _ROOT)
|
||||
|
||||
|
||||
_TEST_API_TOKEN = "test_token_fleet_endpoints_0123456789abcdef"
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def agents_client(monkeypatch, tmp_path):
|
||||
"""Client FastAPI de test avec un AgentRegistry isole sur disque.
|
||||
|
||||
Remplace le `agent_registry` global par une instance pointant sur une
|
||||
DB temporaire, pour ne pas polluer la vraie rpa_data.db du workspace.
|
||||
"""
|
||||
# Garantir que le module peut s'importer (RPA_API_TOKEN sinon sys.exit 1)
|
||||
monkeypatch.setenv("RPA_API_TOKEN", _TEST_API_TOKEN)
|
||||
monkeypatch.setenv(
|
||||
"RPA_AGENTS_DB_PATH", str(tmp_path / "test_agents.db")
|
||||
)
|
||||
|
||||
# Import tardif apres config de l'env
|
||||
from fastapi.testclient import TestClient
|
||||
from agent_v0.server_v1 import api_stream
|
||||
from agent_v0.server_v1.agent_registry import AgentRegistry
|
||||
|
||||
# Aligner le token attendu par le middleware Bearer avec notre token de test
|
||||
monkeypatch.setattr(api_stream, "API_TOKEN", _TEST_API_TOKEN)
|
||||
|
||||
# Substituer le registre global par une instance dediee au test
|
||||
original_registry = api_stream.agent_registry
|
||||
test_registry = AgentRegistry(db_path=str(tmp_path / "test_agents.db"))
|
||||
monkeypatch.setattr(api_stream, "agent_registry", test_registry)
|
||||
|
||||
client = TestClient(api_stream.app, raise_server_exceptions=False)
|
||||
yield client, _TEST_API_TOKEN, test_registry
|
||||
|
||||
# Restauration
|
||||
monkeypatch.setattr(api_stream, "agent_registry", original_registry)
|
||||
|
||||
|
||||
def _auth_headers(token: str) -> dict:
|
||||
return {"Authorization": f"Bearer {token}"}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# POST /api/v1/agents/enroll
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_enroll_new_agent_returns_201(agents_client):
|
||||
client, token, _ = agents_client
|
||||
payload = {
|
||||
"machine_id": "aivanov-jdoe-a3f2b718",
|
||||
"user_name": "Jean Doe",
|
||||
"user_email": "jdoe@aivanov.fr",
|
||||
"user_id": "AIVA-001",
|
||||
"hostname": "DESKTOP-ABC123",
|
||||
"os_info": "Windows 11",
|
||||
"version": "1.0.0",
|
||||
}
|
||||
|
||||
resp = client.post(
|
||||
"/api/v1/agents/enroll", json=payload, headers=_auth_headers(token)
|
||||
)
|
||||
|
||||
assert resp.status_code == 201, resp.text
|
||||
data = resp.json()
|
||||
assert data["status"] == "enrolled"
|
||||
assert data["created"] is True
|
||||
assert data["reactivated"] is False
|
||||
assert data["machine_id"] == "aivanov-jdoe-a3f2b718"
|
||||
# Phase 1 : token global renvoye pour confirmation
|
||||
assert data["api_token"] == token
|
||||
agent = data["agent"]
|
||||
assert agent["user_name"] == "Jean Doe"
|
||||
assert agent["hostname"] == "DESKTOP-ABC123"
|
||||
assert agent["status"] == "active"
|
||||
assert agent["enrolled_at"]
|
||||
assert agent["uninstalled_at"] is None
|
||||
|
||||
|
||||
def test_enroll_duplicate_returns_409(agents_client):
|
||||
client, token, _ = agents_client
|
||||
payload = {
|
||||
"machine_id": "dup-machine-001",
|
||||
"user_name": "Alice",
|
||||
"hostname": "PC-ALICE",
|
||||
"version": "1.0.0",
|
||||
}
|
||||
first = client.post(
|
||||
"/api/v1/agents/enroll", json=payload, headers=_auth_headers(token)
|
||||
)
|
||||
assert first.status_code == 201
|
||||
|
||||
# Reenrollement sur machine encore active -> 409
|
||||
second = client.post(
|
||||
"/api/v1/agents/enroll", json=payload, headers=_auth_headers(token)
|
||||
)
|
||||
assert second.status_code == 409, second.text
|
||||
body = second.json()
|
||||
# FastAPI enveloppe notre detail dans "detail"
|
||||
detail = body["detail"]
|
||||
assert detail["error"] == "already_enrolled"
|
||||
assert detail["existing"]["machine_id"] == "dup-machine-001"
|
||||
|
||||
|
||||
def test_enroll_without_token_returns_401(agents_client):
|
||||
client, _, _ = agents_client
|
||||
payload = {"machine_id": "no-auth-001"}
|
||||
resp = client.post("/api/v1/agents/enroll", json=payload)
|
||||
assert resp.status_code == 401
|
||||
|
||||
|
||||
def test_enroll_with_wrong_token_returns_401(agents_client):
|
||||
client, _, _ = agents_client
|
||||
payload = {"machine_id": "bad-token-001"}
|
||||
resp = client.post(
|
||||
"/api/v1/agents/enroll",
|
||||
json=payload,
|
||||
headers={"Authorization": "Bearer WRONG_TOKEN"},
|
||||
)
|
||||
assert resp.status_code == 401
|
||||
|
||||
|
||||
def test_enroll_missing_machine_id_returns_422(agents_client):
|
||||
"""Pydantic renvoie 422 si machine_id est absent (validation automatique)."""
|
||||
client, token, _ = agents_client
|
||||
resp = client.post(
|
||||
"/api/v1/agents/enroll", json={}, headers=_auth_headers(token)
|
||||
)
|
||||
assert resp.status_code == 422
|
||||
|
||||
|
||||
def test_enroll_blank_machine_id_returns_400(agents_client):
|
||||
"""Un machine_id vide (whitespace) est rejete avec un 400 explicite."""
|
||||
client, token, _ = agents_client
|
||||
resp = client.post(
|
||||
"/api/v1/agents/enroll",
|
||||
json={"machine_id": " "},
|
||||
headers=_auth_headers(token),
|
||||
)
|
||||
assert resp.status_code == 400
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# POST /api/v1/agents/uninstall
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_uninstall_existing_returns_200_and_soft_deletes(agents_client):
|
||||
client, token, registry = agents_client
|
||||
|
||||
# Preparer un agent actif
|
||||
client.post(
|
||||
"/api/v1/agents/enroll",
|
||||
json={
|
||||
"machine_id": "uninst-001",
|
||||
"user_name": "Bob",
|
||||
"hostname": "PC-BOB",
|
||||
},
|
||||
headers=_auth_headers(token),
|
||||
)
|
||||
|
||||
resp = client.post(
|
||||
"/api/v1/agents/uninstall",
|
||||
json={"machine_id": "uninst-001", "reason": "user_uninstall"},
|
||||
headers=_auth_headers(token),
|
||||
)
|
||||
assert resp.status_code == 200, resp.text
|
||||
data = resp.json()
|
||||
assert data["status"] == "uninstalled"
|
||||
assert data["machine_id"] == "uninst-001"
|
||||
assert data["agent"]["status"] == "uninstalled"
|
||||
assert data["agent"]["uninstall_reason"] == "user_uninstall"
|
||||
assert data["agent"]["uninstalled_at"]
|
||||
|
||||
# Verifier en base : pas de suppression physique (soft delete)
|
||||
row = registry.get("uninst-001")
|
||||
assert row is not None
|
||||
assert row["status"] == "uninstalled"
|
||||
|
||||
|
||||
def test_uninstall_unknown_returns_404(agents_client):
|
||||
client, token, _ = agents_client
|
||||
resp = client.post(
|
||||
"/api/v1/agents/uninstall",
|
||||
json={"machine_id": "never-seen-001", "reason": "admin_revoke"},
|
||||
headers=_auth_headers(token),
|
||||
)
|
||||
assert resp.status_code == 404
|
||||
|
||||
|
||||
def test_uninstall_without_token_returns_401(agents_client):
|
||||
client, _, _ = agents_client
|
||||
resp = client.post(
|
||||
"/api/v1/agents/uninstall",
|
||||
json={"machine_id": "anything"},
|
||||
)
|
||||
assert resp.status_code == 401
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Reenrollement apres uninstall = reactivation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_reenroll_after_uninstall_reactivates(agents_client):
|
||||
client, token, _ = agents_client
|
||||
|
||||
client.post(
|
||||
"/api/v1/agents/enroll",
|
||||
json={
|
||||
"machine_id": "reenroll-001",
|
||||
"user_name": "Carol",
|
||||
"hostname": "PC-CAROL",
|
||||
"version": "1.0.0",
|
||||
},
|
||||
headers=_auth_headers(token),
|
||||
)
|
||||
client.post(
|
||||
"/api/v1/agents/uninstall",
|
||||
json={"machine_id": "reenroll-001", "reason": "user_uninstall"},
|
||||
headers=_auth_headers(token),
|
||||
)
|
||||
|
||||
# Nouvelle installation -> reactivation OK (meme machine_id, maj des champs)
|
||||
resp = client.post(
|
||||
"/api/v1/agents/enroll",
|
||||
json={
|
||||
"machine_id": "reenroll-001",
|
||||
"user_name": "Carol Durand",
|
||||
"hostname": "PC-CAROL",
|
||||
"version": "1.1.0",
|
||||
},
|
||||
headers=_auth_headers(token),
|
||||
)
|
||||
assert resp.status_code == 201, resp.text
|
||||
data = resp.json()
|
||||
assert data["created"] is False
|
||||
assert data["reactivated"] is True
|
||||
agent = data["agent"]
|
||||
assert agent["status"] == "active"
|
||||
assert agent["uninstalled_at"] is None
|
||||
assert agent["uninstall_reason"] is None
|
||||
# Les champs ont bien ete mis a jour
|
||||
assert agent["user_name"] == "Carol Durand"
|
||||
assert agent["version"] == "1.1.0"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GET /api/v1/agents/fleet
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_fleet_lists_active_and_uninstalled(agents_client):
|
||||
client, token, _ = agents_client
|
||||
|
||||
# 2 agents actifs + 1 desinstalle
|
||||
for mid in ("fleet-a", "fleet-b"):
|
||||
client.post(
|
||||
"/api/v1/agents/enroll",
|
||||
json={"machine_id": mid, "user_name": mid, "hostname": mid.upper()},
|
||||
headers=_auth_headers(token),
|
||||
)
|
||||
|
||||
client.post(
|
||||
"/api/v1/agents/enroll",
|
||||
json={"machine_id": "fleet-c", "user_name": "Cleo"},
|
||||
headers=_auth_headers(token),
|
||||
)
|
||||
client.post(
|
||||
"/api/v1/agents/uninstall",
|
||||
json={"machine_id": "fleet-c", "reason": "machine_retired"},
|
||||
headers=_auth_headers(token),
|
||||
)
|
||||
|
||||
resp = client.get("/api/v1/agents/fleet", headers=_auth_headers(token))
|
||||
assert resp.status_code == 200, resp.text
|
||||
data = resp.json()
|
||||
assert data["total_active"] == 2
|
||||
assert data["total_uninstalled"] == 1
|
||||
|
||||
active_ids = {a["machine_id"] for a in data["active"]}
|
||||
assert active_ids == {"fleet-a", "fleet-b"}
|
||||
|
||||
uninstalled_ids = {a["machine_id"] for a in data["uninstalled"]}
|
||||
assert uninstalled_ids == {"fleet-c"}
|
||||
assert data["uninstalled"][0]["uninstall_reason"] == "machine_retired"
|
||||
|
||||
|
||||
def test_fleet_empty(agents_client):
|
||||
client, token, _ = agents_client
|
||||
resp = client.get("/api/v1/agents/fleet", headers=_auth_headers(token))
|
||||
assert resp.status_code == 200
|
||||
data = resp.json()
|
||||
assert data == {
|
||||
"active": [],
|
||||
"uninstalled": [],
|
||||
"total_active": 0,
|
||||
"total_uninstalled": 0,
|
||||
}
|
||||
|
||||
|
||||
def test_fleet_without_token_returns_401(agents_client):
|
||||
client, _, _ = agents_client
|
||||
resp = client.get("/api/v1/agents/fleet")
|
||||
assert resp.status_code == 401
|
||||
@@ -184,8 +184,12 @@ class TestImagePayloadFormat:
|
||||
"""Le serveur distingue full/crop par '_crop' dans le shot_id."""
|
||||
from agent_v0.agent_v1.network.streamer import TraceStreamer
|
||||
|
||||
fake_img = tmp_path / "crop.png"
|
||||
fake_img.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 50)
|
||||
# Dans le monde réel, full et crop sont deux fichiers distincts
|
||||
# (la purge après ACK supprime le premier avant que le second parte).
|
||||
fake_full = tmp_path / "full.png"
|
||||
fake_full.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 50)
|
||||
fake_crop = tmp_path / "crop.png"
|
||||
fake_crop.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 50)
|
||||
|
||||
with patch("agent_v0.agent_v1.network.streamer.requests") as mock_req:
|
||||
mock_req.post.return_value = MagicMock(ok=True)
|
||||
@@ -194,9 +198,9 @@ class TestImagePayloadFormat:
|
||||
streamer._server_available = True
|
||||
|
||||
# Full screenshot
|
||||
streamer._send_image(str(fake_img), "shot_0001_full")
|
||||
streamer._send_image(str(fake_full), "shot_0001_full")
|
||||
# Crop screenshot
|
||||
streamer._send_image(str(fake_img), "shot_0001_crop")
|
||||
streamer._send_image(str(fake_crop), "shot_0001_crop")
|
||||
|
||||
img_calls = [
|
||||
c for c in mock_req.post.call_args_list
|
||||
|
||||
@@ -6,6 +6,7 @@ Sans GPU/modèles lourds (mocks pour ScreenAnalyzer et CLIP).
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
import tempfile
|
||||
@@ -457,6 +458,27 @@ class TestStreamProcessorListMethods:
|
||||
class TestAPIEndpoints:
|
||||
"""Tests pour les endpoints GET sessions et workflows."""
|
||||
|
||||
# Token de test fixe utilisé pour tous les tests d'API.
|
||||
# Doit être défini AVANT le premier import de agent_v0.server_v1.api_stream
|
||||
# car le module fail-closed (sys.exit 1) si RPA_API_TOKEN est absent.
|
||||
_TEST_API_TOKEN = "test_token_for_api_endpoints_0123456789abcdef"
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _ensure_api_token(self, monkeypatch):
|
||||
"""Garantit que RPA_API_TOKEN est défini avant l'import de api_stream.
|
||||
|
||||
Le module agent_v0.server_v1.api_stream applique un fail-closed P0-C
|
||||
(sys.exit 1) à l'import si RPA_API_TOKEN est absent. On force donc
|
||||
une valeur de test ici avant tout import lazy dans la fixture client.
|
||||
"""
|
||||
monkeypatch.setenv("RPA_API_TOKEN", self._TEST_API_TOKEN)
|
||||
# Si api_stream est déjà chargé dans sys.modules avec un autre token
|
||||
# (par ex. depuis un précédent test), on aligne sa valeur API_TOKEN
|
||||
# pour que les requêtes Bearer du test passent l'auth.
|
||||
api_stream_mod = sys.modules.get("agent_v0.server_v1.api_stream")
|
||||
if api_stream_mod is not None:
|
||||
monkeypatch.setattr(api_stream_mod, "API_TOKEN", self._TEST_API_TOKEN)
|
||||
|
||||
@pytest.fixture
|
||||
def client(self, temp_dir):
|
||||
"""Client de test FastAPI."""
|
||||
|
||||
378
tests/integration/test_streamer_buffer_and_purge.py
Normal file
378
tests/integration/test_streamer_buffer_and_purge.py
Normal file
@@ -0,0 +1,378 @@
|
||||
"""
|
||||
Tests pour les fonctionnalités Partie A (purge après ACK) et Partie B
|
||||
(buffer persistant) du TraceStreamer — bloquants audit AI Act.
|
||||
|
||||
Aucun réseau : on mocke requests.post.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
_ROOT = str(Path(__file__).resolve().parents[2])
|
||||
if _ROOT not in sys.path:
|
||||
sys.path.insert(0, _ROOT)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _make_png(path: Path, size: int = 100) -> Path:
|
||||
"""Crée un PNG minimal (header + padding) valide pour open()."""
|
||||
path.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * size)
|
||||
return path
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def isolated_buffer(tmp_path, monkeypatch):
|
||||
"""Isole le buffer persistant dans un tmp_path par test.
|
||||
|
||||
Le buffer est normalement partagé (BASE_DIR / "buffer"). On pointe
|
||||
vers un chemin jetable pour éviter la pollution croisée entre tests.
|
||||
"""
|
||||
from agent_v0.agent_v1.network import streamer as streamer_mod
|
||||
|
||||
buffer_dir = tmp_path / "buffer"
|
||||
monkeypatch.setattr(streamer_mod, "BUFFER_DIR", buffer_dir)
|
||||
return buffer_dir
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Partie A — Purge après ACK
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestPurgeAfterAck:
|
||||
"""Partie A : les screenshots locaux sont supprimés après HTTP 200."""
|
||||
|
||||
def test_image_purged_after_ack(self, tmp_path, isolated_buffer):
|
||||
"""Après HTTP 200, le fichier image local doit être supprimé."""
|
||||
from agent_v0.agent_v1.network.streamer import (
|
||||
ImageSendResult,
|
||||
TraceStreamer,
|
||||
)
|
||||
|
||||
img_path = _make_png(tmp_path / "to_purge.png")
|
||||
assert img_path.exists()
|
||||
|
||||
with patch("agent_v0.agent_v1.network.streamer.requests") as mock_req:
|
||||
mock_req.post.return_value = MagicMock(ok=True)
|
||||
streamer = TraceStreamer("sess_purge_001")
|
||||
streamer._server_available = True
|
||||
result = streamer._send_image(str(img_path), "shot_test")
|
||||
|
||||
assert result is ImageSendResult.OK
|
||||
assert not img_path.exists(), "Fichier local doit être supprimé après ACK"
|
||||
|
||||
def test_image_not_purged_if_server_rejects(self, tmp_path, isolated_buffer):
|
||||
"""Si le serveur répond 500, le fichier local est conservé."""
|
||||
from agent_v0.agent_v1.network.streamer import (
|
||||
ImageSendResult,
|
||||
TraceStreamer,
|
||||
)
|
||||
|
||||
img_path = _make_png(tmp_path / "keep_me.png")
|
||||
|
||||
with patch("agent_v0.agent_v1.network.streamer.requests") as mock_req:
|
||||
mock_req.post.return_value = MagicMock(ok=False, status_code=500)
|
||||
streamer = TraceStreamer("sess_purge_002")
|
||||
streamer._server_available = True
|
||||
result = streamer._send_image(str(img_path), "shot_test")
|
||||
|
||||
assert result is ImageSendResult.FAILED
|
||||
assert img_path.exists(), "Fichier doit rester si le serveur rejette"
|
||||
|
||||
def test_purge_disabled_via_env(
|
||||
self, tmp_path, isolated_buffer, monkeypatch
|
||||
):
|
||||
"""RPA_PURGE_AFTER_ACK=0 désactive la purge."""
|
||||
# On patche PURGE_AFTER_ACK directement (lu au module load)
|
||||
from agent_v0.agent_v1.network import streamer as streamer_mod
|
||||
|
||||
monkeypatch.setattr(streamer_mod, "PURGE_AFTER_ACK", False)
|
||||
|
||||
img_path = _make_png(tmp_path / "keep.png")
|
||||
|
||||
with patch("agent_v0.agent_v1.network.streamer.requests") as mock_req:
|
||||
mock_req.post.return_value = MagicMock(ok=True)
|
||||
streamer = streamer_mod.TraceStreamer("sess_purge_003")
|
||||
streamer._server_available = True
|
||||
streamer._send_image(str(img_path), "shot_test")
|
||||
|
||||
assert img_path.exists(), "Purge doit être désactivée"
|
||||
|
||||
def test_purge_does_not_crash_on_locked_file(
|
||||
self, tmp_path, isolated_buffer, monkeypatch
|
||||
):
|
||||
"""Si os.remove échoue (fichier verrouillé), pas de crash."""
|
||||
from agent_v0.agent_v1.network import streamer as streamer_mod
|
||||
|
||||
img_path = _make_png(tmp_path / "locked.png")
|
||||
|
||||
def _raise_permission(*_args, **_kwargs):
|
||||
raise PermissionError("Fichier verrouillé (simulé)")
|
||||
|
||||
monkeypatch.setattr(streamer_mod.os, "remove", _raise_permission)
|
||||
|
||||
with patch("agent_v0.agent_v1.network.streamer.requests") as mock_req:
|
||||
mock_req.post.return_value = MagicMock(ok=True)
|
||||
streamer = streamer_mod.TraceStreamer("sess_purge_004")
|
||||
streamer._server_available = True
|
||||
# Ne doit PAS lever
|
||||
result = streamer._send_image(str(img_path), "shot_test")
|
||||
|
||||
from agent_v0.agent_v1.network.streamer import ImageSendResult
|
||||
assert result is ImageSendResult.OK
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Partie B — Buffer persistant SQLite
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestPersistentBuffer:
|
||||
"""Partie B : persistance disque des events/images non envoyés."""
|
||||
|
||||
def test_priority_event_persisted_when_server_down(
|
||||
self, tmp_path, isolated_buffer
|
||||
):
|
||||
"""Un event prioritaire est persisté si le serveur est indisponible."""
|
||||
from agent_v0.agent_v1.network.streamer import TraceStreamer
|
||||
|
||||
streamer = TraceStreamer("sess_buf_001")
|
||||
streamer._server_available = False
|
||||
|
||||
streamer.push_event({"type": "click", "pos": [100, 200]})
|
||||
|
||||
buf = streamer._get_buffer()
|
||||
counts = buf.counts()
|
||||
assert counts["events"] == 1, "Click doit être persisté"
|
||||
|
||||
def test_heartbeat_not_persisted_when_server_down(
|
||||
self, tmp_path, isolated_buffer
|
||||
):
|
||||
"""Un heartbeat (non prioritaire) n'est PAS persisté."""
|
||||
from agent_v0.agent_v1.network.streamer import TraceStreamer
|
||||
|
||||
streamer = TraceStreamer("sess_buf_002")
|
||||
streamer._server_available = False
|
||||
|
||||
# La queue n'est pas pleine, donc le heartbeat va dans la queue RAM
|
||||
streamer.push_event({"type": "heartbeat", "image": "/tmp/h.png"})
|
||||
|
||||
buf = streamer._get_buffer()
|
||||
# Heartbeat reste dans la queue RAM (pas prioritaire → pas persisté)
|
||||
assert buf.counts()["events"] == 0
|
||||
|
||||
def test_image_persisted_when_server_down(
|
||||
self, tmp_path, isolated_buffer
|
||||
):
|
||||
"""Une image est persistée si le serveur est indisponible."""
|
||||
from agent_v0.agent_v1.network.streamer import TraceStreamer
|
||||
|
||||
img = _make_png(tmp_path / "img.png")
|
||||
|
||||
streamer = TraceStreamer("sess_buf_003")
|
||||
streamer._server_available = False
|
||||
|
||||
streamer.push_image(str(img), "shot_001")
|
||||
|
||||
buf = streamer._get_buffer()
|
||||
assert buf.counts()["images"] == 1
|
||||
|
||||
def test_buffer_persists_when_queue_full(
|
||||
self, tmp_path, isolated_buffer
|
||||
):
|
||||
"""Quand la queue RAM est pleine, un event prioritaire va en SQLite."""
|
||||
from agent_v0.agent_v1.network import streamer as streamer_mod
|
||||
|
||||
# Monkeypatch la taille max de queue pour forcer le débordement vite
|
||||
streamer = streamer_mod.TraceStreamer("sess_buf_004")
|
||||
streamer._server_available = True
|
||||
# Remplir artificiellement la queue
|
||||
import queue as _q
|
||||
|
||||
# Remplir jusqu'à être full
|
||||
while True:
|
||||
try:
|
||||
streamer.queue.put_nowait(("event", {"type": "noise"}))
|
||||
except _q.Full:
|
||||
break
|
||||
|
||||
# Maintenant queue pleine — un click doit aller en SQLite
|
||||
streamer.push_event({"type": "click", "pos": [1, 2]})
|
||||
|
||||
buf = streamer._get_buffer()
|
||||
assert buf.counts()["events"] >= 1
|
||||
|
||||
def test_drain_replays_events_when_server_recovers(
|
||||
self, tmp_path, isolated_buffer
|
||||
):
|
||||
"""Le drain rejoue les events persistés quand le serveur revient."""
|
||||
from agent_v0.agent_v1.network.streamer import TraceStreamer
|
||||
|
||||
streamer = TraceStreamer("sess_buf_005")
|
||||
# Persister un event pendant que le serveur est down
|
||||
streamer._server_available = False
|
||||
streamer.push_event({"type": "click", "pos": [50, 50]})
|
||||
|
||||
assert streamer._get_buffer().counts()["events"] == 1
|
||||
|
||||
# Serveur revient — on simule un drain manuel
|
||||
streamer._server_available = True
|
||||
with patch(
|
||||
"agent_v0.agent_v1.network.streamer.requests"
|
||||
) as mock_req:
|
||||
mock_req.post.return_value = MagicMock(ok=True)
|
||||
streamer._drain_buffer_once(streamer._get_buffer())
|
||||
|
||||
# L'event doit être envoyé ET supprimé du buffer
|
||||
event_calls = [
|
||||
c for c in mock_req.post.call_args_list if "/event" in str(c)
|
||||
]
|
||||
assert len(event_calls) == 1
|
||||
assert streamer._get_buffer().counts()["events"] == 0
|
||||
|
||||
def test_drain_increments_attempts_on_failure(
|
||||
self, tmp_path, isolated_buffer
|
||||
):
|
||||
"""Si le drain échoue, attempts est incrémenté (pas de suppression)."""
|
||||
from agent_v0.agent_v1.network.streamer import TraceStreamer
|
||||
|
||||
streamer = TraceStreamer("sess_buf_006")
|
||||
streamer._server_available = False
|
||||
streamer.push_event({"type": "click"})
|
||||
|
||||
buf = streamer._get_buffer()
|
||||
assert buf.counts()["events"] == 1
|
||||
|
||||
# Simule un envoi qui échoue (500)
|
||||
streamer._server_available = True
|
||||
with patch("agent_v0.agent_v1.network.streamer.requests") as mock_req:
|
||||
mock_req.post.return_value = MagicMock(ok=False, status_code=500)
|
||||
streamer._drain_buffer_once(buf)
|
||||
|
||||
# L'event reste dans le buffer avec attempts=1
|
||||
rows = buf.drain_events()
|
||||
assert len(rows) == 1
|
||||
assert rows[0]["attempts"] == 1
|
||||
|
||||
def test_event_abandoned_after_max_attempts(
|
||||
self, tmp_path, isolated_buffer
|
||||
):
|
||||
"""Après MAX_ATTEMPTS, un event est abandonné (supprimé + log error)."""
|
||||
from agent_v0.agent_v1.network.persistent_buffer import (
|
||||
MAX_ATTEMPTS,
|
||||
PersistentBuffer,
|
||||
)
|
||||
|
||||
buf = PersistentBuffer(tmp_path / "buf")
|
||||
buf.add_event("sess_aband", {"type": "click"})
|
||||
|
||||
# Incrémenter attempts jusqu'au max
|
||||
rows = buf.drain_events()
|
||||
for _ in range(MAX_ATTEMPTS):
|
||||
buf.increment_attempts(rows[0]["id"], "event")
|
||||
|
||||
abandoned = buf.abandon_exceeded()
|
||||
assert abandoned == 1
|
||||
assert buf.counts()["events"] == 0
|
||||
|
||||
def test_buffer_survives_corrupted_db(self, tmp_path):
|
||||
"""Un fichier DB corrompu est renommé et un nouveau est créé."""
|
||||
from agent_v0.agent_v1.network.persistent_buffer import (
|
||||
PersistentBuffer,
|
||||
)
|
||||
|
||||
buffer_dir = tmp_path / "buf"
|
||||
buffer_dir.mkdir()
|
||||
# Créer un fichier "DB" corrompu
|
||||
db_path = buffer_dir / "pending_events.db"
|
||||
db_path.write_bytes(b"this is not a valid sqlite db file\x00\x01")
|
||||
|
||||
# Ne doit pas crasher
|
||||
buf = PersistentBuffer(buffer_dir)
|
||||
|
||||
# Le buffer doit être utilisable
|
||||
assert buf.add_event("sess_recover", {"type": "click"}) is True
|
||||
assert buf.counts()["events"] == 1
|
||||
|
||||
def test_drain_skips_image_with_missing_file(
|
||||
self, tmp_path, isolated_buffer
|
||||
):
|
||||
"""Si le fichier image a disparu, on supprime l'entrée du buffer."""
|
||||
from agent_v0.agent_v1.network.streamer import TraceStreamer
|
||||
|
||||
streamer = TraceStreamer("sess_buf_missing")
|
||||
streamer._server_available = False
|
||||
# Persister une image vers un chemin qui n'existe pas
|
||||
streamer.push_image("/tmp/does_not_exist_xyz.png", "shot_missing")
|
||||
|
||||
buf = streamer._get_buffer()
|
||||
assert buf.counts()["images"] == 1
|
||||
|
||||
# Drain : l'entrée doit être supprimée (fichier introuvable)
|
||||
streamer._server_available = True
|
||||
with patch("agent_v0.agent_v1.network.streamer.requests") as mock_req:
|
||||
mock_req.post.return_value = MagicMock(ok=True)
|
||||
streamer._drain_buffer_once(buf)
|
||||
|
||||
assert buf.counts()["images"] == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scénarios complets (reprise, coupure réseau)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestScenarios:
|
||||
"""Scénarios de bout en bout pour valider la reprise après incident."""
|
||||
|
||||
def test_scenario_server_offline_then_recover(
|
||||
self, tmp_path, isolated_buffer
|
||||
):
|
||||
"""Scénario : serveur offline → events bufferisés → serveur revient
|
||||
→ drain automatique → buffer vide."""
|
||||
from agent_v0.agent_v1.network.streamer import TraceStreamer
|
||||
|
||||
streamer = TraceStreamer("sess_scenario_001")
|
||||
|
||||
# 1) Serveur offline au démarrage
|
||||
streamer._server_available = False
|
||||
|
||||
# 2) L'utilisateur clique 5 fois
|
||||
for i in range(5):
|
||||
streamer.push_event({"type": "click", "pos": [i, i]})
|
||||
|
||||
buf = streamer._get_buffer()
|
||||
assert buf.counts()["events"] == 5, "5 clicks doivent être persistés"
|
||||
|
||||
# 3) Le serveur revient
|
||||
streamer._server_available = True
|
||||
|
||||
# 4) Drain manuel (équivalent boucle)
|
||||
with patch(
|
||||
"agent_v0.agent_v1.network.streamer.requests"
|
||||
) as mock_req:
|
||||
mock_req.post.return_value = MagicMock(ok=True)
|
||||
streamer._drain_buffer_once(buf)
|
||||
|
||||
# 5) Tous les events ont été envoyés dans l'ordre
|
||||
event_calls = [
|
||||
c for c in mock_req.post.call_args_list if "/event" in str(c)
|
||||
]
|
||||
assert len(event_calls) == 5
|
||||
# Vérifier l'ordre (positions croissantes)
|
||||
positions = [
|
||||
c[1]["json"]["event"]["pos"][0] for c in event_calls
|
||||
]
|
||||
assert positions == [0, 1, 2, 3, 4]
|
||||
|
||||
assert buf.counts()["events"] == 0
|
||||
214
tests/integration/test_streamer_file_gone_p0e.py
Normal file
214
tests/integration/test_streamer_file_gone_p0e.py
Normal file
@@ -0,0 +1,214 @@
|
||||
"""
|
||||
Tests du Fix P0-E : FileNotFoundError dans _send_image n'est pas un succès.
|
||||
|
||||
Avant : un fichier image disparu retournait `True` (succès logique) — donc
|
||||
le buffer SQLite supprimait l'entrée alors que le serveur n'avait jamais
|
||||
reçu l'image. Perte silencieuse, contradiction avec la sémantique
|
||||
"succès = HTTP 200".
|
||||
|
||||
Après : retourne `ImageSendResult.FILE_GONE` distinct de `OK`. Le drain
|
||||
du buffer supprime l'entrée mais avec un log ERROR explicite (pas de retry,
|
||||
pas de confusion avec un succès réseau).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def isolated_buffer(tmp_path, monkeypatch):
|
||||
"""Isole le buffer persistant dans un tmp_path par test."""
|
||||
from agent_v0.agent_v1.network import streamer as streamer_mod
|
||||
|
||||
buffer_dir = tmp_path / "buffer"
|
||||
monkeypatch.setattr(streamer_mod, "BUFFER_DIR", buffer_dir)
|
||||
return buffer_dir
|
||||
|
||||
|
||||
class TestImageSendResultEnum:
|
||||
"""Vérifier l'existence et le contrat de l'enum ImageSendResult."""
|
||||
|
||||
def test_enum_has_three_values(self):
|
||||
from agent_v0.agent_v1.network.streamer import ImageSendResult
|
||||
|
||||
assert ImageSendResult.OK.value == "ok"
|
||||
assert ImageSendResult.FAILED.value == "failed"
|
||||
assert ImageSendResult.FILE_GONE.value == "file_gone"
|
||||
|
||||
def test_enum_values_distinct(self):
|
||||
from agent_v0.agent_v1.network.streamer import ImageSendResult
|
||||
|
||||
assert ImageSendResult.OK is not ImageSendResult.FAILED
|
||||
assert ImageSendResult.OK is not ImageSendResult.FILE_GONE
|
||||
assert ImageSendResult.FAILED is not ImageSendResult.FILE_GONE
|
||||
|
||||
|
||||
class TestSendImageReturnsFileGone:
    """_send_image must return FILE_GONE when the file does not exist."""

    def test_missing_file_returns_file_gone(self, tmp_path, isolated_buffer):
        """Nonexistent file → FILE_GONE (neither OK nor FAILED)."""
        from agent_v0.agent_v1.network.streamer import (
            ImageSendResult,
            TraceStreamer,
        )

        streamer = TraceStreamer("sess_p0e_001")
        streamer._server_available = True

        # The file is deliberately NOT created.
        missing_path = str(tmp_path / "i_do_not_exist.png")

        # Patch requests so no real network call can happen.
        with patch("agent_v0.agent_v1.network.streamer.requests"):
            result = streamer._send_image(missing_path, "shot_lost")

        assert result is ImageSendResult.FILE_GONE, (
            f"Attendu FILE_GONE, reçu {result}"
        )

    def test_file_gone_is_not_truthy_for_legacy_callers(
        self, tmp_path, isolated_buffer
    ):
        """A legacy caller doing `if result:` must NOT read FILE_GONE
        as a success."""
        from agent_v0.agent_v1.network.streamer import ImageSendResult

        # FILE_GONE is a non-empty enum member → truthy by default in
        # Python. That is exactly why `bool(result)` cannot distinguish
        # success from failure: callers MUST compare with `is OK`.
        # This test documents that contract.
        result = ImageSendResult.FILE_GONE
        assert result is not ImageSendResult.OK
        assert result is not True
|
||||
|
||||
|
||||
class TestDrainHandlesFileGone:
    """Buffer drain must drop a FILE_GONE entry and emit an ERROR log.

    A missing file can never be sent, so retrying would loop forever;
    the entry is discarded, but loudly (ERROR, not a silent success).
    """

    def test_drain_removes_buffer_entry_for_missing_file(
        self, tmp_path, isolated_buffer, caplog
    ):
        """If the file vanishes between persistence and drain:
        - the entry is removed from the buffer (no infinite retry)
        - an ERROR log reports the loss
        """
        import logging

        from agent_v0.agent_v1.network.streamer import TraceStreamer

        streamer = TraceStreamer("sess_p0e_drain")
        streamer._server_available = False

        # Persist an image pointing at a nonexistent path.
        ghost_path = str(tmp_path / "ghost.png")
        streamer.push_image(ghost_path, "shot_ghost")

        buf = streamer._get_buffer()
        assert buf.counts()["images"] == 1

        # Drain with the server available: must detect the missing file
        # and abandon the entry.
        streamer._server_available = True
        with caplog.at_level(logging.ERROR, logger="agent_v0.agent_v1.network.streamer"):
            with patch("agent_v0.agent_v1.network.streamer.requests"):
                streamer._drain_buffer_once(buf)

        assert buf.counts()["images"] == 0, (
            "L'entrée doit être supprimée (retry voué à échouer)"
        )

        # An ERROR log must have been emitted (not merely a warning).
        error_logs = [r for r in caplog.records if r.levelno >= logging.ERROR]
        assert len(error_logs) >= 1, (
            "Un log ERROR doit signaler que le serveur n'a rien reçu"
        )
        # Fix: the original condition was partly redundant ("abandonnée"
        # was tested both case-sensitively and via .lower()) while
        # "introuvable" was only matched case-sensitively. Normalize each
        # message once and match both keywords case-insensitively —
        # strictly broader, so every message accepted before still passes.
        assert any(
            "abandonnée" in msg or "introuvable" in msg
            for msg in (r.getMessage().lower() for r in error_logs)
        )

    def test_send_image_file_disappears_during_send(
        self, tmp_path, isolated_buffer, caplog
    ):
        """Nasty case: the file exists when drain_images starts but
        vanishes during _send_image (disk race condition).

        Simulated by patching _compress_image_to_jpeg to raise
        FileNotFoundError.
        """
        import logging

        from agent_v0.agent_v1.network.streamer import (
            ImageSendResult,
            TraceStreamer,
        )

        # The file exists initially.
        img_path = tmp_path / "race.png"
        img_path.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 50)

        streamer = TraceStreamer("sess_p0e_race")
        streamer._server_available = True

        # Force FileNotFoundError inside the send pipeline (compression
        # opens the file — which will have "vanished" in between).
        def _gone(_path):
            raise FileNotFoundError(f"race condition: {_path}")

        with patch.object(streamer, "_compress_image_to_jpeg", _gone), \
                patch("agent_v0.agent_v1.network.streamer.requests"), \
                caplog.at_level(logging.ERROR, logger="agent_v0.agent_v1.network.streamer"):
            result = streamer._send_image(str(img_path), "shot_race")

        assert result is ImageSendResult.FILE_GONE, (
            "FileNotFoundError pendant la compression → FILE_GONE"
        )
        # ERROR log (not debug, as it was before the fix).
        error_logs = [r for r in caplog.records if r.levelno >= logging.ERROR]
        assert len(error_logs) >= 1
|
||||
|
||||
|
||||
class TestStreamLoopHandlesFileGone:
    """The send loop must NOT persist a FILE_GONE entry back to the buffer."""

    def test_file_gone_not_persisted_to_buffer(
        self, tmp_path, isolated_buffer
    ):
        """When _send_image returns FILE_GONE, the item is not re-written
        to the buffer (otherwise: add → drain → file_gone → add… forever)."""
        from agent_v0.agent_v1.network.streamer import (
            ImageSendResult,
            TraceStreamer,
        )

        streamer = TraceStreamer("sess_p0e_loop")
        streamer._server_available = True

        # Mock _send_with_retry to short-circuit straight to FILE_GONE.
        with patch.object(
            streamer, "_send_with_retry", return_value=ImageSendResult.FILE_GONE
        ):
            # Put one image item on the queue.
            streamer.queue.put(("image", ("/tmp/whatever.png", "shot_x")))
            # Fix: fetch OUTSIDE the try block. In the original, queue.get
            # sat inside a try whose `finally` called task_done(); had get
            # raised queue.Empty, task_done() would have raised ValueError
            # and masked the real failure.
            item_type, data = streamer.queue.get(timeout=0.1)
            try:
                # Reproduce the _stream_loop logic for a single iteration.
                result = streamer._send_with_retry(
                    streamer._send_image, *data
                )
                assert result is ImageSendResult.FILE_GONE
                # The caller (_stream_loop) must treat FILE_GONE as
                # "do not persist" → the buffer must stay empty.
                buf = streamer._get_buffer()
                # Before the fix the item would have been persisted
                # ("consecutive_failures += 1" then "if priority_item:
                # persist()"). With the fix, the entry is skipped.
                assert buf.counts()["images"] == 0
            finally:
                streamer.queue.task_done()
|
||||
@@ -96,14 +96,16 @@ class TestWorkflowPipelineEnhanced:
|
||||
"confidence": 0.92
|
||||
}
|
||||
|
||||
# Mock de l'action suivante
|
||||
# Mock de l'action suivante (contrat dict normalisé Lot A)
|
||||
mock_workflow_pipeline.get_next_action.return_value = {
|
||||
"status": "selected",
|
||||
"edge_id": "edge_1",
|
||||
"action": {"type": "click", "target": "button"},
|
||||
"target_node": "node_2",
|
||||
"confidence": 0.95
|
||||
"confidence": 0.95,
|
||||
"score": 0.95,
|
||||
}
|
||||
|
||||
|
||||
# Mock du workflow
|
||||
mock_workflow = Mock(spec=Workflow)
|
||||
mock_edge = Mock(spec=WorkflowEdge)
|
||||
@@ -112,7 +114,7 @@ class TestWorkflowPipelineEnhanced:
|
||||
mock_edge.to_node = "node_2"
|
||||
mock_workflow.edges = [mock_edge]
|
||||
mock_workflow_pipeline.load_workflow.return_value = mock_workflow
|
||||
|
||||
|
||||
# Mock du résultat d'exécution
|
||||
mock_execution_result = Mock(spec=ExecutionResult)
|
||||
mock_execution_result.status = ExecutionStatus.SUCCESS
|
||||
@@ -121,24 +123,24 @@ class TestWorkflowPipelineEnhanced:
|
||||
mock_execution_result.target_resolved = None
|
||||
mock_execution_result.error = None
|
||||
mock_workflow_pipeline.action_executor.execute_edge.return_value = mock_execution_result
|
||||
|
||||
|
||||
# Créer l'instance enhanced
|
||||
enhanced = WorkflowPipelineEnhanced()
|
||||
|
||||
|
||||
# Lier les méthodes du pipeline mock
|
||||
enhanced.match_current_state = mock_workflow_pipeline.match_current_state
|
||||
enhanced.get_next_action = mock_workflow_pipeline.get_next_action
|
||||
enhanced.load_workflow = mock_workflow_pipeline.load_workflow
|
||||
enhanced.action_executor = mock_workflow_pipeline.action_executor
|
||||
enhanced.error_handler = mock_workflow_pipeline.error_handler
|
||||
|
||||
|
||||
# Act
|
||||
result = enhanced.execute_workflow_step_enhanced(
|
||||
workflow_id=workflow_id,
|
||||
current_state=mock_screen_state,
|
||||
context={"test_context": "value"}
|
||||
)
|
||||
|
||||
|
||||
# Assert
|
||||
assert isinstance(result, WorkflowExecutionResult)
|
||||
assert result.success is True
|
||||
@@ -242,7 +244,8 @@ class TestWorkflowPipelineEnhanced:
|
||||
}
|
||||
|
||||
# Mock de l'action suivante (pas d'action = workflow terminé)
|
||||
mock_workflow_pipeline.get_next_action.return_value = None
|
||||
# Contrat dict normalisé Lot A : status="terminal" pour fin légitime
|
||||
mock_workflow_pipeline.get_next_action.return_value = {"status": "terminal"}
|
||||
|
||||
# Créer l'instance enhanced
|
||||
enhanced = WorkflowPipelineEnhanced()
|
||||
@@ -347,14 +350,16 @@ class TestWorkflowPipelineEnhanced:
|
||||
"confidence": 0.92
|
||||
}
|
||||
|
||||
# Mock de l'action suivante
|
||||
# Mock de l'action suivante (contrat dict normalisé Lot A)
|
||||
mock_workflow_pipeline.get_next_action.return_value = {
|
||||
"status": "selected",
|
||||
"edge_id": "edge_1",
|
||||
"action": {"type": "click", "target": "button"},
|
||||
"target_node": "node_2",
|
||||
"confidence": 0.95
|
||||
"confidence": 0.95,
|
||||
"score": 0.95,
|
||||
}
|
||||
|
||||
|
||||
# Mock du workflow
|
||||
mock_workflow = Mock(spec=Workflow)
|
||||
mock_edge = Mock(spec=WorkflowEdge)
|
||||
@@ -363,7 +368,7 @@ class TestWorkflowPipelineEnhanced:
|
||||
mock_edge.to_node = "node_2"
|
||||
mock_workflow.edges = [mock_edge]
|
||||
mock_workflow_pipeline.load_workflow.return_value = mock_workflow
|
||||
|
||||
|
||||
# Mock du résultat d'exécution
|
||||
mock_execution_result = Mock(spec=ExecutionResult)
|
||||
mock_execution_result.status = ExecutionStatus.SUCCESS
|
||||
@@ -372,17 +377,17 @@ class TestWorkflowPipelineEnhanced:
|
||||
mock_execution_result.target_resolved = None
|
||||
mock_execution_result.error = None
|
||||
mock_workflow_pipeline.action_executor.execute_edge.return_value = mock_execution_result
|
||||
|
||||
|
||||
# Créer l'instance enhanced
|
||||
enhanced = WorkflowPipelineEnhanced()
|
||||
|
||||
|
||||
# Lier les méthodes du pipeline mock
|
||||
enhanced.match_current_state = mock_workflow_pipeline.match_current_state
|
||||
enhanced.get_next_action = mock_workflow_pipeline.get_next_action
|
||||
enhanced.load_workflow = mock_workflow_pipeline.load_workflow
|
||||
enhanced.action_executor = mock_workflow_pipeline.action_executor
|
||||
enhanced.error_handler = mock_workflow_pipeline.error_handler
|
||||
|
||||
|
||||
# Act
|
||||
result = enhanced.execute_workflow_step_enhanced(
|
||||
workflow_id=workflow_id,
|
||||
|
||||
520
tests/unit/test_analytics_vision_metrics.py
Normal file
520
tests/unit/test_analytics_vision_metrics.py
Normal file
@@ -0,0 +1,520 @@
|
||||
"""
|
||||
Tests unitaires pour la remontée des champs vision-aware (C1) vers analytics.
|
||||
|
||||
Couvre :
|
||||
- StepMetrics.to_dict / from_dict avec les nouveaux champs
|
||||
- AnalyticsExecutionIntegration.on_step_result passe bien les champs
|
||||
- Persistance SQLite (schema + migration) des colonnes C1
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sqlite3
|
||||
import tempfile
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from core.analytics.collection.metrics_collector import StepMetrics
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# StepMetrics : sérialisation des champs C1
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _make_step_metrics(**overrides) -> StepMetrics:
    """Build a StepMetrics populated with sane defaults.

    Any keyword argument overrides the corresponding default field.
    """
    params = {
        "step_id": "s1",
        "execution_id": "exec1",
        "workflow_id": "wf1",
        "node_id": "n1",
        "action_type": "click",
        "target_element": "",
        "started_at": datetime(2026, 4, 13, 10, 0, 0),
        "completed_at": datetime(2026, 4, 13, 10, 0, 1),
        "duration_ms": 1000.0,
        "status": "completed",
        "confidence_score": 0.9,
        "retry_count": 0,
        "error_details": None,
        **overrides,
    }
    return StepMetrics(**params)
|
||||
|
||||
|
||||
class TestStepMetricsVisionFields:
    """StepMetrics serialization round-trips the vision-aware (C1) fields."""

    def test_default_vision_fields(self):
        # A StepMetrics built without overrides carries neutral C1 values.
        m = _make_step_metrics()
        assert m.ocr_ms == 0.0
        assert m.ui_ms == 0.0
        assert m.analyze_ms == 0.0
        assert m.total_ms == 0.0
        assert m.cache_hit is False
        assert m.degraded is False

    def test_to_dict_includes_vision_fields(self):
        """to_dict must export every C1 timing/flag field verbatim."""
        m = _make_step_metrics(
            ocr_ms=120.5,
            ui_ms=45.0,
            analyze_ms=200.0,
            total_ms=1050.0,
            cache_hit=True,
            degraded=True,
        )
        d = m.to_dict()
        assert d["ocr_ms"] == 120.5
        assert d["ui_ms"] == 45.0
        assert d["analyze_ms"] == 200.0
        assert d["total_ms"] == 1050.0
        assert d["cache_hit"] is True
        assert d["degraded"] is True

    def test_from_dict_roundtrip(self):
        """to_dict → from_dict must preserve every C1 field."""
        original = _make_step_metrics(
            ocr_ms=10.0, ui_ms=20.0, analyze_ms=30.0,
            total_ms=100.0, cache_hit=True, degraded=False,
        )
        restored = StepMetrics.from_dict(original.to_dict())
        assert restored.ocr_ms == 10.0
        assert restored.ui_ms == 20.0
        assert restored.analyze_ms == 30.0
        assert restored.total_ms == 100.0
        assert restored.cache_hit is True
        assert restored.degraded is False

    def test_from_dict_missing_vision_fields_defaults_to_zero(self):
        """Backward compatibility: a dict without C1 fields yields 0/False."""
        restored = StepMetrics.from_dict({
            'step_id': 's1',
            'execution_id': 'e1',
            'workflow_id': 'w1',
            'node_id': 'n1',
            'action_type': 'click',
            'target_element': '',
            'started_at': datetime.now().isoformat(),
            'completed_at': datetime.now().isoformat(),
            'duration_ms': 100.0,
            'status': 'completed',
            'confidence_score': 0.5,
        })
        assert restored.ocr_ms == 0.0
        assert restored.cache_hit is False
        assert restored.degraded is False
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# AnalyticsExecutionIntegration.on_step_result
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
class _FakeStepResult:
|
||||
"""Stand-in minimal pour core.execution.execution_loop.StepResult."""
|
||||
def __init__(self, **kw):
|
||||
self.success = kw.get("success", True)
|
||||
self.node_id = kw.get("node_id", "n1")
|
||||
self.edge_id = kw.get("edge_id", None)
|
||||
self.action_result = kw.get("action_result", None)
|
||||
self.match_confidence = kw.get("match_confidence", 0.9)
|
||||
self.duration_ms = kw.get("duration_ms", 100.0)
|
||||
self.message = kw.get("message", "")
|
||||
self.ocr_ms = kw.get("ocr_ms", 0.0)
|
||||
self.ui_ms = kw.get("ui_ms", 0.0)
|
||||
self.analyze_ms = kw.get("analyze_ms", 0.0)
|
||||
self.total_ms = kw.get("total_ms", 0.0)
|
||||
self.cache_hit = kw.get("cache_hit", False)
|
||||
self.degraded = kw.get("degraded", False)
|
||||
|
||||
|
||||
class TestAnalyticsOnStepResult:
    """on_step_result must forward the vision-aware fields into StepMetrics."""

    def test_on_step_result_passes_vision_fields(self):
        from core.analytics.integration.execution_integration import (
            AnalyticsExecutionIntegration,
        )

        # Mocked analytics system: we only inspect what gets recorded.
        fake_system = MagicMock()
        integration = AnalyticsExecutionIntegration(fake_system)

        step = _FakeStepResult(
            node_id="node_click",
            success=True,
            match_confidence=0.87,
            duration_ms=1234.0,
            ocr_ms=111.0,
            ui_ms=222.0,
            analyze_ms=333.0,
            total_ms=1234.0,
            cache_hit=True,
            degraded=False,
        )

        integration.on_step_result(
            execution_id="exec1",
            workflow_id="wf1",
            step_result=step,
        )

        # Verify that a StepMetrics carrying the right fields was recorded.
        record_calls = fake_system.metrics_collector.record_step.call_args_list
        assert len(record_calls) == 1
        recorded: StepMetrics = record_calls[0].args[0]
        assert isinstance(recorded, StepMetrics)
        assert recorded.node_id == "node_click"
        assert recorded.workflow_id == "wf1"
        assert recorded.execution_id == "exec1"
        assert recorded.confidence_score == 0.87
        assert recorded.duration_ms == 1234.0
        assert recorded.ocr_ms == 111.0
        assert recorded.ui_ms == 222.0
        assert recorded.analyze_ms == 333.0
        assert recorded.total_ms == 1234.0
        assert recorded.cache_hit is True
        assert recorded.degraded is False
        assert recorded.status == "completed"

    def test_on_step_result_failed_step(self):
        """A failed step maps to status="failed" with the message as details."""
        from core.analytics.integration.execution_integration import (
            AnalyticsExecutionIntegration,
        )

        fake_system = MagicMock()
        integration = AnalyticsExecutionIntegration(fake_system)

        step = _FakeStepResult(
            success=False,
            message="Click failed",
            degraded=True,
        )

        integration.on_step_result("e1", "w1", step)

        recorded: StepMetrics = fake_system.metrics_collector.record_step.call_args.args[0]
        assert recorded.status == "failed"
        assert recorded.error_details == "Click failed"
        assert recorded.degraded is True

    def test_on_step_result_disabled_integration_is_noop(self):
        """With analytics disabled, on_step_result is a silent no-op."""
        from core.analytics.integration.execution_integration import (
            AnalyticsExecutionIntegration,
        )

        integration = AnalyticsExecutionIntegration(None)  # disabled
        assert integration.enabled is False

        step = _FakeStepResult()
        # Must neither do anything nor raise.
        integration.on_step_result("e1", "w1", step)
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# AnalyticsExecutionIntegration.on_execution_complete (Lot A — avril 2026)
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestAnalyticsOnExecutionComplete:
    """Normalized contract: duration_ms (ms) + status (str), no magic."""

    def _make_integration(self):
        # Build an integration whose collector has no active executions,
        # forcing the fallback path under test.
        from core.analytics.integration.execution_integration import (
            AnalyticsExecutionIntegration,
        )

        fake_system = MagicMock()
        # No active execution: the integration must take the fallback
        # path ("synthetic ExecutionMetrics pushed into _buffer").
        fake_system.metrics_collector._active_executions = {}
        fake_system.metrics_collector._lock = MagicMock()
        fake_system.metrics_collector._lock.__enter__ = MagicMock(
            return_value=None
        )
        fake_system.metrics_collector._lock.__exit__ = MagicMock(
            return_value=None
        )
        fake_system.metrics_collector._buffer = []
        return AnalyticsExecutionIntegration(fake_system), fake_system

    def test_fallback_builds_execution_metrics_with_correct_fields(self):
        """Without a prior record_execution_start, a synthetic
        ExecutionMetrics is built with the correct field names."""
        from core.analytics.collection.metrics_collector import ExecutionMetrics

        integration, fake_system = self._make_integration()

        integration.on_execution_complete(
            execution_id="exec1",
            workflow_id="wf1",
            duration_ms=1500.0,
            status="completed",
            steps_total=3,
            steps_completed=3,
            steps_failed=0,
        )

        # An ExecutionMetrics was pushed into the buffer.
        buffer = fake_system.metrics_collector._buffer
        assert len(buffer) == 1
        metric: ExecutionMetrics = buffer[0]
        assert isinstance(metric, ExecutionMetrics)
        assert metric.execution_id == "exec1"
        assert metric.workflow_id == "wf1"
        assert metric.duration_ms == 1500.0
        assert metric.status == "completed"
        assert metric.steps_total == 3
        assert metric.steps_completed == 3
        assert metric.steps_failed == 0
        # started_at / completed_at must be mutually consistent.
        delta_ms = (
            metric.completed_at - metric.started_at
        ).total_seconds() * 1000
        assert abs(delta_ms - 1500.0) < 1.0

    def test_uses_record_execution_complete_if_active(self):
        """If the execution was opened via on_execution_start, delegate
        to record_execution_complete (nominal path)."""
        integration, fake_system = self._make_integration()
        # Simulate an active execution.
        fake_system.metrics_collector._active_executions = {"exec1": object()}

        integration.on_execution_complete(
            execution_id="exec1",
            workflow_id="wf1",
            duration_ms=800.0,
            status="failed",
            steps_total=2,
            steps_completed=1,
            steps_failed=1,
            error_message="timeout",
        )

        call = fake_system.metrics_collector.record_execution_complete.call_args
        assert call is not None
        kwargs = call.kwargs
        assert kwargs["execution_id"] == "exec1"
        assert kwargs["status"] == "failed"
        assert kwargs["steps_total"] == 2
        assert kwargs["steps_completed"] == 1
        assert kwargs["steps_failed"] == 1
        assert kwargs["error_message"] == "timeout"

    def test_steps_total_derived_when_not_provided(self):
        """steps_total is derived by summing when absent — no silent error."""
        integration, fake_system = self._make_integration()

        integration.on_execution_complete(
            execution_id="exec1",
            workflow_id="wf1",
            duration_ms=500.0,
            status="completed",
            steps_completed=2,
            steps_failed=1,
        )

        metric = fake_system.metrics_collector._buffer[0]
        assert metric.steps_total == 3  # 2 + 1

    def test_disabled_integration_is_noop(self):
        """With analytics disabled, on_execution_complete is a no-op."""
        from core.analytics.integration.execution_integration import (
            AnalyticsExecutionIntegration,
        )

        integration = AnalyticsExecutionIntegration(None)
        assert integration.enabled is False

        # Must neither do anything nor raise.
        integration.on_execution_complete(
            execution_id="exec1",
            workflow_id="wf1",
            duration_ms=100.0,
            status="completed",
        )

    def test_realtime_complete_called(self):
        """Real-time tracking is closed with the final status."""
        integration, fake_system = self._make_integration()

        integration.on_execution_complete(
            execution_id="exec1",
            workflow_id="wf1",
            duration_ms=100.0,
            status="stopped",
        )

        fake_system.realtime_analytics.complete_execution.assert_called_once_with(
            execution_id="exec1",
            status="stopped",
        )
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# AnalyticsExecutionIntegration.on_recovery_attempt (Lot A — avril 2026)
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestAnalyticsOnRecoveryAttempt:
    """Normalized contract: StepMetrics built with the real field names."""

    def test_success_recovery_builds_valid_step_metrics(self):
        """A successful recovery records a synthetic "completed" step."""
        from core.analytics.collection.metrics_collector import StepMetrics
        from core.analytics.integration.execution_integration import (
            AnalyticsExecutionIntegration,
        )

        fake_system = MagicMock()
        integration = AnalyticsExecutionIntegration(fake_system)

        integration.on_recovery_attempt(
            execution_id="exec1",
            workflow_id="wf1",
            node_id="node_click",
            strategy="retry_with_delay",
            success=True,
            duration_ms=250.0,
        )

        call = fake_system.metrics_collector.record_step.call_args
        assert call is not None
        recorded: StepMetrics = call.args[0]
        assert isinstance(recorded, StepMetrics)
        assert recorded.execution_id == "exec1"
        assert recorded.workflow_id == "wf1"
        assert recorded.node_id == "node_click_recovery"
        assert recorded.action_type == "recovery_retry_with_delay"
        assert recorded.duration_ms == 250.0
        assert recorded.status == "completed"
        assert recorded.error_details is None
        # Mandatory dataclass fields.
        assert recorded.step_id  # non-empty
        assert recorded.target_element == ""
        assert recorded.confidence_score == 0.0

    def test_failed_recovery_sets_status_and_error_details(self):
        """A failed recovery records status="failed" plus an explanation."""
        from core.analytics.collection.metrics_collector import StepMetrics
        from core.analytics.integration.execution_integration import (
            AnalyticsExecutionIntegration,
        )

        fake_system = MagicMock()
        integration = AnalyticsExecutionIntegration(fake_system)

        integration.on_recovery_attempt(
            execution_id="e1",
            workflow_id="w1",
            node_id="n1",
            strategy="fallback_to_parent",
            success=False,
            duration_ms=80.0,
        )

        recorded: StepMetrics = (
            fake_system.metrics_collector.record_step.call_args.args[0]
        )
        assert recorded.status == "failed"
        assert recorded.error_details == "Recovery failed: fallback_to_parent"
        assert recorded.duration_ms == 80.0

    def test_disabled_integration_is_noop(self):
        """With analytics disabled, on_recovery_attempt must not raise."""
        from core.analytics.integration.execution_integration import (
            AnalyticsExecutionIntegration,
        )

        integration = AnalyticsExecutionIntegration(None)
        integration.on_recovery_attempt(
            execution_id="e1",
            workflow_id="w1",
            node_id="n1",
            strategy="x",
            success=True,
            duration_ms=10.0,
        )
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Persistance SQLite : schema + migration
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestTimeSeriesStoreSchema:
    """SQLite persistence: schema creation + migration of the C1 columns."""

    def test_new_store_has_vision_columns(self, tmp_path):
        """A freshly created store exposes both legacy and C1 columns."""
        from core.analytics.storage.timeseries_store import TimeSeriesStore

        store = TimeSeriesStore(tmp_path)
        with sqlite3.connect(str(store.db_path)) as conn:
            cols = {row[1] for row in conn.execute(
                "PRAGMA table_info(step_metrics)"
            )}
            # Legacy columns
            assert "duration_ms" in cols
            assert "confidence_score" in cols
            # C1 columns
            assert "ocr_ms" in cols
            assert "ui_ms" in cols
            assert "analyze_ms" in cols
            assert "total_ms" in cols
            assert "cache_hit" in cols
            assert "degraded" in cols

    def test_migration_adds_missing_columns(self, tmp_path):
        """Pre-existing DB without the C1 columns — migration must add them."""
        from core.analytics.storage.timeseries_store import TimeSeriesStore

        # Build a "legacy" database by hand, without the new columns.
        storage_dir = tmp_path / "legacy"
        storage_dir.mkdir()
        legacy_db = storage_dir / "timeseries.db"
        with sqlite3.connect(str(legacy_db)) as conn:
            conn.executescript("""
                CREATE TABLE step_metrics (
                    step_id TEXT PRIMARY KEY,
                    execution_id TEXT NOT NULL,
                    workflow_id TEXT NOT NULL,
                    node_id TEXT NOT NULL,
                    action_type TEXT NOT NULL,
                    target_element TEXT,
                    started_at TIMESTAMP NOT NULL,
                    completed_at TIMESTAMP NOT NULL,
                    duration_ms REAL NOT NULL,
                    status TEXT NOT NULL,
                    confidence_score REAL,
                    retry_count INTEGER DEFAULT 0,
                    error_details TEXT
                );
            """)
            conn.commit()

        # Instantiating TimeSeriesStore must trigger the migration.
        _ = TimeSeriesStore(storage_dir)

        with sqlite3.connect(str(legacy_db)) as conn:
            cols = {row[1] for row in conn.execute(
                "PRAGMA table_info(step_metrics)"
            )}
            assert "ocr_ms" in cols
            assert "cache_hit" in cols
            assert "degraded" in cols

    def test_write_and_read_vision_metrics(self, tmp_path):
        """C1 fields survive a write → raw-SQL read round trip."""
        from core.analytics.storage.timeseries_store import TimeSeriesStore

        store = TimeSeriesStore(tmp_path)
        metric = _make_step_metrics(
            ocr_ms=50.0, ui_ms=60.0, analyze_ms=110.0,
            total_ms=500.0, cache_hit=True, degraded=True,
        )
        store.write_metrics([metric])

        with sqlite3.connect(str(store.db_path)) as conn:
            conn.row_factory = sqlite3.Row
            row = conn.execute(
                "SELECT * FROM step_metrics WHERE step_id = ?", (metric.step_id,)
            ).fetchone()
            assert row is not None
            assert row["ocr_ms"] == 50.0
            assert row["ui_ms"] == 60.0
            assert row["analyze_ms"] == 110.0
            assert row["total_ms"] == 500.0
            # SQLite stores booleans as INTEGER.
            assert row["cache_hit"] == 1
            assert row["degraded"] == 1
|
||||
171
tests/unit/test_api_stream_auth_p0bc.py
Normal file
171
tests/unit/test_api_stream_auth_p0bc.py
Normal file
@@ -0,0 +1,171 @@
|
||||
"""
|
||||
Tests des Fix P0-B et P0-C sur agent_v0/server_v1/api_stream.py.
|
||||
|
||||
P0-B : /api/v1/traces/stream/image n'est PLUS dans _PUBLIC_PATHS.
|
||||
L'upload d'image exige désormais un Bearer token.
|
||||
|
||||
P0-C : Si RPA_API_TOKEN est absent ET RPA_AUTH_DISABLED ≠ true,
|
||||
le module DOIT refuser de se charger (sys.exit 1).
|
||||
En mode dev (RPA_AUTH_DISABLED=true), pas de crash mais log warning.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
||||
|
||||
|
||||
def _reload_api_stream():
    """Re-import the api_stream module so fresh env vars take effect.

    Dropping the cached module forces module-level auth setup to run
    again on the next import.
    """
    mod_name = "agent_v0.server_v1.api_stream"
    sys.modules.pop(mod_name, None)
    return importlib.import_module(mod_name)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fix P0-B : /image n'est plus public
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestImageEndpointNotPublic:
    """Fix P0-B: /api/v1/traces/stream/image requires a Bearer token."""

    def test_image_path_removed_from_public_paths(self, monkeypatch):
        """The _PUBLIC_PATHS constant must no longer contain /image."""
        monkeypatch.setenv("RPA_API_TOKEN", "deadbeef" * 4)
        monkeypatch.delenv("RPA_AUTH_DISABLED", raising=False)
        mod = _reload_api_stream()
        assert "/api/v1/traces/stream/image" not in mod._PUBLIC_PATHS, (
            "L'endpoint d'upload d'image NE doit PAS être public — il accepte "
            "des bytes arbitraires et déclenche du travail VLM côté serveur."
        )

    def test_health_still_public(self, monkeypatch):
        """/health stays public (monitoring)."""
        monkeypatch.setenv("RPA_API_TOKEN", "deadbeef" * 4)
        monkeypatch.delenv("RPA_AUTH_DISABLED", raising=False)
        mod = _reload_api_stream()
        assert "/health" in mod._PUBLIC_PATHS

    def test_replay_next_still_public(self, monkeypatch):
        """/replay/next stays public (legacy Rust agent polling)."""
        monkeypatch.setenv("RPA_API_TOKEN", "deadbeef" * 4)
        monkeypatch.delenv("RPA_AUTH_DISABLED", raising=False)
        mod = _reload_api_stream()
        assert "/api/v1/traces/stream/replay/next" in mod._PUBLIC_PATHS
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fix P0-C : fail-closed si pas de token
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestFailClosedTokenP0C:
    """Fix P0-C: missing RPA_API_TOKEN → sys.exit(1), no silent token generation.

    Dev mode (RPA_AUTH_DISABLED=true) must keep loading with a warning only.
    """

    def test_no_token_no_disable_exits(self, monkeypatch):
        """No RPA_API_TOKEN and no RPA_AUTH_DISABLED → SystemExit(1)."""
        monkeypatch.delenv("RPA_API_TOKEN", raising=False)
        monkeypatch.delenv("RPA_AUTH_DISABLED", raising=False)
        with pytest.raises(SystemExit) as exc_info:
            _reload_api_stream()
        assert exc_info.value.code == 1

    def test_empty_token_no_disable_exits(self, monkeypatch):
        """A whitespace-only token is treated as absent → SystemExit."""
        monkeypatch.setenv("RPA_API_TOKEN", " ")  # whitespace, stripped by the module
        monkeypatch.delenv("RPA_AUTH_DISABLED", raising=False)
        with pytest.raises(SystemExit) as exc_info:
            _reload_api_stream()
        assert exc_info.value.code == 1

    def test_no_token_with_disable_succeeds(self, monkeypatch):
        """No token but RPA_AUTH_DISABLED=true → module loads (dev mode)."""
        monkeypatch.delenv("RPA_API_TOKEN", raising=False)
        monkeypatch.setenv("RPA_AUTH_DISABLED", "true")
        # Must not crash.
        mod = _reload_api_stream()
        assert mod._AUTH_DISABLED is True
        # An internal token still exists (generated for coherence, never checked).
        assert mod.API_TOKEN, "Un token interne est toujours défini en mode dev"

    def test_token_present_logs_prefix(self, monkeypatch, caplog):
        """With a valid token, the module logs its first 8 characters."""
        import logging

        monkeypatch.setenv("RPA_API_TOKEN", "abcdef0123456789" * 2)
        monkeypatch.delenv("RPA_AUTH_DISABLED", raising=False)
        with caplog.at_level(logging.INFO, logger="api_stream"):
            mod = _reload_api_stream()
        assert mod.API_TOKEN == "abcdef0123456789" * 2
        # At least one record carries the 8-char prefix (or the French banner).
        log_text = " ".join(r.getMessage() for r in caplog.records)
        assert "abcdef01" in log_text or "Token API chargé" in log_text

    def test_verify_token_bypass_when_disabled(self, monkeypatch):
        """Dev mode: _verify_token lets a header-less request through."""
        import asyncio
        from unittest.mock import MagicMock

        monkeypatch.delenv("RPA_API_TOKEN", raising=False)
        monkeypatch.setenv("RPA_AUTH_DISABLED", "true")
        mod = _reload_api_stream()

        # Forge a header-less request on a normally protected endpoint.
        req = MagicMock()
        req.url.path = "/api/v1/traces/stream/event"
        req.headers = {}
        # asyncio.run() replaces the deprecated
        # get_event_loop().run_until_complete() pattern (deprecated outside a
        # running loop since 3.10; get_event_loop tightened further in 3.12).
        asyncio.run(mod._verify_token(req))  # must not raise

    def test_verify_token_rejects_missing_header(self, monkeypatch):
        """Auth enabled: request without Authorization header → HTTPException 401."""
        import asyncio
        from unittest.mock import MagicMock
        from fastapi import HTTPException

        monkeypatch.setenv("RPA_API_TOKEN", "validtoken" * 4)
        monkeypatch.delenv("RPA_AUTH_DISABLED", raising=False)
        mod = _reload_api_stream()

        req = MagicMock()
        req.url.path = "/api/v1/traces/stream/image"  # protected since P0-B
        req.headers = {}
        with pytest.raises(HTTPException) as exc_info:
            asyncio.run(mod._verify_token(req))
        assert exc_info.value.status_code == 401

    def test_verify_token_rejects_image_without_bearer(self, monkeypatch):
        """P0-B + P0-C: POST /image with a wrong Bearer token → 401."""
        import asyncio
        from unittest.mock import MagicMock
        from fastapi import HTTPException

        monkeypatch.setenv("RPA_API_TOKEN", "validtoken" * 4)
        monkeypatch.delenv("RPA_AUTH_DISABLED", raising=False)
        mod = _reload_api_stream()

        req = MagicMock()
        req.url.path = "/api/v1/traces/stream/image"
        req.headers = {"Authorization": "Bearer wrong-token"}
        with pytest.raises(HTTPException) as exc_info:
            asyncio.run(mod._verify_token(req))
        assert exc_info.value.status_code == 401
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _cleanup(monkeypatch):
    """Restore a sane api_stream state after every test in this module.

    Reloads the module with a throwaway token so that later test suites do
    not inherit a half-configured (or exited) api_stream.
    """
    yield
    # Re-import with a dummy token so auth is deterministic for other suites.
    monkeypatch.setenv("RPA_API_TOKEN", "cleanup-token" * 3)
    monkeypatch.delenv("RPA_AUTH_DISABLED", raising=False)
    try:
        _reload_api_stream()
    except SystemExit:
        pass
|
||||
441
tests/unit/test_chat_interface.py
Normal file
441
tests/unit/test_chat_interface.py
Normal file
@@ -0,0 +1,441 @@
|
||||
# tests/unit/test_chat_interface.py
|
||||
"""
|
||||
Tests unitaires du module chat_interface (Léa conversationnelle).
|
||||
|
||||
Vérifie :
|
||||
1. Création de session (état initial, message d'accueil)
|
||||
2. Envoi de message → appel TaskPlanner mocké
|
||||
3. Historique (get_history)
|
||||
4. Transitions d'états idle → planning → awaiting_confirmation → executing → done
|
||||
5. Abandon (utilisateur répond "non")
|
||||
6. Fallback gracieux quand gemma4/TaskPlanner indisponible
|
||||
7. ChatManager (création, listing, cleanup)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
_ROOT = str(Path(__file__).resolve().parents[2])
|
||||
if _ROOT not in sys.path:
|
||||
sys.path.insert(0, _ROOT)
|
||||
|
||||
from agent_v0.server_v1.chat_interface import (
|
||||
ChatSession,
|
||||
ChatManager,
|
||||
STATE_IDLE,
|
||||
STATE_PLANNING,
|
||||
STATE_AWAITING_CONFIRMATION,
|
||||
STATE_EXECUTING,
|
||||
STATE_DONE,
|
||||
STATE_ERROR,
|
||||
ROLE_USER,
|
||||
ROLE_LEA,
|
||||
)
|
||||
from agent_v0.server_v1.task_planner import TaskPlan
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Fixtures
|
||||
# =============================================================================
|
||||
|
||||
@pytest.fixture
def sample_workflows():
    """One recorded workflow the planner can be matched against."""
    bloc_notes = {
        "session_id": "sess_bloc_notes",
        "name": "Bloc-notes",
        "description": "Ouvrir Bloc-notes via Exécuter (Win+R) et écrire du texte",
    }
    return [bloc_notes]
|
||||
|
||||
|
||||
@pytest.fixture
def understood_plan():
    """TaskPlan for the happy path: order understood, workflow matched."""
    return TaskPlan(
        instruction="ouvre le bloc-notes et écris bonjour",
        understood=True,
        workflow_match="sess_bloc_notes",
        workflow_name="Bloc-notes",
        parameters={"texte": "bonjour"},
        match_confidence=0.9,
        mode="replay",
        is_loop=False,
    )
|
||||
|
||||
|
||||
@pytest.fixture
def unknown_plan():
    """TaskPlan for an instruction the planner could not understand."""
    return TaskPlan(
        instruction="fais le café",
        understood=False,
        error="aucun workflow ne correspond",
    )
|
||||
|
||||
|
||||
@pytest.fixture
def mock_task_planner(understood_plan):
    """TaskPlanner stub whose understand() always yields the happy-path plan."""
    stub = MagicMock()
    stub.understand.return_value = understood_plan
    return stub
|
||||
|
||||
|
||||
@pytest.fixture
def mock_replay_callback():
    """Replay-launcher stub that always returns a fixed replay id."""
    return MagicMock(return_value="replay_abc123")
|
||||
|
||||
|
||||
@pytest.fixture
def mock_status_provider():
    """Status stub reporting an in-progress replay; tests may override it."""
    in_progress = {
        "status": "running",
        "completed_actions": 1,
        "total_actions": 5,
    }
    return MagicMock(return_value=in_progress)
|
||||
|
||||
|
||||
@pytest.fixture
def session(mock_task_planner, sample_workflows, mock_replay_callback, mock_status_provider):
    """Fully wired ChatSession backed by the mocked collaborators."""
    return ChatSession(
        task_planner=mock_task_planner,
        workflows_provider=lambda: sample_workflows,
        replay_callback=mock_replay_callback,
        status_provider=mock_status_provider,
    )
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests création session
|
||||
# =============================================================================
|
||||
|
||||
class TestSessionCreation:
    """A fresh ChatSession starts idle with exactly one welcome message."""

    def test_session_id_generated(self):
        assert ChatSession().session_id.startswith("chat_")

    def test_initial_state_is_idle(self):
        assert ChatSession().state == STATE_IDLE

    def test_welcome_message_present(self):
        messages = ChatSession().get_history()
        assert len(messages) == 1
        first = messages[0]
        assert first["role"] == ROLE_LEA
        # The greeting mentions either "Bonjour" or the assistant's name.
        assert "Bonjour" in first["content"] or "Léa" in first["content"]

    def test_session_id_custom(self):
        assert ChatSession(session_id="custom_42").session_id == "custom_42"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests send_message
|
||||
# =============================================================================
|
||||
|
||||
class TestSendMessage:
    """send_message() drives planning and records the conversation."""

    def test_empty_message_rejected(self, session):
        assert session.send_message("")["ok"] is False

    def test_send_message_calls_planner(self, session, mock_task_planner):
        session.send_message("ouvre le bloc-notes")
        mock_task_planner.understand.assert_called_once()
        kwargs = mock_task_planner.understand.call_args.kwargs
        assert kwargs["instruction"] == "ouvre le bloc-notes"
        # The workflow catalogue from workflows_provider is forwarded.
        assert "available_workflows" in kwargs
        assert len(kwargs["available_workflows"]) == 1

    def test_send_message_transitions_to_awaiting_confirmation(self, session):
        outcome = session.send_message("ouvre le bloc-notes")
        assert outcome["ok"] is True
        assert outcome["state"] == STATE_AWAITING_CONFIRMATION
        assert session.state == STATE_AWAITING_CONFIRMATION

    def test_user_message_added_to_history(self, session):
        session.send_message("ouvre le bloc-notes")
        from_user = [m for m in session.get_history() if m["role"] == ROLE_USER]
        assert len(from_user) == 1
        assert from_user[0]["content"] == "ouvre le bloc-notes"

    def test_lea_proposal_added_to_history(self, session):
        session.send_message("ouvre le bloc-notes")
        from_lea = [m for m in session.get_history() if m["role"] == ROLE_LEA]
        # Welcome message plus the proposal.
        assert len(from_lea) == 2
        proposal_text = from_lea[-1]["content"]
        assert "Bloc-notes" in proposal_text
        assert "oui" in proposal_text.lower() or "y aller" in proposal_text.lower()

    def test_proposal_contains_confidence(self, session):
        session.send_message("ouvre le bloc-notes")
        # match_confidence 0.9 is rendered as a percentage (90).
        assert "90" in session.get_history()[-1]["content"]

    def test_proposal_contains_parameters(self, session):
        session.send_message("ouvre le bloc-notes")
        proposal_text = session.get_history()[-1]["content"]
        assert "texte" in proposal_text
        assert "bonjour" in proposal_text
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests get_history
|
||||
# =============================================================================
|
||||
|
||||
class TestGetHistory:
    """get_history() exposes the transcript as a list of plain dicts."""

    def test_history_returns_list_of_dicts(self, session):
        transcript = session.get_history()
        assert isinstance(transcript, list)
        assert all(isinstance(entry, dict) for entry in transcript)

    def test_history_message_structure(self, session):
        entry = session.get_history()[0]
        for field in ("role", "content", "timestamp", "meta"):
            assert field in entry

    def test_history_grows_with_messages(self, session):
        before = len(session.get_history())
        session.send_message("ouvre le bloc-notes")
        assert len(session.get_history()) > before
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests transitions d'états
|
||||
# =============================================================================
|
||||
|
||||
class TestStateTransitions:
    """State machine: idle → planning → awaiting_confirmation → executing → done/error."""

    def test_full_happy_path(self, session, mock_task_planner, mock_replay_callback):
        """Walk the whole nominal lifecycle down to STATE_DONE."""
        assert session.state == STATE_IDLE

        # Message → planning → awaiting confirmation.
        session.send_message("ouvre le bloc-notes")
        assert session.state == STATE_AWAITING_CONFIRMATION

        # Confirmation → replay kicked off with the matched workflow id.
        outcome = session.confirm(confirmed=True)
        assert outcome["ok"] is True
        assert session.state == STATE_EXECUTING
        mock_replay_callback.assert_called_once()
        assert mock_replay_callback.call_args.kwargs["session_id"] == "sess_bloc_notes"

        # Replay reports completion → done.
        session._status_provider.return_value = {
            "status": "done",
            "completed_actions": 5,
            "total_actions": 5,
        }
        session.refresh_progress()
        assert session.state == STATE_DONE

    def test_confirm_via_message_oui(self, session, mock_replay_callback):
        """Typing 'oui' is equivalent to pressing the confirm button."""
        session.send_message("ouvre le bloc-notes")
        assert session.state == STATE_AWAITING_CONFIRMATION

        session.send_message("oui")
        assert session.state == STATE_EXECUTING
        mock_replay_callback.assert_called_once()

    def test_refusal_via_confirm_false(self, session, mock_replay_callback):
        """confirm(False) cancels: back to idle, nothing executed."""
        session.send_message("ouvre le bloc-notes")
        outcome = session.confirm(confirmed=False)
        assert outcome["ok"] is True
        assert outcome["confirmed"] is False
        assert session.state == STATE_IDLE
        mock_replay_callback.assert_not_called()

    def test_refusal_via_message_non(self, session, mock_replay_callback):
        """Typing 'non' cancels and leaves a cancellation note in the log."""
        session.send_message("ouvre le bloc-notes")
        session.send_message("non")
        assert session.state == STATE_IDLE
        mock_replay_callback.assert_not_called()
        # The cancellation message must appear in the transcript.
        assert any("annule" in entry["content"].lower()
                   for entry in session.get_history())

    def test_ambiguous_confirmation_reply(self, session):
        """An unclear answer keeps us waiting and asks for clarification."""
        session.send_message("ouvre le bloc-notes")
        outcome = session.send_message("peut-être")
        assert session.state == STATE_AWAITING_CONFIRMATION
        assert outcome.get("needs_clarification") is True

    def test_failed_replay_transitions_to_error(self, session):
        """A replay_callback exception lands the session in STATE_ERROR."""
        session._replay_callback = MagicMock(side_effect=RuntimeError("boom"))
        session.send_message("ouvre le bloc-notes")
        outcome = session.confirm(confirmed=True)
        assert outcome["ok"] is False
        assert session.state == STATE_ERROR

    def test_replay_failure_from_status(self, session):
        """A 'failed' status report moves executing → error."""
        session.send_message("ouvre le bloc-notes")
        session.confirm(confirmed=True)
        assert session.state == STATE_EXECUTING

        session._status_provider.return_value = {
            "status": "failed",
            "error": "element introuvable",
        }
        session.refresh_progress()
        assert session.state == STATE_ERROR
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests fallback / résilience
|
||||
# =============================================================================
|
||||
|
||||
class TestResilience:
    """Degraded modes: missing planner, planner crash, unknown instruction."""

    def test_no_task_planner_graceful(self):
        """Without any TaskPlanner the session fails politely, not loudly."""
        lone = ChatSession(task_planner=None)
        assert lone.send_message("test")["ok"] is False
        assert lone.state == STATE_ERROR
        # An apology/unavailability message must be in the transcript.
        assert any(
            "désolée" in entry["content"].lower()
            or "indisponible" in entry["content"].lower()
            for entry in lone.get_history()
        )

    def test_task_planner_exception_graceful(self, mock_replay_callback):
        """understand() raising (gemma4 down) → clean STATE_ERROR."""
        broken_planner = MagicMock()
        broken_planner.understand.side_effect = RuntimeError("gemma4 offline")

        chat = ChatSession(
            task_planner=broken_planner,
            workflows_provider=lambda: [],
            replay_callback=mock_replay_callback,
        )
        assert chat.send_message("test")["ok"] is False
        assert chat.state == STATE_ERROR

    def test_instruction_not_understood(self, unknown_plan, mock_replay_callback):
        """A plan with understood=False produces an explicit 'rephrase' reply."""
        confused_planner = MagicMock()
        confused_planner.understand.return_value = unknown_plan

        chat = ChatSession(
            task_planner=confused_planner,
            workflows_provider=lambda: [],
            replay_callback=mock_replay_callback,
        )
        assert chat.send_message("fais le café")["ok"] is False
        assert chat.state == STATE_ERROR
        assert any("reformuler" in entry["content"].lower()
                   for entry in chat.get_history())

    def test_no_replay_callback(self, mock_task_planner, sample_workflows):
        """Execution without a replay_callback is refused cleanly."""
        chat = ChatSession(
            task_planner=mock_task_planner,
            workflows_provider=lambda: sample_workflows,
            replay_callback=None,
        )
        chat.send_message("ouvre le bloc-notes")
        outcome = chat.confirm(confirmed=True)
        assert outcome["ok"] is False
        assert chat.state == STATE_ERROR
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests snapshot
|
||||
# =============================================================================
|
||||
|
||||
class TestSnapshot:
    """get_snapshot() serialises the session state for the dashboard."""

    def test_snapshot_structure(self, session):
        snapshot = session.get_snapshot()
        for field in ("session_id", "state", "messages",
                      "pending_plan", "active_replay_id", "progress"):
            assert field in snapshot

    def test_snapshot_includes_pending_plan_when_awaiting(self, session):
        session.send_message("ouvre le bloc-notes")
        snapshot = session.get_snapshot()
        assert snapshot["state"] == STATE_AWAITING_CONFIRMATION
        assert snapshot["pending_plan"] is not None
        assert snapshot["pending_plan"]["workflow_name"] == "Bloc-notes"

    def test_snapshot_no_pending_plan_in_idle(self, session):
        assert session.get_snapshot()["pending_plan"] is None
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests ChatManager
|
||||
# =============================================================================
|
||||
|
||||
class TestChatManager:
    """ChatManager lifecycle: create, lookup, delete, age-based cleanup."""

    def test_create_session(self, mock_task_planner, sample_workflows):
        manager = ChatManager(
            task_planner=mock_task_planner,
            workflows_provider=lambda: sample_workflows,
        )
        created = manager.create_session()
        assert created is not None
        known_ids = [entry["session_id"] for entry in manager.list_sessions()]
        assert created.session_id in known_ids

    def test_get_session(self, mock_task_planner):
        manager = ChatManager(task_planner=mock_task_planner)
        created = manager.create_session()
        assert manager.get_session(created.session_id) is created

    def test_get_session_not_found(self):
        assert ChatManager().get_session("unknown") is None

    def test_delete_session(self, mock_task_planner):
        manager = ChatManager(task_planner=mock_task_planner)
        created = manager.create_session()
        assert manager.delete_session(created.session_id) is True
        assert manager.get_session(created.session_id) is None

    def test_cleanup_old_sessions(self, mock_task_planner):
        manager = ChatManager(task_planner=mock_task_planner)
        stale = manager.create_session()
        # Backdate the session far beyond the 1-hour cutoff used below.
        stale.updated_at = time.time() - 100000
        assert manager.cleanup_old(max_age_s=3600) == 1
        assert manager.get_session(stale.session_id) is None

    def test_list_sessions_structure(self, mock_task_planner):
        manager = ChatManager(task_planner=mock_task_planner)
        manager.create_session(machine_id="pc-01")
        listed = manager.list_sessions()
        assert len(listed) == 1
        entry = listed[0]
        for field in ("session_id", "state", "machine_id"):
            assert field in entry
        assert entry["machine_id"] == "pc-01"
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user