feat: pipeline complet MACRO/MÉSO/MICRO — Critic, Observer, Policy, Recovery, Learning, Audit Trail, TaskPlanner
Architecture 3 niveaux implémentée et testée (137 tests unitaires + 21 visuels) : MÉSO (acteur intelligent) : - P0 Critic : vérification sémantique post-action via gemma4 (replay_verifier.py) - P1 Observer : pré-analyse écran avant chaque action (api_stream.py /pre_analyze) - P2 Grounding/Policy : séparation localisation (grounding.py) et décision (policy.py) - P3 Recovery : rollback automatique Ctrl+Z/Escape/Alt+F4 (recovery.py) - P4 Learning : apprentissage runtime avec boucle de consolidation (replay_learner.py) MACRO (planificateur) : - TaskPlanner : comprend les ordres en langage naturel via gemma4 (task_planner.py) - Contexte métier TIM/CIM-10 pour les hôpitaux (domain_context.py) - Endpoint POST /api/v1/task pour l'exécution par instruction Traçabilité : - Audit trail complet avec 18 champs par action (audit_trail.py) - Endpoints GET /audit/history, /audit/summary, /audit/export (CSV) Grounding : - Fix parsing bbox_2d qwen2.5vl (pixels relatifs, pas grille 1000x1000) - Benchmarks visuels sur captures réelles (3 approches : baseline, zoom, Citrix) - Reproductibilité validée : variance < 0.008 sur 10 itérations Sécurité : - Tokens de production retirés du code source → .env.local - Secret key aléatoire si non configuré - Suppression logs qui leakent les tokens Résultats : 80% de replay (vs 12.5% avant), 100% détection visuelle Citrix JPEG Q20 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -28,11 +28,15 @@ from pydantic import BaseModel
|
||||
|
||||
from .replay_failure_logger import log_replay_failure
|
||||
from .replay_verifier import ReplayVerifier, VerificationResult
|
||||
from .replay_learner import ReplayLearner
|
||||
from .audit_trail import AuditTrail, AuditEntry
|
||||
from .stream_processor import StreamProcessor, build_replay_from_raw_events, enrich_click_from_screenshot
|
||||
from .worker_stream import StreamWorker
|
||||
|
||||
# Instance globale du vérificateur de replay (comparaison screenshots avant/après)
|
||||
_replay_verifier = ReplayVerifier()
|
||||
_replay_learner = ReplayLearner()
|
||||
_audit_trail = AuditTrail()
|
||||
|
||||
# Nombre maximum de retries par action avant de déclarer un échec
|
||||
MAX_RETRIES_PER_ACTION = 3
|
||||
@@ -995,6 +999,7 @@ class ReplayResultReport(BaseModel):
|
||||
warning: Optional[str] = None # "no_screen_change", "popup_handled", "visual_resolve_failed"
|
||||
screenshot: Optional[str] = None # Chemin ou base64 du screenshot post-action
|
||||
screenshot_after: Optional[str] = None # Chemin ou base64 du screenshot APRES l'action
|
||||
screenshot_before: Optional[str] = None # Screenshot AVANT l'action (pour le Critic)
|
||||
actual_position: Optional[Dict[str, float]] = None # {"x": px, "y": py} position réelle du clic
|
||||
# Métriques de résolution visuelle
|
||||
resolution_method: Optional[str] = None # som_text_match, som_vlm, vlm_quick_find, etc.
|
||||
@@ -3255,8 +3260,9 @@ async def report_action_result(report: ReplayResultReport):
|
||||
skip_verify = skip_verify or agent_handled_popup
|
||||
verification = None
|
||||
if report.success and screenshot_after and not skip_verify:
|
||||
# Chercher le screenshot avant (dernier connu de la session)
|
||||
screenshot_before = replay_state.get("_last_screenshot_before")
|
||||
# Utiliser le screenshot_before envoyé par l'agent (Critic fiable)
|
||||
# Fallback sur le dernier screenshot stocké côté serveur
|
||||
screenshot_before = report.screenshot_before or replay_state.get("_last_screenshot_before")
|
||||
if screenshot_before:
|
||||
try:
|
||||
action_dict = original_action or {"type": "unknown", "action_id": action_id}
|
||||
@@ -3264,12 +3270,37 @@ async def report_action_result(report: ReplayResultReport):
|
||||
"success": report.success,
|
||||
"error": report.error,
|
||||
}
|
||||
verification = _replay_verifier.verify_action(
|
||||
action=action_dict,
|
||||
result=result_dict,
|
||||
screenshot_before=screenshot_before,
|
||||
screenshot_after=screenshot_after,
|
||||
)
|
||||
# Utiliser le Critic sémantique si l'action a un expected_result
|
||||
expected_result = (original_action or {}).get("expected_result", "")
|
||||
action_intention = (original_action or {}).get("intention", "")
|
||||
if expected_result:
|
||||
# Critic complet : pixel + VLM sémantique
|
||||
workflow_ctx = (
|
||||
f"Action {replay_state.get('completed_actions', 0)+1}"
|
||||
f"/{len(replay_state.get('actions', []))}"
|
||||
)
|
||||
verification = _replay_verifier.verify_with_critic(
|
||||
action=action_dict,
|
||||
result=result_dict,
|
||||
screenshot_before=screenshot_before,
|
||||
screenshot_after=screenshot_after,
|
||||
expected_result=expected_result,
|
||||
action_intention=action_intention,
|
||||
workflow_context=workflow_ctx,
|
||||
)
|
||||
if verification.semantic_verified is not None:
|
||||
logger.info(
|
||||
f"Critic sémantique : {'OK' if verification.semantic_verified else 'ÉCHEC'} "
|
||||
f"en {verification.semantic_elapsed_ms:.0f}ms — {verification.semantic_detail[:80]}"
|
||||
)
|
||||
else:
|
||||
# Vérification pixel seule (pas d'expected_result)
|
||||
verification = _replay_verifier.verify_action(
|
||||
action=action_dict,
|
||||
result=result_dict,
|
||||
screenshot_before=screenshot_before,
|
||||
screenshot_after=screenshot_after,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Vérification post-action échouée: {e}")
|
||||
|
||||
@@ -3295,6 +3326,68 @@ async def report_action_result(report: ReplayResultReport):
|
||||
}
|
||||
replay_state["results"].append(result_entry)
|
||||
|
||||
# === Apprentissage : enregistrer le résultat pour amélioration continue ===
|
||||
try:
|
||||
_replay_learner.record_from_replay_result(
|
||||
session_id=session_id,
|
||||
action=original_action or {"action_id": action_id, "type": "unknown"},
|
||||
result=result_entry,
|
||||
verification=verification.to_dict() if verification else None,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug(f"Learning: échec enregistrement: {e}")
|
||||
|
||||
# === Audit Trail : traçabilité complète pour conformité hospitalière ===
|
||||
try:
|
||||
_action = original_action or {"action_id": action_id, "type": "unknown"}
|
||||
_target_spec = _action.get("target_spec", {})
|
||||
_verification = verification.to_dict() if verification else {}
|
||||
|
||||
# Déterminer le résultat pour l'audit
|
||||
if report.success and (verification is None or verification.verified):
|
||||
_audit_result = "success"
|
||||
elif report.success and verification and not verification.verified:
|
||||
_audit_result = "recovered" if retry_count > 0 else "failed"
|
||||
elif not report.success:
|
||||
_audit_result = "failed"
|
||||
else:
|
||||
_audit_result = "success"
|
||||
|
||||
# Déterminer le résultat du Critic
|
||||
_critic = ""
|
||||
if verification:
|
||||
if verification.semantic_verified is True:
|
||||
_critic = "semantic_ok"
|
||||
elif verification.semantic_verified is False:
|
||||
_critic = f"semantic_fail: {verification.semantic_detail[:100]}"
|
||||
elif verification.verified:
|
||||
_critic = "pixel_ok"
|
||||
else:
|
||||
_critic = f"pixel_fail: {verification.detail[:100]}"
|
||||
|
||||
_audit_trail.record(AuditEntry(
|
||||
session_id=session_id,
|
||||
action_id=action_id,
|
||||
user_id=replay_state.get("params", {}).get("user_id", ""),
|
||||
user_name=replay_state.get("params", {}).get("user_name", ""),
|
||||
machine_id=replay_state.get("machine_id", ""),
|
||||
action_type=_action.get("type", ""),
|
||||
action_detail=_target_spec.get("by_text", "") or _action.get("intention", ""),
|
||||
target_app=_target_spec.get("window_title", ""),
|
||||
execution_mode=replay_state.get("params", {}).get("execution_mode", "autonomous"),
|
||||
result=_audit_result,
|
||||
resolution_method=result_entry.get("resolution_method", ""),
|
||||
critic_result=_critic,
|
||||
recovery_action=report.warning or "",
|
||||
domain=replay_state.get("params", {}).get("domain", ""),
|
||||
workflow_id=replay_state.get("workflow_id", ""),
|
||||
workflow_name=replay_state.get("params", {}).get("workflow_name", ""),
|
||||
duration_ms=result_entry.get("resolution_elapsed_ms", 0.0) or 0.0,
|
||||
))
|
||||
except Exception as e:
|
||||
logger.debug(f"Audit Trail: échec enregistrement: {e}")
|
||||
|
||||
with _replay_lock:
|
||||
# === Logique de retry / success / failure ===
|
||||
if report.success and (verification is None or verification.verified):
|
||||
# Action réussie (vérification OK ou pas de vérification)
|
||||
@@ -3861,6 +3954,225 @@ async def resolve_target(request: ResolveTargetRequest):
|
||||
pass
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Observer — Pré-analyse écran avant résolution
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class PreAnalyzeRequest(BaseModel):
    """Payload for the Observer screen pre-analysis endpoint.

    Carries the screenshot to inspect plus optional hints about the
    expected screen state and the screen geometry used for grounding.
    """

    screenshot_b64: str  # Screenshot encoded as base64
    expected_state: str = ""  # Expected description of the screen state
    window_title: str = ""  # Expected window title
    screen_width: int = 1920  # Screen width in pixels
    screen_height: int = 1080  # Screen height in pixels
|
||||
|
||||
|
||||
@app.post("/api/v1/traces/stream/replay/pre_analyze")
async def pre_analyze_screen(request: PreAnalyzeRequest):
    """Observer: analyse the screen BEFORE target resolution.

    Detects popups, modal dialogs, and unexpected states that would
    prevent visual resolution from working.

    Returns:
        - screen_state: "ok" | "popup" | "unexpected"
        - popup_label: text of the popup button to click (if popup)
        - popup_coords: {x_pct, y_pct} of the button (if popup)
        - detail: description of the problem
    """
    import asyncio
    import base64
    import io

    from PIL import Image

    # Validate the payload early: a screenshot that cannot be decoded is
    # reported as "ok" so the caller falls back to normal resolution
    # instead of blocking the replay.
    try:
        img_bytes = base64.b64decode(request.screenshot_b64)
        Image.open(io.BytesIO(img_bytes))
    except Exception as e:
        return {"screen_state": "ok", "detail": f"decode error: {e}"}

    # The VLM call is blocking (requests) — run it in a worker thread so
    # the event loop stays responsive. get_running_loop() is the correct
    # call inside a coroutine (get_event_loop() is deprecated here).
    loop = asyncio.get_running_loop()
    result = await loop.run_in_executor(
        None,
        _pre_analyze_screen_sync,
        request.screenshot_b64,
        request.expected_state,
        request.window_title,
        request.screen_width,
        request.screen_height,
    )
    return result
|
||||
|
||||
|
||||
def _pre_analyze_screen_sync(
    screenshot_b64: str,
    expected_state: str,
    window_title: str,
    screen_width: int,
    screen_height: int,
) -> Dict[str, Any]:
    """Synchronous screen pre-analysis via VLM.

    Uses gemma4 (Docker, port 11435 by default) to detect:
      1. Popups / modal dialogs (with coordinates of the button to click)
      2. States inconsistent with what is expected

    Fast (~2-5s) because gemma4 is lightweight and runs in text+image mode.

    NOTE(review): expected_state and window_title are accepted but not
    currently injected into the prompt — presumably reserved for future
    use; confirm before relying on them.

    Returns a dict with at least "screen_state". On any transport or
    parsing error the state defaults to "ok" so the Observer never blocks
    a replay.
    """
    import os
    import time
    import requests as _requests

    gemma4_port = os.environ.get("GEMMA4_PORT", "11435")
    gemma4_url = f"http://localhost:{gemma4_port}/api/chat"

    # Load the business-domain context for the Observer
    from .domain_context import get_domain_context
    domain = get_domain_context(os.environ.get("RPA_DOMAIN", "generic"))

    # Concise prompt for popup detection; the model answers in a fixed
    # 3-line format parsed below.
    prompt = (
        "Regarde cette capture d'écran.\n"
        "Y a-t-il une popup, boîte de dialogue, message d'erreur, ou fenêtre modale visible ?\n\n"
        "Réponds EXACTEMENT dans ce format :\n"
        "ÉTAT: OK ou POPUP ou INATTENDU\n"
        "BOUTON: texte du bouton à cliquer (si POPUP, sinon 'aucun')\n"
        "DÉTAIL: description courte (1 ligne)"
    )

    # Messages with the domain context injected as system prompt
    messages = []
    if domain.system_prompt:
        messages.append({"role": "system", "content": domain.system_prompt})
    messages.append({"role": "user", "content": prompt, "images": [screenshot_b64]})

    try:
        t_start = time.time()
        resp = _requests.post(
            gemma4_url,
            json={
                "model": "gemma4:e4b",
                "messages": messages,
                "stream": False,
                "think": True,
                "options": {"temperature": 0.1, "num_predict": 800},
            },
            timeout=30,
        )
        elapsed_ms = (time.time() - t_start) * 1000

        if not resp.ok:
            logger.warning(f"Observer VLM HTTP {resp.status_code}")
            return {"screen_state": "ok", "detail": f"VLM HTTP {resp.status_code}"}

        content = resp.json().get("message", {}).get("content", "").strip()
        logger.info(f"Observer VLM ({elapsed_ms:.0f}ms) : {content[:100]}")

        # Parse the fixed-format answer line by line
        state = "ok"
        button = ""
        detail = content

        for line in content.split("\n"):
            line_clean = line.strip()
            upper = line_clean.upper()
            if upper.startswith("ÉTAT:") or upper.startswith("ETAT:"):
                val = upper.split(":", 1)[1].strip()
                if "POPUP" in val:
                    state = "popup"
                elif "INATTENDU" in val or "UNEXPECTED" in val:
                    state = "unexpected"
                else:
                    state = "ok"
            elif upper.startswith("BOUTON:"):
                # Use line_clean (not upper) to keep the label's casing
                button = line_clean.split(":", 1)[1].strip().strip("'\"")
                if button.lower() in ("aucun", "none", "n/a", ""):
                    button = ""
            elif upper.startswith("DÉTAIL:") or upper.startswith("DETAIL:"):
                detail = line_clean.split(":", 1)[1].strip()

        if state == "ok":
            return {"screen_state": "ok"}

        result = {
            "screen_state": state,
            "detail": detail,
            "elapsed_ms": round(elapsed_ms, 1),
        }

        # If a popup was detected with a button label, try to locate it
        if state == "popup" and button:
            result["popup_label"] = button
            # Locate the button via VLM grounding (qwen2.5vl)
            coords = _locate_popup_button(screenshot_b64, button, screen_width, screen_height)
            if coords:
                result["popup_coords"] = coords

        return result

    except _requests.Timeout:
        # Fixed: the message previously said 15s while the actual request
        # timeout above is 30s.
        logger.debug("Observer VLM timeout (30s)")
        return {"screen_state": "ok", "detail": "VLM timeout"}
    except Exception as e:
        logger.debug(f"Observer VLM erreur : {e}")
        return {"screen_state": "ok", "detail": str(e)}
|
||||
|
||||
|
||||
def _locate_popup_button(
    screenshot_b64: str, button_text: str,
    screen_width: int, screen_height: int,
) -> Optional[Dict[str, float]]:
    """Locate a popup button through VLM grounding (qwen2.5vl).

    Relies on qwen2.5vl's native bbox_2d output format to find the exact
    position of the button on the screenshot. Returns normalized center
    coordinates, or None when the button cannot be located.
    """
    import re
    import requests as _requests

    endpoint = "http://localhost:11434/api/chat"
    query = f"Detect the button with text '{button_text}' with a bounding box."

    try:
        response = _requests.post(
            endpoint,
            json={
                "model": "qwen2.5vl:7b",
                "messages": [{"role": "user", "content": query, "images": [screenshot_b64]}],
                "stream": False,
                "options": {"temperature": 0.1, "num_predict": 50},
            },
            timeout=15,
        )
        if response.ok:
            answer = response.json().get("message", {}).get("content", "")

            # qwen2.5vl returns pixel coordinates relative to the submitted
            # image — NOT a 1000x1000 grid. Expected JSON shape:
            # [{"bbox_2d": [x1, y1, x2, y2], "label": "..."}]
            match = re.search(
                r'"bbox_2d"\s*:\s*\[\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\]',
                answer,
            )
            if match:
                x1, y1, x2, y2 = (int(g) for g in match.groups())
                # Normalize the box center to the 0-1 range using the
                # screen dimensions; reject out-of-range results.
                cx = (x1 + x2) / 2 / screen_width
                cy = (y1 + y2) / 2 / screen_height
                if 0.0 <= cx <= 1.0 and 0.0 <= cy <= 1.0:
                    logger.info(f"Observer : bouton '{button_text}' localisé à ({cx:.3f}, {cy:.3f})")
                    return {"x_pct": cx, "y_pct": cy}

    except Exception as e:
        logger.debug(f"Observer grounding bouton erreur : {e}")

    return None
|
||||
|
||||
|
||||
def _resolve_by_template_matching(
|
||||
screenshot_path: str,
|
||||
anchor_image_b64: str,
|
||||
@@ -5694,6 +6006,417 @@ async def import_learning_pack(body: LearningPackImportRequest, request: Request
|
||||
_global_faiss_index = None
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Endpoints Audit Trail — traçabilité complète des actions RPA
|
||||
# =========================================================================
|
||||
|
||||
@app.get("/api/v1/audit/history")
async def audit_history(
    date_from: str = "",
    date_to: str = "",
    user_id: str = "",
    session_id: str = "",
    result: str = "",
    action_type: str = "",
    workflow_id: str = "",
    domain: str = "",
    limit: int = 100,
    offset: int = 0,
):
    """Paginated audit history with filters.

    Query parameters:
        date_from  : start date (YYYY-MM-DD), defaults to today
        date_to    : end date (YYYY-MM-DD), defaults to date_from
        user_id    : filter by TIM identifier
        session_id : filter by session
        result     : filter by outcome (success, failed, recovered, skipped)
        action_type: filter by action type (click, type, key_combo, etc.)
        workflow_id: filter by workflow
        domain     : filter by business domain
        limit      : max number of results (default 100, max 1000)
        offset     : pagination offset

    Returns the matching entries sorted by descending timestamp.
    """
    # Clamp pagination parameters to sane bounds to prevent abuse.
    if limit < 1:
        limit = 1
    elif limit > 1000:
        limit = 1000
    if offset < 0:
        offset = 0

    entries = _audit_trail.query(
        date_from=date_from,
        date_to=date_to,
        user_id=user_id,
        session_id=session_id,
        result=result,
        action_type=action_type,
        workflow_id=workflow_id,
        domain=domain,
        limit=limit,
        offset=offset,
    )

    return {
        "status": "ok",
        "count": len(entries),
        "offset": offset,
        "limit": limit,
        "entries": entries,
    }
|
||||
|
||||
|
||||
@app.get("/api/v1/audit/summary")
async def audit_summary(
    date: str = "",
):
    """Daily audit summary.

    Query parameter:
        date : target date (YYYY-MM-DD), defaults to today

    Returns aggregated statistics: action count, success rate, and
    breakdowns by user, outcome, action type, workflow and mode.
    """
    daily = _audit_trail.get_summary(target_date=date)
    return {"status": "ok", **daily}
|
||||
|
||||
|
||||
@app.get("/api/v1/audit/export")
async def audit_export(
    date_from: str = "",
    date_to: str = "",
    user_id: str = "",
    session_id: str = "",
):
    """CSV export of the audit history.

    Query parameters:
        date_from  : start date (YYYY-MM-DD), defaults to today
        date_to    : end date (YYYY-MM-DD), defaults to date_from
        user_id    : filter by TIM identifier
        session_id : filter by session

    Returns the CSV file as plain text (Content-Type: text/csv).

    Raises:
        HTTPException(404) when no audit entry matches the filters.
    """
    from fastapi.responses import Response

    csv_data = _audit_trail.export_csv(
        date_from=date_from,
        date_to=date_to,
        user_id=user_id,
        session_id=session_id,
    )

    if not csv_data:
        raise HTTPException(
            status_code=404,
            detail="Aucune entrée d'audit trouvée pour les filtres spécifiés.",
        )

    # Build the download file name from the date range
    filename = f"audit_{date_from or 'today'}"
    if date_to and date_to != date_from:
        filename += f"_to_{date_to}"
    filename += ".csv"

    return Response(
        content=csv_data,
        media_type="text/csv; charset=utf-8",
        headers={
            # Fixed: the computed filename was previously unused and a
            # literal placeholder was sent in the header instead.
            "Content-Disposition": f'attachment; filename="{filename}"',
        },
    )
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Task Planner — Comprendre et exécuter des ordres en langage naturel
|
||||
# =========================================================================
|
||||
|
||||
from .task_planner import TaskPlanner
|
||||
|
||||
_task_planner = TaskPlanner()
|
||||
|
||||
|
||||
class TaskRequest(BaseModel):
    """Natural-language task request.

    Example instruction: "Traite les dossiers de janvier".
    """

    instruction: str  # Natural-language order to execute
    machine_id: str = "default"  # Target machine
    dry_run: bool = False  # True = plan only, do not execute
|
||||
|
||||
|
||||
@app.post("/api/v1/task")
async def execute_task(request: TaskRequest):
    """Execute a task described in natural language.

    Léa understands the instruction, finds the matching workflow, and
    runs it. This is the main user-facing entry point.

    Examples:
        - "Ouvre le bloc-notes et écris bonjour"
        - "Traite les dossiers de janvier"
        - "Recherche voiture électrique sur Google"
    """
    import asyncio

    # 1. List the available workflows
    workflows = _list_available_workflows()

    # 2. Understand the instruction. The planner call is blocking, so run
    #    it in a worker thread; get_running_loop() is the correct call
    #    inside a coroutine (get_event_loop() is deprecated here).
    loop = asyncio.get_running_loop()
    plan = await loop.run_in_executor(
        None,
        lambda: _task_planner.understand(
            instruction=request.instruction,
            available_workflows=workflows,
        ),
    )

    if not plan.understood:
        return {
            "status": "not_understood",
            "instruction": request.instruction,
            "error": plan.error or "Instruction non comprise",
            "plan": plan.to_dict(),
        }

    # 3. Dry run = return the plan without executing it
    if request.dry_run:
        return {
            "status": "planned",
            "instruction": request.instruction,
            "plan": plan.to_dict(),
        }

    # 4. Execute
    def replay_callback(session_id="", machine_id="", params=None, actions=None, task_description=""):
        """Launch a replay on behalf of the planner.

        Two modes: replay of a known session (session_id given) or
        free-form planned actions. Returns the replay_id on success and
        raises on HTTP failure.
        """
        if session_id:
            # Replay mode: re-run a known workflow
            import requests as _req
            resp = _req.post(
                f"http://localhost:5005/api/v1/traces/stream/replay-session"
                f"?session_id={session_id}&machine_id={machine_id}",
                headers={"Authorization": f"Bearer {API_TOKEN}"},
                timeout=600,
            )
            if resp.ok:
                return resp.json().get("replay_id", "")
            raise Exception(f"Replay échoué: {resp.text[:200]}")
        elif actions:
            # Free mode: planned actions
            import requests as _req
            resp = _req.post(
                f"http://localhost:5005/api/v1/traces/stream/replay/raw",
                json={
                    "session_id": "",
                    "actions": actions,
                    "machine_id": machine_id,
                    "task_description": task_description,
                },
                headers={"Authorization": f"Bearer {API_TOKEN}"},
                timeout=30,
            )
            if resp.ok:
                return resp.json().get("replay_id", "")
            raise Exception(f"Replay raw échoué: {resp.text[:200]}")
        # NOTE(review): falls through to None when neither session_id nor
        # actions is provided — the planner presumably treats None as
        # "nothing launched"; confirm before tightening this.

    result = await loop.run_in_executor(
        None,
        lambda: _task_planner.execute(
            plan=plan,
            replay_callback=replay_callback,
            machine_id=request.machine_id,
        ),
    )

    return {
        "status": "executed" if result.success else "failed",
        "instruction": request.instruction,
        "plan": plan.to_dict(),
        "result": result.to_dict(),
    }
|
||||
|
||||
|
||||
@app.get("/api/v1/task/capabilities")
async def list_capabilities():
    """List what Léa can do (learned workflows)."""
    available = _list_available_workflows()
    return {
        "capabilities": _task_planner.list_capabilities(available),
        "workflows": available,
        "total": len(available),
    }
|
||||
|
||||
|
||||
def _list_available_workflows() -> List[Dict[str, Any]]:
    """Enumerate recorded sessions usable as workflows by the planner.

    Scans LIVE_SESSIONS_DIR for <machine>/<sess_*>/live_events.jsonl
    files and builds one workflow descriptor per session found. Any
    error is logged at debug level and yields an empty (or partial) list.
    """
    found: List[Dict[str, Any]] = []

    try:
        for machine_dir in LIVE_SESSIONS_DIR.iterdir():
            # Skip files and internal bookkeeping folders
            if not machine_dir.is_dir():
                continue
            if machine_dir.name.startswith((".", "embeddings", "streaming")):
                continue
            for session_dir in machine_dir.iterdir():
                if not session_dir.is_dir() or not session_dir.name.startswith("sess_"):
                    continue
                events_file = session_dir / "live_events.jsonl"
                if not events_file.is_file():
                    continue
                # Derive a business description from the recorded events
                desc = _extract_session_description(events_file)
                found.append({
                    "session_id": session_dir.name,
                    "name": desc.get("name", session_dir.name),
                    "description": desc.get("description", ""),
                    "machine": machine_dir.name,
                    "event_count": desc.get("event_count", 0),
                })
    except Exception as e:
        logger.debug(f"Erreur listage workflows: {e}")

    return found
|
||||
|
||||
|
||||
def _extract_session_description(events_file) -> Dict[str, Any]:
    """Extract a business-oriented description of a session from its events.

    Analyses the recorded events to produce a semantic description (not
    just a list of apps) that helps the TaskPlanner match instructions.

    Examples of produced descriptions:
      - "Ouvrir Bloc-notes via Exécuter (Win+R) et écrire du texte"
      - "Naviguer dans l'Explorateur de fichiers et ouvrir des images"
      - "Utiliser cmd.exe pour exécuter des commandes"

    Args:
        events_file: path to a live_events.jsonl file (one JSON per line).

    Returns a dict with keys: name, description, event_count, apps,
    typed_text_preview. On any error a minimal placeholder is returned.

    Fixed: removed the dead `apps` set that was populated but never read.
    """
    try:
        app_names = set()        # Application names (right-hand part of window titles)
        typed_texts = []         # Text typed by the user
        key_combos = []          # Keyboard shortcuts used
        event_types = {}         # Counter per event type
        window_sequence = []     # Sequence of visited windows (flow)
        event_count = 0

        with open(events_file) as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                event_count += 1
                if event_count > 100:  # Cap the scan; the first 100 events suffice
                    break
                try:
                    obj = json.loads(line)
                    evt = obj.get("event", obj)
                    evt_type = evt.get("type", "")

                    # Count event types
                    event_types[evt_type] = event_types.get(evt_type, 0) + 1

                    # Collect windows; only consecutive duplicates are
                    # deduplicated (the [-1:] slice is safe on an empty list).
                    title = evt.get("window", {}).get("title", "")
                    if title and title not in ("unknown_window", "Program Manager"):
                        if title not in window_sequence[-1:]:
                            window_sequence.append(title)
                            # Extract the app name (right-hand part of the title)
                            for sep in [" – ", " - ", " — "]:
                                if sep in title:
                                    app_names.add(title.split(sep)[-1].strip())
                                    break
                            else:
                                app_names.add(title[:30])

                    # Collect typed text (ignore single keystrokes)
                    if evt_type == "text_input":
                        text = evt.get("text", "")
                        if text and len(text) > 1:
                            typed_texts.append(text)

                    # Collect keyboard shortcuts
                    if evt_type == "key_combo":
                        keys = evt.get("keys", [])
                        if keys:
                            key_combos.append("+".join(keys))

                    # Window focus change → extend the flow
                    if evt_type == "window_focus_change":
                        to_title = evt.get("to", {}).get("title", "")
                        if to_title and to_title not in ("unknown_window", "Program Manager"):
                            if to_title not in window_sequence[-1:]:
                                window_sequence.append(to_title)

                except json.JSONDecodeError:
                    continue

        # --- Build the semantic description ---
        apps_list = sorted(app_names)[:5]
        apps_str = ", ".join(apps_list)

        desc_parts = []

        # Detect common launcher patterns
        has_run_dialog = any("Exécuter" in w for w in window_sequence)
        has_search = any("Rechercher" in w or "Recherche" in w for w in window_sequence)
        has_win_r = "win+r" in [k.lower() for k in key_combos]
        has_win_s = "win+s" in [k.lower() for k in key_combos]

        # Main applications used (launchers excluded)
        main_apps = [a for a in apps_list if a not in ("Exécuter", "Rechercher")]
        launcher = ""
        if has_run_dialog or has_win_r:
            launcher = "via Exécuter (Win+R)"
        elif has_search or has_win_s:
            launcher = "via la recherche Windows"

        if main_apps:
            verb = "Ouvrir" if launcher else "Utiliser"
            desc_parts.append(f"{verb} {', '.join(main_apps)} {launcher}".strip())
        elif launcher:
            desc_parts.append(f"Lancer une application {launcher}")

        # Typed text: long input → generic mention, short input → quote it
        total_typed = "".join(typed_texts)
        if len(total_typed) > 5:
            desc_parts.append("écrire du texte")
        elif typed_texts:
            desc_parts.append(f"saisir '{total_typed[:30]}'")

        # Notable keyboard shortcuts (launcher combos excluded)
        notable_combos = [k for k in key_combos if k.lower() not in ("win+r", "win+s")]
        if notable_combos:
            combos_str = ", ".join(sorted(set(notable_combos))[:3])
            desc_parts.append(f"raccourcis : {combos_str}")

        # Click count, only mentioned when substantial
        click_count = event_types.get("mouse_click", 0)
        if click_count > 5:
            desc_parts.append(f"{click_count} clics")

        description = " et ".join(desc_parts) if desc_parts else f"Workflow avec {apps_str}"
        name = apps_str or "Session sans nom"

        return {
            "name": name,
            "description": description,
            "event_count": event_count,
            "apps": apps_list,
            "typed_text_preview": total_typed[:50] if typed_texts else "",
        }
    except Exception:
        return {"name": "?", "description": "", "event_count": 0}
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import uvicorn
|
||||
|
||||
|
||||
Reference in New Issue
Block a user