Files
Geniusia_v2/geniusia2/core/orchestrator.py
2026-03-05 00:20:25 +01:00

2258 lines
85 KiB
Python

"""
Orchestrateur - Boucle cognitive principale pour RPA Vision V2
Implémente le paradigme Observer → Réfléchir → Agir → Apprendre
"""
import time
import signal
import threading
from typing import Dict, Any, Optional, List
from datetime import datetime
import numpy as np
from .learning_manager import LearningManager
from .utils.vision_utils import VisionUtils
from .llm_manager import LLMManager
from .utils.image_utils import capture_screen, get_active_window
from .utils.input_utils import InputUtils
from .logger import Logger
from .models import Action, Detection
from .config import get_config, get_performance_config
from .whitelist_manager import WhitelistManager
from .event_capture import EventCapture
from .vision_analysis import VisionAnalysis
from .vision_search import VisionSearch
from .suggestion_manager import SuggestionManager
from .task_replay import TaskReplayEngine
from .session_manager import SessionManager
from .workflow_detector import WorkflowDetector
from .enhanced_workflow_matcher import EnhancedWorkflowMatcher
from .multimodal_embedding_manager import MultiModalEmbeddingManager
from .enriched_screen_capture import EnrichedScreenCapture
# New embedding system
from .embedders import (
EmbeddingManager as NewEmbeddingManager,
FAISSIndex,
LightweightFineTuner
)
from PIL import Image
import cv2
import os
class Orchestrator:
"""
Orchestrateur principal implémentant la boucle cognitive
Observer → Réfléchir → Agir → Apprendre → Répéter
"""
def __init__(
    self,
    learning_manager: LearningManager,
    vision_utils: VisionUtils,
    llm_manager: LLMManager,
    logger: Logger,
    gui=None,
    config: Optional[Dict[str, Any]] = None,
    whitelist_manager: Optional[WhitelistManager] = None,
    input_utils: Optional[InputUtils] = None
):
    """
    Initialize the orchestrator with every component of the cognitive loop.

    Args:
        learning_manager: Learning manager (modes, tasks, confidence).
        vision_utils: Vision utilities for UI element detection.
        llm_manager: LLM manager used for reasoning.
        logger: Logger for action/security journaling.
        gui: Optional GUI instance (used for suggestions/notifications).
        config: Global configuration (falls back to get_config() when None).
        whitelist_manager: Window whitelist manager (a new one is created when None).
        input_utils: Input utilities (a new one is created when None).
    """
    self.learning_manager = learning_manager
    self.vision = vision_utils
    self.llm = llm_manager
    self.logger = logger
    self.gui = gui
    self.config = config or get_config()
    self.perf_config = get_performance_config()
    # Input utilities used to actually execute actions.
    self.input_utils = input_utils or InputUtils(logger, self.config)
    # Cognitive loop state flags/events.
    self.running = False
    self.paused = False
    self._stop_event = threading.Event()
    self._pause_event = threading.Event()
    # Most recent observed context (frame + active window).
    self.current_context: Dict[str, Any] = {}
    self.current_frame: Optional[np.ndarray] = None
    self.current_window: str = ""
    # Window whitelist enforcement (security gate for auto mode).
    self.whitelist_manager = whitelist_manager or WhitelistManager(logger=logger)
    self.enforce_whitelist = self.config["security"].get("enforce_whitelist", True)
    # Session manager: segments captured actions into sessions.
    self.session_manager = SessionManager(
        logger=logger,
        config=self.config
    )
    self.session_manager.on_session_completed = self._on_session_completed
    # Workflow detector: finds repetitive patterns across sessions.
    self.workflow_detector = WorkflowDetector(
        logger=logger,
        config=self.config
    )
    self.workflow_detector.on_workflow_detected = self._on_workflow_detected
    # User event capture (clicks/keys); wired to our own instances below.
    self.event_capture = EventCapture(
        logger=logger,
        config=self.config
    )
    # Replace the capture's default session/workflow components with ours
    # so callbacks flow back into this orchestrator.
    self.event_capture.session_manager = self.session_manager
    self.event_capture.workflow_detector = self.workflow_detector
    self.event_capture.register_pattern_callback(self._on_pattern_detected)
    # Visual analysis of captured actions (builds signatures).
    self.vision_analyzer = VisionAnalysis(
        embeddings_manager=learning_manager.embeddings_manager,
        vision_utils=vision_utils,
        llm_manager=llm_manager,
        logger=logger
    )
    # Visual search used during task replay.
    self.vision_search = VisionSearch(
        embeddings_manager=learning_manager.embeddings_manager,
        logger=logger
    )
    # New embedding system (EmbeddingManager + FAISS + fine-tuner).
    self._init_new_embedding_system()
    # Multi-modal embedding manager for enhanced workflow matching.
    self.multimodal_manager = MultiModalEmbeddingManager(
        logger=logger,
        data_dir=self.config.get("data_dir", "data")
    )
    # Enriched screen capture (UI element detection).
    ui_detection_mode = self.config.get("ui_detection", {}).get("mode", "light")
    self.enriched_capture = EnrichedScreenCapture(
        logger=logger,
        data_dir=self.config.get("data_dir", "data"),
        mode=ui_detection_mode,
        config={
            "ui_detector": self.config.get("ui_detection", {}).get("detector", {}),
            "multimodal_embedding": self.config.get("multimodal_embedding", {}),
            "enhanced_matcher": self.config.get("enhanced_matcher", {})
        }
    )
    logger.log_action({
        "action": "enriched_capture_initialized",
        "mode": ui_detection_mode
    })
    # Enhanced workflow matcher built on multi-modal embeddings.
    matcher_config = {
        "screen_weight": self.config.get("enhanced_matcher", {}).get("screen_weight", 0.6),
        "elements_weight": self.config.get("enhanced_matcher", {}).get("elements_weight", 0.4),
        "min_similarity_threshold": self.config.get("enhanced_matcher", {}).get("min_similarity_threshold", 0.3),
        "min_confidence_threshold": self.config.get("enhanced_matcher", {}).get("min_confidence_threshold", 0.5)
    }
    self.enhanced_matcher = EnhancedWorkflowMatcher(
        multimodal_manager=self.multimodal_manager,
        logger=logger,
        config=matcher_config
    )
    # Suggestion manager (assist mode).
    self.suggestion_manager = SuggestionManager(
        learning_manager=learning_manager,
        embeddings_manager=learning_manager.embeddings_manager,
        logger=logger,
        config=self.config
    )
    # Replay engine executing accepted suggestions.
    self.replay_engine = TaskReplayEngine(
        learning_manager=learning_manager,
        embeddings_manager=learning_manager.embeddings_manager,
        vision_utils=vision_utils,
        input_utils=self.input_utils,
        logger=logger,
        config=self.config
    )
    # Wire suggestion lifecycle callbacks back into the orchestrator.
    self.suggestion_manager.on_suggestion_created = self._on_suggestion_created
    self.suggestion_manager.on_suggestion_accepted = self._on_suggestion_accepted
    self.suggestion_manager.on_suggestion_rejected = self._on_suggestion_rejected
    self.suggestion_manager.on_suggestion_timeout = self._on_suggestion_timeout
    # Performance / activity counters.
    self.metrics = {
        "total_cycles": 0,
        "avg_latency_ms": 0.0,
        "detections_count": 0,
        "actions_executed": 0,
        "actions_suggested": 0,
        "sessions_created": 0,
        "workflows_detected": 0,
    }
    # Install the emergency-stop handler (SIGINT on Linux/Mac).
    self._setup_emergency_stop()
    # Progressive mode: start in shadow, offer assist after patterns.
    self._progressive_mode = False
    self._assist_proposed = False
    self.logger.log_action({
        "action": "orchestrator_initialized",
        "enforce_whitelist": self.enforce_whitelist,
        "whitelist_size": len(self.whitelist_manager.get_whitelist())
    })
def _init_new_embedding_system(self):
    """
    Initialize the new embedding system: EmbeddingManager, FAISS index
    and lightweight fine-tuner.

    All related attributes are defaulted to None first, so they always
    exist even when a sub-initialization fails (previously
    `_faiss_index_path` was left unset when the FAISS init raised, which
    could cause an AttributeError during cleanup).
    """
    # Safe defaults: every attribute exists regardless of failures below.
    self.new_embedding_manager = None
    self.faiss_index = None
    self.fine_tuner = None
    self._faiss_index_path = None
    self._fine_tuner_checkpoint_name = None
    try:
        # 1. Embedding manager.
        model_name = self.config.get("embedding", {}).get("model", "clip")
        cache_size = self.config.get("embedding", {}).get("cache_size", 1000)
        device = self.config.get("embedding", {}).get("device", "cpu")
        self.new_embedding_manager = NewEmbeddingManager(
            model_name=model_name,
            cache_size=cache_size,
            device=device,
            fallback_enabled=True
        )
        self.logger.log_action({
            "action": "new_embedding_manager_initialized",
            "model": model_name,
            "dimension": self.new_embedding_manager.get_dimension(),
            "device": device
        })
        # 2. FAISS index (load an existing index when one is on disk).
        try:
            index_path = self.config.get("faiss", {}).get("index_path", "data/workflow_embeddings")
            dimension = self.new_embedding_manager.get_dimension()
            self.faiss_index = FAISSIndex(dimension=dimension)
            if os.path.exists(f"{index_path}.index"):
                try:
                    self.faiss_index.load(index_path)
                    self.logger.log_action({
                        "action": "faiss_index_loaded",
                        "path": index_path,
                        "num_embeddings": self.faiss_index.get_stats()["num_embeddings"]
                    })
                except Exception as e:
                    # Corrupt/incompatible index file: keep the fresh empty index.
                    self.logger.log_action({
                        "action": "faiss_index_load_failed",
                        "error": str(e),
                        "creating_new": True
                    })
            else:
                self.logger.log_action({
                    "action": "faiss_index_created_new",
                    "dimension": dimension
                })
            # Remember where to persist the index during cleanup.
            self._faiss_index_path = index_path
        except Exception as e:
            self.logger.log_action({
                "action": "faiss_index_init_failed",
                "error": str(e)
            })
            self.faiss_index = None
        # 3. Lightweight fine-tuner (optional, gated by config).
        try:
            fine_tuning_config = self.config.get("fine_tuning", {})
            if fine_tuning_config.get("enabled", True):
                trigger_threshold = fine_tuning_config.get("trigger_threshold", 10)
                max_examples = fine_tuning_config.get("max_examples", 1000)
                checkpoint_dir = fine_tuning_config.get("checkpoint_dir", "data/fine_tuning")
                os.makedirs(checkpoint_dir, exist_ok=True)
                self.fine_tuner = LightweightFineTuner(
                    embedder=self.new_embedding_manager.embedder,
                    trigger_threshold=trigger_threshold,
                    max_examples=max_examples
                )
                # Resume from a previous checkpoint when one exists.
                checkpoint_name = "orchestrator_finetuning"
                checkpoint_path = os.path.join(checkpoint_dir, f"{checkpoint_name}.pkl")
                if os.path.exists(checkpoint_path):
                    try:
                        self.fine_tuner.load_checkpoint(checkpoint_name)
                        self.logger.log_action({
                            "action": "fine_tuner_checkpoint_loaded",
                            "path": checkpoint_path,
                            "stats": self.fine_tuner.get_stats()
                        })
                    except Exception as e:
                        self.logger.log_action({
                            "action": "fine_tuner_checkpoint_load_failed",
                            "error": str(e)
                        })
                # Remember the checkpoint name for cleanup/persistence.
                self._fine_tuner_checkpoint_name = checkpoint_name
                self.logger.log_action({
                    "action": "fine_tuner_initialized",
                    "trigger_threshold": trigger_threshold,
                    "max_examples": max_examples
                })
            else:
                self.fine_tuner = None
                self.logger.log_action({
                    "action": "fine_tuner_disabled"
                })
        except Exception as e:
            self.logger.log_action({
                "action": "fine_tuner_init_failed",
                "error": str(e)
            })
            self.fine_tuner = None
            self._fine_tuner_checkpoint_name = None
    except Exception as e:
        self.logger.log_action({
            "action": "new_embedding_system_init_failed",
            "error": str(e)
        })
        import traceback
        traceback.print_exc()
        # Full fallback: disable the new embedding system entirely.
        self.new_embedding_manager = None
        self.faiss_index = None
        self.fine_tuner = None
        self._faiss_index_path = None
        self._fine_tuner_checkpoint_name = None
def _setup_emergency_stop(self):
    """Install the SIGINT handler used as the emergency-stop trigger.

    Ctrl+Pause is not standard across operating systems, so SIGINT
    (Ctrl+C) serves as the emergency stop on Linux/Mac.
    """
    def _handle_emergency(signum, frame):
        # Record the security event, then shut the cognitive loop down.
        self.logger.log_security_event({
            "event_type": "emergency_stop",
            "window": self.current_window,
            "details": "Arrêt d'urgence déclenché par l'utilisateur"
        })
        self.stop()

    try:
        signal.signal(signal.SIGINT, _handle_emergency)
    except Exception as exc:
        # e.g. not running in the main thread — log and continue.
        self.logger.log_action({
            "action": "emergency_stop_setup_failed",
            "error": str(exc)
        })
def _generate_task_description_with_llm(
    self,
    events: List[Dict[str, Any]],
    signatures: List[Dict[str, Any]],
    pattern: Dict[str, Any]
) -> str:
    """
    Generate a short, human-readable task name via the vision LLM.

    Args:
        events: Captured events (each may carry a 'screenshot').
        signatures: Visual signatures built for the events (currently
            unused here; kept for interface stability).
        pattern: Detected pattern metadata (window, repetitions, ...).

    Returns:
        A short description, or a window-based fallback on any failure.
    """
    def _fallback_description() -> str:
        # Single normalization point for a missing/blank window title.
        # (Previously the screenshot-less fallback used a different
        # default string, 'Unknown'; it is now consistent everywhere.)
        window_name = pattern.get('window', 'Unknown Window')
        if not window_name or window_name.strip() == "":
            window_name = "Unknown Window"
        return f"Tâche dans {window_name}"

    try:
        # Collect the screenshots attached to the captured events.
        screenshots = [event['screenshot'] for event in events if event.get('screenshot') is not None]
        if not screenshots:
            return _fallback_description()
        # The LLM backend must be up (Ollama serving the vision model).
        if not self.llm or not self.llm.is_available():
            print(f" ⚠️ Qwen3-VL non disponible (Ollama arrêté ?)")
            return _fallback_description()
        window = pattern.get('window', 'Unknown Window')
        if not window or window.strip() == "":
            window = "Unknown Window"
        repetitions = pattern.get('repetitions', 0)
        action_type = events[0].get('type', 'action') if events else 'action'
        print(f" 📝 Contexte: {window}, {action_type}, {repetitions}x")
        # Map the raw event type to a short French action label.
        action_fr = "Clic" if action_type == "mouse_click" else "Défilement" if action_type == "scroll" else "Touche" if action_type == "key_press" else action_type
        # Simple, direct prompt (Gemma3 has no "thinking" mode).
        prompt = f"""Regarde cette image. L'utilisateur a fait: {action_fr} ({repetitions} fois) dans {window}
Donne un nom court pour cette action en 2-3 mots maximum.
Exemples: "Rafraîchir page", "Ouvrir menu", "Fermer fenêtre"
Nom:"""
        print(f" 🔄 Appel Gemma3 avec vision...")
        # One screenshot is enough context for naming the action.
        response = self.llm.generate_with_vision(
            prompt=prompt,
            images=screenshots[:1]
        )
        print(f" ✅ Réponse reçue (brute): '{response}'")
        print(f" 📏 Longueur: {len(response)} caractères")
        print(f" 🔤 Type: {type(response)}")
        # Clean up the raw answer.
        description = response.strip()
        print(f" 🧹 Après strip: '{description}' (longueur: {len(description)})")
        description = description.rstrip('.!?')
        # Clamp overly long answers: 5 words max, then 60 chars max.
        if len(description) > 60:
            description = ' '.join(description.split()[:5])
            if len(description) > 60:
                description = description[:57] + "..."
        # Empty/too-short answers fall back to a window-based name.
        if not description or len(description) < 3:
            description = _fallback_description()
            print(f" ⚠️ Réponse LLM invalide, fallback: {description}")
        else:
            print(f" ✓ Description générée: {description}")
        return description
    except Exception as e:
        print(f" ❌ Erreur LLM: {e}")
        import traceback
        traceback.print_exc()
        return _fallback_description()
def _on_session_completed(self, session):
    """
    Callback invoked when a session finishes.

    Logs the session, bumps the session counter, and hands the session
    to the WorkflowDetector for pattern analysis.
    """
    self.metrics["sessions_created"] += 1
    self.logger.log_action({
        "action": "session_completed",
        "session_id": session.session_id,
        "action_count": session.action_count,
        "duration_seconds": session.duration.total_seconds(),
        "window": session.window
    })
    try:
        self.workflow_detector.analyze_session(session)
    except Exception as exc:
        # Analysis failures must not break the capture pipeline.
        self.logger.log_action({
            "action": "workflow_analysis_failed",
            "session_id": session.session_id,
            "error": str(exc)
        })
def _on_workflow_detected(self, workflow: Dict[str, Any]):
    """
    Callback invoked for each detected workflow.

    Logs the workflow, indexes it in FAISS, notifies the
    SuggestionManager and surfaces a notification in the GUI.
    """
    self.metrics["workflows_detected"] += 1
    self.logger.log_action({
        "action": "workflow_detected",
        "workflow_id": workflow.get("workflow_id"),
        "workflow_name": workflow.get("name"),
        "pattern_length": len(workflow.get("pattern", [])),
        "confidence": workflow.get("confidence"),
        "repetitions": workflow.get("repetitions")
    })
    # Index into the new FAISS-based embedding system.
    self._index_workflow_in_faiss(workflow)
    try:
        self.suggestion_manager.on_workflow_detected(workflow)
    except Exception as exc:
        # Keep going even if the suggestion pipeline rejects the workflow.
        self.logger.log_action({
            "action": "workflow_notification_failed",
            "workflow_id": workflow.get("workflow_id"),
            "error": str(exc)
        })
    # GUI notification (needs both the GUI and the log hook).
    if self.gui and hasattr(self, 'log_to_gui'):
        shown_name = workflow.get("name", "Workflow détecté")
        shown_conf = workflow.get("confidence", 0.0)
        self.log_to_gui(
            "🎯",
            f"Workflow détecté: {shown_name} (confiance: {shown_conf:.0%})",
            "success"
        )
def _on_pattern_detected(self, pattern: Dict[str, Any]):
    """
    Callback invoked when a repetitive pattern is detected.

    Analyzes the last captured actions with the vision pipeline, asks
    the LLM for a task name, stores the learned task, and optionally
    proposes switching to assist mode (progressive mode).
    """
    print(f"\n{'='*60}")
    print(f"🎯 PATTERN DÉTECTÉ DANS ORCHESTRATOR !")
    print(f"{'='*60}")
    print(f"Répétitions: {pattern['repetitions']}")
    print(f"Longueur: {pattern['length']}")
    print(f"Fenêtre: {pattern.get('window', 'Unknown')}")
    # GUI log: pattern detected.
    if hasattr(self, 'log_to_gui'):
        self.log_to_gui("🎯", f"Pattern détecté ! ({pattern['repetitions']} répétitions)", "success")
    try:
        # 1. Fetch the latest events that carry screenshots.
        print(f"🔍 Récupération des screenshots...")
        last_events = self.event_capture.get_last_screenshots(3)
        print(f"📊 Screenshots trouvés: {len(last_events)}")
        if len(last_events) < 3:
            print(f"⚠️ Pas assez de screenshots capturés (besoin de 3, trouvé {len(last_events)})")
            print(f"💡 Astuce: Les screenshots sont capturés seulement pour les clics et certaines touches")
            return
        print(f"📸 Analyse de {len(last_events)} actions...")
        # 2. Analyze each action with the vision pipeline (best effort:
        # a failing event is skipped, not fatal).
        signatures = []
        for i, event in enumerate(last_events):
            print(f" 🔬 Analyse action {i+1}/{len(last_events)}...")
            try:
                signature = self.vision_analyzer.analyze_action(
                    screenshot=event['screenshot'],
                    x=event.get('x', 0),
                    y=event.get('y', 0),
                    action_type=event['type'],
                    window=event['window']
                )
                signatures.append(signature)
                print(f" ✓ Signature créée")
            except Exception as e:
                print(f" ❌ Erreur: {e}")
                import traceback
                traceback.print_exc()
        if not signatures:
            print(" ❌ Échec de l'analyse - aucune signature créée")
            return
        print(f"{len(signatures)} signatures créées")
        # 3. Ask the vision LLM for an intelligent task description.
        print(f" 🤖 Analyse avec Qwen3-VL...")
        task_description = self._generate_task_description_with_llm(
            last_events,
            signatures,
            pattern
        )
        # 4. Create a learned task from the signatures.
        task = self.learning_manager.create_task_from_signatures(
            signatures,
            description=task_description
        )
        print(f" ✅ Tâche créée : {task.task_id}")
        print(f" 📝 Description : {task.task_name}")
        # GUI log: task created.
        if hasattr(self, 'log_to_gui'):
            self.log_to_gui("📚", f"Tâche apprise : {task.task_name}", "info")
        # Refresh stats (learned tasks are counted as workflows).
        if hasattr(self, 'update_gui_stats'):
            workflows_count = len(self.learning_manager.get_all_tasks())
            self.update_gui_stats(workflows_count=workflows_count)
        # 5. Offer switching to assist mode when in progressive mode.
        current_mode = self.learning_manager.get_mode()
        print(f"\n🔍 DEBUG: Mode actuel = {current_mode}")
        print(f"🔍 DEBUG: Progressive mode = {getattr(self, '_progressive_mode', False)}")
        print(f"🔍 DEBUG: Assist proposed = {getattr(self, '_assist_proposed', False)}")
        if current_mode == "shadow" and hasattr(self, '_progressive_mode') and self._progressive_mode:
            print(f"✅ Conditions remplies pour proposer le mode assist!")
            self._propose_assist_mode(task)
        else:
            print(f"❌ Conditions NON remplies pour proposer le mode assist")
        # 6. Notify the GUI about the new task and remaining repetitions.
        if hasattr(self, 'log_to_gui'):
            remaining = max(0, 20 - task.observation_count)
            self.log_to_gui(
                "🎉",
                f"Tâche apprise : {task.task_name} (encore {remaining} répétitions pour rejeu)",
                "success"
            )
    except Exception as e:
        print(f"\n❌ ERREUR DANS LE CALLBACK:")
        print(f" {e}")
        import traceback
        traceback.print_exc()
def _extract_intent_from_pattern(self, pattern: Dict[str, Any]) -> Optional[str]:
    """
    Derive a vision-detectable intent term from a pattern's event types.

    Returns:
        "form" for mixed clicks+keys, "text field" for keys only,
        "button" otherwise (including the default), or None when the
        pattern carries no sequence.
    """
    sequence = pattern.get("sequence", [])
    if not sequence:
        return None
    # Classify by which event kinds appear in the sequence.
    kinds = {event["type"] for event in sequence}
    clicked = "click" in kinds
    typed = "key" in kinds
    if clicked and typed:
        return "form"          # likely form filling
    if typed:
        return "text field"    # likely text entry
    return "button"            # clicks only, or default
def replay_task(self, task_id: str) -> bool:
    """
    Replay a learned task by visually locating each recorded target.

    Args:
        task_id: Identifier of the task to replay.

    Returns:
        True when every action was located and executed, False on a
        missing task, an unlocatable element, or an execution failure.
    """
    task = self.learning_manager.load_task(task_id)
    if not task or not hasattr(task, 'signatures'):
        print(f"❌ Tâche {task_id} introuvable")
        return False
    print(f"🎬 Rejeu de la tâche : {task.description}")
    for i, signature in enumerate(task.signatures):
        print(f" Action {i+1}/{len(task.signatures)}...")
        # 1. Capture the current screen state.
        current_screenshot = capture_screen()
        # 2. Visually locate the element recorded in the signature.
        result = self.vision_search.find_element(
            current_screenshot,
            signature,
            confidence_threshold=0.8
        )
        if not result:
            # Abort the replay: the recorded element is not on screen.
            print(f" ❌ Élément introuvable")
            if hasattr(self, 'log_to_gui'):
                self.log_to_gui(
                    "⚠️",
                    f"Élément introuvable pour l'action {i+1}",
                    "warning"
                )
            return False
        x, y, confidence = result
        print(f" ✓ Élément trouvé à ({x}, {y}) - confiance: {confidence:.2f}")
        # 3. Execute the recorded action type at the located position.
        action_type = signature.get('action_type', 'mouse_click')
        try:
            if action_type == 'mouse_click':
                self.input_utils.click(x, y)
            elif action_type == 'scroll':
                self.input_utils.scroll(signature.get('dy', 0))
            elif action_type == 'key_press':
                self.input_utils.type_text(signature.get('key', ''))
            print(f" ✓ Action exécutée")
            # Short pause between actions. `time` is already imported at
            # module level; the original re-imported it on every loop
            # iteration, which was redundant.
            time.sleep(0.5)
        except Exception as e:
            print(f" ❌ Échec de l'exécution: {e}")
            return False
    print(f"✅ Tâche rejouée avec succès !")
    return True
def run(self):
    """
    Main cognitive loop: Observe → Reason → Act → Learn, repeated until
    stop() is called or the stop event is set.

    Also starts user-event capture and periodically refreshes GUI stats.
    The redundant `if intent:` branch that immediately followed the
    `if not intent: continue` guard has been removed (it was always true).
    """
    self.running = True
    self._stop_event.clear()
    # Start capturing user events (clicks, keys, ...) for learning.
    self.event_capture.start()
    self.logger.log_action({
        "action": "orchestrator_started",
        "mode": self.learning_manager.get_mode()
    })
    # GUI log: observation started.
    if hasattr(self, 'log_to_gui'):
        self.log_to_gui("🚀", "Démarrage de l'observation...", "info")
        self.log_to_gui("👀", "Observation active - En attente d'actions...", "success")
    # Counters driving the periodic GUI refresh (~every 10 iterations).
    last_event_count = 0
    gui_update_counter = 0
    try:
        while self.running and not self._stop_event.is_set():
            # Honor pause without burning CPU.
            if self.paused:
                time.sleep(0.1)
                continue
            cycle_start = time.time()
            # Periodic GUI stats refresh.
            gui_update_counter += 1
            if gui_update_counter >= 10 and hasattr(self, 'update_gui_stats'):
                current_event_count = len(self.event_capture.events)
                if current_event_count != last_event_count:
                    # New activity observed: push fresh counters to the GUI.
                    workflows_count = len(self.learning_manager.get_all_tasks())
                    patterns_count = len(self.workflow_detector.detected_workflows) if hasattr(self.workflow_detector, 'detected_workflows') else 0
                    self.update_gui_stats(
                        actions_count=current_event_count,
                        patterns_count=patterns_count,
                        workflows_count=workflows_count
                    )
                    # Only log when the count actually increased.
                    if current_event_count > last_event_count and hasattr(self, 'log_to_gui'):
                        new_actions = current_event_count - last_event_count
                        if new_actions > 0:
                            window = self.current_window or "Application"
                            self.log_to_gui("👀", f"{new_actions} action(s) observée(s) dans {window}", "info")
                    last_event_count = current_event_count
                gui_update_counter = 0
            try:
                # 1. OBSERVE - capture screen + active window.
                context = self.capture_context()
                if not context:
                    # No valid context; back off briefly.
                    time.sleep(0.5)
                    continue
                # 1.5. Check whether an action should be suggested (assist mode).
                try:
                    self.check_for_suggestions()
                except Exception as e:
                    self.logger.log_action({
                        "action": "suggestion_check_error",
                        "error": str(e)
                    })
                # 2. REASON - only when an explicit intent is set. In
                # shadow mode without intent we observe passively; user
                # actions are captured via system hooks for learning.
                intent = self.learning_manager.get_current_intent()
                if not intent:
                    time.sleep(0.1)
                    continue
                detections = self.detect_elements(context["frame"], intent)
                if detections:
                    decision = self.reason_about_action(detections, context)
                    # 3. ACT - execute or suggest depending on the mode.
                    self.execute_or_suggest(decision)
                # 4. LEARN - handled via callbacks in execute_or_suggest.
                cycle_time = (time.time() - cycle_start) * 1000  # ms
                self._update_metrics(cycle_time)
                # Flag cycles exceeding the configured latency budget.
                if cycle_time > self.perf_config["max_latency_ms"]:
                    self.logger.log_action({
                        "action": "performance_warning",
                        "cycle_time_ms": cycle_time,
                        "threshold_ms": self.perf_config["max_latency_ms"]
                    })
                # Small pause to avoid pegging the CPU.
                time.sleep(0.1)
            except Exception as e:
                self.logger.log_action({
                    "action": "cycle_error",
                    "error": str(e),
                    "window": self.current_window
                })
                time.sleep(1)  # longer back-off after an error
    finally:
        self.running = False
        self.logger.log_action({
            "action": "orchestrator_stopped",
            "total_cycles": self.metrics["total_cycles"],
            "avg_latency_ms": self.metrics["avg_latency_ms"]
        })
def capture_context(self) -> Optional[Dict[str, Any]]:
    """
    Snapshot the current screen and active window.

    Returns:
        A dict with keys frame, window_title, timestamp, frame_shape,
        or None when any part of the capture fails.
    """
    try:
        frame = capture_screen()
        window_title = get_active_window()
        # Cache the latest observation on the instance for other components.
        self.current_frame = frame
        self.current_window = window_title
        self.current_context = {
            "frame": frame,
            "window_title": window_title,
            "timestamp": datetime.now(),
            "frame_shape": frame.shape
        }
        return self.current_context
    except Exception as exc:
        self.logger.log_action({
            "action": "capture_context_error",
            "error": str(exc)
        })
        return None
def detect_elements(self, frame: np.ndarray, intent: str) -> List[Detection]:
    """
    Detect UI elements in a frame, guided by the user intent.

    Args:
        frame: Screen capture to analyze.
        intent: Current user intent; used directly as detection prompt.

    Returns:
        Filtered, de-duplicated detections (empty list on error).
    """
    try:
        # For now the full intent string is the detection prompt; the
        # vision layer handles model fallback internally.
        detections = self.vision.detect(intent, frame)
        if detections:
            # Drop low-confidence hits, then merge overlapping boxes.
            detections = self.vision.merge_overlapping_detections(
                self.vision.filter_detections(detections, min_confidence=0.3),
                iou_threshold=0.5
            )
        self.metrics["detections_count"] += len(detections)
        self.logger.log_action({
            "action": "elements_detected",
            "intent": intent,
            "num_detections": len(detections),
            "window": self.current_window
        })
        return detections
    except Exception as exc:
        self.logger.log_action({
            "action": "detection_error",
            "error": str(exc),
            "intent": intent
        })
        return []
def reason_about_action(
    self,
    detections: List[Detection],
    context: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Ask the LLM which detected element (if any) the action should target.

    Args:
        detections: UI element detections from the vision stage.
        context: Current context (frame, window_title, timestamp, ...).

    Returns:
        On success, a decision dict with keys action/detection/confidence/
        llm_reasoning/task_id. When no suitable element is selected, or on
        error, a dict with action=None and confidence 0.0.
    """
    try:
        # Serialize detections into plain dicts for the LLM call.
        detections_data = [
            {
                "label": d.label,
                "confidence": d.confidence,
                "bbox": d.bbox,
                "roi_image": d.roi_image,
                "model_source": d.model_source
            }
            for d in detections
        ]
        # Current user intent guides the LLM's choice.
        intent = self.learning_manager.get_current_intent()
        llm_result = self.llm.reason_about_detections(
            detections_data,
            context,
            intent
        )
        # Build the decision from the LLM's selection.
        selected_element = llm_result.get("selected_element")
        llm_score = llm_result.get("llm_score", 0.5)
        if selected_element:
            # Map the LLM's pick back to the original Detection object
            # by matching both label and bounding box.
            selected_detection = None
            for d in detections:
                if (d.label == selected_element.get("label") and
                    d.bbox == tuple(selected_element.get("bbox", []))):
                    selected_detection = d
                    break
            if selected_detection:
                # Blend vision confidence, LLM score and task history into
                # a single confidence value.
                vision_conf = selected_detection.confidence
                task_id = self.learning_manager.current_task_id
                confidence = self.learning_manager.calculate_confidence(
                    vision_conf,
                    llm_score,
                    task_id or "default"
                )
                # Build the concrete action to execute/suggest.
                action = Action(
                    action_type="click",  # default; refined later
                    target_element=selected_detection.label,
                    bbox=selected_detection.bbox,
                    confidence=confidence,
                    embedding=selected_detection.embedding,
                    timestamp=datetime.now(),
                    window_title=context.get("window_title", ""),
                    parameters={}
                )
                decision = {
                    "action": action,
                    "detection": selected_detection,
                    "confidence": confidence,
                    "llm_reasoning": llm_result.get("reasoning", ""),
                    "task_id": task_id
                }
                self.logger.log_action({
                    "action": "decision_made",
                    "target_element": action.target_element,
                    "confidence": confidence,
                    "llm_score": llm_score,
                    "vision_conf": vision_conf
                })
                return decision
        # No valid decision could be built.
        return {
            "action": None,
            "confidence": 0.0,
            "reasoning": "Aucun élément approprié trouvé"
        }
    except Exception as e:
        self.logger.log_action({
            "action": "reasoning_error",
            "error": str(e)
        })
        return {
            "action": None,
            "confidence": 0.0,
            "reasoning": f"Erreur: {str(e)}"
        }
def execute_or_suggest(self, decision: Dict[str, Any]):
    """
    Dispatch a decision according to the current operating mode.

    shadow: record the action for learning only.
    assist: surface a suggestion (GUI when available) and learn from feedback.
    auto:   execute directly, subject to the window whitelist.
    """
    action = decision.get("action")
    if not action:
        return
    mode = self.learning_manager.get_mode()
    if mode == "shadow":
        # Observe-only: feed the action to the learning manager.
        self.learning_manager.observe(action)
        if hasattr(self, 'log_to_gui'):
            self.log_to_gui(
                "👀",
                f"Observation: {action.target_element}",
                "info"
            )
        return
    if mode == "assist":
        self.metrics["actions_suggested"] += 1
        if self.gui:
            # Ask the user to validate, then learn from the answer.
            feedback = self.gui.show_suggestion(decision)
            self.learn_from_feedback(feedback, decision)
        else:
            # Headless: just record that a suggestion was made.
            self.logger.log_action({
                "action": "suggestion_made",
                "target_element": action.target_element,
                "confidence": decision.get("confidence", 0.0),
                "mode": "assist"
            })
        return
    if mode == "auto":
        if self._check_whitelist(action.window_title):
            self.execute_action(decision)
            return
        # Blocked: the target window is not whitelisted.
        self.logger.log_security_event({
            "event_type": "whitelist_violation",
            "window": action.window_title,
            "action_attempted": action.action_type,
            "target_element": action.target_element,
            "details": "Fenêtre non autorisée dans la liste blanche"
        })
        if hasattr(self, 'log_to_gui'):
            self.log_to_gui(
                "⚠️",
                "Action bloquée: fenêtre non autorisée",
                "warning"
            )
def _check_whitelist(self, window_title: str) -> bool:
    """
    Check whether a window passes the whitelist gate.

    Args:
        window_title: Title of the window to check.

    Returns:
        True when the window is allowed, or when whitelist enforcement
        is disabled in the configuration.
    """
    if self.enforce_whitelist:
        # Delegate the actual matching to the WhitelistManager.
        return self.whitelist_manager.is_window_allowed(window_title)
    # Enforcement disabled: everything is allowed.
    return True
def execute_action(self, decision: Dict[str, Any]) -> None:
    """
    Actually execute an action (Autopilot mode).

    Executes the action through InputUtils, logs the outcome with its
    latency, records successful executions with the learning manager, and
    triggers a rollback of the last 3 actions on failure or exception.

    Args:
        decision: Dictionary describing the action decision; must contain an
            "action" entry (Action) and may carry "confidence" and "task_id".
    """
    action = decision.get("action")
    if not action:
        return
    try:
        execution_start = time.time()
        # Prepare the action payload expected by InputUtils
        action_data = {
            "action_type": action.action_type,
            "bbox": action.bbox,
            "parameters": action.parameters
        }
        # Execute the action through InputUtils
        success = self.input_utils.execute_action(action_data)
        # Execution latency
        execution_time = (time.time() - execution_start) * 1000  # ms
        # Log the outcome
        self.logger.log_action({
            "action": "action_executed",
            "window": action.window_title,
            "action_type": action.action_type,
            "target_element": action.target_element,
            "bbox": list(action.bbox),
            "confidence": action.confidence,
            "mode": "auto",
            "result": "success" if success else "failed",
            "latency_ms": execution_time
        })
        if success:
            # Record the execution with the learning manager
            self.learning_manager.record_execution({
                "action": action,
                "confidence": decision.get("confidence", 0.0),
                "task_id": decision.get("task_id"),
                "latency_ms": execution_time
            })
            self.metrics["actions_executed"] += 1
            # Success notification
            if hasattr(self, 'log_to_gui'):
                self.log_to_gui(
                    "✔️",
                    f"Action exécutée: {action.target_element}",
                    "success"
                )
        else:
            # Execution failed - trigger a rollback
            self.logger.log_action({
                "action": "execution_failed_rollback_triggered",
                "target_element": action.target_element
            })
            # Roll back the last 3 actions
            self.rollback_last_actions(count=3)
            if hasattr(self, 'log_to_gui'):
                self.log_to_gui(
                    "⚠️",
                    "Échec d'exécution - Rollback effectué",
                    "warning"
                )
    except Exception as e:
        self.logger.log_action({
            "action": "execution_error",
            "error": str(e),
            "target_element": action.target_element,
            "mode": "auto",
            "result": "failed"
        })
        # Roll back on error as well
        self.rollback_last_actions(count=3)
        if hasattr(self, 'log_to_gui'):
            self.log_to_gui(
                "",
                f"Erreur d'exécution: {str(e)}",
                "error"
            )
def learn_from_feedback(self, feedback: Dict[str, Any], decision: Dict[str, Any]):
    """
    Update learning state from user feedback on a decision.

    Args:
        feedback: User feedback (accept/reject/correct); enriched in place
            with the decision's task id and action before being recorded.
        decision: The original decision the feedback refers to.
    """
    try:
        # Enrich the feedback with context from the original decision
        feedback.update({
            "task_id": decision.get("task_id"),
            "action": decision.get("action"),
        })
        # Hand the enriched feedback over to the learning manager
        self.learning_manager.confirm_action(feedback)
        self.logger.log_action({
            "action": "feedback_processed",
            "feedback_type": feedback.get("type"),
            "task_id": feedback.get("task_id")
        })
    except Exception as e:
        self.logger.log_action({
            "action": "feedback_error",
            "error": str(e)
        })
def _update_metrics(self, cycle_time_ms: float):
"""
Met à jour les métriques de performance
Args:
cycle_time_ms: Temps du cycle en millisecondes
"""
self.metrics["total_cycles"] += 1
# Moyenne mobile de la latence
alpha = 0.1 # Facteur de lissage
self.metrics["avg_latency_ms"] = (
alpha * cycle_time_ms +
(1 - alpha) * self.metrics["avg_latency_ms"]
)
def stop(self):
    """Stop the cognitive loop and persist state before shutdown."""
    self.running = False
    self._stop_event.set()
    # Stop capturing user events
    self.event_capture.stop()
    # Persist the embedding system (fine-tuner checkpoint, FAISS index)
    self._save_embedding_system_on_shutdown()
    self.logger.log_action({"action": "orchestrator_stop_requested"})
def pause(self):
    """Pause the cognitive loop until resume() is called."""
    self.paused = True
    self._pause_event.set()
    self.logger.log_action({"action": "orchestrator_paused"})
def resume(self):
    """Resume the cognitive loop after a pause."""
    self.paused = False
    self._pause_event.clear()
    self.logger.log_action({"action": "orchestrator_resumed"})
def add_to_whitelist(self, window_pattern: str, admin_confirmed: bool = False):
    """
    Add a window-title pattern to the whitelist.

    Args:
        window_pattern: Window-title pattern to authorize.
        admin_confirmed: If True, bypasses the admin confirmation step.

    Returns:
        The result of the underlying WhitelistManager call.
    """
    # Delegate to the WhitelistManager, tagging the orchestrator as origin
    return self.whitelist_manager.add_to_whitelist(
        window_pattern, admin_confirmed=admin_confirmed, added_by="orchestrator"
    )
def remove_from_whitelist(self, window_pattern: str):
    """
    Remove a window-title pattern from the whitelist.

    Args:
        window_pattern: Window-title pattern to revoke.

    Returns:
        The result of the underlying WhitelistManager call.
    """
    result = self.whitelist_manager.remove_from_whitelist(window_pattern)
    return result
def get_whitelist(self) -> List[str]:
    """Return the current whitelist as reported by the WhitelistManager."""
    patterns = self.whitelist_manager.get_whitelist()
    return patterns
def set_whitelist_enforcement(self, enforce: bool):
    """
    Enable or disable whitelist enforcement.

    Args:
        enforce: True to enforce the whitelist, False to allow all windows.
    """
    self.enforce_whitelist = enforce
    self.logger.log_action(
        {"action": "whitelist_enforcement_changed", "enforce": enforce}
    )
def get_metrics(self) -> Dict[str, Any]:
"""Retourne les métriques de performance actuelles"""
return self.metrics.copy()
def rollback_last_actions(self, count: int = 3):
    """
    Roll back the most recent actions by executing their inverses.

    Fetches up to `count` actions from the input history and undoes them
    in reverse chronological order; results are logged and surfaced to the
    GUI when available.

    Args:
        count: Number of recent actions to undo (default: 3).
    """
    try:
        # Fetch the most recent actions from the input history
        recent = self.input_utils.get_action_history(limit=count)
        if not recent:
            self.logger.log_action({
                "action": "rollback_skipped",
                "reason": "no_actions_in_history"
            })
            return
        self.logger.log_action({
            "action": "rollback_started",
            "actions_count": len(recent)
        })
        # Undo in reverse chronological order, collecting per-action results
        results = [
            self.input_utils.execute_inverse_action(past_action)
            for past_action in reversed(recent)
        ]
        succeeded = sum(1 for ok in results if ok)
        failed = len(results) - succeeded
        self.logger.log_action({
            "action": "rollback_completed",
            "success_count": succeeded,
            "failed_count": failed
        })
        if hasattr(self, 'log_to_gui'):
            self.log_to_gui(
                "🔄",
                f"Rollback: {succeeded}/{len(recent)} actions annulées",
                "info"
            )
    except Exception as e:
        self.logger.log_action({
            "action": "rollback_error",
            "error": str(e)
        })
def get_status(self) -> Dict[str, Any]:
    """
    Return a snapshot of the orchestrator's current state.

    Returns:
        Dictionary with run/pause flags, mode, current window and task,
        whitelist information, metrics and action-history size.
    """
    status = {
        "running": self.running,
        "paused": self.paused,
        "mode": self.learning_manager.get_mode(),
        "current_window": self.current_window,
        "current_task": self.learning_manager.current_task_id,
        "whitelist_size": len(self.whitelist_manager.get_whitelist()),
        "enforce_whitelist": self.enforce_whitelist,
        "metrics": self.get_metrics(),
        "action_history_size": len(self.input_utils.get_action_history()),
    }
    return status
def enable_progressive_mode(self):
    """
    Enable progressive mode: start in shadow, later propose assist.

    Resets the proposal flag and forces the learning manager into "shadow"
    mode; the switch to "assist" is proposed later by _propose_assist_mode
    once a repetitive pattern is detected.
    """
    print("\n" + "="*60)
    print("🎓 ACTIVATION DU MODE PROGRESSIF")
    print("="*60)
    self._progressive_mode = True
    self._assist_proposed = False
    self.learning_manager.mode = "shadow"
    print(f"✅ Mode progressif activé:")
    print(f" - _progressive_mode = {self._progressive_mode}")
    print(f" - _assist_proposed = {self._assist_proposed}")
    print(f" - learning_manager.mode = {self.learning_manager.mode}")
    print("="*60 + "\n")
    self.logger.log_action({
        "action": "progressive_mode_enabled",
        "initial_mode": "shadow"
    })
    if hasattr(self, 'log_to_gui'):
        self.log_to_gui(
            "🎓",
            "Mode Progressif activé - Observation des patterns",
            "info"
        )
def _propose_assist_mode(self, task):
    """
    Propose switching to assist mode after a repetitive pattern is detected.

    Shows the proposal through the GUI when available; otherwise prints a
    console notice and auto-accepts after 10 seconds unless the mode is no
    longer "shadow" by then.

    Args:
        task: Detected task (must expose task_id and task_name).
    """
    # Only propose once per session
    if self._assist_proposed:
        return
    self._assist_proposed = True
    self.logger.log_action({
        "action": "assist_mode_proposed",
        "task_id": task.task_id,
        "task_name": task.task_name
    })
    # Show the proposal in the GUI when supported
    if self.gui and hasattr(self.gui, 'show_mode_switch_proposal'):
        self.gui.show_mode_switch_proposal(
            task_name=task.task_name,
            on_accept=self._switch_to_assist_mode,
            on_reject=self._stay_in_shadow_mode
        )
    else:
        # Fallback: simple console notification
        print(f"\n{'='*60}")
        print(f"💡 PROPOSITION DE MODE ASSIST")
        print(f"{'='*60}")
        print(f"J'ai détecté un pattern répétitif: {task.task_name}")
        print(f"")
        print(f"Est-ce que je peux essayer de vous aider en suggérant")
        print(f"automatiquement cette action la prochaine fois ?")
        print(f"")
        print(f"→ Basculer en mode Assist maintenant")
        print(f" (Les suggestions apparaîtront automatiquement)")
        print(f"{'='*60}\n")
        # Auto-accept after 10 seconds when there is no GUI.
        # BUGFIX: the previous guard was `if not self._assist_proposed: return`,
        # which is always False here (the flag was set above), so the
        # auto-switch fired unconditionally. Guard on the current mode so a
        # change made in the meantime is not overridden.
        def auto_accept():
            time.sleep(10)
            if self.learning_manager.mode != "shadow":
                return
            print("⏱️ Pas de réponse, je bascule en mode Assist...")
            self._switch_to_assist_mode()
        # `threading` is imported at module level; no local import needed.
        threading.Thread(target=auto_accept, daemon=True).start()
def _switch_to_assist_mode(self):
    """Switch the learning manager into assist mode and notify logs/UI."""
    self.learning_manager.mode = "assist"
    self.logger.log_action({
        "action": "switched_to_assist_mode",
        "from_progressive": True
    })
    print(f"\n✅ Mode Assist activé !")
    print(f" Les suggestions apparaîtront automatiquement\n")
    if hasattr(self, 'log_to_gui'):
        # NOTE(review): the first argument (icon) is an empty string here,
        # unlike other log_to_gui calls — possibly a lost emoji; confirm.
        self.log_to_gui(
            "",
            "Mode Assist activé - Suggestions automatiques activées",
            "success"
        )
    if hasattr(self, 'change_mode_gui'):
        self.change_mode_gui("assist")
def _stay_in_shadow_mode(self):
    """Keep shadow mode after the user declined the assist-mode proposal."""
    self.logger.log_action({
        "action": "stayed_in_shadow_mode",
        "user_declined_assist": True
    })
    print(f"\n👀 Je continue en mode observation")
    if hasattr(self, 'log_to_gui'):
        self.log_to_gui(
            "👀",
            "Mode Shadow maintenu - Observation continue",
            "info"
        )
# Methods for Assist mode (suggestion context, callbacks, execution)
def _capture_context_for_suggestion(self) -> Dict[str, Any]:
    """
    Capture the current context used to generate suggestions.

    Captures the screen and active window, then computes an image embedding
    (new embedding system when available, legacy manager otherwise).

    Returns:
        Context dict with keys "screenshot", "window", "embedding",
        "event_capture" and "timestamp"; empty dict on capture failure.
    """
    try:
        # Capture the screen
        screenshot = capture_screen()
        # Active window
        window = get_active_window()
        # Generate an embedding for the screenshot
        embedding = None
        if screenshot is not None:
            try:
                # Prefer the new embedding system when available
                if self.new_embedding_manager:
                    # Convert numpy BGR -> PIL RGB
                    screenshot_rgb = cv2.cvtColor(screenshot, cv2.COLOR_BGR2RGB)
                    pil_image = Image.fromarray(screenshot_rgb)
                    embedding = self.new_embedding_manager.embed(pil_image)
                else:
                    # Fall back to the legacy embeddings manager
                    embedding = self.learning_manager.embeddings_manager.encode_image(screenshot)
            except Exception as e:
                self.logger.log_action({
                    "action": "embedding_generation_failed",
                    "error": str(e)
                })
        return {
            "screenshot": screenshot,
            "window": window,
            "embedding": embedding,
            "event_capture": self.event_capture,  # needed for workflow matching
            "timestamp": datetime.now()
        }
    except Exception as e:
        self.logger.log_action({
            "action": "context_capture_error",
            "error": str(e)
        })
        return {}
def _on_suggestion_created(self, suggestion: Dict[str, Any]) -> None:
    """
    Callback invoked when a suggestion is created.

    Logs it (workflow and classic action suggestions carry different keys:
    workflow suggestions have workflow_id/workflow_name/current_step, action
    suggestions have task_id), forwards it to the GUI when available, and
    bumps the suggestion counter.

    Args:
        suggestion: The created suggestion; "type" defaults to "action".
    """
    suggestion_type = suggestion.get("type", "action")
    if suggestion_type == "workflow":
        # Workflow suggestion
        self.logger.log_action({
            "action": "workflow_suggestion_created",
            "workflow_id": suggestion["workflow_id"],
            "workflow_name": suggestion["workflow_name"],
            "step": suggestion["current_step"],
            "confidence": suggestion["confidence"]
        })
    else:
        # Classic action suggestion
        self.logger.log_action({
            "action": "suggestion_created_callback",
            "task_id": suggestion.get("task_id"),
            "confidence": suggestion["confidence"]
        })
    # Show in the GUI when available
    if self.gui and hasattr(self.gui, "show_suggestion"):
        self.gui.show_suggestion(suggestion)
    # Update metrics
    self.metrics["actions_suggested"] += 1
def _on_suggestion_accepted(self, suggestion: Dict[str, Any]):
    """
    Callback invoked when a suggestion is accepted.

    Logs the acceptance, records a positive fine-tuning example, then
    replays the suggested task.

    Args:
        suggestion: The accepted suggestion.
    """
    # .get(): workflow suggestions carry "workflow_id" instead of "task_id"
    # (see _on_suggestion_created), so a direct lookup would raise KeyError.
    self.logger.log_action({
        "action": "suggestion_accepted_callback",
        "task_id": suggestion.get("task_id")
    })
    # Record as a positive example for embedding fine-tuning
    self._add_positive_example_for_finetuning(suggestion)
    # Execute the accepted suggestion
    self._execute_suggestion(suggestion)
def _on_suggestion_rejected(self, suggestion: Dict[str, Any]):
    """
    Callback invoked when a suggestion is rejected.

    Logs the rejection, records a negative fine-tuning example, and hides
    the suggestion in the GUI when available.

    Args:
        suggestion: The rejected suggestion.
    """
    # .get(): workflow suggestions carry "workflow_id" instead of "task_id"
    # (see _on_suggestion_created), so a direct lookup would raise KeyError.
    self.logger.log_action({
        "action": "suggestion_rejected_callback",
        "task_id": suggestion.get("task_id")
    })
    # Record as a negative example for embedding fine-tuning
    self._add_negative_example_for_finetuning(suggestion)
    # Hide the suggestion in the GUI when available
    if self.gui and hasattr(self.gui, "hide_suggestion"):
        self.gui.hide_suggestion()
def _on_suggestion_timeout(self, suggestion: Dict[str, Any]):
    """
    Callback invoked when a suggestion expires without a response.

    Args:
        suggestion: The expired suggestion.
    """
    # .get(): workflow suggestions carry "workflow_id" instead of "task_id"
    # (see _on_suggestion_created), so a direct lookup would raise KeyError.
    self.logger.log_action({
        "action": "suggestion_timeout_callback",
        "task_id": suggestion.get("task_id")
    })
    # Hide the suggestion in the GUI when available
    if self.gui and hasattr(self.gui, "hide_suggestion"):
        self.gui.hide_suggestion()
def _execute_suggestion(self, suggestion: Dict[str, Any]):
    """
    Execute an accepted suggestion by replaying its task.

    Runs the replay engine synchronously in a fresh event loop, logs the
    result, updates metrics and notifies the GUI.

    Args:
        suggestion: Suggestion to execute (carries the task id).
    """
    import asyncio
    # .get() instead of []: a suggestion without "task_id" (e.g. a workflow
    # suggestion) now surfaces as a logged replay error in the except branch
    # below, rather than an uncaught KeyError raised before the try.
    task_id = suggestion.get("task_id")
    self.logger.log_action({
        "action": "executing_suggestion",
        "task_id": task_id
    })
    try:
        # asyncio.run creates, runs and ALWAYS closes the event loop.
        # The previous new_event_loop/run_until_complete/close sequence
        # leaked the loop whenever replay_task raised, because close()
        # was not in a finally block.
        result = asyncio.run(
            self.replay_engine.replay_task(task_id, interactive=False)
        )
        # Log the outcome
        self.logger.log_action({
            "action": "suggestion_executed",
            "task_id": task_id,
            "success": result.get("success", False),
            "executed_actions": result.get("executed_actions", 0),
            "failed_actions": result.get("failed_actions", 0)
        })
        # Update metrics
        if result.get("success"):
            self.metrics["actions_executed"] += 1
        # Notify the GUI
        if self.gui and hasattr(self.gui, "show_execution_result"):
            self.gui.show_execution_result(result)
    except Exception as e:
        self.logger.log_action({
            "action": "suggestion_execution_error",
            "task_id": task_id,
            "error": str(e)
        })
def check_for_suggestions(self):
    """
    Check whether a suggestion should be created from the current context.

    Meant to be called periodically or after a user action. Only active in
    "assist" mode. Workflow matches take priority over classic action
    suggestions; an already-active suggestion only gets a timeout check.
    """
    # Only suggest in assist mode
    mode = self.learning_manager.get_mode()
    if mode != "assist":
        return
    # Don't stack suggestions: if one is active, just check its timeout
    if self.suggestion_manager.get_current_suggestion() is not None:
        self.suggestion_manager.check_timeout()
        return
    # 1. Workflows first (higher priority)
    workflow_match = self._check_workflow_match()
    if workflow_match:
        # Create a workflow suggestion
        suggestion = self.suggestion_manager.create_workflow_suggestion(workflow_match)
        if suggestion:
            self.logger.log_action({
                "action": "workflow_suggestion_created_in_orchestrator",
                "workflow_id": workflow_match.workflow_id,
                "confidence": workflow_match.confidence
            })
        return
    # 2. Otherwise, classic action suggestions
    # Capture the current context
    context = self._capture_context_for_suggestion()
    if not context:
        return
    # Create a suggestion when applicable
    self.suggestion_manager.create_suggestion(context)
def _check_workflow_match(self) -> Optional[Any]:
    """
    Check whether the current session's actions match a known workflow.

    Returns:
        A WorkflowMatch when one is found, None otherwise (including when
        there is no active session, no actions, or no known workflows).
    """
    try:
        # A match requires an active session with at least one action...
        session = self.session_manager.current_session
        if not session or not session.actions:
            return None
        # ...and at least one known workflow
        known_workflows = self.workflow_detector.get_workflows()
        if not known_workflows:
            return None
        # Convert workflow objects into the dict shape the matcher expects
        workflows_dict = [
            {
                "workflow_id": wf.workflow_id,
                "name": wf.name,
                "steps": [
                    {
                        "action_type": step.action_type,
                        "position": step.position,
                        "window": step.window,
                        "target_description": step.target_description
                    }
                    for step in wf.steps
                ],
            }
            for wf in known_workflows
        ]
        # Delegate the actual matching to the SuggestionManager
        return self.suggestion_manager.check_workflow_match(
            session.actions,
            workflows_dict
        )
    except Exception as e:
        self.logger.log_action({
            "action": "workflow_match_check_error",
            "error": str(e)
        })
        return None
def find_matching_workflows_enhanced(
    self,
    screen_state: Optional[Any] = None,
    screenshot: Optional[np.ndarray] = None,
    top_k: int = 5
) -> List[Any]:
    """
    Find workflows matching the current screen using the
    EnhancedWorkflowMatcher (improved multi-modal matching).

    Args:
        screen_state: Enriched screen state (built from the current screen
            via EnrichedScreenCapture when None).
        screenshot: Screenshot as a numpy array (falls back to the current
            frame, then to a fresh screen capture, when None).
        top_k: Number of best matches to return.

    Returns:
        List of WorkflowMatch objects sorted by score; empty list when no
        workflows exist or on error.
    """
    try:
        # Capture the screen if needed
        if screenshot is None:
            screenshot = self.current_frame
            if screenshot is None:
                screenshot = capture_screen()
        # Build a screen_state if needed
        if screen_state is None:
            # Use EnrichedScreenCapture to create a fully enriched state
            window_title = self.current_window or "Unknown"
            # NOTE(review): app_name is set to the window title, not an
            # application name — confirm this is intentional.
            app_name = self.current_window or "Unknown"
            session_id = self.session_manager.current_session.session_id if self.session_manager.current_session else "unknown"
            # Capture and enrich via the UI Element Detection system
            screen_state = self.enriched_capture.capture_and_enrich(
                screenshot=screenshot,
                session_id=session_id,
                window_title=window_title,
                app_name=app_name,
                # numpy shape is (height, width, ...) -> resolution is (w, h)
                screen_resolution=(screenshot.shape[1], screenshot.shape[0]) if screenshot is not None else (1920, 1080),
                detected_text=[],  # could be enriched with OCR
                context_tags=[],
                workflow_candidate=None,
                save=False  # do not persist automatically
            )
        # Fetch the known workflows
        workflows = self.workflow_detector.get_workflows()
        if not workflows:
            self.logger.log_action({
                "action": "no_workflows_to_match",
                "message": "Aucun workflow disponible pour le matching"
            })
            return []
        # Run the EnhancedWorkflowMatcher
        matches = self.enhanced_matcher.find_matching_workflows(
            screen_state=screen_state,
            screenshot=screenshot,
            workflows=workflows,
            top_k=top_k
        )
        # Log the results
        if matches:
            self.logger.log_action({
                "action": "enhanced_workflow_matching_completed",
                "matches_found": len(matches),
                "top_match": {
                    "workflow_id": matches[0].workflow_id,
                    "workflow_name": matches[0].workflow_name,
                    "composite_score": float(matches[0].composite_score),
                    "confidence": float(matches[0].confidence)
                }
            })
            # Log detailed feedback when available
            if matches[0].differences:
                feedback_summary = matches[0].get_feedback_summary()
                self.logger.log_action({
                    "action": "workflow_match_feedback",
                    "workflow_id": matches[0].workflow_id,
                    "feedback": feedback_summary
                })
        else:
            self.logger.log_action({
                "action": "no_workflow_matches_found",
                "workflows_evaluated": len(workflows)
            })
        return matches
    except Exception as e:
        self.logger.log_action({
            "action": "enhanced_workflow_matching_error",
            "error": str(e)
        })
        import traceback
        self.logger.log_action({
            "action": "enhanced_workflow_matching_traceback",
            "traceback": traceback.format_exc()
        })
        return []
def accept_current_suggestion(self):
    """Accept the current suggestion (called by the GUI on Enter)."""
    suggestion = self.suggestion_manager.accept_suggestion()
    if suggestion:
        # .get(): workflow suggestions have no "task_id" key and a direct
        # lookup would raise KeyError here.
        self.logger.log_action({
            "action": "user_accepted_suggestion",
            "task_id": suggestion.get("task_id")
        })
def reject_current_suggestion(self):
    """Reject the current suggestion (called by the GUI on Escape)."""
    was_rejected = self.suggestion_manager.reject_suggestion()
    if was_rejected:
        self.logger.log_action({"action": "user_rejected_suggestion"})
def _index_workflow_in_faiss(self, workflow: Dict[str, Any]):
    """
    Index a workflow's screenshots in FAISS for similarity search.

    Embeds every screenshot of the workflow and adds the embeddings to the
    FAISS index with per-step metadata. Silently does nothing when the
    embedding system or the index is unavailable, or when there are no
    screenshots.

    Args:
        workflow: Workflow dict with optional "screenshots", plus
            "workflow_id" and "name" used as index metadata.
    """
    # Both the embedder and the index must be available
    if self.new_embedding_manager is None or self.faiss_index is None:
        return
    try:
        screenshots = workflow.get("screenshots", [])
        if not screenshots:
            return
        # Embed each screenshot (numpy BGR arrays converted to PIL RGB first)
        embeddings = []
        for shot in screenshots:
            if isinstance(shot, np.ndarray):
                pil_image = Image.fromarray(cv2.cvtColor(shot, cv2.COLOR_BGR2RGB))
            else:
                pil_image = shot
            embeddings.append(self.new_embedding_manager.embed(pil_image))
        if embeddings:
            # One metadata record per embedded step
            metadata = [
                {
                    "workflow_id": workflow.get("workflow_id"),
                    "workflow_name": workflow.get("name"),
                    "step_index": step_index
                }
                for step_index in range(len(embeddings))
            ]
            self.faiss_index.add(np.array(embeddings), metadata)
            self.logger.log_action({
                "action": "workflow_indexed_in_faiss",
                "workflow_id": workflow.get("workflow_id"),
                "num_embeddings": len(embeddings)
            })
    except Exception as e:
        self.logger.log_action({
            "action": "workflow_indexing_failed",
            "workflow_id": workflow.get("workflow_id"),
            "error": str(e)
        })
def _add_positive_example_for_finetuning(self, suggestion: Dict[str, Any]):
    """
    Record an accepted suggestion as a positive fine-tuning example.

    Args:
        suggestion: Accepted suggestion; needs a "screenshot" and either a
            "workflow_id" or a "task_id" used as the example's label.
    """
    if not self.fine_tuner:
        return
    try:
        screenshot = suggestion.get("screenshot")
        if screenshot is None:
            return
        # Numpy BGR frames are converted to PIL RGB for the fine-tuner
        if isinstance(screenshot, np.ndarray):
            pil_image = Image.fromarray(
                cv2.cvtColor(screenshot, cv2.COLOR_BGR2RGB)
            )
        else:
            pil_image = screenshot
        # Workflow suggestions carry workflow_id, task suggestions task_id
        workflow_id = suggestion.get("workflow_id") or suggestion.get("task_id")
        self.fine_tuner.add_positive_example(
            image=pil_image,
            workflow_id=workflow_id,
            metadata={
                "timestamp": time.time(),
                "confidence": suggestion.get("confidence", 0.0)
            }
        )
        self.logger.log_action({
            "action": "positive_example_added_for_finetuning",
            "workflow_id": workflow_id,
            "total_examples": self.fine_tuner.get_stats()["total_examples"]
        })
    except Exception as e:
        self.logger.log_action({
            "action": "positive_example_add_failed",
            "error": str(e)
        })
def _add_negative_example_for_finetuning(self, suggestion: Dict[str, Any]):
    """
    Record a rejected suggestion as a negative fine-tuning example.

    Args:
        suggestion: Rejected suggestion; needs a "screenshot" and either a
            "workflow_id" or a "task_id" used as the example's label.
    """
    if not self.fine_tuner:
        return
    try:
        screenshot = suggestion.get("screenshot")
        if screenshot is None:
            return
        # Numpy BGR frames are converted to PIL RGB for the fine-tuner
        if isinstance(screenshot, np.ndarray):
            pil_image = Image.fromarray(
                cv2.cvtColor(screenshot, cv2.COLOR_BGR2RGB)
            )
        else:
            pil_image = screenshot
        # Workflow suggestions carry workflow_id, task suggestions task_id
        workflow_id = suggestion.get("workflow_id") or suggestion.get("task_id")
        self.fine_tuner.add_negative_example(
            image=pil_image,
            workflow_id=workflow_id,
            metadata={
                "timestamp": time.time(),
                "confidence": suggestion.get("confidence", 0.0)
            }
        )
        self.logger.log_action({
            "action": "negative_example_added_for_finetuning",
            "workflow_id": workflow_id,
            "total_examples": self.fine_tuner.get_stats()["total_examples"]
        })
    except Exception as e:
        self.logger.log_action({
            "action": "negative_example_add_failed",
            "error": str(e)
        })
def _save_embedding_system_on_shutdown(self):
    """
    Persist the embedding system's state on shutdown.

    Waits for any in-progress fine-tuning (up to 30 s), saves the
    fine-tuner checkpoint and the FAISS index, and logs the embedding
    cache statistics. All failures are logged, never raised.
    """
    try:
        # Wait for any in-progress fine-tuning to finish
        if self.fine_tuner:
            self.logger.log_action({
                "action": "waiting_for_finetuning_completion"
            })
            self.fine_tuner.wait_for_training(timeout=30)
            # Save the checkpoint
            self.fine_tuner.save_checkpoint(self._fine_tuner_checkpoint_name)
            self.logger.log_action({
                "action": "fine_tuner_checkpoint_saved",
                "name": self._fine_tuner_checkpoint_name,
                "stats": self.fine_tuner.get_stats()
            })
        # Save the FAISS index
        if self.faiss_index:
            self.faiss_index.save(self._faiss_index_path)
            self.logger.log_action({
                "action": "faiss_index_saved",
                "path": self._faiss_index_path,
                "stats": self.faiss_index.get_stats()
            })
        # Log the embedding cache statistics
        if self.new_embedding_manager:
            cache_stats = self.new_embedding_manager.get_stats()
            self.logger.log_action({
                "action": "embedding_cache_stats",
                "stats": cache_stats
            })
    except Exception as e:
        self.logger.log_action({
            "action": "embedding_system_save_failed",
            "error": str(e)
        })
if __name__ == "__main__":
    # Basic smoke tests for the orchestrator.
    # NOTE(review): the relative imports below require running this module
    # as a package (e.g. `python -m geniusia2.core.orchestrator`) — confirm.
    """Tests basiques de l'orchestrateur"""
    print("Test de l'Orchestrateur RPA Vision V2")
    print("=" * 50)
    # Build real components to exercise the orchestrator
    from .embeddings_manager import EmbeddingsManager
    from .config import ensure_directories
    # Make sure the data directories exist
    ensure_directories()
    # Initialize the components
    print("\n1. Initialisation des composants...")
    logger = Logger()
    embeddings_manager = EmbeddingsManager()
    learning_manager = LearningManager(embeddings_manager, logger, get_config())
    vision_utils = VisionUtils()
    llm_manager = LLMManager(logger=logger, fallback_to_vision=True)
    print(" ✓ Composants initialisés")
    # Create the orchestrator
    print("\n2. Création de l'orchestrateur...")
    orchestrator = Orchestrator(
        learning_manager=learning_manager,
        vision_utils=vision_utils,
        llm_manager=llm_manager,
        logger=logger,
        gui=None  # no GUI for the tests
    )
    print(" ✓ Orchestrateur créé")
    # Context-capture test
    print("\n3. Test capture_context()...")
    try:
        context = orchestrator.capture_context()
        if context:
            print(f" ✓ Contexte capturé")
            print(f" - Fenêtre: {context.get('window_title', 'N/A')}")
            print(f" - Frame shape: {context.get('frame_shape', 'N/A')}")
        else:
            print(" ⚠ Capture de contexte a échoué (normal sans environnement graphique)")
    except Exception as e:
        print(f" ⚠ Erreur: {e}")
    # Whitelist test
    print("\n4. Test de liste blanche...")
    orchestrator.add_to_whitelist("Dolibarr*")
    orchestrator.add_to_whitelist("Firefox*")
    whitelist = orchestrator.get_whitelist()
    print(f" ✓ Liste blanche: {whitelist}")
    # Whitelist check test
    print("\n5. Test de vérification liste blanche...")
    test_windows = [
        "Dolibarr - Facturation",
        "Firefox - Mozilla",
        "Unknown Application"
    ]
    for window in test_windows:
        allowed = orchestrator._check_whitelist(window)
        status = "✓ Autorisé" if allowed else "✗ Bloqué"
        print(f" {status}: {window}")
    # Metrics test
    print("\n6. Test de métriques...")
    metrics = orchestrator.get_metrics()
    print(f" ✓ Métriques:")
    for key, value in metrics.items():
        print(f" - {key}: {value}")
    # Status test
    print("\n7. Test de statut...")
    status = orchestrator.get_status()
    print(f" ✓ Statut:")
    print(f" - Running: {status['running']}")
    print(f" - Mode: {status['mode']}")
    print(f" - Whitelist enforcement: {status['enforce_whitelist']}")
    print("\n✓ Tests basiques terminés!")
    print("\nNote: Pour tester la boucle cognitive complète, lancez orchestrator.run()")
    print(" dans un environnement avec interface graphique.")