v1.0 - Version stable: multi-PC, détection UI-DETR-1, 3 modes exécution

- Frontend v4 accessible sur réseau local (192.168.1.40)
- Ports ouverts: 3002 (frontend), 5001 (backend), 5004 (dashboard)
- Ollama GPU fonctionnel
- Self-healing interactif
- Dashboard confiance

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Dom
2026-01-29 11:23:51 +01:00
parent 21bfa3b337
commit a27b74cf22
1595 changed files with 412691 additions and 400 deletions

View File

@@ -0,0 +1,7 @@
"""
Pipeline module - Orchestration du flux RPA Vision V3
"""
from .workflow_pipeline import WorkflowPipeline, create_pipeline
__all__ = ["WorkflowPipeline", "create_pipeline"]

View File

@@ -0,0 +1,968 @@
"""
WorkflowPipeline - Orchestrateur du flux complet RPA Vision V3
Ce module orchestre le pipeline complet :
RawSession → ScreenStates → UIElements → Embeddings → Workflow
C'est le "ciment" qui connecte toutes les briques du système.
"""
import logging
import numpy as np
from typing import List, Dict, Optional, Any
from pathlib import Path
from datetime import datetime
from core.models.raw_session import RawSession
from core.models.screen_state import ScreenState
from core.models.workflow_graph import Workflow, LearningState
from core.models.ui_element import UIElement
from core.models.execution_result import WorkflowExecutionResult, PerformanceMetrics, RecoveryInfo, StepExecutionStatus
from core.detection.ui_detector import UIDetector, DetectionConfig
from core.embedding.clip_embedder import CLIPEmbedder
from core.embedding.state_embedding_builder import StateEmbeddingBuilder
from core.embedding.fusion_engine import FusionEngine
from core.embedding.faiss_manager import FAISSManager
from core.graph.graph_builder import GraphBuilder
from core.graph.node_matcher import NodeMatcher
from core.learning.learning_manager import LearningManager
from core.matching.hierarchical_matcher import HierarchicalMatcher, TemporalContext, MatchResult
from core.execution.action_executor import ActionExecutor, ExecutionResult, ExecutionStatus
from core.execution.target_resolver import TargetResolver
from core.execution.error_handler import ErrorHandler
logger = logging.getLogger(__name__)
class WorkflowPipeline:
"""
Orchestrateur principal du système RPA Vision V3.
Gère le flux complet depuis la capture brute jusqu'à l'exécution
automatique des workflows appris.
Modes d'opération:
- TRAINING: Analyse des sessions pour construire des workflows
- MATCHING: Reconnaissance de l'état actuel dans un workflow
- EXECUTION: Exécution automatique des actions
Example:
>>> pipeline = WorkflowPipeline()
>>> workflow = pipeline.process_session(raw_session, "Login Workflow")
>>> print(f"Built workflow with {len(workflow.nodes)} nodes")
"""
def __init__(
    self,
    data_dir: str = "data",
    use_gpu: bool = False,
    enable_ui_detection: bool = True,
    enable_vlm: bool = True,
    action_executor: Optional[ActionExecutor] = None,
    target_resolver: Optional[TargetResolver] = None,
    error_handler: Optional[ErrorHandler] = None
):
    """
    Initialize the pipeline and all of its collaborating components.

    Args:
        data_dir: Root data directory; sub-directories are created under it.
        use_gpu: Use "cuda" for the CLIP embedder, otherwise "cpu".
        enable_ui_detection: Enable UI detection (OWL-v2 + VLM).
        enable_vlm: Enable the VLM for element classification.
        action_executor: ActionExecutor instance (a default one is created if None).
        target_resolver: TargetResolver instance (a default one is created if None).
        error_handler: ErrorHandler instance (a default one is created if None).
    """
    self.data_dir = Path(data_dir)
    self.data_dir.mkdir(parents=True, exist_ok=True)
    # Create the sub-directories used for embeddings, workflows and screenshots.
    self.embeddings_dir = self.data_dir / "embeddings"
    self.workflows_dir = self.data_dir / "workflows"
    self.screenshots_dir = self.data_dir / "screenshots"
    for d in [self.embeddings_dir, self.workflows_dir, self.screenshots_dir]:
        d.mkdir(parents=True, exist_ok=True)
    # Initialize components. Order matters: later components receive the
    # earlier ones as constructor arguments (e.g. builders need embedder + FAISS).
    logger.info("Initializing WorkflowPipeline components...")
    # 1. CLIP embedder (image/text embeddings).
    device = "cuda" if use_gpu else "cpu"
    self.clip_embedder = CLIPEmbedder(device=device)
    logger.info(f"✓ CLIP Embedder initialized on {device}")
    # 2. Fusion engine (combines multi-modal signals into one vector).
    self.fusion_engine = FusionEngine()
    logger.info("✓ Fusion Engine initialized")
    # 3. State embedding builder (turns ScreenStates into vectors on disk).
    self.embedding_builder = StateEmbeddingBuilder(
        fusion_engine=self.fusion_engine,
        output_dir=self.embeddings_dir,
        use_clip=True
    )
    logger.info("✓ State Embedding Builder initialized")
    # 4. FAISS manager (512-d flat index used for state matching).
    self.faiss_manager = FAISSManager(
        dimensions=512,
        index_type="Flat"
    )
    logger.info("✓ FAISS Manager initialized")
    # 5. UI detector (optional; initialization failure is non-fatal and
    # leaves self.ui_detector as None).
    self.ui_detector = None
    if enable_ui_detection:
        # Route initialization errors through the ErrorHandler when one was
        # supplied; otherwise just log and continue without UI detection.
        try:
            config = DetectionConfig(
                use_vlm_classification=enable_vlm,
                use_owl_detection=True,
                confidence_threshold=0.7
            )
            self.ui_detector = UIDetector(config)
            logger.info("✓ UI Detector initialized")
        except Exception as e:
            if error_handler:
                context_data = {
                    'details': {'operation': 'ui_detector_initialization'},
                    'original_data': {'enable_vlm': enable_vlm}
                }
                recovery_result = error_handler.handle_error(e, context_data)
                if not recovery_result.success:
                    logger.warning(f"UI Detector not available: {e}")
            else:
                logger.warning(f"UI Detector not available: {e}")
    # 6. Graph builder (sessions -> workflow graphs).
    self.graph_builder = GraphBuilder(
        embedding_builder=self.embedding_builder,
        faiss_manager=self.faiss_manager
    )
    logger.info("✓ Graph Builder initialized")
    # 7. Node matcher (screen state -> workflow node).
    self.node_matcher = NodeMatcher(
        embedding_builder=self.embedding_builder,
        faiss_manager=self.faiss_manager
    )
    logger.info("✓ Node Matcher initialized")
    # 8. Learning manager (tracks per-workflow learning state / stats).
    self.learning_manager = LearningManager()
    logger.info("✓ Learning Manager initialized")
    # 9. Hierarchical matcher (window -> region -> element matching).
    self.hierarchical_matcher = HierarchicalMatcher()
    logger.info("✓ Hierarchical Matcher initialized")
    # 10. Target resolver (used during execution).
    self.target_resolver = target_resolver or TargetResolver()
    logger.info("✓ Target Resolver initialized")
    # 11. Error handler (centralized error management).
    self.error_handler = error_handler or ErrorHandler()
    logger.info("✓ Error Handler initialized")
    # 12. Action executor (used during execution; shares resolver/handler).
    self.action_executor = action_executor or ActionExecutor(
        target_resolver=self.target_resolver,
        error_handler=self.error_handler
    )
    logger.info("✓ Action Executor initialized")
    # Cache of loaded workflows, keyed by workflow_id.
    self._workflows: Dict[str, Workflow] = {}
    # Per-workflow temporal context used by hierarchical matching.
    self._temporal_context: Dict[str, TemporalContext] = {}
    logger.info("WorkflowPipeline ready!")
# =========================================================================
# Mode TRAINING : Construction de workflows depuis sessions
# =========================================================================
def process_session(
    self,
    session: RawSession,
    workflow_name: Optional[str] = None,
    detect_ui: bool = True
) -> Workflow:
    """
    Build a workflow from a raw recorded session.

    Full pipeline:
      1. Build the graph (nodes + edges) from the session via GraphBuilder.
      2. Optionally enrich the nodes with UI element detection.
      3. Index the node embeddings into FAISS.
      4. Register the workflow with the LearningManager.
      5. Persist the workflow to disk and cache it in memory.

    Args:
        session: Raw session to analyse.
        workflow_name: Workflow name (generated when None).
        detect_ui: Run UI element detection (slower but more precise).

    Returns:
        The constructed Workflow.
    """
    logger.info(f"Processing session {session.session_id}...")
    started = datetime.now()

    # Step 1: build the graph from the recorded session.
    workflow = self.graph_builder.build_from_session(session, workflow_name)

    # Step 2: optional UI enrichment (requires an initialized detector).
    if detect_ui and self.ui_detector:
        self._enrich_with_ui_detection(workflow, session)

    # Steps 3-4: make the workflow searchable and trackable.
    self._index_workflow_embeddings(workflow)
    self.learning_manager.register_workflow(workflow)

    # Step 5: persist to disk and keep a hot copy in the in-memory cache.
    workflow.save_to_file(self.workflows_dir / f"{workflow.workflow_id}.json")
    self._workflows[workflow.workflow_id] = workflow

    elapsed = (datetime.now() - started).total_seconds()
    logger.info(
        f"Session processed in {elapsed:.1f}s: "
        f"{len(workflow.nodes)} nodes, {len(workflow.edges)} edges"
    )
    return workflow
def _enrich_with_ui_detection(
self,
workflow: Workflow,
session: RawSession
) -> None:
"""
Enrichir les nodes du workflow avec détection UI.
"""
if not self.ui_detector:
return
logger.info("Enriching workflow with UI detection...")
for screenshot in session.screenshots:
screenshot_path = Path(screenshot.relative_path)
if screenshot_path.exists():
# Utiliser ErrorHandler pour gérer les erreurs de détection UI
try:
ui_elements = self.ui_detector.detect(str(screenshot_path))
logger.debug(f"Detected {len(ui_elements)} UI elements in {screenshot.screenshot_id}")
except Exception as e:
context_data = {
'details': {'operation': 'ui_detection', 'screenshot_id': screenshot.screenshot_id},
'original_data': {'screenshot_path': str(screenshot_path)}
}
recovery_result = self.error_handler.handle_error(e, context_data)
if not recovery_result.success:
logger.warning(f"UI detection failed for {screenshot.screenshot_id}: {e}")
def _index_workflow_embeddings(self, workflow: Workflow) -> None:
"""
Indexer les embeddings des nodes dans FAISS (rebuild propre).
Auteur : Dom, Alice Kiro - 22 décembre 2025
Amélioration pour FAISS Rebuild Propre:
- Construction de liste canonique avant reindex
- Métadonnées enrichies (workflow_id, node_id, node_name)
- Force training IVF pour cohérence
- Logging informatif
"""
if not self.faiss_manager:
return
items = []
for node in workflow.nodes:
vec = self._extract_node_vector(node)
if vec is None:
continue
items.append((
node.node_id,
vec,
{
"workflow_id": workflow.workflow_id,
"node_id": node.node_id,
"node_name": getattr(node, "name", "")
}
))
n = self.faiss_manager.reindex(items, force_train_ivf=True)
logger.info(f"FAISS reindexed: {n} node prototypes (workflow={workflow.workflow_id})")
def _extract_node_vector(self, node) -> Optional[np.ndarray]:
"""
Récupérer le prototype vecteur d'un node, compatible avec plusieurs versions de modèle.
Auteur : Dom, Alice Kiro - 22 décembre 2025
Support Multi-Version:
- v1: embedding_prototype en liste directe
- v2: embedding.vector_id avec fichier sur disque
- Fallback: screen_template legacy
- Gestion robuste des erreurs
Returns:
np.ndarray ou None si aucun vecteur trouvé
"""
# v1: prototype stocké en liste directement
tpl = getattr(node, "template", None)
if tpl is not None:
proto_list = getattr(tpl, "embedding_prototype", None)
if isinstance(proto_list, list):
try:
v = np.array(proto_list, dtype=np.float32)
return v
except Exception as e:
logger.debug(f"Failed to convert embedding_prototype list: {e}")
# v2: prototype stocké sur disque via EmbeddingPrototype.vector_id
if tpl is not None:
emb = getattr(tpl, "embedding", None)
if emb is not None:
vector_id = getattr(emb, "vector_id", None)
if vector_id:
try:
return np.load(vector_id).astype(np.float32)
except Exception as e:
logger.debug(f"Failed to load vector from {vector_id}: {e}")
# fallback (ancienne nomenclature)
st = getattr(node, "screen_template", None)
if st is not None:
p = getattr(st, "embedding_prototype_path", None)
if p:
try:
return np.load(p).astype(np.float32)
except Exception as e:
logger.debug(f"Failed to load legacy vector from {p}: {e}")
return None
# =========================================================================
# Mode MATCHING : Reconnaissance de l'état actuel
# =========================================================================
def match_current_state(
    self,
    screenshot_path: str,
    workflow_id: Optional[str] = None,
    window_title: Optional[str] = None,
    threshold: float = 0.85
) -> Optional[Dict[str, Any]]:
    """
    Identify which workflow node the current screen corresponds to.

    A temporary ScreenState is built around the screenshot, embedded, and
    searched against the FAISS index of node prototypes.

    Args:
        screenshot_path: Path to the current screenshot.
        workflow_id: Restrict matching to this workflow (all when None).
        window_title: Window title used as context.
        threshold: Minimum similarity to accept a match. Defaults to the
            previously hard-coded 0.85, so existing callers are unaffected.

    Returns:
        Dict with node_id, workflow_id, confidence and state_embedding_id,
        or None when no candidate reaches the threshold.
    """
    logger.debug(f"Matching screenshot: {screenshot_path}")
    # Build a throwaway ScreenState for the screenshot. Local import keeps
    # the module-level import list unchanged.
    from core.models.screen_state import (
        WindowContext, RawLevel, PerceptionLevel, ContextLevel, EmbeddingRef
    )
    # Keep the string parameter intact and use a Path copy locally (the
    # previous code shadowed the parameter by rebinding it to a Path).
    shot = Path(screenshot_path)
    window = WindowContext(
        app_name="unknown",
        window_title=window_title or "Unknown",
        screen_resolution=[1920, 1080],
        workspace="main"
    )
    raw = RawLevel(
        screenshot_path=str(shot),
        capture_method="manual",
        file_size_bytes=shot.stat().st_size if shot.exists() else 0
    )
    perception = PerceptionLevel(
        embedding=EmbeddingRef(
            provider="openclip_ViT-B-32",
            vector_id="temp",
            dimensions=512
        ),
        detected_text=[],
        text_detection_method="pending",
        confidence_avg=0.0
    )
    context = ContextLevel(
        current_workflow_candidate=workflow_id,
        workflow_step=None,
        user_id="matcher",
        tags=[],
        business_variables={}
    )
    current_state = ScreenState(
        screen_state_id=f"match_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
        timestamp=datetime.now(),
        session_id="matching",
        window=window,
        raw=raw,
        perception=perception,
        context=context,
        ui_elements=[]
    )
    # Embed the temporary state and query the FAISS index.
    state_embedding = self.embedding_builder.build(current_state)
    query_vector = state_embedding.get_vector()
    results = self.faiss_manager.search(query_vector, k=5)
    if not results:
        logger.debug("No match found in FAISS")
        return None
    # Return the first candidate that belongs to the requested workflow
    # and clears the similarity threshold (results come ranked from FAISS).
    for result in results:
        metadata = result.get("metadata", {})
        result_workflow_id = metadata.get("workflow_id")
        if workflow_id and result_workflow_id != workflow_id:
            continue
        similarity = result.get("similarity", 0)
        if similarity >= threshold:
            return {
                "node_id": metadata.get("node_id"),
                "workflow_id": result_workflow_id,
                "confidence": similarity,
                "state_embedding_id": state_embedding.embedding_id
            }
    logger.debug(f"Best match below threshold: {results[0].get('similarity', 0):.3f}")
    return None
def match_hierarchical(
    self,
    screenshot_path: str,
    workflow_id: str,
    window_info: Optional[Dict[str, Any]] = None,
    detected_elements: Optional[List[Any]] = None
) -> Optional[Dict[str, Any]]:
    """
    Multi-level hierarchical matching (window -> region -> element).

    Uses the HierarchicalMatcher for more robust matching, with a temporal
    boost derived from the recent match history.

    Args:
        screenshot_path: Path to the screenshot.
        workflow_id: Workflow ID.
        window_info: Window information (title, process).
        detected_elements: Pre-detected UI elements.

    Returns:
        Dict with node_id, confidence and per-level details, or None when
        the workflow is unknown, the screenshot cannot be loaded, or the
        match confidence stays below 0.5.
    """
    workflow = self.load_workflow(workflow_id)
    if not workflow:
        logger.error(f"Workflow not found: {workflow_id}")
        return None
    # Get or lazily create the per-workflow temporal context.
    if workflow_id not in self._temporal_context:
        self._temporal_context[workflow_id] = TemporalContext()
    temporal_context = self._temporal_context[workflow_id]
    # Load the screenshot, routing failures through the ErrorHandler.
    from PIL import Image
    try:
        screenshot = Image.open(screenshot_path)
    except Exception as e:
        context_data = {
            'details': {'operation': 'load_screenshot'},
            'original_data': {'screenshot_path': str(screenshot_path)}
        }
        recovery_result = self.error_handler.handle_error(e, context_data)
        if not recovery_result.success:
            logger.error(f"Failed to load screenshot: {e}")
        # BUG FIX: the original fell through here when the handler reported
        # a successful recovery and then used the undefined `screenshot`
        # variable below (NameError). Recovery cannot supply the image, so
        # matching must be aborted in both cases.
        return None
    # Run the hierarchical match.
    result: MatchResult = self.hierarchical_matcher.match(
        screenshot=screenshot,
        workflow=workflow,
        window_info=window_info,
        detected_elements=detected_elements,
        temporal_context=temporal_context
    )
    if result.confidence < 0.5:
        logger.debug(f"Hierarchical match below threshold: {result.confidence:.3f}")
        return None
    # Feed the accepted match back into the temporal context for the boost.
    temporal_context.add_match(result.node_id, result.confidence)
    return {
        "node_id": result.node_id,
        "workflow_id": workflow_id,
        "confidence": result.confidence,
        "window_confidence": result.window_confidence,
        "region_confidence": result.region_confidence,
        "element_confidence": result.element_confidence,
        "temporal_boost": result.temporal_boost,
        "matched_variant": result.matched_variant,
        "alternatives": [
            {"node_id": alt.node_id, "confidence": alt.confidence}
            for alt in result.alternatives
        ],
        "match_time_ms": result.match_time_ms
    }
def reset_temporal_context(self, workflow_id: str) -> None:
    """Reset the temporal matching context for the given workflow (no-op if absent)."""
    if workflow_id not in self._temporal_context:
        return
    self._temporal_context[workflow_id] = TemporalContext()
    logger.info(f"Temporal context reset for workflow {workflow_id}")
# =========================================================================
# Mode EXECUTION : Exécution automatique
# =========================================================================
def get_next_action(
    self,
    workflow_id: str,
    current_node_id: str
) -> Optional[Dict[str, Any]]:
    """
    Get the next action to execute from the current node.

    Args:
        workflow_id: Workflow ID.
        current_node_id: ID of the current node.

    Returns:
        Dict with edge_id, action, target_node and confidence, or None
        when the workflow is unknown or the node has no outgoing edges.
    """
    # CONSISTENCY FIX: delegate loading to load_workflow instead of the
    # previous inline cache-then-single-file lookup. load_workflow checks
    # the in-memory cache first, searches every known workflow directory,
    # and registers the workflow with the LearningManager.
    workflow = self.load_workflow(workflow_id)
    if not workflow:
        logger.error(f"Workflow not found: {workflow_id}")
        return None
    # Outgoing edges from the current node; none means the flow is done.
    outgoing_edges = workflow.get_outgoing_edges(current_node_id)
    if not outgoing_edges:
        logger.info(f"No outgoing edges from node {current_node_id}")
        return None
    # For now, take the first edge (TODO: smarter edge selection).
    edge = outgoing_edges[0]
    return {
        "edge_id": edge.edge_id,
        "action": edge.action.to_dict(),
        "target_node": edge.to_node,
        # Without recorded stats, assume full confidence.
        "confidence": edge.stats.success_rate if edge.stats else 1.0
    }
def should_execute_automatically(self, workflow_id: str) -> bool:
    """
    Whether the workflow may run without asking the user first.

    Thin delegate: the LearningManager decides from the workflow's
    learning_state.
    """
    return self.learning_manager.should_execute_automatically(workflow_id)
def should_ask_confirmation(self, workflow_id: str) -> bool:
    """
    Whether user confirmation is required before executing this workflow.

    Thin delegate to the LearningManager.
    """
    return self.learning_manager.should_ask_confirmation(workflow_id)
# =========================================================================
# Gestion des workflows
# =========================================================================
def load_workflow(self, workflow_id: str) -> Optional[Workflow]:
    """
    Load a workflow, preferring the in-memory cache over disk.

    Several locations are searched in priority order (current layout
    first, then legacy training layouts); the first readable file wins.
    Load errors go through the ErrorHandler and the search continues.
    """
    cached = self._workflows.get(workflow_id)
    if cached is not None:
        return cached
    # Candidate locations, in priority order.
    candidates = (
        self.workflows_dir / f"{workflow_id}.json",
        self.data_dir / "training" / "workflows" / f"{workflow_id}.json",
        Path("data/training/workflows") / f"{workflow_id}.json",
    )
    for candidate in candidates:
        if not candidate.exists():
            continue
        try:
            workflow = Workflow.load_from_file(candidate)
            self._workflows[workflow_id] = workflow
            self.learning_manager.register_workflow(workflow)
            logger.info(f"Loaded workflow {workflow_id} from {candidate}")
            return workflow
        except Exception as e:
            context_data = {
                'details': {'operation': 'load_workflow', 'workflow_id': workflow_id},
                'original_data': {'workflow_path': str(candidate)}
            }
            recovery = self.error_handler.handle_error(e, context_data)
            if not recovery.success:
                logger.error(f"Failed to load workflow from {candidate}: {e}")
    logger.warning(f"Workflow not found: {workflow_id}")
    return None
def list_workflows(self) -> List[Dict[str, Any]]:
    """
    List summaries for every workflow file in the workflows directory.

    Unreadable files are logged and skipped rather than failing the call.
    """
    summaries: List[Dict[str, Any]] = []
    for path in self.workflows_dir.glob("*.json"):
        try:
            wf = Workflow.load_from_file(path)
            summaries.append({
                "workflow_id": wf.workflow_id,
                "name": wf.name,
                "learning_state": wf.learning_state,
                "nodes_count": len(wf.nodes),
                "edges_count": len(wf.edges),
                "created_at": wf.created_at.isoformat()
            })
        except Exception as e:
            logger.warning(f"Failed to load workflow {path}: {e}")
    return summaries
def get_workflow_status(self, workflow_id: str) -> Optional[Dict[str, Any]]:
    """
    Detailed status of a workflow: structure plus learning statistics.

    Returns None when the workflow cannot be loaded. Missing stats are
    reported as zeros rather than omitted.
    """
    workflow = self.load_workflow(workflow_id)
    if not workflow:
        return None
    stats = self.learning_manager.get_workflow_stats(workflow_id)
    node_summaries = [{"id": n.node_id, "name": n.name} for n in workflow.nodes]
    edge_summaries = [
        {"id": e.edge_id, "from": e.from_node, "to": e.to_node}
        for e in workflow.edges
    ]
    return {
        "workflow_id": workflow.workflow_id,
        "name": workflow.name,
        "learning_state": workflow.learning_state,
        "nodes": node_summaries,
        "edges": edge_summaries,
        "stats": {
            "observation_count": stats.observation_count if stats else 0,
            "execution_count": stats.execution_count if stats else 0,
            "success_rate": stats.success_rate if stats else 0.0,
            "avg_confidence": stats.avg_confidence if stats else 0.0
        }
    }
# =========================================================================
# Mode EXECUTION INTÉGRÉ : Exécution automatique avec ActionExecutor
# =========================================================================
def execute_workflow_step(
    self,
    workflow_id: str,
    current_state: ScreenState,
    context: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    """
    Execute one complete workflow step end to end.

    Integrated execution pipeline:
      1. Match the current screen state against the workflow.
      2. Determine the next action to execute.
      3. Locate the corresponding edge in the workflow graph.
      4. Execute the action through ActionExecutor (target resolution via
         TargetResolver; ActionExecutor already integrates ErrorHandler).
      5. Apply additional recovery strategies for specific failures.
      6. Return a detailed result dictionary.

    Args:
        workflow_id: ID of the workflow to execute.
        current_state: Current screen state.
        context: Optional execution context (variables, etc.).

    Returns:
        Dict describing the outcome. Always contains execution_id,
        workflow_id, success, execution_time_ms and correlation_id;
        other keys depend on how far the step progressed.
    """
    import uuid
    from datetime import datetime
    execution_id = str(uuid.uuid4())
    start_time = datetime.now()
    logger.info(f"Executing workflow step: {workflow_id} (execution_id: {execution_id})")
    try:
        # 1. Match the current state against the workflow's nodes.
        match_result = self.match_current_state(
            screenshot_path=current_state.raw.screenshot_path,
            workflow_id=workflow_id,
            window_title=current_state.window.window_title
        )
        if not match_result:
            # Matching failed: delegate to the ErrorHandler and report back.
            workflow = self.load_workflow(workflow_id)
            candidate_nodes = workflow.nodes if workflow else []
            recovery_result = self.error_handler.handle_matching_failure(
                screen_state=current_state,
                candidate_nodes=candidate_nodes,
                best_confidence=0.0,
                threshold=0.85
            )
            return {
                "execution_id": execution_id,
                "workflow_id": workflow_id,
                "success": False,
                "step_type": "state_matching",
                "error": "No matching state found in workflow",
                "recovery_strategy": recovery_result.strategy_used.value,
                "recovery_message": recovery_result.message,
                "execution_time_ms": (datetime.now() - start_time).total_seconds() * 1000,
                "correlation_id": execution_id
            }
        current_node_id = match_result["node_id"]
        logger.info(f"Matched current state to node: {current_node_id} (confidence: {match_result['confidence']:.3f})")
        # 2. Determine the next action from the matched node.
        action_info = self.get_next_action(workflow_id, current_node_id)
        if not action_info:
            # No outgoing edges: the workflow has reached its end.
            return {
                "execution_id": execution_id,
                "workflow_id": workflow_id,
                "success": True,
                "step_type": "workflow_complete",
                "message": "Workflow completed - no more actions",
                "current_node": current_node_id,
                "execution_time_ms": (datetime.now() - start_time).total_seconds() * 1000,
                "correlation_id": execution_id
            }
        logger.info(f"Next action: {action_info['action']['type']} -> {action_info['target_node']}")
        # 3. Load the workflow to retrieve the full edge object.
        workflow = self.load_workflow(workflow_id)
        if not workflow:
            return {
                "execution_id": execution_id,
                "workflow_id": workflow_id,
                "success": False,
                "step_type": "workflow_loading",
                "error": f"Failed to load workflow: {workflow_id}",
                "execution_time_ms": (datetime.now() - start_time).total_seconds() * 1000,
                "correlation_id": execution_id
            }
        # Find the matching edge (by id, or by endpoints as a fallback).
        edge = None
        for e in workflow.edges:
            if (hasattr(e, 'edge_id') and e.edge_id == action_info['edge_id']) or \
               (e.from_node == current_node_id and e.to_node == action_info['target_node']):
                edge = e
                break
        if not edge:
            return {
                "execution_id": execution_id,
                "workflow_id": workflow_id,
                "success": False,
                "step_type": "edge_resolution",
                "error": f"Edge not found: {current_node_id} -> {action_info['target_node']}",
                "execution_time_ms": (datetime.now() - start_time).total_seconds() * 1000,
                "correlation_id": execution_id
            }
        # 4. Execute the action (ActionExecutor already uses ErrorHandler).
        execution_result = self.action_executor.execute_edge(
            edge=edge,
            screen_state=current_state,
            context=context
        )
        # 5. Apply extra recovery handling for specific failure statuses.
        recovery_applied = None
        if execution_result.status != ExecutionStatus.SUCCESS:
            if execution_result.status == ExecutionStatus.TARGET_NOT_FOUND:
                # Already recovered inside ActionExecutor; just trace it.
                logger.info("Target not found - ActionExecutor applied recovery strategies")
            elif execution_result.status == ExecutionStatus.POSTCONDITION_FAILED:
                recovery_result = self.error_handler.handle_postcondition_failure(
                    edge=edge,
                    screen_state=current_state,
                    timeout_ms=5000
                )
                recovery_applied = {
                    "strategy": recovery_result.strategy_used.value,
                    "message": recovery_result.message,
                    "success": recovery_result.success
                }
                logger.warning(f"Post-condition failed - Recovery: {recovery_result.message}")
        # 6. Build the final result payload.
        total_time_ms = (datetime.now() - start_time).total_seconds() * 1000
        result = {
            "execution_id": execution_id,
            "workflow_id": workflow_id,
            "success": execution_result.status == ExecutionStatus.SUCCESS,
            "step_type": "action_execution",
            "current_node": current_node_id,
            "target_node": action_info['target_node'],
            "action_executed": {
                "type": action_info['action']['type'],
                "target": action_info['action'].get('target'),
                "parameters": action_info['action'].get('parameters', {})
            },
            "execution_result": {
                "status": execution_result.status.value,
                "message": execution_result.message,
                "duration_ms": execution_result.duration_ms
            },
            "match_result": match_result,
            "execution_time_ms": total_time_ms,
            "correlation_id": execution_id
        }
        # Attach optional details only when they exist.
        if recovery_applied:
            result["recovery_applied"] = recovery_applied
        if execution_result.target_resolved:
            result["target_resolved"] = {
                "element_id": execution_result.target_resolved.element.element_id,
                "confidence": execution_result.target_resolved.confidence,
                "method": getattr(execution_result.target_resolved, 'method', 'standard'),
                "bbox": execution_result.target_resolved.element.bbox
            }
        if execution_result.error:
            result["error"] = str(execution_result.error)
        if result["success"]:
            logger.info(f"Workflow step executed successfully in {total_time_ms:.1f}ms")
        else:
            logger.error(f"Workflow step failed: {execution_result.message}")
        return result
    except Exception as e:
        total_time_ms = (datetime.now() - start_time).total_seconds() * 1000
        logger.error(f"Workflow step execution failed with exception: {e}", exc_info=True)
        # Route the exception through the centralized ErrorHandler.
        context_data = {
            'screen_state': current_state,
            'details': {
                'operation': 'workflow_step_execution',
                'workflow_id': workflow_id,
                'execution_id': execution_id,
                'exception_type': type(e).__name__
            },
            'original_data': {
                'workflow_id': workflow_id,
                'execution_id': execution_id
            }
        }
        recovery_result = self.error_handler.handle_error(e, context_data)
        # BUG FIX: the original had unreachable statements after its first
        # return here (they referenced an undefined `error_ctx`, which
        # would have raised NameError) plus a second dead return. Both
        # payloads are merged into this single, reachable one; the old
        # keys (message, total_time_ms, recovery_attempted, ...) are kept
        # for backward compatibility.
        return {
            "execution_id": execution_id,
            "workflow_id": workflow_id,
            "success": False,
            "step_type": "execution_error",
            "message": f"Workflow execution exception: {str(e)}",
            "error": str(e),
            "recovery_attempted": recovery_result.success,
            "recovery_message": recovery_result.message if recovery_result else None,
            "total_time_ms": total_time_ms,
            "execution_time_ms": total_time_ms,
            "correlation_id": execution_id
        }
# =============================================================================
# Factory function
# =============================================================================
def create_pipeline(
    data_dir: str = "data",
    use_gpu: bool = False,
    enable_ui_detection: bool = True,
    action_executor: Optional[ActionExecutor] = None,
    target_resolver: Optional[TargetResolver] = None,
    error_handler: Optional[ErrorHandler] = None
) -> WorkflowPipeline:
    """
    Factory helper: build a WorkflowPipeline with default configuration.

    Args:
        data_dir: Data directory.
        use_gpu: Use the GPU when available.
        enable_ui_detection: Enable UI detection.
        action_executor: ActionExecutor instance (created by default when None).
        target_resolver: TargetResolver instance (created by default when None).
        error_handler: ErrorHandler instance (created by default when None).

    Returns:
        A configured WorkflowPipeline.
    """
    pipeline = WorkflowPipeline(
        data_dir=data_dir,
        use_gpu=use_gpu,
        enable_ui_detection=enable_ui_detection,
        action_executor=action_executor,
        target_resolver=target_resolver,
        error_handler=error_handler
    )
    return pipeline

View File

@@ -0,0 +1,384 @@
"""
Amélioration de WorkflowPipeline pour utiliser WorkflowExecutionResult avec métadonnées complètes
Cette version améliore la méthode execute_workflow_step pour retourner un objet
WorkflowExecutionResult au lieu d'un dictionnaire, incluant toutes les métadonnées
requises : correlation_id, performance_metrics, recovery_applied.
Auteur: Dom, Alice Kiro - 20 décembre 2024
"""
import logging
import uuid
from datetime import datetime
from typing import Optional, Dict, Any
from core.models.screen_state import ScreenState
from core.models.execution_result import (
WorkflowExecutionResult,
PerformanceMetrics,
RecoveryInfo,
StepExecutionStatus
)
from core.execution.action_executor import ExecutionStatus
logger = logging.getLogger(__name__)
class WorkflowPipelineEnhanced:
    """
    Mixin extending WorkflowPipeline with a complete ExecutionResult.

    This class can be used to extend an existing WorkflowPipeline,
    or as a reference for the migration.

    NOTE(review): the method below assumes it is bound to an object that
    provides match_current_state, get_next_action, load_workflow,
    action_executor and error_handler — i.e. a WorkflowPipeline instance.
    """
    def execute_workflow_step_enhanced(
        self,
        workflow_id: str,
        current_state: ScreenState,
        context: Optional[Dict[str, Any]] = None
    ) -> WorkflowExecutionResult:
        """
        Execute one complete end-to-end workflow step with full metadata.

        Integrated execution pipeline:
        1. Match the current state against the workflow
        2. Get the next action to execute
        3. Resolve the target with TargetResolver
        4. Execute the action with ActionExecutor
        5. Handle errors with ErrorHandler and the appropriate strategies
        6. Return a WorkflowExecutionResult with complete metadata

        Args:
            workflow_id: ID of the workflow to execute
            current_state: Current screen state
            context: Optional execution context (variables, etc.)

        Returns:
            WorkflowExecutionResult with complete metadata including:
            - a unique correlation_id for traceability
            - detailed per-phase performance_metrics
            - recovery_applied when recovery strategies were used
            - execution_details for custom metadata
        """
        # Generate the unique identifiers (execution_id for this step,
        # correlation_id for cross-component traceability)
        execution_id = str(uuid.uuid4())
        correlation_id = str(uuid.uuid4())
        start_time = datetime.now()
        logger.info(f"Executing workflow step: {workflow_id} (execution_id: {execution_id}, correlation_id: {correlation_id})")
        # Initialize the performance metrics; per-phase fields are filled in below
        performance_metrics = PerformanceMetrics(total_execution_time_ms=0.0)
        try:
            # 1. Match the current state, timing the phase
            match_start = datetime.now()
            match_result = self.match_current_state(
                screenshot_path=current_state.raw.screenshot_path,
                workflow_id=workflow_id,
                window_title=current_state.window.window_title
            )
            performance_metrics.state_matching_time_ms = (datetime.now() - match_start).total_seconds() * 1000
            if not match_result:
                # Handle the matching failure through ErrorHandler
                workflow = self.load_workflow(workflow_id)
                candidate_nodes = workflow.nodes if workflow else []
                recovery_start = datetime.now()
                recovery_result = self.error_handler.handle_matching_failure(
                    screen_state=current_state,
                    candidate_nodes=candidate_nodes,
                    best_confidence=0.0,
                    threshold=0.85
                )
                recovery_duration = (datetime.now() - recovery_start).total_seconds() * 1000
                # Build the recovery information
                recovery_info = RecoveryInfo(
                    strategy=recovery_result.strategy_used.value,
                    message=recovery_result.message,
                    success=recovery_result.success,
                    attempts=1,
                    duration_ms=recovery_duration
                )
                # Finalize the metrics
                performance_metrics.total_execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000
                performance_metrics.error_handling_time_ms = recovery_duration
                # Build and return the no_match result
                result = WorkflowExecutionResult.no_match(
                    execution_id=execution_id,
                    workflow_id=workflow_id,
                    current_state=current_state,
                    recovery_info=recovery_info,
                    performance_metrics=performance_metrics
                )
                result.correlation_id = correlation_id
                logger.warning(f"No match found for workflow {workflow_id}, applied recovery: {recovery_result.strategy_used.value}")
                return result
            current_node_id = match_result["node_id"]
            logger.info(f"Matched current state to node: {current_node_id} (confidence: {match_result['confidence']:.3f})")
            # 2. Get the next action
            action_info = self.get_next_action(workflow_id, current_node_id)
            if not action_info:
                # No outgoing action from this node: the workflow is complete
                performance_metrics.total_execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000
                result = WorkflowExecutionResult.workflow_complete(
                    execution_id=execution_id,
                    workflow_id=workflow_id,
                    current_node=current_node_id,
                    performance_metrics=performance_metrics
                )
                result.correlation_id = correlation_id
                result.match_result = match_result
                logger.info(f"Workflow {workflow_id} completed at node {current_node_id}")
                return result
            logger.info(f"Next action: {action_info['action']['type']} -> {action_info['target_node']}")
            # 3. Load the workflow to get the full edge object
            workflow = self.load_workflow(workflow_id)
            if not workflow:
                performance_metrics.total_execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000
                result = WorkflowExecutionResult.error(
                    execution_id=execution_id,
                    workflow_id=workflow_id,
                    error_message=f"Failed to load workflow: {workflow_id}",
                    step_type="workflow_loading",
                    current_node=current_node_id,
                    performance_metrics=performance_metrics
                )
                result.correlation_id = correlation_id
                logger.error(f"Failed to load workflow: {workflow_id}")
                return result
            # Find the matching edge: by edge_id when the edge carries one,
            # otherwise by the (from_node, to_node) pair
            edge = None
            for e in workflow.edges:
                if (hasattr(e, 'edge_id') and e.edge_id == action_info['edge_id']) or \
                    (e.from_node == current_node_id and e.to_node == action_info['target_node']):
                    edge = e
                    break
            if not edge:
                performance_metrics.total_execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000
                result = WorkflowExecutionResult.error(
                    execution_id=execution_id,
                    workflow_id=workflow_id,
                    error_message=f"Edge not found: {current_node_id} -> {action_info['target_node']}",
                    step_type="edge_resolution",
                    current_node=current_node_id,
                    performance_metrics=performance_metrics
                )
                result.correlation_id = correlation_id
                logger.error(f"Edge not found: {current_node_id} -> {action_info['target_node']}")
                return result
            # 4. Execute the action with ActionExecutor, timing the phase
            execution_start = datetime.now()
            execution_result = self.action_executor.execute_edge(
                edge=edge,
                screen_state=current_state,
                context=context
            )
            performance_metrics.action_execution_time_ms = (datetime.now() - execution_start).total_seconds() * 1000
            # 5. Handle status-specific errors with ErrorHandler when needed
            recovery_info = None
            if execution_result.status != ExecutionStatus.SUCCESS:
                recovery_start = datetime.now()
                if execution_result.status == ExecutionStatus.TARGET_NOT_FOUND:
                    # ActionExecutor already handled this; we only add logging
                    logger.info("Target not found - ActionExecutor applied recovery strategies")
                    # Build recovery info reflecting what ActionExecutor already did
                    recovery_info = RecoveryInfo(
                        strategy="target_resolution_fallback",
                        message="ActionExecutor applied target resolution fallback strategies",
                        success=False,  # Since the status is still TARGET_NOT_FOUND
                        attempts=1,
                        duration_ms=0.0  # ActionExecutor already measured its own time
                    )
                elif execution_result.status == ExecutionStatus.POSTCONDITION_FAILED:
                    # Handle the post-condition failure
                    recovery_result = self.error_handler.handle_postcondition_failure(
                        edge=edge,
                        screen_state=current_state,
                        timeout_ms=5000
                    )
                    recovery_duration = (datetime.now() - recovery_start).total_seconds() * 1000
                    recovery_info = RecoveryInfo(
                        strategy=recovery_result.strategy_used.value,
                        message=recovery_result.message,
                        success=recovery_result.success,
                        attempts=1,
                        duration_ms=recovery_duration
                    )
                    performance_metrics.error_handling_time_ms = recovery_duration
                    logger.warning(f"Post-condition failed - Recovery: {recovery_result.message}")
            # 6. Build the final result with complete metadata
            performance_metrics.total_execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000
            # Build the executed-action dictionary with full details
            action_executed = {
                "edge_id": action_info.get('edge_id', 'unknown'),
                "type": action_info['action']['type'],
                "target": action_info['action'].get('target'),
                "parameters": action_info['action'].get('parameters', {}),
                "execution_status": execution_result.status.value,
                "execution_message": execution_result.message,
                "execution_duration_ms": execution_result.duration_ms
            }
            if execution_result.status == ExecutionStatus.SUCCESS:
                # Build the success result
                result = WorkflowExecutionResult.success(
                    execution_id=execution_id,
                    workflow_id=workflow_id,
                    current_node=current_node_id,
                    target_node=action_info['target_node'],
                    action_executed=action_executed,
                    target_resolved=execution_result.target_resolved,
                    match_result=match_result,
                    performance_metrics=performance_metrics
                )
                result.correlation_id = correlation_id
                # Attach custom execution details
                result.add_execution_detail("action_confidence", action_info.get('confidence', 1.0))
                result.add_execution_detail("match_confidence", match_result.get('confidence', 0.0))
                if context:
                    result.add_execution_detail("execution_context", context)
                logger.info(f"Workflow step executed successfully in {performance_metrics.total_execution_time_ms:.1f}ms")
            else:
                # Build the error result
                result = WorkflowExecutionResult.error(
                    execution_id=execution_id,
                    workflow_id=workflow_id,
                    error_message=execution_result.message,
                    step_type="action_execution",
                    current_node=current_node_id,
                    recovery_info=recovery_info,
                    performance_metrics=performance_metrics
                )
                result.correlation_id = correlation_id
                result.target_node = action_info['target_node']
                result.action_executed = action_executed
                result.target_resolved = execution_result.target_resolved
                result.match_result = match_result
                # Attach error details
                result.add_execution_detail("action_confidence", action_info.get('confidence', 1.0))
                result.add_execution_detail("match_confidence", match_result.get('confidence', 0.0))
                if execution_result.error:
                    result.add_execution_detail("original_error", str(execution_result.error))
                logger.error(f"Workflow step failed: {execution_result.message}")
            return result
        except Exception as e:
            # Exception handling with complete metadata
            performance_metrics.total_execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000
            logger.error(f"Workflow step execution failed with exception: {e}", exc_info=True)
            # Use ErrorHandler to record the exception (local import avoids a
            # module-level import cycle with core.execution.error_handler)
            from core.execution.error_handler import ErrorContext, ErrorType
            error_ctx = ErrorContext(
                error_type=ErrorType.UNKNOWN,
                timestamp=datetime.now(),
                screen_state=current_state,
                message=f"Workflow execution exception: {str(e)}",
                details={
                    "workflow_id": workflow_id,
                    "execution_id": execution_id,
                    "correlation_id": correlation_id,
                    "exception_type": type(e).__name__
                }
            )
            # NOTE(review): appends to error_history and calls the private
            # _log_error directly — relies on ErrorHandler internals
            self.error_handler.error_history.append(error_ctx)
            self.error_handler._log_error(error_ctx)
            # Build the error result with complete metadata
            result = WorkflowExecutionResult.error(
                execution_id=execution_id,
                workflow_id=workflow_id,
                error_message=str(e),
                step_type="execution_error",
                performance_metrics=performance_metrics
            )
            result.correlation_id = correlation_id
            # Attach exception details
            result.add_execution_detail("exception_type", type(e).__name__)
            result.add_execution_detail("exception_traceback", str(e))
            if context:
                result.add_execution_detail("execution_context", context)
            return result
def enhance_workflow_pipeline(pipeline_instance):
    """
    Utility to extend an existing WorkflowPipeline instance with the
    execute_workflow_step_enhanced method.

    Args:
        pipeline_instance: WorkflowPipeline instance to enhance

    Returns:
        The same instance, with the new method bound to it
    """
    # Bind the mixin's plain function to the pipeline instance via the
    # descriptor protocol so that `self` inside the method is the pipeline.
    # The previous implementation invoked `.call(...)` on a bound method —
    # a JavaScript idiom that does not exist in Python and raised
    # AttributeError at call time.
    pipeline_instance.execute_workflow_step_enhanced = \
        WorkflowPipelineEnhanced.execute_workflow_step_enhanced.__get__(
            pipeline_instance, type(pipeline_instance)
        )
    return pipeline_instance
# Migration helper that replaces the existing method in place
def migrate_execute_workflow_step(pipeline_instance):
    """
    Migrate the existing execute_workflow_step method to the enhanced version.

    WARNING: this function replaces the existing method. Use with caution.

    Args:
        pipeline_instance: WorkflowPipeline instance to migrate

    Returns:
        The instance with the migrated method
    """
    # Keep the previous implementation reachable for rollback/debugging
    if hasattr(pipeline_instance, 'execute_workflow_step'):
        pipeline_instance._execute_workflow_step_legacy = pipeline_instance.execute_workflow_step
    # Bind the mixin's function directly to the pipeline instance. The
    # previous implementation called __get__ on a *bound* method of a
    # throwaway mixin instance, which is a no-op: `self` stayed bound to
    # the mixin, so every `self.*` access inside the method (e.g.
    # self.match_current_state) would have failed at runtime.
    pipeline_instance.execute_workflow_step = \
        WorkflowPipelineEnhanced.execute_workflow_step_enhanced.__get__(
            pipeline_instance, type(pipeline_instance)
        )
    logger.info("WorkflowPipeline.execute_workflow_step migrated to enhanced version with complete metadata")
    return pipeline_instance