feat(grounding): pipeline centralisé + serveur UI-TARS transformers + nettoyage code mort

Architecture grounding complète : - core/grounding/server.py : serveur FastAPI (port 8200) avec UI-TARS-1.5-7B en 4-bit NF4 Process séparé avec son propre contexte CUDA (résout le crash Flask/CUDA) - core/grounding/pipeline.py : orchestrateur cascade template→OCR→UI-TARS→static - core/grounding/template_matcher.py : TemplateMatcher centralisé (remplace 5 copies) - core/grounding/ui_tars_grounder.py : client HTTP vers le serveur de grounding - core/grounding/target.py : GroundingTarget + GroundingResult ORA modifié : - _act_click() : capture unique de l'écran envoyée au serveur de grounding - Pre-check VLM skippé pour ui_tars (redondant, et Ollama n'a plus de VRAM) - verify_level='none' par défaut (vérification titre OCR prévue en Phase 2) - Détection réponses négatives UI-TARS ("I don't see it" → fallback OCR) Nettoyage : - 9 fichiers morts archivés dans _archive/ (~6300 lignes supprimées) - 21 tests ajoutés pour TemplateMatcher Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-25 17:48:18 +02:00
parent 16ff396dbf
commit 9da589c8c2
20 changed files with 1862 additions and 15 deletions
--- a/core/pipeline/workflow_pipeline_enhanced.py
+++ b/core/pipeline/workflow_pipeline_enhanced.py
@@ -1,406 +0,0 @@
-"""
-Amélioration de WorkflowPipeline pour utiliser WorkflowExecutionResult avec métadonnées complètes
-
-Cette version améliore la méthode execute_workflow_step pour retourner un objet
-WorkflowExecutionResult au lieu d'un dictionnaire, incluant toutes les métadonnées
-requises : correlation_id, performance_metrics, recovery_applied.
-
-Auteur: Dom, Alice Kiro - 20 décembre 2024
-"""
-
-import logging
-import uuid
-from datetime import datetime
-from typing import Optional, Dict, Any
-
-from core.models.screen_state import ScreenState
-from core.models.execution_result import (
-    WorkflowExecutionResult, 
-    PerformanceMetrics, 
-    RecoveryInfo, 
-    StepExecutionStatus
-)
-from core.execution.action_executor import ExecutionStatus
-
-logger = logging.getLogger(__name__)
-
-
-class WorkflowPipelineEnhanced:
-    """
-    Mixin pour améliorer WorkflowPipeline avec ExecutionResult complet.
-    
-    Cette classe peut être utilisée pour étendre WorkflowPipeline existant
-    ou comme référence pour la migration.
-    """
-    
-    def execute_workflow_step_enhanced(
-        self,
-        workflow_id: str,
-        current_state: ScreenState,
-        context: Optional[Dict[str, Any]] = None
-    ) -> WorkflowExecutionResult:
-        """
-        Exécute une étape complète de workflow de bout en bout avec métadonnées complètes.
-        
-        Pipeline d'exécution intégré:
-        1. Matcher l'état actuel avec le workflow
-        2. Obtenir la prochaine action à exécuter
-        3. Résoudre la cible avec TargetResolver
-        4. Exécuter l'action avec ActionExecutor
-        5. Gérer les erreurs avec ErrorHandler et stratégies appropriées
-        6. Retourner WorkflowExecutionResult avec métadonnées complètes
-        
-        Args:
-            workflow_id: ID du workflow à exécuter
-            current_state: État actuel de l'écran
-            context: Contexte d'exécution optionnel (variables, etc.)
-        
-        Returns:
-            WorkflowExecutionResult avec métadonnées complètes incluant:
-            - correlation_id unique pour traçabilité
-            - performance_metrics détaillées par phase
-            - recovery_applied si des stratégies de récupération ont été utilisées
-            - execution_details pour métadonnées personnalisées
-        """
-        # Générer les identifiants uniques
-        execution_id = str(uuid.uuid4())
-        correlation_id = str(uuid.uuid4())
-        start_time = datetime.now()
-        
-        logger.info(f"Executing workflow step: {workflow_id} (execution_id: {execution_id}, correlation_id: {correlation_id})")
-        
-        # Initialiser les métriques de performance
-        performance_metrics = PerformanceMetrics(total_execution_time_ms=0.0)
-        
-        try:
-            # 1. Matcher l'état actuel avec mesure de performance
-            match_start = datetime.now()
-            match_result = self.match_current_state(
-                screenshot_path=current_state.raw.screenshot_path,
-                workflow_id=workflow_id,
-                window_title=current_state.window.window_title
-            )
-            performance_metrics.state_matching_time_ms = (datetime.now() - match_start).total_seconds() * 1000
-            
-            if not match_result:
-                # Gérer l'échec de matching avec ErrorHandler
-                workflow = self.load_workflow(workflow_id)
-                candidate_nodes = workflow.nodes if workflow else []
-                
-                recovery_start = datetime.now()
-                recovery_result = self.error_handler.handle_matching_failure(
-                    screen_state=current_state,
-                    candidate_nodes=candidate_nodes,
-                    best_confidence=0.0,
-                    threshold=0.85
-                )
-                recovery_duration = (datetime.now() - recovery_start).total_seconds() * 1000
-                
-                # Créer les informations de récupération
-                recovery_info = RecoveryInfo(
-                    strategy=recovery_result.strategy_used.value,
-                    message=recovery_result.message,
-                    success=recovery_result.success,
-                    attempts=1,
-                    duration_ms=recovery_duration
-                )
-                
-                # Finaliser les métriques
-                performance_metrics.total_execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000
-                performance_metrics.error_handling_time_ms = recovery_duration
-                
-                # Créer et retourner le résultat de no_match
-                result = WorkflowExecutionResult.no_match(
-                    execution_id=execution_id,
-                    workflow_id=workflow_id,
-                    current_state=current_state,
-                    recovery_info=recovery_info,
-                    performance_metrics=performance_metrics
-                )
-                result.correlation_id = correlation_id
-                
-                logger.warning(f"No match found for workflow {workflow_id}, applied recovery: {recovery_result.strategy_used.value}")
-                return result
-            
-            current_node_id = match_result["node_id"]
-            logger.info(f"Matched current state to node: {current_node_id} (confidence: {match_result['confidence']:.3f})")
-            
-            # 2. Obtenir la prochaine action (contrat dict avec status explicite)
-            action_info = self.get_next_action(workflow_id, current_node_id)
-            action_status = action_info.get("status")
-
-            if action_status == "terminal":
-                # Workflow terminé (aucun outgoing_edge = fin légitime)
-                performance_metrics.total_execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000
-
-                result = WorkflowExecutionResult.workflow_complete(
-                    execution_id=execution_id,
-                    workflow_id=workflow_id,
-                    current_node=current_node_id,
-                    performance_metrics=performance_metrics,
-                )
-                result.correlation_id = correlation_id
-                result.match_result = match_result
-
-                logger.info(f"Workflow {workflow_id} completed at node {current_node_id}")
-                return result
-
-            if action_status == "blocked":
-                # Des edges existent mais aucun ne passe les filtres :
-                # c'est un blocage, pas une fin de workflow.
-                performance_metrics.total_execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000
-
-                result = WorkflowExecutionResult.error(
-                    execution_id=execution_id,
-                    workflow_id=workflow_id,
-                    error_message=f"No valid edge: {action_info.get('reason', 'unknown')}",
-                    step_type="action_selection",
-                    current_node=current_node_id,
-                    performance_metrics=performance_metrics,
-                )
-                result.correlation_id = correlation_id
-
-                logger.warning(
-                    f"Workflow {workflow_id} blocked at node {current_node_id}: "
-                    f"{action_info.get('reason')}"
-                )
-                return result
-
-            logger.info(f"Next action: {action_info['action']['type']} -> {action_info['target_node']}")
-            
-            # 3. Charger le workflow pour obtenir l'edge complet
-            workflow = self.load_workflow(workflow_id)
-            if not workflow:
-                performance_metrics.total_execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000
-                
-                result = WorkflowExecutionResult.error(
-                    execution_id=execution_id,
-                    workflow_id=workflow_id,
-                    error_message=f"Failed to load workflow: {workflow_id}",
-                    step_type="workflow_loading",
-                    current_node=current_node_id,
-                    performance_metrics=performance_metrics
-                )
-                result.correlation_id = correlation_id
-                
-                logger.error(f"Failed to load workflow: {workflow_id}")
-                return result
-            
-            # Trouver l'edge correspondant
-            edge = None
-            for e in workflow.edges:
-                if (hasattr(e, 'edge_id') and e.edge_id == action_info['edge_id']) or \
-                   (e.from_node == current_node_id and e.to_node == action_info['target_node']):
-                    edge = e
-                    break
-            
-            if not edge:
-                performance_metrics.total_execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000
-                
-                result = WorkflowExecutionResult.error(
-                    execution_id=execution_id,
-                    workflow_id=workflow_id,
-                    error_message=f"Edge not found: {current_node_id} -> {action_info['target_node']}",
-                    step_type="edge_resolution",
-                    current_node=current_node_id,
-                    performance_metrics=performance_metrics
-                )
-                result.correlation_id = correlation_id
-                
-                logger.error(f"Edge not found: {current_node_id} -> {action_info['target_node']}")
-                return result
-            
-            # 4. Exécuter l'action avec ActionExecutor avec mesure de performance
-            execution_start = datetime.now()
-            execution_result = self.action_executor.execute_edge(
-                edge=edge,
-                screen_state=current_state,
-                context=context
-            )
-            performance_metrics.action_execution_time_ms = (datetime.now() - execution_start).total_seconds() * 1000
-            
-            # 5. Gérer les erreurs spécifiques avec ErrorHandler si nécessaire
-            recovery_info = None
-            if execution_result.status != ExecutionStatus.SUCCESS:
-                recovery_start = datetime.now()
-                
-                if execution_result.status == ExecutionStatus.TARGET_NOT_FOUND:
-                    # ActionExecutor a déjà géré cela, mais on peut ajouter du logging
-                    logger.info("Target not found - ActionExecutor applied recovery strategies")
-                    # Créer une info de récupération basée sur ce qui a été fait par ActionExecutor
-                    recovery_info = RecoveryInfo(
-                        strategy="target_resolution_fallback",
-                        message="ActionExecutor applied target resolution fallback strategies",
-                        success=False,  # Puisque le statut est encore TARGET_NOT_FOUND
-                        attempts=1,
-                        duration_ms=0.0  # ActionExecutor a déjà mesuré son temps
-                    )
-                    
-                elif execution_result.status == ExecutionStatus.POSTCONDITION_FAILED:
-                    # Gérer l'échec de post-conditions
-                    recovery_result = self.error_handler.handle_postcondition_failure(
-                        edge=edge,
-                        screen_state=current_state,
-                        timeout_ms=5000
-                    )
-                    recovery_duration = (datetime.now() - recovery_start).total_seconds() * 1000
-                    
-                    recovery_info = RecoveryInfo(
-                        strategy=recovery_result.strategy_used.value,
-                        message=recovery_result.message,
-                        success=recovery_result.success,
-                        attempts=1,
-                        duration_ms=recovery_duration
-                    )
-                    performance_metrics.error_handling_time_ms = recovery_duration
-                    logger.warning(f"Post-condition failed - Recovery: {recovery_result.message}")
-            
-            # 6. Construire le résultat final avec métadonnées complètes
-            performance_metrics.total_execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000
-            
-            # Créer le dictionnaire d'action exécutée avec détails complets
-            action_executed = {
-                "edge_id": action_info.get('edge_id', 'unknown'),
-                "type": action_info['action']['type'],
-                "target": action_info['action'].get('target'),
-                "parameters": action_info['action'].get('parameters', {}),
-                "execution_status": execution_result.status.value,
-                "execution_message": execution_result.message,
-                "execution_duration_ms": execution_result.duration_ms
-            }
-            
-            if execution_result.status == ExecutionStatus.SUCCESS:
-                # Créer le résultat de succès
-                result = WorkflowExecutionResult.success(
-                    execution_id=execution_id,
-                    workflow_id=workflow_id,
-                    current_node=current_node_id,
-                    target_node=action_info['target_node'],
-                    action_executed=action_executed,
-                    target_resolved=execution_result.target_resolved,
-                    match_result=match_result,
-                    performance_metrics=performance_metrics
-                )
-                result.correlation_id = correlation_id
-                
-                # Ajouter des détails d'exécution personnalisés
-                result.add_execution_detail("action_confidence", action_info.get('confidence', 1.0))
-                result.add_execution_detail("match_confidence", match_result.get('confidence', 0.0))
-                if context:
-                    result.add_execution_detail("execution_context", context)
-                
-                logger.info(f"Workflow step executed successfully in {performance_metrics.total_execution_time_ms:.1f}ms")
-                
-            else:
-                # Créer le résultat d'erreur
-                result = WorkflowExecutionResult.error(
-                    execution_id=execution_id,
-                    workflow_id=workflow_id,
-                    error_message=execution_result.message,
-                    step_type="action_execution",
-                    current_node=current_node_id,
-                    recovery_info=recovery_info,
-                    performance_metrics=performance_metrics
-                )
-                result.correlation_id = correlation_id
-                result.target_node = action_info['target_node']
-                result.action_executed = action_executed
-                result.target_resolved = execution_result.target_resolved
-                result.match_result = match_result
-                
-                # Ajouter des détails d'erreur
-                result.add_execution_detail("action_confidence", action_info.get('confidence', 1.0))
-                result.add_execution_detail("match_confidence", match_result.get('confidence', 0.0))
-                if execution_result.error:
-                    result.add_execution_detail("original_error", str(execution_result.error))
-                
-                logger.error(f"Workflow step failed: {execution_result.message}")
-            
-            return result
-            
-        except Exception as e:
-            # Gestion des exceptions avec métadonnées complètes
-            performance_metrics.total_execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000
-            logger.error(f"Workflow step execution failed with exception: {e}", exc_info=True)
-            
-            # Utiliser ErrorHandler pour logger l'exception
-            from core.execution.error_handler import ErrorContext, ErrorType
-            error_ctx = ErrorContext(
-                error_type=ErrorType.UNKNOWN,
-                timestamp=datetime.now(),
-                screen_state=current_state,
-                message=f"Workflow execution exception: {str(e)}",
-                details={
-                    "workflow_id": workflow_id,
-                    "execution_id": execution_id,
-                    "correlation_id": correlation_id,
-                    "exception_type": type(e).__name__
-                }
-            )
-            self.error_handler.error_history.append(error_ctx)
-            self.error_handler._log_error(error_ctx)
-            
-            # Créer le résultat d'erreur avec métadonnées complètes
-            result = WorkflowExecutionResult.error(
-                execution_id=execution_id,
-                workflow_id=workflow_id,
-                error_message=str(e),
-                step_type="execution_error",
-                performance_metrics=performance_metrics
-            )
-            result.correlation_id = correlation_id
-            
-            # Ajouter des détails d'exception
-            result.add_execution_detail("exception_type", type(e).__name__)
-            result.add_execution_detail("exception_traceback", str(e))
-            if context:
-                result.add_execution_detail("execution_context", context)
-            
-            return result
-
-
-def enhance_workflow_pipeline(pipeline_instance):
-    """
-    Fonction utilitaire pour améliorer une instance existante de WorkflowPipeline
-    avec la méthode execute_workflow_step_enhanced.
-    
-    Args:
-        pipeline_instance: Instance de WorkflowPipeline à améliorer
-    
-    Returns:
-        L'instance améliorée avec la nouvelle méthode
-    """
-    # Ajouter la méthode améliorée à l'instance
-    enhanced_mixin = WorkflowPipelineEnhanced()
-    
-    # Lier les méthodes nécessaires
-    pipeline_instance.execute_workflow_step_enhanced = lambda *args, **kwargs: \
-        enhanced_mixin.execute_workflow_step_enhanced.call(pipeline_instance, *args, **kwargs)
-    
-    return pipeline_instance
-
-
-# Fonction de migration pour remplacer la méthode existante
-def migrate_execute_workflow_step(pipeline_instance):
-    """
-    Migre la méthode execute_workflow_step existante vers la version améliorée.
-    
-    ATTENTION: Cette fonction remplace la méthode existante. Utilisez avec précaution.
-    
-    Args:
-        pipeline_instance: Instance de WorkflowPipeline à migrer
-    
-    Returns:
-        L'instance avec la méthode migrée
-    """
-    # Sauvegarder l'ancienne méthode si nécessaire
-    if hasattr(pipeline_instance, 'execute_workflow_step'):
-        pipeline_instance._execute_workflow_step_legacy = pipeline_instance.execute_workflow_step
-    
-    # Remplacer par la version améliorée
-    enhanced_mixin = WorkflowPipelineEnhanced()
-    pipeline_instance.execute_workflow_step = lambda *args, **kwargs: \
-        enhanced_mixin.execute_workflow_step_enhanced.__get__(pipeline_instance, type(pipeline_instance))(*args, **kwargs)
-    
-    logger.info("WorkflowPipeline.execute_workflow_step migrated to enhanced version with complete metadata")
-    return pipeline_instance