feat(grounding): centralized pipeline + transformers UI-TARS server + dead code cleanup
Complete grounding architecture:
- core/grounding/server.py: FastAPI server (port 8200) running UI-TARS-1.5-7B in 4-bit NF4
  (loading sketched further below). Runs as a separate process with its own CUDA context,
  which fixes the Flask/CUDA crash.
- core/grounding/pipeline.py: cascade orchestrator template→OCR→UI-TARS→static (sketched below)
- core/grounding/template_matcher.py: centralized TemplateMatcher (replaces 5 copies)
- core/grounding/ui_tars_grounder.py: HTTP client to the grounding server
- core/grounding/target.py: GroundingTarget + GroundingResult
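For context, a minimal sketch of the cascade described above, under assumed names (grounder
objects exposing locate(); the real pipeline.py is not part of this diff):

    # Hypothetical sketch of the template -> OCR -> UI-TARS -> static cascade.
    # Class and method names are illustrative, not the actual pipeline.py API.
    from typing import Optional, Sequence, Tuple

    class GroundingCascade:
        def __init__(self, grounders: Sequence):
            # Ordered stages, e.g. [template_matcher, ocr_grounder, ui_tars_grounder, static_grounder],
            # each exposing locate(screenshot_path, target) -> Optional[(x, y)].
            self.grounders = grounders

        def locate(self, screenshot_path: str, target: str) -> Optional[Tuple[int, int]]:
            # Return the first stage's hit; None means every stage gave up.
            for grounder in self.grounders:
                point = grounder.locate(screenshot_path, target)
                if point is not None:
                    return point
            return None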
ORA changes:
- _act_click(): a single screen capture is sent to the grounding server
- VLM pre-check skipped for ui_tars (redundant, and Ollama has no VRAM left)
- verify_level='none' by default (OCR title verification planned for Phase 2)
- Detection of negative UI-TARS answers ("I don't see it" → OCR fallback; sketched below)
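A sketch of the negative-answer check mentioned above; the marker list and helper name are
assumptions, only the "I don't see it" example comes from this commit:

    # Hypothetical helper: treat "can't find it" style answers from UI-TARS as a miss
    # so the caller falls back to OCR instead of clicking a bogus coordinate.
    NEGATIVE_MARKERS = ("i don't see", "i cannot find", "not visible", "no such element")

    def is_negative_answer(answer: str) -> bool:
        lowered = answer.lower()
        return any(marker in lowered for marker in NEGATIVE_MARKERS)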
Cleanup:
- 9 dead files archived in _archive/ (~6300 lines removed)
- 21 tests added for TemplateMatcher
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
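For reference, loading a model in 4-bit NF4 with transformers typically looks like the sketch
below; the checkpoint id, model class, and device map are assumptions, since server.py itself
is not shown in this diff:

    # Illustrative only: the usual transformers/bitsandbytes recipe for 4-bit NF4 loading.
    import torch
    from transformers import AutoModelForImageTextToText, AutoProcessor, BitsAndBytesConfig

    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
    model = AutoModelForImageTextToText.from_pretrained(
        "ByteDance-Seed/UI-TARS-1.5-7B",   # assumed checkpoint id
        quantization_config=quant_config,
        device_map="auto",                 # assumed; the commit gives the server its own CUDA context
    )
    processor = AutoProcessor.from_pretrained("ByteDance-Seed/UI-TARS-1.5-7B")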
@@ -1,406 +0,0 @@
"""
Enhancement of WorkflowPipeline to use WorkflowExecutionResult with complete metadata.

This version improves the execute_workflow_step method so that it returns a
WorkflowExecutionResult object instead of a dictionary, including all the required
metadata: correlation_id, performance_metrics, recovery_applied.

Author: Dom, Alice Kiro - December 20, 2024
"""

import logging
import traceback
import uuid
from datetime import datetime
from typing import Optional, Dict, Any

from core.models.screen_state import ScreenState
from core.models.execution_result import (
    WorkflowExecutionResult,
    PerformanceMetrics,
    RecoveryInfo,
    StepExecutionStatus
)
from core.execution.action_executor import ExecutionStatus

logger = logging.getLogger(__name__)


class WorkflowPipelineEnhanced:
    """
    Mixin that enhances WorkflowPipeline with a complete ExecutionResult.

    This class can be used to extend the existing WorkflowPipeline
    or as a reference for the migration.
    """

    def execute_workflow_step_enhanced(
        self,
        workflow_id: str,
        current_state: ScreenState,
        context: Optional[Dict[str, Any]] = None
    ) -> WorkflowExecutionResult:
        """
        Execute a complete workflow step end to end with full metadata.

        Integrated execution pipeline:
        1. Match the current state against the workflow
        2. Get the next action to execute
        3. Resolve the target with TargetResolver
        4. Execute the action with ActionExecutor
        5. Handle errors with ErrorHandler and the appropriate strategies
        6. Return a WorkflowExecutionResult with complete metadata

        Args:
            workflow_id: ID of the workflow to execute
            current_state: Current screen state
            context: Optional execution context (variables, etc.)

        Returns:
            WorkflowExecutionResult with complete metadata including:
            - a unique correlation_id for traceability
            - detailed performance_metrics per phase
            - recovery_applied if recovery strategies were used
            - execution_details for custom metadata
        """
        # Generate the unique identifiers
        execution_id = str(uuid.uuid4())
        correlation_id = str(uuid.uuid4())
        start_time = datetime.now()

        logger.info(f"Executing workflow step: {workflow_id} (execution_id: {execution_id}, correlation_id: {correlation_id})")

        # Initialize the performance metrics
        performance_metrics = PerformanceMetrics(total_execution_time_ms=0.0)

        try:
            # 1. Match the current state, measuring how long it takes
            match_start = datetime.now()
            match_result = self.match_current_state(
                screenshot_path=current_state.raw.screenshot_path,
                workflow_id=workflow_id,
                window_title=current_state.window.window_title
            )
            performance_metrics.state_matching_time_ms = (datetime.now() - match_start).total_seconds() * 1000

            if not match_result:
                # Handle the matching failure with ErrorHandler
                workflow = self.load_workflow(workflow_id)
                candidate_nodes = workflow.nodes if workflow else []

                recovery_start = datetime.now()
                recovery_result = self.error_handler.handle_matching_failure(
                    screen_state=current_state,
                    candidate_nodes=candidate_nodes,
                    best_confidence=0.0,
                    threshold=0.85
                )
                recovery_duration = (datetime.now() - recovery_start).total_seconds() * 1000

                # Build the recovery information
                recovery_info = RecoveryInfo(
                    strategy=recovery_result.strategy_used.value,
                    message=recovery_result.message,
                    success=recovery_result.success,
                    attempts=1,
                    duration_ms=recovery_duration
                )

                # Finalize the metrics
                performance_metrics.total_execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000
                performance_metrics.error_handling_time_ms = recovery_duration

                # Build and return the no_match result
                result = WorkflowExecutionResult.no_match(
                    execution_id=execution_id,
                    workflow_id=workflow_id,
                    current_state=current_state,
                    recovery_info=recovery_info,
                    performance_metrics=performance_metrics
                )
                result.correlation_id = correlation_id

                logger.warning(f"No match found for workflow {workflow_id}, applied recovery: {recovery_result.strategy_used.value}")
                return result

            current_node_id = match_result["node_id"]
            logger.info(f"Matched current state to node: {current_node_id} (confidence: {match_result['confidence']:.3f})")

            # 2. Get the next action (dict contract with an explicit status)
            action_info = self.get_next_action(workflow_id, current_node_id)
            action_status = action_info.get("status")

            if action_status == "terminal":
                # Workflow finished (no outgoing_edge = legitimate end)
                performance_metrics.total_execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000

                result = WorkflowExecutionResult.workflow_complete(
                    execution_id=execution_id,
                    workflow_id=workflow_id,
                    current_node=current_node_id,
                    performance_metrics=performance_metrics,
                )
                result.correlation_id = correlation_id
                result.match_result = match_result

                logger.info(f"Workflow {workflow_id} completed at node {current_node_id}")
                return result

            if action_status == "blocked":
                # Edges exist but none passes the filters:
                # this is a blockage, not the end of the workflow.
                performance_metrics.total_execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000

                result = WorkflowExecutionResult.error(
                    execution_id=execution_id,
                    workflow_id=workflow_id,
                    error_message=f"No valid edge: {action_info.get('reason', 'unknown')}",
                    step_type="action_selection",
                    current_node=current_node_id,
                    performance_metrics=performance_metrics,
                )
                result.correlation_id = correlation_id

                logger.warning(
                    f"Workflow {workflow_id} blocked at node {current_node_id}: "
                    f"{action_info.get('reason')}"
                )
                return result

            logger.info(f"Next action: {action_info['action']['type']} -> {action_info['target_node']}")

            # 3. Load the workflow to get the full edge
            workflow = self.load_workflow(workflow_id)
            if not workflow:
                performance_metrics.total_execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000

                result = WorkflowExecutionResult.error(
                    execution_id=execution_id,
                    workflow_id=workflow_id,
                    error_message=f"Failed to load workflow: {workflow_id}",
                    step_type="workflow_loading",
                    current_node=current_node_id,
                    performance_metrics=performance_metrics
                )
                result.correlation_id = correlation_id

                logger.error(f"Failed to load workflow: {workflow_id}")
                return result

            # Find the matching edge
            edge = None
            for e in workflow.edges:
                if (hasattr(e, 'edge_id') and e.edge_id == action_info['edge_id']) or \
                   (e.from_node == current_node_id and e.to_node == action_info['target_node']):
                    edge = e
                    break

            if not edge:
                performance_metrics.total_execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000

                result = WorkflowExecutionResult.error(
                    execution_id=execution_id,
                    workflow_id=workflow_id,
                    error_message=f"Edge not found: {current_node_id} -> {action_info['target_node']}",
                    step_type="edge_resolution",
                    current_node=current_node_id,
                    performance_metrics=performance_metrics
                )
                result.correlation_id = correlation_id

                logger.error(f"Edge not found: {current_node_id} -> {action_info['target_node']}")
                return result

            # 4. Execute the action with ActionExecutor, measuring how long it takes
            execution_start = datetime.now()
            execution_result = self.action_executor.execute_edge(
                edge=edge,
                screen_state=current_state,
                context=context
            )
            performance_metrics.action_execution_time_ms = (datetime.now() - execution_start).total_seconds() * 1000

            # 5. Handle specific errors with ErrorHandler if needed
            recovery_info = None
            if execution_result.status != ExecutionStatus.SUCCESS:
                recovery_start = datetime.now()

                if execution_result.status == ExecutionStatus.TARGET_NOT_FOUND:
                    # ActionExecutor already handled this, but we can add some logging
                    logger.info("Target not found - ActionExecutor applied recovery strategies")
                    # Build recovery info based on what ActionExecutor already did
                    recovery_info = RecoveryInfo(
                        strategy="target_resolution_fallback",
                        message="ActionExecutor applied target resolution fallback strategies",
                        success=False,  # The status is still TARGET_NOT_FOUND
                        attempts=1,
                        duration_ms=0.0  # ActionExecutor already measured its own time
                    )

                elif execution_result.status == ExecutionStatus.POSTCONDITION_FAILED:
                    # Handle the post-condition failure
                    recovery_result = self.error_handler.handle_postcondition_failure(
                        edge=edge,
                        screen_state=current_state,
                        timeout_ms=5000
                    )
                    recovery_duration = (datetime.now() - recovery_start).total_seconds() * 1000

                    recovery_info = RecoveryInfo(
                        strategy=recovery_result.strategy_used.value,
                        message=recovery_result.message,
                        success=recovery_result.success,
                        attempts=1,
                        duration_ms=recovery_duration
                    )
                    performance_metrics.error_handling_time_ms = recovery_duration
                    logger.warning(f"Post-condition failed - Recovery: {recovery_result.message}")

            # 6. Build the final result with complete metadata
            performance_metrics.total_execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000

            # Build the executed-action dictionary with full details
            action_executed = {
                "edge_id": action_info.get('edge_id', 'unknown'),
                "type": action_info['action']['type'],
                "target": action_info['action'].get('target'),
                "parameters": action_info['action'].get('parameters', {}),
                "execution_status": execution_result.status.value,
                "execution_message": execution_result.message,
                "execution_duration_ms": execution_result.duration_ms
            }

            if execution_result.status == ExecutionStatus.SUCCESS:
                # Build the success result
                result = WorkflowExecutionResult.success(
                    execution_id=execution_id,
                    workflow_id=workflow_id,
                    current_node=current_node_id,
                    target_node=action_info['target_node'],
                    action_executed=action_executed,
                    target_resolved=execution_result.target_resolved,
                    match_result=match_result,
                    performance_metrics=performance_metrics
                )
                result.correlation_id = correlation_id

                # Add custom execution details
                result.add_execution_detail("action_confidence", action_info.get('confidence', 1.0))
                result.add_execution_detail("match_confidence", match_result.get('confidence', 0.0))
                if context:
                    result.add_execution_detail("execution_context", context)

                logger.info(f"Workflow step executed successfully in {performance_metrics.total_execution_time_ms:.1f}ms")

            else:
                # Build the error result
                result = WorkflowExecutionResult.error(
                    execution_id=execution_id,
                    workflow_id=workflow_id,
                    error_message=execution_result.message,
                    step_type="action_execution",
                    current_node=current_node_id,
                    recovery_info=recovery_info,
                    performance_metrics=performance_metrics
                )
                result.correlation_id = correlation_id
                result.target_node = action_info['target_node']
                result.action_executed = action_executed
                result.target_resolved = execution_result.target_resolved
                result.match_result = match_result

                # Add error details
                result.add_execution_detail("action_confidence", action_info.get('confidence', 1.0))
                result.add_execution_detail("match_confidence", match_result.get('confidence', 0.0))
                if execution_result.error:
                    result.add_execution_detail("original_error", str(execution_result.error))

                logger.error(f"Workflow step failed: {execution_result.message}")

            return result

        except Exception as e:
            # Exception handling with complete metadata
            performance_metrics.total_execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000
            logger.error(f"Workflow step execution failed with exception: {e}", exc_info=True)

            # Use ErrorHandler to log the exception
            from core.execution.error_handler import ErrorContext, ErrorType
            error_ctx = ErrorContext(
                error_type=ErrorType.UNKNOWN,
                timestamp=datetime.now(),
                screen_state=current_state,
                message=f"Workflow execution exception: {str(e)}",
                details={
                    "workflow_id": workflow_id,
                    "execution_id": execution_id,
                    "correlation_id": correlation_id,
                    "exception_type": type(e).__name__
                }
            )
            self.error_handler.error_history.append(error_ctx)
            self.error_handler._log_error(error_ctx)

            # Build the error result with complete metadata
            result = WorkflowExecutionResult.error(
                execution_id=execution_id,
                workflow_id=workflow_id,
                error_message=str(e),
                step_type="execution_error",
                performance_metrics=performance_metrics
            )
            result.correlation_id = correlation_id

            # Add exception details
            result.add_execution_detail("exception_type", type(e).__name__)
            result.add_execution_detail("exception_traceback", traceback.format_exc())
            if context:
                result.add_execution_detail("execution_context", context)

            return result


def enhance_workflow_pipeline(pipeline_instance):
    """
    Utility function that enhances an existing WorkflowPipeline instance
    with the execute_workflow_step_enhanced method.

    Args:
        pipeline_instance: WorkflowPipeline instance to enhance

    Returns:
        The enhanced instance with the new method
    """
    # Attach the enhanced method, bound to the pipeline instance
    pipeline_instance.execute_workflow_step_enhanced = lambda *args, **kwargs: \
        WorkflowPipelineEnhanced.execute_workflow_step_enhanced(pipeline_instance, *args, **kwargs)

    return pipeline_instance


# Migration function that replaces the existing method
def migrate_execute_workflow_step(pipeline_instance):
    """
    Migrates the existing execute_workflow_step method to the enhanced version.

    WARNING: this function replaces the existing method. Use with caution.

    Args:
        pipeline_instance: WorkflowPipeline instance to migrate

    Returns:
        The instance with the migrated method
    """
    # Keep the old method around if needed
    if hasattr(pipeline_instance, 'execute_workflow_step'):
        pipeline_instance._execute_workflow_step_legacy = pipeline_instance.execute_workflow_step

    # Replace it with the enhanced version, bound to the pipeline instance
    pipeline_instance.execute_workflow_step = lambda *args, **kwargs: \
        WorkflowPipelineEnhanced.execute_workflow_step_enhanced(pipeline_instance, *args, **kwargs)

    logger.info("WorkflowPipeline.execute_workflow_step migrated to enhanced version with complete metadata")
    return pipeline_instance
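
# Usage sketch for the two helpers above (illustrative; how WorkflowPipeline is
# constructed is assumed and not shown in this file):
#
#     pipeline = WorkflowPipeline(...)                      # hypothetical constructor call
#     pipeline = migrate_execute_workflow_step(pipeline)
#     result = pipeline.execute_workflow_step(workflow_id="demo", current_state=state)
#     logger.info(result.correlation_id)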