diff --git a/core/execution/input_handler.py b/core/execution/input_handler.py index 4d212c30b..c262edbfe 100644 --- a/core/execution/input_handler.py +++ b/core/execution/input_handler.py @@ -424,7 +424,7 @@ def _describe_anchor_image(anchor_image_base64: str) -> Optional[str]: return None except Exception as e: - logger.debug(f"Description ancre échouée: {e}") + logger.warning(f"[Grounding] Description ancre échouée: {e}") return None diff --git a/visual_workflow_builder/backend/services/intelligent_executor.py b/visual_workflow_builder/backend/services/intelligent_executor.py index 30cf5b275..fc0d73841 100644 --- a/visual_workflow_builder/backend/services/intelligent_executor.py +++ b/visual_workflow_builder/backend/services/intelligent_executor.py @@ -776,52 +776,6 @@ def find_and_click( global_result['search_time_ms'] = (_time.time() - start_time) * 1000 return global_result - # === STRATÉGIE 4: SeeClick (visual grounding) === - # Essayer SeeClick si les autres méthodes ont échoué - try: - print("🎯 [Vision] Essai SeeClick (visual grounding)...") - from core.detection.seeclick_adapter import get_seeclick - - seeclick = get_seeclick() - if seeclick.available: - # Utiliser une description générique basée sur l'ancre - # TODO: Améliorer avec une description plus précise - description = "the clickable element or button" - - grounding_result = seeclick.ground(screen_image, description, return_pixels=True) - - if grounding_result.found: - found_x = grounding_result.x_pixel - found_y = grounding_result.y_pixel - - # Vérifier la distance à la position originale si anchor_bbox existe - accept_seeclick = True - if anchor_bbox: - orig_x = anchor_bbox.get('x', 0) + anchor_bbox.get('width', 0) // 2 - orig_y = anchor_bbox.get('y', 0) + anchor_bbox.get('height', 0) // 2 - distance = np.sqrt((found_x - orig_x)**2 + (found_y - orig_y)**2) - - MAX_SEECLICK_DISTANCE = 500 - if distance > MAX_SEECLICK_DISTANCE: - print(f"⛔ [Vision] SeeClick rejeté: distance {distance:.0f}px > {MAX_SEECLICK_DISTANCE}px max") - accept_seeclick = False - - if accept_seeclick: - print(f"✅ [Vision] SeeClick réussi! Coordonnées: ({found_x}, {found_y})") - return { - 'found': True, - 'confidence': grounding_result.confidence, - 'coordinates': {'x': found_x, 'y': found_y}, - 'bbox': anchor_bbox, - 'method': 'seeclick_grounding', - 'search_time_ms': (_time.time() - start_time) * 1000, - 'raw_output': grounding_result.raw_output - } - except ImportError: - print("ℹ️ [Vision] SeeClick non disponible (module non trouvé)") - except Exception as seeclick_err: - print(f"⚠️ [Vision] Erreur SeeClick: {seeclick_err}") - # === FALLBACK: Chaîne de grounding (OCR → UI-TARS → VLM) === if target_text or target_description: try: