feat: instructions en langage naturel via boucle ORA

reason_instruction() : le VLM regarde l'écran, décide la prochaine action atomique (click/type/hotkey/scroll/done) et retourne un Decision avec expected_after pour la vérification.

run_instruction() : boucle ORA complète pour les instructions texte. Le CognitiveContext est mis à jour à chaque étape (objectif, historique, faits appris, confiance).

POST /api/v3/execute/instruction : endpoint API pour lancer une instruction en langage naturel. L'exécution tourne dans un thread daemon ; le résultat se récupère par polling via GET /api/v3/execute/instruction/result.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-22 09:09:53 +02:00
parent 0c5fffe951
commit 0ec5e2a25b
2 changed files with 490 additions and 0 deletions
--- a/visual_workflow_builder/backend/api_v3/execute.py
+++ b/visual_workflow_builder/backend/api_v3/execute.py
@@ -1901,3 +1901,140 @@ def get_healing_candidates():
        'step_info': step_info,
        'original_bbox': step_info.get('original_bbox')
    })
+
+
+# ═══════════════════════════════════════════════════════════
+# Mode INSTRUCTION (Phase 4) — exécution par langage naturel
+# ═══════════════════════════════════════════════════════════
+
+@api_v3_bp.route('/execute/instruction', methods=['POST'])
+def execute_instruction():
+    """Start the execution of a natural-language instruction.
+
+    The request is validated, the single execution slot is claimed, and an
+    ``ORALoop`` is run in a daemon thread via ``run_instruction()``. The
+    outcome is stored in ``_execution_state['last_instruction_result']``.
+
+    POST JSON:
+        instruction (str): The natural-language instruction (required).
+        max_steps (int, opt): Maximum number of steps (default 30).
+        verify_level (str, opt): 'none' | 'phash' | 'vlm' | 'auto'
+            (default 'auto').
+
+    Returns:
+        202 with the execution ID once the worker thread has started.
+        400 if the instruction is missing/empty or an option is invalid.
+        409 if another execution is already running.
+    """
+    # silent=True: a missing or malformed JSON body is reported through the
+    # same JSON 400 response as an empty instruction.
+    data = request.get_json(silent=True)
+    if not isinstance(data, dict):
+        data = {}
+
+    instruction = data.get('instruction', '')
+    # A non-string value (e.g. JSON null) used to crash on .strip().
+    if not isinstance(instruction, str) or not instruction.strip():
+        return jsonify({'success': False, 'error': 'Instruction vide'}), 400
+    instruction = instruction.strip()
+
+    max_steps = data.get('max_steps', 30)
+    # bool is a subclass of int, so it has to be excluded explicitly.
+    if (
+        isinstance(max_steps, bool)
+        or not isinstance(max_steps, int)
+        or max_steps < 1
+    ):
+        return jsonify({
+            'success': False,
+            'error': 'max_steps doit être un entier positif'
+        }), 400
+
+    verify_level = data.get('verify_level', 'auto')
+    if verify_level not in ('none', 'phash', 'vlm', 'auto'):
+        return jsonify({
+            'success': False,
+            'error': "verify_level invalide (none, phash, vlm, auto)"
+        }), 400
+
+    exec_id = generate_id('instr')
+
+    # Check and claim the execution slot in ONE critical section. Doing the
+    # check and the claim under separate lock acquisitions lets two
+    # concurrent requests both observe is_running == False and both start.
+    with _execution_lock:
+        if _execution_state['is_running']:
+            return jsonify({
+                'success': False,
+                'error': 'Une exécution est déjà en cours'
+            }), 409
+        _execution_state['is_running'] = True
+        _execution_state['should_stop'] = False
+        _execution_state['current_execution_id'] = exec_id
+
+    def release_slot(summary=None):
+        """Free the execution slot, optionally recording a result first."""
+        with _execution_lock:
+            if summary is not None:
+                _execution_state['last_instruction_result'] = summary
+            _execution_state['is_running'] = False
+            _execution_state['current_execution_id'] = None
+
+    def run():
+        """Worker thread: run the ORA loop and publish its result."""
+        result = None
+        try:
+            from core.execution.observe_reason_act import ORALoop
+
+            loop = ORALoop(
+                max_retries=2,
+                max_steps=max_steps,
+                verify_level=verify_level,
+            )
+
+            logger.info(f"🚀 [Instruction] Démarrage: '{instruction}' (exec_id={exec_id})")
+
+            def on_progress(step_num, total, verification):
+                # Publish the progress of the current step; step_num is
+                # converted to a 0-based index.
+                with _execution_lock:
+                    _execution_state['current_step_info'] = {
+                        'index': step_num - 1,
+                        'total': total,
+                        'verification': {
+                            'success': verification.success,
+                            'change_level': verification.change_level,
+                            'detail': verification.detail,
+                        } if verification else None,
+                    }
+
+            result = loop.run_instruction(instruction, on_progress=on_progress)
+            summary = {
+                'success': result.success,
+                'steps_completed': result.steps_completed,
+                'total_steps': result.total_steps,
+                'reason': result.reason,
+                'instruction': instruction,
+                'exec_id': exec_id,
+            }
+        except Exception as e:
+            # Top-level boundary of the worker thread: log, then report the
+            # failure through the polled result instead of dying silently.
+            logger.error(f"❌ [Instruction] Erreur fatale: {e}", exc_info=True)
+            result = None
+            summary = {
+                'success': False,
+                'steps_completed': 0,
+                'total_steps': max_steps,
+                'reason': f'Erreur fatale: {e}',
+                'instruction': instruction,
+                'exec_id': exec_id,
+            }
+
+        release_slot(summary)
+
+        if result is not None:
+            emoji = "✅" if result.success else "❌"
+            logger.info(
+                f"{emoji} [Instruction] Terminé: success={result.success}, "
+                f"steps={result.steps_completed}/{result.total_steps}, "
+                f"reason={result.reason}"
+            )
+
+    try:
+        # Minimize the VWB browser window to reach the target screen.
+        minimize_active_window()
+
+        thread = threading.Thread(target=run, daemon=True, name=f'instruction-{exec_id}')
+        thread.start()
+    except Exception:
+        # The slot is already claimed: free it, otherwise every later
+        # request would be rejected with 409 until the process restarts.
+        release_slot()
+        raise
+
+    return jsonify({
+        'success': True,
+        'message': f'Instruction lancée: {instruction}',
+        'exec_id': exec_id,
+    }), 202
+
+
+@api_v3_bp.route('/execute/instruction/result', methods=['GET'])
+def get_instruction_result():
+    """Report the outcome of the most recent instruction execution.
+
+    Returns:
+        JSON object with ``success`` (always True), ``is_running`` (the
+        current running flag) and ``result`` (the stored
+        ``last_instruction_result``, or null when none has been recorded).
+    """
+    # Snapshot both values under the lock so they are read consistently.
+    with _execution_lock:
+        last_result = _execution_state.get('last_instruction_result')
+        payload = {
+            'success': True,
+            'is_running': _execution_state['is_running'],
+            'result': last_result,
+        }
+
+    return jsonify(payload)