From 8e0f0ecb0fce4ed11c4b166a9ad1712a7056ce83 Mon Sep 17 00:00:00 2001 From: Alexey Taymano Date: Tue, 9 Sep 2025 20:10:39 +0000 Subject: [PATCH 1/2] bind to default qemu address --- omnitool/omnibox/compose.yml | 3 +-- omnitool/omnibox/vm/win11setup/setupscripts/server/main.py | 3 +-- omnitool/readme.md | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/omnitool/omnibox/compose.yml b/omnitool/omnibox/compose.yml index 9ad6b90..4bf4c43 100644 --- a/omnitool/omnibox/compose.yml +++ b/omnitool/omnibox/compose.yml @@ -13,8 +13,7 @@ services: cap_add: - NET_ADMIN ports: - - 8006:8006 # Web Viewer access - - 5000:5000 # Computer control server + - 8006:8006 # Web Viewer access volumes: - ./vm/win11iso/custom.iso:/custom.iso - ./vm/win11setup/firstboot:/oem diff --git a/omnitool/omnibox/vm/win11setup/setupscripts/server/main.py b/omnitool/omnibox/vm/win11setup/setupscripts/server/main.py index 8414889..348e483 100644 --- a/omnitool/omnibox/vm/win11setup/setupscripts/server/main.py +++ b/omnitool/omnibox/vm/win11setup/setupscripts/server/main.py @@ -69,7 +69,6 @@ def capture_screen_with_cursor(): # make the cursor smaller cursor = cursor.resize((int(cursor.width / 1.5), int(cursor.height / 1.5))) screenshot.paste(cursor, (cursor_x, cursor_y), cursor) - # Convert PIL Image to bytes and send img_io = BytesIO() @@ -78,4 +77,4 @@ def capture_screen_with_cursor(): return send_file(img_io, mimetype='image/png') if __name__ == '__main__': - app.run(host="127.0.0.1", port=args.port) \ No newline at end of file + app.run(host="10.0.2.15", port=args.port) \ No newline at end of file diff --git a/omnitool/readme.md b/omnitool/readme.md index a45e867..6ebf92d 100644 --- a/omnitool/readme.md +++ b/omnitool/readme.md @@ -103,7 +103,7 @@ There are three components: If your internet speed is slow and you want a minimal VM with less preinstalled apps comment out lines 57 to 350 in this 
[file](https://github.com/microsoft/OmniParser/blob/master/omnitool/omnibox/vm/win11setup/setupscripts/setup.ps1) that defines all the apps to install when you first create the container + VM. Ensure that you follow factory reset instructions from the next section when creating your VM to wipe any previous omnibox setup. ### Validation errors: Windows Host is not responding -If you get this error in Gradio after clicking the submit button, this indicates that the server running in the VM that accepts commands from Gradio and then moves the mouse/ keyboard isn't available. You can verify this by running `curl http://localhost:5000/probe`. Ensure your `omnibox` is fully finished setting up (should no longer have a terminal window). Refer to the omnibox section for timing on that. If you have set up your omnibox, it may be a matter of waiting a little. +If you get this error in Gradio after clicking the submit button, this indicates that the server running in the VM that accepts commands from Gradio and then moves the mouse/keyboard isn't available. You can verify this by running `docker exec -it omni-windows bash -c "curl http://localhost:5000/probe"`. Ensure your `omnibox` is fully finished setting up (should no longer have a terminal window). Refer to the omnibox section for timing on that. If you have set up your omnibox, it may be a matter of waiting a little. If waiting 10 mins doesn't help. Try stopping (`./manage_vm.sh stop`) and starting (`./manage_vm.sh start`) your omnibox VM with the script commands. 
From 8186ddef43479f427d894bcec6f58dd07dbfdda8 Mon Sep 17 00:00:00 2001 From: Alexey Taymano Date: Tue, 9 Sep 2025 20:36:23 +0000 Subject: [PATCH 2/2] update manage vm script --- omnitool/omnibox/scripts/manage_vm.sh | 4 +- .../vm/win11setup/setupscripts/server/main.py | 72 ++++++++++++------- 2 files changed, 47 insertions(+), 29 deletions(-) diff --git a/omnitool/omnibox/scripts/manage_vm.sh b/omnitool/omnibox/scripts/manage_vm.sh index 6acf45a..d17b245 100755 --- a/omnitool/omnibox/scripts/manage_vm.sh +++ b/omnitool/omnibox/scripts/manage_vm.sh @@ -12,7 +12,7 @@ create_vm() { # Wait for the VM to start up while true; do - response=$(curl --write-out '%{http_code}' --silent --output /dev/null localhost:5000/probe) + response=$(docker exec -it omni-windows bash -c "curl --write-out '%{http_code}' --silent --output /dev/null localhost:5000/probe") if [ $response -eq 200 ]; then break fi @@ -27,7 +27,7 @@ start_vm() { echo "Starting VM..." docker compose -f ../compose.yml start while true; do - response=$(curl --write-out '%{http_code}' --silent --output /dev/null localhost:5000/probe) + response=$(docker exec -it omni-windows bash -c "curl --write-out '%{http_code}' --silent --output /dev/null localhost:5000/probe") if [ $response -eq 200 ]; then break fi diff --git a/omnitool/omnibox/vm/win11setup/setupscripts/server/main.py b/omnitool/omnibox/vm/win11setup/setupscripts/server/main.py index 348e483..586d0bd 100644 --- a/omnitool/omnibox/vm/win11setup/setupscripts/server/main.py +++ b/omnitool/omnibox/vm/win11setup/setupscripts/server/main.py @@ -10,6 +10,50 @@ import pyautogui from PIL import Image from io import BytesIO + +def execute_anything(data): + """Execute any command received in the JSON request. + WARNING: This function executes commands without any safety checks.""" + # The 'command' key in the JSON request should contain the command to be executed. 
+ shell = data.get('shell', False) + command = data.get('command', "" if shell else []) + + if isinstance(command, str) and not shell: + command = shlex.split(command) + + # Expand user directory + for i, arg in enumerate(command): + if arg.startswith("~/"): + command[i] = os.path.expanduser(arg) + + # Execute the command without any safety checks. + try: + result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=shell, text=True, timeout=120) + return jsonify({ + 'status': 'success', + 'output': result.stdout, + 'error': result.stderr, + 'returncode': result.returncode + }) + except Exception as e: + logger.error("\n" + traceback.format_exc() + "\n") + return jsonify({ + 'status': 'error', + 'message': str(e) + }), 500 + + +def execute(data): + """Action space aware implementation. Should not use arbitrary code execution.""" + return jsonify({ + 'status': 'error', + 'message': 'Not implemented. Please add your implementation to omnitool/omnibox/vm/win11setup/setupscripts/server/main.py.' + }), 500 + + +execute_impl = execute # switch to execute_anything to allow any command. Please use with caution only for testing purposes. + + parser = argparse.ArgumentParser() parser.add_argument("--log_file", help="log file path", type=str, default=os.path.join(os.path.dirname(__file__), "server.log")) @@ -32,33 +76,7 @@ def execute_command(): # Only execute one command at a time with computer_control_lock: data = request.json - # The 'command' key in the JSON request should contain the command to be executed. - shell = data.get('shell', False) - command = data.get('command', "" if shell else []) - - if isinstance(command, str) and not shell: - command = shlex.split(command) - - # Expand user directory - for i, arg in enumerate(command): - if arg.startswith("~/"): - command[i] = os.path.expanduser(arg) - - # Execute the command without any safety checks. 
- try: - result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=shell, text=True, timeout=120) - return jsonify({ - 'status': 'success', - 'output': result.stdout, - 'error': result.stderr, - 'returncode': result.returncode - }) - except Exception as e: - logger.error("\n" + traceback.format_exc() + "\n") - return jsonify({ - 'status': 'error', - 'message': str(e) - }), 500 + return execute_impl(data) @app.route('/screenshot', methods=['GET']) def capture_screen_with_cursor():