Merge pull request #337 from microsoft/ataymano/sec_updates

Security updates
This commit is contained in:
Alexey Taymanov
2025-09-09 16:51:59 -04:00
committed by GitHub
4 changed files with 50 additions and 34 deletions

View File

@@ -13,8 +13,7 @@ services:
cap_add:
- NET_ADMIN
ports:
- 8006:8006 # Web Viewer access
- 5000:5000 # Computer control server
- 8006:8006 # Web Viewer access
volumes:
- ./vm/win11iso/custom.iso:/custom.iso
- ./vm/win11setup/firstboot:/oem

View File

@@ -12,7 +12,7 @@ create_vm() {
# Wait for the VM to start up
while true; do
response=$(curl --write-out '%{http_code}' --silent --output /dev/null localhost:5000/probe)
response=$(docker exec -it omni-windows bash -c "curl --write-out '%{http_code}' --silent --output /dev/null localhost:5000/probe")
if [ $response -eq 200 ]; then
break
fi
@@ -27,7 +27,7 @@ start_vm() {
echo "Starting VM..."
docker compose -f ../compose.yml start
while true; do
response=$(curl --write-out '%{http_code}' --silent --output /dev/null localhost:5000/probe)
response=$(docker exec -it omni-windows bash -c "curl --write-out '%{http_code}' --silent --output /dev/null localhost:5000/probe")
if [ $response -eq 200 ]; then
break
fi

View File

@@ -10,6 +10,50 @@ import pyautogui
from PIL import Image
from io import BytesIO
def execute_anything(data):
    """Execute the command described by the JSON request and report its result.

    WARNING: This function executes commands without any safety checks.
    Enable it only for testing.

    Args:
        data: Parsed JSON request body. The 'command' key holds the command to
            run (a string or a list of arguments). The optional 'shell' key
            (default False) is forwarded to ``subprocess.run``.

    Returns:
        A JSON response with 'status', 'output', 'error' and 'returncode' once
        the command has run, or a ``(response, 500)`` pair with 'status' and
        'message' when preparing or running the command raised an exception
        (including the 120 second timeout).
    """
    try:
        # Input preparation lives inside the try so that a malformed request
        # (unbalanced quotes, null command, non-string arguments) is reported
        # through the same JSON error response as a failing command.
        shell = data.get('shell', False)
        command = data.get('command', "" if shell else [])
        if isinstance(command, str) and not shell:
            command = shlex.split(command)
        # Expand user directory. Only argument lists are rewritten: a shell
        # string is handed to the shell untouched rather than being walked
        # character by character.
        if isinstance(command, list):
            for i, arg in enumerate(command):
                if isinstance(arg, str) and arg.startswith("~/"):
                    command[i] = os.path.expanduser(arg)
        # Execute the command without any safety checks.
        result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=shell, text=True, timeout=120)
        return jsonify({
            'status': 'success',
            'output': result.stdout,
            'error': result.stderr,
            'returncode': result.returncode
        })
    except Exception as e:
        logger.error("\n" + traceback.format_exc() + "\n")
        return jsonify({
            'status': 'error',
            'message': str(e)
        }), 500
def execute(data):
    """Stub for the action space aware handler; it must not run arbitrary code.

    Until a real implementation is added, every request is answered with an
    error payload and HTTP status 500.
    """
    not_implemented = {
        'status': 'error',
        'message': 'Not implemented. Please add your implementation to omnitool/omnibox/vm/win11setup/setupscripts/server/main.py.',
    }
    return jsonify(not_implemented), 500
# Handler that receives the JSON payload of each command request.
# It defaults to the stub `execute`, which always answers with an error.
execute_impl = execute # switch to execute_anything to allow any command. Please use with caution only for testing purposes.
# Command-line options; --log_file defaults to server.log next to this script.
parser = argparse.ArgumentParser()
parser.add_argument("--log_file", help="log file path", type=str,
default=os.path.join(os.path.dirname(__file__), "server.log"))
@@ -32,33 +76,7 @@ def execute_command():
# Only execute one command at a time
with computer_control_lock:
data = request.json
# The 'command' key in the JSON request should contain the command to be executed.
shell = data.get('shell', False)
command = data.get('command', "" if shell else [])
if isinstance(command, str) and not shell:
command = shlex.split(command)
# Expand user directory
for i, arg in enumerate(command):
if arg.startswith("~/"):
command[i] = os.path.expanduser(arg)
# Execute the command without any safety checks.
try:
result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=shell, text=True, timeout=120)
return jsonify({
'status': 'success',
'output': result.stdout,
'error': result.stderr,
'returncode': result.returncode
})
except Exception as e:
logger.error("\n" + traceback.format_exc() + "\n")
return jsonify({
'status': 'error',
'message': str(e)
}), 500
return execute_impl(data)
@app.route('/screenshot', methods=['GET'])
def capture_screen_with_cursor():
@@ -70,7 +88,6 @@ def capture_screen_with_cursor():
cursor = cursor.resize((int(cursor.width / 1.5), int(cursor.height / 1.5)))
screenshot.paste(cursor, (cursor_x, cursor_y), cursor)
# Convert PIL Image to bytes and send
img_io = BytesIO()
screenshot.save(img_io, 'PNG')
@@ -78,4 +95,4 @@ def capture_screen_with_cursor():
return send_file(img_io, mimetype='image/png')
if __name__ == '__main__':
app.run(host="127.0.0.1", port=args.port)
app.run(host="10.0.2.15", port=args.port)

View File

@@ -103,7 +103,7 @@ There are three components:
If your internet speed is slow and you want a minimal VM with fewer preinstalled apps, comment out lines 57 to 350 in this [file](https://github.com/microsoft/OmniParser/blob/master/omnitool/omnibox/vm/win11setup/setupscripts/setup.ps1), which defines all the apps to install when you first create the container + VM. Ensure that you follow the factory reset instructions from the next section when creating your VM to wipe any previous omnibox setup.
### Validation errors: Windows Host is not responding
If you get this error in Gradio after clicking the submit button, this indicates that the server running in the VM that accepts commands from Gradio and then moves the mouse/ keyboard isn't available. You can verify this by running `curl http://localhost:5000/probe`. Ensure your `omnibox` is fully finished setting up (should no longer have a terminal window). Refer to the omnibox section for timing on that. If you have set up your omnibox, it may be a matter of waiting a little.
If you get this error in Gradio after clicking the submit button, this indicates that the server running in the VM that accepts commands from Gradio and then moves the mouse/keyboard isn't available. You can verify this by running `docker exec -it omni-windows bash -c "curl http://localhost:5000/probe"`. Ensure your `omnibox` is fully finished setting up (should no longer have a terminal window). Refer to the omnibox section for timing on that. If you have set up your omnibox, it may be a matter of waiting a little.
If waiting 10 minutes doesn't help, try stopping (`./manage_vm.sh stop`) and starting (`./manage_vm.sh start`) your omnibox VM with the script commands.