# Dockerfile for OmniParser with GPU support and OpenGL libraries # # This Dockerfile is intended to create an environment with NVIDIA CUDA # support and the necessary dependencies to run the OmniParser project. # The configuration is designed to support applications that rely on # Python 3.12, OpenCV, Hugging Face transformers, and Gradio. Additionally, # it includes steps to pull large files from Git LFS and a script to # convert model weights from .safetensor to .pt format. The container # runs a Gradio server by default, exposed on port 7861. # # Base image: nvidia/cuda:12.3.1-devel-ubuntu22.04 # # Key features: # - System dependencies for OpenGL to support graphical libraries. # - Miniconda for Python 3.12, allowing for environment management. # - Git Large File Storage (LFS) setup for handling large model files. # - Requirement file installation, including specific versions of # OpenCV and Hugging Face Hub. # - Entrypoint script execution with Gradio server configuration for # external access. FROM nvidia/cuda:12.3.1-devel-ubuntu22.04 # Install system dependencies with explicit OpenGL libraries RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ git \ git-lfs \ wget \ libgl1 \ libglib2.0-0 \ libsm6 \ libxext6 \ libxrender1 \ libglu1-mesa \ libglib2.0-0 \ libsm6 \ libxrender1 \ libxext6 \ python3-opencv \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* \ && git lfs install # Install Miniconda for Python 3.12 RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh && \ bash miniconda.sh -b -p /opt/conda && \ rm miniconda.sh ENV PATH="/opt/conda/bin:$PATH" # Create and activate Conda environment with Python 3.12, and set it as the default RUN conda create -n omni python=3.12 && \ echo "source activate omni" > ~/.bashrc ENV CONDA_DEFAULT_ENV=omni ENV PATH="/opt/conda/envs/omni/bin:$PATH" # Set the working directory in the container WORKDIR /usr/src/app # Copy project files and requirements COPY . . COPY requirements.txt /usr/src/app/requirements.txt # Initialize Git LFS and pull LFS files RUN git lfs install && \ git lfs pull # Install dependencies from requirements.txt with specific opencv-python-headless version RUN . /opt/conda/etc/profile.d/conda.sh && conda activate omni && \ pip uninstall -y opencv-python opencv-python-headless && \ pip install --no-cache-dir opencv-python-headless==4.8.1.78 && \ pip install -r requirements.txt && \ pip install huggingface_hub # Run download.py to fetch model weights and convert safetensors to .pt format # RUN . /opt/conda/etc/profile.d/conda.sh && conda activate omni && \ # python download.py && \ # echo "Contents of weights directory:" && \ # ls -lR weights && \ # python weights/convert_safetensor_to_pt.py # Expose the default Gradio port EXPOSE 7861 # Configure Gradio to be accessible externally ENV GRADIO_SERVER_NAME="0.0.0.0" # Copy and set permissions for entrypoint script # COPY entrypoint.sh /usr/src/app/entrypoint.sh # RUN chmod +x /usr/src/app/entrypoint.sh # To debug, keep the container running # CMD ["tail", "-f", "/dev/null"] ################################################################################################ # virtual display related setup --> from anthropic-quickstarts/computer-use-demo/Dockerfile ENV DEBIAN_FRONTEND=noninteractive ENV DEBIAN_PRIORITY=high RUN apt-get update && \ apt-get -y upgrade && \ apt-get -y install \ # UI Requirements xvfb \ xterm \ xdotool \ scrot \ imagemagick \ sudo \ mutter \ x11vnc \ # Python/pyenv reqs build-essential \ libssl-dev \ zlib1g-dev \ libbz2-dev \ libreadline-dev \ libsqlite3-dev \ curl \ git \ libncursesw5-dev \ xz-utils \ tk-dev \ libxml2-dev \ libxmlsec1-dev \ libffi-dev \ liblzma-dev \ # Network tools net-tools \ netcat \ # PPA req software-properties-common && \ # Userland apps sudo add-apt-repository ppa:mozillateam/ppa && \ sudo apt-get install -y --no-install-recommends \ libreoffice \ firefox-esr \ x11-apps \ xpdf \ gedit \ xpaint \ tint2 \ galculator \ pcmanfm \ unzip && \ apt-get clean # Install noVNC RUN git clone --branch v1.5.0 https://github.com/novnc/noVNC.git /opt/noVNC && \ git clone --branch v0.12.0 https://github.com/novnc/websockify /opt/noVNC/utils/websockify && \ ln -s /opt/noVNC/vnc.html /opt/noVNC/index.html # setup user ENV USERNAME=computeruse ENV HOME=/home/$USERNAME RUN useradd -m -s /bin/bash -d $HOME $USERNAME RUN echo "${USERNAME} ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers USER computeruse WORKDIR $HOME # setup python RUN git clone https://github.com/pyenv/pyenv.git ~/.pyenv && \ cd ~/.pyenv && src/configure && make -C src && cd .. && \ echo 'export PYENV_ROOT="$HOME/.pyenv"' >> ~/.bashrc && \ echo 'command -v pyenv >/dev/null || export PATH="$PYENV_ROOT/bin:$PATH"' >> ~/.bashrc && \ echo 'eval "$(pyenv init -)"' >> ~/.bashrc ENV PYENV_ROOT="$HOME/.pyenv" ENV PATH="$PYENV_ROOT/bin:$PATH" ENV PYENV_VERSION_MAJOR=3 ENV PYENV_VERSION_MINOR=11 ENV PYENV_VERSION_PATCH=6 ENV PYENV_VERSION=$PYENV_VERSION_MAJOR.$PYENV_VERSION_MINOR.$PYENV_VERSION_PATCH RUN eval "$(pyenv init -)" && \ pyenv install $PYENV_VERSION && \ pyenv global $PYENV_VERSION && \ pyenv rehash ENV PATH="$HOME/.pyenv/shims:$HOME/.pyenv/bin:$PATH" RUN python -m pip install --upgrade pip==23.1.2 setuptools==58.0.4 wheel==0.40.0 && \ python -m pip config set global.disable-pip-version-check true # only reinstall if requirements.txt changes # COPY --chown=$USERNAME:$USERNAME computer_use_demo/requirements.txt $HOME/computer_use_demo/requirements.txt # RUN python -m pip install -r $HOME/computer_use_demo/requirements.txt # setup desktop env & app # COPY --chown=$USERNAME:$USERNAME image/ $HOME # COPY --chown=$USERNAME:$USERNAME computer_use_demo/ $HOME/computer_use_demo/ ARG DISPLAY_NUM=1 ARG HEIGHT=768 ARG WIDTH=1024 ENV DISPLAY_NUM=$DISPLAY_NUM ENV HEIGHT=$HEIGHT ENV WIDTH=$WIDTH # Set the entrypoint # ENTRYPOINT ["/usr/src/app/entrypoint.sh"] # docker build . -t omniparser-x-demo:local # manually build the docker image (optional)