add docker file, accelerate inference using cv2

2025-01-02 12:02:08 -08:00
parent d0c163cd02
commit 36b0cbea71
7 changed files with 320 additions and 26 deletions
--- a/201
+++ b/201
@@ -0,0 +1,201 @@
+# Dockerfile for OmniParser with GPU support and OpenGL libraries
+#
+# This Dockerfile is intended to create an environment with NVIDIA CUDA
+# support and the necessary dependencies to run the OmniParser project.
+# The configuration is designed to support applications that rely on
+# Python 3.12, OpenCV, Hugging Face transformers, and Gradio. Additionally,
+# it pulls large files from Git LFS. The steps that download the model
+# weights and convert them from .safetensors to .pt are currently commented
+# out, as is the entrypoint, so the image only declares Gradio port 7861
+# (EXPOSE) and does not start a Gradio server by itself.
+#
+# Base image: nvidia/cuda:12.3.1-devel-ubuntu22.04
+#
+# Key features:
+# - System dependencies for OpenGL to support graphical libraries.
+# - Miniconda for Python 3.12, allowing for environment management.
+# - Git Large File Storage (LFS) setup for handling large model files.
+# - Requirement file installation, including specific versions of
+#   OpenCV and Hugging Face Hub.
+# - Gradio server configuration for external access (the entrypoint
+#   script hook is present but currently commented out).
+
+# Base image for the whole build: the tag selects CUDA 12.3.1 ("devel"
+# flavour) on Ubuntu 22.04. Every instruction below runs on top of it.
+FROM nvidia/cuda:12.3.1-devel-ubuntu22.04
+
+# Install system dependencies with explicit OpenGL libraries.
+# - Each package is listed exactly once, in alphabetical order, so additions
+#   are easy to review.
+# - --no-install-recommends keeps optional extras out of the layer;
+#   ca-certificates is therefore named explicitly because the HTTPS
+#   downloads (wget, git) in later steps rely on it.
+# - The apt package lists are removed in the same layer that created them.
+# - `git lfs install` registers the Git LFS hooks/filters for the build user.
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+    ca-certificates \
+    git \
+    git-lfs \
+    libgl1 \
+    libglib2.0-0 \
+    libglu1-mesa \
+    libsm6 \
+    libxext6 \
+    libxrender1 \
+    python3-opencv \
+    wget \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/* \
+    && git lfs install
+
+# Download the Miniconda installer, run it in batch mode with /opt/conda as
+# the prefix, and delete the installer again within the same layer
+RUN wget -O miniconda.sh \
+        https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
+    && bash miniconda.sh -b -p /opt/conda \
+    && rm miniconda.sh
+# Put the conda executables first on PATH for all following instructions
+ENV PATH="/opt/conda/bin:${PATH}"
+
+# Create the "omni" Conda environment with Python 3.12 and make it the default.
+# -y confirms the package plan up front (a build has no terminal to answer
+# the prompt), and `conda clean` drops the downloaded package cache in the
+# same layer. The activation line is appended so that an existing ~/.bashrc
+# is not overwritten.
+RUN conda create -y -n omni python=3.12 && \
+    conda clean -ay && \
+    echo "source activate omni" >> ~/.bashrc
+# Mark "omni" as the active environment and put its binaries first on PATH
+ENV CONDA_DEFAULT_ENV=omni
+ENV PATH="/opt/conda/envs/omni/bin:$PATH"
+
+# Set the working directory in the container
+WORKDIR /usr/src/app
+
+# Install the Python dependencies before copying the project so this layer is
+# rebuilt only when requirements.txt changes, not on every source edit.
+# NOTE(review): assumes requirements.txt does not reference other files from
+# the project tree (e.g. local paths) — confirm.
+COPY requirements.txt ./requirements.txt
+
+# opencv-python-headless is pinned and installed ahead of requirements.txt;
+# --no-cache-dir keeps pip's download cache out of the layer.
+# NOTE(review): if requirements.txt also lists an OpenCV wheel, pip may still
+# replace or add to the pinned build — verify the resulting environment.
+RUN . /opt/conda/etc/profile.d/conda.sh && conda activate omni && \
+    pip uninstall -y opencv-python opencv-python-headless && \
+    pip install --no-cache-dir opencv-python-headless==4.8.1.78 && \
+    pip install --no-cache-dir -r requirements.txt && \
+    pip install --no-cache-dir huggingface_hub
+
+# Copy the project files (requirements.txt is included again by this copy,
+# so no separate COPY is needed afterwards)
+COPY . .
+
+# Initialize Git LFS and pull LFS files.
+# NOTE(review): `git lfs pull` needs the repository's .git directory to be
+# part of the build context — confirm it is not excluded by .dockerignore.
+RUN git lfs install && \
+    git lfs pull
+
+# Run download.py to fetch model weights and convert safetensors to .pt format
+# RUN . /opt/conda/etc/profile.d/conda.sh && conda activate omni && \
+#     python download.py && \
+#     echo "Contents of weights directory:" && \
+#     ls -lR weights && \
+#     python weights/convert_safetensor_to_pt.py
+
+# Document the port the Gradio server is expected to listen on
+EXPOSE 7861/tcp
+
+# Have Gradio listen on 0.0.0.0 so it can be reached from outside the container
+ENV GRADIO_SERVER_NAME=0.0.0.0
+
+# Copy and set permissions for entrypoint script
+# COPY entrypoint.sh /usr/src/app/entrypoint.sh
+# RUN chmod +x /usr/src/app/entrypoint.sh
+
+# To debug, keep the container running
+# CMD ["tail", "-f", "/dev/null"]
+
+################################################################################################
+# Virtual display setup, adapted from anthropic-quickstarts/computer-use-demo/Dockerfile
+
+# Build-time only: keep apt/debconf non-interactive for the RUN steps below
+# without persisting these settings into the environment of running
+# containers (ARG values are visible to later RUN instructions in this stage
+# but are not stored in the image's runtime environment).
+ARG DEBIAN_FRONTEND=noninteractive
+ARG DEBIAN_PRIORITY=high
+
+# Desktop/VNC tooling, Python build prerequisites and userland applications.
+# This step runs before any USER switch, so the commands are invoked directly
+# instead of through sudo (the sudo package is still installed for use inside
+# the container). `add-apt-repository -y` skips its confirmation prompt, and
+# the apt package lists are removed in the same layer to keep them out of the
+# image.
+RUN apt-get update && \
+    apt-get -y upgrade && \
+    apt-get -y install \
+    # UI Requirements
+    xvfb \
+    xterm \
+    xdotool \
+    scrot \
+    imagemagick \
+    sudo \
+    mutter \
+    x11vnc \
+    # Python/pyenv reqs
+    build-essential \
+    libssl-dev \
+    zlib1g-dev \
+    libbz2-dev \
+    libreadline-dev \
+    libsqlite3-dev \
+    curl \
+    git \
+    libncursesw5-dev \
+    xz-utils \
+    tk-dev \
+    libxml2-dev \
+    libxmlsec1-dev \
+    libffi-dev \
+    liblzma-dev \
+    # Network tools
+    net-tools \
+    netcat \
+    # PPA req
+    software-properties-common && \
+    # Userland apps
+    add-apt-repository -y ppa:mozillateam/ppa && \
+    apt-get install -y --no-install-recommends \
+    libreoffice \
+    firefox-esr \
+    x11-apps \
+    xpdf \
+    gedit \
+    xpaint \
+    tint2 \
+    galculator \
+    pcmanfm \
+    unzip && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Install noVNC and websockify at pinned release tags.
+# --depth 1 fetches only the tagged commit instead of the full history, which
+# keeps the layer small. index.html is symlinked to vnc.html.
+RUN git clone --branch v1.5.0 --depth 1 https://github.com/novnc/noVNC.git /opt/noVNC && \
+    git clone --branch v0.12.0 --depth 1 https://github.com/novnc/websockify /opt/noVNC/utils/websockify && \
+    ln -s /opt/noVNC/vnc.html /opt/noVNC/index.html
+
+# Set up the unprivileged desktop user.
+# USERNAME and HOME stay in separate ENV instructions because HOME is derived
+# from USERNAME, and an ENV instruction only sees values set before it.
+ENV USERNAME=computeruse
+ENV HOME=/home/$USERNAME
+# Create the account and grant it passwordless sudo through a dedicated
+# drop-in file (mode 0440) rather than by appending to /etc/sudoers itself.
+RUN useradd -m -s /bin/bash -d $HOME $USERNAME && \
+    echo "${USERNAME} ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/$USERNAME && \
+    chmod 0440 /etc/sudoers.d/$USERNAME
+# Switch to the account defined by USERNAME for the remaining instructions
+USER $USERNAME
+WORKDIR $HOME
+
+# Set up Python: fetch pyenv into ~/.pyenv, run its src/configure and make
+# step, and append the pyenv initialisation lines to ~/.bashrc
+RUN git clone https://github.com/pyenv/pyenv.git ~/.pyenv \
+    && (cd ~/.pyenv && src/configure && make -C src) \
+    && printf '%s\n' \
+        'export PYENV_ROOT="$HOME/.pyenv"' \
+        'command -v pyenv >/dev/null || export PATH="$PYENV_ROOT/bin:$PATH"' \
+        'eval "$(pyenv init -)"' \
+        >> ~/.bashrc
+# pyenv location and the interpreter version to build (MAJOR.MINOR.PATCH).
+# The derived values live in a second ENV because an ENV instruction expands
+# variables using the values set before that instruction.
+ENV PYENV_ROOT="${HOME}/.pyenv" \
+    PYENV_VERSION_MAJOR=3 \
+    PYENV_VERSION_MINOR=11 \
+    PYENV_VERSION_PATCH=6
+ENV PATH="${PYENV_ROOT}/bin:${PATH}" \
+    PYENV_VERSION="${PYENV_VERSION_MAJOR}.${PYENV_VERSION_MINOR}.${PYENV_VERSION_PATCH}"
+# Build the selected interpreter with pyenv and make it the global default
+RUN eval "$(pyenv init -)" \
+    && pyenv install "${PYENV_VERSION}" \
+    && pyenv global "${PYENV_VERSION}" \
+    && pyenv rehash
+
+# Put the pyenv shims (and pyenv itself) ahead of everything else on PATH.
+# NOTE(review): this also places them before the Conda "omni" environment, so
+# a bare `python` would presumably resolve to the pyenv build — confirm that
+# is intended.
+ENV PATH="${HOME}/.pyenv/shims:${HOME}/.pyenv/bin:${PATH}"
+
+# Pin the packaging toolchain (pip, setuptools, wheel) for whichever `python`
+# is first on PATH. --no-cache-dir keeps pip's download cache out of the
+# image layer; the second command disables pip's version check via its
+# global config.
+RUN python -m pip install --no-cache-dir --upgrade pip==23.1.2 setuptools==58.0.4 wheel==0.40.0 && \
+    python -m pip config set global.disable-pip-version-check true
+
+# only reinstall if requirements.txt changes
+# COPY --chown=$USERNAME:$USERNAME computer_use_demo/requirements.txt $HOME/computer_use_demo/requirements.txt
+# RUN python -m pip install -r $HOME/computer_use_demo/requirements.txt
+
+# setup desktop env & app
+# COPY --chown=$USERNAME:$USERNAME image/ $HOME
+# COPY --chown=$USERNAME:$USERNAME computer_use_demo/ $HOME/computer_use_demo/
+
+# Display settings: overridable at build time (--build-arg) and re-exported
+# under the same names as environment variables of the image
+ARG DISPLAY_NUM=1
+ARG HEIGHT=768
+ARG WIDTH=1024
+ENV DISPLAY_NUM=$DISPLAY_NUM \
+    HEIGHT=$HEIGHT \
+    WIDTH=$WIDTH
+
+# Set the entrypoint
+# ENTRYPOINT ["/usr/src/app/entrypoint.sh"]
+
+#  docker build . -t omniparser-x-demo:local  # manually build the docker image (optional)