cuda docker containers
overview
docker provides isolated cuda environments without system modifications. recommended for most use cases.
benefits:
- multiple cuda versions simultaneously (example below)
- no system pollution
- easy deployment
- reproducible environments
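for example, two cuda toolchains side by side against one driver (devel tags assumed available on docker hub):
docker run --rm --gpus all nvidia/cuda:11.8.0-devel-ubuntu22.04 nvcc --version
docker run --rm --gpus all nvidia/cuda:12.8.0-devel-ubuntu22.04 nvcc --version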
prerequisites
verify before starting:
# gpu driver
nvidia-smi
# docker 19.03+
docker --version
tested configuration:
- ubuntu 25.04 (plucky)
- nvidia driver 575.57.08
- docker 28.3.3
- nvidia container toolkit 1.17.8
installation
ubuntu 24.04+ / debian 12
# add nvidia apt key
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | \
sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
# add repository
curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
# install toolkit
sudo apt-get update
sudo apt-get install -y nvidia-container-toolkit
# configure docker
sudo nvidia-ctk runtime configure --runtime=docker
sudo systemctl restart docker
rootless docker
additional steps for rootless:
# configure for user
nvidia-ctk runtime configure --runtime=docker --config=$HOME/.config/docker/daemon.json
# critical: disable cgroups
sudo nvidia-ctk config --set nvidia-container-cli.no-cgroups --in-place
# restart user docker
systemctl --user restart docker
verification
docker run --rm --gpus all nvidia/cuda:12.8.0-base-ubuntu22.04 nvidia-smi
expected output:
+---------------------------------------------------------------------------------+
| NVIDIA-SMI 575.57.08        Driver Version: 575.57.08        CUDA Version: 12.9 |
+---------------------------------------------------------------------------------+
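a minimal guard for scripts, so jobs fail fast when gpu passthrough is broken:
docker run --rm --gpus all nvidia/cuda:12.8.0-base-ubuntu22.04 nvidia-smi > /dev/null \
|| { echo "gpu not reachable from containers"; exit 1; }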
usage patterns
basic gpu access
# all gpus
docker run --gpus all myimage
# specific gpu
docker run --gpus '"device=0"' myimage
# multiple gpus
docker run --gpus '"device=0,1"' myimage
docker compose
services:
  ml-app:
    image: ml:latest
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
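to pin a specific gpu rather than a count, the compose spec also accepts device_ids (a sketch of just the deploy block):
deploy:
  resources:
    reservations:
      devices:
        - driver: nvidia
          device_ids: ['0']
          capabilities: [gpu]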
environment control
# limit visible gpus (the env var is honored by the nvidia runtime, not plain runc)
docker run --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=0 myimage
# capabilities
docker run -e NVIDIA_DRIVER_CAPABILITIES=compute,utility myimage
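the video capability enables nvenc/nvdec, e.g. for gpu-accelerated ffmpeg (myimage is a placeholder):
docker run --gpus all -e NVIDIA_DRIVER_CAPABILITIES=compute,video,utility myimage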
cuda base images
official nvidia images
| image                                  | cuda | os           | size   |
|----------------------------------------|------|--------------|--------|
| nvidia/cuda:12.8.0-base-ubuntu22.04    | 12.8 | ubuntu 22.04 | ~120mb |
| nvidia/cuda:12.8.0-runtime-ubuntu22.04 | 12.8 | ubuntu 22.04 | ~2.8gb |
| nvidia/cuda:12.8.0-devel-ubuntu22.04   | 12.8 | ubuntu 22.04 | ~6.6gb |
choose based on needs (quick check below):
- base: cuda runtime (cudart) only
- runtime: adds the cuda math libraries and nccl; cudnn comes in separate *-cudnn-* tags
- devel: adds nvcc, headers, and static libraries
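a quick way to tell the flavors apart: nvcc ships only in the devel image:
docker run --rm nvidia/cuda:12.8.0-base-ubuntu22.04 which nvcc     # no output, exits 1
docker run --rm nvidia/cuda:12.8.0-devel-ubuntu22.04 nvcc --version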
framework images
# pytorch
docker run --gpus all pytorch/pytorch:2.7.1-cuda12.8-cudnn9-runtime
# tensorflow
docker run --gpus all tensorflow/tensorflow:latest-gpu
# jax
docker run --gpus all ghcr.io/nvidia/jax:base
dockerfile examples
minimal cuda app
FROM nvidia/cuda:12.8.0-runtime-ubuntu22.04
# cuda images ship without python; install it first
RUN apt-get update && apt-get install -y python3 python3-pip \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /app
COPY requirements.txt .
RUN pip3 install -r requirements.txt
COPY . .
CMD ["python3", "app.py"]
python with uv
FROM ghcr.io/astral-sh/uv:latest as uv
FROM nvidia/cuda:12.8.0-runtime-ubuntu22.04
# install minimal deps
RUN apt-get update && apt-get install -y \
curl \
ca-certificates \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
# copy uv
COPY --from=uv /uv /uvx /bin/
# install python
RUN uv python install 3.13
WORKDIR /app
COPY pyproject.toml uv.lock ./
# install deps with cache
RUN --mount=type=cache,target=/root/.cache/uv \
UV_LINK_MODE=copy \
uv sync --frozen --no-install-project
COPY . .
RUN --mount=type=cache,target=/root/.cache/uv \
uv sync --frozen
CMD ["uv", "run", "python", "main.py"]
multi-stage build
# build stage
FROM nvidia/cuda:12.8.0-devel-ubuntu22.04 as builder
WORKDIR /build
COPY . .
RUN make
# runtime stage
FROM nvidia/cuda:12.8.0-runtime-ubuntu22.04
COPY --from=builder /build/app /usr/local/bin/
CMD ["app"]
framework specifics
pytorch
FROM ghcr.io/astral-sh/uv:latest as uv
FROM nvidia/cuda:12.8.0-runtime-ubuntu22.04
COPY --from=uv /uv /uvx /bin/
RUN apt-get update && apt-get install -y curl ca-certificates \
&& apt-get clean && rm -rf /var/lib/apt/lists/*
RUN uv python install 3.13
# pytorch wheel index (replaces pypi entirely; use UV_EXTRA_INDEX_URL to keep pypi too)
ENV UV_INDEX_URL=https://download.pytorch.org/whl/cu128
WORKDIR /app
CMD ["uv", "run", "--with", "torch", "python", "-c", \
"import torch; print(f'cuda available: {torch.cuda.is_available()}')"]
tensorflow
FROM tensorflow/tensorflow:latest-gpu
# already includes cuda/cudnn
WORKDIR /app
COPY . .
CMD ["python", "train.py"]
jax
FROM nvidia/cuda:12.8.0-cudnn-runtime-ubuntu22.04
# cuda images ship without pip
RUN apt-get update && apt-get install -y python3-pip \
&& rm -rf /var/lib/apt/lists/* \
&& pip install --upgrade "jax[cuda12]"
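verify jax sees the gpu (jax-gpu is whatever tag you built the image under):
docker build -t jax-gpu .
docker run --rm --gpus all jax-gpu python3 -c "import jax; print(jax.devices())"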
performance optimization
build caching
# cache python packages
RUN --mount=type=cache,target=/root/.cache/pip \
pip install -r requirements.txt
# cache uv packages
RUN --mount=type=cache,target=/root/.cache/uv \
uv sync --frozen
layer optimization
# bad: changes invalidate cache
COPY . .
RUN pip install -r requirements.txt
# good: deps cached separately
COPY requirements.txt .
RUN pip install -r requirements.txt
COPY . .
memory management
# limit gpu memory growth (tensorflow)
docker run --gpus all -e TF_FORCE_GPU_ALLOW_GROWTH=true tensorflow/tensorflow:latest-gpu
# pytorch allocator tuning
docker run --gpus all -e PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512 pytorch/pytorch:2.7.1-cuda12.8-cudnn9-runtime
monitoring
inside container
# real-time monitoring
docker exec -it container_id nvidia-smi
# continuous monitoring
docker exec -it container_id watch -n 1 nvidia-smi
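for machine-readable sampling, nvidia-smi's query mode beats parsing the banner:
# csv sample once per second
docker exec -it container_id nvidia-smi \
--query-gpu=utilization.gpu,memory.used,memory.total --format=csv -l 1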
prometheus metrics
services:
  dcgm-exporter:
    image: nvidia/dcgm-exporter:latest
    environment:
      - DCGM_EXPORTER_NO_HOSTNAME=1
    devices:
      - /dev/nvidiactl
      - /dev/nvidia0
    volumes:
      - /run/nvidia:/run/nvidia:ro
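to confirm the exporter is serving (assumes its default port 9400 is published, e.g. ports: ["9400:9400"]):
curl -s localhost:9400/metrics | grep DCGM_FI_DEV_GPU_UTIL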
troubleshooting
common issues
“could not select device driver”
# toolkit not installed or docker not restarted
sudo nvidia-ctk runtime configure --runtime=docker
sudo systemctl restart docker
gpu not visible
# missing --gpus flag
docker run --gpus all nvidia/cuda:12.8.0-base-ubuntu22.04 nvidia-smi
permission denied (rootless)
# missing no-cgroups config
sudo nvidia-ctk config --set nvidia-container-cli.no-cgroups --in-place
systemctl --user restart docker
cuda version mismatch
- the container's cuda version must not exceed what the host driver supports (nvidia-smi reports the maximum)
- pick a base image your driver can run (comparison below)
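the cuda images set a CUDA_VERSION env var, so both sides are easy to read:
# max cuda the host driver supports
nvidia-smi | grep "CUDA Version"
# cuda toolkit inside the container
docker run --rm nvidia/cuda:12.8.0-base-ubuntu22.04 env | grep CUDA_VERSION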
debugging commands
# check runtime config
docker info | grep nvidia
# inspect container
docker inspect container_id | grep -i gpu
# test minimal cuda compile (nvcc requires the devel image)
docker run --rm --gpus all nvidia/cuda:12.8.0-devel-ubuntu22.04 \
bash -c "echo 'int main() { return 0; }' > test.cu && nvcc test.cu && ./a.out"
best practices
use specific tags
# bad
FROM nvidia/cuda:latest
# good
FROM nvidia/cuda:12.8.0-runtime-ubuntu22.04
minimize image size
- use runtime images for deployment
- multi-stage builds for compilation
- clean package caches
version pinning
# pin all versions
FROM nvidia/cuda:12.8.0-runtime-ubuntu22.04
RUN pip install torch==2.7.1+cu128 --index-url https://download.pytorch.org/whl/cu128
security
- run as non-root user
- use read-only filesystems where possible
- limit capabilities (hardened run example below)
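a hardened run combining these points; myimage is a placeholder and the flags are standard docker:
docker run --rm --gpus all \
--user 1000:1000 \
--read-only --tmpfs /tmp \
--cap-drop=ALL \
--security-opt no-new-privileges:true \
myimage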
tips
- container cuda version is independent of any host cuda install (only the driver matters)
- use nvidia/cuda:*-base-* images for minimal size
- buildkit cache mounts save bandwidth
- --gpus all works for single-gpu systems
- test gpu access before heavy processing
migration guide
from nvidia-docker v1
# old
nvidia-docker run image
# new
docker run --gpus all image
from native cuda
benefits of migration:
- no system cuda installation
- multiple versions side-by-side
- easier deployment
- team consistency