mirror of https://github.com/containers/ramalama.git synced 2026-02-05 06:46:39 +01:00

use multi-stage builds for all images

Standardize on multi-stage builds for all images. This keeps development
tools and libraries out of the final images, reducing image size.
Install all llama.cpp binaries and shared libraries, for consistency with
upstream images, but avoid installing the unnecessary (and large) .a files
from the installation directory.
Call build_llama.sh to install runtime dependencies in the final images,
so that package versions stay consistent between the build and final images.

Signed-off-by: Mike Bonnet <mikeb@redhat.com>
Mike Bonnet
2026-01-29 12:45:26 -08:00
parent acbb271e1a
commit 88d597e4d9
8 changed files with 95 additions and 46 deletions
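
All of the Containerfiles below apply the same pattern. As an illustrative
composite of the hunks that follow (<backend> is a placeholder for asahi,
cann, cuda, intel-gpu, ramalama, or rocm; paths and flags are taken from the
diffs, while base images vary per file):

# stage 1: build llama.cpp with the full toolchain, installing to /tmp/install
FROM quay.io/fedora/fedora:43 AS builder
COPY container-images/scripts/build_llama.sh \
     container-images/scripts/lib.sh \
     /src/
WORKDIR /src/
RUN ./build_llama.sh <backend>

# stage 2: clean base; bind-mount the builder's /tmp/install and copy only
# binaries and shared libraries (the .a files are left behind), then let the
# same script install the matching runtime packages
FROM quay.io/fedora/fedora:43
RUN --mount=type=bind,from=builder,source=/tmp/install,target=/tmp/install \
    cp -a /tmp/install/bin/ /usr/ && \
    cp -a /tmp/install/lib64/*.so* /usr/lib64/
RUN --mount=type=bind,target=/src \
    /src/container-images/scripts/build_llama.sh <backend> runtime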


@@ -154,6 +154,8 @@ jobs:
 sudo apt-get update
 sudo apt-get install podman bash codespell git cmake libcurl4-openssl-dev
 sudo ./container-images/scripts/build_llama.sh
+sudo cp -a /tmp/install/bin/ /usr/
+sudo cp -a /tmp/install/lib/*.so* /usr/lib/
 uv tool install tox --with tox-uv
 uv pip install ".[dev]"


@@ -1,9 +1,15 @@
-FROM quay.io/fedora/fedora:43
+FROM quay.io/fedora/fedora:43 AS builder
 ENV ASAHI_VISIBLE_DEVICES 1
 COPY container-images/scripts/build_llama.sh \
      container-images/scripts/lib.sh \
      /src/
 WORKDIR /src/
 RUN ./build_llama.sh asahi
-WORKDIR /
+
+FROM quay.io/fedora/fedora:43
+RUN --mount=type=bind,from=builder,source=/tmp/install,target=/tmp/install \
+    cp -a /tmp/install/bin/ /usr/ && \
+    cp -a /tmp/install/lib64/*.so* /usr/lib64/
+RUN --mount=type=bind,target=/src \
+    /src/container-images/scripts/build_llama.sh asahi runtime


@@ -10,8 +10,9 @@ WORKDIR /src/
 RUN ./build_llama.sh cann

 FROM quay.io/ascend/${ASCEND_VERSION}
-# Copy the entire installation directory from the builder
-COPY --from=builder /tmp/install /usr
+RUN --mount=type=bind,from=builder,source=/tmp/install,target=/tmp/install \
+    cp -a /tmp/install/bin/ /usr/ && \
+    cp -a /tmp/install/lib64/*.so* /usr/lib64/
 ENTRYPOINT [ \
     "/bin/bash", \
     "-c", \


@@ -11,21 +11,25 @@ RUN ./build_llama.sh cuda
 # Final runtime image
 FROM docker.io/nvidia/cuda:${CUDA_VERSION}-runtime-ubi9
-# Copy the entire installation directory from the builder
-COPY --from=builder /tmp/install /usr
+RUN --mount=type=bind,from=builder,source=/tmp/install,target=/tmp/install \
+    cp -a /tmp/install/bin/ /usr/ && \
+    cp -a /tmp/install/lib64/*.so* /usr/lib64/
+RUN --mount=type=bind,target=/src \
+    /src/container-images/scripts/build_llama.sh cuda runtime
-# Install python3.12 and ramalama to support a non-standard use-case
-RUN dnf -y install python3.12 && dnf -y clean all && ln -sf python3.12 /usr/bin/python3
-COPY . /src/ramalama
-WORKDIR /src/ramalama
-RUN python3 -m ensurepip && \
+# Install ramalama to support a non-standard use-case
+RUN --mount=type=bind,target=/src \
+    cp -a /src /var/tmp/ramalama && \
     python3 -m pip \
         --no-cache-dir \
         --disable-pip-version-check \
         install \
         --compile \
         --prefix=/usr \
-        .
+        --root-user-action ignore \
+        /var/tmp/ramalama && \
+    rm -rf /var/tmp/ramalama
 WORKDIR /
 ENTRYPOINT []
 CMD ["/bin/bash"]


@@ -9,14 +9,12 @@ RUN ./build_llama.sh intel-gpu
 FROM quay.io/fedora/fedora:43
-COPY --from=builder /tmp/install/ /usr/
+RUN --mount=type=bind,from=builder,source=/tmp/install,target=/tmp/install \
+    cp -a /tmp/install/bin/ /usr/ && \
+    cp -a /tmp/install/lib64/*.so* /usr/lib64/
 COPY container-images/intel-gpu/oneAPI.repo /etc/yum.repos.d/
-RUN dnf install -y --setopt=install_weak_deps=false \
-    procps-ng intel-oneapi-runtime-mkl intel-oneapi-mkl-sycl-dft \
-    intel-level-zero oneapi-level-zero intel-compute-runtime lspci \
-    clinfo intel-oneapi-runtime-compilers intel-oneapi-mkl-core \
-    intel-oneapi-mkl-sycl-blas intel-oneapi-runtime-dnnl gawk
+RUN --mount=type=bind,target=/src \
+    /src/container-images/scripts/build_llama.sh intel-gpu runtime
 COPY --chmod=755 container-images/intel-gpu/entrypoint.sh /usr/bin/


@@ -1,19 +1,28 @@
-FROM quay.io/fedora/fedora:43
+FROM quay.io/fedora/fedora:43 AS builder
 COPY container-images/scripts/build_llama.sh \
      container-images/scripts/lib.sh \
      /src/
 WORKDIR /src/
 RUN ./build_llama.sh ramalama
+
+FROM quay.io/fedora/fedora:43
+RUN --mount=type=bind,from=builder,source=/tmp/install,target=/tmp/install \
+    cp -a /tmp/install/bin/ /usr/ && \
+    cp -a /tmp/install/lib64/*.so* /usr/lib64/
+RUN --mount=type=bind,target=/src \
+    /src/container-images/scripts/build_llama.sh ramalama runtime
 # Install ramalama to support a non-standard use-case
-COPY . /src/ramalama
-WORKDIR /src/ramalama
-RUN python3 -m ensurepip && \
+RUN --mount=type=bind,target=/src \
+    cp -a /src /var/tmp/ramalama && \
     python3 -m pip \
         --no-cache-dir \
         --disable-pip-version-check \
         install \
         --compile \
         --prefix=/usr \
-        .
-WORKDIR /
+        --root-user-action ignore \
+        /var/tmp/ramalama && \
+    rm -rf /var/tmp/ramalama


@@ -12,7 +12,5 @@ RUN --mount=type=bind,from=builder,source=/tmp/install,target=/tmp/install \
     cp -a /tmp/install/bin/ /usr/ && \
     cp -a /tmp/install/lib64/*.so* /usr/lib64/
-RUN dnf -y --setopt=install_weak_deps=false install hipblas rocblas rocm-hip rocm-runtime rocsolver && \
-    dnf -y clean all
-WORKDIR /
+RUN --mount=type=bind,target=/src \
+    /src/container-images/scripts/build_llama.sh rocm runtime


@@ -1,6 +1,7 @@
 #!/bin/bash

 DEFAULT_LLAMA_CPP_COMMIT="b45ef2702c262998d5db9887cd3c82f04761237a" # b7872
+MESA_VULKAN_VERSION=25.2.3-101.fc43

 dnf_install_intel_gpu() {
     local intel_rpms=("intel-oneapi-mkl-sycl-devel" "intel-oneapi-dnnl-devel"
@@ -54,9 +55,9 @@ dnf_install_s390_ppc64le() {
 dnf_install_mesa() {
     if [ "${ID}" = "fedora" ]; then
         dnf copr enable -y slp/mesa-libkrun-vulkan
-        dnf install -y mesa-vulkan-drivers-25.2.3-101.fc43 virglrenderer \
+        dnf install -y mesa-vulkan-drivers-$MESA_VULKAN_VERSION virglrenderer \
             "${vulkan_rpms[@]}"
-        dnf versionlock add mesa-vulkan-drivers-25.2.3-101.fc43
+        dnf versionlock add mesa-vulkan-drivers-$MESA_VULKAN_VERSION
     elif [ "${ID}" = "openEuler" ]; then
         dnf install -y mesa-vulkan-drivers virglrenderer "${vulkan_rpms[@]}"
     else # virglrenderer not available on RHEL or EPEL
@@ -70,7 +71,7 @@ dnf_install() {
     local rpm_exclude_list="selinux-policy,container-selinux"
     local rpm_list=("python3-dnf-plugin-versionlock"
                    "gcc-c++" "cmake" "vim" "procps-ng" "git-core"
-                   "dnf-plugins-core" "gawk")
+                   "dnf-plugins-core" "gawk" "openssl-devel")
     local vulkan_rpms=("vulkan-headers" "vulkan-loader-devel" "vulkan-tools"
                        "spirv-tools" "glslc" "glslang")
     if is_rhel_based; then
@@ -104,13 +105,47 @@ dnf_install() {
     dnf -y clean all
 }

+dnf_install_runtime_deps() {
+    local runtime_pkgs=()
+    if [ "$containerfile" = "ramalama" ]; then
+        # install python3 in the ramalama container to support a non-standard use-case
+        runtime_pkgs+=(python3 python3-pip)
+        if [ "$uname_m" = "x86_64" ] || [ "$uname_m" = "aarch64" ]; then
+            dnf copr enable -y slp/mesa-libkrun-vulkan
+            runtime_pkgs+=(vulkan-loader vulkan-tools "mesa-vulkan-drivers-$MESA_VULKAN_VERSION")
+        else
+            runtime_pkgs+=(openblas)
+        fi
+    elif [[ "$containerfile" = rocm* ]]; then
+        runtime_pkgs+=(hipblas rocblas rocm-hip rocm-runtime rocsolver)
+    elif [ "$containerfile" = "asahi" ]; then
+        dnf copr enable -y @asahi/fedora-remix-branding
+        dnf install -y asahi-repos
+        runtime_pkgs+=(vulkan-loader vulkan-tools mesa-vulkan-drivers)
+    elif [ "$containerfile" = "cuda" ]; then
+        # install python3.12 in the cuda container to support a non-standard use-case
+        runtime_pkgs+=(python3.12 python3.12-pip)
+        ln -sf python3.12 /usr/bin/python3
+    elif [ "$containerfile" = "intel-gpu" ]; then
+        runtime_pkgs+=(
+            clinfo lspci procps-ng
+            intel-compute-runtime intel-level-zero
+            intel-oneapi-runtime-compilers intel-oneapi-runtime-dnnl intel-oneapi-runtime-mkl
+            intel-oneapi-mkl-core intel-oneapi-mkl-sycl-blas intel-oneapi-mkl-sycl-dft
+            oneapi-level-zero
+        )
+    fi
+    dnf install -y --setopt=install_weak_deps=false "${runtime_pkgs[@]}"
+    dnf -y clean all
+}
+
 cmake_check_warnings() {
     awk -v rc=0 '/CMake Warning:/ { rc=1 } 1; END {exit rc}'
 }

 setup_build_env() {
     # external scripts may reference unbound variables
-    set +u
+    set +ux
     if [ "$containerfile" = "cann" ]; then
         # source build env
         cann_in_sys_path=/usr/local/Ascend/ascend-toolkit
@@ -133,7 +168,7 @@ setup_build_env() {
         # shellcheck disable=SC1091
         source /opt/intel/oneapi/setvars.sh
     fi
-    set -u
+    set -ux
 }

 cmake_steps() {
@@ -152,17 +187,9 @@ cmake_steps() {
     )
 }

-set_install_prefix() {
-    if [ "$containerfile" = "cuda" ] || [ "$containerfile" = "intel-gpu" ] || [ "$containerfile" = "cann" ] || [ "$containerfile" = "musa" ] || [ "$containerfile" = "rocm" ]; then
-        echo "/tmp/install"
-    else
-        echo "/usr"
-    fi
-}
-
 configure_common_flags() {
     common_flags=(
-        "-DGGML_CCACHE=OFF" "-DGGML_RPC=ON" "-DCMAKE_INSTALL_PREFIX=${install_prefix}"
+        "-DGGML_CCACHE=OFF" "-DGGML_RPC=ON" "-DCMAKE_INSTALL_PREFIX=/tmp/install"
         "-DLLAMA_BUILD_TESTS=OFF" "-DLLAMA_BUILD_EXAMPLES=OFF" "-DGGML_BUILD_TESTS=OFF" "-DGGML_BUILD_EXAMPLES=OFF"
     )
     if [ "$containerfile" != "cann" ]; then
@@ -235,10 +262,14 @@ main() {
     source "$(dirname "$0")/lib.sh"

     local containerfile=${1-""}
-    local install_prefix
-    install_prefix=$(set_install_prefix)
+    local uname_m
+    uname_m="$(uname -m)"
+    if [ "${2-""}" == "runtime" ]; then
+        dnf_install_runtime_deps
+        exit
+    fi

     local common_flags
     configure_common_flags
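
The net effect of the build_llama.sh changes is a script with two modes keyed
off its second argument. A sketch of the resulting call pattern (the
invocations mirror the Containerfile hunks above; no behavior beyond what the
diff shows is implied):

# builder stage: install build dependencies, compile llama.cpp,
# and install everything under /tmp/install
./build_llama.sh cuda

# final stage: run only dnf_install_runtime_deps for the named backend,
# then exit before any cmake/build steps
./build_llama.sh cuda runtime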