
rocm: reduce image size by using a multi-stage build

Only copy required binaries and libraries from the installation directory into
the final image, and install only the necessary runtime dependencies. The final
image size is reduced by over 2 GB.

Signed-off-by: Mike Bonnet <mikeb@redhat.com>
Author: Mike Bonnet
Date: 2025-12-15 10:18:17 -08:00
parent efe48608eb
commit 61f7101c2a
2 changed files with 20 additions and 6 deletions

container-images/rocm/Containerfile

@@ -1,8 +1,25 @@
-FROM quay.io/fedora/fedora:43
+FROM quay.io/fedora/fedora:43 AS builder
COPY container-images/scripts/build_llama_and_whisper.sh \
     container-images/scripts/lib.sh \
     /src/
WORKDIR /src/
RUN ./build_llama_and_whisper.sh rocm
+FROM quay.io/fedora/fedora:43
+RUN --mount=type=bind,from=builder,source=/tmp/install,target=/tmp/install \
+    cp -a /tmp/install/bin/llama-bench \
+        /tmp/install/bin/llama-perplexity \
+        /tmp/install/bin/llama-quantize \
+        /tmp/install/bin/llama-server \
+        /tmp/install/bin/rpc-server \
+        /tmp/install/bin/whisper-server \
+        /tmp/install/bin/*.so* \
+        /usr/bin/ && \
+    cp -a /tmp/install/lib64/*.so* /usr/lib64/
+RUN dnf -y --setopt=install_weak_deps=false install hipblas rocblas rocm-hip rocm-runtime rocsolver && \
+    dnf -y clean all
WORKDIR /
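
A quick way to confirm the savings is to build the image and inspect its size; a minimal sketch, assuming the build runs from the repository root and using an illustrative tag (both the tag and the exact invocation are assumptions, not part of this commit):

# Build the rocm image and print its size (tag name is illustrative).
podman build -t ramalama-rocm -f container-images/rocm/Containerfile .
podman images ramalama-rocm --format '{{.Size}}'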

container-images/scripts/build_llama_and_whisper.sh

@@ -183,7 +183,7 @@ cmake_steps() {
}
set_install_prefix() {
if [ "$containerfile" = "cuda" ] || [ "$containerfile" = "intel-gpu" ] || [ "$containerfile" = "cann" ] || [ "$containerfile" = "musa" ]; then
if [ "$containerfile" = "cuda" ] || [ "$containerfile" = "intel-gpu" ] || [ "$containerfile" = "cann" ] || [ "$containerfile" = "musa" ] || [ "$containerfile" = "rocm" ]; then
echo "/tmp/install"
else
echo "/usr"
@@ -204,7 +204,7 @@ configure_common_flags() {
common_flags+=("-DCMAKE_HIP_COMPILER_ROCM_ROOT=/usr")
fi
-common_flags+=("-DGGML_HIP=ON" "-DAMDGPU_TARGETS=${AMDGPU_TARGETS:-gfx1010,gfx1012,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102,gfx1103,gfx1151,gfx1200,gfx1201}")
+common_flags+=("-DGGML_HIP=ON" "-DGPU_TARGETS=${GPU_TARGETS:-gfx1010,gfx1012,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102,gfx1103,gfx1151,gfx1200,gfx1201}")
;;
cuda)
common_flags+=("-DGGML_CUDA=ON" "-DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined" "-DCMAKE_CUDA_FLAGS=\"-U__ARM_NEON -U__ARM_NEON__\"")
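
Note the variable rename in the rocm arm above: AMDGPU_TARGETS becomes GPU_TARGETS, the name newer HIP CMake tooling prefers (AMDGPU_TARGETS has been deprecated upstream). Anyone narrowing the GPU list via the environment needs the new name; an illustrative invocation:

# Build for a single architecture using the renamed override.
GPU_TARGETS=gfx1100 ./build_llama_and_whisper.sh rocm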
@@ -235,7 +235,6 @@ clone_and_build_whisper_cpp() {
git_clone_specific_commit "${WHISPER_CPP_REPO:-https://github.com/ggerganov/whisper.cpp}" "$whisper_cpp_commit"
cmake_steps "${whisper_flags[@]}"
mkdir -p "$install_prefix/bin"
cd ..
if [[ "${RAMALAMA_IMAGE_BUILD_DEBUG_MODE:-}" != y ]]; then
rm -rf whisper.cpp
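
The RAMALAMA_IMAGE_BUILD_DEBUG_MODE guard shown above keeps the source checkout around instead of deleting it, which is handy when debugging a failed build. For example (illustrative):

# Keep the whisper.cpp tree after the build for inspection.
RAMALAMA_IMAGE_BUILD_DEBUG_MODE=y ./build_llama_and_whisper.sh rocm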
@@ -244,8 +243,6 @@ clone_and_build_whisper_cpp() {
clone_and_build_llama_cpp() {
local llama_cpp_commit="${LLAMA_CPP_PULL_REF:-$DEFAULT_LLAMA_CPP_COMMIT}"
local install_prefix
install_prefix=$(set_install_prefix)
git_clone_specific_commit "${LLAMA_CPP_REPO:-https://github.com/ggml-org/llama.cpp}" "$llama_cpp_commit"
cmake_steps "${common_flags[@]}"
install -m 755 build/bin/rpc-server "$install_prefix"/bin/rpc-server
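
Since the final stage copies binaries out of the builder and installs only a hand-picked set of runtime packages, it is worth checking that no shared-library dependency was missed. A quick sanity check, assuming the image was tagged ramalama-rocm (an illustrative tag):

# Unresolved dependencies show up as "not found" in ldd output.
podman run --rm ramalama-rocm ldd /usr/bin/llama-server | grep "not found" || echo "all libraries resolved"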