
Combine Vulkan, Kompute and CPU inferencing into one image

Fewer images to maintain, and Vulkan is more mature and more widely
used than Kompute.

Signed-off-by: Eric Curtin <ecurtin@redhat.com>
Eric Curtin
2025-03-24 11:40:07 +00:00
parent c09713f61a
commit 73c54bf34c
10 changed files with 52 additions and 61 deletions
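
Distilled from the build-script hunks below, the consolidated image now picks its inference backend at build time rather than shipping separate vulkan/kompute images. Roughly (a sketch, not a verbatim excerpt):

    # Sketch of the new selection logic: Vulkan on mainstream
    # architectures, OpenBLAS (CPU) everywhere else.
    uname_m="$(uname -m)"
    if [ "$uname_m" = "x86_64" ] || [ "$uname_m" = "aarch64" ]; then
        common_flags+=("-DGGML_VULKAN=ON")
    else
        common_flags+=("-DGGML_BLAS=ON" "-DGGML_BLAS_VENDOR=OpenBLAS")
    fi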

View File

@@ -109,6 +109,7 @@ ifneq (,$(wildcard /usr/bin/python3))
 	/usr/bin/python3 -m compileall -q .
 endif
+	! grep -ri "#\!/usr/bin/python3" .
 	flake8 *.py */*.py */*/*.py libexec/* bin/*
 	shellcheck *.sh */*.sh */*/*.sh
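
The added check uses `!` to invert grep's exit status: grep exits 0 when it finds a match, so the recipe now fails exactly when some file still hardcodes a `#!/usr/bin/python3` shebang. A minimal illustration of the same pattern outside the Makefile:

    # grep exits 0 on a match; '!' flips it, so this check only fails
    # when the unwanted shebang is present somewhere in the tree.
    if ! grep -ri '#!/usr/bin/python3' . ; then
        echo "clean: no hardcoded python3 shebangs"
    else
        echo "found a hardcoded shebang" >&2
        exit 1
    fi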

View File

@@ -2,5 +2,6 @@ FROM fedora:41
 ENV ASAHI_VISIBLE_DEVICES 1
 COPY --chmod=755 ../scripts /usr/bin
-RUN /usr/bin/build_llama_and_whisper.sh "asahi" && \
+RUN build_llama_and_whisper.sh "asahi" && \
     rag_framework load
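
Copying the whole scripts directory to /usr/bin (rather than /scripts or /) is what lets every RUN line drop the absolute path: /usr/bin is on the image's default PATH, so the script resolves like any other command. The same two-line pattern repeats across the Containerfiles below; inside the built image:

    # /usr/bin is on PATH, so the build script resolves without a path.
    command -v build_llama_and_whisper.sh   # prints /usr/bin/build_llama_and_whisper.sh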

View File

@@ -3,8 +3,8 @@ ARG ASCEND_VERSION=cann:8.0.0-910b-openeuler22.03-py3.10
 FROM quay.io/ascend/${ASCEND_VERSION} AS builder
 ARG GOLANG_VERSION
-COPY --chmod=755 ../scripts /scripts
-RUN sh -x /scripts/build_llama_and_whisper.sh "cann"
+COPY --chmod=755 ../scripts /usr/bin
+RUN build_llama_and_whisper.sh "cann"
 FROM quay.io/ascend/${ASCEND_VERSION}
 # Copy the entire installation directory from the builder

View File

@@ -1,8 +1,8 @@
 # Base image with CUDA for compilation
 FROM docker.io/nvidia/cuda:12.8.1-devel-ubi9 AS builder
-COPY --chmod=755 ../scripts /scripts
-RUN /scripts/build_llama_and_whisper.sh "cuda"
+COPY --chmod=755 ../scripts /usr/bin
+RUN build_llama_and_whisper.sh "cuda"
 # Final runtime image
 FROM docker.io/nvidia/cuda:12.8.1-runtime-ubi9

View File

@@ -1,19 +1,18 @@
 FROM quay.io/fedora/fedora:41 as builder
 COPY intel-gpu/oneAPI.repo /etc/yum.repos.d/
-COPY --chmod=755 scripts/build_llama_and_whisper.sh /
-RUN /build_llama_and_whisper.sh "intel-gpu"
+COPY --chmod=755 ../scripts /usr/bin
+RUN build_llama_and_whisper.sh "intel-gpu"
 FROM quay.io/fedora/fedora:41
 COPY --from=builder /tmp/install/ /usr/
 COPY intel-gpu/oneAPI.repo /etc/yum.repos.d/
-RUN dnf install -y procps-ng python3 python3-pip python3-devel intel-level-zero oneapi-level-zero intel-compute-runtime libcurl lspci clinfo intel-oneapi-runtime-compilers intel-oneapi-mkl-core intel-oneapi-mkl-sycl-blas intel-oneapi-runtime-dnnl ; \
-    chown 0:0 /etc/passwd ; \
-    chown 0:0 /etc/group ; \
-    chmod g=u /etc/passwd /etc/group /home ;
+RUN dnf install -y procps-ng python3 python3-pip python3-devel intel-level-zero oneapi-level-zero intel-compute-runtime libcurl lspci clinfo intel-oneapi-runtime-compilers intel-oneapi-mkl-core intel-oneapi-mkl-sycl-blas intel-oneapi-runtime-dnnl && \
+    chown 0:0 /etc/passwd && \
+    chown 0:0 /etc/group && \
+    chmod g=u /etc/passwd /etc/group /home
 RUN useradd llama-user -u 10000 -d /home/llama-user -s /bin/bash && \
     groupmod -a -U llama-user render && \
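
Beyond the path change, swapping `;` for `&&` in the RUN chain is a correctness fix: with `;` the step's exit status is only that of the final command, so a failed dnf install could slip through and produce a broken layer; with `&&` the first failure aborts the chain and fails the build. A quick illustration:

    # ';' keeps going and reports the last command's status (exit=0 here):
    sh -c 'false ; echo "ran anyway"' ; echo "exit=$?"
    # '&&' short-circuits on the first failure (exit=1):
    sh -c 'false && echo "never runs"' ; echo "exit=$?"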

View File

@@ -2,5 +2,5 @@ FROM registry.access.redhat.com/ubi9/ubi:9.5-1742918310
 COPY --chmod=755 ../scripts /usr/bin
-RUN /usr/bin/build_llama_and_whisper.sh "ramalama" && \
+RUN build_llama_and_whisper.sh "ramalama" && \
     rag_framework load

View File

@@ -1,7 +1,7 @@
 FROM registry.fedoraproject.org/fedora:42
-COPY --chmod=755 ../scripts /usr/bin/
-RUN /usr/bin/build_llama_and_whisper.sh "rocm" && \
+COPY --chmod=755 ../scripts /usr/bin
+RUN build_llama_and_whisper.sh "rocm" && \
     rag_framework load
 ENV WHISPER_CPP_SHA=${WHISPER_CPP_SHA}

View File

@@ -2,9 +2,9 @@ FROM registry.access.redhat.com/ubi9/ubi:9.5-1742918310
 COPY rocm/amdgpu.repo /etc/yum.repos.d/
 COPY rocm/rocm.repo /etc/yum.repos.d/
-COPY --chmod=755 scripts /scripts
+COPY --chmod=755 ../scripts /usr/bin
 ARG AMDGPU_TARGETS=
 ENV AMDGPU_TARGETS=${AMDGPU_TARGETS}
-RUN /scripts/build_llama_and_whisper.sh "rocm" && \
+RUN build_llama_and_whisper.sh "rocm" && \
     rag_framework load
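
The `ARG AMDGPU_TARGETS=` / `ENV AMDGPU_TARGETS=${AMDGPU_TARGETS}` pair forwards an optional build argument into the environment the build script sees; the script then falls back to its own GPU list via `${AMDGPU_TARGETS:-…}` (visible in the script hunk further down). The shell side of that fallback:

    # ${VAR:-default} expands to the default when VAR is unset or empty,
    # so an unset build arg still yields a usable target list.
    AMDGPU_TARGETS=""
    echo "${AMDGPU_TARGETS:-gfx1100,gfx1101}"   # two entries from the real list, for brevity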

View File

@@ -8,7 +8,7 @@ dnf_install_intel_gpu() {
   local intel_rpms=("intel-oneapi-mkl-sycl-devel" "intel-oneapi-dnnl-devel" \
                     "intel-oneapi-compiler-dpcpp-cpp" "intel-level-zero" \
                     "oneapi-level-zero" "oneapi-level-zero-devel" "intel-compute-runtime")
-  dnf install -y "${rpm_list[@]}" "${intel_rpms[@]}"
+  dnf install -y "${intel_rpms[@]}"
   # shellcheck disable=SC1091
   . /opt/intel/oneapi/setvars.sh
@@ -24,11 +24,11 @@ dnf_remove() {
 dnf_install_asahi() {
   dnf copr enable -y @asahi/fedora-remix-branding
   dnf install -y asahi-repos
-  dnf install -y mesa-vulkan-drivers "${vulkan_rpms[@]}" "${rpm_list[@]}"
+  dnf install -y mesa-vulkan-drivers "${vulkan_rpms[@]}"
 }
 dnf_install_cuda() {
-  dnf install -y "${rpm_list[@]}" gcc-toolset-12
+  dnf install -y gcc-toolset-12
   # shellcheck disable=SC1091
   . /opt/rh/gcc-toolset-12/enable
 }
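
Dropping `"${rpm_list[@]}"` from the per-backend helpers works because dnf_install now installs that list itself for every image (see the dnf_install hunk below). It also removes a subtle coupling: rpm_list is a `local` in dnf_install, and bash locals are dynamically scoped, so helpers invoked from dnf_install could read it without it ever being passed. A minimal sketch of that scoping rule:

    # bash locals are visible to functions called from the declaring
    # function (dynamic scope); this is how the helpers saw rpm_list.
    outer() { local rpm_list=("git" "cmake"); inner; }
    inner() { echo "inner sees: ${rpm_list[*]}"; }
    outer   # prints: inner sees: git cmake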
@@ -51,9 +51,12 @@ dnf_install_rocm() {
   if [ "${ID}" = "fedora" ]; then
     dnf install -y rocm-core-devel hipblas-devel rocblas-devel rocm-hip-devel
   else
+    add_stream_repo "AppStream"
     dnf install -y rocm-dev hipblas-devel rocblas-devel
   fi
+  fi
+  rm_non_ubi_repos
 }
 dnf_install_s390() {
@@ -73,7 +76,8 @@ add_stream_repo() {
 }
 rm_non_ubi_repos() {
-  rm -rf /etc/yum.repos.d/mirror.stream.centos.org_9-stream_* /etc/yum.repos.d/epel*
+  local dir="/etc/yum.repos.d"
+  rm -rf $dir/mirror.stream.centos.org_9-stream_* $dir/epel* $dir/_copr:*
 }
 dnf_install_mesa() {
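
Two details in the rewritten rm_non_ubi_repos: the `local dir` keeps the line to a readable width, and the new `$dir/_copr:*` glob sweeps up copr repo files (such as the one dnf_install_asahi enables). Because the command uses `-f`, globs that match nothing are harmless: the shell passes the unmatched pattern through literally and `rm -f` ignores the nonexistent path:

    # 'rm -rf' exits 0 even when a glob matches nothing; the unmatched
    # pattern is passed through as a literal, nonexistent path.
    dir="/etc/yum.repos.d"
    rm -rf "$dir"/no-such-repo-* ; echo "exit=$?"   # exit=0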
@@ -111,30 +115,27 @@ dnf_install_ffmpeg() {
 }
 dnf_install() {
-  local rpm_list=("podman-remote" "python3" "python3-pip" "python3-argcomplete" \
-                  "python3-dnf-plugin-versionlock" "python3-devel" "gcc-c++" "cmake" "vim" \
-                  "procps-ng" "git" "dnf-plugins-core" "libcurl-devel" "gawk")
+  local rpm_list=("podman-remote" "python3" "python3-pip" \
+                  "python3-argcomplete" "python3-dnf-plugin-versionlock" \
+                  "python3-devel" "gcc-c++" "cmake" "vim" "procps-ng" "git" \
+                  "dnf-plugins-core" "libcurl-devel" "gawk")
   local vulkan_rpms=("vulkan-headers" "vulkan-loader-devel" "vulkan-tools" \
                      "spirv-tools" "glslc" "glslang")
-  if [ "${containerfile}" = "ramalama" ] || [[ "${containerfile}" =~ rocm* ]] || \
-     [ "${containerfile}" = "vulkan" ]; then # All the UBI-based ones
-    if [ "${ID}" = "fedora" ]; then
-      dnf install -y "${rpm_list[@]}"
-    else
-      dnf_install_epel
-      dnf --enablerepo=ubi-9-appstream-rpms install -y "${rpm_list[@]}"
-    fi
+  if [ "${ID}" = "fedora" ]; then
+    dnf install -y "${rpm_list[@]}"
+  else
+    dnf_install_epel # All the UBI-based ones
+    dnf --enablerepo=ubi-9-appstream-rpms install -y "${rpm_list[@]}"
+  fi
-  # x86_64 and aarch64 means kompute
   if [ "$containerfile" = "ramalama" ]; then
-    if [ "$uname_m" = "x86_64" ] || [ "$uname_m" = "aarch64" ]; then
-      dnf_install_mesa
-    fi
-    dnf_install_rocm
-    rm_non_ubi_repos
+    if [ "$uname_m" = "x86_64" ] || [ "$uname_m" = "aarch64" ]; then
+      dnf_install_mesa # on x86_64 and aarch64 we use vulkan via mesa
+    else
+      dnf_install_s390
+    fi
   elif [[ "$containerfile" =~ rocm* ]]; then
     dnf_install_rocm
   elif [ "$containerfile" = "asahi" ]; then
     dnf_install_asahi
   elif [ "$containerfile" = "cuda" ]; then
@@ -203,13 +204,18 @@ configure_common_flags() {
     if [ "${ID}" = "fedora" ]; then
       common_flags+=("-DCMAKE_HIP_COMPILER_ROCM_ROOT=/usr")
     fi
     common_flags+=("-DGGML_HIP=ON" "-DAMDGPU_TARGETS=${AMDGPU_TARGETS:-gfx1010,gfx1012,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102,gfx1103,gfx1151,gfx1200,gfx1201}")
     ;;
   cuda)
     common_flags+=("-DGGML_CUDA=ON" "-DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined")
     ;;
-  vulkan | asahi)
-    common_flags+=("-DGGML_VULKAN=1")
+  ramalama | asahi)
+    if [ "$uname_m" = "x86_64" ] || [ "$uname_m" = "aarch64" ]; then
+      common_flags+=("-DGGML_VULKAN=ON")
+    else
+      common_flags+=("-DGGML_BLAS=ON" "-DGGML_BLAS_VENDOR=OpenBLAS")
+    fi
     ;;
   intel-gpu)
     common_flags+=("-DGGML_SYCL=ON" "-DCMAKE_C_COMPILER=icx" "-DCMAKE_CXX_COMPILER=icpx")
@@ -259,7 +265,7 @@ clone_and_build_ramalama() {
 }
 pip_install() {
   python3 -m pip install wheel qdrant_client fastembed openai fastapi uvicorn openvino --prefix="$1"
 }
 main() {
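
pip's `--prefix` keeps these Python dependencies out of the builder's system site-packages: the tree written under the prefix mirrors /usr, which is exactly what lets a later stage lift it wholesale (as the intel-gpu Containerfile does with `COPY --from=builder /tmp/install/ /usr/`). Assuming a /tmp/install prefix:

    # --prefix redirects the install root; the layout mirrors /usr, so
    # copying it onto /usr in the runtime image "installs" the packages.
    python3 -m pip install --prefix=/tmp/install wheel
    ls /tmp/install/lib/python3.*/site-packages/   # wheel lands here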
@@ -278,22 +284,13 @@ main() {
   common_flags+=("-DGGML_CCACHE=OFF" "-DCMAKE_INSTALL_PREFIX=${install_prefix}")
   available dnf && dnf_install
   if [ -n "$containerfile" ]; then
     clone_and_build_ramalama "${install_prefix}"
     pip_install "${install_prefix}"
   fi
   setup_build_env
   clone_and_build_whisper_cpp
   common_flags+=("-DLLAMA_CURL=ON")
-  case "$containerfile" in
-  ramalama)
-    if [ "$uname_m" = "x86_64" ] || [ "$uname_m" = "aarch64" ]; then
-      common_flags+=("-DGGML_KOMPUTE=ON" "-DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON")
-    else
-      common_flags+=("-DGGML_BLAS=ON" "-DGGML_BLAS_VENDOR=OpenBLAS")
-    fi
-    ;;
-  esac
   clone_and_build_llama_cpp
   available dnf && dnf_remove
   rm -rf /var/cache/*dnf* /opt/rocm-*/lib/*/library/*gfx9*

View File

@@ -1,7 +0,0 @@
-FROM registry.access.redhat.com/ubi9/ubi:9.5-1742918310
-COPY --chmod=755 ../scripts /usr/bin
-RUN /usr/bin/build_llama_and_whisper.sh "vulkan" && \
-    rag_framework load