
Combine Vulkan, Kompute and CPU inferencing into one image

Fewer images to maintain, and Vulkan is more mature and more widely
used than Kompute.

Signed-off-by: Eric Curtin <ecurtin@redhat.com>
Eric Curtin
2025-03-24 11:40:07 +00:00
parent c09713f61a
commit 73c54bf34c
10 changed files with 52 additions and 61 deletions
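
Distilled from the build-script hunks below, the consolidated image now picks its inference backend at build time rather than shipping separate vulkan/kompute images. Roughly (a sketch, not a verbatim excerpt):

    # Sketch of the new selection logic: Vulkan on mainstream
    # architectures, OpenBLAS (CPU) everywhere else.
    uname_m="$(uname -m)"
    if [ "$uname_m" = "x86_64" ] || [ "$uname_m" = "aarch64" ]; then
        common_flags+=("-DGGML_VULKAN=ON")
    else
        common_flags+=("-DGGML_BLAS=ON" "-DGGML_BLAS_VENDOR=OpenBLAS")
    fi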

View File

@@ -109,6 +109,7 @@ ifneq (,$(wildcard /usr/bin/python3))
 	/usr/bin/python3 -m compileall -q .
 endif
+	! grep -ri "#\!/usr/bin/python3" .
 	flake8 *.py */*.py */*/*.py libexec/* bin/*
 	shellcheck *.sh */*.sh */*/*.sh
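
The added check uses `!` to invert grep's exit status: grep exits 0 when it finds a match, so the recipe now fails exactly when some file still hardcodes a `#!/usr/bin/python3` shebang. A minimal illustration of the same pattern outside the Makefile:

    # grep exits 0 on a match; '!' flips it, so this check only fails
    # when the unwanted shebang is present somewhere in the tree.
    if ! grep -ri '#!/usr/bin/python3' . ; then
        echo "clean: no hardcoded python3 shebangs"
    else
        echo "found a hardcoded shebang" >&2
        exit 1
    fi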

View File

@@ -2,5 +2,6 @@ FROM fedora:41
 ENV ASAHI_VISIBLE_DEVICES 1
 COPY --chmod=755 ../scripts /usr/bin
-RUN /usr/bin/build_llama_and_whisper.sh "asahi" && \
+RUN build_llama_and_whisper.sh "asahi" && \
     rag_framework load
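
Copying the whole scripts directory to /usr/bin (rather than /scripts or /) is what lets every RUN line drop the absolute path: /usr/bin is on the image's default PATH, so the script resolves like any other command. The same two-line pattern repeats across the Containerfiles below; inside the built image:

    # /usr/bin is on PATH, so the build script resolves without a path.
    command -v build_llama_and_whisper.sh   # prints /usr/bin/build_llama_and_whisper.sh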

View File

@@ -3,8 +3,8 @@ ARG ASCEND_VERSION=cann:8.0.0-910b-openeuler22.03-py3.10
 FROM quay.io/ascend/${ASCEND_VERSION} AS builder
 ARG GOLANG_VERSION
-COPY --chmod=755 ../scripts /scripts
-RUN sh -x /scripts/build_llama_and_whisper.sh "cann"
+COPY --chmod=755 ../scripts /usr/bin
+RUN build_llama_and_whisper.sh "cann"
 FROM quay.io/ascend/${ASCEND_VERSION}
 # Copy the entire installation directory from the builder

View File

@@ -1,8 +1,8 @@
 # Base image with CUDA for compilation
 FROM docker.io/nvidia/cuda:12.8.1-devel-ubi9 AS builder
-COPY --chmod=755 ../scripts /scripts
-RUN /scripts/build_llama_and_whisper.sh "cuda"
+COPY --chmod=755 ../scripts /usr/bin
+RUN build_llama_and_whisper.sh "cuda"
 # Final runtime image
 FROM docker.io/nvidia/cuda:12.8.1-runtime-ubi9

View File

@@ -1,19 +1,18 @@
 FROM quay.io/fedora/fedora:41 as builder
 COPY intel-gpu/oneAPI.repo /etc/yum.repos.d/
-COPY --chmod=755 scripts/build_llama_and_whisper.sh /
-RUN /build_llama_and_whisper.sh "intel-gpu"
+COPY --chmod=755 ../scripts /usr/bin
+RUN build_llama_and_whisper.sh "intel-gpu"
 FROM quay.io/fedora/fedora:41
 COPY --from=builder /tmp/install/ /usr/
 COPY intel-gpu/oneAPI.repo /etc/yum.repos.d/
-RUN dnf install -y procps-ng python3 python3-pip python3-devel intel-level-zero oneapi-level-zero intel-compute-runtime libcurl lspci clinfo intel-oneapi-runtime-compilers intel-oneapi-mkl-core intel-oneapi-mkl-sycl-blas intel-oneapi-runtime-dnnl ; \
-    chown 0:0 /etc/passwd ; \
-    chown 0:0 /etc/group ; \
-    chmod g=u /etc/passwd /etc/group /home ;
+RUN dnf install -y procps-ng python3 python3-pip python3-devel intel-level-zero oneapi-level-zero intel-compute-runtime libcurl lspci clinfo intel-oneapi-runtime-compilers intel-oneapi-mkl-core intel-oneapi-mkl-sycl-blas intel-oneapi-runtime-dnnl && \
+    chown 0:0 /etc/passwd && \
+    chown 0:0 /etc/group && \
+    chmod g=u /etc/passwd /etc/group /home
 RUN useradd llama-user -u 10000 -d /home/llama-user -s /bin/bash && \
     groupmod -a -U llama-user render && \
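
Beyond the path change, swapping `;` for `&&` in the RUN chain is a correctness fix: with `;` the step's exit status is only that of the final command, so a failed dnf install could slip through and produce a broken layer; with `&&` the first failure aborts the chain and fails the build. A quick illustration:

    # ';' keeps going and reports the last command's status (exit=0 here):
    sh -c 'false ; echo "ran anyway"' ; echo "exit=$?"
    # '&&' short-circuits on the first failure (exit=1):
    sh -c 'false && echo "never runs"' ; echo "exit=$?"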

View File

@@ -2,5 +2,5 @@ FROM registry.access.redhat.com/ubi9/ubi:9.5-1742918310
 COPY --chmod=755 ../scripts /usr/bin
-RUN /usr/bin/build_llama_and_whisper.sh "ramalama" && \
+RUN build_llama_and_whisper.sh "ramalama" && \
     rag_framework load

View File

@@ -1,7 +1,7 @@
 FROM registry.fedoraproject.org/fedora:42
-COPY --chmod=755 ../scripts /usr/bin/
-RUN /usr/bin/build_llama_and_whisper.sh "rocm" && \
+COPY --chmod=755 ../scripts /usr/bin
+RUN build_llama_and_whisper.sh "rocm" && \
     rag_framework load
 ENV WHISPER_CPP_SHA=${WHISPER_CPP_SHA}

View File

@@ -2,9 +2,9 @@ FROM registry.access.redhat.com/ubi9/ubi:9.5-1742918310
 COPY rocm/amdgpu.repo /etc/yum.repos.d/
 COPY rocm/rocm.repo /etc/yum.repos.d/
-COPY --chmod=755 scripts /scripts
+COPY --chmod=755 ../scripts /usr/bin
 ARG AMDGPU_TARGETS=
 ENV AMDGPU_TARGETS=${AMDGPU_TARGETS}
-RUN /scripts/build_llama_and_whisper.sh "rocm" && \
+RUN build_llama_and_whisper.sh "rocm" && \
     rag_framework load
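
The `ARG AMDGPU_TARGETS=` / `ENV AMDGPU_TARGETS=${AMDGPU_TARGETS}` pair forwards an optional build argument into the environment the build script sees; the script then falls back to its own GPU list via `${AMDGPU_TARGETS:-…}` (visible in the script hunk further down). The shell side of that fallback:

    # ${VAR:-default} expands to the default when VAR is unset or empty,
    # so an unset build arg still yields a usable target list.
    AMDGPU_TARGETS=""
    echo "${AMDGPU_TARGETS:-gfx1100,gfx1101}"   # two entries from the real list, for brevity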

View File

@@ -8,7 +8,7 @@ dnf_install_intel_gpu() {
   local intel_rpms=("intel-oneapi-mkl-sycl-devel" "intel-oneapi-dnnl-devel" \
                     "intel-oneapi-compiler-dpcpp-cpp" "intel-level-zero" \
                     "oneapi-level-zero" "oneapi-level-zero-devel" "intel-compute-runtime")
-  dnf install -y "${rpm_list[@]}" "${intel_rpms[@]}"
+  dnf install -y "${intel_rpms[@]}"
   # shellcheck disable=SC1091
   . /opt/intel/oneapi/setvars.sh
@@ -24,11 +24,11 @@ dnf_remove() {
 dnf_install_asahi() {
   dnf copr enable -y @asahi/fedora-remix-branding
   dnf install -y asahi-repos
-  dnf install -y mesa-vulkan-drivers "${vulkan_rpms[@]}" "${rpm_list[@]}"
+  dnf install -y mesa-vulkan-drivers "${vulkan_rpms[@]}"
 }
 dnf_install_cuda() {
-  dnf install -y "${rpm_list[@]}" gcc-toolset-12
+  dnf install -y gcc-toolset-12
   # shellcheck disable=SC1091
   . /opt/rh/gcc-toolset-12/enable
 }
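
Dropping `"${rpm_list[@]}"` from the per-backend helpers works because dnf_install now installs that list itself for every image (see the dnf_install hunk below). It also removes a subtle coupling: rpm_list is a `local` in dnf_install, and bash locals are dynamically scoped, so helpers invoked from dnf_install could read it without it ever being passed. A minimal sketch of that scoping rule:

    # bash locals are visible to functions called from the declaring
    # function (dynamic scope); this is how the helpers saw rpm_list.
    outer() { local rpm_list=("git" "cmake"); inner; }
    inner() { echo "inner sees: ${rpm_list[*]}"; }
    outer   # prints: inner sees: git cmake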
@@ -51,9 +51,12 @@ dnf_install_rocm() {
   if [ "${ID}" = "fedora" ]; then
     dnf install -y rocm-core-devel hipblas-devel rocblas-devel rocm-hip-devel
   else
+    add_stream_repo "AppStream"
     dnf install -y rocm-dev hipblas-devel rocblas-devel
   fi
+  fi
+  rm_non_ubi_repos
 }
 dnf_install_s390() {
@@ -73,7 +76,8 @@ add_stream_repo() {
 }
 rm_non_ubi_repos() {
-  rm -rf /etc/yum.repos.d/mirror.stream.centos.org_9-stream_* /etc/yum.repos.d/epel*
+  local dir="/etc/yum.repos.d"
+  rm -rf $dir/mirror.stream.centos.org_9-stream_* $dir/epel* $dir/_copr:*
 }
 dnf_install_mesa() {
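
Two details in the rewritten rm_non_ubi_repos: the `local dir` keeps the line to a readable width, and the new `$dir/_copr:*` glob sweeps up copr repo files (such as the one dnf_install_asahi enables). Because the command uses `-f`, globs that match nothing are harmless: the shell passes the unmatched pattern through literally and `rm -f` ignores the nonexistent path:

    # 'rm -rf' exits 0 even when a glob matches nothing; the unmatched
    # pattern is passed through as a literal, nonexistent path.
    dir="/etc/yum.repos.d"
    rm -rf "$dir"/no-such-repo-* ; echo "exit=$?"   # exit=0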
@@ -111,30 +115,27 @@ dnf_install_ffmpeg() {
 }
 dnf_install() {
-  local rpm_list=("podman-remote" "python3" "python3-pip" "python3-argcomplete" \
-                  "python3-dnf-plugin-versionlock" "python3-devel" "gcc-c++" "cmake" "vim" \
-                  "procps-ng" "git" "dnf-plugins-core" "libcurl-devel" "gawk")
+  local rpm_list=("podman-remote" "python3" "python3-pip" \
+                  "python3-argcomplete" "python3-dnf-plugin-versionlock" \
+                  "python3-devel" "gcc-c++" "cmake" "vim" "procps-ng" "git" \
+                  "dnf-plugins-core" "libcurl-devel" "gawk")
   local vulkan_rpms=("vulkan-headers" "vulkan-loader-devel" "vulkan-tools" \
                      "spirv-tools" "glslc" "glslang")
-  if [ "${containerfile}" = "ramalama" ] || [[ "${containerfile}" =~ rocm* ]] || \
-     [ "${containerfile}" = "vulkan" ]; then # All the UBI-based ones
-    if [ "${ID}" = "fedora" ]; then
-      dnf install -y "${rpm_list[@]}"
-    else
-      dnf_install_epel
-      dnf --enablerepo=ubi-9-appstream-rpms install -y "${rpm_list[@]}"
-    fi
+  if [ "${ID}" = "fedora" ]; then
+    dnf install -y "${rpm_list[@]}"
+  else
+    dnf_install_epel # All the UBI-based ones
+    dnf --enablerepo=ubi-9-appstream-rpms install -y "${rpm_list[@]}"
+  fi
-  # x86_64 and aarch64 means kompute
   if [ "$containerfile" = "ramalama" ]; then
-    if [ "$uname_m" = "x86_64" ] || [ "$uname_m" = "aarch64" ]; then
-      dnf_install_mesa
-    fi
-    dnf_install_rocm
-    rm_non_ubi_repos
+    if [ "$uname_m" = "x86_64" ] || [ "$uname_m" = "aarch64" ]; then
+      dnf_install_mesa # on x86_64 and aarch64 we use vulkan via mesa
+    else
+      dnf_install_s390
+    fi
   elif [[ "$containerfile" =~ rocm* ]]; then
     dnf_install_rocm
   elif [ "$containerfile" = "asahi" ]; then
     dnf_install_asahi
   elif [ "$containerfile" = "cuda" ]; then
@@ -203,13 +204,18 @@ configure_common_flags() {
     if [ "${ID}" = "fedora" ]; then
       common_flags+=("-DCMAKE_HIP_COMPILER_ROCM_ROOT=/usr")
     fi
     common_flags+=("-DGGML_HIP=ON" "-DAMDGPU_TARGETS=${AMDGPU_TARGETS:-gfx1010,gfx1012,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102,gfx1103,gfx1151,gfx1200,gfx1201}")
     ;;
   cuda)
     common_flags+=("-DGGML_CUDA=ON" "-DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined")
     ;;
-  vulkan | asahi)
-    common_flags+=("-DGGML_VULKAN=1")
+  ramalama | asahi)
+    if [ "$uname_m" = "x86_64" ] || [ "$uname_m" = "aarch64" ]; then
+      common_flags+=("-DGGML_VULKAN=ON")
+    else
+      common_flags+=("-DGGML_BLAS=ON" "-DGGML_BLAS_VENDOR=OpenBLAS")
+    fi
     ;;
   intel-gpu)
     common_flags+=("-DGGML_SYCL=ON" "-DCMAKE_C_COMPILER=icx" "-DCMAKE_CXX_COMPILER=icpx")
@@ -259,7 +265,7 @@ clone_and_build_ramalama() {
 }
 pip_install() {
   python3 -m pip install wheel qdrant_client fastembed openai fastapi uvicorn openvino --prefix="$1"
 }
 main() {
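
pip's `--prefix` keeps these Python dependencies out of the builder's system site-packages: the tree written under the prefix mirrors /usr, which is exactly what lets a later stage lift it wholesale (as the intel-gpu Containerfile does with `COPY --from=builder /tmp/install/ /usr/`). Assuming a /tmp/install prefix:

    # --prefix redirects the install root; the layout mirrors /usr, so
    # copying it onto /usr in the runtime image "installs" the packages.
    python3 -m pip install --prefix=/tmp/install wheel
    ls /tmp/install/lib/python3.*/site-packages/   # wheel lands here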
@@ -278,22 +284,13 @@ main() {
   common_flags+=("-DGGML_CCACHE=OFF" "-DCMAKE_INSTALL_PREFIX=${install_prefix}")
   available dnf && dnf_install
   if [ -n "$containerfile" ]; then
     clone_and_build_ramalama "${install_prefix}"
     pip_install "${install_prefix}"
   fi
   setup_build_env
   clone_and_build_whisper_cpp
   common_flags+=("-DLLAMA_CURL=ON")
-  case "$containerfile" in
-  ramalama)
-    if [ "$uname_m" = "x86_64" ] || [ "$uname_m" = "aarch64" ]; then
-      common_flags+=("-DGGML_KOMPUTE=ON" "-DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON")
-    else
-      common_flags+=("-DGGML_BLAS=ON" "-DGGML_BLAS_VENDOR=OpenBLAS")
-    fi
-    ;;
-  esac
   clone_and_build_llama_cpp
   available dnf && dnf_remove
   rm -rf /var/cache/*dnf* /opt/rocm-*/lib/*/library/*gfx9*

View File

@@ -1,7 +0,0 @@
-FROM registry.access.redhat.com/ubi9/ubi:9.5-1742918310
-COPY --chmod=755 ../scripts /usr/bin
-RUN /usr/bin/build_llama_and_whisper.sh "vulkan" && \
-    rag_framework load