mirror of https://github.com/containers/ramalama.git synced 2026-02-05 06:46:39 +01:00

use multi-stage builds for all images

Standardize on multi-stage builds for all images. This keeps development
tools and libraries out of the final images, reducing image size.
Install all llama.cpp binaries and shared libraries, for consistency with
upstream images, but avoid installing the unnecessary (and large) .a files
from the installation directory.
Call build_llama.sh to install runtime dependencies in the final images,
so that package versions stay consistent between the build and final images.

Signed-off-by: Mike Bonnet <mikeb@redhat.com>
Mike Bonnet
2026-01-29 12:45:26 -08:00
parent acbb271e1a
commit 88d597e4d9
8 changed files with 95 additions and 46 deletions
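
All of the Containerfiles below apply the same pattern. As an illustrative
composite of the hunks that follow (<backend> is a placeholder for asahi,
cann, cuda, intel-gpu, ramalama, or rocm; paths and flags are taken from the
diffs, while base images vary per file):

# stage 1: build llama.cpp with the full toolchain, installing to /tmp/install
FROM quay.io/fedora/fedora:43 AS builder
COPY container-images/scripts/build_llama.sh \
     container-images/scripts/lib.sh \
     /src/
WORKDIR /src/
RUN ./build_llama.sh <backend>

# stage 2: clean base; bind-mount the builder's /tmp/install and copy only
# binaries and shared libraries (the .a files are left behind), then let the
# same script install the matching runtime packages
FROM quay.io/fedora/fedora:43
RUN --mount=type=bind,from=builder,source=/tmp/install,target=/tmp/install \
    cp -a /tmp/install/bin/ /usr/ && \
    cp -a /tmp/install/lib64/*.so* /usr/lib64/
RUN --mount=type=bind,target=/src \
    /src/container-images/scripts/build_llama.sh <backend> runtime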


@@ -154,6 +154,8 @@ jobs:
 sudo apt-get update
 sudo apt-get install podman bash codespell git cmake libcurl4-openssl-dev
 sudo ./container-images/scripts/build_llama.sh
+sudo cp -a /tmp/install/bin/ /usr/
+sudo cp -a /tmp/install/lib/*.so* /usr/lib/
 uv tool install tox --with tox-uv
 uv pip install ".[dev]"


@@ -1,9 +1,15 @@
-FROM quay.io/fedora/fedora:43
+FROM quay.io/fedora/fedora:43 AS builder
 ENV ASAHI_VISIBLE_DEVICES 1
 COPY container-images/scripts/build_llama.sh \
      container-images/scripts/lib.sh \
      /src/
 WORKDIR /src/
 RUN ./build_llama.sh asahi
-WORKDIR /
+
+FROM quay.io/fedora/fedora:43
+RUN --mount=type=bind,from=builder,source=/tmp/install,target=/tmp/install \
+    cp -a /tmp/install/bin/ /usr/ && \
+    cp -a /tmp/install/lib64/*.so* /usr/lib64/
+RUN --mount=type=bind,target=/src \
+    /src/container-images/scripts/build_llama.sh asahi runtime


@@ -10,8 +10,9 @@ WORKDIR /src/
 RUN ./build_llama.sh cann

 FROM quay.io/ascend/${ASCEND_VERSION}
-# Copy the entire installation directory from the builder
-COPY --from=builder /tmp/install /usr
+RUN --mount=type=bind,from=builder,source=/tmp/install,target=/tmp/install \
+    cp -a /tmp/install/bin/ /usr/ && \
+    cp -a /tmp/install/lib64/*.so* /usr/lib64/
 ENTRYPOINT [ \
     "/bin/bash", \
     "-c", \


@@ -11,21 +11,25 @@ RUN ./build_llama.sh cuda
 # Final runtime image
 FROM docker.io/nvidia/cuda:${CUDA_VERSION}-runtime-ubi9
-# Copy the entire installation directory from the builder
-COPY --from=builder /tmp/install /usr
+RUN --mount=type=bind,from=builder,source=/tmp/install,target=/tmp/install \
+    cp -a /tmp/install/bin/ /usr/ && \
+    cp -a /tmp/install/lib64/*.so* /usr/lib64/
+RUN --mount=type=bind,target=/src \
+    /src/container-images/scripts/build_llama.sh cuda runtime
-# Install python3.12 and ramalama to support a non-standard use-case
-RUN dnf -y install python3.12 && dnf -y clean all && ln -sf python3.12 /usr/bin/python3
-COPY . /src/ramalama
-WORKDIR /src/ramalama
-RUN python3 -m ensurepip && \
+# Install ramalama to support a non-standard use-case
+RUN --mount=type=bind,target=/src \
+    cp -a /src /var/tmp/ramalama && \
     python3 -m pip \
         --no-cache-dir \
         --disable-pip-version-check \
         install \
         --compile \
         --prefix=/usr \
-        .
+        --root-user-action ignore \
+        /var/tmp/ramalama && \
+    rm -rf /var/tmp/ramalama
 WORKDIR /
 ENTRYPOINT []
 CMD ["/bin/bash"]


@@ -9,14 +9,12 @@ RUN ./build_llama.sh intel-gpu
 FROM quay.io/fedora/fedora:43
-COPY --from=builder /tmp/install/ /usr/
+RUN --mount=type=bind,from=builder,source=/tmp/install,target=/tmp/install \
+    cp -a /tmp/install/bin/ /usr/ && \
+    cp -a /tmp/install/lib64/*.so* /usr/lib64/
 COPY container-images/intel-gpu/oneAPI.repo /etc/yum.repos.d/
-RUN dnf install -y --setopt=install_weak_deps=false \
-    procps-ng intel-oneapi-runtime-mkl intel-oneapi-mkl-sycl-dft \
-    intel-level-zero oneapi-level-zero intel-compute-runtime lspci \
-    clinfo intel-oneapi-runtime-compilers intel-oneapi-mkl-core \
-    intel-oneapi-mkl-sycl-blas intel-oneapi-runtime-dnnl gawk
+RUN --mount=type=bind,target=/src \
+    /src/container-images/scripts/build_llama.sh intel-gpu runtime
 COPY --chmod=755 container-images/intel-gpu/entrypoint.sh /usr/bin/


@@ -1,19 +1,28 @@
-FROM quay.io/fedora/fedora:43
+FROM quay.io/fedora/fedora:43 AS builder
 COPY container-images/scripts/build_llama.sh \
      container-images/scripts/lib.sh \
      /src/
 WORKDIR /src/
 RUN ./build_llama.sh ramalama
+
+FROM quay.io/fedora/fedora:43
+RUN --mount=type=bind,from=builder,source=/tmp/install,target=/tmp/install \
+    cp -a /tmp/install/bin/ /usr/ && \
+    cp -a /tmp/install/lib64/*.so* /usr/lib64/
+RUN --mount=type=bind,target=/src \
+    /src/container-images/scripts/build_llama.sh ramalama runtime
 # Install ramalama to support a non-standard use-case
-COPY . /src/ramalama
-WORKDIR /src/ramalama
-RUN python3 -m ensurepip && \
+RUN --mount=type=bind,target=/src \
+    cp -a /src /var/tmp/ramalama && \
     python3 -m pip \
         --no-cache-dir \
         --disable-pip-version-check \
         install \
         --compile \
         --prefix=/usr \
-        .
-WORKDIR /
+        --root-user-action ignore \
+        /var/tmp/ramalama && \
+    rm -rf /var/tmp/ramalama


@@ -12,7 +12,5 @@ RUN --mount=type=bind,from=builder,source=/tmp/install,target=/tmp/install \
     cp -a /tmp/install/bin/ /usr/ && \
     cp -a /tmp/install/lib64/*.so* /usr/lib64/
-RUN dnf -y --setopt=install_weak_deps=false install hipblas rocblas rocm-hip rocm-runtime rocsolver && \
-    dnf -y clean all
-WORKDIR /
+RUN --mount=type=bind,target=/src \
+    /src/container-images/scripts/build_llama.sh rocm runtime


@@ -1,6 +1,7 @@
 #!/bin/bash

 DEFAULT_LLAMA_CPP_COMMIT="b45ef2702c262998d5db9887cd3c82f04761237a" # b7872
+MESA_VULKAN_VERSION=25.2.3-101.fc43

 dnf_install_intel_gpu() {
     local intel_rpms=("intel-oneapi-mkl-sycl-devel" "intel-oneapi-dnnl-devel"
@@ -54,9 +55,9 @@ dnf_install_s390_ppc64le() {
 dnf_install_mesa() {
     if [ "${ID}" = "fedora" ]; then
         dnf copr enable -y slp/mesa-libkrun-vulkan
-        dnf install -y mesa-vulkan-drivers-25.2.3-101.fc43 virglrenderer \
+        dnf install -y mesa-vulkan-drivers-$MESA_VULKAN_VERSION virglrenderer \
             "${vulkan_rpms[@]}"
-        dnf versionlock add mesa-vulkan-drivers-25.2.3-101.fc43
+        dnf versionlock add mesa-vulkan-drivers-$MESA_VULKAN_VERSION
     elif [ "${ID}" = "openEuler" ]; then
         dnf install -y mesa-vulkan-drivers virglrenderer "${vulkan_rpms[@]}"
     else # virglrenderer not available on RHEL or EPEL
@@ -70,7 +71,7 @@ dnf_install() {
     local rpm_exclude_list="selinux-policy,container-selinux"
     local rpm_list=("python3-dnf-plugin-versionlock"
                    "gcc-c++" "cmake" "vim" "procps-ng" "git-core"
-                   "dnf-plugins-core" "gawk")
+                   "dnf-plugins-core" "gawk" "openssl-devel")
     local vulkan_rpms=("vulkan-headers" "vulkan-loader-devel" "vulkan-tools"
                        "spirv-tools" "glslc" "glslang")
     if is_rhel_based; then
@@ -104,13 +105,47 @@ dnf_install() {
     dnf -y clean all
 }

+dnf_install_runtime_deps() {
+    local runtime_pkgs=()
+    if [ "$containerfile" = "ramalama" ]; then
+        # install python3 in the ramalama container to support a non-standard use-case
+        runtime_pkgs+=(python3 python3-pip)
+        if [ "$uname_m" = "x86_64" ] || [ "$uname_m" = "aarch64" ]; then
+            dnf copr enable -y slp/mesa-libkrun-vulkan
+            runtime_pkgs+=(vulkan-loader vulkan-tools "mesa-vulkan-drivers-$MESA_VULKAN_VERSION")
+        else
+            runtime_pkgs+=(openblas)
+        fi
+    elif [[ "$containerfile" = rocm* ]]; then
+        runtime_pkgs+=(hipblas rocblas rocm-hip rocm-runtime rocsolver)
+    elif [ "$containerfile" = "asahi" ]; then
+        dnf copr enable -y @asahi/fedora-remix-branding
+        dnf install -y asahi-repos
+        runtime_pkgs+=(vulkan-loader vulkan-tools mesa-vulkan-drivers)
+    elif [ "$containerfile" = "cuda" ]; then
+        # install python3.12 in the cuda container to support a non-standard use-case
+        runtime_pkgs+=(python3.12 python3.12-pip)
+        ln -sf python3.12 /usr/bin/python3
+    elif [ "$containerfile" = "intel-gpu" ]; then
+        runtime_pkgs+=(
+            clinfo lspci procps-ng
+            intel-compute-runtime intel-level-zero
+            intel-oneapi-runtime-compilers intel-oneapi-runtime-dnnl intel-oneapi-runtime-mkl
+            intel-oneapi-mkl-core intel-oneapi-mkl-sycl-blas intel-oneapi-mkl-sycl-dft
+            oneapi-level-zero
+        )
+    fi
+    dnf install -y --setopt=install_weak_deps=false "${runtime_pkgs[@]}"
+    dnf -y clean all
+}
+
 cmake_check_warnings() {
     awk -v rc=0 '/CMake Warning:/ { rc=1 } 1; END {exit rc}'
 }

 setup_build_env() {
     # external scripts may reference unbound variables
-    set +u
+    set +ux
     if [ "$containerfile" = "cann" ]; then
         # source build env
         cann_in_sys_path=/usr/local/Ascend/ascend-toolkit
@@ -133,7 +168,7 @@ setup_build_env() {
         # shellcheck disable=SC1091
         source /opt/intel/oneapi/setvars.sh
     fi
-    set -u
+    set -ux
 }

 cmake_steps() {
@@ -152,17 +187,9 @@ cmake_steps() {
     )
 }

-set_install_prefix() {
-    if [ "$containerfile" = "cuda" ] || [ "$containerfile" = "intel-gpu" ] || [ "$containerfile" = "cann" ] || [ "$containerfile" = "musa" ] || [ "$containerfile" = "rocm" ]; then
-        echo "/tmp/install"
-    else
-        echo "/usr"
-    fi
-}
-
 configure_common_flags() {
     common_flags=(
-        "-DGGML_CCACHE=OFF" "-DGGML_RPC=ON" "-DCMAKE_INSTALL_PREFIX=${install_prefix}"
+        "-DGGML_CCACHE=OFF" "-DGGML_RPC=ON" "-DCMAKE_INSTALL_PREFIX=/tmp/install"
         "-DLLAMA_BUILD_TESTS=OFF" "-DLLAMA_BUILD_EXAMPLES=OFF" "-DGGML_BUILD_TESTS=OFF" "-DGGML_BUILD_EXAMPLES=OFF"
     )
     if [ "$containerfile" != "cann" ]; then
@@ -235,10 +262,14 @@ main() {
     source "$(dirname "$0")/lib.sh"

     local containerfile=${1-""}
-    local install_prefix
-    install_prefix=$(set_install_prefix)
+    local uname_m
+    uname_m="$(uname -m)"
+    if [ "${2-""}" == "runtime" ]; then
+        dnf_install_runtime_deps
+        exit
+    fi

     local common_flags
     configure_common_flags
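
The net effect of the build_llama.sh changes is a script with two modes keyed
off its second argument. A sketch of the resulting call pattern (the
invocations mirror the Containerfile hunks above; no behavior beyond what the
diff shows is implied):

# builder stage: install build dependencies, compile llama.cpp,
# and install everything under /tmp/install
./build_llama.sh cuda

# final stage: run only dnf_install_runtime_deps for the named backend,
# then exit before any cmake/build steps
./build_llama.sh cuda runtime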