
remove whisper.cpp from all images

Remove the build and installation of whisper.cpp, and the installation of ffmpeg.
Rename build_llama_and_whisper.sh to build_llama.sh.
Update Containerfiles to reference the new script name.
Consolidate management of cmake args in build_llama.sh.
Remove references to whisper-server in various locations.

Signed-off-by: Mike Bonnet <mikeb@redhat.com>
Mike Bonnet
2026-01-27 15:58:45 -08:00
parent 34d89cee90
commit d06f6d3519
21 changed files with 52 additions and 123 deletions
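For anyone building outside of a Containerfile, the renamed script is invoked the same way as before, just under its new name. A minimal shell sketch (the CI jobs call it with no argument, while the Containerfiles pass an image name such as cuda or rocm):

    # generic build, as used by the CI workflows
    sudo ./container-images/scripts/build_llama.sh

    # accelerator-specific build, as used by the Containerfiles
    ./container-images/scripts/build_llama.sh cuda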

View File

@@ -155,7 +155,7 @@ jobs:
df -h
sudo apt-get update
sudo apt-get install podman bats bash codespell git cmake libcurl4-openssl-dev
sudo ./container-images/scripts/build_llama_and_whisper.sh
sudo ./container-images/scripts/build_llama.sh
uv run -- make install-requirements
- name: install ollama
@@ -350,7 +350,7 @@ jobs:
df -h
sudo apt-get update
sudo apt-get install podman bash codespell git cmake libcurl4-openssl-dev
sudo ./container-images/scripts/build_llama_and_whisper.sh
sudo ./container-images/scripts/build_llama.sh
uv tool install tox --with tox-uv
uv pip install ".[dev]"

View File

@@ -11,7 +11,7 @@ metadata:
pipelinesascode.tekton.dev/on-cel-expression: >-
event == "pull_request" && target_branch == "main" && body.action != "ready_for_review" &&
(
"container-images/scripts/build_llama_and_whisper.sh".pathChanged() ||
"container-images/scripts/build_llama.sh".pathChanged() ||
"container-images/scripts/lib.sh".pathChanged() ||
"container-images/asahi/*".pathChanged() ||
".tekton/pipelines/pull-request-pipeline.yaml".pathChanged() ||

View File

@@ -11,7 +11,7 @@ metadata:
pipelinesascode.tekton.dev/on-cel-expression: >-
event == "pull_request" && target_branch == "main" && body.action != "ready_for_review" &&
(
"container-images/scripts/build_llama_and_whisper.sh".pathChanged() ||
"container-images/scripts/build_llama.sh".pathChanged() ||
"container-images/scripts/lib.sh".pathChanged() ||
"container-images/cann/*".pathChanged() ||
".tekton/pipelines/pull-request-pipeline.yaml".pathChanged() ||

View File

@@ -11,7 +11,7 @@ metadata:
pipelinesascode.tekton.dev/on-cel-expression: >-
event == "pull_request" && target_branch == "main" && body.action != "ready_for_review" &&
(
"container-images/scripts/build_llama_and_whisper.sh".pathChanged() ||
"container-images/scripts/build_llama.sh".pathChanged() ||
"container-images/scripts/lib.sh".pathChanged() ||
"container-images/cuda/*".pathChanged() ||
".tekton/pipelines/pull-request-pipeline.yaml".pathChanged() ||

View File

@@ -11,7 +11,7 @@ metadata:
pipelinesascode.tekton.dev/on-cel-expression: >-
event == "pull_request" && target_branch == "main" && body.action != "ready_for_review" &&
(
"container-images/scripts/build_llama_and_whisper.sh".pathChanged() ||
"container-images/scripts/build_llama.sh".pathChanged() ||
"container-images/scripts/lib.sh".pathChanged() ||
"container-images/intel-gpu/*".pathChanged() ||
".tekton/pipelines/pull-request-pipeline.yaml".pathChanged() ||

View File

@@ -11,7 +11,7 @@ metadata:
pipelinesascode.tekton.dev/on-cel-expression: >-
event == "pull_request" && target_branch == "main" && body.action != "ready_for_review" &&
(
"container-images/scripts/build_llama_and_whisper.sh".pathChanged() ||
"container-images/scripts/build_llama.sh".pathChanged() ||
"container-images/scripts/lib.sh".pathChanged() ||
"container-images/ramalama/*".pathChanged() ||
".tekton/pipelines/pull-request-pipeline.yaml".pathChanged() ||

View File

@@ -11,7 +11,7 @@ metadata:
pipelinesascode.tekton.dev/on-cel-expression: >-
event == "pull_request" && target_branch == "main" && body.action != "ready_for_review" &&
(
"container-images/scripts/build_llama_and_whisper.sh".pathChanged() ||
"container-images/scripts/build_llama.sh".pathChanged() ||
"container-images/scripts/lib.sh".pathChanged() ||
"container-images/rocm/*".pathChanged() ||
".tekton/pipelines/pull-request-pipeline.yaml".pathChanged() ||

View File

@@ -1258,7 +1258,6 @@ Regarding this alpha, everything is under development, so expect breaking change
This project wouldn't be possible without the help of other projects like:
- [llama.cpp](https://github.com/ggml-org/llama.cpp)
- [whisper.cpp](https://github.com/ggml-org/whisper.cpp)
- [vllm](https://github.com/vllm-project/vllm)
- [mlx-lm](https://github.com/ml-explore/mlx-examples)
- [podman](https://github.com/containers/podman)

View File

@@ -20,7 +20,6 @@ Contributors can comment on or contribute to the issues linked here.
Define a syntax that simplifies adding new runtimes.
Current runtimes:
- `llama.cpp`
- `whisper.cpp`
- `vllm`
- `stable-diffusion`
- `OpenVINO`
@@ -67,4 +66,4 @@ Contributors can comment on or contribute to the issues linked here.
- Detect best base image from a compatibility matrix.
- Automatically select and execute commands with the correct image.
---
---

View File

@@ -1,9 +1,9 @@
FROM quay.io/fedora/fedora:43
ENV ASAHI_VISIBLE_DEVICES 1
COPY container-images/scripts/build_llama_and_whisper.sh \
COPY container-images/scripts/build_llama.sh \
container-images/scripts/lib.sh \
/src/
WORKDIR /src/
RUN ./build_llama_and_whisper.sh asahi
RUN ./build_llama.sh asahi
WORKDIR /

View File

@@ -3,11 +3,11 @@ ARG ASCEND_VERSION=cann:8.0.0-910b-openeuler22.03-py3.10
FROM quay.io/ascend/${ASCEND_VERSION} AS builder
ARG GOLANG_VERSION
COPY container-images/scripts/build_llama_and_whisper.sh \
COPY container-images/scripts/build_llama.sh \
container-images/scripts/lib.sh \
/src/
WORKDIR /src/
RUN ./build_llama_and_whisper.sh cann
RUN ./build_llama.sh cann
FROM quay.io/ascend/${ASCEND_VERSION}
# Copy the entire installation directory from the builder

View File

@@ -2,11 +2,11 @@ ARG CUDA_VERSION=12.9.1
# Base image with CUDA for compilation
FROM docker.io/nvidia/cuda:${CUDA_VERSION}-devel-ubi9 AS builder
COPY container-images/scripts/build_llama_and_whisper.sh \
COPY container-images/scripts/build_llama.sh \
container-images/scripts/lib.sh \
/src/
WORKDIR /src/
RUN ./build_llama_and_whisper.sh cuda
RUN ./build_llama.sh cuda
# Final runtime image
FROM docker.io/nvidia/cuda:${CUDA_VERSION}-runtime-ubi9

View File

@@ -1,11 +1,11 @@
FROM quay.io/fedora/fedora:43 as builder
COPY container-images/intel-gpu/oneAPI.repo /etc/yum.repos.d/
COPY container-images/scripts/build_llama_and_whisper.sh \
COPY container-images/scripts/build_llama.sh \
container-images/scripts/lib.sh \
/src/
WORKDIR /src/
RUN ./build_llama_and_whisper.sh intel-gpu
RUN ./build_llama.sh intel-gpu
FROM quay.io/fedora/fedora:43

View File

@@ -3,11 +3,11 @@ ARG UBUNTU_VERSION=22.04
# Base image with MUSA for compilation
FROM docker.io/mthreads/musa:${VERSION}-devel-ubuntu${UBUNTU_VERSION}-amd64 AS builder
COPY container-images/scripts/build_llama_and_whisper.sh \
COPY container-images/scripts/build_llama.sh \
container-images/scripts/lib.sh \
/src/
WORKDIR /src/
RUN ./build_llama_and_whisper.sh musa
RUN ./build_llama.sh musa
# Final runtime image
FROM docker.io/mthreads/musa:${VERSION}-runtime-ubuntu${UBUNTU_VERSION}-amd64

View File

@@ -1,10 +1,10 @@
FROM quay.io/fedora/fedora:43
COPY container-images/scripts/build_llama_and_whisper.sh \
COPY container-images/scripts/build_llama.sh \
container-images/scripts/lib.sh \
/src/
WORKDIR /src/
RUN ./build_llama_and_whisper.sh ramalama
RUN ./build_llama.sh ramalama
# Install ramalama to support a non-standard use-case
COPY . /src/ramalama
WORKDIR /src/ramalama

View File

@@ -1,22 +1,15 @@
FROM quay.io/fedora/fedora:43 AS builder
COPY container-images/scripts/build_llama_and_whisper.sh \
COPY container-images/scripts/build_llama.sh \
container-images/scripts/lib.sh \
/src/
WORKDIR /src/
RUN ./build_llama_and_whisper.sh rocm
RUN ./build_llama.sh rocm
FROM quay.io/fedora/fedora:43
RUN --mount=type=bind,from=builder,source=/tmp/install,target=/tmp/install \
cp -a /tmp/install/bin/llama-bench \
/tmp/install/bin/llama-perplexity \
/tmp/install/bin/llama-quantize \
/tmp/install/bin/llama-server \
/tmp/install/bin/rpc-server \
/tmp/install/bin/whisper-server \
/tmp/install/bin/*.so* \
/usr/bin/ && \
cp -a /tmp/install/bin/ /usr/ && \
cp -a /tmp/install/lib64/*.so* /usr/lib64/
RUN dnf -y --setopt=install_weak_deps=false install hipblas rocblas rocm-hip rocm-runtime rocsolver && \

View File

@@ -1,7 +1,6 @@
#!/bin/bash
DEFAULT_LLAMA_CPP_COMMIT="091a46cb8d43c0e662d04b80a3d11320d25b7d49" # b7815
DEFAULT_WHISPER_COMMIT="2eeeba56e9edd762b4b38467bab96c2517163158" # v1.8.3
dnf_install_intel_gpu() {
local intel_rpms=("intel-oneapi-mkl-sycl-devel" "intel-oneapi-dnnl-devel"
@@ -9,13 +8,6 @@ dnf_install_intel_gpu() {
"intel-oneapi-compiler-dpcpp-cpp" "intel-level-zero"
"oneapi-level-zero" "oneapi-level-zero-devel" "intel-compute-runtime")
dnf install -y "${intel_rpms[@]}"
# shellcheck disable=SC1091
. /opt/intel/oneapi/setvars.sh
}
dnf_remove() {
dnf -y clean all
}
dnf_install_asahi() {
@@ -74,25 +66,6 @@ dnf_install_mesa() {
rm_non_ubi_repos
}
# There is no ffmpeg-free package in the openEuler repository. openEuler can use ffmpeg,
# which also has the same GPL/LGPL license as ffmpeg-free.
dnf_install_ffmpeg() {
if is_rhel_based; then
dnf_install_epel
add_stream_repo "AppStream"
add_stream_repo "BaseOS"
add_stream_repo "CRB"
fi
if [ "${ID}" = "openEuler" ]; then
dnf install -y ffmpeg
else
dnf install -y ffmpeg-free
fi
rm_non_ubi_repos
}
dnf_install() {
local rpm_exclude_list="selinux-policy,container-selinux"
local rpm_list=("python3-dnf-plugin-versionlock"
@@ -124,8 +97,6 @@ dnf_install() {
dnf_install_cann
fi
dnf_install_ffmpeg
if [[ "${RAMALAMA_IMAGE_BUILD_DEBUG_MODE:-}" == y ]]; then
dnf install -y gdb strace
fi
@@ -134,16 +105,12 @@ dnf_install() {
}
cmake_check_warnings() {
# There has warning "CMake Warning:Manually-specified variables were not used by the project" during compile of custom ascend kernels of ggml cann backend.
# Should remove "cann" judge condition when this warning are fixed in llama.cpp/whisper.cpp
if [ "$containerfile" != "cann" ]; then
awk -v rc=0 '/CMake Warning:/ { rc=1 } 1; END {exit rc}'
else
awk '/CMake Warning:/ {print $0}'
fi
awk -v rc=0 '/CMake Warning:/ { rc=1 } 1; END {exit rc}'
}
setup_build_env() {
# external scripts may reference unbound variables
set +u
if [ "$containerfile" = "cann" ]; then
# source build env
cann_in_sys_path=/usr/local/Ascend/ascend-toolkit
@@ -162,7 +129,11 @@ setup_build_env() {
echo "No Ascend Toolkit found"
exit 1
fi
elif [ "$containerfile" = "intel-gpu" ]; then
# shellcheck disable=SC1091
source /opt/intel/oneapi/setvars.sh
fi
set -u
}
cmake_steps() {
@@ -190,7 +161,13 @@ set_install_prefix() {
}
configure_common_flags() {
common_flags=()
common_flags=(
"-DGGML_CCACHE=OFF" "-DGGML_RPC=ON" "-DCMAKE_INSTALL_PREFIX=${install_prefix}"
"-DLLAMA_BUILD_TESTS=OFF" "-DLLAMA_BUILD_EXAMPLES=OFF" "-DGGML_BUILD_TESTS=OFF" "-DGGML_BUILD_EXAMPLES=OFF"
)
if [ "$containerfile" != "cann" ]; then
common_flags+=("-DGGML_NATIVE=OFF" "-DGGML_BACKEND_DL=ON" "-DGGML_CPU_ALL_VARIANTS=ON")
fi
if [[ "${RAMALAMA_IMAGE_BUILD_DEBUG_MODE:-}" == y ]]; then
common_flags+=("-DGGML_CMAKE_BUILD_TYPE=Debug")
else
@@ -198,6 +175,16 @@ configure_common_flags() {
fi
case "$containerfile" in
ramalama)
if [ "$uname_m" = "x86_64" ] || [ "$uname_m" = "aarch64" ]; then
common_flags+=("-DGGML_VULKAN=ON")
elif [ "$uname_m" = "s390x" ] || [ "$uname_m" = "ppc64le" ]; then
common_flags+=("-DGGML_BLAS=ON" "-DGGML_BLAS_VENDOR=OpenBLAS")
fi
if [ "$uname_m" = "s390x" ]; then
common_flags+=("-DARCH_FLAGS=-march=z15")
fi
;;
rocm*)
if [ "${ID}" = "fedora" ]; then
common_flags+=("-DCMAKE_HIP_COMPILER_ROCM_ROOT=/usr")
@@ -223,28 +210,10 @@ configure_common_flags() {
esac
}
clone_and_build_whisper_cpp() {
local whisper_cpp_commit="${WHISPER_CPP_PULL_REF:-$DEFAULT_WHISPER_COMMIT}"
local whisper_flags=("${common_flags[@]}")
whisper_flags+=("-DBUILD_SHARED_LIBS=OFF")
# See: https://github.com/ggml-org/llama.cpp/blob/master/docs/build.md#compilation-options
if [ "$containerfile" = "musa" ]; then
whisper_flags+=("-DCMAKE_POSITION_INDEPENDENT_CODE=ON")
fi
git_clone_specific_commit "${WHISPER_CPP_REPO:-https://github.com/ggerganov/whisper.cpp}" "$whisper_cpp_commit"
cmake_steps "${whisper_flags[@]}"
cd ..
if [[ "${RAMALAMA_IMAGE_BUILD_DEBUG_MODE:-}" != y ]]; then
rm -rf whisper.cpp
fi
}
clone_and_build_llama_cpp() {
local llama_cpp_commit="${LLAMA_CPP_PULL_REF:-$DEFAULT_LLAMA_CPP_COMMIT}"
git_clone_specific_commit "${LLAMA_CPP_REPO:-https://github.com/ggml-org/llama.cpp}" "$llama_cpp_commit"
cmake_steps "${common_flags[@]}"
install -m 755 build/bin/rpc-server "$install_prefix"/bin/rpc-server
cd ..
if [[ "${RAMALAMA_IMAGE_BUILD_DEBUG_MODE:-}" != y ]]; then
rm -rf llama.cpp
@@ -252,35 +221,15 @@ clone_and_build_llama_cpp() {
}
cleanup() {
available dnf && dnf_remove
rm -rf /var/cache/*dnf* /opt/rocm-*/lib/*/library/*gfx9*
available dnf && dnf -y clean all
ldconfig # needed for libraries
}
add_common_flags() {
common_flags+=("-DGGML_RPC=ON" "-DLLAMA_BUILD_TESTS=OFF" "-DLLAMA_BUILD_EXAMPLES=OFF" "-DGGML_BUILD_TESTS=OFF" "-DGGML_BUILD_EXAMPLES=OFF")
if [ "$containerfile" != "cann" ]; then
common_flags+=("-DGGML_NATIVE=OFF" "-DGGML_BACKEND_DL=ON" "-DGGML_CPU_ALL_VARIANTS=ON")
fi
case "$containerfile" in
ramalama)
if [ "$uname_m" = "x86_64" ] || [ "$uname_m" = "aarch64" ]; then
common_flags+=("-DGGML_VULKAN=ON")
elif [ "$uname_m" = "s390x" ] || [ "$uname_m" = "ppc64le" ]; then
common_flags+=("-DGGML_BLAS=ON" "-DGGML_BLAS_VENDOR=OpenBLAS")
fi
if [ "$uname_m" = "s390x" ]; then
common_flags+=("-DARCH_FLAGS=-march=z15")
fi
;;
esac
}
main() {
# shellcheck disable=SC1091
source /etc/os-release
set -ex -o pipefail
set -eux -o pipefail
# shellcheck disable=SC1091
source "$(dirname "$0")/lib.sh"
@@ -292,15 +241,10 @@ main() {
uname_m="$(uname -m)"
local common_flags
configure_common_flags
common_flags+=("-DGGML_CCACHE=OFF" "-DCMAKE_INSTALL_PREFIX=${install_prefix}")
available dnf && dnf_install
setup_build_env
if [ "$uname_m" != "s390x" ]; then
clone_and_build_whisper_cpp
fi
add_common_flags
clone_and_build_llama_cpp
cleanup
}
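For context on the consolidation above, a rough sketch of what the merged configure_common_flags amounts to for the default image once cmake_steps runs its configure/build/install. The build directory name and -j setting are assumptions, the ramalama image additionally enables Vulkan or OpenBLAS depending on architecture, and accelerator images append their own backend flags:

    cmake -B build \
        -DGGML_CCACHE=OFF -DGGML_RPC=ON "-DCMAKE_INSTALL_PREFIX=${install_prefix}" \
        -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_EXAMPLES=OFF \
        -DGGML_BUILD_TESTS=OFF -DGGML_BUILD_EXAMPLES=OFF \
        -DGGML_NATIVE=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON   # skipped for cann
    cmake --build build -j"$(nproc)"
    cmake --install build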

View File

@@ -40,10 +40,6 @@ add_build_platform() {
# set to 'y' to include the debug tools and debug files in the image
"RAMALAMA_IMAGE_BUILD_DEBUG_MODE"
# reference of a whisper.cpp repo and commit to use
"WHISPER_CPP_REPO"
"WHISPER_CPP_PULL_REF"
# reference to a llama.cpp repo and commit to use
"LLAMA_CPP_REPO"
"LLAMA_CPP_PULL_REF"

View File

@@ -65,7 +65,6 @@ case ${1} in
*)
podman run --pull=never --rm "${REPO}/$1" ls -l /usr/bin/llama-server
podman run --pull=never --rm "${REPO}/$1" ls -l /usr/bin/llama-run
podman run --pull=never --rm "${REPO}/$1" ls -l /usr/bin/whisper-server
podman run --pull=never --rm "${REPO}/$1"-rag rag_framework load
;;
esac

View File

@@ -56,7 +56,6 @@ case ${1} in
*)
podman run --pull=never --rm "${REPO}/$1" ls -l /usr/bin/llama-server
podman run --pull=never --rm "${REPO}/$1" ls -l /usr/bin/llama-run
podman run --pull=never --rm "${REPO}/$1" ls -l /usr/bin/whisper-server
podman run --pull=never --rm "${REPO}/$1"-rag rag_framework load
release "$1"

View File

@@ -16,7 +16,7 @@ pushd "$TMT_TREE"
if [[ $1 == "docker" ]]; then
./container_build.sh build ramalama
elif [[ $1 == "nocontainer" ]]; then
./container-images/scripts/build_llama_and_whisper.sh
./container-images/scripts/build_llama.sh
fi
./.github/scripts/install-ollama.sh