
remove whisper.cpp from all images

Remove the build and installation of whisper.cpp, and the installation of ffmpeg.
Rename build_llama_and_whisper.sh to build_llama.sh.
Update Containerfiles to reference the new script name.
Consolidate management of cmake args in build_llama.sh.
Remove references to whisper-server in various locations.

Signed-off-by: Mike Bonnet <mikeb@redhat.com>
Mike Bonnet
2026-01-27 15:58:45 -08:00
parent 34d89cee90
commit d06f6d3519
21 changed files with 52 additions and 123 deletions
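For anyone building outside of a Containerfile, the renamed script is invoked the same way as before, just under its new name. A minimal shell sketch (the CI jobs call it with no argument, while the Containerfiles pass an image name such as cuda or rocm):

    # generic build, as used by the CI workflows
    sudo ./container-images/scripts/build_llama.sh

    # accelerator-specific build, as used by the Containerfiles
    ./container-images/scripts/build_llama.sh cuda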

View File

@@ -155,7 +155,7 @@ jobs:
df -h
sudo apt-get update
sudo apt-get install podman bats bash codespell git cmake libcurl4-openssl-dev
sudo ./container-images/scripts/build_llama_and_whisper.sh
sudo ./container-images/scripts/build_llama.sh
uv run -- make install-requirements
- name: install ollama
@@ -350,7 +350,7 @@ jobs:
df -h
sudo apt-get update
sudo apt-get install podman bash codespell git cmake libcurl4-openssl-dev
sudo ./container-images/scripts/build_llama_and_whisper.sh
sudo ./container-images/scripts/build_llama.sh
uv tool install tox --with tox-uv
uv pip install ".[dev]"

View File

@@ -11,7 +11,7 @@ metadata:
pipelinesascode.tekton.dev/on-cel-expression: >-
event == "pull_request" && target_branch == "main" && body.action != "ready_for_review" &&
(
"container-images/scripts/build_llama_and_whisper.sh".pathChanged() ||
"container-images/scripts/build_llama.sh".pathChanged() ||
"container-images/scripts/lib.sh".pathChanged() ||
"container-images/asahi/*".pathChanged() ||
".tekton/pipelines/pull-request-pipeline.yaml".pathChanged() ||

View File

@@ -11,7 +11,7 @@ metadata:
pipelinesascode.tekton.dev/on-cel-expression: >-
event == "pull_request" && target_branch == "main" && body.action != "ready_for_review" &&
(
"container-images/scripts/build_llama_and_whisper.sh".pathChanged() ||
"container-images/scripts/build_llama.sh".pathChanged() ||
"container-images/scripts/lib.sh".pathChanged() ||
"container-images/cann/*".pathChanged() ||
".tekton/pipelines/pull-request-pipeline.yaml".pathChanged() ||

View File

@@ -11,7 +11,7 @@ metadata:
pipelinesascode.tekton.dev/on-cel-expression: >-
event == "pull_request" && target_branch == "main" && body.action != "ready_for_review" &&
(
"container-images/scripts/build_llama_and_whisper.sh".pathChanged() ||
"container-images/scripts/build_llama.sh".pathChanged() ||
"container-images/scripts/lib.sh".pathChanged() ||
"container-images/cuda/*".pathChanged() ||
".tekton/pipelines/pull-request-pipeline.yaml".pathChanged() ||

View File

@@ -11,7 +11,7 @@ metadata:
pipelinesascode.tekton.dev/on-cel-expression: >-
event == "pull_request" && target_branch == "main" && body.action != "ready_for_review" &&
(
"container-images/scripts/build_llama_and_whisper.sh".pathChanged() ||
"container-images/scripts/build_llama.sh".pathChanged() ||
"container-images/scripts/lib.sh".pathChanged() ||
"container-images/intel-gpu/*".pathChanged() ||
".tekton/pipelines/pull-request-pipeline.yaml".pathChanged() ||

View File

@@ -11,7 +11,7 @@ metadata:
pipelinesascode.tekton.dev/on-cel-expression: >-
event == "pull_request" && target_branch == "main" && body.action != "ready_for_review" &&
(
"container-images/scripts/build_llama_and_whisper.sh".pathChanged() ||
"container-images/scripts/build_llama.sh".pathChanged() ||
"container-images/scripts/lib.sh".pathChanged() ||
"container-images/ramalama/*".pathChanged() ||
".tekton/pipelines/pull-request-pipeline.yaml".pathChanged() ||

View File

@@ -11,7 +11,7 @@ metadata:
pipelinesascode.tekton.dev/on-cel-expression: >-
event == "pull_request" && target_branch == "main" && body.action != "ready_for_review" &&
(
"container-images/scripts/build_llama_and_whisper.sh".pathChanged() ||
"container-images/scripts/build_llama.sh".pathChanged() ||
"container-images/scripts/lib.sh".pathChanged() ||
"container-images/rocm/*".pathChanged() ||
".tekton/pipelines/pull-request-pipeline.yaml".pathChanged() ||

View File

@@ -1258,7 +1258,6 @@ Regarding this alpha, everything is under development, so expect breaking change
This project wouldn't be possible without the help of other projects like:
- [llama.cpp](https://github.com/ggml-org/llama.cpp)
- [whisper.cpp](https://github.com/ggml-org/whisper.cpp)
- [vllm](https://github.com/vllm-project/vllm)
- [mlx-lm](https://github.com/ml-explore/mlx-examples)
- [podman](https://github.com/containers/podman)

View File

@@ -20,7 +20,6 @@ Contributors can comment on or contribute to the issues linked here.
Define a syntax that simplifies adding new runtimes.
Current runtimes:
- `llama.cpp`
- `whisper.cpp`
- `vllm`
- `stable-diffusion`
- `OpenVINO`
@@ -67,4 +66,4 @@ Contributors can comment on or contribute to the issues linked here.
- Detect best base image from a compatibility matrix.
- Automatically select and execute commands with the correct image.
---
---

View File

@@ -1,9 +1,9 @@
FROM quay.io/fedora/fedora:43
ENV ASAHI_VISIBLE_DEVICES 1
COPY container-images/scripts/build_llama_and_whisper.sh \
COPY container-images/scripts/build_llama.sh \
container-images/scripts/lib.sh \
/src/
WORKDIR /src/
RUN ./build_llama_and_whisper.sh asahi
RUN ./build_llama.sh asahi
WORKDIR /

View File

@@ -3,11 +3,11 @@ ARG ASCEND_VERSION=cann:8.0.0-910b-openeuler22.03-py3.10
FROM quay.io/ascend/${ASCEND_VERSION} AS builder
ARG GOLANG_VERSION
COPY container-images/scripts/build_llama_and_whisper.sh \
COPY container-images/scripts/build_llama.sh \
container-images/scripts/lib.sh \
/src/
WORKDIR /src/
RUN ./build_llama_and_whisper.sh cann
RUN ./build_llama.sh cann
FROM quay.io/ascend/${ASCEND_VERSION}
# Copy the entire installation directory from the builder

View File

@@ -2,11 +2,11 @@ ARG CUDA_VERSION=12.9.1
# Base image with CUDA for compilation
FROM docker.io/nvidia/cuda:${CUDA_VERSION}-devel-ubi9 AS builder
COPY container-images/scripts/build_llama_and_whisper.sh \
COPY container-images/scripts/build_llama.sh \
container-images/scripts/lib.sh \
/src/
WORKDIR /src/
RUN ./build_llama_and_whisper.sh cuda
RUN ./build_llama.sh cuda
# Final runtime image
FROM docker.io/nvidia/cuda:${CUDA_VERSION}-runtime-ubi9

View File

@@ -1,11 +1,11 @@
FROM quay.io/fedora/fedora:43 as builder
COPY container-images/intel-gpu/oneAPI.repo /etc/yum.repos.d/
COPY container-images/scripts/build_llama_and_whisper.sh \
COPY container-images/scripts/build_llama.sh \
container-images/scripts/lib.sh \
/src/
WORKDIR /src/
RUN ./build_llama_and_whisper.sh intel-gpu
RUN ./build_llama.sh intel-gpu
FROM quay.io/fedora/fedora:43

View File

@@ -3,11 +3,11 @@ ARG UBUNTU_VERSION=22.04
# Base image with MUSA for compilation
FROM docker.io/mthreads/musa:${VERSION}-devel-ubuntu${UBUNTU_VERSION}-amd64 AS builder
COPY container-images/scripts/build_llama_and_whisper.sh \
COPY container-images/scripts/build_llama.sh \
container-images/scripts/lib.sh \
/src/
WORKDIR /src/
RUN ./build_llama_and_whisper.sh musa
RUN ./build_llama.sh musa
# Final runtime image
FROM docker.io/mthreads/musa:${VERSION}-runtime-ubuntu${UBUNTU_VERSION}-amd64

View File

@@ -1,10 +1,10 @@
FROM quay.io/fedora/fedora:43
COPY container-images/scripts/build_llama_and_whisper.sh \
COPY container-images/scripts/build_llama.sh \
container-images/scripts/lib.sh \
/src/
WORKDIR /src/
RUN ./build_llama_and_whisper.sh ramalama
RUN ./build_llama.sh ramalama
# Install ramalama to support a non-standard use-case
COPY . /src/ramalama
WORKDIR /src/ramalama

View File

@@ -1,22 +1,15 @@
FROM quay.io/fedora/fedora:43 AS builder
COPY container-images/scripts/build_llama_and_whisper.sh \
COPY container-images/scripts/build_llama.sh \
container-images/scripts/lib.sh \
/src/
WORKDIR /src/
RUN ./build_llama_and_whisper.sh rocm
RUN ./build_llama.sh rocm
FROM quay.io/fedora/fedora:43
RUN --mount=type=bind,from=builder,source=/tmp/install,target=/tmp/install \
cp -a /tmp/install/bin/llama-bench \
/tmp/install/bin/llama-perplexity \
/tmp/install/bin/llama-quantize \
/tmp/install/bin/llama-server \
/tmp/install/bin/rpc-server \
/tmp/install/bin/whisper-server \
/tmp/install/bin/*.so* \
/usr/bin/ && \
cp -a /tmp/install/bin/ /usr/ && \
cp -a /tmp/install/lib64/*.so* /usr/lib64/
RUN dnf -y --setopt=install_weak_deps=false install hipblas rocblas rocm-hip rocm-runtime rocsolver && \

View File

@@ -1,7 +1,6 @@
#!/bin/bash
DEFAULT_LLAMA_CPP_COMMIT="091a46cb8d43c0e662d04b80a3d11320d25b7d49" # b7815
DEFAULT_WHISPER_COMMIT="2eeeba56e9edd762b4b38467bab96c2517163158" # v1.8.3
dnf_install_intel_gpu() {
local intel_rpms=("intel-oneapi-mkl-sycl-devel" "intel-oneapi-dnnl-devel"
@@ -9,13 +8,6 @@ dnf_install_intel_gpu() {
"intel-oneapi-compiler-dpcpp-cpp" "intel-level-zero"
"oneapi-level-zero" "oneapi-level-zero-devel" "intel-compute-runtime")
dnf install -y "${intel_rpms[@]}"
# shellcheck disable=SC1091
. /opt/intel/oneapi/setvars.sh
}
dnf_remove() {
dnf -y clean all
}
dnf_install_asahi() {
@@ -74,25 +66,6 @@ dnf_install_mesa() {
rm_non_ubi_repos
}
# There is no ffmpeg-free package in the openEuler repository. openEuler can use ffmpeg,
# which also has the same GPL/LGPL license as ffmpeg-free.
dnf_install_ffmpeg() {
if is_rhel_based; then
dnf_install_epel
add_stream_repo "AppStream"
add_stream_repo "BaseOS"
add_stream_repo "CRB"
fi
if [ "${ID}" = "openEuler" ]; then
dnf install -y ffmpeg
else
dnf install -y ffmpeg-free
fi
rm_non_ubi_repos
}
dnf_install() {
local rpm_exclude_list="selinux-policy,container-selinux"
local rpm_list=("python3-dnf-plugin-versionlock"
@@ -124,8 +97,6 @@ dnf_install() {
dnf_install_cann
fi
dnf_install_ffmpeg
if [[ "${RAMALAMA_IMAGE_BUILD_DEBUG_MODE:-}" == y ]]; then
dnf install -y gdb strace
fi
@@ -134,16 +105,12 @@ dnf_install() {
}
cmake_check_warnings() {
# There has warning "CMake Warning:Manually-specified variables were not used by the project" during compile of custom ascend kernels of ggml cann backend.
# Should remove "cann" judge condition when this warning are fixed in llama.cpp/whisper.cpp
if [ "$containerfile" != "cann" ]; then
awk -v rc=0 '/CMake Warning:/ { rc=1 } 1; END {exit rc}'
else
awk '/CMake Warning:/ {print $0}'
fi
awk -v rc=0 '/CMake Warning:/ { rc=1 } 1; END {exit rc}'
}
setup_build_env() {
# external scripts may reference unbound variables
set +u
if [ "$containerfile" = "cann" ]; then
# source build env
cann_in_sys_path=/usr/local/Ascend/ascend-toolkit
@@ -162,7 +129,11 @@ setup_build_env() {
echo "No Ascend Toolkit found"
exit 1
fi
elif [ "$containerfile" = "intel-gpu" ]; then
# shellcheck disable=SC1091
source /opt/intel/oneapi/setvars.sh
fi
set -u
}
cmake_steps() {
@@ -190,7 +161,13 @@ set_install_prefix() {
}
configure_common_flags() {
common_flags=()
common_flags=(
"-DGGML_CCACHE=OFF" "-DGGML_RPC=ON" "-DCMAKE_INSTALL_PREFIX=${install_prefix}"
"-DLLAMA_BUILD_TESTS=OFF" "-DLLAMA_BUILD_EXAMPLES=OFF" "-DGGML_BUILD_TESTS=OFF" "-DGGML_BUILD_EXAMPLES=OFF"
)
if [ "$containerfile" != "cann" ]; then
common_flags+=("-DGGML_NATIVE=OFF" "-DGGML_BACKEND_DL=ON" "-DGGML_CPU_ALL_VARIANTS=ON")
fi
if [[ "${RAMALAMA_IMAGE_BUILD_DEBUG_MODE:-}" == y ]]; then
common_flags+=("-DGGML_CMAKE_BUILD_TYPE=Debug")
else
@@ -198,6 +175,16 @@ configure_common_flags() {
fi
case "$containerfile" in
ramalama)
if [ "$uname_m" = "x86_64" ] || [ "$uname_m" = "aarch64" ]; then
common_flags+=("-DGGML_VULKAN=ON")
elif [ "$uname_m" = "s390x" ] || [ "$uname_m" = "ppc64le" ]; then
common_flags+=("-DGGML_BLAS=ON" "-DGGML_BLAS_VENDOR=OpenBLAS")
fi
if [ "$uname_m" = "s390x" ]; then
common_flags+=("-DARCH_FLAGS=-march=z15")
fi
;;
rocm*)
if [ "${ID}" = "fedora" ]; then
common_flags+=("-DCMAKE_HIP_COMPILER_ROCM_ROOT=/usr")
@@ -223,28 +210,10 @@ configure_common_flags() {
esac
}
clone_and_build_whisper_cpp() {
local whisper_cpp_commit="${WHISPER_CPP_PULL_REF:-$DEFAULT_WHISPER_COMMIT}"
local whisper_flags=("${common_flags[@]}")
whisper_flags+=("-DBUILD_SHARED_LIBS=OFF")
# See: https://github.com/ggml-org/llama.cpp/blob/master/docs/build.md#compilation-options
if [ "$containerfile" = "musa" ]; then
whisper_flags+=("-DCMAKE_POSITION_INDEPENDENT_CODE=ON")
fi
git_clone_specific_commit "${WHISPER_CPP_REPO:-https://github.com/ggerganov/whisper.cpp}" "$whisper_cpp_commit"
cmake_steps "${whisper_flags[@]}"
cd ..
if [[ "${RAMALAMA_IMAGE_BUILD_DEBUG_MODE:-}" != y ]]; then
rm -rf whisper.cpp
fi
}
clone_and_build_llama_cpp() {
local llama_cpp_commit="${LLAMA_CPP_PULL_REF:-$DEFAULT_LLAMA_CPP_COMMIT}"
git_clone_specific_commit "${LLAMA_CPP_REPO:-https://github.com/ggml-org/llama.cpp}" "$llama_cpp_commit"
cmake_steps "${common_flags[@]}"
install -m 755 build/bin/rpc-server "$install_prefix"/bin/rpc-server
cd ..
if [[ "${RAMALAMA_IMAGE_BUILD_DEBUG_MODE:-}" != y ]]; then
rm -rf llama.cpp
@@ -252,35 +221,15 @@ clone_and_build_llama_cpp() {
}
cleanup() {
available dnf && dnf_remove
rm -rf /var/cache/*dnf* /opt/rocm-*/lib/*/library/*gfx9*
available dnf && dnf -y clean all
ldconfig # needed for libraries
}
add_common_flags() {
common_flags+=("-DGGML_RPC=ON" "-DLLAMA_BUILD_TESTS=OFF" "-DLLAMA_BUILD_EXAMPLES=OFF" "-DGGML_BUILD_TESTS=OFF" "-DGGML_BUILD_EXAMPLES=OFF")
if [ "$containerfile" != "cann" ]; then
common_flags+=("-DGGML_NATIVE=OFF" "-DGGML_BACKEND_DL=ON" "-DGGML_CPU_ALL_VARIANTS=ON")
fi
case "$containerfile" in
ramalama)
if [ "$uname_m" = "x86_64" ] || [ "$uname_m" = "aarch64" ]; then
common_flags+=("-DGGML_VULKAN=ON")
elif [ "$uname_m" = "s390x" ] || [ "$uname_m" = "ppc64le" ]; then
common_flags+=("-DGGML_BLAS=ON" "-DGGML_BLAS_VENDOR=OpenBLAS")
fi
if [ "$uname_m" = "s390x" ]; then
common_flags+=("-DARCH_FLAGS=-march=z15")
fi
;;
esac
}
main() {
# shellcheck disable=SC1091
source /etc/os-release
set -ex -o pipefail
set -eux -o pipefail
# shellcheck disable=SC1091
source "$(dirname "$0")/lib.sh"
@@ -292,15 +241,10 @@ main() {
uname_m="$(uname -m)"
local common_flags
configure_common_flags
common_flags+=("-DGGML_CCACHE=OFF" "-DCMAKE_INSTALL_PREFIX=${install_prefix}")
available dnf && dnf_install
setup_build_env
if [ "$uname_m" != "s390x" ]; then
clone_and_build_whisper_cpp
fi
add_common_flags
clone_and_build_llama_cpp
cleanup
}
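For context on the consolidation above, a rough sketch of what the merged configure_common_flags amounts to for the default image once cmake_steps runs its configure/build/install. The build directory name and -j setting are assumptions, the ramalama image additionally enables Vulkan or OpenBLAS depending on architecture, and accelerator images append their own backend flags:

    cmake -B build \
        -DGGML_CCACHE=OFF -DGGML_RPC=ON "-DCMAKE_INSTALL_PREFIX=${install_prefix}" \
        -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_EXAMPLES=OFF \
        -DGGML_BUILD_TESTS=OFF -DGGML_BUILD_EXAMPLES=OFF \
        -DGGML_NATIVE=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON   # skipped for cann
    cmake --build build -j"$(nproc)"
    cmake --install build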

View File

@@ -40,10 +40,6 @@ add_build_platform() {
# set to 'y' to include the debug tools and debug files in the image
"RAMALAMA_IMAGE_BUILD_DEBUG_MODE"
# reference of a whisper.cpp repo and commit to use
"WHISPER_CPP_REPO"
"WHISPER_CPP_PULL_REF"
# reference to a llama.cpp repo and commit to use
"LLAMA_CPP_REPO"
"LLAMA_CPP_PULL_REF"

View File

@@ -65,7 +65,6 @@ case ${1} in
*)
podman run --pull=never --rm "${REPO}/$1" ls -l /usr/bin/llama-server
podman run --pull=never --rm "${REPO}/$1" ls -l /usr/bin/llama-run
podman run --pull=never --rm "${REPO}/$1" ls -l /usr/bin/whisper-server
podman run --pull=never --rm "${REPO}/$1"-rag rag_framework load
;;
esac

View File

@@ -56,7 +56,6 @@ case ${1} in
*)
podman run --pull=never --rm "${REPO}/$1" ls -l /usr/bin/llama-server
podman run --pull=never --rm "${REPO}/$1" ls -l /usr/bin/llama-run
podman run --pull=never --rm "${REPO}/$1" ls -l /usr/bin/whisper-server
podman run --pull=never --rm "${REPO}/$1"-rag rag_framework load
release "$1"

View File

@@ -16,7 +16,7 @@ pushd "$TMT_TREE"
if [[ $1 == "docker" ]]; then
./container_build.sh build ramalama
elif [[ $1 == "nocontainer" ]]; then
./container-images/scripts/build_llama_and_whisper.sh
./container-images/scripts/build_llama.sh
fi
./.github/scripts/install-ollama.sh