From 22efcb53cb6e76f4dbb90aa737f9db5a62e64932 Mon Sep 17 00:00:00 2001 From: Oliver Walsh Date: Tue, 27 Jan 2026 16:41:24 +0000 Subject: [PATCH 1/8] Speed up quadlet/kube generate bats/e2e tests Disable image pulling when using HIP_VISIBLE_DEVICES, otherwise the large rocm image will be pulled. Skip all of the generate tests when --nocontainer is true. Signed-off-by: Oliver Walsh --- test/e2e/test_serve.py | 27 ++++++++++++++++----------- test/system/040-serve.bats | 13 +++++++++---- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/test/e2e/test_serve.py b/test/e2e/test_serve.py index 297f272f..73e6b3c4 100644 --- a/test/e2e/test_serve.py +++ b/test/e2e/test_serve.py @@ -400,17 +400,20 @@ def test_stop_failures(): @pytest.mark.e2e -def test_quadlet_generation(test_model): - with RamalamaExecWorkspace(env_vars={"HIP_VISIBLE_DEVICES": "99"}) as ctx: - test_model_full_name = get_full_model_name(test_model) - container_file = Path(ctx.workspace_dir) / f"{test_model_full_name}.container" - ctx.check_call(["ramalama", "serve", "--port", "1234", "--generate", "quadlet", test_model]) - with container_file.open("r") as f: - content = f.read() - assert re.search(r".*PublishPort=0.0.0.0:1234:1234", content) - assert re.search(r".*llama-server --host 0.0.0.0 --port 1234 --model .*", content) - assert re.search(f".*Mount=type=bind,.*{test_model_full_name}", content) - assert re.search(r".*Environment=HIP_VISIBLE_DEVICES=99", content) +def test_quadlet_generation(shared_ctx, test_model): + ctx = shared_ctx + test_model_full_name = get_full_model_name(test_model) + container_file = Path(ctx.workspace_dir) / f"{test_model_full_name}.container" + ctx.check_call( + ["ramalama", "serve", "--port", "1234", "--pull", "never", "--generate", "quadlet", test_model], + env={"HIP_VISIBLE_DEVICES": "99"}, + ) + with container_file.open("r") as f: + content = f.read() + assert re.search(r".*PublishPort=0.0.0.0:1234:1234", content) + assert re.search(r".*llama-server --host 0.0.0.0 --port 1234 --model .*", content) + assert re.search(f".*Mount=type=bind,.*{test_model_full_name}", content) + assert re.search(r".*Environment=HIP_VISIBLE_DEVICES=99", content) @pytest.mark.e2e @@ -612,6 +615,8 @@ def test_serve_kube_generation(test_model, generate, env_vars): "test", "--port", "1234", + "--pull", + "never", "--generate", generate.format(tmp_dir=ctx.workspace_dir, sep=os.sep), test_model, diff --git a/test/system/040-serve.bats b/test/system/040-serve.bats index 367a6614..cac45ced 100755 --- a/test/system/040-serve.bats +++ b/test/system/040-serve.bats @@ -201,6 +201,7 @@ verify_begin=".*run --rm" } @test "ramalama serve --generate=quadlet" { + skip_if_nocontainer model_file="smollm-135m-instruct" model_fullname="smollm-135M-instruct-v0.2-Q8_0-GGUF" model="smollm:135m" @@ -215,7 +216,7 @@ verify_begin=".*run --rm" is "$output" ".*Exec=.*llama-server --host 0.0.0.0 --port 1234 --model .*" "Exec line should be correct" is "$output" ".*Mount=type=bind,.*$model_file" "Mount line should be correct" - HIP_VISIBLE_DEVICES=99 run_ramalama -q serve --port 1234 --generate=quadlet $model + HIP_VISIBLE_DEVICES=99 run_ramalama -q serve --port 1234 --pull never --generate=quadlet $model is "$output" "Generating quadlet file: $quadlet" "generate $quadlet" run cat $quadlet @@ -392,6 +393,7 @@ verify_begin=".*run --rm" # } @test "ramalama serve --generate=kube" { + skip_if_nocontainer model="smollm:135m" name=c_$(safename) run_ramalama pull $model @@ -402,7 +404,7 @@ verify_begin=".*run --rm" is "$output" ".*command: \[\".*serve.*\"\]" "Should command" is "$output" ".*containerPort: 1234" "Should container container port" - HIP_VISIBLE_DEVICES=99 run_ramalama serve --name=${name} --port 1234 --generate=kube $model + HIP_VISIBLE_DEVICES=99 run_ramalama serve --name=${name} --port 1234 --pull never --generate=kube $model is "$output" ".*Generating Kubernetes YAML file: ${name}.yaml" "generate .yaml file" run cat $name.yaml @@ -418,7 +420,7 @@ verify_begin=".*run --rm" is "$output" ".*command: \[\".*serve.*\"\]" "Should command" is "$output" ".*containerPort: 1234" "Should container container port" - HIP_VISIBLE_DEVICES=99 run_ramalama serve --name=${name} --port 1234 --generate=quadlet/kube $model + HIP_VISIBLE_DEVICES=99 run_ramalama serve --name=${name} --port 1234 --pull never --generate=quadlet/kube $model is "$output" ".*Generating Kubernetes YAML file: ${name}.yaml" "generate .yaml file" run cat $name.yaml @@ -433,6 +435,7 @@ verify_begin=".*run --rm" } @test "ramalama serve --generate=kube:/tmp" { + skip_if_nocontainer model=tiny name=c_$(safename) run_ramalama pull ${model} @@ -447,6 +450,7 @@ verify_begin=".*run --rm" } @test "ramalama serve --generate=compose" { + skip_if_nocontainer model="smollm:135m" name="docker-compose" run_ramalama pull $model @@ -458,7 +462,7 @@ verify_begin=".*run --rm" is "$output" ".*ports:" "Should contain ports section" is "$output" ".*- \"1234:1234\"" "Should map the container port" - HIP_VISIBLE_DEVICES=99 run_ramalama serve --name=${name} --port 1234 --generate=compose $model + HIP_VISIBLE_DEVICES=99 run_ramalama serve --name=${name} --port 1234 --pull never --generate=compose $model is "$output" ".*Generating Compose YAML file: ${name}.yaml" "generate .yaml file" run cat $name.yaml @@ -469,6 +473,7 @@ verify_begin=".*run --rm" } @test "ramalama serve --generate=compose:/tmp" { + skip_if_nocontainer model=tiny name="docker-compose" run_ramalama pull ${model} From 49ccb6b3441e2a0c3b7101820d390aee437edea4 Mon Sep 17 00:00:00 2001 From: Oliver Walsh Date: Wed, 28 Jan 2026 09:28:33 +0000 Subject: [PATCH 2/8] Fix unreliable assert in test_pull_using_ollama_cache Just check the command output reports that it's pulling from cache. Pull/pull-from-cache timing will vary depending on network/disk, so not safe to assume pulling from disk will be twice as fast as network. Signed-off-by: Oliver Walsh --- test/e2e/test_pull.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/test/e2e/test_pull.py b/test/e2e/test_pull.py index 70053c5a..357e7ee3 100644 --- a/test/e2e/test_pull.py +++ b/test/e2e/test_pull.py @@ -13,7 +13,6 @@ from test.conftest import ( skip_if_no_ollama, ) from test.e2e.utils import RamalamaExecWorkspace -from time import time import pytest @@ -261,24 +260,17 @@ def test_pull_using_ollama_cache(ollama_server, ollama_model, model, env_vars, e ctx.environ["OLLAMA_MODELS"] = str(ollama_server.models_dir) # Pull image using ollama server and ollama cli - ollama_pull_start_time = time() ollama_server.pull_model(ollama_model) - ollama_pull_end_time = time() - ollama_pull_time = ollama_pull_end_time - ollama_pull_start_time # Pull image using ramalama cli - ramalama_pull_start_time = time() - ctx.check_call(ramalama_cli + ["pull", model]) - ramalama_pull_end_time = time() - ramalama_pull_time = ramalama_pull_end_time - ramalama_pull_start_time + pull_output = ctx.check_output(ramalama_cli + ["pull", model], stderr=STDOUT) + + assert 'Using cached ollama:' in pull_output # Check if the model pull is the expected model_list = json.loads(ctx.check_output(ramalama_cli + ["list", "--json", "--sort", "modified"])) assert model_list[0]["name"] == expected - # Compare the ollama pull time with the ramalama cached pull time - assert (ollama_pull_time / 2) > ramalama_pull_time - @pytest.mark.e2e @pytest.mark.distro_integration From 0a9095df830c7c8fda9268d3765d7864ba822e6b Mon Sep 17 00:00:00 2001 From: Oliver Walsh Date: Wed, 28 Jan 2026 10:32:53 +0000 Subject: [PATCH 3/8] Enable pytest duration reporting Signed-off-by: Oliver Walsh --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5db7485a..e16ca014 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -169,7 +169,7 @@ lcov.output = "coverage/coverage.lcov" [tool.pytest.ini_options] testpaths = ["test"] markers = ["e2e", "distro_integration"] -addopts = "-m 'not e2e' --color=yes" +addopts = "-m 'not e2e' --color=yes --durations=10" [tool.setuptools.packages.find] include = ["ramalama", "ramalama.*"] @@ -219,6 +219,7 @@ commands = [ "pytest", "-m", "e2e", "-vvv", + "--durations=10", "--basetemp={envtmpdir}", "--tb=short", { replace = "posargs", extend = true }, From 6887dbab789fd64af73bf3e3642afb233be362b5 Mon Sep 17 00:00:00 2001 From: Oliver Walsh Date: Wed, 28 Jan 2026 11:15:39 +0000 Subject: [PATCH 4/8] Remove bats tests from github ci workflow Signed-off-by: Oliver Walsh --- .github/workflows/ci.yml | 169 --------------------------------------- 1 file changed, 169 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e52cdafc..c302f203 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -98,175 +98,6 @@ jobs: run: | uv run -- make unit-tests - bats: - runs-on: ubuntu-24.04 - timeout-minutes: 60 - steps: - - uses: actions/checkout@v6 - - - name: Install the latest version of uv - uses: astral-sh/setup-uv@v7 - - - name: install bats - shell: bash - run: | - df -h - # /mnt has ~ 65 GB free disk space. / is too small. - sudo mkdir -m a=rwx -p /mnt/tmp /mnt/runner - sudo mkdir -m o=rwx -p /home/runner/.local - sudo chown runner:runner /mnt/runner /home/runner/.local - sudo mount --bind /mnt/runner /home/runner/.local - sudo apt-get update - sudo apt-get install podman bats bash codespell - uv run -- make install-requirements - - - name: install ollama - shell: bash - run: ./.github/scripts/install-ollama.sh - - - name: Upgrade to podman 5 - run: | - set -e - # Enable universe repository which contains podman - sudo add-apt-repository "deb http://old-releases.ubuntu.com/ubuntu oracular universe" - # Update package lists - sudo apt-get update - sudo apt-get purge firefox - # Install specific podman version - sudo apt-get upgrade -y podman crun - - - name: run bats - run: | - TEMPDIR=/mnt/tmp - uv run -- make bats - - bats-nocontainer: - runs-on: ubuntu-24.04 - timeout-minutes: 60 - steps: - - uses: actions/checkout@v6 - - - name: Install the latest version of uv - uses: astral-sh/setup-uv@v7 - - - name: install bats - shell: bash - run: | - df -h - sudo apt-get update - sudo apt-get install podman bats bash codespell git cmake libcurl4-openssl-dev - sudo ./container-images/scripts/build_llama.sh - uv run -- make install-requirements - - - name: install ollama - shell: bash - run: ./.github/scripts/install-ollama.sh - - - name: Upgrade to podman 5 - run: | - set -e - # Enable universe repository which contains podman - sudo add-apt-repository "deb http://old-releases.ubuntu.com/ubuntu oracular universe" - # Update package lists - sudo apt-get update - sudo apt-get purge firefox - # Install specific podman version - sudo apt-get upgrade -y podman crun - - - name: run bats-nocontainer - run: | - uv run -- make bats-nocontainer - - docker: - runs-on: ubuntu-24.04 - timeout-minutes: 60 - steps: - - uses: actions/checkout@v6 - - - name: Install the latest version of uv - uses: astral-sh/setup-uv@v7 - - - name: install bats - shell: bash - run: | - sudo apt-get update - sudo apt-get install bats bash codespell - uv run -- make install-requirements - - - name: install ollama - shell: bash - run: ./.github/scripts/install-ollama.sh - - - name: Upgrade to podman 5 - run: | - set -e - # Enable universe repository which contains podman - sudo add-apt-repository "deb http://old-releases.ubuntu.com/ubuntu oracular universe" - # Update package lists - sudo apt-get update - sudo apt-get purge firefox - # Install specific podman version - sudo apt-get upgrade -y podman crun - - - name: Free Disk Space Linux - shell: bash - run: | - sudo docker rmi "$(docker image ls -aq)" >/dev/null 2>&1 || true - sudo rm -rf \ - /usr/share/dotnet /usr/local/lib/android /opt/ghc \ - /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup \ - /usr/lib/jvm || true - - # /mnt has ~ 65 GB free disk space. / is too small. - - name: Reconfigure Docker data-root - run: | - sudo mkdir -p /mnt/docker /etc/docker - echo '{"data-root": "/mnt/docker"}' > /tmp/daemon.json - sudo mv /tmp/daemon.json /etc/docker/daemon.json - cat /etc/docker/daemon.json - sudo systemctl restart docker.service - sudo mkdir -m a=rwx -p /mnt/tmp /mnt/runner - sudo mkdir -m o=rwx -p /home/runner/.local - sudo chown runner:runner /mnt/runner /home/runner/.local - sudo mount --bind /mnt/runner /home/runner/.local - df -h - - - name: run bats-docker - run: | - docker info - uv run -- make bats-docker - - macos: - runs-on: macos-15 - timeout-minutes: 60 - steps: - - uses: actions/checkout@v6 - - - name: Install the latest version of uv - uses: astral-sh/setup-uv@v7 - with: - python-version: '<3.14' - - - name: install mlx-lm - shell: bash - run: | - uv tool install mlx-lm - - - name: install golang - shell: bash - run: | - brew install go bats bash jq llama.cpp shellcheck coreutils - uv run -- make install-requirements - - - name: install ollama - shell: bash - run: ./.github/scripts/install-ollama.sh - - - name: run bats - shell: bash - run: | - uv run -- make bats-nocontainer - e2e-tests: strategy: fail-fast: false From 8efa71623d4dc22e7e16f3a1b089d07e33f5d256 Mon Sep 17 00:00:00 2001 From: Oliver Walsh Date: Wed, 28 Jan 2026 15:06:30 +0000 Subject: [PATCH 5/8] Skip e2e tests if lint or unit-tests fail Signed-off-by: Oliver Walsh --- .github/workflows/ci.yml | 64 +++++++++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 23 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c302f203..2de2f8a9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,7 +37,33 @@ jobs: run: | uv run -- make type-check + unit-test: + strategy: + fail-fast: false + matrix: + variant: + - title: "" + uv-options: {} + - title: " (python 3.10)" + uv-options: + python-version: '3.10' + name: Unit Tests${{ matrix.variant.title }} + runs-on: ubuntu-24.04 + timeout-minutes: 20 + steps: + - uses: actions/checkout@v6 + - name: Install the latest version of uv + uses: astral-sh/setup-uv@v7 + with: ${{ matrix.variant.uv-options }} + + - name: Run unit tests + run: | + uv run -- make unit-tests + build-image: + needs: + - lint + - unit-test runs-on: ubuntu-24.04 timeout-minutes: 60 steps: @@ -75,30 +101,10 @@ jobs: run: | ./container_build.sh build -s ramalama - unit-test: - strategy: - fail-fast: false - matrix: - variant: - - title: "" - uv-options: {} - - title: " (python 3.10)" - uv-options: - python-version: '3.10' - name: Unit Tests${{ matrix.variant.title }} - runs-on: ubuntu-24.04 - timeout-minutes: 20 - steps: - - uses: actions/checkout@v6 - - name: Install the latest version of uv - uses: astral-sh/setup-uv@v7 - with: ${{ matrix.variant.uv-options }} - - - name: Run unit tests - run: | - uv run -- make unit-tests - e2e-tests: + needs: + - lint + - unit-test strategy: fail-fast: false matrix: @@ -154,6 +160,9 @@ jobs: make e2e-tests e2e-tests-nocontainer: + needs: + - lint + - unit-test strategy: fail-fast: false matrix: @@ -205,6 +214,9 @@ jobs: make e2e-tests-nocontainer e2e-tests-docker: + needs: + - lint + - unit-test strategy: fail-fast: false matrix: @@ -278,6 +290,9 @@ jobs: make e2e-tests-docker e2e-tests-macos: + needs: + - lint + - unit-test strategy: fail-fast: false matrix: @@ -316,6 +331,9 @@ jobs: make e2e-tests-nocontainer e2e-tests-windows: + needs: + - lint + - unit-test strategy: fail-fast: false matrix: From d1fb7d35c80068890b3d9d7cbf689e447876dd8b Mon Sep 17 00:00:00 2001 From: Oliver Walsh Date: Wed, 28 Jan 2026 15:18:29 +0000 Subject: [PATCH 6/8] Serialize ci jobs for each branch and cancel running ci if PR is updated Signed-off-by: Oliver Walsh --- .github/workflows/ci.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2de2f8a9..253336e7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,6 +6,10 @@ on: branches: - main +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} + jobs: lint: name: Lint Code From 70a3be3d4c69babe8d01276cdba6e83c9d8f8276 Mon Sep 17 00:00:00 2001 From: Oliver Walsh Date: Wed, 28 Jan 2026 16:52:32 +0000 Subject: [PATCH 7/8] Drop test_serve_api from e2e tests Signed-off-by: Oliver Walsh --- test/e2e/test_serve.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/e2e/test_serve.py b/test/e2e/test_serve.py index 73e6b3c4..497d7964 100644 --- a/test/e2e/test_serve.py +++ b/test/e2e/test_serve.py @@ -705,7 +705,6 @@ def test_kube_generation_with_llama_api(test_model): assert re.search(r".*/llama-stack", content) -@pytest.mark.e2e @skip_if_docker @skip_if_no_container @skip_if_ppc64le From 894adfe1135b5258dae046a282c39eb0fb9d5bb7 Mon Sep 17 00:00:00 2001 From: Oliver Walsh Date: Wed, 28 Jan 2026 21:15:40 +0000 Subject: [PATCH 8/8] Drop build-image job from ci Signed-off-by: Oliver Walsh --- .github/workflows/ci.yml | 41 ---------------------------------------- 1 file changed, 41 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 253336e7..aabe4d69 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -64,47 +64,6 @@ jobs: run: | uv run -- make unit-tests - build-image: - needs: - - lint - - unit-test - runs-on: ubuntu-24.04 - timeout-minutes: 60 - steps: - - uses: actions/checkout@v6 - - - name: Install the latest version of uv - uses: astral-sh/setup-uv@v7 - - - name: Install dependencies - shell: bash - run: | - df -h - sudo apt-get update - sudo apt-get install -y bash codespell pipx podman - uv run -- make install-requirements - - - name: Upgrade to podman 5 - run: | - set -e - # /mnt has ~ 65 GB free disk space. / is too small. - sudo mkdir -m a=rwx -p /mnt/tmp /mnt/runner - sudo mkdir -m o=rwx -p /home/runner/.local - sudo chown runner:runner /mnt/runner /home/runner/.local - sudo mount --bind /mnt/runner /home/runner/.local - # Enable universe repository which contains podman - sudo add-apt-repository "deb http://old-releases.ubuntu.com/ubuntu oracular universe" - # Update package lists - sudo apt-get update - sudo apt-get purge firefox - # Install specific podman version - sudo apt-get upgrade -y podman crun - - - name: Build a container for CPU inferencing - shell: bash - run: | - ./container_build.sh build -s ramalama - e2e-tests: needs: - lint