Mirror of https://github.com/containers/ramalama.git (synced 2026-02-05 06:46:39 +01:00)

Merge pull request #2358 from olliewalsh/ci_fixes

Reduce CI load and fix unreliable tests
.github/workflows/ci.yml (vendored, 226 lines changed)
@@ -6,6 +6,10 @@ on:
     branches:
       - main
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
+
 jobs:
   lint:
     name: Lint Code
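The new concurrency block is the main load reducer: every run is keyed to a group built from the workflow name and the git ref, and cancel-in-progress evaluates to true only for pull-request refs (refs/pull/...). A new push to a PR therefore cancels that PR's still-running CI, while pushes to main are always allowed to finish. The same pattern in isolation, as a standalone sketch with a hypothetical workflow name:

name: example  # hypothetical workflow, not part of the commit
on: [push, pull_request]

concurrency:
  # One group per workflow per branch or PR ref.
  group: ${{ github.workflow }}-${{ github.ref }}
  # Cancel superseded runs for PRs only; main always completes.
  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}

jobs:
  noop:
    runs-on: ubuntu-latest
    steps:
      - run: echo "older runs in this group were cancelled"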
@@ -37,44 +41,6 @@ jobs:
         run: |
           uv run -- make type-check
 
-  build-image:
-    runs-on: ubuntu-24.04
-    timeout-minutes: 60
-    steps:
-      - uses: actions/checkout@v6
-
-      - name: Install the latest version of uv
-        uses: astral-sh/setup-uv@v7
-
-      - name: Install dependencies
-        shell: bash
-        run: |
-          df -h
-          sudo apt-get update
-          sudo apt-get install -y bash codespell pipx podman
-          uv run -- make install-requirements
-
-      - name: Upgrade to podman 5
-        run: |
-          set -e
-          # /mnt has ~ 65 GB free disk space. / is too small.
-          sudo mkdir -m a=rwx -p /mnt/tmp /mnt/runner
-          sudo mkdir -m o=rwx -p /home/runner/.local
-          sudo chown runner:runner /mnt/runner /home/runner/.local
-          sudo mount --bind /mnt/runner /home/runner/.local
-          # Enable universe repository which contains podman
-          sudo add-apt-repository "deb http://old-releases.ubuntu.com/ubuntu oracular universe"
-          # Update package lists
-          sudo apt-get update
-          sudo apt-get purge firefox
-          # Install specific podman version
-          sudo apt-get upgrade -y podman crun
-
-      - name: Build a container for CPU inferencing
-        shell: bash
-        run: |
-          ./container_build.sh build -s ramalama
-
   unit-test:
     strategy:
       fail-fast: false
@@ -98,176 +64,10 @@ jobs:
         run: |
           uv run -- make unit-tests
 
-  bats:
-    runs-on: ubuntu-24.04
-    timeout-minutes: 60
-    steps:
-      - uses: actions/checkout@v6
-
-      - name: Install the latest version of uv
-        uses: astral-sh/setup-uv@v7
-
-      - name: install bats
-        shell: bash
-        run: |
-          df -h
-          # /mnt has ~ 65 GB free disk space. / is too small.
-          sudo mkdir -m a=rwx -p /mnt/tmp /mnt/runner
-          sudo mkdir -m o=rwx -p /home/runner/.local
-          sudo chown runner:runner /mnt/runner /home/runner/.local
-          sudo mount --bind /mnt/runner /home/runner/.local
-          sudo apt-get update
-          sudo apt-get install podman bats bash codespell
-          uv run -- make install-requirements
-
-      - name: install ollama
-        shell: bash
-        run: ./.github/scripts/install-ollama.sh
-
-      - name: Upgrade to podman 5
-        run: |
-          set -e
-          # Enable universe repository which contains podman
-          sudo add-apt-repository "deb http://old-releases.ubuntu.com/ubuntu oracular universe"
-          # Update package lists
-          sudo apt-get update
-          sudo apt-get purge firefox
-          # Install specific podman version
-          sudo apt-get upgrade -y podman crun
-
-      - name: run bats
-        run: |
-          TEMPDIR=/mnt/tmp
-          uv run -- make bats
-
-  bats-nocontainer:
-    runs-on: ubuntu-24.04
-    timeout-minutes: 60
-    steps:
-      - uses: actions/checkout@v6
-
-      - name: Install the latest version of uv
-        uses: astral-sh/setup-uv@v7
-
-      - name: install bats
-        shell: bash
-        run: |
-          df -h
-          sudo apt-get update
-          sudo apt-get install podman bats bash codespell git cmake libcurl4-openssl-dev
-          sudo ./container-images/scripts/build_llama.sh
-          uv run -- make install-requirements
-
-      - name: install ollama
-        shell: bash
-        run: ./.github/scripts/install-ollama.sh
-
-      - name: Upgrade to podman 5
-        run: |
-          set -e
-          # Enable universe repository which contains podman
-          sudo add-apt-repository "deb http://old-releases.ubuntu.com/ubuntu oracular universe"
-          # Update package lists
-          sudo apt-get update
-          sudo apt-get purge firefox
-          # Install specific podman version
-          sudo apt-get upgrade -y podman crun
-
-      - name: run bats-nocontainer
-        run: |
-          uv run -- make bats-nocontainer
-
-  docker:
-    runs-on: ubuntu-24.04
-    timeout-minutes: 60
-    steps:
-      - uses: actions/checkout@v6
-
-      - name: Install the latest version of uv
-        uses: astral-sh/setup-uv@v7
-
-      - name: install bats
-        shell: bash
-        run: |
-          sudo apt-get update
-          sudo apt-get install bats bash codespell
-          uv run -- make install-requirements
-
-      - name: install ollama
-        shell: bash
-        run: ./.github/scripts/install-ollama.sh
-
-      - name: Upgrade to podman 5
-        run: |
-          set -e
-          # Enable universe repository which contains podman
-          sudo add-apt-repository "deb http://old-releases.ubuntu.com/ubuntu oracular universe"
-          # Update package lists
-          sudo apt-get update
-          sudo apt-get purge firefox
-          # Install specific podman version
-          sudo apt-get upgrade -y podman crun
-
-      - name: Free Disk Space Linux
-        shell: bash
-        run: |
-          sudo docker rmi "$(docker image ls -aq)" >/dev/null 2>&1 || true
-          sudo rm -rf \
-            /usr/share/dotnet /usr/local/lib/android /opt/ghc \
-            /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup \
-            /usr/lib/jvm || true
-
-      # /mnt has ~ 65 GB free disk space. / is too small.
-      - name: Reconfigure Docker data-root
-        run: |
-          sudo mkdir -p /mnt/docker /etc/docker
-          echo '{"data-root": "/mnt/docker"}' > /tmp/daemon.json
-          sudo mv /tmp/daemon.json /etc/docker/daemon.json
-          cat /etc/docker/daemon.json
-          sudo systemctl restart docker.service
-          sudo mkdir -m a=rwx -p /mnt/tmp /mnt/runner
-          sudo mkdir -m o=rwx -p /home/runner/.local
-          sudo chown runner:runner /mnt/runner /home/runner/.local
-          sudo mount --bind /mnt/runner /home/runner/.local
-          df -h
-
-      - name: run bats-docker
-        run: |
-          docker info
-          uv run -- make bats-docker
-
-  macos:
-    runs-on: macos-15
-    timeout-minutes: 60
-    steps:
-      - uses: actions/checkout@v6
-
-      - name: Install the latest version of uv
-        uses: astral-sh/setup-uv@v7
-        with:
-          python-version: '<3.14'
-
-      - name: install mlx-lm
-        shell: bash
-        run: |
-          uv tool install mlx-lm
-
-      - name: install golang
-        shell: bash
-        run: |
-          brew install go bats bash jq llama.cpp shellcheck coreutils
-          uv run -- make install-requirements
-
-      - name: install ollama
-        shell: bash
-        run: ./.github/scripts/install-ollama.sh
-
-      - name: run bats
-        shell: bash
-        run: |
-          uv run -- make bats-nocontainer
-
   e2e-tests:
+    needs:
+      - lint
+      - unit-test
     strategy:
       fail-fast: false
       matrix:
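Alongside removing the bats, bats-nocontainer, docker, and macos jobs outright, every e2e job (here and in the hunks below) gains needs: lint and unit-test, so the expensive end-to-end matrices only start once the fast gate jobs pass; a lint failure no longer burns a whole e2e matrix. The shape of the dependency, as a hypothetical skeleton:

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - run: make lint         # fast gate
  unit-test:
    runs-on: ubuntu-latest
    steps:
      - run: make unit-tests   # fast gate
  e2e-tests:
    needs:                     # skipped entirely if a gate fails
      - lint
      - unit-test
    runs-on: ubuntu-latest
    steps:
      - run: make e2e-tests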
@@ -323,6 +123,9 @@
         make e2e-tests
 
   e2e-tests-nocontainer:
+    needs:
+      - lint
+      - unit-test
     strategy:
       fail-fast: false
       matrix:
@@ -374,6 +177,9 @@
         make e2e-tests-nocontainer
 
   e2e-tests-docker:
+    needs:
+      - lint
+      - unit-test
     strategy:
       fail-fast: false
      matrix:
@@ -447,6 +253,9 @@
         make e2e-tests-docker
 
   e2e-tests-macos:
+    needs:
+      - lint
+      - unit-test
     strategy:
       fail-fast: false
       matrix:
@@ -485,6 +294,9 @@
         make e2e-tests-nocontainer
 
   e2e-tests-windows:
+    needs:
+      - lint
+      - unit-test
     strategy:
       fail-fast: false
       matrix:
 
@@ -169,7 +169,7 @@ lcov.output = "coverage/coverage.lcov"
 [tool.pytest.ini_options]
 testpaths = ["test"]
 markers = ["e2e", "distro_integration"]
-addopts = "-m 'not e2e' --color=yes"
+addopts = "-m 'not e2e' --color=yes --durations=10"
 
 [tool.setuptools.packages.find]
 include = ["ramalama", "ramalama.*"]
@@ -219,6 +219,7 @@ commands = [
     "pytest",
     "-m", "e2e",
     "-vvv",
+    "--durations=10",
     "--basetemp={envtmpdir}",
     "--tb=short",
     { replace = "posargs", extend = true },
 
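Both TOML hunks add pytest's --durations=10, one to the default addopts and one to the e2e command list, which appends a report of the ten slowest test phases (setup, call, teardown) to every run; that is the usual way to spot the slow or hanging tests behind CI-time creep. A toy illustration with a hypothetical file name:

# test_slow_example.py (hypothetical, for illustration only)
import time


def test_fast():
    assert 1 + 1 == 2


def test_slow():
    time.sleep(2)  # lands at the top of the durations report
    assert True

# Running `pytest --durations=10 test_slow_example.py` prints a
# "slowest 10 durations" section after the results.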
@@ -13,7 +13,6 @@ from test.conftest import (
     skip_if_no_ollama,
 )
 from test.e2e.utils import RamalamaExecWorkspace
-from time import time
 
 import pytest
 
@@ -261,24 +260,17 @@ def test_pull_using_ollama_cache(ollama_server, ollama_model, model, env_vars, e
         ctx.environ["OLLAMA_MODELS"] = str(ollama_server.models_dir)
 
         # Pull image using ollama server and ollama cli
-        ollama_pull_start_time = time()
         ollama_server.pull_model(ollama_model)
-        ollama_pull_end_time = time()
-        ollama_pull_time = ollama_pull_end_time - ollama_pull_start_time
 
         # Pull image using ramalama cli
-        ramalama_pull_start_time = time()
         ctx.check_call(ramalama_cli + ["pull", model])
-        ramalama_pull_end_time = time()
-        ramalama_pull_time = ramalama_pull_end_time - ramalama_pull_start_time
+        pull_output = ctx.check_output(ramalama_cli + ["pull", model], stderr=STDOUT)
 
+        assert 'Using cached ollama:' in pull_output
 
         # Check if the model pull is the expected
         model_list = json.loads(ctx.check_output(ramalama_cli + ["list", "--json", "--sort", "modified"]))
         assert model_list[0]["name"] == expected
 
-        # Compare the ollama pull time with the ramalama cached pull time
-        assert (ollama_pull_time / 2) > ramalama_pull_time
 
 
 @pytest.mark.e2e
 @pytest.mark.distro_integration
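The unreliable part was the wall-clock assertion: on a shared CI runner the ratio between an uncached ollama pull and a cached ramalama pull swings with network and disk load, so (ollama_pull_time / 2) > ramalama_pull_time could fail spuriously on a slow day. The fix drops the timing entirely and asserts on deterministic CLI output. The same pattern in isolation, as a hypothetical sketch:

# Hypothetical sketch: assert on evidence, not on timing ratios.
import subprocess


def pull(cmd):
    # Merge stderr into stdout so the cache message is captured
    # wherever the CLI prints it.
    res = subprocess.run(cmd, check=True, text=True,
                         stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    return res.stdout


def test_second_pull_hits_cache():
    pull(["ramalama", "pull", "smollm:135m"])           # warm the cache
    output = pull(["ramalama", "pull", "smollm:135m"])  # reuse it
    # Deterministic regardless of how fast the runner is today.
    assert "Using cached ollama:" in output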
@@ -400,17 +400,20 @@ def test_stop_failures():
 
 
 @pytest.mark.e2e
-def test_quadlet_generation(test_model):
-    with RamalamaExecWorkspace(env_vars={"HIP_VISIBLE_DEVICES": "99"}) as ctx:
-        test_model_full_name = get_full_model_name(test_model)
-        container_file = Path(ctx.workspace_dir) / f"{test_model_full_name}.container"
-        ctx.check_call(["ramalama", "serve", "--port", "1234", "--generate", "quadlet", test_model])
-        with container_file.open("r") as f:
-            content = f.read()
-        assert re.search(r".*PublishPort=0.0.0.0:1234:1234", content)
-        assert re.search(r".*llama-server --host 0.0.0.0 --port 1234 --model .*", content)
-        assert re.search(f".*Mount=type=bind,.*{test_model_full_name}", content)
-        assert re.search(r".*Environment=HIP_VISIBLE_DEVICES=99", content)
+def test_quadlet_generation(shared_ctx, test_model):
+    ctx = shared_ctx
+    test_model_full_name = get_full_model_name(test_model)
+    container_file = Path(ctx.workspace_dir) / f"{test_model_full_name}.container"
+    ctx.check_call(
+        ["ramalama", "serve", "--port", "1234", "--pull", "never", "--generate", "quadlet", test_model],
+        env={"HIP_VISIBLE_DEVICES": "99"},
+    )
+    with container_file.open("r") as f:
+        content = f.read()
+    assert re.search(r".*PublishPort=0.0.0.0:1234:1234", content)
+    assert re.search(r".*llama-server --host 0.0.0.0 --port 1234 --model .*", content)
+    assert re.search(f".*Mount=type=bind,.*{test_model_full_name}", content)
+    assert re.search(r".*Environment=HIP_VISIBLE_DEVICES=99", content)
 
 
 @pytest.mark.e2e
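Spinning up a fresh RamalamaExecWorkspace per test is expensive, so the rewrite takes a shared workspace fixture and passes the GPU-selection variable per call instead of baking it into the workspace. The diff does not show the fixture itself; a minimal sketch of what a session-scoped shared_ctx could look like:

# Hypothetical conftest.py sketch; the real fixture is not in this diff.
import pytest

from test.e2e.utils import RamalamaExecWorkspace


@pytest.fixture(scope="session")
def shared_ctx():
    # One workspace for the whole session: models pulled once,
    # reused by every test that requests the fixture.
    with RamalamaExecWorkspace() as ctx:
        yield ctx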
@@ -612,6 +615,8 @@ def test_serve_kube_generation(test_model, generate, env_vars):
             "test",
             "--port",
             "1234",
+            "--pull",
+            "never",
             "--generate",
             generate.format(tmp_dir=ctx.workspace_dir, sep=os.sep),
             test_model,
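--pull never is threaded through every generate-style invocation in this commit: the serve/generate path must use the already-present model and fail fast rather than trigger a download mid-test. A hypothetical helper showing the argument shape the tests build:

# Hypothetical helper mirroring the command assembled in the test above.
def serve_generate_cmd(model, generate, port=1234):
    return [
        "ramalama", "serve",
        "--port", str(port),
        "--pull", "never",       # never download inside the test
        "--generate", generate,  # e.g. "quadlet" or "kube"
        model,
    ]


cmd = serve_generate_cmd("smollm:135m", "quadlet")
assert cmd[cmd.index("--pull") + 1] == "never"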
@@ -700,7 +705,6 @@ def test_kube_generation_with_llama_api(test_model):
     assert re.search(r".*/llama-stack", content)
 
 
 @pytest.mark.e2e
 @skip_if_docker
 @skip_if_no_container
-@skip_if_ppc64le
@@ -201,6 +201,7 @@ verify_begin=".*run --rm"
 }
 
 @test "ramalama serve --generate=quadlet" {
+    skip_if_nocontainer
     model_file="smollm-135m-instruct"
     model_fullname="smollm-135M-instruct-v0.2-Q8_0-GGUF"
     model="smollm:135m"

@@ -215,7 +216,7 @@ verify_begin=".*run --rm"
     is "$output" ".*Exec=.*llama-server --host 0.0.0.0 --port 1234 --model .*" "Exec line should be correct"
     is "$output" ".*Mount=type=bind,.*$model_file" "Mount line should be correct"
 
-    HIP_VISIBLE_DEVICES=99 run_ramalama -q serve --port 1234 --generate=quadlet $model
+    HIP_VISIBLE_DEVICES=99 run_ramalama -q serve --port 1234 --pull never --generate=quadlet $model
     is "$output" "Generating quadlet file: $quadlet" "generate $quadlet"
 
     run cat $quadlet

@@ -392,6 +393,7 @@ verify_begin=".*run --rm"
 # }
 
 @test "ramalama serve --generate=kube" {
+    skip_if_nocontainer
     model="smollm:135m"
     name=c_$(safename)
     run_ramalama pull $model

@@ -402,7 +404,7 @@ verify_begin=".*run --rm"
     is "$output" ".*command: \[\".*serve.*\"\]" "Should command"
     is "$output" ".*containerPort: 1234" "Should container container port"
 
-    HIP_VISIBLE_DEVICES=99 run_ramalama serve --name=${name} --port 1234 --generate=kube $model
+    HIP_VISIBLE_DEVICES=99 run_ramalama serve --name=${name} --port 1234 --pull never --generate=kube $model
     is "$output" ".*Generating Kubernetes YAML file: ${name}.yaml" "generate .yaml file"
 
     run cat $name.yaml

@@ -418,7 +420,7 @@ verify_begin=".*run --rm"
     is "$output" ".*command: \[\".*serve.*\"\]" "Should command"
     is "$output" ".*containerPort: 1234" "Should container container port"
 
-    HIP_VISIBLE_DEVICES=99 run_ramalama serve --name=${name} --port 1234 --generate=quadlet/kube $model
+    HIP_VISIBLE_DEVICES=99 run_ramalama serve --name=${name} --port 1234 --pull never --generate=quadlet/kube $model
     is "$output" ".*Generating Kubernetes YAML file: ${name}.yaml" "generate .yaml file"
 
     run cat $name.yaml

@@ -433,6 +435,7 @@ verify_begin=".*run --rm"
 }
 
 @test "ramalama serve --generate=kube:/tmp" {
+    skip_if_nocontainer
     model=tiny
     name=c_$(safename)
     run_ramalama pull ${model}

@@ -447,6 +450,7 @@ verify_begin=".*run --rm"
 }
 
 @test "ramalama serve --generate=compose" {
+    skip_if_nocontainer
     model="smollm:135m"
     name="docker-compose"
     run_ramalama pull $model

@@ -458,7 +462,7 @@ verify_begin=".*run --rm"
     is "$output" ".*ports:" "Should contain ports section"
     is "$output" ".*- \"1234:1234\"" "Should map the container port"
 
-    HIP_VISIBLE_DEVICES=99 run_ramalama serve --name=${name} --port 1234 --generate=compose $model
+    HIP_VISIBLE_DEVICES=99 run_ramalama serve --name=${name} --port 1234 --pull never --generate=compose $model
     is "$output" ".*Generating Compose YAML file: ${name}.yaml" "generate .yaml file"
 
     run cat $name.yaml

@@ -469,6 +473,7 @@ verify_begin=".*run --rm"
 }
 
 @test "ramalama serve --generate=compose:/tmp" {
+    skip_if_nocontainer
     model=tiny
     name="docker-compose"
    run_ramalama pull ${model}
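The bats changes pull the same two levers: each generate test gains skip_if_nocontainer, since these exercise the container path and were wasted (or unreliable) in the nocontainer suite, and each serve --generate call gains --pull never so it reuses the model fetched by the run_ramalama pull a few lines earlier instead of racing a second download. The combined shape, as a hypothetical test using the file's existing helpers:

@test "example: generate reuses the pulled model" {
    skip_if_nocontainer              # container-only behaviour

    model="smollm:135m"
    run_ramalama pull $model         # the only download in the test

    # --pull never: reuse the cached model or fail fast instead of
    # downloading again mid-test.
    run_ramalama serve --port 1234 --pull never --generate=quadlet $model
    is "$output" "Generating quadlet file: .*" "quadlet file generated"
}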