import itertools
import json
import logging
import os
import platform
import random
import re
import string
import time
from contextlib import contextmanager
from pathlib import Path
from subprocess import STDOUT, CalledProcessError

from test.conftest import (
    skip_if_container,
    skip_if_darwin,
    skip_if_docker,
    skip_if_gh_actions_darwin,
    skip_if_no_container,
    skip_if_not_darwin,
    skip_if_ppc64le,
    skip_if_s390x,
)
from test.e2e.utils import RamalamaExecWorkspace, check_output, get_full_model_name

import pytest
import yaml


@contextmanager
def chdir(path):
    old_dir = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        os.chdir(old_dir)


DRY_RUN_TEST_MODEL = "dry_run_model"
RAMALAMA_DRY_RUN = ["ramalama", "-q", "--dryrun", "serve"]


@pytest.fixture(scope="module")
def shared_ctx(test_model):
    config = """
    [ramalama]
    store="{workspace_dir}/.local/share/ramalama"
    """
    with RamalamaExecWorkspace(config=config) as ctx:
        ctx.check_call(["ramalama", "-q", "pull", test_model])
        yield ctx


@pytest.mark.e2e
@skip_if_no_container
def test_basic_dry_run():
    ramalama_info = json.loads(check_output(["ramalama", "info"]))
    conman = ramalama_info["Engine"]["Name"]

    result = check_output(RAMALAMA_DRY_RUN + [DRY_RUN_TEST_MODEL])
    assert result.startswith(f"{conman} run --rm")
    assert not re.search(r".*-t -i", result), "run without terminal"


@pytest.mark.e2e
@pytest.mark.parametrize(
    "extra_params, pattern, config, env_vars, expected",
    [
        # fmt: off
        pytest.param(
            [], f".*{DRY_RUN_TEST_MODEL}.*", None, None, True,
            id="check model name", marks=skip_if_no_container
        ),
        pytest.param(
            [], r".*--name ramalama-.", None, None, True,
            id="check default --name flag", marks=skip_if_no_container
        ),
        pytest.param(
            [], r".*--cache-reuse 256", None, None, True,
            id="check --cache-reuse default value (256)", marks=skip_if_no_container
        ),
        pytest.param(
            [], r".*--no-webui", None, None, False,
            id="check --no-webui is not present by default", marks=skip_if_no_container
        ),
        pytest.param(
            ["--webui", "off"], r".*--no-webui", None, None, True,
            id="check --no-webui", marks=skip_if_no_container
        ),
        pytest.param(
            ["--name", "foobar"], r".*--name foobar .*", None, None, True,
            id="check --name foobar", marks=skip_if_no_container
        ),
        pytest.param(
            ["--name", "foobar"], r".*--network", None, None, False,
            id="check --network is not present when run within container", marks=skip_if_no_container
        ),
        pytest.param(
            ["--name", "foobar"], r".*--host 0.0.0.0", None, None, True,
            id="check --host is set to 0.0.0.0 when run within container", marks=skip_if_no_container
        ),
        pytest.param(
            ["--name", "foobar"], f".*{DRY_RUN_TEST_MODEL}.*", None, None, True,
            id="check test_model with --name foobar", marks=skip_if_no_container
        ),
        pytest.param(
            ["--name", "foobar"], r".*--seed", None, None, False,
            id="check --seed is not present by default", marks=skip_if_no_container
        ),
        pytest.param(
            ["--network", "bridge", "--host", "127.1.2.3", "--name", "foobar"],
            r".*--network bridge", None, None, True,
            id="check --network", marks=skip_if_no_container
        ),
        pytest.param(
            ["--host", "127.1.2.3"],
            r".*--host 127.1.2.3", None, None, False,
            id="check --host is overridden when run within container", marks=skip_if_no_container
        ),
        pytest.param(
            ["--host", "127.1.2.3", "--port", "1234"],
            r".*-p 127.1.2.3:1234:1234", None, None, True,
            id="check -p is modified when run within container", marks=skip_if_no_container
        ),
        pytest.param(
            [], r".*--temp 0.8", None, None, True,
            id="check --temp default value", marks=skip_if_no_container
        ),
        pytest.param(
            ["--temp", "0.1"], r".*--temp 0.1", None, None, True,
            id="check --temp", marks=skip_if_no_container
        ),
        pytest.param(
            ["--seed", "1234", "--name", "foobar"], r".*--seed 1234",
            None, {"RAMALAMA_CONFIG": "NUL" if platform.system() == "Windows" else '/dev/null'}, True,
            id="check --seed 1234 with RAMALAMA_CONFIG=/dev/null", marks=skip_if_no_container,
        ),
        pytest.param(
            ["--name", "foobar"], r".*--pull newer", None, {
                "RAMALAMA_CONFIG": "NUL" if platform.system() == "Windows" else '/dev/null'
            }, True,
            id="check pull policy with RAMALAMA_CONFIG=/dev/null", marks=[skip_if_no_container, skip_if_docker],
        ),
        pytest.param(
            ["--name", "foobar"], r".*--cap-drop=all", None, None, True,
            id="check if --cap-drop=all is present", marks=skip_if_no_container
        ),
        pytest.param(
            ["--name", "foobar"], r".*no-new-privileges", None, None, True,
            id="check if --no-new-privs is present", marks=skip_if_no_container),
        pytest.param(
            [], r".*--pull newer", None, None, True,
            id="check default pull policy", marks=[skip_if_no_container, skip_if_docker],
        ),
        pytest.param(
            ["--pull", "never"], r".*--pull never", None, None, True,
            id="check --pull never", marks=skip_if_no_container
        ),
        pytest.param(
            ["--privileged"], r".*--privileged", None, None, True,
            id="check --privileged", marks=skip_if_no_container
        ),
        pytest.param(
            ["--privileged"], r".*--cap-drop=all", None, None, False,
            id="check cap-drop=all is not set when --privileged", marks=skip_if_no_container
        ),
        pytest.param(
            ["--privileged"], r".*no-new-privileges", None, None, False,
            id="check no-new-privileges is not set when --privileged", marks=skip_if_no_container
        ),
        pytest.param(
            ["--selinux", "True"], r".*--security-opt=label=disable", None, None, False,
            id="check --selinux=True enables container separation", marks=skip_if_no_container),
        pytest.param(
            ["--selinux", "False"], r".*--security-opt=label=disable", None, None, True,
            id="check --selinux=False disables container separation", marks=skip_if_no_container),
        pytest.param(
            [], r".*--host 0.0.0.0", None, None, True,
            id="check default --host value to 0.0.0.0", marks=skip_if_container
        ),
        pytest.param(
            [], r".*--cache-reuse 256", None, None, True,
            id="check --cache-reuse default value", marks=skip_if_container
        ),
        pytest.param(
            [], r".*--flash-attn", None, None, True,
            id="check --flash-attn", marks=[skip_if_container, skip_if_not_darwin]
        ),
        pytest.param(
            ["--host", "127.0.0.1"],
            r".*--host 127.0.0.1", None, None, True,
            id="check --host flag to 127.0.0.1", marks=skip_if_container
        ),
        pytest.param(
            ["--seed", "abcd"],
            r".*--seed abcd", None, None, True,
            id="check --seed flag is set", marks=skip_if_container
        ),
        pytest.param(
            ["--detach"], r".*-d .*", None, None, True,
            id="check --detach", marks=skip_if_no_container
        ),
        pytest.param(
            ["-d"], r".*-d .*", None, None, True,
            id="check -d", marks=skip_if_no_container
        ),
        pytest.param(
            ["--runtime-args", "--foo -bar"], r".*--foo\s+-bar", None, None, True,
            id="check --runtime-args"
        ),
        pytest.param(
            ["--runtime-args", "--foo='a b c'"], r".*--foo=a b c", None, None, True,
            id="check --runtime-args=\"--foo='a b c'\""
        ),
        pytest.param(
            ["--thinking", "False"], r".*--reasoning-budget 0", None, None, True,
            id="check --reasoning-budget 0 passed to runtime",
        ),
        pytest.param(
            [], r".*--reasoning-budget", None, None, False,
            id="check --reasoning-budget not passed by default",
        ),
        # fmt: on
    ],
)
def test_params(extra_params, pattern, config, env_vars, expected):
    with RamalamaExecWorkspace(config=config, env_vars=env_vars) as ctx:
        result = ctx.check_output(RAMALAMA_DRY_RUN + extra_params + [DRY_RUN_TEST_MODEL])
        assert bool(re.search(pattern, result)) is expected


@pytest.mark.e2e
@pytest.mark.parametrize(
    "extra_params, pattern, config, env_vars, expected_exit_code, expected",
    [
        # fmt: off
        pytest.param(
            ["--pull", "bogus"], r".*error: argument --pull: invalid choice: 'bogus'", None, None, 2, True,
            id="raise error when --pull value is not valid", marks=skip_if_no_container,
        ),
        pytest.param(
            ["--runtime-args", "--foo='a b c"], r".*No closing quotation", None, None, 22, True,
            id="raise closing quotation error with --runtime-args",
        ),
        pytest.param(
            ["--selinux", "100"], r".*Error: Cannot coerce '100' to bool", None, None, 22, True,
            id="raise error when --selinux has non boolean value", marks=skip_if_no_container,
        )
        # fmt: on
    ],
)
def test_params_errors(extra_params, pattern, config, env_vars, expected_exit_code, expected):
    with RamalamaExecWorkspace(config=config, env_vars=env_vars) as ctx:
        with pytest.raises(CalledProcessError) as exc_info:
            ctx.check_output(RAMALAMA_DRY_RUN + extra_params + [DRY_RUN_TEST_MODEL], stderr=STDOUT)
        assert exc_info.value.returncode == expected_exit_code
        assert bool(re.search(pattern, exc_info.value.output.decode("utf-8"))) is expected


@pytest.mark.e2e
def test_non_existing_model():
    with RamalamaExecWorkspace() as ctx:
        with pytest.raises(CalledProcessError) as exc_info:
            ctx.check_output(["ramalama", "serve", "NON_EXISTING_MODEL"], stderr=STDOUT)
        assert exc_info.value.returncode == 22
        assert re.search(
            r".*Error: Manifest for NON_EXISTING_MODEL:latest was not found in the Ollama registry",
            exc_info.value.output.decode("utf-8"),
        )


@pytest.mark.e2e
@skip_if_container
def test_nocontainer_and_name_flag_conflict():
    with RamalamaExecWorkspace() as ctx:
        with pytest.raises(CalledProcessError) as exc_info:
            ctx.check_output(["ramalama", "--nocontainer", "serve", "--name", "foobar", "tiny"], stderr=STDOUT)
        assert exc_info.value.returncode == 22
        assert re.search(
            r".*Error: --nocontainer and --name options conflict. The --name option requires a container.",
            exc_info.value.output.decode("utf-8"),
        )


@pytest.mark.e2e
@skip_if_no_container
def test_full_model_name_expansion():
    result = check_output(RAMALAMA_DRY_RUN + ["smollm"])
    pattern = ".*ai.ramalama.model=ollama://library/smollm:latest"
    assert re.search(pattern, result)
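

# NOTE: a possible alternative to the sleep-based workaround in test_serve_and_stop below
# (see the FIXME there) is to poll the served OpenAI endpoint until it answers instead of
# sleeping a fixed amount of time. This helper is only a sketch and is not wired into any
# test; the /v1/models path and the default timeout are assumptions, not behavior these
# tests currently rely on.
def _wait_for_server_ready(port, timeout=60):
    """Return True once http://127.0.0.1:<port>/v1/models responds, False on timeout (sketch)."""
    import urllib.request

    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            # Any successful HTTP response means the server is up and answering requests.
            with urllib.request.urlopen(f"http://127.0.0.1:{port}/v1/models", timeout=5):
                return True
        except OSError:
            # Covers connection refused/reset and urllib.error.URLError (an OSError subclass).
            time.sleep(1)
    return False

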
@pytest.mark.e2e
@skip_if_no_container
def test_serve_and_stop(shared_ctx, test_model):
    ctx = shared_ctx
    container1_id = f"serve_and_stop_{''.join(random.choices(string.ascii_letters + string.digits, k=5))}"
    container2_id = f"serve_and_stop_{''.join(random.choices(string.ascii_letters + string.digits, k=5))}"

    # Serve Container1
    ctx.check_call(["ramalama", "serve", "--name", container1_id, "--detach", test_model])

    # FIXME: race-condition, chat can fail to connect if llama.cpp isn't ready, just sleep a little for now
    time.sleep(10)

    try:
        info = json.loads(ctx.check_output(["ramalama", "info"]))
        full_model_name = info["Shortnames"]["Names"][test_model]

        ps_result = ctx.check_output(["ramalama", "ps"])
        assert re.search(f".*{container1_id}", ps_result)

        ps_list = ctx.check_output(["ramalama", "ps", "--format", "{{.Names}} {{.Ports}}"])
        port = re.search(rf"{container1_id}.*->(?P<port>\d+)", ps_list)["port"]

        chat_result = ctx.check_output(["ramalama", "chat", "--ls", "--url", f"http://127.0.0.1:{port}/v1"]).strip()
        assert chat_result == full_model_name.split("://")[-1]

        containers_list = ctx.check_output(["ramalama", "containers", "--noheading"])
        assert re.search(f".*{container1_id}", containers_list)
    finally:
        # Stop Container1
        ctx.check_call(["ramalama", "stop", container1_id])

    # Start Container2
    c2_id = ctx.check_output(["ramalama", "serve", "--name", container2_id, "-d", test_model]).split("\n")[0]
    try:
        containers_list = check_output(["ramalama", "containers", "-n"])
        assert re.search(f".*{c2_id[:10]}", containers_list)

        ps_result = ctx.check_output(["ramalama", "ps", "--noheading", "--no-trunc"])
        assert re.search(f".*{container2_id}", ps_result)
    finally:
        # Stop Container2
        ctx.check_call(["ramalama", "stop", container2_id])

    # Check if both containers are stopped
    ps_result = ctx.check_output(["ramalama", "ps", "--noheading", "--no-trunc"])
    assert not re.search(f".*({container1_id}|{container2_id})", ps_result)


@pytest.mark.e2e
@skip_if_no_container
def test_serve_multiple_models(shared_ctx, test_model):
    ctx = shared_ctx
    container1_id = f"serve_multiple_{''.join(random.choices(string.ascii_letters + string.digits, k=5))}"
    container2_id = f"serve_multiple_{''.join(random.choices(string.ascii_letters + string.digits, k=5))}"
    container1_port = random.randint(64000, 65000)
    container2_port = random.randint(64000, 65000)
    while container1_port == container2_port:
        container2_port = random.randint(64000, 65000)

    # Start both models
    ctx.check_call(
        ["ramalama", "serve", "--name", container1_id, "--port", str(container1_port), "--detach", test_model]
    )
    ctx.check_call(
        ["ramalama", "serve", "--name", container2_id, "--port", str(container2_port), "--detach", test_model]
    )

    # Check if they are up
    ps_result = check_output(["ramalama", "ps", "--noheading"])
    assert re.search(f".*{container1_port}/tcp.*{container1_id}", ps_result)
    assert re.search(f".*{container2_port}/tcp.*{container2_id}", ps_result)

    # Stop both models
    ctx.check_call(["ramalama", "stop", container1_id])
    ctx.check_call(["ramalama", "stop", container2_id])

    # Check if they are stopped
    ps_result = check_output(["ramalama", "ps", "--noheading"])
    assert not re.search(f".*({container1_id}|{container2_id})", ps_result)


@pytest.mark.e2e
@skip_if_no_container
def test_stop_failures():
    with RamalamaExecWorkspace() as ctx:
        with pytest.raises(CalledProcessError) as exc_info:
            ctx.check_output(["ramalama", "stop"], stderr=STDOUT)
        assert exc_info.value.returncode == 22
        assert re.search(r"Error: must specify a container name", exc_info.value.output.decode("utf-8"))

        with pytest.raises(CalledProcessError) as exc_info:
            container_id = f"stop_failure_{''.join(random.choices(string.ascii_letters + string.digits, k=5))}"
            ctx.check_output(["ramalama", "stop", container_id], stderr=STDOUT)
        assert re.search(r"Error.*such container.*", exc_info.value.output.decode("utf-8"))

        container_id = f"stop_failure_{''.join(random.choices(string.ascii_letters + string.digits, k=5))}"
        ctx.check_call(["ramalama", "stop", "--ignore", container_id], stderr=STDOUT)

        with pytest.raises(CalledProcessError) as exc_info:
            container_id = f"stop_failure_{''.join(random.choices(string.ascii_letters + string.digits, k=5))}"
            ctx.check_output(["ramalama", "stop", "--all", container_id], stderr=STDOUT)
        assert exc_info.value.returncode == 22
        assert re.search(
            fr"Error: specifying --all and container name, {container_id}, not allowed",
            exc_info.value.output.decode("utf-8"),
        )


@pytest.mark.e2e
def test_quadlet_generation(shared_ctx, test_model):
    ctx = shared_ctx
    test_model_full_name = get_full_model_name(test_model)
    container_file = Path(ctx.workspace_dir) / f"{test_model_full_name}.container"
    ctx.check_call(
        ["ramalama", "serve", "--port", "1234", "--pull", "never", "--generate", "quadlet", test_model],
        env={"HIP_VISIBLE_DEVICES": "99"},
    )
    with container_file.open("r") as f:
        content = f.read()
        assert re.search(r".*PublishPort=0.0.0.0:1234:1234", content)
        assert re.search(r".*llama-server --host 0.0.0.0 --port 1234 --model .*", content)
        assert re.search(f".*Mount=type=bind,.*{test_model_full_name}", content)
        assert re.search(r".*Environment=HIP_VISIBLE_DEVICES=99", content)


@pytest.mark.e2e
def test_quadlet_generation_with_add_to_unit_flag(test_model):
    with RamalamaExecWorkspace() as ctx:
        test_model_full_name = get_full_model_name(test_model)
        container_file = Path(ctx.workspace_dir) / f"{test_model_full_name}.container"
        ctx.check_call(
            ["ramalama", "serve", "--generate", "quadlet", "--add-to-unit", "section1:key0:value0", test_model]
        )
        with container_file.open("r") as f:
            content = f.read()
            assert re.search(r".*key0=value0.*", content)


@pytest.mark.e2e
def test_generation_with_bad_id(test_model):
    with RamalamaExecWorkspace() as ctx:
        with pytest.raises(CalledProcessError) as exc_info:
            ctx.check_output(["ramalama", "serve", "--port", "1234", "--generate", "bogus", test_model], stderr=STDOUT)
        assert exc_info.value.returncode == 2
        assert re.search(
            r".*error: argument --generate: invalid choice: .*bogus.* \(choose from .*\)",
            exc_info.value.output.decode("utf-8"),
        )


@pytest.mark.e2e
def test_generation_with_bad_add_to_unit_flag_value(test_model):
    with RamalamaExecWorkspace() as ctx:
        with pytest.raises(CalledProcessError) as exc_info:
            ctx.check_output(
                [
                    "ramalama",
                    "serve",
                    "--port",
                    "1234",
                    "--generate",
                    "quadlet",
                    "--add-to-unit",
                    "section1:key0:",
                    test_model,
                ],
                stderr=STDOUT,
            )
        assert exc_info.value.returncode == 2
        assert re.search(
            r".*error: --add-to-unit parameters must be of the form <section>:<key>:<value>.*",
            exc_info.value.output.decode("utf-8"),
        )


@pytest.mark.e2e
@skip_if_no_container
@pytest.mark.xfail("config.option.container_engine == 'docker'", reason="docker login does not support --tls-verify")
def test_quadlet_and_kube_generation_with_container_registry(container_registry, is_container, test_model):
    with RamalamaExecWorkspace() as ctx:
        authfile = (Path(ctx.workspace_dir) / "authfile.json").as_posix()
        auth_flags = ["--authfile", authfile, "--tls-verify", "false"]
        credential_flags = ["--username", container_registry.username, "--password", container_registry.password]
        test_image_url = f"{container_registry.url}/{test_model}"
        container_name = f"quadlet_gen_{''.join(random.choices(string.ascii_letters + string.digits, k=5))}"
        container_port = random.randint(64000, 65000)

        # Login to the container registry with ramalama
        ctx.check_call(["ramalama", "login"] + auth_flags + credential_flags + [container_registry.url])

        # Pull test model
        ctx.check_call(["ramalama", "pull", test_model])

        # Push test model to the container registry in different modes
        for model_type_flag in [[], ["--type=car"], ["--type=raw"]]:
            ctx.check_call(["ramalama", "push"] + model_type_flag + auth_flags + [test_model, test_image_url])

        # Generate a quadlet
        result = ctx.check_output(
            ["ramalama", "serve"]
            + auth_flags
            + ["--name", container_name, "--port", str(container_port), "--generate", "quadlet", test_image_url]
        )
        assert re.search(".*Generating quadlet file: {}.container".format(container_name), result)
        assert re.search(".*Generating quadlet file: {}.volume".format(container_name), result)
        assert re.search(".*Generating quadlet file: {}.image".format(container_name), result)

        # Inspect the generated quadlet file
        quadlet_file = Path(ctx.workspace_dir) / "{}.container".format(container_name)
        with quadlet_file.open("r") as f:
            content = f.read()
            assert re.search(f".*PublishPort=0.0.0.0:{container_port}:{container_port}", content)
            assert re.search(f".*ContainerName={container_name}", content)
            assert re.search(f".*Exec=.*llama-server --host 0.0.0.0 --port {container_port} --model .*", content)
            quadlet_image_source = f"{container_registry.host}:{container_registry.port}/{test_model}"
            assert re.search(
                f".*Mount=type=image,source={quadlet_image_source},"
                f"destination=/mnt/models,subpath=/models,readwrite=false",
                content,
            )

        # If container mode is enabled, inspect the volume & image files generated
        if is_container:
            quadlet_volume_file = Path(ctx.workspace_dir) / f"{container_name}.volume"
            with quadlet_volume_file.open("r") as f:
                content = f.read()
                assert re.search(r".*Driver=image", content)
                assert re.search(f".*Image={container_name}.image", content)

            quadlet_image_file = Path(ctx.workspace_dir) / f"{container_name}.image"
            with quadlet_image_file.open("r") as f:
                content = f.read()
                assert re.search(f".*Image={quadlet_image_source}", content)

        # Test kube generation with vllm runtime
        result = ctx.check_output(
            ["ramalama", "--runtime", "vllm", "serve", "--name", "test-generation", "--port", "1234"]
            + auth_flags
            + ["--generate", "kube", test_image_url]
        )
        assert re.search(r".*Generating Kubernetes YAML file: test-generation.yaml", result)
        kube_file = Path(ctx.workspace_dir) / "test-generation.yaml"
        assert kube_file.exists()
        kube_file.unlink()

        # Test quadlet/kube generation with vllm runtime
        result = ctx.check_output(
            ["ramalama", "--debug", "--runtime", "vllm", "serve", "--name", "test-generation", "--port", "1234"]
            + auth_flags
            + ["--generate", "quadlet/kube", test_image_url]
        )
        assert re.search(r".*Generating Kubernetes YAML file: test-generation.yaml", result)
        assert re.search(r".*Generating quadlet file: test-generation.kube", result)
        kube_file = Path(ctx.workspace_dir) / "test-generation.yaml"
        with kube_file.open("r") as f:
            content = yaml.safe_load(f.read())
            containers_spec = content.get("spec", {}).get("template", {}).get("spec", {}).get("containers", [])
            assert len(containers_spec) == 1
            container_spec = containers_spec[0]
            assert 'command' not in container_spec
            model_alias = f"{container_registry.host}:{container_registry.port}/{test_model.split(':')[0]}"
            assert container_spec['args'] == [
                "--model",
                "/mnt/models/model.file",
                "--served-model-name",
                f"{model_alias}",
                "--max_model_len",
                "2048",
                "--port",
                "1234",
            ]
            volumes_spec = content.get("spec", {}).get("template", {}).get("spec", {}).get("volumes", [])
            assert volumes_spec[0] == {
                "name": "model",
                "image": {
                    "reference": f"{container_registry.host}:{container_registry.port}/{test_model}",
                    "pullPolicy": "IfNotPresent",
                },
            }


@pytest.mark.e2e
@pytest.mark.parametrize(
    "generate, env_vars",
    [
        pytest.param(
            *item,
            id=f"generate={item[0]}{' + env_vars' if item[1] else ''}",
        )
        for item in itertools.product(
            [
                "kube",
                "kube:{tmp_dir}{sep}output",
                "quadlet/kube",
                "quadlet/kube:{tmp_dir}{sep}output",
                "compose",
                "compose:{tmp_dir}{sep}output",
            ],
            [None, {"HIP_VISIBLE_DEVICES": "99"}],
        )
    ],
)
def test_serve_kube_generation(test_model, generate, env_vars):
    with RamalamaExecWorkspace(env_vars=env_vars) as ctx:
        # Pull model
        ctx.check_call(["ramalama", "pull", test_model])

        # Define the output dir if it's required and ensure it is created
        output_dir = (
            Path(ctx.workspace_dir) / "output"
            if generate.endswith(":{tmp_dir}{sep}output")
            else Path(ctx.workspace_dir)
        )
        output_dir.mkdir(parents=True, exist_ok=True)

        # Exec ramalama serve
        result = ctx.check_output(
            [
                "ramalama",
                "serve",
                "--name",
                "test",
                "--port",
                "1234",
                "--pull",
                "never",
                "--generate",
                generate.format(tmp_dir=ctx.workspace_dir, sep=os.sep),
                test_model,
            ]
        )

        with chdir(output_dir):
            # Test the expected output of the command execution
            if "kube" in generate:
                generated_file = output_dir / "test.yaml"
                assert re.search(r".*Generating Kubernetes YAML file: test.yaml", result)
                if generate.startswith("quadlet/kube"):
                    assert re.search(r".*Generating quadlet file: test.kube", result)
            elif "compose" in generate:
                generated_file = output_dir / "docker-compose.yaml"
                assert re.search(r".*Generating Compose YAML file: docker-compose.yaml", result)
            else:
                raise Exception("Invalid generate option")

            # Check "test.yaml" contents
            with generated_file.open("r") as f:
                content = f.read()
                if "kube" in generate:
                    assert re.search(r".*command: \[\".*serve.*\"]", content)
                    assert re.search(r".*containerPort: 1234", content)
                elif "compose" in generate:
                    assert re.search(r".*command: .*serve.*", content)
                    assert re.search(r".*ports:", content)
                    assert re.search(r".*- \"1234:1234\"", content)
                else:
                    raise Exception("Invalid generate option")

                if env_vars:
                    if "kube" in generate:
                        assert re.search(r".*env:", content)
                        assert re.search(r".*name: HIP_VISIBLE_DEVICES", content)
                        assert re.search(r".*value: \"99\"", content)
                    elif "compose" in generate:
                        assert re.search(r".*environment:", content)
                        assert re.search(r".*- HIP_VISIBLE_DEVICES=99", content)
                    else:
                        raise Exception("Invalid generate option")

            # Check "test.kube" contents if generate="quadlet/kube"
            if generate.startswith("quadlet/kube"):
                with (output_dir / "test.kube").open("r") as f:
                    content = f.read()
                    assert re.search(r".*Yaml=test.yaml", content)


@pytest.mark.e2e
@skip_if_no_container
@skip_if_docker
def test_kube_generation_with_llama_api(test_model):
    with RamalamaExecWorkspace() as ctx:
        # Pull model
        ctx.check_call(["ramalama", "pull", test_model])

        # Exec ramalama serve
        result = ctx.check_output(
            [
                "ramalama",
                "serve",
                "--name",
                "test",
                "--port",
                "1234",
                "--generate",
                "kube",
                "--api",
                "llama-stack",
                "--dri",
                "off",
                test_model,
            ]
        )

        # Test the expected output of the command execution
        assert re.search(r".*Generating Kubernetes YAML file: test.yaml", result)

        # Check "test.yaml" contents
        with (Path(ctx.workspace_dir) / "test.yaml").open("r") as f:
            content = f.read()
            assert re.search(r".*llama-server", content)
            assert re.search(r".*hostPort: 1234", content)
            assert re.search(r".*/llama-stack", content)


@skip_if_docker
@skip_if_no_container
@skip_if_ppc64le
@skip_if_s390x
def test_serve_api(caplog):
    # Configure logging for requests
    caplog.set_level(logging.CRITICAL, logger="requests")
    caplog.set_level(logging.CRITICAL, logger="urllib3")
    test_model = "tiny"

    with RamalamaExecWorkspace() as ctx:
        # Pull model
        ctx.check_call(["ramalama", "pull", test_model])

        # Serve an API
        container_name = f"api{''.join(random.choices(string.ascii_letters + string.digits, k=5))}"
        container_port = random.randint(64000, 65000)

        result = ctx.check_output(
            [
                "ramalama",
                "serve",
                "-d",
                "--name",
                container_name,
                "--port",
                str(container_port),
                "--api",
                "llama-stack",
                "--dri",
                "off",
                test_model,
            ],
            stderr=STDOUT,
        )

        assert re.search(fr".*Llama Stack RESTAPI: http://localhost:{container_port}", result)
        assert re.search(fr".*OpenAI RESTAPI: http://localhost:{container_port}/v1/openai", result)

        # Inspect the models API
        # FIXME: llama-stack image is currently broken.
        # models = requests.get(f"http://localhost:{container_port}/models").json()
        # assert models["models"][0]["name"] == test_model
        # assert models["models"][0]["model"] == test_model

        # Stop container
        ctx.check_call(["ramalama", "stop", container_name])


@pytest.mark.e2e
@skip_if_no_container
@skip_if_docker
@skip_if_gh_actions_darwin
def test_serve_with_non_existing_images():
    with RamalamaExecWorkspace() as ctx:
        # If the requested model is missing, "ramalama serve --pull never" will exit with
        # "Error: <model> does not exist" and returncode 22. To test the behavior of a
        # non-existent image reference, the requested model must already be available locally.
        ctx.check_call(["ramalama", "pull", "tiny"])
        with pytest.raises(CalledProcessError) as exc_info:
            ctx.check_output(["ramalama", "serve", "--image", "bogus", "--pull", "never", "tiny"], stderr=STDOUT)
        assert exc_info.value.returncode == 125
        assert re.search(r".*Error: bogus: image not known", exc_info.value.output.decode("utf-8"))

        with pytest.raises(CalledProcessError) as exc_info:
            ctx.check_output(
                [
                    "ramalama",
                    "serve",
                    "--image",
                    "bogus1",
                    "--rag",
                    "quay.io/ramalama/rag",
                    "--pull",
                    "never",
                    "tiny",
                ],
                stderr=STDOUT,
            )
        assert exc_info.value.returncode == 22
        assert re.search(r"Error: quay.io/ramalama/rag does not exist.*", exc_info.value.output.decode("utf-8"))


@pytest.mark.e2e
@skip_if_no_container
@skip_if_darwin
@skip_if_docker
def test_serve_with_rag():
    with RamalamaExecWorkspace() as ctx:
        result_a = ctx.check_output(RAMALAMA_DRY_RUN + ["--rag", "quay.io/ramalama/rag", "--pull", "never", "tiny"])
        assert re.search(r".*llama-server", result_a)
        assert re.search(r".*quay.io/.*-rag(@sha256)?:", result_a)
        assert re.search(r".*rag_framework serve", result_a)
        assert re.search(r".*--mount=type=image,source=quay.io/ramalama/rag,destination=/rag,rw=true", result_a)

        result_b = ctx.check_output(
            [
                "ramalama",
                "--dryrun",
                "serve",
                "--image",
                "quay.io/ramalama/ramalama:1.0",
                "--rag",
                "quay.io/ramalama/rag",
                "--pull",
                "never",
                "tiny",
            ]
        )
        assert re.search(r".*quay.io/ramalama/ramalama:1.0", result_b)

        result_c = ctx.check_output(
            [
                "ramalama",
                "--dryrun",
                "serve",
                "--rag",
                "quay.io/ramalama/rag",
                "--rag-image",
                "quay.io/ramalama/ramalama-rag:1.0",
                "--pull",
                "never",
                "tiny",
            ]
        )
        assert re.search(r".*quay.io/ramalama/ramalama-rag:1.0", result_c)