import json
import random
import re
import string
from pathlib import Path, PurePosixPath
from subprocess import STDOUT, CalledProcessError
from test.conftest import (
    skip_if_big_endian_machine,
    skip_if_darwin,
    skip_if_little_endian_machine,
    skip_if_no_container,
    skip_if_no_huggingface_cli,
    skip_if_no_ollama,
)
from test.e2e.utils import RamalamaExecWorkspace

import pytest

from ramalama.path_utils import normalize_host_path_for_container


@pytest.mark.e2e
@pytest.mark.distro_integration
def test_pull_no_model():
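    """Pulling without a MODEL argument should exit with code 2 and report the missing argument."""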
    with RamalamaExecWorkspace() as ctx:
        with pytest.raises(CalledProcessError) as exc_info:
            ctx.check_output(["ramalama", "pull"], stderr=STDOUT)
        assert exc_info.value.returncode == 2
        assert re.search(
            r".*ramalama pull: error: the following arguments are required: MODEL",
            exc_info.value.output.decode("utf-8"),
        )


@pytest.mark.e2e
@pytest.mark.distro_integration
def test_pull_non_existing_model():
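    """Pulling a model that does not exist should exit with code 22 and report a missing Ollama manifest."""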
    random_model_name = f"non_existing_model_{''.join(random.choices(string.ascii_letters + string.digits, k=5))}"
    with RamalamaExecWorkspace() as ctx:
        with pytest.raises(CalledProcessError) as exc_info:
            ctx.check_output(["ramalama", "pull", random_model_name], stderr=STDOUT)
        assert exc_info.value.returncode == 22
        assert re.search(
            fr".*Error: Manifest for {random_model_name}:latest was not found in the Ollama registry",
            exc_info.value.output.decode("utf-8"),
        )


@pytest.mark.e2e
@pytest.mark.distro_integration
@pytest.mark.parametrize(
    "model, env_vars, expected",
    [
        # fmt: off
        pytest.param(
            "ollama://tinyllama", None, "ollama://library/tinyllama:latest",
            id="tinyllama model with ollama:// url"
        ),
        pytest.param(
            "smollm:360m", {"RAMALAMA_TRANSPORT": "ollama"}, "ollama://library/smollm:360m",
            id="smollm:360m model with RAMALAMA_TRANSPORT=ollama"
        ),
        pytest.param(
            "ollama://smollm:360m", None, "ollama://library/smollm:360m",
            id="smollm:360m model with ollama:// url"
        ),
        pytest.param(
            "https://ollama.com/library/smollm:135m", None, "ollama://library/smollm:135m",
            id="smollm:135m model with http url from ollama"
        ),
        pytest.param(
            "hf://Felladrin/gguf-smollm-360M-instruct-add-basics/smollm-360M-instruct-add-basics.IQ2_XXS.gguf",
            None,
            "hf://Felladrin/gguf-smollm-360M-instruct-add-basics/smollm-360M-instruct-add-basics.IQ2_XXS.gguf",
            id="hf://Felladrin/../smollm-360M-instruct-add-basics.IQ2_XXS.gguf model"
        ),
        pytest.param(
            "huggingface://Felladrin/gguf-smollm-360M-instruct-add-basics/smollm-360M-instruct-add-basics.IQ2_XXS.gguf",
            None,
            "hf://Felladrin/gguf-smollm-360M-instruct-add-basics/smollm-360M-instruct-add-basics.IQ2_XXS.gguf",
            id="huggingface://Felladrin/../smollm-360M-instruct-add-basics.IQ2_XXS.gguf model"
        ),
        pytest.param(
            "Felladrin/gguf-smollm-360M-instruct-add-basics/smollm-360M-instruct-add-basics.IQ2_XXS.gguf",
            {"RAMALAMA_TRANSPORT": "huggingface"},
            "hf://Felladrin/gguf-smollm-360M-instruct-add-basics/smollm-360M-instruct-add-basics.IQ2_XXS.gguf",
            id="Felladrin/../smollm-360M-instruct-add-basics.IQ2_XXS.gguf model with RAMALAMA_TRANSPORT=huggingface"
        ),
        pytest.param(
            "hf://HuggingFaceTB/SmolLM-135M", None, "hf://HuggingFaceTB/SmolLM-135M",
            id="hf://HuggingFaceTB/SmolLM-135M model",
            marks=[skip_if_no_huggingface_cli]
        ),
        pytest.param(
            "hf://ggml-org/SmolVLM-256M-Instruct-GGUF", None, "hf://ggml-org/SmolVLM-256M-Instruct-GGUF",
            id="hf://ggml-org/SmolVLM-256M-Instruct-GGUF model",
            marks=[skip_if_no_huggingface_cli]
        ),
        pytest.param(
            "hf://ggml-org/SmolVLM-256M-Instruct-GGUF:Q8_0", None, "hf://ggml-org/SmolVLM-256M-Instruct-GGUF:Q8_0",
            id="hf://ggml-org/SmolVLM-256M-Instruct-GGUF:Q8_0 model",
            marks=[skip_if_no_huggingface_cli]
        ),
        pytest.param(
            "oci://quay.io/ramalama/smollm:135m", None, "oci://quay.io/ramalama/smollm:135m",
            id="smollm:135m model with oci:// url",
            marks=skip_if_no_container
        ),
        pytest.param(
            "quay.io/ramalama/smollm:135m", {"RAMALAMA_TRANSPORT": "oci"}, "oci://quay.io/ramalama/smollm:135m",
            id="smollm:135m model with RAMALAMA_TRANSPORT=oci",
            marks=skip_if_no_container
        ),
        pytest.param(
            Path("mymodel.gguf"), None, Path("mymodel.gguf"),
            id="{workspace_dir}/mymodel.gguf model with file:// url",
        )
        # fmt: on
    ],
)
def test_pull(model, env_vars, expected):
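    """Pull a model through each supported transport and verify the name reported by `ramalama list`."""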
    with RamalamaExecWorkspace(env_vars=env_vars) as ctx:
        ramalama_cli = ["ramalama", "--store", str(ctx.storage_path)]

        # Resolve model_name depending on the type of the model parameter
        if isinstance(model, str):
            model_name = model
        elif isinstance(model, Path):
            model_path = ctx.workspace_path / model
            model_path.parent.mkdir(parents=True, exist_ok=True)
            model_path.touch()
            model_name = "file://" + model_path.as_posix()
        else:
            raise ValueError(f"Unsupported model type: {type(model)}")

        # Resolve expected_name depending on the type of the expected parameter
        if isinstance(expected, str):
            expected_name = expected
        elif isinstance(expected, Path):
            expected_name = (
                "file://"
                + PurePosixPath(normalize_host_path_for_container(str(ctx.workspace_path)))
                .joinpath(PurePosixPath(expected))
                .as_posix()
            )
        else:
            raise ValueError(f"Unsupported expected type: {type(expected)}")

        # Pull the model
        ctx.check_call(ramalama_cli + ["pull", model_name])

        # Get the ramalama model list
        model_list = json.loads(ctx.check_output(ramalama_cli + ["list", "--json", "--sort", "modified"]))

        # Remove the pulled model
        ctx.check_call(ramalama_cli + ["rm", model_name])

        # Check that the most recently modified model is the expected one
        assert model_list[0]["name"] == expected_name


@pytest.mark.e2e
@pytest.mark.distro_integration
def test_pull_model_layers_download():
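    """Pulling a split GGUF model should download all of its shards."""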
    with RamalamaExecWorkspace() as ctx:
        ramalama_cli = ["ramalama", "--store", str(ctx.storage_path)]
        output = ctx.check_output(
            ramalama_cli + ["pull", "hf://owalsh/SmolLM2-135M-Instruct-GGUF-Split:Q4_0"], stderr=STDOUT
        )

        for i in range(1, 4):
            assert f"Downloading Q4_0/SmolLM2-135M-Instruct-Q4_0-0000{i}-of-00003.gguf" in output


@pytest.mark.e2e
@pytest.mark.distro_integration
def test_pull_huggingface_tag_multiple_references():
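    """Two tags of the same Hugging Face repo share a snapshot that is only removed with the last reference."""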
    with RamalamaExecWorkspace() as ctx:
        ramalama_cli = ["ramalama", "--store", str(ctx.storage_path)]
        model_url_list = ["hf://ggml-org/SmolVLM-256M-Instruct-GGUF", "hf://ggml-org/SmolVLM-256M-Instruct-GGUF:Q8_0"]

        for model_url in model_url_list:
            ctx.check_call(ramalama_cli + ["pull", model_url])

        model_list = json.loads(ctx.check_output(ramalama_cli + ["list", "--json", "--sort", "modified"]))
        assert set(model_url_list).issubset([model["name"] for model in model_list])

        result = ctx.check_output(ramalama_cli + ["--debug", "rm", model_url_list[0]], stderr=STDOUT)
        assert re.search(r".*Not removing snapshot", result)

        result = ctx.check_output(ramalama_cli + ["--debug", "rm", model_url_list[1]], stderr=STDOUT)
        assert re.search(r".*Snapshot removed", result)


@pytest.mark.e2e
@pytest.mark.distro_integration
@pytest.mark.parametrize(
    "model, expected",
    [
        pytest.param(
            "tiny",
            "Endian mismatch of host (BIG) and model (LITTLE)",
            marks=[skip_if_little_endian_machine],
            id="le model on be machine",
        ),
        pytest.param(
            "stories-be:260k",
            "Endian mismatch of host (LITTLE) and model (BIG)",
            marks=[skip_if_big_endian_machine],
            id="be model on le machine",
        ),
    ],
)
def test_pull_wrong_endian_model_error(model, expected):
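    """Pulling a model whose endianness does not match the host should fail when --verify=on is set."""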
    with RamalamaExecWorkspace() as ctx:
        with pytest.raises(CalledProcessError) as exc_info:
            ctx.check_output(
                ["ramalama", "--store", str(ctx.storage_path), "pull", "--verify=on", model], stderr=STDOUT
            )
        assert exc_info.value.returncode == 1
        assert expected in exc_info.value.output.decode("utf-8")


@pytest.mark.e2e
@pytest.mark.distro_integration
@skip_if_no_ollama
@skip_if_darwin
@pytest.mark.parametrize(
    "ollama_model, model, env_vars, expected",
    [
        pytest.param(
            "tinyllama",
            "ollama://library/tinyllama",
            None,
            "ollama://library/tinyllama:latest",
            id="tinyllama model with ollama:// url",
        ),
        pytest.param(
            "smollm:135m",
            "https://ollama.com/library/smollm:135m",
            None,
            "ollama://library/smollm:135m",
            id="smollm:135m model with http url from ollama",
        ),
        pytest.param(
            "smollm:360m",
            "smollm:360m",
            {"RAMALAMA_TRANSPORT": "ollama"},
            "ollama://library/smollm:360m",
            id="smollm:360m model with RAMALAMA_TRANSPORT=ollama",
        ),
    ],
)
def test_pull_using_ollama_cache(ollama_server, ollama_model, model, env_vars, expected):
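    """A model already present in the local ollama cache should be reused instead of re-downloaded."""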
    with RamalamaExecWorkspace(env_vars=env_vars) as ctx:
        ramalama_cli = ["ramalama", "--store", str(ctx.storage_path)]

        # Ensure the ollama cache exists and is exposed via environment variables
        ctx.environ["OLLAMA_HOST"] = ollama_server.url
        ctx.environ["OLLAMA_MODELS"] = str(ollama_server.models_dir)

        # Pull the model into the cache using the ollama server and ollama cli
        ollama_server.pull_model(ollama_model)

        # Pull the model using the ramalama cli
        pull_output = ctx.check_output(ramalama_cli + ["pull", model], stderr=STDOUT)

        assert 'Using cached ollama:' in pull_output

        # Check that the pulled model is the expected one
        model_list = json.loads(ctx.check_output(ramalama_cli + ["list", "--json", "--sort", "modified"]))
        assert model_list[0]["name"] == expected


@pytest.mark.e2e
@pytest.mark.distro_integration
@skip_if_no_huggingface_cli
@pytest.mark.parametrize(
    "hf_repo, hf_model, model, env_vars, expected",
    [
        pytest.param(
            "Felladrin/gguf-smollm-360M-instruct-add-basics",
            "smollm-360M-instruct-add-basics.IQ2_XXS.gguf",
            "hf://Felladrin/gguf-smollm-360M-instruct-add-basics/smollm-360M-instruct-add-basics.IQ2_XXS.gguf",
            None,
            "hf://Felladrin/gguf-smollm-360M-instruct-add-basics/smollm-360M-instruct-add-basics.IQ2_XXS.gguf",
            id="with hf:// url",
        ),
        pytest.param(
            "Felladrin/gguf-smollm-360M-instruct-add-basics",
            "smollm-360M-instruct-add-basics.IQ2_XXS.gguf",
            "huggingface://Felladrin/gguf-smollm-360M-instruct-add-basics/smollm-360M-instruct-add-basics.IQ2_XXS.gguf",
            None,
            "hf://Felladrin/gguf-smollm-360M-instruct-add-basics/smollm-360M-instruct-add-basics.IQ2_XXS.gguf",
            id="with huggingface:// url",
        ),
        pytest.param(
            "Felladrin/gguf-smollm-360M-instruct-add-basics",
            "smollm-360M-instruct-add-basics.IQ2_XXS.gguf",
            "Felladrin/gguf-smollm-360M-instruct-add-basics/smollm-360M-instruct-add-basics.IQ2_XXS.gguf",
            {"RAMALAMA_TRANSPORT": "huggingface"},
            "hf://Felladrin/gguf-smollm-360M-instruct-add-basics/smollm-360M-instruct-add-basics.IQ2_XXS.gguf",
            id="with RAMALAMA_TRANSPORT=huggingface",
        ),
    ],
)
def test_pull_using_huggingface_cache(hf_repo, hf_model, model, env_vars, expected):
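    """A model already downloaded into the Hugging Face cache should be picked up when pulled through ramalama."""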
    with RamalamaExecWorkspace(env_vars=env_vars) as ctx:
        ramalama_cli = ["ramalama", "--store", str(ctx.storage_path)]

        # Ensure the huggingface cache exists and is set via the HF_HOME environment variable
        hf_home = ctx.workspace_path / ".cache" / "huggingface"
        hf_home.mkdir(parents=True, exist_ok=True)
        ctx.environ["HF_HOME"] = str(hf_home)

        # Download the model into the cache using the huggingface cli
        ctx.check_call(["hf", "download", hf_repo, hf_model])

        # Pull the model using the ramalama cli
        ctx.check_call(ramalama_cli + ["pull", model])

        # Check that the pulled model is the expected one
        model_list = json.loads(ctx.check_output(ramalama_cli + ["list", "--json", "--sort", "modified"]))
        assert model_list[0]["name"] == expected


@pytest.mark.e2e
@pytest.mark.distro_integration
@skip_if_no_container
@pytest.mark.xfail("config.option.container_engine == 'docker'", reason="docker login does not support --tls-verify")
def test_pull_with_registry(container_registry, container_engine):
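    """Push a local model file to a registry, pull it back, and verify it appears in the ramalama store."""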
    with RamalamaExecWorkspace() as ctx:
        ramalama_cli = ["ramalama", "--store", str(ctx.storage_path)]
        authfile = str(ctx.workspace_path / "authfile.json")
        auth_flags = ["--authfile", authfile, "--tls-verify", "false"]
        credential_flags = ["--username", container_registry.username, "--password", container_registry.password]

        # Login to the container registry with ramalama
        ctx.check_call(["ramalama", "login"] + auth_flags + credential_flags + [container_registry.url])

        # Create a fake model file with random content
        fake_model = ctx.workspace_path / "fake-model"
        fake_model_local_url = fake_model.as_uri()
        fake_model_registry_url = f"{container_registry.url}/fake-model-raw:latest"
        with fake_model.open("w") as f:
            f.write(''.join(random.choices(string.ascii_letters + string.digits, k=30)))

        # Push the fake model to the registry
        ctx.check_call(ramalama_cli + ["push"] + auth_flags + [fake_model_local_url, fake_model_registry_url])

        # Pull the fake model back from the registry
        ctx.check_call(ramalama_cli + ["pull"] + auth_flags + [fake_model_registry_url])

        # Check that the fake model was pulled correctly
        model_list = json.loads(ctx.check_output(ramalama_cli + ["list", "--json"]))
        assert fake_model_registry_url in [x["name"] for x in model_list]

        # Remove the fake model image from the container engine
        ctx.check_call([container_engine, "rmi", fake_model_registry_url.replace("oci://", "")])

        # Prune dangling images
        ctx.check_call([container_engine, "image", "prune", "-f"])