
Use Hugging Face models for tinyllama and smollm:135m

The chat templates from the ollama models do not work with llama.cpp after
converting (successfully) to jinja2.
Work around this by creating shortnames.conf aliases to equivalent models on
Hugging Face.

Signed-off-by: Oliver Walsh <owalsh@redhat.com>
Oliver Walsh
2025-09-09 18:27:29 +01:00
parent 6462424aab
commit 912bf92d6f
5 changed files with 38 additions and 36 deletions
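
For reference, the workaround relies only on RamaLama's shortnames mechanism: a shortnames.conf entry maps a model shortname to a fully qualified model URI, so pulling or running the shortname fetches the Hugging Face GGUF instead of the ollama model. A minimal sketch of an equivalent user-level override follows; the ~/.config/ramalama/shortnames.conf path and the [shortnames] table header are assumptions, not shown in this commit.

    # ~/.config/ramalama/shortnames.conf (assumed drop-in location)
    [shortnames]
    "tiny" = "hf://TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
    "smollm:135m" = "hf://HuggingFaceTB/smollm-135M-instruct-v0.2-Q8_0-GGUF"

    # With the aliases in place, the shortnames resolve to the Hugging Face models:
    ramalama pull smollm:135m
    ramalama run tiny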


@@ -51,9 +51,10 @@
"qwen2.5vl:32b" = "hf://ggml-org/Qwen2.5-VL-32B-Instruct-GGUF"
"qwen2.5vl:3b" = "hf://ggml-org/Qwen2.5-VL-3B-Instruct-GGUF"
"qwen2.5vl:7b" = "hf://ggml-org/Qwen2.5-VL-7B-Instruct-GGUF"
"smollm:135m" = "ollama://smollm:135m"
"smollm:135m" = "hf://HuggingFaceTB/smollm-135M-instruct-v0.2-Q8_0-GGUF"
"smolvlm" = "hf://ggml-org/SmolVLM-500M-Instruct-GGUF"
"smolvlm:256m" = "hf://ggml-org/SmolVLM-256M-Instruct-GGUF"
"smolvlm:2b" = "hf://ggml-org/SmolVLM-Instruct-GGUF"
"smolvlm:500m" = "hf://ggml-org/SmolVLM-500M-Instruct-GGUF"
"tiny" = "ollama://tinyllama"
"tiny" = "hf://TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
"tinyllama" = "hf://TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"


@@ -3,6 +3,7 @@
load helpers
MODEL=smollm:135m
+MODEL_FULLNAME=smollm-135M-instruct-v0.2-Q8_0-GGUF
@test "ramalama --dryrun run basic output" {
image=m_$(safename)
@@ -19,7 +20,7 @@ EOF
run_ramalama -q --dryrun run ${MODEL}
is "$output" "${verify_begin}.*"
is "$output" ".*${MODEL}" "verify model name"
is "$output" ".*${MODEL_FULLNAME}" "verify model name"
is "$output" ".*--cache-reuse 256" "verify cache-reuse is being set"
assert "$output" !~ ".*--ctx-size" "assert ctx-size is not show by default"
assert "$output" !~ ".*--seed" "assert seed does not show by default"
@@ -41,7 +42,7 @@ EOF
RAMALAMA_CONFIG=/dev/null run_ramalama -q --dryrun run --cache-reuse 512 --seed 9876 -c 4096 --net bridge --name foobar ${MODEL}
is "$output" ".*--network bridge.*" "dryrun correct with --name"
is "$output" ".*${MODEL}" "verify model name"
is "$output" ".*${MODEL_FULLNAME}" "verify model name"
is "$output" ".*--ctx-size 4096" "verify ctx-size is set"
is "$output" ".*--cache-reuse 512" "verify cache-reuse is being set"
is "$output" ".*--temp 0.8" "verify temp is set"


@@ -197,43 +197,44 @@ verify_begin=".*run --rm"
}
@test "ramalama serve --generate=quadlet" {
model="smollm"
model_quant="$model:135m"
quadlet="$model.container"
model_file="smollm-135m-instruct"
model_fullname="smollm-135M-instruct-v0.2-Q8_0-GGUF"
model="smollm:135m"
quadlet="$model_fullname.container"
name=c_$(safename)
-run_ramalama pull $model_quant
-run_ramalama -q serve --port 1234 --generate=quadlet $model_quant
+run_ramalama pull $model
+run_ramalama -q serve --port 1234 --generate=quadlet $model
is "$output" "Generating quadlet file: $quadlet" "generate $quadlet"
run cat $quadlet
is "$output" ".*PublishPort=0.0.0.0:1234:1234" "PublishPort should match"
is "$output" ".*Exec=.*llama-server --port 1234 --model .*" "Exec line should be correct"
is "$output" ".*Mount=type=bind,.*$model" "Mount line should be correct"
is "$output" ".*Mount=type=bind,.*$model_file" "Mount line should be correct"
HIP_VISIBLE_DEVICES=99 run_ramalama -q serve --port 1234 --generate=quadlet $model_quant
HIP_VISIBLE_DEVICES=99 run_ramalama -q serve --port 1234 --generate=quadlet $model
is "$output" "Generating quadlet file: $quadlet" "generate $quadlet"
run cat $quadlet
is "$output" ".*Environment=HIP_VISIBLE_DEVICES=99" "Should contain env property"
rm $quadlet
-run_ramalama 2 serve --name=${name} --port 1234 --generate=bogus $model_quant
+run_ramalama 2 serve --name=${name} --port 1234 --generate=bogus $model
is "$output" ".*error: argument --generate: invalid choice: .*bogus.* (choose from.*quadlet.*kube.*quadlet/kube.*)" "Should fail"
-run_ramalama -q serve --port 1234 --generate=quadlet --add-to-unit "section1:key0:value0" $model_quant
+run_ramalama -q serve --port 1234 --generate=quadlet --add-to-unit "section1:key0:value0" $model
is "$output" "Generating quadlet file: $quadlet" "generate $quadlet"
run cat $quadlet
is "$output" ".*PublishPort=0.0.0.0:1234:1234" "PublishPort should match"
is "$output" ".*Exec=.*llama-server --port 1234 --model .*" "Exec line should be correct"
is "$output" ".*Mount=type=bind,.*$model" "Mount line should be correct"
is "$output" ".*Mount=type=bind,.*$model_file" "Mount line should be correct"
is "$output" ".*key0=value0.*" "added unit should be correct"
-run_ramalama 2 -q serve --port 1234 --generate=quadlet --add-to-unit "section1:key0:" $model_quant
+run_ramalama 2 -q serve --port 1234 --generate=quadlet --add-to-unit "section1:key0:" $model
is "$output" ".*error: --add-to-unit parameters must be of the form <section>:<key>:<value>.*"
rm $quadlet
-run_ramalama 2 serve --name=${name} --port 1234 --add-to-unit "section1:key0:value0" $model_quant
+run_ramalama 2 serve --name=${name} --port 1234 --add-to-unit "section1:key0:value0" $model
is "$output" ".*error: --add-to-unit can only be used with --generate.*"
}
@@ -387,18 +388,17 @@ verify_begin=".*run --rm"
# }
@test "ramalama serve --generate=kube" {
model="smollm"
model_quant="$model:135m"
model="smollm:135m"
name=c_$(safename)
-run_ramalama pull $model_quant
-run_ramalama serve --name=${name} --port 1234 --generate=kube $model_quant
+run_ramalama pull $model
+run_ramalama serve --name=${name} --port 1234 --generate=kube $model
is "$output" ".*Generating Kubernetes YAML file: ${name}.yaml" "generate .yaml file"
run cat $name.yaml
is "$output" ".*command: \[\".*serve.*\"\]" "Should command"
is "$output" ".*containerPort: 1234" "Should container container port"
-HIP_VISIBLE_DEVICES=99 run_ramalama serve --name=${name} --port 1234 --generate=kube $model_quant
+HIP_VISIBLE_DEVICES=99 run_ramalama serve --name=${name} --port 1234 --generate=kube $model
is "$output" ".*Generating Kubernetes YAML file: ${name}.yaml" "generate .yaml file"
run cat $name.yaml
@@ -406,7 +406,7 @@ verify_begin=".*run --rm"
is "$output" ".*name: HIP_VISIBLE_DEVICES" "Should contain env name"
is "$output" ".*value: 99" "Should contain env value"
-run_ramalama serve --name=${name} --port 1234 --generate=quadlet/kube $model_quant
+run_ramalama serve --name=${name} --port 1234 --generate=quadlet/kube $model
is "$output" ".*Generating Kubernetes YAML file: ${name}.yaml" "generate .yaml file"
is "$output" ".*Generating quadlet file: ${name}.kube" "generate .kube file"
@@ -414,7 +414,7 @@ verify_begin=".*run --rm"
is "$output" ".*command: \[\".*serve.*\"\]" "Should command"
is "$output" ".*containerPort: 1234" "Should container container port"
-HIP_VISIBLE_DEVICES=99 run_ramalama serve --name=${name} --port 1234 --generate=quadlet/kube $model_quant
+HIP_VISIBLE_DEVICES=99 run_ramalama serve --name=${name} --port 1234 --generate=quadlet/kube $model
is "$output" ".*Generating Kubernetes YAML file: ${name}.yaml" "generate .yaml file"
run cat $name.yaml


@@ -12,8 +12,8 @@ load setup_suite
# bats test_tags=distro-integration
@test "ramalama pull ollama" {
-run_ramalama pull tiny
-run_ramalama rm tiny
+run_ramalama pull ollama://tinyllama
+run_ramalama rm ollama://tinyllama
run_ramalama pull https://ollama.com/library/smollm:135m
run_ramalama list
is "$output" ".*https://ollama.com/library/smollm:135m" "image was actually pulled locally"
@@ -36,8 +36,8 @@ load setup_suite
ollama serve &
sleep 3
ollama pull tinyllama
-run_ramalama pull tiny
-run_ramalama rm tiny
+run_ramalama pull ollama://tinyllama
+run_ramalama rm ollama://tinyllama
ollama rm tinyllama
ollama pull smollm:135m


@@ -10,8 +10,8 @@ load setup_suite
run_ramalama 22 inspect ${MODEL}
is "$output" "Error: No ref file found for '${MODEL}'. Please pull model."
-run_ramalama pull tiny
-run_ramalama inspect tiny
+run_ramalama pull ollama://tinyllama
+run_ramalama inspect ollama://tinyllama
is "${lines[0]}" "tinyllama" "model name"
is "${lines[1]}" " Path: .*store/ollama/library/tinyllama/.*" "model path"
@@ -25,8 +25,8 @@ load setup_suite
# bats test_tags=distro-integration
@test "ramalama inspect GGUF model with --all" {
-run_ramalama pull tiny
-run_ramalama inspect --all tiny
+run_ramalama pull ollama://tinyllama
+run_ramalama inspect --all ollama://tinyllama
is "${lines[0]}" "tinyllama" "model name"
is "${lines[1]}" " Path: .*store/ollama/library/tinyllama/.*" "model path"
@@ -40,20 +40,20 @@ load setup_suite
# bats test_tags=distro-integration
@test "ramalama inspect GGUF model with --get" {
-run_ramalama pull tiny
+run_ramalama pull ollama://tinyllama
-run_ramalama inspect --get general.architecture tiny
+run_ramalama inspect --get general.architecture ollama://tinyllama
is "$output" "llama"
-run_ramalama inspect --get general.name tiny
+run_ramalama inspect --get general.name ollama://tinyllama
is "$output" "TinyLlama"
}
# bats test_tags=distro-integration
@test "ramalama inspect GGUF model with --get all" {
-run_ramalama pull tiny
+run_ramalama pull ollama://tinyllama
-run_ramalama inspect --get all tiny
+run_ramalama inspect --get all ollama://tinyllama
is "${lines[0]}" "general.architecture: llama" "check for general.architecture"
is "${lines[1]}" "general.file_type: 2" "check for general.file_type"
is "${lines[2]}" "general.name: TinyLlama" "check for general.name"