
Use Hugging Face models for tinyllama and smollm:135m

The chat templates from the ollama models do not work with llama.cpp after
converting (successfully) to jinja2.
Work around this by creating shortnames.conf aliases to equivalent models on
Hugging Face.

Signed-off-by: Oliver Walsh <owalsh@redhat.com>
Oliver Walsh
2025-09-09 18:27:29 +01:00
parent 6462424aab
commit 912bf92d6f
5 changed files with 38 additions and 36 deletions
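
For reference, the workaround relies only on RamaLama's shortnames mechanism: a shortnames.conf entry maps a model shortname to a fully qualified model URI, so pulling or running the shortname fetches the Hugging Face GGUF instead of the ollama model. A minimal sketch of an equivalent user-level override follows; the ~/.config/ramalama/shortnames.conf path and the [shortnames] table header are assumptions, not shown in this commit.

    # ~/.config/ramalama/shortnames.conf (assumed drop-in location)
    [shortnames]
    "tiny" = "hf://TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
    "smollm:135m" = "hf://HuggingFaceTB/smollm-135M-instruct-v0.2-Q8_0-GGUF"

    # With the aliases in place, the shortnames resolve to the Hugging Face models:
    ramalama pull smollm:135m
    ramalama run tiny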


@@ -51,9 +51,10 @@
"qwen2.5vl:32b" = "hf://ggml-org/Qwen2.5-VL-32B-Instruct-GGUF"
"qwen2.5vl:3b" = "hf://ggml-org/Qwen2.5-VL-3B-Instruct-GGUF"
"qwen2.5vl:7b" = "hf://ggml-org/Qwen2.5-VL-7B-Instruct-GGUF"
"smollm:135m" = "ollama://smollm:135m"
"smollm:135m" = "hf://HuggingFaceTB/smollm-135M-instruct-v0.2-Q8_0-GGUF"
"smolvlm" = "hf://ggml-org/SmolVLM-500M-Instruct-GGUF"
"smolvlm:256m" = "hf://ggml-org/SmolVLM-256M-Instruct-GGUF"
"smolvlm:2b" = "hf://ggml-org/SmolVLM-Instruct-GGUF"
"smolvlm:500m" = "hf://ggml-org/SmolVLM-500M-Instruct-GGUF"
"tiny" = "ollama://tinyllama"
"tiny" = "hf://TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
"tinyllama" = "hf://TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"


@@ -3,6 +3,7 @@
load helpers
MODEL=smollm:135m
+MODEL_FULLNAME=smollm-135M-instruct-v0.2-Q8_0-GGUF
@test "ramalama --dryrun run basic output" {
image=m_$(safename)
@@ -19,7 +20,7 @@ EOF
run_ramalama -q --dryrun run ${MODEL}
is "$output" "${verify_begin}.*"
is "$output" ".*${MODEL}" "verify model name"
is "$output" ".*${MODEL_FULLNAME}" "verify model name"
is "$output" ".*--cache-reuse 256" "verify cache-reuse is being set"
assert "$output" !~ ".*--ctx-size" "assert ctx-size is not show by default"
assert "$output" !~ ".*--seed" "assert seed does not show by default"
@@ -41,7 +42,7 @@ EOF
RAMALAMA_CONFIG=/dev/null run_ramalama -q --dryrun run --cache-reuse 512 --seed 9876 -c 4096 --net bridge --name foobar ${MODEL}
is "$output" ".*--network bridge.*" "dryrun correct with --name"
is "$output" ".*${MODEL}" "verify model name"
is "$output" ".*${MODEL_FULLNAME}" "verify model name"
is "$output" ".*--ctx-size 4096" "verify ctx-size is set"
is "$output" ".*--cache-reuse 512" "verify cache-reuse is being set"
is "$output" ".*--temp 0.8" "verify temp is set"


@@ -197,43 +197,44 @@ verify_begin=".*run --rm"
}
@test "ramalama serve --generate=quadlet" {
model="smollm"
model_quant="$model:135m"
quadlet="$model.container"
model_file="smollm-135m-instruct"
model_fullname="smollm-135M-instruct-v0.2-Q8_0-GGUF"
model="smollm:135m"
quadlet="$model_fullname.container"
name=c_$(safename)
-run_ramalama pull $model_quant
-run_ramalama -q serve --port 1234 --generate=quadlet $model_quant
+run_ramalama pull $model
+run_ramalama -q serve --port 1234 --generate=quadlet $model
is "$output" "Generating quadlet file: $quadlet" "generate $quadlet"
run cat $quadlet
is "$output" ".*PublishPort=0.0.0.0:1234:1234" "PublishPort should match"
is "$output" ".*Exec=.*llama-server --port 1234 --model .*" "Exec line should be correct"
is "$output" ".*Mount=type=bind,.*$model" "Mount line should be correct"
is "$output" ".*Mount=type=bind,.*$model_file" "Mount line should be correct"
HIP_VISIBLE_DEVICES=99 run_ramalama -q serve --port 1234 --generate=quadlet $model_quant
HIP_VISIBLE_DEVICES=99 run_ramalama -q serve --port 1234 --generate=quadlet $model
is "$output" "Generating quadlet file: $quadlet" "generate $quadlet"
run cat $quadlet
is "$output" ".*Environment=HIP_VISIBLE_DEVICES=99" "Should contain env property"
rm $quadlet
-run_ramalama 2 serve --name=${name} --port 1234 --generate=bogus $model_quant
+run_ramalama 2 serve --name=${name} --port 1234 --generate=bogus $model
is "$output" ".*error: argument --generate: invalid choice: .*bogus.* (choose from.*quadlet.*kube.*quadlet/kube.*)" "Should fail"
-run_ramalama -q serve --port 1234 --generate=quadlet --add-to-unit "section1:key0:value0" $model_quant
+run_ramalama -q serve --port 1234 --generate=quadlet --add-to-unit "section1:key0:value0" $model
is "$output" "Generating quadlet file: $quadlet" "generate $quadlet"
run cat $quadlet
is "$output" ".*PublishPort=0.0.0.0:1234:1234" "PublishPort should match"
is "$output" ".*Exec=.*llama-server --port 1234 --model .*" "Exec line should be correct"
is "$output" ".*Mount=type=bind,.*$model" "Mount line should be correct"
is "$output" ".*Mount=type=bind,.*$model_file" "Mount line should be correct"
is "$output" ".*key0=value0.*" "added unit should be correct"
-run_ramalama 2 -q serve --port 1234 --generate=quadlet --add-to-unit "section1:key0:" $model_quant
+run_ramalama 2 -q serve --port 1234 --generate=quadlet --add-to-unit "section1:key0:" $model
is "$output" ".*error: --add-to-unit parameters must be of the form <section>:<key>:<value>.*"
rm $quadlet
-run_ramalama 2 serve --name=${name} --port 1234 --add-to-unit "section1:key0:value0" $model_quant
+run_ramalama 2 serve --name=${name} --port 1234 --add-to-unit "section1:key0:value0" $model
is "$output" ".*error: --add-to-unit can only be used with --generate.*"
}
@@ -387,18 +388,17 @@ verify_begin=".*run --rm"
# }
@test "ramalama serve --generate=kube" {
model="smollm"
model_quant="$model:135m"
model="smollm:135m"
name=c_$(safename)
-run_ramalama pull $model_quant
-run_ramalama serve --name=${name} --port 1234 --generate=kube $model_quant
+run_ramalama pull $model
+run_ramalama serve --name=${name} --port 1234 --generate=kube $model
is "$output" ".*Generating Kubernetes YAML file: ${name}.yaml" "generate .yaml file"
run cat $name.yaml
is "$output" ".*command: \[\".*serve.*\"\]" "Should command"
is "$output" ".*containerPort: 1234" "Should container container port"
-HIP_VISIBLE_DEVICES=99 run_ramalama serve --name=${name} --port 1234 --generate=kube $model_quant
+HIP_VISIBLE_DEVICES=99 run_ramalama serve --name=${name} --port 1234 --generate=kube $model
is "$output" ".*Generating Kubernetes YAML file: ${name}.yaml" "generate .yaml file"
run cat $name.yaml
@@ -406,7 +406,7 @@ verify_begin=".*run --rm"
is "$output" ".*name: HIP_VISIBLE_DEVICES" "Should contain env name"
is "$output" ".*value: 99" "Should contain env value"
-run_ramalama serve --name=${name} --port 1234 --generate=quadlet/kube $model_quant
+run_ramalama serve --name=${name} --port 1234 --generate=quadlet/kube $model
is "$output" ".*Generating Kubernetes YAML file: ${name}.yaml" "generate .yaml file"
is "$output" ".*Generating quadlet file: ${name}.kube" "generate .kube file"
@@ -414,7 +414,7 @@ verify_begin=".*run --rm"
is "$output" ".*command: \[\".*serve.*\"\]" "Should command"
is "$output" ".*containerPort: 1234" "Should container container port"
-HIP_VISIBLE_DEVICES=99 run_ramalama serve --name=${name} --port 1234 --generate=quadlet/kube $model_quant
+HIP_VISIBLE_DEVICES=99 run_ramalama serve --name=${name} --port 1234 --generate=quadlet/kube $model
is "$output" ".*Generating Kubernetes YAML file: ${name}.yaml" "generate .yaml file"
run cat $name.yaml


@@ -12,8 +12,8 @@ load setup_suite
# bats test_tags=distro-integration
@test "ramalama pull ollama" {
-run_ramalama pull tiny
-run_ramalama rm tiny
+run_ramalama pull ollama://tinyllama
+run_ramalama rm ollama://tinyllama
run_ramalama pull https://ollama.com/library/smollm:135m
run_ramalama list
is "$output" ".*https://ollama.com/library/smollm:135m" "image was actually pulled locally"
@@ -36,8 +36,8 @@ load setup_suite
ollama serve &
sleep 3
ollama pull tinyllama
-run_ramalama pull tiny
-run_ramalama rm tiny
+run_ramalama pull ollama://tinyllama
+run_ramalama rm ollama://tinyllama
ollama rm tinyllama
ollama pull smollm:135m


@@ -10,8 +10,8 @@ load setup_suite
run_ramalama 22 inspect ${MODEL}
is "$output" "Error: No ref file found for '${MODEL}'. Please pull model."
-run_ramalama pull tiny
-run_ramalama inspect tiny
+run_ramalama pull ollama://tinyllama
+run_ramalama inspect ollama://tinyllama
is "${lines[0]}" "tinyllama" "model name"
is "${lines[1]}" " Path: .*store/ollama/library/tinyllama/.*" "model path"
@@ -25,8 +25,8 @@ load setup_suite
# bats test_tags=distro-integration
@test "ramalama inspect GGUF model with --all" {
-run_ramalama pull tiny
-run_ramalama inspect --all tiny
+run_ramalama pull ollama://tinyllama
+run_ramalama inspect --all ollama://tinyllama
is "${lines[0]}" "tinyllama" "model name"
is "${lines[1]}" " Path: .*store/ollama/library/tinyllama/.*" "model path"
@@ -40,20 +40,20 @@ load setup_suite
# bats test_tags=distro-integration
@test "ramalama inspect GGUF model with --get" {
-run_ramalama pull tiny
+run_ramalama pull ollama://tinyllama
-run_ramalama inspect --get general.architecture tiny
+run_ramalama inspect --get general.architecture ollama://tinyllama
is "$output" "llama"
-run_ramalama inspect --get general.name tiny
+run_ramalama inspect --get general.name ollama://tinyllama
is "$output" "TinyLlama"
}
# bats test_tags=distro-integration
@test "ramalama inspect GGUF model with --get all" {
-run_ramalama pull tiny
+run_ramalama pull ollama://tinyllama
-run_ramalama inspect --get all tiny
+run_ramalama inspect --get all ollama://tinyllama
is "${lines[0]}" "general.architecture: llama" "check for general.architecture"
is "${lines[1]}" "general.file_type: 2" "check for general.file_type"
is "${lines[2]}" "general.name: TinyLlama" "check for general.name"