Mirror of https://github.com/containers/ramalama.git, synced 2026-02-05 06:46:39 +01:00
Use Hugging Face models for tinyllama and smollm:135m

The chat templates from the ollama models do not work with llama.cpp after being converted (successfully) to jinja2. Work around this by adding shortnames.conf aliases that point the affected shortnames at equivalent models on Hugging Face.

Signed-off-by: Oliver Walsh <owalsh@redhat.com>
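A shortname entry is just a TOML key/value pair mapping an alias to a fully qualified model URI. A minimal sketch of the alias form this commit relies on (the [shortnames] table name is shown for illustration; the shipped file may organize entries differently):

    [shortnames]
      "tiny" = "hf://TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"

With an alias like this in place, ramalama run tiny resolves to the Hugging Face GGUF instead of the ollama model, which sidesteps the broken chat template.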
@@ -51,9 +51,10 @@
  "qwen2.5vl:32b" = "hf://ggml-org/Qwen2.5-VL-32B-Instruct-GGUF"
  "qwen2.5vl:3b" = "hf://ggml-org/Qwen2.5-VL-3B-Instruct-GGUF"
  "qwen2.5vl:7b" = "hf://ggml-org/Qwen2.5-VL-7B-Instruct-GGUF"
- "smollm:135m" = "ollama://smollm:135m"
+ "smollm:135m" = "hf://HuggingFaceTB/smollm-135M-instruct-v0.2-Q8_0-GGUF"
  "smolvlm" = "hf://ggml-org/SmolVLM-500M-Instruct-GGUF"
  "smolvlm:256m" = "hf://ggml-org/SmolVLM-256M-Instruct-GGUF"
  "smolvlm:2b" = "hf://ggml-org/SmolVLM-Instruct-GGUF"
  "smolvlm:500m" = "hf://ggml-org/SmolVLM-500M-Instruct-GGUF"
- "tiny" = "ollama://tinyllama"
+ "tiny" = "hf://TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
+ "tinyllama" = "hf://TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
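A quick way to confirm that an alias now resolves to the Hugging Face model (a manual check, not part of the commit; the exact list output format will vary):

    ramalama pull smollm:135m
    ramalama list

The listed entry should reference the huggingface source rather than the ollama registry.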
@@ -3,6 +3,7 @@
  load helpers

  MODEL=smollm:135m
+ MODEL_FULLNAME=smollm-135M-instruct-v0.2-Q8_0-GGUF

  @test "ramalama --dryrun run basic output" {
  image=m_$(safename)
@@ -19,7 +20,7 @@ EOF

  run_ramalama -q --dryrun run ${MODEL}
  is "$output" "${verify_begin}.*"
- is "$output" ".*${MODEL}" "verify model name"
+ is "$output" ".*${MODEL_FULLNAME}" "verify model name"
  is "$output" ".*--cache-reuse 256" "verify cache-reuse is being set"
  assert "$output" !~ ".*--ctx-size" "assert ctx-size is not show by default"
  assert "$output" !~ ".*--seed" "assert seed does not show by default"
@@ -41,7 +42,7 @@ EOF

  RAMALAMA_CONFIG=/dev/null run_ramalama -q --dryrun run --cache-reuse 512 --seed 9876 -c 4096 --net bridge --name foobar ${MODEL}
  is "$output" ".*--network bridge.*" "dryrun correct with --name"
- is "$output" ".*${MODEL}" "verify model name"
+ is "$output" ".*${MODEL_FULLNAME}" "verify model name"
  is "$output" ".*--ctx-size 4096" "verify ctx-size is set"
  is "$output" ".*--cache-reuse 512" "verify cache-reuse is being set"
  is "$output" ".*--temp 0.8" "verify temp is set"
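These assertions rely on the resolved GGUF file name showing up in the dry-run output once the shortname points at Hugging Face. A rough manual equivalent of the check (an illustrative sketch; the grep pattern is the file name the test assumes, not a guaranteed output format):

    ramalama -q --dryrun run smollm:135m | grep smollm-135M-instruct-v0.2-Q8_0-GGUF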
@@ -197,43 +197,44 @@ verify_begin=".*run --rm"
  }

  @test "ramalama serve --generate=quadlet" {
- model="smollm"
- model_quant="$model:135m"
- quadlet="$model.container"
+ model_file="smollm-135m-instruct"
+ model_fullname="smollm-135M-instruct-v0.2-Q8_0-GGUF"
+ model="smollm:135m"
+ quadlet="$model_fullname.container"
  name=c_$(safename)
- run_ramalama pull $model_quant
- run_ramalama -q serve --port 1234 --generate=quadlet $model_quant
+ run_ramalama pull $model
+ run_ramalama -q serve --port 1234 --generate=quadlet $model
  is "$output" "Generating quadlet file: $quadlet" "generate $quadlet"

  run cat $quadlet
  is "$output" ".*PublishPort=0.0.0.0:1234:1234" "PublishPort should match"
  is "$output" ".*Exec=.*llama-server --port 1234 --model .*" "Exec line should be correct"
- is "$output" ".*Mount=type=bind,.*$model" "Mount line should be correct"
+ is "$output" ".*Mount=type=bind,.*$model_file" "Mount line should be correct"

- HIP_VISIBLE_DEVICES=99 run_ramalama -q serve --port 1234 --generate=quadlet $model_quant
+ HIP_VISIBLE_DEVICES=99 run_ramalama -q serve --port 1234 --generate=quadlet $model
  is "$output" "Generating quadlet file: $quadlet" "generate $quadlet"

  run cat $quadlet
  is "$output" ".*Environment=HIP_VISIBLE_DEVICES=99" "Should contain env property"

  rm $quadlet
- run_ramalama 2 serve --name=${name} --port 1234 --generate=bogus $model_quant
+ run_ramalama 2 serve --name=${name} --port 1234 --generate=bogus $model
  is "$output" ".*error: argument --generate: invalid choice: .*bogus.* (choose from.*quadlet.*kube.*quadlet/kube.*)" "Should fail"

- run_ramalama -q serve --port 1234 --generate=quadlet --add-to-unit "section1:key0:value0" $model_quant
+ run_ramalama -q serve --port 1234 --generate=quadlet --add-to-unit "section1:key0:value0" $model
  is "$output" "Generating quadlet file: $quadlet" "generate $quadlet"

  run cat $quadlet
  is "$output" ".*PublishPort=0.0.0.0:1234:1234" "PublishPort should match"
  is "$output" ".*Exec=.*llama-server --port 1234 --model .*" "Exec line should be correct"
- is "$output" ".*Mount=type=bind,.*$model" "Mount line should be correct"
+ is "$output" ".*Mount=type=bind,.*$model_file" "Mount line should be correct"
  is "$output" ".*key0=value0.*" "added unit should be correct"

- run_ramalama 2 -q serve --port 1234 --generate=quadlet --add-to-unit "section1:key0:" $model_quant
+ run_ramalama 2 -q serve --port 1234 --generate=quadlet --add-to-unit "section1:key0:" $model
  is "$output" ".*error: --add-to-unit parameters must be of the form <section>:<key>:<value>.*"

  rm $quadlet
- run_ramalama 2 serve --name=${name} --port 1234 --add-to-unit "section1:key0:value0" $model_quant
+ run_ramalama 2 serve --name=${name} --port 1234 --add-to-unit "section1:key0:value0" $model
  is "$output" ".*error: --add-to-unit can only be used with --generate.*"
  }
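For context on how the generated .container file is consumed (not part of this commit; the service name systemd derives from the quadlet file is an assumption here):

    ramalama serve --port 1234 --generate=quadlet smollm:135m
    mkdir -p ~/.config/containers/systemd
    cp smollm-135M-instruct-v0.2-Q8_0-GGUF.container ~/.config/containers/systemd/
    systemctl --user daemon-reload
    systemctl --user start smollm-135M-instruct-v0.2-Q8_0-GGUF.service

Podman's Quadlet generator turns the .container file into an ordinary systemd user service.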
@@ -387,18 +388,17 @@ verify_begin=".*run --rm"
  # }

  @test "ramalama serve --generate=kube" {
- model="smollm"
- model_quant="$model:135m"
+ model="smollm:135m"
  name=c_$(safename)
- run_ramalama pull $model_quant
- run_ramalama serve --name=${name} --port 1234 --generate=kube $model_quant
+ run_ramalama pull $model
+ run_ramalama serve --name=${name} --port 1234 --generate=kube $model
  is "$output" ".*Generating Kubernetes YAML file: ${name}.yaml" "generate .yaml file"

  run cat $name.yaml
  is "$output" ".*command: \[\".*serve.*\"\]" "Should command"
  is "$output" ".*containerPort: 1234" "Should container container port"

- HIP_VISIBLE_DEVICES=99 run_ramalama serve --name=${name} --port 1234 --generate=kube $model_quant
+ HIP_VISIBLE_DEVICES=99 run_ramalama serve --name=${name} --port 1234 --generate=kube $model
  is "$output" ".*Generating Kubernetes YAML file: ${name}.yaml" "generate .yaml file"

  run cat $name.yaml
@@ -406,7 +406,7 @@ verify_begin=".*run --rm"
  is "$output" ".*name: HIP_VISIBLE_DEVICES" "Should contain env name"
  is "$output" ".*value: 99" "Should contain env value"

- run_ramalama serve --name=${name} --port 1234 --generate=quadlet/kube $model_quant
+ run_ramalama serve --name=${name} --port 1234 --generate=quadlet/kube $model
  is "$output" ".*Generating Kubernetes YAML file: ${name}.yaml" "generate .yaml file"
  is "$output" ".*Generating quadlet file: ${name}.kube" "generate .kube file"

@@ -414,7 +414,7 @@ verify_begin=".*run --rm"
  is "$output" ".*command: \[\".*serve.*\"\]" "Should command"
  is "$output" ".*containerPort: 1234" "Should container container port"

- HIP_VISIBLE_DEVICES=99 run_ramalama serve --name=${name} --port 1234 --generate=quadlet/kube $model_quant
+ HIP_VISIBLE_DEVICES=99 run_ramalama serve --name=${name} --port 1234 --generate=quadlet/kube $model
  is "$output" ".*Generating Kubernetes YAML file: ${name}.yaml" "generate .yaml file"

  run cat $name.yaml
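The generated Kubernetes YAML can be exercised locally with Podman; a minimal sketch, assuming a container name of c_example in place of the safename-generated one:

    ramalama serve --name c_example --port 1234 --generate=kube smollm:135m
    podman kube play c_example.yaml
    podman kube down c_example.yaml

podman kube play creates the pod described by the YAML and podman kube down removes it again.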
@@ -12,8 +12,8 @@ load setup_suite

  # bats test_tags=distro-integration
  @test "ramalama pull ollama" {
- run_ramalama pull tiny
- run_ramalama rm tiny
+ run_ramalama pull ollama://tinyllama
+ run_ramalama rm ollama://tinyllama
  run_ramalama pull https://ollama.com/library/smollm:135m
  run_ramalama list
  is "$output" ".*https://ollama.com/library/smollm:135m" "image was actually pulled locally"
@@ -36,8 +36,8 @@ load setup_suite
  ollama serve &
  sleep 3
  ollama pull tinyllama
- run_ramalama pull tiny
- run_ramalama rm tiny
+ run_ramalama pull ollama://tinyllama
+ run_ramalama rm ollama://tinyllama
  ollama rm tinyllama

  ollama pull smollm:135m
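Because tiny now resolves to a Hugging Face GGUF, the tests that specifically exercise the ollama transport switch to the explicit URI form. The distinction, in short (shortname resolution is assumed to follow the shipped shortnames.conf):

    ramalama pull tiny                  # resolved via shortnames.conf, now an hf:// model
    ramalama pull ollama://tinyllama    # always pulls from the ollama registry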
@@ -10,8 +10,8 @@ load setup_suite
  run_ramalama 22 inspect ${MODEL}
  is "$output" "Error: No ref file found for '${MODEL}'. Please pull model."

- run_ramalama pull tiny
- run_ramalama inspect tiny
+ run_ramalama pull ollama://tinyllama
+ run_ramalama inspect ollama://tinyllama

  is "${lines[0]}" "tinyllama" "model name"
  is "${lines[1]}" " Path: .*store/ollama/library/tinyllama/.*" "model path"
@@ -25,8 +25,8 @@ load setup_suite

  # bats test_tags=distro-integration
  @test "ramalama inspect GGUF model with --all" {
- run_ramalama pull tiny
- run_ramalama inspect --all tiny
+ run_ramalama pull ollama://tinyllama
+ run_ramalama inspect --all ollama://tinyllama

  is "${lines[0]}" "tinyllama" "model name"
  is "${lines[1]}" " Path: .*store/ollama/library/tinyllama/.*" "model path"
@@ -40,20 +40,20 @@ load setup_suite

  # bats test_tags=distro-integration
  @test "ramalama inspect GGUF model with --get" {
- run_ramalama pull tiny
+ run_ramalama pull ollama://tinyllama

- run_ramalama inspect --get general.architecture tiny
+ run_ramalama inspect --get general.architecture ollama://tinyllama
  is "$output" "llama"

- run_ramalama inspect --get general.name tiny
+ run_ramalama inspect --get general.name ollama://tinyllama
  is "$output" "TinyLlama"
  }

  # bats test_tags=distro-integration
  @test "ramalama inspect GGUF model with --get all" {
- run_ramalama pull tiny
+ run_ramalama pull ollama://tinyllama

- run_ramalama inspect --get all tiny
+ run_ramalama inspect --get all ollama://tinyllama
  is "${lines[0]}" "general.architecture: llama" "check for general.architecture"
  is "${lines[1]}" "general.file_type: 2" "check for general.file_type"
  is "${lines[2]}" "general.name: TinyLlama" "check for general.name"