
commit 6e3088357d (parent 362cddb0c0)
Author: Michael Engel <mengel@redhat.com>
Date: 2025-10-21 14:35:37 +02:00

    Added inference specification files to info command

    Signed-off-by: Michael Engel <mengel@redhat.com>

5 changed files with 159 additions and 80 deletions
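
The new `Inference` block is plain JSON in the `ramalama info` output, so it can be consumed programmatically. A minimal sketch of reading it (assuming `ramalama` is on `PATH`; the key names follow the example output below):

    import json
    import subprocess

    # Parse the JSON that `ramalama info` prints.
    result = subprocess.run(["ramalama", "info"], capture_output=True, text=True, check=True)
    info = json.loads(result.stdout)

    # Default engine plus the spec and schema file maps added by this commit.
    print(info["Inference"]["Default"])  # e.g. "llama.cpp"
    for name, path in info["Inference"]["Engines"].items():
        print(f"{name}: {path}")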

README.md

@@ -361,7 +361,17 @@ $ cat /usr/share/ramalama/shortnames.conf
"Name": ""
},
"Image": "quay.io/ramalama/cuda:0.7",
"Runtime": "llama.cpp",
"Inference": {
"Default": "llama.cpp",
"Engines": {
"llama.cpp": "/usr/share/ramalama/inference-spec/engines/llama.cpp.yaml",
"mlx": "/usr/share/ramalama/inference-spec/engines/mlx.yaml",
"vllm": "/usr/share/ramalama/inference-spec/engines/vllm.yaml"
},
"Schema": {
"1-0-0": "/usr/share/ramalama/inference-spec/schema/schema.1-0-0.json"
}
},
"Shortnames": {
"Names": {
"cerebrum": "huggingface://froggeric/Cerebrum-1.0-7b-GGUF/Cerebrum-1.0-7b-Q4_KS.gguf",
@@ -412,7 +422,7 @@ $ cat /usr/share/ramalama/shortnames.conf
"/home/dwalsh/.config/ramalama/shortnames.conf",
]
},
"Store": "/home/dwalsh/.local/share/ramalama",
"Store": "/usr/share/ramalama",
"UseContainer": true,
"Version": "0.7.5"
}
@@ -578,7 +588,7 @@ $ cat /usr/share/ramalama/shortnames.conf
 },
 "graphDriverName": "overlay",
 "graphOptions": {},
-"graphRoot": "/home/dwalsh/.local/share/containers/storage",
+"graphRoot": "/usr/share/containers/storage",
 "graphRootAllocated": 2046687182848,
 "graphRootUsed": 399990419456,
 "graphStatus": {
@@ -595,7 +605,7 @@ $ cat /usr/share/ramalama/shortnames.conf
 },
 "runRoot": "/run/user/3267/containers",
 "transientStore": false,
-"volumePath": "/home/dwalsh/.local/share/containers/storage/volumes"
+"volumePath": "/usr/share/containers/storage/volumes"
 },
 "version": {
 "APIVersion": "5.4.2",
@@ -612,58 +622,68 @@ $ cat /usr/share/ramalama/shortnames.conf
"Name": "podman"
},
"Image": "quay.io/ramalama/cuda:0.7",
"Runtime": "llama.cpp",
"Inference": {
"Default": "llama.cpp",
"Engines": {
"llama.cpp": "/usr/share/ramalama/inference-spec/engines/llama.cpp.yaml",
"mlx": "/usr/share/ramalama/inference-spec/engines/mlx.yaml",
"vllm": "/usr/share/ramalama/inference-spec/engines/vllm.yaml"
},
"Schema": {
"1-0-0": "/usr/share/ramalama/inference-spec/schema/schema.1-0-0.json"
}
},
"Shortnames": {
"Names": {
"cerebrum": "huggingface://froggeric/Cerebrum-1.0-7b-GGUF/Cerebrum-1.0-7b-Q4_KS.gguf",
"deepseek": "ollama://deepseek-r1",
"dragon": "huggingface://llmware/dragon-mistral-7b-v0/dragon-mistral-7b-q4_k_m.gguf",
"gemma3": "hf://bartowski/google_gemma-3-4b-it-GGUF/google_gemma-3-4b-it-IQ2_M.gguf",
"gemma3:12b": "hf://bartowski/google_gemma-3-12b-it-GGUF/google_gemma-3-12b-it-IQ2_M.gguf",
"gemma3:1b": "hf://bartowski/google_gemma-3-1b-it-GGUF/google_gemma-3-1b-it-IQ2_M.gguf",
"gemma3:27b": "hf://bartowski/google_gemma-3-27b-it-GGUF/google_gemma-3-27b-it-IQ2_M.gguf",
"gemma3:4b": "hf://bartowski/google_gemma-3-4b-it-GGUF/google_gemma-3-4b-it-IQ2_M.gguf",
"granite": "ollama://granite3.1-dense",
"granite-code": "hf://ibm-granite/granite-3b-code-base-2k-GGUF/granite-3b-code-base.Q4_K_M.gguf",
"granite-code:20b": "hf://ibm-granite/granite-20b-code-base-8k-GGUF/granite-20b-code-base.Q4_K_M.gguf",
"granite-code:34b": "hf://ibm-granite/granite-34b-code-base-8k-GGUF/granite-34b-code-base.Q4_K_M.gguf",
"granite-code:3b": "hf://ibm-granite/granite-3b-code-base-2k-GGUF/granite-3b-code-base.Q4_K_M.gguf",
"granite-code:8b": "hf://ibm-granite/granite-8b-code-base-4k-GGUF/granite-8b-code-base.Q4_K_M.gguf",
"granite-lab-7b": "huggingface://instructlab/granite-7b-lab-GGUF/granite-7b-lab-Q4_K_M.gguf",
"granite-lab-8b": "huggingface://ibm-granite/granite-8b-code-base-GGUF/granite-8b-code-base.Q4_K_M.gguf",
"granite-lab:7b": "huggingface://instructlab/granite-7b-lab-GGUF/granite-7b-lab-Q4_K_M.gguf",
"granite:2b": "ollama://granite3.1-dense:2b",
"granite:7b": "huggingface://instructlab/granite-7b-lab-GGUF/granite-7b-lab-Q4_K_M.gguf",
"granite:8b": "ollama://granite3.1-dense:8b",
"hermes": "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q4_K_M.gguf",
"ibm/granite": "ollama://granite3.1-dense:8b",
"ibm/granite:2b": "ollama://granite3.1-dense:2b",
"ibm/granite:7b": "huggingface://instructlab/granite-7b-lab-GGUF/granite-7b-lab-Q4_K_M.gguf",
"ibm/granite:8b": "ollama://granite3.1-dense:8b",
"merlinite": "huggingface://instructlab/merlinite-7b-lab-GGUF/merlinite-7b-lab-Q4_K_M.gguf",
"merlinite-lab-7b": "huggingface://instructlab/merlinite-7b-lab-GGUF/merlinite-7b-lab-Q4_K_M.gguf",
"merlinite-lab:7b": "huggingface://instructlab/merlinite-7b-lab-GGUF/merlinite-7b-lab-Q4_K_M.gguf",
"merlinite:7b": "huggingface://instructlab/merlinite-7b-lab-GGUF/merlinite-7b-lab-Q4_K_M.gguf",
"mistral": "huggingface://TheBloke/Mistral-7B-Instruct-v0.2-GGUF/mistral-7b-instruct-v0.2.Q4_K_M.gguf",
"mistral:7b": "huggingface://TheBloke/Mistral-7B-Instruct-v0.2-GGUF/mistral-7b-instruct-v0.2.Q4_K_M.gguf",
"mistral:7b-v1": "huggingface://TheBloke/Mistral-7B-Instruct-v0.1-GGUF/mistral-7b-instruct-v0.1.Q5_K_M.gguf",
"mistral:7b-v2": "huggingface://TheBloke/Mistral-7B-Instruct-v0.2-GGUF/mistral-7b-instruct-v0.2.Q4_K_M.gguf",
"mistral:7b-v3": "huggingface://MaziyarPanahi/Mistral-7B-Instruct-v0.3-GGUF/Mistral-7B-Instruct-v0.3.Q4_K_M.gguf",
"mistral_code_16k": "huggingface://TheBloke/Mistral-7B-Code-16K-qlora-GGUF/mistral-7b-code-16k-qlora.Q4_K_M.gguf",
"mistral_codealpaca": "huggingface://TheBloke/Mistral-7B-codealpaca-lora-GGUF/mistral-7b-codealpaca-lora.Q4_K_M.gguf",
"mixtao": "huggingface://MaziyarPanahi/MixTAO-7Bx2-MoE-Instruct-v7.0-GGUF/MixTAO-7Bx2-MoE-Instruct-v7.0.Q4_K_M.gguf",
"openchat": "huggingface://TheBloke/openchat-3.5-0106-GGUF/openchat-3.5-0106.Q4_K_M.gguf",
"openorca": "huggingface://TheBloke/Mistral-7B-OpenOrca-GGUF/mistral-7b-openorca.Q4_K_M.gguf",
"phi2": "huggingface://MaziyarPanahi/phi-2-GGUF/phi-2.Q4_K_M.gguf",
"smollm:135m": "ollama://smollm:135m",
"tiny": "ollama://tinyllama"
"Names": {
"cerebrum": "huggingface://froggeric/Cerebrum-1.0-7b-GGUF/Cerebrum-1.0-7b-Q4_KS.gguf",
"deepseek": "ollama://deepseek-r1",
"dragon": "huggingface://llmware/dragon-mistral-7b-v0/dragon-mistral-7b-q4_k_m.gguf",
"gemma3": "hf://bartowski/google_gemma-3-4b-it-GGUF/google_gemma-3-4b-it-IQ2_M.gguf",
"gemma3:12b": "hf://bartowski/google_gemma-3-12b-it-GGUF/google_gemma-3-12b-it-IQ2_M.gguf",
"gemma3:1b": "hf://bartowski/google_gemma-3-1b-it-GGUF/google_gemma-3-1b-it-IQ2_M.gguf",
"gemma3:27b": "hf://bartowski/google_gemma-3-27b-it-GGUF/google_gemma-3-27b-it-IQ2_M.gguf",
"gemma3:4b": "hf://bartowski/google_gemma-3-4b-it-GGUF/google_gemma-3-4b-it-IQ2_M.gguf",
"granite": "ollama://granite3.1-dense",
"granite-code": "hf://ibm-granite/granite-3b-code-base-2k-GGUF/granite-3b-code-base.Q4_K_M.gguf",
"granite-code:20b": "hf://ibm-granite/granite-20b-code-base-8k-GGUF/granite-20b-code-base.Q4_K_M.gguf",
"granite-code:34b": "hf://ibm-granite/granite-34b-code-base-8k-GGUF/granite-34b-code-base.Q4_K_M.gguf",
"granite-code:3b": "hf://ibm-granite/granite-3b-code-base-2k-GGUF/granite-3b-code-base.Q4_K_M.gguf",
"granite-code:8b": "hf://ibm-granite/granite-8b-code-base-4k-GGUF/granite-8b-code-base.Q4_K_M.gguf",
"granite-lab-7b": "huggingface://instructlab/granite-7b-lab-GGUF/granite-7b-lab-Q4_K_M.gguf",
"granite-lab-8b": "huggingface://ibm-granite/granite-8b-code-base-GGUF/granite-8b-code-base.Q4_K_M.gguf",
"granite-lab:7b": "huggingface://instructlab/granite-7b-lab-GGUF/granite-7b-lab-Q4_K_M.gguf",
"granite:2b": "ollama://granite3.1-dense:2b",
"granite:7b": "huggingface://instructlab/granite-7b-lab-GGUF/granite-7b-lab-Q4_K_M.gguf",
"granite:8b": "ollama://granite3.1-dense:8b",
"hermes": "huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q4_K_M.gguf",
"ibm/granite": "ollama://granite3.1-dense:8b",
"ibm/granite:2b": "ollama://granite3.1-dense:2b",
"ibm/granite:7b": "huggingface://instructlab/granite-7b-lab-GGUF/granite-7b-lab-Q4_K_M.gguf",
"ibm/granite:8b": "ollama://granite3.1-dense:8b",
"merlinite": "huggingface://instructlab/merlinite-7b-lab-GGUF/merlinite-7b-lab-Q4_K_M.gguf",
"merlinite-lab-7b": "huggingface://instructlab/merlinite-7b-lab-GGUF/merlinite-7b-lab-Q4_K_M.gguf",
"merlinite-lab:7b": "huggingface://instructlab/merlinite-7b-lab-GGUF/merlinite-7b-lab-Q4_K_M.gguf",
"merlinite:7b": "huggingface://instructlab/merlinite-7b-lab-GGUF/merlinite-7b-lab-Q4_K_M.gguf",
"mistral": "huggingface://TheBloke/Mistral-7B-Instruct-v0.2-GGUF/mistral-7b-instruct-v0.2.Q4_K_M.gguf",
"mistral:7b": "huggingface://TheBloke/Mistral-7B-Instruct-v0.2-GGUF/mistral-7b-instruct-v0.2.Q4_K_M.gguf",
"mistral:7b-v1": "huggingface://TheBloke/Mistral-7B-Instruct-v0.1-GGUF/mistral-7b-instruct-v0.1.Q5_K_M.gguf",
"mistral:7b-v2": "huggingface://TheBloke/Mistral-7B-Instruct-v0.2-GGUF/mistral-7b-instruct-v0.2.Q4_K_M.gguf",
"mistral:7b-v3": "huggingface://MaziyarPanahi/Mistral-7B-Instruct-v0.3-GGUF/Mistral-7B-Instruct-v0.3.Q4_K_M.gguf",
"mistral_code_16k": "huggingface://TheBloke/Mistral-7B-Code-16K-qlora-GGUF/mistral-7b-code-16k-qlora.Q4_K_M.gguf",
"mistral_codealpaca": "huggingface://TheBloke/Mistral-7B-codealpaca-lora-GGUF/mistral-7b-codealpaca-lora.Q4_K_M.gguf",
"mixtao": "huggingface://MaziyarPanahi/MixTAO-7Bx2-MoE-Instruct-v7.0-GGUF/MixTAO-7Bx2-MoE-Instruct-v7.0.Q4_K_M.gguf",
"openchat": "huggingface://TheBloke/openchat-3.5-0106-GGUF/openchat-3.5-0106.Q4_K_M.gguf",
"openorca": "huggingface://TheBloke/Mistral-7B-OpenOrca-GGUF/mistral-7b-openorca.Q4_K_M.gguf",
"phi2": "huggingface://MaziyarPanahi/phi-2-GGUF/phi-2.Q4_K_M.gguf",
"smollm:135m": "ollama://smollm:135m",
"tiny": "ollama://tinyllama"
},
"Files": [
"/usr/share/ramalama/shortnames.conf",
"/home/dwalsh/.config/ramalama/shortnames.conf",
]
},
"Files": [
"/usr/share/ramalama/shortnames.conf",
"/home/dwalsh/.config/ramalama/shortnames.conf",
]
},
"Store": "/home/dwalsh/.local/share/ramalama",
"Store": "/usr/share/ramalama",
"UseContainer": true,
"Version": "0.7.5"
}


@@ -17,14 +17,24 @@ show this help message and exit
 ## FIELDS
 The `Accelerator` field indicates the accelerator type for the machine.
 The `Config` field shows the list of paths to RamaLama configuration files used.
 The `Engine` field indicates the OCI container engine used to launch the container in which to run the AI Model.
 The `Image` field indicates the default container image in which to run the AI Model.
-The `Runtime` field indicates which backend engine is used to execute the AI model:
-- `llama.cpp`: Uses the llama.cpp library for model execution
-- `vllm`: Uses the vLLM library for model execution
+The `Inference` field lists the default inference engine along with the available engine specification and schema files used for model inference.
+For example:
+- `llama.cpp`
+- `vllm`
+- `mlx`
 The `Selinux` field indicates whether SELinux is activated.
 The `Shortnames` field shows the list of configuration files used to specify AI Model short names, as well as the merged list of short names.
 The `Store` field indicates the directory path where RamaLama stores its persistent data, including downloaded models, configuration files, and cached data. By default, this is located in the user's local share directory.
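
Since `Engines` and `Schema` map names to files on disk, it is straightforward to check that the advertised paths actually exist. A small sketch; it relies only on the key names shown in the example output on this page:

    import json
    import subprocess
    from pathlib import Path

    info = json.loads(subprocess.run(["ramalama", "info"], capture_output=True, text=True).stdout)
    for name, path in info["Inference"]["Engines"].items():
        # Flag engine spec files that are advertised but not installed.
        print(f"{name}: {path} [{'ok' if Path(path).is_file() else 'missing'}]")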
@@ -43,7 +53,17 @@ $ ramalama info
"Name": ""
},
"Image": "quay.io/ramalama/cuda:0.7",
"Runtime": "llama.cpp",
"Inference": {
"Default": "llama.cpp",
"Engines": {
"llama.cpp": "/usr/share/ramalama/inference-spec/engines/llama.cpp.yaml",
"mlx": "/usr/share/ramalama/inference-spec/engines/mlx.yaml",
"vllm": "/usr/share/ramalama/inference-spec/engines/vllm.yaml"
},
"Schema": {
"1-0-0": "/usr/share/ramalama/inference-spec/schema/schema.1-0-0.json"
}
},
"Shortnames": {
"Names": {
"cerebrum": "huggingface://froggeric/Cerebrum-1.0-7b-GGUF/Cerebrum-1.0-7b-Q4_KS.gguf",
@@ -94,7 +114,7 @@ $ ramalama info
"/home/dwalsh/.config/ramalama/shortnames.conf",
]
},
"Store": "/home/dwalsh/.local/share/ramalama",
"Store": "/usr/share/ramalama",
"UseContainer": true,
"Version": "0.7.5"
}
@@ -251,7 +271,7 @@ $ ramalama info
 },
 "graphDriverName": "overlay",
 "graphOptions": {},
-"graphRoot": "/home/dwalsh/.local/share/containers/storage",
+"graphRoot": "/usr/share/containers/storage",
 "graphRootAllocated": 2046687182848,
 "graphRootUsed": 399990419456,
 "graphStatus": {
@@ -268,7 +288,7 @@ $ ramalama info
 },
 "runRoot": "/run/user/3267/containers",
 "transientStore": false,
-"volumePath": "/home/dwalsh/.local/share/containers/storage/volumes"
+"volumePath": "/usr/share/containers/storage/volumes"
 },
 "version": {
 "APIVersion": "5.4.2",
@@ -285,7 +305,17 @@ $ ramalama info
"Name": "podman"
},
"Image": "quay.io/ramalama/cuda:0.7",
"Runtime": "llama.cpp",
"Inference": {
"Default": "llama.cpp",
"Engines": {
"llama.cpp": "/usr/share/ramalama/inference-spec/engines/llama.cpp.yaml",
"mlx": "/usr/share/ramalama/inference-spec/engines/mlx.yaml",
"vllm": "/usr/share/ramalama/inference-spec/engines/vllm.yaml"
},
"Schema": {
"1-0-0": "/usr/share/ramalama/inference-spec/schema/schema.1-0-0.json"
}
},
"Shortnames": {
"Names": {
"cerebrum": "huggingface://froggeric/Cerebrum-1.0-7b-GGUF/Cerebrum-1.0-7b-Q4_KS.gguf",
@@ -336,7 +366,7 @@ $ ramalama info
"/home/dwalsh/.config/ramalama/shortnames.conf",
]
},
"Store": "/home/dwalsh/.local/share/ramalama",
"Store": "/usr/share/ramalama",
"UseContainer": true,
"Version": "0.7.5"
}


@@ -46,7 +46,17 @@ $ ramalama info
"Name": ""
},
"Image": "quay.io/ramalama/cuda:0.7",
"Runtime": "llama.cpp",
"Inference": {
"Default": "llama.cpp",
"Engines": {
"llama.cpp": "/usr/share/ramalama/inference-spec/engines/llama.cpp.yaml",
"mlx": "/usr/share/ramalama/inference-spec/engines/mlx.yaml",
"vllm": "/usr/share/ramalama/inference-spec/engines/vllm.yaml"
},
"Schema": {
"1-0-0": "/usr/share/ramalama/inference-spec/schema/schema.1-0-0.json"
}
},
"Shortnames": {
"Names": {
"cerebrum": "huggingface://froggeric/Cerebrum-1.0-7b-GGUF/Cerebrum-1.0-7b-Q4_KS.gguf",
@@ -97,7 +107,7 @@ $ ramalama info
"/home/dwalsh/.config/ramalama/shortnames.conf",
]
},
"Store": "/home/dwalsh/.local/share/ramalama",
"Store": "/usr/share/ramalama",
"UseContainer": true,
"Version": "0.7.5"
}
@@ -254,7 +264,7 @@ $ ramalama info
 },
 "graphDriverName": "overlay",
 "graphOptions": {},
-"graphRoot": "/home/dwalsh/.local/share/containers/storage",
+"graphRoot": "/usr/share/containers/storage",
 "graphRootAllocated": 2046687182848,
 "graphRootUsed": 399990419456,
 "graphStatus": {
@@ -271,7 +281,7 @@ $ ramalama info
 },
 "runRoot": "/run/user/3267/containers",
 "transientStore": false,
-"volumePath": "/home/dwalsh/.local/share/containers/storage/volumes"
+"volumePath": "/usr/share/containers/storage/volumes"
 },
 "version": {
 "APIVersion": "5.4.2",
@@ -288,7 +298,17 @@ $ ramalama info
"Name": "podman"
},
"Image": "quay.io/ramalama/cuda:0.7",
"Runtime": "llama.cpp",
"Inference": {
"Default": "llama.cpp",
"Engines": {
"llama.cpp": "/usr/share/ramalama/inference-spec/engines/llama.cpp.yaml",
"mlx": "/usr/share/ramalama/inference-spec/engines/mlx.yaml",
"vllm": "/usr/share/ramalama/inference-spec/engines/vllm.yaml"
},
"Schema": {
"1-0-0": "/usr/share/ramalama/inference-spec/schema/schema.1-0-0.json"
}
},
"Shortnames": {
"Names": {
"cerebrum": "huggingface://froggeric/Cerebrum-1.0-7b-GGUF/Cerebrum-1.0-7b-Q4_KS.gguf",
@@ -339,7 +359,7 @@ $ ramalama info
"/home/dwalsh/.config/ramalama/shortnames.conf",
]
},
"Store": "/home/dwalsh/.local/share/ramalama",
"Store": "/usr/share/ramalama",
"UseContainer": true,
"Version": "0.7.5"
}
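
The `Schema` entry maps a version (`1-0-0`) to a JSON schema file, and each engine entry points at a YAML spec. Whether RamaLama itself validates specs this way is not shown in this diff, but the schema file could in principle be used like this (a sketch, assuming PyYAML and the `jsonschema` package are installed):

    import json

    import yaml  # assumption: PyYAML is available
    from jsonschema import validate  # assumption: jsonschema is available

    # Paths copied from the example output above; adjust for your install.
    with open("/usr/share/ramalama/inference-spec/schema/schema.1-0-0.json") as f:
        schema = json.load(f)
    with open("/usr/share/ramalama/inference-spec/engines/llama.cpp.yaml") as f:
        spec = yaml.safe_load(f)

    # Raises jsonschema.exceptions.ValidationError if the spec does not conform.
    validate(instance=spec, schema=schema)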


@@ -10,8 +10,6 @@ from datetime import datetime, timezone
 from pathlib import Path
 from typing import get_args
-from ramalama.config import COLOR_OPTIONS, SUPPORTED_ENGINES, SUPPORTED_RUNTIMES
 # if autocomplete doesn't exist, just do nothing, don't break
 try:
     import argcomplete
@@ -27,7 +25,16 @@ from ramalama.chat import default_prefix
 from ramalama.cli_arg_normalization import normalize_pull_arg
 from ramalama.command.factory import assemble_command
 from ramalama.common import accel_image, get_accel, perror
-from ramalama.config import CONFIG, coerce_to_bool, load_file_config
+from ramalama.config import (
+    COLOR_OPTIONS,
+    CONFIG,
+    SUPPORTED_ENGINES,
+    SUPPORTED_RUNTIMES,
+    coerce_to_bool,
+    get_inference_schema_files,
+    get_inference_spec_files,
+    load_file_config,
+)
 from ramalama.endian import EndianMismatchError
 from ramalama.logger import configure_logger, logger
 from ramalama.model_inspect.error import ParseError
@@ -555,25 +562,29 @@ def _list_models(args):
 def info_cli(args):
     info = {
         "Accelerator": get_accel(),
         "Config": load_file_config(),
         "Engine": {
             "Name": args.engine,
         },
         "Image": accel_image(CONFIG),
-        "Runtime": args.runtime,
+        "Inference": {
+            "Default": args.runtime,
+            "Engines": {spec: str(path) for spec, path in get_inference_spec_files().items()},
+            "Schema": {schema: str(path) for schema, path in get_inference_schema_files().items()},
+        },
         "Selinux": CONFIG.selinux,
+        "Shortnames": {
+            "Files": shortnames.paths,
+            "Names": shortnames.shortnames,
+        },
         "Store": args.store,
         "UseContainer": args.container,
         "Version": version(),
     }
-    info["Shortnames"] = {
-        "Files": shortnames.paths,
-        "Names": shortnames.shortnames,
-    }
     if args.engine and len(args.engine) > 0:
         info["Engine"]["Info"] = engine.info(args)
-    info["Accelerator"] = get_accel()
     print(json.dumps(info, sort_keys=True, indent=4))
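
`get_inference_spec_files()` and `get_inference_schema_files()` are new helpers in `ramalama.config`; their bodies are not part of this page. Judging from how `info_cli` consumes them (name-to-path mappings that are stringified for display), a plausible sketch could look like the following — the directory layout is inferred from the paths in the example output, and the real implementation may differ:

    from pathlib import Path

    # Assumed base directory, inferred from the example `ramalama info` output.
    INFERENCE_SPEC_DIR = Path("/usr/share/ramalama/inference-spec")

    def get_inference_spec_files() -> dict[str, Path]:
        # Map engine name (file stem) to its spec file, e.g. {"vllm": .../vllm.yaml}.
        return {p.stem: p for p in sorted((INFERENCE_SPEC_DIR / "engines").glob("*.yaml"))}

    def get_inference_schema_files() -> dict[str, Path]:
        # Map schema version to its schema file, e.g. {"1-0-0": .../schema.1-0-0.json}.
        return {p.stem.removeprefix("schema."): p
                for p in sorted((INFERENCE_SPEC_DIR / "schema").glob("schema.*.json"))}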


@@ -24,7 +24,6 @@ load helpers
 # FIXME Engine (podman|docker|'')
 tests="
 Image | "quay.io/ramalama/.*"
-Runtime | "llama.cpp"
 Version | "${version}"
 Store | \\\("${HOME}/.local/share/ramalama"\\\|"/var/lib/ramalama"\\\)
 "
@@ -46,7 +45,6 @@ Store | \\\("${HOME}/.local/share/ramalama"\\\|"/var/lib/ramalama"\\\)
 tests="
 Engine.Name | $engine
 Image | $image
-Runtime | $runtime
 Store | $(pwd)/$store
 "