
Merge pull request #2339 from ramalama-labs/metrics

Add benchmark metrics persistence
Authored by Ian Eaves on 2026-01-25 02:24:52 -06:00 and committed by GitHub.
21 changed files with 759 additions and 56 deletions

View File

@@ -51,6 +51,9 @@ process to be launched inside of the container. If an environment variable is
specified without a value, the container engine checks the host environment
for a value and sets the variable only if it is set on the host.
#### **--format**
Set the output format of the benchmark results. Options include json and table (default: table).
#### **--help**, **-h**
show this help message and exit

View File

@@ -0,0 +1,46 @@
% ramalama-benchmarks 1
## NAME
ramalama\-benchmarks - view and interact with historical benchmark results
## SYNOPSIS
**ramalama benchmarks** [*options*] *command* [*args*...]
## DESCRIPTION
View and interact with historical benchmark results.
Results are stored as newline-delimited JSON (JSONL) in a `benchmarks.jsonl` file.
The storage folder is shown in `ramalama benchmarks --help` and can be
overridden via `ramalama.benchmarks.storage_folder` in `ramalama.conf`.
## OPTIONS
#### **--help**, **-h**
show this help message and exit
## COMMANDS
#### **list**
list benchmark results
## LIST OPTIONS
#### **--limit**=LIMIT
limit number of results to display
#### **--offset**=OFFSET
offset for pagination (default: 0)
#### **--format**={table,json}
output format (table or json) (default: table)
## EXAMPLES
```
ramalama benchmarks list
```
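Because results are stored as plain JSONL, they can also be inspected outside of RamaLama. A minimal Python sketch (the path assumes the default storage folder for a rootless install; check `ramalama benchmarks --help` for the actual location if `storage_folder` is overridden):
```
import json
from pathlib import Path

# Assumed default location; adjust to the folder reported by `ramalama benchmarks --help`.
store = Path.home() / ".local/share/ramalama/benchmarks/benchmarks.jsonl"

for line in store.read_text(encoding="utf-8").splitlines():
    if line.strip():
        result = json.loads(line).get("result", {})
        print(result.get("model_filename"), result.get("avg_ts"))
```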
## SEE ALSO
**[ramalama(1)](ramalama.1.md)**, **[ramalama-bench(1)](ramalama-bench.1.md)**, **[ramalama.conf(5)](ramalama.conf.5.md)**
## HISTORY
Jan 2026, Originally compiled by Ian Eaves <ian@ramalama.com>

View File

@@ -137,6 +137,7 @@ The default can be overridden in the ramalama.conf file.
| Command | Description |
| ------------------------------------------------- | ---------------------------------------------------------- |
| [ramalama-bench(1)](ramalama-bench.1.md) |benchmark specified AI Model|
| [ramalama-benchmarks(1)](ramalama-benchmarks.1.md)|view and interact with historical benchmark results|
| [ramalama-chat(1)](ramalama-chat.1.md) |OpenAI chat with the specified REST API URL|
| [ramalama-containers(1)](ramalama-containers.1.md)|list all RamaLama containers|
| [ramalama-convert(1)](ramalama-convert.1.md) |convert AI Models from local storage to OCI Image|

View File

@@ -221,10 +221,22 @@
[ramalama.user]
#
# Suppress the interactive prompt when running on macOS with a Podman VM
# that doesn't support GPU acceleration (e.g., applehv provider).
# When set to true, RamaLama will automatically proceed without GPU support
# instead of asking for confirmation.
# Can also be set via the `RAMALAMA_USER__NO_MISSING_GPU_PROMPT` environment variable.
#
[ramalama.benchmarks]
#storage_folder = <default store>/benchmarks
#
# Manually specify where to save benchmark results.
# By default, results are stored under the default model store directory
# in benchmarks/benchmarks.jsonl.
# Changing `ramalama.store` does not update this; set storage_folder explicitly.
[ramalama.user]
#no_missing_gpu_prompt = false

View File

@@ -267,6 +267,16 @@ Configuration settings for the openai hosted provider
**api_key**=""
Provider-specific API key used when invoking OpenAI-hosted transports. Overrides `RAMALAMA_API_KEY` when set.
## RAMALAMA.BENCHMARKS TABLE
The ramalama.benchmarks table contains benchmark related settings.
`[[ramalama.benchmarks]]`
**storage_folder**="<default store>/benchmarks"
Manually specify where to save benchmark results.
By default, this will be stored in the default model store directory under `benchmarks/`.
Changing `ramalama.store` does not update this; set `ramalama.benchmarks.storage_folder` explicitly if needed.
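To confirm which folder is in effect after all configuration layers are applied, the value can be read back from the loaded configuration object added in this change; a brief Python sketch:
```
from ramalama.config import CONFIG

# Effective storage folder after config-file and environment overrides are layered.
print(CONFIG.benchmarks.storage_folder)
# Records are appended to <storage_folder>/benchmarks.jsonl.
```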
## RAMALAMA.USER TABLE
The ramalama.user table contains user preference settings.

View File

@@ -117,7 +117,22 @@ commands:
inference_engine:
name: "llama-bench"
binary: "llama-bench"
options: *bench_perplexity_options
options:
- name: "--model"
description: "The AI model to run"
value: "{{ model.model_path }}"
- name: "-ngl"
description: "Number of layers to offload to the GPU if available"
value: "{{ 999 if args.ngl < 0 else args.ngl }}"
- name: "-ngld"
description: "Number of draft-model layers to offload to the GPU if available"
value: "{{ None if not args.model_draft else 999 if args.ngl < 0 else args.ngl }}"
- name: "--threads"
description: "Number of Threads to use during generation"
value: "{{ args.threads }}"
- name: "-o"
description: "Output format printed to stdout"
value: "json"
- name: rag
inference_engine:
name: "rag"

View File

@@ -0,0 +1,12 @@
class MissingStorageFolderError(Exception):
    def __init__(self):
        message = """
        No valid benchmarks storage folder could be determined.
        Set an explicit path via:
        RAMALAMA__BENCHMARKS_STORAGE_FOLDER=/absolute/path/to/benchmarks
        If this seems wrong for your setup, report it at:
        https://www.github.com/containers/ramalama/issues
        """
        super().__init__(message)

View File

@@ -0,0 +1,50 @@
import json
import logging
from dataclasses import asdict
from functools import cached_property
from pathlib import Path
from ramalama.benchmarks.errors import MissingStorageFolderError
from ramalama.benchmarks.schemas import BenchmarkRecord, DeviceInfoV1, get_benchmark_record
from ramalama.benchmarks.utilities import parse_jsonl
from ramalama.config import CONFIG
from ramalama.log_levels import LogLevel
logger = logging.getLogger("ramalama.benchmarks")
logger.setLevel(CONFIG.log_level or LogLevel.WARNING)
SCHEMA_VERSION = 1
BENCHMARKS_FILENAME = "benchmarks.jsonl"
class BenchmarksManager:
    def __init__(self, storage_folder: str | Path | None):
        if storage_folder is None:
            raise MissingStorageFolderError

        self.storage_folder = Path(storage_folder)
        self.storage_file = self.storage_folder / BENCHMARKS_FILENAME
        self.storage_file.parent.mkdir(parents=True, exist_ok=True)

    @cached_property
    def device_info(self) -> DeviceInfoV1:
        return DeviceInfoV1.current_device_info()

    def save(self, results: list[BenchmarkRecord] | BenchmarkRecord):
        if not isinstance(results, list):
            results = [results]
        if len(results) == 0:
            return

        with self.storage_file.open("a", encoding="utf-8") as handle:
            for record in results:
                handle.write(json.dumps(asdict(record), ensure_ascii=True))
                handle.write("\n")

    def list(self) -> list[BenchmarkRecord]:
        """List benchmark results from JSONL storage."""
        if not self.storage_file.exists():
            return []

        content = self.storage_file.read_text(encoding="utf-8")
        return [get_benchmark_record(result) for result in parse_jsonl(content)]
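As a usage sketch (the storage path below is illustrative; in RamaLama it comes from `CONFIG.benchmarks.storage_folder`), the manager appends records to `benchmarks.jsonl` and reads them back in insertion order:
```
from ramalama.benchmarks.manager import BenchmarksManager
from ramalama.benchmarks.schemas import BenchmarkRecordV1, LlamaBenchResultV1, TestConfigurationV1

# Illustrative storage folder; benchmarks.jsonl is created inside it.
mgr = BenchmarksManager("/tmp/ramalama-benchmarks")

record = BenchmarkRecordV1(
    configuration=TestConfigurationV1(inference_engine="llama.cpp"),
    result=LlamaBenchResultV1(model_filename="model.gguf", avg_ts=42.0),
)
mgr.save(record)       # appends one JSON line
print(mgr.list())      # re-hydrated BenchmarkRecordV1 instances
```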

View File

@@ -0,0 +1,211 @@
import platform
import socket
from dataclasses import dataclass, field, fields
from datetime import datetime, timezone
from functools import lru_cache
from typing import Any, ClassVar, Literal, TypeVar, overload
from ramalama.common import get_accel
VersionerT = TypeVar("VersionerT")
@dataclass
class DeviceInfo:
    pass


@dataclass
class DeviceInfoV1(DeviceInfo):
    hostname: str
    operating_system: str
    cpu_info: str
    accel: str

    version: ClassVar[Literal["v1"]] = "v1"

    @classmethod
    @lru_cache(maxsize=1)
    def current_device_info(cls) -> "DeviceInfoV1":
        return cls(
            hostname=socket.gethostname(),
            operating_system=f"{platform.system()} {platform.release()}",
            cpu_info=platform.processor() or platform.machine(),
            accel=get_accel(),
        )


@dataclass
class TestConfiguration:
    pass


@dataclass
class TestConfigurationV1(TestConfiguration):
    """Container configuration metadata for a benchmark run."""

    container_image: str = ""
    container_runtime: str = ""
    inference_engine: str = ""
    version: Literal["v1"] = "v1"
    runtime_args: list[str] | None = None


@dataclass
class LlamaBenchResult:
    pass


@dataclass
class LlamaBenchResultV1(LlamaBenchResult):
    version: Literal["v1"] = "v1"
    build_commit: str | None = None
    build_number: int | None = None
    backends: str | None = None
    cpu_info: str | None = None
    gpu_info: str | None = None
    model_filename: str | None = None
    model_type: str | None = None
    model_size: int | None = None
    model_n_params: int | None = None
    n_batch: int | None = None
    n_ubatch: int | None = None
    n_threads: int | None = None
    cpu_mask: str | None = None
    cpu_strict: int | None = None
    poll: int | None = None
    type_k: str | None = None
    type_v: str | None = None
    n_gpu_layers: int | None = None
    n_cpu_moe: int | None = None
    split_mode: str | None = None
    main_gpu: int | None = None
    no_kv_offload: int | None = None
    flash_attn: int | None = None
    devices: str | None = None
    tensor_split: str | None = None
    tensor_buft_overrides: str | None = None
    use_mmap: int | None = None
    embeddings: int | None = None
    no_op_offload: int | None = None
    no_host: int | None = None
    use_direct_io: int | None = None
    n_prompt: int | None = None
    n_gen: int | None = None
    n_depth: int | None = None
    test_time: str | None = None
    avg_ns: int | None = None
    stddev_ns: int | None = None
    avg_ts: float | None = None
    stddev_ts: float | None = None
    samples_ns: str | None = None  # JSON array stored as string
    samples_ts: str | None = None  # JSON array stored as string

    @classmethod
    def from_payload(cls, payload: dict) -> "LlamaBenchResult":
        """Build a result from a llama-bench JSON/JSONL object."""
        return cls(**{f.name: payload[f.name] for f in fields(cls) if f.name in payload})


@dataclass
class BenchmarkRecord:
    pass


@dataclass
class BenchmarkRecordV1(BenchmarkRecord):
    configuration: TestConfigurationV1
    result: LlamaBenchResultV1
    version: Literal["v1"] = "v1"
    created_at: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
    device: DeviceInfoV1 = field(default_factory=DeviceInfoV1.current_device_info)

    @classmethod
    def from_payload(cls, payload: dict) -> "BenchmarkRecordV1":
        payload = {**payload}
        if 'device' in payload:
            payload['device'] = DeviceInfoV1(**payload.pop("device"))
        configuration = TestConfigurationV1(**payload.pop('configuration', {}))
        result = LlamaBenchResultV1(**payload.pop('result', {}))
        return cls(configuration=configuration, result=result, **payload)


@overload
def get_device_info(payload: dict) -> DeviceInfoV1: ...
@overload
def get_device_info(payload: dict, version: Literal["v1"]) -> DeviceInfoV1: ...


def get_device_info(payload: dict, version: Any = None) -> DeviceInfo:
    if version is None:
        version = payload.get('version', "v1")
    if version == "v1":
        return DeviceInfoV1(**payload)
    raise NotImplementedError(f"No supported DeviceInfo schemas for version {version}")


@overload
def get_test_config(payload: dict) -> TestConfigurationV1: ...
@overload
def get_test_config(payload: dict, version: Literal["v1"]) -> TestConfigurationV1: ...


def get_test_config(payload: dict, version: Any = None) -> TestConfiguration:
    if version is None:
        version = payload.get('version', "v1")
    if version == "v1":
        return TestConfigurationV1(**payload)
    raise NotImplementedError(f"No supported TestConfiguration schemas for version {version}")


@overload
def get_llama_bench_result(payload: dict) -> LlamaBenchResultV1: ...
@overload
def get_llama_bench_result(payload: dict, version: Literal["v1"]) -> LlamaBenchResultV1: ...


def get_llama_bench_result(payload: dict, version: Any = None) -> LlamaBenchResult:
    if version is None:
        version = payload.get('version', "v1")
    if version == "v1":
        return LlamaBenchResultV1(**payload)
    raise NotImplementedError(f"No supported LlamaBench schemas for version {version}")


@overload
def get_benchmark_record(payload: dict) -> BenchmarkRecord: ...
@overload
def get_benchmark_record(payload: dict, version: Literal["v1"]) -> BenchmarkRecordV1: ...


def get_benchmark_record(payload: dict, version: Any = None) -> BenchmarkRecord:
    if version is None:
        version = payload.get('version', "v1")
    if version == "v1":
        return BenchmarkRecordV1.from_payload(payload)
    raise NotImplementedError(f"No supported benchmark schemas for version {version}")


def normalize_benchmark_record(benchmark: BenchmarkRecord) -> BenchmarkRecordV1:
    if isinstance(benchmark, BenchmarkRecordV1):
        return benchmark
    raise NotImplementedError(f"Received an unsupported benchmark record type {type(benchmark)}")

View File

@@ -0,0 +1,104 @@
import json
from ramalama.benchmarks.schemas import (
    BenchmarkRecord,
    BenchmarkRecordV1,
    normalize_benchmark_record,
)


def parse_jsonl(content: str) -> list[dict]:
    """Parse newline-delimited JSON benchmark results."""
    results = []
    for line in content.strip().split("\n"):
        if not line.strip():
            continue
        results.append(json.loads(line))
    return results


def parse_json(content: str) -> list[dict]:
    """Parse JSON array or single object benchmark results."""
    data = json.loads(content)
    if not isinstance(data, list):
        data = [data]
    return data


def print_bench_results(records: list[BenchmarkRecord]):
    """Format benchmark results as a table for display."""
    if not records:
        return

    normalized_records: list[BenchmarkRecordV1] = [normalize_benchmark_record(result) for result in records]

    rows: list[dict[str, object | None]] = []
    for i, item in enumerate(normalized_records):
        result = item.result
        model = result.model_filename or ""
        params = f"{result.model_n_params / 1e9:.2f} B" if result.model_n_params else "-"
        backend = result.gpu_info or result.cpu_info or "CPU"
        ngl = str(result.n_gpu_layers) if result.n_gpu_layers else "-"
        threads = str(result.n_threads) if result.n_threads else "-"

        # Format test type
        if result.n_prompt and result.n_gen:
            test = f"pp{result.n_prompt}+tg{result.n_gen}"
        elif result.n_prompt:
            test = f"pp{result.n_prompt}"
        elif result.n_gen:
            test = f"tg{result.n_gen}"
        else:
            test = "-"

        # Format tokens/sec with stddev
        if result.avg_ts and result.stddev_ts:
            t_s = f"{result.avg_ts:.2f} ± {result.stddev_ts:.2f}"
        elif result.avg_ts:
            t_s = f"{result.avg_ts:.2f}"
        else:
            t_s = "-"

        rows.append(
            {
                "id": i,
                "model": model,
                "params": params,
                "backend": backend,
                "ngl": ngl,
                "threads": threads,
                "test": test,
                "t/s": t_s,
                "engine": item.configuration.container_runtime,
                "date": item.created_at,
            }
        )

    optional_fields = ["id", "engine", "date"]
    for field in optional_fields:
        if all(not row.get(field) for row in rows):
            for row in rows:
                row.pop(field, None)

    column_order = ["id", "model", "params", "backend", "ngl", "threads", "test", "t/s", "engine", "date"]
    headers = [column for column in column_order if column in rows[0]]

    col_widths: dict[str, int] = {}
    for header in headers:
        max_len = len(header)
        for row in rows:
            value = row.get(header)
            text = "-" if value in (None, "") else str(value)
            max_len = max(max_len, len(text))
        col_widths[header] = max_len

    header_row = " | ".join(header.ljust(col_widths[header]) for header in headers)
    print(f"| {header_row} |")
    print(f"| {'-' * len(header_row)} |")

    for row in rows:
        cells = []
        for header in headers:
            value = row.get(header)
            text = "-" if value in (None, "") else str(value)
            cells.append(text.ljust(col_widths[header]))
        print(f"| {' | '.join(cells)} |")

View File

@@ -7,11 +7,14 @@ import shlex
import subprocess
import sys
import urllib.error
from dataclasses import asdict
from datetime import datetime, timezone
from textwrap import dedent
from typing import Any, get_args
from urllib.parse import urlparse
from ramalama.benchmarks.manager import BenchmarksManager
# if autocomplete doesn't exist, just do nothing, don't break
try:
import argcomplete
@@ -24,6 +27,7 @@ except Exception:
import ramalama.chat as chat
from ramalama import engine
from ramalama.arg_types import DefaultArgsType
from ramalama.benchmarks.utilities import print_bench_results
from ramalama.chat_utils import default_prefix
from ramalama.cli_arg_normalization import normalize_pull_arg
from ramalama.command.factory import assemble_command
@@ -301,6 +305,7 @@ def configure_subcommands(parser):
subparsers = parser.add_subparsers(dest="subcommand")
subparsers.required = False
bench_parser(subparsers)
benchmarks_parser(subparsers)
chat_parser(subparsers)
containers_parser(subparsers)
convert_parser(subparsers)
@@ -324,6 +329,8 @@ def configure_subcommands(parser):
def post_parse_setup(args):
"""Perform additional setup after parsing arguments."""
if getattr(args, "subcommand", None) == "benchmark":
args.subcommand = "bench"
def map_https_to_transport(input: str) -> str:
if input.startswith("https://") or input.startswith("http://"):
@@ -510,10 +517,67 @@ def add_network_argument(parser, dflt: str | None = "none"):
def bench_parser(subparsers):
parser = subparsers.add_parser("bench", aliases=["benchmark"], help="benchmark specified AI Model")
runtime_options(parser, "bench")
parser.add_argument("MODEL", completer=local_models) # positional argument
parser.add_argument("MODEL", completer=local_models)
parser.add_argument(
"--format",
choices=["table", "json"],
default="table",
help="output format (table or json)",
)
parser.set_defaults(func=bench_cli)
def benchmarks_parser(subparsers):
storage_folder = CONFIG.benchmarks.storage_folder
epilog = f"Storage folder: {storage_folder}" if storage_folder else "Storage folder: not configured"
parser = subparsers.add_parser(
"benchmarks",
help="manage and view benchmark results",
epilog=epilog,
)
parser.set_defaults(func=lambda _: parser.print_help())
benchmarks_subparsers = parser.add_subparsers(dest="benchmarks_command", metavar="[command]")
list_parser = benchmarks_subparsers.add_parser("list", help="list benchmark results")
list_parser.add_argument(
"--limit",
type=int,
default=None,
help="limit number of results to display",
)
list_parser.add_argument(
"--offset",
type=int,
default=0,
help="offset for pagination",
)
list_parser.add_argument(
"--format",
choices=["table", "json"],
default="table",
help="output format (table or json)",
)
list_parser.set_defaults(func=benchmarks_list_cli)
def benchmarks_list_cli(args):
"""Display a list of benchmark results from storage."""
bench_manager = BenchmarksManager(CONFIG.benchmarks.storage_folder)
results = bench_manager.list()
if not results:
print("No benchmark results found")
return
if args.format == "json":
output = [asdict(item) for item in results]
print(json.dumps(output, indent=2, sort_keys=True))
else:
print_bench_results(results)
def containers_parser(subparsers):
parser = subparsers.add_parser("containers", aliases=["ps"], help="list all RamaLama containers")
parser.add_argument(

View File

@@ -2,13 +2,12 @@ import argparse
import os
from typing import Optional
from ramalama.common import check_metal, check_nvidia
from ramalama.common import check_metal, check_nvidia, get_accel_env_vars
from ramalama.console import should_colorize
from ramalama.transports.transport_factory import CLASS_MODEL_TYPES, New
class RamalamaArgsContext:
def __init__(self) -> None:
self.cache_reuse: Optional[int] = None
self.container: Optional[bool] = None
@@ -52,7 +51,6 @@ class RamalamaArgsContext:
class RamalamaRagGenArgsContext:
def __init__(self) -> None:
self.debug: bool | None = None
self.format: str | None = None
@@ -74,7 +72,6 @@ class RamalamaRagGenArgsContext:
class RamalamaRagArgsContext:
def __init__(self) -> None:
self.debug: bool | None = None
self.port: str | None = None
@@ -92,7 +89,6 @@ class RamalamaRagArgsContext:
class RamalamaModelContext:
def __init__(self, model: CLASS_MODEL_TYPES, is_container: bool, should_generate: bool, dry_run: bool):
self.model = model
self.is_container = is_container
@@ -128,7 +124,6 @@ class RamalamaModelContext:
class RamalamaHostContext:
def __init__(
self, is_container: bool, uses_nvidia: bool, uses_metal: bool, should_colorize: bool, rpc_nodes: Optional[str]
):
@@ -140,7 +135,6 @@ class RamalamaHostContext:
class RamalamaCommandContext:
def __init__(
self,
args: RamalamaArgsContext | RamalamaRagGenArgsContext | RamalamaRagArgsContext,
@@ -169,9 +163,13 @@ class RamalamaCommandContext:
model = cli_args.model
else:
model = None
skip_gpu_probe = should_generate or bool(get_accel_env_vars())
uses_nvidia = True if skip_gpu_probe else (check_nvidia() is None)
host = RamalamaHostContext(
is_container,
check_nvidia() is None,
uses_nvidia,
check_metal(argparse.Namespace(**{"container": is_container})),
should_colorize(),
os.getenv("RAMALAMA_LLAMACPP_RPC_NODES", None),

View File

@@ -19,7 +19,6 @@ def is_truthy(resolved_stmt: str) -> bool:
class CommandFactory:
def __init__(self, spec_files: dict[str, Path], schema_files: dict[str, Path]):
self.spec_files = spec_files
self.schema_files = schema_files

View File

@@ -2,6 +2,7 @@ import json
import os
import sys
from dataclasses import dataclass, field, fields
from functools import lru_cache
from pathlib import Path
from typing import Any, Literal, Mapping, TypeAlias
@@ -79,6 +80,7 @@ def get_default_engine() -> SUPPORTED_ENGINES | None:
return "docker" if available("docker") else None
@lru_cache(maxsize=1)
def get_default_store() -> str:
# Check if running as root (Unix only)
if hasattr(os, 'geteuid') and os.geteuid() == 0:
@@ -136,6 +138,22 @@ def coerce_to_bool(value: Any) -> bool:
raise ValueError(f"Cannot coerce {value!r} to bool")
def get_storage_folder(base_path: str | None = None):
if base_path is None:
base_path = get_default_store()
return os.path.join(base_path, "benchmarks")
@dataclass
class Benchmarks:
storage_folder: str = field(default_factory=get_storage_folder)
disable: bool = False
def __post_init__(self):
os.makedirs(self.storage_folder, exist_ok=True)
@dataclass
class UserConfig:
no_missing_gpu_prompt: bool = False
@@ -225,6 +243,7 @@ class HTTPClientConfig:
class BaseConfig:
api: str = "none"
api_key: str | None = None
benchmarks: Benchmarks = field(default_factory=Benchmarks)
cache_reuse: int = 256
carimage: str = "registry.access.redhat.com/ubi10-micro:latest"
container: bool = None # type: ignore
@@ -235,12 +254,15 @@ class BaseConfig:
dryrun: bool = False
engine: SUPPORTED_ENGINES | None = field(default_factory=get_default_engine)
env: list[str] = field(default_factory=list)
gguf_quantization_mode: GGUF_QUANTIZATION_MODES = DEFAULT_GGUF_QUANTIZATION_MODE
host: str = "0.0.0.0"
http_client: HTTPClientConfig = field(default_factory=HTTPClientConfig)
image: str = None # type: ignore
images: RamalamaImages = field(default_factory=RamalamaImages)
rag_image: str | None = None
rag_images: RamalamaRagImages = field(default_factory=RamalamaRagImages)
keep_groups: bool = False
log_level: LogLevel | None = None
max_tokens: int = 0
ngl: int = -1
ocr: bool = False
@@ -260,9 +282,6 @@ class BaseConfig:
transport: str = "ollama"
user: UserConfig = field(default_factory=UserConfig)
verify: bool = True
gguf_quantization_mode: GGUF_QUANTIZATION_MODES = DEFAULT_GGUF_QUANTIZATION_MODE
http_client: HTTPClientConfig = field(default_factory=HTTPClientConfig)
log_level: LogLevel | None = None
provider: ProviderConfig = field(default_factory=ProviderConfig)
def __post_init__(self):
@@ -297,30 +316,26 @@ class Config(LayeredMixin, BaseConfig):
def load_file_config() -> dict[str, Any]:
parser = TOMLParser()
config_path = os.getenv("RAMALAMA_CONFIG")
config_paths: list[str] = []
if config_path and os.path.exists(config_path):
config = parser.parse_file(config_path)
config = config.get("ramalama", {})
config['settings'] = {'config_files': [config_path]}
if log_level := config.get("log_level"):
config["log_level"] = coerce_log_level(log_level)
return config
if (config_path := os.getenv("RAMALAMA_CONFIG", None)) and os.path.exists(config_path):
config_paths.append(config_path)
else:
default_config_paths = [os.path.join(conf_dir, "ramalama.conf") for conf_dir in DEFAULT_CONFIG_DIRS]
config = {}
default_config_paths = [os.path.join(conf_dir, "ramalama.conf") for conf_dir in DEFAULT_CONFIG_DIRS]
for path in default_config_paths:
if os.path.exists(path):
config_paths.append(str(path))
config_paths = []
for path in default_config_paths:
if os.path.exists(path):
config_paths.append(str(path))
parser.parse_file(path)
path_str = f"{path}.d"
if os.path.isdir(path_str):
for conf_file in sorted(Path(path_str).glob("*.conf")):
config_paths.append(str(conf_file))
parser.parse_file(conf_file)
config = parser.data
path_str = f"{path}.d"
if os.path.isdir(path_str):
for conf_file in sorted(Path(path_str).glob("*.conf")):
config_paths.append(str(conf_file))
for file in config_paths:
parser.parse_file(file)
config: dict[str, Any] = parser.data
if config:
config = config.get('ramalama', {})
config['settings'] = {'config_files': config_paths}

View File

@@ -1,4 +1,5 @@
import copy
import json
import os
import platform
import random
@@ -8,14 +9,19 @@ import sys
import time
from abc import ABC, abstractmethod
from functools import cached_property
from typing import TYPE_CHECKING, Any, Dict, Optional
from typing import TYPE_CHECKING, Any, Optional
from ramalama.common import ContainerEntryPoint
if TYPE_CHECKING:
from ramalama.chat import ChatOperationalArgs
from datetime import datetime, timezone
import ramalama.chat as chat
from ramalama.benchmarks.manager import BenchmarksManager
from ramalama.benchmarks.schemas import BenchmarkRecord, BenchmarkRecordV1, get_benchmark_record
from ramalama.benchmarks.utilities import parse_json, print_bench_results
from ramalama.common import (
MNT_DIR,
MNT_FILE_DRAFT,
@@ -25,6 +31,7 @@ from ramalama.common import (
is_split_file_model,
perror,
populate_volume_from_image,
run_cmd,
set_accel_env_vars,
)
from ramalama.compose import Compose
@@ -463,7 +470,52 @@ class Transport(TransportBase):
def bench(self, args, cmd: list[str]):
set_accel_env_vars()
self.execute_command(cmd, args)
output_format = getattr(args, "format", "table")
if args.dryrun:
if args.container:
self.engine.dryrun()
else:
dry_run(cmd)
return
elif args.container:
self.setup_container(args)
self.setup_mounts(args)
self.engine.add([args.image] + cmd)
result = self.engine.run_process()
else:
result = run_cmd(cmd, encoding="utf-8")
try:
bench_results = parse_json(result.stdout)
except (json.JSONDecodeError, ValueError):
message = f"Could not parse benchmark output. Expected JSON but got:\n{result.stdout}"
raise ValueError(message)
base_payload: dict = {
"created_at": datetime.now(timezone.utc).isoformat(),
"configuration": {
"container_image": args.image,
"container_runtime": args.engine,
"inference_engine": args.runtime,
"runtime_args": cmd,
},
}
results: list[BenchmarkRecord] = list()
for bench_result in bench_results:
result_record: BenchmarkRecordV1 = get_benchmark_record({"result": bench_result, **base_payload}, "v1")
results.append(result_record)
if output_format == "json":
print(result.stdout)
else:
print_bench_results(results)
if not CONFIG.benchmarks.disable:
bench_manager = BenchmarksManager(CONFIG.benchmarks.storage_folder)
bench_manager.save(results)
def run(self, args, cmd: list[str]):
# The Run command will first launch a daemonized service
@@ -766,7 +818,7 @@ class Transport(TransportBase):
compose = Compose(self.model_name, model_paths, chat_template_paths, mmproj_paths, args, exec_args)
compose.generate().write(output_dir)
def inspect_metadata(self) -> Dict[str, Any]:
def inspect_metadata(self) -> dict[str, Any]:
model_path = self._get_entry_model_path(False, False, False)
if GGUFInfoParser.is_model_gguf(model_path):
return GGUFInfoParser.parse_metadata(model_path).data

View File

@@ -7,7 +7,7 @@ import pytest
@pytest.mark.e2e
@skip_if_no_llama_bench
def test_model_and_size_columns(test_model):
def test_model_and_params_columns(test_model):
result = check_output(["ramalama", "bench", "-t", "2", test_model])
assert re.search(r"\|\s+model\s+\|\s+size", result)
assert re.search(r"\|\s+model\s+\|\s+params", result)

View File

@@ -17,7 +17,7 @@ function setup() {
@test "ramalama bench" {
skip_if_nocontainer
run_ramalama bench -t 2 $(test_model smollm:135m)
is "$output" ".*model.*size.*" "model and size in output"
is "$output" ".*model.*params.*" "model and params in output"
}
# vim: filetype=sh

View File

@@ -50,7 +50,7 @@ function check_help() {
# If usage lists no arguments (strings in ALL CAPS), confirm
# by running with 'invalid-arg' and expecting failure.
if ! expr "$usage" : '.*[A-Z]' >/dev/null; then
if [ "$cmd" != "help" ] && [ "$cmd" != "daemon" ]; then
if [ "$cmd" != "help" ] && [ "$cmd" != "daemon" ] && [ "$cmd" != "benchmarks" ]; then
dprint "$command_string invalid-arg"
run_ramalama '?' "$@" $cmd invalid-arg
is "$status" 2 \

View File

@@ -0,0 +1,109 @@
import json
import pytest
from ramalama.benchmarks import manager, schemas
def _make_config(engine: str) -> schemas.TestConfigurationV1:
    return schemas.TestConfigurationV1(
        container_image="quay.io/ramalama/ramalama:latest",
        container_runtime="docker",
        inference_engine=engine,
        runtime_args={"threads": 2},
    )


def _make_result(model_name: str, avg_ts: float) -> schemas.LlamaBenchResultV1:
    return schemas.LlamaBenchResultV1(
        build_commit="abc123",
        build_number=1,
        cpu_info="cpu",
        gpu_info="gpu",
        model_filename=model_name,
        n_threads=2,
        n_prompt=8,
        n_gen=16,
        avg_ts=avg_ts,
        stddev_ts=0.1,
    )


def _make_device() -> schemas.DeviceInfoV1:
    return schemas.DeviceInfoV1(
        hostname="host",
        operating_system="TestOS 1.0",
        cpu_info="cpu",
        accel="none",
    )


def test_save_benchmark_record_writes_jsonl(tmp_path):
    db = manager.BenchmarksManager(tmp_path)
    cfg = _make_config("llama.cpp")
    res = _make_result("model.gguf", 1.5)
    device = _make_device()
    record = schemas.BenchmarkRecordV1(
        configuration=cfg,
        result=res,
        created_at="2024-01-01 00:00:00",
        device=device,
    )

    db.save(record)

    assert db.storage_file.exists()
    payload = json.loads(db.storage_file.read_text().strip())
    assert payload["version"] == "v1"
    assert payload["created_at"] == "2024-01-01 00:00:00"
    assert payload["configuration"]["inference_engine"] == "llama.cpp"
    assert payload["result"]["model_filename"] == "model.gguf"
    assert payload["device"]["hostname"] == "host"


def test_list_empty_returns_empty_list(tmp_path):
    db = manager.BenchmarksManager(tmp_path)
    records = db.list()
    assert records == []


def test_manager_missing_storage_folder_raises():
    with pytest.raises(manager.MissingStorageFolderError):
        manager.BenchmarksManager(None)


def test_list_returns_saved_records_in_order(tmp_path):
    db = manager.BenchmarksManager(tmp_path)
    device = _make_device()
    cfg_a = _make_config("engine-a")
    cfg_b = _make_config("engine-b")
    res_a = _make_result("model-a.gguf", 1.0)
    res_b = _make_result("model-b.gguf", 2.0)
    record_a = schemas.BenchmarkRecordV1(
        configuration=cfg_a,
        result=res_a,
        created_at="2024-01-01 00:00:00",
        device=device,
    )
    record_b = schemas.BenchmarkRecordV1(
        configuration=cfg_b,
        result=res_b,
        created_at="2024-01-02 00:00:00",
        device=device,
    )

    db.save([record_a, record_b])
    stored = db.list()

    assert len(stored) == 2
    assert stored[0].configuration.inference_engine == "engine-a"
    assert stored[1].configuration.inference_engine == "engine-b"
    assert stored[0].result.avg_ts == 1.0
    assert stored[1].result.avg_ts == 2.0

View File

@@ -143,6 +143,7 @@ def test_env_overrides_file_and_default():
],
)
def test_get_default_store(uid, is_root, expected):
get_default_store.cache_clear()
with patch("os.geteuid", return_value=uid):
assert get_default_store() == expected

View File

@@ -25,7 +25,7 @@ def get_config_fields():
}
config_fields = [field.name for field in fields(BaseConfig) if field.name not in excluded_fields]
config_fields.extend(('http_client', 'images', 'rag_images', 'user'))
config_fields.extend(('benchmarks', 'http_client', 'images', 'rag_images', 'user'))
return sorted(set(config_fields))
@@ -47,7 +47,7 @@ def get_documented_fields_in_conf():
documented = set()
# Subsections that contain their own field documentation (these fields should not be extracted)
subsections_with_fields = {'http_client', 'user'}
subsections_with_fields = {'benchmarks', 'http_client', 'user'}
# Track which section we're in to exclude nested fields under commented subsections
in_commented_nested_section = False
@@ -199,8 +199,8 @@ class TestConfigDocumentation:
missing = set(config_fields) - set(documented_fields)
assert not missing, (
f"The following CONFIG fields are missing from docs/ramalama.conf:\n"
f"{', '.join(sorted(missing))}\n\n"
f"The following CONFIG fields are missing from docs/ramalama.conf: "
f"`{', '.join(sorted(missing))}`. "
f"Please add documentation for these fields in docs/ramalama.conf"
)
@@ -211,11 +211,12 @@ class TestConfigDocumentation:
missing = set(config_fields) - set(documented_fields)
assert not missing, (
f"The following CONFIG fields are missing from docs/ramalama.conf.5.md:\n"
f"{', '.join(sorted(missing))}\n\n"
warning_message = (
f"The following CONFIG fields are missing from docs/ramalama.conf.5.md:"
f"`{', '.join(sorted(missing))}`. "
f"Please add documentation for these fields in docs/ramalama.conf.5.md"
)
assert not missing, warning_message
def test_no_undocumented_fields_in_conf(self):
"""Verify ramalama.conf doesn't document non-existent fields."""
@@ -225,8 +226,8 @@ class TestConfigDocumentation:
extra = set(documented_fields) - set(config_fields) - self.KNOWN_ALIASES
assert not extra, (
f"The following fields are documented in docs/ramalama.conf but not in CONFIG:\n"
f"{', '.join(sorted(extra))}\n\n"
f"The following fields are documented in docs/ramalama.conf but not in CONFIG:"
f"`{', '.join(sorted(extra))}`. "
f"These might be typos or outdated documentation."
)
@@ -238,8 +239,8 @@ class TestConfigDocumentation:
extra = set(documented_fields) - set(config_fields) - self.KNOWN_ALIASES
assert not extra, (
f"The following fields are documented in docs/ramalama.conf.5.md but not in CONFIG:\n"
f"{', '.join(sorted(extra))}\n\n"
f"The following fields are documented in docs/ramalama.conf.5.md but not in CONFIG: "
f"`{', '.join(sorted(extra))}`. "
f"These might be typos or outdated documentation."
)
@@ -258,7 +259,7 @@ class TestConfigDocumentation:
error_msg.append(f"Fields documented only in ramalama.conf.5.md:\n{', '.join(sorted(only_in_manpage))}")
assert not error_msg, (
"Documentation inconsistency between ramalama.conf and ramalama.conf.5.md:\n\n"
+ "\n\n".join(error_msg)
+ "\n\nBoth files should document the same configuration options."
"Documentation inconsistency between ramalama.conf and ramalama.conf.5.md:"
+ " ".join(error_msg)
+ ". Both files should document the same configuration options."
)