diff --git a/docs/ramalama-bench.1.md b/docs/ramalama-bench.1.md
index 4dbb75c3..2e592f22 100644
--- a/docs/ramalama-bench.1.md
+++ b/docs/ramalama-bench.1.md
@@ -51,6 +51,9 @@ process to be launched inside of the container.
 If an environment variable is specified without a value, the container engine checks
 the host environment for a value and set the variable only if it is set on the host.
 
+#### **--format**
+Set the output format of the benchmark results. Options include json and table (default: table).
+
 #### **--help**, **-h**
 show this help message and exit
 
diff --git a/docs/ramalama-benchmarks.1.md b/docs/ramalama-benchmarks.1.md
new file mode 100644
index 00000000..829b688e
--- /dev/null
+++ b/docs/ramalama-benchmarks.1.md
@@ -0,0 +1,46 @@
+% ramalama-benchmarks 1
+
+## NAME
+ramalama\-benchmarks - view and interact with historical benchmark results
+
+## SYNOPSIS
+**ramalama benchmarks** [*options*] *command* [*args*...]
+
+## DESCRIPTION
+View and interact with historical benchmark results.
+Results are stored as newline-delimited JSON (JSONL) in a `benchmarks.jsonl` file.
+The storage folder is shown in `ramalama benchmarks --help` and can be
+overridden via `ramalama.benchmarks.storage_folder` in `ramalama.conf`.
+
+## OPTIONS
+
+#### **--help**, **-h**
+show this help message and exit
+
+## COMMANDS
+
+#### **list**
+list benchmark results
+
+## LIST OPTIONS
+
+#### **--limit**=LIMIT
+limit number of results to display
+
+#### **--offset**=OFFSET
+offset for pagination (default: 0)
+
+#### **--format**={table,json}
+output format (table or json) (default: table)
+
+## EXAMPLES
+
+```
+ramalama benchmarks list
+```
+
+## SEE ALSO
+**[ramalama(1)](ramalama.1.md)**, **[ramalama-bench(1)](ramalama-bench.1.md)**, **[ramalama.conf(5)](ramalama.conf.5.md)**
+
+## HISTORY
+Jan 2026, Originally compiled by Ian Eaves
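As a companion to the man page above: each line of `benchmarks.jsonl` is one serialized benchmark record, so the history can be inspected with the standard library alone. A minimal sketch, with the storage path and field values assumed for illustration (the real default lives under the model store, as documented below):

```python
import json
from pathlib import Path

# Hypothetical path; the actual default is <model store>/benchmarks/benchmarks.jsonl.
storage_file = Path.home() / ".local/share/ramalama/benchmarks/benchmarks.jsonl"

for line in storage_file.read_text(encoding="utf-8").splitlines():
    if not line.strip():
        continue  # blank lines are skipped, matching parse_jsonl() in this patch
    record = json.loads(line)
    # Top-level keys written per record: version, created_at, device,
    # configuration, result.
    print(record["created_at"], record["result"].get("model_filename"))
```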
diff --git a/docs/ramalama.1.md b/docs/ramalama.1.md
index 408f44bb..a7a2be31 100644
--- a/docs/ramalama.1.md
+++ b/docs/ramalama.1.md
@@ -137,6 +137,7 @@ The default can be overridden in the ramalama.conf file.
 | Command                                           | Description                                                 |
 | ------------------------------------------------- | ----------------------------------------------------------- |
 | [ramalama-bench(1)](ramalama-bench.1.md)          |benchmark specified AI Model|
+| [ramalama-benchmarks(1)](ramalama-benchmarks.1.md)|view and interact with historical benchmark results|
 | [ramalama-chat(1)](ramalama-chat.1.md)            |OpenAI chat with the specified REST API URL|
 | [ramalama-containers(1)](ramalama-containers.1.md)|list all RamaLama containers|
 | [ramalama-convert(1)](ramalama-convert.1.md)      |convert AI Models from local storage to OCI Image|
diff --git a/docs/ramalama.conf b/docs/ramalama.conf
index ba526987..afb31e20 100644
--- a/docs/ramalama.conf
+++ b/docs/ramalama.conf
@@ -221,10 +221,22 @@
 
 [ramalama.user]
 
+#
 # Suppress the interactive prompt when running on macOS with a Podman VM
 # that doesn't support GPU acceleration (e.g., applehv provider).
 # When set to true, RamaLama will automatically proceed without GPU support
 # instead of asking for confirmation.
 # Can also be set via the `RAMALAMA_USER__NO_MISSING_GPU_PROMPT` environment variable.
 #
+
+[ramalama.benchmarks]
+#storage_folder = /benchmarks
+#
+# Manually specify where to save benchmark results.
+# By default, results are stored under the default model store directory
+# in benchmarks/benchmarks.jsonl.
+# Changing `ramalama.store` does not update this; set storage_folder explicitly.
+
+
+[ramalama.user]
 #no_missing_gpu_prompt = false
diff --git a/docs/ramalama.conf.5.md b/docs/ramalama.conf.5.md
index 8dd6ce3c..10391db9 100644
--- a/docs/ramalama.conf.5.md
+++ b/docs/ramalama.conf.5.md
@@ -267,6 +267,16 @@ Configuration settings for the openai hosted provider
 **api_key**=""
 
 Provider-specific API key used when invoking OpenAI-hosted transports. Overrides `RAMALAMA_API_KEY` when set.
 
+## RAMALAMA.BENCHMARKS TABLE
+The ramalama.benchmarks table contains benchmark-related settings.
+
+`[[ramalama.benchmarks]]`
+
+**storage_folder**="/benchmarks"
+
+Manually specify where to save benchmark results.
+By default, this will be stored in the default model store directory under `benchmarks/`.
+Changing `ramalama.store` does not update this; set `ramalama.benchmarks.storage_folder` explicitly if needed.
+
 ## RAMALAMA.USER TABLE
 The ramalama.user table contains user preference settings.
diff --git a/inference-spec/engines/llama.cpp.yaml b/inference-spec/engines/llama.cpp.yaml
index 19d84cb2..2d8ec4b7 100644
--- a/inference-spec/engines/llama.cpp.yaml
+++ b/inference-spec/engines/llama.cpp.yaml
@@ -117,7 +117,22 @@ commands:
     inference_engine:
       name: "llama-bench"
       binary: "llama-bench"
-      options: *bench_perplexity_options
+      options:
+        - name: "--model"
+          description: "The AI model to run"
+          value: "{{ model.model_path }}"
+        - name: "-ngl"
+          description: "Number of layers to offload to the GPU if available"
+          value: "{{ 999 if args.ngl < 0 else args.ngl }}"
+        - name: "-ngld"
+          description: "Number of draft model layers to offload to the GPU if available"
+          value: "{{ None if not args.model_draft else 999 if args.ngl < 0 else args.ngl }}"
+        - name: "--threads"
+          description: "Number of threads to use during generation"
+          value: "{{ args.threads }}"
+        - name: "-o"
+          description: "Output format printed to stdout"
+          value: "json"
   - name: rag
     inference_engine:
       name: "rag"
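The `-o json` option above is what makes this spec machine-readable: llama-bench prints its results as a JSON array on stdout, which the new bench pipeline parses. A sketch of that contract (the sample payload is invented and heavily abbreviated; real objects carry the full field set mirrored by `LlamaBenchResultV1` below):

```python
import json

# Invented stand-in for llama-bench's `-o json` stdout.
stdout = '[{"model_filename": "model.gguf", "n_prompt": 512, "n_gen": 0, "avg_ts": 41.2}]'

data = json.loads(stdout)
if not isinstance(data, list):  # parse_json() tolerates a single object the same way
    data = [data]
for row in data:
    print(f"pp{row['n_prompt']}: {row['avg_ts']:.2f} t/s")
```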
diff --git a/ramalama/benchmarks/errors.py b/ramalama/benchmarks/errors.py
new file mode 100644
index 00000000..5d27288d
--- /dev/null
+++ b/ramalama/benchmarks/errors.py
@@ -0,0 +1,12 @@
+class MissingStorageFolderError(Exception):
+    def __init__(self):
+        message = """
+No valid benchmarks storage folder could be determined.
+
+Set an explicit path via:
+RAMALAMA_BENCHMARKS__STORAGE_FOLDER=/absolute/path/to/benchmarks
+
+If this seems wrong for your setup, report it at:
+https://www.github.com/containers/ramalama/issues
+        """
+        super().__init__(message)
diff --git a/ramalama/benchmarks/manager.py b/ramalama/benchmarks/manager.py
new file mode 100644
index 00000000..aef9695b
--- /dev/null
+++ b/ramalama/benchmarks/manager.py
@@ -0,0 +1,50 @@
+import json
+import logging
+from dataclasses import asdict
+from functools import cached_property
+from pathlib import Path
+
+from ramalama.benchmarks.errors import MissingStorageFolderError
+from ramalama.benchmarks.schemas import BenchmarkRecord, DeviceInfoV1, get_benchmark_record
+from ramalama.benchmarks.utilities import parse_jsonl
+from ramalama.config import CONFIG
+from ramalama.log_levels import LogLevel
+
+logger = logging.getLogger("ramalama.benchmarks")
+logger.setLevel(CONFIG.log_level or LogLevel.WARNING)
+
+SCHEMA_VERSION = 1
+BENCHMARKS_FILENAME = "benchmarks.jsonl"
+
+
+class BenchmarksManager:
+    def __init__(self, storage_folder: str | Path | None):
+        if storage_folder is None:
+            raise MissingStorageFolderError
+
+        self.storage_folder = Path(storage_folder)
+        self.storage_file = self.storage_folder / BENCHMARKS_FILENAME
+        self.storage_file.parent.mkdir(parents=True, exist_ok=True)
+
+    @cached_property
+    def device_info(self) -> DeviceInfoV1:
+        return DeviceInfoV1.current_device_info()
+
+    def save(self, results: list[BenchmarkRecord] | BenchmarkRecord):
+        if not isinstance(results, list):
+            results = [results]
+
+        if len(results) == 0:
+            return
+
+        with self.storage_file.open("a", encoding="utf-8") as handle:
+            for record in results:
+                handle.write(json.dumps(asdict(record), ensure_ascii=True))
+                handle.write("\n")
+
+    def list(self) -> list[BenchmarkRecord]:
+        """List benchmark results from JSONL storage."""
+        if not self.storage_file.exists():
+            return []
+        content = self.storage_file.read_text(encoding="utf-8")
+        return [get_benchmark_record(result) for result in parse_jsonl(content)]
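A minimal usage sketch for the manager, with paths and values invented for illustration; normally `CONFIG.benchmarks.storage_folder` supplies the folder:

```python
from pathlib import Path

from ramalama.benchmarks.manager import BenchmarksManager
from ramalama.benchmarks.schemas import (
    BenchmarkRecordV1,
    LlamaBenchResultV1,
    TestConfigurationV1,
)

manager = BenchmarksManager(Path("/tmp/ramalama-benchmarks"))  # hypothetical folder

record = BenchmarkRecordV1(
    configuration=TestConfigurationV1(inference_engine="llama.cpp"),
    result=LlamaBenchResultV1(model_filename="model.gguf", avg_ts=42.0),
)
manager.save(record)        # appends one JSONL line; also accepts a list
print(len(manager.list()))  # records round-trip via get_benchmark_record()
```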
diff --git a/ramalama/benchmarks/schemas.py b/ramalama/benchmarks/schemas.py
new file mode 100644
index 00000000..393ceb60
--- /dev/null
+++ b/ramalama/benchmarks/schemas.py
@@ -0,0 +1,211 @@
+import platform
+import socket
+from dataclasses import dataclass, field, fields
+from datetime import datetime, timezone
+from functools import lru_cache
+from typing import Any, ClassVar, Literal, TypeVar, overload
+
+from ramalama.common import get_accel
+
+VersionerT = TypeVar("VersionerT")
+
+
+@dataclass
+class DeviceInfo:
+    pass
+
+
+@dataclass
+class DeviceInfoV1(DeviceInfo):
+    hostname: str
+    operating_system: str
+    cpu_info: str
+    accel: str
+    version: ClassVar[Literal["v1"]] = "v1"
+
+    @classmethod
+    @lru_cache(maxsize=1)
+    def current_device_info(cls) -> "DeviceInfoV1":
+        return cls(
+            hostname=socket.gethostname(),
+            operating_system=f"{platform.system()} {platform.release()}",
+            cpu_info=platform.processor() or platform.machine(),
+            accel=get_accel(),
+        )
+
+
+@dataclass
+class TestConfiguration:
+    pass
+
+
+@dataclass
+class TestConfigurationV1(TestConfiguration):
+    """Container configuration metadata for a benchmark run."""
+
+    container_image: str = ""
+    container_runtime: str = ""
+    inference_engine: str = ""
+    version: Literal["v1"] = "v1"
+    runtime_args: list[str] | None = None
+
+
+@dataclass
+class LlamaBenchResult:
+    pass
+
+
+@dataclass
+class LlamaBenchResultV1(LlamaBenchResult):
+    version: Literal["v1"] = "v1"
+    build_commit: str | None = None
+    build_number: int | None = None
+    backends: str | None = None
+    cpu_info: str | None = None
+    gpu_info: str | None = None
+    model_filename: str | None = None
+    model_type: str | None = None
+    model_size: int | None = None
+    model_n_params: int | None = None
+    n_batch: int | None = None
+    n_ubatch: int | None = None
+    n_threads: int | None = None
+    cpu_mask: str | None = None
+    cpu_strict: int | None = None
+    poll: int | None = None
+    type_k: str | None = None
+    type_v: str | None = None
+    n_gpu_layers: int | None = None
+    n_cpu_moe: int | None = None
+    split_mode: str | None = None
+    main_gpu: int | None = None
+    no_kv_offload: int | None = None
+    flash_attn: int | None = None
+    devices: str | None = None
+    tensor_split: str | None = None
+    tensor_buft_overrides: str | None = None
+    use_mmap: int | None = None
+    embeddings: int | None = None
+    no_op_offload: int | None = None
+    no_host: int | None = None
+    use_direct_io: int | None = None
+    n_prompt: int | None = None
+    n_gen: int | None = None
+    n_depth: int | None = None
+    test_time: str | None = None
+    avg_ns: int | None = None
+    stddev_ns: int | None = None
+    avg_ts: float | None = None
+    stddev_ts: float | None = None
+    samples_ns: str | None = None  # JSON array stored as string
+    samples_ts: str | None = None  # JSON array stored as string
+
+    @classmethod
+    def from_payload(cls, payload: dict) -> "LlamaBenchResult":
+        """Build a result from a llama-bench JSON/JSONL object."""
+        return cls(**{f.name: payload[f.name] for f in fields(cls) if f.name in payload})
+
+
+@dataclass
+class BenchmarkRecord:
+    pass
+
+
+@dataclass
+class BenchmarkRecordV1(BenchmarkRecord):
+    configuration: TestConfigurationV1
+    result: LlamaBenchResultV1
+    version: Literal["v1"] = "v1"
+    created_at: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
+    device: DeviceInfoV1 = field(default_factory=DeviceInfoV1.current_device_info)
+
+    @classmethod
+    def from_payload(cls, payload: dict) -> "BenchmarkRecordV1":
+        payload = {**payload}
+
+        if 'device' in payload:
+            payload['device'] = DeviceInfoV1(**payload.pop("device"))
+
+        configuration = TestConfigurationV1(**payload.pop('configuration', {}))
+        result = LlamaBenchResultV1(**payload.pop('result', {}))
+
+        return cls(configuration=configuration, result=result, **payload)
+
+
+@overload
+def get_device_info(payload: dict) -> DeviceInfoV1: ...
+
+
+@overload
+def get_device_info(payload: dict, version: Literal["v1"]) -> DeviceInfoV1: ...
+
+
+def get_device_info(payload: dict, version: Any = None) -> DeviceInfo:
+    if version is None:
+        version = payload.get('version', "v1")
+
+    if version == "v1":
+        return DeviceInfoV1(**payload)
+
+    raise NotImplementedError(f"No supported DeviceInfo schemas for version {version}")
+
+
+@overload
+def get_test_config(payload: dict) -> TestConfigurationV1: ...
+
+
+@overload
+def get_test_config(payload: dict, version: Literal["v1"]) -> TestConfigurationV1: ...
+
+
+def get_test_config(payload: dict, version: Any = None) -> TestConfiguration:
+    if version is None:
+        version = payload.get('version', "v1")
+
+    if version == "v1":
+        return TestConfigurationV1(**payload)
+
+    raise NotImplementedError(f"No supported TestConfiguration schemas for version {version}")
+
+
+@overload
+def get_llama_bench_result(payload: dict) -> LlamaBenchResultV1: ...
+
+
+@overload
+def get_llama_bench_result(payload: dict, version: Literal["v1"]) -> LlamaBenchResultV1: ...
+
+
+def get_llama_bench_result(payload: dict, version: Any = None) -> LlamaBenchResult:
+    if version is None:
+        version = payload.get('version', "v1")
+
+    if version == "v1":
+        return LlamaBenchResultV1(**payload)
+
+    raise NotImplementedError(f"No supported LlamaBench schemas for version {version}")
+
+
+@overload
+def get_benchmark_record(payload: dict) -> BenchmarkRecord: ...
+
+
+@overload
+def get_benchmark_record(payload: dict, version: Literal["v1"]) -> BenchmarkRecordV1: ...
+
+
+def get_benchmark_record(payload: dict, version: Any = None) -> BenchmarkRecord:
+    if version is None:
+        version = payload.get('version', "v1")
+
+    if version == "v1":
+        return BenchmarkRecordV1.from_payload(payload)
+
+    raise NotImplementedError(f"No supported benchmark schemas for version {version}")
+
+
+def normalize_benchmark_record(benchmark: BenchmarkRecord) -> BenchmarkRecordV1:
+    if isinstance(benchmark, BenchmarkRecordV1):
+        return benchmark
+
+    raise NotImplementedError(f"Received an unsupported benchmark record type {type(benchmark)}")
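The `get_*` helpers are the seam for future schema revisions: each dispatches on an embedded `version` key and currently only knows `"v1"`. A round-trip sketch with an invented payload:

```python
from dataclasses import asdict

from ramalama.benchmarks.schemas import BenchmarkRecordV1, get_benchmark_record

payload = {
    "version": "v1",
    "created_at": "2024-01-01T00:00:00+00:00",
    "device": {"hostname": "host", "operating_system": "TestOS 1.0",
               "cpu_info": "cpu", "accel": "none"},
    "configuration": {"inference_engine": "llama.cpp"},
    "result": {"model_filename": "model.gguf", "avg_ts": 42.0},
}

record = get_benchmark_record(payload)  # dispatches on payload["version"]
assert isinstance(record, BenchmarkRecordV1)
assert asdict(record)["result"]["avg_ts"] == 42.0  # asdict() restores the stored shape
```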
diff --git a/ramalama/benchmarks/utilities.py b/ramalama/benchmarks/utilities.py
new file mode 100644
index 00000000..8f97f573
--- /dev/null
+++ b/ramalama/benchmarks/utilities.py
@@ -0,0 +1,104 @@
+import json
+
+from ramalama.benchmarks.schemas import (
+    BenchmarkRecord,
+    BenchmarkRecordV1,
+    normalize_benchmark_record,
+)
+
+
+def parse_jsonl(content: str) -> list[dict]:
+    """Parse newline-delimited JSON benchmark results."""
+    results = []
+    for line in content.strip().split("\n"):
+        if not line.strip():
+            continue
+        results.append(json.loads(line))
+    return results
+
+
+def parse_json(content: str) -> list[dict]:
+    """Parse JSON array or single object benchmark results."""
+    data = json.loads(content)
+    if not isinstance(data, list):
+        data = [data]
+    return data
+
+
+def print_bench_results(records: list[BenchmarkRecord]):
+    """Format benchmark results as a table for display."""
+    if not records:
+        return
+    normalized_records: list[BenchmarkRecordV1] = [normalize_benchmark_record(result) for result in records]
+
+    rows: list[dict[str, object | None]] = []
+    for i, item in enumerate(normalized_records):
+        result = item.result
+        model = result.model_filename or ""
+        params = f"{result.model_n_params / 1e9:.2f} B" if result.model_n_params else "-"
+        backend = result.gpu_info or result.cpu_info or "CPU"
+        ngl = str(result.n_gpu_layers) if result.n_gpu_layers else "-"
+        threads = str(result.n_threads) if result.n_threads else "-"
+
+        # Format test type
+        if result.n_prompt and result.n_gen:
+            test = f"pp{result.n_prompt}+tg{result.n_gen}"
+        elif result.n_prompt:
+            test = f"pp{result.n_prompt}"
+        elif result.n_gen:
+            test = f"tg{result.n_gen}"
+        else:
+            test = "-"
+
+        # Format tokens/sec with stddev
+        if result.avg_ts and result.stddev_ts:
+            t_s = f"{result.avg_ts:.2f} ± {result.stddev_ts:.2f}"
+        elif result.avg_ts:
+            t_s = f"{result.avg_ts:.2f}"
+        else:
+            t_s = "-"
+
+        rows.append(
+            {
+                "id": i,
+                "model": model,
+                "params": params,
+                "backend": backend,
+                "ngl": ngl,
+                "threads": threads,
+                "test": test,
+                "t/s": t_s,
+                "engine": item.configuration.container_runtime,
+                "date": item.created_at,
+            }
+        )
+
+    optional_fields = ["id", "engine", "date"]
+    for field in optional_fields:
+        if all(not row.get(field) for row in rows):
+            for row in rows:
+                row.pop(field, None)
+
+    column_order = ["id", "model", "params", "backend", "ngl", "threads", "test", "t/s", "engine", "date"]
+    headers = [column for column in column_order if column in rows[0]]
+
+    col_widths: dict[str, int] = {}
+    for header in headers:
+        max_len = len(header)
+        for row in rows:
+            value = row.get(header)
+            text = "-" if value in (None, "") else str(value)
+            max_len = max(max_len, len(text))
+        col_widths[header] = max_len
+
+    header_row = " | ".join(header.ljust(col_widths[header]) for header in headers)
+    print(f"| {header_row} |")
+    print(f"| {'-' * len(header_row)} |")
+
+    for row in rows:
+        cells = []
+        for header in headers:
+            value = row.get(header)
+            text = "-" if value in (None, "") else str(value)
+            cells.append(text.ljust(col_widths[header]))
+        print(f"| {' | '.join(cells)} |")
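For reference, the renderer above produces output along these lines (values invented; note that the `id`, `engine`, and `date` columns are dropped when they are empty or falsy across all rows):

```
| id | model        | params | backend | ngl | threads | test  | t/s          | engine | date                      |
| ---------------------------------------------------------------------------------------------------------------- |
| 0  | model-a.gguf | 1.24 B | cpu     | -   | 4       | pp512 | 41.20 ± 0.10 | docker | 2024-01-01T00:00:00+00:00 |
| 1  | model-b.gguf | 1.24 B | cpu     | -   | 4       | tg128 | 12.05 ± 0.04 | docker | 2024-01-02T00:00:00+00:00 |
```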
diff --git a/ramalama/cli.py b/ramalama/cli.py
index a392b272..1d670001 100644
--- a/ramalama/cli.py
+++ b/ramalama/cli.py
@@ -7,11 +7,14 @@
 import shlex
 import subprocess
 import sys
 import urllib.error
+from dataclasses import asdict
 from datetime import datetime, timezone
 from textwrap import dedent
 from typing import Any, get_args
 from urllib.parse import urlparse
+from ramalama.benchmarks.manager import BenchmarksManager
+
 # if autocomplete doesn't exist, just do nothing, don't break
 try:
     import argcomplete
@@ -24,6 +27,7 @@ except Exception:
 import ramalama.chat as chat
 from ramalama import engine
 from ramalama.arg_types import DefaultArgsType
+from ramalama.benchmarks.utilities import print_bench_results
 from ramalama.chat_utils import default_prefix
 from ramalama.cli_arg_normalization import normalize_pull_arg
 from ramalama.command.factory import assemble_command
@@ -301,6 +305,7 @@ def configure_subcommands(parser):
     subparsers = parser.add_subparsers(dest="subcommand")
     subparsers.required = False
     bench_parser(subparsers)
+    benchmarks_parser(subparsers)
     chat_parser(subparsers)
     containers_parser(subparsers)
     convert_parser(subparsers)
@@ -324,6 +329,8 @@ def post_parse_setup(args):
     """Perform additional setup after parsing arguments."""
+    if getattr(args, "subcommand", None) == "benchmark":
+        args.subcommand = "bench"
 
     def map_https_to_transport(input: str) -> str:
         if input.startswith("https://") or input.startswith("http://"):
@@ -510,10 +517,71 @@ def add_network_argument(parser, dflt: str | None = "none"):
 def bench_parser(subparsers):
     parser = subparsers.add_parser("bench", aliases=["benchmark"], help="benchmark specified AI Model")
     runtime_options(parser, "bench")
-    parser.add_argument("MODEL", completer=local_models)  # positional argument
+    parser.add_argument("MODEL", completer=local_models)
+    parser.add_argument(
+        "--format",
+        choices=["table", "json"],
+        default="table",
+        help="output format (table or json)",
+    )
     parser.set_defaults(func=bench_cli)
 
 
+def benchmarks_parser(subparsers):
+    storage_folder = CONFIG.benchmarks.storage_folder
+    epilog = f"Storage folder: {storage_folder}" if storage_folder else "Storage folder: not configured"
+    parser = subparsers.add_parser(
+        "benchmarks",
+        help="manage and view benchmark results",
+        epilog=epilog,
+    )
+    parser.set_defaults(func=lambda _: parser.print_help())
+
+    benchmarks_subparsers = parser.add_subparsers(dest="benchmarks_command", metavar="[command]")
+
+    list_parser = benchmarks_subparsers.add_parser("list", help="list benchmark results")
+    list_parser.add_argument(
+        "--limit",
+        type=int,
+        default=None,
+        help="limit number of results to display",
+    )
+    list_parser.add_argument(
+        "--offset",
+        type=int,
+        default=0,
+        help="offset for pagination",
+    )
+    list_parser.add_argument(
+        "--format",
+        choices=["table", "json"],
+        default="table",
+        help="output format (table or json)",
+    )
+    list_parser.set_defaults(func=benchmarks_list_cli)
+
+
+def benchmarks_list_cli(args):
+    """Display a list of benchmark results from storage."""
+
+    bench_manager = BenchmarksManager(CONFIG.benchmarks.storage_folder)
+    results = bench_manager.list()
+
+    if not results:
+        print("No benchmark results found")
+        return
+
+    # Apply --offset/--limit pagination before rendering.
+    results = results[args.offset:]
+    if args.limit is not None:
+        results = results[: args.limit]
+
+    if args.format == "json":
+        output = [asdict(item) for item in results]
+        print(json.dumps(output, indent=2, sort_keys=True))
+    else:
+        print_bench_results(results)
+
+
 def containers_parser(subparsers):
     parser = subparsers.add_parser("containers", aliases=["ps"], help="list all RamaLama containers")
     parser.add_argument(
diff --git a/ramalama/command/context.py b/ramalama/command/context.py
index e92e9a66..3d0173be 100644
--- a/ramalama/command/context.py
+++ b/ramalama/command/context.py
@@ -2,13 +2,12 @@ import argparse
 import os
 from typing import Optional
 
-from ramalama.common import check_metal, check_nvidia
+from ramalama.common import check_metal, check_nvidia, get_accel_env_vars
 from ramalama.console import should_colorize
 from ramalama.transports.transport_factory import CLASS_MODEL_TYPES, New
 
 
 class RamalamaArgsContext:
-
     def __init__(self) -> None:
         self.cache_reuse: Optional[int] = None
         self.container: Optional[bool] = None
@@ -52,7 +51,6 @@ class RamalamaArgsContext:
 
 
 class RamalamaRagGenArgsContext:
-
     def __init__(self) -> None:
         self.debug: bool | None = None
         self.format: str | None = None
@@ -74,7 +72,6 @@
 
 
 class RamalamaRagArgsContext:
-
     def __init__(self) -> None:
         self.debug: bool | None = None
         self.port: str | None = None
@@ -92,7 +89,6 @@
 
 
 class RamalamaModelContext:
-
     def __init__(self, model: CLASS_MODEL_TYPES, is_container: bool, should_generate: bool, dry_run: bool):
         self.model = model
         self.is_container = is_container
@@ -128,7 +124,6 @@
 
 
 class RamalamaHostContext:
-
     def __init__(
         self, is_container: bool, uses_nvidia: bool, uses_metal: bool, should_colorize: bool, rpc_nodes: Optional[str]
     ):
@@ -140,7 +135,6 @@
 
 
 class RamalamaCommandContext:
-
     def __init__(
         self,
         args: RamalamaArgsContext | RamalamaRagGenArgsContext | RamalamaRagArgsContext,
@@ -169,9 +163,13 @@
             model = cli_args.model
         else:
             model = None
+
+        skip_gpu_probe = should_generate or bool(get_accel_env_vars())
+        uses_nvidia = True if skip_gpu_probe else (check_nvidia() is None)
+
         host = RamalamaHostContext(
             is_container,
-            check_nvidia() is None,
+            uses_nvidia,
             check_metal(argparse.Namespace(**{"container": is_container})),
             should_colorize(),
             os.getenv("RAMALAMA_LLAMACPP_RPC_NODES", None),
diff --git a/ramalama/command/factory.py b/ramalama/command/factory.py
index 0b73c2f9..d56e9b47 100644
--- a/ramalama/command/factory.py
+++ b/ramalama/command/factory.py
@@ -19,7 +19,6 @@ def is_truthy(resolved_stmt: str) -> bool:
 
 
 class CommandFactory:
-
     def __init__(self, spec_files: dict[str, Path], schema_files: dict[str, Path]):
         self.spec_files = spec_files
         self.schema_files = schema_files
diff --git a/ramalama/config.py b/ramalama/config.py
index 36b1c2ed..dc448855 100644
--- a/ramalama/config.py
+++ b/ramalama/config.py
@@ -2,6 +2,7 @@ import json
 import os
 import sys
 from dataclasses import dataclass, field, fields
+from functools import lru_cache
 from pathlib import Path
 from typing import Any, Literal, Mapping, TypeAlias
 
@@ -79,6 +80,7 @@ def get_default_engine() -> SUPPORTED_ENGINES | None:
     return "docker" if available("docker") else None
 
 
+@lru_cache(maxsize=1)
 def get_default_store() -> str:
     # Check if running as root (Unix only)
     if hasattr(os, 'geteuid') and os.geteuid() == 0:
@@ -136,6 +138,22 @@ def coerce_to_bool(value: Any) -> bool:
     raise ValueError(f"Cannot coerce {value!r} to bool")
 
 
+def get_storage_folder(base_path: str | None = None):
+    if base_path is None:
+        base_path = get_default_store()
+
+    return os.path.join(base_path, "benchmarks")
+
+
+@dataclass
+class Benchmarks:
+    storage_folder: str = field(default_factory=get_storage_folder)
+    disable: bool = False
+
+    def __post_init__(self):
+        os.makedirs(self.storage_folder, exist_ok=True)
+
+
 @dataclass
 class UserConfig:
     no_missing_gpu_prompt: bool = False
@@ -225,6 +243,7 @@ class HTTPClientConfig:
 class BaseConfig:
     api: str = "none"
     api_key: str | None = None
+    benchmarks: Benchmarks = field(default_factory=Benchmarks)
     cache_reuse: int = 256
     carimage: str = "registry.access.redhat.com/ubi10-micro:latest"
     container: bool = None  # type: ignore
@@ -235,12 +254,15 @@ class BaseConfig:
     dryrun: bool = False
     engine: SUPPORTED_ENGINES | None = field(default_factory=get_default_engine)
     env: list[str] = field(default_factory=list)
+    gguf_quantization_mode: GGUF_QUANTIZATION_MODES = DEFAULT_GGUF_QUANTIZATION_MODE
     host: str = "0.0.0.0"
+    http_client: HTTPClientConfig = field(default_factory=HTTPClientConfig)
     image: str = None  # type: ignore
     images: RamalamaImages = field(default_factory=RamalamaImages)
     rag_image: str | None = None
     rag_images: RamalamaRagImages = field(default_factory=RamalamaRagImages)
     keep_groups: bool = False
+    log_level: LogLevel | None = None
     max_tokens: int = 0
     ngl: int = -1
     ocr: bool = False
@@ -260,9 +282,6 @@ class BaseConfig:
     transport: str = "ollama"
     user: UserConfig = field(default_factory=UserConfig)
     verify: bool = True
-    gguf_quantization_mode: GGUF_QUANTIZATION_MODES = DEFAULT_GGUF_QUANTIZATION_MODE
-    http_client: HTTPClientConfig = field(default_factory=HTTPClientConfig)
-    log_level: LogLevel | None = None
     provider: ProviderConfig = field(default_factory=ProviderConfig)
 
     def __post_init__(self):
@@ -297,30 +316,26 @@ class Config(LayeredMixin, BaseConfig):
 
 def load_file_config() -> dict[str, Any]:
     parser = TOMLParser()
-    config_path = os.getenv("RAMALAMA_CONFIG")
+    config_paths: list[str] = []
 
-    if config_path and os.path.exists(config_path):
-        config = parser.parse_file(config_path)
-        config = config.get("ramalama", {})
-        config['settings'] = {'config_files': [config_path]}
-        if log_level := config.get("log_level"):
-            config["log_level"] = coerce_log_level(log_level)
-        return config
+    if (config_path := os.getenv("RAMALAMA_CONFIG", None)) and os.path.exists(config_path):
+        config_paths.append(config_path)
+    else:
+        default_config_paths = [os.path.join(conf_dir, "ramalama.conf") for conf_dir in DEFAULT_CONFIG_DIRS]
 
-    config = {}
-    default_config_paths = [os.path.join(conf_dir, "ramalama.conf") for conf_dir in DEFAULT_CONFIG_DIRS]
+        for path in default_config_paths:
+            if os.path.exists(path):
+                config_paths.append(str(path))
 
-    config_paths = []
-    for path in default_config_paths:
-        if os.path.exists(path):
-            config_paths.append(str(path))
-            parser.parse_file(path)
-        path_str = f"{path}.d"
-        if os.path.isdir(path_str):
-            for conf_file in sorted(Path(path_str).glob("*.conf")):
-                config_paths.append(str(conf_file))
-                parser.parse_file(conf_file)
-    config = parser.data
+            path_str = f"{path}.d"
+            if os.path.isdir(path_str):
+                for conf_file in sorted(Path(path_str).glob("*.conf")):
+                    config_paths.append(str(conf_file))
+
+    for file in config_paths:
+        parser.parse_file(file)
+
+    config: dict[str, Any] = parser.data
 
     if config:
         config = config.get('ramalama', {})
         config['settings'] = {'config_files': config_paths}
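A sketch of how the default storage location resolves (the rootless store path is assumed for illustration; the environment-variable spelling follows the `RAMALAMA_<TABLE>__<KEY>` pattern that docs/ramalama.conf documents for `ramalama.user`):

```python
import os

# Mirrors get_storage_folder(): benchmarks live under the model store.
store = os.path.expanduser("~/.local/share/ramalama")  # assumed rootless default
print(os.path.join(store, "benchmarks"))  # benchmarks.jsonl is created in here

# Assumed override spelling, by analogy with RAMALAMA_USER__NO_MISSING_GPU_PROMPT:
os.environ["RAMALAMA_BENCHMARKS__STORAGE_FOLDER"] = "/var/lib/ramalama/benchmarks"
```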
diff --git a/ramalama/transports/base.py b/ramalama/transports/base.py
index d686f48d..a30a045d 100644
--- a/ramalama/transports/base.py
+++ b/ramalama/transports/base.py
@@ -1,4 +1,5 @@
 import copy
+import json
 import os
 import platform
 import random
@@ -8,14 +9,19 @@ import sys
 import time
 from abc import ABC, abstractmethod
 from functools import cached_property
-from typing import TYPE_CHECKING, Any, Dict, Optional
+from typing import TYPE_CHECKING, Any, Optional
 
 from ramalama.common import ContainerEntryPoint
 
 if TYPE_CHECKING:
     from ramalama.chat import ChatOperationalArgs
 
+from datetime import datetime, timezone
+
 import ramalama.chat as chat
+from ramalama.benchmarks.manager import BenchmarksManager
+from ramalama.benchmarks.schemas import BenchmarkRecord, BenchmarkRecordV1, get_benchmark_record
+from ramalama.benchmarks.utilities import parse_json, print_bench_results
 from ramalama.common import (
     MNT_DIR,
     MNT_FILE_DRAFT,
@@ -25,6 +31,7 @@ from ramalama.common import (
     is_split_file_model,
     perror,
     populate_volume_from_image,
+    run_cmd,
     set_accel_env_vars,
 )
 from ramalama.compose import Compose
@@ -463,7 +470,52 @@ class Transport(TransportBase):
 
     def bench(self, args, cmd: list[str]):
         set_accel_env_vars()
-        self.execute_command(cmd, args)
+
+        output_format = getattr(args, "format", "table")
+
+        if args.dryrun:
+            if args.container:
+                self.engine.dryrun()
+            else:
+                dry_run(cmd)
+
+            return
+        elif args.container:
+            self.setup_container(args)
+            self.setup_mounts(args)
+            self.engine.add([args.image] + cmd)
+            result = self.engine.run_process()
+        else:
+            result = run_cmd(cmd, encoding="utf-8")
+
+        try:
+            bench_results = parse_json(result.stdout)
+        except (json.JSONDecodeError, ValueError):
+            message = f"Could not parse benchmark output. Expected JSON but got:\n{result.stdout}"
+            raise ValueError(message)
+
+        base_payload: dict = {
+            "created_at": datetime.now(timezone.utc).isoformat(),
+            "configuration": {
+                "container_image": args.image,
+                "container_runtime": args.engine,
+                "inference_engine": args.runtime,
+                "runtime_args": cmd,
+            },
+        }
+        results: list[BenchmarkRecord] = list()
+        for bench_result in bench_results:
+            result_record: BenchmarkRecordV1 = get_benchmark_record({"result": bench_result, **base_payload}, "v1")
+            results.append(result_record)
+
+        if output_format == "json":
+            print(result.stdout)
+        else:
+            print_bench_results(results)
+
+        if not CONFIG.benchmarks.disable:
+            bench_manager = BenchmarksManager(CONFIG.benchmarks.storage_folder)
+            bench_manager.save(results)
 
     def run(self, args, cmd: list[str]):
         # The Run command will first launch a daemonized service
@@ -766,7 +818,7 @@ class Transport(TransportBase):
         compose = Compose(self.model_name, model_paths, chat_template_paths, mmproj_paths, args, exec_args)
         compose.generate().write(output_dir)
 
-    def inspect_metadata(self) -> Dict[str, Any]:
+    def inspect_metadata(self) -> dict[str, Any]:
         model_path = self._get_entry_model_path(False, False, False)
         if GGUFInfoParser.is_model_gguf(model_path):
             return GGUFInfoParser.parse_metadata(model_path).data
diff --git a/test/e2e/test_bench.py b/test/e2e/test_bench.py
index d3c70ff1..40ff8f6e 100644
--- a/test/e2e/test_bench.py
+++ b/test/e2e/test_bench.py
@@ -7,7 +7,7 @@ import pytest
 
 @pytest.mark.e2e
 @skip_if_no_llama_bench
-def test_model_and_size_columns(test_model):
+def test_model_and_params_columns(test_model):
     result = check_output(["ramalama", "bench", "-t", "2", test_model])
 
-    assert re.search(r"\|\s+model\s+\|\s+size", result)
+    assert re.search(r"\|\s+model\s+\|\s+params", result)
diff --git a/test/system/002-bench.bats b/test/system/002-bench.bats
index e7518606..341c6409 100755
--- a/test/system/002-bench.bats
+++ b/test/system/002-bench.bats
@@ -17,7 +17,7 @@ function setup() {
 @test "ramalama bench" {
     skip_if_nocontainer
     run_ramalama bench -t 2 $(test_model smollm:135m)
-    is "$output" ".*model.*size.*" "model and size in output"
+    is "$output" ".*model.*params.*" "model and params in output"
 }
 
 # vim: filetype=sh
diff --git a/test/system/015-help.bats b/test/system/015-help.bats
index 9d487908..51e8250b 100644
--- a/test/system/015-help.bats
+++ b/test/system/015-help.bats
@@ -50,7 +50,7 @@ function check_help() {
     # If usage lists no arguments (strings in ALL CAPS), confirm
     # by running with 'invalid-arg' and expecting failure.
     if ! expr "$usage" : '.*[A-Z]' >/dev/null; then
-        if [ "$cmd" != "help" ] && [ "$cmd" != "daemon" ]; then
+        if [ "$cmd" != "help" ] && [ "$cmd" != "daemon" ] && [ "$cmd" != "benchmarks" ]; then
             dprint "$command_string invalid-arg"
             run_ramalama '?' "$@" $cmd invalid-arg
             is "$status" 2 \
"$@" $cmd invalid-arg is "$status" 2 \ diff --git a/test/unit/test_benchmarks_manager.py b/test/unit/test_benchmarks_manager.py new file mode 100644 index 00000000..a67d8633 --- /dev/null +++ b/test/unit/test_benchmarks_manager.py @@ -0,0 +1,109 @@ +import json + +import pytest + +from ramalama.benchmarks import manager, schemas + + +def _make_config(engine: str) -> schemas.TestConfigurationV1: + return schemas.TestConfigurationV1( + container_image="quay.io/ramalama/ramalama:latest", + container_runtime="docker", + inference_engine=engine, + runtime_args={"threads": 2}, + ) + + +def _make_result(model_name: str, avg_ts: float) -> schemas.LlamaBenchResultV1: + return schemas.LlamaBenchResultV1( + build_commit="abc123", + build_number=1, + cpu_info="cpu", + gpu_info="gpu", + model_filename=model_name, + n_threads=2, + n_prompt=8, + n_gen=16, + avg_ts=avg_ts, + stddev_ts=0.1, + ) + + +def _make_device() -> schemas.DeviceInfoV1: + return schemas.DeviceInfoV1( + hostname="host", + operating_system="TestOS 1.0", + cpu_info="cpu", + accel="none", + ) + + +def test_save_benchmark_record_writes_jsonl(tmp_path): + db = manager.BenchmarksManager(tmp_path) + cfg = _make_config("llama.cpp") + res = _make_result("model.gguf", 1.5) + device = _make_device() + record = schemas.BenchmarkRecordV1( + configuration=cfg, + result=res, + created_at="2024-01-01 00:00:00", + device=device, + ) + + db.save(record) + + assert db.storage_file.exists() + payload = json.loads(db.storage_file.read_text().strip()) + + assert payload["version"] == "v1" + assert payload["created_at"] == "2024-01-01 00:00:00" + assert payload["configuration"]["inference_engine"] == "llama.cpp" + assert payload["result"]["model_filename"] == "model.gguf" + assert payload["device"]["hostname"] == "host" + + +def test_list_empty_returns_empty_list(tmp_path): + db = manager.BenchmarksManager(tmp_path) + + records = db.list() + + assert records == [] + + +def test_manager_missing_storage_folder_raises(): + with pytest.raises(manager.MissingStorageFolderError): + manager.BenchmarksManager(None) + + +def test_list_returns_saved_records_in_order(tmp_path): + db = manager.BenchmarksManager(tmp_path) + device = _make_device() + + cfg_a = _make_config("engine-a") + cfg_b = _make_config("engine-b") + + res_a = _make_result("model-a.gguf", 1.0) + res_b = _make_result("model-b.gguf", 2.0) + + record_a = schemas.BenchmarkRecordV1( + configuration=cfg_a, + result=res_a, + created_at="2024-01-01 00:00:00", + device=device, + ) + record_b = schemas.BenchmarkRecordV1( + configuration=cfg_b, + result=res_b, + created_at="2024-01-02 00:00:00", + device=device, + ) + + db.save([record_a, record_b]) + + stored = db.list() + assert len(stored) == 2 + assert stored[0].configuration.inference_engine == "engine-a" + assert stored[1].configuration.inference_engine == "engine-b" + + assert stored[0].result.avg_ts == 1.0 + assert stored[1].result.avg_ts == 2.0 diff --git a/test/unit/test_config.py b/test/unit/test_config.py index 62e10ef8..b58e0b46 100644 --- a/test/unit/test_config.py +++ b/test/unit/test_config.py @@ -143,6 +143,7 @@ def test_env_overrides_file_and_default(): ], ) def test_get_default_store(uid, is_root, expected): + get_default_store.cache_clear() with patch("os.geteuid", return_value=uid): assert get_default_store() == expected diff --git a/test/unit/test_config_documentation.py b/test/unit/test_config_documentation.py index 1eaef62a..35acf5dd 100644 --- a/test/unit/test_config_documentation.py +++ b/test/unit/test_config_documentation.py @@ -25,7 
diff --git a/test/unit/test_config_documentation.py b/test/unit/test_config_documentation.py
index 1eaef62a..35acf5dd 100644
--- a/test/unit/test_config_documentation.py
+++ b/test/unit/test_config_documentation.py
@@ -25,7 +25,7 @@ def get_config_fields():
     }
 
     config_fields = [field.name for field in fields(BaseConfig) if field.name not in excluded_fields]
-    config_fields.extend(('http_client', 'images', 'rag_images', 'user'))
+    config_fields.extend(('benchmarks', 'http_client', 'images', 'rag_images', 'user'))
 
     return sorted(set(config_fields))
 
@@ -47,7 +47,7 @@ def get_documented_fields_in_conf():
     documented = set()
 
     # Subsections that contain their own field documentation (these fields should not be extracted)
-    subsections_with_fields = {'http_client', 'user'}
+    subsections_with_fields = {'benchmarks', 'http_client', 'user'}
 
     # Track which section we're in to exclude nested fields under commented subsections
     in_commented_nested_section = False
@@ -199,8 +199,8 @@ class TestConfigDocumentation:
         missing = set(config_fields) - set(documented_fields)
 
         assert not missing, (
-            f"The following CONFIG fields are missing from docs/ramalama.conf:\n"
-            f"{', '.join(sorted(missing))}\n\n"
+            f"The following CONFIG fields are missing from docs/ramalama.conf: "
+            f"`{', '.join(sorted(missing))}`. "
             f"Please add documentation for these fields in docs/ramalama.conf"
         )
@@ -211,11 +211,12 @@
         missing = set(config_fields) - set(documented_fields)
 
-        assert not missing, (
-            f"The following CONFIG fields are missing from docs/ramalama.conf.5.md:\n"
-            f"{', '.join(sorted(missing))}\n\n"
+        warning_message = (
+            f"The following CONFIG fields are missing from docs/ramalama.conf.5.md: "
+            f"`{', '.join(sorted(missing))}`. "
             f"Please add documentation for these fields in docs/ramalama.conf.5.md"
         )
+        assert not missing, warning_message
 
     def test_no_undocumented_fields_in_conf(self):
         """Verify ramalama.conf doesn't document non-existent fields."""
@@ -225,8 +226,8 @@
         extra = set(documented_fields) - set(config_fields) - self.KNOWN_ALIASES
 
         assert not extra, (
-            f"The following fields are documented in docs/ramalama.conf but not in CONFIG:\n"
-            f"{', '.join(sorted(extra))}\n\n"
+            f"The following fields are documented in docs/ramalama.conf but not in CONFIG: "
+            f"`{', '.join(sorted(extra))}`. "
             f"These might be typos or outdated documentation."
         )
@@ -238,8 +239,8 @@
         extra = set(documented_fields) - set(config_fields) - self.KNOWN_ALIASES
 
         assert not extra, (
-            f"The following fields are documented in docs/ramalama.conf.5.md but not in CONFIG:\n"
-            f"{', '.join(sorted(extra))}\n\n"
+            f"The following fields are documented in docs/ramalama.conf.5.md but not in CONFIG: "
+            f"`{', '.join(sorted(extra))}`. "
            f"These might be typos or outdated documentation."
        )
@@ -258,7 +259,7 @@
             error_msg.append(f"Fields documented only in ramalama.conf.5.md:\n{', '.join(sorted(only_in_manpage))}")
 
         assert not error_msg, (
-            "Documentation inconsistency between ramalama.conf and ramalama.conf.5.md:\n\n"
-            + "\n\n".join(error_msg)
-            + "\n\nBoth files should document the same configuration options."
+            "Documentation inconsistency between ramalama.conf and ramalama.conf.5.md: "
+            + " ".join(error_msg)
+            + ". Both files should document the same configuration options."
         )
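Taken together, a typical flow after this change looks like the following (commands use only the flags added above; output elided):

```
ramalama bench -t 2 smollm:135m           # run llama-bench, print a table, record the run
ramalama bench --format json smollm:135m  # print the raw llama-bench JSON instead
ramalama benchmarks list                  # table view of the stored history
ramalama benchmarks list --format json    # the same records as JSON
```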