1
0
mirror of https://github.com/containers/ramalama.git synced 2026-02-05 06:46:39 +01:00

Fix llama-stack oci runtime on CUDA

Typically the container runtime for CUDA is specified using the `podman
--runtime` arg.
However llama-stack uses `podman kube play`, which has no way to override the runtime.
Use a temporary containers.conf file to do this instead.

Signed-off-by: Oliver Walsh <owalsh@redhat.com>
This commit is contained in:
Oliver Walsh
2025-12-18 17:15:38 +00:00
parent 06d544b843
commit a1f0440f99

View File

@@ -3,7 +3,7 @@ import platform
import ramalama.kube as kube
import ramalama.quadlet as quadlet
from ramalama.common import check_nvidia, exec_cmd, genname, get_accel_env_vars, tagged_image
from ramalama.common import check_nvidia, exec_cmd, genname, get_accel_env_vars, run_cmd, tagged_image
from ramalama.compat import NamedTemporaryFile
from ramalama.config import CONFIG
from ramalama.engine import add_labels
@@ -20,7 +20,7 @@ class Stack:
def __init__(self, args):
self.args = args
self.name = getattr(args, "name", None) or genname()
if os.path.basename(args.engine) != "podman":
if not os.path.basename(args.engine).startswith("podman"):
raise ValueError("llama-stack requires use of the Podman container engine")
self.host = "0.0.0.0"
self.model = New(args.MODEL, args)
@@ -231,7 +231,43 @@ spec:
exec_args.append("--wait")
exec_args.append(yaml_file.name)
exec_cmd(exec_args)
self._exec_with_runtime(exec_args)
def _exec_with_runtime(self, exec_args):
    """Execute *exec_args*, overriding the OCI runtime via a temporary containers.conf.

    ``podman kube play`` has no ``--runtime`` flag, so when an explicit
    ``--oci-runtime`` was given (or CUDA requires the NVIDIA runtime) we point
    ``CONTAINERS_CONF_OVERRIDE`` at a temporary containers.conf that remaps
    podman's default runtime, run the command, then restore the environment.

    :param exec_args: argv list handed to exec_cmd
    :return: whatever exec_cmd returns (it may not return at all if it execs)
    """
    # Prefer an explicitly requested runtime; otherwise fall back to the
    # NVIDIA runtime when CUDA is detected and the binary is executable.
    oci_runtime = None
    if getattr(self.args, "oci_runtime", None):
        oci_runtime = self.args.oci_runtime
    elif check_nvidia() == "cuda" and os.access("/usr/bin/nvidia-container-runtime", os.X_OK):
        oci_runtime = "/usr/bin/nvidia-container-runtime"
    if not oci_runtime:
        # No override required: run directly with the ambient configuration.
        return exec_cmd(exec_args)
    # Ask podman which runtime name it would use, so we remap that exact name.
    oci_runtime_name = (
        run_cmd([self.args.engine, "info", "--format", "{{.Host.OCIRuntime.Name}}"]).stdout.decode("utf-8").strip()
    )
    # Keep the file around under --debug so the user can inspect it.
    with NamedTemporaryFile(
        mode="w", prefix="RamaLama_", suffix=".conf", delete=not self.args.debug, delete_on_close=False
    ) as containers_conf:
        containers_conf.write(
            f"""\
[engine.runtimes]
{oci_runtime_name} = [ '{oci_runtime}' ]
"""
        )
        # Close so podman can read the fully-flushed file while we still
        # hold the NamedTemporaryFile context (delete_on_close=False).
        containers_conf.close()
        old_containers_conf = os.environ.get("CONTAINERS_CONF_OVERRIDE")
        try:
            os.environ["CONTAINERS_CONF_OVERRIDE"] = containers_conf.name
            return exec_cmd(exec_args)
        finally:
            # BUGFIX: compare against None, not truthiness — a pre-existing
            # empty-string CONTAINERS_CONF_OVERRIDE must be restored, not
            # deleted, or we would change the caller's environment.
            if old_containers_conf is not None:
                os.environ["CONTAINERS_CONF_OVERRIDE"] = old_containers_conf
            else:
                del os.environ["CONTAINERS_CONF_OVERRIDE"]
def stop(self):
with NamedTemporaryFile(