mirror of
https://github.com/containers/ramalama.git
synced 2026-02-05 06:46:39 +01:00
Fix llama-stack oci runtime on CUDA
Typically the container runtime for CUDA is specified using the `podman --runtime` arg. However llama-stack uses `podman kube play` which has no way to override the runtime. Use a temporary containers.conf file to do this instead. Signed-off-by: Oliver Walsh <owalsh@redhat.com>
This commit is contained in:
@@ -3,7 +3,7 @@ import platform
|
||||
|
||||
import ramalama.kube as kube
|
||||
import ramalama.quadlet as quadlet
|
||||
from ramalama.common import check_nvidia, exec_cmd, genname, get_accel_env_vars, tagged_image
|
||||
from ramalama.common import check_nvidia, exec_cmd, genname, get_accel_env_vars, run_cmd, tagged_image
|
||||
from ramalama.compat import NamedTemporaryFile
|
||||
from ramalama.config import CONFIG
|
||||
from ramalama.engine import add_labels
|
||||
@@ -20,7 +20,7 @@ class Stack:
|
||||
def __init__(self, args):
|
||||
self.args = args
|
||||
self.name = getattr(args, "name", None) or genname()
|
||||
if os.path.basename(args.engine) != "podman":
|
||||
if not os.path.basename(args.engine).startswith("podman"):
|
||||
raise ValueError("llama-stack requires use of the Podman container engine")
|
||||
self.host = "0.0.0.0"
|
||||
self.model = New(args.MODEL, args)
|
||||
@@ -231,7 +231,43 @@ spec:
|
||||
exec_args.append("--wait")
|
||||
|
||||
exec_args.append(yaml_file.name)
|
||||
exec_cmd(exec_args)
|
||||
self._exec_with_runtime(exec_args)
|
||||
|
||||
def _exec_with_runtime(self, exec_args):
    """Execute *exec_args*, forcing a specific OCI runtime when one is required.

    ``podman kube play`` has no ``--runtime`` option, so the runtime cannot
    be overridden on the command line. Instead the override is injected via
    a temporary containers.conf file referenced by the
    ``CONTAINERS_CONF_OVERRIDE`` environment variable.

    :param exec_args: full command line (list of strings) to hand to exec_cmd.
    :returns: whatever exec_cmd returns (exec_cmd may not return at all if it
              replaces the current process).
    """
    # Pick the runtime to force: an explicit --oci-runtime argument wins;
    # otherwise fall back to nvidia-container-runtime when CUDA is detected
    # and the runtime binary is present and executable.
    oci_runtime = getattr(self.args, "oci_runtime", None) or None
    if not oci_runtime and check_nvidia() == "cuda" and os.access("/usr/bin/nvidia-container-runtime", os.X_OK):
        oci_runtime = "/usr/bin/nvidia-container-runtime"

    if not oci_runtime:
        # No override needed; run with podman's default runtime.
        return exec_cmd(exec_args)

    # Name of the runtime podman currently uses (e.g. "crun"); the override
    # conf remaps this name onto the chosen runtime binary.
    oci_runtime_name = (
        run_cmd([self.args.engine, "info", "--format", "{{.Host.OCIRuntime.Name}}"]).stdout.decode("utf-8").strip()
    )

    # Keep the temp conf around in debug mode so the user can inspect it.
    with NamedTemporaryFile(
        mode="w", prefix="RamaLama_", suffix=".conf", delete=not self.args.debug, delete_on_close=False
    ) as containers_conf:
        containers_conf.write(
            f"""\
[engine.runtimes]
{oci_runtime_name} = [ '{oci_runtime}' ]
"""
        )
        # Close so podman can read the fully-flushed file (delete_on_close=False
        # keeps it on disk until the context manager exits).
        containers_conf.close()
        old_containers_conf = os.environ.get("CONTAINERS_CONF_OVERRIDE")
        try:
            os.environ["CONTAINERS_CONF_OVERRIDE"] = containers_conf.name
            return exec_cmd(exec_args)
        finally:
            # Fix: compare against None, not truthiness — a pre-existing
            # *empty-string* value must be restored, not deleted.
            if old_containers_conf is not None:
                os.environ["CONTAINERS_CONF_OVERRIDE"] = old_containers_conf
            else:
                os.environ.pop("CONTAINERS_CONF_OVERRIDE", None)
|
||||
|
||||
def stop(self):
|
||||
with NamedTemporaryFile(
|
||||
|
||||
Reference in New Issue
Block a user