Add ramalama rag command
Signed-off-by: Daniel J Walsh <dwalsh@redhat.com>
Makefile (8 lines changed)
@@ -24,10 +24,6 @@ help:
	@echo
	@echo " - make build-rag IMAGE=quay.io/ramalama/ramalama GPU=ramalama"
	@echo
	@echo "Build Docling Container Image"
	@echo
	@echo " - make build-docling IMAGE=quay.io/ramalama/ramalama GPU=ramalama"
	@echo
	@echo "Build docs"
	@echo
	@echo " - make docs"
@@ -100,10 +96,6 @@ build_multi_arch:
build-rag:
	podman build --build-arg IMAGE=${IMAGE} --build-arg GPU=${GPU} -t ${IMAGE}-rag container-images/pragmatic

.PHONY: build-docling
build-docling:
	podman build --build-arg IMAGE=${IMAGE} --build-arg CONTENT=docling --build-arg GPU=${GPU} -t ${IMAGE}-docling container-images/pragmatic

.PHONY: install-docs
install-docs: docs
	make -C docs install
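For orientation, this is roughly what invoking the new target looks like when given the values from the help text above (a sketch; the IMAGE/GPU values are just the documented examples, and make echoes the expanded command before running it):

```
$ make build-rag IMAGE=quay.io/ramalama/ramalama GPU=ramalama
podman build --build-arg IMAGE=quay.io/ramalama/ramalama --build-arg GPU=ramalama \
	-t quay.io/ramalama/ramalama-rag container-images/pragmatic
```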
README.md

@@ -123,6 +123,7 @@ curl -fsSL https://raw.githubusercontent.com/containers/ramalama/s/install.sh |
| [ramalama-perplexity(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-perplexity.1.md) | calculate perplexity for specified AI Model |
| [ramalama-pull(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-pull.1.md) | pull AI Model from Model registry to local storage |
| [ramalama-push(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-push.1.md) | push AI Model from local storage to remote registry |
| [ramalama-rag(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-rag.1.md) | generate and convert Retrieval Augmented Generation (RAG) data from provided documents into an OCI Image |
| [ramalama-rm(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-rm.1.md) | remove AI Model from local storage |
| [ramalama-run(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-run.1.md) | run specified AI Model as a chatbot |
| [ramalama-serve(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-serve.1.md) | serve REST API on specified AI Model |
docs/ramalama-rag.1.md (new file, 41 lines)
@@ -0,0 +1,41 @@
% ramalama-rag 1

## NAME
ramalama\-rag - generate and convert Retrieval Augmented Generation (RAG) data from provided documents into an OCI Image

## SYNOPSIS
**ramalama rag** [options] [path ...] image

## DESCRIPTION
Generate RAG data from the provided documents and convert it into an OCI Image. This command uses a specific container image containing the docling
tool to convert the specified content into a RAG vector database. If the image does not exist locally, RamaLama pulls it
down and launches a container to process the data.

NOTE: this command does not work without a container engine.

positional arguments:
  path     Files/Directory containing PDF, DOCX, PPTX, XLSX, HTML, AsciiDoc & Markdown formatted files to be processed. Can be specified multiple times.
  image    OCI Image name to contain the processed RAG data

## OPTIONS

#### **--help**, **-h**
Print usage message

#### **--network-mode**=*none*
set the network mode for the container

## EXAMPLES

```
$ ramalama rag https://arxiv.org/pdf/2408.09869 /tmp/pdf quay.io/rhatdan/myrag
Fetching 9 files: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:00<00:00, 68509.50it/s]
Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.
2024-12-04 13:49:07.372 ( 70.927s) [ 75AB6740] doc_normalisation.h:448 WARN| found new `other` type: checkbox-unselected
```
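The resulting image is built `FROM scratch` with the database stored at `/vector.db` (see `ramalama/rag.py` below), so one way to get at the generated data afterwards is to copy it out of a created container. A hedged sketch: the image name matches the example above, and the trailing `placeholder` argument only satisfies `podman create`'s requirement for a command, since a scratch image has nothing to run:

```
$ ctr=$(podman create quay.io/rhatdan/myrag placeholder)
$ podman cp "$ctr:/vector.db" ./vector.db
$ podman rm "$ctr"
```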

## SEE ALSO
**[ramalama(1)](ramalama.1.md)**

## HISTORY
Dec 2024, Originally compiled by Dan Walsh <dwalsh@redhat.com>
docs/ramalama.1.md

@@ -163,6 +163,7 @@ show RamaLama version
| [ramalama-perplexity(1)](ramalama-perplexity.1.md) | calculate the perplexity value of an AI Model |
| [ramalama-pull(1)](ramalama-pull.1.md) | pull AI Models from Model registries to local storage |
| [ramalama-push(1)](ramalama-push.1.md) | push AI Models from local storage to remote registries |
| [ramalama-rag(1)](ramalama-rag.1.md) | generate and convert Retrieval Augmented Generation (RAG) data from provided documents into an OCI Image |
| [ramalama-rm(1)](ramalama-rm.1.md) | remove AI Models from local storage |
| [ramalama-run(1)](ramalama-run.1.md) | run specified AI Model as a chatbot |
| [ramalama-serve(1)](ramalama-serve.1.md) | serve REST API on specified AI Model |
install.sh

@@ -130,7 +130,7 @@ setup_ramalama() {
   syspath="$syspath/ramalama"
   $sudo install -m755 -d "$syspath"
   $sudo install -m755 "$to_file" "$ramalama_bin"
-  local python_files=("cli.py" "gguf_parser.py" "huggingface.py" "model.py" \
+  local python_files=("cli.py" "rag.py" "gguf_parser.py" "huggingface.py" "model.py" \
     "model_inspect.py" "ollama.py" "common.py" "__init__.py" \
     "quadlet.py" "kube.py" "oci.py" "version.py" "shortnames.py" \
     "toml_parser.py" "file.py" "http_client.py" "url.py" \
ramalama/cli.py

@@ -7,6 +7,7 @@ import subprocess
import platform
import time
import ramalama.oci
import ramalama.rag

from ramalama.huggingface import Huggingface
from ramalama.common import (

@@ -250,6 +251,7 @@ def configure_subcommands(parser):
    perplexity_parser(subparsers)
    pull_parser(subparsers)
    push_parser(subparsers)
    rag_parser(subparsers)
    rm_parser(subparsers)
    run_parser(subparsers)
    serve_parser(subparsers)

@@ -918,6 +920,33 @@ def version_parser(subparsers):
    parser.set_defaults(func=print_version)


def rag_parser(subparsers):
    parser = subparsers.add_parser(
        "rag",
        help="generate and convert retrieval augmented generation (RAG) data from provided documents into an OCI Image",
    )
    parser.add_argument(
        "--network-mode",
        type=str,
        default="none",
        help="set the network mode for the container",
    )
    parser.add_argument(
        "PATH",
        nargs="*",
        help="""\
Files/Directory containing PDF, DOCX, PPTX, XLSX, HTML, AsciiDoc & Markdown
formatted files to be processed""",
    )
    parser.add_argument("IMAGE", help="OCI Image name to contain processed rag data")
    parser.set_defaults(func=rag_cli)


def rag_cli(args):
    rag = ramalama.rag.Rag(args.IMAGE)
    rag.generate(args)


def rm_parser(subparsers):
    parser = subparsers.add_parser("rm", help="remove AI Model from local storage")
    parser.add_argument("--container", default=False, action="store_false", help=argparse.SUPPRESS)
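To see how the positional arguments land, here is a minimal standalone sketch of the same parser shape (reconstructed for illustration; it is not the actual ramalama entry point). Because `PATH` uses `nargs="*"`, argparse collects every leading argument into the list and leaves the final one for the required `IMAGE` positional:

```python
import argparse

# Mirror of rag_parser's argument shape, outside the real CLI.
parser = argparse.ArgumentParser(prog="ramalama rag")
parser.add_argument("--network-mode", type=str, default="none")
parser.add_argument("PATH", nargs="*")  # zero or more document paths
parser.add_argument("IMAGE")            # required OCI image name

args = parser.parse_args(["./docs", "/tmp/pdf", "quay.io/rhatdan/myrag"])
print(args.PATH)   # ['./docs', '/tmp/pdf']
print(args.IMAGE)  # 'quay.io/rhatdan/myrag'
```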
ramalama/rag.py (new file, 78 lines)
@@ -0,0 +1,78 @@
import os
import subprocess
import tempfile

from ramalama.common import run_cmd


class Rag:
    model = ""
    target = ""

    def __init__(self, target):
        self.target = target

    def build(self, source, target, args):
        # Package the generated vector database into an OCI image built
        # from scratch, with the database stored at /vector.db.
        print(f"Building {target}...")
        src = os.path.realpath(source)
        base = os.path.basename(source)
        contextdir = os.path.dirname(src)
        cfile = f"""\
FROM scratch
COPY {base} /vector.db
"""
        containerfile = tempfile.NamedTemporaryFile(prefix='RamaLama_Containerfile_', delete=True)
        # Open the file for writing.
        with open(containerfile.name, 'w') as c:
            c.write(cfile)
        imageid = (
            run_cmd(
                [
                    args.engine,
                    "build",
                    "--no-cache",
                    f"--network={args.network_mode}",
                    "-q",
                    "-t",
                    target,
                    "-f",
                    containerfile.name,
                    contextdir,
                ],
                debug=args.debug,
            )
            .stdout.decode("utf-8")
            .strip()
        )
        return imageid

    def generate(self, args):
        if not args.container:
            raise KeyError("rag command requires a container. Cannot be run with the --nocontainer option.")
        if not args.engine or args.engine == "":
            raise KeyError("rag command requires a container. Cannot be run without a container engine.")

        # The default image with "-rag" appended is used for building rag data.
        s = args.image.split(":")
        s[0] = s[0] + "-rag"
        rag_image = ":".join(s)

        exec_args = [args.engine, "run", "--rm"]
        if args.network_mode != "":
            exec_args += ["--network", args.network_mode]
        # Mount each existing document path read-only under /docs/ in the container.
        for path in args.PATH:
            if os.path.exists(path):
                fpath = os.path.realpath(path)
                rpath = os.path.relpath(path)
                exec_args += ["-v", f"{fpath}:/docs/{rpath}:ro,z"]
        # Temporary file in the current directory that receives the Milvus vector database.
        vectordb = tempfile.NamedTemporaryFile(dir="", prefix='RamaLama_rag_', delete=True)
        exec_args += ["-v", f"{vectordb.name}:{vectordb.name}:z"]
        exec_args += [rag_image]
        exec_args += ["pragmatic", "--indexing", "--path /docs/", f"milvus_file_path={vectordb.name}"]
        try:
            run_cmd(exec_args, debug=args.debug)
        except subprocess.CalledProcessError as e:
            raise e

        print(self.build(vectordb.name, self.target, args))
        os.remove(vectordb.name)
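Two small worked examples of the logic above. First, the "-rag" image-name derivation (note it splits on every `:`, which assumes the colon belongs to the tag rather than a registry port):

```python
image = "quay.io/ramalama/ramalama:latest"
s = image.split(":")
s[0] = s[0] + "-rag"
print(":".join(s))  # quay.io/ramalama/ramalama-rag:latest
```

Second, for a temporary database file named, say, RamaLama_rag_ab12cd (a hypothetical name; the suffix is random), build() generates this Containerfile:

```
FROM scratch
COPY RamaLama_rag_ab12cd /vector.db
```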