From 4e4e6708bf8bcddaf644cc02410e2d9380370cd5 Mon Sep 17 00:00:00 2001
From: Daniel J Walsh
Date: Tue, 11 Feb 2025 11:59:39 -0500
Subject: [PATCH] Add ramalama rag command

Signed-off-by: Daniel J Walsh
---
 Makefile               |  8 -----
 README.md              |  1 +
 docs/ramalama-rag.1.md | 41 ++++++++++++++++++++++
 docs/ramalama.1.md     |  1 +
 install.sh             |  2 +-
 ramalama/cli.py        | 29 ++++++++++++++++
 ramalama/rag.py        | 78 ++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 151 insertions(+), 9 deletions(-)
 create mode 100644 docs/ramalama-rag.1.md
 create mode 100644 ramalama/rag.py

diff --git a/Makefile b/Makefile
index ca31e434..21bba3b7 100644
--- a/Makefile
+++ b/Makefile
@@ -24,10 +24,6 @@ help:
 	@echo
 	@echo " - make build-rag IMAGE=quay.io/ramalama/ramalama GPU=ramalama"
 	@echo
-	@echo "Build Docling Container Image"
-	@echo
-	@echo " - make build-docling IMAGE=quay.io/ramalama/ramalama GPU=ramalama"
-	@echo
 	@echo "Build docs"
 	@echo
 	@echo " - make docs"
@@ -100,10 +96,6 @@ build_multi_arch:
 build-rag:
 	podman build --build-arg IMAGE=${IMAGE} --build-arg GPU=${GPU} -t ${IMAGE}-rag container-images/pragmatic
 
-.PHONY: build-docling
-build-docling:
-	podman build --build-arg IMAGE=${IMAGE} --build-arg CONTENT=docling --build-arg GPU=${GPU} -t ${IMAGE}-docling container-images/pragmatic
-
 .PHONY: install-docs
 install-docs: docs
 	make -C docs install
diff --git a/README.md b/README.md
index 74616766..0d59f376 100644
--- a/README.md
+++ b/README.md
@@ -123,6 +123,7 @@ curl -fsSL https://raw.githubusercontent.com/containers/ramalama/s/install.sh |
 | [ramalama-perplexity(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-perplexity.1.md)| calculate perplexity for specified AI Model |
 | [ramalama-pull(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-pull.1.md) | pull AI Model from Model registry to local storage |
 | [ramalama-push(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-push.1.md) | push AI Model from local storage to remote registry |
+| [ramalama-rag(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-rag.1.md) | generate and convert Retrieval Augmented Generation (RAG) data from provided documents into an OCI Image |
 | [ramalama-rm(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-rm.1.md) | remove AI Model from local storage |
 | [ramalama-run(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-run.1.md) | run specified AI Model as a chatbot |
 | [ramalama-serve(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-serve.1.md) | serve REST API on specified AI Model |
diff --git a/docs/ramalama-rag.1.md b/docs/ramalama-rag.1.md
new file mode 100644
index 00000000..9578fbe5
--- /dev/null
+++ b/docs/ramalama-rag.1.md
@@ -0,0 +1,41 @@
+% ramalama-rag 1
+
+## NAME
+ramalama\-rag - generate and convert Retrieval Augmented Generation (RAG) data from provided documents into an OCI Image
+
+## SYNOPSIS
+**ramalama rag** [options] [path ...] image
+
+## DESCRIPTION
+Generate RAG data from the provided documents and convert it into an OCI Image. This command uses a container image containing the docling
+tool to convert the specified content into a RAG vector database. If the image does not exist locally, RamaLama pulls the image
+down and launches a container to process the data.
+
+NOTE: this command does not work without a container engine.
+
+positional arguments:
+  path   Files/Directory containing PDF, DOCX, PPTX, XLSX, HTML, AsciiDoc & Markdown formatted files to be processed.
+         Can be specified multiple times.
+  image  OCI Image name to contain processed rag data
+
+## OPTIONS
+#### **--help**, **-h**
+Print usage message
+
+#### **--network-mode**=*none*
+Set the network mode for the container
+
+## EXAMPLES
+
+```
+$ ramalama rag https://arxiv.org/pdf/2408.09869 /tmp/pdf quay.io/rhatdan/myrag
+Fetching 9 files: 100%|██████████████████████████████████████████████████████████| 9/9 [00:00<00:00, 68509.50it/s]
+Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.
+2024-12-04 13:49:07.372 ( 70.927s) [ 75AB6740] doc_normalisation.h:448 WARN| found new `other` type: checkbox-unselected
+```
+
+## SEE ALSO
+**[ramalama(1)](ramalama.1.md)**
+
+## HISTORY
+Dec 2024, Originally compiled by Dan Walsh
diff --git a/docs/ramalama.1.md b/docs/ramalama.1.md
index 08519290..917f4743 100644
--- a/docs/ramalama.1.md
+++ b/docs/ramalama.1.md
@@ -163,6 +163,7 @@ show RamaLama version
 | [ramalama-perplexity(1)](ramalama-perplexity.1.md)| calculate the perplexity value of an AI Model |
 | [ramalama-pull(1)](ramalama-pull.1.md) | pull AI Models from Model registries to local storage |
 | [ramalama-push(1)](ramalama-push.1.md) | push AI Models from local storage to remote registries |
+| [ramalama-rag(1)](ramalama-rag.1.md) | generate and convert Retrieval Augmented Generation (RAG) data from provided documents into an OCI Image |
 | [ramalama-rm(1)](ramalama-rm.1.md) | remove AI Models from local storage |
 | [ramalama-run(1)](ramalama-run.1.md) | run specified AI Model as a chatbot |
 | [ramalama-serve(1)](ramalama-serve.1.md) | serve REST API on specified AI Model |
diff --git a/install.sh b/install.sh
index f8f1a3b8..7a2dce5d 100755
--- a/install.sh
+++ b/install.sh
@@ -130,7 +130,7 @@ setup_ramalama() {
   syspath="$syspath/ramalama"
   $sudo install -m755 -d "$syspath"
   $sudo install -m755 "$to_file" "$ramalama_bin"
-  local python_files=("cli.py" "gguf_parser.py" "huggingface.py" "model.py" \
+  local python_files=("cli.py" "rag.py" "gguf_parser.py" "huggingface.py" "model.py" \
                       "model_inspect.py" "ollama.py" "common.py" "__init__.py" \
                       "quadlet.py" "kube.py" "oci.py" "version.py" "shortnames.py" \
                       "toml_parser.py" "file.py" "http_client.py" "url.py" \
diff --git a/ramalama/cli.py b/ramalama/cli.py
index 4929b993..0226490b 100644
--- a/ramalama/cli.py
+++ b/ramalama/cli.py
@@ -7,6 +7,7 @@ import subprocess
 import platform
 import time
 import ramalama.oci
+import ramalama.rag
 
 from ramalama.huggingface import Huggingface
 from ramalama.common import (
@@ -250,6 +251,7 @@ def configure_subcommands(parser):
     perplexity_parser(subparsers)
     pull_parser(subparsers)
     push_parser(subparsers)
+    rag_parser(subparsers)
     rm_parser(subparsers)
     run_parser(subparsers)
     serve_parser(subparsers)
@@ -918,6 +920,33 @@ def version_parser(subparsers):
     parser.set_defaults(func=print_version)
 
 
+def rag_parser(subparsers):
+    parser = subparsers.add_parser(
+        "rag",
+        help="generate and convert retrieval augmented generation (RAG) data from provided documents into an OCI Image",
+    )
+    parser.add_argument(
+        "--network-mode",
+        type=str,
+        default="none",
+        help="set the network mode for the container",
+    )
+    parser.add_argument(
+        "PATH",
+        nargs="*",
+        help="""\
+Files/Directory containing PDF, DOCX, PPTX, XLSX, HTML, AsciiDoc & Markdown
+formatted files to be processed""",
+    )
+    parser.add_argument("IMAGE", help="OCI Image name to contain processed rag data")
data") + parser.set_defaults(func=rag_cli) + + +def rag_cli(args): + rag = ramalama.rag.Rag(args.IMAGE) + rag.generate(args) + + def rm_parser(subparsers): parser = subparsers.add_parser("rm", help="remove AI Model from local storage") parser.add_argument("--container", default=False, action="store_false", help=argparse.SUPPRESS) diff --git a/ramalama/rag.py b/ramalama/rag.py new file mode 100644 index 00000000..57c344e7 --- /dev/null +++ b/ramalama/rag.py @@ -0,0 +1,78 @@ +import os +import subprocess +import tempfile + +from ramalama.common import run_cmd + + +class Rag: + model = "" + target = "" + + def __init__(self, target): + self.target = target + + def build(self, source, target, args): + print(f"Building {target}...") + src = os.path.realpath(source) + base = os.path.basename(source) + contextdir = os.path.dirname(src) + cfile = f"""\ +FROM scratch +COPY {base} /vector.db +""" + containerfile = tempfile.NamedTemporaryFile(prefix='RamaLama_Containerfile_', delete=True) + # Open the file for writing. + with open(containerfile.name, 'w') as c: + c.write(cfile) + imageid = ( + run_cmd( + [ + args.engine, + "build", + "--no-cache", + f"--network={args.network_mode}", + "-q", + "-t", + target, + "-f", + containerfile.name, + contextdir, + ], + debug=args.debug, + ) + .stdout.decode("utf-8") + .strip() + ) + return imageid + + def generate(self, args): + if not args.container: + raise KeyError("rag command requires a container. Can not be run with --nocontainer option.") + if not args.engine or args.engine == "": + raise KeyError("rag command requires a container. Can not be run without a container engine.") + + # Default image with "-rag" append is used for building rag data. + s = args.image.split(":") + s[0] = s[0] + "-rag" + rag_image = ":".join(s) + + exec_args = [args.engine, "run", "--rm"] + if args.network_mode != "": + exec_args += ["--network", args.network_mode] + for path in args.PATH: + if os.path.exists(path): + fpath = os.path.realpath(path) + rpath = os.path.relpath(path) + exec_args += ["-v", f"{fpath}:/docs/{rpath}:ro,z"] + vectordb = tempfile.NamedTemporaryFile(dir="", prefix='RamaLama_rag_', delete=True) + exec_args += ["-v", f"{vectordb.name}:{vectordb.name}:z"] + exec_args += [rag_image] + exec_args += ["pragmatic", "--indexing", "--path /docs/", f"milvus_file_path={vectordb.name}"] + try: + run_cmd(exec_args, debug=args.debug) + except subprocess.CalledProcessError as e: + raise e + + print(self.build(vectordb.name, self.target, args)) + os.remove(vectordb.name)