1
0
mirror of https://github.com/containers/ramalama.git synced 2026-02-06 00:48:04 +01:00

Add ramalama rag command

Signed-off-by: Daniel J Walsh <dwalsh@redhat.com>
This commit is contained in:
Daniel J Walsh
2025-02-11 11:59:39 -05:00
parent ea8d3ddbc1
commit 4e4e6708bf
7 changed files with 151 additions and 9 deletions

View File

@@ -24,10 +24,6 @@ help:
@echo
@echo " - make build-rag IMAGE=quay.io/ramalama/ramalama GPU=ramalama"
@echo
@echo "Build Docling Container Image"
@echo
@echo " - make build-docling IMAGE=quay.io/ramalama/ramalama GPU=ramalama"
@echo
@echo "Build docs"
@echo
@echo " - make docs"
@@ -100,10 +96,6 @@ build_multi_arch:
build-rag:
podman build --build-arg IMAGE=${IMAGE} --build-arg GPU=${GPU} -t ${IMAGE}-rag container-images/pragmatic
.PHONY: build-docling
build-docling:
podman build --build-arg IMAGE=${IMAGE} --build-arg CONTENT=docling --build-arg GPU=${GPU} -t ${IMAGE}-docling container-images/pragmatic
.PHONY: install-docs
install-docs: docs
make -C docs install

View File

@@ -123,6 +123,7 @@ curl -fsSL https://raw.githubusercontent.com/containers/ramalama/s/install.sh |
| [ramalama-perplexity(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-perplexity.1.md)| calculate perplexity for specified AI Model |
| [ramalama-pull(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-pull.1.md) | pull AI Model from Model registry to local storage |
| [ramalama-push(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-push.1.md) | push AI Model from local storage to remote registry |
| [ramalama-rag(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-rag.1.md) | generate and convert Retrieval Augmented Generation (RAG) data from provided documents into an OCI Image|
| [ramalama-rm(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-rm.1.md) | remove AI Model from local storage |
| [ramalama-run(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-run.1.md) | run specified AI Model as a chatbot |
| [ramalama-serve(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-serve.1.md) | serve REST API on specified AI Model |

41
docs/ramalama-rag.1.md Normal file
View File

@@ -0,0 +1,41 @@
% ramalama-rag 1
## NAME
ramalama\-rag - generate and convert Retrieval Augmented Generation (RAG) data from provided documents into an OCI Image
## SYNOPSIS
**ramalama rag** [options] [path ...] image
## DESCRIPTION
Generate RAG data from provided documents and convert into an OCI Image. This command uses a specific container image containing the docling
tool to convert the specified content into a RAG vector database. If the image does not exist locally, RamaLama will pull it
down and launch a container to process the data.
NOTE: this command does not work without a container engine.
positional arguments:
path Files/Directory containing PDF, DOCX, PPTX, XLSX, HTML, AsciiDoc & Markdown formatted files to be processed. Can be specified multiple times.
image OCI Image name to contain processed rag data
## OPTIONS
#### **--help**, **-h**
Print usage message
#### **--network-mode**=*none*
sets the configuration for network namespaces when handling RUN instructions
## EXAMPLES
```
$ ramalama rag https://arxiv.org/pdf/2408.09869 /tmp/pdf quay.io/rhatdan/myrag
Fetching 9 files: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:00<00:00, 68509.50it/s]
Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.
2024-12-04 13:49:07.372 ( 70.927s) [ 75AB6740] doc_normalisation.h:448 WARN| found new `other` type: checkbox-unselected
```
## SEE ALSO
**[ramalama(1)](ramalama.1.md)**
## HISTORY
Dec 2024, Originally compiled by Dan Walsh <dwalsh@redhat.com>

View File

@@ -163,6 +163,7 @@ show RamaLama version
| [ramalama-perplexity(1)](ramalama-perplexity.1.md)| calculate the perplexity value of an AI Model |
| [ramalama-pull(1)](ramalama-pull.1.md) | pull AI Models from Model registries to local storage |
| [ramalama-push(1)](ramalama-push.1.md) | push AI Models from local storage to remote registries |
| [ramalama-rag(1)](ramalama-rag.1.md) | generate and convert Retrieval Augmented Generation (RAG) data from provided documents into an OCI Image |
| [ramalama-rm(1)](ramalama-rm.1.md) | remove AI Models from local storage |
| [ramalama-run(1)](ramalama-run.1.md) | run specified AI Model as a chatbot |
| [ramalama-serve(1)](ramalama-serve.1.md) | serve REST API on specified AI Model |

View File

@@ -130,7 +130,7 @@ setup_ramalama() {
syspath="$syspath/ramalama"
$sudo install -m755 -d "$syspath"
$sudo install -m755 "$to_file" "$ramalama_bin"
local python_files=("cli.py" "gguf_parser.py" "huggingface.py" "model.py" \
local python_files=("cli.py" "rag.py" "gguf_parser.py" "huggingface.py" "model.py" \
"model_inspect.py" "ollama.py" "common.py" "__init__.py" \
"quadlet.py" "kube.py" "oci.py" "version.py" "shortnames.py" \
"toml_parser.py" "file.py" "http_client.py" "url.py" \

View File

@@ -7,6 +7,7 @@ import subprocess
import platform
import time
import ramalama.oci
import ramalama.rag
from ramalama.huggingface import Huggingface
from ramalama.common import (
@@ -250,6 +251,7 @@ def configure_subcommands(parser):
perplexity_parser(subparsers)
pull_parser(subparsers)
push_parser(subparsers)
rag_parser(subparsers)
rm_parser(subparsers)
run_parser(subparsers)
serve_parser(subparsers)
@@ -918,6 +920,33 @@ def version_parser(subparsers):
parser.set_defaults(func=print_version)
def rag_parser(subparsers):
    """Register the `rag` subcommand on *subparsers*.

    The subcommand packages RAG data generated from documents into an
    OCI image; the actual work is dispatched to rag_cli().
    """
    p = subparsers.add_parser(
        "rag",
        help="generate and convert retrieval augmented generation (RAG) data from provided documents into an OCI Image",
    )
    # Containers run with networking disabled unless the caller overrides it.
    p.add_argument(
        "--network-mode",
        type=str,
        default="none",
        help="set the network mode for the container",
    )
    p.add_argument(
        "PATH",
        nargs="*",
        help="""\
Files/Directory containing PDF, DOCX, PPTX, XLSX, HTML, AsciiDoc & Markdown
formatted files to be processed""",
    )
    p.add_argument("IMAGE", help="OCI Image name to contain processed rag data")
    p.set_defaults(func=rag_cli)
def rag_cli(args):
    """Entry point for `ramalama rag`: generate RAG data into args.IMAGE."""
    ramalama.rag.Rag(args.IMAGE).generate(args)
def rm_parser(subparsers):
parser = subparsers.add_parser("rm", help="remove AI Model from local storage")
parser.add_argument("--container", default=False, action="store_false", help=argparse.SUPPRESS)

78
ramalama/rag.py Normal file
View File

@@ -0,0 +1,78 @@
import os
import subprocess
import tempfile
from ramalama.common import run_cmd
class Rag:
    """Build a Retrieval Augmented Generation (RAG) vector database from
    documents and package it into the OCI image named *target*.

    Requires a container engine (args.engine); document processing runs
    inside an "<image>-rag" container invoking the `pragmatic` tool.
    """

    # Class-level defaults kept for backward compatibility; __init__ sets
    # `target` per instance and `model` is not used by the visible code.
    model = ""
    target = ""

    def __init__(self, target):
        self.target = target

    def build(self, source, target, args):
        """Package *source* (a vector-db file) into OCI image *target*.

        Returns the image id emitted by `<engine> build -q`.
        Raises subprocess.CalledProcessError if the build fails.
        """
        print(f"Building {target}...")
        src = os.path.realpath(source)
        base = os.path.basename(source)
        contextdir = os.path.dirname(src)
        # Minimal Containerfile: the image carries only the vector database.
        cfile = f"""\
FROM scratch
COPY {base} /vector.db
"""
        containerfile = tempfile.NamedTemporaryFile(prefix='RamaLama_Containerfile_', delete=True)
        # Re-open by name so the engine can read the file while the
        # NamedTemporaryFile handle keeps it alive (POSIX-only pattern).
        with open(containerfile.name, 'w') as c:
            c.write(cfile)
        imageid = (
            run_cmd(
                [
                    args.engine,
                    "build",
                    "--no-cache",
                    f"--network={args.network_mode}",
                    "-q",
                    "-t",
                    target,
                    "-f",
                    containerfile.name,
                    contextdir,
                ],
                debug=args.debug,
            )
            .stdout.decode("utf-8")
            .strip()
        )
        return imageid

    def generate(self, args):
        """Process args.PATH documents into a vector database inside a
        container, then build the result into self.target.

        Raises:
            KeyError: when run with --nocontainer or without an engine.
            subprocess.CalledProcessError: when the container run fails.
        """
        if not args.container:
            raise KeyError("rag command requires a container. Can not be run with --nocontainer option.")
        if not args.engine or args.engine == "":
            raise KeyError("rag command requires a container. Can not be run without a container engine.")

        # Default image with "-rag" appended is used for building rag data;
        # append to the repository part only so any :tag is preserved.
        s = args.image.split(":")
        s[0] = s[0] + "-rag"
        rag_image = ":".join(s)

        exec_args = [args.engine, "run", "--rm"]
        if args.network_mode != "":
            exec_args += ["--network", args.network_mode]
        # Mount each existing source path read-only under /docs/.
        for path in args.PATH:
            if os.path.exists(path):
                fpath = os.path.realpath(path)
                rpath = os.path.relpath(path)
                exec_args += ["-v", f"{fpath}:/docs/{rpath}:ro,z"]

        # NOTE(review): dir="" yields a path relative to the CWD, while
        # engine volume mounts normally require absolute paths — confirm.
        # delete=False because we unlink the file ourselves below: with
        # delete=True the os.remove() plus the close-time unlink raised
        # FileNotFoundError on cleanup.
        vectordb = tempfile.NamedTemporaryFile(dir="", prefix='RamaLama_rag_', delete=False)
        try:
            exec_args += ["-v", f"{vectordb.name}:{vectordb.name}:z"]
            exec_args += [rag_image]
            exec_args += ["pragmatic", "--indexing", "--path /docs/", f"milvus_file_path={vectordb.name}"]
            run_cmd(exec_args, debug=args.debug)
            print(self.build(vectordb.name, self.target, args))
        finally:
            # Remove the temp database even when the container run fails,
            # so failed runs no longer leak the file.
            vectordb.close()
            if os.path.exists(vectordb.name):
                os.remove(vectordb.name)