
Merge pull request #2224 from jwieleRH/hf2

Download safetensors models from huggingface.co with https.
Oliver Walsh
2026-01-29 11:25:21 +00:00
committed by GitHub
5 changed files with 220 additions and 188 deletions
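In short, when a repository's manifest has no ggufFile entry, the new code lists the repo tree over plain HTTPS, collects the *.safetensors entries (the first one after sorting by name becomes the main model file, the rest are kept as shards), and prefers LFS oids as verifiable digests. A minimal standalone sketch of that selection step, using a hypothetical tree listing in place of a real response from https://huggingface.co/api/models/ORG/NAME/tree/main:

# Sketch only, not ramalama code: pick the main safetensors file and its shards
# from a Hugging Face tree listing (the entries below are hypothetical).
files = [
    {"type": "file", "path": "model-00002-of-00002.safetensors",
     "lfs": {"oid": "b" * 64}, "oid": "deadbeef"},
    {"type": "file", "path": "model-00001-of-00002.safetensors",
     "lfs": {"oid": "a" * 64}, "oid": "cafebabe"},
    {"type": "file", "path": "config.json", "oid": "0123abcd"},
]

def collect(entry):
    # Prefer the LFS oid (a sha256 of the blob contents) over the git object id.
    oid = entry.get("lfs", {}).get("oid", entry.get("oid", ""))
    return {"filename": entry["path"], "oid": oid}

safetensors = sorted(
    (collect(e) for e in files
     if e.get("type") == "file" and e["path"].endswith(".safetensors")),
    key=lambda f: f["filename"],
)
main, shards = safetensors[0], safetensors[1:]
print(main["filename"], f"sha256:{main['oid'][:8]}...", f"{len(shards)} shard(s)")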


@@ -4,6 +4,7 @@ import re
import tempfile
import urllib.request
from abc import ABC, abstractmethod
from pathlib import Path
from ramalama.common import (
SPLIT_MODEL_PATH_RE,
@@ -80,6 +81,9 @@ class HFStyleRepository(ABC):
self.model_hash = None
self.mmproj_filename = None
self.mmproj_hash = None
self.other_files: list[dict] = []
self.additional_safetensor_files: list[dict] = []
self.safetensors_index_file: str | None = None
self.fetch_metadata()
@abstractmethod
@@ -116,8 +120,69 @@ class HFStyleRepository(ABC):
should_verify_checksum=False,
)
)
# Handle additional safetensors files (for sharded models)
for safetensor_file in self.additional_safetensor_files:
filename = safetensor_file['filename']
if filename not in cached_files:
logger.debug(f"Adding safetensors file: {filename}")
files.append(
SnapshotFile(
url=f"{self.blob_url}/{filename}",
header=self.headers,
hash=f"sha256:{safetensor_file['oid']}",
type=SnapshotFileType.SafetensorModel,
name=filename,
should_show_progress=True,
should_verify_checksum=True,
)
)
# Other files
for other_file in self.other_files:
filename = other_file['filename']
if filename not in cached_files:
logger.debug(f"Adding other safetensors file: {filename}")
# The file's oid is either a verifiable SHA256 lfs hash or a SHA1 git
# hash, in which case generate a non-verifiable SHA256 hash.
# TODO: Calculate a verifiable content hash
oid = other_file['oid']
file_hash = (
oid
if len(oid) == 64
else generate_sha256(f"{self.organization}/{self.name}/{filename}", with_sha_prefix=False)
)
files.append(
SnapshotFile(
url=f"{self.blob_url}/{filename}",
header=self.headers,
hash=f"sha256:{file_hash}",
type=SnapshotFileType.Other,
name=filename,
should_show_progress=False,
should_verify_checksum=len(oid) == 64,
)
)
if self.mmproj_filename and self.mmproj_filename not in cached_files:
files.append(self.mmproj_file())
# Add safetensors index file if present
if self.safetensors_index_file and self.safetensors_index_file not in cached_files:
logger.debug(f"Adding safetensors index file: {self.safetensors_index_file}")
files.append(
SnapshotFile(
url=f"{self.blob_url}/{self.safetensors_index_file}",
header=self.headers,
hash=generate_sha256(
f"{self.organization}/{self.name}/{self.safetensors_index_file}", with_sha_prefix=False
),
type=SnapshotFileType.Other,
name=self.safetensors_index_file,
required=False,
)
)
if self.FILE_NAME_CONFIG not in cached_files:
files.append(self.config_file())
if self.FILE_NAME_GENERATION_CONFIG not in cached_files:
@@ -130,11 +195,17 @@ class HFStyleRepository(ABC):
def model_file(self) -> SnapshotFile:
assert self.model_filename
assert self.model_hash
# Determine file type based on extension
file_type = SnapshotFileType.GGUFModel
if Path(self.model_filename).match('*.safetensors'):
file_type = SnapshotFileType.SafetensorModel
return SnapshotFile(
url=f"{self.blob_url}/{self.model_filename}",
header=self.headers,
hash=self.model_hash,
type=SnapshotFileType.GGUFModel,
type=file_type,
name=self.model_filename,
should_show_progress=True,
should_verify_checksum=True,
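
The hunk above decides how non-model files are checksummed: an LFS-backed file carries a 64-character sha256 oid that can be verified after download, while a plain git-tracked file only has a 40-character sha1 object id, so a deterministic but non-verifiable placeholder digest is derived from the repo-relative path instead. A small sketch of the same decision, with hashlib standing in for ramalama's generate_sha256 helper (an assumption; the helper's internals are not part of this diff):

import hashlib

def placeholder_digest(path: str) -> str:
    # Stand-in for generate_sha256(path, with_sha_prefix=False): a deterministic
    # digest of the repo-relative path, NOT of the file contents.
    return hashlib.sha256(path.encode("utf-8")).hexdigest()

def choose_hash(org: str, name: str, filename: str, oid: str) -> tuple[str, bool]:
    # 64 hex chars -> LFS sha256, verifiable after download.
    if len(oid) == 64:
        return f"sha256:{oid}", True
    # Otherwise a 40-char git sha1: fall back to a non-verifiable placeholder.
    return f"sha256:{placeholder_digest(f'{org}/{name}/{filename}')}", False

print(choose_hash("org", "repo", "tokenizer.json", "f" * 40))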


@@ -1,9 +1,9 @@
import json
import os
import pathlib
import urllib.request
from pathlib import Path
from ramalama.common import available, perror, run_cmd
from ramalama.common import run_cmd
from ramalama.hf_style_repo_base import (
HFStyleRepoFile,
HFStyleRepoModel,
@@ -11,8 +11,6 @@ from ramalama.hf_style_repo_base import (
fetch_checksum_from_api_base,
)
from ramalama.logger import logger
from ramalama.model_store.snapshot_file import SnapshotFileType
from ramalama.path_utils import create_file_link
missing_huggingface = """This operation requires huggingface-cli which is not available.
@@ -26,11 +24,6 @@ sudo dnf install python3-huggingface-hub
"""
def is_hf_cli_available():
"""Check if huggingface-cli is available on the system."""
return available("hf")
def huggingface_token():
"""Return cached Hugging Face token if it exists otherwise None"""
token_path = os.path.expanduser(os.path.join("~", ".cache", "huggingface", "token"))
@@ -83,6 +76,64 @@ def fetch_repo_manifest(repo_name: str, tag: str = "latest"):
return json.loads(repo_manifest)
def fetch_repo_files(repo_name: str, revision: str = "main"):
"""Fetch the list of files in a HuggingFace repository using the Files API with pagination support."""
token = huggingface_token()
base_api_url = f"https://huggingface.co/api/models/{repo_name}/tree/{revision}"
all_files = []
next_url: str | None = base_api_url
# TODO: Handle Diffusers-multifolder layout
# See https://huggingface.co/docs/diffusers/v0.35.1/using-diffusers/other-formats#diffusers-multifolder
while next_url:
logger.debug(f"Fetching repo files from {next_url}")
request = urllib.request.Request(
url=next_url,
headers={
'Accept': 'application/json',
},
)
if token is not None:
request.add_header('Authorization', f"Bearer {token}")
with urllib.request.urlopen(request) as response:
files_data = response.read().decode('utf-8')
data = json.loads(files_data)
# Response should be a list of files or a dict
if isinstance(data, list):
all_files.extend(data)
elif isinstance(data, dict):
files = data.get('files') or data.get('siblings') or []
all_files.extend(files)
else:
raise ValueError(f"Unexpected response from HuggingFace API: {type(data)}")
# Check for pagination via Link header
next_url = None
link_header = response.headers.get('Link')
if link_header:
# Parse Link header: <url>; rel="next"
# Example: '<https://huggingface.co/api/models/foo/tree/main?cursor=abc>; rel="next"'
for link in link_header.split(','):
parts = link.split(';')
has_next_rel = any('rel="next"' in p.strip() or "rel='next'" in p.strip() for p in parts)
if has_next_rel:
for p in parts:
p = p.strip()
if p.startswith('<') and p.endswith('>'):
next_url = p[1:-1]
logger.debug(f"Found next page via Link header: {next_url}")
break
if next_url:
break
logger.debug(f"Fetched {len(all_files)} total files from repository {repo_name}")
return all_files
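
fetch_repo_files above pages through the tree listing by following HTTP Link headers of the form <url>; rel="next". A self-contained sketch of just that parsing step, run against a made-up header value:

# Sketch of the Link-header handling above, against a made-up value.
link_header = (
    '<https://huggingface.co/api/models/foo/tree/main?cursor=abc>; rel="next", '
    '<https://huggingface.co/api/models/foo/tree/main>; rel="first"'
)

next_url = None
for link in link_header.split(','):
    url_part, _, params = link.partition(';')
    if 'rel="next"' in params or "rel='next'" in params:
        candidate = url_part.strip()
        if candidate.startswith('<') and candidate.endswith('>'):
            next_url = candidate[1:-1]
            break

print(next_url)  # https://huggingface.co/api/models/foo/tree/main?cursor=abc
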
class HuggingfaceCLIFile(HFStyleRepoFile):
pass
@@ -90,18 +141,98 @@ class HuggingfaceCLIFile(HFStyleRepoFile):
class HuggingfaceRepository(HFStyleRepository):
REGISTRY_URL = "https://huggingface.co"
def fetch_metadata(self):
def _fetch_manifest_metadata(self):
# Repo org/name. Fetch repo manifest to determine model/mmproj file
self.blob_url = f"{HuggingfaceRepository.REGISTRY_URL}/{self.organization}/{self.name}/resolve/main"
self.manifest = fetch_repo_manifest(f"{self.organization}/{self.name}", self.tag)
try:
self.manifest = fetch_repo_manifest(f"{self.organization}/{self.name}", self.tag)
except (urllib.error.HTTPError, json.JSONDecodeError, KeyError) as e:
logger.debug(f'fetch_repo_manifest failed: {e}')
return False
try:
# Note that the blobId in the manifest already has a sha256: prefix
self.model_filename = self.manifest['ggufFile']['rfilename']
self.model_hash = self.manifest['ggufFile']['blobId']
self.mmproj_filename = self.manifest.get('mmprojFile', {}).get('rfilename', None)
self.mmproj_hash = self.manifest.get('mmprojFile', {}).get('blobId', None)
return True
except KeyError:
perror("Repository manifest missing ggufFile data")
raise
self.mmproj_filename = self.manifest.get('mmprojFile', {}).get('rfilename', None)
self.mmproj_hash = self.manifest.get('mmprojFile', {}).get('blobId', None)
# No ggufFile in manifest
return False
def _collect_file(self, file_list, file_info):
path = file_info['path']
oid = file_info.get('oid', '')
if 'lfs' in file_info and 'oid' in file_info['lfs']:
oid = file_info['lfs']['oid']
file_list.append({'filename': path, 'oid': oid})
def _fetch_safetensors_metadata(self):
"""Fetch metadata for safetensors models from HuggingFace API."""
repo_name = f"{self.organization}/{self.name}"
try:
files = fetch_repo_files(repo_name)
except (urllib.error.HTTPError, json.JSONDecodeError, KeyError) as e:
logger.debug(f'fetch_repo_files failed: {e}')
return False
# Find all safetensors files, config files and index files
safetensors_files = []
self.other_files = []
index_file = None
try:
for file_info in files:
if file_info.get('type') != 'file':
continue
path_str = file_info['path']
logger.debug(f"Examining file {path_str}")
path = Path(path_str)
# Note: case sensitivity follows platform defaults
if path.match('*.safetensors'):
self._collect_file(safetensors_files, file_info)
elif path.match('*.safetensors.index.json'):
index_file = path_str
elif path_str in {
self.FILE_NAME_CONFIG,
self.FILE_NAME_GENERATION_CONFIG,
self.FILE_NAME_TOKENIZER_CONFIG,
}:
continue
else:
self._collect_file(self.other_files, file_info)
if not safetensors_files:
logger.debug('No safetensors files found')
return False
# Sort safetensors files by name for consistent ordering
safetensors_files.sort(key=lambda x: x['filename'])
# Use the first safetensors file as the main model
# If there are multiple files, they might be sharded
self.model_filename = safetensors_files[0]['filename']
self.model_hash = f"sha256:{safetensors_files[0]['oid']}"
# Store additional safetensors files for get_file_list
self.additional_safetensor_files = safetensors_files[1:]
# Store index file if found
self.safetensors_index_file = index_file
except (KeyError, ValueError) as e:
logger.debug(f'_fetch_safetensors_metadata failed: {e}')
return False
return True
def fetch_metadata(self):
# Try to fetch GGUF manifest first, then safetensors metadata
if not self._fetch_manifest_metadata() and not self._fetch_safetensors_metadata():
raise KeyError("No metadata found")
token = huggingface_token()
if token is not None:
self.headers['Authorization'] = f"Bearer {token}"
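
Gated or private repositories reuse the token that the hf CLI caches under ~/.cache/huggingface/token, attached as a Bearer header on both the API and resolve requests. A minimal sketch of that step; the resolve URL below is only an example, the real one is built from blob_url and the filename:

import os
import urllib.request

def cached_hf_token() -> str | None:
    # Same location huggingface_token() reads: ~/.cache/huggingface/token
    token_path = os.path.expanduser(os.path.join("~", ".cache", "huggingface", "token"))
    try:
        with open(token_path) as f:
            return f.read().strip()
    except FileNotFoundError:
        return None

request = urllib.request.Request(
    "https://huggingface.co/HuggingFaceTB/SmolLM-135M/resolve/main/config.json"
)
token = cached_hf_token()
if token is not None:
    request.add_header("Authorization", f"Bearer {token}")
# urllib.request.urlopen(request) would then stream the file over https.
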
@@ -126,7 +257,6 @@ class Huggingface(HFStyleRepoModel):
super().__init__(model, model_store_path)
self.type = "huggingface"
self.hf_cli_available = is_hf_cli_available()
def get_cli_command(self):
return "hf"
@@ -161,7 +291,7 @@ class Huggingface(HFStyleRepoModel):
return HuggingfaceRepository(name, organization, tag)
def get_cli_download_args(self, directory_path, model):
return ["hf", "download", "--local-dir", directory_path, model]
raise NotImplementedError("huggingface cli download not available")
def extract_model_identifiers(self):
model_name, model_tag, model_organization = super().extract_model_identifiers()
@@ -182,32 +312,6 @@ class Huggingface(HFStyleRepoModel):
return snapshot_path, cache_path
def in_existing_cache(self, args, target_path, sha256_checksum):
if not self.hf_cli_available:
return False
default_hf_caches = [os.path.join(os.environ['HOME'], '.cache/huggingface/hub')]
namespace, repo = os.path.split(str(self.directory))
for cache_dir in default_hf_caches:
snapshot_path, cache_path = self._fetch_snapshot_path(cache_dir, namespace, repo)
if not snapshot_path or not cache_path or not os.path.exists(snapshot_path):
continue
file_path = os.path.join(snapshot_path, self.filename)
if not os.path.exists(file_path):
continue
blob_path = pathlib.Path(file_path).resolve()
if not os.path.exists(blob_path):
continue
blob_file = os.path.relpath(blob_path, start=os.path.join(cache_path, 'blobs'))
if str(blob_file) != str(sha256_checksum):
continue
# Use cross-platform file linking (hardlink/symlink/copy)
create_file_link(str(blob_path), target_path)
return True
return False
def push(self, _, args):
@@ -230,50 +334,4 @@ class Huggingface(HFStyleRepoModel):
return proc.stdout.decode("utf-8")
def _collect_cli_files(self, tempdir: str) -> tuple[str, list[HuggingfaceCLIFile]]:
cache_dir = os.path.join(tempdir, ".cache", "huggingface", "download")
files: list[HuggingfaceCLIFile] = []
snapshot_hash = ""
for root, _, filenames in os.walk(tempdir):
for filename in filenames:
if filename == ".gitattributes":
continue
entry_path = os.path.join(root, filename)
rel_name = os.path.relpath(entry_path, start=tempdir)
# Skip files inside the .cache directory itself
if rel_name.startswith(".cache/"):
continue
sha256 = ""
metadata_path = os.path.join(cache_dir, f"{rel_name}.metadata")
if not os.path.exists(metadata_path):
continue
with open(metadata_path) as metafile:
lines = metafile.readlines()
if len(lines) < 2:
continue
sha256 = f"sha256:{lines[1].strip()}"
if sha256 == "sha256:":
continue
if os.path.basename(rel_name).lower() == "readme.md":
snapshot_hash = sha256
continue
hf_file = HuggingfaceCLIFile(
url=entry_path,
header={},
hash=sha256,
type=SnapshotFileType.Other,
name=rel_name,
)
# try to identify the model file in the pulled repo
if rel_name.endswith(".gguf"):
hf_file.type = SnapshotFileType.GGUFModel
elif rel_name.endswith(".safetensors"):
hf_file.type = SnapshotFileType.SafetensorModel
files.append(hf_file)
return snapshot_hash, files
return "", []


@@ -96,8 +96,6 @@ skip_if_gh_actions_darwin = pytest.mark.skipif(
skip_if_windows = pytest.mark.skipif(platform.system() == "Windows", reason="Windows operating system")
skip_if_not_windows = pytest.mark.skipif(platform.system() != "Windows", reason="not Windows operating system")
skip_if_no_huggingface_cli = pytest.mark.skipif(shutil.which("hf") is None, reason="hf cli not installed")
skip_if_no_llama_bench = pytest.mark.skipif(shutil.which("llama-bench") is None, reason="llama-bench not installed")
skip_if_no_mlx = pytest.mark.skipif(


@@ -9,7 +9,6 @@ from test.conftest import (
skip_if_darwin,
skip_if_little_endian_machine,
skip_if_no_container,
skip_if_no_huggingface_cli,
skip_if_no_ollama,
)
from test.e2e.utils import RamalamaExecWorkspace
@@ -86,21 +85,6 @@ def test_pull_non_existing_model():
"hf://Felladrin/gguf-smollm-360M-instruct-add-basics/smollm-360M-instruct-add-basics.IQ2_XXS.gguf",
id="Felladrin/../smollm-360M-instruct-add-basics.IQ2_XXS.gguf model with RAMALAMA_TRANSPORT=huggingface"
),
pytest.param(
"hf://HuggingFaceTB/SmolLM-135M", None, "hf://HuggingFaceTB/SmolLM-135M",
id="hf://HuggingFaceTB/SmolLM-135M model",
marks=[skip_if_no_huggingface_cli]
),
pytest.param(
"hf://ggml-org/SmolVLM-256M-Instruct-GGUF", None, "hf://ggml-org/SmolVLM-256M-Instruct-GGUF",
id="hf://ggml-org/SmolVLM-256M-Instruct-GGUF model",
marks=[skip_if_no_huggingface_cli]
),
pytest.param(
"hf://ggml-org/SmolVLM-256M-Instruct-GGUF:Q8_0", None, "hf://ggml-org/SmolVLM-256M-Instruct-GGUF:Q8_0",
id="hf://ggml-org/SmolVLM-256M-Instruct-GGUF:Q8_0 model",
marks=[skip_if_no_huggingface_cli]
),
pytest.param(
"oci://quay.io/ramalama/smollm:135m", None, "oci://quay.io/ramalama/smollm:135m",
id="smollm:135m model with oci:// url",
@@ -272,58 +256,6 @@ def test_pull_using_ollama_cache(ollama_server, ollama_model, model, env_vars, e
assert model_list[0]["name"] == expected
@pytest.mark.e2e
@pytest.mark.distro_integration
@skip_if_no_huggingface_cli
@pytest.mark.parametrize(
"hf_repo, hf_model, model, env_vars, expected",
[
pytest.param(
"Felladrin/gguf-smollm-360M-instruct-add-basics",
"smollm-360M-instruct-add-basics.IQ2_XXS.gguf",
"hf://Felladrin/gguf-smollm-360M-instruct-add-basics/smollm-360M-instruct-add-basics.IQ2_XXS.gguf",
None,
"hf://Felladrin/gguf-smollm-360M-instruct-add-basics/smollm-360M-instruct-add-basics.IQ2_XXS.gguf",
id="with hf:// url",
),
pytest.param(
"Felladrin/gguf-smollm-360M-instruct-add-basics",
"smollm-360M-instruct-add-basics.IQ2_XXS.gguf",
"huggingface://Felladrin/gguf-smollm-360M-instruct-add-basics/smollm-360M-instruct-add-basics.IQ2_XXS.gguf",
None,
"hf://Felladrin/gguf-smollm-360M-instruct-add-basics/smollm-360M-instruct-add-basics.IQ2_XXS.gguf",
id="with huggingface:// url",
),
pytest.param(
"Felladrin/gguf-smollm-360M-instruct-add-basics",
"smollm-360M-instruct-add-basics.IQ2_XXS.gguf",
"Felladrin/gguf-smollm-360M-instruct-add-basics/smollm-360M-instruct-add-basics.IQ2_XXS.gguf",
{"RAMALAMA_TRANSPORT": "huggingface"},
"hf://Felladrin/gguf-smollm-360M-instruct-add-basics/smollm-360M-instruct-add-basics.IQ2_XXS.gguf",
id="with RAMALAMA_TRANSPORT=huggingface",
),
],
)
def test_pull_using_huggingface_cache(hf_repo, hf_model, model, env_vars, expected):
with RamalamaExecWorkspace(env_vars=env_vars) as ctx:
ramalama_cli = ["ramalama", "--store", str(ctx.storage_path)]
# Ensure huggingface cache exists and is set as environment variable
hf_home = ctx.workspace_path / ".cache" / "huggingface"
hf_home.mkdir(parents=True, exist_ok=True)
ctx.environ["HF_HOME"] = str(hf_home)
# Pull image using huggingface cli
ctx.check_call(["hf", "download", hf_repo, hf_model])
# Pull image using ramalama cli
ctx.check_call(ramalama_cli + ["pull", model])
# Check if the model pull is the expected
model_list = json.loads(ctx.check_output(ramalama_cli + ["list", "--json", "--sort", "modified"]))
assert model_list[0]["name"] == expected
@pytest.mark.e2e
@pytest.mark.distro_integration
@skip_if_no_container


@@ -77,7 +77,6 @@ load setup_suite
is "$output" ".*Felladrin/gguf-smollm-360M-instruct-add-basics/smollm-360M-instruct-add-basics.IQ2_XXS" "image was actually pulled locally"
run_ramalama rm huggingface://Felladrin/gguf-smollm-360M-instruct-add-basics/smollm-360M-instruct-add-basics.IQ2_XXS.gguf
skip_if_no_hf_cli
run_ramalama pull hf://HuggingFaceTB/SmolLM-135M
run_ramalama list
is "$output" ".*HuggingFaceTB/SmolLM-135M" "image was actually pulled locally"
@@ -94,9 +93,6 @@ load setup_suite
run_ramalama rm huggingface://ggml-org/SmolVLM-256M-Instruct-GGUF:Q8_0
run_ramalama pull hf://owalsh/SmolLM2-135M-Instruct-GGUF-Split:Q4_0
for i in $(seq 1 3); do
is "$output" ".*Downloading Q4_0/SmolLM2-135M-Instruct-Q4_0-0000${i}-of-00003.gguf" "model part ${i} downloaded"
done
run_ramalama list
is "$output" ".*owalsh/SmolLM2-135M-Instruct-GGUF-Split:Q4_0" "image was actually pulled locally"
run_ramalama rm hf://owalsh/SmolLM2-135M-Instruct-GGUF-Split:Q4_0
@@ -117,29 +113,6 @@ load setup_suite
is "$output" ".*Snapshot removed" "snapshot with no remaining references was deleted"
}
# bats test_tags=distro-integration
@test "ramalama pull hf cli cache" {
skip_if_no_hf_cli
hf download Felladrin/gguf-smollm-360M-instruct-add-basics smollm-360M-instruct-add-basics.IQ2_XXS.gguf
run_ramalama pull hf://Felladrin/gguf-smollm-360M-instruct-add-basics/smollm-360M-instruct-add-basics.IQ2_XXS.gguf
run_ramalama list
is "$output" ".*Felladrin/gguf-smollm-360M-instruct-add-basics/smollm-360M-instruct-add-basics.IQ2_XXS" "image was actually pulled locally from hf-cli cache"
run_ramalama rm hf://Felladrin/gguf-smollm-360M-instruct-add-basics/smollm-360M-instruct-add-basics.IQ2_XXS.gguf
run_ramalama pull huggingface://Felladrin/gguf-smollm-360M-instruct-add-basics/smollm-360M-instruct-add-basics.IQ2_XXS.gguf
run_ramalama list
is "$output" ".*Felladrin/gguf-smollm-360M-instruct-add-basics/smollm-360M-instruct-add-basics.IQ2_XXS" "image was actually pulled locally from hf-cli cache"
run_ramalama rm huggingface://Felladrin/gguf-smollm-360M-instruct-add-basics/smollm-360M-instruct-add-basics.IQ2_XXS.gguf
RAMALAMA_TRANSPORT=huggingface run_ramalama pull Felladrin/gguf-smollm-360M-instruct-add-basics/smollm-360M-instruct-add-basics.IQ2_XXS.gguf
run_ramalama list
is "$output" ".*Felladrin/gguf-smollm-360M-instruct-add-basics/smollm-360M-instruct-add-basics.IQ2_XXS" "image was actually pulled locally from hf-cli cache"
run_ramalama rm huggingface://Felladrin/gguf-smollm-360M-instruct-add-basics/smollm-360M-instruct-add-basics.IQ2_XXS.gguf
rm -rf ~/.cache/huggingface/hub/models--Felladrin--gguf-smollm-360M-instruct-add-basics
}
# bats test_tags=distro-integration
@test "ramalama pull oci" {
if is_container; then