1
0
mirror of https://github.com/containers/ramalama.git synced 2026-02-05 06:46:39 +01:00
Files
ramalama/docsite/convert_manpages.py
2025-10-16 14:13:42 -05:00

345 lines
12 KiB
Python

#!/usr/bin/env python3
"""
Convert RamaLama manpages to Docusaurus MDX format
"""
import glob
import os
import re
from pathlib import Path
def get_category_info(filename):
    """Return the ``(category, output subdirectory)`` pair for a manpage filename.

    The manpage section is taken from the filename suffix (``.1.md``,
    ``.5.md`` or ``.7.md``); any other name falls into ``misc``.
    """
    section_table = (
        ('.1.md', ('commands', 'commands/ramalama')),
        ('.5.md', ('configuration', 'configuration')),
        ('.7.md', ('platform-guides', 'platform-guides')),
    )
    for suffix, category_info in section_table:
        if filename.endswith(suffix):
            return category_info
    return 'misc', 'misc'
def extract_title_and_description(content, filename):
    """Extract the page title and description for a manpage.

    The title is derived from the filename only. The description is the text
    after the first `` - `` on the first non-blank line that follows the NAME
    heading; when no such text exists a generic description is chosen from
    the manpage section in the filename.

    Args:
        content: Full markdown text of the manpage.
        filename: Manpage filename; only its basename is used for the title.

    Returns:
        A ``(title, description)`` tuple of whitespace-stripped strings.
    """
    lines = content.split('\n')

    # Generate title from filename pattern
    base_name = os.path.basename(filename)
    title = None
    if base_name == 'ramalama.1.md':
        title = 'ramalama'  # Base command page
    elif base_name.startswith('ramalama-') and base_name.endswith('.1.md'):
        # Command pages: ramalama-chat.1.md -> chat
        title = base_name.replace('ramalama-', '').replace('.1.md', '')
    elif base_name.endswith('.7.md'):
        # Platform guides: ramalama-cuda.7.md -> cuda
        title = base_name.replace('ramalama-', '').replace('.7.md', '')
    elif base_name.endswith('.5.md'):
        # Config files with custom titles
        if base_name == 'ramalama.conf.5.md':
            title = 'Configuration File'
        elif base_name == 'ramalama-oci.5.md':
            title = 'OCI Spec'
        else:
            # Fallback for other .5.md files
            title = base_name.replace('.5.md', '')
    if title is None:
        # Fallback for any other file types
        title = base_name.replace('.md', '').replace('-', ' ')

    # Find description from the NAME section. Both "# NAME" and "## NAME" are
    # accepted so this agrees with the NAME-section removal performed during
    # conversion, which strips either heading level.
    description = ""
    name_heading = re.compile(r'#{1,2}\s+NAME')
    for i, line in enumerate(lines):
        if not name_heading.fullmatch(line.strip()):
            continue
        for candidate in lines[i + 1:]:
            name_line = candidate.strip()
            if not name_line:
                # Tolerate blank lines between the heading and its entry.
                continue
            # Only the first non-blank line is considered; a following
            # heading means the NAME section carried no entry at all.
            if not name_line.startswith('#') and ' - ' in name_line:
                _, description = name_line.split(' - ', 1)
            break
        break

    # Fallback description if not found
    if not description:
        if '.1.md' in filename:
            description = "RamaLama command reference"
        elif '.7.md' in filename:
            description = "Platform-specific setup guide"
        elif '.5.md' in filename:
            description = "Configuration file reference"
        else:
            description = "RamaLama documentation"

    return title.strip(), description.strip()
def detect_code_language(content):
    """Guess the language of a code block from its content.

    The checks run in a fixed order (shell, Python, JavaScript, Go, TOML) and
    the first matching heuristic wins.

    Args:
        content: Text inside the code fence.

    Returns:
        One of ``'bash'``, ``'python'``, ``'javascript'``, ``'go'``,
        ``'toml'``, or ``'text'`` when nothing matches.
    """
    # Shell: a "$" prompt followed by a well-known command
    if re.search(r'\$\s*(podman|docker|ramalama|curl|wget|sudo|apt|dnf|yum|pacman|brew)\s', content):
        return 'bash'
    if re.search(
        r'(import\s+[a-zA-Z_][a-zA-Z0-9_]*|def\s+[a-zA-Z_][a-zA-Z0-9_]*\(|class\s+[a-zA-Z_][a-zA-Z0-9_]*:)', content
    ):
        return 'python'
    # The pattern is assembled from adjacent raw-string literals. A
    # backslash-newline inside a single raw string is NOT a line continuation:
    # both characters stay in the pattern, which would force the "let"
    # alternative to match a literal space and newline inside the identifier.
    if re.search(
        r'(function\s+[a-zA-Z_$][a-zA-Z0-9_$]*\s*\('
        r'|const\s+[a-zA-Z_$][a-zA-Z0-9_$]*\s*='
        r'|let\s+[a-zA-Z_$][a-zA-Z0-9_$]*\s*='
        r'|var\s+[a-zA-Z_$][a-zA-Z0-9_$]*\s*=)',
        content,
    ):
        return 'javascript'
    if re.search(
        r'(package\s+[a-zA-Z_][a-zA-Z0-9_]*|func\s+[a-zA-Z_][a-zA-Z0-9_]*\(|type\s+[a-zA-Z_][a-zA-Z0-9_]*\s+struct)',
        content,
    ):
        return 'go'
    # TOML-like: a bracketed or braced group followed by "="
    if re.search(r'(\[.*\]|\{.*\})\s*=', content):
        return 'toml'
    return 'text'
def convert_markdown_to_mdx(content, filename, current_output_path, output_map):
    """Render one manpage's markdown as an MDX document with frontmatter.

    Args:
        content: Markdown text of the manpage.
        filename: Manpage filename; used for the title/description lookup and
            recorded in the generated frontmatter.
        current_output_path: Output path of this page, compared against link
            targets to detect self-references.
        output_map: Mapping from manpage basename to the output path of the
            page generated from it; used to rewrite ``.md`` links.

    Returns:
        The frontmatter followed by the converted body, as one string.
    """
    title, description = extract_title_and_description(content, filename)

    # Drop the first line when it is a "%" directive.
    first_line, _, remainder = content.partition('\n')
    if first_line.startswith('%'):
        content = remainder

    # Remove the NAME section (H1 or H2) up to the next heading or end of text.
    content = re.sub(
        r'^#{1,2}\s+NAME\s*\n(?:.*?)(?=^#{1,6}\s|\Z)',
        '',
        content,
        flags=re.MULTILINE | re.DOTALL,
    )

    # Upper-case manpage headings become title-case headings.
    heading_renames = (
        ('## SYNOPSIS', '## Synopsis'),
        ('## DESCRIPTION', '## Description'),
        ('## OPTIONS', '## Options'),
        ('## EXAMPLES', '## Examples'),
        ('## SEE ALSO', '## See Also'),
    )
    for old_heading, new_heading in heading_renames:
        content = content.replace(old_heading, new_heading)

    # The HISTORY section is moved to an italic footer at the end of the page.
    history_re = re.compile(r'## HISTORY\n(.+?)(?=\n##|\n$|$)', re.DOTALL)
    history_found = history_re.search(content)
    if history_found is not None:
        footer_text = history_found.group(1).strip()
        content = history_re.sub('', content)
        # TOC entries pointing at HISTORY would dangle once it is a footer.
        content = re.sub(r'\s*- \[HISTORY\]\(#history\)\n?', '', content)
        content += f"\n\n---\n\n*{footer_text}*"

    # **[text](target)** -> [text](target)
    content = re.sub(r'\*\*\[([^\]]+)\]\(([^)]+)\)\*\*', r'[\1](\2)', content)

    def rewrite_md_link(found):
        """Point a link to a known manpage at its generated docs route."""
        label, href = found.group(1), found.group(2)
        unchanged = f'[{label}]({href})'
        # External URLs are left alone.
        if href.startswith(('http://', 'https://')):
            return unchanged
        destination = output_map.get(os.path.basename(href))
        if not destination:
            return unchanged
        if destination == current_output_path:
            return f'[{label}](#)'  # Self-reference
        if destination == Path('commands/ramalama/ramalama.mdx'):
            return f'[{label}](/docs/commands/ramalama/)'
        # Absolute doc URL (prefixed with /docs), without the file suffix.
        route = '/docs/' + destination.with_suffix('').as_posix()
        return f'[{label}]({route})'

    content = re.sub(r'\[([^\]]+)\]\(([^)]+\.md)\)', rewrite_md_link, content)

    # Notes become MDX admonitions. The patterns are applied strictly in this
    # order: blockquote "Note:" / "NOTE:" forms first, then standalone
    # "Note:" (with and without a blank line before the text), then "NOTE:".
    note_patterns = (
        r' > \*\*Note:\*\*(.*?)(?=\n(?! >)|\n\n|$)',
        r'> \*\*Note:\*\*(.*?)(?=\n(?!>)|\n\n|$)',
        r' > \*\*NOTE:\*\*(.*?)(?=\n(?! >)|\n\n|$)',
        r'> \*\*NOTE:\*\*(.*?)(?=\n(?!>)|\n\n|$)',
        r'^Note:\s*\n\n(.*?)(?=\n\nNote:|\n\n[A-Z][A-Z]|\n\n##|$)',
        r'^Note:(.*?)(?=\n\nNote:|\n\n[A-Z][A-Z]|\n\n##|$)',
        r'^NOTE:(.*?)(?=\n\n|\n[A-Z]|\n#|$)',
    )
    for note_pattern in note_patterns:
        content = re.sub(note_pattern, r':::note\n\1\n:::', content, flags=re.MULTILINE | re.DOTALL)

    def normalize_fence(found):
        """Re-emit a fenced block with an explicit or detected language tag."""
        whole = found.group(0)
        body = found.group(1) or ""
        # Remove any internal code block markers
        body = re.sub(r'```[a-zA-Z0-9_+-]*\s*\n?', '', body)
        opening = whole.split('\n')[0]
        if whole.startswith('```') and len(opening) > 3:
            # A language was given on the opening fence; keep it.
            lang = opening.replace('```', '')
        else:
            lang = detect_code_language(body)
        return f'```{lang}\n{body.strip()}\n```'

    content = re.sub(r'```(?:[a-zA-Z0-9_+-]*)\n((?:(?!```)[\s\S])*?)```', normalize_fence, content)

    # Angle-bracketed e-mail addresses are written with HTML entities for MDX.
    content = re.sub(
        r'<([^>]+@[^>]+)>',
        lambda found: "&lt;" + found.group(1).replace('@', '&#64;') + "&gt;",
        content,
    )

    # Collapse runs of blank lines and trim the ends.
    content = re.sub(r'\n{3,}', '\n\n', content).strip()

    frontmatter_lines = [
        "---",
        f"title: {title}",
        f"description: {description}",
        "# This file is auto-generated from manpages. Do not edit manually.",
        f"# Source: {filename}",
        "---",
        f"# {title}",
        "",
    ]
    return "\n".join(frontmatter_lines) + content
def get_output_filename(input_filename):
    """Return the ``.mdx`` filename generated for a manpage file.

    Only the basename of ``input_filename`` is considered.
    """
    name = os.path.basename(input_filename)

    # Base ramalama command keeps its name.
    if name == 'ramalama.1.md':
        return 'ramalama.mdx'

    # Commands and platform guides drop the "ramalama-" prefix:
    # ramalama-chat.1.md -> chat.mdx, ramalama-cuda.7.md -> cuda.mdx
    has_dash_prefix = name.startswith('ramalama-')
    for section_suffix in ('.1.md', '.7.md'):
        if has_dash_prefix and name.endswith(section_suffix):
            return name.replace('ramalama-', '').replace(section_suffix, '.mdx')

    # Config: ramalama.conf.5.md -> conf.mdx
    if name.endswith('.5.md'):
        return name.replace('ramalama.', '').replace('.5.md', '.mdx')

    return name.replace('.md', '.mdx')
def clean_auto_generated_files(docsite_docs_dir):
    """Delete every ``.mdx`` file under ``docsite_docs_dir`` that carries the
    auto-generated marker comment.

    Files that cannot be read or removed are reported with a warning and
    skipped, so one bad file does not stop the cleanup.
    """
    print("Cleaning up auto-generated files from previous runs...")

    marker = '# This file is auto-generated from manpages'
    search_pattern = str(docsite_docs_dir / '**/*.mdx')
    removed = 0

    for candidate in glob.glob(search_pattern, recursive=True):
        try:
            with open(candidate, 'r', encoding='utf-8') as handle:
                is_generated = marker in handle.read()
            if not is_generated:
                continue
            os.remove(candidate)
            removed += 1
            print(f" Removed: {candidate}")
        except Exception as e:
            print(f" Warning: Could not process {candidate}: {e}")

    print(f"✅ Cleaned up {removed} auto-generated files")
def main():
    """Convert every manpage in ``../docs`` into an MDX page under ``./docs``.

    Previously generated pages are removed first. All output paths are
    computed before any page is converted so that links between manpages can
    be resolved through the complete filename-to-output map.
    """
    source_dir = Path('../docs')
    site_docs_dir = Path('./docs')

    # Clean up auto-generated files from previous runs
    clean_auto_generated_files(site_docs_dir)

    # Collect manpage sources, leaving out README files
    manpage_paths = [
        path for path in glob.glob(str(source_dir / '*.md')) if not path.endswith('README.md')
    ]
    print(f"\nFound {len(manpage_paths)} manpage files to convert")

    # First pass: decide where every page will be written
    entries = []
    output_map = {}
    for manpage_path in manpage_paths:
        name = os.path.basename(manpage_path)
        _, subdir = get_category_info(name)
        destination = Path(subdir) / get_output_filename(name)
        entries.append((manpage_path, name, destination))
        output_map[name] = destination

    # Second pass: convert and write each page
    for manpage_path, name, destination in entries:
        print(f"Converting {name}...")
        with open(manpage_path, 'r', encoding='utf-8') as source:
            markdown = source.read()

        mdx = convert_markdown_to_mdx(markdown, name, destination, output_map)

        output_path = site_docs_dir / destination
        output_path.parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, 'w', encoding='utf-8') as target:
            target.write(mdx)
        print(f" -> {output_path}")

    print("\nConversion complete!")
# Run the conversion only when this file is executed as a script.
if __name__ == '__main__':
    main()