mirror of https://github.com/vuejs/vitepress
parent
1ec84c1504
commit
673db7d4b6
@ -0,0 +1,11 @@
|
||||
/**
 * Built-in template for the `llms.txt` file.
 *
 * Consists of the `{title}` heading, the `{description}` and `{details}`
 * placeholders, and a "Table of Contents" heading followed by `{toc}`,
 * each separated from the next by one blank line.
 */
export const defaultLLMsTxtTemplate = [
  '# {title}',
  '{description}',
  '{details}',
  '## Table of Contents',
  '{toc}',
].join('\n\n')
|
@ -0,0 +1,175 @@
|
||||
import fs from 'node:fs/promises'
|
||||
import path from 'node:path'
|
||||
|
||||
import matter from 'gray-matter'
|
||||
|
||||
import type { DefaultTheme } from 'vitepress'
|
||||
import { defaultLLMsTxtTemplate } from '../constants'
|
||||
import type {
|
||||
LinksExtension,
|
||||
LlmstxtSettings,
|
||||
PreparedFile,
|
||||
VitePressConfig,
|
||||
} from '../types'
|
||||
import { generateTOC } from './toc'
|
||||
import { expandTemplate, extractTitle, generateMetadata } from './utils'
|
||||
|
||||
/**
 * Settings accepted by `generateLLMsTxt`.
 */
export interface GenerateLLMsTxtOptions {
  /**
   * Path of the main `index.md` documentation file. Its frontmatter is read
   * to derive the default title, description and details.
   */
  indexMd: string

  /**
   * Source directory; file paths are made relative to it when links are built.
   */
  srcDir: VitePressConfig['vitepress']['srcDir']

  /**
   * Template used to render `llms.txt`. The built-in default template is used
   * when this is omitted.
   */
  LLMsTxtTemplate?: LlmstxtSettings['customLLMsTxtTemplate']

  /**
   * Values for the template placeholders. Entries provided here take
   * precedence over the values derived from `index.md` and the VitePress config.
   */
  templateVariables?: LlmstxtSettings['customTemplateVariables']

  /**
   * The VitePress user configuration, consulted for title, description and
   * the theme sidebar.
   */
  vitepressConfig?: VitePressConfig['vitepress']['userConfig']

  /**
   * Base domain to prepend to generated links.
   */
  domain?: LlmstxtSettings['domain']

  /**
   * Extension appended to generated links.
   */
  linksExtension?: LinksExtension

  /**
   * When enabled, generated links carry no extension.
   */
  cleanUrls?: VitePressConfig['cleanUrls']

  /**
   * Sidebar configuration used to organise the TOC; takes precedence over
   * the sidebar found in `vitepressConfig`.
   */
  sidebar?: DefaultTheme.Sidebar
}
|
||||
|
||||
/**
 * Generates a LLMs.txt file with a table of contents and links to all documentation sections.
 *
 * Template variables are resolved in this order: values supplied through
 * `options.templateVariables` win; anything missing is derived from the
 * frontmatter of `index.md` and then from the VitePress configuration.
 * The caller's `templateVariables` object is never mutated, so the same
 * settings object can safely be reused across several invocations.
 *
 * @param preparedFiles - An array of prepared files.
 * @param options - Options for generating the `llms.txt` file.
 * @returns A string representing the content of the `llms.txt` file.
 *
 * @example
 * ```markdown
 * # Shadcn for Vue
 *
 * > Beautifully designed components built with Radix Vue and Tailwind CSS.
 *
 * ## Table of Contents
 *
 * - [Getting started](/docs/getting-started.md)
 * - [About](/docs/about.md)
 * - ...
 * ```
 *
 * @see https://llmstxt.org/#format
 */
export async function generateLLMsTxt(
  preparedFiles: PreparedFile[],
  options: GenerateLLMsTxtOptions,
): Promise<string> {
  const {
    indexMd,
    srcDir,
    LLMsTxtTemplate = defaultLLMsTxtTemplate,
    templateVariables: customTemplateVariables = {},
    vitepressConfig,
    domain,
    sidebar,
    linksExtension,
    cleanUrls,
  } = options

  // Work on a shallow copy: the assignments below would otherwise write the
  // derived title/description/toc back into the caller's settings object,
  // making later calls reuse stale values and stack `> ` prefixes.
  const templateVariables = { ...customTemplateVariables }

  // Reset gray-matter's cache before parsing `index.md`.
  // NOTE(review): `clearCache` is presumably missing from the typings, hence the suppression.
  // @ts-expect-error
  matter.clearCache()

  const indexMdContent = await fs.readFile(indexMd, 'utf-8')
  const indexMdFile = matter(indexMdContent)

  templateVariables.title ??=
    indexMdFile.data?.hero?.name ||
    indexMdFile.data?.title ||
    vitepressConfig?.title ||
    vitepressConfig?.titleTemplate ||
    extractTitle(indexMdFile) ||
    'LLMs Documentation'

  templateVariables.description ??=
    indexMdFile.data?.hero?.text ||
    vitepressConfig?.description ||
    indexMdFile?.data?.description ||
    indexMdFile.data?.titleTemplate

  // The description is rendered as a Markdown blockquote.
  if (templateVariables.description) {
    templateVariables.description = `> ${templateVariables.description}`
  }

  // Fall back to a generic sentence only when there is no description either.
  templateVariables.details ??=
    indexMdFile.data?.hero?.tagline ||
    indexMdFile.data?.tagline ||
    (templateVariables.description
      ? undefined
      : 'This file contains links to all documentation sections.')

  templateVariables.toc ??= await generateTOC(preparedFiles, {
    srcDir,
    domain,
    sidebarConfig: sidebar || vitepressConfig?.themeConfig?.sidebar,
    linksExtension,
    cleanUrls,
  })

  return expandTemplate(LLMsTxtTemplate, templateVariables)
}
|
||||
|
||||
/**
 * Settings accepted by `generateLLMsFullTxt`.
 */
export interface GenerateLLMsFullTxtOptions {
  /**
   * Source directory; each file path is made relative to it.
   */
  srcDir: VitePressConfig['vitepress']['srcDir']

  /**
   * Base domain to prepend to generated links.
   */
  domain?: LlmstxtSettings['domain']

  /**
   * Extension appended to generated links.
   */
  linksExtension?: LinksExtension

  /**
   * When enabled, generated links carry no extension.
   */
  cleanUrls?: VitePressConfig['cleanUrls']
}
|
||||
|
||||
/**
 * Builds the content of `llms-full.txt`: every prepared document in one string.
 *
 * Each document is serialised with generated frontmatter metadata, and the
 * documents are joined with a `---` separator line.
 *
 * @param preparedFiles - An array of prepared files.
 * @param options - Options for generating the `llms-full.txt` file.
 * @returns The concatenated content of all documents.
 */
export function generateLLMsFullTxt(
  preparedFiles: PreparedFile[],
  options: GenerateLLMsFullTxtOptions,
) {
  const { srcDir, domain, linksExtension, cleanUrls } = options
  const documents: string[] = []

  for (const preparedFile of preparedFiles) {
    const metadata = generateMetadata(preparedFile.file, {
      domain,
      filePath: path.relative(srcDir, preparedFile.path),
      linksExtension,
      cleanUrls,
    })

    documents.push(matter.stringify(preparedFile.file.content, metadata))
  }

  return documents.join('\n---\n\n')
}
|
@ -0,0 +1,43 @@
|
||||
import pc from 'picocolors'
|
||||
|
||||
/**
 * Prefix put in front of every log line: the plugin name in blue followed by
 * a dimmed ` » ` separator.
 */
const logPrefix = `${pc.blue('llmstxt')}${pc.dim(' » ')}`
|
||||
|
||||
/** Console logger whose methods all prepend the plugin's log prefix. */
const log = {
  /**
   * Prints an informational message via `console.log`.
   *
   * @param message - The message to log.
   */
  info(message: string) {
    console.log(`${logPrefix} ${message}`)
  },

  /**
   * Prints a success message, marked with a green check, via `console.log`.
   *
   * @param message - The message to log.
   */
  success(message: string) {
    const mark = pc.green('✓')
    console.log(`${logPrefix}${mark} ${message}`)
  },

  /**
   * Prints a yellow warning message via `console.warn`.
   *
   * @param message - The message to log.
   */
  warn(message: string) {
    const mark = pc.yellow('⚠')
    console.warn(`${logPrefix}${mark} ${pc.yellow(message)}`)
  },

  /**
   * Prints a red error message via `console.error`.
   *
   * @param message - The message to log.
   */
  error(message: string) {
    const mark = pc.red('✗')
    console.error(`${logPrefix}${mark} ${pc.red(message)}`)
  },
}
|
||||
|
||||
export default log
|
@ -0,0 +1,309 @@
|
||||
import path from 'node:path'
|
||||
import type { DefaultTheme } from 'vitepress'
|
||||
import type {
|
||||
LinksExtension,
|
||||
LlmstxtSettings,
|
||||
PreparedFile,
|
||||
VitePressConfig,
|
||||
} from '../types'
|
||||
import { generateLink, stripExtPosix } from './utils'
|
||||
|
||||
/**
 * Builds one Markdown list item of the table of contents for a file.
 *
 * The item has the shape `- [title](link)`, followed by `: description` when
 * the file's frontmatter has a description, and ends with a newline.
 *
 * @param file - The prepared file.
 * @param domain - The base domain for the generated link.
 * @param relativePath - The relative path of the file; its extension is stripped before the link is built.
 * @param extension - The link extension for the generated link (`.md` when omitted).
 * @param cleanUrls - Whether to use clean URLs (without the extension).
 * @returns The formatted TOC entry as a Markdown list item.
 */
export const generateTOCLink = (
  file: PreparedFile,
  domain: LlmstxtSettings['domain'],
  relativePath: string,
  extension?: LinksExtension,
  cleanUrls: VitePressConfig['cleanUrls'] = false,
) => {
  const description: string = file.file.data.description
  const href = generateLink(
    stripExtPosix(relativePath),
    domain,
    extension ?? '.md',
    cleanUrls,
  )
  const descriptionSuffix = description ? `: ${description.trim()}` : ''

  return `- [${file.title}](${href})${descriptionSuffix}\n`
}
|
||||
|
||||
/**
 * Gathers the `link` of every sidebar item, descending into nested `items`.
 *
 * An item's own link is listed before the links of its children.
 *
 * @param items - Array of sidebar items to process.
 * @returns Array of paths collected from the sidebar items.
 */
function collectPathsFromSidebarItems(
  items: DefaultTheme.SidebarItem[],
): string[] {
  return items.flatMap((item) => {
    const ownLink = item.link ? [item.link] : []
    const childLinks =
      item.items && Array.isArray(item.items)
        ? collectPathsFromSidebarItems(item.items)
        : []

    return [...ownLink, ...childLinks]
  })
}
|
||||
|
||||
/**
 * Brings a link path into a canonical form for comparison.
 *
 * The extension is stripped, and a path whose last segment is `index` is
 * reduced to its parent directory.
 *
 * @param link - The link path to normalize.
 * @returns Normalized link path for consistent comparison.
 */
export function normalizeLinkPath(link: string): string {
  const withoutExtension = stripExtPosix(link)
  const pointsAtIndex = path.basename(withoutExtension) === 'index'

  return pointsAtIndex ? path.dirname(withoutExtension) : withoutExtension
}
|
||||
|
||||
/**
 * Tells whether a file path and a sidebar path refer to the same page.
 *
 * Both paths are normalized first; they match when the results are equal or
 * when the file path equals the sidebar path with `.md` appended.
 *
 * @param filePath - The file path to check.
 * @param sidebarPath - The sidebar path to compare against.
 * @returns True if paths match, false otherwise
 */
export function isPathMatch(filePath: string, sidebarPath: string): boolean {
  const fileKey = normalizeLinkPath(filePath)
  const sidebarKey = normalizeLinkPath(sidebarPath)

  if (fileKey === sidebarKey) {
    return true
  }

  return fileKey === `${sidebarKey}.md`
}
|
||||
|
||||
/**
 * Processes sidebar items and generates TOC entries in the exact order they appear in sidebar config
 *
 * The section's `text` becomes a heading of `depth` `#` characters. Items
 * that only carry a `link` are emitted as list entries directly below the
 * heading; items that have children are rendered recursively as nested
 * sections (one heading level deeper) after the list entries.
 *
 * Items are handled one after another rather than concurrently: collecting
 * results from concurrent callbacks made nested sections appear in completion
 * order, which differs from the configured order when siblings have
 * different nesting depths.
 *
 * NOTE(review): an item that has both `link` and `items` is rendered as a
 * nested section only; its own link is not emitted here.
 *
 * @param section - A sidebar section
 * @param preparedFiles - An array of prepared files
 * @param srcDir - The VitePress source directory
 * @param domain - Optional domain to prefix URLs with
 * @param linksExtension - The link extension for generated links.
 * @param cleanUrls - Whether to use clean URLs (without the extension).
 * @param depth - Current depth level for headings
 * @returns A string representing the formatted section of the TOC
 */
async function processSidebarSection(
  section: DefaultTheme.SidebarItem,
  preparedFiles: PreparedFile[],
  srcDir: VitePressConfig['vitepress']['srcDir'],
  domain?: LlmstxtSettings['domain'],
  linksExtension?: LinksExtension,
  cleanUrls?: VitePressConfig['cleanUrls'],
  depth = 3,
): Promise<string> {
  let sectionTOC = ''

  // Add section header only if it has text and is not just a link container
  if (section.text) {
    sectionTOC += `${'#'.repeat(depth)} ${section.text}\n\n`
  }

  // Process items in this section
  if (section.items && Array.isArray(section.items)) {
    const linkItems: string[] = []
    const nestedSections: string[] = []

    // Separate link items and nested sections, preserving sidebar order
    for (const item of section.items) {
      if (item.items && item.items.length > 0) {
        // Increase depth for nested sections to maintain proper heading levels
        nestedSections.push(
          await processSidebarSection(
            item,
            preparedFiles,
            srcDir,
            domain,
            linksExtension,
            cleanUrls,
            depth + 1,
          ),
        )
      } else if (item.link) {
        // Normalize the link for matching
        const normalizedItemLink = normalizeLinkPath(item.link)

        const matchingFile = preparedFiles.find((file) => {
          const relativePath = `/${stripExtPosix(path.relative(srcDir, file.path))}`
          return isPathMatch(relativePath, normalizedItemLink)
        })

        // Links that do not correspond to a prepared file are skipped
        if (matchingFile) {
          linkItems.push(
            generateTOCLink(
              matchingFile,
              domain,
              path.relative(srcDir, matchingFile.path),
              linksExtension,
              cleanUrls,
            ),
          )
        }
      }
    }

    // Add link items if any
    if (linkItems.length > 0) {
      sectionTOC += linkItems.join('')
    }

    // Add a blank line before nested sections if we have link items
    if (linkItems.length > 0 && nestedSections.length > 0) {
      sectionTOC += '\n'
    }

    // Add nested sections with appropriate spacing
    if (nestedSections.length > 0) {
      sectionTOC += nestedSections.join('\n')
    }
  }

  return sectionTOC
}
|
||||
|
||||
/**
 * Turns a sidebar configuration into a single list of sidebar items.
 *
 * An array is returned unchanged; for an object keyed by path, the values of
 * all keys are concatenated; anything else yields an empty list.
 *
 * @param sidebarConfig - The sidebar configuration from VitePress.
 * @returns An array of sidebar items.
 */
function flattenSidebarConfig(
  sidebarConfig: DefaultTheme.Sidebar,
): DefaultTheme.SidebarItem[] {
  if (Array.isArray(sidebarConfig)) {
    return sidebarConfig
  }

  return typeof sidebarConfig === 'object'
    ? Object.values(sidebarConfig).flat()
    : []
}
|
||||
|
||||
/**
 * Settings accepted by `generateTOC`.
 */
export interface GenerateTOCOptions {
  /** VitePress source directory; file paths are made relative to it. */
  srcDir: VitePressConfig['vitepress']['srcDir']

  /** Domain to prefix generated URLs with, if any. */
  domain?: LlmstxtSettings['domain']

  /** VitePress sidebar configuration used to group the entries, if any. */
  sidebarConfig?: DefaultTheme.Sidebar

  /** Extension appended to generated links. */
  linksExtension?: LinksExtension

  /** When enabled, generated links carry no extension. */
  cleanUrls?: VitePressConfig['cleanUrls']
}
|
||||
|
||||
/**
 * Generates a Table of Contents (TOC) for the provided prepared files.
 *
 * Each entry in the TOC is formatted as a markdown link to the corresponding
 * text file. If a VitePress sidebar configuration is provided, the TOC is
 * organized into sections based on the sidebar structure: top-level sections
 * get a `###` heading and every nesting level adds one more `#`. Files that
 * are not referenced by the sidebar are listed afterwards under an
 * `### Other` heading. Without a (non-empty) sidebar, all files are listed
 * as a flat list.
 *
 * Files already placed through the sidebar are not listed a second time.
 *
 * @param preparedFiles - An array of prepared files.
 * @param options - Options for generating the TOC.
 * @returns A string representing the formatted Table of Contents.
 */
export async function generateTOC(
  preparedFiles: PreparedFile[],
  options: GenerateTOCOptions,
): Promise<string> {
  const { srcDir, domain, sidebarConfig, linksExtension, cleanUrls } = options
  let tableOfContent = ''

  // Files still to be listed as a flat list at the end of the TOC
  let filesToProcess = preparedFiles

  if (sidebarConfig) {
    // Flatten sidebar config if it's an object with path keys
    const flattenedSidebarConfig = flattenSidebarConfig(sidebarConfig)

    if (flattenedSidebarConfig.length > 0) {
      // Process each top-level section in the flattened sidebar
      for (const section of flattenedSidebarConfig) {
        tableOfContent += await processSidebarSection(
          section,
          preparedFiles,
          srcDir,
          domain,
          linksExtension,
          cleanUrls,
        )
        tableOfContent += '\n'
      }

      // Keep only the files that didn't match any sidebar entry. This must
      // happen even when nothing is left over; otherwise every file would be
      // appended again below, duplicating the whole TOC.
      const allSidebarPaths = collectPathsFromSidebarItems(
        flattenedSidebarConfig,
      )
      filesToProcess = preparedFiles.filter((file) => {
        const relativePath = `/${stripExtPosix(path.relative(srcDir, file.path))}`
        return !allSidebarPaths.some((sidebarPath) =>
          isPathMatch(relativePath, sidebarPath),
        )
      })

      if (filesToProcess.length > 0) {
        tableOfContent += '### Other\n\n'
      }
    }
  }

  tableOfContent += filesToProcess
    .map((file) =>
      generateTOCLink(
        file,
        domain,
        path.relative(srcDir, file.path),
        linksExtension,
        cleanUrls,
      ),
    )
    .join('')

  return tableOfContent
}
|
@ -0,0 +1,207 @@
|
||||
import path from 'node:path'
|
||||
import byteSize from 'byte-size'
|
||||
import type { GrayMatterFile, Input } from 'gray-matter'
|
||||
// @ts-expect-error
|
||||
import markdownTitle from 'markdown-title'
|
||||
import type { LinksExtension, LlmstxtSettings, VitePressConfig } from '../types'
|
||||
|
||||
/**
 * Breaks a file path into its directory part and its file-name part.
 *
 * @param filepath - The path to the file.
 * @returns An object with `dir` (the directory name) and `file` (the base name).
 */
export const splitDirAndFile = (filepath: string) => {
  const dir = path.dirname(filepath)
  const file = path.basename(filepath)

  return { dir, file }
}
|
||||
|
||||
/**
 * Removes the extension from the last segment of a file path.
 *
 * The directory part is kept and re-joined with the extension-less file name
 * using the platform's path rules.
 *
 * @param filepath - The path to the file.
 * @returns The path without the file extension.
 */
export const stripExt = (filepath: string) => {
  const fileName = path.basename(filepath)
  const bareName = path.basename(fileName, path.extname(fileName))

  return path.join(path.dirname(filepath), bareName)
}
|
||||
|
||||
/**
 * Strips the file extension from a given file path using POSIX format.
 *
 * Platform-native separators are converted to `/` before the path is taken
 * apart, so a path produced by `path.relative` on Windows (`a\b\c.md`) yields
 * `a/b/c` instead of a mix of both separator styles.
 *
 * @param filepath - The path to the file.
 * @returns The path without the extension, using `/` as separator.
 */
export const stripExtPosix = (filepath: string) => {
  // No-op on POSIX platforms, where `path.sep` already is `/`.
  const posixPath = filepath.split(path.sep).join(path.posix.sep)
  const file = path.posix.basename(posixPath)

  return path.posix.join(
    path.posix.dirname(posixPath),
    path.posix.basename(file, path.posix.extname(file)),
  )
}
|
||||
|
||||
/**
 * Determines the title of a parsed markdown file.
 *
 * The frontmatter `title` is preferred, then the frontmatter `titleTemplate`;
 * only when neither is set is the title looked up in the markdown content.
 *
 * @param file - The markdown file to extract the title from.
 * @returns The extracted title. NOTE(review): despite the declared `string`
 * return type, the result is whatever the content lookup returns when the
 * frontmatter has no title — callers in this project guard against a missing value.
 */
export function extractTitle(file: GrayMatterFile<Input>): string {
  const frontmatterTitle = file.data?.title || file.data?.titleTemplate

  if (frontmatterTitle) {
    return frontmatterTitle
  }

  return markdownTitle(file.content)
}
|
||||
|
||||
/**
 * Creates a regular expression to match a specific template variable in the format `{key}`.
 *
 * The expression also captures an optional blank line directly in front of
 * the placeholder (capture group 1), so callers can drop or keep it together
 * with the placeholder.
 *
 * The key is matched literally: regular-expression metacharacters in it are
 * escaped, so keys such as `a.b` or `c++` neither over-match nor throw.
 *
 * @param key - The name of the template variable to match.
 * @returns A global, case-insensitive regular expression that detects `{key}` occurrences in a string.
 *
 * @example
 * ```ts
 * const regex = templateVariable('name');
 * console.log(regex.test('Hello {name}')); // true
 * ```
 */
const templateVariable = (key: string) => {
  const escapedKey = key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')

  return new RegExp(`(\\n\\s*\\n)?\\{${escapedKey}\\}`, 'gi')
}
|
||||
|
||||
/**
 * Substitutes every `{variable}` placeholder in `content`.
 *
 * The replacement is `value` when it is non-empty, otherwise `fallback` when
 * that is non-empty. When neither is available, the placeholder is removed
 * together with the blank line directly in front of it; otherwise such a
 * blank line is kept as exactly one empty line before the inserted text.
 *
 * @param content - The template string containing placeholders.
 * @param variable - The template variable name to replace.
 * @param value - The value to replace the variable with.
 * @param fallback - An optional fallback value if `value` is empty.
 * @returns A new string with the template variable replaced.
 *
 * @example
 * ```ts
 * const template = 'Hello {name}!';
 * const result = replaceTemplateVariable(template, 'name', 'Alice', 'User');
 * console.log(result); // 'Hello Alice!'
 * ```
 */
export function replaceTemplateVariable(
  content: string,
  variable: string,
  value: string | undefined,
  fallback?: string,
) {
  let substitute: string | undefined = ''
  if (value?.length) {
    substitute = value
  } else if (fallback?.length) {
    substitute = fallback
  }

  const placeholder = templateVariable(variable)

  return content.replace(placeholder, (_match, leadingBlankLine) => {
    if (!substitute) {
      return ''
    }

    return leadingBlankLine ? `\n\n${substitute}` : `${substitute}`
  })
}
|
||||
|
||||
/**
 * Fills in a template by substituting each given variable, one after another.
 *
 * @param template - The template string containing placeholders.
 * @param variables - An object mapping variable names to their respective values.
 * @returns A string with all template variables replaced.
 *
 * @example
 * ```ts
 * const template = 'Hello {name}, welcome to {place}!';
 * const values = { name: 'Alice', place: 'Wonderland' };
 * const result = expandTemplate(template, values);
 * console.log(result); // 'Hello Alice, welcome to Wonderland!'
 * ```
 */
export const expandTemplate = (
  template: string,
  variables: Record<string, string | undefined>,
) => {
  let expanded = template

  for (const [name, replacement] of Object.entries(variables)) {
    expanded = replaceTemplateVariable(expanded, name, replacement)
  }

  return expanded
}
|
||||
|
||||
/**
 * Generates a complete link by combining a domain, path, and an optional extension.
 *
 * The parts are concatenated directly, so text inside `path` or `domain`
 * that happens to look like a template placeholder (e.g. `{extension}`) is
 * kept verbatim. Trailing slashes on the domain and leading slashes on the
 * path are dropped so the two are always joined by exactly one `/`.
 *
 * @param path - The path to append to the domain (e.g., "guide").
 * @param domain - The base domain of the link (e.g., "https://example.com").
 *   When omitted, the link is root-relative.
 * @param extension - An optional extension to append to the path (e.g., ".md").
 * @param cleanUrls - When truthy, the extension is left out.
 * @returns The generated link
 */
export const generateLink = (
  path: string,
  domain?: string,
  extension?: LinksExtension,
  cleanUrls?: VitePressConfig['cleanUrls'],
) => {
  const base = (domain || '').replace(/\/+$/, '')
  const relativePath = path.replace(/^\/+/, '')
  const suffix = cleanUrls ? '' : (extension ?? '')

  return `${base}/${relativePath}${suffix}`
}
|
||||
|
||||
/**
 * Settings accepted by `generateMetadata`.
 */
export interface GenerateMetadataOptions {
  /**
   * Domain name to prepend to the generated URL, if any.
   */
  domain?: LlmstxtSettings['domain']

  /**
   * Path to the file relative to the content root; its extension is stripped
   * when the URL is built.
   */
  filePath: string

  /**
   * Extension appended to the generated URL (`.md` when omitted).
   */
  linksExtension?: LinksExtension

  /**
   * When enabled, the generated URL carries no extension.
   */
  cleanUrls?: VitePressConfig['cleanUrls']
}
|
||||
|
||||
/**
 * Builds the frontmatter metadata that accompanies a markdown file for LLMs.
 *
 * The result always contains the file's `url`; a `description` is added when
 * the source frontmatter has a non-empty one.
 *
 * @param sourceFile - Parsed markdown file with frontmatter using gray-matter.
 * @param options - Options for generating metadata.
 * @returns Object containing metadata properties for the file.
 *
 * @example
 * generateMetadata(preparedFile, { domain: 'https://example.com', filePath: 'docs/guide' })
 * // Returns { url: 'https://example.com/docs/guide.md', description: 'A guide' }
 */
export function generateMetadata<GrayMatter extends GrayMatterFile<Input>>(
  sourceFile: GrayMatter,
  options: GenerateMetadataOptions,
) {
  const metadata: Record<string, string> = {
    url: generateLink(
      stripExtPosix(options.filePath),
      options.domain,
      options.linksExtension ?? '.md',
      options.cleanUrls,
    ),
  }

  const description = sourceFile.data?.description
  if (description?.length) {
    metadata.description = description
  }

  return metadata
}
|
||||
|
||||
/**
 * Formats the size of a string, in bytes, as human-readable text.
 *
 * The byte count is taken from a `Blob` built from the string and then
 * formatted with `byte-size`.
 *
 * @param string - The input string whose size needs to be determined.
 * @returns A human-readable size string (e.g., "1.2 KB", "500 B").
 */
export const getHumanReadableSizeOf = (string: string) => {
  const sizeInBytes = new Blob([string]).size

  return byteSize(sizeInBytes).toString()
}
|
@ -0,0 +1,365 @@
|
||||
import type { Plugin, ViteDevServer } from 'vite'
|
||||
|
||||
import fs from 'node:fs/promises'
|
||||
import path from 'node:path'
|
||||
|
||||
import matter, { type Input } from 'gray-matter'
|
||||
import { minimatch } from 'minimatch'
|
||||
import pc from 'picocolors'
|
||||
import { remark } from 'remark'
|
||||
import remarkFrontmatter from 'remark-frontmatter'
|
||||
|
||||
import { remove } from 'unist-util-remove'
|
||||
|
||||
import { millify } from 'millify'
|
||||
import { approximateTokenSize } from 'tokenx'
|
||||
import { defaultLLMsTxtTemplate } from './constants'
|
||||
import { generateLLMsFullTxt, generateLLMsTxt } from './helpers/index'
|
||||
import log from './helpers/logger'
|
||||
import {
|
||||
expandTemplate,
|
||||
extractTitle,
|
||||
generateMetadata,
|
||||
getHumanReadableSizeOf,
|
||||
stripExt,
|
||||
} from './helpers/utils'
|
||||
import type {
|
||||
CustomTemplateVariables,
|
||||
LlmstxtSettings,
|
||||
PreparedFile,
|
||||
VitePressConfig,
|
||||
} from './types'
|
||||
|
||||
/** Name the plugin registers under (`name` of the returned plugin object); also shown in its startup log line. */
const PLUGIN_NAME = 'llmstxt'
|
||||
|
||||
/**
|
||||
* [VitePress](http://vitepress.dev/) plugin for generating raw documentation
|
||||
* for **LLMs** in Markdown format which is much lighter and more efficient for LLMs
|
||||
*
|
||||
* @param [userSettings={}] - Plugin settings.
|
||||
*
|
||||
* @see https://github.com/okineadev/vitepress-plugin-llms
|
||||
* @see https://llmstxt.org/
|
||||
*/
|
||||
export default function llmstxt(userSettings: LlmstxtSettings = {}): Plugin {
|
||||
// Create a settings object with defaults explicitly merged
|
||||
const settings: Omit<LlmstxtSettings, 'workDir'> & { workDir: string } = {
|
||||
generateLLMsTxt: true,
|
||||
generateLLMsFullTxt: true,
|
||||
generateLLMFriendlyDocsForEachPage: true,
|
||||
ignoreFiles: [],
|
||||
workDir: undefined as unknown as string,
|
||||
stripHTML: true,
|
||||
...userSettings,
|
||||
}
|
||||
|
||||
// Store the resolved Vite config
|
||||
let config: VitePressConfig
|
||||
|
||||
// Set to store all markdown file paths
|
||||
const mdFiles: Set<string> = new Set()
|
||||
|
||||
// Flag to identify which build we're in
|
||||
let isSsrBuild = false
|
||||
|
||||
return {
|
||||
name: PLUGIN_NAME,
|
||||
|
||||
/** Resolves the Vite configuration and sets up the working directory. */
|
||||
configResolved(resolvedConfig) {
|
||||
config = resolvedConfig as VitePressConfig
|
||||
if (settings.workDir) {
|
||||
settings.workDir = path.resolve(
|
||||
config.vitepress.srcDir,
|
||||
settings.workDir as string,
|
||||
)
|
||||
} else {
|
||||
settings.workDir = config.vitepress.srcDir
|
||||
}
|
||||
// Detect if this is the SSR build
|
||||
isSsrBuild = !!resolvedConfig.build?.ssr
|
||||
log.info(
|
||||
`${pc.bold(PLUGIN_NAME)} initialized ${isSsrBuild ? pc.dim('(SSR build)') : pc.dim('(client build)')} with workDir: ${pc.cyan(settings.workDir as string)}`,
|
||||
)
|
||||
},
|
||||
|
||||
/** Configures the development server to handle `llms.txt` and markdown files for LLMs. */
|
||||
async configureServer(server: ViteDevServer) {
|
||||
log.info('Dev server configured for serving plain text docs for LLMs')
|
||||
server.middlewares.use(async (req, res, next) => {
|
||||
if (req.url?.endsWith('.md') || req.url?.endsWith('.txt')) {
|
||||
try {
|
||||
// Try to read and serve the markdown file
|
||||
const filePath = path.resolve(
|
||||
config.vitepress?.outDir ?? 'dist',
|
||||
`${stripExt(req.url)}.md`,
|
||||
)
|
||||
const content = await fs.readFile(filePath, 'utf-8')
|
||||
res.setHeader('Content-Type', 'text/plain; charset=utf-8')
|
||||
res.end(content)
|
||||
return
|
||||
} catch (e) {
|
||||
// If file doesn't exist or can't be read, continue to next middleware
|
||||
log.warn(`Failed to return ${pc.cyan(req.url)}: File not found`)
|
||||
next()
|
||||
}
|
||||
}
|
||||
|
||||
// Pass to next middleware if not handled
|
||||
next()
|
||||
})
|
||||
},
|
||||
|
||||
/**
|
||||
* Resets the collection of markdown files when the build starts.
|
||||
* This ensures we don't include stale data from previous builds.
|
||||
*/
|
||||
buildStart() {
|
||||
mdFiles.clear()
|
||||
log.info('Build started, file collection cleared')
|
||||
},
|
||||
|
||||
/**
|
||||
* Processes each file that Vite transforms and collects markdown files.
|
||||
*
|
||||
* @param _ - The file content (not used).
|
||||
* @param id - The file identifier (path).
|
||||
* @returns null if the file is processed, otherwise returns the original content.
|
||||
*/
|
||||
async transform(_, id: string) {
|
||||
if (!id.endsWith('.md')) {
|
||||
return null
|
||||
}
|
||||
|
||||
// Skip files outside workDir if it's configured
|
||||
if (!id.startsWith(settings.workDir as string)) {
|
||||
return null
|
||||
}
|
||||
|
||||
if (settings.ignoreFiles?.length) {
|
||||
const shouldIgnore = await Promise.all(
|
||||
settings.ignoreFiles.map(async (pattern) => {
|
||||
if (typeof pattern === 'string') {
|
||||
return await Promise.resolve(
|
||||
minimatch(
|
||||
path.relative(settings.workDir as string, id),
|
||||
pattern,
|
||||
),
|
||||
)
|
||||
}
|
||||
return false
|
||||
}),
|
||||
)
|
||||
|
||||
if (shouldIgnore.some((result) => result === true)) {
|
||||
return null
|
||||
}
|
||||
}
|
||||
|
||||
// Add markdown file path to our collection
|
||||
mdFiles.add(id)
|
||||
// Return null to avoid modifying the file
|
||||
return null
|
||||
},
|
||||
|
||||
/**
 * Generates the LLM-oriented documentation files from the collected
 * markdown files.
 *
 * The hook returns early in the SSR build, so the work below is done only
 * in the client build. It then:
 * 1. makes sure the output directory exists;
 * 2. reads every collected markdown file, optionally removes HTML nodes
 *    (`settings.stripHTML`) and parses its front matter;
 * 3. writes a per-page markdown copy into the output directory when
 *    `settings.generateLLMFriendlyDocsForEachPage` is enabled;
 * 4. writes `llms.txt` and/or `llms-full.txt` when the corresponding
 *    settings are enabled.
 */
async generateBundle() {
  // Skip processing during SSR build
  if (isSsrBuild) {
    log.info('Skipping LLMs docs generation in SSR build')
    return
  }

  const outDir = config.vitepress?.outDir ?? 'dist'

  // Create output directory if it doesn't exist
  try {
    await fs.access(outDir)
  } catch {
    log.info(`Creating output directory: ${pc.cyan(outDir)}`)
    await fs.mkdir(outDir, { recursive: true })
  }

  const mdFilesList = Array.from(mdFiles)
  const fileCount = mdFilesList.length

  // Skip if no files found
  if (fileCount === 0) {
    log.warn(
      `No markdown files found to process. Check your \`${pc.bold('workDir')}\` and \`${pc.bold('ignoreFiles')}\` settings.`,
    )
    return
  }

  log.info(
    `Processing ${pc.bold(fileCount.toString())} markdown files from ${pc.cyan(settings.workDir)}`,
  )

  const preparedFiles: PreparedFile[] = await Promise.all(
    mdFilesList.map(async (file) => {
      const content = await fs.readFile(file, 'utf-8')

      let mdFile: matter.GrayMatterFile<Input>

      if (settings.stripHTML) {
        const cleanedMarkdown = await remark()
          .use(remarkFrontmatter)
          .use(() => {
            // Strip HTML tags
            return (tree) => {
              remove(tree, { type: 'html' })
              return tree
            }
          })
          .process(content)

        mdFile = matter(String(cleanedMarkdown))
      } else {
        mdFile = matter(content)
      }

      // Use the extracted title, falling back to 'Untitled' when it is
      // missing or blank
      const title = extractTitle(mdFile)?.trim() || 'Untitled'

      // A nested `index.md` is exposed as `<directory>.md`; the `index.md`
      // located directly in workDir keeps its own path
      const isNestedIndex =
        path.basename(file) === 'index.md' &&
        path.dirname(file) !== settings.workDir
      const filePath = isNestedIndex ? `${path.dirname(file)}.md` : file

      return { path: filePath, title, file: mdFile }
    }),
  )

  if (settings.generateLLMFriendlyDocsForEachPage) {
    await Promise.all(
      preparedFiles.map(async (file) => {
        const relativePath = path.relative(settings.workDir, file.path)

        try {
          const mdFile = file.file
          const targetPath = path.resolve(outDir, relativePath)

          // Ensure target directory exists
          await fs.mkdir(path.dirname(targetPath), { recursive: true })

          // Write the page content with regenerated front matter metadata
          await fs.writeFile(
            targetPath,
            matter.stringify(
              mdFile.content,
              generateMetadata(mdFile, {
                domain: settings.domain,
                filePath: relativePath,
              }),
            ),
          )

          log.success(`Processed ${pc.cyan(relativePath)}`)
        } catch (error) {
          // A failure on one page is reported and does not abort the others.
          // Narrow the unknown error instead of silencing the type checker.
          const reason = error instanceof Error ? error.message : String(error)
          log.error(`Failed to process ${pc.cyan(relativePath)}: ${reason}`)
        }
      }),
    )
  }

  // Sort files by title for better organization
  preparedFiles.sort((a, b) => a.title.localeCompare(b.title))

  // Links get the `.html` extension only when per-page markdown files
  // are not generated
  const linksExtension = !settings.generateLLMFriendlyDocsForEachPage
    ? '.html'
    : undefined

  const tasks: Promise<void>[] = []

  // Generate llms.txt - table of contents with links
  if (settings.generateLLMsTxt) {
    const llmsTxtPath = path.resolve(outDir, 'llms.txt')
    // Custom variables are spread last so they override the built-in ones
    const templateVariables: CustomTemplateVariables = {
      title: settings.title,
      description: settings.description,
      details: settings.details,
      toc: settings.toc,
      ...settings.customTemplateVariables,
    }

    tasks.push(
      (async () => {
        log.info(`Generating ${pc.cyan('llms.txt')}...`)

        const llmsTxt = await generateLLMsTxt(preparedFiles, {
          indexMd: path.resolve(settings.workDir as string, 'index.md'),
          srcDir: settings.workDir as string,
          LLMsTxtTemplate:
            settings.customLLMsTxtTemplate || defaultLLMsTxtTemplate,
          templateVariables,
          vitepressConfig: config?.vitepress?.userConfig,
          domain: settings.domain,
          sidebar: settings.sidebar,
          linksExtension,
          cleanUrls: config.cleanUrls,
        })

        await fs.writeFile(llmsTxtPath, llmsTxt, 'utf-8')

        log.success(
          expandTemplate(
            'Generated {file} (~{tokens} tokens, {size}) with {fileCount} documentation links',
            {
              file: pc.cyan('llms.txt'),
              tokens: pc.bold(millify(approximateTokenSize(llmsTxt))),
              size: pc.bold(getHumanReadableSizeOf(llmsTxt)),
              fileCount: pc.bold(fileCount.toString()),
            },
          ),
        )
      })(),
    )
  }

  // Generate llms-full.txt - all content in one file
  if (settings.generateLLMsFullTxt) {
    const llmsFullTxtPath = path.resolve(outDir, 'llms-full.txt')

    tasks.push(
      (async () => {
        log.info(
          `Generating full documentation bundle (${pc.cyan('llms-full.txt')})...`,
        )

        const llmsFullTxt = generateLLMsFullTxt(preparedFiles, {
          srcDir: settings.workDir as string,
          domain: settings.domain,
          linksExtension,
          cleanUrls: config.cleanUrls,
        })

        // Write content to llms-full.txt
        await fs.writeFile(llmsFullTxtPath, llmsFullTxt, 'utf-8')
        log.success(
          expandTemplate(
            'Generated {file} (~{tokens} tokens, {size}) with {fileCount} markdown files',
            {
              file: pc.cyan('llms-full.txt'),
              tokens: pc.bold(millify(approximateTokenSize(llmsFullTxt))),
              size: pc.bold(getHumanReadableSizeOf(llmsFullTxt)),
              fileCount: pc.bold(fileCount.toString()),
            },
          ),
        )
      })(),
    )
  }

  // Both generators were started above; wait for them to finish
  // (a no-op when neither is enabled)
  await Promise.all(tasks)
},
|
||||
}
|
||||
}
|
Loading…
Reference in new issue