feat: 📄 integrate `llms.txt` generation functionality

pull/4692/head
Okinea Dev 5 months ago
parent 1ec84c1504
commit 673db7d4b6
No known key found for this signature in database
GPG Key ID: 07944BC5E01E7B43

@ -123,10 +123,10 @@ export const shared = defineConfig({
firebase: 'logos:firebase'
}
}),
llmstxt({
workDir: 'en',
ignoreFiles: ['index.md']
})
]
},
llms: {
workDir: 'en',
ignoreFiles: ['index.md']
}
})

@ -15,7 +15,6 @@
"open-cli": "^8.0.0",
"postcss-rtlcss": "^5.7.0",
"vitepress": "workspace:*",
"vitepress-plugin-group-icons": "^1.4.1",
"vitepress-plugin-llms": "^1.1.0"
"vitepress-plugin-group-icons": "^1.4.1"
}
}

@ -106,10 +106,18 @@
"@vue/shared": "^3.5.13",
"@vueuse/core": "^13.1.0",
"@vueuse/integrations": "^13.1.0",
"byte-size": "^9.0.1",
"focus-trap": "^7.6.4",
"mark.js": "8.11.1",
"markdown-title": "^1.0.2",
"millify": "^6.1.0",
"minimatch": "^10.0.1",
"minisearch": "^7.1.2",
"remark": "^15.0.1",
"remark-frontmatter": "^5.0.0",
"shiki": "^3.2.2",
"tokenx": "^0.4.1",
"unist-util-remove": "^4.0.0",
"vite": "^6.2.6",
"vue": "^3.5.13"
},
@ -129,6 +137,7 @@
"@rollup/plugin-json": "^6.1.0",
"@rollup/plugin-node-resolve": "^16.0.1",
"@rollup/plugin-replace": "^6.0.2",
"@types/byte-size": "^8.1.2",
"@types/cross-spawn": "^6.0.6",
"@types/debug": "^4.1.12",
"@types/fs-extra": "^11.0.4",

@ -52,18 +52,42 @@ importers:
'@vueuse/integrations':
specifier: ^13.1.0
version: 13.1.0(axios@1.8.4(debug@4.4.0))(focus-trap@7.6.4)(vue@3.5.13(typescript@5.8.3))
byte-size:
specifier: ^9.0.1
version: 9.0.1
focus-trap:
specifier: ^7.6.4
version: 7.6.4
mark.js:
specifier: 8.11.1
version: 8.11.1
markdown-title:
specifier: ^1.0.2
version: 1.0.2
millify:
specifier: ^6.1.0
version: 6.1.0
minimatch:
specifier: ^10.0.1
version: 10.0.1
minisearch:
specifier: ^7.1.2
version: 7.1.2
remark:
specifier: ^15.0.1
version: 15.0.1
remark-frontmatter:
specifier: ^5.0.0
version: 5.0.0
shiki:
specifier: ^3.2.2
version: 3.2.2
tokenx:
specifier: ^0.4.1
version: 0.4.1
unist-util-remove:
specifier: ^4.0.0
version: 4.0.0
vite:
specifier: ^6.2.6
version: 6.2.6(@types/node@22.14.0)(jiti@1.21.7)(yaml@2.7.1)
@ -116,6 +140,9 @@ importers:
'@rollup/plugin-replace':
specifier: ^6.0.2
version: 6.0.2(rollup@4.39.0)
'@types/byte-size':
specifier: ^8.1.2
version: 8.1.2
'@types/cross-spawn':
specifier: ^6.0.6
version: 6.0.6
@ -329,9 +356,6 @@ importers:
vitepress-plugin-group-icons:
specifier: ^1.4.1
version: 1.4.1
vitepress-plugin-llms:
specifier: ^1.1.0
version: 1.1.0
packages:
@ -911,6 +935,9 @@ packages:
'@tokenizer/token@0.3.0':
resolution: {integrity: sha512-OvjF+z51L3ov0OyAU0duzsYuvO01PH7x4t6DJx+guahgTnBHkhJdG7soQeTSFLWN3efnHyibZ4Z8l2EuWwJN3A==}
'@types/byte-size@8.1.2':
resolution: {integrity: sha512-jGyVzYu6avI8yuqQCNTZd65tzI8HZrLjKX9sdMqZrGWVlNChu0rf6p368oVEDCYJe5BMx2Ov04tD1wqtgTwGSA==}
'@types/cross-spawn@6.0.6':
resolution: {integrity: sha512-fXRhhUkG4H3TQk5dBhQ7m/JDdSNHKwR2BBia62lhwEIq9xGiQKLxd6LymNhn47SjXhsUEPmxi+PKw2OkW4LLjA==}
@ -2917,9 +2944,6 @@ packages:
vitepress-plugin-group-icons@1.4.1:
resolution: {integrity: sha512-4APG5wzUvl2JbZcy6+I7K9DleBJE7W5RCkPu2mDPxzKxI/9pF3GmIACDnIlhyfIpUyfW4eanbyoMuP7tzLpM3Q==}
vitepress-plugin-llms@1.1.0:
resolution: {integrity: sha512-nb7bG/lBDihlcFTzqxRxQIyzeBWQW9F6OwuUWQ7PFUNK5kVbybxXGISU4wvAV8osQmfrD9xNIGJQfuOLj5CzHg==}
vitest@3.1.1:
resolution: {integrity: sha512-kiZc/IYmKICeBAZr9DQ5rT7/6bD9G7uqQEki4fxazi1jdVl2mWGzedtBs5s6llz59yQhVb7FFY2MbHzHCnT79Q==}
engines: {node: ^18.0.0 || ^20.0.0 || >=22.0.0}
@ -3583,6 +3607,8 @@ snapshots:
'@tokenizer/token@0.3.0': {}
'@types/byte-size@8.1.2': {}
'@types/cross-spawn@6.0.6':
dependencies:
'@types/node': 22.14.0
@ -5735,22 +5761,6 @@ snapshots:
transitivePeerDependencies:
- supports-color
vitepress-plugin-llms@1.1.0:
dependencies:
byte-size: 9.0.1
gray-matter: 4.0.3
markdown-title: 1.0.2
millify: 6.1.0
minimatch: 10.0.1
picocolors: 1.1.1
remark: 15.0.1
remark-frontmatter: 5.0.0
tokenx: 0.4.1
unist-util-remove: 4.0.0
transitivePeerDependencies:
- '@75lb/nature'
- supports-color
vitest@3.1.1(@types/debug@4.1.12)(@types/node@22.14.0)(jiti@1.21.7)(yaml@2.7.1):
dependencies:
'@vitest/expect': 3.1.1

@ -130,7 +130,8 @@ export async function resolveConfig(
userConfig,
sitemap: userConfig.sitemap,
buildConcurrency: userConfig.buildConcurrency ?? 64,
...(await resolvePages(srcDir, userConfig, logger, true))
...(await resolvePages(srcDir, userConfig, logger, true)),
llms: userConfig.llms ?? false
}
// to be shared with content loaders
@ -241,7 +242,8 @@ export async function resolveSiteData(
locales: userConfig.locales || {},
scrollOffset: userConfig.scrollOffset ?? 134,
cleanUrls: !!userConfig.cleanUrls,
contentProps: userConfig.contentProps
contentProps: userConfig.contentProps,
llms: userConfig.llms ?? false,
}
}

@ -27,6 +27,7 @@ import { localSearchPlugin } from './plugins/localSearchPlugin'
import { rewritesPlugin } from './plugins/rewritesPlugin'
import { staticDataPlugin } from './plugins/staticDataPlugin'
import { webFontsPlugin } from './plugins/webFontsPlugin'
import llmstxt from './plugins/llmstxt'
import { slash, type PageDataPayload } from './shared'
import { deserializeFunctions, serializeFunctions } from './utils/fnSerialize'
@ -75,6 +76,7 @@ export async function createVitePressPlugin(
site,
vue: userVuePluginOptions,
vite: userViteConfig,
llms,
lastUpdated,
cleanUrls
} = siteConfig
@ -430,6 +432,7 @@ export async function createVitePressPlugin(
vuePlugin,
hmrFix,
webFontsPlugin(siteConfig.useWebFonts),
llms ? llmstxt(llms === true ? undefined : llms) : [],
...(userViteConfig?.plugins || []),
await localSearchPlugin(siteConfig),
staticDataPlugin,

@ -0,0 +1,11 @@
/**
 * Default template for the `llms.txt` file.
 *
 * Placeholders are written as `{name}`. This template uses `{title}`,
 * `{description}`, `{details}` and `{toc}`.
 */
export const defaultLLMsTxtTemplate = `\
# {title}
{description}
{details}
## Table of Contents
{toc}`

@ -0,0 +1,175 @@
import fs from 'node:fs/promises'
import path from 'node:path'
import matter from 'gray-matter'
import type { DefaultTheme } from 'vitepress'
import { defaultLLMsTxtTemplate } from '../constants'
import type {
LinksExtension,
LlmstxtSettings,
PreparedFile,
VitePressConfig,
} from '../types'
import { generateTOC } from './toc'
import { expandTemplate, extractTitle, generateMetadata } from './utils'
/**
 * Options accepted by `generateLLMsTxt` when building the `llms.txt` file.
 */
export interface GenerateLLMsTxtOptions {
/** Path to the main documentation file `index.md`; it is read and its frontmatter supplies title/description/details defaults. */
indexMd: string
/** Directory that file paths are made relative to when links are generated. */
srcDir: VitePressConfig['vitepress']['srcDir']
/** Template to expand into `llms.txt`; the built-in default template is used when omitted. */
LLMsTxtTemplate?: LlmstxtSettings['customLLMsTxtTemplate']
/** Values for the template placeholders; nullish `title`, `description`, `details` and `toc` entries get computed defaults. */
templateVariables?: LlmstxtSettings['customTemplateVariables']
/** The VitePress user configuration, used as a fallback source for title, description and sidebar. */
vitepressConfig?: VitePressConfig['vitepress']['userConfig']
/** The base domain for the generated links. */
domain?: LlmstxtSettings['domain']
/** The link extension for generated links (`.md` is used when omitted). */
linksExtension?: LinksExtension
/** Whether to use clean URLs (links are generated without the extension). */
cleanUrls?: VitePressConfig['cleanUrls']
/** Optional sidebar configuration for organizing the TOC; takes precedence over the sidebar in `vitepressConfig`. */
sidebar?: DefaultTheme.Sidebar
}
/**
 * Generates the content of `llms.txt`: a title, optional description and
 * details, and a table of contents linking to every documentation page.
 *
 * Placeholder values that the caller did not supply are derived from the
 * frontmatter of `index.md` and from the VitePress user config. The caller's
 * `templateVariables` object is left untouched; defaults are applied to a
 * private copy.
 *
 * @param preparedFiles - An array of prepared files.
 * @param options - Options for generating the `llms.txt` file.
 * @returns A string representing the content of the `llms.txt` file.
 * @throws Rejects with the `fs.readFile` error when `options.indexMd` cannot be read.
 *
 * @example
 * ```markdown
 * # Shadcn for Vue
 *
 * > Beautifully designed components built with Radix Vue and Tailwind CSS.
 *
 * ## Table of Contents
 *
 * - [Getting started](/docs/getting-started.md)
 * - [About](/docs/about.md)
 * - ...
 * ```
 *
 * @see https://llmstxt.org/#format
 */
export async function generateLLMsTxt(
  preparedFiles: PreparedFile[],
  options: GenerateLLMsTxtOptions,
): Promise<string> {
  const {
    indexMd,
    srcDir,
    LLMsTxtTemplate = defaultLLMsTxtTemplate,
    templateVariables = {},
    vitepressConfig,
    domain,
    sidebar,
    linksExtension,
    cleanUrls,
  } = options

  // Apply defaults to a shallow copy: writing into the caller's object would
  // leak computed values back out and, on a second call with the same object,
  // prefix the description with `> ` again.
  const variables = { ...templateVariables }

  // NOTE(review): presumably resets gray-matter's parse cache so a changed
  // `index.md` is re-parsed; `clearCache` is missing from the typings.
  // @ts-expect-error
  matter.clearCache()

  const indexMdContent = await fs.readFile(indexMd, 'utf-8')
  const indexMdFile = matter(indexMdContent)

  variables.title ??=
    indexMdFile.data?.hero?.name ||
    indexMdFile.data?.title ||
    vitepressConfig?.title ||
    vitepressConfig?.titleTemplate ||
    extractTitle(indexMdFile) ||
    'LLMs Documentation'

  variables.description ??=
    indexMdFile.data?.hero?.text ||
    vitepressConfig?.description ||
    indexMdFile?.data?.description ||
    indexMdFile.data?.titleTemplate

  // Render the description as a Markdown blockquote.
  if (variables.description) {
    variables.description = `> ${variables.description}`
  }

  // Fall back to a generic sentence only when there is no description either.
  // `undefined` (not `false`) is stored otherwise so the value stays a string.
  variables.details ??=
    indexMdFile.data?.hero?.tagline ||
    indexMdFile.data?.tagline ||
    (variables.description
      ? undefined
      : 'This file contains links to all documentation sections.')

  variables.toc ??= await generateTOC(preparedFiles, {
    srcDir,
    domain,
    sidebarConfig: sidebar || vitepressConfig?.themeConfig?.sidebar,
    linksExtension,
    cleanUrls,
  })

  return expandTemplate(LLMsTxtTemplate, variables)
}
/**
 * Options accepted by `generateLLMsFullTxt` when building `llms-full.txt`.
 */
export interface GenerateLLMsFullTxtOptions {
/** Directory that each prepared file's path is made relative to. */
srcDir: VitePressConfig['vitepress']['srcDir']
/** The base domain for the generated links. */
domain?: LlmstxtSettings['domain']
/** The link extension for generated links. */
linksExtension?: LinksExtension
/** Whether to use clean URLs (without the extension). */
cleanUrls?: VitePressConfig['cleanUrls']
}
/**
 * Builds the content of `llms-full.txt` by concatenating every prepared page.
 *
 * Each page is serialized with `matter.stringify`, using the metadata from
 * `generateMetadata` as its frontmatter, and pages are joined with a
 * `---` separator line.
 *
 * @param preparedFiles - An array of prepared files.
 * @param options - Options for generating the `llms-full.txt` file.
 * @returns The full text of `llms-full.txt`.
 */
export function generateLLMsFullTxt(
  preparedFiles: PreparedFile[],
  options: GenerateLLMsFullTxtOptions,
) {
  const pageSeparator = '\n---\n\n'
  const renderedPages: string[] = []

  for (const { path: absolutePath, file } of preparedFiles) {
    const metadata = generateMetadata(file, {
      domain: options.domain,
      filePath: path.relative(options.srcDir, absolutePath),
      linksExtension: options.linksExtension,
      cleanUrls: options.cleanUrls,
    })
    renderedPages.push(matter.stringify(file.content, metadata))
  }

  return renderedPages.join(pageSeparator)
}

@ -0,0 +1,43 @@
import pc from 'picocolors'
/** Coloured prefix (plugin name plus a dimmed separator) put in front of every log line. */
const logPrefix = `${pc.blue('llmstxt')}${pc.dim(' » ')}`

/** Console logger whose methods all prepend the plugin prefix. */
const log = {
  /**
   * Prints an informational message via `console.log`.
   *
   * @param message - The message to log.
   */
  info(message: string) {
    console.log(`${logPrefix} ${message}`)
  },

  /**
   * Prints a message with a green check mark via `console.log`.
   *
   * @param message - The message to log.
   */
  success(message: string) {
    console.log(`${logPrefix}${pc.green('✓')} ${message}`)
  },

  /**
   * Prints a yellow warning via `console.warn`.
   *
   * @param message - The message to log.
   */
  warn(message: string) {
    console.warn(`${logPrefix}${pc.yellow('⚠')} ${pc.yellow(message)}`)
  },

  /**
   * Prints a red error message via `console.error`.
   *
   * @param message - The message to log.
   */
  error(message: string) {
    console.error(`${logPrefix}${pc.red('✗')} ${pc.red(message)}`)
  },
}

export default log

@ -0,0 +1,309 @@
import path from 'node:path'
import type { DefaultTheme } from 'vitepress'
import type {
LinksExtension,
LlmstxtSettings,
PreparedFile,
VitePressConfig,
} from '../types'
import { generateLink, stripExtPosix } from './utils'
/**
 * Builds one Markdown list item for the table of contents.
 *
 * The entry has the form `- [title](link)`, followed by `: description` when
 * the file's frontmatter has a description, and ends with a newline.
 *
 * @param file - The prepared file.
 * @param domain - The base domain for the generated link.
 * @param relativePath - The file path relative to the source directory; its extension is stripped before the link is built.
 * @param extension - The link extension for the generated link (default is `.md`).
 * @param cleanUrls - Whether to use clean URLs (without the extension).
 * @returns The formatted TOC entry as a Markdown list item.
 */
export const generateTOCLink = (
  file: PreparedFile,
  domain: LlmstxtSettings['domain'],
  relativePath: string,
  extension?: LinksExtension,
  cleanUrls: VitePressConfig['cleanUrls'] = false,
) => {
  const description: string = file.file.data.description
  const href = generateLink(
    stripExtPosix(relativePath),
    domain,
    extension ?? '.md',
    cleanUrls,
  )
  const descriptionSuffix = description ? `: ${description.trim()}` : ''

  return `- [${file.title}](${href})${descriptionSuffix}\n`
}
/**
 * Gathers every `link` found in a sidebar tree, depth-first.
 *
 * An item's own link comes before the links of its nested `items`.
 *
 * @param items - Array of sidebar items to process.
 * @returns Array of paths collected from the sidebar items.
 */
function collectPathsFromSidebarItems(
  items: DefaultTheme.SidebarItem[],
): string[] {
  return items.flatMap((entry) => {
    const ownLink = entry.link ? [entry.link] : []
    const descendantLinks =
      entry.items && Array.isArray(entry.items)
        ? collectPathsFromSidebarItems(entry.items)
        : []
    return [...ownLink, ...descendantLinks]
  })
}
/**
 * Brings a link into a canonical form for comparison.
 *
 * The extension is stripped, and a path whose last segment is `index` is
 * replaced by its parent directory.
 *
 * @param link - The link path to normalize.
 * @returns Normalized link path for consistent comparison.
 */
export function normalizeLinkPath(link: string): string {
  const withoutExtension = stripExtPosix(link)

  return path.basename(withoutExtension) === 'index'
    ? path.dirname(withoutExtension)
    : withoutExtension
}
/**
 * Tells whether a file path and a sidebar path refer to the same page.
 *
 * Both paths are normalized with `normalizeLinkPath`; they match when the
 * results are equal, or when the file path equals the sidebar path with
 * `.md` appended.
 *
 * @param filePath - The file path to check.
 * @param sidebarPath - The sidebar path to compare against.
 * @returns True if paths match, false otherwise
 */
export function isPathMatch(filePath: string, sidebarPath: string): boolean {
  const candidate = normalizeLinkPath(filePath)
  const target = normalizeLinkPath(sidebarPath)

  if (candidate === target) {
    return true
  }
  return candidate === `${target}.md`
}
/**
 * Renders one sidebar section (and, recursively, its nested sections) as TOC
 * Markdown, keeping entries in the order they appear in the sidebar config.
 *
 * Output layout: an optional heading for the section, then all of its direct
 * link items, then all of its nested sections.
 *
 * @param section - A sidebar section
 * @param preparedFiles - An array of prepared files
 * @param srcDir - The VitePress source directory
 * @param domain - Optional domain to prefix URLs with
 * @param linksExtension - The link extension for generated links.
 * @param cleanUrls - Whether to use clean URLs (without the extension).
 * @param depth - Current depth level for headings (number of `#` characters)
 * @returns A string representing the formatted section of the TOC
 */
async function processSidebarSection(
  section: DefaultTheme.SidebarItem,
  preparedFiles: PreparedFile[],
  srcDir: VitePressConfig['vitepress']['srcDir'],
  domain?: LlmstxtSettings['domain'],
  linksExtension?: LinksExtension,
  cleanUrls?: VitePressConfig['cleanUrls'],
  depth = 3,
): Promise<string> {
  let sectionTOC = ''

  // Emit a heading only when the section has a label.
  if (section.text) {
    sectionTOC += `${'#'.repeat(depth)} ${section.text}\n\n`
  }

  if (section.items && Array.isArray(section.items)) {
    const linkItems: string[] = []
    const nestedSections: string[] = []

    // Walk the items one after another. Running them concurrently and pushing
    // after the `await` would order nested sections by how quickly each
    // resolves (shallow ones first) rather than by sidebar position.
    for (const item of section.items) {
      if (item.items && item.items.length > 0) {
        // NOTE(review): an item with both `link` and `items` is rendered as a
        // section only; its own link gets no TOC entry here.
        nestedSections.push(
          await processSidebarSection(
            item,
            preparedFiles,
            srcDir,
            domain,
            linksExtension,
            cleanUrls,
            // One heading level deeper for each level of nesting.
            depth + 1,
          ),
        )
      } else if (item.link) {
        const normalizedItemLink = normalizeLinkPath(item.link)
        const matchingFile = preparedFiles.find((file) => {
          const relativePath = `/${stripExtPosix(path.relative(srcDir, file.path))}`
          return isPathMatch(relativePath, normalizedItemLink)
        })

        // Sidebar links with no corresponding prepared file are skipped.
        if (matchingFile) {
          linkItems.push(
            generateTOCLink(
              matchingFile,
              domain,
              path.relative(srcDir, matchingFile.path),
              linksExtension,
              cleanUrls,
            ),
          )
        }
      }
    }

    if (linkItems.length > 0) {
      sectionTOC += linkItems.join('')
    }

    // Separate the link list from the nested sections with a blank line.
    if (linkItems.length > 0 && nestedSections.length > 0) {
      sectionTOC += '\n'
    }

    if (nestedSections.length > 0) {
      sectionTOC += nestedSections.join('\n')
    }
  }

  return sectionTOC
}
/**
 * Turns a sidebar configuration into a single flat list of top-level items.
 *
 * An array is returned unchanged, an object has its values collected and
 * flattened by one level, and any other input yields an empty array.
 *
 * @param sidebarConfig - The sidebar configuration from VitePress.
 * @returns An array of sidebar items.
 */
function flattenSidebarConfig(
  sidebarConfig: DefaultTheme.Sidebar,
): DefaultTheme.SidebarItem[] {
  if (Array.isArray(sidebarConfig)) {
    return sidebarConfig
  }

  return typeof sidebarConfig === 'object'
    ? Object.values(sidebarConfig).flat()
    : []
}
/**
 * Options accepted by `generateTOC`.
 */
export interface GenerateTOCOptions {
/**
* The VitePress source directory; file paths are made relative to it.
*/
srcDir: VitePressConfig['vitepress']['srcDir']
/**
* Optional domain to prefix URLs with.
*/
domain?: LlmstxtSettings['domain']
/**
* Optional VitePress sidebar configuration; when given, the TOC is grouped by its sections.
*/
sidebarConfig?: DefaultTheme.Sidebar
/** The link extension for generated links (`.md` is used when omitted). */
linksExtension?: LinksExtension
/** Whether to use clean URLs (without the extension). */
cleanUrls?: VitePressConfig['cleanUrls']
}
/**
 * Generates a Table of Contents (TOC) for the provided prepared files.
 *
 * Without a sidebar configuration (or with an empty one) every file becomes
 * one Markdown list item. With a sidebar, the TOC is grouped into sections
 * that mirror the sidebar structure, with heading depth reflecting nesting.
 * Files the sidebar does not reference are listed afterwards under an
 * `### Other` heading, so no file is listed twice.
 *
 * @param preparedFiles - An array of prepared files.
 * @param options - Options for generating the TOC.
 * @returns A string representing the formatted Table of Contents.
 */
export async function generateTOC(
  preparedFiles: PreparedFile[],
  options: GenerateTOCOptions,
): Promise<string> {
  const { srcDir, domain, sidebarConfig, linksExtension, cleanUrls } = options

  let tableOfContent = ''
  // Files still to be listed as plain entries at the end.
  let filesToProcess = preparedFiles

  if (sidebarConfig) {
    // A sidebar keyed by path is reduced to one list of sections.
    const flattenedSidebarConfig = flattenSidebarConfig(sidebarConfig)

    if (flattenedSidebarConfig.length > 0) {
      for (const section of flattenedSidebarConfig) {
        tableOfContent += await processSidebarSection(
          section,
          preparedFiles,
          srcDir,
          domain,
          linksExtension,
          cleanUrls,
        )
        tableOfContent += '\n'
      }

      const allSidebarPaths = collectPathsFromSidebarItems(
        flattenedSidebarConfig,
      )

      // Only files the sidebar does not mention remain to be listed. Narrowing
      // unconditionally (even to an empty list) keeps files already rendered
      // in a section from being appended a second time.
      filesToProcess = preparedFiles.filter((file) => {
        const relativePath = `/${stripExtPosix(path.relative(srcDir, file.path))}`
        return !allSidebarPaths.some((sidebarPath) =>
          isPathMatch(relativePath, sidebarPath),
        )
      })

      if (filesToProcess.length > 0) {
        tableOfContent += '### Other\n\n'
      }
    }
  }

  for (const file of filesToProcess) {
    tableOfContent += generateTOCLink(
      file,
      domain,
      path.relative(srcDir, file.path),
      linksExtension,
      cleanUrls,
    )
  }

  return tableOfContent
}

@ -0,0 +1,207 @@
import path from 'node:path'
import byteSize from 'byte-size'
import type { GrayMatterFile, Input } from 'gray-matter'
// @ts-expect-error
import markdownTitle from 'markdown-title'
import type { LinksExtension, LlmstxtSettings, VitePressConfig } from '../types'
/**
 * Breaks a file path into its directory part and its final segment.
 *
 * @param filepath - The path to the file.
 * @returns An object with `dir` (from `path.dirname`) and `file` (from `path.basename`).
 */
export const splitDirAndFile = (filepath: string) => {
  const dir = path.dirname(filepath)
  const file = path.basename(filepath)
  return { dir, file }
}
/**
 * Removes the extension from the last segment of a file path.
 *
 * The directory part is kept; the result is joined with the platform's
 * `path.join`.
 *
 * @param filepath - The path to the file.
 * @returns The path (directory plus file name) without the extension.
 */
export const stripExt = (filepath: string) => {
  const { dir, file } = splitDirAndFile(filepath)
  const extension = path.extname(file)
  const stem = path.basename(file, extension)
  return path.join(dir, stem)
}
/**
 * Removes the extension from the last segment of a file path and returns the
 * result with POSIX (`/`) separators.
 *
 * The directory part comes from the platform-specific `path.dirname`, so its
 * native separators are converted to `/` before joining. On Windows this
 * keeps backslashes out of generated links.
 *
 * @param filepath - The path to the file.
 * @returns The path (directory plus file name) without the extension, in POSIX format.
 */
export const stripExtPosix = (filepath: string) => {
  const file = path.basename(filepath)
  // `path.posix.join` does not treat `\` as a separator, so normalize first.
  const posixDir = path
    .dirname(filepath)
    .split(path.sep)
    .join(path.posix.sep)

  return path.posix.join(posixDir, path.basename(file, path.extname(file)))
}
/**
 * Picks a title for a parsed markdown file.
 *
 * The frontmatter `title` is preferred, then the frontmatter `titleTemplate`;
 * when neither is set, the title is taken from the markdown content via
 * `markdown-title`.
 *
 * @param file - The markdown file to extract the title from.
 * @returns The extracted title.
 * NOTE(review): the value can presumably be `undefined` at runtime when the
 * content has no heading, despite the `string` return type — callers in this
 * codebase guard with `||` / `?.`.
 */
export function extractTitle(file: GrayMatterFile<Input>): string {
  const frontmatterTitle = file.data?.title || file.data?.titleTemplate
  if (frontmatterTitle) {
    return frontmatterTitle
  }

  return markdownTitle(file.content)
}
/**
 * Creates a regular expression that matches the template placeholder `{key}`,
 * optionally preceded by a blank line (which is captured in group 1).
 *
 * The key is matched literally: regular-expression metacharacters in `key`
 * are escaped, so a key such as `a.b` only matches `{a.b}` and a key such as
 * `(` does not throw.
 *
 * @param key - The name of the template variable to match.
 * @returns A global, case-insensitive regular expression for `{key}`.
 *
 * @example
 * ```ts
 * const regex = templateVariable('name');
 * console.log(regex.test('Hello {name}')); // true
 * ```
 */
const templateVariable = (key: string) => {
  const escapedKey = key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
  return new RegExp(`(\\n\\s*\\n)?\\{${escapedKey}\\}`, 'gi')
}
/**
 * Substitutes every `{variable}` placeholder in `content`.
 *
 * The replacement is `value` when it is non-empty, otherwise `fallback` when
 * that is non-empty, otherwise nothing. If a placeholder is preceded by a
 * blank line, that blank line is kept (as exactly `\n\n`) only when there is
 * a replacement; with no replacement it is removed along with the placeholder.
 *
 * @param content - The template string containing placeholders.
 * @param variable - The template variable name to replace.
 * @param value - The value to replace the variable with.
 * @param fallback - An optional fallback value if `value` is empty.
 * @returns A new string with the template variable replaced.
 *
 * @example
 * ```ts
 * const template = 'Hello {name}!';
 * const result = replaceTemplateVariable(template, 'name', 'Alice', 'User');
 * console.log(result); // 'Hello Alice!'
 * ```
 */
export function replaceTemplateVariable(
  content: string,
  variable: string,
  value: string | undefined,
  fallback?: string,
) {
  let replacement = ''
  if (value?.length) {
    replacement = value
  } else if (fallback?.length) {
    replacement = fallback
  }

  return content.replace(
    templateVariable(variable),
    (_match, leadingBlankLine) => {
      if (!replacement) {
        return ''
      }
      return leadingBlankLine ? `\n\n${replacement}` : `${replacement}`
    },
  )
}
/**
 * Fills in a template by replacing each `{name}` placeholder with the
 * matching entry of `variables`, one variable at a time in the object's
 * own enumeration order.
 *
 * @param template - The template string containing placeholders.
 * @param variables - An object mapping variable names to their respective values.
 * @returns A string with all listed template variables replaced.
 *
 * @example
 * ```ts
 * const template = 'Hello {name}, welcome to {place}!';
 * const variables = { name: 'Alice', place: 'Wonderland' };
 * const result = expandTemplate(template, variables);
 * console.log(result); // 'Hello Alice, welcome to Wonderland!'
 * ```
 */
export const expandTemplate = (
  template: string,
  variables: Record<string, string | undefined>,
) => {
  let expanded = template
  for (const [name, replacement] of Object.entries(variables)) {
    expanded = replaceTemplateVariable(expanded, name, replacement)
  }
  return expanded
}
/**
 * Builds a link of the form `{domain}/{path}{extension}`.
 *
 * @param path - The path to append to the domain (e.g., "guide").
 * @param domain - The base domain of the link (e.g., "https://example.com"); empty when omitted.
 * @param extension - An optional extension to append to the path (e.g., ".md").
 * @param cleanUrls - When truthy, the extension is left out of the link.
 * @returns The generated link
 */
export const generateLink = (
  path: string,
  domain?: string,
  extension?: LinksExtension,
  cleanUrls?: VitePressConfig['cleanUrls'],
) => {
  const linkParts = {
    domain: domain || '',
    path,
    extension: cleanUrls ? '' : extension,
  }
  return expandTemplate('{domain}/{path}{extension}', linkParts)
}
/**
 * Options accepted by `generateMetadata`.
 */
export interface GenerateMetadataOptions {
/** Optional domain name to prepend to the URL. */
domain?: LlmstxtSettings['domain']
/** Path to the file relative to the content root; its extension is stripped before the URL is built. */
filePath: string
/** The link extension for generated links (`.md` is used when omitted). */
linksExtension?: LinksExtension
/** Whether to use clean URLs (without the extension). */
cleanUrls?: VitePressConfig['cleanUrls']
}
/**
 * Builds the frontmatter metadata written alongside a page for LLM consumers.
 *
 * The result always has a `url`; a `description` is added when the source
 * file's frontmatter has a non-empty one.
 *
 * @param sourceFile - Parsed markdown file with frontmatter using gray-matter.
 * @param options - Options for generating metadata.
 * @returns Object containing metadata properties for the file.
 *
 * @example
 * generateMetadata(preparedFile, { domain: 'https://example.com', filePath: 'docs/guide' })
 * // Returns { url: 'https://example.com/docs/guide.md', description: 'A guide' }
 */
export function generateMetadata<GrayMatter extends GrayMatterFile<Input>>(
  sourceFile: GrayMatter,
  options: GenerateMetadataOptions,
) {
  const { domain, filePath, linksExtension, cleanUrls } = options

  const url = generateLink(
    stripExtPosix(filePath),
    domain,
    linksExtension ?? '.md',
    cleanUrls,
  )
  const frontmatterMetadata: Record<string, string> = { url }

  const description = sourceFile.data?.description
  if (description?.length) {
    frontmatterMetadata.description = description
  }

  return frontmatterMetadata
}
/**
 * Formats the size of a string as a human-readable byte count.
 *
 * The size is measured by wrapping the string in a `Blob` and reading its
 * `size`; the number is then formatted with `byte-size`.
 *
 * @param string - The input string whose size needs to be determined.
 * @returns The size as formatted by `byte-size`.
 */
export const getHumanReadableSizeOf = (string: string) => {
  const sizeInBytes = new Blob([string]).size
  return byteSize(sizeInBytes).toString()
}

@ -0,0 +1,365 @@
import type { Plugin, ViteDevServer } from 'vite'
import fs from 'node:fs/promises'
import path from 'node:path'
import matter, { type Input } from 'gray-matter'
import { minimatch } from 'minimatch'
import pc from 'picocolors'
import { remark } from 'remark'
import remarkFrontmatter from 'remark-frontmatter'
import { remove } from 'unist-util-remove'
import { millify } from 'millify'
import { approximateTokenSize } from 'tokenx'
import { defaultLLMsTxtTemplate } from './constants'
import { generateLLMsFullTxt, generateLLMsTxt } from './helpers/index'
import log from './helpers/logger'
import {
expandTemplate,
extractTitle,
generateMetadata,
getHumanReadableSizeOf,
stripExt,
} from './helpers/utils'
import type {
CustomTemplateVariables,
LlmstxtSettings,
PreparedFile,
VitePressConfig,
} from './types'
const PLUGIN_NAME = 'llmstxt'
/**
* [VitePress](http://vitepress.dev/) plugin for generating raw documentation
* for **LLMs** in Markdown format which is much lighter and more efficient for LLMs
*
* @param [userSettings={}] - Plugin settings.
*
* @see https://github.com/okineadev/vitepress-plugin-llms
* @see https://llmstxt.org/
*/
export default function llmstxt(userSettings: LlmstxtSettings = {}): Plugin {
// Create a settings object with defaults explicitly merged
const settings: Omit<LlmstxtSettings, 'workDir'> & { workDir: string } = {
generateLLMsTxt: true,
generateLLMsFullTxt: true,
generateLLMFriendlyDocsForEachPage: true,
ignoreFiles: [],
workDir: undefined as unknown as string,
stripHTML: true,
...userSettings,
}
// Store the resolved Vite config
let config: VitePressConfig
// Set to store all markdown file paths
const mdFiles: Set<string> = new Set()
// Flag to identify which build we're in
let isSsrBuild = false
return {
name: PLUGIN_NAME,
/** Resolves the Vite configuration and sets up the working directory. */
configResolved(resolvedConfig) {
config = resolvedConfig as VitePressConfig
if (settings.workDir) {
settings.workDir = path.resolve(
config.vitepress.srcDir,
settings.workDir as string,
)
} else {
settings.workDir = config.vitepress.srcDir
}
// Detect if this is the SSR build
isSsrBuild = !!resolvedConfig.build?.ssr
log.info(
`${pc.bold(PLUGIN_NAME)} initialized ${isSsrBuild ? pc.dim('(SSR build)') : pc.dim('(client build)')} with workDir: ${pc.cyan(settings.workDir as string)}`,
)
},
/** Configures the development server to handle `llms.txt` and markdown files for LLMs. */
async configureServer(server: ViteDevServer) {
log.info('Dev server configured for serving plain text docs for LLMs')
server.middlewares.use(async (req, res, next) => {
if (req.url?.endsWith('.md') || req.url?.endsWith('.txt')) {
try {
// Try to read and serve the markdown file
const filePath = path.resolve(
config.vitepress?.outDir ?? 'dist',
`${stripExt(req.url)}.md`,
)
const content = await fs.readFile(filePath, 'utf-8')
res.setHeader('Content-Type', 'text/plain; charset=utf-8')
res.end(content)
return
} catch (e) {
// If file doesn't exist or can't be read, continue to next middleware
log.warn(`Failed to return ${pc.cyan(req.url)}: File not found`)
next()
}
}
// Pass to next middleware if not handled
next()
})
},
/**
* Resets the collection of markdown files when the build starts.
* This ensures we don't include stale data from previous builds.
*/
buildStart() {
mdFiles.clear()
log.info('Build started, file collection cleared')
},
/**
* Processes each file that Vite transforms and collects markdown files.
*
* @param _ - The file content (not used).
* @param id - The file identifier (path).
* @returns null if the file is processed, otherwise returns the original content.
*/
async transform(_, id: string) {
if (!id.endsWith('.md')) {
return null
}
// Skip files outside workDir if it's configured
if (!id.startsWith(settings.workDir as string)) {
return null
}
if (settings.ignoreFiles?.length) {
const shouldIgnore = await Promise.all(
settings.ignoreFiles.map(async (pattern) => {
if (typeof pattern === 'string') {
return await Promise.resolve(
minimatch(
path.relative(settings.workDir as string, id),
pattern,
),
)
}
return false
}),
)
if (shouldIgnore.some((result) => result === true)) {
return null
}
}
// Add markdown file path to our collection
mdFiles.add(id)
// Return null to avoid modifying the file
return null
},
/**
 * Writes the LLM-oriented files once the bundle has been generated.
 *
 * Work is done only in the client build; the SSR build returns early so the
 * files are produced exactly once. Depending on `settings` this emits:
 *
 * - one markdown file per collected page (`generateLLMFriendlyDocsForEachPage`),
 * - `llms.txt`, a table of contents with links (`generateLLMsTxt`),
 * - `llms-full.txt`, all content in one file (`generateLLMsFullTxt`).
 *
 * A failure while writing an individual page is logged and does not stop the
 * remaining pages. Failures while reading the sources or while writing
 * `llms.txt` / `llms-full.txt` are not caught here and reject the returned
 * promise.
 */
async generateBundle() {
  // Skip processing during SSR build
  if (isSsrBuild) {
    log.info('Skipping LLMs docs generation in SSR build')
    return
  }

  const outDir = config.vitepress?.outDir ?? 'dist'

  // Create output directory if it doesn't exist
  try {
    await fs.access(outDir)
  } catch {
    log.info(`Creating output directory: ${pc.cyan(outDir)}`)
    await fs.mkdir(outDir, { recursive: true })
  }

  const mdFilesList = Array.from(mdFiles)
  const fileCount = mdFilesList.length

  // Skip if no files found
  if (fileCount === 0) {
    log.warn(
      `No markdown files found to process. Check your \`${pc.bold('workDir')}\` and \`${pc.bold('ignoreFiles')}\` settings.`,
    )
    return
  }

  log.info(
    `Processing ${pc.bold(fileCount.toString())} markdown files from ${pc.cyan(settings.workDir)}`,
  )

  // Read and parse every collected markdown file in parallel
  const preparedFiles: PreparedFile[] = await Promise.all(
    mdFilesList.map(async (file) => {
      const content = await fs.readFile(file, 'utf-8')

      let mdFile: matter.GrayMatterFile<Input>
      if (settings.stripHTML) {
        const cleanedMarkdown = await remark()
          .use(remarkFrontmatter)
          .use(() => {
            // Strip HTML nodes from the syntax tree
            return (tree) => {
              remove(tree, { type: 'html' })
              return tree
            }
          })
          .process(content)
        mdFile = matter(String(cleanedMarkdown))
      } else {
        mdFile = matter(content)
      }

      // Extract title from frontmatter or use the first heading
      const title = extractTitle(mdFile)?.trim() || 'Untitled'

      // An `index.md` that is not directly inside the work directory is
      // recorded as `<its directory>.md`; every other file keeps its path.
      const filePath =
        path.basename(file) === 'index.md' &&
        path.dirname(file) !== settings.workDir
          ? `${path.dirname(file)}.md`
          : file

      return { path: filePath, title, file: mdFile }
    }),
  )

  if (settings.generateLLMFriendlyDocsForEachPage) {
    await Promise.all(
      preparedFiles.map(async (file) => {
        const relativePath = path.relative(settings.workDir, file.path)
        try {
          const mdFile = file.file
          const targetPath = path.resolve(outDir, relativePath)

          // Ensure target directory exists
          await fs.mkdir(path.dirname(targetPath), {
            recursive: true,
          })

          // Write the page, with regenerated frontmatter, to the output directory
          await fs.writeFile(
            targetPath,
            matter.stringify(
              mdFile.content,
              generateMetadata(mdFile, {
                domain: settings.domain,
                filePath: relativePath,
              }),
            ),
          )
          log.success(`Processed ${pc.cyan(relativePath)}`)
        } catch (error) {
          // `error` is `unknown`: anything can be thrown, so narrow before
          // reading `.message` instead of suppressing the type error.
          const reason = error instanceof Error ? error.message : String(error)
          log.error(`Failed to process ${pc.cyan(relativePath)}: ${reason}`)
        }
      }),
    )
  }

  // Sort files by title for better organization
  preparedFiles.sort((a, b) => a.title.localeCompare(b.title))

  // When no per-page markdown is emitted, links get an explicit `.html`
  // extension; otherwise the extension is left undefined.
  const linksExtension = settings.generateLLMFriendlyDocsForEachPage
    ? undefined
    : '.html'

  const tasks: Promise<void>[] = []

  // Generate llms.txt - table of contents with links
  if (settings.generateLLMsTxt) {
    const llmsTxtPath = path.resolve(outDir, 'llms.txt')
    // Custom variables are spread last so they override the built-in ones
    const templateVariables: CustomTemplateVariables = {
      title: settings.title,
      description: settings.description,
      details: settings.details,
      toc: settings.toc,
      ...settings.customTemplateVariables,
    }

    tasks.push(
      (async () => {
        log.info(`Generating ${pc.cyan('llms.txt')}...`)

        const llmsTxt = await generateLLMsTxt(preparedFiles, {
          indexMd: path.resolve(settings.workDir as string, 'index.md'),
          srcDir: settings.workDir as string,
          LLMsTxtTemplate:
            settings.customLLMsTxtTemplate || defaultLLMsTxtTemplate,
          templateVariables,
          vitepressConfig: config.vitepress?.userConfig,
          domain: settings.domain,
          sidebar: settings.sidebar,
          linksExtension,
          cleanUrls: config.cleanUrls,
        })

        await fs.writeFile(llmsTxtPath, llmsTxt, 'utf-8')

        log.success(
          expandTemplate(
            'Generated {file} (~{tokens} tokens, {size}) with {fileCount} documentation links',
            {
              file: pc.cyan('llms.txt'),
              tokens: pc.bold(millify(approximateTokenSize(llmsTxt))),
              size: pc.bold(getHumanReadableSizeOf(llmsTxt)),
              fileCount: pc.bold(fileCount.toString()),
            },
          ),
        )
      })(),
    )
  }

  // Generate llms-full.txt - all content in one file
  if (settings.generateLLMsFullTxt) {
    const llmsFullTxtPath = path.resolve(outDir, 'llms-full.txt')

    tasks.push(
      (async () => {
        log.info(
          `Generating full documentation bundle (${pc.cyan('llms-full.txt')})...`,
        )

        const llmsFullTxt = generateLLMsFullTxt(preparedFiles, {
          srcDir: settings.workDir as string,
          domain: settings.domain,
          linksExtension,
          cleanUrls: config.cleanUrls,
        })

        // Write content to llms-full.txt
        await fs.writeFile(llmsFullTxtPath, llmsFullTxt, 'utf-8')

        log.success(
          expandTemplate(
            'Generated {file} (~{tokens} tokens, {size}) with {fileCount} markdown files',
            {
              file: pc.cyan('llms-full.txt'),
              tokens: pc.bold(millify(approximateTokenSize(llmsFullTxt))),
              size: pc.bold(getHumanReadableSizeOf(llmsFullTxt)),
              fileCount: pc.bold(fileCount.toString()),
            },
          ),
        )
      })(),
    )
  }

  // Both generators run concurrently
  if (tasks.length) {
    await Promise.all(tasks)
  }
},
}
}

@ -0,0 +1,243 @@
import type { GrayMatterFile, Input } from 'gray-matter'
import type { ResolvedConfig } from 'vite'
import type { DefaultTheme, SiteConfig, UserConfig } from 'vitepress'
/** Built-in variables that can be substituted into the `llms.txt` template. */
interface TemplateVariables {
  /**
   * Title, extracted from the frontmatter or from the first h1 heading of
   * the main document (`index.md`).
   *
   * @example 'Awesome tool'
   */
  title?: string

  /**
   * Description text.
   *
   * @example 'Blazing fast build tool'
   */
  description?: string

  /**
   * Details text.
   *
   * @example 'A multi-user version of the notebook designed for companies, classrooms and research labs'
   */
  details?: string

  /**
   * Automatically generated table of contents (TOC).
   *
   * @example
   * ```markdown
   * - [Title](/foo.md): Lorem ipsum dolor sit amet, consectetur adipiscing elit.
   * - [Title 2](/bar/baz.md): Cras vel nibh id ipsum pharetra efficitur.
   * ```
   */
  toc?: string
}
/** {@link TemplateVariables} plus any number of user-defined template variables. */
interface CustomTemplateVariables extends TemplateVariables {
  /** Value of a user-defined variable, looked up by its name. */
  [name: string]: string | undefined
}
/**
* Options for generating `llms.txt`, `llms-full.txt` and the per-page
* LLM-friendly markdown files.
*/
export interface LlmstxtSettings extends TemplateVariables {
/**
* The domain that will be prepended to URLs in `llms.txt` and in the other generated files.
*
* Whether a domain should be attached is still undecided (it depends on whether the consuming AI can resolve the relative paths that are generated otherwise), so it is opt-in.
*
* **Note**: The domain must not end with `/`.
*
* Without a {@link LlmstxtSettings.domain | `domain`}:
* ```markdown
* - [Title](/foo/bar.md)
* ```
*
* With a {@link LlmstxtSettings.domain | `domain`}:
* ```markdown
* - [Title](https://example.com/foo/bar.md)
* ```
*
* @example
* ```typescript
* llmstxt({ domain: 'https://example.com' })
* ```
*/
domain?: string
/**
* Whether to generate the `llms.txt` file, which contains a list of sections with corresponding links.
*
* @default true
*/
generateLLMsTxt?: boolean
/**
* Whether to generate the `llms-full.txt` file, which contains all the documentation in one file.
*
* @default true
*/
generateLLMsFullTxt?: boolean
/**
* Whether to generate an LLM-friendly version of the documentation for each page on the website.
*
* @default true
*/
generateLLMFriendlyDocsForEachPage?: boolean
/**
* Whether to strip HTML tags from Markdown files.
*
* @default true
*/
stripHTML?: boolean
/**
* The directory from which files will be processed.
*
* This is useful for configuring the plugin to generate documentation for LLMs in a specific language.
*
* @example
* ```typescript
* llmstxt({
* // Generate documentation for LLMs from English documentation only
* workDir: 'en'
* })
* ```
*
* @default vitepress.srcDir
*/
workDir?: string
/**
* An array of file path patterns to be ignored during processing.
*
* This is useful for excluding files that are not part of the documentation itself (e.g., sponsors, team, etc.).
*
* @example
* ```typescript
* llmstxt({
* ignoreFiles: [
* 'about/team/*',
* 'sponsor/*'
* // ...
* ]
* })
* ```
*
* @default []
*/
ignoreFiles?: string[]
/**
* A custom template for the `llms.txt` file, allowing for a personalized order of elements.
*
* Available template elements include:
*
* - `{title}`: The title extracted from the frontmatter or the first h1 heading in the main document (`index.md`).
* - `{description}`: The description.
* - `{details}`: The details.
* - `{toc}`: An automatically generated **T**able **O**f **C**ontents.
*
* You can also add custom variables using the {@link LlmstxtSettings.customTemplateVariables | `customTemplateVariables`} parameter.
*
* @default
* ```markdown
* # {title}
*
* > {description}
*
* {details}
*
* ## Table of Contents
*
* {toc}
* ```
*/
customLLMsTxtTemplate?: string
/**
* Custom variables for {@link LlmstxtSettings.customLLMsTxtTemplate | `customLLMsTxtTemplate`}.
*
* With this option you can override the built-in variables or add new ones to the template.
*
* You can change the title in `llms.txt` without having to change the template:
*
* @example
* ```typescript
* llmstxt({
* customTemplateVariables: {
* title: 'Very custom title',
* }
* })
* ```
*
* You can also combine this with a custom template:
*
* @example
* ```typescript
* llmstxt({
* customLLMsTxtTemplate: '# {title}\n\n{foo}',
* customTemplateVariables: {
* foo: 'Very custom title',
* }
* })
* ```
*/
customTemplateVariables?: CustomTemplateVariables
/**
* VitePress {@link DefaultTheme.Sidebar | Sidebar}.
*
* Pass your {@link DefaultTheme.Sidebar | `sidebar`} here if it is not part of the VitePress configuration.
*
* This option is rarely needed.
*/
sidebar?: DefaultTheme.Sidebar
}
/**
* Represents a prepared file, including its title and path.
*/
export type PreparedFile = {
/**
* The title of the file.
*
* @example 'Guide'
*/
title: string
/**
* The absolute path to the file.
*
* NOTE(review): the example below is a relative path, which does not match
* "absolute" above — confirm which form consumers of this type expect.
*
* @example 'guide/getting-started.md'
*/
path: string
/**
* The prepared file itself, as parsed by `gray-matter`.
*
* @example
* ```typescript
* {
* data: {
* title: 'Guide'
* },
* content: 'Content goes here',
* orig: '---\ntitle: Guide\n---\n\nContent goes here'
* }
* ```
*/
file: GrayMatterFile<Input>
}
/**
* Vite's resolved config combined with the VitePress user config, plus the
* VitePress site config under `vitepress`.
*
* Keys that exist on both `UserConfig` and `ResolvedConfig` are omitted from
* the former, so they take their type from `ResolvedConfig`.
*/
interface VitePressConfig
extends Omit<UserConfig, keyof ResolvedConfig>,
ResolvedConfig {
/** The VitePress site configuration. */
vitepress: SiteConfig
}
/**
 * Represents the link extension options for generated links.
 *
 * Any string is accepted; `'.md'` and `'.html'` are the well-known values.
 * The open-ended member is written as `string & {}` rather than plain
 * `string` because a bare `string` absorbs the literal members, reducing the
 * whole union to `string` and hiding the literals from editor completion.
 */
type LinksExtension = '.md' | '.html' | (string & {})

@ -14,6 +14,7 @@ import type {
SSGContext,
SiteData
} from './shared'
import type { LlmstxtSettings } from './plugins/llmstxt/types'
export type RawConfigExports<ThemeConfig = any> =
| Awaitable<UserConfig<ThemeConfig>>
@ -153,6 +154,8 @@ export interface UserConfig<ThemeConfig = any>
transformItems?: (items: SitemapItem[]) => Awaitable<SitemapItem[]>
}
llms?: boolean | LlmstxtSettings
/**
* Build end hook: called when SSG finish.
* @param siteConfig The resolved configuration.
@ -209,6 +212,7 @@ export interface SiteConfig<ThemeConfig = any>
| 'transformHtml'
| 'transformPageData'
| 'sitemap'
| 'llms'
> {
root: string
srcDir: string

@ -4,7 +4,8 @@
"baseUrl": ".",
"outDir": "../../dist/node",
"types": ["node"],
"sourceMap": true
"sourceMap": true,
"rootDir": "../.."
},
"include": ["."]
}

2
types/shared.d.ts vendored

@ -1,6 +1,7 @@
// types shared between server and client
import type { UseDarkOptions } from '@vueuse/core'
import type { SSRContext } from 'vue/server-renderer'
import type { LlmstxtSettings } from '../src/node/plugins/llmstxt/types'
export type { DefaultTheme } from './default-theme.js'
export type Awaitable<T> = T | PromiseLike<T>
@ -134,6 +135,7 @@ export interface SiteData<ThemeConfig = any> {
router: {
prefetchLinks: boolean
}
llms?: boolean | LlmstxtSettings
}
export type HeadConfig =

Loading…
Cancel
Save