From 1feab5c05ff31980f965fea5898c382ed1551019 Mon Sep 17 00:00:00 2001
From: userquin
Date: Sat, 28 Oct 2023 15:50:48 +0200
Subject: [PATCH] feat(node): add duplicated ids and bad anchor links detection

---
 package.json                      |   1 +
 pnpm-lock.yaml                    |   7 ++
 src/node/build/build.ts           |   2 +
 src/node/build/checkAnchorRefs.ts | 113 ++++++++++++++++++++++++++++++
 4 files changed, 123 insertions(+)
 create mode 100644 src/node/build/checkAnchorRefs.ts

diff --git a/package.json b/package.json
index 2bda2e8a..f4b6d365 100644
--- a/package.json
+++ b/package.json
@@ -192,6 +192,7 @@
     "sitemap": "^7.1.1",
     "supports-color": "^9.4.0",
     "typescript": "^5.2.2",
+    "ultrahtml": "^1.5.2",
     "vitest": "^0.34.6",
     "vue-tsc": "^1.8.19",
     "wait-on": "^7.0.1"
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 0013817c..603d65f5 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -279,6 +279,9 @@ importers:
       typescript:
         specifier: ^5.2.2
         version: 5.2.2
+      ultrahtml:
+        specifier: ^1.5.2
+        version: 1.5.2
       vitest:
         specifier: ^0.34.6
         version: 0.34.6(supports-color@9.4.0)
@@ -4623,6 +4626,10 @@ packages:
     dev: true
     optional: true
 
+  /ultrahtml@1.5.2:
+    resolution: {integrity: sha512-qh4mBffhlkiXwDAOxvSGxhL0QEQsTbnP9BozOK3OYPEGvPvdWzvAUaXNtUSMdNsKDtuyjEbyVUPFZ52SSLhLqw==}
+    dev: true
+
   /unbox-primitive@1.0.2:
     resolution: {integrity: sha512-61pPlCD9h51VoreyJ0BReideM3MDKMKnh6+V9L08331ipq6Q8OFXZYiqP6n/tbHx4s5I9uRhcye6BrbkizkBDw==}
     dependencies:
diff --git a/src/node/build/build.ts b/src/node/build/build.ts
index b5c50978..ecf7c816 100644
--- a/src/node/build/build.ts
+++ b/src/node/build/build.ts
@@ -13,6 +13,7 @@ import { task } from '../utils/task'
 import { bundle } from './bundle'
 import { generateSitemap } from './generateSitemap'
 import { renderPage } from './render'
+import { checkIdsAndAnchorHrefs } from './checkAnchorRefs'
 
 export async function build(
   root?: string,
@@ -137,6 +138,7 @@ export async function build(
     if (!process.env.DEBUG) await rimraf(siteConfig.tempDir)
   }
 
+  await checkIdsAndAnchorHrefs(siteConfig)
   await generateSitemap(siteConfig)
   await siteConfig.buildEnd?.(siteConfig)
 
diff --git a/src/node/build/checkAnchorRefs.ts b/src/node/build/checkAnchorRefs.ts
new file mode 100644
index 00000000..ad4ca487
--- /dev/null
+++ b/src/node/build/checkAnchorRefs.ts
@@ -0,0 +1,113 @@
+import type { SiteConfig } from '../config'
+import fg from 'fast-glob'
+import { task } from '../utils/task'
+import fs from 'fs-extra'
+import { parse, walkSync, ELEMENT_NODE } from 'ultrahtml'
+import { dirname, join, resolve } from 'path'
+
+export async function checkIdsAndAnchorHrefs(siteConfig: SiteConfig) {
+  await task('checking for duplicate ids and bad anchor hrefs', async () => {
+    for await (const error of collectErrors(siteConfig)) {
+      // TODO: use picocolors here
+      console.error(error)
+    }
+  })
+}
+
+/* exporting this function for testing purposes */
+export async function* collectErrors(siteConfig: SiteConfig) {
+  const outDir = siteConfig.outDir
+  const files = new Set(
+    siteConfig.pages.map((page) =>
+      `${siteConfig.rewrites.map[page] || page}`
+        .replace(/\\/g, '/')
+        .replace(/\.md$/, '.html')
+    )
+  )
+  // add public html files to the list: e.g. VP docs has public/pure.html
+  for await (const entry of fg.stream('*.html', {
+    cwd: outDir,
+    deep: 1
+  })) {
+    files.add(entry.toString().replace(/\\/g, '/'))
+  }
+  const checkHtmlExt = siteConfig.site.cleanUrls === false
+  const stream = fg.stream('**/*.html', {
+    cwd: siteConfig.outDir
+  })
+  for await (const entry of stream) {
+    const localLinks = new Set<string>()
+    const localIds = new Set<string>()
+    const localErrors: string[] = []
+    const content = parse(
+      await fs.promises.readFile(resolve(outDir, entry.toString()), 'utf8')
+    )
+    // collect element ids and anchor hrefs
+    walkSync(content, (node) => {
+      if (node.type === ELEMENT_NODE) {
+        const id = node.attributes.id
+        if (id) {
+          if (localIds.has(id)) localErrors.push(`duplicate id="${id}"`)
+          else localIds.add(id)
+        }
+        if (node.name.toLowerCase() === 'a') {
+          const href = node.attributes.href
+          if (
+            !href ||
+            href.startsWith('http://') ||
+            href.startsWith('https://')
+          )
+            return
+          localLinks.add(href)
+        }
+      }
+    })
+    // check local anchors and internal links
+    for (const href of localLinks) {
+      // 1) check anchors against the ids collected on this page
+      if (href[0] === '#') {
+        const id = href.slice(1)
+        if (!localIds.has(id))
+          localErrors.push(`missing local id for "${href}"`)
+
+        continue
+      }
+      // 2) check internal links against generated pages
+      // Remove query params and hash
+      let localLink = href.split(/[#?]/).shift()
+      if (!localLink) continue
+
+      // Append .html (or index.html for directory links)
+      if (checkHtmlExt) {
+        if (localLink.endsWith('/')) {
+          localLink += 'index.html'
+        }
+        if (!localLink.endsWith('.html')) {
+          localErrors.push(`bad href link "${href}"`)
+          continue
+        }
+      } else {
+        if (localLink === '/') localLink = '/index.html'
+        if (!localLink.endsWith('.html')) localLink += '.html'
+      }
+      // Get absolute link
+      if (localLink.startsWith('.')) {
+        localLink =
+          '/' + join(dirname(entry.toString()), localLink).replace(/\\/g, '/')
+      }
+      if (!localLink.startsWith('/')) {
+        localErrors.push(`bad href link "${href}"`)
+        continue
+      }
+      localLink = localLink.slice(1)
+      if (!localLink) localLink = 'index.html'
+
+      // Check if target html page exists
+      if (!files.has(localLink)) {
+        localErrors.push(`bad href link "${href}" (missing file)`)
+      }
+    }
+    if (localErrors.length)
+      yield `\n${entry}\n${localErrors.map((e) => `\t${e}`).join('\n')}`
+  }
+}
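
Below is a minimal Vitest sketch of how the exported collectErrors() generator could be exercised against a tiny fake build output. It is not part of the patch: the test file location, the import paths, the partial SiteConfig cast, and the fixture content are illustrative assumptions only.

import { mkdtemp, writeFile } from 'fs/promises'
import { tmpdir } from 'os'
import { join } from 'path'
import { describe, expect, it } from 'vitest'
import type { SiteConfig } from '../src/node/config'
import { collectErrors } from '../src/node/build/checkAnchorRefs'

describe('checkIdsAndAnchorHrefs', () => {
  it('reports duplicate ids and bad anchor links', async () => {
    // fake build output: one generated page with a duplicate id and a
    // link pointing to a page that was never generated
    const outDir = await mkdtemp(join(tmpdir(), 'vp-anchors-'))
    await writeFile(
      join(outDir, 'index.html'),
      '<h2 id="a">A</h2><h3 id="a">A again</h3><a href="./missing.html">x</a>'
    )

    // only the fields read by collectErrors are provided; the cast is an
    // assumption made to keep the example small and self-contained
    const config = {
      outDir,
      pages: ['index.md'],
      rewrites: { map: {} },
      site: { cleanUrls: false }
    } as unknown as SiteConfig

    const errors: string[] = []
    for await (const error of collectErrors(config)) errors.push(error)

    expect(errors.join('\n')).toContain('duplicate id="a"')
    expect(errors.join('\n')).toContain('(missing file)')
  })
})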