fix: protect special characters in math from markdown table parsers

Wiki.js uses markdown-it-attrs which interprets curly braces inside
inline math ($...$) as attribute directives, stripping them from the
formula. Additionally, markdown table parsers split cells at both `|`
and `&` characters, breaking formulas containing those symbols.

This fix replaces `{`, `}`, `|`, and `&` inside math expressions with
Unicode Private Use Area placeholders during markdown parsing, then
restores them before passing to KaTeX/MathJax for rendering.

- `U+E000` / `U+E001`: temporary replacements for `{` / `}`
- `U+E002`: temporary replacement for `|` (table cell delimiter)
- `U+E003`: temporary replacement for `&` (treated as a cell delimiter in
  multiline tables; used as the column separator in LaTeX cases/arrays)

The placeholder approach was chosen over HTML escaping because it
preserves LaTeX environments like `\begin{array}` that were broken
by the previous `{{}}` escaping method.

Fixes #1581
Fixes #1462

Co-authored-by: Claude <noreply@anthropic.com>
AI-model: kimi-for-coding/k2p6
pull/8000/head
Hugo Gu 3 weeks ago
parent 6f042e97cc
commit b41c9cd4e9

@ -1,3 +1,22 @@
// Private Use Area code points that stand in for characters which other
// markdown-it plugins would otherwise misread inside math:
// - braces: markdown-it-attrs treats them as attribute delimiters.
// - pipe: the markdown table parser treats it as a cell delimiter.
// - ampersand: the table parser treats it as a cell delimiter in
//   multiline tables.
const BRACE_OPEN_PLACEHOLDER = '\uE000'
const BRACE_CLOSE_PLACEHOLDER = '\uE001'
const PIPE_PLACEHOLDER = '\uE002'
const AMPERSAND_PLACEHOLDER = '\uE003'

/**
 * Turn every placeholder back into the character it replaced so the
 * formula can be handed to the math renderer unchanged.
 *
 * Despite the name, this restores braces, pipes and ampersands.
 *
 * @param {string} str - Math source that may contain placeholders.
 * @returns {string} The same source with `{`, `}`, `|` and `&` restored.
 */
export function restoreBraces (str) {
  const substitutions = [
    [BRACE_OPEN_PLACEHOLDER, '{'],
    [BRACE_CLOSE_PLACEHOLDER, '}'],
    [PIPE_PLACEHOLDER, '|'],
    [AMPERSAND_PLACEHOLDER, '&']
  ]
  let restored = str
  for (const [placeholder, original] of substitutions) {
    restored = restored.replaceAll(placeholder, original)
  }
  return restored
}
// Test if potential opening or closing delimiter
// Assumes that there is a "$" at state.src[pos]
function isValidDelim (state, pos) {
@ -27,6 +46,8 @@ function isValidDelim (state, pos) {
}
export default {
restoreBraces,
katexInline (state, silent) {
let start, match, token, res, pos
@ -84,11 +105,13 @@ export default {
token.content = state.src
// Extract the math part without the $
.slice(start, match)
// Escape the curly braces since they will be interpreted as
// attributes by markdown-it-attrs (the "curly_attributes"
// core rule)
.replaceAll("{", "{{")
.replaceAll("}", "}}")
// Replace curly braces with temporary placeholders to prevent
// markdown-it-attrs from interpreting them as attribute delimiters.
.replaceAll('{', BRACE_OPEN_PLACEHOLDER)
.replaceAll('}', BRACE_CLOSE_PLACEHOLDER)
// Replace pipe with temporary placeholder to prevent markdown
// table parser from interpreting it as a cell delimiter.
.replaceAll('|', PIPE_PLACEHOLDER)
}
state.pos = match + 1
@ -133,15 +156,22 @@ export default {
}
}
state.line = next + 1
token = state.push('katex_block', 'math', 0)
token.block = true
token.content = (firstLine && firstLine.trim() ? firstLine + '\n' : '') +
state.getLines(start + 1, next, state.tShift[start], true) +
(lastLine && lastLine.trim() ? lastLine : '')
token.map = [ start, state.line ]
token.markup = '$$'
return true
}
state.line = next + 1
token = state.push('katex_block', 'math', 0)
token.block = true
token.content = ((firstLine && firstLine.trim() ? firstLine + '\n' : '') +
state.getLines(start + 1, next, state.tShift[start], true) +
(lastLine && lastLine.trim() ? lastLine : ''))
// Replace curly braces with temporary placeholders to prevent
// markdown-it-attrs from interpreting them as attribute delimiters.
.replaceAll('{', BRACE_OPEN_PLACEHOLDER)
.replaceAll('}', BRACE_CLOSE_PLACEHOLDER)
// Replace pipe with temporary placeholder to prevent markdown
// table parser from interpreting it as a cell delimiter.
.replaceAll('|', PIPE_PLACEHOLDER)
token.map = [ start, state.line ]
token.markup = '$$'
return true
}
}

@ -296,6 +296,53 @@ DOMPurify.addHook('uponSanitizeElement', (elm) => {
// HELPER FUNCTIONS
// ========================================
// Unicode Private Use Area characters to temporarily replace special
// characters inside math expressions:
// - pipe (|): prevent markdown table parser from interpreting them as cell
// delimiters.
// - ampersand (&): prevent markdown-it-multimd-table from interpreting them
// as cell delimiters in multiline tables.
const PIPE_PLACEHOLDER = '\uE002'
const AMPERSAND_PLACEHOLDER = '\uE003'

/**
 * Replace pipe and ampersand characters inside inline ($...$) and block
 * ($$...$$) math expressions with placeholders to prevent markdown table
 * parsers from splitting formulas containing | (e.g., |x|) or &
 * (e.g., \begin{cases} ... & ... \\ ... \end{cases}).
 *
 * Only text that can plausibly be math is touched, because placeholders
 * written outside a real math expression are never restored:
 * - a backslash-escaped dollar (`\$`) is a literal dollar sign, so it neither
 *   opens nor closes a math span;
 * - an inline span never crosses a blank line, since inline content cannot
 *   extend past a paragraph boundary.
 *
 * @param {string} text - Raw markdown source.
 * @returns {string} The source with `|` and `&` inside math replaced by
 *   PIPE_PLACEHOLDER / AMPERSAND_PLACEHOLDER; everything else is unchanged.
 */
function protectMathPipes (text) {
  // A newline, optional horizontal whitespace, then another newline.
  const blankLine = /\n[ \t\r]*\n/

  // Swap the table-sensitive characters of one math segment for placeholders.
  const protect = segment => segment
    .replaceAll('|', PIPE_PLACEHOLDER)
    .replaceAll('&', AMPERSAND_PLACEHOLDER)

  // Index of the first "$" at or after `from` that is not preceded by an odd
  // number of backslashes (i.e. not escaped), or -1 when there is none.
  const findClosingDollar = from => {
    let at = text.indexOf('$', from)
    while (at !== -1) {
      let backslashes = 0
      for (let k = at - 1; k >= from && text[k] === '\\'; k--) {
        backslashes++
      }
      if (backslashes % 2 === 0) {
        return at
      }
      at = text.indexOf('$', at + 1)
    }
    return -1
  }

  let result = ''
  let i = 0
  while (i < text.length) {
    // Copy a backslash together with the character it escapes so that "\$"
    // is never considered a math delimiter (and "\\" cannot hide a real one).
    if (text[i] === '\\' && i + 1 < text.length) {
      result += text.slice(i, i + 2)
      i += 2
      continue
    }

    // Check for block math ($$...$$)
    if (text.startsWith('$$', i)) {
      const end = text.indexOf('$$', i + 2)
      if (end !== -1) {
        result += protect(text.slice(i, end + 2))
        i = end + 2
        continue
      }
    }

    // Check for inline math ($...$)
    if (text[i] === '$' && text[i + 1] !== '$') {
      const end = findClosingDollar(i + 1)
      // Two dollar signs separated by a blank line belong to different
      // paragraphs and therefore cannot delimit one inline formula.
      if (end !== -1 && !blankLine.test(text.slice(i, end))) {
        result += protect(text.slice(i, end + 1))
        i = end + 1
        continue
      }
    }

    result += text[i]
    i++
  }
  return result
}
// Inject line numbers for preview scroll sync
let linesMap = []
function injectLineNumbers (tokens, idx, options, env, slf) {
@ -328,7 +375,7 @@ const macros = {}
md.inline.ruler.after('escape', 'katex_inline', katexHelper.katexInline)
md.renderer.rules.katex_inline = (tokens, idx) => {
try {
return katex.renderToString(tokens[idx].content, {
return katex.renderToString(katexHelper.restoreBraces(tokens[idx].content), {
displayMode: false, macros
})
} catch (err) {
@ -341,7 +388,7 @@ md.block.ruler.after('blockquote', 'katex_block', katexHelper.katexBlock, {
})
md.renderer.rules.katex_block = (tokens, idx) => {
try {
return `<p>` + katex.renderToString(tokens[idx].content, {
return `<p>` + katex.renderToString(katexHelper.restoreBraces(tokens[idx].content), {
displayMode: true, macros
}) + `</p>`
} catch (err) {
@ -453,7 +500,9 @@ export default {
linesMap = []
// this.$store.set('editor/content', newContent)
this.processMarkers(this.cm.firstLine(), this.cm.lastLine())
this.previewHTML = DOMPurify.sanitize(md.render(newContent), {
// Protect pipe characters inside math expressions before markdown parsing
const protectedContent = protectMathPipes(newContent)
this.previewHTML = DOMPurify.sanitize(md.render(protectedContent), {
ADD_TAGS: ['foreignObject'],
HTML_INTEGRATION_POINTS: { foreignobject: true }
})

@ -19,6 +19,53 @@ const quoteStyles = {
Swedish: '””’’'
}
// Unicode Private Use Area characters to temporarily replace special
// characters inside math expressions:
// - pipe (|): prevent markdown table parser from interpreting them as cell
// delimiters.
// - ampersand (&): prevent markdown table parser from interpreting them
// as cell delimiters in multiline tables.
const PIPE_PLACEHOLDER = '\uE002'
const AMPERSAND_PLACEHOLDER = '\uE003'

/**
 * Replace pipe and ampersand characters inside inline ($...$) and block
 * ($$...$$) math expressions with placeholders to prevent markdown table
 * parsers from splitting formulas containing | (e.g., |x|) or &
 * (e.g., \begin{cases} ... & ... \\ ... \end{cases}).
 *
 * Only text that can plausibly be math is touched, because placeholders
 * written outside a real math expression are never restored:
 * - a backslash-escaped dollar (`\$`) is a literal dollar sign, so it neither
 *   opens nor closes a math span;
 * - an inline span never crosses a blank line, since inline content cannot
 *   extend past a paragraph boundary.
 *
 * @param {string} text - Raw markdown source.
 * @returns {string} The source with `|` and `&` inside math replaced by
 *   PIPE_PLACEHOLDER / AMPERSAND_PLACEHOLDER; everything else is unchanged.
 */
function protectMathPipes (text) {
  // A newline, optional horizontal whitespace, then another newline.
  const blankLine = /\n[ \t\r]*\n/

  // Swap the table-sensitive characters of one math segment for placeholders.
  const protect = segment => segment
    .replaceAll('|', PIPE_PLACEHOLDER)
    .replaceAll('&', AMPERSAND_PLACEHOLDER)

  // Index of the first "$" at or after `from` that is not preceded by an odd
  // number of backslashes (i.e. not escaped), or -1 when there is none.
  const findClosingDollar = from => {
    let at = text.indexOf('$', from)
    while (at !== -1) {
      let backslashes = 0
      for (let k = at - 1; k >= from && text[k] === '\\'; k--) {
        backslashes++
      }
      if (backslashes % 2 === 0) {
        return at
      }
      at = text.indexOf('$', at + 1)
    }
    return -1
  }

  let result = ''
  let i = 0
  while (i < text.length) {
    // Copy a backslash together with the character it escapes so that "\$"
    // is never considered a math delimiter (and "\\" cannot hide a real one).
    if (text[i] === '\\' && i + 1 < text.length) {
      result += text.slice(i, i + 2)
      i += 2
      continue
    }

    // Check for block math ($$...$$)
    if (text.startsWith('$$', i)) {
      const end = text.indexOf('$$', i + 2)
      if (end !== -1) {
        result += protect(text.slice(i, end + 2))
        i = end + 2
        continue
      }
    }

    // Check for inline math ($...$)
    if (text[i] === '$' && text[i + 1] !== '$') {
      const end = findClosingDollar(i + 1)
      // Two dollar signs separated by a blank line belong to different
      // paragraphs and therefore cannot delimit one inline formula.
      if (end !== -1 && !blankLine.test(text.slice(i, end))) {
        result += protect(text.slice(i, end + 1))
        i = end + 1
        continue
      }
    }

    result += text[i]
    i++
  }
  return result
}
module.exports = {
async render() {
const mkdown = md({
@ -50,6 +97,8 @@ module.exports = {
await renderer.init(mkdown, child.config)
}
return mkdown.render(this.input)
// Protect pipe characters inside math expressions before markdown parsing
const protectedInput = protectMathPipes(this.input)
return mkdown.render(protectedInput)
}
}

@ -3,6 +3,25 @@ const chemParse = require('./mhchem')
/* global WIKI */
// Private Use Area code points that stand in for characters which other
// markdown-it plugins would otherwise misread inside math:
// - braces: markdown-it-attrs treats them as attribute delimiters.
// - pipe: the markdown table parser treats it as a cell delimiter.
// - ampersand: the table parser treats it as a cell delimiter in
//   multiline tables.
const BRACE_OPEN_PLACEHOLDER = '\uE000'
const BRACE_CLOSE_PLACEHOLDER = '\uE001'
const PIPE_PLACEHOLDER = '\uE002'
const AMPERSAND_PLACEHOLDER = '\uE003'

/**
 * Turn every placeholder back into the character it replaced so the
 * formula can be handed to KaTeX unchanged.
 *
 * Despite the name, this restores braces, pipes and ampersands.
 *
 * @param {string} str - Math source that may contain placeholders.
 * @returns {string} The same source with `{`, `}`, `|` and `&` restored.
 */
function restoreBraces (str) {
  const substitutions = [
    [BRACE_OPEN_PLACEHOLDER, '{'],
    [BRACE_CLOSE_PLACEHOLDER, '}'],
    [PIPE_PLACEHOLDER, '|'],
    [AMPERSAND_PLACEHOLDER, '&']
  ]
  let restored = str
  for (const [placeholder, original] of substitutions) {
    restored = restored.replaceAll(placeholder, original)
  }
  return restored
}
// ------------------------------------
// Markdown - KaTeX Renderer
// ------------------------------------
@ -29,7 +48,7 @@ module.exports = {
mdinst.inline.ruler.after('escape', 'katex_inline', katexInline)
mdinst.renderer.rules.katex_inline = (tokens, idx) => {
try {
return katex.renderToString(tokens[idx].content, {
return katex.renderToString(restoreBraces(tokens[idx].content), {
displayMode: false, macros
})
} catch (err) {
@ -44,7 +63,7 @@ module.exports = {
})
mdinst.renderer.rules.katex_block = (tokens, idx) => {
try {
return `<p>` + katex.renderToString(tokens[idx].content, {
return `<p>` + katex.renderToString(restoreBraces(tokens[idx].content), {
displayMode: true, macros
}) + `</p>`
} catch (err) {
@ -135,11 +154,19 @@ function katexInline (state, silent) {
return true
}
if (!silent) {
token = state.push('katex_inline', 'math', 0)
token.markup = '$'
token.content = state.src.slice(start, match)
}
if (!silent) {
token = state.push('katex_inline', 'math', 0)
token.markup = '$'
token.content = state.src
.slice(start, match)
// Replace curly braces with temporary placeholders to prevent
// markdown-it-attrs from interpreting them as attribute delimiters.
.replaceAll('{', BRACE_OPEN_PLACEHOLDER)
.replaceAll('}', BRACE_CLOSE_PLACEHOLDER)
// Replace pipe with temporary placeholder to prevent markdown
// table parser from interpreting it as a cell delimiter.
.replaceAll('|', PIPE_PLACEHOLDER)
}
state.pos = match + 1
return true
@ -187,9 +214,16 @@ function katexBlock (state, start, end, silent) {
token = state.push('katex_block', 'math', 0)
token.block = true
token.content = (firstLine && firstLine.trim() ? firstLine + '\n' : '') +
token.content = ((firstLine && firstLine.trim() ? firstLine + '\n' : '') +
state.getLines(start + 1, next, state.tShift[start], true) +
(lastLine && lastLine.trim() ? lastLine : '')
(lastLine && lastLine.trim() ? lastLine : ''))
// Replace curly braces with temporary placeholders to prevent
// markdown-it-attrs from interpreting them as attribute delimiters.
.replaceAll('{', BRACE_OPEN_PLACEHOLDER)
.replaceAll('}', BRACE_CLOSE_PLACEHOLDER)
// Replace pipe with temporary placeholder to prevent markdown
// table parser from interpreting it as a cell delimiter.
.replaceAll('|', PIPE_PLACEHOLDER)
token.map = [ start, state.line ]
token.markup = '$$'
return true

@ -2,6 +2,25 @@ const mjax = require('mathjax')
/* global WIKI */
// Private Use Area code points that stand in for characters which other
// markdown-it plugins would otherwise misread inside math:
// - braces: markdown-it-attrs treats them as attribute delimiters.
// - pipe: the markdown table parser treats it as a cell delimiter.
// - ampersand: the table parser treats it as a cell delimiter in
//   multiline tables.
const BRACE_OPEN_PLACEHOLDER = '\uE000'
const BRACE_CLOSE_PLACEHOLDER = '\uE001'
const PIPE_PLACEHOLDER = '\uE002'
const AMPERSAND_PLACEHOLDER = '\uE003'

/**
 * Turn every placeholder back into the character it replaced so the
 * formula can be handed to MathJax unchanged.
 *
 * Despite the name, this restores braces, pipes and ampersands.
 *
 * @param {string} str - Math source that may contain placeholders.
 * @returns {string} The same source with `{`, `}`, `|` and `&` restored.
 */
function restoreBraces (str) {
  const substitutions = [
    [BRACE_OPEN_PLACEHOLDER, '{'],
    [BRACE_CLOSE_PLACEHOLDER, '}'],
    [PIPE_PLACEHOLDER, '|'],
    [AMPERSAND_PLACEHOLDER, '&']
  ]
  let restored = str
  for (const [placeholder, original] of substitutions) {
    restored = restored.replaceAll(placeholder, original)
  }
  return restored
}
// ------------------------------------
// Markdown - MathJax Renderer
// ------------------------------------
@ -38,7 +57,7 @@ module.exports = {
mdinst.inline.ruler.after('escape', 'mathjax_inline', mathjaxInline)
mdinst.renderer.rules.mathjax_inline = (tokens, idx) => {
try {
const result = MathJax.tex2svg(tokens[idx].content, {
const result = MathJax.tex2svg(restoreBraces(tokens[idx].content), {
display: false
})
return MathJax.startup.adaptor.innerHTML(result)
@ -54,7 +73,7 @@ module.exports = {
})
mdinst.renderer.rules.mathjax_block = (tokens, idx) => {
try {
const result = MathJax.tex2svg(tokens[idx].content, {
const result = MathJax.tex2svg(restoreBraces(tokens[idx].content), {
display: true
})
return `<p>` + MathJax.startup.adaptor.innerHTML(result) + `</p>`
@ -149,7 +168,15 @@ function mathjaxInline (state, silent) {
if (!silent) {
token = state.push('mathjax_inline', 'math', 0)
token.markup = '$'
token.content = state.src.slice(start, match)
token.content = state.src
.slice(start, match)
// Replace curly braces with temporary placeholders to prevent
// markdown-it-attrs from interpreting them as attribute delimiters.
.replaceAll('{', BRACE_OPEN_PLACEHOLDER)
.replaceAll('}', BRACE_CLOSE_PLACEHOLDER)
// Replace pipe with temporary placeholder to prevent markdown
// table parser from interpreting it as a cell delimiter.
.replaceAll('|', PIPE_PLACEHOLDER)
}
state.pos = match + 1
@ -198,9 +225,16 @@ function mathjaxBlock (state, start, end, silent) {
token = state.push('mathjax_block', 'math', 0)
token.block = true
token.content = (firstLine && firstLine.trim() ? firstLine + '\n' : '') +
token.content = ((firstLine && firstLine.trim() ? firstLine + '\n' : '') +
state.getLines(start + 1, next, state.tShift[start], true) +
(lastLine && lastLine.trim() ? lastLine : '')
(lastLine && lastLine.trim() ? lastLine : ''))
// Replace curly braces with temporary placeholders to prevent
// markdown-it-attrs from interpreting them as attribute delimiters.
.replaceAll('{', BRACE_OPEN_PLACEHOLDER)
.replaceAll('}', BRACE_CLOSE_PLACEHOLDER)
// Replace pipe with temporary placeholder to prevent markdown
// table parser from interpreting it as a cell delimiter.
.replaceAll('|', PIPE_PLACEHOLDER)
token.map = [ start, state.line ]
token.markup = '$$'
return true

Loading…
Cancel
Save