perf: optimize parser hot paths for ~18% faster compilation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
pull/17811/head
Mathias Picker 14 hours ago
parent fa4f1c45f4
commit 020a683642

@ -0,0 +1,5 @@
---
'svelte': patch
---
perf: optimize parser hot paths for faster compilation

@ -4,7 +4,6 @@
// @ts-expect-error acorn type definitions are borked in the release we use
import { isIdentifierStart, isIdentifierChar } from 'acorn';
import fragment from './state/fragment.js';
import { regex_whitespace } from '../patterns.js';
import * as e from '../../errors.js';
import { create_fragment } from './utils/create.js';
import read_options from './read/options.js';
@ -14,6 +13,25 @@ import * as state from '../../state.js';
const regex_position_indicator = / \(\d+:\d+\)$/;
/** @param {number} cc */
function is_whitespace(cc) {
// fast path for common whitespace
if (cc === 32 || (cc <= 13 && cc >= 9)) return true;
// rare whitespace — \u00a0, \u1680, \u2000-\u200a, \u2028, \u2029, \u202f, \u205f, \u3000, \ufeff
if (cc < 160) return false;
return (
cc === 160 ||
cc === 5760 ||
(cc >= 8192 && cc <= 8202) ||
cc === 8232 ||
cc === 8233 ||
cc === 8239 ||
cc === 8287 ||
cc === 12288 ||
cc === 65279
);
}
const regex_lang_attribute =
/<!--[^]*?-->|<script\s+(?:[^>]*|(?:[^=>'"/]+=(?:"[^"]*"|'[^']*'|[^>\s]+)\s+)*)lang=(["'])?([^"' >]+)\1[^>]*>/g;
@ -191,22 +209,26 @@ export class Parser {
return this.template[this.index] === str;
}
return this.template.slice(this.index, this.index + length) === str;
return this.template.startsWith(str, this.index);
}
/**
* Match a regex at the current index
* @param {RegExp} pattern Should have a ^ anchor at the start so the regex doesn't search past the beginning, resulting in worse performance
* @param {RegExp} pattern Should have the sticky (`y`) flag so that it only matches at the current index
*/
match_regex(pattern) {
const match = pattern.exec(this.template.slice(this.index));
if (!match || match.index !== 0) return null;
pattern.lastIndex = this.index;
const match = pattern.exec(this.template);
if (!match || match.index !== this.index) return null;
return match[0];
}
allow_whitespace() {
while (this.index < this.template.length && regex_whitespace.test(this.template[this.index])) {
while (
this.index < this.template.length &&
is_whitespace(this.template.charCodeAt(this.index))
) {
this.index++;
}
}
@ -282,7 +304,7 @@ export class Parser {
}
require_whitespace() {
if (!regex_whitespace.test(this.template[this.index])) {
if (!is_whitespace(this.template.charCodeAt(this.index))) {
e.expected_whitespace(this.index);
}

@ -9,7 +9,7 @@ import { is_text_attribute } from '../../../utils/ast.js';
import { locator } from '../../../state.js';
const regex_closing_script_tag = /<\/script\s*>/;
const regex_starts_with_closing_script_tag = /^<\/script\s*>/;
const regex_starts_with_closing_script_tag = /<\/script\s*>/y;
const RESERVED_ATTRIBUTES = ['server', 'client', 'worker', 'test', 'default'];
const ALLOWED_ATTRIBUTES = ['context', 'generics', 'lang', 'module'];

@ -2,17 +2,17 @@
/** @import { Parser } from '../index.js' */
import * as e from '../../../errors.js';
const REGEX_MATCHER = /^[~^$*|]?=/;
const REGEX_MATCHER = /[~^$*|]?=/y;
const REGEX_CLOSING_BRACKET = /[\s\]]/;
const REGEX_ATTRIBUTE_FLAGS = /^[a-zA-Z]+/; // only `i` and `s` are valid today, but make it future-proof
const REGEX_COMBINATOR = /^(\+|~|>|\|\|)/;
const REGEX_PERCENTAGE = /^\d+(\.\d+)?%/;
const REGEX_ATTRIBUTE_FLAGS = /[a-zA-Z]+/y; // only `i` and `s` are valid today, but make it future-proof
const REGEX_COMBINATOR = /(\+|~|>|\|\|)/y;
const REGEX_PERCENTAGE = /\d+(\.\d+)?%/y;
const REGEX_NTH_OF =
/^(even|odd|\+?(\d+|\d*n(\s*[+-]\s*\d+)?)|-\d*n(\s*\+\s*\d+))((?=\s*[,)])|\s+of\s+)/;
/(even|odd|\+?(\d+|\d*n(\s*[+-]\s*\d+)?)|-\d*n(\s*\+\s*\d+))((?=\s*[,)])|\s+of\s+)/y;
const REGEX_WHITESPACE_OR_COLON = /[\s:]/;
const REGEX_LEADING_HYPHEN_OR_DIGIT = /-?\d/;
const REGEX_LEADING_HYPHEN_OR_DIGIT = /-?\d/y;
const REGEX_VALID_IDENTIFIER_CHAR = /[a-zA-Z0-9_-]/;
const REGEX_UNICODE_SEQUENCE = /^\\[0-9a-fA-F]{1,6}(\r\n|\s)?/;
const REGEX_UNICODE_SEQUENCE = /\\[0-9a-fA-F]{1,6}(\r\n|\s)?/y;
const REGEX_COMMENT_CLOSE = /\*\//;
const REGEX_HTML_COMMENT_CLOSE = /-->/;
@ -28,7 +28,7 @@ export default function read_style(parser, start, attributes) {
const content_end = parser.index;
parser.eat('</style', true);
parser.read(/^\s*>/);
parser.read(/\s*>/y);
return {
type: 'StyleSheet',

@ -17,19 +17,22 @@ import { list } from '../../../utils/string.js';
import { locator } from '../../../state.js';
import * as b from '#compiler/builders';
const regex_invalid_unquoted_attribute_value = /^(\/>|[\s"'=<>`])/;
const regex_closing_textarea_tag = /^<\/textarea(\s[^>]*)?>/i;
const regex_invalid_unquoted_attribute_value = /(\/>|[\s"'=<>`])/y;
const regex_closing_textarea_tag = /<\/textarea(\s[^>]*)?>/iy;
const regex_closing_comment = /-->/;
const regex_whitespace_or_slash_or_closing_tag = /(\s|\/|>)/;
const regex_token_ending_character = /[\s=/>"']/;
const regex_starts_with_quote_characters = /^["']/;
const regex_attribute_value = /^(?:"([^"]*)"|'([^'])*'|([^>\s]+))/;
const regex_starts_with_quote_characters = /["']/y;
const regex_attribute_value = /(?:"([^"]*)"|'([^'])*'|([^>\s]+))/y;
const regex_doctype_name = /^![a-zA-Z]+$/;
const regex_namespaced_name = /^[a-zA-Z][a-zA-Z0-9]*:[a-zA-Z][a-zA-Z0-9-]*[a-zA-Z0-9]$/;
/** @param {string} name */
function is_valid_element_name(name) {
// DOCTYPE (e.g. !DOCTYPE)
if (/^![a-zA-Z]+$/.test(name)) return true;
if (regex_doctype_name.test(name)) return true;
// svelte:* meta tags (e.g. svelte:element, svelte:head)
if (/^[a-zA-Z][a-zA-Z0-9]*:[a-zA-Z][a-zA-Z0-9-]*[a-zA-Z0-9]$/.test(name)) return true;
if (regex_namespaced_name.test(name)) return true;
// standard HTML/SVG/MathML elements and custom elements
return REGEX_VALID_TAG_NAME.test(name);
}
@ -399,7 +402,10 @@ export default function element(parser) {
// special case
element.fragment.nodes = read_sequence(
parser,
() => regex_closing_textarea_tag.test(parser.template.slice(parser.index)),
() => {
regex_closing_textarea_tag.lastIndex = parser.index;
return regex_closing_textarea_tag.test(parser.template);
},
'inside <textarea>'
);
parser.read(regex_closing_textarea_tag);
@ -407,7 +413,13 @@ export default function element(parser) {
} else if (tag.name === 'script' || tag.name === 'style') {
// special case
const start = parser.index;
const data = parser.read_until(new RegExp(`</${tag.name}>`));
const close_tag = `</${tag.name}>`;
const close_index = parser.template.indexOf(close_tag, parser.index);
const data = parser.template.slice(
parser.index,
close_index === -1 ? parser.template.length : close_index
);
parser.index = close_index === -1 ? parser.template.length : close_index;
const end = parser.index;
/** @type {AST.Text} */
@ -849,7 +861,8 @@ function read_sequence(parser, done, location) {
/** @param {number} end */
function flush(end) {
if (current_chunk.raw) {
if (end > current_chunk.start) {
current_chunk.raw = parser.template.slice(current_chunk.start, end);
current_chunk.data = decode_character_references(current_chunk.raw, true);
current_chunk.end = end;
chunks.push(current_chunk);
@ -903,7 +916,7 @@ function read_sequence(parser, done, location) {
data: ''
};
} else {
current_chunk.raw += parser.template[parser.index++];
parser.index++;
}
}

@ -10,7 +10,7 @@ import read_expression, { get_loose_identifier } from '../read/expression.js';
import { create_fragment } from '../utils/create.js';
import { match_bracket } from '../utils/bracket.js';
const regex_whitespace_with_closing_curly_brace = /^\s*}/;
const regex_whitespace_with_closing_curly_brace = /\s*}/y;
const pointy_bois = { '<': '>' };

@ -6,12 +6,12 @@ import { decode_character_references } from '../utils/html.js';
export default function text(parser) {
const start = parser.index;
let data = '';
while (parser.index < parser.template.length && !parser.match('<') && !parser.match('{')) {
data += parser.template[parser.index++];
parser.index++;
}
const data = parser.template.slice(start, parser.index);
/** @type {AST.Text} */
parser.append({
type: 'Text',

@ -141,13 +141,15 @@ const default_brackets = {
'[': ']'
};
const default_close = new Set(Object.values(default_brackets));
/**
* @param {Parser} parser
* @param {number} start
* @param {Record<string, string>} brackets
*/
export function match_bracket(parser, start, brackets = default_brackets) {
const close = Object.values(brackets);
const close = brackets === default_brackets ? default_close : new Set(Object.values(brackets));
const bracket_stack = [];
let i = start;
@ -162,7 +164,7 @@ export function match_bracket(parser, start, brackets = default_brackets) {
if (char in brackets) {
bracket_stack.push(char);
} else if (close.includes(char)) {
} else if (close.has(char)) {
const popped = /** @type {string} */ (bracket_stack.pop());
const expected = /** @type {string} */ (brackets[popped]);

Loading…
Cancel
Save