perf: optimize parser hot paths for ~18% faster compilation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
14 hours ago · 020a683642
parent fa4f1c45f4
commit 020a683642
8 changed files with 74 additions and 32 deletions
--- a/.changeset/fast-parser-hotpaths.md
+++ b/.changeset/fast-parser-hotpaths.md
@ -0,0 +1,5 @@
+---
+'svelte': patch
+---
+
+perf: optimize parser hot paths for faster compilation
--- a/packages/svelte/src/compiler/phases/1-parse/index.js
+++ b/packages/svelte/src/compiler/phases/1-parse/index.js
@ -4,7 +4,6 @@
 // @ts-expect-error acorn type definitions are borked in the release we use
 import { isIdentifierStart, isIdentifierChar } from 'acorn';
 import fragment from './state/fragment.js';
-import { regex_whitespace } from '../patterns.js';
 import * as e from '../../errors.js';
 import { create_fragment } from './utils/create.js';
 import read_options from './read/options.js';
@ -14,6 +13,25 @@ import * as state from '../../state.js';

 const regex_position_indicator = / \(\d+:\d+\)$/;

+/** @param {number} cc */
+function is_whitespace(cc) {
+	// fast path for common whitespace — space (32) and \t, \n, \v, \f, \r (9–13)
+	if (cc === 32 || (cc <= 13 && cc >= 9)) return true;
+	// rare whitespace — \u00a0, \u1680, \u2000-\u200a, \u2028, \u2029, \u202f, \u205f, \u3000, \ufeff
+	if (cc < 160) return false;
+	return (
+		cc === 160 ||
+		cc === 5760 ||
+		(cc >= 8192 && cc <= 8202) ||
+		cc === 8232 ||
+		cc === 8233 ||
+		cc === 8239 ||
+		cc === 8287 ||
+		cc === 12288 ||
+		cc === 65279
+	);
+}
+
 const regex_lang_attribute =
 	/<!--[^]*?-->|<script\s+(?:[^>]*|(?:[^=>'"/]+=(?:"[^"]*"|'[^']*'|[^>\s]+)\s+)*)lang=(["'])?([^"' >]+)\1[^>]*>/g;

@ -191,22 +209,26 @@ export class Parser {
 			return this.template[this.index] === str;
 		}

-		return this.template.slice(this.index, this.index + length) === str;
+		return this.template.startsWith(str, this.index);
 	}

 	/**
 	 * Match a regex at the current index
-	 * @param {RegExp} pattern  Should have a ^ anchor at the start so the regex doesn't search past the beginning, resulting in worse performance
+	 * @param {RegExp} pattern  Must have the sticky (`y`) flag so that it only matches at the current index — without `y` (or `g`) the regex ignores `lastIndex` and searches from the start of the template
 	 */
 	match_regex(pattern) {
-		const match = pattern.exec(this.template.slice(this.index));
-		if (!match || match.index !== 0) return null;
+		pattern.lastIndex = this.index;
+		const match = pattern.exec(this.template);
+		if (!match || match.index !== this.index) return null;

 		return match[0];
 	}

 	allow_whitespace() {
-		while (this.index < this.template.length && regex_whitespace.test(this.template[this.index])) {
+		while (
+			this.index < this.template.length &&
+			is_whitespace(this.template.charCodeAt(this.index))
+		) {
 			this.index++;
 		}
 	}
@ -282,7 +304,7 @@ export class Parser {
 	}

 	require_whitespace() {
-		if (!regex_whitespace.test(this.template[this.index])) {
+		if (!is_whitespace(this.template.charCodeAt(this.index))) {
 			e.expected_whitespace(this.index);
 		}

--- a/packages/svelte/src/compiler/phases/1-parse/read/script.js
+++ b/packages/svelte/src/compiler/phases/1-parse/read/script.js
@ -9,7 +9,7 @@ import { is_text_attribute } from '../../../utils/ast.js';
 import { locator } from '../../../state.js';

 const regex_closing_script_tag = /<\/script\s*>/;
-const regex_starts_with_closing_script_tag = /^<\/script\s*>/;
+const regex_starts_with_closing_script_tag = /<\/script\s*>/y;

 const RESERVED_ATTRIBUTES = ['server', 'client', 'worker', 'test', 'default'];
 const ALLOWED_ATTRIBUTES = ['context', 'generics', 'lang', 'module'];
--- a/packages/svelte/src/compiler/phases/1-parse/read/style.js
+++ b/packages/svelte/src/compiler/phases/1-parse/read/style.js
@ -2,17 +2,17 @@
 /** @import { Parser } from '../index.js' */
 import * as e from '../../../errors.js';

-const REGEX_MATCHER = /^[~^$*|]?=/;
+const REGEX_MATCHER = /[~^$*|]?=/y;
 const REGEX_CLOSING_BRACKET = /[\s\]]/;
-const REGEX_ATTRIBUTE_FLAGS = /^[a-zA-Z]+/; // only `i` and `s` are valid today, but make it future-proof
-const REGEX_COMBINATOR = /^(\+|~|>|\|\|)/;
-const REGEX_PERCENTAGE = /^\d+(\.\d+)?%/;
+const REGEX_ATTRIBUTE_FLAGS = /[a-zA-Z]+/y; // only `i` and `s` are valid today, but make it future-proof
+const REGEX_COMBINATOR = /(\+|~|>|\|\|)/y;
+const REGEX_PERCENTAGE = /\d+(\.\d+)?%/y;
 const REGEX_NTH_OF =
-	/^(even|odd|\+?(\d+|\d*n(\s*[+-]\s*\d+)?)|-\d*n(\s*\+\s*\d+))((?=\s*[,)])|\s+of\s+)/;
+	/(even|odd|\+?(\d+|\d*n(\s*[+-]\s*\d+)?)|-\d*n(\s*\+\s*\d+))((?=\s*[,)])|\s+of\s+)/y;
 const REGEX_WHITESPACE_OR_COLON = /[\s:]/;
-const REGEX_LEADING_HYPHEN_OR_DIGIT = /-?\d/;
+const REGEX_LEADING_HYPHEN_OR_DIGIT = /-?\d/y;
 const REGEX_VALID_IDENTIFIER_CHAR = /[a-zA-Z0-9_-]/;
-const REGEX_UNICODE_SEQUENCE = /^\\[0-9a-fA-F]{1,6}(\r\n|\s)?/;
+const REGEX_UNICODE_SEQUENCE = /\\[0-9a-fA-F]{1,6}(\r\n|\s)?/y;
 const REGEX_COMMENT_CLOSE = /\*\//;
 const REGEX_HTML_COMMENT_CLOSE = /-->/;

@ -28,7 +28,7 @@ export default function read_style(parser, start, attributes) {
 	const content_end = parser.index;

 	parser.eat('</style', true);
-	parser.read(/^\s*>/);
+	parser.read(/\s*>/y);

 	return {
 		type: 'StyleSheet',
--- a/packages/svelte/src/compiler/phases/1-parse/state/element.js
+++ b/packages/svelte/src/compiler/phases/1-parse/state/element.js
@ -17,19 +17,22 @@ import { list } from '../../../utils/string.js';
 import { locator } from '../../../state.js';
 import * as b from '#compiler/builders';

-const regex_invalid_unquoted_attribute_value = /^(\/>|[\s"'=<>`])/;
-const regex_closing_textarea_tag = /^<\/textarea(\s[^>]*)?>/i;
+const regex_invalid_unquoted_attribute_value = /(\/>|[\s"'=<>`])/y;
+const regex_closing_textarea_tag = /<\/textarea(\s[^>]*)?>/iy;
 const regex_closing_comment = /-->/;
 const regex_whitespace_or_slash_or_closing_tag = /(\s|\/|>)/;
 const regex_token_ending_character = /[\s=/>"']/;
-const regex_starts_with_quote_characters = /^["']/;
-const regex_attribute_value = /^(?:"([^"]*)"|'([^'])*'|([^>\s]+))/;
+const regex_starts_with_quote_characters = /["']/y;
+const regex_attribute_value = /(?:"([^"]*)"|'([^'])*'|([^>\s]+))/y;
+const regex_doctype_name = /^![a-zA-Z]+$/;
+const regex_namespaced_name = /^[a-zA-Z][a-zA-Z0-9]*:[a-zA-Z][a-zA-Z0-9-]*[a-zA-Z0-9]$/;
+
 /** @param {string} name */
 function is_valid_element_name(name) {
 	// DOCTYPE (e.g. !DOCTYPE)
-	if (/^![a-zA-Z]+$/.test(name)) return true;
+	if (regex_doctype_name.test(name)) return true;
 	// svelte:* meta tags (e.g. svelte:element, svelte:head)
-	if (/^[a-zA-Z][a-zA-Z0-9]*:[a-zA-Z][a-zA-Z0-9-]*[a-zA-Z0-9]$/.test(name)) return true;
+	if (regex_namespaced_name.test(name)) return true;
 	// standard HTML/SVG/MathML elements and custom elements
 	return REGEX_VALID_TAG_NAME.test(name);
 }
@ -399,7 +402,10 @@ export default function element(parser) {
 		// special case
 		element.fragment.nodes = read_sequence(
 			parser,
-			() => regex_closing_textarea_tag.test(parser.template.slice(parser.index)),
+			() => {
+				regex_closing_textarea_tag.lastIndex = parser.index;
+				return regex_closing_textarea_tag.test(parser.template);
+			},
 			'inside <textarea>'
 		);
 		parser.read(regex_closing_textarea_tag);
@ -407,7 +413,13 @@ export default function element(parser) {
 	} else if (tag.name === 'script' || tag.name === 'style') {
 		// special case
 		const start = parser.index;
-		const data = parser.read_until(new RegExp(`</${tag.name}>`));
+		const close_tag = `</${tag.name}>`;
+		const close_index = parser.template.indexOf(close_tag, parser.index);
+		const data = parser.template.slice(
+			parser.index,
+			close_index === -1 ? parser.template.length : close_index
+		);
+		parser.index = close_index === -1 ? parser.template.length : close_index;
 		const end = parser.index;

 		/** @type {AST.Text} */
@ -849,7 +861,8 @@ function read_sequence(parser, done, location) {

 	/** @param {number} end */
 	function flush(end) {
-		if (current_chunk.raw) {
+		if (end > current_chunk.start) {
+			current_chunk.raw = parser.template.slice(current_chunk.start, end);
 			current_chunk.data = decode_character_references(current_chunk.raw, true);
 			current_chunk.end = end;
 			chunks.push(current_chunk);
@ -903,7 +916,7 @@ function read_sequence(parser, done, location) {
 				data: ''
 			};
 		} else {
-			current_chunk.raw += parser.template[parser.index++];
+			parser.index++;
 		}
 	}

--- a/packages/svelte/src/compiler/phases/1-parse/state/tag.js
+++ b/packages/svelte/src/compiler/phases/1-parse/state/tag.js
@ -10,7 +10,7 @@ import read_expression, { get_loose_identifier } from '../read/expression.js';
 import { create_fragment } from '../utils/create.js';
 import { match_bracket } from '../utils/bracket.js';

-const regex_whitespace_with_closing_curly_brace = /^\s*}/;
+const regex_whitespace_with_closing_curly_brace = /\s*}/y;

 const pointy_bois = { '<': '>' };

--- a/packages/svelte/src/compiler/phases/1-parse/state/text.js
+++ b/packages/svelte/src/compiler/phases/1-parse/state/text.js
@ -6,12 +6,12 @@ import { decode_character_references } from '../utils/html.js';
 export default function text(parser) {
 	const start = parser.index;

-	let data = '';
-
 	while (parser.index < parser.template.length && !parser.match('<') && !parser.match('{')) {
-		data += parser.template[parser.index++];
+		parser.index++;
 	}

+	const data = parser.template.slice(start, parser.index);
+
 	/** @type {AST.Text} */
 	parser.append({
 		type: 'Text',
--- a/packages/svelte/src/compiler/phases/1-parse/utils/bracket.js
+++ b/packages/svelte/src/compiler/phases/1-parse/utils/bracket.js
@ -141,13 +141,15 @@ const default_brackets = {
 	'[': ']'
 };

+const default_close = new Set(Object.values(default_brackets));
+
 /**
 * @param {Parser} parser
 * @param {number} start
 * @param {Record<string, string>} brackets
 */
 export function match_bracket(parser, start, brackets = default_brackets) {
-	const close = Object.values(brackets);
+	const close = brackets === default_brackets ? default_close : new Set(Object.values(brackets));
 	const bracket_stack = [];

 	let i = start;
@ -162,7 +164,7 @@ export function match_bracket(parser, start, brackets = default_brackets) {

 		if (char in brackets) {
 			bracket_stack.push(char);
-		} else if (close.includes(char)) {
+		} else if (close.has(char)) {
 			const popped = /** @type {string} */ (bracket_stack.pop());
 			const expected = /** @type {string} */ (brackets[popped]);