From a71b8b99585bd636dd0e3ea250c9a092afb0533d Mon Sep 17 00:00:00 2001 From: Simon H <5968653+dummdidumm@users.noreply.github.com> Date: Wed, 22 Feb 2023 16:18:36 +0100 Subject: [PATCH] chore: improve parser performance (#8303) - fast path for attribute quote marks common case - all regexes exclusively passed into read or match_regex which are only successful if matched at the beginning are altered so that the regex has this condition built in, preventing it from searching past the start index --------- Co-authored-by: Yuichiro Yamashita --- src/compiler/parse/index.ts | 8 ++++++++ src/compiler/parse/read/script.ts | 3 ++- src/compiler/parse/read/style.ts | 5 +++-- src/compiler/parse/state/mustache.ts | 2 +- src/compiler/parse/state/tag.ts | 19 ++++++++++--------- 5 files changed, 24 insertions(+), 13 deletions(-) diff --git a/src/compiler/parse/index.ts b/src/compiler/parse/index.ts index 8de6563c8b..86c27b33cf 100644 --- a/src/compiler/parse/index.ts +++ b/src/compiler/parse/index.ts @@ -132,6 +132,10 @@ export class Parser { return this.template.slice(this.index, this.index + str.length) === str; } + /** + * Match a regex at the current index + * @param pattern Should have a ^ anchor at the start so the regex doesn't search past the beginning, resulting in worse performance + */ match_regex(pattern: RegExp) { const match = pattern.exec(this.template.slice(this.index)); if (!match || match.index !== 0) return null; @@ -148,6 +152,10 @@ export class Parser { } } + /** + * Search for a regex starting at the current index and return the result if it matches + * @param pattern Should have a ^ anchor at the start so the regex doesn't search past the beginning, resulting in worse performance + */ read(pattern: RegExp) { const result = this.match_regex(pattern); if (result) this.index += result.length; diff --git a/src/compiler/parse/read/script.ts b/src/compiler/parse/read/script.ts index 02506ab3d5..43974c26ab 100644 --- a/src/compiler/parse/read/script.ts +++ 
b/src/compiler/parse/read/script.ts @@ -6,6 +6,7 @@ import parser_errors from '../errors'; import { regex_not_newline_characters } from '../../utils/patterns'; const regex_closing_script_tag = /<\/script\s*>/; +const regex_starts_with_closing_script_tag = /^<\/script\s*>/; function get_context(parser: Parser, attributes: any[], start: number): string { const context = attributes.find(attribute => attribute.name === 'context'); @@ -32,7 +33,7 @@ export default function read_script(parser: Parser, start: number, attributes: N } const source = parser.template.slice(0, script_start).replace(regex_not_newline_characters, ' ') + data; - parser.read(regex_closing_script_tag); + parser.read(regex_starts_with_closing_script_tag); let ast: Program; diff --git a/src/compiler/parse/read/style.ts b/src/compiler/parse/read/style.ts index 560d377cec..3ed510c3d0 100644 --- a/src/compiler/parse/read/style.ts +++ b/src/compiler/parse/read/style.ts @@ -7,6 +7,7 @@ import { Style } from '../../interfaces'; import parser_errors from '../errors'; const regex_closing_style_tag = /<\/style\s*>/; +const regex_starts_with_closing_style_tag = /^<\/style\s*>/; export default function read_style(parser: Parser, start: number, attributes: Node[]): Style { const content_start = parser.index; @@ -21,7 +22,7 @@ export default function read_style(parser: Parser, start: number, attributes: No // discard styles when css is disabled if (parser.css_mode === 'none') { - parser.read(regex_closing_style_tag); + parser.read(regex_starts_with_closing_style_tag); return null; } @@ -76,7 +77,7 @@ export default function read_style(parser: Parser, start: number, attributes: No } }); - parser.read(regex_closing_style_tag); + parser.read(regex_starts_with_closing_style_tag); const end = parser.index; diff --git a/src/compiler/parse/state/mustache.ts b/src/compiler/parse/state/mustache.ts index c3b58afe2a..328a043677 100644 --- a/src/compiler/parse/state/mustache.ts +++ b/src/compiler/parse/state/mustache.ts @@ 
-33,7 +33,7 @@ function trim_whitespace(block: TemplateNode, trim_before: boolean, trim_after: } } -const regex_whitespace_with_closing_curly_brace = /\s*}/; +const regex_whitespace_with_closing_curly_brace = /^\s*}/; export default function mustache(parser: Parser) { const start = parser.index; diff --git a/src/compiler/parse/state/tag.ts b/src/compiler/parse/state/tag.ts index 9d1bf5e5c2..efcff645a7 100644 --- a/src/compiler/parse/state/tag.ts +++ b/src/compiler/parse/state/tag.ts @@ -12,6 +12,9 @@ import { closing_tag_omitted, decode_character_references } from '../utils/html' // eslint-disable-next-line no-useless-escape const valid_tag_name = /^\!?[a-zA-Z]{1,}:?[a-zA-Z0-9\-]*/; +/** Invalid attribute characters if the attribute is not surrounded by quotes */ +const regex_starts_with_invalid_attr_value = /^(\/>|[\s"'=<>`])/; + const meta_tags = new Map([ ['svelte:head', 'Head'], ['svelte:options', 'Options'], @@ -293,7 +296,7 @@ function read_tag_name(parser: Parser) { // eslint-disable-next-line no-useless-escape const regex_token_ending_character = /[\s=\/>"']/; -const regex_quote_characters = /["']/; +const regex_starts_with_quote_characters = /^["']/; function read_attribute(parser: Parser, unique_names: Set) { const start = parser.index; @@ -368,7 +371,7 @@ function read_attribute(parser: Parser, unique_names: Set) { parser.allow_whitespace(); value = read_attribute_value(parser); end = parser.index; - } else if (parser.match_regex(regex_quote_characters)) { + } else if (parser.match_regex(regex_starts_with_quote_characters)) { parser.error(parser_errors.unexpected_token('='), parser.index); } @@ -475,15 +478,13 @@ function read_attribute_value(parser: Parser) { }]; } - const regex = ( - quote_mark === "'" ? /'/ : - quote_mark === '"' ? 
/"/ : - /(\/>|[\s"'=<>`])/ - ); - let value; try { - value = read_sequence(parser, () => !!parser.match_regex(regex), 'in attribute value'); + value = read_sequence(parser, () => { + // fast path for the common case of a quoted value: look for the closing quote mark with a plain string comparison instead of a regex, for performance reasons + if (quote_mark) return parser.match(quote_mark); + return !!parser.match_regex(regex_starts_with_invalid_attr_value); + }, 'in attribute value'); } catch (error) { if (error.code === 'parse-error') { // if the attribute value didn't close + self-closing tag