diff --git a/compiler/parse/state/tag.js b/compiler/parse/state/tag.js index e9775b54d8..15e65d8cea 100644 --- a/compiler/parse/state/tag.js +++ b/compiler/parse/state/tag.js @@ -3,6 +3,7 @@ import readScript from '../read/script.js'; import readStyle from '../read/style.js'; import { readEventHandlerDirective, readBindingDirective } from '../read/directives.js'; import { trimStart, trimEnd } from '../utils/trim.js'; +import { decodeCharacterReferences } from '../utils/html.js'; const validTagName = /^[a-zA-Z]{1,}:?[a-zA-Z0-9\-]*/; const voidElementNames = /^(?:area|base|br|col|command|doctype|embed|hr|img|input|keygen|link|meta|param|source|track|wbr)$/i; @@ -183,8 +184,6 @@ function readAttributeValue ( parser ) { data: '' }; - let escaped = false; - const done = quoteMark ? char => char === quoteMark : char => invalidUnquotedAttributeCharacters.test( char ); @@ -192,56 +191,50 @@ function readAttributeValue ( parser ) { const chunks = []; while ( parser.index < parser.template.length ) { - if ( escaped ) { - currentChunk.data += parser.template[ parser.index++ ]; - } - - else { - const index = parser.index; + const index = parser.index; - if ( parser.eat( '{{' ) ) { + if ( parser.eat( '{{' ) ) { + if ( currentChunk.data ) { currentChunk.end = index; - - if ( currentChunk.data ) { - chunks.push( currentChunk ); - } - - const expression = readExpression( parser ); - parser.allowWhitespace(); - if ( !parser.eat( '}}' ) ) { - parser.error( `Expected }}` ); - } - - chunks.push({ - start: index, - end: parser.index, - type: 'MustacheTag', - expression - }); - - currentChunk = { - start: parser.index, - end: null, - type: 'Text', - data: '' - }; + chunks.push( currentChunk ); } - else if ( parser.eat( '\\' ) ) { - escaped = true; + const expression = readExpression( parser ); + parser.allowWhitespace(); + if ( !parser.eat( '}}' ) ) { + parser.error( `Expected }}` ); } - else if ( done( parser.template[ parser.index ] ) ) { - currentChunk.end = parser.index; - if ( 
// borrowed from Ractive – https://github.com/ractivejs/ractive/blob/faf93cbd33c3fedfd8d6dd48340990540bde4a94/src/utils/html.js

// Named character references understood by the decoder, mapped to their code points.
const htmlEntities = {
	quot: 34, amp: 38, apos: 39, lt: 60, gt: 62, nbsp: 160, iexcl: 161, cent: 162, pound: 163,
	curren: 164, yen: 165, brvbar: 166, sect: 167, uml: 168, copy: 169, ordf: 170, laquo: 171,
	not: 172, shy: 173, reg: 174, macr: 175, deg: 176, plusmn: 177, sup2: 178, sup3: 179,
	acute: 180, micro: 181, para: 182, middot: 183, cedil: 184, sup1: 185, ordm: 186, raquo: 187,
	frac14: 188, frac12: 189, frac34: 190, iquest: 191, Agrave: 192, Aacute: 193, Acirc: 194,
	Atilde: 195, Auml: 196, Aring: 197, AElig: 198, Ccedil: 199, Egrave: 200, Eacute: 201,
	Ecirc: 202, Euml: 203, Igrave: 204, Iacute: 205, Icirc: 206, Iuml: 207, ETH: 208, Ntilde: 209,
	Ograve: 210, Oacute: 211, Ocirc: 212, Otilde: 213, Ouml: 214, times: 215, Oslash: 216,
	Ugrave: 217, Uacute: 218, Ucirc: 219, Uuml: 220, Yacute: 221, THORN: 222, szlig: 223,
	agrave: 224, aacute: 225, acirc: 226, atilde: 227, auml: 228, aring: 229, aelig: 230,
	ccedil: 231, egrave: 232, eacute: 233, ecirc: 234, euml: 235, igrave: 236, iacute: 237,
	icirc: 238, iuml: 239, eth: 240, ntilde: 241, ograve: 242, oacute: 243, ocirc: 244,
	otilde: 245, ouml: 246, divide: 247, oslash: 248, ugrave: 249, uacute: 250, ucirc: 251,
	uuml: 252, yacute: 253, thorn: 254, yuml: 255, OElig: 338, oelig: 339, Scaron: 352,
	scaron: 353, Yuml: 376, fnof: 402, circ: 710, tilde: 732, Alpha: 913, Beta: 914, Gamma: 915,
	Delta: 916, Epsilon: 917, Zeta: 918, Eta: 919, Theta: 920, Iota: 921, Kappa: 922, Lambda: 923,
	Mu: 924, Nu: 925, Xi: 926, Omicron: 927, Pi: 928, Rho: 929, Sigma: 931, Tau: 932,
	Upsilon: 933, Phi: 934, Chi: 935, Psi: 936, Omega: 937, alpha: 945, beta: 946, gamma: 947,
	delta: 948, epsilon: 949, zeta: 950, eta: 951, theta: 952, iota: 953, kappa: 954, lambda: 955,
	mu: 956, nu: 957, xi: 958, omicron: 959, pi: 960, rho: 961, sigmaf: 962, sigma: 963, tau: 964,
	upsilon: 965, phi: 966, chi: 967, psi: 968, omega: 969, thetasym: 977, upsih: 978, piv: 982,
	ensp: 8194, emsp: 8195, thinsp: 8201, zwnj: 8204, zwj: 8205, lrm: 8206, rlm: 8207,
	ndash: 8211, mdash: 8212, lsquo: 8216, rsquo: 8217, sbquo: 8218, ldquo: 8220, rdquo: 8221,
	bdquo: 8222, dagger: 8224, Dagger: 8225, bull: 8226, hellip: 8230, permil: 8240, prime: 8242,
	Prime: 8243, lsaquo: 8249, rsaquo: 8250, oline: 8254, frasl: 8260, euro: 8364, image: 8465,
	weierp: 8472, real: 8476, trade: 8482, alefsym: 8501, larr: 8592, uarr: 8593, rarr: 8594,
	darr: 8595, harr: 8596, crarr: 8629, lArr: 8656, uArr: 8657, rArr: 8658, dArr: 8659,
	hArr: 8660, forall: 8704, part: 8706, exist: 8707, empty: 8709, nabla: 8711, isin: 8712,
	notin: 8713, ni: 8715, prod: 8719, sum: 8721, minus: 8722, lowast: 8727, radic: 8730,
	prop: 8733, infin: 8734, ang: 8736, and: 8743, or: 8744, cap: 8745, cup: 8746, int: 8747,
	there4: 8756, sim: 8764, cong: 8773, asymp: 8776, ne: 8800, equiv: 8801, le: 8804, ge: 8805,
	sub: 8834, sup: 8835, nsub: 8836, sube: 8838, supe: 8839, oplus: 8853, otimes: 8855,
	perp: 8869, sdot: 8901, lceil: 8968, rceil: 8969, lfloor: 8970, rfloor: 8971, lang: 9001,
	rang: 9002, loz: 9674, spades: 9824, clubs: 9827, hearts: 9829, diams: 9830
};

// Replacement code points for numeric references 128-159, indexed by `code - 128`.
const controlCharacters = [ 8364, 129, 8218, 402, 8222, 8230, 8224, 8225, 710, 8240, 352, 8249, 338, 141, 381, 143, 144, 8216, 8217, 8220, 8221, 8226, 8211, 8212, 732, 8482, 353, 8250, 339, 157, 382, 376 ];

// U+FFFD REPLACEMENT CHARACTER, substituted for code points we refuse to emit
const invalid = 65533;

// Regex alternation takes the FIRST alternative that lets the whole pattern succeed, and the
// trailing `;` is optional. Names must therefore be tried longest-first, otherwise `&notin;`
// is consumed as `&not` followed by a literal `in;` (likewise sub/sube, sup/supe, pi/piv,
// theta/thetasym).
const entityNames = Object.keys( htmlEntities ).sort( ( a, b ) => b.length - a.length );

// Matches `&#123`, `&#x7B` / `&#X7B` or `&name`, each with an optional trailing `;`.
// Hex references only consume hex digits, so trailing text such as the `t` in `&#xe9t` is
// left in place rather than swallowed.
const entityPattern = new RegExp( '&(#(?:[xX][0-9a-fA-F]+|\\d+)|' + entityNames.join( '|' ) + ');?', 'g' );

/**
 * Replaces HTML character references (named, decimal and hexadecimal) in a string with the
 * characters they stand for.
 *
 * Unknown names are left untouched, as is a reference to code point zero (`&#0;`).
 *
 * @param {string} html - text that may contain character references
 * @returns {string} the text with every recognised reference decoded
 */
export function decodeCharacterReferences ( html ) {
	return html.replace( entityPattern, ( match, entity ) => {
		let code;

		if ( entity[0] !== '#' ) {
			// named reference
			code = htmlEntities[ entity ];
		} else if ( entity[1] === 'x' || entity[1] === 'X' ) {
			// hexadecimal reference
			code = parseInt( entity.slice( 2 ), 16 );
		} else {
			// decimal reference
			code = parseInt( entity.slice( 1 ), 10 );
		}

		if ( !code ) {
			return match;
		}

		return String.fromCodePoint( validateCode( code ) );
	});
}

// some code points are verboten. If we were inserting HTML, the browser would replace the illegal
// code points with alternatives in some cases - since we're bypassing that mechanism, we need
// to replace them ourselves
//
// Source: http://en.wikipedia.org/wiki/Character_encodings_in_HTML#Illegal_characters
/**
 * Maps a referenced code point to the code point that should actually be emitted.
 *
 * @param {number} code - code point taken from a character reference
 * @returns {number} a code point that is safe to pass to `String.fromCodePoint`
 */
function validateCode ( code ) {
	if ( !code ) {
		return invalid;
	}

	// line feed becomes generic whitespace
	if ( code === 10 ) {
		return 32;
	}

	// ASCII range. (Why someone would use HTML entities for ASCII characters I don't know, but...)
	if ( code < 128 ) {
		return code;
	}

	// code points 128-159 are dealt with leniently by browsers, but they're incorrect. We need
	// to correct the mistake or we'll end up with missing € signs and so on
	if ( code <= 159 ) {
		return controlCharacters[ code - 128 ];
	}

	// basic multilingual plane
	if ( code < 55296 ) {
		return code;
	}

	// UTF-16 surrogate halves
	if ( code <= 57343 ) {
		return invalid;
	}

	// rest of the basic multilingual plane
	if ( code <= 65535 ) {
		return code;
	}

	// supplementary multilingual plane 0x10000 - 0x1ffff
	if ( code >= 65536 && code <= 131071 ) {
		return code;
	}

	// supplementary ideographic plane 0x20000 - 0x2ffff
	if ( code >= 131072 && code <= 196607 ) {
		return code;
	}

	return invalid;
}
diff --git a/test/parser/attribute-escaped/output.json b/test/parser/attribute-escaped/output.json new file mode 100644 index 0000000000..4dcc1f561b --- /dev/null +++ b/test/parser/attribute-escaped/output.json @@ -0,0 +1,34 @@ +{ + "html": { + "start": 0, + "end": 41, + "type": "Fragment", + "children": [ + { + "start": 0, + "end": 41, + "type": "Element", + "name": "div", + "attributes": [ + { + "start": 5, + "end": 34, + "type": "Attribute", + "name": "data-foo", + "value": [ + { + "start": 15, + "end": 33, + "type": "Text", + "data": "\"quoted\"" + } + ] + } + ], + "children": [] + } + ] + }, + "css": null, + "js": null +}