fix entities in attributes

pull/31/head
Rich-Harris 8 years ago
parent 4ce0f2fd58
commit 0d4728faf2

@ -3,6 +3,7 @@ import readScript from '../read/script.js';
import readStyle from '../read/style.js';
import { readEventHandlerDirective, readBindingDirective } from '../read/directives.js';
import { trimStart, trimEnd } from '../utils/trim.js';
import { decodeCharacterReferences } from '../utils/html.js';
const validTagName = /^[a-zA-Z]{1,}:?[a-zA-Z0-9\-]*/;
const voidElementNames = /^(?:area|base|br|col|command|doctype|embed|hr|img|input|keygen|link|meta|param|source|track|wbr)$/i;
@ -183,8 +184,6 @@ function readAttributeValue ( parser ) {
data: ''
};
let escaped = false;
const done = quoteMark ?
char => char === quoteMark :
char => invalidUnquotedAttributeCharacters.test( char );
@ -192,56 +191,50 @@ function readAttributeValue ( parser ) {
const chunks = [];
while ( parser.index < parser.template.length ) {
if ( escaped ) {
currentChunk.data += parser.template[ parser.index++ ];
}
else {
const index = parser.index;
const index = parser.index;
if ( parser.eat( '{{' ) ) {
if ( parser.eat( '{{' ) ) {
if ( currentChunk.data ) {
currentChunk.end = index;
if ( currentChunk.data ) {
chunks.push( currentChunk );
}
const expression = readExpression( parser );
parser.allowWhitespace();
if ( !parser.eat( '}}' ) ) {
parser.error( `Expected }}` );
}
chunks.push({
start: index,
end: parser.index,
type: 'MustacheTag',
expression
});
currentChunk = {
start: parser.index,
end: null,
type: 'Text',
data: ''
};
chunks.push( currentChunk );
}
else if ( parser.eat( '\\' ) ) {
escaped = true;
const expression = readExpression( parser );
parser.allowWhitespace();
if ( !parser.eat( '}}' ) ) {
parser.error( `Expected }}` );
}
else if ( done( parser.template[ parser.index ] ) ) {
currentChunk.end = parser.index;
if ( quoteMark ) parser.index += 1;
chunks.push({
start: index,
end: parser.index,
type: 'MustacheTag',
expression
});
currentChunk = {
start: parser.index,
end: null,
type: 'Text',
data: ''
};
}
if ( currentChunk.data ) chunks.push( currentChunk );
return chunks;
}
else if ( done( parser.template[ parser.index ] ) ) {
currentChunk.end = parser.index;
if ( quoteMark ) parser.index += 1;
else {
currentChunk.data += parser.template[ parser.index++ ];
}
if ( currentChunk.data ) chunks.push( currentChunk );
chunks.forEach( chunk => {
if ( chunk.type === 'Text' ) chunk.data = decodeCharacterReferences( chunk.data );
});
return chunks;
}
else {
currentChunk.data += parser.template[ parser.index++ ];
}
}

@ -0,0 +1,80 @@
// borrowed from Ractive https://github.com/ractivejs/ractive/blob/faf93cbd33c3fedfd8d6dd48340990540bde4a94/src/utils/html.js

// Named character references (without the leading `&` and trailing `;`), mapped to the
// code point each one decodes to.
const htmlEntities = {
	quot: 34, amp: 38, apos: 39, lt: 60, gt: 62, nbsp: 160, iexcl: 161, cent: 162, pound: 163, curren: 164,
	yen: 165, brvbar: 166, sect: 167, uml: 168, copy: 169, ordf: 170, laquo: 171, not: 172, shy: 173, reg: 174,
	macr: 175, deg: 176, plusmn: 177, sup2: 178, sup3: 179, acute: 180, micro: 181, para: 182, middot: 183, cedil: 184,
	sup1: 185, ordm: 186, raquo: 187, frac14: 188, frac12: 189, frac34: 190, iquest: 191, Agrave: 192, Aacute: 193,
	Acirc: 194, Atilde: 195, Auml: 196, Aring: 197, AElig: 198, Ccedil: 199, Egrave: 200, Eacute: 201, Ecirc: 202,
	Euml: 203, Igrave: 204, Iacute: 205, Icirc: 206, Iuml: 207, ETH: 208, Ntilde: 209, Ograve: 210, Oacute: 211,
	Ocirc: 212, Otilde: 213, Ouml: 214, times: 215, Oslash: 216, Ugrave: 217, Uacute: 218, Ucirc: 219, Uuml: 220,
	Yacute: 221, THORN: 222, szlig: 223, agrave: 224, aacute: 225, acirc: 226, atilde: 227, auml: 228, aring: 229,
	aelig: 230, ccedil: 231, egrave: 232, eacute: 233, ecirc: 234, euml: 235, igrave: 236, iacute: 237, icirc: 238,
	iuml: 239, eth: 240, ntilde: 241, ograve: 242, oacute: 243, ocirc: 244, otilde: 245, ouml: 246, divide: 247,
	oslash: 248, ugrave: 249, uacute: 250, ucirc: 251, uuml: 252, yacute: 253, thorn: 254, yuml: 255, OElig: 338,
	oelig: 339, Scaron: 352, scaron: 353, Yuml: 376, fnof: 402, circ: 710, tilde: 732, Alpha: 913, Beta: 914,
	Gamma: 915, Delta: 916, Epsilon: 917, Zeta: 918, Eta: 919, Theta: 920, Iota: 921, Kappa: 922, Lambda: 923,
	Mu: 924, Nu: 925, Xi: 926, Omicron: 927, Pi: 928, Rho: 929, Sigma: 931, Tau: 932, Upsilon: 933, Phi: 934,
	Chi: 935, Psi: 936, Omega: 937, alpha: 945, beta: 946, gamma: 947, delta: 948, epsilon: 949, zeta: 950,
	eta: 951, theta: 952, iota: 953, kappa: 954, lambda: 955, mu: 956, nu: 957, xi: 958, omicron: 959, pi: 960,
	rho: 961, sigmaf: 962, sigma: 963, tau: 964, upsilon: 965, phi: 966, chi: 967, psi: 968, omega: 969,
	thetasym: 977, upsih: 978, piv: 982, ensp: 8194, emsp: 8195, thinsp: 8201, zwnj: 8204, zwj: 8205, lrm: 8206,
	rlm: 8207, ndash: 8211, mdash: 8212, lsquo: 8216, rsquo: 8217, sbquo: 8218, ldquo: 8220, rdquo: 8221,
	bdquo: 8222, dagger: 8224, Dagger: 8225, bull: 8226, hellip: 8230, permil: 8240, prime: 8242, Prime: 8243,
	lsaquo: 8249, rsaquo: 8250, oline: 8254, frasl: 8260, euro: 8364, image: 8465, weierp: 8472, real: 8476,
	trade: 8482, alefsym: 8501, larr: 8592, uarr: 8593, rarr: 8594, darr: 8595, harr: 8596, crarr: 8629,
	lArr: 8656, uArr: 8657, rArr: 8658, dArr: 8659, hArr: 8660, forall: 8704, part: 8706, exist: 8707,
	empty: 8709, nabla: 8711, isin: 8712, notin: 8713, ni: 8715, prod: 8719, sum: 8721, minus: 8722,
	lowast: 8727, radic: 8730, prop: 8733, infin: 8734, ang: 8736, and: 8743, or: 8744, cap: 8745, cup: 8746,
	int: 8747, there4: 8756, sim: 8764, cong: 8773, asymp: 8776, ne: 8800, equiv: 8801, le: 8804, ge: 8805,
	sub: 8834, sup: 8835, nsub: 8836, sube: 8838, supe: 8839, oplus: 8853, otimes: 8855, perp: 8869, sdot: 8901,
	lceil: 8968, rceil: 8969, lfloor: 8970, rfloor: 8971, lang: 9001, rang: 9002, loz: 9674, spades: 9824,
	clubs: 9827, hearts: 9829, diams: 9830
};

// Replacement code points for numeric references in the range 128-159; `validateCode`
// indexes this table with `code - 128`.
const controlCharacters = [ 8364, 129, 8218, 402, 8222, 8230, 8224, 8225, 710, 8240, 352, 8249, 338, 141, 381, 143, 144, 8216, 8217, 8220, 8221, 8226, 8211, 8212, 732, 8482, 353, 8250, 339, 157, 382, 376 ];

// Regex alternation takes the FIRST alternative that matches, not the longest one. If the
// names were listed in table order, `sub` would win over `sube` (likewise `not`/`notin`,
// `pi`/`piv`, `theta`/`thetasym`, `sup`/`supe`), so `&sube;` would be read as `&sub`
// followed by a literal `e;`. Listing longer names first makes the longest name win.
const entityNamesLongestFirst = Object.keys( htmlEntities ).sort( ( a, b ) => b.length - a.length );

// Matches one character reference. Capture group 1 is the reference body without the
// leading `&` and the optional trailing `;`: `#x<hex>`, `#<decimal>` or an entity name.
// The hex alternative only accepts hex digits, so trailing word characters that are not
// part of the number (e.g. the `G` in `&#x41G`) are left in the text instead of being
// swallowed by the match.
const entityPattern = new RegExp( '&(#?(?:x[0-9a-fA-F]+|\\d+|' + entityNamesLongestFirst.join( '|' ) + '));?', 'g' );
/**
 * Replaces each character reference that `entityPattern` finds in `html` with the
 * character it denotes.
 *
 * Named references are looked up in `htmlEntities`; `#x…` bodies are parsed as
 * hexadecimal and other `#…` bodies as decimal. The resulting code point is passed
 * through `validateCode` before being converted to a string.
 *
 * @param {string} html - text that may contain character references
 * @returns {string} the text with every resolvable reference decoded; references that
 *   resolve to no code point (unknown name, unparsable number, or 0) are left as written
 */
export function decodeCharacterReferences ( html ) {
	// Turns the captured reference body into a code point (may be undefined or NaN).
	const toCodePoint = body => {
		if ( body[0] !== '#' ) {
			return htmlEntities[ body ];
		}

		const isHex = body[1] === 'x';
		return parseInt( body.substring( isHex ? 2 : 1 ), isHex ? 16 : 10 );
	};

	return html.replace( entityPattern, ( reference, body ) => {
		const codePoint = toCodePoint( body );

		// undefined, NaN and 0 are all falsy: keep the original text untouched
		return codePoint ? String.fromCodePoint( validateCode( codePoint ) ) : reference;
	});
}
// U+FFFD, returned in place of code points that must not be emitted
const invalid = 65533;

// some code points are verboten. If we were inserting HTML, the browser would replace the illegal
// code points with alternatives in some cases - since we're bypassing that mechanism, we need
// to replace them ourselves
//
// Source: http://en.wikipedia.org/wiki/Character_encodings_in_HTML#Illegal_characters
/**
 * Maps a code point taken from a character reference to the code point that should
 * actually be emitted.
 *
 * @param {number} code - code point parsed from a character reference
 * @returns {number} `code` itself when it is acceptable, its replacement from
 *   `controlCharacters` for 128-159, 32 for a line feed, or `invalid` (65533) for
 *   a falsy value, a UTF-16 surrogate half or anything above 0x10FFFF
 */
function validateCode ( code ) {
	if ( !code ) {
		return invalid;
	}

	// line feed becomes generic whitespace
	if ( code === 10 ) {
		return 32;
	}

	// ASCII range. (Why someone would use HTML entities for ASCII characters I don't know, but...)
	if ( code < 128 ) {
		return code;
	}

	// code points 128-159 are dealt with leniently by browsers, but they're incorrect. We need
	// to correct the mistake or we'll end up with missing € signs and so on
	if ( code <= 159 ) {
		return controlCharacters[ code - 128 ];
	}

	// UTF-16 surrogate halves (0xD800 - 0xDFFF) are not characters on their own
	if ( code >= 55296 && code <= 57343 ) {
		return invalid;
	}

	// 0x10FFFF is the last Unicode code point. Everything up to it is accepted, including
	// planes 3-16 (e.g. variation selectors at 0xE0100 and the private-use planes), which
	// were previously replaced by U+FFFD even though they are valid.
	if ( code > 1114111 ) {
		return invalid;
	}

	return code;
}

@ -0,0 +1 @@
<div data-foo='&quot;quoted&quot;'></div>

@ -0,0 +1,34 @@
{
"html": {
"start": 0,
"end": 41,
"type": "Fragment",
"children": [
{
"start": 0,
"end": 41,
"type": "Element",
"name": "div",
"attributes": [
{
"start": 5,
"end": 34,
"type": "Attribute",
"name": "data-foo",
"value": [
{
"start": 15,
"end": 33,
"type": "Text",
"data": "\"quoted\""
}
]
}
],
"children": []
}
]
},
"css": null,
"js": null
}
Loading…
Cancel
Save