You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
568 lines
14 KiB
568 lines
14 KiB
var Tokenizer = require('./tokenizer'); |
|
// Code points of the characters the definition syntax parser reacts to
var TAB = 0x0009;                 // U+0009 CHARACTER TABULATION (\t)
var N = 0x000A;                   // U+000A LINE FEED (\n)
var F = 0x000C;                   // U+000C FORM FEED (\f)
var R = 0x000D;                   // U+000D CARRIAGE RETURN (\r)
var SPACE = 0x0020;               // U+0020 SPACE
var EXCLAMATIONMARK = 0x0021;     // U+0021 EXCLAMATION MARK (!)
var NUMBERSIGN = 0x0023;          // U+0023 NUMBER SIGN (#)
var AMPERSAND = 0x0026;           // U+0026 AMPERSAND (&)
var APOSTROPHE = 0x0027;          // U+0027 APOSTROPHE (')
var LEFTPARENTHESIS = 0x0028;     // U+0028 LEFT PARENTHESIS
var RIGHTPARENTHESIS = 0x0029;    // U+0029 RIGHT PARENTHESIS
var ASTERISK = 0x002A;            // U+002A ASTERISK (*)
var PLUSSIGN = 0x002B;            // U+002B PLUS SIGN (+)
var COMMA = 0x002C;               // U+002C COMMA (,)
var HYPERMINUS = 0x002D;          // U+002D HYPHEN-MINUS (-)
var LESSTHANSIGN = 0x003C;        // U+003C LESS-THAN SIGN (<)
var GREATERTHANSIGN = 0x003E;     // U+003E GREATER-THAN SIGN (>)
var QUESTIONMARK = 0x003F;        // U+003F QUESTION MARK (?)
var COMMERCIALAT = 0x0040;        // U+0040 COMMERCIAL AT (@)
var LEFTSQUAREBRACKET = 0x005B;   // U+005B LEFT SQUARE BRACKET ([)
var RIGHTSQUAREBRACKET = 0x005D;  // U+005D RIGHT SQUARE BRACKET (])
var LEFTCURLYBRACKET = 0x007B;    // U+007B LEFT CURLY BRACKET ({)
var VERTICALLINE = 0x007C;        // U+007C VERTICAL LINE (|)
var RIGHTCURLYBRACKET = 0x007D;   // U+007D RIGHT CURLY BRACKET (})
var INFINITY = 0x221E;            // U+221E INFINITY (∞)
|
// Lookup table indexed by ASCII code: 1 when the character may be part of a name
// (latin letter, digit or hyphen), 0 otherwise
var NAME_CHAR = createCharMap(function isNameChar(ch) {
    var matches = /[a-zA-Z0-9\-]/.test(ch);

    return matches;
});
|
// Combinator -> rank. regroupTerms() processes combinators in ascending rank,
// so the combinator with the lowest rank is grouped first (binds tightest).
var COMBINATOR_PRECEDENCE = [' ', '&&', '||', '|'].reduce(function(ranks, combinator, idx) {
    ranks[combinator] = idx + 1;
    return ranks;
}, {});
|
|
|
// Build a 128-entry table for the ASCII range: entry `code` is 1 when
// fn(String.fromCharCode(code)) is truthy and 0 otherwise.
// A Uint32Array is used when the runtime provides one, a plain Array otherwise.
function createCharMap(fn) {
    var SIZE = 128;
    var table = typeof Uint32Array === 'function' ? new Uint32Array(SIZE) : new Array(SIZE);
    var code = 0;

    while (code < SIZE) {
        table[code] = fn(String.fromCharCode(code)) ? 1 : 0;
        code++;
    }

    return table;
}
|
|
|
// Consume everything from the current position up to the offset reported by
// tokenizer.findWsEnd() and return the consumed substring
// (an empty string is expected when there is no whitespace ahead).
function scanSpaces(tokenizer) {
    var wsEnd = tokenizer.findWsEnd(tokenizer.pos);

    return tokenizer.substringToPos(wsEnd);
}
|
|
|
// Consume the longest run of name characters (see NAME_CHAR) starting at the
// current position and return it. Reports 'Expect a keyword' through
// tokenizer.error() when the run is empty.
function scanWord(tokenizer) {
    var source = tokenizer.str;
    var start = tokenizer.pos;
    var cursor = start;

    while (cursor < source.length) {
        var code = source.charCodeAt(cursor);

        // anything outside ASCII or not flagged in NAME_CHAR terminates the word
        if (code >= 128 || NAME_CHAR[code] === 0) {
            break;
        }

        cursor++;
    }

    if (cursor === start) {
        tokenizer.error('Expect a keyword');
    }

    return tokenizer.substringToPos(cursor);
}
|
|
|
// Consume the longest run of decimal digits starting at the current position
// and return it as a string. Reports 'Expect a number' through
// tokenizer.error() when there is no digit at the current position.
function scanNumber(tokenizer) {
    var source = tokenizer.str;
    var start = tokenizer.pos;
    var cursor = start;

    while (cursor < source.length) {
        var code = source.charCodeAt(cursor);

        // 48..57 are the codes of '0'..'9'
        if (code < 48 || code > 57) {
            break;
        }

        cursor++;
    }

    if (cursor === start) {
        tokenizer.error('Expect a number');
    }

    return tokenizer.substringToPos(cursor);
}
|
|
|
// Consume a quoted string: from the current position (expected to be the
// opening apostrophe) through the next apostrophe, and return it with quotes.
// When no closing apostrophe exists the position is moved to the end of input
// before 'Expect an apostrophe' is reported through tokenizer.error().
function scanString(tokenizer) {
    var source = tokenizer.str;
    var closingQuote = source.indexOf('\'', tokenizer.pos + 1);

    if (closingQuote === -1) {
        tokenizer.pos = source.length;
        tokenizer.error('Expect an apostrophe');
    }

    return tokenizer.substringToPos(closingQuote + 1);
}
|
|
|
// Read a curly-braces multiplier range: {n}, {n,} or {n,m}.
// Returns { min, max } as numbers; max is 0 when no upper bound is given ({n,}).
function readMultiplierRange(tokenizer) {
    var lower;
    var upper = null;
    var max = 0;

    tokenizer.eat(LEFTCURLYBRACKET);

    lower = scanNumber(tokenizer);

    if (tokenizer.charCode() !== COMMA) {
        // {n} - exact count, both bounds are the same
        upper = lower;
    } else {
        tokenizer.pos++;

        // {n,} leaves the upper bound unset, {n,m} reads it
        if (tokenizer.charCode() !== RIGHTCURLYBRACKET) {
            upper = scanNumber(tokenizer);
        }
    }

    tokenizer.eat(RIGHTCURLYBRACKET);

    if (upper) {
        max = Number(upper);
    }

    return {
        min: Number(lower),
        max: max
    };
}
|
|
|
// Try to read a multiplier at the current position: *, +, ?, #, #{n,m} or {n,m}.
// Returns a Multiplier node with `term` left as null (the caller attaches it),
// or null - without consuming anything - when no multiplier starts here.
// A max of 0 is what the unbounded forms (*, +, #) produce.
function readMultiplier(tokenizer) {
    var code = tokenizer.charCode();
    var comma = false;
    var range;

    if (code === ASTERISK) {
        tokenizer.pos++;
        range = { min: 0, max: 0 };
    } else if (code === PLUSSIGN) {
        tokenizer.pos++;
        range = { min: 1, max: 0 };
    } else if (code === QUESTIONMARK) {
        tokenizer.pos++;
        range = { min: 0, max: 1 };
    } else if (code === NUMBERSIGN) {
        tokenizer.pos++;
        comma = true;

        // `#` may be immediately followed by an explicit range: #{n,m}
        range = tokenizer.charCode() === LEFTCURLYBRACKET
            ? readMultiplierRange(tokenizer)
            : { min: 1, max: 0 };
    } else if (code === LEFTCURLYBRACKET) {
        range = readMultiplierRange(tokenizer);
    } else {
        return null;
    }

    return {
        type: 'Multiplier',
        comma: comma,
        min: range.min,
        max: range.max,
        term: null
    };
}
|
|
|
// Wrap `node` into a Multiplier node when a multiplier follows it in the input;
// otherwise return `node` untouched.
function maybeMultiplied(tokenizer, node) {
    var wrapper = readMultiplier(tokenizer);

    if (wrapper === null) {
        return node;
    }

    wrapper.term = node;

    return wrapper;
}
|
|
|
// Turn whatever tokenizer.peek() yields into a generic Token node.
// Returns null when peek() yields an empty string (presumably end of input -
// the tokenizer lives in ./tokenizer and is not visible from this file).
function maybeToken(tokenizer) {
    var value = tokenizer.peek();

    return value === ''
        ? null
        : { type: 'Token', value: value };
}
|
|
|
// Read a property reference: <'name'> optionally followed by a multiplier.
// Returns a Property node, wrapped into a Multiplier node when one follows.
function readProperty(tokenizer) {
    tokenizer.eat(LESSTHANSIGN);
    tokenizer.eat(APOSTROPHE);

    var propertyName = scanWord(tokenizer);

    tokenizer.eat(APOSTROPHE);
    tokenizer.eat(GREATERTHANSIGN);

    var node = {
        type: 'Property',
        name: propertyName
    };

    return maybeMultiplied(tokenizer, node);
}
|
|
|
// Read a bracketed range restriction of a numeric type, e.g. the `[0, 10]`
// part of <integer [0, 10]>.
//
// https://drafts.csswg.org/css-values-3/#numeric-ranges
// 4.1. Range Restrictions and Range Definition Notation:
// the notation [min,max] placed within the angle brackets, after the
// identifying keyword, denotes a closed range that includes both min and max.
//
// Accepted bounds: the lower one is `-∞` or an optionally negated integer,
// the upper one is `∞` or an optionally negated integer. tokenizer.peek() is
// called only to step over a single character; its result is ignored.
// Returns a Range node, or null when both bounds are infinite.
function readTypeRange(tokenizer) {
    // null stands for an infinite bound to keep the AST format JSON serializable/deserializable
    var lower = null; // -Infinity
    var upper = null; // Infinity
    var isNegative;

    tokenizer.eat(LEFTSQUAREBRACKET);

    // lower bound
    isNegative = tokenizer.charCode() === HYPERMINUS;
    if (isNegative) {
        tokenizer.peek();
    }

    if (isNegative && tokenizer.charCode() === INFINITY) {
        tokenizer.peek();
    } else {
        lower = (isNegative ? -1 : 1) * Number(scanNumber(tokenizer));
    }

    // separator, whitespace is allowed on both sides of the comma
    scanSpaces(tokenizer);
    tokenizer.eat(COMMA);
    scanSpaces(tokenizer);

    // upper bound
    if (tokenizer.charCode() === INFINITY) {
        tokenizer.peek();
    } else {
        isNegative = tokenizer.charCode() === HYPERMINUS;
        if (isNegative) {
            tokenizer.peek();
        }

        upper = (isNegative ? -1 : 1) * Number(scanNumber(tokenizer));
    }

    tokenizer.eat(RIGHTSQUAREBRACKET);

    // If no range is indicated, either by using the bracketed range notation
    // or in the property description, then [−∞,∞] is assumed - so a fully
    // unbounded range is the same as no range at all.
    if (lower !== null || upper !== null) {
        return {
            type: 'Range',
            min: lower,
            max: upper
        };
    }

    return null;
}
|
|
|
// Read a type reference: <name>, <name()> or <name [min,max]>, optionally
// followed by a multiplier. Returns a Type node (opts holds the Range node or
// null), wrapped into a Multiplier node when one follows.
function readType(tokenizer) {
    var range = null;

    tokenizer.eat(LESSTHANSIGN);

    var typeName = scanWord(tokenizer);

    // `()` right after the name is kept as a part of the name: <name()>
    var hasParentheses =
        tokenizer.charCode() === LEFTPARENTHESIS &&
        tokenizer.nextCharCode() === RIGHTPARENTHESIS;

    if (hasParentheses) {
        tokenizer.pos += 2;
        typeName += '()';
    }

    // look past optional whitespace for a range restriction; the whitespace is
    // consumed only when a `[` really follows
    var afterSpaces = tokenizer.findWsEnd(tokenizer.pos);

    if (tokenizer.charCodeAt(afterSpaces) === LEFTSQUAREBRACKET) {
        scanSpaces(tokenizer);
        range = readTypeRange(tokenizer);
    }

    tokenizer.eat(GREATERTHANSIGN);

    var node = {
        type: 'Type',
        name: typeName,
        opts: range
    };

    return maybeMultiplied(tokenizer, node);
}
|
|
|
// Read a word and classify it: a word immediately followed by `(` is a
// Function node (the parenthesis is consumed and no multiplier is looked for),
// anything else is a Keyword node that may carry a multiplier.
function readKeywordOrFunction(tokenizer) {
    var word = scanWord(tokenizer);

    if (tokenizer.charCode() !== LEFTPARENTHESIS) {
        return maybeMultiplied(tokenizer, {
            type: 'Keyword',
            name: word
        });
    }

    tokenizer.pos++;

    return {
        type: 'Function',
        name: word
    };
}
|
|
|
// Resolve combinator precedence inside a flat list of terms.
//
// `terms` is a list where Combinator nodes sit between other nodes and
// `combinators` is a map whose keys are the combinators used in the list.
// Combinators are processed one by one in ascending COMBINATOR_PRECEDENCE;
// for each of them its Combinator nodes are removed from `terms` (in place)
// and every run of terms it was joining is replaced by an implicit Group node,
// unless the run spans the whole processing of the last combinator.
//
// Returns the combinator processed last, i.e. the one that joins what is left
// in `terms`, or undefined when `combinators` has no keys.
function regroupTerms(terms, combinators) {
    // replace terms[from, to) with a single implicit group joined by `combinator`
    function wrapIntoGroup(from, to, combinator) {
        terms.splice(from, to - from, {
            type: 'Group',
            terms: terms.slice(from, to),
            combinator: combinator,
            disallowEmpty: false,
            explicit: false
        });
    }

    var queue = Object.keys(combinators).sort(function(left, right) {
        return COMBINATOR_PRECEDENCE[left] - COMBINATOR_PRECEDENCE[right];
    });
    var current;

    for (var k = 0; k < queue.length; k++) {
        var isLast = k === queue.length - 1;
        var runStart = 0; // index where the current run begins, -1 when no run is open
        var idx = 0;

        current = queue[k];

        while (idx < terms.length) {
            var term = terms[idx];

            if (term.type !== 'Combinator') {
                idx++;
                continue;
            }

            if (term.value === current) {
                // the run begins with the term preceding the first matching combinator
                if (runStart === -1) {
                    runStart = idx - 1;
                }

                // drop the combinator; the next term shifts into the same index
                terms.splice(idx, 1);
                continue;
            }

            // a combinator of another kind closes the run; a single term needs no group
            if (runStart !== -1 && idx - runStart > 1) {
                wrapIntoGroup(runStart, idx, current);
                idx = runStart + 1; // index of the closing combinator after the replacement
            }

            runStart = -1;
            idx++;
        }

        // a run reaching the end of the list is wrapped too, except for the
        // last combinator: what it joins stays flat
        if (runStart !== -1 && !isLast) {
            wrapIntoGroup(runStart, idx, current);
        }
    }

    return current;
}
|
|
|
// Read a sequence of terms until peek() yields a falsy value (which it does
// for `]`, for a stray multiplier character and when maybeToken() finds
// nothing to read) and return it as an implicit Group node.
//
// - Spaces tokens are skipped;
// - two adjacent non-combinator terms get an explicit ' ' (juxtaposition)
//   Combinator node inserted between them;
// - a combinator at the beginning or at the end of the group, or two
//   combinators in a row, are reported as 'Unexpected combinator'. In every
//   case the tokenizer position is set to the end of the previous
//   non-whitespace token before reporting, so the error location is a real
//   source offset.
//
// The group's own combinator is the one regroupTerms() returns, or ' ' when
// the group has no combinators.
function readImplicitGroup(tokenizer) {
    var terms = [];
    var combinators = {};
    var token;
    var prevToken = null;
    var prevTokenPos = tokenizer.pos;

    while ((token = peek(tokenizer))) {
        if (token.type === 'Spaces') {
            continue;
        }

        if (token.type === 'Combinator') {
            // check for combinator in group beginning and double combinator sequence
            if (prevToken === null || prevToken.type === 'Combinator') {
                tokenizer.pos = prevTokenPos;
                tokenizer.error('Unexpected combinator');
            }

            combinators[token.value] = true;
        } else if (prevToken !== null && prevToken.type !== 'Combinator') {
            combinators[' '] = true; // a b
            terms.push({
                type: 'Combinator',
                value: ' '
            });
        }

        terms.push(token);
        prevToken = token;
        prevTokenPos = tokenizer.pos;
    }

    // check for combinator in group ending
    if (prevToken !== null && prevToken.type === 'Combinator') {
        // point at the end of the dangling combinator; subtracting prevTokenPos
        // from the position would yield the amount of trailing whitespace
        // instead of an offset in the source
        tokenizer.pos = prevTokenPos;
        tokenizer.error('Unexpected combinator');
    }

    return {
        type: 'Group',
        terms: terms,
        combinator: regroupTerms(terms, combinators) || ' ',
        disallowEmpty: false,
        explicit: false
    };
}
|
|
|
// Read an explicit group: [ ... ] optionally followed by `!`.
// The node produced by readImplicitGroup() is marked as explicit, and as
// disallowEmpty when the closing bracket is immediately followed by `!`.
function readGroup(tokenizer) {
    tokenizer.eat(LEFTSQUAREBRACKET);
    var group = readImplicitGroup(tokenizer);
    tokenizer.eat(RIGHTSQUAREBRACKET);

    group.explicit = true;

    if (tokenizer.charCode() !== EXCLAMATIONMARK) {
        return group;
    }

    tokenizer.pos++;
    group.disallowEmpty = true;

    return group;
}
|
|
|
// Read the next node at the current position, dispatching on its first character.
// Returns the node that was read, or a falsy value when reading should stop:
// undefined for `]`, for a character that may only start a multiplier
// (* + ? # !) and for `{` followed by a digit; null when maybeToken() has
// nothing to return.
function peek(tokenizer) {
    var code = tokenizer.charCode();
    var next;

    // names: keywords and functions
    if (code < 128 && NAME_CHAR[code] === 1) {
        return readKeywordOrFunction(tokenizer);
    }

    // don't eat, stop scan a group
    if (code === RIGHTSQUAREBRACKET) {
        return;
    }

    if (code === LEFTSQUAREBRACKET) {
        return maybeMultiplied(tokenizer, readGroup(tokenizer));
    }

    // <'property'> or <type>
    if (code === LESSTHANSIGN) {
        if (tokenizer.nextCharCode() === APOSTROPHE) {
            return readProperty(tokenizer);
        }

        return readType(tokenizer);
    }

    // `|` or `||`
    if (code === VERTICALLINE) {
        var width = tokenizer.nextCharCode() === VERTICALLINE ? 2 : 1;

        return {
            type: 'Combinator',
            value: tokenizer.substringToPos(tokenizer.pos + width)
        };
    }

    // `&&`, a single ampersand is rejected by eat()
    if (code === AMPERSAND) {
        tokenizer.pos++;
        tokenizer.eat(AMPERSAND);

        return {
            type: 'Combinator',
            value: '&&'
        };
    }

    if (code === COMMA) {
        tokenizer.pos++;

        return {
            type: 'Comma'
        };
    }

    if (code === APOSTROPHE) {
        return maybeMultiplied(tokenizer, {
            type: 'String',
            value: scanString(tokenizer)
        });
    }

    if (code === SPACE || code === TAB || code === N || code === R || code === F) {
        return {
            type: 'Spaces',
            value: scanSpaces(tokenizer)
        };
    }

    // `@` followed by a name character starts an at-keyword, otherwise it's a plain token
    if (code === COMMERCIALAT) {
        next = tokenizer.nextCharCode();

        if (next < 128 && NAME_CHAR[next] === 1) {
            tokenizer.pos++;

            return {
                type: 'AtKeyword',
                name: scanWord(tokenizer)
            };
        }

        return maybeToken(tokenizer);
    }

    // prohibited tokens (used as a multiplier start)
    if (code === ASTERISK ||
        code === PLUSSIGN ||
        code === QUESTIONMARK ||
        code === NUMBERSIGN ||
        code === EXCLAMATIONMARK) {
        return;
    }

    if (code === LEFTCURLYBRACKET) {
        // LEFTCURLYBRACKET is allowed since mdn/data uses it w/o quoting
        // check next char isn't a number, because it's likely a disjoined multiplier
        next = tokenizer.nextCharCode();

        if (next < 48 || next > 57) {
            return maybeToken(tokenizer);
        }

        return;
    }

    return maybeToken(tokenizer);
}
|
|
|
// Parse a value definition syntax string into an AST.
// Reports 'Unexpected input' through the tokenizer when the source can't be
// consumed up to its end. When the top-level group consists of a single
// Group term, that term is returned instead of the redundant wrapper.
function parse(source) {
    var tokenizer = new Tokenizer(source);
    var root = readImplicitGroup(tokenizer);

    if (tokenizer.pos !== source.length) {
        tokenizer.error('Unexpected input');
    }

    // reduce redundant groups with single group term
    var hasSingleGroup = root.terms.length === 1 && root.terms[0].type === 'Group';

    return hasSingleGroup ? root.terms[0] : root;
}
|
|
|
// Warm-up: run the parser once at module load so that code branches which
// never execute can be eliminated and soft deoptimizations (caused by
// insufficient type feedback) are fixed before the first real call
parse('[a&&<b>#|<\'c\'>*||e() f{2} /,(% g#{1,2} h{2,})]!');

module.exports = parse;
|
|
|