You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1087 lines
21 KiB
1087 lines
21 KiB
const util = require('./util')
|
|
|
// Parser state. These module-level variables are shared by the helpers in
// this file and are re-initialised at the start of every `parse` call.

// The complete input text being parsed (always a string).
let source

// Name of the `parseStates` entry that handles the next token.
let parseState

// Objects and arrays that are currently open, innermost last.
let stack

// Index into `source` of the next unread character.
let pos

// Line of the read position; `parse` starts it at 1.
let line

// Column of the read position; `parse` starts it at 0.
let column

// The token most recently produced by `lex`.
let token

// The property name most recently read, waiting for its value.
let key

// The top-level value of the document being built.
let root
|
|
|
module.exports = function parse (text, reviver) { |
|
source = String(text) |
|
parseState = 'start' |
|
stack = [] |
|
pos = 0 |
|
line = 1 |
|
column = 0 |
|
token = undefined |
|
key = undefined |
|
root = undefined |
|
|
|
do { |
|
token = lex() |
|
|
|
// This code is unreachable. |
|
// if (!parseStates[parseState]) { |
|
// throw invalidParseState() |
|
// } |
|
|
|
parseStates[parseState]() |
|
} while (token.type !== 'eof') |
|
|
|
if (typeof reviver === 'function') { |
|
return internalize({'': root}, '', reviver) |
|
} |
|
|
|
return root |
|
} |
|
|
|
/**
 * Applies `reviver` to `holder[name]` after first applying it, depth first,
 * to every own enumerable property of that value.
 *
 * A property whose revived value is `undefined` is deleted; any other result
 * replaces the property. Only own properties are visited, so inherited
 * enumerable properties are never copied onto the object. Replacements are
 * written with `Object.defineProperty` so that a key such as `__proto__` is
 * always stored as a plain data property and can never reach a setter.
 *
 * @param {Object} holder - The object that contains the value to revive.
 * @param {string} name - The key of that value within `holder`.
 * @param {Function} reviver - Called as `reviver.call(holder, name, value)`.
 * @returns {*} Whatever `reviver` returns for `holder[name]`.
 */
function internalize (holder, name, reviver) {
    const value = holder[name]

    if (value != null && typeof value === 'object') {
        // Object.keys takes a snapshot of the own enumerable keys (array
        // indices for arrays), so the walk is unaffected by the prototype.
        for (const key of Object.keys(value)) {
            const replacement = internalize(value, key, reviver)

            if (replacement === undefined) {
                delete value[key]
            } else {
                Object.defineProperty(value, key, {
                    value: replacement,
                    writable: true,
                    enumerable: true,
                    configurable: true,
                })
            }
        }
    }

    return reviver.call(holder, name, value)
}
|
|
|
// Lexer state. `lex` resets everything except `c` at the start of each token.

// Name of the `lexStates` entry that handles the next character.
let lexState

// Text accumulated so far for the token being read.
let buffer

// True when the string being read was opened with a double quote.
let doubleQuote

// Sign of the number being read: 1, or -1 after a leading '-'.
let sign

// The character most recently peeked by `lex` (undefined at end of input).
let c
|
|
|
/**
 * Reads the next token from the input.
 *
 * Resets the per-token lexer state, then repeatedly peeks one character into
 * `c` and runs the handler for the current `lexState` until a handler returns
 * a token.
 *
 * @returns {Object} The token returned by a lex state handler.
 */
function lex () {
    lexState = 'default'
    buffer = ''
    doubleQuote = false
    sign = 1

    for (;;) {
        c = peek()

        // This code is unreachable.
        // if (!lexStates[lexState]) {
        //     throw invalidLexState(lexState)
        // }

        // Handlers return nothing while a token is still incomplete.
        const result = lexStates[lexState]()
        if (result) {
            return result
        }
    }
}
|
|
|
/**
 * Returns the character at the read position without consuming it.
 *
 * The character is a whole code point, so it may be two UTF-16 code units
 * long.
 *
 * @returns {string|undefined} The next character, or undefined when the read
 *   position is at or past the end of `source`.
 */
function peek () {
    if (source[pos]) {
        return String.fromCodePoint(source.codePointAt(pos))
    }
}
|
|
|
/**
 * Consumes and returns the character at the read position, keeping `line`,
 * `column` and `pos` up to date.
 *
 * A line feed starts a new line and resets the column to 0. Any other
 * character advances the column by its length in UTF-16 code units. At end
 * of input the column still advances by one, but `pos` does not move.
 *
 * @returns {string|undefined} The consumed character, or undefined at end of
 *   input.
 */
function read () {
    const c = peek()

    if (c === '\n') {
        line++
        column = 0
    } else if (c) {
        column += c.length
    } else {
        column++
    }

    if (c) {
        pos += c.length
    }

    return c
}
|
|
|
// Lexer state machine. `lex` calls `lexStates[lexState]()` with the peeked
// character in `c`. A handler either consumes input and returns nothing
// (possibly switching `lexState`), or returns a finished token. Handlers
// throw the error built by `invalidChar`/`invalidIdentifier` on bad input.
const lexStates = {
    // Skips white space, detects the start of a comment and end of input,
    // and otherwise delegates to the lex state named after the parse state.
    default () {
        switch (c) {
        case '\t':
        case '\v':
        case '\f':
        case ' ':
        case '\u00A0':
        case '\uFEFF':
        case '\n':
        case '\r':
        case '\u2028':
        case '\u2029':
            read()
            return

        case '/':
            read()
            lexState = 'comment'
            return

        case undefined:
            read()
            return newToken('eof')
        }

        if (util.isSpaceSeparator(c)) {
            read()
            return
        }

        // This code is unreachable.
        // if (!lexStates[parseState]) {
        //     throw invalidLexState(parseState)
        // }

        return lexStates[parseState]()
    },

    // After a '/': the next character must open a block or line comment.
    comment () {
        switch (c) {
        case '*':
            read()
            lexState = 'multiLineComment'
            return

        case '/':
            read()
            lexState = 'singleLineComment'
            return
        }

        throw invalidChar(read())
    },

    // Inside a block comment; end of input here is an error.
    multiLineComment () {
        switch (c) {
        case '*':
            read()
            lexState = 'multiLineCommentAsterisk'
            return

        case undefined:
            throw invalidChar(read())
        }

        read()
    },

    // Inside a block comment, just after a '*': a '/' closes the comment.
    multiLineCommentAsterisk () {
        switch (c) {
        case '*':
            read()
            return

        case '/':
            read()
            lexState = 'default'
            return

        case undefined:
            throw invalidChar(read())
        }

        read()
        lexState = 'multiLineComment'
    },

    // Inside a line comment; a line terminator or end of input closes it.
    singleLineComment () {
        switch (c) {
        case '\n':
        case '\r':
        case '\u2028':
        case '\u2029':
            read()
            lexState = 'default'
            return

        case undefined:
            read()
            return newToken('eof')
        }

        read()
    },

    // Start of a value: dispatches on its first character.
    value () {
        switch (c) {
        case '{':
        case '[':
            return newToken('punctuator', read())

        case 'n':
            read()
            literal('ull')
            return newToken('null', null)

        case 't':
            read()
            literal('rue')
            return newToken('boolean', true)

        case 'f':
            read()
            literal('alse')
            return newToken('boolean', false)

        case '-':
        case '+':
            if (read() === '-') {
                sign = -1
            }

            lexState = 'sign'
            return

        case '.':
            buffer = read()
            lexState = 'decimalPointLeading'
            return

        case '0':
            buffer = read()
            lexState = 'zero'
            return

        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            buffer = read()
            lexState = 'decimalInteger'
            return

        case 'I':
            read()
            literal('nfinity')
            return newToken('numeric', Infinity)

        case 'N':
            read()
            literal('aN')
            return newToken('numeric', NaN)

        case '"':
        case "'":
            doubleQuote = (read() === '"')
            buffer = ''
            lexState = 'string'
            return
        }

        throw invalidChar(read())
    },

    // After a '\' at the start of an identifier: requires a `u` escape that
    // decodes to '$', '_' or a character accepted by `util.isIdStartChar`.
    identifierNameStartEscape () {
        if (c !== 'u') {
            throw invalidChar(read())
        }

        read()
        const u = unicodeEscape()
        switch (u) {
        case '$':
        case '_':
            break

        default:
            if (!util.isIdStartChar(u)) {
                throw invalidIdentifier()
            }

            break
        }

        buffer += u
        lexState = 'identifierName'
    },

    // Inside an identifier: accumulates characters until one that cannot
    // continue the identifier, which is left unread.
    identifierName () {
        switch (c) {
        case '$':
        case '_':
        case '\u200C':
        case '\u200D':
            buffer += read()
            return

        case '\\':
            read()
            lexState = 'identifierNameEscape'
            return
        }

        if (util.isIdContinueChar(c)) {
            buffer += read()
            return
        }

        return newToken('identifier', buffer)
    },

    // After a '\' inside an identifier: requires a `u` escape that decodes
    // to a character allowed to continue an identifier.
    identifierNameEscape () {
        if (c !== 'u') {
            throw invalidChar(read())
        }

        read()
        const u = unicodeEscape()
        switch (u) {
        case '$':
        case '_':
        case '\u200C':
        case '\u200D':
            break

        default:
            if (!util.isIdContinueChar(u)) {
                throw invalidIdentifier()
            }

            break
        }

        buffer += u
        lexState = 'identifierName'
    },

    // After a leading '+' or '-': only a number may follow.
    sign () {
        switch (c) {
        case '.':
            buffer = read()
            lexState = 'decimalPointLeading'
            return

        case '0':
            buffer = read()
            lexState = 'zero'
            return

        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            buffer = read()
            lexState = 'decimalInteger'
            return

        case 'I':
            read()
            literal('nfinity')
            return newToken('numeric', sign * Infinity)

        case 'N':
            read()
            literal('aN')
            return newToken('numeric', NaN)
        }

        throw invalidChar(read())
    },

    // After a leading '0': may continue as a fraction, exponent or hex
    // literal; otherwise the number is a (signed) zero.
    zero () {
        switch (c) {
        case '.':
            buffer += read()
            lexState = 'decimalPoint'
            return

        case 'e':
        case 'E':
            buffer += read()
            lexState = 'decimalExponent'
            return

        case 'x':
        case 'X':
            buffer += read()
            lexState = 'hexadecimal'
            return
        }

        return newToken('numeric', sign * 0)
    },

    // Inside the integer part of a decimal number.
    decimalInteger () {
        switch (c) {
        case '.':
            buffer += read()
            lexState = 'decimalPoint'
            return

        case 'e':
        case 'E':
            buffer += read()
            lexState = 'decimalExponent'
            return
        }

        if (util.isDigit(c)) {
            buffer += read()
            return
        }

        return newToken('numeric', sign * Number(buffer))
    },

    // After a '.' with no integer part: at least one digit is required.
    decimalPointLeading () {
        if (util.isDigit(c)) {
            buffer += read()
            lexState = 'decimalFraction'
            return
        }

        throw invalidChar(read())
    },

    // After a '.' that follows an integer part: digits are optional.
    decimalPoint () {
        switch (c) {
        case 'e':
        case 'E':
            buffer += read()
            lexState = 'decimalExponent'
            return
        }

        if (util.isDigit(c)) {
            buffer += read()
            lexState = 'decimalFraction'
            return
        }

        return newToken('numeric', sign * Number(buffer))
    },

    // Inside the fractional digits of a decimal number.
    decimalFraction () {
        switch (c) {
        case 'e':
        case 'E':
            buffer += read()
            lexState = 'decimalExponent'
            return
        }

        if (util.isDigit(c)) {
            buffer += read()
            return
        }

        return newToken('numeric', sign * Number(buffer))
    },

    // After 'e' or 'E': an optional sign, then at least one digit.
    decimalExponent () {
        switch (c) {
        case '+':
        case '-':
            buffer += read()
            lexState = 'decimalExponentSign'
            return
        }

        if (util.isDigit(c)) {
            buffer += read()
            lexState = 'decimalExponentInteger'
            return
        }

        throw invalidChar(read())
    },

    // After the exponent's sign: a digit is required.
    decimalExponentSign () {
        if (util.isDigit(c)) {
            buffer += read()
            lexState = 'decimalExponentInteger'
            return
        }

        throw invalidChar(read())
    },

    // Inside the exponent's digits.
    decimalExponentInteger () {
        if (util.isDigit(c)) {
            buffer += read()
            return
        }

        return newToken('numeric', sign * Number(buffer))
    },

    // After '0x' or '0X': at least one hexadecimal digit is required.
    hexadecimal () {
        if (util.isHexDigit(c)) {
            buffer += read()
            lexState = 'hexadecimalInteger'
            return
        }

        throw invalidChar(read())
    },

    // Inside the digits of a hexadecimal number.
    hexadecimalInteger () {
        if (util.isHexDigit(c)) {
            buffer += read()
            return
        }

        return newToken('numeric', sign * Number(buffer))
    },

    // Inside a string. Only the quote character that opened the string
    // closes it; the other quote character is ordinary content.
    string () {
        switch (c) {
        case '\\':
            read()
            buffer += escape()
            return

        case '"':
            if (doubleQuote) {
                read()
                return newToken('string', buffer)
            }

            buffer += read()
            return

        case "'":
            if (!doubleQuote) {
                read()
                return newToken('string', buffer)
            }

            buffer += read()
            return

        case '\n':
        case '\r':
            throw invalidChar(read())

        case '\u2028':
        case '\u2029':
            // Accepted, but a warning is emitted; the character is then
            // appended by the fall-through read below.
            separatorChar(c)
            break

        case undefined:
            throw invalidChar(read())
        }

        buffer += read()
    },

    // Lex state for the 'start' parse state: a container opener is returned
    // at once, anything else is lexed as a value.
    start () {
        switch (c) {
        case '{':
        case '[':
            return newToken('punctuator', read())

        // This code is unreachable since the default lexState handles eof.
        // case undefined:
        //     return newToken('eof')
        }

        lexState = 'value'
    },

    // Expecting a property name (identifier or string) or a closing '}'.
    beforePropertyName () {
        switch (c) {
        case '$':
        case '_':
            buffer = read()
            lexState = 'identifierName'
            return

        case '\\':
            read()
            lexState = 'identifierNameStartEscape'
            return

        case '}':
            return newToken('punctuator', read())

        case '"':
        case "'":
            doubleQuote = (read() === '"')
            lexState = 'string'
            return
        }

        if (util.isIdStartChar(c)) {
            buffer += read()
            lexState = 'identifierName'
            return
        }

        throw invalidChar(read())
    },

    // Expecting the ':' that separates a property name from its value.
    afterPropertyName () {
        if (c === ':') {
            return newToken('punctuator', read())
        }

        throw invalidChar(read())
    },

    // Expecting a property value.
    beforePropertyValue () {
        lexState = 'value'
    },

    // Expecting ',' or '}' after a property value.
    afterPropertyValue () {
        switch (c) {
        case ',':
        case '}':
            return newToken('punctuator', read())
        }

        throw invalidChar(read())
    },

    // Expecting an array element or a closing ']'.
    beforeArrayValue () {
        if (c === ']') {
            return newToken('punctuator', read())
        }

        lexState = 'value'
    },

    // Expecting ',' or ']' after an array element.
    afterArrayValue () {
        switch (c) {
        case ',':
        case ']':
            return newToken('punctuator', read())
        }

        throw invalidChar(read())
    },

    // After the top-level value: any further character is an error.
    end () {
        // This code is unreachable since it's handled by the default lexState.
        // if (c === undefined) {
        //     read()
        //     return newToken('eof')
        // }

        throw invalidChar(read())
    },
}
|
|
|
/**
 * Builds a token stamped with the current `line` and `column`.
 *
 * @param {string} type - The token type, e.g. 'punctuator', 'string', 'eof'.
 * @param {*} [value] - The token's value; undefined when omitted.
 * @returns {{type: string, value: *, line: number, column: number}}
 */
function newToken (type, value) {
    return {
        type,
        value,
        line,
        column,
    }
}
|
|
|
/**
 * Consumes the exact character sequence `s` from the input.
 *
 * @param {string} s - The characters that must appear next, in order.
 * @throws {SyntaxError} The error built by `invalidChar` for the first
 *   character (or end of input) that does not match.
 */
function literal (s) {
    for (const expected of s) {
        const p = peek()

        if (p !== expected) {
            throw invalidChar(read())
        }

        read()
    }
}
|
|
|
/**
 * Reads the remainder of a string escape sequence; the backslash has already
 * been consumed by the caller.
 *
 * Recognises the single-character escapes b, f, n, r, t and v, `\0` (only
 * when not followed by a digit), `\x` and `\u` escapes, and line
 * continuations (a backslash before a line terminator yields an empty
 * string, with CR LF treated as one terminator). The digits 1-9 and end of
 * input are rejected. Any other character stands for itself.
 *
 * @returns {string} The text the escape sequence represents.
 * @throws {SyntaxError} The error built by `invalidChar` for a bad escape.
 */
function escape () {
    const c = peek()
    switch (c) {
    case 'b':
        read()
        return '\b'

    case 'f':
        read()
        return '\f'

    case 'n':
        read()
        return '\n'

    case 'r':
        read()
        return '\r'

    case 't':
        read()
        return '\t'

    case 'v':
        read()
        return '\v'

    case '0':
        read()
        if (util.isDigit(peek())) {
            throw invalidChar(read())
        }

        return '\0'

    case 'x':
        read()
        return hexEscape()

    case 'u':
        read()
        return unicodeEscape()

    case '\n':
    case '\u2028':
    case '\u2029':
        read()
        return ''

    case '\r':
        read()
        if (peek() === '\n') {
            read()
        }

        return ''

    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
        throw invalidChar(read())

    case undefined:
        throw invalidChar(read())
    }

    return read()
}
|
|
|
/**
 * Reads the two hexadecimal digits of a `\x` escape (the `\x` itself has
 * already been consumed) and returns the character with that code.
 *
 * @returns {string} The decoded character.
 * @throws {SyntaxError} The error built by `invalidChar` when either of the
 *   two characters is not a hexadecimal digit.
 */
function hexEscape () {
    let buffer = ''
    let c = peek()

    if (!util.isHexDigit(c)) {
        throw invalidChar(read())
    }

    buffer += read()

    c = peek()
    if (!util.isHexDigit(c)) {
        throw invalidChar(read())
    }

    buffer += read()

    return String.fromCodePoint(parseInt(buffer, 16))
}
|
|
|
/**
 * Reads the four hexadecimal digits of a `\u` escape (the `\u` itself has
 * already been consumed) and returns the character with that code.
 *
 * @returns {string} The decoded character.
 * @throws {SyntaxError} The error built by `invalidChar` when any of the
 *   four characters is not a hexadecimal digit.
 */
function unicodeEscape () {
    let buffer = ''
    let count = 4

    while (count-- > 0) {
        const c = peek()
        if (!util.isHexDigit(c)) {
            throw invalidChar(read())
        }

        buffer += read()
    }

    return String.fromCodePoint(parseInt(buffer, 16))
}
|
|
|
// Parser state machine. `parse` calls `parseStates[parseState]()` once for
// every token in `token`. Because the lexer already dispatches on the parse
// state, most invalid tokens are rejected there; these handlers mainly
// reject premature end of input and advance `parseState`.
const parseStates = {
    // Expecting the top-level value.
    start () {
        if (token.type === 'eof') {
            throw invalidEOF()
        }

        push()
    },

    // Expecting a property name, or the '}' that closes the object.
    beforePropertyName () {
        switch (token.type) {
        case 'identifier':
        case 'string':
            key = token.value
            parseState = 'afterPropertyName'
            return

        case 'punctuator':
            // This code is unreachable since it's handled by the lexState.
            // if (token.value !== '}') {
            //     throw invalidToken()
            // }

            pop()
            return

        case 'eof':
            throw invalidEOF()
        }

        // This code is unreachable since it's handled by the lexState.
        // throw invalidToken()
    },

    // Expecting the ':' after a property name.
    afterPropertyName () {
        // This code is unreachable since it's handled by the lexState.
        // if (token.type !== 'punctuator' || token.value !== ':') {
        //     throw invalidToken()
        // }

        if (token.type === 'eof') {
            throw invalidEOF()
        }

        parseState = 'beforePropertyValue'
    },

    // Expecting a property value.
    beforePropertyValue () {
        if (token.type === 'eof') {
            throw invalidEOF()
        }

        push()
    },

    // Expecting an array element, or the ']' that closes the array.
    beforeArrayValue () {
        if (token.type === 'eof') {
            throw invalidEOF()
        }

        if (token.type === 'punctuator' && token.value === ']') {
            pop()
            return
        }

        push()
    },

    // Expecting ',' (another property follows) or '}' (object is complete).
    afterPropertyValue () {
        // This code is unreachable since it's handled by the lexState.
        // if (token.type !== 'punctuator') {
        //     throw invalidToken()
        // }

        if (token.type === 'eof') {
            throw invalidEOF()
        }

        switch (token.value) {
        case ',':
            parseState = 'beforePropertyName'
            return

        case '}':
            pop()
        }

        // This code is unreachable since it's handled by the lexState.
        // throw invalidToken()
    },

    // Expecting ',' (another element follows) or ']' (array is complete).
    afterArrayValue () {
        // This code is unreachable since it's handled by the lexState.
        // if (token.type !== 'punctuator') {
        //     throw invalidToken()
        // }

        if (token.type === 'eof') {
            throw invalidEOF()
        }

        switch (token.value) {
        case ',':
            parseState = 'beforeArrayValue'
            return

        case ']':
            pop()
        }

        // This code is unreachable since it's handled by the lexState.
        // throw invalidToken()
    },

    // After the top-level value: nothing to do for the final token.
    end () {
        // This code is unreachable since it's handled by the lexState.
        // if (token.type !== 'eof') {
        //     throw invalidToken()
        // }
    },
}
|
|
|
/**
 * Turns the current token into a value, attaches it to the document, and
 * selects the next parse state.
 *
 * A '{' or '[' token creates an empty object or array; null, boolean,
 * numeric and string tokens contribute their `value`. The first value
 * becomes `root`; later values are appended to the innermost open array or
 * stored under `key` on the innermost open object. A new container is pushed
 * onto `stack` and parsing continues inside it; after a primitive, the next
 * state depends on the innermost open container, or is 'end' when none is
 * open.
 */
function push () {
    let value

    switch (token.type) {
    case 'punctuator':
        switch (token.value) {
        case '{':
            value = {}
            break

        case '[':
            value = []
            break
        }

        break

    case 'null':
    case 'boolean':
    case 'numeric':
    case 'string':
        value = token.value
        break

    // This code is unreachable.
    // default:
    //     throw invalidToken()
    }

    if (root === undefined) {
        root = value
    } else {
        const parent = stack[stack.length - 1]
        if (Array.isArray(parent)) {
            parent.push(value)
        } else {
            // Plain assignment with a key of `__proto__` would invoke the
            // inherited setter and replace the object's prototype instead of
            // creating a property. Defining the property always produces an
            // ordinary own data property, whatever the key is.
            Object.defineProperty(parent, key, {
                value,
                writable: true,
                enumerable: true,
                configurable: true,
            })
        }
    }

    if (value !== null && typeof value === 'object') {
        stack.push(value)

        if (Array.isArray(value)) {
            parseState = 'beforeArrayValue'
        } else {
            parseState = 'beforePropertyName'
        }
    } else {
        const current = stack[stack.length - 1]
        if (current == null) {
            parseState = 'end'
        } else if (Array.isArray(current)) {
            parseState = 'afterArrayValue'
        } else {
            parseState = 'afterPropertyValue'
        }
    }
}
|
|
|
/**
 * Closes the innermost open container and selects the next parse state from
 * the container that is now innermost: 'afterArrayValue' for an array,
 * 'afterPropertyValue' for an object, or 'end' when none remains open.
 */
function pop () {
    stack.pop()

    const current = stack[stack.length - 1]
    if (current == null) {
        parseState = 'end'
    } else if (Array.isArray(current)) {
        parseState = 'afterArrayValue'
    } else {
        parseState = 'afterPropertyValue'
    }
}
|
|
|
// This code is unreachable.
// function invalidParseState () {
//     return new Error(`JSON5: invalid parse state '${parseState}'`)
// }
|
|
|
// This code is unreachable.
// function invalidLexState (state) {
//     return new Error(`JSON5: invalid lex state '${state}'`)
// }
|
|
|
/**
 * Builds the error for an unexpected character at the current position.
 *
 * @param {string|undefined} c - The offending character, or undefined when
 *   the input ended unexpectedly.
 * @returns {SyntaxError} The error from `syntaxError`; the caller throws it.
 */
function invalidChar (c) {
    if (c === undefined) {
        return syntaxError(`JSON5: invalid end of input at ${line}:${column}`)
    }

    return syntaxError(`JSON5: invalid character '${formatChar(c)}' at ${line}:${column}`)
}
|
|
|
/**
 * Builds the error for input that ends before the document is complete.
 *
 * @returns {SyntaxError} The error from `syntaxError`; the caller throws it.
 */
function invalidEOF () {
    return syntaxError(`JSON5: invalid end of input at ${line}:${column}`)
}
|
|
|
// This code is unreachable.
// function invalidToken () {
//     if (token.type === 'eof') {
//         return syntaxError(`JSON5: invalid end of input at ${line}:${column}`)
//     }
//
//     const c = String.fromCodePoint(token.value.codePointAt(0))
//     return syntaxError(`JSON5: invalid character '${formatChar(c)}' at ${line}:${column}`)
// }
|
|
|
/**
 * Builds the error for a unicode escape that decodes to a character not
 * allowed in an identifier.
 *
 * Side effect: moves the shared `column` back by 5 before reporting.
 * NOTE(review): presumably this points the error at the escape sequence that
 * was just consumed rather than at the character after it; confirm.
 *
 * @returns {SyntaxError} The error from `syntaxError`; the caller throws it.
 */
function invalidIdentifier () {
    column -= 5
    return syntaxError(`JSON5: invalid identifier character at ${line}:${column}`)
}
|
|
|
/**
 * Warns on the console that a character found unescaped inside a string is
 * not valid ECMAScript. Parsing is not interrupted.
 *
 * @param {string} c - The character to mention in the warning.
 */
function separatorChar (c) {
    console.warn(`JSON5: '${formatChar(c)}' in strings is not valid ECMAScript; consider escaping`)
}
|
|
|
/**
 * Renders a character for use inside an error or warning message.
 *
 * Quotes, the backslash, common control characters and the line and
 * paragraph separators are shown as their escape sequences. Any other
 * character below U+0020 is shown as a two-digit `\xNN` escape. Everything
 * else is returned unchanged.
 *
 * @param {string} c - The character to render.
 * @returns {string} The printable form of `c`.
 */
function formatChar (c) {
    const replacements = {
        "'": "\\'",
        '"': '\\"',
        '\\': '\\\\',
        '\b': '\\b',
        '\f': '\\f',
        '\n': '\\n',
        '\r': '\\r',
        '\t': '\\t',
        '\v': '\\v',
        '\0': '\\0',
        '\u2028': '\\u2028',
        '\u2029': '\\u2029',
    }

    if (replacements[c]) {
        return replacements[c]
    }

    if (c < ' ') {
        // Left-pad the hexadecimal code to exactly two digits.
        const hexString = c.charCodeAt(0).toString(16)
        return '\\x' + ('00' + hexString).substring(hexString.length)
    }

    return c
}
|
|
|
/**
 * Creates a SyntaxError that also records the current read position.
 *
 * @param {string} message - The error message.
 * @returns {SyntaxError} An error with `lineNumber` and `columnNumber` set
 *   from the shared `line` and `column`.
 */
function syntaxError (message) {
    const err = new SyntaxError(message)
    err.lineNumber = line
    err.columnNumber = column
    return err
}
|
|
|