12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752 |
- /*
- * Author: Alex Kocharin <alex@kocharin.ru>
- * GIT: https://github.com/rlidwka/jju
- * License: WTFPL, grab your copy here: http://www.wtfpl.net/txt/copying/
- */
- // RTFM: http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-262.pdf
- var Uni = require('./unicode')
/**
 * Tell whether `x` is a hexadecimal digit (0-9, A-F or a-f).
 *
 * @param {string|undefined} x  Character to test; `undefined` (e.g. a read
 *                              past the end of the input) yields false.
 * @returns {boolean}
 */
function isHexDigit(x) {
  if (x >= '0' && x <= '9') return true
  if (x >= 'A' && x <= 'F') return true
  return x >= 'a' && x <= 'f'
}
/**
 * Tell whether `x` is an octal digit (0-7).
 *
 * @param {string|undefined} x  Character to test; `undefined` yields false.
 * @returns {boolean}
 */
function isOctDigit(x) {
  var belowRange = !(x >= '0')
  var aboveRange = !(x <= '7')
  return !(belowRange || aboveRange)
}
/**
 * Tell whether `x` is a decimal digit (0-9).
 *
 * @param {string|undefined} x  Character to test; `undefined` yields false.
 * @returns {boolean}
 */
function isDecDigit(x) {
  if (x >= '0') {
    return x <= '9'
  }
  return false
}
// Single-character escape table used by the string parser: the key is the
// character that follows a backslash, the value is the character it denotes.
var unescapeMap = {
  // characters that simply stand for themselves
  "'" : "'",
  '"' : '"',
  '\\': '\\',
  '/' : '/',
  // control characters
  'b' : '\b',
  'f' : '\f',
  'n' : '\n',
  'r' : '\r',
  't' : '\t',
  'v' : '\v',
}
/**
 * Build a three-line error message: "<msg> at <row>:<col>", the offending
 * source line (or a window of it), and a caret line pointing at `position`.
 *
 * @param {string}  input     Full source text.
 * @param {string}  msg       Description of the problem.
 * @param {number}  position  Absolute offset of the error in `input`.
 * @param {number}  lineno    Zero-based line number (printed one-based).
 * @param {number}  column    Zero-based column (printed one-based).
 * @param {boolean} json5     Selects the JSON5 or the strict-JSON notion of
 *                            a line terminator.
 * @returns {string}
 */
function formatError(input, msg, position, lineno, column, json5) {
  var isLineTerminator = json5 ? Uni.isLineTerminator : Uni.isLineTerminatorJSON
  var header = msg + ' at ' + (lineno + 1) + ':' + (column + 1)
  var excerpt = ''
  var marker = ''

  // begin just before the start of the line, but show no more than
  // 70 characters ahead of the offending one
  var cursor = Math.max(position - column - 1, position - 70)

  for (;;) {
    cursor++
    var chr = input[cursor]

    if (isLineTerminator(chr) || cursor === input.length) {
      // the error sits at the very end of the line: point right after it
      if (position >= cursor) marker += '^'
      break
    }

    excerpt += chr

    if (position === cursor) {
      marker += '^'
    } else if (position > cursor) {
      // keep tabs so that the caret lines up with the excerpt above
      marker += chr === '\t' ? '\t' : ' '
    }

    // output no more than 78 characters on the string
    if (excerpt.length > 78) break
  }

  return header + '\n' + excerpt + '\n' + marker
}
/**
 * Recursive-descent parser for JSON and JSON5 text.
 *
 * @param {string} input      Source text.
 * @param {Object} [options]
 *   - mode / legacy   `mode === 'json'` or a truthy `legacy` selects strict
 *                     JSON; anything else enables the JSON5 extensions.
 *   - reviver         function(key, value) applied to every object member,
 *                     every array element (key is the index as a string) and
 *                     finally to the root value (key is '').
 *   - reserved_keys   what to do with keys that already exist on a plain
 *                     object (e.g. "constructor"): 'replace' defines them,
 *                     'throw' rejects them, anything else skips them.
 *   - null_prototype  when truthy, objects are built with Object.create(null).
 *   - _tokenize       internal: callback receiving one hash per token,
 *                     { raw, type, stack[, value] }.
 * @returns {*} The parsed value.
 * @throws {SyntaxError} on malformed input; the error carries one-based
 *                       `row` and `column` properties.
 */
function parse(input, options) {
  if (options == null) options = {}

  // parse as a standard JSON mode
  var json5 = !(options.mode === 'json' || options.legacy)
  var isLineTerminator = json5 ? Uni.isLineTerminator : Uni.isLineTerminatorJSON
  var isWhiteSpace = json5 ? Uni.isWhiteSpace : Uni.isWhiteSpaceJSON

  var length = input.length
  var lineno = 0      // zero-based current line
  var linestart = 0   // offset of the first character of the current line
  var position = 0    // offset of the next character to read
  var stack = []      // keys / indexes leading to the value being parsed

  // Token hooks are no-ops unless a tokenizer callback was supplied.
  var tokenStart = function() {}
  var tokenEnd = function(v) { return v }

  /* each token handed to options._tokenize looks like:
       {
         raw:   '...',
         type:  'whitespace'|'comment'|'key'|'literal'|'separator'|'newline',
         value: parsed value (only when there is one),
         stack: [...],
       }
  */
  if (options._tokenize) {
    var tokenStartPos = null

    tokenStart = function() {
      if (tokenStartPos !== null) throw Error('internal error, token overlap')
      tokenStartPos = position
    }

    tokenEnd = function(v, type) {
      // zero-length tokens are not reported
      if (tokenStartPos !== position) {
        var hash = {
          raw: input.substr(tokenStartPos, position - tokenStartPos),
          type: type,
          stack: stack.slice(0),
        }
        if (v !== undefined) hash.value = v
        options._tokenize.call(null, hash)
      }
      tokenStartPos = null
      return v
    }
  }

  // Throw a SyntaxError located at the current position. Without a message,
  // a generic "unexpected token / end of input" one is produced.
  function fail(msg) {
    var column = position - linestart

    if (!msg) {
      if (position < length) {
        // show the character the way it would look inside a single-quoted string
        var token = '\'' +
          JSON
            .stringify(input[position])
            .replace(/^"|"$/g, '')
            .replace(/'/g, "\\'")
            .replace(/\\"/g, '"')
          + '\''
        msg = 'Unexpected token ' + token
      } else {
        msg = 'Unexpected end of input'
      }
    }

    var error = SyntaxError(formatError(input, msg, position, lineno, column, json5))
    error.row = lineno + 1
    error.column = column + 1
    throw error
  }

  // Register a line break; `chr` is the terminator that was just consumed.
  function newline(chr) {
    // account for <cr><lf>
    if (chr === '\r' && input[position] === '\n') position++
    linestart = position
    lineno++
  }

  // Parse any value. Returns undefined (consuming nothing) when the next
  // character cannot start a value or the input is exhausted.
  function parseGeneric() {
    if (position >= length) return undefined

    tokenStart()
    var chr = input[position++]

    if (chr === '"' || (chr === '\'' && json5)) {
      return tokenEnd(parseString(chr), 'literal')

    } else if (chr === '{') {
      tokenEnd(undefined, 'separator')
      return parseObject()

    } else if (chr === '[') {
      tokenEnd(undefined, 'separator')
      return parseArray()

    } else if (chr === '-'
           ||  chr === '.'
           ||  isDecDigit(chr)
               // + number Infinity NaN
           ||  (json5 && (chr === '+' || chr === 'I' || chr === 'N'))
    ) {
      return tokenEnd(parseNumber(), 'literal')

    } else if (chr === 'n') {
      parseKeyword('null')
      return tokenEnd(null, 'literal')

    } else if (chr === 't') {
      parseKeyword('true')
      return tokenEnd(true, 'literal')

    } else if (chr === 'f') {
      parseKeyword('false')
      return tokenEnd(false, 'literal')

    } else {
      position--
      return tokenEnd(undefined)
    }
  }

  // Parse an object key. Returns undefined (consuming nothing) when no key
  // starts here. Objects, arrays and numbers are parsed too, so that
  // parseObject can report them as a wrong key type.
  function parseKey() {
    if (position >= length) return undefined

    tokenStart()
    var chr = input[position++]

    if (chr === '"' || (chr === '\'' && json5)) {
      return tokenEnd(parseString(chr), 'key')

    } else if (chr === '{') {
      tokenEnd(undefined, 'separator')
      return parseObject()

    } else if (chr === '[') {
      tokenEnd(undefined, 'separator')
      return parseArray()

    } else if (chr === '.' || isDecDigit(chr)) {
      return tokenEnd(parseNumber(), 'key')

    } else if (json5
           &&  (Uni.isIdentifierStart(chr) || (chr === '\\' && input[position] === 'u'))) {
      // unquoted identifier (JSON5 only): starts with an identifier
      // character or with a \uXXXX escape sequence
      var rollback = position - 1
      var identifier = parseIdentifier()

      if (identifier === undefined) {
        position = rollback
        return tokenEnd(undefined)
      } else {
        return tokenEnd(identifier, 'key')
      }

    } else {
      position--
      return tokenEnd(undefined)
    }
  }

  // Consume whitespace, line breaks and (JSON5 only) comments.
  function skipWhiteSpace() {
    tokenStart()
    while (position < length) {
      var chr = input[position++]

      if (isLineTerminator(chr)) {
        // close the whitespace token, emit the line break as its own token
        position--
        tokenEnd(undefined, 'whitespace')
        tokenStart()
        position++
        newline(chr)
        tokenEnd(undefined, 'newline')
        tokenStart()

      } else if (isWhiteSpace(chr)) {
        // nothing

      } else if (chr === '/'
             && json5
             && (input[position] === '/' || input[position] === '*')
      ) {
        // close the whitespace token, emit the comment as its own token
        position--
        tokenEnd(undefined, 'whitespace')
        tokenStart()
        position++
        skipComment(input[position++] === '*')
        tokenEnd(undefined, 'comment')
        tokenStart()

      } else {
        position--
        break
      }
    }
    return tokenEnd(undefined, 'whitespace')
  }

  // Consume the rest of a comment whose opening two characters were already
  // read; `multi` is true for /* ... */ and false for // ...
  function skipComment(multi) {
    while (position < length) {
      var chr = input[position++]

      if (isLineTerminator(chr)) {
        // LineTerminator is an end of singleline comment
        if (!multi) {
          // let parent function deal with newline
          position--
          return
        }

        newline(chr)

      } else if (chr === '*' && multi) {
        // end of multiline comment
        if (input[position] === '/') {
          position++
          return
        }
      }
    }

    if (multi) {
      fail('Unclosed multiline comment')
    }
  }

  // Consume the remainder of `keyword`; fails pointing at its first character.
  function parseKeyword(keyword) {
    // keyword[0] is not checked because it should've checked earlier
    var keywordStart = position - 1
    var len = keyword.length
    for (var i = 1; i < len; i++) {
      if (position >= length || keyword[i] !== input[position]) {
        position = keywordStart
        fail()
      }
      position++
    }
  }

  // Parse an object body; the opening '{' was already consumed.
  function parseObject() {
    var result = options.null_prototype ? Object.create(null) : {}
    var empty_object = {}     // reference for detecting reserved keys
    var is_non_empty = false
    var chr

    while (position < length) {
      skipWhiteSpace()
      var item1 = parseKey()
      skipWhiteSpace()
      tokenStart()
      chr = input[position++]
      tokenEnd(undefined, 'separator')

      if (chr === '}' && item1 === undefined) {
        if (!json5 && is_non_empty) {
          position--
          fail('Trailing comma in object')
        }
        return result

      } else if (chr === ':' && item1 !== undefined) {
        skipWhiteSpace()
        stack.push(item1)
        var item2 = parseGeneric()
        stack.pop()

        if (item2 === undefined) fail('No value found for key ' + item1)
        if (typeof(item1) !== 'string') {
          // numeric keys are a JSON5 extension
          if (!json5 || typeof(item1) !== 'number') {
            fail('Wrong key type: ' + item1)
          }
        }

        if ((item1 in empty_object || empty_object[item1] != null) && options.reserved_keys !== 'replace') {
          if (options.reserved_keys === 'throw') {
            fail('Reserved key: ' + item1)
          } else {
            // silently ignore it
          }
        } else {
          if (typeof(options.reviver) === 'function') {
            item2 = options.reviver.call(null, item1, item2)
          }

          // a reviver returning undefined drops the member
          if (item2 !== undefined) {
            is_non_empty = true
            Object.defineProperty(result, item1, {
              value: item2,
              enumerable: true,
              configurable: true,
              writable: true,
            })
          }
        }

        skipWhiteSpace()

        tokenStart()
        chr = input[position++]
        tokenEnd(undefined, 'separator')

        if (chr === ',') {
          continue

        } else if (chr === '}') {
          return result

        } else {
          fail()
        }

      } else {
        position--
        fail()
      }
    }

    fail()
  }

  // Parse an array body; the opening '[' was already consumed.
  function parseArray() {
    var result = []

    while (position < length) {
      skipWhiteSpace()
      stack.push(result.length)
      var item = parseGeneric()
      stack.pop()
      skipWhiteSpace()
      tokenStart()
      var chr = input[position++]
      tokenEnd(undefined, 'separator')

      if (item !== undefined) {
        if (typeof(options.reviver) === 'function') {
          item = options.reviver.call(null, String(result.length), item)
        }
        if (item === undefined) {
          // the reviver dropped the element: leave a hole in the array
          result.length++
          item = true // hack for check below, not included into result
        } else {
          result.push(item)
        }
      }

      if (chr === ',') {
        if (item === undefined) {
          fail('Elisions are not supported')
        }

      } else if (chr === ']') {
        if (!json5 && item === undefined && result.length) {
          position--
          fail('Trailing comma in array')
        }
        return result

      } else {
        position--
        fail()
      }
    }

    // the input ended before the closing bracket
    fail()
  }

  // Parse a numeric literal whose first character was already consumed.
  function parseNumber() {
    // rewind because we don't know first char
    position--

    var start = position
    var chr = input[position++]

    // Convert input[start..position) to a number, failing on bad literals.
    var to_num = function(is_octal) {
      var str = input.substr(start, position - start)
      var result = is_octal
                 // strip the "0" / "0o" / "0O" prefix before parsing
                 ? parseInt(str.replace(/^0o?/i, ''), 8)
                 : Number(str)

      if (Number.isNaN(result)) {
        position--
        fail('Bad numeric literal - "' + input.substr(start, position - start + 1) + '"')
      } else if (!json5 && !str.match(/^-?(0|[1-9][0-9]*)(\.[0-9]+)?(e[+-]?[0-9]+)?$/i)) {
        // additional restrictions imposed by json
        position--
        fail('Non-json numeric literal - "' + input.substr(start, position - start + 1) + '"')
      } else {
        return result
      }
    }

    // ex: -5982475.249875e+29384
    //     ^ skipping this
    if (chr === '-' || (chr === '+' && json5)) chr = input[position++]

    if (chr === 'N' && json5) {
      parseKeyword('NaN')
      return NaN
    }

    if (chr === 'I' && json5) {
      parseKeyword('Infinity')

      // returning +inf or -inf
      return to_num()
    }

    if (chr >= '1' && chr <= '9') {
      // ex: -5982475.249875e+29384
      //      ^^^^^^^ skipping these
      while (position < length && isDecDigit(input[position])) position++
      chr = input[position++]
    }

    // special case for leading zero: 0.123456
    if (chr === '0') {
      chr = input[position++]

      //             new syntax, "0o777"             old syntax, "0777"
      var is_octal = chr === 'o' || chr === 'O' || isOctDigit(chr)
      var is_hex = chr === 'x' || chr === 'X'

      if (json5 && (is_octal || is_hex)) {
        while (position < length
           &&  (is_hex ? isHexDigit : isOctDigit)( input[position] )
        ) position++

        // the sign is applied separately, so exclude it from the literal
        var sign = 1
        if (input[start] === '-') {
          sign = -1
          start++
        } else if (input[start] === '+') {
          start++
        }

        return sign * to_num(is_octal)
      }
    }

    if (chr === '.') {
      // ex: -5982475.249875e+29384
      //              ^^^^^^ skipping these
      while (position < length && isDecDigit(input[position])) position++
      chr = input[position++]
    }

    if (chr === 'e' || chr === 'E') {
      chr = input[position++]
      if (chr === '-' || chr === '+') position++
      // ex: -5982475.249875e+29384
      //                      ^^^^^ skipping these
      while (position < length && isDecDigit(input[position])) position++
      chr = input[position++]
    }

    // we have char in the buffer, so count for it
    position--
    return to_num()
  }

  // Parse an unquoted identifier whose first character was already consumed.
  // Returns undefined when no valid identifier starts here.
  function parseIdentifier() {
    // rewind because we don't know first char
    position--

    var result = ''

    while (position < length) {
      var chr = input[position++]

      if (chr === '\\'
      &&  input[position] === 'u'
      &&  isHexDigit(input[position+1])
      &&  isHexDigit(input[position+2])
      &&  isHexDigit(input[position+3])
      &&  isHexDigit(input[position+4])
      ) {
        // UnicodeEscapeSequence
        chr = String.fromCharCode(parseInt(input.substr(position+1, 4), 16))
        position += 5
      }

      if (result.length) {
        // identifier started
        if (Uni.isIdentifierPart(chr)) {
          result += chr
        } else {
          position--
          return result
        }

      } else {
        if (Uni.isIdentifierStart(chr)) {
          result += chr
        } else {
          return undefined
        }
      }
    }

    fail()
  }

  // Parse a string literal; the opening quote `endChar` was already consumed.
  function parseString(endChar) {
    // 7.8.4 of ES262 spec
    var result = ''

    while (position < length) {
      var chr = input[position++]

      if (chr === endChar) {
        return result

      } else if (chr === '\\') {
        if (position >= length) fail()
        chr = input[position++]

        // \v and \' are not valid escapes in strict JSON
        if (unescapeMap[chr] && (json5 || (chr !== 'v' && chr !== "'"))) {
          result += unescapeMap[chr]

        } else if (json5 && isLineTerminator(chr)) {
          // line continuation
          newline(chr)

        } else if (chr === 'u' || (chr === 'x' && json5)) {
          // unicode/character escape sequence
          var off = chr === 'u' ? 4 : 2

          // validation for \uXXXX
          for (var i = 0; i < off; i++) {
            if (position >= length) fail()
            if (!isHexDigit(input[position])) fail('Bad escape sequence')
            position++
          }

          result += String.fromCharCode(parseInt(input.substr(position-off, off), 16))

        } else if (json5 && isOctDigit(chr)) {
          // octal escape of one to three digits, at most \377
          var digits
          if (chr < '4' && isOctDigit(input[position]) && isOctDigit(input[position+1])) {
            digits = 3
          } else if (isOctDigit(input[position])) {
            digits = 2
          } else {
            digits = 1
          }
          position += digits - 1
          result += String.fromCharCode(parseInt(input.substr(position-digits, digits), 8))

        } else if (json5) {
          // \X -> x
          result += chr

        } else {
          position--
          fail()
        }

      } else if (isLineTerminator(chr)) {
        fail()

      } else {
        if (!json5 && chr.charCodeAt(0) < 32) {
          position--
          fail('Unexpected control character')
        }

        // SourceCharacter but not one of " or \ or LineTerminator
        result += chr
      }
    }

    fail()
  }

  skipWhiteSpace()
  var return_value = parseGeneric()

  if (return_value !== undefined || position < length) {
    skipWhiteSpace()

    if (position >= length) {
      if (typeof(options.reviver) === 'function') {
        return_value = options.reviver.call(null, '', return_value)
      }
      return return_value
    } else {
      // something is left after the value
      fail()
    }

  } else {
    if (position) {
      fail('No data, only a whitespace')
    } else {
      fail('No data, empty input')
    }
  }
}
/**
 * parse(text, options)
 *   or
 * parse(text, reviver)
 *
 * Public entry point: normalises the arguments, fills in option defaults
 * and runs the parser.
 *
 * @param {*} input  Text to parse. Non-strings are converted with String();
 *                   `undefined` is returned as-is so that
 *                   parse(stringify(x)) round-trips for undefined.
 * @param {Object|Function} [options]  Options object, or a reviver function
 *                   (legacy form). The object is never modified.
 * @returns {*} The parsed value.
 * @throws {SyntaxError} on malformed input, with one-based `row` and
 *                       `column` properties.
 */
module.exports.parse = function parseJSON(input, options) {
  // support legacy functions
  if (typeof(options) === 'function') {
    options = {
      reviver: options
    }
  }

  if (input === undefined) {
    // parse(stringify(x)) should be equal x
    // with JSON functions it is not 'cause of undefined
    // so we're fixing it
    return undefined
  }

  // JSON.parse compat
  if (typeof(input) !== 'string') input = String(input)

  // Defaults are written to a private object that inherits from the caller's
  // options, so every setting stays readable while the caller's object is
  // left untouched (otherwise defaults would leak into later calls that
  // reuse the same options object).
  var settings = (options !== null && typeof(options) === 'object')
               ? Object.create(options)
               : {}

  if (settings.reserved_keys == null) settings.reserved_keys = 'ignore'

  if (settings.reserved_keys === 'throw' || settings.reserved_keys === 'ignore') {
    // reserved keys can't end up in the result in these modes, so a
    // prototype-less object is used unless the caller decided otherwise
    if (settings.null_prototype == null) {
      settings.null_prototype = true
    }
  }

  try {
    return parse(input, settings)
  } catch(err) {
    // jju is a recursive parser, so JSON.parse("{{{{{{{") could blow up the stack
    //
    // this catch is used to skip all those internal calls
    if (err instanceof SyntaxError && err.row != null && err.column != null) {
      var old_err = err
      err = SyntaxError(old_err.message)
      err.column = old_err.column
      err.row = old_err.row
    }
    throw err
  }
}
/**
 * tokenize(text, options)
 *   or
 * tokenize(text, reviver)
 *
 * Parse `text` and collect every token met on the way.
 *
 * @param {*} input  Text to tokenize (same rules as for parse()).
 * @param {Object|Function} [options]  parse() options, or a reviver function.
 *                   `_addstack`, when set, is an array prepended to the
 *                   `stack` of every token. The object is never modified.
 * @returns {Array} Array of token hashes; its `data` property holds the
 *                  parsed value.
 * @throws {SyntaxError} on malformed input.
 */
module.exports.tokenize = function tokenizeJSON(input, options) {
  // accept the legacy (text, reviver) form the same way parse() does;
  // otherwise the callback below would be attached to the function and lost
  if (typeof(options) === 'function') {
    options = {
      reviver: options
    }
  }

  // The callback is installed on a private object inheriting from the
  // caller's options, so the caller's object never keeps a stale `_tokenize`.
  var settings = (options !== null && typeof(options) === 'object')
               ? Object.create(options)
               : {}

  var tokens = []

  settings._tokenize = function(smth) {
    if (settings._addstack) smth.stack.unshift.apply(smth.stack, settings._addstack)
    tokens.push(smth)
  }

  tokens.data = module.exports.parse(input, settings)
  return tokens
}
|